Re: [PATCH] powerpc/mm: Use refcount_t for refcount

2019-08-09 Thread Chuhong Yuan
On Fri, Aug 9, 2019 at 8:36 PM Michael Ellerman  wrote:
>
> Chuhong Yuan  writes:
> > Reference counters should preferably use refcount_t instead of
> > atomic_t.
> > This is because the implementation of refcount_t can prevent
> > overflows and detect possible use-after-free.
> > So convert atomic_t ref counters to refcount_t.
> >
> > Signed-off-by: Chuhong Yuan 
>
> Thanks.
>
> We don't have a fast implementation of refcount_t, so I'm worried this
> could cause a measurable performance regression.
>
> Did you benchmark it at all?
>

I did not benchmark it and I don't have the testing environment...

> cheers
>
> > diff --git a/arch/powerpc/mm/book3s64/mmu_context.c 
> > b/arch/powerpc/mm/book3s64/mmu_context.c
> > index 2d0cb5ba9a47..f836fd5a6abc 100644
> > --- a/arch/powerpc/mm/book3s64/mmu_context.c
> > +++ b/arch/powerpc/mm/book3s64/mmu_context.c
> > @@ -231,7 +231,7 @@ static void pmd_frag_destroy(void *pmd_frag)
> >   /* drop all the pending references */
> >   count = ((unsigned long)pmd_frag & ~PAGE_MASK) >> PMD_FRAG_SIZE_SHIFT;
> >   /* We allow PTE_FRAG_NR fragments from a PTE page */
> > - if (atomic_sub_and_test(PMD_FRAG_NR - count, &page->pt_frag_refcount)) {
> > + if (refcount_sub_and_test(PMD_FRAG_NR - count, &page->pt_frag_refcount)) {
> >   pgtable_pmd_page_dtor(page);
> >   __free_page(page);
> >   }
> > diff --git a/arch/powerpc/mm/book3s64/pgtable.c 
> > b/arch/powerpc/mm/book3s64/pgtable.c
> > index 7d0e0d0d22c4..40056896ce4e 100644
> > --- a/arch/powerpc/mm/book3s64/pgtable.c
> > +++ b/arch/powerpc/mm/book3s64/pgtable.c
> > @@ -277,7 +277,7 @@ static pmd_t *__alloc_for_pmdcache(struct mm_struct *mm)
> >   return NULL;
> >   }
> >
> > - atomic_set(&page->pt_frag_refcount, 1);
> > + refcount_set(&page->pt_frag_refcount, 1);
> >
> >   ret = page_address(page);
> >   /*
> > @@ -294,7 +294,7 @@ static pmd_t *__alloc_for_pmdcache(struct mm_struct *mm)
> >* count.
> >*/
> >   if (likely(!mm->context.pmd_frag)) {
> > - atomic_set(&page->pt_frag_refcount, PMD_FRAG_NR);
> > + refcount_set(&page->pt_frag_refcount, PMD_FRAG_NR);
> >   mm->context.pmd_frag = ret + PMD_FRAG_SIZE;
> >   }
> >   spin_unlock(&mm->page_table_lock);
> > @@ -317,8 +317,7 @@ void pmd_fragment_free(unsigned long *pmd)
> >  {
> >   struct page *page = virt_to_page(pmd);
> >
> > - BUG_ON(atomic_read(&page->pt_frag_refcount) <= 0);
> > - if (atomic_dec_and_test(&page->pt_frag_refcount)) {
> > + if (refcount_dec_and_test(&page->pt_frag_refcount)) {
> >   pgtable_pmd_page_dtor(page);
> >   __free_page(page);
> >   }
> > diff --git a/arch/powerpc/mm/pgtable-frag.c b/arch/powerpc/mm/pgtable-frag.c
> > index a7b05214760c..4ef8231b677f 100644
> > --- a/arch/powerpc/mm/pgtable-frag.c
> > +++ b/arch/powerpc/mm/pgtable-frag.c
> > @@ -24,7 +24,7 @@ void pte_frag_destroy(void *pte_frag)
> >   /* drop all the pending references */
> >   count = ((unsigned long)pte_frag & ~PAGE_MASK) >> PTE_FRAG_SIZE_SHIFT;
> >   /* We allow PTE_FRAG_NR fragments from a PTE page */
> > - if (atomic_sub_and_test(PTE_FRAG_NR - count, &page->pt_frag_refcount)) {
> > + if (refcount_sub_and_test(PTE_FRAG_NR - count, &page->pt_frag_refcount)) {
> >   pgtable_page_dtor(page);
> >   __free_page(page);
> >   }
> > @@ -71,7 +71,7 @@ static pte_t *__alloc_for_ptecache(struct mm_struct *mm, 
> > int kernel)
> >   return NULL;
> >   }
> >
> > - atomic_set(&page->pt_frag_refcount, 1);
> > + refcount_set(&page->pt_frag_refcount, 1);
> >
> >   ret = page_address(page);
> >   /*
> > @@ -87,7 +87,7 @@ static pte_t *__alloc_for_ptecache(struct mm_struct *mm, 
> > int kernel)
> >* count.
> >*/
> >   if (likely(!pte_frag_get(&mm->context))) {
> > - atomic_set(&page->pt_frag_refcount, PTE_FRAG_NR);
> > + refcount_set(&page->pt_frag_refcount, PTE_FRAG_NR);
> >   pte_frag_set(&mm->context, ret + PTE_FRAG_SIZE);
> >   }
> >   spin_unlock(&mm->page_table_lock);
> > @@ -110,8 +110,7 @@ void pte_fragment_free(unsigned long *table, int kernel)
> >  {
> >   struct page *page = virt_to_page(table);
> >
> > - BUG_ON(atomic_read(&page->pt_frag_refcount) <= 0);
> > - if (atomic_dec_and_test(&page->pt_frag_refcount)) {
> > + if (refcount_dec_and_test(&page->pt_frag_refcount)) {
> >   if (!kernel)
> >   pgtable_page_dtor(page);
> >   __free_page(page);
> > diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
> > index 3a37a89eb7a7..7fe23a3faf95 100644
> > --- a/include/linux/mm_types.h
> > +++ b/include/linux/mm_types.h
> > @@ -14,6 +14,7 @@
> >  #include 
> >  #include 
> >  #include 
> > +#include <linux/refcount.h>
> >
> >  #include 
> >
> > @@ -147,7 +148,7 @@ struct page {
> >   unsigned long _pt_pad_2;/* mapping */
> >   union {
> > 

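For reference, here is a minimal sketch of the refcount_t API the patch above switches
to (kernel context assumed; the struct and helpers below are illustrative only, the
refcount_*() calls come from <linux/refcount.h>):

#include <linux/refcount.h>
#include <linux/slab.h>

struct frag_demo {
        refcount_t ref;                 /* was: atomic_t ref; */
};

static void frag_get(struct frag_demo *f)
{
        /* WARNs and saturates on overflow instead of silently wrapping */
        refcount_inc(&f->ref);          /* was: atomic_inc(&f->ref); */
}

static void frag_put(struct frag_demo *f)
{
        /*
         * refcount_dec_and_test() WARNs on a 0 -> -1 underflow, which is why
         * the patch can drop the open-coded BUG_ON(atomic_read(...) <= 0).
         */
        if (refcount_dec_and_test(&f->ref))     /* was: atomic_dec_and_test() */
                kfree(f);
}

That extra checking is also what motivates the performance question above on
architectures that do not provide a fast arch-specific refcount implementation.
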
Re: [PATCH] powerpc: fix inline asm constraints for dcbz

2019-08-09 Thread Segher Boessenkool
On Fri, Aug 09, 2019 at 10:12:56PM +0200, Arnd Bergmann wrote:
> @@ -106,7 +106,7 @@ static inline u##size name(const volatile u##size
> __iomem *addr)\
>  {  \
> u##size ret;\
> __asm__ __volatile__("sync;"#insn" %0,%y1;twi 0,%0,0;isync" \
> -   : "=r" (ret) : "Z" (*addr) : "memory"); \
> +   : "=r" (ret) : "m" (*addr) : "memory"); \
> return ret; \
>  }

That will no longer compile something like
  u8 *p;
  u16 x = in_le16(p + 12);
(you'll get something like "invalid %y value, try using the 'Z' constraint").

So then you remove the %y, but that makes you get something like
  sync;lhbrx 3,12(3);twi 0,3,0;isync
which is completely wrong.
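
For reference, a minimal sketch of the indexed-form pattern under discussion
(illustrative only, not a replacement for the io.h accessors): "Z" restricts the
operand to an address an X-form instruction can encode, and the %y modifier prints it
as the "rA,rB" pair such instructions expect, whereas "m" also allows a displacement
form like 12(3) that lhbrx/dcbz cannot take.

/* Illustrative only: byte-reversed 16-bit load via lhbrx. "Z" plus %y make the
 * compiler materialize the address in registers, so the operand prints as
 * "0,rN" or "rA,rB", never as a D-form displacement such as 12(3).
 */
static inline unsigned short lhbrx_demo(const volatile unsigned short *addr)
{
        unsigned short val;

        __asm__ __volatile__("lhbrx %0,%y1" : "=r" (val) : "Z" (*addr));
        return val;
}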


Segher


Re: [PATCH] powerpc: fix inline asm constraints for dcbz

2019-08-09 Thread Nick Desaulniers
On Fri, Aug 9, 2019 at 1:13 PM Arnd Bergmann  wrote:
>
> On Fri, Aug 9, 2019 at 10:02 PM Christophe Leroy
>  wrote:
> >
> > > Arnd Bergmann  wrote:
> > > On Fri, Aug 9, 2019 at 8:21 PM 'Nick Desaulniers' via Clang Built
> > > Linux  wrote:
> > >
> > >>  static inline void dcbz(void *addr)
> > >>  {
> > >> -   __asm__ __volatile__ ("dcbz %y0" : : "Z"(*(u8 *)addr) : 
> > >> "memory");
> > >> +   __asm__ __volatile__ ("dcbz %y0" : "=Z"(*(u8 *)addr) :: 
> > >> "memory");
> > >>  }
> > >>
> > >>  static inline void dcbi(void *addr)
> > >>  {
> > >> -   __asm__ __volatile__ ("dcbi %y0" : : "Z"(*(u8 *)addr) : 
> > >> "memory");
> > >> +   __asm__ __volatile__ ("dcbi %y0" : "=Z"(*(u8 *)addr) :: 
> > >> "memory");
> > >>  }
> > >
> > > I think the result of the discussion was that an output argument only 
> > > kind-of
> > > makes sense for dcbz, but for the others it's really an input, and clang 
> > > is
> > > wrong in the way it handles the "Z" constraint by making a copy, which it
> > > doesn't do for "m".
> > >
> > > I'm not sure whether it's correct to use "m" instead of "Z" here, which
> > > would be a better workaround if that works. More importantly though,
> > > clang really needs to be fixed to handle "Z" correctly.
> >
> > As the benefit is null, I think the best is probably to revert my
> > original commit until at least Clang is fixed, as initially suggested
> > by mpe.
>
> Yes, makes sense.
>
> There is one other use of the "Z" constraint, so on top of the revert, I
> think it might be helpful if Nick could check if the patch below makes
> any difference with clang and, if it does, whether the current version
> is broken.
>
>Arnd
>
> diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h
> index 23e5d5d16c7e..28b467779328 100644
> --- a/arch/powerpc/include/asm/io.h
> +++ b/arch/powerpc/include/asm/io.h
> @@ -106,7 +106,7 @@ static inline u##size name(const volatile u##size
> __iomem *addr)\
>  {  \
> u##size ret;\
> __asm__ __volatile__("sync;"#insn" %0,%y1;twi 0,%0,0;isync" \
> -   : "=r" (ret) : "Z" (*addr) : "memory"); \
> +   : "=r" (ret) : "m" (*addr) : "memory"); \
> return ret; \
>  }
>
> @@ -114,7 +114,7 @@ static inline u##size name(const volatile u##size
> __iomem *addr)\
>  static inline void name(volatile u##size __iomem *addr, u##size val)   \
>  {  \
> __asm__ __volatile__("sync;"#insn" %1,%y0"  \
> -   : "=Z" (*addr) : "r" (val) : "memory"); \
> +   : "=m" (*addr) : "r" (val) : "memory"); \
> mmiowb_set_pending();   \
>  }

Does not work:
https://travis-ci.com/ClangBuiltLinux/continuous-integration/builds/122654899
https://github.com/ClangBuiltLinux/continuous-integration/pull/197/files#diff-40bd16e3188587e4d648c30e0c2d6d37

-- 
Thanks,
~Nick Desaulniers


Re: [PATCH] powerpc: fix inline asm constraints for dcbz

2019-08-09 Thread Segher Boessenkool
On Fri, Aug 09, 2019 at 10:03:01PM +0200, Christophe Leroy wrote:
> Arnd Bergmann  wrote:
> 
> >On Fri, Aug 9, 2019 at 8:21 PM 'Nick Desaulniers' via Clang Built
> >Linux  wrote:
> >
> >> static inline void dcbz(void *addr)
> >> {
> >>-   __asm__ __volatile__ ("dcbz %y0" : : "Z"(*(u8 *)addr) : "memory");
> >>+   __asm__ __volatile__ ("dcbz %y0" : "=Z"(*(u8 *)addr) :: "memory");
> >> }
> >>
> >> static inline void dcbi(void *addr)
> >> {
> >>-   __asm__ __volatile__ ("dcbi %y0" : : "Z"(*(u8 *)addr) : "memory");
> >>+   __asm__ __volatile__ ("dcbi %y0" : "=Z"(*(u8 *)addr) :: "memory");
> >> }
> >
> >I think the result of the discussion was that an output argument only 
> >kind-of
> >makes sense for dcbz, but for the others it's really an input, and clang is
> >wrong in the way it handles the "Z" constraint by making a copy, which it
> >doesn't do for "m".
> >
> >I'm not sure whether it's correct to use "m" instead of "Z" here, which
> >would be a better workaround if that works. More importantly though,
> >clang really needs to be fixed to handle "Z" correctly.
> 
> As the benefit is null, I think the best is probably to revert my
> original commit until at least Clang is fixed, as initially suggested
> by mpe.

And what about the other uses of "Z"?


Also, if you use C routines (instead of assembler code) for the basic
"clear a block" and similar routines, as there have been patches for
recently, the benefit is not zero.
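
For illustration, a minimal sketch of such a C "clear a block" routine built on the
dcbz() helper quoted earlier in the thread (it assumes the buffer is
L1_CACHE_BYTES-aligned and a whole number of cache lines long):

#include <asm/cache.h>          /* L1_CACHE_BYTES */

static inline void clear_block(void *addr, unsigned long size)
{
        unsigned long i;

        /* zero one cache block per iteration */
        for (i = 0; i < size; i += L1_CACHE_BYTES)
                dcbz((char *)addr + i);
}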


Segher


Re: [PATCH] powerpc: fix inline asm constraints for dcbz

2019-08-09 Thread Segher Boessenkool
On Fri, Aug 09, 2019 at 08:28:19PM +0200, Arnd Bergmann wrote:
> On Fri, Aug 9, 2019 at 8:21 PM 'Nick Desaulniers' via Clang Built
> Linux  wrote:
> 
> >  static inline void dcbz(void *addr)
> >  {
> > -   __asm__ __volatile__ ("dcbz %y0" : : "Z"(*(u8 *)addr) : "memory");
> > +   __asm__ __volatile__ ("dcbz %y0" : "=Z"(*(u8 *)addr) :: "memory");
> >  }
> >
> >  static inline void dcbi(void *addr)
> >  {
> > -   __asm__ __volatile__ ("dcbi %y0" : : "Z"(*(u8 *)addr) : "memory");
> > +   __asm__ __volatile__ ("dcbi %y0" : "=Z"(*(u8 *)addr) :: "memory");
> >  }
> 
> I think the result of the discussion was that an output argument only kind-of
> makes sense for dcbz, but for the others it's really an input, and clang is
> wrong in the way it handles the "Z" constraint by making a copy, which it
> doesn't do for "m".

Yes.  And clang has probably miscompiled this in all kernels since we
first used "Z", back in 2008 (0f3d6bcd391b).

It is not necessarily fatal or at least not easily visible for the I/O
accessors: it "just" gets memory ordering wrong slightly (it looks like
it does the sync;tw;isync thing around an extra stack access, after it
has performed the actual I/O as any other memory load, without any
synchronisation).

> I'm not sure whether it's correct to use "m" instead of "Z" here, which
> would be a better workaround if that works. More importantly though,
> clang really needs to be fixed to handle "Z" correctly.

"m" allows offset addressing, which these insns do not.  That is the
same reason you need the "y" output modifier.  "m" is wrong here.

We have other memory constraints, but do those work with LLVM?


Segher


Re: [PATCH] powerpc: fix inline asm constraints for dcbz

2019-08-09 Thread Nathan Chancellor
On Fri, Aug 09, 2019 at 11:21:05AM -0700, Nick Desaulniers wrote:
> The input parameter is modified, so it should be an output parameter
> with "=" to make it so that a copy of the input is not made by Clang.
> 
> Link: https://bugs.llvm.org/show_bug.cgi?id=42762
> Link: https://gcc.gnu.org/onlinedocs/gcc/Modifiers.html#Modifiers
> Link: https://github.com/ClangBuiltLinux/linux/issues/593
> Link: https://godbolt.org/z/QwhZXi
> Link: 
> https://lore.kernel.org/lkml/20190721075846.GA97701@archlinux-threadripper/
> Fixes: 6c5875843b87 ("powerpc: slightly improve cache helpers")
> Debugged-by: Nathan Chancellor 
> Reported-by: Nathan Chancellor 
> Reported-by: kbuild test robot 
> Suggested-by: Arnd Bergmann 
> Suggested-by: Nathan Chancellor 
> Signed-off-by: Nick Desaulniers 

I applied this patch as well as a revert of the original patch and both
clang and GCC appear to generate the same code; I think a straight
revert would be better.

Crude testing script and the generated files attached.

Cheers,
Nathan


tmp.bRmcRT0jd0.sh
Description: Bourne shell script


testing-output.tar.gz
Description: application/gzip


[Bug 204479] KASAN hit at modprobe zram

2019-08-09 Thread bugzilla-daemon
https://bugzilla.kernel.org/show_bug.cgi?id=204479

--- Comment #15 from Christophe Leroy (christophe.le...@c-s.fr) ---
As far as I can see in the latest dmesg, the Oops occurs in the raid6 pq module.
And this time it is no longer in kasan register global.

-- 
You are receiving this mail because:
You are on the CC list for the bug.

Re: [PATCH] powerpc: fix inline asm constraints for dcbz

2019-08-09 Thread Arnd Bergmann
On Fri, Aug 9, 2019 at 10:02 PM Christophe Leroy
 wrote:
>
> > Arnd Bergmann  wrote:
> > On Fri, Aug 9, 2019 at 8:21 PM 'Nick Desaulniers' via Clang Built
> > Linux  wrote:
> >
> >>  static inline void dcbz(void *addr)
> >>  {
> >> -   __asm__ __volatile__ ("dcbz %y0" : : "Z"(*(u8 *)addr) : "memory");
> >> +   __asm__ __volatile__ ("dcbz %y0" : "=Z"(*(u8 *)addr) :: "memory");
> >>  }
> >>
> >>  static inline void dcbi(void *addr)
> >>  {
> >> -   __asm__ __volatile__ ("dcbi %y0" : : "Z"(*(u8 *)addr) : "memory");
> >> +   __asm__ __volatile__ ("dcbi %y0" : "=Z"(*(u8 *)addr) :: "memory");
> >>  }
> >
> > I think the result of the discussion was that an output argument only 
> > kind-of
> > makes sense for dcbz, but for the others it's really an input, and clang is
> > wrong in the way it handles the "Z" constraint by making a copy, which it
> > doesn't do for "m".
> >
> > I'm not sure whether it's correct to use "m" instead of "Z" here, which
> > would be a better workaround if that works. More importantly though,
> > clang really needs to be fixed to handle "Z" correctly.
>
> As the benefit is null, I think the best is probably to revert my
> original commit until at least Clang is fixed, as initially suggested
> by mpe.

Yes, makes sense.

There is one other use of the "Z" constraint, so on top of the revert, I
think it might be helpful if Nick could check if the patch below makes
any difference with clang and, if it does, whether the current version
is broken.

   Arnd

diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h
index 23e5d5d16c7e..28b467779328 100644
--- a/arch/powerpc/include/asm/io.h
+++ b/arch/powerpc/include/asm/io.h
@@ -106,7 +106,7 @@ static inline u##size name(const volatile u##size
__iomem *addr)\
 {  \
u##size ret;\
__asm__ __volatile__("sync;"#insn" %0,%y1;twi 0,%0,0;isync" \
-   : "=r" (ret) : "Z" (*addr) : "memory"); \
+   : "=r" (ret) : "m" (*addr) : "memory"); \
return ret; \
 }

@@ -114,7 +114,7 @@ static inline u##size name(const volatile u##size
__iomem *addr)\
 static inline void name(volatile u##size __iomem *addr, u##size val)   \
 {  \
__asm__ __volatile__("sync;"#insn" %1,%y0"  \
-   : "=Z" (*addr) : "r" (val) : "memory"); \
+   : "=m" (*addr) : "r" (val) : "memory"); \
mmiowb_set_pending();   \
 }


Re: [PATCH] powerpc: fix inline asm constraints for dcbz

2019-08-09 Thread Christophe Leroy

Arnd Bergmann  wrote:


On Fri, Aug 9, 2019 at 8:21 PM 'Nick Desaulniers' via Clang Built
Linux  wrote:


 static inline void dcbz(void *addr)
 {
-   __asm__ __volatile__ ("dcbz %y0" : : "Z"(*(u8 *)addr) : "memory");
+   __asm__ __volatile__ ("dcbz %y0" : "=Z"(*(u8 *)addr) :: "memory");
 }

 static inline void dcbi(void *addr)
 {
-   __asm__ __volatile__ ("dcbi %y0" : : "Z"(*(u8 *)addr) : "memory");
+   __asm__ __volatile__ ("dcbi %y0" : "=Z"(*(u8 *)addr) :: "memory");
 }


I think the result of the discussion was that an output argument only kind-of
makes sense for dcbz, but for the others it's really an input, and clang is
wrong in the way it handles the "Z" constraint by making a copy, which it
doesn't do for "m".

I'm not sure whether it's correct to use "m" instead of "Z" here, which
would be a better workaround if that works. More importantly though,
clang really needs to be fixed to handle "Z" correctly.


As the benefit is null, I think the best is probably to revert my
original commit until at least Clang is fixed, as initially suggested
by mpe.


Christophe





Re: [PATCH 1/2] ASoC: fsl_esai: Add compatible string for imx6ull

2019-08-09 Thread Nicolin Chen
On Fri, Aug 09, 2019 at 06:27:46PM +0800, Shengjiu Wang wrote:
> Add a compatible string for imx6ull. Starting from the imx6ull platform,
> the issue of channel swap after xrun is fixed in hardware.
> 
> Signed-off-by: Shengjiu Wang 

Acked-by: Nicolin Chen 

> ---
>  sound/soc/fsl/fsl_esai.c | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/sound/soc/fsl/fsl_esai.c b/sound/soc/fsl/fsl_esai.c
> index 10d2210c91ef..4b4a8e831e9e 100644
> --- a/sound/soc/fsl/fsl_esai.c
> +++ b/sound/soc/fsl/fsl_esai.c
> @@ -920,6 +920,7 @@ static int fsl_esai_remove(struct platform_device *pdev)
>  static const struct of_device_id fsl_esai_dt_ids[] = {
>   { .compatible = "fsl,imx35-esai", },
>   { .compatible = "fsl,vf610-esai", },
> + { .compatible = "fsl,imx6ull-esai", },
>   {}
>  };
>  MODULE_DEVICE_TABLE(of, fsl_esai_dt_ids);
> -- 
> 2.21.0
> 


Re: [PATCH 2/2] ASoC: fsl_esai: Add new compatible string for imx6ull

2019-08-09 Thread Nicolin Chen
On Fri, Aug 09, 2019 at 06:27:47PM +0800, Shengjiu Wang wrote:
> Add new compatible string "fsl,imx6ull-esai" in the binding document.
> 
> Signed-off-by: Shengjiu Wang 

Acked-by: Nicolin Chen 

> ---
>  Documentation/devicetree/bindings/sound/fsl,esai.txt | 7 +--
>  1 file changed, 5 insertions(+), 2 deletions(-)
> 
> diff --git a/Documentation/devicetree/bindings/sound/fsl,esai.txt 
> b/Documentation/devicetree/bindings/sound/fsl,esai.txt
> index 5b9914367610..0e6e2166f76c 100644
> --- a/Documentation/devicetree/bindings/sound/fsl,esai.txt
> +++ b/Documentation/devicetree/bindings/sound/fsl,esai.txt
> @@ -7,8 +7,11 @@ other DSPs. It has up to six transmitters and four receivers.
>  
>  Required properties:
>  
> -  - compatible   : Compatible list, must contain 
> "fsl,imx35-esai" or
> -   "fsl,vf610-esai"
> +  - compatible   : Compatible list, should contain one of the 
> following
> +   compatibles:
> +   "fsl,imx35-esai",
> +   "fsl,vf610-esai",
> +   "fsl,imx6ull-esai",
>  
>- reg  : Offset and length of the register set for the 
> device.
>  
> -- 
> 2.21.0
> 


Re: [PATCH net-next v2] ibmveth: Allow users to update reported speed and duplex

2019-08-09 Thread David Miller
From: Jakub Kicinski 
Date: Tue, 6 Aug 2019 15:15:24 -0700

> On Tue,  6 Aug 2019 11:23:08 -0500, Thomas Falcon wrote:
>> Reported ethtool link settings for the ibmveth driver are currently
>> hardcoded and no longer reflect the actual capabilities of supported
>> hardware. There is no interface designed for retrieving this information
>> from device firmware nor is there any way to update current settings
>> to reflect observed or expected link speeds.
>> 
>> To avoid breaking existing configurations, retain current values as
>> default settings but let users update them to match the expected
>> capabilities of underlying hardware if needed. This update would
>> allow the use of configurations that rely on certain link speed
>> settings, such as LACP. This patch is based on the implementation
>> in virtio_net.
>> 
>> Signed-off-by: Thomas Falcon 
> 
> Looks like this is the third copy of the same code virtio and
> netvsc have :(  Is there a chance we could factor this out into
> helpers in the core?

Yeah, let's stop the duplication of code while we can.

Thomas please perform the consolidation and respin.

Thank you.
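
A rough sketch of what such a shared helper might look like (hypothetical function and
parameter names; ethtool_validate_speed() and ethtool_validate_duplex() are existing
helpers, everything else here is illustrative only):

#include <linux/ethtool.h>

/* Remember a user-supplied speed/duplex for paravirtual drivers that cannot
 * query the real link capabilities; the stored values are then reported back
 * by the driver's get_link_ksettings() implementation.
 */
static int virt_set_link_ksettings(u32 *speed, u8 *duplex,
                                   const struct ethtool_link_ksettings *cmd)
{
        if (!ethtool_validate_speed(cmd->base.speed) ||
            !ethtool_validate_duplex(cmd->base.duplex))
                return -EINVAL;

        *speed = cmd->base.speed;
        *duplex = cmd->base.duplex;
        return 0;
}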


Re: [PATCH v8 3/7] powerpc/mce: Fix MCE handling for huge pages

2019-08-09 Thread Mahesh Jagannath Salgaonkar
On 8/7/19 8:26 PM, Santosh Sivaraj wrote:
> From: Balbir Singh 
> 
> The current code would fail on huge page addresses, since the shift would
> be incorrect. Use the correct page shift value returned by
> __find_linux_pte() to get the correct physical address. The code is more
> generic and can handle both regular and compound pages.
> 
> Fixes: ba41e1e1ccb9 ("powerpc/mce: Hookup derror (load/store) UE errors")
> Signed-off-by: Balbir Singh 
> [ar...@linux.ibm.com: Fixup pseries_do_memory_failure()]
> Signed-off-by: Reza Arbab 
> Co-developed-by: Santosh Sivaraj 
> Signed-off-by: Santosh Sivaraj 
> ---
>  arch/powerpc/include/asm/mce.h   |  2 +-
>  arch/powerpc/kernel/mce_power.c  | 50 ++--
>  arch/powerpc/platforms/pseries/ras.c |  9 ++---
>  3 files changed, 29 insertions(+), 32 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h
> index a4c6a74ad2fb..f3a6036b6bc0 100644
> --- a/arch/powerpc/include/asm/mce.h
> +++ b/arch/powerpc/include/asm/mce.h
> @@ -209,7 +209,7 @@ extern void release_mce_event(void);
>  extern void machine_check_queue_event(void);
>  extern void machine_check_print_event_info(struct machine_check_event *evt,
>  bool user_mode, bool in_guest);
> -unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr);
> +unsigned long addr_to_phys(struct pt_regs *regs, unsigned long addr);
>  #ifdef CONFIG_PPC_BOOK3S_64
>  void flush_and_reload_slb(void);
>  #endif /* CONFIG_PPC_BOOK3S_64 */
> diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c
> index a814d2dfb5b0..bed38a8e2e50 100644
> --- a/arch/powerpc/kernel/mce_power.c
> +++ b/arch/powerpc/kernel/mce_power.c
> @@ -20,13 +20,14 @@
>  #include 
>  
>  /*
> - * Convert an address related to an mm to a PFN. NOTE: we are in real
> - * mode, we could potentially race with page table updates.
> + * Convert an address related to an mm to a physical address.
> + * NOTE: we are in real mode, we could potentially race with page table 
> updates.
>   */
> -unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr)
> +unsigned long addr_to_phys(struct pt_regs *regs, unsigned long addr)
>  {
> - pte_t *ptep;
> - unsigned long flags;
> + pte_t *ptep, pte;
> + unsigned int shift;
> + unsigned long flags, phys_addr;
>   struct mm_struct *mm;
>  
>   if (user_mode(regs))
> @@ -35,14 +36,21 @@ unsigned long addr_to_pfn(struct pt_regs *regs, unsigned 
> long addr)
>   mm = _mm;
>  
>   local_irq_save(flags);
> - if (mm == current->mm)
> - ptep = find_current_mm_pte(mm->pgd, addr, NULL, NULL);
> - else
> - ptep = find_init_mm_pte(addr, NULL);
> > + ptep = __find_linux_pte(mm->pgd, addr, NULL, &shift);
>   local_irq_restore(flags);
> +
>   if (!ptep || pte_special(*ptep))
>   return ULONG_MAX;
> - return pte_pfn(*ptep);
> +
> + pte = *ptep;
> + if (shift > PAGE_SHIFT) {
> + unsigned long rpnmask = (1ul << shift) - PAGE_SIZE;
> +
> + pte = __pte(pte_val(pte) | (addr & rpnmask));
> + }
> + phys_addr = pte_pfn(pte) << PAGE_SHIFT;
> +
> + return phys_addr;
>  }
>  
>  /* flush SLBs and reload */
> @@ -354,18 +362,16 @@ static int mce_find_instr_ea_and_pfn(struct pt_regs 
> *regs, uint64_t *addr,

Now that we have addr_to_phys(), can we change this function name as well,
to mce_find_instr_ea_and_phys()?

Tested-by: Mahesh Salgaonkar 

This should go to the stable tree. Can you move this patch to the 2nd position?

Thanks,
-Mahesh.
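
To make the shift/rpnmask arithmetic in the patch concrete, here is a small standalone
sketch with example values only (64K base pages and a 16M huge page, i.e.
PAGE_SHIFT = 16 and shift = 24):

#include <stdio.h>

#define PAGE_SHIFT      16UL
#define PAGE_SIZE       (1UL << PAGE_SHIFT)

int main(void)
{
        unsigned long shift   = 24;             /* 16M huge page mapping */
        unsigned long hugepfn = 0x12000000UL >> PAGE_SHIFT; /* PFN in the PTE */
        unsigned long addr    = 0x12345678UL;   /* faulting address inside it */

        /* bits that select the 64K page within the 16M huge page */
        unsigned long rpnmask = (1UL << shift) - PAGE_SIZE;

        /* as in the patch: fold those bits in, then shift back up */
        unsigned long phys = (hugepfn << PAGE_SHIFT) | (addr & rpnmask);

        printf("phys = 0x%lx\n", phys);         /* 0x12340000, not 0x12000000 */
        return 0;
}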



Re: [PATCH v8 1/7] powerpc/mce: Schedule work from irq_work

2019-08-09 Thread Mahesh Jagannath Salgaonkar
On 8/7/19 8:26 PM, Santosh Sivaraj wrote:
> schedule_work() cannot be called from MCE exception context as MCE can
> interrupt even in interrupt disabled context.
> 
> fixes: 733e4a4c ("powerpc/mce: hookup memory_failure for UE errors")
> Signed-off-by: Santosh Sivaraj 
> ---
>  arch/powerpc/kernel/mce.c | 8 +---
>  1 file changed, 5 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
> index b18df633eae9..0ab6fa7c 100644
> --- a/arch/powerpc/kernel/mce.c
> +++ b/arch/powerpc/kernel/mce.c
> @@ -144,7 +144,6 @@ void save_mce_event(struct pt_regs *regs, long handled,
>   if (phys_addr != ULONG_MAX) {
>   mce->u.ue_error.physical_address_provided = true;
>   mce->u.ue_error.physical_address = phys_addr;
> - machine_check_ue_event(mce);
>   }
>   }
>   return;
> @@ -275,8 +274,7 @@ static void machine_process_ue_event(struct work_struct 
> *work)
>   }
>  }
>  /*
> - * process pending MCE event from the mce event queue. This function will be
> - * called during syscall exit.
> + * process pending MCE event from the mce event queue.
>   */
>  static void machine_check_process_queued_event(struct irq_work *work)
>  {
> @@ -292,6 +290,10 @@ static void machine_check_process_queued_event(struct 
> irq_work *work)
>   while (__this_cpu_read(mce_queue_count) > 0) {
>   index = __this_cpu_read(mce_queue_count) - 1;
>   evt = this_cpu_ptr(_event_queue[index]);
> +
> + if (evt->error_type == MCE_ERROR_TYPE_UE)
> + machine_check_ue_event(evt);

This will work only for the events that are queued by the MCE handler; others
will get ignored. I think you should introduce a separate irq_work queue
for schedule_work().

Thanks,
-Mahesh.
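
A rough sketch of that suggestion (hypothetical names; the irq_work and workqueue APIs
are real, the wiring is illustrative only):

#include <linux/irq_work.h>
#include <linux/workqueue.h>

static void mce_ue_work_fn(struct work_struct *work)
{
        /* process queued UE events in process context (may sleep) */
}

static DECLARE_WORK(mce_ue_work, mce_ue_work_fn);

static void mce_ue_irq_work_fn(struct irq_work *work)
{
        /* runs with interrupts enabled, so schedule_work() is safe here */
        schedule_work(&mce_ue_work);
}

static DEFINE_IRQ_WORK(mce_ue_irq_work, mce_ue_irq_work_fn);

/* called from the MCE handler, possibly with interrupts disabled */
static void mce_queue_ue_work(void)
{
        irq_work_queue(&mce_ue_irq_work);
}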



Re: [PATCH] powerpc: fix inline asm constraints for dcbz

2019-08-09 Thread Arnd Bergmann
On Fri, Aug 9, 2019 at 8:21 PM 'Nick Desaulniers' via Clang Built
Linux  wrote:

>  static inline void dcbz(void *addr)
>  {
> -   __asm__ __volatile__ ("dcbz %y0" : : "Z"(*(u8 *)addr) : "memory");
> +   __asm__ __volatile__ ("dcbz %y0" : "=Z"(*(u8 *)addr) :: "memory");
>  }
>
>  static inline void dcbi(void *addr)
>  {
> -   __asm__ __volatile__ ("dcbi %y0" : : "Z"(*(u8 *)addr) : "memory");
> +   __asm__ __volatile__ ("dcbi %y0" : "=Z"(*(u8 *)addr) :: "memory");
>  }

I think the result of the discussion was that an output argument only kind-of
makes sense for dcbz, but for the others it's really an input, and clang is
wrong in the way it handles the "Z" constraint by making a copy, which it
doesn't do for "m".

I'm not sure whether it's correct to use "m" instead of "Z" here, which
would be a better workaround if that works. More importantly though,
clang really needs to be fixed to handle "Z" correctly.

Arnd


[Bug 204479] KASAN hit at modprobe zram

2019-08-09 Thread bugzilla-daemon
https://bugzilla.kernel.org/show_bug.cgi?id=204479

--- Comment #14 from Erhard F. (erhar...@mailbox.org) ---
Created attachment 284303
  --> https://bugzilla.kernel.org/attachment.cgi?id=284303&action=edit
dmesg (kernel 5.3-rc3 + patch + 2nd patch, without CONFIG_SMP, v2, PowerMac G4
DP)

However the radeon module and btrfs (if built as a module) still freeze the
machine until the 2min reboot timer kicks in. Also some EHCI driver modules
oops, but not always.

-- 
You are receiving this mail because:
You are on the CC list for the bug.

[Bug 204479] KASAN hit at modprobe zram

2019-08-09 Thread bugzilla-daemon
https://bugzilla.kernel.org/show_bug.cgi?id=204479

--- Comment #13 from Erhard F. (erhar...@mailbox.org) ---
Created attachment 284301
  --> https://bugzilla.kernel.org/attachment.cgi?id=284301&action=edit
dmesg (kernel 5.3-rc3 + patch + 2nd patch, without CONFIG_SMP, PowerMac G4 DP)

Definitely an improvement with the latest patch. b43legacy and nfs now load
reliably without an Oops.

-- 
You are receiving this mail because:
You are on the CC list for the bug.

Re: [PATCH v4 0/6] Remove x86-specific code from generic headers

2019-08-09 Thread Thiago Jung Bauermann


m...@ellerman.id.au writes:

> Thiago Jung Bauermann  writes:
>> Hello,
>>
>> This version has only a small change in the last patch as requested by
>> Christoph and Halil, and collects Reviewed-by's.
>>
>> These patches are applied on top of v5.3-rc2.
>>
>> I don't have a way to test SME, SEV, nor s390's PEF so the patches have only
>> been build tested.
>
> I need to take this series via the powerpc tree because there is another
> fairly large powerpc specific series dependent on it.
>
> I think this series already has pretty much all the acks it needs, which
> almost never happens, amazing work!

Yes, thank you very much to everyone who reviewed the patches!

> I'll put the series in a topic branch, just in case there's any bad
> conflicts and other folks want to merge it later on. I'll then merge the
> topic branch into my next, and so this series will be tested in
> linux-next that way.

That's awesome. Thank you very much!

-- 
Thiago Jung Bauermann
IBM Linux Technology Center


Re: [PATCH 4/4] powerpc: Book3S 64-bit "heavyweight" KASAN support

2019-08-09 Thread Christophe Leroy

Hi Daniel,

On 07/08/2019 at 18:34, Christophe Leroy wrote:



On 07/08/2019 at 01:38, Daniel Axtens wrote:

KASAN support on powerpc64 is interesting:

  - We want to be able to support inline instrumentation so as to be
    able to catch global and stack issues.

  - We run a lot of code at boot in real mode. This includes stuff like
    printk(), so it's not feasible to just disable instrumentation
    around it.


Have you definitely given up the idea of doing a standard implementation
of KASAN like other 64-bit arches have done?


Isn't it possible to set up an early 1:1 mapping and go into virtual mode
earlier? What is so different between book3s64 and book3e64?
On book3e64, we've been able to set up KASAN before printing anything
(except when using EARLY_DEBUG). Isn't it feasible on book3s64 too?




I looked at it once more, and cannot find that "We run a lot of code at 
boot in real mode. This includes stuff like printk()".


Can you provide examples?

AFAICS, there are two things which are run in real mode at boot:
1/ prom_init() in kernel/prom_init.c
2/ early_setup() in kernel/setup_64.c

1/ KASAN is already inhibited for prom_init(), and prom_init() only uses 
prom_printf() to display stuff.
2/ early_setup() only calls a subset of simple functions. By regrouping
things in a new file called early_64.c, as done for PPC32 with
early_32.c, we can easily inhibit KASAN for those few bits. printk() is
not used there either; there is even a comment at the start of
early_setup() saying /* printk is _NOT_ safe to use here ! */. The only
thing that performs any display is udbg_printf(), which is called only
when DEBUG is set and which is linked to CONFIG_PPC_EARLY_DEBUG. We
already discussed that and agreed that CONFIG_PPC_EARLY_DEBUG could be
made exclusive of CONFIG_KASAN.


Once early_setup() has run, BOOK3S64 goes in virtual mode, just like 
BOOK3E does.


What am I missing ?

Thanks
Christophe


[Bug 204479] KASAN hit at modprobe zram

2019-08-09 Thread bugzilla-daemon
https://bugzilla.kernel.org/show_bug.cgi?id=204479

--- Comment #12 from Christophe Leroy (christophe.le...@c-s.fr) ---
Patch at https://patchwork.ozlabs.org/patch/1144756/

-- 
You are receiving this mail because:
You are on the CC list for the bug.

[PATCH] powerpc/kasan: fix parallel loading of modules.

2019-08-09 Thread Christophe Leroy
Parallel loading of modules may lead to bad setup of shadow
page table entries.

First, let's align modules so that two modules never share the same
shadow page.

Second, ensure that two modules cannot allocate two page tables for
the same PMD entry at the same time. This is done by using
init_mm.page_table_lock in the same way as __pte_alloc_kernel() does.

Fixes: 2edb16efc899 ("powerpc/32: Add KASAN support")
Cc: sta...@vger.kernel.org
Signed-off-by: Christophe Leroy 
---
 arch/powerpc/mm/kasan/kasan_init_32.c | 21 +++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/mm/kasan/kasan_init_32.c 
b/arch/powerpc/mm/kasan/kasan_init_32.c
index 99eac3fab83c..802387b231ad 100644
--- a/arch/powerpc/mm/kasan/kasan_init_32.c
+++ b/arch/powerpc/mm/kasan/kasan_init_32.c
@@ -5,6 +5,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -46,7 +47,19 @@ static int __ref kasan_init_shadow_page_tables(unsigned long 
k_start, unsigned l
kasan_populate_pte(new, PAGE_READONLY);
else
kasan_populate_pte(new, PAGE_KERNEL_RO);
-   pmd_populate_kernel(&init_mm, pmd, new);
+
+   smp_wmb(); /* See comment in __pte_alloc */
+
+   spin_lock(&init_mm.page_table_lock);
+   /* Has another populated it ? */
+   if (likely((void *)pmd_page_vaddr(*pmd) == 
kasan_early_shadow_pte)) {
+   pmd_populate_kernel(&init_mm, pmd, new);
+   new = NULL;
+   }
+   spin_unlock(&init_mm.page_table_lock);
+
+   if (new && slab_is_available())
+   pte_free_kernel(&init_mm, new);
}
return 0;
 }
@@ -137,7 +150,11 @@ void __init kasan_init(void)
 #ifdef CONFIG_MODULES
 void *module_alloc(unsigned long size)
 {
-   void *base = vmalloc_exec(size);
+   void *base;
+
+   base = __vmalloc_node_range(size, MODULE_ALIGN, VMALLOC_START, 
VMALLOC_END,
+   GFP_KERNEL, PAGE_KERNEL_EXEC, 
VM_FLUSH_RESET_PERMS,
+   NUMA_NO_NODE, __builtin_return_address(0));
 
if (!base)
return NULL;
-- 
2.13.3



[PATCH] powerpc/kasan: fix shadow area set up for modules.

2019-08-09 Thread Christophe Leroy
When loading modules, from time to time an Oops is encountered
during the init of shadow area for globals. This is due to the
last page not always being mapped depending on the exact distance
between the start and the end of the shadow area and the alignment
with the page addresses.

Fix this by aligning the starting address with the page address.

Reported-by: Erhard F. 
Link: https://bugzilla.kernel.org/show_bug.cgi?id=204479
Fixes: 2edb16efc899 ("powerpc/32: Add KASAN support")
Cc: sta...@vger.kernel.org
Signed-off-by: Christophe Leroy 
---
 arch/powerpc/mm/kasan/kasan_init_32.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/mm/kasan/kasan_init_32.c 
b/arch/powerpc/mm/kasan/kasan_init_32.c
index 74f4555a62ba..99eac3fab83c 100644
--- a/arch/powerpc/mm/kasan/kasan_init_32.c
+++ b/arch/powerpc/mm/kasan/kasan_init_32.c
@@ -74,7 +74,7 @@ static int __ref kasan_init_region(void *start, size_t size)
if (!slab_is_available())
block = memblock_alloc(k_end - k_start, PAGE_SIZE);
 
-   for (k_cur = k_start; k_cur < k_end; k_cur += PAGE_SIZE) {
+   for (k_cur = k_start & PAGE_MASK; k_cur < k_end; k_cur += PAGE_SIZE) {
pmd_t *pmd = pmd_offset(pud_offset(pgd_offset_k(k_cur), k_cur), 
k_cur);
void *va = block ? block + k_cur - k_start : 
kasan_get_one_page();
pte_t pte = pfn_pte(PHYS_PFN(__pa(va)), PAGE_KERNEL);
-- 
2.13.3



[PATCH] powerpc/ptdump: fix addresses display on PPC32

2019-08-09 Thread Christophe Leroy
Commit 453d87f6a8ae ("powerpc/mm: Warn if W+X pages found on boot")
wrongly changed KERN_VIRT_START from 0 to PAGE_OFFSET, leading to a
shift in the displayed addresses.

Let's revert that change to resync walk_pagetables()'s addr val and
pgd_t pointer for PPC32.

Fixes: 453d87f6a8ae ("powerpc/mm: Warn if W+X pages found on boot")
Cc: sta...@vger.kernel.org
Signed-off-by: Christophe Leroy 
---
 arch/powerpc/mm/ptdump/ptdump.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c
index 6a88a9f585d4..3ad64fc11419 100644
--- a/arch/powerpc/mm/ptdump/ptdump.c
+++ b/arch/powerpc/mm/ptdump/ptdump.c
@@ -27,7 +27,7 @@
 #include "ptdump.h"
 
 #ifdef CONFIG_PPC32
-#define KERN_VIRT_START	PAGE_OFFSET
+#define KERN_VIRT_START	0
 #endif
 
 /*
-- 
2.13.3



[Bug 204479] KASAN hit at modprobe zram

2019-08-09 Thread bugzilla-daemon
https://bugzilla.kernel.org/show_bug.cgi?id=204479

--- Comment #11 from Christophe Leroy (christophe.le...@c-s.fr) ---
Thanks. Then it is not about SMP, although there's anyway a theoretical problem
with SMP that I'll address in another patch.

I think I finally spotted the issue. Let's take the first occurrence of the
first log:

Aug 08 23:39:58 T600 kernel: ## module_alloc(4718) = f1065000
[fe20ca00-fe20d2e3]
[...]
Aug 08 23:39:59 T600 kernel: BUG: Unable to handle kernel data access at
0xfe20d040

In kasan_init_region(), the loop starts with k_cur = 0xfe20ca00 to set the pte
for the first shadow page at 0xfe20c000. Then k_cur is increased by PAGE_SIZE
so now k_cur = 0xfe20da00.

As this is over 0xfe20d2e3, it doesn't set the pte for the second page at
0xfe20d000.

It should be fixed by changing the init value of k_cur in the for() loop of
kasan_init_region() by:

for (k_cur = k_start & PAGE_MASK; )

Can you test it?
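
In other words (a standalone sketch using the addresses from the log above, and
assuming the usual 4K PAGE_SIZE on this platform):

#include <stdio.h>

#define PAGE_SIZE       0x1000UL
#define PAGE_MASK       (~(PAGE_SIZE - 1))

int main(void)
{
        unsigned long k_start = 0xfe20ca00UL, k_end = 0xfe20d2e3UL, k_cur;

        /* Old loop: starts at 0xfe20ca00; the next step is 0xfe20da00, which
         * is beyond k_end, so the page at 0xfe20d000 never gets a PTE. */
        for (k_cur = k_start; k_cur < k_end; k_cur += PAGE_SIZE)
                printf("old: maps page 0x%lx\n", k_cur & PAGE_MASK);

        /* Fixed loop: aligning the start down makes it visit both pages. */
        for (k_cur = k_start & PAGE_MASK; k_cur < k_end; k_cur += PAGE_SIZE)
                printf("new: maps page 0x%lx\n", k_cur & PAGE_MASK);

        return 0;
}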

-- 
You are receiving this mail because:
You are on the CC list for the bug.

Re: [RFC V2 0/1] mm/debug: Add tests for architecture exported page table helpers

2019-08-09 Thread Matthew Wilcox
On Fri, Aug 09, 2019 at 04:05:07PM +0530, Anshuman Khandual wrote:
> On 08/09/2019 03:46 PM, Matthew Wilcox wrote:
> > On Fri, Aug 09, 2019 at 01:03:17PM +0530, Anshuman Khandual wrote:
> >> Should alloc_gigantic_page() be made available as an interface for general
> >> use in the kernel. The test module here uses very similar implementation 
> >> from
> >> HugeTLB to allocate a PUD aligned memory block. Similar for mm_alloc() 
> >> which
> >> needs to be exported through a header.
> > 
> > Why are you allocating memory at all instead of just using some
> > known-to-exist PFNs like I suggested?
> 
> We needed PFN to be PUD aligned for pfn_pud() and PMD aligned for mk_pmd().
> Now walking the kernel page table for a known symbol like kernel_init()

I didn't say to walk the kernel page table.  I said to call virt_to_pfn()
for a known symbol like kernel_init().

> as you had suggested earlier, we might encounter page table entries at the PMD
> and PUD levels which might not be PMD or PUD aligned respectively. It seemed to me
> that the alignment requirement applies only to mk_pmd() and pfn_pud(),
> which create large mappings at those levels, but that the requirement does not
> exist for page table pages pointing to the next level. Is that not correct? Or
> am I missing something here?

Just clear the bottom bits off the PFN until you get a PMD or PUD aligned
PFN.  It's really not hard.
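
i.e. something along these lines (a sketch; PMD_SHIFT/PUD_SHIFT/PAGE_SHIFT come from
the architecture's page table headers, and the usage comment just echoes the helpers
named in this thread):

#include <asm/pgtable.h>

/* Take any valid PFN (e.g. virt_to_pfn(kernel_init)) and clear its low bits
 * so the synthetic huge-mapping tests get a suitably aligned PFN.
 */
static unsigned long aligned_pfn(unsigned long pfn, unsigned int shift)
{
        return pfn & ~((1UL << (shift - PAGE_SHIFT)) - 1);
}

/* usage (illustrative):
 *      pmd = mk_pmd(pfn_to_page(aligned_pfn(pfn, PMD_SHIFT)), prot);
 *      pud = pfn_pud(aligned_pfn(pfn, PUD_SHIFT), prot);
 */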



Re: [PATCH 2/2] powerpc/mm: Warn if W+X pages found on boot

2019-08-09 Thread Christophe Leroy




On 02/05/2019 at 07:51, Russell Currey wrote:

+   if (radix_enabled())
+   st.start_address = PAGE_OFFSET;
+   else

+   st.start_address = KERN_VIRT_START;

KERN_VIRT_START doesn't exist on PPC32.

Christophe


Thanks a lot for the review!  Applied all your suggestions.  What
should I use on PPC32 instead?


Indeed it looks like KERN_VIRT_START is defined as 0 for PPC32 at
the
top of ptdump.c, which looks strange to me.

I guess PAGE_OFFSET should be the good value for KERN_VIRT_START on
PPC32.

Christophe


git blame says you put it there :) I'll set it to PAGE_OFFSET instead
of zero.  Cheers



Finally it seems that I was right in the first place. KERN_VIRT_START should
be 0 because in walk_pagetables(), it starts with:


pgd_t *pgd = pgd_offset_k(0UL);

Now that KERN_VIRT_START has changed to 0xc0000000, I get a shift of
0xc0000000 in the display, i.e. the kernel pages are displayed starting at
0x80000000 instead of 0xc0000000 (0x80000000 = 0xc0000000 + 0xc0000000 modulo 2^32).


Since we only want to display kernel pages, I guess we should use

pgd_t *pgd = pgd_offset_k(KERN_VIRT_START); but then we can't use the 
for () loop as it is.
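
A sketch of what that could look like (illustrative only, reusing the
pg_state/note_page()/walk_pud() helpers from ptdump.c): derive the pgd entry from the
current address on every iteration instead of post-incrementing a pointer obtained
from pgd_offset_k(0), so the address and the pgd_t pointer cannot drift apart:

static void walk_pagetables(struct pg_state *st)
{
        unsigned long addr = st->start_address; /* e.g. KERN_VIRT_START */
        pgd_t *pgd = pgd_offset_k(addr);
        unsigned int i;

        for (i = pgd_index(addr); i < PTRS_PER_PGD; i++, pgd++, addr += PGDIR_SIZE) {
                if (!pgd_none(*pgd))
                        walk_pud(st, pgd, addr);        /* descend as before */
                else
                        note_page(st, addr, 1, pgd_val(*pgd));
        }
}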


Does it work properly on PPC64? If so, that's surprising.

Christophe


Re: [PATCH v4 0/6] Remove x86-specific code from generic headers

2019-08-09 Thread mpe
Thiago Jung Bauermann  writes:
> Hello,
>
> This version has only a small change in the last patch as requested by
> Christoph and Halil, and collects Reviewed-by's.
>
> These patches are applied on top of v5.3-rc2.
>
> I don't have a way to test SME, SEV, nor s390's PEF so the patches have only
> been build tested.

I need to take this series via the powerpc tree because there is another
fairly large powerpc specific series dependent on it.

I think this series already has pretty much all the acks it needs, which
almost never happens, amazing work!

I'll put the series in a topic branch, just in case there's any bad
conflicts and other folks want to merge it later on. I'll then merge the
topic branch into my next, and so this series will be tested in
linux-next that way.

cheers


> Changelog
>
> Since v3:
>
> - Patch "s390/mm: Remove sev_active() function"
>   - Preserve comment from sev_active() in force_dma_unencrypted().
> Suggested by Christoph Hellwig.
>
> Since v2:
>
> - Patch "x86,s390: Move ARCH_HAS_MEM_ENCRYPT definition to arch/Kconfig"
>   - Added "select ARCH_HAS_MEM_ENCRYPT" to config S390. Suggested by Janani.
>
> - Patch "DMA mapping: Move SME handling to x86-specific files"
>   - Split up into 3 new patches. Suggested by Christoph Hellwig.
>
> - Patch "swiotlb: Remove call to sme_active()"
>   - New patch.
>
> - Patch "dma-mapping: Remove dma_check_mask()"
>   - New patch.
>
> - Patch "x86,s390/mm: Move sme_active() and sme_me_mask to x86-specific 
> header"
>   - New patch.
>   - Removed export of sme_active symbol. Suggested by Christoph Hellwig.
>
> - Patch "fs/core/vmcore: Move sev_active() reference to x86 arch code"
>   - Removed export of sev_active symbol. Suggested by Christoph Hellwig.
>
> - Patch "s390/mm: Remove sev_active() function"
>   - New patch.
>
> Since v1:
>
> - Patch "x86,s390: Move ARCH_HAS_MEM_ENCRYPT definition to arch/Kconfig"
>   - Remove definition of ARCH_HAS_MEM_ENCRYPT from s390/Kconfig as well.
>   - Reworded patch title and message a little bit.
>
> - Patch "DMA mapping: Move SME handling to x86-specific files"
>   - Adapt s390's  as well.
>   - Remove dma_check_mask() from kernel/dma/mapping.c. Suggested by
> Christoph Hellwig.
>
> Thiago Jung Bauermann (6):
>   x86,s390: Move ARCH_HAS_MEM_ENCRYPT definition to arch/Kconfig
>   swiotlb: Remove call to sme_active()
>   dma-mapping: Remove dma_check_mask()
>   x86,s390/mm: Move sme_active() and sme_me_mask to x86-specific header
>   fs/core/vmcore: Move sev_active() reference to x86 arch code
>   s390/mm: Remove sev_active() function
>
>  arch/Kconfig|  3 +++
>  arch/s390/Kconfig   |  4 +---
>  arch/s390/include/asm/mem_encrypt.h |  5 +
>  arch/s390/mm/init.c |  7 +--
>  arch/x86/Kconfig|  4 +---
>  arch/x86/include/asm/mem_encrypt.h  | 10 ++
>  arch/x86/kernel/crash_dump_64.c |  5 +
>  arch/x86/mm/mem_encrypt.c   |  2 --
>  fs/proc/vmcore.c|  8 
>  include/linux/crash_dump.h  | 14 ++
>  include/linux/mem_encrypt.h | 15 +--
>  kernel/dma/mapping.c|  8 
>  kernel/dma/swiotlb.c|  3 +--
>  13 files changed, 42 insertions(+), 46 deletions(-)


Re: [PATCH v5 1/7] Documentation/powerpc: Ultravisor API

2019-08-09 Thread Michael Ellerman
Claudio Carvalho  writes:
> From: Sukadev Bhattiprolu 
>
> POWER9 processor includes support for Protected Execution Facility (PEF).

Which POWER9? Please be more precise.

It's public knowledge that some versions of Power9 don't have PEF (or
have it broken / fused off).

People are going to try and test this on various chip revisions that are
out in the wild, so we need to make it clear where it's expected to work
and where it's not.

> Attached documentation provides an overview of PEF and defines the API
> for various interfaces that must be implemented in the Ultravisor
> firmware as well as in the KVM Hypervisor.
>
> Based on input from Mike Anderson, Thiago Bauermann, Claudio Carvalho,
> Ben Herrenschmidt, Guerney Hunt, Paul Mackerras.
>
> Signed-off-by: Sukadev Bhattiprolu 
> Signed-off-by: Ram Pai 
> Signed-off-by: Guerney Hunt 
> Reviewed-by: Claudio Carvalho 
> Reviewed-by: Michael Anderson 
> Reviewed-by: Thiago Bauermann 
> Signed-off-by: Claudio Carvalho 
> ---
>  Documentation/powerpc/ultravisor.rst | 1055 ++
>  1 file changed, 1055 insertions(+)
>  create mode 100644 Documentation/powerpc/ultravisor.rst
>
> diff --git a/Documentation/powerpc/ultravisor.rst 
> b/Documentation/powerpc/ultravisor.rst
> new file mode 100644
> index ..8d5246585b66
> --- /dev/null
> +++ b/Documentation/powerpc/ultravisor.rst
> @@ -0,0 +1,1055 @@
> +.. SPDX-License-Identifier: GPL-2.0
> +.. _ultravisor:
> +
> +
> +Protected Execution Facility
> +
> +
> +.. contents::
> +:depth: 3
> +
> +.. sectnum::
> +:depth: 3
> +
> +Protected Execution Facility
> +
> +
> +Protected Execution Facility (PEF) is an architectural change for
> +POWER 9 that enables Secure Virtual Machines (SVMs). When enabled,

Ditto here.

Also you don't mention which ISA version PEF is (will be) documented in.
Do we know? Or can we at least reference the RFC number so folks can
find it.

Otherwise this looks really good. I'll try and find time to proof read
it thoroughly.

cheers

> +PEF adds a new higher privileged mode, called Ultravisor mode, to
> +POWER architecture. Along with the new mode there is new firmware
> +called the Protected Execution Ultravisor (or Ultravisor for short).
> +Ultravisor mode is the highest privileged mode in POWER architecture.
> +
> + +--+
> + | Privilege States |
> + +==+
> + |  Problem |
> + +--+
> + |  Supervisor  |
> + +--+
> + |  Hypervisor  |
> + +--+
> + |  Ultravisor  |
> + +--+
> +
> +PEF protects SVMs from the hypervisor, privileged users, and other
> +VMs in the system. SVMs are protected while at rest and can only be
> +executed by an authorized machine. All virtual machines utilize
> +hypervisor services. The Ultravisor filters calls between the SVMs
> +and the hypervisor to assure that information does not accidentally
> +leak. All hypercalls except H_RANDOM are reflected to the hypervisor.
> +H_RANDOM is not reflected to prevent the hypervisor from influencing
> +random values in the SVM.
> +
> +To support this there is a refactoring of the ownership of resources
> +in the CPU. Some of the resources which were previously hypervisor
> +privileged are now ultravisor privileged.
> +
> +Hardware
> +
> +
> +The hardware changes include the following:
> +
> +* There is a new bit in the MSR that determines whether the current
> +  process is running in secure mode, MSR(S) bit 41. MSR(S)=1, process
> > +  is in secure mode, MSR(S)=0 process is in normal mode.
> +
> +* The MSR(S) bit can only be set by the Ultravisor.
> +
> +* HRFID cannot be used to set the MSR(S) bit. If the hypervisor needs
> +  to return to a SVM it must use an ultracall. It can determine if
> +  the VM it is returning to is secure.
> +
> +* There is a new Ultravisor privileged register, SMFCTRL, which has an
> +  enable/disable bit SMFCTRL(E).
> +
> +* The privilege of a process is now determined by three MSR bits,
> +  MSR(S, HV, PR). In each of the tables below the modes are listed
> +  from least privilege to highest privilege. The higher privilege
> +  modes can access all the resources of the lower privilege modes.
> +
> +  **Secure Mode MSR Settings**
> +
> +  +---+---+---+---+
> +  | S | HV| PR|Privilege  |
> +  +===+===+===+===+
> +  | 1 | 0 | 1 | Problem   |
> +  +---+---+---+---+
> +  | 1 | 0 | 0 | Privileged(OS)|
> +  +---+---+---+---+
> +  | 1 | 1 | 0 | Ultravisor|
> +  +---+---+---+---+
> +  | 1 | 1 | 1 | Reserved  |
> +  +---+---+---+---+
> +
> +  **Normal Mode MSR Settings**
> +

Re: [PATCH] powerpc/mm: Use refcount_t for refcount

2019-08-09 Thread Michael Ellerman
Chuhong Yuan  writes:
> Reference counters should preferably use refcount_t instead of
> atomic_t.
> This is because the implementation of refcount_t can prevent
> overflows and detect possible use-after-free.
> So convert atomic_t ref counters to refcount_t.
>
> Signed-off-by: Chuhong Yuan 

Thanks.

We don't have a fast implementation of refcount_t, so I'm worried this
could cause a measurable performance regression.

Did you benchmark it at all?

cheers

> diff --git a/arch/powerpc/mm/book3s64/mmu_context.c 
> b/arch/powerpc/mm/book3s64/mmu_context.c
> index 2d0cb5ba9a47..f836fd5a6abc 100644
> --- a/arch/powerpc/mm/book3s64/mmu_context.c
> +++ b/arch/powerpc/mm/book3s64/mmu_context.c
> @@ -231,7 +231,7 @@ static void pmd_frag_destroy(void *pmd_frag)
>   /* drop all the pending references */
>   count = ((unsigned long)pmd_frag & ~PAGE_MASK) >> PMD_FRAG_SIZE_SHIFT;
>   /* We allow PTE_FRAG_NR fragments from a PTE page */
> - if (atomic_sub_and_test(PMD_FRAG_NR - count, &page->pt_frag_refcount)) {
> + if (refcount_sub_and_test(PMD_FRAG_NR - count, &page->pt_frag_refcount)) {
>   pgtable_pmd_page_dtor(page);
>   __free_page(page);
>   }
> diff --git a/arch/powerpc/mm/book3s64/pgtable.c 
> b/arch/powerpc/mm/book3s64/pgtable.c
> index 7d0e0d0d22c4..40056896ce4e 100644
> --- a/arch/powerpc/mm/book3s64/pgtable.c
> +++ b/arch/powerpc/mm/book3s64/pgtable.c
> @@ -277,7 +277,7 @@ static pmd_t *__alloc_for_pmdcache(struct mm_struct *mm)
>   return NULL;
>   }
>  
> - atomic_set(&page->pt_frag_refcount, 1);
> + refcount_set(&page->pt_frag_refcount, 1);
>  
>   ret = page_address(page);
>   /*
> @@ -294,7 +294,7 @@ static pmd_t *__alloc_for_pmdcache(struct mm_struct *mm)
>* count.
>*/
>   if (likely(!mm->context.pmd_frag)) {
> - atomic_set(&page->pt_frag_refcount, PMD_FRAG_NR);
> + refcount_set(&page->pt_frag_refcount, PMD_FRAG_NR);
>   mm->context.pmd_frag = ret + PMD_FRAG_SIZE;
>   }
>   spin_unlock(&mm->page_table_lock);
> @@ -317,8 +317,7 @@ void pmd_fragment_free(unsigned long *pmd)
>  {
>   struct page *page = virt_to_page(pmd);
>  
> - BUG_ON(atomic_read(&page->pt_frag_refcount) <= 0);
> - if (atomic_dec_and_test(&page->pt_frag_refcount)) {
> + if (refcount_dec_and_test(&page->pt_frag_refcount)) {
>   pgtable_pmd_page_dtor(page);
>   __free_page(page);
>   }
> diff --git a/arch/powerpc/mm/pgtable-frag.c b/arch/powerpc/mm/pgtable-frag.c
> index a7b05214760c..4ef8231b677f 100644
> --- a/arch/powerpc/mm/pgtable-frag.c
> +++ b/arch/powerpc/mm/pgtable-frag.c
> @@ -24,7 +24,7 @@ void pte_frag_destroy(void *pte_frag)
>   /* drop all the pending references */
>   count = ((unsigned long)pte_frag & ~PAGE_MASK) >> PTE_FRAG_SIZE_SHIFT;
>   /* We allow PTE_FRAG_NR fragments from a PTE page */
> - if (atomic_sub_and_test(PTE_FRAG_NR - count, &page->pt_frag_refcount)) {
> + if (refcount_sub_and_test(PTE_FRAG_NR - count, &page->pt_frag_refcount)) {
>   pgtable_page_dtor(page);
>   __free_page(page);
>   }
> @@ -71,7 +71,7 @@ static pte_t *__alloc_for_ptecache(struct mm_struct *mm, 
> int kernel)
>   return NULL;
>   }
>  
> - atomic_set(&page->pt_frag_refcount, 1);
> + refcount_set(&page->pt_frag_refcount, 1);
>  
>   ret = page_address(page);
>   /*
> @@ -87,7 +87,7 @@ static pte_t *__alloc_for_ptecache(struct mm_struct *mm, 
> int kernel)
>* count.
>*/
>   if (likely(!pte_frag_get(&mm->context))) {
> - atomic_set(&page->pt_frag_refcount, PTE_FRAG_NR);
> + refcount_set(&page->pt_frag_refcount, PTE_FRAG_NR);
>   pte_frag_set(&mm->context, ret + PTE_FRAG_SIZE);
>   }
>   spin_unlock(&mm->page_table_lock);
> @@ -110,8 +110,7 @@ void pte_fragment_free(unsigned long *table, int kernel)
>  {
>   struct page *page = virt_to_page(table);
>  
> - BUG_ON(atomic_read(&page->pt_frag_refcount) <= 0);
> - if (atomic_dec_and_test(&page->pt_frag_refcount)) {
> + if (refcount_dec_and_test(&page->pt_frag_refcount)) {
>   if (!kernel)
>   pgtable_page_dtor(page);
>   __free_page(page);
> diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
> index 3a37a89eb7a7..7fe23a3faf95 100644
> --- a/include/linux/mm_types.h
> +++ b/include/linux/mm_types.h
> @@ -14,6 +14,7 @@
>  #include 
>  #include 
>  #include 
> +#include <linux/refcount.h>
>  
>  #include 
>  
> @@ -147,7 +148,7 @@ struct page {
>   unsigned long _pt_pad_2;/* mapping */
>   union {
>   struct mm_struct *pt_mm; /* x86 pgds only */
> - atomic_t pt_frag_refcount; /* powerpc */
> + refcount_t pt_frag_refcount; /* powerpc */
>   };
>  #if ALLOC_SPLIT_PTLOCKS
>   spinlock_t *ptl;
> -- 
> 2.20.1


[Bug 204371] BUG kmalloc-4k (Tainted: G W ): Object padding overwritten

2019-08-09 Thread bugzilla-daemon
https://bugzilla.kernel.org/show_bug.cgi?id=204371

--- Comment #11 from m...@ellerman.id.au ---
bugzilla-dae...@bugzilla.kernel.org writes:
> https://bugzilla.kernel.org/show_bug.cgi?id=204371
>
> --- Comment #10 from David Sterba (dste...@suse.com) ---
> In my case it happened on 5.3-rc3, with a strestest. The same machine has
> been
> running fstests periodically, with slab debug on, but there are no slab
> reports
> like that.
>
> [ 8516.870046] BUG kmalloc-4k (Not tainted): Poison overwritten   
> [ 8516.875873]
> - 
>
> [ 8516.885864] Disabling lock debugging due to kernel taint   
> [ 8516.891312] INFO: 0x1c70c8c9-0x3cd1e164. First byte 0x16
> instead of 0x6b   
> [ 8516.899717] INFO: Allocated in btrfs_read_tree_root+0x46/0x120 [btrfs]
> age=1769 cpu=7 pid=8717   
> [ 8516.908544]  __slab_alloc.isra.53+0x3e/0x70
> [ 8516.912861]  kmem_cache_alloc_trace+0x1b0/0x330
> [ 8516.917581]  btrfs_read_tree_root+0x46/0x120 [btrfs]   
> [ 8516.922737]  btrfs_read_fs_root+0xe/0x40 [btrfs]   
> [ 8516.927552]  create_reloc_root+0x17f/0x2a0 [btrfs] 
> [ 8516.932536]  btrfs_init_reloc_root+0x72/0xe0 [btrfs]   
> [ 8516.937686]  record_root_in_trans+0xbb/0xf0 [btrfs]
> [ 8516.942750]  btrfs_record_root_in_trans+0x50/0x70 [btrfs]  
> [ 8516.948340]  start_transaction+0xa1/0x550 [btrfs]  
> [ 8516.953237]  __btrfs_prealloc_file_range+0xca/0x490 [btrfs]
> [ 8516.959003]  btrfs_prealloc_file_range+0x10/0x20 [btrfs]   
> [ 8516.964509]  prealloc_file_extent_cluster+0x13e/0x2b0 [btrfs]  
> [ 8516.970447]  relocate_file_extent_cluster+0x8d/0x530 [btrfs]   
> [ 8516.976305]  relocate_data_extent+0x80/0x110 [btrfs]   
> [ 8516.981469]  relocate_block_group+0x473/0x720 [btrfs]  
> [ 8516.986711]  btrfs_relocate_block_group+0x15f/0x2c0 [btrfs]

So this is looking more like it could be a btrfs bug, given you've both
hit it using btrfs but on different platforms.

cheers

-- 
You are receiving this mail because:
You are on the CC list for the bug.

Re: [Bug 204371] BUG kmalloc-4k (Tainted: G W ): Object padding overwritten

2019-08-09 Thread Michael Ellerman
bugzilla-dae...@bugzilla.kernel.org writes:
> https://bugzilla.kernel.org/show_bug.cgi?id=204371
>
> --- Comment #10 from David Sterba (dste...@suse.com) ---
> In my case it happened on 5.3-rc3, with a strestest. The same machine has been
> running fstests periodically, with slab debug on, but there are no slab 
> reports
> like that.
>
> [ 8516.870046] BUG kmalloc-4k (Not tainted): Poison overwritten   
>   
> [ 8516.875873]
> - 
>   
>
> [ 8516.885864] Disabling lock debugging due to kernel taint   
>   
> [ 8516.891312] INFO: 0x1c70c8c9-0x3cd1e164. First byte 0x16
> instead of 0x6b   
>   
> [ 8516.899717] INFO: Allocated in btrfs_read_tree_root+0x46/0x120 [btrfs]
> age=1769 cpu=7 pid=8717   
>   
> [ 8516.908544]  __slab_alloc.isra.53+0x3e/0x70
>   
> [ 8516.912861]  kmem_cache_alloc_trace+0x1b0/0x330
>   
> [ 8516.917581]  btrfs_read_tree_root+0x46/0x120 [btrfs]   
>   
> [ 8516.922737]  btrfs_read_fs_root+0xe/0x40 [btrfs]   
>   
> [ 8516.927552]  create_reloc_root+0x17f/0x2a0 [btrfs] 
>   
> [ 8516.932536]  btrfs_init_reloc_root+0x72/0xe0 [btrfs]   
>   
> [ 8516.937686]  record_root_in_trans+0xbb/0xf0 [btrfs]
>   
> [ 8516.942750]  btrfs_record_root_in_trans+0x50/0x70 [btrfs]  
>   
> [ 8516.948340]  start_transaction+0xa1/0x550 [btrfs]  
>   
> [ 8516.953237]  __btrfs_prealloc_file_range+0xca/0x490 [btrfs]
>   
> [ 8516.959003]  btrfs_prealloc_file_range+0x10/0x20 [btrfs]   
>   
> [ 8516.964509]  prealloc_file_extent_cluster+0x13e/0x2b0 [btrfs]  
>   
> [ 8516.970447]  relocate_file_extent_cluster+0x8d/0x530 [btrfs]   
>   
> [ 8516.976305]  relocate_data_extent+0x80/0x110 [btrfs]   
>   
> [ 8516.981469]  relocate_block_group+0x473/0x720 [btrfs]  
>   
> [ 8516.986711]  btrfs_relocate_block_group+0x15f/0x2c0 [btrfs]
>   

So this is looking more like it could be a btrfs bug, given you've both
hit it using btrfs but on different platforms.

cheers


[Bug 204375] kernel 5.2.4 w. KASAN enabled fails to boot on a PowerMac G4 3,6 at very early stage

2019-08-09 Thread bugzilla-daemon
https://bugzilla.kernel.org/show_bug.cgi?id=204375

Michael Ellerman (mich...@ellerman.id.au) changed:

   What|Removed |Added

 Status|RESOLVED|CLOSED
 CC||mich...@ellerman.id.au

-- 
You are receiving this mail because:
You are watching the assignee of the bug.

Re: [PATCH v3 38/41] powerpc: convert put_page() to put_user_page*()

2019-08-09 Thread Michael Ellerman
John Hubbard  writes:
> On 8/7/19 10:42 PM, Michael Ellerman wrote:
>> Hi John,
>> 
>> john.hubb...@gmail.com writes:
>>> diff --git a/arch/powerpc/mm/book3s64/iommu_api.c 
>>> b/arch/powerpc/mm/book3s64/iommu_api.c
>>> index b056cae3388b..e126193ba295 100644
>>> --- a/arch/powerpc/mm/book3s64/iommu_api.c
>>> +++ b/arch/powerpc/mm/book3s64/iommu_api.c
>>> @@ -203,6 +202,7 @@ static void mm_iommu_unpin(struct 
>>> mm_iommu_table_group_mem_t *mem)
>>>  {
>>> long i;
>>> struct page *page = NULL;
>>> +   bool dirty = false;
>> 
>> I don't think you need that initialisation do you?
>> 
>
> Nope, it can go. Fixed locally, thanks.

Thanks.

> Did you get a chance to look at enough of the other bits to feel comfortable 
> with the patch, overall?

Mostly :) It's not really my area, but all the conversions looked
correct to me as best as I could tell.

So I'm fine for it to go in as part of the series:

Acked-by: Michael Ellerman  (powerpc)

cheers


Re: [RFC V2 0/1] mm/debug: Add tests for architecture exported page table helpers

2019-08-09 Thread Mark Rutland
On Fri, Aug 09, 2019 at 03:16:33AM -0700, Matthew Wilcox wrote:
> On Fri, Aug 09, 2019 at 01:03:17PM +0530, Anshuman Khandual wrote:
> > Should alloc_gigantic_page() be made available as an interface for general
> > use in the kernel. The test module here uses very similar implementation 
> > from
> > HugeTLB to allocate a PUD aligned memory block. Similar for mm_alloc() which
> > needs to be exported through a header.
> 
> Why are you allocating memory at all instead of just using some
> known-to-exist PFNs like I suggested?

IIUC the issue is that there aren't necessarily known-to-exist PFNs that
are sufficiently aligned -- they may not even exist.

For example, with 64K pages, a PMD covers 512M. The kernel image is
(generally) smaller than 512M, and will be mapped at page granularity.
In that case, any PMD entry for a kernel symbol address will point to
the PTE level table, and that will only necessarily be page-aligned, as
any P?D level table is only necessarily page-aligned.

In the same configuration, you could have less than 512M of total
memory, and none of this memory is necessarily aligned to 512M. So
beyond the PTE level, I don't think you can guarantee a known-to-exist
valid PFN.

I also believe that synthetic PFNs could fail pfn_valid(), so that might
cause us pain too...

Thanks,
Mark.


[PATCH 1/2] ASoC: fsl_esai: Add compatible string for imx6ull

2019-08-09 Thread Shengjiu Wang
Add a compatible string for imx6ull. Starting from the imx6ull platform,
the issue of channel swap after xrun is fixed in hardware.

Signed-off-by: Shengjiu Wang 
---
 sound/soc/fsl/fsl_esai.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sound/soc/fsl/fsl_esai.c b/sound/soc/fsl/fsl_esai.c
index 10d2210c91ef..4b4a8e831e9e 100644
--- a/sound/soc/fsl/fsl_esai.c
+++ b/sound/soc/fsl/fsl_esai.c
@@ -920,6 +920,7 @@ static int fsl_esai_remove(struct platform_device *pdev)
 static const struct of_device_id fsl_esai_dt_ids[] = {
{ .compatible = "fsl,imx35-esai", },
{ .compatible = "fsl,vf610-esai", },
+   { .compatible = "fsl,imx6ull-esai", },
{}
 };
 MODULE_DEVICE_TABLE(of, fsl_esai_dt_ids);
-- 
2.21.0



[PATCH 2/2] ASoC: fsl_esai: Add new compatible string for imx6ull

2019-08-09 Thread Shengjiu Wang
Add the new compatible string "fsl,imx6ull-esai" to the binding document.

Signed-off-by: Shengjiu Wang 
---
 Documentation/devicetree/bindings/sound/fsl,esai.txt | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/Documentation/devicetree/bindings/sound/fsl,esai.txt 
b/Documentation/devicetree/bindings/sound/fsl,esai.txt
index 5b9914367610..0e6e2166f76c 100644
--- a/Documentation/devicetree/bindings/sound/fsl,esai.txt
+++ b/Documentation/devicetree/bindings/sound/fsl,esai.txt
@@ -7,8 +7,11 @@ other DSPs. It has up to six transmitters and four receivers.
 
 Required properties:
 
-  - compatible : Compatible list, must contain "fsl,imx35-esai" or
- "fsl,vf610-esai"
+  - compatible : Compatible list, should contain one of the following
+ compatibles:
+ "fsl,imx35-esai",
+ "fsl,vf610-esai",
+ "fsl,imx6ull-esai",
 
   - reg: Offset and length of the register set for the 
device.
 
-- 
2.21.0



[Bug 204479] KASAN hit at modprobe zram

2019-08-09 Thread bugzilla-daemon
https://bugzilla.kernel.org/show_bug.cgi?id=204479

--- Comment #10 from Erhard F. (erhar...@mailbox.org) ---
Created attachment 284297
  --> https://bugzilla.kernel.org/attachment.cgi?id=284297&action=edit
dmesg (kernel 5.3-rc3 + patch, without CONFIG_SMP, PowerMac G4 DP)

Here's the dmesg with the kernel built without CONFIG_SMP.

-- 
You are receiving this mail because:
You are on the CC list for the bug.

Re: [RFC V2 0/1] mm/debug: Add tests for architecture exported page table helpers

2019-08-09 Thread Anshuman Khandual



On 08/09/2019 03:46 PM, Matthew Wilcox wrote:
> On Fri, Aug 09, 2019 at 01:03:17PM +0530, Anshuman Khandual wrote:
>> Should alloc_gigantic_page() be made available as an interface for general
>> use in the kernel. The test module here uses very similar implementation from
>> HugeTLB to allocate a PUD aligned memory block. Similar for mm_alloc() which
>> needs to be exported through a header.
> 
> Why are you allocating memory at all instead of just using some
> known-to-exist PFNs like I suggested?

We needed PFN to be PUD aligned for pfn_pud() and PMD aligned for mk_pmd().
Now walking the kernel page table for a known symbol like kernel_init()
as you had suggested earlier we might encounter page table page entries at PMD
and PUD which might not be PMD or PUD aligned respectively. It seemed to me
that alignment requirement is applicable only for mk_pmd() and pfn_pud()
which create large mappings at those levels but that requirement does not
exist for page table pages pointing to next level. Is not that correct ? Or
I am missing something here ?
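
A minimal sketch of the kind of alignment check being discussed, using the
generic kernel macros (an illustration only, not code from the test module):

    /* Only pfns aligned to the huge-mapping size are suitable to feed into
     * mk_pmd()/pfn_pud() style helpers; page-table pages pointing to the
     * next level only need page alignment. */
    static bool pfn_aligned_for_pmd(unsigned long pfn)
    {
            return IS_ALIGNED(pfn, PMD_SIZE >> PAGE_SHIFT);
    }

    static bool pfn_aligned_for_pud(unsigned long pfn)
    {
            return IS_ALIGNED(pfn, PUD_SIZE >> PAGE_SHIFT);
    }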


Re: [RFC V2 0/1] mm/debug: Add tests for architecture exported page table helpers

2019-08-09 Thread Matthew Wilcox
On Fri, Aug 09, 2019 at 01:03:17PM +0530, Anshuman Khandual wrote:
> Should alloc_gigantic_page() be made available as an interface for general
> use in the kernel. The test module here uses very similar implementation from
> HugeTLB to allocate a PUD aligned memory block. Similar for mm_alloc() which
> needs to be exported through a header.

Why are you allocating memory at all instead of just using some
known-to-exist PFNs like I suggested?


[PATCH v6 11/12] powerpc/fsl_booke/kaslr: export offset in VMCOREINFO ELF notes

2019-08-09 Thread Jason Yan
Like other architectures such as x86 or arm64, include the KASLR offset
in VMCOREINFO ELF notes to assist in debugging. After this, we can use the
crash --kaslr option to parse a vmcore generated from a KASLR kernel.

Note: The crash tool needs to support --kaslr too.

Signed-off-by: Jason Yan 
Cc: Diana Craciun 
Cc: Michael Ellerman 
Cc: Christophe Leroy 
Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: Nicholas Piggin 
Cc: Kees Cook 
---
 arch/powerpc/kernel/machine_kexec.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/kernel/machine_kexec.c 
b/arch/powerpc/kernel/machine_kexec.c
index c4ed328a7b96..078fe3d76feb 100644
--- a/arch/powerpc/kernel/machine_kexec.c
+++ b/arch/powerpc/kernel/machine_kexec.c
@@ -86,6 +86,7 @@ void arch_crash_save_vmcoreinfo(void)
VMCOREINFO_STRUCT_SIZE(mmu_psize_def);
VMCOREINFO_OFFSET(mmu_psize_def, shift);
 #endif
+   vmcoreinfo_append_str("KERNELOFFSET=%lx\n", kaslr_offset());
 }
 
 /*
-- 
2.17.2



[PATCH v6 12/12] powerpc/fsl_booke/32: Document KASLR implementation

2019-08-09 Thread Jason Yan
Add document to explain how we implement KASLR for fsl_booke32.

Signed-off-by: Jason Yan 
Cc: Diana Craciun 
Cc: Michael Ellerman 
Cc: Christophe Leroy 
Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: Nicholas Piggin 
Cc: Kees Cook 
---
 Documentation/powerpc/kaslr-booke32.rst | 42 +
 1 file changed, 42 insertions(+)
 create mode 100644 Documentation/powerpc/kaslr-booke32.rst

diff --git a/Documentation/powerpc/kaslr-booke32.rst 
b/Documentation/powerpc/kaslr-booke32.rst
new file mode 100644
index ..8b259fdfdf03
--- /dev/null
+++ b/Documentation/powerpc/kaslr-booke32.rst
@@ -0,0 +1,42 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===
+KASLR for Freescale BookE32
+===
+
+The word KASLR stands for Kernel Address Space Layout Randomization.
+
+This document tries to explain the implementation of the KASLR for
+Freescale BookE32. KASLR is a security feature that deters exploit
+attempts relying on knowledge of the location of kernel internals.
+
+Since CONFIG_RELOCATABLE is already supported, what we need to do is
+map or copy the kernel to a proper place and relocate. Freescale Book-E
+parts expect lowmem to be mapped by fixed TLB entries (TLB1). The TLB1
+entries are not suitable to map the kernel directly in a randomized
+region, so we chose to copy the kernel to a proper place and restart to
+relocate.
+
+Entropy is derived from the banner and timer base, which will change every
+build and boot. This is not entirely safe, so additionally the bootloader may
+pass entropy via the /chosen/kaslr-seed node in the device tree.
+
+We will use the first 512M of the low memory to randomize the kernel
+image. The memory will be split into 64M zones. We will use the lower 8
+bits of the entropy to decide the index of the 64M zone. Then we choose a
+16K aligned offset inside the 64M zone to put the kernel in::
+
+    KERNELBASE
+
+        |-->   64M   <--|
+        |               |
+        +---------------+    +----------------+---------------+
+        |               |....|    |kernel|    |               |
+        +---------------+    +----------------+---------------+
+        |                         |
+        |----->   offset    <-----|
+
+                              kernstart_virt_addr
+
+To enable KASLR, set CONFIG_RANDOMIZE_BASE = y. If KASLR is enabled and you
+want to disable it at runtime, add "nokaslr" to the kernel cmdline.
-- 
2.17.2



[PATCH v6 09/12] powerpc/fsl_booke/kaslr: support nokaslr cmdline parameter

2019-08-09 Thread Jason Yan
One may want to disable KASLR at boot time, so provide a cmdline parameter
'nokaslr' to support this.

Signed-off-by: Jason Yan 
Cc: Diana Craciun 
Cc: Michael Ellerman 
Cc: Christophe Leroy 
Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: Nicholas Piggin 
Cc: Kees Cook 
Reviewed-by: Diana Craciun 
Tested-by: Diana Craciun 
Reviewed-by: Christophe Leroy 
---
 arch/powerpc/kernel/kaslr_booke.c | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/arch/powerpc/kernel/kaslr_booke.c 
b/arch/powerpc/kernel/kaslr_booke.c
index 9a360b6124ed..fd32ae10c218 100644
--- a/arch/powerpc/kernel/kaslr_booke.c
+++ b/arch/powerpc/kernel/kaslr_booke.c
@@ -334,6 +334,11 @@ static unsigned long __init kaslr_choose_location(void 
*dt_ptr, phys_addr_t size
return kaslr_offset;
 }
 
+static inline __init bool kaslr_disabled(void)
+{
+   return strstr(boot_command_line, "nokaslr") != NULL;
+}
+
 /*
  * To see if we need to relocate the kernel to a random offset
  * void *dt_ptr - address of the device tree
@@ -349,6 +354,8 @@ notrace void __init kaslr_early_init(void *dt_ptr, 
phys_addr_t size)
kernel_sz = (unsigned long)_end - KERNELBASE;
 
kaslr_get_cmdline(dt_ptr);
+   if (kaslr_disabled())
+   return;
 
offset = kaslr_choose_location(dt_ptr, size, kernel_sz);
 
-- 
2.17.2



[PATCH v6 08/12] powerpc/fsl_booke/kaslr: clear the original kernel if randomized

2019-08-09 Thread Jason Yan
The original kernel still exists in memory; clear it now.

Signed-off-by: Jason Yan 
Cc: Diana Craciun 
Cc: Michael Ellerman 
Cc: Christophe Leroy 
Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: Nicholas Piggin 
Cc: Kees Cook 
Reviewed-by: Christophe Leroy 
Reviewed-by: Diana Craciun 
Tested-by: Diana Craciun 
---
 arch/powerpc/kernel/kaslr_booke.c  | 11 +++
 arch/powerpc/mm/mmu_decl.h |  2 ++
 arch/powerpc/mm/nohash/fsl_booke.c |  1 +
 3 files changed, 14 insertions(+)

diff --git a/arch/powerpc/kernel/kaslr_booke.c 
b/arch/powerpc/kernel/kaslr_booke.c
index 51a0b3749724..9a360b6124ed 100644
--- a/arch/powerpc/kernel/kaslr_booke.c
+++ b/arch/powerpc/kernel/kaslr_booke.c
@@ -373,3 +373,14 @@ notrace void __init kaslr_early_init(void *dt_ptr, 
phys_addr_t size)
 
reloc_kernel_entry(dt_ptr, kernstart_virt_addr);
 }
+
+void __init kaslr_late_init(void)
+{
+   /* If randomized, clear the original kernel */
+   if (kernstart_virt_addr != KERNELBASE) {
+   unsigned long kernel_sz;
+
+   kernel_sz = (unsigned long)_end - kernstart_virt_addr;
+   memzero_explicit((void *)KERNELBASE, kernel_sz);
+   }
+}
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h
index 213997d69729..64b2ac8a5343 100644
--- a/arch/powerpc/mm/mmu_decl.h
+++ b/arch/powerpc/mm/mmu_decl.h
@@ -151,8 +151,10 @@ extern void loadcam_multi(int first_idx, int num, int 
tmp_idx);
 
 #ifdef CONFIG_RANDOMIZE_BASE
 void kaslr_early_init(void *dt_ptr, phys_addr_t size);
+void kaslr_late_init(void);
 #else
 static inline void kaslr_early_init(void *dt_ptr, phys_addr_t size) {}
+static inline void kaslr_late_init(void) {}
 #endif
 
 struct tlbcam {
diff --git a/arch/powerpc/mm/nohash/fsl_booke.c 
b/arch/powerpc/mm/nohash/fsl_booke.c
index 2dc27cf88add..b4eb06ceb189 100644
--- a/arch/powerpc/mm/nohash/fsl_booke.c
+++ b/arch/powerpc/mm/nohash/fsl_booke.c
@@ -269,6 +269,7 @@ notrace void __init relocate_init(u64 dt_ptr, phys_addr_t 
start)
kernstart_addr = start;
if (is_second_reloc) {
virt_phys_offset = PAGE_OFFSET - memstart_addr;
+   kaslr_late_init();
return;
}
 
-- 
2.17.2



[PATCH v6 06/12] powerpc/fsl_booke/32: implement KASLR infrastructure

2019-08-09 Thread Jason Yan
This patch adds support for booting the kernel from places other than
KERNELBASE. Since CONFIG_RELOCATABLE is already supported, what we need to do
is map or copy the kernel to a proper place and relocate. Freescale Book-E
parts expect lowmem to be mapped by fixed TLB entries (TLB1). The TLB1
entries are not suitable to map the kernel directly in a randomized
region, so we chose to copy the kernel to a proper place and restart to
relocate.

The offset of the kernel is not randomized yet (a fixed 64M is used). We
will randomize it in the next patch.
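
(For orientation, the overall flow this patch introduces looks roughly like
the sketch below; it is simplified, and the real code is kaslr_early_init()
and its helpers in the diff that follows:)

    /* Sketch only: pick a target, map it with a 64M TLB1 entry, copy the
     * kernel there, then restart relocation at the new virtual base. */
    static void __init kaslr_relocate_sketch(void *dt_ptr, phys_addr_t size)
    {
            unsigned long kernel_sz = (unsigned long)_end - KERNELBASE;
            unsigned long offset = kaslr_choose_location(dt_ptr, size, kernel_sz);

            if (!offset)
                    return;                    /* keep the original location */

            kernstart_virt_addr += offset;     /* new virtual base */
            create_tlb_entry(kernstart_addr + offset, kernstart_virt_addr, 1);
            memcpy((void *)kernstart_virt_addr, (void *)KERNELBASE, kernel_sz);
            flush_icache_range(kernstart_virt_addr,
                               kernstart_virt_addr + kernel_sz);
            reloc_kernel_entry(dt_ptr, kernstart_virt_addr);
    }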

Signed-off-by: Jason Yan 
Cc: Diana Craciun 
Cc: Michael Ellerman 
Cc: Christophe Leroy 
Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: Nicholas Piggin 
Cc: Kees Cook 
Tested-by: Diana Craciun 
Reviewed-by: Christophe Leroy 
---
 arch/powerpc/Kconfig  | 11 
 arch/powerpc/kernel/Makefile  |  1 +
 arch/powerpc/kernel/early_32.c|  2 +-
 arch/powerpc/kernel/fsl_booke_entry_mapping.S | 17 +++--
 arch/powerpc/kernel/head_fsl_booke.S  | 13 +++-
 arch/powerpc/kernel/kaslr_booke.c | 62 +++
 arch/powerpc/mm/mmu_decl.h|  7 +++
 arch/powerpc/mm/nohash/fsl_booke.c|  7 ++-
 8 files changed, 105 insertions(+), 15 deletions(-)
 create mode 100644 arch/powerpc/kernel/kaslr_booke.c

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 77f6ebf97113..710c12ef7159 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -548,6 +548,17 @@ config RELOCATABLE
  setting can still be useful to bootwrappers that need to know the
  load address of the kernel (eg. u-boot/mkimage).
 
+config RANDOMIZE_BASE
+   bool "Randomize the address of the kernel image"
+   depends on (FSL_BOOKE && FLATMEM && PPC32)
+   depends on RELOCATABLE
+   help
+ Randomizes the virtual address at which the kernel image is
+ loaded, as a security feature that deters exploit attempts
+ relying on knowledge of the location of kernel internals.
+
+ If unsure, say N.
+
 config RELOCATABLE_TEST
bool "Test relocatable kernel"
depends on (PPC64 && RELOCATABLE)
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index ea0c69236789..32f6c5b99307 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -106,6 +106,7 @@ extra-$(CONFIG_PPC_8xx) := head_8xx.o
 extra-y+= vmlinux.lds
 
 obj-$(CONFIG_RELOCATABLE)  += reloc_$(BITS).o
+obj-$(CONFIG_RANDOMIZE_BASE)   += kaslr_booke.o
 
 obj-$(CONFIG_PPC32)+= entry_32.o setup_32.o early_32.o
 obj-$(CONFIG_PPC64)+= dma-iommu.o iommu.o
diff --git a/arch/powerpc/kernel/early_32.c b/arch/powerpc/kernel/early_32.c
index 3482118ffe76..0c5849fd936d 100644
--- a/arch/powerpc/kernel/early_32.c
+++ b/arch/powerpc/kernel/early_32.c
@@ -32,5 +32,5 @@ notrace unsigned long __init early_init(unsigned long dt_ptr)
 
apply_feature_fixups();
 
-   return KERNELBASE + offset;
+   return kernstart_virt_addr + offset;
 }
diff --git a/arch/powerpc/kernel/fsl_booke_entry_mapping.S 
b/arch/powerpc/kernel/fsl_booke_entry_mapping.S
index f4d3eaae54a9..641920d4f694 100644
--- a/arch/powerpc/kernel/fsl_booke_entry_mapping.S
+++ b/arch/powerpc/kernel/fsl_booke_entry_mapping.S
@@ -155,23 +155,22 @@ skpinv:   addir6,r6,1 /* 
Increment */
 
 #if defined(ENTRY_MAPPING_BOOT_SETUP)
 
-/* 6. Setup KERNELBASE mapping in TLB1[0] */
+/* 6. Setup kernstart_virt_addr mapping in TLB1[0] */
lis r6,0x1000   /* Set MAS0(TLBSEL) = TLB1(1), ESEL = 0 
*/
mtspr   SPRN_MAS0,r6
lis r6,(MAS1_VALID|MAS1_IPROT)@h
ori r6,r6,(MAS1_TSIZE(BOOK3E_PAGESZ_64M))@l
mtspr   SPRN_MAS1,r6
-   lis r6,MAS2_VAL(PAGE_OFFSET, BOOK3E_PAGESZ_64M, MAS2_M_IF_NEEDED)@h
-   ori r6,r6,MAS2_VAL(PAGE_OFFSET, BOOK3E_PAGESZ_64M, 
MAS2_M_IF_NEEDED)@l
-   mtspr   SPRN_MAS2,r6
+   lis r6,MAS2_EPN_MASK(BOOK3E_PAGESZ_64M)@h
+   ori r6,r6,MAS2_EPN_MASK(BOOK3E_PAGESZ_64M)@l
+   and r6,r6,r20
+   ori r6,r6,MAS2_M_IF_NEEDED@l
+   mtspr   SPRN_MAS2,r6
mtspr   SPRN_MAS3,r8
tlbwe
 
-/* 7. Jump to KERNELBASE mapping */
-   lis r6,(KERNELBASE & ~0xfff)@h
-   ori r6,r6,(KERNELBASE & ~0xfff)@l
-   rlwinm  r7,r25,0,0x03ffffff
-   add r6,r7,r6
+/* 7. Jump to kernstart_virt_addr mapping */
+   mr  r6,r20
 
 #elif defined(ENTRY_MAPPING_KEXEC_SETUP)
 /*
diff --git a/arch/powerpc/kernel/head_fsl_booke.S 
b/arch/powerpc/kernel/head_fsl_booke.S
index 2083382dd662..f7a5c5f03c72 100644
--- a/arch/powerpc/kernel/head_fsl_booke.S
+++ b/arch/powerpc/kernel/head_fsl_booke.S
@@ -155,6 +155,8 @@ _ENTRY(_start);
  */
 
 _ENTRY(__early_start)
+   LOAD_REG_ADDR_PIC(r20, kernstart_virt_addr)
+   lwz r20,0(r20)
 
 #define ENTRY_MAPPING_BOOT_SETUP
 

[PATCH v6 10/12] powerpc/fsl_booke/kaslr: dump out kernel offset information on panic

2019-08-09 Thread Jason Yan
When kaslr is enabled, the kernel offset is different for every boot.
This makes it more difficult to debug the kernel. Dump out the kernel
offset on panic so that we can easily debug the kernel.

This code is derived from x86/arm64 which has similar functionality.
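
(With the pr_emerg() below, a panic on a randomized kernel prints a line of
this shape; the numbers here are made up for illustration only:)

    Kernel Offset: 0x5c00000 from 0xc0000000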

Signed-off-by: Jason Yan 
Cc: Diana Craciun 
Cc: Michael Ellerman 
Cc: Christophe Leroy 
Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: Nicholas Piggin 
Cc: Kees Cook 
Reviewed-by: Christophe Leroy 
Reviewed-by: Diana Craciun 
Tested-by: Diana Craciun 
---
 arch/powerpc/include/asm/page.h|  5 +
 arch/powerpc/kernel/setup-common.c | 20 
 2 files changed, 25 insertions(+)

diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
index 4d32d1b561d6..b34b9cdd91f1 100644
--- a/arch/powerpc/include/asm/page.h
+++ b/arch/powerpc/include/asm/page.h
@@ -317,6 +317,11 @@ struct vm_area_struct;
 
 extern unsigned long kernstart_virt_addr;
 
+static inline unsigned long kaslr_offset(void)
+{
+   return kernstart_virt_addr - KERNELBASE;
+}
+
 #include 
 #endif /* __ASSEMBLY__ */
 #include 
diff --git a/arch/powerpc/kernel/setup-common.c 
b/arch/powerpc/kernel/setup-common.c
index 1f8db666468d..ba1a34ab218a 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -715,8 +715,28 @@ static struct notifier_block ppc_panic_block = {
.priority = INT_MIN /* may not return; must be done last */
 };
 
+/*
+ * Dump out kernel offset information on panic.
+ */
+static int dump_kernel_offset(struct notifier_block *self, unsigned long v,
+ void *p)
+{
+   pr_emerg("Kernel Offset: 0x%lx from 0x%lx\n",
+kaslr_offset(), KERNELBASE);
+
+   return 0;
+}
+
+static struct notifier_block kernel_offset_notifier = {
+   .notifier_call = dump_kernel_offset
+};
+
 void __init setup_panic(void)
 {
+   if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && kaslr_offset() > 0)
+   atomic_notifier_chain_register(&panic_notifier_list,
+  &kernel_offset_notifier);
+
/* PPC64 always does a hard irq disable in its panic handler */
if (!IS_ENABLED(CONFIG_PPC64) && !ppc_md.panic)
return;
-- 
2.17.2



[PATCH v6 05/12] powerpc/fsl_booke/32: introduce reloc_kernel_entry() helper

2019-08-09 Thread Jason Yan
Add a new helper reloc_kernel_entry() to jump back to the start of the
new kernel. After we put the new kernel in a randomized place we can use
this new helper to enter the kernel and begin to relocate again.

Signed-off-by: Jason Yan 
Cc: Diana Craciun 
Cc: Michael Ellerman 
Cc: Christophe Leroy 
Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: Nicholas Piggin 
Cc: Kees Cook 
Reviewed-by: Christophe Leroy 
Reviewed-by: Diana Craciun 
Tested-by: Diana Craciun 
---
 arch/powerpc/kernel/head_fsl_booke.S | 13 +
 arch/powerpc/mm/mmu_decl.h   |  1 +
 2 files changed, 14 insertions(+)

diff --git a/arch/powerpc/kernel/head_fsl_booke.S 
b/arch/powerpc/kernel/head_fsl_booke.S
index 04d124fee17d..2083382dd662 100644
--- a/arch/powerpc/kernel/head_fsl_booke.S
+++ b/arch/powerpc/kernel/head_fsl_booke.S
@@ -1143,6 +1143,19 @@ _GLOBAL(create_tlb_entry)
sync
blr
 
+/*
+ * Return to the start of the relocated kernel and run again
+ * r3 - virtual address of fdt
+ * r4 - entry of the kernel
+ */
+_GLOBAL(reloc_kernel_entry)
+   mfmsr   r7
+   rlwinm  r7, r7, 0, ~(MSR_IS | MSR_DS)
+
+   mtspr   SPRN_SRR0,r4
+   mtspr   SPRN_SRR1,r7
+   rfi
+
 /*
  * Create a tlb entry with the same effective and physical address as
  * the tlb entry used by the current running code. But set the TS to 1.
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h
index a09f89d3aa0f..804da298beb3 100644
--- a/arch/powerpc/mm/mmu_decl.h
+++ b/arch/powerpc/mm/mmu_decl.h
@@ -143,6 +143,7 @@ extern void adjust_total_lowmem(void);
 extern int switch_to_as1(void);
 extern void restore_to_as0(int esel, int offset, void *dt_ptr, int bootcpu);
 void create_tlb_entry(phys_addr_t phys, unsigned long virt, int entry);
+void reloc_kernel_entry(void *fdt, int addr);
 #endif
 extern void loadcam_entry(unsigned int index);
 extern void loadcam_multi(int first_idx, int num, int tmp_idx);
-- 
2.17.2



[PATCH v6 07/12] powerpc/fsl_booke/32: randomize the kernel image offset

2019-08-09 Thread Jason Yan
After we have basic support for relocating the kernel to an appropriate
place, we can start to randomize the offset now.

Entropy is derived from the banner and timer, which will change every
build and boot. This is not entirely safe, so additionally the bootloader may
pass entropy via the /chosen/kaslr-seed node in the device tree.

We will use the first 512M of the low memory to randomize the kernel
image. The memory will be split into 64M zones. We will use the lower 8
bits of the entropy to decide the index of the 64M zone. Then we choose a
16K aligned offset inside the 64M zone to put the kernel in.
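
(A minimal sketch of the zone/offset selection described above; the helper
name and the exact arithmetic are illustrative, not the code in this patch:)

    /* Sketch: pick one of the eight 64M zones in the first 512M using the
     * low 8 bits of the entropy, then a 16K-aligned offset inside it.
     * Assumes the kernel fits within a single 64M zone. */
    static unsigned long pick_kernel_offset(unsigned long entropy,
                                            unsigned long kernel_sz)
    {
            unsigned long zones = SZ_512M / SZ_64M;          /* 8 zones     */
            unsigned long zone = (entropy & 0xff) % zones;   /* low 8 bits  */
            unsigned long room = SZ_64M - round_up(kernel_sz, SZ_16K);
            unsigned long off = (entropy >> 8) % room;

            return zone * SZ_64M + round_down(off, SZ_16K);  /* 16K aligned */
    }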

We also check if we would overlap with some areas like the dtb area, the
initrd area or the crashkernel area. If we cannot find a proper area,
KASLR will be disabled and the kernel will boot from its original location.

Some pieces of code are derived from arch/x86/boot/compressed/kaslr.c or
arch/arm64/kernel/kaslr.c such as rotate_xor(). Credit goes to Kees and
Ard.

Signed-off-by: Jason Yan 
Cc: Diana Craciun 
Cc: Michael Ellerman 
Cc: Christophe Leroy 
Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: Nicholas Piggin 
Cc: Kees Cook 
Reviewed-by: Diana Craciun 
Tested-by: Diana Craciun 
Reviewed-by: Christophe Leroy 
---
 arch/powerpc/kernel/kaslr_booke.c | 317 +-
 1 file changed, 315 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/kaslr_booke.c 
b/arch/powerpc/kernel/kaslr_booke.c
index f8dc60534ac1..51a0b3749724 100644
--- a/arch/powerpc/kernel/kaslr_booke.c
+++ b/arch/powerpc/kernel/kaslr_booke.c
@@ -12,15 +12,326 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 #include 
 #include 
+#include 
 #include 
+#include 
+#include 
+
+struct regions {
+   unsigned long pa_start;
+   unsigned long pa_end;
+   unsigned long kernel_size;
+   unsigned long dtb_start;
+   unsigned long dtb_end;
+   unsigned long initrd_start;
+   unsigned long initrd_end;
+   unsigned long crash_start;
+   unsigned long crash_end;
+   int reserved_mem;
+   int reserved_mem_addr_cells;
+   int reserved_mem_size_cells;
+};
+
+/* Simplified build-specific string for starting entropy. */
+static const char build_str[] = UTS_RELEASE " (" LINUX_COMPILE_BY "@"
+   LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION;
+
+struct regions __initdata regions;
+
+static __init void kaslr_get_cmdline(void *fdt)
+{
+   int node = fdt_path_offset(fdt, "/chosen");
+
+   early_init_dt_scan_chosen(node, "chosen", 1, boot_command_line);
+}
+
+static unsigned long __init rotate_xor(unsigned long hash, const void *area,
+  size_t size)
+{
+   size_t i;
+   const unsigned long *ptr = area;
+
+   for (i = 0; i < size / sizeof(hash); i++) {
+   /* Rotate by odd number of bits and XOR. */
+   hash = (hash << ((sizeof(hash) * 8) - 7)) | (hash >> 7);
+   hash ^= ptr[i];
+   }
+
+   return hash;
+}
+
+/* Attempt to create a simple starting entropy. This can make it different for
+ * every build but it is still not enough. Stronger entropy should
+ * be added to make it change for every boot.
+ */
+static unsigned long __init get_boot_seed(void *fdt)
+{
+   unsigned long hash = 0;
+
+   hash = rotate_xor(hash, build_str, sizeof(build_str));
+   hash = rotate_xor(hash, fdt, fdt_totalsize(fdt));
+
+   return hash;
+}
+
+static __init u64 get_kaslr_seed(void *fdt)
+{
+   int node, len;
+   fdt64_t *prop;
+   u64 ret;
+
+   node = fdt_path_offset(fdt, "/chosen");
+   if (node < 0)
+   return 0;
+
+   prop = fdt_getprop_w(fdt, node, "kaslr-seed", &len);
+   if (!prop || len != sizeof(u64))
+   return 0;
+
+   ret = fdt64_to_cpu(*prop);
+   *prop = 0;
+   return ret;
+}
+
+static __init bool regions_overlap(u32 s1, u32 e1, u32 s2, u32 e2)
+{
+   return e1 >= s2 && e2 >= s1;
+}
+
+static __init bool overlaps_reserved_region(const void *fdt, u32 start,
+   u32 end)
+{
+   int subnode, len, i;
+   u64 base, size;
+
+   /* check for overlap with /memreserve/ entries */
+   for (i = 0; i < fdt_num_mem_rsv(fdt); i++) {
+   if (fdt_get_mem_rsv(fdt, i, &base, &size) < 0)
+   continue;
+   if (regions_overlap(start, end, base, base + size))
+   return true;
+   }
+
+   if (regions.reserved_mem < 0)
+   return false;
+
+   /* check for overlap with static reservations in /reserved-memory */
+   for (subnode = fdt_first_subnode(fdt, regions.reserved_mem);
+subnode >= 0;
+subnode = fdt_next_subnode(fdt, subnode)) {
+   const fdt32_t *reg;
+   u64 rsv_end;
+
+   len = 0;
+   reg = fdt_getprop(fdt, subnode, "reg", &len);
+   while (len >= (regions.reserved_mem_addr_cells +
+   

[PATCH v6 04/12] powerpc/fsl_booke/32: introduce create_tlb_entry() helper

2019-08-09 Thread Jason Yan
Add a new helper create_tlb_entry() to create a TLB entry from a virtual
and physical address. This is a preparation for booting the kernel at a
randomized address.
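
(Given the C prototype added to mmu_decl.h below, a call site would look
roughly like this -- a hypothetical example, not taken from the series:)

    /* map a 64M region: physical 'phys' at virtual 'virt', into TLB1 entry 1 */
    create_tlb_entry(phys, virt, 1);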

Signed-off-by: Jason Yan 
Cc: Diana Craciun 
Cc: Michael Ellerman 
Cc: Christophe Leroy 
Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: Nicholas Piggin 
Cc: Kees Cook 
Reviewed-by: Christophe Leroy 
Reviewed-by: Diana Craciun 
Tested-by: Diana Craciun 
---
 arch/powerpc/kernel/head_fsl_booke.S | 29 
 arch/powerpc/mm/mmu_decl.h   |  1 +
 2 files changed, 30 insertions(+)

diff --git a/arch/powerpc/kernel/head_fsl_booke.S 
b/arch/powerpc/kernel/head_fsl_booke.S
index adf0505dbe02..04d124fee17d 100644
--- a/arch/powerpc/kernel/head_fsl_booke.S
+++ b/arch/powerpc/kernel/head_fsl_booke.S
@@ -1114,6 +1114,35 @@ __secondary_hold_acknowledge:
.long   -1
 #endif
 
+/*
+ * Create a 64M tlb by address and entry
+ * r3/r4 - physical address
+ * r5 - virtual address
+ * r6 - entry
+ */
+_GLOBAL(create_tlb_entry)
+   lis r7,0x1000   /* Set MAS0(TLBSEL) = 1 */
+   rlwimi  r7,r6,16,4,15   /* Setup MAS0 = TLBSEL | ESEL(r6) */
+   mtspr   SPRN_MAS0,r7/* Write MAS0 */
+
+   lis r6,(MAS1_VALID|MAS1_IPROT)@h
+   ori r6,r6,(MAS1_TSIZE(BOOK3E_PAGESZ_64M))@l
+   mtspr   SPRN_MAS1,r6/* Write MAS1 */
+
+   lis r6,MAS2_EPN_MASK(BOOK3E_PAGESZ_64M)@h
+   ori r6,r6,MAS2_EPN_MASK(BOOK3E_PAGESZ_64M)@l
+   and r6,r6,r5
+   ori r6,r6,MAS2_M@l
+   mtspr   SPRN_MAS2,r6/* Write MAS2(EPN) */
+
+   ori r8,r4,(MAS3_SW|MAS3_SR|MAS3_SX)
+   mtspr   SPRN_MAS3,r8/* Write MAS3(RPN) */
+
+   tlbwe   /* Write TLB */
+   isync
+   sync
+   blr
+
 /*
  * Create a tlb entry with the same effective and physical address as
  * the tlb entry used by the current running code. But set the TS to 1.
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h
index 32c1a191c28a..a09f89d3aa0f 100644
--- a/arch/powerpc/mm/mmu_decl.h
+++ b/arch/powerpc/mm/mmu_decl.h
@@ -142,6 +142,7 @@ extern unsigned long calc_cam_sz(unsigned long ram, 
unsigned long virt,
 extern void adjust_total_lowmem(void);
 extern int switch_to_as1(void);
 extern void restore_to_as0(int esel, int offset, void *dt_ptr, int bootcpu);
+void create_tlb_entry(phys_addr_t phys, unsigned long virt, int entry);
 #endif
 extern void loadcam_entry(unsigned int index);
 extern void loadcam_multi(int first_idx, int num, int tmp_idx);
-- 
2.17.2



[PATCH v6 01/12] powerpc: unify definition of M_IF_NEEDED

2019-08-09 Thread Jason Yan
M_IF_NEEDED is defined too many times. Move it to a common place and
rename it to MAS2_M_IF_NEEDED, which is more readable.

Signed-off-by: Jason Yan 
Cc: Diana Craciun 
Cc: Michael Ellerman 
Cc: Christophe Leroy 
Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: Nicholas Piggin 
Cc: Kees Cook 
Reviewed-by: Christophe Leroy 
Reviewed-by: Diana Craciun 
Tested-by: Diana Craciun 
---
 arch/powerpc/include/asm/nohash/mmu-book3e.h  | 10 ++
 arch/powerpc/kernel/exceptions-64e.S  | 12 +---
 arch/powerpc/kernel/fsl_booke_entry_mapping.S | 14 ++
 arch/powerpc/kernel/misc_64.S |  7 +--
 4 files changed, 14 insertions(+), 29 deletions(-)

diff --git a/arch/powerpc/include/asm/nohash/mmu-book3e.h 
b/arch/powerpc/include/asm/nohash/mmu-book3e.h
index 4c9777d256fb..fa3efc2d310f 100644
--- a/arch/powerpc/include/asm/nohash/mmu-book3e.h
+++ b/arch/powerpc/include/asm/nohash/mmu-book3e.h
@@ -221,6 +221,16 @@
#define TLBILX_T_CLASS2   6
#define TLBILX_T_CLASS3   7
 
+/*
+ * The mapping only needs to be cache-coherent on SMP, except on
+ * Freescale e500mc derivatives where it's also needed for coherent DMA.
+ */
+#if defined(CONFIG_SMP) || defined(CONFIG_PPC_E500MC)
+#define MAS2_M_IF_NEEDED   MAS2_M
+#else
+#define MAS2_M_IF_NEEDED   0
+#endif
+
 #ifndef __ASSEMBLY__
 #include 
 
diff --git a/arch/powerpc/kernel/exceptions-64e.S 
b/arch/powerpc/kernel/exceptions-64e.S
index 1cfb3da4a84a..c5bc09b5e281 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -1342,16 +1342,6 @@ skpinv:  addir6,r6,1 /* 
Increment */
sync
isync
 
-/*
- * The mapping only needs to be cache-coherent on SMP, except on
- * Freescale e500mc derivatives where it's also needed for coherent DMA.
- */
-#if defined(CONFIG_SMP) || defined(CONFIG_PPC_E500MC)
-#define M_IF_NEEDEDMAS2_M
-#else
-#define M_IF_NEEDED0
-#endif
-
 /* 6. Setup KERNELBASE mapping in TLB[0]
  *
  * r3 = MAS0 w/TLBSEL & ESEL for the entry we started in
@@ -1364,7 +1354,7 @@ skpinv:   addir6,r6,1 /* 
Increment */
ori r6,r6,(MAS1_TSIZE(BOOK3E_PAGESZ_1GB))@l
mtspr   SPRN_MAS1,r6
 
-   LOAD_REG_IMMEDIATE(r6, PAGE_OFFSET | M_IF_NEEDED)
+   LOAD_REG_IMMEDIATE(r6, PAGE_OFFSET | MAS2_M_IF_NEEDED)
mtspr   SPRN_MAS2,r6
 
rlwinm  r5,r5,0,0,25
diff --git a/arch/powerpc/kernel/fsl_booke_entry_mapping.S 
b/arch/powerpc/kernel/fsl_booke_entry_mapping.S
index ea065282b303..f4d3eaae54a9 100644
--- a/arch/powerpc/kernel/fsl_booke_entry_mapping.S
+++ b/arch/powerpc/kernel/fsl_booke_entry_mapping.S
@@ -153,16 +153,6 @@ skpinv:addir6,r6,1 /* 
Increment */
tlbivax 0,r9
TLBSYNC
 
-/*
- * The mapping only needs to be cache-coherent on SMP, except on
- * Freescale e500mc derivatives where it's also needed for coherent DMA.
- */
-#if defined(CONFIG_SMP) || defined(CONFIG_PPC_E500MC)
-#define M_IF_NEEDEDMAS2_M
-#else
-#define M_IF_NEEDED0
-#endif
-
 #if defined(ENTRY_MAPPING_BOOT_SETUP)
 
 /* 6. Setup KERNELBASE mapping in TLB1[0] */
@@ -171,8 +161,8 @@ skpinv: addir6,r6,1 /* 
Increment */
lis r6,(MAS1_VALID|MAS1_IPROT)@h
ori r6,r6,(MAS1_TSIZE(BOOK3E_PAGESZ_64M))@l
mtspr   SPRN_MAS1,r6
-   lis r6,MAS2_VAL(PAGE_OFFSET, BOOK3E_PAGESZ_64M, M_IF_NEEDED)@h
-   ori r6,r6,MAS2_VAL(PAGE_OFFSET, BOOK3E_PAGESZ_64M, M_IF_NEEDED)@l
+   lis r6,MAS2_VAL(PAGE_OFFSET, BOOK3E_PAGESZ_64M, MAS2_M_IF_NEEDED)@h
+   ori r6,r6,MAS2_VAL(PAGE_OFFSET, BOOK3E_PAGESZ_64M, 
MAS2_M_IF_NEEDED)@l
mtspr   SPRN_MAS2,r6
mtspr   SPRN_MAS3,r8
tlbwe
diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index b55a7b4cb543..2062a299a22d 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -432,18 +432,13 @@ kexec_create_tlb:
rlwimi  r9,r10,16,4,15  /* Setup MAS0 = TLBSEL | ESEL(r9) */
 
 /* Set up a temp identity mapping v:0 to p:0 and return to it. */
-#if defined(CONFIG_SMP) || defined(CONFIG_PPC_E500MC)
-#define M_IF_NEEDEDMAS2_M
-#else
-#define M_IF_NEEDED0
-#endif
mtspr   SPRN_MAS0,r9
 
lis r9,(MAS1_VALID|MAS1_IPROT)@h
ori r9,r9,(MAS1_TSIZE(BOOK3E_PAGESZ_1GB))@l
mtspr   SPRN_MAS1,r9
 
-   LOAD_REG_IMMEDIATE(r9, 0x0 | M_IF_NEEDED)
+   LOAD_REG_IMMEDIATE(r9, 0x0 | MAS2_M_IF_NEEDED)
mtspr   SPRN_MAS2,r9
 
LOAD_REG_IMMEDIATE(r9, 0x0 | MAS3_SR | MAS3_SW | MAS3_SX)
-- 
2.17.2



[PATCH v6 02/12] powerpc: move memstart_addr and kernstart_addr to init-common.c

2019-08-09 Thread Jason Yan
These two variables are both defined in init_32.c and init_64.c. Move
them to init-common.c and make them __ro_after_init.

Signed-off-by: Jason Yan 
Cc: Diana Craciun 
Cc: Michael Ellerman 
Cc: Christophe Leroy 
Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: Nicholas Piggin 
Cc: Kees Cook 
Reviewed-by: Christophe Leroy 
Reviewed-by: Diana Craciun 
Tested-by: Diana Craciun 
---
 arch/powerpc/mm/init-common.c | 5 +
 arch/powerpc/mm/init_32.c | 5 -
 arch/powerpc/mm/init_64.c | 5 -
 3 files changed, 5 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/mm/init-common.c b/arch/powerpc/mm/init-common.c
index a84da92920f7..e223da482c0c 100644
--- a/arch/powerpc/mm/init-common.c
+++ b/arch/powerpc/mm/init-common.c
@@ -21,6 +21,11 @@
 #include 
 #include 
 
+phys_addr_t memstart_addr __ro_after_init = (phys_addr_t)~0ull;
+EXPORT_SYMBOL_GPL(memstart_addr);
+phys_addr_t kernstart_addr __ro_after_init;
+EXPORT_SYMBOL_GPL(kernstart_addr);
+
 static bool disable_kuep = !IS_ENABLED(CONFIG_PPC_KUEP);
 static bool disable_kuap = !IS_ENABLED(CONFIG_PPC_KUAP);
 
diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c
index b04896a88d79..872df48ae41b 100644
--- a/arch/powerpc/mm/init_32.c
+++ b/arch/powerpc/mm/init_32.c
@@ -56,11 +56,6 @@
 phys_addr_t total_memory;
 phys_addr_t total_lowmem;
 
-phys_addr_t memstart_addr = (phys_addr_t)~0ull;
-EXPORT_SYMBOL(memstart_addr);
-phys_addr_t kernstart_addr;
-EXPORT_SYMBOL(kernstart_addr);
-
 #ifdef CONFIG_RELOCATABLE
 /* Used in __va()/__pa() */
 long long virt_phys_offset;
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index a44f6281ca3a..c836f1269ee7 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -63,11 +63,6 @@
 
 #include 
 
-phys_addr_t memstart_addr = ~0;
-EXPORT_SYMBOL_GPL(memstart_addr);
-phys_addr_t kernstart_addr;
-EXPORT_SYMBOL_GPL(kernstart_addr);
-
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
 /*
  * Given an address within the vmemmap, determine the pfn of the page that
-- 
2.17.2



[PATCH v6 03/12] powerpc: introduce kernstart_virt_addr to store the kernel base

2019-08-09 Thread Jason Yan
Now the kernel base is a fixed value - KERNELBASE. To support KASLR, we
need a variable to store the kernel base.

Signed-off-by: Jason Yan 
Cc: Diana Craciun 
Cc: Michael Ellerman 
Cc: Christophe Leroy 
Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: Nicholas Piggin 
Cc: Kees Cook 
Reviewed-by: Christophe Leroy 
Reviewed-by: Diana Craciun 
Tested-by: Diana Craciun 
---
 arch/powerpc/include/asm/page.h | 2 ++
 arch/powerpc/mm/init-common.c   | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
index 0d52f57fca04..4d32d1b561d6 100644
--- a/arch/powerpc/include/asm/page.h
+++ b/arch/powerpc/include/asm/page.h
@@ -315,6 +315,8 @@ void arch_free_page(struct page *page, int order);
 
 struct vm_area_struct;
 
+extern unsigned long kernstart_virt_addr;
+
 #include 
 #endif /* __ASSEMBLY__ */
 #include 
diff --git a/arch/powerpc/mm/init-common.c b/arch/powerpc/mm/init-common.c
index e223da482c0c..42ef7a6e6098 100644
--- a/arch/powerpc/mm/init-common.c
+++ b/arch/powerpc/mm/init-common.c
@@ -25,6 +25,8 @@ phys_addr_t memstart_addr __ro_after_init = 
(phys_addr_t)~0ull;
 EXPORT_SYMBOL_GPL(memstart_addr);
 phys_addr_t kernstart_addr __ro_after_init;
 EXPORT_SYMBOL_GPL(kernstart_addr);
+unsigned long kernstart_virt_addr __ro_after_init = KERNELBASE;
+EXPORT_SYMBOL_GPL(kernstart_virt_addr);
 
 static bool disable_kuep = !IS_ENABLED(CONFIG_PPC_KUEP);
 static bool disable_kuap = !IS_ENABLED(CONFIG_PPC_KUAP);
-- 
2.17.2



[PATCH v6 00/12] implement KASLR for powerpc/fsl_booke/32

2019-08-09 Thread Jason Yan
This series implements KASLR for powerpc/fsl_booke/32, as a security
feature that deters exploit attempts relying on knowledge of the location
of kernel internals.

Since CONFIG_RELOCATABLE is already supported, what we need to do is
map or copy the kernel to a proper place and relocate. Freescale Book-E
parts expect lowmem to be mapped by fixed TLB entries (TLB1). The TLB1
entries are not suitable to map the kernel directly in a randomized
region, so we chose to copy the kernel to a proper place and restart to
relocate.

Entropy is derived from the banner and timer base, which will change every
build and boot. This is not entirely safe, so additionally the bootloader may
pass entropy via the /chosen/kaslr-seed node in the device tree.

We will use the first 512M of the low memory to randomize the kernel
image. The memory will be split into 64M zones. We will use the lower 8
bits of the entropy to decide the index of the 64M zone. Then we choose a
16K aligned offset inside the 64M zone to put the kernel in.

    KERNELBASE

        |-->   64M   <--|
        |               |
        +---------------+    +----------------+---------------+
        |               |....|    |kernel|    |               |
        +---------------+    +----------------+---------------+
        |                         |
        |----->   offset    <-----|

                              kernstart_virt_addr

We also check if we would overlap with some areas like the dtb area, the
initrd area or the crashkernel area. If we cannot find a proper area,
KASLR will be disabled and the kernel will boot from its original location.
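
(A sketch of that rejection test, reusing the regions_overlap() helper and
the struct regions fields from patch 07; not the exact code in the series:)

    /* Reject a candidate range [ka_start, ka_end) that would clobber the
     * dtb, initrd or crashkernel areas. */
    static bool kaslr_range_is_free(unsigned long ka_start, unsigned long ka_end)
    {
            if (regions_overlap(ka_start, ka_end,
                                regions.dtb_start, regions.dtb_end))
                    return false;
            if (regions_overlap(ka_start, ka_end,
                                regions.initrd_start, regions.initrd_end))
                    return false;
            if (regions_overlap(ka_start, ka_end,
                                regions.crash_start, regions.crash_end))
                    return false;
            return true;
    }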

Changes since v5:
 - Rename M_IF_NEEDED to MAS2_M_IF_NEEDED
 - Define some global variable as __ro_after_init
 - Replace kimage_vaddr with kernstart_virt_addr
 - Depend on RELOCATABLE, not select it
 - Modify the comment block below the SPDX tag
 - Remove some useless headers in kaslr_booke.c and move is_second_reloc
   declaration to mmu_decl.h
 - Remove DBG() and use pr_debug() and rewrite comment above get_boot_seed().
 - Add a patch to document the KASLR implementation.
 - Split a patch from patch #10 which exports kaslr offset in VMCOREINFO ELF 
notes.
 - Remove extra logic around finding nokaslr string in cmdline.
 - Make regions static global and __initdata

Changes since v4:
 - Add Reviewed-by tag from Christophe
 - Remove an unnecessary cast
 - Remove unnecessary parenthesis
 - Fix checkpatch warning

Changes since v3:
 - Add Reviewed-by and Tested-by tag from Diana
 - Change the comment in fsl_booke_entry_mapping.S to be consistent
   with the new code.

Changes since v2:
 - Remove unnecessary #ifdef
 - Use SZ_64M instead of 0x4000000
 - Call early_init_dt_scan_chosen() to init boot_command_line
 - Rename kaslr_second_init() to kaslr_late_init()

Changes since v1:
 - Remove some useless 'extern' keyword.
 - Replace EXPORT_SYMBOL with EXPORT_SYMBOL_GPL
 - Improve some assembly code
 - Use memzero_explicit instead of memset
 - Use boot_command_line and remove early_command_line
 - Do not print kaslr offset if kaslr is disabled

Jason Yan (12):
  powerpc: unify definition of M_IF_NEEDED
  powerpc: move memstart_addr and kernstart_addr to init-common.c
  powerpc: introduce kernstart_virt_addr to store the kernel base
  powerpc/fsl_booke/32: introduce create_tlb_entry() helper
  powerpc/fsl_booke/32: introduce reloc_kernel_entry() helper
  powerpc/fsl_booke/32: implement KASLR infrastructure
  powerpc/fsl_booke/32: randomize the kernel image offset
  powerpc/fsl_booke/kaslr: clear the original kernel if randomized
  powerpc/fsl_booke/kaslr: support nokaslr cmdline parameter
  powerpc/fsl_booke/kaslr: dump out kernel offset information on panic
  powerpc/fsl_booke/kaslr: export offset in VMCOREINFO ELF notes
  powerpc/fsl_booke/32: Document KASLR implementation

 Documentation/powerpc/kaslr-booke32.rst   |  42 ++
 arch/powerpc/Kconfig  |  11 +
 arch/powerpc/include/asm/nohash/mmu-book3e.h  |  10 +
 arch/powerpc/include/asm/page.h   |   7 +
 arch/powerpc/kernel/Makefile  |   1 +
 arch/powerpc/kernel/early_32.c|   2 +-
 arch/powerpc/kernel/exceptions-64e.S  |  12 +-
 arch/powerpc/kernel/fsl_booke_entry_mapping.S |  27 +-
 arch/powerpc/kernel/head_fsl_booke.S  |  55 ++-
 arch/powerpc/kernel/kaslr_booke.c | 393 ++
 arch/powerpc/kernel/machine_kexec.c   |   1 +
 arch/powerpc/kernel/misc_64.S |   7 +-
 arch/powerpc/kernel/setup-common.c|  20 +
 arch/powerpc/mm/init-common.c |   7 +
 arch/powerpc/mm/init_32.c |   5 -
 arch/powerpc/mm/init_64.c |   5 -
 arch/powerpc/mm/mmu_decl.h|  11 +
 arch/powerpc/mm/nohash/fsl_booke.c|   8 +-
 18 files changed, 572 insertions(+), 52 deletions(-)
 create mode 100644 Documentation/powerpc/kaslr-booke32.rst
 create mode 100644 arch/powerpc/kernel/kaslr_booke.c

-- 
2.17.2



Re: [PATCH 6/8] arm-nommu: call dma_mmap_from_dev_coherent directly

2019-08-09 Thread Sergei Shtylyov

On 08.08.2019 19:00, Christoph Hellwig wrote:


Ther is no need to go through dma_common_mmap for the arm-nommu


   There. :-)


dma mmap implementation as the only possible memory not handled above
could be that from the per-device coherent pool.

Signed-off-by: Christoph Hellwig 

[...]

MBR, Sergei


Re: [PATCH 2/2] powerpc: Convert flush_icache_range to C

2019-08-09 Thread Christophe Leroy




On 09/08/2019 at 02:46, Alastair D'Silva wrote:

From: Alastair D'Silva 

Similar to commit 22e9c88d486a
("powerpc/64: reuse PPC32 static inline flush_dcache_range()")
this patch converts flush_icache_range to C.


Should we also convert __flush_dcache_icache() which does exactly the
same but on a full page? We could most likely use the same code, i.e.
call flush_icache_range() from __flush_dcache_icache()?
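
Something along these lines, perhaps (an untested sketch, assuming the C
flush_icache_range() from this patch is available):

    /* flush one full page's worth of d-cache and i-cache lines */
    void __flush_dcache_icache(void *page)
    {
            unsigned long addr = (unsigned long)page;

            flush_icache_range(addr, addr + PAGE_SIZE);
    }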




This was done as we discovered a long-standing bug where the
length of the range was truncated due to using a 32 bit shift
instead of a 64 bit one.

By converting this function to C, it becomes easier to maintain.

Signed-off-by: Alastair D'Silva 
---
  arch/powerpc/include/asm/cache.h  | 35 +--
  arch/powerpc/include/asm/cacheflush.h | 63 +++
  arch/powerpc/kernel/misc_64.S | 55 ---


There is also a flush_icache_range() in arch/powerpc/kernel/misc_32.S, 
it must be converted as well.



  3 files changed, 85 insertions(+), 68 deletions(-)

diff --git a/arch/powerpc/include/asm/cache.h b/arch/powerpc/include/asm/cache.h
index b3388d95f451..d3d7077b75e2 100644
--- a/arch/powerpc/include/asm/cache.h
+++ b/arch/powerpc/include/asm/cache.h
@@ -55,25 +55,46 @@ struct ppc64_caches {
  
  extern struct ppc64_caches ppc64_caches;
  
-static inline u32 l1_cache_shift(void)

+static inline u32 l1_dcache_shift(void)
  {
return ppc64_caches.l1d.log_block_size;
  }
  
-static inline u32 l1_cache_bytes(void)

+static inline u32 l1_dcache_bytes(void)
  {
return ppc64_caches.l1d.block_size;
  }
+
+static inline u32 l1_icache_shift(void)
+{
+   return ppc64_caches.l1i.log_block_size;
+}
+
+static inline u32 l1_icache_bytes(void)
+{
+   return ppc64_caches.l1i.block_size;
+}
  #else
-static inline u32 l1_cache_shift(void)
+static inline u32 l1_dcache_shift(void)
  {
return L1_CACHE_SHIFT;
  }
  
-static inline u32 l1_cache_bytes(void)

+static inline u32 l1_dcache_bytes(void)
  {
return L1_CACHE_BYTES;
  }
+
+static inline u32 l1_icache_shift(void)
+{
+   return L1_CACHE_SHIFT;
+}
+
+static inline u32 l1_icache_bytes(void)
+{
+   return L1_CACHE_BYTES;
+}
+


Could the above adds/changes be a separate patch ?


  #endif
  #endif /* ! __ASSEMBLY__ */
  
@@ -124,6 +145,12 @@ static inline void dcbst(void *addr)

  {
__asm__ __volatile__ ("dcbst %y0" : : "Z"(*(u8 *)addr) : "memory");
  }
+
+static inline void icbi(void *addr)
+{
+   __asm__ __volatile__ ("icbi %y0" : : "Z"(*(u8 *)addr) : "memory");
+}
+


Commit 6c5875843b87c3ad ("") is likely to be reverted in the near future 
due to a bug in CLANG and because it has no real benefit in our use cases.


So maybe you should consider using the previous format when adding icbi() ?

Should you also add iccci() in order to handle the 4xx part from misc_32 ?


  #endif /* !__ASSEMBLY__ */
  #endif /* __KERNEL__ */
  #endif /* _ASM_POWERPC_CACHE_H */
diff --git a/arch/powerpc/include/asm/cacheflush.h 
b/arch/powerpc/include/asm/cacheflush.h
index eef388f2659f..f68e75a6dc4b 100644
--- a/arch/powerpc/include/asm/cacheflush.h
+++ b/arch/powerpc/include/asm/cacheflush.h
@@ -42,7 +42,6 @@ extern void flush_dcache_page(struct page *page);
  #define flush_dcache_mmap_lock(mapping)   do { } while (0)
  #define flush_dcache_mmap_unlock(mapping) do { } while (0)
  
-extern void flush_icache_range(unsigned long, unsigned long);

  extern void flush_icache_user_range(struct vm_area_struct *vma,
struct page *page, unsigned long addr,
int len);
@@ -57,14 +56,17 @@ static inline void __flush_dcache_icache_phys(unsigned long 
physaddr)
  }
  #endif
  
-/*

- * Write any modified data cache blocks out to memory and invalidate them.
+/**
+ * flush_dcache_range: Write any modified data cache blocks out to memory and 
invalidate them.
   * Does not invalidate the corresponding instruction cache blocks.
+ *
+ * @start: the start address
+ * @stop: the stop address (exclusive)
   */
  static inline void flush_dcache_range(unsigned long start, unsigned long stop)
  {
-   unsigned long shift = l1_cache_shift();
-   unsigned long bytes = l1_cache_bytes();
+   unsigned long shift = l1_dcache_shift();
+   unsigned long bytes = l1_dcache_bytes();
void *addr = (void *)(start & ~(bytes - 1));
unsigned long size = stop - (unsigned long)addr + (bytes - 1);
unsigned long i;
@@ -82,6 +84,49 @@ static inline void flush_dcache_range(unsigned long start, 
unsigned long stop)
isync();
  }
  
+/**

+ * flush_icache_range: Write any modified data cache blocks out to memory
+ * and invalidate the corresponding blocks in the instruction cache
+ *
+ * Generic code will call this after writing memory, before executing from it.
+ *
+ * @start: the start address
+ * @stop: the stop address (exclusive)
+ */
+static inline void 

Re: [PATCH 1/2] powerpc: Allow flush_icache_range to work across ranges >4GB

2019-08-09 Thread Christophe Leroy




On 09/08/2019 at 02:45, Alastair D'Silva wrote:

From: Alastair D'Silva 

When calling flush_icache_range with a size >4GB, we were masking
off the upper 32 bits, so we would incorrectly flush a range smaller
than intended.

This patch replaces the 32 bit shifts with 64 bit ones, so that
the full size is accounted for.
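
(To illustrate the truncation with made-up numbers, assuming 64-bit longs
and a 128-byte (2^7) cache block:)

    unsigned long len = 0x180000000UL;              /* 6 GiB range           */
    unsigned long bad = (unsigned int)len >> 7;     /* 32-bit shift: 0x1000000
                                                       lines, i.e. only the
                                                       low 2 GiB are covered */
    unsigned long good = len >> 7;                  /* 64-bit shift: 0x3000000
                                                       lines, the full range */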

Heads-up for backporters: the old version of flush_dcache_range is
subject to a similar bug (this has since been replaced with a C
implementation).


Can you submit a patch to stable, explaining this ?



Signed-off-by: Alastair D'Silva 


Reviewed-by: Christophe Leroy 

Should add:

Cc: sta...@vger.kernel.org

Christophe


---
  arch/powerpc/kernel/misc_64.S | 4 ++--
  1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index b55a7b4cb543..9bc0aa9aeb65 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -82,7 +82,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
subfr8,r6,r4/* compute length */
add r8,r8,r5/* ensure we get enough */
lwz r9,DCACHEL1LOGBLOCKSIZE(r10)/* Get log-2 of cache block 
size */
-   srw.r8,r8,r9/* compute line count */
+   srd.r8,r8,r9/* compute line count */
beqlr   /* nothing to do? */
mtctr   r8
  1:dcbst   0,r6
@@ -98,7 +98,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
subfr8,r6,r4/* compute length */
add r8,r8,r5
lwz r9,ICACHEL1LOGBLOCKSIZE(r10)/* Get log-2 of Icache block 
size */
-   srw.r8,r8,r9/* compute line count */
+   srd.r8,r8,r9/* compute line count */
beqlr   /* nothing to do? */
mtctr   r8
  2:icbi0,r6



[PATCH v6 7/7] KVM: PPC: Ultravisor: Add PPC_UV config option

2019-08-09 Thread Bharata B Rao
From: Anshuman Khandual 

CONFIG_PPC_UV adds support for ultravisor.

Signed-off-by: Anshuman Khandual 
Signed-off-by: Bharata B Rao 
Signed-off-by: Ram Pai 
[ Update config help and commit message ]
Signed-off-by: Claudio Carvalho 
---
 arch/powerpc/Kconfig | 18 ++
 1 file changed, 18 insertions(+)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index d8dcd8820369..8b36ca5ed3b0 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -448,6 +448,24 @@ config PPC_TRANSACTIONAL_MEM
help
  Support user-mode Transactional Memory on POWERPC.
 
+config PPC_UV
+   bool "Ultravisor support"
+   depends on KVM_BOOK3S_HV_POSSIBLE
+   select ZONE_DEVICE
+   select MIGRATE_VMA_HELPER
+   select DEV_PAGEMAP_OPS
+   select DEVICE_PRIVATE
+   select MEMORY_HOTPLUG
+   select MEMORY_HOTREMOVE
+   default n
+   help
+ This option paravirtualizes the kernel to run in POWER platforms that
+ support the Protected Execution Facility (PEF). In such platforms,
+ the ultravisor firmware runs at a privilege level above the
+ hypervisor.
+
+ If unsure, say "N".
+
 config LD_HEAD_STUB_CATCH
bool "Reserve 256 bytes to cope with linker stubs in HEAD text" if 
EXPERT
depends on PPC64
-- 
2.21.0



[PATCH v6 6/7] kvmppc: Support reset of secure guest

2019-08-09 Thread Bharata B Rao
Add support for reset of secure guest via a new ioctl KVM_PPC_SVM_OFF.
This ioctl will be issued by QEMU during reset and includes the
following steps:

- Ask UV to terminate the guest via UV_SVM_TERMINATE ucall
- Unpin the VPA pages so that they can be migrated back to secure
  side when guest becomes secure again. This is required because
  pinned pages can't be migrated.
- Reinitialize guest's partitioned scoped page tables. These are
  freed when guest become secure (H_SVM_INIT_DONE)
- Release all device pages of the secure guest.

After these steps, guest is ready to issue UV_ESM call once again
to switch to secure mode.
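
(A minimal userspace sketch of issuing the new ioctl at reset time, assuming
an already-open VM fd; error handling is omitted:)

    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    /* flip a (possibly) secure guest back to normal mode on reset */
    static int reset_secure_guest(int vm_fd)
    {
            return ioctl(vm_fd, KVM_PPC_SVM_OFF, 0);
    }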

Signed-off-by: Bharata B Rao 
Signed-off-by: Sukadev Bhattiprolu 
[Implementation of uv_svm_terminate() and its call from
guest shutdown path]
Signed-off-by: Ram Pai 
[Unpinning of VPA pages]
---
 Documentation/virtual/kvm/api.txt  | 18 ++
 arch/powerpc/include/asm/kvm_book3s_devm.h |  7 +++
 arch/powerpc/include/asm/kvm_ppc.h |  2 +
 arch/powerpc/include/asm/ultravisor-api.h  |  1 +
 arch/powerpc/include/asm/ultravisor.h  |  5 ++
 arch/powerpc/kvm/book3s_hv.c   | 70 ++
 arch/powerpc/kvm/book3s_hv_devm.c  | 61 +++
 arch/powerpc/kvm/powerpc.c | 12 
 include/uapi/linux/kvm.h   |  1 +
 9 files changed, 177 insertions(+)

diff --git a/Documentation/virtual/kvm/api.txt 
b/Documentation/virtual/kvm/api.txt
index e54a3f51ddc5..6fdd140dd9af 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -4111,6 +4111,24 @@ Valid values for 'action':
 #define KVM_PMU_EVENT_ALLOW 0
 #define KVM_PMU_EVENT_DENY 1
 
+4.121 KVM_PPC_SVM_OFF
+
+Capability: basic
+Architectures: powerpc
+Type: vm ioctl
+Parameters: none
+Returns: 0 on successful completion,
+Errors:
+  EINVAL:if ultravisor failed to terminate the secure guest
+  ENOMEM:if hypervisor failed to allocate new radix page tables for guest
+
+This ioctl is used to turn off the secure mode of the guest or transition
+the guest from secure mode to normal mode. This is invoked when the guest
+is reset. This has no effect if called for a normal guest.
+
+This ioctl issues an ultravisor call to terminate the secure guest,
+unpins the VPA pages, reinitializes the guest's partition scoped page
+tables and releases all the HMM pages that are associated with this guest.
 
 5. The kvm_run structure
 
diff --git a/arch/powerpc/include/asm/kvm_book3s_devm.h 
b/arch/powerpc/include/asm/kvm_book3s_devm.h
index 8c7aacabb2e0..6d97da320400 100644
--- a/arch/powerpc/include/asm/kvm_book3s_devm.h
+++ b/arch/powerpc/include/asm/kvm_book3s_devm.h
@@ -13,6 +13,8 @@ extern unsigned long kvmppc_h_svm_page_out(struct kvm *kvm,
  unsigned long page_shift);
 extern unsigned long kvmppc_h_svm_init_start(struct kvm *kvm);
 extern unsigned long kvmppc_h_svm_init_done(struct kvm *kvm);
+extern void kvmppc_devm_free_memslot_pfns(struct kvm *kvm,
+   struct kvm_memslots *slots);
 #else
 static inline unsigned long
 kvmppc_h_svm_page_in(struct kvm *kvm, unsigned long gra,
@@ -37,5 +39,10 @@ static inline unsigned long kvmppc_h_svm_init_done(struct 
kvm *kvm)
 {
return H_UNSUPPORTED;
 }
+
+static inline void kvmppc_devm_free_memslot_pfns(struct kvm *kvm,
+   struct kvm_memslots *slots)
+{
+}
 #endif /* CONFIG_PPC_UV */
 #endif /* __POWERPC_KVM_PPC_HMM_H__ */
diff --git a/arch/powerpc/include/asm/kvm_ppc.h 
b/arch/powerpc/include/asm/kvm_ppc.h
index 2484e6a8f5ca..e4093d067354 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -177,6 +177,7 @@ extern void kvm_spapr_tce_release_iommu_group(struct kvm 
*kvm,
 extern int kvmppc_switch_mmu_to_hpt(struct kvm *kvm);
 extern int kvmppc_switch_mmu_to_radix(struct kvm *kvm);
 extern void kvmppc_setup_partition_table(struct kvm *kvm);
+extern int kvmppc_reinit_partition_table(struct kvm *kvm);
 
 extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
struct kvm_create_spapr_tce_64 *args);
@@ -321,6 +322,7 @@ struct kvmppc_ops {
   int size);
int (*store_to_eaddr)(struct kvm_vcpu *vcpu, ulong *eaddr, void *ptr,
  int size);
+   int (*svm_off)(struct kvm *kvm);
 };
 
 extern struct kvmppc_ops *kvmppc_hv_ops;
diff --git a/arch/powerpc/include/asm/ultravisor-api.h 
b/arch/powerpc/include/asm/ultravisor-api.h
index cf200d4ce703..3a27a0c0be05 100644
--- a/arch/powerpc/include/asm/ultravisor-api.h
+++ b/arch/powerpc/include/asm/ultravisor-api.h
@@ -30,5 +30,6 @@
 #define UV_PAGE_IN 0xF128
 #define UV_PAGE_OUT0xF12C
 #define UV_PAGE_INVAL  0xF138
+#define UV_SVM_TERMINATE   0xF13C
 
 #endif /* _ASM_POWERPC_ULTRAVISOR_API_H */
diff --git 

[PATCH v6 5/7] kvmppc: Radix changes for secure guest

2019-08-09 Thread Bharata B Rao
- After the guest becomes secure, when we handle a page fault of a page
  belonging to SVM in HV, send that page to UV via UV_PAGE_IN.
- Whenever a page is unmapped on the HV side, inform UV via UV_PAGE_INVAL.
- Ensure all those routines that walk the secondary page tables of
  the guest don't do so in case of secure VM. For secure guest, the
  active secondary page tables are in secure memory and the secondary
  page tables in HV are freed when guest becomes secure.

Signed-off-by: Bharata B Rao 
---
 arch/powerpc/include/asm/kvm_host.h   | 12 
 arch/powerpc/include/asm/ultravisor-api.h |  1 +
 arch/powerpc/include/asm/ultravisor.h |  5 +
 arch/powerpc/kvm/book3s_64_mmu_radix.c| 22 ++
 arch/powerpc/kvm/book3s_hv_devm.c | 20 
 5 files changed, 60 insertions(+)

diff --git a/arch/powerpc/include/asm/kvm_host.h 
b/arch/powerpc/include/asm/kvm_host.h
index 1827c22909cd..db680d7f5779 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -856,6 +856,8 @@ static inline void kvm_arch_vcpu_block_finish(struct 
kvm_vcpu *vcpu) {}
 #ifdef CONFIG_PPC_UV
 extern int kvmppc_devm_init(void);
 extern void kvmppc_devm_free(void);
+extern bool kvmppc_is_guest_secure(struct kvm *kvm);
+extern int kvmppc_send_page_to_uv(struct kvm *kvm, unsigned long gpa);
 #else
 static inline int kvmppc_devm_init(void)
 {
@@ -863,6 +865,16 @@ static inline int kvmppc_devm_init(void)
 }
 
 static inline void kvmppc_devm_free(void) {}
+
+static inline bool kvmppc_is_guest_secure(struct kvm *kvm)
+{
+   return false;
+}
+
+static inline int kvmppc_send_page_to_uv(struct kvm *kvm, unsigned long gpa)
+{
+   return -EFAULT;
+}
 #endif /* CONFIG_PPC_UV */
 
 #endif /* __POWERPC_KVM_HOST_H__ */
diff --git a/arch/powerpc/include/asm/ultravisor-api.h 
b/arch/powerpc/include/asm/ultravisor-api.h
index 46b1ee381695..cf200d4ce703 100644
--- a/arch/powerpc/include/asm/ultravisor-api.h
+++ b/arch/powerpc/include/asm/ultravisor-api.h
@@ -29,5 +29,6 @@
 #define UV_UNREGISTER_MEM_SLOT 0xF124
 #define UV_PAGE_IN 0xF128
 #define UV_PAGE_OUT0xF12C
+#define UV_PAGE_INVAL  0xF138
 
 #endif /* _ASM_POWERPC_ULTRAVISOR_API_H */
diff --git a/arch/powerpc/include/asm/ultravisor.h 
b/arch/powerpc/include/asm/ultravisor.h
index 79c415bf5ee8..640db659c8c8 100644
--- a/arch/powerpc/include/asm/ultravisor.h
+++ b/arch/powerpc/include/asm/ultravisor.h
@@ -45,4 +45,9 @@ static inline int uv_unregister_mem_slot(u64 lpid, u64 slotid)
return ucall_norets(UV_UNREGISTER_MEM_SLOT, lpid, slotid);
 }
 
+static inline int uv_page_inval(u64 lpid, u64 gpa, u64 page_shift)
+{
+   return ucall_norets(UV_PAGE_INVAL, lpid, gpa, page_shift);
+}
+
 #endif /* _ASM_POWERPC_ULTRAVISOR_H */
diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c 
b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index 2d415c36a61d..93ad34e63045 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -19,6 +19,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 
 /*
  * Supported radix tree geometry.
@@ -915,6 +917,9 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, 
struct kvm_vcpu *vcpu,
if (!(dsisr & DSISR_PRTABLE_FAULT))
gpa |= ea & 0xfff;
 
+   if (kvmppc_is_guest_secure(kvm))
+   return kvmppc_send_page_to_uv(kvm, gpa & PAGE_MASK);
+
/* Get the corresponding memslot */
memslot = gfn_to_memslot(kvm, gfn);
 
@@ -972,6 +977,11 @@ int kvm_unmap_radix(struct kvm *kvm, struct 
kvm_memory_slot *memslot,
unsigned long gpa = gfn << PAGE_SHIFT;
unsigned int shift;
 
+   if (kvmppc_is_guest_secure(kvm)) {
+   uv_page_inval(kvm->arch.lpid, gpa, PAGE_SIZE);
+   return 0;
+   }
+
ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
if (ptep && pte_present(*ptep))
kvmppc_unmap_pte(kvm, ptep, gpa, shift, memslot,
@@ -989,6 +999,9 @@ int kvm_age_radix(struct kvm *kvm, struct kvm_memory_slot 
*memslot,
int ref = 0;
unsigned long old, *rmapp;
 
+   if (kvmppc_is_guest_secure(kvm))
+   return ref;
+
ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
if (ptep && pte_present(*ptep) && pte_young(*ptep)) {
old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_ACCESSED, 0,
@@ -1013,6 +1026,9 @@ int kvm_test_age_radix(struct kvm *kvm, struct 
kvm_memory_slot *memslot,
unsigned int shift;
int ref = 0;
 
+   if (kvmppc_is_guest_secure(kvm))
+   return ref;
+
ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
if (ptep && pte_present(*ptep) && pte_young(*ptep))
ref = 1;
@@ -1030,6 +1046,9 @@ static int kvm_radix_test_clear_dirty(struct kvm *kvm,
int ret = 0;
unsigned long old, *rmapp;
 
+   if 

[PATCH v6 4/7] kvmppc: Handle memory plug/unplug to secure VM

2019-08-09 Thread Bharata B Rao
Register the new memslot with UV during plug and unregister
the memslot during unplug.

Signed-off-by: Bharata B Rao 
Acked-by: Paul Mackerras 
---
 arch/powerpc/include/asm/ultravisor-api.h |  1 +
 arch/powerpc/include/asm/ultravisor.h |  5 +
 arch/powerpc/kvm/book3s_hv.c  | 19 +++
 3 files changed, 25 insertions(+)
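
The hunk below leaves KVM_MR_MOVE as a TODO. One possible way to handle
it later (a sketch only, not part of this patch) would be to treat a move
as an unregister of the old slot followed by a register of the new one:

	} else if (change == KVM_MR_MOVE) {
		/* Sketch: re-register the slot at its new GPA range */
		uv_unregister_mem_slot(kvm->arch.lpid, old->id);
		uv_register_mem_slot(kvm->arch.lpid,
				     new->base_gfn << PAGE_SHIFT,
				     new->npages * PAGE_SIZE,
				     0, new->id);
	}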

diff --git a/arch/powerpc/include/asm/ultravisor-api.h 
b/arch/powerpc/include/asm/ultravisor-api.h
index c578d9b13a56..46b1ee381695 100644
--- a/arch/powerpc/include/asm/ultravisor-api.h
+++ b/arch/powerpc/include/asm/ultravisor-api.h
@@ -26,6 +26,7 @@
 #define UV_WRITE_PATE  0xF104
 #define UV_RETURN  0xF11C
 #define UV_REGISTER_MEM_SLOT   0xF120
+#define UV_UNREGISTER_MEM_SLOT 0xF124
 #define UV_PAGE_IN 0xF128
 #define UV_PAGE_OUT    0xF12C
 
diff --git a/arch/powerpc/include/asm/ultravisor.h 
b/arch/powerpc/include/asm/ultravisor.h
index 8a722c575c56..79c415bf5ee8 100644
--- a/arch/powerpc/include/asm/ultravisor.h
+++ b/arch/powerpc/include/asm/ultravisor.h
@@ -40,4 +40,9 @@ static inline int uv_register_mem_slot(u64 lpid, u64 
start_gpa, u64 size,
size, flags, slotid);
 }
 
+static inline int uv_unregister_mem_slot(u64 lpid, u64 slotid)
+{
+   return ucall_norets(UV_UNREGISTER_MEM_SLOT, lpid, slotid);
+}
+
 #endif /* _ASM_POWERPC_ULTRAVISOR_H */
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 33b8ebffbef0..13e31ef3583e 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -74,6 +74,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "book3s.h"
 
@@ -4504,6 +4505,24 @@ static void kvmppc_core_commit_memory_region_hv(struct 
kvm *kvm,
if (change == KVM_MR_FLAGS_ONLY && kvm_is_radix(kvm) &&
((new->flags ^ old->flags) & KVM_MEM_LOG_DIRTY_PAGES))
kvmppc_radix_flush_memslot(kvm, old);
+   /*
+* If UV hasn't yet called H_SVM_INIT_START, don't register memslots.
+*/
+   if (!kvm->arch.secure_guest)
+   return;
+
+   /*
+* TODO: Handle KVM_MR_MOVE
+*/
+   if (change == KVM_MR_CREATE) {
+   uv_register_mem_slot(kvm->arch.lpid,
+  new->base_gfn << PAGE_SHIFT,
+  new->npages * PAGE_SIZE,
+  0,
+  new->id);
+   } else if (change == KVM_MR_DELETE) {
+   uv_unregister_mem_slot(kvm->arch.lpid, old->id);
+   }
 }
 
 /*
-- 
2.21.0



[PATCH v6 3/7] kvmppc: H_SVM_INIT_START and H_SVM_INIT_DONE hcalls

2019-08-09 Thread Bharata B Rao
H_SVM_INIT_START: Initiate securing a VM
H_SVM_INIT_DONE: Conclude securing a VM

As part of H_SVM_INIT_START, register all existing memslots with
the UV. The H_SVM_INIT_DONE call from UV informs HV that the
transition of the guest to secure mode is complete.

These two states (transition to secure mode STARTED and transition
to secure mode COMPLETED) are recorded in kvm->arch.secure_guest.
Setting these states causes the assembly code that enters the
guest to issue the UV_RETURN ucall instead of trying to enter the
guest directly.

Signed-off-by: Bharata B Rao 
Acked-by: Paul Mackerras 
---
 arch/powerpc/include/asm/hvcall.h  |  2 ++
 arch/powerpc/include/asm/kvm_book3s_devm.h | 12 
 arch/powerpc/include/asm/kvm_host.h|  4 +++
 arch/powerpc/include/asm/ultravisor-api.h  |  1 +
 arch/powerpc/include/asm/ultravisor.h  |  7 +
 arch/powerpc/kvm/book3s_hv.c   |  7 +
 arch/powerpc/kvm/book3s_hv_devm.c  | 34 ++
 7 files changed, 67 insertions(+)
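
The book3s_hv_devm.c hunk of this patch is not shown in full below. As a
hedged sketch of what the description above implies (register every
existing memslot with UV on H_SVM_INIT_START, then record completion on
H_SVM_INIT_DONE), the two handlers might look roughly like this; the
memslot iteration details are assumptions, not the patch's actual code:

unsigned long kvmppc_h_svm_init_start(struct kvm *kvm)
{
	struct kvm_memslots *slots;
	struct kvm_memory_slot *memslot;
	unsigned long ret = H_SUCCESS;
	int srcu_idx;

	srcu_idx = srcu_read_lock(&kvm->srcu);
	slots = kvm_memslots(kvm);
	kvm_for_each_memslot(memslot, slots) {
		if (uv_register_mem_slot(kvm->arch.lpid,
					 memslot->base_gfn << PAGE_SHIFT,
					 memslot->npages * PAGE_SIZE,
					 0, memslot->id)) {
			ret = H_PARAMETER;
			break;
		}
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	if (ret == H_SUCCESS)
		kvm->arch.secure_guest |= KVMPPC_SECURE_INIT_START;
	return ret;
}

unsigned long kvmppc_h_svm_init_done(struct kvm *kvm)
{
	if (!(kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START))
		return H_UNSUPPORTED;

	kvm->arch.secure_guest |= KVMPPC_SECURE_INIT_DONE;
	return H_SUCCESS;
}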

diff --git a/arch/powerpc/include/asm/hvcall.h 
b/arch/powerpc/include/asm/hvcall.h
index 05b8536f6653..fa7695928e30 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -343,6 +343,8 @@
 /* Platform-specific hcalls used by the Ultravisor */
 #define H_SVM_PAGE_IN  0xEF00
 #define H_SVM_PAGE_OUT 0xEF04
+#define H_SVM_INIT_START   0xEF08
+#define H_SVM_INIT_DONE    0xEF0C
 
 /* Values for 2nd argument to H_SET_MODE */
 #define H_SET_MODE_RESOURCE_SET_CIABR  1
diff --git a/arch/powerpc/include/asm/kvm_book3s_devm.h 
b/arch/powerpc/include/asm/kvm_book3s_devm.h
index 21f3de5f2acb..8c7aacabb2e0 100644
--- a/arch/powerpc/include/asm/kvm_book3s_devm.h
+++ b/arch/powerpc/include/asm/kvm_book3s_devm.h
@@ -11,6 +11,8 @@ extern unsigned long kvmppc_h_svm_page_out(struct kvm *kvm,
  unsigned long gra,
  unsigned long flags,
  unsigned long page_shift);
+extern unsigned long kvmppc_h_svm_init_start(struct kvm *kvm);
+extern unsigned long kvmppc_h_svm_init_done(struct kvm *kvm);
 #else
 static inline unsigned long
 kvmppc_h_svm_page_in(struct kvm *kvm, unsigned long gra,
@@ -25,5 +27,15 @@ kvmppc_h_svm_page_out(struct kvm *kvm, unsigned long gra,
 {
return H_UNSUPPORTED;
 }
+
+static inline unsigned long kvmppc_h_svm_init_start(struct kvm *kvm)
+{
+   return H_UNSUPPORTED;
+}
+
+static inline unsigned long kvmppc_h_svm_init_done(struct kvm *kvm)
+{
+   return H_UNSUPPORTED;
+}
 #endif /* CONFIG_PPC_UV */
 #endif /* __POWERPC_KVM_PPC_HMM_H__ */
diff --git a/arch/powerpc/include/asm/kvm_host.h 
b/arch/powerpc/include/asm/kvm_host.h
index 86bbe607ad7e..1827c22909cd 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -261,6 +261,10 @@ struct kvm_hpt_info {
 
 struct kvm_resize_hpt;
 
+/* Flag values for kvm_arch.secure_guest */
+#define KVMPPC_SECURE_INIT_START   0x1 /* H_SVM_INIT_START has been called */
+#define KVMPPC_SECURE_INIT_DONE    0x2 /* H_SVM_INIT_DONE completed */
+
 struct kvm_arch {
unsigned int lpid;
unsigned int smt_mode;  /* # vcpus per virtual core */
diff --git a/arch/powerpc/include/asm/ultravisor-api.h 
b/arch/powerpc/include/asm/ultravisor-api.h
index 1cd1f595fd81..c578d9b13a56 100644
--- a/arch/powerpc/include/asm/ultravisor-api.h
+++ b/arch/powerpc/include/asm/ultravisor-api.h
@@ -25,6 +25,7 @@
 /* opcodes */
 #define UV_WRITE_PATE  0xF104
 #define UV_RETURN  0xF11C
+#define UV_REGISTER_MEM_SLOT   0xF120
 #define UV_PAGE_IN 0xF128
 #define UV_PAGE_OUT    0xF12C
 
diff --git a/arch/powerpc/include/asm/ultravisor.h 
b/arch/powerpc/include/asm/ultravisor.h
index d668a59e099b..8a722c575c56 100644
--- a/arch/powerpc/include/asm/ultravisor.h
+++ b/arch/powerpc/include/asm/ultravisor.h
@@ -33,4 +33,11 @@ static inline int uv_page_out(u64 lpid, u64 dst_ra, u64 
src_gpa, u64 flags,
page_shift);
 }
 
+static inline int uv_register_mem_slot(u64 lpid, u64 start_gpa, u64 size,
+  u64 flags, u64 slotid)
+{
+   return ucall_norets(UV_REGISTER_MEM_SLOT, lpid, start_gpa,
+   size, flags, slotid);
+}
+
 #endif /* _ASM_POWERPC_ULTRAVISOR_H */
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 00b43ee8b693..33b8ebffbef0 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -1089,6 +1089,13 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
kvmppc_get_gpr(vcpu, 5),
kvmppc_get_gpr(vcpu, 6));
break;
+   case H_SVM_INIT_START:
+   ret = kvmppc_h_svm_init_start(vcpu->kvm);
+

[PATCH v6 2/7] kvmppc: Shared pages support for secure guests

2019-08-09 Thread Bharata B Rao
A secure guest will share some of its pages with the hypervisor (e.g.
virtio bounce buffers). Support sharing of pages between the hypervisor
and the ultravisor.

Once a secure page is converted to a shared page, stop tracking that
page as a device page.

Signed-off-by: Bharata B Rao 
---
 arch/powerpc/include/asm/hvcall.h |  3 ++
 arch/powerpc/kvm/book3s_hv_devm.c | 67 +--
 2 files changed, 67 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/include/asm/hvcall.h 
b/arch/powerpc/include/asm/hvcall.h
index 2f6b952deb0f..05b8536f6653 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -337,6 +337,9 @@
 #define H_TLB_INVALIDATE   0xF808
 #define H_COPY_TOFROM_GUEST0xF80C
 
+/* Flags for H_SVM_PAGE_IN */
+#define H_PAGE_IN_SHARED   0x1
+
 /* Platform-specific hcalls used by the Ultravisor */
 #define H_SVM_PAGE_IN  0xEF00
 #define H_SVM_PAGE_OUT 0xEF04
diff --git a/arch/powerpc/kvm/book3s_hv_devm.c 
b/arch/powerpc/kvm/book3s_hv_devm.c
index 2e6c077bd22e..c9189e58401d 100644
--- a/arch/powerpc/kvm/book3s_hv_devm.c
+++ b/arch/powerpc/kvm/book3s_hv_devm.c
@@ -55,6 +55,7 @@ struct kvmppc_devm_page_pvt {
unsigned long *rmap;
unsigned int lpid;
unsigned long gpa;
+   bool skip_page_out;
 };
 
 struct kvmppc_devm_copy_args {
@@ -188,6 +189,54 @@ kvmppc_devm_migrate_alloc_and_copy(struct migrate_vma *mig,
return 0;
 }
 
+/*
+ * Shares the page with HV, thus making it a normal page.
+ *
+ * - If the page is already secure, then provision a new page and share
+ * - If the page is a normal page, share the existing page
+ *
+ * In the former case, uses the dev_pagemap_ops migrate_to_ram handler to
+ * release the device page.
+ */
+static unsigned long
+kvmppc_share_page(struct kvm *kvm, unsigned long gpa, unsigned long page_shift)
+{
+
+   int ret = H_PARAMETER;
+   struct page *devm_page;
+   struct kvmppc_devm_page_pvt *pvt;
+   unsigned long pfn;
+   unsigned long *rmap;
+   struct kvm_memory_slot *slot;
+   unsigned long gfn = gpa >> page_shift;
+   int srcu_idx;
+
+   srcu_idx = srcu_read_lock(&kvm->srcu);
+   slot = gfn_to_memslot(kvm, gfn);
+   if (!slot)
+   goto out;
+
+   rmap = &slot->arch.rmap[gfn - slot->base_gfn];
+   if (kvmppc_is_devm_pfn(*rmap)) {
+   devm_page = pfn_to_page(*rmap & ~KVMPPC_PFN_DEVM);
+   pvt = (struct kvmppc_devm_page_pvt *)
+   devm_page->zone_device_data;
+   pvt->skip_page_out = true;
+   }
+
+   pfn = gfn_to_pfn(kvm, gpa >> page_shift);
+   if (is_error_noslot_pfn(pfn))
+   goto out;
+
+   ret = uv_page_in(kvm->arch.lpid, pfn << page_shift, gpa, 0, page_shift);
+   if (ret == U_SUCCESS)
+   ret = H_SUCCESS;
+   kvm_release_pfn_clean(pfn);
+out:
+   srcu_read_unlock(&kvm->srcu, srcu_idx);
+   return ret;
+}
+
 /*
  * Move page from normal memory to secure memory.
  */
@@ -209,9 +258,12 @@ kvmppc_h_svm_page_in(struct kvm *kvm, unsigned long gpa,
if (page_shift != PAGE_SHIFT)
return H_P3;
 
-   if (flags)
+   if (flags & ~H_PAGE_IN_SHARED)
return H_P2;
 
+   if (flags & H_PAGE_IN_SHARED)
+   return kvmppc_share_page(kvm, gpa, page_shift);
+
ret = H_PARAMETER;
down_read(&kvm->mm->mmap_sem);
srcu_idx = srcu_read_lock(&kvm->srcu);
@@ -279,8 +331,17 @@ kvmppc_devm_fault_migrate_alloc_and_copy(struct 
migrate_vma *mig)
pvt = (struct kvmppc_devm_page_pvt *)spage->zone_device_data;
 
pfn = page_to_pfn(dpage);
-   ret = uv_page_out(pvt->lpid, pfn << PAGE_SHIFT,
- pvt->gpa, 0, PAGE_SHIFT);
+
+   /*
+* This same function is used in two cases:
+* - When HV touches a secure page, for which we do page-out
+* - When a secure page is converted to shared page, we touch
+*   the page to essentially discard the device page. In this
+*   case we skip page-out.
+*/
+   if (!pvt->skip_page_out)
+   ret = uv_page_out(pvt->lpid, pfn << PAGE_SHIFT,
+ pvt->gpa, 0, PAGE_SHIFT);
if (ret == U_SUCCESS)
*mig->dst = migrate_pfn(pfn) | MIGRATE_PFN_LOCKED;
return 0;
-- 
2.21.0



[PATCH v6 1/7] kvmppc: Driver to manage pages of secure guest

2019-08-09 Thread Bharata B Rao
KVMPPC driver to manage page transitions of a secure guest
via the H_SVM_PAGE_IN and H_SVM_PAGE_OUT hcalls.

H_SVM_PAGE_IN: Move the content of a normal page to a secure page
H_SVM_PAGE_OUT: Move the content of a secure page to a normal page

Private ZONE_DEVICE memory equal to the amount of secure memory
available in the platform for running secure guests is created
via a char device. Whenever a page belonging to the guest becomes
secure, a page from this private device memory is used to
represent and track that secure page on the HV side. The movement
of pages between normal and secure memory is done via
migrate_vma_pages() using UV_PAGE_IN and UV_PAGE_OUT ucalls.

Signed-off-by: Bharata B Rao 
---
 arch/powerpc/include/asm/hvcall.h  |   4 +
 arch/powerpc/include/asm/kvm_book3s_devm.h |  29 ++
 arch/powerpc/include/asm/kvm_host.h|  12 +
 arch/powerpc/include/asm/ultravisor-api.h  |   2 +
 arch/powerpc/include/asm/ultravisor.h  |  14 +
 arch/powerpc/kvm/Makefile  |   3 +
 arch/powerpc/kvm/book3s_hv.c   |  19 +
 arch/powerpc/kvm/book3s_hv_devm.c  | 492 +
 8 files changed, 575 insertions(+)
 create mode 100644 arch/powerpc/include/asm/kvm_book3s_devm.h
 create mode 100644 arch/powerpc/kvm/book3s_hv_devm.c
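
Most of book3s_hv_devm.c is not visible below. For readers of patch 2/7,
which checks kvmppc_is_devm_pfn(*rmap) and masks with KVMPPC_PFN_DEVM:
the driver apparently records the device PFN backing a secure page in the
memslot's rmap entry, tagged with a flag bit. A hedged sketch of that
encoding (the exact flag value and helper name are assumptions, not the
patch's actual code):

/* Sketch: tag an rmap entry that holds a device (secure-page) PFN */
#define KVMPPC_PFN_DEVM		(1UL << 63)	/* assumed bit position */

static inline bool kvmppc_is_devm_pfn(unsigned long pfn)
{
	return !!(pfn & KVMPPC_PFN_DEVM);
}

/* Remember that this gfn is now backed by the device page at devm_pfn */
static void kvmppc_devm_rmap_set(unsigned long *rmap, unsigned long devm_pfn)
{
	*rmap = devm_pfn | KVMPPC_PFN_DEVM;
}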

diff --git a/arch/powerpc/include/asm/hvcall.h 
b/arch/powerpc/include/asm/hvcall.h
index 463c63a9fcf1..2f6b952deb0f 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -337,6 +337,10 @@
 #define H_TLB_INVALIDATE   0xF808
 #define H_COPY_TOFROM_GUEST0xF80C
 
+/* Platform-specific hcalls used by the Ultravisor */
+#define H_SVM_PAGE_IN  0xEF00
+#define H_SVM_PAGE_OUT 0xEF04
+
 /* Values for 2nd argument to H_SET_MODE */
 #define H_SET_MODE_RESOURCE_SET_CIABR  1
 #define H_SET_MODE_RESOURCE_SET_DAWR   2
diff --git a/arch/powerpc/include/asm/kvm_book3s_devm.h 
b/arch/powerpc/include/asm/kvm_book3s_devm.h
new file mode 100644
index ..21f3de5f2acb
--- /dev/null
+++ b/arch/powerpc/include/asm/kvm_book3s_devm.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __POWERPC_KVM_PPC_HMM_H__
+#define __POWERPC_KVM_PPC_HMM_H__
+
+#ifdef CONFIG_PPC_UV
+extern unsigned long kvmppc_h_svm_page_in(struct kvm *kvm,
+ unsigned long gra,
+ unsigned long flags,
+ unsigned long page_shift);
+extern unsigned long kvmppc_h_svm_page_out(struct kvm *kvm,
+ unsigned long gra,
+ unsigned long flags,
+ unsigned long page_shift);
+#else
+static inline unsigned long
+kvmppc_h_svm_page_in(struct kvm *kvm, unsigned long gra,
+unsigned long flags, unsigned long page_shift)
+{
+   return H_UNSUPPORTED;
+}
+
+static inline unsigned long
+kvmppc_h_svm_page_out(struct kvm *kvm, unsigned long gra,
+ unsigned long flags, unsigned long page_shift)
+{
+   return H_UNSUPPORTED;
+}
+#endif /* CONFIG_PPC_UV */
+#endif /* __POWERPC_KVM_PPC_HMM_H__ */
diff --git a/arch/powerpc/include/asm/kvm_host.h 
b/arch/powerpc/include/asm/kvm_host.h
index 4bb552d639b8..86bbe607ad7e 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -849,4 +849,16 @@ static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu 
*vcpu) {}
 static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
 static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
 
+#ifdef CONFIG_PPC_UV
+extern int kvmppc_devm_init(void);
+extern void kvmppc_devm_free(void);
+#else
+static inline int kvmppc_devm_init(void)
+{
+   return 0;
+}
+
+static inline void kvmppc_devm_free(void) {}
+#endif /* CONFIG_PPC_UV */
+
 #endif /* __POWERPC_KVM_HOST_H__ */
diff --git a/arch/powerpc/include/asm/ultravisor-api.h 
b/arch/powerpc/include/asm/ultravisor-api.h
index 6a0f9c74f959..1cd1f595fd81 100644
--- a/arch/powerpc/include/asm/ultravisor-api.h
+++ b/arch/powerpc/include/asm/ultravisor-api.h
@@ -25,5 +25,7 @@
 /* opcodes */
 #define UV_WRITE_PATE  0xF104
 #define UV_RETURN  0xF11C
+#define UV_PAGE_IN 0xF128
+#define UV_PAGE_OUT    0xF12C
 
 #endif /* _ASM_POWERPC_ULTRAVISOR_API_H */
diff --git a/arch/powerpc/include/asm/ultravisor.h 
b/arch/powerpc/include/asm/ultravisor.h
index 6fe1f365dec8..d668a59e099b 100644
--- a/arch/powerpc/include/asm/ultravisor.h
+++ b/arch/powerpc/include/asm/ultravisor.h
@@ -19,4 +19,18 @@ static inline int uv_register_pate(u64 lpid, u64 dw0, u64 
dw1)
return ucall_norets(UV_WRITE_PATE, lpid, dw0, dw1);
 }
 
+static inline int uv_page_in(u64 lpid, u64 src_ra, u64 dst_gpa, u64 flags,
+u64 page_shift)
+{
+   return ucall_norets(UV_PAGE_IN, lpid, src_ra, 

[PATCH v6 0/7] KVMPPC driver to manage secure guest pages

2019-08-09 Thread Bharata B Rao
Hi,

A pseries guest can be run as a secure guest on Ultravisor-enabled
POWER platforms. On such platforms, this driver will be used to manage
the movement of guest pages between the normal memory managed by the
hypervisor (HV) and the secure memory managed by the Ultravisor (UV).

Private ZONE_DEVICE memory equal to the amount of secure memory
available in the platform for running secure guests is created
via a char device. Whenever a page belonging to the guest becomes
secure, a page from this private device memory is used to
represent and track that secure page on the HV side. The movement
of pages between normal and secure memory is done via
migrate_vma_pages().

The page-in or page-out requests from UV will come to HV as hcalls and
HV will call back into UV via uvcalls to satisfy these page requests.

These patches are against Christoph Hellwig's migrate_vma-cleanup.2
branch
(http://git.infradead.org/users/hch/misc.git/shortlog/refs/heads/migrate_vma-cleanup.2)

plus

Claudio Carvalho's base ultravisor enablement patchset
(https://lore.kernel.org/linuxppc-dev/20190808040555.2371-1-cclau...@linux.ibm.com/T/#t)

These patches along with Claudio's above patches are required to
run a secure pseries guest on KVM.

Changes in v6
=
Updated the driver to account for the changes in HMM and migrate_vma()
by Christoph Hellwig.
 - Not using any HMM routines any more.
 - Switched to using migrate_vma_pages()

v5: https://www.mail-archive.com/linuxppc-dev@lists.ozlabs.org/msg153294.html

Anshuman Khandual (1):
  KVM: PPC: Ultravisor: Add PPC_UV config option

Bharata B Rao (6):
  kvmppc: Driver to manage pages of secure guest
  kvmppc: Shared pages support for secure guests
  kvmppc: H_SVM_INIT_START and H_SVM_INIT_DONE hcalls
  kvmppc: Handle memory plug/unplug to secure VM
  kvmppc: Radix changes for secure guest
  kvmppc: Support reset of secure guest

 Documentation/virtual/kvm/api.txt  |  18 +
 arch/powerpc/Kconfig   |  18 +
 arch/powerpc/include/asm/hvcall.h  |   9 +
 arch/powerpc/include/asm/kvm_book3s_devm.h |  48 ++
 arch/powerpc/include/asm/kvm_host.h|  28 +
 arch/powerpc/include/asm/kvm_ppc.h |   2 +
 arch/powerpc/include/asm/ultravisor-api.h  |   6 +
 arch/powerpc/include/asm/ultravisor.h  |  36 ++
 arch/powerpc/kvm/Makefile  |   3 +
 arch/powerpc/kvm/book3s_64_mmu_radix.c |  22 +
 arch/powerpc/kvm/book3s_hv.c   | 115 
 arch/powerpc/kvm/book3s_hv_devm.c  | 668 +
 arch/powerpc/kvm/powerpc.c |  12 +
 include/uapi/linux/kvm.h   |   1 +
 14 files changed, 986 insertions(+)
 create mode 100644 arch/powerpc/include/asm/kvm_book3s_devm.h
 create mode 100644 arch/powerpc/kvm/book3s_hv_devm.c

-- 
2.21.0



[PATCH v5 4/4] mm/nvdimm: Pick the right alignment default when creating dax devices

2019-08-09 Thread Aneesh Kumar K.V
Allow the arch to provide the supported alignments and use hugepage alignment
only if we support hugepages. Right now we depend on compile-time configs,
whereas this patch switches to runtime discovery.

Architectures like ppc64 can have THP enabled in code, but can then have the
hugepage size disabled by the hypervisor. This allows us to create dax devices
with PAGE_SIZE alignment in this case.

An existing dax namespace with an alignment larger than PAGE_SIZE will fail to
initialize in this specific case. We still allow fsdax namespace initialization.

With respect to deciding whether to enable hugepage faults for a dax device:
if THP is enabled at compile time, we default to taking hugepage faults, and
if the dax fault handler finds the fault size > alignment, we retry with a
PAGE_SIZE fault size.

This also addresses the below failure scenario on ppc64

ndctl create-namespace --mode=devdax  | grep align
 "align":16777216,
 "align":16777216

cat /sys/devices/ndbus0/region0/dax0.0/supported_alignments
 65536 16777216

daxio.static-debug  -z -o /dev/dax0.0
  Bus error (core dumped)

  $ dmesg | tail
   lpar: Failed hash pte insert with error -4
   hash-mmu: mm: Hashing failure ! EA=0x7fff1700 access=0x8006 
current=daxio
   hash-mmu: trap=0x300 vsid=0x22cb7a3 ssize=1 base psize=2 psize 10 
pte=0xc00501002b86
   daxio[3860]: bus error (7) at 7fff1700 nip 7fff973c007c lr 7fff973bff34 
code 2 in libpmem.so.1.0.0[7fff973b+2]
   daxio[3860]: code: 792945e4 7d494b78 e95f0098 7d494b78 f93f00a0 4800012c 
e93f0088 f93f0120
   daxio[3860]: code: e93f00a0 f93f0128 e93f0120 e95f0128  e93f0088 
39290008 f93f0110

The failure was due to the guest kernel using the wrong page size.

The namespaces created with 16M alignment will appear as below on a config with
the 16M page size disabled.

$ ndctl list -Ni
[
  {
"dev":"namespace0.1",
"mode":"fsdax",
"map":"dev",
"size":5351931904,
"uuid":"fc6e9667-461a-4718-82b4-69b24570bddb",
"align":16777216,
"blockdev":"pmem0.1",
"supported_alignments":[
  65536
]
  },
  {
"dev":"namespace0.0",
"mode":"fsdax",< devdax 16M alignment marked disabled.
"map":"mem",
"size":5368709120,
"uuid":"a4bdf81a-f2ee-4bc6-91db-7b87eddd0484",
"state":"disabled"
  }
]

Signed-off-by: Aneesh Kumar K.V 
---
 arch/powerpc/include/asm/libnvdimm.h |  9 
 arch/powerpc/mm/Makefile |  1 +
 arch/powerpc/mm/nvdimm.c | 34 
 arch/x86/include/asm/libnvdimm.h | 19 
 drivers/nvdimm/nd.h  |  6 -
 drivers/nvdimm/pfn_devs.c| 32 +-
 include/linux/huge_mm.h  |  7 +-
 7 files changed, 100 insertions(+), 8 deletions(-)
 create mode 100644 arch/powerpc/include/asm/libnvdimm.h
 create mode 100644 arch/powerpc/mm/nvdimm.c
 create mode 100644 arch/x86/include/asm/libnvdimm.h
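
The pfn_devs.c changes themselves are not all visible here. As a hedged
illustration of the runtime check described above (refuse to initialize a
namespace whose recorded alignment the architecture cannot map), a
validation helper could look roughly like this; the function name and its
call site are assumptions, not the patch's actual code:

static int nd_pfn_check_align(struct nd_pfn *nd_pfn, unsigned long align)
{
	const unsigned long *supported = nd_pfn_supported_alignments();
	int i;

	/* The arch-provided list is zero-terminated (see nvdimm.c below) */
	for (i = 0; supported[i]; i++)
		if (align == supported[i])
			return 0;

	dev_err(&nd_pfn->dev, "unsupported alignment: %lx\n", align);
	return -EOPNOTSUPP;	/* marks the namespace disabled, per patch 1/4 */
}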

diff --git a/arch/powerpc/include/asm/libnvdimm.h 
b/arch/powerpc/include/asm/libnvdimm.h
new file mode 100644
index ..d35fd7f48603
--- /dev/null
+++ b/arch/powerpc/include/asm/libnvdimm.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_LIBNVDIMM_H
+#define _ASM_POWERPC_LIBNVDIMM_H
+
+#define nd_pfn_supported_alignments nd_pfn_supported_alignments
+extern unsigned long *nd_pfn_supported_alignments(void);
+extern unsigned long nd_pfn_default_alignment(void);
+
+#endif
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index 0f499db315d6..42e4a399ba5d 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -20,3 +20,4 @@ obj-$(CONFIG_HIGHMEM) += highmem.o
 obj-$(CONFIG_PPC_COPRO_BASE)   += copro_fault.o
 obj-$(CONFIG_PPC_PTDUMP)   += ptdump/
 obj-$(CONFIG_KASAN)+= kasan/
+obj-$(CONFIG_NVDIMM_PFN)   += nvdimm.o
diff --git a/arch/powerpc/mm/nvdimm.c b/arch/powerpc/mm/nvdimm.c
new file mode 100644
index ..a29a4510715e
--- /dev/null
+++ b/arch/powerpc/mm/nvdimm.c
@@ -0,0 +1,34 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include 
+#include 
+
+#include 
+/*
+ * We support only pte and pmd mappings for now.
+ */
+const unsigned long *nd_pfn_supported_alignments(void)
+{
+   static unsigned long supported_alignments[3];
+
+   supported_alignments[0] = PAGE_SIZE;
+
+   if (has_transparent_hugepage())
+   supported_alignments[1] = HPAGE_PMD_SIZE;
+   else
+   supported_alignments[1] = 0;
+
+   supported_alignments[2] = 0;
+   return supported_alignments;
+}
+
+/*
+ * Use pmd mapping if supported as default alignment
+ */
+unsigned long nd_pfn_default_alignment(void)
+{
+
+   if (has_transparent_hugepage())
+   return HPAGE_PMD_SIZE;
+   return PAGE_SIZE;
+}
diff --git a/arch/x86/include/asm/libnvdimm.h b/arch/x86/include/asm/libnvdimm.h
new file mode 100644
index ..3d5361db9164
--- /dev/null
+++ b/arch/x86/include/asm/libnvdimm.h
@@ -0,0 

[PATCH v5 3/4] mm/nvdimm: Use correct #defines instead of open coding

2019-08-09 Thread Aneesh Kumar K.V
Use PAGE_SIZE instead of SZ_4K and sizeof(struct page) instead of 64.
If we have a kernel built with a different struct page size, the previous
patch should handle marking the namespace disabled.

Signed-off-by: Aneesh Kumar K.V 
---
 drivers/nvdimm/label.c  | 2 +-
 drivers/nvdimm/namespace_devs.c | 6 +++---
 drivers/nvdimm/pfn_devs.c   | 3 ++-
 drivers/nvdimm/region_devs.c| 8 
 4 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/drivers/nvdimm/label.c b/drivers/nvdimm/label.c
index 73e197babc2f..7ee037063be7 100644
--- a/drivers/nvdimm/label.c
+++ b/drivers/nvdimm/label.c
@@ -355,7 +355,7 @@ static bool slot_valid(struct nvdimm_drvdata *ndd,
 
/* check that DPA allocations are page aligned */
if ((__le64_to_cpu(nd_label->dpa)
-   | __le64_to_cpu(nd_label->rawsize)) % SZ_4K)
+   | __le64_to_cpu(nd_label->rawsize)) % PAGE_SIZE)
return false;
 
/* check checksum */
diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c
index a16e52251a30..a9c76df12cb9 100644
--- a/drivers/nvdimm/namespace_devs.c
+++ b/drivers/nvdimm/namespace_devs.c
@@ -1006,10 +1006,10 @@ static ssize_t __size_store(struct device *dev, 
unsigned long long val)
return -ENXIO;
}
 
-   div_u64_rem(val, SZ_4K * nd_region->ndr_mappings, &remainder);
+   div_u64_rem(val, PAGE_SIZE * nd_region->ndr_mappings, &remainder);
if (remainder) {
-   dev_dbg(dev, "%llu is not %dK aligned\n", val,
-   (SZ_4K * nd_region->ndr_mappings) / SZ_1K);
+   dev_dbg(dev, "%llu is not %ldK aligned\n", val,
+   (PAGE_SIZE * nd_region->ndr_mappings) / SZ_1K);
return -EINVAL;
}
 
diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c
index 37e96811c2fc..c1d9be609322 100644
--- a/drivers/nvdimm/pfn_devs.c
+++ b/drivers/nvdimm/pfn_devs.c
@@ -725,7 +725,8 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn)
 * when populating the vmemmap. This *should* be equal to
 * PMD_SIZE for most architectures.
 */
-   offset = ALIGN(start + SZ_8K + 64 * npfns, align) - start;
+   offset = ALIGN(start + SZ_8K + sizeof(struct page) * npfns,
+  align) - start;
} else if (nd_pfn->mode == PFN_MODE_RAM)
offset = ALIGN(start + SZ_8K, align) - start;
else
diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c
index af30cbe7a8ea..20e265a534f8 100644
--- a/drivers/nvdimm/region_devs.c
+++ b/drivers/nvdimm/region_devs.c
@@ -992,10 +992,10 @@ static struct nd_region *nd_region_create(struct 
nvdimm_bus *nvdimm_bus,
struct nd_mapping_desc *mapping = &ndr_desc->mapping[i];
struct nvdimm *nvdimm = mapping->nvdimm;
 
-   if ((mapping->start | mapping->size) % SZ_4K) {
-   dev_err(&nvdimm_bus->dev, "%s: %s mapping%d is not 4K aligned\n",
-   caller, dev_name(&nvdimm->dev), i);
-
+   if ((mapping->start | mapping->size) % PAGE_SIZE) {
+   dev_err(&nvdimm_bus->dev,
+   "%s: %s mapping%d is not %ld aligned\n",
+   caller, dev_name(&nvdimm->dev), i, PAGE_SIZE);
return NULL;
}
 
-- 
2.21.0



[PATCH v5 2/4] mm/nvdimm: Add page size and struct page size to pfn superblock

2019-08-09 Thread Aneesh Kumar K.V
This is needed so that we don't wrongly initialize a namespace
which doesn't have enough space reserved for holding struct pages
with the current kernel.

Signed-off-by: Aneesh Kumar K.V 
---
 drivers/nvdimm/pfn.h  |  5 -
 drivers/nvdimm/pfn_devs.c | 27 ++-
 2 files changed, 30 insertions(+), 2 deletions(-)

diff --git a/drivers/nvdimm/pfn.h b/drivers/nvdimm/pfn.h
index 7381673b7b70..acb19517f678 100644
--- a/drivers/nvdimm/pfn.h
+++ b/drivers/nvdimm/pfn.h
@@ -29,7 +29,10 @@ struct nd_pfn_sb {
/* minor-version-2 record the base alignment of the mapping */
__le32 align;
/* minor-version-3 guarantee the padding and flags are zero */
-   u8 padding[4000];
+   /* minor-version-4 record the page size and struct page size */
+   __le32 page_size;
+   __le16 page_struct_size;
+   u8 padding[3994];
__le64 checksum;
 };
 
diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c
index 3e7b11cf1aae..37e96811c2fc 100644
--- a/drivers/nvdimm/pfn_devs.c
+++ b/drivers/nvdimm/pfn_devs.c
@@ -460,6 +460,15 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig)
if (__le16_to_cpu(pfn_sb->version_minor) < 2)
pfn_sb->align = 0;
 
+   if (__le16_to_cpu(pfn_sb->version_minor) < 4) {
+   /*
+* For a large part we use PAGE_SIZE. But we
+* do have some accounting code using SZ_4K.
+*/
+   pfn_sb->page_struct_size = cpu_to_le16(64);
+   pfn_sb->page_size = cpu_to_le32(PAGE_SIZE);
+   }
+
switch (le32_to_cpu(pfn_sb->mode)) {
case PFN_MODE_RAM:
case PFN_MODE_PMEM:
@@ -475,6 +484,20 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig)
align = 1UL << ilog2(offset);
mode = le32_to_cpu(pfn_sb->mode);
 
+   if (le32_to_cpu(pfn_sb->page_size) != PAGE_SIZE) {
+   dev_err(&nd_pfn->dev,
+   "init failed, page size mismatch %d\n",
+   le32_to_cpu(pfn_sb->page_size));
+   return -EOPNOTSUPP;
+   }
+
+   if (le16_to_cpu(pfn_sb->page_struct_size) < sizeof(struct page)) {
+   dev_err(&nd_pfn->dev,
+   "init failed, struct page size mismatch %d\n",
+   le16_to_cpu(pfn_sb->page_struct_size));
+   return -EOPNOTSUPP;
+   }
+
if (!nd_pfn->uuid) {
/*
 * When probing a namepace via nd_pfn_probe() the uuid
@@ -722,8 +745,10 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn)
memcpy(pfn_sb->uuid, nd_pfn->uuid, 16);
memcpy(pfn_sb->parent_uuid, nd_dev_to_uuid(&ndns->dev), 16);
pfn_sb->version_major = cpu_to_le16(1);
-   pfn_sb->version_minor = cpu_to_le16(3);
+   pfn_sb->version_minor = cpu_to_le16(4);
pfn_sb->align = cpu_to_le32(nd_pfn->align);
+   pfn_sb->page_struct_size = cpu_to_le16(sizeof(struct page));
+   pfn_sb->page_size = cpu_to_le32(PAGE_SIZE);
checksum = nd_sb_checksum((struct nd_gen_sb *) pfn_sb);
pfn_sb->checksum = cpu_to_le64(checksum);
 
-- 
2.21.0



[PATCH v5 1/4] nvdimm: Consider probe return -EOPNOTSUPP as success

2019-08-09 Thread Aneesh Kumar K.V
This patch adds -EOPNOTSUPP as a return value from the probe callback to
indicate that we were not able to initialize a namespace due to a pfn
superblock feature/version mismatch. We want to consider this a probe
success so that we can create a new namespace seed and thereby avoid
marking the failed namespace as the seed namespace.

Signed-off-by: Aneesh Kumar K.V 
---
 drivers/nvdimm/bus.c  |  2 +-
 drivers/nvdimm/pmem.c | 26 ++
 2 files changed, 23 insertions(+), 5 deletions(-)

diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c
index 798c5c4aea9c..16c35e6446a7 100644
--- a/drivers/nvdimm/bus.c
+++ b/drivers/nvdimm/bus.c
@@ -95,7 +95,7 @@ static int nvdimm_bus_probe(struct device *dev)
rc = nd_drv->probe(dev);
debug_nvdimm_unlock(dev);
 
-   if (rc == 0)
+   if (rc == 0 || rc == -EOPNOTSUPP)
nd_region_probe_success(nvdimm_bus, dev);
else
nd_region_disable(nvdimm_bus, dev);
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 4c121dd03dd9..3f498881dd28 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -490,6 +490,7 @@ static int pmem_attach_disk(struct device *dev,
 
 static int nd_pmem_probe(struct device *dev)
 {
+   int ret;
struct nd_namespace_common *ndns;
 
ndns = nvdimm_namespace_common_probe(dev);
@@ -505,12 +506,29 @@ static int nd_pmem_probe(struct device *dev)
if (is_nd_pfn(dev))
return pmem_attach_disk(dev, ndns);
 
-   /* if we find a valid info-block we'll come back as that personality */
-   if (nd_btt_probe(dev, ndns) == 0 || nd_pfn_probe(dev, ndns) == 0
-   || nd_dax_probe(dev, ndns) == 0)
+   ret = nd_btt_probe(dev, ndns);
+   if (ret == 0)
return -ENXIO;
+   else if (ret == -EOPNOTSUPP)
+   return ret;
 
-   /* ...otherwise we're just a raw pmem device */
+   ret = nd_pfn_probe(dev, ndns);
+   if (ret == 0)
+   return -ENXIO;
+   else if (ret == -EOPNOTSUPP)
+   return ret;
+
+   ret = nd_dax_probe(dev, ndns);
+   if (ret == 0)
+   return -ENXIO;
+   else if (ret == -EOPNOTSUPP)
+   return ret;
+   /*
+* We have two failure conditions here: there is no
+* info reserve block, or we found a valid info reserve block
+* but failed to initialize the pfn superblock.
+* Don't create a raw pmem disk for the second case.
+*/
return pmem_attach_disk(dev, ndns);
 }
 
-- 
2.21.0



[PATCH v5 0/4] Mark the namespace disabled on pfn superblock mismatch

2019-08-09 Thread Aneesh Kumar K.V
We add new members to the pfn superblock (PAGE_SIZE and struct page size) in
this series. These are now checked while initializing the namespace. If we
find a mismatch, we mark the namespace disabled.

This series also handles configs where hugepage support is not enabled by
default. This can result in different alignment restrictions for a dax
namespace. We mark the dax namespace disabled if we find the alignment is
not supported.

Aneesh Kumar K.V (4):
  nvdimm: Consider probe return -EOPNOTSUPP as success
  mm/nvdimm: Add page size and struct page size to pfn superblock
  mm/nvdimm: Use correct #defines instead of open coding
  mm/nvdimm: Pick the right alignment default when creating dax devices

 arch/powerpc/include/asm/libnvdimm.h |  9 
 arch/powerpc/mm/Makefile |  1 +
 arch/powerpc/mm/nvdimm.c | 34 +++
 arch/x86/include/asm/libnvdimm.h | 19 +
 drivers/nvdimm/bus.c |  2 +-
 drivers/nvdimm/label.c   |  2 +-
 drivers/nvdimm/namespace_devs.c  |  6 +--
 drivers/nvdimm/nd.h  |  6 ---
 drivers/nvdimm/pfn.h |  5 ++-
 drivers/nvdimm/pfn_devs.c| 62 ++--
 drivers/nvdimm/pmem.c| 26 ++--
 drivers/nvdimm/region_devs.c |  8 ++--
 include/linux/huge_mm.h  |  7 +++-
 13 files changed, 163 insertions(+), 24 deletions(-)
 create mode 100644 arch/powerpc/include/asm/libnvdimm.h
 create mode 100644 arch/powerpc/mm/nvdimm.c
 create mode 100644 arch/x86/include/asm/libnvdimm.h

-- 
2.21.0



[RFC V2 1/1] mm/pgtable/debug: Add test validating architecture page table helpers

2019-08-09 Thread Anshuman Khandual
This adds a test module which will validate architecture page table helpers
and accessors for compliance with the expected generic MM semantics.
This will help various architectures in validating changes to existing
page table helpers or the addition of new ones.

Cc: Andrew Morton 
Cc: Vlastimil Babka 
Cc: Greg Kroah-Hartman 
Cc: Thomas Gleixner 
Cc: Mike Rapoport 
Cc: Jason Gunthorpe 
Cc: Dan Williams 
Cc: Peter Zijlstra 
Cc: Michal Hocko 
Cc: Mark Rutland 
Cc: Mark Brown 
Cc: Steven Price 
Cc: Ard Biesheuvel 
Cc: Masahiro Yamada 
Cc: Kees Cook 
Cc: Tetsuo Handa 
Cc: Matthew Wilcox 
Cc: Sri Krishna chowdary 
Cc: Dave Hansen 
Cc: Russell King - ARM Linux 
Cc: Michael Ellerman 
Cc: Paul Mackerras 
Cc: Martin Schwidefsky 
Cc: Heiko Carstens 
Cc: "David S. Miller" 
Cc: Vineet Gupta 
Cc: James Hogan 
Cc: Paul Burton 
Cc: Ralf Baechle 
Cc: linux-snps-...@lists.infradead.org
Cc: linux-m...@vger.kernel.org
Cc: linux-arm-ker...@lists.infradead.org
Cc: linux-i...@vger.kernel.org
Cc: linuxppc-dev@lists.ozlabs.org
Cc: linux-s...@vger.kernel.org
Cc: linux...@vger.kernel.org
Cc: sparcli...@vger.kernel.org
Cc: x...@kernel.org
Cc: linux-ker...@vger.kernel.org

Suggested-by: Catalin Marinas 
Signed-off-by: Anshuman Khandual 
---
 mm/Kconfig.debug   |  14 ++
 mm/Makefile|   1 +
 mm/arch_pgtable_test.c | 400 +
 3 files changed, 415 insertions(+)
 create mode 100644 mm/arch_pgtable_test.c
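
The tail of arch_pgtable_test.c is not shown in full below. Purely as a
hedged sketch of how the basic tests could be driven (not the module's
actual init path; names beyond those visible in the diff are assumptions):

static int __init arch_pgtable_tests_init(void)
{
	struct page *page;
	pgprot_t prot = vm_get_page_prot(VMA_TEST_FLAGS);

	page = alloc_page(GFP_KERNEL);
	if (!page)
		return -ENOMEM;

	/* Exercise the pte-level transformation helpers */
	pte_basic_tests(page, prot);

	__free_page(page);
	pr_info("page table helper tests completed\n");
	return 0;
}
module_init(arch_pgtable_tests_init);
MODULE_AUTHOR("Anshuman Khandual");
MODULE_DESCRIPTION("Test architecture page table helpers");
MODULE_LICENSE("GPL v2");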

diff --git a/mm/Kconfig.debug b/mm/Kconfig.debug
index 82b6a20898bd..d3dfbe984d41 100644
--- a/mm/Kconfig.debug
+++ b/mm/Kconfig.debug
@@ -115,3 +115,17 @@ config DEBUG_RODATA_TEST
 depends on STRICT_KERNEL_RWX
 ---help---
   This option enables a testcase for the setting rodata read-only.
+
+config DEBUG_ARCH_PGTABLE_TEST
+   bool "Test arch page table helpers for semantics compliance"
+   depends on MMU
+   depends on DEBUG_KERNEL
+   help
+ This option provides a kernel module which can be used to test
+ architecture page table helper functions on various platforms,
+ verifying that they comply with the expected generic MM semantics.
+ This will help architecture code ensure that any changes to these
+ helpers, or new additions, still conform to the expected generic
+ MM semantics.
+
+ If unsure, say N.
diff --git a/mm/Makefile b/mm/Makefile
index 338e528ad436..0e6ac3789ca8 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -84,6 +84,7 @@ obj-$(CONFIG_HWPOISON_INJECT) += hwpoison-inject.o
 obj-$(CONFIG_DEBUG_KMEMLEAK) += kmemleak.o
 obj-$(CONFIG_DEBUG_KMEMLEAK_TEST) += kmemleak-test.o
 obj-$(CONFIG_DEBUG_RODATA_TEST) += rodata_test.o
+obj-$(CONFIG_DEBUG_ARCH_PGTABLE_TEST) += arch_pgtable_test.o
 obj-$(CONFIG_PAGE_OWNER) += page_owner.o
 obj-$(CONFIG_CLEANCACHE) += cleancache.o
 obj-$(CONFIG_MEMORY_ISOLATION) += page_isolation.o
diff --git a/mm/arch_pgtable_test.c b/mm/arch_pgtable_test.c
new file mode 100644
index ..41d6fa78a620
--- /dev/null
+++ b/mm/arch_pgtable_test.c
@@ -0,0 +1,400 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * This kernel module validates architecture page table helpers &
+ * accessors and helps in verifying their continued compliance with
+ * generic MM semantics.
+ *
+ * Copyright (C) 2019 ARM Ltd.
+ *
+ * Author: Anshuman Khandual 
+ */
+#define pr_fmt(fmt) "arch_pgtable_test: %s " fmt, __func__
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+/*
+ * Basic operations
+ *
+ * mkold(entry)             = An old and not a young entry
+ * mkyoung(entry)           = A young and not an old entry
+ * mkdirty(entry)           = A dirty and not a clean entry
+ * mkclean(entry)           = A clean and not a dirty entry
+ * mkwrite(entry)           = A write and not a write protected entry
+ * wrprotect(entry)         = A write protected and not a write entry
+ * pxx_bad(entry)           = A mapped and non-table entry
+ * pxx_same(entry1, entry2) = Both entries hold the exact same value
+ */
+#define VADDR_TEST (PGDIR_SIZE + PUD_SIZE + PMD_SIZE + PAGE_SIZE)
+#define VMA_TEST_FLAGS (VM_READ|VM_WRITE|VM_EXEC)
+#define RANDOM_NZVALUE (0xbe)
+
+static bool pud_aligned;
+
+extern struct mm_struct *mm_alloc(void);
+
+static void pte_basic_tests(struct page *page, pgprot_t prot)
+{
+   pte_t pte = mk_pte(page, prot);
+
+   WARN_ON(!pte_same(pte, pte));
+   WARN_ON(!pte_young(pte_mkyoung(pte)));
+   WARN_ON(!pte_dirty(pte_mkdirty(pte)));
+   WARN_ON(!pte_write(pte_mkwrite(pte)));
+   WARN_ON(pte_young(pte_mkold(pte)));
+   WARN_ON(pte_dirty(pte_mkclean(pte)));
+   WARN_ON(pte_write(pte_wrprotect(pte)));
+}
+
+#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE
+static void pmd_basic_tests(struct page *page, pgprot_t prot)
+{
+   pmd_t pmd = mk_pmd(page, prot);
+
+   

[RFC V2 0/1] mm/debug: Add tests for architecture exported page table helpers

2019-08-09 Thread Anshuman Khandual
This series adds test validation for architecture-exported page table
helpers. The patch in the series adds basic transformation tests at various
levels of the page table.

This test was originally suggested by Catalin during the arm64 THP migration
RFC discussion earlier. Going forward it can include more specific tests for
various generic MM functions like THP, HugeTLB etc., and platform-specific
tests.

https://lore.kernel.org/linux-mm/20190628102003.ga56...@arrakis.emea.arm.com/

Questions:

Should alloc_gigantic_page() be made available as an interface for general
use in the kernel? The test module here uses a very similar implementation to
HugeTLB's to allocate a PUD-aligned memory block. Similarly for mm_alloc(),
which needs to be exported through a header.

Testing:

Build and boot tested on arm64 and x86 platforms. While arm64 clears all
of these tests, the following errors were reported on x86.

1. WARN_ON(pud_bad(pud)) in pud_populate_tests()
2. WARN_ON(p4d_bad(p4d)) in p4d_populate_tests()

I would really appreciate if folks can help validate this test on other
platforms and report back problems if any. Suggestions, comments and
inputs welcome. Thank you.

Changes in V2:

- Moved test module and it's config from lib/ to mm/
- Renamed config TEST_ARCH_PGTABLE as DEBUG_ARCH_PGTABLE_TEST
- Renamed file from test_arch_pgtable.c to arch_pgtable_test.c
- Added relevant MODULE_DESCRIPTION() and MODULE_AUTHOR() details
- Dropped loadable module config option
- Basic tests now use memory blocks with required size and alignment
- PUD aligned memory block gets allocated with alloc_contig_range()
- If PUD aligned memory could not be allocated it falls back on PMD aligned
  memory block from page allocator and pud_* tests are skipped
- Clear and populate tests now operate on real in memory page table entries
- Dummy mm_struct gets allocated with mm_alloc()
- Dummy page table entries get allocated with [pud|pmd|pte]_alloc_[map]()
- Simplified [p4d|pgd]_basic_tests(), now has random values in the entries

RFC V1:

https://lore.kernel.org/linux-mm/1564037723-26676-1-git-send-email-anshuman.khand...@arm.com/

Cc: Andrew Morton 
Cc: Vlastimil Babka 
Cc: Greg Kroah-Hartman 
Cc: Thomas Gleixner 
Cc: Mike Rapoport 
Cc: Jason Gunthorpe 
Cc: Dan Williams 
Cc: Peter Zijlstra 
Cc: Michal Hocko 
Cc: Mark Rutland 
Cc: Mark Brown 
Cc: Steven Price 
Cc: Ard Biesheuvel 
Cc: Masahiro Yamada 
Cc: Kees Cook 
Cc: Tetsuo Handa 
Cc: Matthew Wilcox 
Cc: Sri Krishna chowdary 
Cc: Dave Hansen 
Cc: Russell King - ARM Linux 
Cc: Michael Ellerman 
Cc: Paul Mackerras 
Cc: Martin Schwidefsky 
Cc: Heiko Carstens 
Cc: "David S. Miller" 
Cc: Vineet Gupta 
Cc: James Hogan 
Cc: Paul Burton 
Cc: Ralf Baechle 
Cc: linux-snps-...@lists.infradead.org
Cc: linux-m...@vger.kernel.org
Cc: linux-arm-ker...@lists.infradead.org
Cc: linux-i...@vger.kernel.org
Cc: linuxppc-dev@lists.ozlabs.org
Cc: linux-s...@vger.kernel.org
Cc: linux...@vger.kernel.org
Cc: sparcli...@vger.kernel.org
Cc: x...@kernel.org
Cc: linux-ker...@vger.kernel.org

Anshuman Khandual (1):
  mm/pgtable/debug: Add test validating architecture page table helpers

 mm/Kconfig.debug   |  14 ++
 mm/Makefile|   1 +
 mm/arch_pgtable_test.c | 400 +
 3 files changed, 415 insertions(+)
 create mode 100644 mm/arch_pgtable_test.c

-- 
2.20.1



Re: [PATCH 8/8] dma-mapping: remove CONFIG_ARCH_NO_COHERENT_DMA_MMAP

2019-08-09 Thread Geert Uytterhoeven
Hi Christoph,

On Thu, Aug 8, 2019 at 6:01 PM Christoph Hellwig  wrote:
> CONFIG_ARCH_NO_COHERENT_DMA_MMAP is now functionally identical to
> !CONFIG_MMU, so remove the separate symbol.  The only difference is that
> arm did not set it for !CONFIG_MMU, but arm uses a separate dma mapping
> implementation including its own mmap method, which is handled by moving
> the CONFIG_MMU check in dma_can_mmap so that is only applies to the
> dma-direct case, just as the other ifdefs for it.
>
> Signed-off-by: Christoph Hellwig 

>  arch/m68k/Kconfig   |  1 -

For m68k:
Acked-by: Geert Uytterhoeven 

Gr{oetje,eeting}s,

Geert

-- 
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- ge...@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds