Re: [PATCH] powerpc/powernv/idle: Restore IAMR after idle

2019-02-20 Thread Russell Currey
On Wed, 2019-02-20 at 12:45 +0530, Akshay Adiga wrote:
> On Wed, Feb 06, 2019 at 05:28:37PM +1100, Russell Currey wrote:
> > Without restoring the IAMR after idle, execution prevention on
> > POWER9
> > with Radix MMU is overwritten and the kernel can freely execute
> > userspace without
> > faulting.
> > 
> > This is necessary when returning from any stop state that modifies
> > user
> > state, as well as hypervisor state.
> > 
> > To test how this fails without this patch, load the lkdtm driver
> > and
> > do the following:
> > 
> >echo EXEC_USERSPACE > /sys/kernel/debug/provoke-crash/DIRECT
> > 
> > which won't fault, then boot the kernel with powersave=off, where
> > it
> > will fault.  Applying this patch will fix this.
> > 
> > Fixes: 3b10d0095a1e ("powerpc/mm/radix: Prevent kernel execution of
> > user
> > space")
> > Cc: 
> > Signed-off-by: Russell Currey 
> > ---
> >  arch/powerpc/include/asm/cpuidle.h |  1 +
> >  arch/powerpc/kernel/asm-offsets.c  |  1 +
> >  arch/powerpc/kernel/idle_book3s.S  | 20 
> >  3 files changed, 22 insertions(+)
> > 
> > diff --git a/arch/powerpc/include/asm/cpuidle.h
> > b/arch/powerpc/include/asm/cpuidle.h
> > index 43e5f31fe64d..ad67dbe59498 100644
> > --- a/arch/powerpc/include/asm/cpuidle.h
> > +++ b/arch/powerpc/include/asm/cpuidle.h
> > @@ -77,6 +77,7 @@ struct stop_sprs {
> > u64 mmcr1;
> > u64 mmcr2;
> > u64 mmcra;
> > +   u64 iamr;
> >  };
> > 
> >  #define PNV_IDLE_NAME_LEN16
> > diff --git a/arch/powerpc/kernel/asm-offsets.c
> > b/arch/powerpc/kernel/asm-offsets.c
> > index 9ffc72ded73a..10e0314c2b0d 100644
> > --- a/arch/powerpc/kernel/asm-offsets.c
> > +++ b/arch/powerpc/kernel/asm-offsets.c
> > @@ -774,6 +774,7 @@ int main(void)
> > STOP_SPR(STOP_MMCR1, mmcr1);
> > STOP_SPR(STOP_MMCR2, mmcr2);
> > STOP_SPR(STOP_MMCRA, mmcra);
> > +   STOP_SPR(STOP_IAMR, iamr);
> >  #endif
> > 
> > DEFINE(PPC_DBELL_SERVER, PPC_DBELL_SERVER);
> > diff --git a/arch/powerpc/kernel/idle_book3s.S
> > b/arch/powerpc/kernel/idle_book3s.S
> > index 7f5ac2e8581b..bb4f552f6c7e 100644
> > --- a/arch/powerpc/kernel/idle_book3s.S
> > +++ b/arch/powerpc/kernel/idle_book3s.S
> > @@ -200,6 +200,12 @@ pnv_powersave_common:
> > /* Continue saving state */
> > SAVE_GPR(2, r1)
> > SAVE_NVGPRS(r1)
> > +
> > +BEGIN_FTR_SECTION
> > +   mfspr   r5, SPRN_IAMR
> > +   std r5, STOP_IAMR(r13)
> > +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
> > +
> 
> Are we trying to add for both power8 and power9 ?
> power9 would be CPU_FTR_ARCH_300.
> 

If I recall correctly I had this at P9 only but for some reason I
changed it - the reason is probably just that I had it confused for
something else.  Michael can you confirm this should be P9 only?



Re: [PATCH] powerpc/powernv/idle: Restore IAMR after idle

2019-02-20 Thread Russell Currey
On Wed, 2019-02-20 at 14:28 +0530, Akshay Adiga wrote:
> On Wed, Feb 06, 2019 at 05:28:37PM +1100, Russell Currey wrote:
> > Without restoring the IAMR after idle, execution prevention on
> > POWER9
> > with Radix MMU is overwritten and the kernel can freely execute
> > userspace without
> > faulting.
> > 
> > This is necessary when returning from any stop state that modifies
> > user
> > state, as well as hypervisor state.
> > 
> > To test how this fails without this patch, load the lkdtm driver
> > and
> > do the following:
> > 
> >echo EXEC_USERSPACE > /sys/kernel/debug/provoke-crash/DIRECT
> > 
> > which won't fault, then boot the kernel with powersave=off, where
> > it
> > will fault.  Applying this patch will fix this.
> > 
> > Fixes: 3b10d0095a1e ("powerpc/mm/radix: Prevent kernel execution of
> > user
> > space")
> > Cc: 
> > Signed-off-by: Russell Currey 
> > ---
> >  arch/powerpc/include/asm/cpuidle.h |  1 +
> >  arch/powerpc/kernel/asm-offsets.c  |  1 +
> >  arch/powerpc/kernel/idle_book3s.S  | 20 
> >  3 files changed, 22 insertions(+)
> > 
> > diff --git a/arch/powerpc/include/asm/cpuidle.h
> > b/arch/powerpc/include/asm/cpuidle.h
> > index 43e5f31fe64d..ad67dbe59498 100644
> > --- a/arch/powerpc/include/asm/cpuidle.h
> > +++ b/arch/powerpc/include/asm/cpuidle.h
> > @@ -77,6 +77,7 @@ struct stop_sprs {
> > u64 mmcr1;
> > u64 mmcr2;
> > u64 mmcra;
> > +   u64 iamr;
> >  };
> > 
> >  #define PNV_IDLE_NAME_LEN16
> > diff --git a/arch/powerpc/kernel/asm-offsets.c
> > b/arch/powerpc/kernel/asm-offsets.c
> > index 9ffc72ded73a..10e0314c2b0d 100644
> > --- a/arch/powerpc/kernel/asm-offsets.c
> > +++ b/arch/powerpc/kernel/asm-offsets.c
> > @@ -774,6 +774,7 @@ int main(void)
> > STOP_SPR(STOP_MMCR1, mmcr1);
> > STOP_SPR(STOP_MMCR2, mmcr2);
> > STOP_SPR(STOP_MMCRA, mmcra);
> > +   STOP_SPR(STOP_IAMR, iamr);
> >  #endif
> > 
> > DEFINE(PPC_DBELL_SERVER, PPC_DBELL_SERVER);
> > diff --git a/arch/powerpc/kernel/idle_book3s.S
> > b/arch/powerpc/kernel/idle_book3s.S
> > index 7f5ac2e8581b..bb4f552f6c7e 100644
> > --- a/arch/powerpc/kernel/idle_book3s.S
> > +++ b/arch/powerpc/kernel/idle_book3s.S
> > @@ -200,6 +200,12 @@ pnv_powersave_common:
> > /* Continue saving state */
> > SAVE_GPR(2, r1)
> > SAVE_NVGPRS(r1)
> > +
> > +BEGIN_FTR_SECTION
> > +   mfspr   r5, SPRN_IAMR
> > +   std r5, STOP_IAMR(r13)
> > +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
> > +
> > mfcrr5
> > std r5,_CCR(r1)
> > std r1,PACAR1(r13)
> > @@ -924,6 +930,13 @@ BEGIN_FTR_SECTION
> >  END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
> > REST_NVGPRS(r1)
> > REST_GPR(2, r1)
> > +
> > +BEGIN_FTR_SECTION
> > +   ld  r4, STOP_IAMR(r13)
> > +   mtspr   SPRN_IAMR, r4
> > +   isync
> > +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
> > +
> > ld  r4,PACAKMSR(r13)
> > ld  r5,_LINK(r1)
> > ld  r6,_CCR(r1)
> > @@ -946,6 +959,13 @@ pnv_wakeup_noloss:
> >  BEGIN_FTR_SECTION
> > CHECK_HMI_INTERRUPT
> >  END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
> > +
> > +BEGIN_FTR_SECTION
> > +   ld  r4, STOP_IAMR(r13)
> > +   mtspr   SPRN_IAMR, r4
> > +   isync
> > +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
> > +
> 
> pnv_wakeup_noloss gets called from two paths:
> 1) cpu wakes up at 0x100  (ESL=EC=1)
> 2) cpu wakes up at next instruction  (ESL=EC=0)
> 

In v2 I drop it from noloss, is that still correct?

> We know for the fact that its not lost with ESL=EC=0 , so we
> should put it somewhere else.
> 
> I would like to put the restore code in
> pnv_restore_hyp_resource_arch300
> it already has some work arounds we can add another.
> 
> By this time we cr3 has comparison of SRR1[46:47] with 2
> 
>   542 pnv_restore_hyp_resource_arch300:
>   543 /*
>   544  * Workaround for POWER9, if we lost resources, the
> ERAT
>   545  * might have been mixed up and needs flushing. We also
> need
>   546  * to reload MMCR0 (see comment above). We also need to
> set
>   547  * then clear bit 60 in MMCRA to ensure the PMU starts
> running.
>   548  */
>   549 blt cr3,1f
>   550 BEGIN_FTR_SECTION
>   551 PPC_INVALIDATE_ERAT
>   552 ld  r1,PACAR1(r13)
>   553 ld  r4,_MMCR0(r1)
>   554 mtspr   SPRN_MMCR0,r4
>   555 END_FTR_SECTION_IFCLR(CPU_FTR_POWER9_DD2_1)
>   556 mfspr   r4,SPRN_MMCRA
>   557 ori r4,r4,(1 << (63-60))
>   558 mtspr   SPRN_MMCRA,r4
>   559 xorir4,r4,(1 << (63-60))
>   560 mtspr   SPRN_MMCRA,r4
> + 561 BEGIN_FTR_SECTION
> + 562 ld  r4,STOP_IAMR(r13)
> + 563 mtspr   SPRN_IAMR,r4
> + 564 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
>   565 1:
>   566 /* 
> 
> I have a patchset to handle both AMOR and IAMR,
> need to test it on power8 before posting.

If you think that's better, go ahead.

> 
> 
> > ld  r4,PACAKMSR(r13)
> > ld  r5,_NIP(r1)
> > ld  r6,_CCR(r1)
> > -- 

Re: [PATCH] powerpc/powernv/idle: Restore IAMR after idle

2019-02-20 Thread Russell Currey
On Wed, 2019-02-20 at 11:34 +0530, Akshay Adiga wrote:
> On Tue, Feb 19, 2019 at 02:21:04PM +1000, Nicholas Piggin wrote:
> > Michael Ellerman's on February 8, 2019 11:04 am:
> > > Nicholas Piggin  writes:
> > > > Russell Currey's on February 6, 2019 4:28 pm:
> > > > > Without restoring the IAMR after idle, execution prevention
> > > > > on POWER9
> > > > > with Radix MMU is overwritten and the kernel can freely
> > > > > execute userspace without
> > > > > faulting.
> > > > > 
> > > > > This is necessary when returning from any stop state that
> > > > > modifies user
> > > > > state, as well as hypervisor state.
> > > > > 
> > > > > To test how this fails without this patch, load the lkdtm
> > > > > driver and
> > > > > do the following:
> > > > > 
> > > > >echo EXEC_USERSPACE > /sys/kernel/debug/provoke-
> > > > > crash/DIRECT
> > > > > 
> > > > > which won't fault, then boot the kernel with powersave=off,
> > > > > where it
> > > > > will fault.  Applying this patch will fix this.
> > > > > 
> > > > > Fixes: 3b10d0095a1e ("powerpc/mm/radix: Prevent kernel
> > > > > execution of user
> > > > > space")
> > > > > Cc: 
> > > > > Signed-off-by: Russell Currey 
> > > > 
> > > > Good catch and debugging. This really should be a quirk, we
> > > > don't want 
> > > > to have to restore this thing on a thread switch.
> > > 
> > > I'm not sure I follow. We don't context switch it on Radix, but
> > > we do
> > > on hash if pkeys are enabled.
> > 
> > Badly worded, I mean a hardware quirk. It should follow thread
> > switches. Still, avoiding it for the no-loss case is better than
> > nothing. We can just revisit it as an optimization if future
> > hardware does not require the restore.
> 
> Apparently, the POWER9 Processor User’s Manual v2.0 documents that
> IAMR can be lost, and that is not just the end.
> 
> Pasting excerpt from "Section 23.5.9.2 State Loss and
> Restoration,Page 309"
> 
>   On the POWER9 core, the only state that can be lost for
>   Stop levels less than four, when PSSCR[ESL] = ‘1’ are the
>   following SPRs: CR, FPSCR, VSCR, XER, DSCR, AMR, IAMR, UAMOR,
>   AMOR, DAWR, DAWRX.
> 
> My observation is that AMOR is being used in kernel as of today
> and AMOR is also lost (recreated in similar scenarios where
> IAMR is lost).
> 

I can add AMOR to this patch (or you can send a patch, either way).



Re: [PATCH] powerpc/powernv/idle: Restore IAMR after idle

2019-02-20 Thread Akshay Adiga
On Wed, Feb 06, 2019 at 05:28:37PM +1100, Russell Currey wrote:
> Without restoring the IAMR after idle, execution prevention on POWER9
> with Radix MMU is overwritten and the kernel can freely execute userspace 
> without
> faulting.
> 
> This is necessary when returning from any stop state that modifies user
> state, as well as hypervisor state.
> 
> To test how this fails without this patch, load the lkdtm driver and
> do the following:
> 
>echo EXEC_USERSPACE > /sys/kernel/debug/provoke-crash/DIRECT
> 
> which won't fault, then boot the kernel with powersave=off, where it
> will fault.  Applying this patch will fix this.
> 
> Fixes: 3b10d0095a1e ("powerpc/mm/radix: Prevent kernel execution of user
> space")
> Cc: 
> Signed-off-by: Russell Currey 
> ---
>  arch/powerpc/include/asm/cpuidle.h |  1 +
>  arch/powerpc/kernel/asm-offsets.c  |  1 +
>  arch/powerpc/kernel/idle_book3s.S  | 20 
>  3 files changed, 22 insertions(+)
> 
> diff --git a/arch/powerpc/include/asm/cpuidle.h 
> b/arch/powerpc/include/asm/cpuidle.h
> index 43e5f31fe64d..ad67dbe59498 100644
> --- a/arch/powerpc/include/asm/cpuidle.h
> +++ b/arch/powerpc/include/asm/cpuidle.h
> @@ -77,6 +77,7 @@ struct stop_sprs {
>   u64 mmcr1;
>   u64 mmcr2;
>   u64 mmcra;
> + u64 iamr;
>  };
> 
>  #define PNV_IDLE_NAME_LEN16
> diff --git a/arch/powerpc/kernel/asm-offsets.c 
> b/arch/powerpc/kernel/asm-offsets.c
> index 9ffc72ded73a..10e0314c2b0d 100644
> --- a/arch/powerpc/kernel/asm-offsets.c
> +++ b/arch/powerpc/kernel/asm-offsets.c
> @@ -774,6 +774,7 @@ int main(void)
>   STOP_SPR(STOP_MMCR1, mmcr1);
>   STOP_SPR(STOP_MMCR2, mmcr2);
>   STOP_SPR(STOP_MMCRA, mmcra);
> + STOP_SPR(STOP_IAMR, iamr);
>  #endif
> 
>   DEFINE(PPC_DBELL_SERVER, PPC_DBELL_SERVER);
> diff --git a/arch/powerpc/kernel/idle_book3s.S 
> b/arch/powerpc/kernel/idle_book3s.S
> index 7f5ac2e8581b..bb4f552f6c7e 100644
> --- a/arch/powerpc/kernel/idle_book3s.S
> +++ b/arch/powerpc/kernel/idle_book3s.S
> @@ -200,6 +200,12 @@ pnv_powersave_common:
>   /* Continue saving state */
>   SAVE_GPR(2, r1)
>   SAVE_NVGPRS(r1)
> +
> +BEGIN_FTR_SECTION
> + mfspr   r5, SPRN_IAMR
> + std r5, STOP_IAMR(r13)
> +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
> +
>   mfcrr5
>   std r5,_CCR(r1)
>   std r1,PACAR1(r13)
> @@ -924,6 +930,13 @@ BEGIN_FTR_SECTION
>  END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
>   REST_NVGPRS(r1)
>   REST_GPR(2, r1)
> +
> +BEGIN_FTR_SECTION
> + ld  r4, STOP_IAMR(r13)
> + mtspr   SPRN_IAMR, r4
> + isync
> +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
> +
>   ld  r4,PACAKMSR(r13)
>   ld  r5,_LINK(r1)
>   ld  r6,_CCR(r1)
> @@ -946,6 +959,13 @@ pnv_wakeup_noloss:
>  BEGIN_FTR_SECTION
>   CHECK_HMI_INTERRUPT
>  END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
> +
> +BEGIN_FTR_SECTION
> + ld  r4, STOP_IAMR(r13)
> + mtspr   SPRN_IAMR, r4
> + isync
> +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
> +

pnv_wakeup_noloss gets called from two paths:
1) cpu wakes up at 0x100  (ESL=EC=1)
2) cpu wakes up at next instruction  (ESL=EC=0)

We know for the fact that its not lost with ESL=EC=0 , so we
should put it somewhere else.

I would like to put the restore code in pnv_restore_hyp_resource_arch300
it already has some work arounds we can add another.

By this time we cr3 has comparison of SRR1[46:47] with 2

  542 pnv_restore_hyp_resource_arch300:
  543 /*
  544  * Workaround for POWER9, if we lost resources, the ERAT
  545  * might have been mixed up and needs flushing. We also need
  546  * to reload MMCR0 (see comment above). We also need to set
  547  * then clear bit 60 in MMCRA to ensure the PMU starts running.
  548  */
  549 blt cr3,1f
  550 BEGIN_FTR_SECTION
  551 PPC_INVALIDATE_ERAT
  552 ld  r1,PACAR1(r13)
  553 ld  r4,_MMCR0(r1)
  554 mtspr   SPRN_MMCR0,r4
  555 END_FTR_SECTION_IFCLR(CPU_FTR_POWER9_DD2_1)
  556 mfspr   r4,SPRN_MMCRA
  557 ori r4,r4,(1 << (63-60))
  558 mtspr   SPRN_MMCRA,r4
  559 xorir4,r4,(1 << (63-60))
  560 mtspr   SPRN_MMCRA,r4
+ 561 BEGIN_FTR_SECTION
+ 562 ld  r4,STOP_IAMR(r13)
+ 563 mtspr   SPRN_IAMR,r4
+ 564 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
  565 1:
  566 /* 

I have a patchset to handle both AMOR and IAMR,
need to test it on power8 before posting.


>   ld  r4,PACAKMSR(r13)
>   ld  r5,_NIP(r1)
>   ld  r6,_CCR(r1)
> -- 
> 2.20.1
> 



Re: [PATCH] powerpc/powernv/idle: Restore IAMR after idle

2019-02-19 Thread Akshay Adiga
On Wed, Feb 06, 2019 at 05:28:37PM +1100, Russell Currey wrote:
> Without restoring the IAMR after idle, execution prevention on POWER9
> with Radix MMU is overwritten and the kernel can freely execute userspace 
> without
> faulting.
> 
> This is necessary when returning from any stop state that modifies user
> state, as well as hypervisor state.
> 
> To test how this fails without this patch, load the lkdtm driver and
> do the following:
> 
>echo EXEC_USERSPACE > /sys/kernel/debug/provoke-crash/DIRECT
> 
> which won't fault, then boot the kernel with powersave=off, where it
> will fault.  Applying this patch will fix this.
> 
> Fixes: 3b10d0095a1e ("powerpc/mm/radix: Prevent kernel execution of user
> space")
> Cc: 
> Signed-off-by: Russell Currey 
> ---
>  arch/powerpc/include/asm/cpuidle.h |  1 +
>  arch/powerpc/kernel/asm-offsets.c  |  1 +
>  arch/powerpc/kernel/idle_book3s.S  | 20 
>  3 files changed, 22 insertions(+)
> 
> diff --git a/arch/powerpc/include/asm/cpuidle.h 
> b/arch/powerpc/include/asm/cpuidle.h
> index 43e5f31fe64d..ad67dbe59498 100644
> --- a/arch/powerpc/include/asm/cpuidle.h
> +++ b/arch/powerpc/include/asm/cpuidle.h
> @@ -77,6 +77,7 @@ struct stop_sprs {
>   u64 mmcr1;
>   u64 mmcr2;
>   u64 mmcra;
> + u64 iamr;
>  };
> 
>  #define PNV_IDLE_NAME_LEN16
> diff --git a/arch/powerpc/kernel/asm-offsets.c 
> b/arch/powerpc/kernel/asm-offsets.c
> index 9ffc72ded73a..10e0314c2b0d 100644
> --- a/arch/powerpc/kernel/asm-offsets.c
> +++ b/arch/powerpc/kernel/asm-offsets.c
> @@ -774,6 +774,7 @@ int main(void)
>   STOP_SPR(STOP_MMCR1, mmcr1);
>   STOP_SPR(STOP_MMCR2, mmcr2);
>   STOP_SPR(STOP_MMCRA, mmcra);
> + STOP_SPR(STOP_IAMR, iamr);
>  #endif
> 
>   DEFINE(PPC_DBELL_SERVER, PPC_DBELL_SERVER);
> diff --git a/arch/powerpc/kernel/idle_book3s.S 
> b/arch/powerpc/kernel/idle_book3s.S
> index 7f5ac2e8581b..bb4f552f6c7e 100644
> --- a/arch/powerpc/kernel/idle_book3s.S
> +++ b/arch/powerpc/kernel/idle_book3s.S
> @@ -200,6 +200,12 @@ pnv_powersave_common:
>   /* Continue saving state */
>   SAVE_GPR(2, r1)
>   SAVE_NVGPRS(r1)
> +
> +BEGIN_FTR_SECTION
> + mfspr   r5, SPRN_IAMR
> + std r5, STOP_IAMR(r13)
> +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
> +

Are we trying to add for both power8 and power9 ?
power9 would be CPU_FTR_ARCH_300.



Re: [PATCH] powerpc/powernv/idle: Restore IAMR after idle

2019-02-19 Thread Akshay Adiga
On Tue, Feb 19, 2019 at 02:21:04PM +1000, Nicholas Piggin wrote:
> Michael Ellerman's on February 8, 2019 11:04 am:
> > Nicholas Piggin  writes:
> >> Russell Currey's on February 6, 2019 4:28 pm:
> >>> Without restoring the IAMR after idle, execution prevention on POWER9
> >>> with Radix MMU is overwritten and the kernel can freely execute userspace 
> >>> without
> >>> faulting.
> >>> 
> >>> This is necessary when returning from any stop state that modifies user
> >>> state, as well as hypervisor state.
> >>> 
> >>> To test how this fails without this patch, load the lkdtm driver and
> >>> do the following:
> >>> 
> >>>echo EXEC_USERSPACE > /sys/kernel/debug/provoke-crash/DIRECT
> >>> 
> >>> which won't fault, then boot the kernel with powersave=off, where it
> >>> will fault.  Applying this patch will fix this.
> >>> 
> >>> Fixes: 3b10d0095a1e ("powerpc/mm/radix: Prevent kernel execution of user
> >>> space")
> >>> Cc: 
> >>> Signed-off-by: Russell Currey 
> >>
> >> Good catch and debugging. This really should be a quirk, we don't want 
> >> to have to restore this thing on a thread switch.
> > 
> > I'm not sure I follow. We don't context switch it on Radix, but we do
> > on hash if pkeys are enabled.
> 
> Badly worded, I mean a hardware quirk. It should follow thread
> switches. Still, avoiding it for the no-loss case is better than
> nothing. We can just revisit it as an optimization if future
> hardware does not require the restore.

Apparently, the POWER9 Processor User’s Manual v2.0 documents that
IAMR can be lost, and that is not just the end.

Pasting excerpt from "Section 23.5.9.2 State Loss and Restoration,Page 309"

  On the POWER9 core, the only state that can be lost for
  Stop levels less than four, when PSSCR[ESL] = ‘1’ are the
  following SPRs: CR, FPSCR, VSCR, XER, DSCR, AMR, IAMR, UAMOR,
  AMOR, DAWR, DAWRX.

My observation is that AMOR is being used in kernel as of today
and AMOR is also lost (recreated in similar scenarios where
IAMR is lost).



Re: [PATCH] powerpc/powernv/idle: Restore IAMR after idle

2019-02-18 Thread Nicholas Piggin
Michael Ellerman's on February 8, 2019 11:04 am:
> Nicholas Piggin  writes:
>> Russell Currey's on February 6, 2019 4:28 pm:
>>> Without restoring the IAMR after idle, execution prevention on POWER9
>>> with Radix MMU is overwritten and the kernel can freely execute userspace 
>>> without
>>> faulting.
>>> 
>>> This is necessary when returning from any stop state that modifies user
>>> state, as well as hypervisor state.
>>> 
>>> To test how this fails without this patch, load the lkdtm driver and
>>> do the following:
>>> 
>>>echo EXEC_USERSPACE > /sys/kernel/debug/provoke-crash/DIRECT
>>> 
>>> which won't fault, then boot the kernel with powersave=off, where it
>>> will fault.  Applying this patch will fix this.
>>> 
>>> Fixes: 3b10d0095a1e ("powerpc/mm/radix: Prevent kernel execution of user
>>> space")
>>> Cc: 
>>> Signed-off-by: Russell Currey 
>>
>> Good catch and debugging. This really should be a quirk, we don't want 
>> to have to restore this thing on a thread switch.
> 
> I'm not sure I follow. We don't context switch it on Radix, but we do
> on hash if pkeys are enabled.

Badly worded, I mean a hardware quirk. It should follow thread
switches. Still, avoiding it for the no-loss case is better than
nothing. We can just revisit it as an optimization if future
hardware does not require the restore.

>> Can we put it under a CONFIG option if we're not using IAMR?
> 
> We'll always be using it with Radix, and we might be using it for pkeys
> on hash, unless pkeys are compiled out. But I don't really expect anyone
> to be running with pkeys compiled out.
> 
> So I think the only case we could optimise is that we're on hash and the
> current thread has an IAMR of 0, then we could just not restore
> (assuming we come out of idle with IAMR=0).
> 
> But maybe I'm not understanding.

Nah it sounds like more trouble than it's worth in that case.

Thanks,
Nick


Re: [PATCH] powerpc/powernv/idle: Restore IAMR after idle

2019-02-07 Thread Michael Ellerman
Nicholas Piggin  writes:
> Russell Currey's on February 6, 2019 4:28 pm:
>> Without restoring the IAMR after idle, execution prevention on POWER9
>> with Radix MMU is overwritten and the kernel can freely execute userspace 
>> without
>> faulting.
>> 
>> This is necessary when returning from any stop state that modifies user
>> state, as well as hypervisor state.
>> 
>> To test how this fails without this patch, load the lkdtm driver and
>> do the following:
>> 
>>echo EXEC_USERSPACE > /sys/kernel/debug/provoke-crash/DIRECT
>> 
>> which won't fault, then boot the kernel with powersave=off, where it
>> will fault.  Applying this patch will fix this.
>> 
>> Fixes: 3b10d0095a1e ("powerpc/mm/radix: Prevent kernel execution of user
>> space")
>> Cc: 
>> Signed-off-by: Russell Currey 
>
> Good catch and debugging. This really should be a quirk, we don't want 
> to have to restore this thing on a thread switch.

I'm not sure I follow. We don't context switch it on Radix, but we do
on hash if pkeys are enabled.

> Can we put it under a CONFIG option if we're not using IAMR?

We'll always be using it with Radix, and we might be using it for pkeys
on hash, unless pkeys are compiled out. But I don't really expect anyone
to be running with pkeys compiled out.

So I think the only case we could optimise is that we're on hash and the
current thread has an IAMR of 0, then we could just not restore
(assuming we come out of idle with IAMR=0).

But maybe I'm not understanding.

cheers


Re: [PATCH] powerpc/powernv/idle: Restore IAMR after idle

2019-02-07 Thread Russell Currey
On Thu, 2019-02-07 at 14:37 -0200, Thiago Jung Bauermann wrote:
> Russell Currey  writes:
> > On Thu, 2019-02-07 at 15:08 +1000, Nicholas Piggin wrote:
> > > Russell Currey's on February 6, 2019 4:28 pm:
> > > > 
> > > > Fixes: 3b10d0095a1e ("powerpc/mm/radix: Prevent kernel
> > > > execution of
> > > > user
> > > > space")
> > > > Cc: 
> > > > Signed-off-by: Russell Currey 
> > > 
> > > Good catch and debugging. This really should be a quirk, we don't
> > > want
> > > to have to restore this thing on a thread switch.
> > > 
> > > Can we put it under a CONFIG option if we're not using IAMR?
> > 
> > I don't exactly know when we do or don't use the IAMR (since the
> > only
> > thing I've used it for is radix).  When wouldn't we care about
> > restoring it on hash?
> 
> On hash it's used for memory protection keys (code is in
> arch/powerpc/mm/pkeys.c). The kernel doesn't use protection keys, but
> userspace apps may use it explicitly via specific syscalls
> (pkey_alloc(), pkey_mprotect, pkey_free()).
> 
> Also, the kernel may use a protection key if the process does an
> mmap(PROT_EXEC).

I don't understand how this would work, though - in this case we
wouldn't know on boot if we were going to use the IAMR or not.  On
radix (unless booting with nosmep) it would always be used, but on hash
it seems it depends on what userspace does.  How exactly would a
runtime toggle of "IAMR in use" work?

With a CONFIG option it would have to depend on PPC_MEM_KEYS ||
PPC_RADIX_MMU, but those are (pretty much) always going to be on in P8
and P9, which I already check for.

> 
> --
> Thiago Jung Bauermann
> IBM Linux Technology Center
> 



Re: [PATCH] powerpc/powernv/idle: Restore IAMR after idle

2019-02-07 Thread Thiago Jung Bauermann


Russell Currey  writes:

> On Thu, 2019-02-07 at 15:08 +1000, Nicholas Piggin wrote:
>> Russell Currey's on February 6, 2019 4:28 pm:
>> > Without restoring the IAMR after idle, execution prevention on
>> > POWER9
>> > with Radix MMU is overwritten and the kernel can freely execute
>> > userspace without
>> > faulting.
>> >
>> > This is necessary when returning from any stop state that modifies
>> > user
>> > state, as well as hypervisor state.
>> >
>> > To test how this fails without this patch, load the lkdtm driver
>> > and
>> > do the following:
>> >
>> >echo EXEC_USERSPACE > /sys/kernel/debug/provoke-crash/DIRECT
>> >
>> > which won't fault, then boot the kernel with powersave=off, where
>> > it
>> > will fault.  Applying this patch will fix this.
>> >
>> > Fixes: 3b10d0095a1e ("powerpc/mm/radix: Prevent kernel execution of
>> > user
>> > space")
>> > Cc: 
>> > Signed-off-by: Russell Currey 
>>
>> Good catch and debugging. This really should be a quirk, we don't
>> want
>> to have to restore this thing on a thread switch.
>>
>> Can we put it under a CONFIG option if we're not using IAMR?
>
> I don't exactly know when we do or don't use the IAMR (since the only
> thing I've used it for is radix).  When wouldn't we care about
> restoring it on hash?

On hash it's used for memory protection keys (code is in
arch/powerpc/mm/pkeys.c). The kernel doesn't use protection keys, but
userspace apps may use it explicitly via specific syscalls
(pkey_alloc(), pkey_mprotect, pkey_free()).

Also, the kernel may use a protection key if the process does an
mmap(PROT_EXEC).

--
Thiago Jung Bauermann
IBM Linux Technology Center



Re: [PATCH] powerpc/powernv/idle: Restore IAMR after idle

2019-02-06 Thread Russell Currey
On Thu, 2019-02-07 at 15:08 +1000, Nicholas Piggin wrote:
> Russell Currey's on February 6, 2019 4:28 pm:
> > Without restoring the IAMR after idle, execution prevention on
> > POWER9
> > with Radix MMU is overwritten and the kernel can freely execute
> > userspace without
> > faulting.
> > 
> > This is necessary when returning from any stop state that modifies
> > user
> > state, as well as hypervisor state.
> > 
> > To test how this fails without this patch, load the lkdtm driver
> > and
> > do the following:
> > 
> >echo EXEC_USERSPACE > /sys/kernel/debug/provoke-crash/DIRECT
> > 
> > which won't fault, then boot the kernel with powersave=off, where
> > it
> > will fault.  Applying this patch will fix this.
> > 
> > Fixes: 3b10d0095a1e ("powerpc/mm/radix: Prevent kernel execution of
> > user
> > space")
> > Cc: 
> > Signed-off-by: Russell Currey 
> 
> Good catch and debugging. This really should be a quirk, we don't
> want 
> to have to restore this thing on a thread switch.
> 
> Can we put it under a CONFIG option if we're not using IAMR?

I don't exactly know when we do or don't use the IAMR (since the only
thing I've used it for is radix).  When wouldn't we care about
restoring it on hash?

> 
> > ---
> >  arch/powerpc/include/asm/cpuidle.h |  1 +
> >  arch/powerpc/kernel/asm-offsets.c  |  1 +
> >  arch/powerpc/kernel/idle_book3s.S  | 20 
> >  3 files changed, 22 insertions(+)
> > 
> > diff --git a/arch/powerpc/include/asm/cpuidle.h
> > b/arch/powerpc/include/asm/cpuidle.h
> > index 43e5f31fe64d..ad67dbe59498 100644
> > --- a/arch/powerpc/include/asm/cpuidle.h
> > +++ b/arch/powerpc/include/asm/cpuidle.h
> > @@ -77,6 +77,7 @@ struct stop_sprs {
> > u64 mmcr1;
> > u64 mmcr2;
> > u64 mmcra;
> > +   u64 iamr;
> >  };
> >  
> >  #define PNV_IDLE_NAME_LEN16
> > diff --git a/arch/powerpc/kernel/asm-offsets.c
> > b/arch/powerpc/kernel/asm-offsets.c
> > index 9ffc72ded73a..10e0314c2b0d 100644
> > --- a/arch/powerpc/kernel/asm-offsets.c
> > +++ b/arch/powerpc/kernel/asm-offsets.c
> > @@ -774,6 +774,7 @@ int main(void)
> > STOP_SPR(STOP_MMCR1, mmcr1);
> > STOP_SPR(STOP_MMCR2, mmcr2);
> > STOP_SPR(STOP_MMCRA, mmcra);
> > +   STOP_SPR(STOP_IAMR, iamr);
> >  #endif
> >  
> > DEFINE(PPC_DBELL_SERVER, PPC_DBELL_SERVER);
> > diff --git a/arch/powerpc/kernel/idle_book3s.S
> > b/arch/powerpc/kernel/idle_book3s.S
> > index 7f5ac2e8581b..bb4f552f6c7e 100644
> > --- a/arch/powerpc/kernel/idle_book3s.S
> > +++ b/arch/powerpc/kernel/idle_book3s.S
> > @@ -200,6 +200,12 @@ pnv_powersave_common:
> > /* Continue saving state */
> > SAVE_GPR(2, r1)
> > SAVE_NVGPRS(r1)
> > +
> > +BEGIN_FTR_SECTION
> > +   mfspr   r5, SPRN_IAMR
> > +   std r5, STOP_IAMR(r13)
> > +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
> > +
> > mfcrr5
> > std r5,_CCR(r1)
> > std r1,PACAR1(r13)
> > @@ -924,6 +930,13 @@ BEGIN_FTR_SECTION
> >  END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
> > REST_NVGPRS(r1)
> > REST_GPR(2, r1)
> > +
> > +BEGIN_FTR_SECTION
> > +   ld  r4, STOP_IAMR(r13)
> > +   mtspr   SPRN_IAMR, r4
> > +   isync
> > +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
> 
> Sigh, good old isync. Suspect you'll get away without it, mtmsrd L=0 
> just below is architecturally guaranteeing a CSI, so just add a
> comment 
> there, might save a flush.

Makes sense, I wanted to be super safe with this, will drop the isync
and try to execute userspace nonstop and see if there are any (non)
failures.

> 
> > +
> > ld  r4,PACAKMSR(r13)
> > ld  r5,_LINK(r1)
> > ld  r6,_CCR(r1)
> > @@ -946,6 +959,13 @@ pnv_wakeup_noloss:
> >  BEGIN_FTR_SECTION
> > CHECK_HMI_INTERRUPT
> >  END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
> > +
> > +BEGIN_FTR_SECTION
> > +   ld  r4, STOP_IAMR(r13)
> > +   mtspr   SPRN_IAMR, r4
> > +   isync
> > +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
> 
> For the noloss part, it should mean really nothing lost including
> GPRs, 
> so I think IAMR *should* be okay here.

Sweet, will drop.

Thanks for the review!

> 
> Thanks,
> Nick



Re: [PATCH] powerpc/powernv/idle: Restore IAMR after idle

2019-02-06 Thread Russell Currey
On Thu, 2019-02-07 at 15:29 +1100, Michael Ellerman wrote:
> Russell Currey  writes:
> > 
> > Fixes: 3b10d0095a1e ("powerpc/mm/radix: Prevent kernel execution of
> > user
> > space")
> 
> Don't word wrap the fixes line please.

My bad, will teach my editor :)

> > Cc: 
> > Signed-off-by: Russell Currey 
> > ---
> >  arch/powerpc/include/asm/cpuidle.h |  1 +
> >  arch/powerpc/kernel/asm-offsets.c  |  1 +
> >  arch/powerpc/kernel/idle_book3s.S  | 20 
> >  3 files changed, 22 insertions(+)
> > 
> > diff --git a/arch/powerpc/include/asm/cpuidle.h
> > b/arch/powerpc/include/asm/cpuidle.h
> > index 43e5f31fe64d..ad67dbe59498 100644
> > --- a/arch/powerpc/include/asm/cpuidle.h
> > +++ b/arch/powerpc/include/asm/cpuidle.h
> > @@ -77,6 +77,7 @@ struct stop_sprs {
> > u64 mmcr1;
> > u64 mmcr2;
> > u64 mmcra;
> > +   u64 iamr;
> >  };
> 
> We don't actually need to put this in the paca anymore.
> 
> > diff --git a/arch/powerpc/kernel/idle_book3s.S
> > b/arch/powerpc/kernel/idle_book3s.S
> > index 7f5ac2e8581b..bb4f552f6c7e 100644
> > --- a/arch/powerpc/kernel/idle_book3s.S
> > +++ b/arch/powerpc/kernel/idle_book3s.S
> > @@ -200,6 +200,12 @@ pnv_powersave_common:
> > /* Continue saving state */
> > SAVE_GPR(2, r1)
> > SAVE_NVGPRS(r1)
> > +
> > +BEGIN_FTR_SECTION
> > +   mfspr   r5, SPRN_IAMR
> > +   std r5, STOP_IAMR(r13)
> > +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
> 
> We have space for a full pt_regs on the stack, and we're not using it
> all.
> 
> We don't have a specific slot for the IAMR (we may want to in
> future),
> but for now you could follow the time-honoured tradition of (ab)using
> the _DAR slot, with an appropriate comment.

I read this, then did it, and when writing the comment I thought I was
clever using "(ab)use".  I then reread this and realised I just
subconsciously stole it.

Thanks for the review.

> cheers



Re: [PATCH] powerpc/powernv/idle: Restore IAMR after idle

2019-02-06 Thread Nicholas Piggin
Russell Currey's on February 6, 2019 4:28 pm:
> Without restoring the IAMR after idle, execution prevention on POWER9
> with Radix MMU is overwritten and the kernel can freely execute userspace 
> without
> faulting.
> 
> This is necessary when returning from any stop state that modifies user
> state, as well as hypervisor state.
> 
> To test how this fails without this patch, load the lkdtm driver and
> do the following:
> 
>echo EXEC_USERSPACE > /sys/kernel/debug/provoke-crash/DIRECT
> 
> which won't fault, then boot the kernel with powersave=off, where it
> will fault.  Applying this patch will fix this.
> 
> Fixes: 3b10d0095a1e ("powerpc/mm/radix: Prevent kernel execution of user
> space")
> Cc: 
> Signed-off-by: Russell Currey 

Good catch and debugging. This really should be a quirk, we don't want 
to have to restore this thing on a thread switch.

Can we put it under a CONFIG option if we're not using IAMR?

> ---
>  arch/powerpc/include/asm/cpuidle.h |  1 +
>  arch/powerpc/kernel/asm-offsets.c  |  1 +
>  arch/powerpc/kernel/idle_book3s.S  | 20 
>  3 files changed, 22 insertions(+)
> 
> diff --git a/arch/powerpc/include/asm/cpuidle.h 
> b/arch/powerpc/include/asm/cpuidle.h
> index 43e5f31fe64d..ad67dbe59498 100644
> --- a/arch/powerpc/include/asm/cpuidle.h
> +++ b/arch/powerpc/include/asm/cpuidle.h
> @@ -77,6 +77,7 @@ struct stop_sprs {
>   u64 mmcr1;
>   u64 mmcr2;
>   u64 mmcra;
> + u64 iamr;
>  };
>  
>  #define PNV_IDLE_NAME_LEN16
> diff --git a/arch/powerpc/kernel/asm-offsets.c 
> b/arch/powerpc/kernel/asm-offsets.c
> index 9ffc72ded73a..10e0314c2b0d 100644
> --- a/arch/powerpc/kernel/asm-offsets.c
> +++ b/arch/powerpc/kernel/asm-offsets.c
> @@ -774,6 +774,7 @@ int main(void)
>   STOP_SPR(STOP_MMCR1, mmcr1);
>   STOP_SPR(STOP_MMCR2, mmcr2);
>   STOP_SPR(STOP_MMCRA, mmcra);
> + STOP_SPR(STOP_IAMR, iamr);
>  #endif
>  
>   DEFINE(PPC_DBELL_SERVER, PPC_DBELL_SERVER);
> diff --git a/arch/powerpc/kernel/idle_book3s.S 
> b/arch/powerpc/kernel/idle_book3s.S
> index 7f5ac2e8581b..bb4f552f6c7e 100644
> --- a/arch/powerpc/kernel/idle_book3s.S
> +++ b/arch/powerpc/kernel/idle_book3s.S
> @@ -200,6 +200,12 @@ pnv_powersave_common:
>   /* Continue saving state */
>   SAVE_GPR(2, r1)
>   SAVE_NVGPRS(r1)
> +
> +BEGIN_FTR_SECTION
> + mfspr   r5, SPRN_IAMR
> + std r5, STOP_IAMR(r13)
> +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
> +
>   mfcrr5
>   std r5,_CCR(r1)
>   std r1,PACAR1(r13)
> @@ -924,6 +930,13 @@ BEGIN_FTR_SECTION
>  END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
>   REST_NVGPRS(r1)
>   REST_GPR(2, r1)
> +
> +BEGIN_FTR_SECTION
> + ld  r4, STOP_IAMR(r13)
> + mtspr   SPRN_IAMR, r4
> + isync
> +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)

Sigh, good old isync. Suspect you'll get away without it, mtmsrd L=0 
just below is architecturally guaranteeing a CSI, so just add a comment 
there, might save a flush.

> +
>   ld  r4,PACAKMSR(r13)
>   ld  r5,_LINK(r1)
>   ld  r6,_CCR(r1)
> @@ -946,6 +959,13 @@ pnv_wakeup_noloss:
>  BEGIN_FTR_SECTION
>   CHECK_HMI_INTERRUPT
>  END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
> +
> +BEGIN_FTR_SECTION
> + ld  r4, STOP_IAMR(r13)
> + mtspr   SPRN_IAMR, r4
> + isync
> +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)

For the noloss part, it should mean really nothing lost including GPRs, 
so I think IAMR *should* be okay here.

Thanks,
Nick


Re: [PATCH] powerpc/powernv/idle: Restore IAMR after idle

2019-02-06 Thread Michael Ellerman
Russell Currey  writes:
> Without restoring the IAMR after idle, execution prevention on POWER9
> with Radix MMU is overwritten and the kernel can freely execute userspace 
> without
> faulting.
>
> This is necessary when returning from any stop state that modifies user
> state, as well as hypervisor state.
>
> To test how this fails without this patch, load the lkdtm driver and
> do the following:
>
>echo EXEC_USERSPACE > /sys/kernel/debug/provoke-crash/DIRECT
>
> which won't fault, then boot the kernel with powersave=off, where it
> will fault.  Applying this patch will fix this.
>
> Fixes: 3b10d0095a1e ("powerpc/mm/radix: Prevent kernel execution of user
> space")

Don't word wrap the fixes line please.

> Cc: 
> Signed-off-by: Russell Currey 
> ---
>  arch/powerpc/include/asm/cpuidle.h |  1 +
>  arch/powerpc/kernel/asm-offsets.c  |  1 +
>  arch/powerpc/kernel/idle_book3s.S  | 20 
>  3 files changed, 22 insertions(+)
>
> diff --git a/arch/powerpc/include/asm/cpuidle.h 
> b/arch/powerpc/include/asm/cpuidle.h
> index 43e5f31fe64d..ad67dbe59498 100644
> --- a/arch/powerpc/include/asm/cpuidle.h
> +++ b/arch/powerpc/include/asm/cpuidle.h
> @@ -77,6 +77,7 @@ struct stop_sprs {
>   u64 mmcr1;
>   u64 mmcr2;
>   u64 mmcra;
> + u64 iamr;
>  };

We don't actually need to put this in the paca anymore.

> diff --git a/arch/powerpc/kernel/idle_book3s.S 
> b/arch/powerpc/kernel/idle_book3s.S
> index 7f5ac2e8581b..bb4f552f6c7e 100644
> --- a/arch/powerpc/kernel/idle_book3s.S
> +++ b/arch/powerpc/kernel/idle_book3s.S
> @@ -200,6 +200,12 @@ pnv_powersave_common:
>   /* Continue saving state */
>   SAVE_GPR(2, r1)
>   SAVE_NVGPRS(r1)
> +
> +BEGIN_FTR_SECTION
> + mfspr   r5, SPRN_IAMR
> + std r5, STOP_IAMR(r13)
> +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)

We have space for a full pt_regs on the stack, and we're not using it
all.

We don't have a specific slot for the IAMR (we may want to in future),
but for now you could follow the time-honoured tradition of (ab)using
the _DAR slot, with an appropriate comment.

cheers


[PATCH] powerpc/powernv/idle: Restore IAMR after idle

2019-02-05 Thread Russell Currey
Without restoring the IAMR after idle, execution prevention on POWER9
with Radix MMU is overwritten and the kernel can freely execute userspace 
without
faulting.

This is necessary when returning from any stop state that modifies user
state, as well as hypervisor state.

To test how this fails without this patch, load the lkdtm driver and
do the following:

   echo EXEC_USERSPACE > /sys/kernel/debug/provoke-crash/DIRECT

which won't fault, then boot the kernel with powersave=off, where it
will fault.  Applying this patch will fix this.

Fixes: 3b10d0095a1e ("powerpc/mm/radix: Prevent kernel execution of user
space")
Cc: 
Signed-off-by: Russell Currey 
---
 arch/powerpc/include/asm/cpuidle.h |  1 +
 arch/powerpc/kernel/asm-offsets.c  |  1 +
 arch/powerpc/kernel/idle_book3s.S  | 20 
 3 files changed, 22 insertions(+)

diff --git a/arch/powerpc/include/asm/cpuidle.h 
b/arch/powerpc/include/asm/cpuidle.h
index 43e5f31fe64d..ad67dbe59498 100644
--- a/arch/powerpc/include/asm/cpuidle.h
+++ b/arch/powerpc/include/asm/cpuidle.h
@@ -77,6 +77,7 @@ struct stop_sprs {
u64 mmcr1;
u64 mmcr2;
u64 mmcra;
+   u64 iamr;
 };
 
 #define PNV_IDLE_NAME_LEN16
diff --git a/arch/powerpc/kernel/asm-offsets.c 
b/arch/powerpc/kernel/asm-offsets.c
index 9ffc72ded73a..10e0314c2b0d 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -774,6 +774,7 @@ int main(void)
STOP_SPR(STOP_MMCR1, mmcr1);
STOP_SPR(STOP_MMCR2, mmcr2);
STOP_SPR(STOP_MMCRA, mmcra);
+   STOP_SPR(STOP_IAMR, iamr);
 #endif
 
DEFINE(PPC_DBELL_SERVER, PPC_DBELL_SERVER);
diff --git a/arch/powerpc/kernel/idle_book3s.S 
b/arch/powerpc/kernel/idle_book3s.S
index 7f5ac2e8581b..bb4f552f6c7e 100644
--- a/arch/powerpc/kernel/idle_book3s.S
+++ b/arch/powerpc/kernel/idle_book3s.S
@@ -200,6 +200,12 @@ pnv_powersave_common:
/* Continue saving state */
SAVE_GPR(2, r1)
SAVE_NVGPRS(r1)
+
+BEGIN_FTR_SECTION
+   mfspr   r5, SPRN_IAMR
+   std r5, STOP_IAMR(r13)
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+
mfcrr5
std r5,_CCR(r1)
std r1,PACAR1(r13)
@@ -924,6 +930,13 @@ BEGIN_FTR_SECTION
 END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
REST_NVGPRS(r1)
REST_GPR(2, r1)
+
+BEGIN_FTR_SECTION
+   ld  r4, STOP_IAMR(r13)
+   mtspr   SPRN_IAMR, r4
+   isync
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+
ld  r4,PACAKMSR(r13)
ld  r5,_LINK(r1)
ld  r6,_CCR(r1)
@@ -946,6 +959,13 @@ pnv_wakeup_noloss:
 BEGIN_FTR_SECTION
CHECK_HMI_INTERRUPT
 END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
+
+BEGIN_FTR_SECTION
+   ld  r4, STOP_IAMR(r13)
+   mtspr   SPRN_IAMR, r4
+   isync
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+
ld  r4,PACAKMSR(r13)
ld  r5,_NIP(r1)
ld  r6,_CCR(r1)
-- 
2.20.1