Re: [PATCH] target/ppc: Fix mtmsr(d) L=1 variant that loses interrupts

2020-04-16 Thread David Gibson
On Tue, Apr 14, 2020 at 09:11:31PM +1000, Nicholas Piggin wrote:
65;5803;1c> If mtmsr L=1 sets MSR[EE] while there is a maskable exception 
pending,
> it does not cause an interrupt. This causes the test case to hang:
> 
> https://lists.gnu.org/archive/html/qemu-ppc/2019-10/msg00826.html
> 
> More recently, Linux reduced the occurance of operations (e.g., rfi)
> which stop translation and allow pending interrupts to be processed.
> This started causing hangs in Linux boot in long-running kernel tests,
> running with '-d int' shows the decrementer stops firing despite DEC
> wrapping and MSR[EE]=1.
> 
> https://lists.ozlabs.org/pipermail/linuxppc-dev/2020-April/208301.html
> 
> The cause is the broken mtmsr L=1 behaviour, which is contrary to the
> architecture. From Power ISA v3.0B, p.977, Move To Machine State Register,
> Programming Note states:
> 
> If MSR[EE]=0 and an External, Decrementer, or Performance Monitor
> exception is pending, executing an mtmsrd instruction that sets
> MSR[EE] to 1 will cause the interrupt to occur before the next
> instruction is executed, if no higher priority exception exists
> 
> Fix this by handling L=1 exactly the same way as L=0, modulo the MSR
> bits altered.
> 
> The confusion arises from L=0 being "context synchronizing" whereas L=1
> is "execution synchronizing", which is a weaker semantic. However this
> is not a relaxation of the requirement that these exceptions cause
> interrupts when MSR[EE]=1 (e.g., when mtmsr executes to completion as
> TCG is doing here), rather it specifies how a pipelined processor can
> have multiple instructions in flight where one may influence how another
> behaves.
> 
> Cc: qemu-sta...@nongnu.org
> Reported-by: Anton Blanchard 
> Reported-by: Nathan Chancellor 
> Tested-by: Nathan Chancellor 
> Signed-off-by: Nicholas Piggin 
> ---
> Thanks very much to Nathan for reporting and testing it, I added his
> Tested-by tag despite a more polished patch, as the the basics are 
> still the same (and still fixes his test case here).
> 
> This bug possibly goes back to early v2.04 / mtmsrd L=1 support around
> 2007, and the code has been changed several times since then so may
> require some backporting.
> 
> 32-bit / mtmsr untested at the moment, I don't have an environment
> handy.
> 
>  target/ppc/translate.c | 46 +-
>  1 file changed, 27 insertions(+), 19 deletions(-)

Applied to ppc-for-5.0.

> 
> diff --git a/target/ppc/translate.c b/target/ppc/translate.c
> index b207fb5386..9959259dba 100644
> --- a/target/ppc/translate.c
> +++ b/target/ppc/translate.c
> @@ -4361,30 +4361,34 @@ static void gen_mtmsrd(DisasContext *ctx)
>  CHK_SV;
>  
>  #if !defined(CONFIG_USER_ONLY)
> +if (tb_cflags(ctx->base.tb) & CF_USE_ICOUNT) {
> +gen_io_start();
> +}
>  if (ctx->opcode & 0x0001) {
> -/* Special form that does not need any synchronisation */
> +/* L=1 form only updates EE and RI */
>  TCGv t0 = tcg_temp_new();
> +TCGv t1 = tcg_temp_new();
>  tcg_gen_andi_tl(t0, cpu_gpr[rS(ctx->opcode)],
>  (1 << MSR_RI) | (1 << MSR_EE));
> -tcg_gen_andi_tl(cpu_msr, cpu_msr,
> +tcg_gen_andi_tl(t1, cpu_msr,
>  ~(target_ulong)((1 << MSR_RI) | (1 << MSR_EE)));
> -tcg_gen_or_tl(cpu_msr, cpu_msr, t0);
> +tcg_gen_or_tl(t1, t1, t0);
> +
> +gen_helper_store_msr(cpu_env, t1);
>  tcg_temp_free(t0);
> +tcg_temp_free(t1);
> +
>  } else {
>  /*
>   * XXX: we need to update nip before the store if we enter
>   *  power saving mode, we will exit the loop directly from
>   *  ppc_store_msr
>   */
> -if (tb_cflags(ctx->base.tb) & CF_USE_ICOUNT) {
> -gen_io_start();
> -}
>  gen_update_nip(ctx, ctx->base.pc_next);
>  gen_helper_store_msr(cpu_env, cpu_gpr[rS(ctx->opcode)]);
> -/* Must stop the translation as machine state (may have) changed */
> -/* Note that mtmsr is not always defined as context-synchronizing */
> -gen_stop_exception(ctx);
>  }
> +/* Must stop the translation as machine state (may have) changed */
> +gen_stop_exception(ctx);
>  #endif /* !defined(CONFIG_USER_ONLY) */
>  }
>  #endif /* defined(TARGET_PPC64) */
> @@ -4394,15 +4398,23 @@ static void gen_mtmsr(DisasContext *ctx)
>  CHK_SV;
>  
>  #if !defined(CONFIG_USER_ONLY)
> -   if (ctx->opcode & 0x0001) {
> -/* Special form that does not need any synchronisation */
> +if (tb_cflags(ctx->base.tb) & CF_USE_ICOUNT) {
> +gen_io_start();
> +}
> +if (ctx->opcode & 0x0001) {
> +/* L=1 form only updates EE and RI */
>  TCGv t0 = tcg_temp_new();
> +TCGv t1 = tcg_temp_new();
>  tcg_gen_andi_tl(t0, cpu_gpr[rS(ctx->opcode)],
>  (1 << MSR_RI) | (1 << MSR_EE));
> -

Re: [EXTERNAL] [PATCH] target/ppc: Fix mtmsr(d) L=1 variant that loses interrupts

2020-04-16 Thread Alex Bennée


Cédric Le Goater  writes:

> On 4/14/20 1:11 PM, Nicholas Piggin wrote:
>> If mtmsr L=1 sets MSR[EE] while there is a maskable exception pending,
>> it does not cause an interrupt. This causes the test case to hang:
>> 
>> https://urldefense.proofpoint.com/v2/url?u=https-3A__lists.gnu.org_archive_html_qemu-2Dppc_2019-2D10_msg00826.html=DwIDAg=jf_iaSHvJObTbx-siA1ZOg=XHJsZhhuWSw9713Fp0ciew=TQfi_v-8XYgz7MiMDAZ_CjkyalSh9-EXhQ3oDesUm74=pFoesXbioVBh5wCuzEnzwgfze6X7e-a9unkfUgsRwiw=
>>  
>> 

> I was expecting more changes but this looks fine. 
>
> Reviewed-by: Cédric Le Goater 
>
>> Cc: qemu-sta...@nongnu.org
>> Reported-by: Anton Blanchard 
>> Reported-by: Nathan Chancellor 
>> Tested-by: Nathan Chancellor 
>> Signed-off-by: Nicholas Piggin 
>
> I gave it a try on PowerNV, pseries and mac99. All good.
>
> Tested-by: Cédric Le Goater 
>
> I don't know how we could include tests in QEMU such as the one Anton 
> sent. These are good exercisers for our exception model.

It certainly looks possible. The ideal would be a fresh boot.S for ppc64
that supported:

 a) some sort of serial output
 b) a way to exit the test case with a result

For ARM we use semihosting, for x86 we use plain ioport and an ACPI
exit. See the tests in: tests/tcg/[x86_64/aarch64]/system/boot.S and the
Makefile.softmmu-target files under tests/tcg.

>
> Thanks,
>
> C. 
>
>> ---
>> Thanks very much to Nathan for reporting and testing it, I added his
>> Tested-by tag despite a more polished patch, as the the basics are 
>> still the same (and still fixes his test case here).
>> 
>> This bug possibly goes back to early v2.04 / mtmsrd L=1 support around
>> 2007, and the code has been changed several times since then so may
>> require some backporting.
>> 
>> 32-bit / mtmsr untested at the moment, I don't have an environment
>> handy.
>>
>> 
>>  target/ppc/translate.c | 46 +-
>>  1 file changed, 27 insertions(+), 19 deletions(-)
>> 
>> diff --git a/target/ppc/translate.c b/target/ppc/translate.c
>> index b207fb5386..9959259dba 100644
>> --- a/target/ppc/translate.c
>> +++ b/target/ppc/translate.c
>> @@ -4361,30 +4361,34 @@ static void gen_mtmsrd(DisasContext *ctx)
>>  CHK_SV;
>>  
>>  #if !defined(CONFIG_USER_ONLY)
>> +if (tb_cflags(ctx->base.tb) & CF_USE_ICOUNT) {
>> +gen_io_start();
>> +}
>>  if (ctx->opcode & 0x0001) {
>> -/* Special form that does not need any synchronisation */
>> +/* L=1 form only updates EE and RI */
>>  TCGv t0 = tcg_temp_new();
>> +TCGv t1 = tcg_temp_new();
>>  tcg_gen_andi_tl(t0, cpu_gpr[rS(ctx->opcode)],
>>  (1 << MSR_RI) | (1 << MSR_EE));
>> -tcg_gen_andi_tl(cpu_msr, cpu_msr,
>> +tcg_gen_andi_tl(t1, cpu_msr,
>>  ~(target_ulong)((1 << MSR_RI) | (1 << MSR_EE)));
>> -tcg_gen_or_tl(cpu_msr, cpu_msr, t0);
>> +tcg_gen_or_tl(t1, t1, t0);
>> +
>> +gen_helper_store_msr(cpu_env, t1);
>>  tcg_temp_free(t0);
>> +tcg_temp_free(t1);
>> +
>>  } else {
>>  /*
>>   * XXX: we need to update nip before the store if we enter
>>   *  power saving mode, we will exit the loop directly from
>>   *  ppc_store_msr
>>   */
>> -if (tb_cflags(ctx->base.tb) & CF_USE_ICOUNT) {
>> -gen_io_start();
>> -}
>>  gen_update_nip(ctx, ctx->base.pc_next);
>>  gen_helper_store_msr(cpu_env, cpu_gpr[rS(ctx->opcode)]);
>> -/* Must stop the translation as machine state (may have) changed */
>> -/* Note that mtmsr is not always defined as context-synchronizing */
>> -gen_stop_exception(ctx);
>>  }
>> +/* Must stop the translation as machine state (may have) changed */
>> +gen_stop_exception(ctx);
>>  #endif /* !defined(CONFIG_USER_ONLY) */
>>  }
>>  #endif /* defined(TARGET_PPC64) */
>> @@ -4394,15 +4398,23 @@ static void gen_mtmsr(DisasContext *ctx)
>>  CHK_SV;
>>  
>>  #if !defined(CONFIG_USER_ONLY)
>> -   if (ctx->opcode & 0x0001) {
>> -/* Special form that does not need any synchronisation */
>> +if (tb_cflags(ctx->base.tb) & CF_USE_ICOUNT) {
>> +gen_io_start();
>> +}
>> +if (ctx->opcode & 0x0001) {
>> +/* L=1 form only updates EE and RI */
>>  TCGv t0 = tcg_temp_new();
>> +TCGv t1 = tcg_temp_new();
>>  tcg_gen_andi_tl(t0, cpu_gpr[rS(ctx->opcode)],
>>  (1 << MSR_RI) | (1 << MSR_EE));
>> -tcg_gen_andi_tl(cpu_msr, cpu_msr,
>> +tcg_gen_andi_tl(t1, cpu_msr,
>>  ~(target_ulong)((1 << MSR_RI) | (1 << MSR_EE)));
>> -tcg_gen_or_tl(cpu_msr, cpu_msr, t0);
>> +tcg_gen_or_tl(t1, t1, t0);
>> +
>> +gen_helper_store_msr(cpu_env, t1);
>>  tcg_temp_free(t0);
>> +tcg_temp_free(t1);
>> +
>>  } else {
>>  TCGv msr = tcg_temp_new();
>>  
>> @@ -4411,9 +4423,6 

Re: [EXTERNAL] [PATCH] target/ppc: Fix mtmsr(d) L=1 variant that loses interrupts

2020-04-16 Thread Nicholas Piggin
Excerpts from Cédric Le Goater's message of April 15, 2020 4:49 pm:
> On 4/14/20 1:11 PM, Nicholas Piggin wrote:
>> 
>> The confusion arises from L=0 being "context synchronizing" whereas L=1
>> is "execution synchronizing", which is a weaker semantic. However this
>> is not a relaxation of the requirement that these exceptions cause
>> interrupts when MSR[EE]=1 (e.g., when mtmsr executes to completion as
>> TCG is doing here), rather it specifies how a pipelined processor can
>> have multiple instructions in flight where one may influence how another
>> behaves.
> 
> I was expecting more changes but this looks fine. 

It _seems_ to just be these, from what I could see, but could quite 
easily be other issues I missed.

There is at least one other "funny" thing with this synchronization, 
which is the TLB flushing. I don't think it has a bug, but comments
are a bit suspect. tlbie/l doesn't have anything to do with context
/ execution synchronization, so it's a bit interesting to check them
in isync and rfi etc.

ptesync is required because the page table walkers are not necessarily 
coherent with the main CPU's memory pipeline, so you store a new value 
to a PTE then do a tlbiel, you can't have the MMU reload the TLB with 
the old PTE because the store was sitting in the store queue that 
doesn't forward to the table walker. This condition can persist after
the store instruction itself has completed so no amount of this
context synchronization would help.

It does kind of make sense to check the tlb flush in rfi, so you catch 
stray ones that didn't have the right ptesync/tlbsync, but it would 
almost be a condition you could catch and add a warn for. isync doesn't
make a lot of sense though, as far as I can see.

Thanks,
Nick

> Reviewed-by: Cédric Le Goater 

Sorry I always get your email wrong, phantom address book entry.



Re: [EXTERNAL] [PATCH] target/ppc: Fix mtmsr(d) L=1 variant that loses interrupts

2020-04-15 Thread Cédric Le Goater
On 4/14/20 1:11 PM, Nicholas Piggin wrote:
> If mtmsr L=1 sets MSR[EE] while there is a maskable exception pending,
> it does not cause an interrupt. This causes the test case to hang:
> 
> https://urldefense.proofpoint.com/v2/url?u=https-3A__lists.gnu.org_archive_html_qemu-2Dppc_2019-2D10_msg00826.html=DwIDAg=jf_iaSHvJObTbx-siA1ZOg=XHJsZhhuWSw9713Fp0ciew=TQfi_v-8XYgz7MiMDAZ_CjkyalSh9-EXhQ3oDesUm74=pFoesXbioVBh5wCuzEnzwgfze6X7e-a9unkfUgsRwiw=
>  
> 
> More recently, Linux reduced the occurance of operations (e.g., rfi)
> which stop translation and allow pending interrupts to be processed.
> This started causing hangs in Linux boot in long-running kernel tests,
> running with '-d int' shows the decrementer stops firing despite DEC
> wrapping and MSR[EE]=1.
> 
> https://urldefense.proofpoint.com/v2/url?u=https-3A__lists.ozlabs.org_pipermail_linuxppc-2Ddev_2020-2DApril_208301.html=DwIDAg=jf_iaSHvJObTbx-siA1ZOg=XHJsZhhuWSw9713Fp0ciew=TQfi_v-8XYgz7MiMDAZ_CjkyalSh9-EXhQ3oDesUm74=EhkRfxvQMomvneYweWDEIUktCkKykgIqEmdhA0PtiwU=
>  
> 
> The cause is the broken mtmsr L=1 behaviour, which is contrary to the
> architecture. From Power ISA v3.0B, p.977, Move To Machine State Register,
> Programming Note states:
> 
> If MSR[EE]=0 and an External, Decrementer, or Performance Monitor
> exception is pending, executing an mtmsrd instruction that sets
> MSR[EE] to 1 will cause the interrupt to occur before the next
> instruction is executed, if no higher priority exception exists
> 
> Fix this by handling L=1 exactly the same way as L=0, modulo the MSR
> bits altered.
> 
> The confusion arises from L=0 being "context synchronizing" whereas L=1
> is "execution synchronizing", which is a weaker semantic. However this
> is not a relaxation of the requirement that these exceptions cause
> interrupts when MSR[EE]=1 (e.g., when mtmsr executes to completion as
> TCG is doing here), rather it specifies how a pipelined processor can
> have multiple instructions in flight where one may influence how another
> behaves.

I was expecting more changes but this looks fine. 

Reviewed-by: Cédric Le Goater 

> Cc: qemu-sta...@nongnu.org
> Reported-by: Anton Blanchard 
> Reported-by: Nathan Chancellor 
> Tested-by: Nathan Chancellor 
> Signed-off-by: Nicholas Piggin 

I gave it a try on PowerNV, pseries and mac99. All good.

Tested-by: Cédric Le Goater 

I don't know how we could include tests in QEMU such as the one Anton 
sent. These are good exercisers for our exception model.

Thanks,

C. 

> ---
> Thanks very much to Nathan for reporting and testing it, I added his
> Tested-by tag despite a more polished patch, as the the basics are 
> still the same (and still fixes his test case here).
> 
> This bug possibly goes back to early v2.04 / mtmsrd L=1 support around
> 2007, and the code has been changed several times since then so may
> require some backporting.
> 
> 32-bit / mtmsr untested at the moment, I don't have an environment
> handy.
>
> 
>  target/ppc/translate.c | 46 +-
>  1 file changed, 27 insertions(+), 19 deletions(-)
> 
> diff --git a/target/ppc/translate.c b/target/ppc/translate.c
> index b207fb5386..9959259dba 100644
> --- a/target/ppc/translate.c
> +++ b/target/ppc/translate.c
> @@ -4361,30 +4361,34 @@ static void gen_mtmsrd(DisasContext *ctx)
>  CHK_SV;
>  
>  #if !defined(CONFIG_USER_ONLY)
> +if (tb_cflags(ctx->base.tb) & CF_USE_ICOUNT) {
> +gen_io_start();
> +}
>  if (ctx->opcode & 0x0001) {
> -/* Special form that does not need any synchronisation */
> +/* L=1 form only updates EE and RI */
>  TCGv t0 = tcg_temp_new();
> +TCGv t1 = tcg_temp_new();
>  tcg_gen_andi_tl(t0, cpu_gpr[rS(ctx->opcode)],
>  (1 << MSR_RI) | (1 << MSR_EE));
> -tcg_gen_andi_tl(cpu_msr, cpu_msr,
> +tcg_gen_andi_tl(t1, cpu_msr,
>  ~(target_ulong)((1 << MSR_RI) | (1 << MSR_EE)));
> -tcg_gen_or_tl(cpu_msr, cpu_msr, t0);
> +tcg_gen_or_tl(t1, t1, t0);
> +
> +gen_helper_store_msr(cpu_env, t1);
>  tcg_temp_free(t0);
> +tcg_temp_free(t1);
> +
>  } else {
>  /*
>   * XXX: we need to update nip before the store if we enter
>   *  power saving mode, we will exit the loop directly from
>   *  ppc_store_msr
>   */
> -if (tb_cflags(ctx->base.tb) & CF_USE_ICOUNT) {
> -gen_io_start();
> -}
>  gen_update_nip(ctx, ctx->base.pc_next);
>  gen_helper_store_msr(cpu_env, cpu_gpr[rS(ctx->opcode)]);
> -/* Must stop the translation as machine state (may have) changed */
> -/* Note that mtmsr is not always defined as context-synchronizing */
> -gen_stop_exception(ctx);
>  }
> +/* Must stop the translation as machine state (may have) changed */
> +gen_stop_exception(ctx);
>  #endif /* !defined(CONFIG_USER_ONLY) */
>  }
>  

Re: [PATCH] target/ppc: Fix mtmsr(d) L=1 variant that loses interrupts

2020-04-14 Thread Nathan Chancellor
On Tue, Apr 14, 2020 at 09:11:31PM +1000, Nicholas Piggin wrote:
> If mtmsr L=1 sets MSR[EE] while there is a maskable exception pending,
> it does not cause an interrupt. This causes the test case to hang:
> 
> https://lists.gnu.org/archive/html/qemu-ppc/2019-10/msg00826.html
> 
> More recently, Linux reduced the occurance of operations (e.g., rfi)
> which stop translation and allow pending interrupts to be processed.
> This started causing hangs in Linux boot in long-running kernel tests,
> running with '-d int' shows the decrementer stops firing despite DEC
> wrapping and MSR[EE]=1.
> 
> https://lists.ozlabs.org/pipermail/linuxppc-dev/2020-April/208301.html
> 
> The cause is the broken mtmsr L=1 behaviour, which is contrary to the
> architecture. From Power ISA v3.0B, p.977, Move To Machine State Register,
> Programming Note states:
> 
> If MSR[EE]=0 and an External, Decrementer, or Performance Monitor
> exception is pending, executing an mtmsrd instruction that sets
> MSR[EE] to 1 will cause the interrupt to occur before the next
> instruction is executed, if no higher priority exception exists
> 
> Fix this by handling L=1 exactly the same way as L=0, modulo the MSR
> bits altered.
> 
> The confusion arises from L=0 being "context synchronizing" whereas L=1
> is "execution synchronizing", which is a weaker semantic. However this
> is not a relaxation of the requirement that these exceptions cause
> interrupts when MSR[EE]=1 (e.g., when mtmsr executes to completion as
> TCG is doing here), rather it specifies how a pipelined processor can
> have multiple instructions in flight where one may influence how another
> behaves.
> 
> Cc: qemu-sta...@nongnu.org
> Reported-by: Anton Blanchard 
> Reported-by: Nathan Chancellor 
> Tested-by: Nathan Chancellor 
> Signed-off-by: Nicholas Piggin 
> ---
> Thanks very much to Nathan for reporting and testing it, I added his
> Tested-by tag despite a more polished patch, as the the basics are 
> still the same (and still fixes his test case here).

I did re-run the test with the updated version of your patch and it
passed still so that tag can still stand without any controversy :)

Thank you for the fix again!
Nathan

> This bug possibly goes back to early v2.04 / mtmsrd L=1 support around
> 2007, and the code has been changed several times since then so may
> require some backporting.
> 
> 32-bit / mtmsr untested at the moment, I don't have an environment
> handy.
> 
>  target/ppc/translate.c | 46 +-
>  1 file changed, 27 insertions(+), 19 deletions(-)
> 
> diff --git a/target/ppc/translate.c b/target/ppc/translate.c
> index b207fb5386..9959259dba 100644
> --- a/target/ppc/translate.c
> +++ b/target/ppc/translate.c
> @@ -4361,30 +4361,34 @@ static void gen_mtmsrd(DisasContext *ctx)
>  CHK_SV;
>  
>  #if !defined(CONFIG_USER_ONLY)
> +if (tb_cflags(ctx->base.tb) & CF_USE_ICOUNT) {
> +gen_io_start();
> +}
>  if (ctx->opcode & 0x0001) {
> -/* Special form that does not need any synchronisation */
> +/* L=1 form only updates EE and RI */
>  TCGv t0 = tcg_temp_new();
> +TCGv t1 = tcg_temp_new();
>  tcg_gen_andi_tl(t0, cpu_gpr[rS(ctx->opcode)],
>  (1 << MSR_RI) | (1 << MSR_EE));
> -tcg_gen_andi_tl(cpu_msr, cpu_msr,
> +tcg_gen_andi_tl(t1, cpu_msr,
>  ~(target_ulong)((1 << MSR_RI) | (1 << MSR_EE)));
> -tcg_gen_or_tl(cpu_msr, cpu_msr, t0);
> +tcg_gen_or_tl(t1, t1, t0);
> +
> +gen_helper_store_msr(cpu_env, t1);
>  tcg_temp_free(t0);
> +tcg_temp_free(t1);
> +
>  } else {
>  /*
>   * XXX: we need to update nip before the store if we enter
>   *  power saving mode, we will exit the loop directly from
>   *  ppc_store_msr
>   */
> -if (tb_cflags(ctx->base.tb) & CF_USE_ICOUNT) {
> -gen_io_start();
> -}
>  gen_update_nip(ctx, ctx->base.pc_next);
>  gen_helper_store_msr(cpu_env, cpu_gpr[rS(ctx->opcode)]);
> -/* Must stop the translation as machine state (may have) changed */
> -/* Note that mtmsr is not always defined as context-synchronizing */
> -gen_stop_exception(ctx);
>  }
> +/* Must stop the translation as machine state (may have) changed */
> +gen_stop_exception(ctx);
>  #endif /* !defined(CONFIG_USER_ONLY) */
>  }
>  #endif /* defined(TARGET_PPC64) */
> @@ -4394,15 +4398,23 @@ static void gen_mtmsr(DisasContext *ctx)
>  CHK_SV;
>  
>  #if !defined(CONFIG_USER_ONLY)
> -   if (ctx->opcode & 0x0001) {
> -/* Special form that does not need any synchronisation */
> +if (tb_cflags(ctx->base.tb) & CF_USE_ICOUNT) {
> +gen_io_start();
> +}
> +if (ctx->opcode & 0x0001) {
> +/* L=1 form only updates EE and RI */
>  TCGv t0 = tcg_temp_new();
> +TCGv t1 = tcg_temp_new();

[PATCH] target/ppc: Fix mtmsr(d) L=1 variant that loses interrupts

2020-04-14 Thread Nicholas Piggin
If mtmsr L=1 sets MSR[EE] while there is a maskable exception pending,
it does not cause an interrupt. This causes the test case to hang:

https://lists.gnu.org/archive/html/qemu-ppc/2019-10/msg00826.html

More recently, Linux reduced the occurance of operations (e.g., rfi)
which stop translation and allow pending interrupts to be processed.
This started causing hangs in Linux boot in long-running kernel tests,
running with '-d int' shows the decrementer stops firing despite DEC
wrapping and MSR[EE]=1.

https://lists.ozlabs.org/pipermail/linuxppc-dev/2020-April/208301.html

The cause is the broken mtmsr L=1 behaviour, which is contrary to the
architecture. From Power ISA v3.0B, p.977, Move To Machine State Register,
Programming Note states:

If MSR[EE]=0 and an External, Decrementer, or Performance Monitor
exception is pending, executing an mtmsrd instruction that sets
MSR[EE] to 1 will cause the interrupt to occur before the next
instruction is executed, if no higher priority exception exists

Fix this by handling L=1 exactly the same way as L=0, modulo the MSR
bits altered.

The confusion arises from L=0 being "context synchronizing" whereas L=1
is "execution synchronizing", which is a weaker semantic. However this
is not a relaxation of the requirement that these exceptions cause
interrupts when MSR[EE]=1 (e.g., when mtmsr executes to completion as
TCG is doing here), rather it specifies how a pipelined processor can
have multiple instructions in flight where one may influence how another
behaves.

Cc: qemu-sta...@nongnu.org
Reported-by: Anton Blanchard 
Reported-by: Nathan Chancellor 
Tested-by: Nathan Chancellor 
Signed-off-by: Nicholas Piggin 
---
Thanks very much to Nathan for reporting and testing it, I added his
Tested-by tag despite a more polished patch, as the the basics are 
still the same (and still fixes his test case here).

This bug possibly goes back to early v2.04 / mtmsrd L=1 support around
2007, and the code has been changed several times since then so may
require some backporting.

32-bit / mtmsr untested at the moment, I don't have an environment
handy.

 target/ppc/translate.c | 46 +-
 1 file changed, 27 insertions(+), 19 deletions(-)

diff --git a/target/ppc/translate.c b/target/ppc/translate.c
index b207fb5386..9959259dba 100644
--- a/target/ppc/translate.c
+++ b/target/ppc/translate.c
@@ -4361,30 +4361,34 @@ static void gen_mtmsrd(DisasContext *ctx)
 CHK_SV;
 
 #if !defined(CONFIG_USER_ONLY)
+if (tb_cflags(ctx->base.tb) & CF_USE_ICOUNT) {
+gen_io_start();
+}
 if (ctx->opcode & 0x0001) {
-/* Special form that does not need any synchronisation */
+/* L=1 form only updates EE and RI */
 TCGv t0 = tcg_temp_new();
+TCGv t1 = tcg_temp_new();
 tcg_gen_andi_tl(t0, cpu_gpr[rS(ctx->opcode)],
 (1 << MSR_RI) | (1 << MSR_EE));
-tcg_gen_andi_tl(cpu_msr, cpu_msr,
+tcg_gen_andi_tl(t1, cpu_msr,
 ~(target_ulong)((1 << MSR_RI) | (1 << MSR_EE)));
-tcg_gen_or_tl(cpu_msr, cpu_msr, t0);
+tcg_gen_or_tl(t1, t1, t0);
+
+gen_helper_store_msr(cpu_env, t1);
 tcg_temp_free(t0);
+tcg_temp_free(t1);
+
 } else {
 /*
  * XXX: we need to update nip before the store if we enter
  *  power saving mode, we will exit the loop directly from
  *  ppc_store_msr
  */
-if (tb_cflags(ctx->base.tb) & CF_USE_ICOUNT) {
-gen_io_start();
-}
 gen_update_nip(ctx, ctx->base.pc_next);
 gen_helper_store_msr(cpu_env, cpu_gpr[rS(ctx->opcode)]);
-/* Must stop the translation as machine state (may have) changed */
-/* Note that mtmsr is not always defined as context-synchronizing */
-gen_stop_exception(ctx);
 }
+/* Must stop the translation as machine state (may have) changed */
+gen_stop_exception(ctx);
 #endif /* !defined(CONFIG_USER_ONLY) */
 }
 #endif /* defined(TARGET_PPC64) */
@@ -4394,15 +4398,23 @@ static void gen_mtmsr(DisasContext *ctx)
 CHK_SV;
 
 #if !defined(CONFIG_USER_ONLY)
-   if (ctx->opcode & 0x0001) {
-/* Special form that does not need any synchronisation */
+if (tb_cflags(ctx->base.tb) & CF_USE_ICOUNT) {
+gen_io_start();
+}
+if (ctx->opcode & 0x0001) {
+/* L=1 form only updates EE and RI */
 TCGv t0 = tcg_temp_new();
+TCGv t1 = tcg_temp_new();
 tcg_gen_andi_tl(t0, cpu_gpr[rS(ctx->opcode)],
 (1 << MSR_RI) | (1 << MSR_EE));
-tcg_gen_andi_tl(cpu_msr, cpu_msr,
+tcg_gen_andi_tl(t1, cpu_msr,
 ~(target_ulong)((1 << MSR_RI) | (1 << MSR_EE)));
-tcg_gen_or_tl(cpu_msr, cpu_msr, t0);
+tcg_gen_or_tl(t1, t1, t0);
+
+gen_helper_store_msr(cpu_env, t1);
 tcg_temp_free(t0);
+tcg_temp_free(t1);
+
 } else