Excerpts from Sachin Sant's message of June 29, 2021 12:37 am:
> 
>> On 28-Jun-2021, at 1:19 PM, Nicholas Piggin <npig...@gmail.com> wrote:
>> 
>> Similar to 2b48e96be2f9f ("powerpc/64: fix irq replay pt_regs->softe
>> value"), enable MSR_EE in pt_regs->msr, which makes the regs look a
>> bit more normal and allows the extra debug checks to be added to
>> interrupt handler entry.
>> 
>> Signed-off-by: Nicholas Piggin <npig...@gmail.com>
>> ---
>> arch/powerpc/include/asm/interrupt.h | 4 ++++
>> arch/powerpc/kernel/irq.c            | 1 +
>> 2 files changed, 5 insertions(+)
>> 
>> diff --git a/arch/powerpc/include/asm/interrupt.h 
>> b/arch/powerpc/include/asm/interrupt.h
>> index 789311d1e283..d4bdf7d274ac 100644
>> --- a/arch/powerpc/include/asm/interrupt.h
>> +++ b/arch/powerpc/include/asm/interrupt.h
>> @@ -173,6 +173,8 @@ static inline void interrupt_enter_prepare(struct 
>> pt_regs *regs, struct interrup
>>                      BUG_ON(search_kernel_restart_table(regs->nip));
>> #endif
>>      }
>> +    if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
>> +            BUG_ON(!arch_irq_disabled_regs(regs) && !(regs->msr & MSR_EE));
>> #endif
> 
> I think this BUG_ON was triggered while running selftests 
> (powerpc/mm/pkey_exec_prot)
> 
> [ 9741.254969] ------------[ cut here ]------------
> [ 9741.254978] kernel BUG at arch/powerpc/include/asm/interrupt.h:177!
> [ 9741.254985] Oops: Exception in kernel mode, sig: 5 [#1]
> [ 9741.254990] LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA pSeries
> [ 9741.254995] Modules linked in: rpadlpar_io rpaphp uinput sha512_generic 
> vmac n_gsm pps_ldisc pps_core ppp_synctty ppp_async ppp_generic slcan slip 
> slhc snd_hrtimer snd_seq snd_seq_device snd_timer snd soundcore authenc 
> pcrypt crypto_user n_hdlc dummy veth nfsv3 nfs_acl nfs lockd grace fscache 
> netfs tun brd overlay vfat fat btrfs blake2b_generic xor zstd_compress 
> raid6_pq xfs loop sctp ip6_udp_tunnel udp_tunnel dm_mod bonding nft_ct 
> nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 ip_set rfkill nf_tables libcrc32c 
> nfnetlink sunrpc pseries_rng xts vmx_crypto uio_pdrv_genirq uio sch_fq_codel 
> ip_tables ext4 mbcache jbd2 sr_mod sd_mod cdrom t10_pi sg ibmvscsi ibmveth 
> scsi_transport_srp fuse [last unloaded: test_cpuidle_latency]
> [ 9741.255097] CPU: 17 PID: 3278920 Comm: pkey_exec_prot Tainted: G        W  
> OE     5.13.0-rc7-next-20210625-dirty #4
> [ 9741.255106] NIP:  c0000000000300d8 LR: c000000000009604 CTR: 
> c000000000009330
> [ 9741.255111] REGS: c0000000347536f0 TRAP: 0700   Tainted: G        W  OE    
>   (5.13.0-rc7-next-20210625-dirty)
> [ 9741.255117] MSR:  8000000000021033 <SF,ME,IR,DR,RI,LE>  CR: 22004282  XER: 
> 20040000
> [ 9741.255130] CFAR: c00000000003007c IRQMASK: 3 
> [ 9741.255130] GPR00: c000000000093cd0 c000000034753990 c0000000029bbe00 
> c000000034753a30 
> [ 9741.255130] GPR04: 00007fff9ebb0000 0000000000200000 000000000000000a 
> 000000000000002d 
> [ 9741.255130] GPR08: 0000000000000000 0000000000000001 0000000000000000 
> 7265677368657265 
> [ 9741.255130] GPR12: 8000000000021033 c00000001ec27280 0000000000000000 
> 0000000000000000 
> [ 9741.255130] GPR16: 0000000000000000 0000000000000000 0000000000000000 
> 0000000000000000 
> [ 9741.255130] GPR20: 0000000000000000 0000000000000000 0000000000000000 
> 0000000010003c40 
> [ 9741.255130] GPR24: 0000000000000000 0000000000000000 0000000000200000 
> c00000005e89d200 
> [ 9741.255130] GPR28: 0000000000000300 00007fff9ebb0000 c000000034753e80 
> c000000034753a30 
> [ 9741.255191] NIP [c0000000000300d8] program_check_exception+0xe8/0x1c0
> [ 9741.255202] LR [c000000000009604] program_check_common_virt+0x2d4/0x320
> [ 9741.255209] Call Trace:
> [ 9741.255212] [c000000034753990] [0000000000000008] 0x8 (unreliable)
> [ 9741.255219] [c0000000347539c0] [c000000034753a80] 0xc000000034753a80
> [ 9741.255225] --- interrupt: 700 at arch_local_irq_restore+0x1d0/0x200
> [ 9741.255231] NIP:  c000000000016790 LR: c000000000093388 CTR: 
> c000000000008780
> [ 9741.255236] REGS: c000000034753a30 TRAP: 0700   Tainted: G        W  OE    
>   (5.13.0-rc7-next-20210625-dirty)
> [ 9741.255242] MSR:  8000000000021033 <SF,ME,IR,DR,RI,LE>  CR: 24004288  XER: 
> 20040000
> [ 9741.255253] CFAR: c0000000000165ec IRQMASK: 0 
> [ 9741.255253] GPR00: c000000000093cd0 c000000034753cd0 c0000000029bbe00 
> 0000000000000000 
> [ 9741.255253] GPR04: 00007fff9ebb0000 0000000000200000 000000000000000a 
> 000000000000002d 
> [ 9741.255253] GPR08: 0000000000000000 0000000000000000 c0000000bd77d400 
> 7265677368657265 
> [ 9741.255253] GPR12: 0000000044000282 c00000001ec27280 0000000000000000 
> 0000000000000000 
> [ 9741.255253] GPR16: 0000000000000000 0000000000000000 0000000000000000 
> 0000000000000000 
> [ 9741.255253] GPR20: 0000000000000000 0000000000000000 0000000000000000 
> 0000000010003c40 
> [ 9741.255253] GPR24: 0000000000000000 0000000000000000 0000000000200000 
> c00000005e89d200 
> [ 9741.255253] GPR28: 0000000000000300 00007fff9ebb0000 c000000034753e80 
> 0000000000000001 
> [ 9741.255313] NIP [c000000000016790] arch_local_irq_restore+0x1d0/0x200
> [ 9741.255319] LR [c000000000093388] ___do_page_fault+0x438/0xb80
> [ 9741.255325] --- interrupt: 700
> [ 9741.255328] [c000000034753cd0] [c00000000009be74] hash_page_mm+0x5e4/0x800 
> (unreliable)
> [ 9741.255335] [c000000034753d00] [000000000000002d] 0x2d
> [ 9741.255340] [c000000034753db0] [c000000000093cd0] 
> hash__do_page_fault+0x30/0x70
> [ 9741.255348] [c000000034753de0] [c00000000009c438] do_hash_fault+0x78/0xb0

This looks like it's probably running un-reconciled, because the first
call to hash__do_page_fault is not made from within a real interrupt
handler. Without the quoted patch, the test is probably already causing
a warning for the same reason (the bug triggered in the program check
interrupt handler).

I think the patch below is probably the right fix for it.

Thanks,
Nick

---

diff --git a/arch/powerpc/mm/book3s64/hash_utils.c 
b/arch/powerpc/mm/book3s64/hash_utils.c
index 96d9aa164007..ac5720371c0d 100644
--- a/arch/powerpc/mm/book3s64/hash_utils.c
+++ b/arch/powerpc/mm/book3s64/hash_utils.c
@@ -1522,8 +1522,8 @@ int hash_page(unsigned long ea, unsigned long access, 
unsigned long trap,
 }
 EXPORT_SYMBOL_GPL(hash_page);
 
-DECLARE_INTERRUPT_HANDLER_RET(__do_hash_fault);
-DEFINE_INTERRUPT_HANDLER_RET(__do_hash_fault)
+DECLARE_INTERRUPT_HANDLER(__do_hash_fault);
+DEFINE_INTERRUPT_HANDLER(__do_hash_fault)
 {
        unsigned long ea = regs->dar;
        unsigned long dsisr = regs->dsisr;
@@ -1533,6 +1533,11 @@ DEFINE_INTERRUPT_HANDLER_RET(__do_hash_fault)
        unsigned int region_id;
        long err;
 
+       if (unlikely(dsisr & (DSISR_BAD_FAULT_64S | DSISR_KEYFAULT))) {
+               hash__do_page_fault(regs);
+               return;
+       }
+
        region_id = get_region_id(ea);
        if ((region_id == VMALLOC_REGION_ID) || (region_id == IO_REGION_ID))
                mm = &init_mm;
@@ -1571,9 +1576,10 @@ DEFINE_INTERRUPT_HANDLER_RET(__do_hash_fault)
                        bad_page_fault(regs, SIGBUS);
                }
                err = 0;
-       }
 
-       return err;
+       } else if (err) {
+               hash__do_page_fault(regs);
+       }
 }
 
 /*
@@ -1582,13 +1588,6 @@ DEFINE_INTERRUPT_HANDLER_RET(__do_hash_fault)
  */
 DEFINE_INTERRUPT_HANDLER_RAW(do_hash_fault)
 {
-       unsigned long dsisr = regs->dsisr;
-
-       if (unlikely(dsisr & (DSISR_BAD_FAULT_64S | DSISR_KEYFAULT))) {
-               hash__do_page_fault(regs);
-               return 0;
-       }
-
        /*
         * If we are in an "NMI" (e.g., an interrupt when soft-disabled), then
         * don't call hash_page, just fail the fault. This is required to
@@ -1607,8 +1606,7 @@ DEFINE_INTERRUPT_HANDLER_RAW(do_hash_fault)
                return 0;
        }
 
-       if (__do_hash_fault(regs))
-               hash__do_page_fault(regs);
+       __do_hash_fault(regs);
 
        return 0;
 }

Reply via email to