Re: [mainline][Oops][bisected 2ba3e6 ] 5.7.0 boot fails with kernel panic on powerpc

2020-06-04 Thread Naresh Kamboju
On Wed, 3 Jun 2020 at 19:03, Joerg Roedel  wrote:
>
> On Wed, Jun 03, 2020 at 04:20:57PM +0530, Abdul Haleem wrote:
> > @Joerg, Could you please have a look?
>
> Can you please try the attached patch?

@Joerg, the Linaro test farm noticed this kernel crash on an NXP LS2088
(Machine model: Freescale Layerscape 2088A RDB Board)
while booting the Linux mainline 5.7.0 kernel.

Applying your proposed patch fixed the boot problem.

Tested-by: Naresh Kamboju 

Test ref:
https://lavalab.nxp.com/scheduler/job/23787#L426

Here is the kernel crash log from before the patch was applied:

[0.00] Linux version 5.7.0-03887-gf6aee505c71b
(TuxBuild@ecb9ef34f06f) (gcc version 9.3.0 (Debian 9.3.0-8), GNU ld
(GNU Binutils for Debian) 2.34) #1 SMP PREEMPT Wed Jun 3 18:21:26 UTC
2020
[0.00] Machine model: Freescale Layerscape 2088A RDB Board
<>
[0.00] NR_IRQS: 64, nr_irqs: 64, preallocated irqs: 0
[0.00] Unable to handle kernel paging request at virtual
address fffe8000
[0.00] Mem abort info:
[0.00]   ESR = 0x9604
[0.00]   EC = 0x25: DABT (current EL), IL = 32 bits
[0.00]   SET = 0, FnV = 0
[0.00]   EA = 0, S1PTW = 0
[0.00] Data abort info:
[0.00]   ISV = 0, ISS = 0x0004
[0.00]   CM = 0, WnR = 0
[0.00] [fffe8000] address between user and kernel address ranges
[0.00] Internal error: Oops: 9604 [#1] PREEMPT SMP
[0.00] Modules linked in:
[0.00] CPU: 0 PID: 0 Comm: swapper/0 Not tainted
5.7.0-03887-gf6aee505c71b #1
[0.00] Hardware name: Freescale Layerscape 2088A RDB Board (DT)
[0.00] pstate: 8085 (Nzcv daIf -PAN -UAO BTYPE=--)
[0.00] pc : map_kernel_range_noflush+0xc0/0x280
[0.00] lr : __vmalloc_node_range+0x154/0x2a0
[0.00] sp : b3b1dcbc3e20
[0.00] x29: b3b1dcbc3e20 x28: fffe8000
[0.00] x27: 800010004000 x26: 80001000
[0.00] x25: 00402dc2 x24: b3b1dc53c000
[0.00] x23: 00680f13 x22: 0004
[0.00] x21: b3b1dc53cf48 x20: 
[0.00] x19: b3b1dc627800 x18: 00c0
[0.00] x17:  x16: 0007
[0.00] x15: dead0100 x14: fe020b990600
[0.00] x13: dead0122 x12: 0001
[0.00] x11:  x10: 0082fe3fdec0
[0.00] x9 : 0082fe342d58 x8 : 4cd121ba5000
[0.00] x7 : 80801000 x6 : 0004
[0.00] x5 : fffd x4 : 4000
[0.00] x3 : 80005000 x2 : 00018000
[0.00] x1 :  x0 : 800010003fff
[0.00] Call trace:
[0.00]  map_kernel_range_noflush+0xc0/0x280
[0.00]  __vmalloc_node_range+0x154/0x2a0
[0.00]  __vmalloc_node+0x5c/0x70
[0.00]  init_IRQ+0xac/0xf8
[0.00]  start_kernel+0x2d0/0x4dc
[0.00] Code: f90047e0 d503201f d2a80003 8b030343 (f9400380)
[0.00] random: get_random_bytes called from
print_oops_end_marker+0x2c/0x58 with crng_init=0
[0.00] ---[ end trace  ]---
[0.00] Kernel panic - not syncing: Attempted to kill the idle task!

ref:
https://lavalab.nxp.com/scheduler/job/23596#L603

-- 
Linaro LKFT
https://lkft.linaro.org


Re: [mainline][Oops][bisected 2ba3e6 ] 5.7.0 boot fails with kernel panic on powerpc

2020-06-03 Thread Abdul Haleem
On Wed, 2020-06-03 at 15:32 +0200, Joerg Roedel wrote:
> On Wed, Jun 03, 2020 at 04:20:57PM +0530, Abdul Haleem wrote:
> > @Joerg, Could you please have a look?
> 
> Can you please try the attached patch?

Thanks Joerg, The given patch fixes the boot problem.

Please add a Reported-by tag to the fix commit.

Reported-by: Abdul Haleem 

> 
> diff --git a/include/asm-generic/5level-fixup.h 
> b/include/asm-generic/5level-fixup.h
> index 58046ddc08d0..afbab31fbd7e 100644
> --- a/include/asm-generic/5level-fixup.h
> +++ b/include/asm-generic/5level-fixup.h
> @@ -17,6 +17,11 @@
>   ((unlikely(pgd_none(*(p4d))) && __pud_alloc(mm, p4d, address)) ? \
>   NULL : pud_offset(p4d, address))
> 
> +#define pud_alloc_track(mm, p4d, address, mask)  
> \
> + ((unlikely(pgd_none(*(p4d))) && 
> \
> +   (__pud_alloc(mm, p4d, address) || 
> ({*(mask)|=PGTBL_P4D_MODIFIED;0;})))?   \
> +   NULL : pud_offset(p4d, address))
> +
>  #define p4d_alloc(mm, pgd, address)  (pgd)
>  #define p4d_alloc_track(mm, pgd, address, mask)  (pgd)
>  #define p4d_offset(pgd, start)   (pgd)
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 7e07f4f490cb..d46bf03b804f 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -2088,35 +2088,35 @@ static inline pud_t *pud_alloc(struct mm_struct *mm, 
> p4d_t *p4d,
>   NULL : pud_offset(p4d, address);
>  }
> 
> -static inline p4d_t *p4d_alloc_track(struct mm_struct *mm, pgd_t *pgd,
> +static inline pud_t *pud_alloc_track(struct mm_struct *mm, p4d_t *p4d,
>unsigned long address,
>pgtbl_mod_mask *mod_mask)
> -
>  {
> - if (unlikely(pgd_none(*pgd))) {
> - if (__p4d_alloc(mm, pgd, address))
> + if (unlikely(p4d_none(*p4d))) {
> + if (__pud_alloc(mm, p4d, address))
>   return NULL;
> - *mod_mask |= PGTBL_PGD_MODIFIED;
> + *mod_mask |= PGTBL_P4D_MODIFIED;
>   }
> 
> - return p4d_offset(pgd, address);
> + return pud_offset(p4d, address);
>  }
> 
> -#endif /* !__ARCH_HAS_5LEVEL_HACK */
> -
> -static inline pud_t *pud_alloc_track(struct mm_struct *mm, p4d_t *p4d,
> +static inline p4d_t *p4d_alloc_track(struct mm_struct *mm, pgd_t *pgd,
>unsigned long address,
>pgtbl_mod_mask *mod_mask)
> +
>  {
> - if (unlikely(p4d_none(*p4d))) {
> - if (__pud_alloc(mm, p4d, address))
> + if (unlikely(pgd_none(*pgd))) {
> + if (__p4d_alloc(mm, pgd, address))
>   return NULL;
> - *mod_mask |= PGTBL_P4D_MODIFIED;
> + *mod_mask |= PGTBL_PGD_MODIFIED;
>   }
> 
> - return pud_offset(p4d, address);
> + return p4d_offset(pgd, address);
>  }
> 
> +#endif /* !__ARCH_HAS_5LEVEL_HACK */
> +
>  static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned 
> long address)
>  {
>   return (unlikely(pud_none(*pud)) && __pmd_alloc(mm, pud, address))?


-- 
Regards

Abdul Haleem
IBM Linux Technology Centre





Re: [mainline][Oops][bisected 2ba3e6 ] 5.7.0 boot fails with kernel panic on powerpc

2020-06-03 Thread Satheesh Rajendran
On Wed, Jun 03, 2020 at 03:32:57PM +0200, Joerg Roedel wrote:
> On Wed, Jun 03, 2020 at 04:20:57PM +0530, Abdul Haleem wrote:
> > @Joerg, Could you please have a look?
> 
> Can you please try the attached patch?

Hi Joerg,

I hit a similar boot failure on a Power9 bare-metal box (see the Note below),
and your patch below resolved it in my environment;
I am now able to boot the system fine.

...
Fedora 31 (Thirty One)
Kernel 5.7.0-gd6f9469a0-dirty on an ppc64le (hvc0)

 login:


Tested-by: Satheesh Rajendran 

Note: for the record, here is the boot failure call trace.

[0.023555] mempolicy: Enabling automatic NUMA balancing. Configure with 
numa_balancing= or the kernel.numa_balancing sysctl
[0.023582] pid_max: default: 163840 minimum: 1280
[0.035014] BUG: Unable to handle kernel data access on read at 
0xc060
[0.035058] Faulting instruction address: 0xc0382304
[0.035074] Oops: Kernel access of bad area, sig: 11 [#1]
[0.035097] LE PAGE_SIZE=64K MMU=Radix SMP NR_CPUS=2048 NUMA PowerNV
[0.035113] Modules linked in:
[0.035136] CPU: 24 PID: 0 Comm: swapper/24 Not tainted 5.7.0-gd6f9469a0 #1
[0.035161] NIP:  c0382304 LR: c038407c CTR: 
[0.035197] REGS: c167f930 TRAP: 0300   Not tainted  
(5.7.0-gd6f9469a0)
[0.035241] MSR:  92009033   CR: 
42022422  XER: 
[0.035294] CFAR: c03822fc DAR: c060 DSISR: 4000 
IRQMASK: 0 
[0.035294] GPR00: c038407c c167fbc0 c168090[  
150.252645597,5] OPAL: Reboot request...
[  150.252928266,5] RESET: Initiating fast reboot 1...
0 c008 
[0.035294] GPR04:  01ff c008001f 
0060 
[0.035294] GPR08: 6000 0005 c060 
c0080020 
[0.035294] GPR12: 22022422 c187 c000 
c008 
[0.035294] GPR16: c00807ff c0080020  
c060 
[0.035294] GPR20: c0080800 c0080800 c00807ff 
c00807ff 
[0.035294] GPR24: c163f7c8 c172d0c0 0001 
0001 
[0.035294] GPR28: c1708000 c172d0c8  
c0080800 
[0.035622] NIP [c0382304] map_kernel_range_noflush+0x274/0x510
[0.035657] LR [c038407c] __vmalloc_node_range+0x2ec/0x3a0
[0.035690] Call Trace:
[0.035709] [c167fbc0] [c038d848] 
__alloc_pages_nodemask+0x158/0x3f0 (unreliable)
[0.035750] [c167fc90] [c038407c] 
__vmalloc_node_range+0x2ec/0x3a0
[0.035787] [c167fd40] [c0384268] __vmalloc+0x58/0x70
[0.035823] [c167fdb0] [c1056db8] 
alloc_large_system_hash+0x204/0x304
[0.035870] [c167fe60] [c105c1f0] vfs_caches_init+0xd8/0x138
[0.035916] [c167fee0] [c10242a0] start_kernel+0x644/0x6ec
[0.035960] [c167ff90] [c000ca9c] 
start_here_common+0x1c/0x400
[0.036004] Instruction dump:
[0.036016] 3af4 6000 6000 38c90010 7f663036 7d667a14 7cc600d0 
7d713038 
[0.036038] 38d1 7c373040 41810008 7e91a378  2c25 418201b4 
7f464830 
[0.036083] ---[ end trace c7e72029dfacc217 ]---
[0.036114] 
[1.036223] Kernel panic - not syncing: Attempted to kill the idle task!
[1.036858] Rebooting in 10 seconds..


Regards,
-Satheesh.

> 
> diff --git a/include/asm-generic/5level-fixup.h 
> b/include/asm-generic/5level-fixup.h
> index 58046ddc08d0..afbab31fbd7e 100644
> --- a/include/asm-generic/5level-fixup.h
> +++ b/include/asm-generic/5level-fixup.h
> @@ -17,6 +17,11 @@
>   ((unlikely(pgd_none(*(p4d))) && __pud_alloc(mm, p4d, address)) ? \
>   NULL : pud_offset(p4d, address))
> 
> +#define pud_alloc_track(mm, p4d, address, mask)  
> \
> + ((unlikely(pgd_none(*(p4d))) && 
> \
> +   (__pud_alloc(mm, p4d, address) || 
> ({*(mask)|=PGTBL_P4D_MODIFIED;0;})))?   \
> +   NULL : pud_offset(p4d, address))
> +
>  #define p4d_alloc(mm, pgd, address)  (pgd)
>  #define p4d_alloc_track(mm, pgd, address, mask)  (pgd)
>  #define p4d_offset(pgd, start)   (pgd)
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 7e07f4f490cb..d46bf03b804f 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -2088,35 +2088,35 @@ static inline pud_t *pud_alloc(struct mm_struct *mm, 
> p4d_t *p4d,
>   NULL : pud_offset(p4d, address);
>  }
> 
> -static inline p4d_t *p4d_alloc_track(struct mm_struct *mm, pgd_t *pgd,
> +static inline pud_t *pud_alloc_track(struct mm_struct *mm, p4d_t *p4d,
>unsigned long address,
>pgtbl_mod_mask *mod_mask)
> -
>  {
> - if (unlikely(pgd_none(*pgd))) {
> -  

Re: [mainline][Oops][bisected 2ba3e6 ] 5.7.0 boot fails with kernel panic on powerpc

2020-06-03 Thread Joerg Roedel
On Wed, Jun 03, 2020 at 04:20:57PM +0530, Abdul Haleem wrote:
> @Joerg, Could you please have a look?

Can you please try the attached patch?

diff --git a/include/asm-generic/5level-fixup.h 
b/include/asm-generic/5level-fixup.h
index 58046ddc08d0..afbab31fbd7e 100644
--- a/include/asm-generic/5level-fixup.h
+++ b/include/asm-generic/5level-fixup.h
@@ -17,6 +17,11 @@
((unlikely(pgd_none(*(p4d))) && __pud_alloc(mm, p4d, address)) ? \
NULL : pud_offset(p4d, address))
 
+#define pud_alloc_track(mm, p4d, address, mask)
\
+   ((unlikely(pgd_none(*(p4d))) && 
\
+ (__pud_alloc(mm, p4d, address) || 
({*(mask)|=PGTBL_P4D_MODIFIED;0;})))?   \
+ NULL : pud_offset(p4d, address))
+
 #define p4d_alloc(mm, pgd, address)(pgd)
 #define p4d_alloc_track(mm, pgd, address, mask)(pgd)
 #define p4d_offset(pgd, start) (pgd)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 7e07f4f490cb..d46bf03b804f 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2088,35 +2088,35 @@ static inline pud_t *pud_alloc(struct mm_struct *mm, 
p4d_t *p4d,
NULL : pud_offset(p4d, address);
 }
 
-static inline p4d_t *p4d_alloc_track(struct mm_struct *mm, pgd_t *pgd,
+static inline pud_t *pud_alloc_track(struct mm_struct *mm, p4d_t *p4d,
 unsigned long address,
 pgtbl_mod_mask *mod_mask)
-
 {
-   if (unlikely(pgd_none(*pgd))) {
-   if (__p4d_alloc(mm, pgd, address))
+   if (unlikely(p4d_none(*p4d))) {
+   if (__pud_alloc(mm, p4d, address))
return NULL;
-   *mod_mask |= PGTBL_PGD_MODIFIED;
+   *mod_mask |= PGTBL_P4D_MODIFIED;
}
 
-   return p4d_offset(pgd, address);
+   return pud_offset(p4d, address);
 }
 
-#endif /* !__ARCH_HAS_5LEVEL_HACK */
-
-static inline pud_t *pud_alloc_track(struct mm_struct *mm, p4d_t *p4d,
+static inline p4d_t *p4d_alloc_track(struct mm_struct *mm, pgd_t *pgd,
 unsigned long address,
 pgtbl_mod_mask *mod_mask)
+
 {
-   if (unlikely(p4d_none(*p4d))) {
-   if (__pud_alloc(mm, p4d, address))
+   if (unlikely(pgd_none(*pgd))) {
+   if (__p4d_alloc(mm, pgd, address))
return NULL;
-   *mod_mask |= PGTBL_P4D_MODIFIED;
+   *mod_mask |= PGTBL_PGD_MODIFIED;
}
 
-   return pud_offset(p4d, address);
+   return p4d_offset(pgd, address);
 }
 
+#endif /* !__ARCH_HAS_5LEVEL_HACK */
+
 static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long 
address)
 {
return (unlikely(pud_none(*pud)) && __pmd_alloc(mm, pud, address))?


Re: [mainline][Oops][bisected 2ba3e6 ] 5.7.0 boot fails with kernel panic on powerpc

2020-06-03 Thread Joerg Roedel
Hi Abdul,

On Wed, Jun 03, 2020 at 04:20:57PM +0530, Abdul Haleem wrote:
> Greeting's
> 
> Today's mainline kernel panics when booting on my powerpc lpar

Thanks for the report, I am looking into it with my limited powerpc
knowledge. But I have an idea and will send you something to test later
today.

Thanks,

Joerg