Re: next-20170125 hangs on aarch64

2017-01-30 Thread Andy Gross
On Mon, Jan 30, 2017 at 06:21:25PM +0530, Yury Norov wrote:
> On Mon, Jan 30, 2017 at 11:48:01AM +0000, James Morse wrote:
> > Hi Yury,
> > 
> > [CC: Andy Gross]
> > 
> > On 29/01/17 12:21, Yury Norov wrote:
> > > On Sun, Jan 29, 2017 at 03:42:55PM +0530, Yury Norov wrote:
> > >> Hi all,
> > >>
> > >> I pulled the next-20170125 kernel and found that it hung on boot. The
> > >> exact reason is a panic on dereferencing the address 0xffffffc8, which
> > >> is most probably an attempt to dereference the ENOSYS error code as an
> > >> address. next-20170124 works fine; at least it boots.
> > >>
> > >> Does anyone have details on that?
> > 
> > I hit this with next-20170130 too, in /arch/arm64/kernel/smccc-call.S
> > aabde95fc543 changed the SMCCC macro to check for an optional quirk 
> > structure.
> > 
> > A previous patch provided:
> > > #define arm_smccc_smc(...) __arm_smccc_smc(__VA_ARGS__, NULL)
> > 
> > to handle the 'no quirk' case, but this missed HVC calls.
> > The following hunk fixes/hides it for me:

Wow, I botched this completely.  I missed the hvc using the same macro.  I'll
rework with the fixes below.

> 
> It works for me too, but I think "ldr x4, [sp, #8]" should
> also go under (.if \maybe_quirk != 0) condition - like below.

Yes I believe so.

> %<
> diff --git a/arch/arm64/kernel/smccc-call.S b/arch/arm64/kernel/smccc-call.S
> index 72ecdca929b1..9e287a7d1822 100644
> --- a/arch/arm64/kernel/smccc-call.S
> +++ b/arch/arm64/kernel/smccc-call.S
> @@ -15,18 +15,20 @@
>  #include 
>  #include 
> 
> -   .macro SMCCC instr
> +   .macro SMCCC instr, maybe_quirk = 0
> .cfi_startproc
> \instr  #0
> ldr x4, [sp]
> stp x0, x1, [x4, #ARM_SMCCC_RES_X0_OFFS]
> stp x2, x3, [x4, #ARM_SMCCC_RES_X2_OFFS]
> +   .if \maybe_quirk != 0
> ldr x4, [sp, #8]
> cbz x4, 1f /* no quirk structure */
> ldr x9, [x4, #ARM_SMCCC_QUIRK_ID_OFFS]
> cmp x9, #ARM_SMCCC_QUIRK_QCOM_A6
> b.ne    1f
> str x6, [x4, ARM_SMCCC_QUIRK_STATE_OFFS]
> +   .endif
>  1: ret
> .cfi_endproc
> .endm
> @@ -38,7 +40,7 @@
>   *   struct arm_smccc_quirk *quirk)
>   */
>  ENTRY(__arm_smccc_smc)
> -   SMCCC   smc
> +   SMCCC   smc, 1
>  ENDPROC(__arm_smccc_smc)
> 
>  /*
> %<
> 
> ___
> linux-arm-kernel mailing list
> linux-arm-ker...@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel


Re: next-20170125 hangs on aarch64

2017-01-30 Thread Yury Norov
On Mon, Jan 30, 2017 at 11:48:01AM +0000, James Morse wrote:
> Hi Yury,
> 
> [CC: Andy Gross]
> 
> On 29/01/17 12:21, Yury Norov wrote:
> > On Sun, Jan 29, 2017 at 03:42:55PM +0530, Yury Norov wrote:
> >> Hi all,
> >>
> >> I pulled the next-20170125 kernel and found that it hung on boot. The
> >> exact reason is a panic on dereferencing the address 0xffffffc8, which
> >> is most probably an attempt to dereference the ENOSYS error code as an
> >> address. next-20170124 works fine; at least it boots.
> >>
> >> Does anyone have details on that?
> 
> I hit this with next-20170130 too, in /arch/arm64/kernel/smccc-call.S
> aabde95fc543 changed the SMCCC macro to check for an optional quirk structure.
> 
> A previous patch provided:
> > #define arm_smccc_smc(...) __arm_smccc_smc(__VA_ARGS__, NULL)
> 
> to handle the 'no quirk' case, but this missed HVC calls.
> The following hunk fixes/hides it for me:

It works for me too, but I think "ldr x4, [sp, #8]" should
also go under (.if \maybe_quirk != 0) condition - like below.

Yury

%<
diff --git a/arch/arm64/kernel/smccc-call.S b/arch/arm64/kernel/smccc-call.S
index 72ecdca929b1..9e287a7d1822 100644
--- a/arch/arm64/kernel/smccc-call.S
+++ b/arch/arm64/kernel/smccc-call.S
@@ -15,18 +15,20 @@
 #include 
 #include 

-   .macro SMCCC instr
+   .macro SMCCC instr, maybe_quirk = 0
.cfi_startproc
\instr  #0
ldr x4, [sp]
stp x0, x1, [x4, #ARM_SMCCC_RES_X0_OFFS]
stp x2, x3, [x4, #ARM_SMCCC_RES_X2_OFFS]
+   .if \maybe_quirk != 0
ldr x4, [sp, #8]
cbz x4, 1f /* no quirk structure */
ldr x9, [x4, #ARM_SMCCC_QUIRK_ID_OFFS]
cmp x9, #ARM_SMCCC_QUIRK_QCOM_A6
b.ne    1f
str x6, [x4, ARM_SMCCC_QUIRK_STATE_OFFS]
+   .endif
 1: ret
.cfi_endproc
.endm
@@ -38,7 +40,7 @@
  *   struct arm_smccc_quirk *quirk)
  */
 ENTRY(__arm_smccc_smc)
-   SMCCC   smc
+   SMCCC   smc, 1
 ENDPROC(__arm_smccc_smc)

 /*
%<


Re: next-20170125 hangs on aarch64

2017-01-30 Thread James Morse
Hi Yury,

[CC: Andy Gross]

On 29/01/17 12:21, Yury Norov wrote:
> On Sun, Jan 29, 2017 at 03:42:55PM +0530, Yury Norov wrote:
>> Hi all,
>>
>> I pulled the next-20170125 kernel and found that it hung on boot. The
>> exact reason is a panic on dereferencing the address 0xffffffc8, which
>> is most probably an attempt to dereference the ENOSYS error code as an
>> address. next-20170124 works fine; at least it boots.
>>
>> Does anyone have details on that?

I hit this with next-20170130 too, in /arch/arm64/kernel/smccc-call.S
aabde95fc543 changed the SMCCC macro to check for an optional quirk structure.

A previous patch provided:
> #define arm_smccc_smc(...) __arm_smccc_smc(__VA_ARGS__, NULL)

to handle the 'no quirk' case, but this missed HVC calls.
The following hunk fixes/hides it for me:

%<
diff --git a/arch/arm64/kernel/smccc-call.S b/arch/arm64/kernel/smccc-call.S
index 72ecdca929b1..9e287a7d1822 100644
--- a/arch/arm64/kernel/smccc-call.S
+++ b/arch/arm64/kernel/smccc-call.S
@@ -15,18 +15,20 @@
 #include 
 #include 

-   .macro SMCCC instr
+   .macro SMCCC instr, maybe_quirk = 0
.cfi_startproc
\instr  #0
ldr x4, [sp]
stp x0, x1, [x4, #ARM_SMCCC_RES_X0_OFFS]
stp x2, x3, [x4, #ARM_SMCCC_RES_X2_OFFS]
ldr x4, [sp, #8]
+   .if \maybe_quirk != 0
cbz x4, 1f /* no quirk structure */
ldr x9, [x4, #ARM_SMCCC_QUIRK_ID_OFFS]
cmp x9, #ARM_SMCCC_QUIRK_QCOM_A6
b.ne    1f
str x6, [x4, ARM_SMCCC_QUIRK_STATE_OFFS]
+   .endif
 1: ret
.cfi_endproc
.endm
@@ -38,7 +40,7 @@
  *   struct arm_smccc_quirk *quirk)
  */
 ENTRY(__arm_smccc_smc)
-   SMCCC   smc
+   SMCCC   smc, 1
 ENDPROC(__arm_smccc_smc)

 /*
%<


Thanks,

James



Re: next-20170125 hangs on aarch64

2017-01-29 Thread Yury Norov
On Sun, Jan 29, 2017 at 03:42:55PM +0530, Yury Norov wrote:
> Hi all,
> 
> I pulled the next-20170125 kernel and found that it hung on boot. The
> exact reason is a panic on dereferencing the address 0xffffffc8, which
> is most probably an attempt to dereference the ENOSYS error code as an
> address. next-20170124 works fine; at least it boots.
> 
> Does anyone have details on that?
> 
> Yury

UPD:

I am running QEMU for testing.

The actual failure backtrace is shown below. For me, the bad dereference
happens in the arm_smccc_hvc() function, inside the SMCCC macro.

Yury

Backtrace:

#0  0x0808f7a8 in arm_smccc_hvc () at arch/arm64/kernel/smccc-call.S:50
#1  0x08745ea0 in __invoke_psci_fn_hvc (function_id=<optimized out>, arg0=<optimized out>,
    arg1=<optimized out>, arg2=<optimized out>) at drivers/firmware/psci.c:119
#2  0x08745d18 in psci_migrate_info_type () at drivers/firmware/psci.c:204
#3  0x08ca150c in psci_init_migrate () at drivers/firmware/psci.c:465
#4  psci_probe () at drivers/firmware/psci.c:539
#5  0x08ca1684 in psci_0_2_init (np=<optimized out>) at drivers/firmware/psci.c:571
#6  0x08ca16d8 in psci_dt_init () at drivers/firmware/psci.c:637
#7  0x08c62914 in setup_arch (cmdline_p=<optimized out>) at arch/arm64/kernel/setup.c:287
#8  0x08c6082c in start_kernel () at init/main.c:509
#9  0x08c601e0 in __primary_switched () at arch/arm64/kernel/head.S:452

Listing:

 │0x0808f790  hvc   #0x0
 │0x0808f794  ldr   x4, [sp]
 │0x0808f798  stp   x0, x1, [x4]
 │0x0808f79c  stp   x2, x3, [x4,#16]
 │0x0808f7a0  ldr   x4, [sp,#8]
 │0x0808f7a4  cbz   x4, 0x0808f7b8
>│0x0808f7a8  ldr   x9, [x4]
 │0x0808f7ac  cmp   x9, #0x1
 │0x0808f7b0  b.ne  0x0808f7b8
 │0x0808f7b4  str   x6, [x4,#8]
 │0x0808f7b8  ret


next-20170125 hangs on aarch64

2017-01-29 Thread Yury Norov
Hi all,

I pulled the next-20170125 kernel and found that it hung on boot. The exact
reason is a panic on dereferencing the address 0xffffffc8, which is most
probably an attempt to dereference the ENOSYS error code as an address.
next-20170124 works fine; at least it boots.

Does anyone have details on that?

Yury

#0  arch_counter_get_cntvct () at
./arch/arm64/include/asm/arch_timer.h:151
#1  __delay (cycles=1024) at arch/arm64/lib/delay.c:31
#2  0x0838b430 in __const_udelay (xloops=) at
arch/arm64/lib/delay.c:41
#3  0x0816a894 in panic (fmt=) at
kernel/panic.c:295
#4  0x080c1238 in do_exit (code=11) at kernel/exit.c:780
#5  0x080888f0 in die (str=, 
regs=0x08d63d30 , err=)
at arch/arm64/kernel/traps.c:295
#6  0x080998c4 in __do_kernel_fault (mm=0x0, addr=4294967240, 
esr=2483027972, 
regs=0x08d63d30 ) at 
arch/arm64/mm/fault.c:185
#7  0x08097244 in __do_kernel_fault (regs=, 
esr=, 
 addr=, mm=) at arch/arm64/mm/fault.c:419
#8  do_page_fault (addr=4294967240, esr=2483027972, 
regs=0x08d63d30 ) at 
arch/arm64/mm/fault.c:443
#9  0x08097334 in do_translation_fault (addr=, 
esr=, 
regs=) at arch/arm64/mm/fault.c:469
#10 0x08081298 in do_mem_abort (addr=4294967240, esr=2483027972, 
regs=0x08d63d30 ) at 
arch/arm64/mm/fault.c:577
#11 0x08082604 in el1_sync () at arch/arm64/kernel/entry.S:438
#12 0x08082604 in el1_sync () at arch/arm64/kernel/entry.S:438
[...]