[PATCH v3] soc: fsl: dpio: Get the cpumask through cpumask_of(cpu)

2020-10-19 Thread Yi Wang
From: Hao Si 

The local variable 'cpumask_t mask' is in the stack memory, and its address
is assigned to 'desc->affinity' in 'irq_set_affinity_hint()'.
But the memory area where this variable is located is at risk of being
modified.

During LTP testing, the following error was generated:

Unable to handle kernel paging request at virtual address 12e9b790
Mem abort info:
  ESR = 0x9607
  Exception class = DABT (current EL), IL = 32 bits
  SET = 0, FnV = 0
  EA = 0, S1PTW = 0
Data abort info:
  ISV = 0, ISS = 0x0007
  CM = 0, WnR = 0
swapper pgtable: 4k pages, 48-bit VAs, pgdp = 75ac5e07
[12e9b790] pgd=0027dbffe003, pud=0027dbffd003,
pmd=0027b6d61003, pte=
Internal error: Oops: 9607 [#1] PREEMPT SMP
Modules linked in: xt_conntrack
Process read_all (pid: 20171, stack limit = 0x44ea4095)
CPU: 14 PID: 20171 Comm: read_all Tainted: GB   W
Hardware name: NXP Layerscape LX2160ARDB (DT)
pstate: 8085 (Nzcv daIf -PAN -UAO)
pc : irq_affinity_hint_proc_show+0x54/0xb0
lr : irq_affinity_hint_proc_show+0x4c/0xb0
sp : 1138bc10
x29: 1138bc10 x28: d131d1e0
x27: 007000c0 x26: 8025b9480dc0
x25: 8025b9480da8 x24: 03ff
x23: 8027334f8300 x22: 80272e97d000
x21: 80272e97d0b0 x20: 8025b9480d80
x19: 09a49000 x18: 
x17:  x16: 
x15:  x14: 
x13:  x12: 0040
x11:  x10: 802735b79b88
x9 :  x8 : 
x7 : 09a49848 x6 : 0003
x5 :  x4 : 08157d6c
x3 : 1138bc10 x2 : 12e9b790
x1 :  x0 : 
Call trace:
 irq_affinity_hint_proc_show+0x54/0xb0
 seq_read+0x1b0/0x440
 proc_reg_read+0x80/0xd8
 __vfs_read+0x60/0x178
 vfs_read+0x94/0x150
 ksys_read+0x74/0xf0
 __arm64_sys_read+0x24/0x30
 el0_svc_common.constprop.0+0xd8/0x1a0
 el0_svc_handler+0x34/0x88
 el0_svc+0x10/0x14
Code: f9001bbf 943e0732 f94066c2 b462 (f9400041)
---[ end trace b495bdcb0b3b732b ]---
Kernel panic - not syncing: Fatal exception
SMP: stopping secondary CPUs
SMP: failed to stop secondary CPUs 0,2-4,6,8,11,13-15
Kernel Offset: disabled
CPU features: 0x0,21006008
Memory Limit: none
---[ end Kernel panic - not syncing: Fatal exception ]---

Fix it by using 'cpumask_of(cpu)' to get the cpumask.

Signed-off-by: Hao Si 
Signed-off-by: Lin Chen 
Signed-off-by: Yi Wang 
---
v3: Use cpumask_of(cpu) to get the pre-defined cpumask in the static 
cpu_bit_bitmap array.
v2: Place 'cpumask_t mask' in the driver's private data and while at it, 
rename it to cpu_mask.

 drivers/soc/fsl/dpio/dpio-driver.c | 5 +
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/soc/fsl/dpio/dpio-driver.c 
b/drivers/soc/fsl/dpio/dpio-driver.c
index 7b642c3..7f397b4 100644
--- a/drivers/soc/fsl/dpio/dpio-driver.c
+++ b/drivers/soc/fsl/dpio/dpio-driver.c
@@ -95,7 +95,6 @@ static int register_dpio_irq_handlers(struct fsl_mc_device 
*dpio_dev, int cpu)
 {
int error;
struct fsl_mc_device_irq *irq;
-   cpumask_t mask;
 
irq = dpio_dev->irqs[0];
error = devm_request_irq(&dpio_dev->dev,
@@ -112,9 +111,7 @@ static int register_dpio_irq_handlers(struct fsl_mc_device 
*dpio_dev, int cpu)
}
 
/* set the affinity hint */
-   cpumask_clear(&mask);
-   cpumask_set_cpu(cpu, &mask);
-   if (irq_set_affinity_hint(irq->msi_desc->irq, &mask))
+   if (irq_set_affinity_hint(irq->msi_desc->irq, cpumask_of(cpu)))
dev_err(&dpio_dev->dev,
"irq_set_affinity failed irq %d cpu %d\n",
irq->msi_desc->irq, cpu);
-- 
2.15.2

[PATCH v2] soc: fsl: dpio: Change 'cpumask_t mask' to the driver's private data

2020-10-16 Thread Yi Wang
From: Hao Si 

The local variable 'cpumask_t mask' is in the stack memory, and its address
is assigned to 'desc->affinity' in 'irq_set_affinity_hint()'.
But the memory area where this variable is located is at risk of being
modified.

During LTP testing, the following error was generated:

Unable to handle kernel paging request at virtual address 12e9b790
Mem abort info:
  ESR = 0x9607
  Exception class = DABT (current EL), IL = 32 bits
  SET = 0, FnV = 0
  EA = 0, S1PTW = 0
Data abort info:
  ISV = 0, ISS = 0x0007
  CM = 0, WnR = 0
swapper pgtable: 4k pages, 48-bit VAs, pgdp = 75ac5e07
[12e9b790] pgd=0027dbffe003, pud=0027dbffd003,
pmd=0027b6d61003, pte=
Internal error: Oops: 9607 [#1] PREEMPT SMP
Modules linked in: xt_conntrack
Process read_all (pid: 20171, stack limit = 0x44ea4095)
CPU: 14 PID: 20171 Comm: read_all Tainted: GB   W
Hardware name: NXP Layerscape LX2160ARDB (DT)
pstate: 8085 (Nzcv daIf -PAN -UAO)
pc : irq_affinity_hint_proc_show+0x54/0xb0
lr : irq_affinity_hint_proc_show+0x4c/0xb0
sp : 1138bc10
x29: 1138bc10 x28: d131d1e0
x27: 007000c0 x26: 8025b9480dc0
x25: 8025b9480da8 x24: 03ff
x23: 8027334f8300 x22: 80272e97d000
x21: 80272e97d0b0 x20: 8025b9480d80
x19: 09a49000 x18: 
x17:  x16: 
x15:  x14: 
x13:  x12: 0040
x11:  x10: 802735b79b88
x9 :  x8 : 
x7 : 09a49848 x6 : 0003
x5 :  x4 : 08157d6c
x3 : 1138bc10 x2 : 12e9b790
x1 :  x0 : 
Call trace:
 irq_affinity_hint_proc_show+0x54/0xb0
 seq_read+0x1b0/0x440
 proc_reg_read+0x80/0xd8
 __vfs_read+0x60/0x178
 vfs_read+0x94/0x150
 ksys_read+0x74/0xf0
 __arm64_sys_read+0x24/0x30
 el0_svc_common.constprop.0+0xd8/0x1a0
 el0_svc_handler+0x34/0x88
 el0_svc+0x10/0x14
Code: f9001bbf 943e0732 f94066c2 b462 (f9400041)
---[ end trace b495bdcb0b3b732b ]---
Kernel panic - not syncing: Fatal exception
SMP: stopping secondary CPUs
SMP: failed to stop secondary CPUs 0,2-4,6,8,11,13-15
Kernel Offset: disabled
CPU features: 0x0,21006008
Memory Limit: none
---[ end Kernel panic - not syncing: Fatal exception ]---

Fix it by changing 'cpumask_t mask' to the driver's private data.

Signed-off-by: Hao Si 
Signed-off-by: Lin Chen 
Signed-off-by: Yi Wang 
---
v2: Place 'cpumask_t mask' in the driver's private data and while at it,
rename it to cpu_mask.

 drivers/soc/fsl/dpio/dpio-driver.c | 9 +
 include/linux/fsl/mc.h | 2 ++
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/drivers/soc/fsl/dpio/dpio-driver.c 
b/drivers/soc/fsl/dpio/dpio-driver.c
index 7b642c3..e9d820d 100644
--- a/drivers/soc/fsl/dpio/dpio-driver.c
+++ b/drivers/soc/fsl/dpio/dpio-driver.c
@@ -95,7 +95,7 @@ static int register_dpio_irq_handlers(struct fsl_mc_device 
*dpio_dev, int cpu)
 {
int error;
struct fsl_mc_device_irq *irq;
-   cpumask_t mask;
+   cpumask_t *cpu_mask;
 
irq = dpio_dev->irqs[0];
error = devm_request_irq(&dpio_dev->dev,
@@ -112,9 +112,10 @@ static int register_dpio_irq_handlers(struct fsl_mc_device 
*dpio_dev, int cpu)
}
 
/* set the affinity hint */
-   cpumask_clear(&mask);
-   cpumask_set_cpu(cpu, &mask);
-   if (irq_set_affinity_hint(irq->msi_desc->irq, &mask))
+   cpu_mask = &dpio_dev->mask;
+   cpumask_clear(cpu_mask);
+   cpumask_set_cpu(cpu, cpu_mask);
+   if (irq_set_affinity_hint(irq->msi_desc->irq, cpu_mask))
dev_err(&dpio_dev->dev,
"irq_set_affinity failed irq %d cpu %d\n",
irq->msi_desc->irq, cpu);
diff --git a/include/linux/fsl/mc.h b/include/linux/fsl/mc.h
index a428c61..ebdfb54 100644
--- a/include/linux/fsl/mc.h
+++ b/include/linux/fsl/mc.h
@@ -151,6 +151,7 @@ struct fsl_mc_obj_desc {
 /**
  * struct fsl_mc_device - MC object device object
  * @dev: Linux driver model device object
+ * @mask: cpu mask for affinity_hint
  * @dma_mask: Default DMA mask
  * @flags: MC object device flags
  * @icid: Isolation context ID for the device
@@ -184,6 +185,7 @@ struct fsl_mc_obj_desc {
  */
 struct fsl_mc_device {
struct device dev;
+   cpumask_t mask;
u64 dma_mask;
u16 flags;
u16 icid;
-- 
2.15.2

[PATCH] soc: fsl: dpio: Change 'cpumask_t mask' to global variable

2020-10-14 Thread Yi Wang
From: Hao Si 

The local variable 'cpumask_t mask' is in the stack memory, and its address
is assigned to 'desc->affinity' in 'irq_set_affinity_hint()'.
But the memory area where this variable is located is at risk of being
modified.

During LTP testing, the following error was generated:

Unable to handle kernel paging request at virtual address 12e9b790
Mem abort info:
  ESR = 0x9607
  Exception class = DABT (current EL), IL = 32 bits
  SET = 0, FnV = 0
  EA = 0, S1PTW = 0
Data abort info:
  ISV = 0, ISS = 0x0007
  CM = 0, WnR = 0
swapper pgtable: 4k pages, 48-bit VAs, pgdp = 75ac5e07
[12e9b790] pgd=0027dbffe003, pud=0027dbffd003,
pmd=0027b6d61003, pte=
Internal error: Oops: 9607 [#1] PREEMPT SMP
Modules linked in: xt_conntrack
Process read_all (pid: 20171, stack limit = 0x44ea4095)
CPU: 14 PID: 20171 Comm: read_all Tainted: GB   W
Hardware name: NXP Layerscape LX2160ARDB (DT)
pstate: 8085 (Nzcv daIf -PAN -UAO)
pc : irq_affinity_hint_proc_show+0x54/0xb0
lr : irq_affinity_hint_proc_show+0x4c/0xb0
sp : 1138bc10
x29: 1138bc10 x28: d131d1e0
x27: 007000c0 x26: 8025b9480dc0
x25: 8025b9480da8 x24: 03ff
x23: 8027334f8300 x22: 80272e97d000
x21: 80272e97d0b0 x20: 8025b9480d80
x19: 09a49000 x18: 
x17:  x16: 
x15:  x14: 
x13:  x12: 0040
x11:  x10: 802735b79b88
x9 :  x8 : 
x7 : 09a49848 x6 : 0003
x5 :  x4 : 08157d6c
x3 : 1138bc10 x2 : 12e9b790
x1 :  x0 : 
Call trace:
 irq_affinity_hint_proc_show+0x54/0xb0
 seq_read+0x1b0/0x440
 proc_reg_read+0x80/0xd8
 __vfs_read+0x60/0x178
 vfs_read+0x94/0x150
 ksys_read+0x74/0xf0
 __arm64_sys_read+0x24/0x30
 el0_svc_common.constprop.0+0xd8/0x1a0
 el0_svc_handler+0x34/0x88
 el0_svc+0x10/0x14
Code: f9001bbf 943e0732 f94066c2 b462 (f9400041)
---[ end trace b495bdcb0b3b732b ]---
Kernel panic - not syncing: Fatal exception
SMP: stopping secondary CPUs
SMP: failed to stop secondary CPUs 0,2-4,6,8,11,13-15
Kernel Offset: disabled
CPU features: 0x0,21006008
Memory Limit: none
---[ end Kernel panic - not syncing: Fatal exception ]---

Fix it by changing 'cpumask_t mask' to global variable.

Signed-off-by: Hao Si 
Signed-off-by: Lin Chen 
Signed-off-by: Yi Wang 
---
 drivers/soc/fsl/dpio/dpio-driver.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/soc/fsl/dpio/dpio-driver.c 
b/drivers/soc/fsl/dpio/dpio-driver.c
index 7b642c3..b31ec53 100644
--- a/drivers/soc/fsl/dpio/dpio-driver.c
+++ b/drivers/soc/fsl/dpio/dpio-driver.c
@@ -31,6 +31,7 @@ struct dpio_priv {
struct dpaa2_io *io;
 };
 
+static cpumask_t mask;
 static cpumask_var_t cpus_unused_mask;
 
 static const struct soc_device_attribute ls1088a_soc[] = {
@@ -95,7 +96,6 @@ static int register_dpio_irq_handlers(struct fsl_mc_device 
*dpio_dev, int cpu)
 {
int error;
struct fsl_mc_device_irq *irq;
-   cpumask_t mask;
 
irq = dpio_dev->irqs[0];
error = devm_request_irq(&dpio_dev->dev,
-- 
2.15.2

[PATCH] ASoC: fsl: mpc8610_hpcd: Add missing of_node_put()

2020-07-07 Thread Yi Wang
From: Liao Pingfang 

After finishing using device node got from of_find_compatible_node(),
of_node_put() needs to be called.

Signed-off-by: Liao Pingfang 
---
 sound/soc/fsl/mpc8610_hpcd.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/sound/soc/fsl/mpc8610_hpcd.c b/sound/soc/fsl/mpc8610_hpcd.c
index f7bd900..b3090fe 100644
--- a/sound/soc/fsl/mpc8610_hpcd.c
+++ b/sound/soc/fsl/mpc8610_hpcd.c
@@ -426,9 +426,11 @@ static int __init mpc8610_hpcd_init(void)
guts_np = of_find_compatible_node(NULL, NULL, "fsl,mpc8610-guts");
if (of_address_to_resource(guts_np, 0, &res)) {
pr_err("mpc8610-hpcd: missing/invalid global utilities node\n");
+   of_node_put(guts_np);
return -EINVAL;
}
guts_phys = res.start;
+   of_node_put(guts_np);
 
return platform_driver_register(&mpc8610_hpcd_driver);
 }
-- 
2.9.5



[PATCH] powerpc/nvram: Replace kmalloc with kzalloc in the error message

2020-05-28 Thread Yi Wang
From: Liao Pingfang 

Use kzalloc instead of kmalloc in the error message according to
the previous kzalloc() call.

Signed-off-by: Liao Pingfang 
---
 arch/powerpc/kernel/nvram_64.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c
index fb4f610..c3a0c8d 100644
--- a/arch/powerpc/kernel/nvram_64.c
+++ b/arch/powerpc/kernel/nvram_64.c
@@ -892,7 +892,7 @@ loff_t __init nvram_create_partition(const char *name, int 
sig,
/* Create our OS partition */
new_part = kzalloc(sizeof(*new_part), GFP_KERNEL);
if (!new_part) {
-   pr_err("%s: kmalloc failed\n", __func__);
+   pr_err("%s: kzalloc failed\n", __func__);
return -ENOMEM;
}
 
-- 
2.9.5



Re: [PATCH RESEND v4] reboot: support offline CPUs before reboot

2020-01-16 Thread Hsin-Yi Wang
On Fri, Jan 17, 2020 at 12:16 PM Rob Landley  wrote:
>
> On 1/14/20 5:06 AM, Hsin-Yi Wang wrote:
> > This patch adds a config ARCH_OFFLINE_CPUS_ON_REBOOT, which would offline 
> > cpus in
> > migrate_to_reboot_cpu(). If non reboot cpus are all offlined here, the loop 
> > for
> > checking online cpus would be an empty loop. If architecture don't enable 
> > this
> > config, or some cpus somehow fails to offline, it would fallback to ipi
> > function.
>
> I'm curious:
>
> > +# Select to do a full offline on secondary CPUs before reboot.
> > +config ARCH_OFFLINE_CPUS_ON_REBOOT
> > + bool "Support for offline CPUs before reboot"
> > + depends on HOTPLUG_CPU
>
> The new symbol can't be selected without the other symbol.
>
> > + select ARCH_OFFLINE_CPUS_ON_REBOOT if HOTPLUG_CPU
>
> And the other symbol automatically selects the new one.
>
> Why are you adding a second symbol that means the same thing as the existing 
> symbol?
>

I should make the arch selecting this symbol in other patches and let
the arch decides if they want to opt in, as Thomas pointed out in v5:
https://lore.kernel.org/lkml/8736cgxmxi@nanos.tec.linutronix.de/

Current solution is not sufficient since it only solve problems for
system that enables HOTPLUG_CPU.

> > +#if defined(CONFIG_PM_SLEEP_SMP) || 
> > defined(CONFIG_ARCH_OFFLINE_CPUS_ON_REBOOT)
> > +extern int freeze_secondary_cpus(int primary, bool reboot);
> > +#endif
>
> Couldn't that just test HOTPLUG_CPU? What's the second symbol for? (You can 
> have
> empty stub functions when architectures don't support a thing...)
>
> Rob


Re: [PATCH v5] reboot: support offline CPUs before reboot

2020-01-16 Thread Hsin-Yi Wang
On Wed, Jan 15, 2020 at 7:41 PM Sudeep Holla  wrote:
>
> On Wed, Jan 15, 2020 at 02:34:10PM +0800, Hsin-Yi Wang wrote:
> > Currently system reboots uses architecture specific codes (smp_send_stop)
> > to offline non reboot CPUs. Most architecture's implementation is looping
> > through all non reboot online CPUs and call ipi function to each of them. 
> > Some
> > architecture like arm64, arm, and x86... would set offline masks to cpu 
> > without
> > really offline them. This causes some race condition and kernel warning 
> > comes
> > out sometimes when system reboots.
> >
> > This patch adds a config ARCH_OFFLINE_CPUS_ON_REBOOT, which would offline 
> > cpus in
> > migrate_to_reboot_cpu(). If non reboot cpus are all offlined here, the loop 
> > for
> > checking online cpus would be an empty loop. If architecture don't enable 
> > this
> > config, or some cpus somehow fails to offline, it would fallback to ipi
> > function.
> >
>
> What's the timing impact on systems with large number of CPUs(say 256 or
> more) ? I remember we added some change to reduce the wait times for
> offlining CPUs in system suspend path on arm64, still not negligible.
>

This is not the final solution, but I would still provided some data
points here:

Tested on my arm64 with 4 cpu: 2 a53 and 2 a72.
Offlining 3 cpu takes about 60~65 ms
Offlining 2 cpu(a53+a72 or a72+a72) takes about 42~47 ms
Offlining 1 cpu(a53 or a72) takes about 23~25 ms.

It would take longer time for systems with large number of CPUs.


Re: [PATCH v5] reboot: support offline CPUs before reboot

2020-01-16 Thread Hsin-Yi Wang
On Thu, Jan 16, 2020 at 8:30 AM Thomas Gleixner  wrote:
>
> Hsin-Yi Wang  writes:
>
> > Currently system reboots uses architecture specific codes (smp_send_stop)
> > to offline non reboot CPUs. Most architecture's implementation is looping
> > through all non reboot online CPUs and call ipi function to each of them. 
> > Some
> > architecture like arm64, arm, and x86... would set offline masks to cpu 
> > without
> > really offline them. This causes some race condition and kernel warning 
> > comes
> > out sometimes when system reboots.
>
> 'some race condition and kernel warning' is pretty useless information.
> Please describe exactly which kind of issues are caused by the current
> mechanism. Especially the race conditions are the interesting part (the
> warnings are just a consequence).
>
> > This patch adds a config ARCH_OFFLINE_CPUS_ON_REBOOT, which would
> > offline cpus in
>
> Please read Documentation/process/submitting-patches.rst and search for
> 'This patch'.
>
> > migrate_to_reboot_cpu(). If non reboot cpus are all offlined here, the loop 
> > for
> > checking online cpus would be an empty loop.
>
> This does not make any sense. The issues which you are trying to solve
> are going to be still there when CONFIG_HOTPLUG_CPU is disabled.
>
> > If architecture don't enable this config, or some cpus somehow fails
> > to offline, it would fallback to ipi function.
>
> This is really a half baken solution which keeps the various pointlessly
> different pseudo reboot/kexec offlining implementations around. So with
> this we have yet more code which only works depending on kernel
> configuration and has the issue of potentially not being able to offline
> a CPU. IOW this is going to fail completely in cases where a system is
> in a state which prevents regular hotplug.
>
> The existing pseudo-offline functions have timeouts and eventually a
> fallback, e.g. the NMI fallback on x86. With this proposed regular
> offline solution this will just get stuck w/o a chance to force
> recovery.
>
> While I like the idea and surely agree that the ideal solution is to
> properly shutdown the CPUs on reboot, we need to take a step back and
> look at the minimum requirements for a regular shutdown/reboot and at
> the same time have a look at the requirements for emergency shutdown and
> kexec/kcrash. Having proper information about the race conditions and
> warnings you mentioned would be a good starting point.
>

We saw this issue on regular reboot (not panic) on arm64: If tick
broadcast and smp_send_stop() happen together and the first broadcast
arrives to some idled CPU that hasn't already executed reboot ipi to
run in spinloop, it would try to broadcast to another CPU, but that
target CPU is already marked as offline by set_cpu_online() in reboot
ipi, and a warning comes out since tick_handle_oneshot_broadcast()
would check if it tries to broadcast to offline cpus. Most of the time
the CPU getting the broadcast interrupt is already in the spinloop and
thus isn't going to receive interrupts from the broadcast timer.

[   27.032080] Set kernel.core_pattern before fs.suid_dumpable.
[   27.978628] reboot: Restarting system
[   27.978919] WARNING: CPU: 3 PID: 0 at
/mnt/host/source/src/third_party/kernel/v4.19/kernel/time/tick-broadcast.c:652
tick_handle_oneshot_broadcast+0x1f8/0x214
[   27.978932] Modules linked in: rfcomm uinput bridge stp llc
nf_nat_tftp nf_conntrack_tftp nf_nat_ftp nf_conntrack_ftp esp6 ah6
xfrm6_mode_tunnel xfrm6_mode_transport xfrm4_mode_tunnel
xfrm4_mode_transport ip6t_REJECT ip6t_ipv6header hci_uart btqca
bluetooth ipt_MASQUERADE ecdh_generic lzo_rle lzo_compress zram fuse
cros_ec_sensors_sync cros_ec_sensors_ring cros_ec_light_prox
cros_ec_sensors industrialio_triggered_buffer kfifo_buf
cros_ec_sensors_core ath10k_sdio ath10k_core ath mac80211 cfg80211
asix usbnet mii joydev
[   27.979102] CPU: 3 PID: 0 Comm: swapper/3 Tainted: G S
  4.19.56 #79
[   27.979113] Hardware name: MediaTek krane sku176 board (DT)
[   27.979127] pstate: 0085 (nzcv daIf -PAN -UAO)
[   27.979140] pc : tick_handle_oneshot_broadcast+0x1f8/0x214
[   27.979154] lr : tick_handle_oneshot_broadcast+0x108/0x214
[   27.979162] sp : fff85c851610
[   27.979171] x29: fff85c851670 x28: ff9082785510
[   27.979187] x27: ff90822da700 x26: ff90826169c8
[   27.979202] x25: ff9082616000 x24: 0001
[   27.979217] x23: ff90827854f8 x22: 00067a6599b8
[   27.979232] x21:  x20: 7fff
[   27.979248] x19: ff9082785508 x18: 
[   27.979263] x17:  x16: 
[   27.979278] x15:  x14: fff85bf08040
[   27.979293] x13: ff9082785000 x12: 0069
[   27

Re: [PATCH v5] reboot: support offline CPUs before reboot

2020-01-15 Thread Hsin-Yi Wang
On Wed, Jan 15, 2020 at 5:49 PM Rafael J. Wysocki  wrote:
>
> On Wed, Jan 15, 2020 at 7:35 AM Hsin-Yi Wang  wrote:
> >
> > Currently system reboots uses architecture specific codes (smp_send_stop)
> > to offline non reboot CPUs. Most architecture's implementation is looping
> > through all non reboot online CPUs and call ipi function to each of them. 
> > Some
> > architecture like arm64, arm, and x86... would set offline masks to cpu 
> > without
> > really offline them. This causes some race condition and kernel warning 
> > comes
> > out sometimes when system reboots.
> >
> > This patch adds a config ARCH_OFFLINE_CPUS_ON_REBOOT, which would offline 
> > cpus in
> > migrate_to_reboot_cpu(). If non reboot cpus are all offlined here, the loop 
> > for
> > checking online cpus would be an empty loop. If architecture don't enable 
> > this
> > config, or some cpus somehow fails to offline, it would fallback to ipi
> > function.
> >
> > Opt in this config for architectures that support CONFIG_HOTPLUG_CPU.
> >
> > Signed-off-by: Hsin-Yi Wang 
> > ---
> > Change from v4:
> > * fix a few nits: naming, comments, remove Kconfig text...
> >
> > Change from v3:
> > * Opt in config for architectures that support CONFIG_HOTPLUG_CPU
> > * Merge function offline_secondary_cpus() and freeze_secondary_cpus()
> >   with an additional flag.
>
> This does not seem to be a very good idea, since
> freeze_secondary_cpus() does much more than you need for reboot.
>
> For reboot, you basically only need to do something like this AFAICS:
>
> cpu_maps_update_begin();
>
> for_each_online_cpu(i) {
> if (i != cpu)
> _cpu_down(i, 1, CPUHP_OFFLINE);
> }
> cpu_hotplug_disabled++;
>
> cpu_maps_update_done();
>
> And you may put this into a function defined outside of CONFIG_PM_SLEEP.
>
v2's implementation is similar to this. The conclusion in v2[1] is
that since these 2 functions are similar, we should merge them. I'm
fine both ways but slightly prefer v2's. Maybe wait for others to
comment?

[1] https://lore.kernel.org/lkml/87muarpcwm@vitty.brq.redhat.com/
> >
> > Change from v2:
> > * Add another config instead of configed by CONFIG_HOTPLUG_CPU
>
> So why exactly is this new Kconfig option needed?
>
> Everybody supporting CPU hotplug seems to opt in anyway.
>
Currently we opt-in for all arch that supports HOTPLUG_CPU, but if
some arch decides that this would make reboot slow (or maybe other
reasons), they can choose to opt-out.
I have only tested on arm64 and x86 for now.
> [cut]
>
> >
> > -int freeze_secondary_cpus(int primary)
> > +int freeze_secondary_cpus(int primary, bool reboot)
> >  {
> > int cpu, error = 0;
> >
> > @@ -1237,11 +1237,13 @@ int freeze_secondary_cpus(int primary)
> > if (cpu == primary)
> > continue;
> >
> > -   if (pm_wakeup_pending()) {
> > +#ifdef CONFIG_PM_SLEEP
> > +   if (!reboot && pm_wakeup_pending()) {
> > pr_info("Wakeup pending. Abort CPU freeze\n");
> > error = -EBUSY;
> > break;
> > }
> > +#endif
>
> Please avoid using #ifdefs in function bodies.  This makes the code
> hard to maintain in the long term.
>
> >
> > trace_suspend_resume(TPS("CPU_OFF"), cpu, true);
> > error = _cpu_down(cpu, 1, CPUHP_OFFLINE);
> > @@ -1250,7 +1252,9 @@ int freeze_secondary_cpus(int primary)
> > cpumask_set_cpu(cpu, frozen_cpus);
> > else {
> > pr_err("Error taking CPU%d down: %d\n", cpu, error);
> > -   break;
> > +   /* When rebooting, offline as many CPUs as 
> > possible. */
> > +   if (!reboot)
> > +   break;
> > }
> > }
> >
> > diff --git a/kernel/reboot.c b/kernel/reboot.c
> > index c4d472b7f1b4..12f643b66e57 100644
> > --- a/kernel/reboot.c
> > +++ b/kernel/reboot.c
> > @@ -7,6 +7,7 @@
> >
> >  #define pr_fmt(fmt)"reboot: " fmt
> >
> > +#include 
> >  #include 
> >  #include 
> >  #include 
> > @@ -220,7 +221,9 @@ void migrate_to_reboot_cpu(void)
> > /* The boot cpu is always logical cpu 0 */
> > int cpu = reboot_cpu;
> >
> > +

[PATCH v5] reboot: support offline CPUs before reboot

2020-01-14 Thread Hsin-Yi Wang
Currently system reboots uses architecture specific codes (smp_send_stop)
to offline non reboot CPUs. Most architecture's implementation is looping
through all non reboot online CPUs and call ipi function to each of them. Some
architecture like arm64, arm, and x86... would set offline masks to cpu without
really offline them. This causes some race condition and kernel warning comes
out sometimes when system reboots.

This patch adds a config ARCH_OFFLINE_CPUS_ON_REBOOT, which would offline cpus 
in
migrate_to_reboot_cpu(). If non reboot cpus are all offlined here, the loop for
checking online cpus would be an empty loop. If architecture don't enable this
config, or some cpus somehow fails to offline, it would fallback to ipi
function.

Opt in this config for architectures that support CONFIG_HOTPLUG_CPU.

Signed-off-by: Hsin-Yi Wang 
---
Change from v4:
* fix a few nits: naming, comments, remove Kconfig text...

Change from v3:
* Opt in config for architectures that support CONFIG_HOTPLUG_CPU
* Merge function offline_secondary_cpus() and freeze_secondary_cpus()
  with an additional flag.

Change from v2:
* Add another config instead of configed by CONFIG_HOTPLUG_CPU

Previous related discussion on list:
https://lore.kernel.org/lkml/20190727164450.ga11...@roeck-us.net/
https://lore.kernel.org/patchwork/patch/1117201/
---
 arch/Kconfig  |  5 +
 arch/arm/Kconfig  |  1 +
 arch/arm64/Kconfig|  1 +
 arch/arm64/kernel/hibernate.c |  2 +-
 arch/csky/Kconfig |  1 +
 arch/ia64/Kconfig |  1 +
 arch/mips/Kconfig |  1 +
 arch/parisc/Kconfig   |  1 +
 arch/powerpc/Kconfig  |  1 +
 arch/s390/Kconfig |  1 +
 arch/sh/Kconfig   |  1 +
 arch/sparc/Kconfig|  1 +
 arch/x86/Kconfig  |  1 +
 arch/xtensa/Kconfig   |  1 +
 drivers/power/reset/sc27xx-poweroff.c |  2 +-
 include/linux/cpu.h   |  9 ++---
 kernel/cpu.c  | 12 
 kernel/reboot.c   |  8 
 18 files changed, 41 insertions(+), 9 deletions(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index 48b5e103bdb0..a8db7999cd4c 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -255,6 +255,11 @@ config ARCH_HAS_UNCACHED_SEGMENT
select ARCH_HAS_DMA_PREP_COHERENT
bool
 
+# Select to do a full offline on secondary CPUs before reboot.
+config ARCH_OFFLINE_CPUS_ON_REBOOT
+   bool
+   depends on HOTPLUG_CPU
+
 # Select if arch init_task must go in the __init_task_data section
 config ARCH_TASK_STRUCT_ON_STACK
bool
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 69950fb5be64..d53cc8cb47e3 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -28,6 +28,7 @@ config ARM
select ARCH_KEEP_MEMBLOCK if HAVE_ARCH_PFN_VALID || KEXEC
select ARCH_MIGHT_HAVE_PC_PARPORT
select ARCH_NO_SG_CHAIN if !ARM_HAS_SG_CHAIN
+   select ARCH_OFFLINE_CPUS_ON_REBOOT if HOTPLUG_CPU
select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX
select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT if CPU_V7
select ARCH_SUPPORTS_ATOMIC_RMW
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 9af26ac75d19..9f913bc5c1f6 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -61,6 +61,7 @@ config ARM64
select ARCH_INLINE_SPIN_UNLOCK_IRQ if !PREEMPTION
select ARCH_INLINE_SPIN_UNLOCK_IRQRESTORE if !PREEMPTION
select ARCH_KEEP_MEMBLOCK
+   select ARCH_OFFLINE_CPUS_ON_REBOOT if HOTPLUG_CPU
select ARCH_USE_CMPXCHG_LOCKREF
select ARCH_USE_QUEUED_RWLOCKS
select ARCH_USE_QUEUED_SPINLOCKS
diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c
index 590963c9c609..f7245dfa09d9 100644
--- a/arch/arm64/kernel/hibernate.c
+++ b/arch/arm64/kernel/hibernate.c
@@ -581,5 +581,5 @@ int hibernate_resume_nonboot_cpu_disable(void)
return -ENODEV;
}
 
-   return freeze_secondary_cpus(sleep_cpu);
+   return freeze_secondary_cpus(sleep_cpu, false);
 }
diff --git a/arch/csky/Kconfig b/arch/csky/Kconfig
index 4acef4088de7..0f03e5c3f2fc 100644
--- a/arch/csky/Kconfig
+++ b/arch/csky/Kconfig
@@ -5,6 +5,7 @@ config CSKY
select ARCH_HAS_DMA_PREP_COHERENT
select ARCH_HAS_SYNC_DMA_FOR_CPU
select ARCH_HAS_SYNC_DMA_FOR_DEVICE
+   select ARCH_OFFLINE_CPUS_ON_REBOOT if HOTPLUG_CPU
select ARCH_USE_BUILTIN_BSWAP
select ARCH_USE_QUEUED_RWLOCKS if NR_CPUS>2
select COMMON_CLK
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index bab7cd878464..f12b4b11ee98 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -10,6 +10,7 @@ config IA64
bool
select ARCH_MIGHT_HAVE_PC_PARPORT
select ARCH_MIGHT_HAVE_PC_SERIO
+   select ARCH_OFFLINE_CPUS

[PATCH RESEND v4] reboot: support offline CPUs before reboot

2020-01-14 Thread Hsin-Yi Wang
Currently system reboots uses architecture specific codes (smp_send_stop)
to offline non reboot CPUs. Most architecture's implementation is looping
through all non reboot online CPUs and call ipi function to each of them. Some
architecture like arm64, arm, and x86... would set offline masks to cpu without
really offline them. This causes some race condition and kernel warning comes
out sometimes when system reboots.

This patch adds a config ARCH_OFFLINE_CPUS_ON_REBOOT, which would offline cpus 
in
migrate_to_reboot_cpu(). If non reboot cpus are all offlined here, the loop for
checking online cpus would be an empty loop. If architecture don't enable this
config, or some cpus somehow fails to offline, it would fallback to ipi
function.

Opt in this config for architectures that support CONFIG_HOTPLUG_CPU.

Signed-off-by: Hsin-Yi Wang 
---
Resend v4:
* Cc more people and mailing lists. Also fix a few nits from v4.

Change from v3:
* Opt in config for architectures that support CONFIG_HOTPLUG_CPU
* Merge function offline_secondary_cpus() and freeze_secondary_cpus()
  with an additional flag.

Change from v2:
* Add another config instead of configed by CONFIG_HOTPLUG_CPU

Previous related discussion on list:
https://lore.kernel.org/lkml/20190727164450.ga11...@roeck-us.net/
https://lore.kernel.org/patchwork/patch/1117201/
---
 arch/Kconfig  |  5 +
 arch/arm/Kconfig  |  1 +
 arch/arm64/Kconfig|  1 +
 arch/arm64/kernel/hibernate.c |  2 +-
 arch/csky/Kconfig |  1 +
 arch/ia64/Kconfig |  1 +
 arch/mips/Kconfig |  1 +
 arch/parisc/Kconfig   |  1 +
 arch/powerpc/Kconfig  |  1 +
 arch/s390/Kconfig |  1 +
 arch/sh/Kconfig   |  1 +
 arch/sparc/Kconfig|  1 +
 arch/x86/Kconfig  |  1 +
 arch/xtensa/Kconfig   |  1 +
 drivers/power/reset/sc27xx-poweroff.c |  2 +-
 include/linux/cpu.h   |  9 ++---
 kernel/cpu.c  | 12 
 kernel/reboot.c   |  8 
 18 files changed, 41 insertions(+), 9 deletions(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index 48b5e103bdb0..210095ce2d92 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -255,6 +255,11 @@ config ARCH_HAS_UNCACHED_SEGMENT
select ARCH_HAS_DMA_PREP_COHERENT
bool
 
+# Select to do a full offline on secondary CPUs before reboot.
+config ARCH_OFFLINE_CPUS_ON_REBOOT
+   bool "Support for offline CPUs before reboot"
+   depends on HOTPLUG_CPU
+
 # Select if arch init_task must go in the __init_task_data section
 config ARCH_TASK_STRUCT_ON_STACK
bool
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 69950fb5be64..d53cc8cb47e3 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -28,6 +28,7 @@ config ARM
select ARCH_KEEP_MEMBLOCK if HAVE_ARCH_PFN_VALID || KEXEC
select ARCH_MIGHT_HAVE_PC_PARPORT
select ARCH_NO_SG_CHAIN if !ARM_HAS_SG_CHAIN
+   select ARCH_OFFLINE_CPUS_ON_REBOOT if HOTPLUG_CPU
select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX
select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT if CPU_V7
select ARCH_SUPPORTS_ATOMIC_RMW
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 9af26ac75d19..9f913bc5c1f6 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -61,6 +61,7 @@ config ARM64
select ARCH_INLINE_SPIN_UNLOCK_IRQ if !PREEMPTION
select ARCH_INLINE_SPIN_UNLOCK_IRQRESTORE if !PREEMPTION
select ARCH_KEEP_MEMBLOCK
+   select ARCH_OFFLINE_CPUS_ON_REBOOT if HOTPLUG_CPU
select ARCH_USE_CMPXCHG_LOCKREF
select ARCH_USE_QUEUED_RWLOCKS
select ARCH_USE_QUEUED_SPINLOCKS
diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c
index 590963c9c609..f7245dfa09d9 100644
--- a/arch/arm64/kernel/hibernate.c
+++ b/arch/arm64/kernel/hibernate.c
@@ -581,5 +581,5 @@ int hibernate_resume_nonboot_cpu_disable(void)
return -ENODEV;
}
 
-   return freeze_secondary_cpus(sleep_cpu);
+   return freeze_secondary_cpus(sleep_cpu, false);
 }
diff --git a/arch/csky/Kconfig b/arch/csky/Kconfig
index 4acef4088de7..0f03e5c3f2fc 100644
--- a/arch/csky/Kconfig
+++ b/arch/csky/Kconfig
@@ -5,6 +5,7 @@ config CSKY
select ARCH_HAS_DMA_PREP_COHERENT
select ARCH_HAS_SYNC_DMA_FOR_CPU
select ARCH_HAS_SYNC_DMA_FOR_DEVICE
+   select ARCH_OFFLINE_CPUS_ON_REBOOT if HOTPLUG_CPU
select ARCH_USE_BUILTIN_BSWAP
select ARCH_USE_QUEUED_RWLOCKS if NR_CPUS>2
select COMMON_CLK
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index bab7cd878464..f12b4b11ee98 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -10,6 +10,7 @@ config IA64
bool
select ARCH_MIGHT_HAVE_PC_PARPORT
select A

[PATCH] soc/fsl/qe: fix err handling of ucc_of_parse_tdm

2018-11-22 Thread Yi Wang
From: Wen Yang 

Currently there are 2 problems with the ucc_of_parse_tdm function:
1,a possible null pointer dereference in ucc_of_parse_tdm,
detected by the semantic patch deref_null.cocci,
with the following warning:
drivers/soc/fsl/qe/qe_tdm.c:177:21-24: ERROR: pdev is NULL but dereferenced.
2,dev gets modified, so in any case that devm_iounmap() will fail even when
the new pdev is valid, because the iomap was done with a different pdev.
This patch fixes them.

Suggested-by: Christophe LEROY 
Signed-off-by: Wen Yang 
CC: Julia Lawall 
CC: Zhao Qiang 
---
 drivers/soc/fsl/qe/qe_tdm.c | 20 ++--
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/soc/fsl/qe/qe_tdm.c b/drivers/soc/fsl/qe/qe_tdm.c
index f78c346..9a29f0b 100644
--- a/drivers/soc/fsl/qe/qe_tdm.c
+++ b/drivers/soc/fsl/qe/qe_tdm.c
@@ -47,7 +47,7 @@ int ucc_of_parse_tdm(struct device_node *np, struct ucc_tdm 
*utdm,
struct resource *res;
struct device_node *np2;
static int siram_init_flag;
-   struct platform_device *pdev;
+   struct platform_device *pdev_si, *pdev_siram;
 
sprop = of_get_property(np, "fsl,rx-sync-clock", NULL);
if (sprop) {
@@ -129,16 +129,16 @@ int ucc_of_parse_tdm(struct device_node *np, struct 
ucc_tdm *utdm,
if (!np2)
return -EINVAL;
 
-   pdev = of_find_device_by_node(np2);
-   if (!pdev) {
+   pdev_si = of_find_device_by_node(np2);
+   if (!pdev_si) {
pr_err("%pOFn: failed to lookup pdev\n", np2);
of_node_put(np2);
return -EINVAL;
}
 
of_node_put(np2);
-   res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-   utdm->si_regs = devm_ioremap_resource(&pdev->dev, res);
+   res = platform_get_resource(pdev_si, IORESOURCE_MEM, 0);
+   utdm->si_regs = devm_ioremap_resource(&pdev_si->dev, res);
if (IS_ERR(utdm->si_regs)) {
ret = PTR_ERR(utdm->si_regs);
goto err_miss_siram_property;
@@ -150,8 +150,8 @@ int ucc_of_parse_tdm(struct device_node *np, struct ucc_tdm 
*utdm,
goto err_miss_siram_property;
}
 
-   pdev = of_find_device_by_node(np2);
-   if (!pdev) {
+   pdev_siram = of_find_device_by_node(np2);
+   if (!pdev_siram) {
ret = -EINVAL;
pr_err("%pOFn: failed to lookup pdev\n", np2);
of_node_put(np2);
@@ -159,8 +159,8 @@ int ucc_of_parse_tdm(struct device_node *np, struct ucc_tdm 
*utdm,
}
 
of_node_put(np2);
-   res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-   utdm->siram = devm_ioremap_resource(&pdev->dev, res);
+   res = platform_get_resource(pdev_siram, IORESOURCE_MEM, 0);
+   utdm->siram = devm_ioremap_resource(&pdev_siram->dev, res);
if (IS_ERR(utdm->siram)) {
ret = PTR_ERR(utdm->siram);
goto err_miss_siram_property;
@@ -174,7 +174,7 @@ int ucc_of_parse_tdm(struct device_node *np, struct ucc_tdm 
*utdm,
return ret;
 
 err_miss_siram_property:
-   devm_iounmap(&pdev->dev, utdm->si_regs);
+   devm_iounmap(&pdev_si->dev, utdm->si_regs);
return ret;
 }
 EXPORT_SYMBOL(ucc_of_parse_tdm);
-- 
2.9.5