[PATCH v4 3/3] PCI/DPC: Disable DPC interrupt during suspend

2023-04-23 Thread Kai-Heng Feng
A PCIe service that shares an IRQ with PME may cause a spurious wakeup on
system suspend.

Since AER is conditionally disabled in the previous patch, also apply the
same logic to disable DPC, which depends on AER to work.

PCIe Base Spec 5.0, section 5.2 "Link State Power Management" states
that TLP and DLLP transmission is disabled for a Link in L2/L3 Ready
(D3hot), L2 (D3cold with aux power) and L3 (D3cold), so we don't lose
much here to disable DPC during system suspend.

This is very similar to previous attempts to suspend AER and DPC [1],
but with a different reason.

[1] 
https://lore.kernel.org/linux-pci/20220408153159.106741-1-kai.heng.f...@canonical.com/
Link: https://bugzilla.kernel.org/show_bug.cgi?id=216295

Reviewed-by: Mika Westerberg 
Signed-off-by: Kai-Heng Feng 
---
 drivers/pci/pcie/dpc.c | 26 ++
 1 file changed, 26 insertions(+)

diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c
index a5d7c69b764e..98bdefde6df1 100644
--- a/drivers/pci/pcie/dpc.c
+++ b/drivers/pci/pcie/dpc.c
@@ -385,6 +385,30 @@ static int dpc_probe(struct pcie_device *dev)
return status;
 }
 
+static int dpc_suspend(struct pcie_device *dev)
+{
+   struct pci_dev *pdev = dev->port;
+   u16 ctl;
+
+   pci_read_config_word(pdev, pdev->dpc_cap + PCI_EXP_DPC_CTL, &ctl);
+   ctl &= ~PCI_EXP_DPC_CTL_INT_EN;
+   pci_write_config_word(pdev, pdev->dpc_cap + PCI_EXP_DPC_CTL, ctl);
+
+   return 0;
+}
+
+static int dpc_resume(struct pcie_device *dev)
+{
+   struct pci_dev *pdev = dev->port;
+   u16 ctl;
+
+   pci_read_config_word(pdev, pdev->dpc_cap + PCI_EXP_DPC_CTL, &ctl);
+   ctl |= PCI_EXP_DPC_CTL_INT_EN;
+   pci_write_config_word(pdev, pdev->dpc_cap + PCI_EXP_DPC_CTL, ctl);
+
+   return 0;
+}
+
 static void dpc_remove(struct pcie_device *dev)
 {
struct pci_dev *pdev = dev->port;
@@ -400,6 +424,8 @@ static struct pcie_port_service_driver dpcdriver = {
.port_type  = PCIE_ANY_PORT,
.service= PCIE_PORT_SERVICE_DPC,
.probe  = dpc_probe,
+   .suspend= dpc_suspend,
+   .resume = dpc_resume,
.remove = dpc_remove,
 };
 
-- 
2.34.1



[PATCH v4 1/3] PCI/AER: Factor out interrupt toggling into helpers

2023-04-23 Thread Kai-Heng Feng
There are many places that enable and disable the AER interrupt, so move
them into helpers.

Reviewed-by: Mika Westerberg 
Reviewed-by: Kuppuswamy Sathyanarayanan 

Signed-off-by: Kai-Heng Feng 
---
 drivers/pci/pcie/aer.c | 45 +-
 1 file changed, 27 insertions(+), 18 deletions(-)

diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
index f6c24ded134c..1420e1f27105 100644
--- a/drivers/pci/pcie/aer.c
+++ b/drivers/pci/pcie/aer.c
@@ -1227,6 +1227,28 @@ static irqreturn_t aer_irq(int irq, void *context)
return IRQ_WAKE_THREAD;
 }
 
+static void aer_enable_irq(struct pci_dev *pdev)
+{
+   int aer = pdev->aer_cap;
+   u32 reg32;
+
+   /* Enable Root Port's interrupt in response to error messages */
+   pci_read_config_dword(pdev, aer + PCI_ERR_ROOT_COMMAND, &reg32);
+   reg32 |= ROOT_PORT_INTR_ON_MESG_MASK;
+   pci_write_config_dword(pdev, aer + PCI_ERR_ROOT_COMMAND, reg32);
+}
+
+static void aer_disable_irq(struct pci_dev *pdev)
+{
+   int aer = pdev->aer_cap;
+   u32 reg32;
+
+   /* Disable Root's interrupt in response to error messages */
+   pci_read_config_dword(pdev, aer + PCI_ERR_ROOT_COMMAND, &reg32);
+   reg32 &= ~ROOT_PORT_INTR_ON_MESG_MASK;
+   pci_write_config_dword(pdev, aer + PCI_ERR_ROOT_COMMAND, reg32);
+}
+
 /**
  * aer_enable_rootport - enable Root Port's interrupts when receiving messages
  * @rpc: pointer to a Root Port data structure
@@ -1256,10 +1278,7 @@ static void aer_enable_rootport(struct aer_rpc *rpc)
pci_read_config_dword(pdev, aer + PCI_ERR_UNCOR_STATUS, );
pci_write_config_dword(pdev, aer + PCI_ERR_UNCOR_STATUS, reg32);
 
-   /* Enable Root Port's interrupt in response to error messages */
-   pci_read_config_dword(pdev, aer + PCI_ERR_ROOT_COMMAND, );
-   reg32 |= ROOT_PORT_INTR_ON_MESG_MASK;
-   pci_write_config_dword(pdev, aer + PCI_ERR_ROOT_COMMAND, reg32);
+   aer_enable_irq(pdev);
 }
 
 /**
@@ -1274,10 +1293,7 @@ static void aer_disable_rootport(struct aer_rpc *rpc)
int aer = pdev->aer_cap;
u32 reg32;
 
-   /* Disable Root's interrupt in response to error messages */
-   pci_read_config_dword(pdev, aer + PCI_ERR_ROOT_COMMAND, );
-   reg32 &= ~ROOT_PORT_INTR_ON_MESG_MASK;
-   pci_write_config_dword(pdev, aer + PCI_ERR_ROOT_COMMAND, reg32);
+   aer_disable_irq(pdev);
 
/* Clear Root's error status reg */
pci_read_config_dword(pdev, aer + PCI_ERR_ROOT_STATUS, );
@@ -1372,12 +1388,8 @@ static pci_ers_result_t aer_root_reset(struct pci_dev 
*dev)
 */
aer = root ? root->aer_cap : 0;
 
-   if ((host->native_aer || pcie_ports_native) && aer) {
-   /* Disable Root's interrupt in response to error messages */
-   pci_read_config_dword(root, aer + PCI_ERR_ROOT_COMMAND, );
-   reg32 &= ~ROOT_PORT_INTR_ON_MESG_MASK;
-   pci_write_config_dword(root, aer + PCI_ERR_ROOT_COMMAND, reg32);
-   }
+   if ((host->native_aer || pcie_ports_native) && aer)
+   aer_disable_irq(root);
 
if (type == PCI_EXP_TYPE_RC_EC || type == PCI_EXP_TYPE_RC_END) {
rc = pcie_reset_flr(dev, PCI_RESET_DO_RESET);
@@ -1396,10 +1408,7 @@ static pci_ers_result_t aer_root_reset(struct pci_dev 
*dev)
pci_read_config_dword(root, aer + PCI_ERR_ROOT_STATUS, );
pci_write_config_dword(root, aer + PCI_ERR_ROOT_STATUS, reg32);
 
-   /* Enable Root Port's interrupt in response to error messages */
-   pci_read_config_dword(root, aer + PCI_ERR_ROOT_COMMAND, );
-   reg32 |= ROOT_PORT_INTR_ON_MESG_MASK;
-   pci_write_config_dword(root, aer + PCI_ERR_ROOT_COMMAND, reg32);
+   aer_enable_irq(root);
}
 
return rc ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
-- 
2.34.1



[PATCH v4 2/3] PCI/AER: Disable AER interrupt on suspend

2023-04-23 Thread Kai-Heng Feng
A PCIe service that shares an IRQ with PME may cause a spurious wakeup on
system suspend.

PCIe Base Spec 5.0, section 5.2 "Link State Power Management" states
that TLP and DLLP transmission is disabled for a Link in L2/L3 Ready
(D3hot), L2 (D3cold with aux power) and L3 (D3cold), so we don't lose
much here to disable AER during system suspend.

This is very similar to previous attempts to suspend AER and DPC [1],
but with a different reason.

[1] 
https://lore.kernel.org/linux-pci/20220408153159.106741-1-kai.heng.f...@canonical.com/
Link: https://bugzilla.kernel.org/show_bug.cgi?id=216295

Reviewed-by: Mika Westerberg 
Signed-off-by: Kai-Heng Feng 
---
 drivers/pci/pcie/aer.c | 22 ++
 1 file changed, 22 insertions(+)

diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
index 1420e1f27105..9c07fdbeb52d 100644
--- a/drivers/pci/pcie/aer.c
+++ b/drivers/pci/pcie/aer.c
@@ -1356,6 +1356,26 @@ static int aer_probe(struct pcie_device *dev)
return 0;
 }
 
+static int aer_suspend(struct pcie_device *dev)
+{
+   struct aer_rpc *rpc = get_service_data(dev);
+   struct pci_dev *pdev = rpc->rpd;
+
+   aer_disable_irq(pdev);
+
+   return 0;
+}
+
+static int aer_resume(struct pcie_device *dev)
+{
+   struct aer_rpc *rpc = get_service_data(dev);
+   struct pci_dev *pdev = rpc->rpd;
+
+   aer_enable_irq(pdev);
+
+   return 0;
+}
+
 /**
  * aer_root_reset - reset Root Port hierarchy, RCEC, or RCiEP
  * @dev: pointer to Root Port, RCEC, or RCiEP
@@ -1420,6 +1440,8 @@ static struct pcie_port_service_driver aerdriver = {
.service= PCIE_PORT_SERVICE_AER,
 
.probe  = aer_probe,
+   .suspend= aer_suspend,
+   .resume = aer_resume,
.remove = aer_remove,
 };
 
-- 
2.34.1



Re: BUG : PowerPC RCU: torture test failed with __stack_chk_fail

2023-04-23 Thread Zhouyi Zhou
Thank you, Boqun, for your wonderful analysis!

On Mon, Apr 24, 2023 at 8:33 AM Boqun Feng  wrote:
>
> On Sat, Apr 22, 2023 at 09:28:39PM +0200, Joel Fernandes wrote:
> > On Sat, Apr 22, 2023 at 2:47 PM Zhouyi Zhou  wrote:
> > >
> > > Dear PowerPC and RCU developers:
> > > During the RCU torture test on mainline (on the VM of Opensource Lab
> > > of Oregon State University), SRCU-P failed with __stack_chk_fail:
> > > [  264.381952][   T99] [c6c7bab0] [c10c67c0]
> > > dump_stack_lvl+0x94/0xd8 (unreliable)
> > > [  264.383786][   T99] [c6c7bae0] [c014fc94] 
> > > panic+0x19c/0x468
> > > [  264.385128][   T99] [c6c7bb80] [c10fca24]
> > > __stack_chk_fail+0x24/0x30
> > > [  264.386610][   T99] [c6c7bbe0] [c02293b4]
> > > srcu_gp_start_if_needed+0x5c4/0x5d0
> > > [  264.388188][   T99] [c6c7bc70] [c022f7f4]
> > > srcu_torture_call+0x34/0x50
> > > [  264.389611][   T99] [c6c7bc90] [c022b5e8]
> > > rcu_torture_fwd_prog+0x8c8/0xa60
> > > [  264.391439][   T99] [c6c7be00] [c018e37c] 
> > > kthread+0x15c/0x170
> > > [  264.392792][   T99] [c6c7be50] [c000df94]
> > > ret_from_kernel_thread+0x5c/0x64
> > > The kernel config file can be found in [1].
> > > And I write a bash script to accelerate the bug reproducing [2].
> > > After a week's debugging, I found the cause of the bug is because the
> > > register r10 used to judge for stack overflow is not constant between
> > > context switches.
> > > The assembly code for srcu_gp_start_if_needed is located at [3]:
> > > c0226eb4:   78 6b aa 7d mr  r10,r13
> > > c0226eb8:   14 42 29 7d add r9,r9,r8
> > > c0226ebc:   ac 04 00 7c hwsync
> > > c0226ec0:   10 00 7b 3b addir27,r27,16
> > > c0226ec4:   14 da 29 7d add r9,r9,r27
> > > c0226ec8:   a8 48 00 7d ldarx   r8,0,r9
> > > c0226ecc:   01 00 08 31 addic   r8,r8,1
> > > c0226ed0:   ad 49 00 7d stdcx.  r8,0,r9
> > > c0226ed4:   f4 ff c2 40 bne-c0226ec8
> > > 
> > > c0226ed8:   28 00 21 e9 ld  r9,40(r1)
> > > c0226edc:   78 0c 4a e9 ld  r10,3192(r10)
> > > c0226ee0:   79 52 29 7d xor.r9,r9,r10
> > > c0226ee4:   00 00 40 39 li  r10,0
> > > c0226ee8:   b8 03 82 40 bne c02272a0
> > > 
> > > by debugging, I see the r10 is assigned with r13 on c0226eb4,
> > > but if there is a context-switch before c0226edc, a false
> > > positive will be reported.
> > >
> > > [1] http://154.220.3.115/logs/0422/configformainline.txt
> > > [2] 154.220.3.115/logs/0422/whilebash.sh
> > > [3] http://154.220.3.115/logs/0422/srcu_gp_start_if_needed.txt
> > >
> > > My analysis and debugging may not be correct, but the bug is easily
> > > reproducible.
> >
> > If this is a bug in the stack smashing protection as you seem to hint,
> > I wonder if you see the issue with a specific gcc version and is a
> > compiler-specific issue. It's hard to say, but considering this I
>
> Very likely, more asm code from Zhouyi's link:
>
> This is the __srcu_read_unlock_nmisafe(), since "hwsync" is
> smp_mb__{after,before}_atomic(), and the following code is first
> barrier then atomic, so it's the unlock.
>
> c0226eb4:   78 6b aa 7d mr  r10,r13
>
> ^ r13 is the pointer to percpu data on PPC64 kernel, and it's also
> the pointer to TLS data for userspace code.
>
> c0226eb8:   14 42 29 7d add r9,r9,r8
> c0226ebc:   ac 04 00 7c hwsync
> c0226ec0:   10 00 7b 3b addir27,r27,16
> c0226ec4:   14 da 29 7d add r9,r9,r27
> c0226ec8:   a8 48 00 7d ldarx   r8,0,r9
> c0226ecc:   01 00 08 31 addic   r8,r8,1
> c0226ed0:   ad 49 00 7d stdcx.  r8,0,r9
> c0226ed4:   f4 ff c2 40 bne-c0226ec8 
> 
> c0226ed8:   28 00 21 e9 ld  r9,40(r1)
> c0226edc:   78 0c 4a e9 ld  r10,3192(r10)
>
> here I think that the compiler is using r10 as an alias to r13, since
> for userspace program, it's safe to assume the TLS pointer doesn't
> change. However this is not true for kernel percpu pointer.
I learned a lot from your analysis, this is a fruitful learning
journey for me ;-)
>
> The real intention here is to compare 40(r1) vs 3192(r13) for stack
> guard checking, however since r13 is the percpu pointer in kernel, so
> the value of r13 can be changed if the thread gets scheduled to a
> different CPU after reading r13 for r10.
>
> __srcu_read_unlock_nmisafe() triggers this issue, because:
>
> * it contains a read from r13
> * it locates at the very end of srcu_gp_start_if_needed().
>
> This gives the compiler more opportunity to "optimize" a read from r13
> 

[PATCH v2 05/13] ASoC: fsl: use asoc_dummy_dlc

2023-04-23 Thread Kuninori Morimoto
Now we can share asoc_dummy_dlc. This patch uses it.

Signed-off-by: Kuninori Morimoto 
---
 sound/soc/fsl/imx-audmix.c | 14 +-
 sound/soc/fsl/imx-card.c   | 11 +--
 sound/soc/fsl/imx-rpmsg.c  |  3 +--
 sound/soc/fsl/imx-spdif.c  |  8 +++-
 4 files changed, 10 insertions(+), 26 deletions(-)

diff --git a/sound/soc/fsl/imx-audmix.c b/sound/soc/fsl/imx-audmix.c
index b2c5aca92c6b..efbcd4a65ca8 100644
--- a/sound/soc/fsl/imx-audmix.c
+++ b/sound/soc/fsl/imx-audmix.c
@@ -207,8 +207,8 @@ static int imx_audmix_probe(struct platform_device *pdev)
for (i = 0; i < num_dai; i++) {
struct snd_soc_dai_link_component *dlc;
 
-   /* for CPU/Codec x 2 */
-   dlc = devm_kcalloc(>dev, 4, sizeof(*dlc), GFP_KERNEL);
+   /* for CPU x 2 */
+   dlc = devm_kcalloc(>dev, 2, sizeof(*dlc), GFP_KERNEL);
if (!dlc)
return -ENOMEM;
 
@@ -244,7 +244,7 @@ static int imx_audmix_probe(struct platform_device *pdev)
 */
priv->dai[i].cpus   =
priv->dai[i].platforms  = [0];
-   priv->dai[i].codecs = [1];
+   priv->dai[i].codecs = _dummy_dlc;
 
priv->dai[i].num_cpus = 1;
priv->dai[i].num_codecs = 1;
@@ -252,8 +252,6 @@ static int imx_audmix_probe(struct platform_device *pdev)
 
priv->dai[i].name = dai_name;
priv->dai[i].stream_name = "HiFi-AUDMIX-FE";
-   priv->dai[i].codecs->dai_name = "snd-soc-dummy-dai";
-   priv->dai[i].codecs->name = "snd-soc-dummy";
priv->dai[i].cpus->of_node = args.np;
priv->dai[i].cpus->dai_name = dev_name(_pdev->dev);
priv->dai[i].dynamic = 1;
@@ -270,15 +268,13 @@ static int imx_audmix_probe(struct platform_device *pdev)
be_cp = devm_kasprintf(>dev, GFP_KERNEL,
   "AUDMIX-Capture-%d", i);
 
-   priv->dai[num_dai + i].cpus = [2];
-   priv->dai[num_dai + i].codecs   = [3];
+   priv->dai[num_dai + i].cpus = [1];
+   priv->dai[num_dai + i].codecs   = _dummy_dlc;
 
priv->dai[num_dai + i].num_cpus = 1;
priv->dai[num_dai + i].num_codecs = 1;
 
priv->dai[num_dai + i].name = be_name;
-   priv->dai[num_dai + i].codecs->dai_name = "snd-soc-dummy-dai";
-   priv->dai[num_dai + i].codecs->name = "snd-soc-dummy";
priv->dai[num_dai + i].cpus->of_node = audmix_np;
priv->dai[num_dai + i].cpus->dai_name = be_name;
priv->dai[num_dai + i].no_pcm = 1;
diff --git a/sound/soc/fsl/imx-card.c b/sound/soc/fsl/imx-card.c
index 64a4d7e9db60..78e2e3932ba5 100644
--- a/sound/soc/fsl/imx-card.c
+++ b/sound/soc/fsl/imx-card.c
@@ -615,17 +615,8 @@ static int imx_card_parse_of(struct imx_card_data *data)
plat_data->type = CODEC_AK5552;
 
} else {
-   dlc = devm_kzalloc(dev, sizeof(*dlc), GFP_KERNEL);
-   if (!dlc) {
-   ret = -ENOMEM;
-   goto err;
-   }
-
-   link->codecs = dlc;
+   link->codecs = _dummy_dlc;
link->num_codecs = 1;
-
-   link->codecs->dai_name = "snd-soc-dummy-dai";
-   link->codecs->name = "snd-soc-dummy";
}
 
if (!strncmp(link->name, "HiFi-ASRC-FE", 12)) {
diff --git a/sound/soc/fsl/imx-rpmsg.c b/sound/soc/fsl/imx-rpmsg.c
index 89178106fe2c..93fc976e98dc 100644
--- a/sound/soc/fsl/imx-rpmsg.c
+++ b/sound/soc/fsl/imx-rpmsg.c
@@ -92,8 +92,7 @@ static int imx_rpmsg_probe(struct platform_device *pdev)
/* Optional codec node */
ret = of_parse_phandle_with_fixed_args(np, "audio-codec", 0, 0, );
if (ret) {
-   data->dai.codecs->dai_name = "snd-soc-dummy-dai";
-   data->dai.codecs->name = "snd-soc-dummy";
+   *data->dai.codecs = asoc_dummy_dlc;
} else {
struct clk *clk;
 
diff --git a/sound/soc/fsl/imx-spdif.c b/sound/soc/fsl/imx-spdif.c
index ab978431ac98..44463f92e522 100644
--- a/sound/soc/fsl/imx-spdif.c
+++ b/sound/soc/fsl/imx-spdif.c
@@ -26,7 +26,7 @@ static int imx_spdif_audio_probe(struct platform_device *pdev)
}
 
data = devm_kzalloc(>dev, sizeof(*data), GFP_KERNEL);
-   comp = devm_kzalloc(>dev, 2 * sizeof(*comp), GFP_KERNEL);
+   comp = devm_kzalloc(>dev, sizeof(*comp), GFP_KERNEL);
if (!data || !comp) {
ret = -ENOMEM;
goto end;
@@ -37,8 +37,8 @@ static int imx_spdif_audio_probe(struct platform_device *pdev)
 * platform is using soc-generic-dmaengine-pcm
 */
data->dai.cpus  =
-   

[PATCH v2 00/13] ASoC: add and use asoc_dummy_dlc

2023-04-23 Thread Kuninori Morimoto


Hi Mark

This is the v2 patch-set for asoc_dummy_dlc.

Many ASoC drivers use a dummy DAI.
I have 2 concerns about it. The 1st is that there is no guarantee that local
strings ("snd-soc-dummy-dai", "snd-soc-dummy") are kept until the card
is bound if they were set in a subfunction.
The 2nd is that we can use a common snd_soc_dai_link_component for it.
This patch-set adds a common asoc_dummy_dlc and uses it.

v1 -> v2
- Separate intel patch into 3
- Topology codec doesn't use asoc_dummy_dlc

Link: https://lore.kernel.org/r/874jpe3uqh.wl-kuninori.morimoto...@renesas.com

Kuninori Morimoto (13):
  ASoC: soc-utils.c: add asoc_dummy_dlc
  ASoC: ti: use asoc_dummy_dlc
  ASoC: sof: use asoc_dummy_dlc
  ASoC: amd: use asoc_dummy_dlc
  ASoC: fsl: use asoc_dummy_dlc
  ASoC: qcom: use asoc_dummy_dlc
  ASoC: atmel: use asoc_dummy_dlc
  ASoC: meson: use asoc_dummy_dlc
  ASoC: intel: avs: use asoc_dummy_dlc
  ASoC: intel: sof: use asoc_dummy_dlc
  ASoC: intel: skylake: use asoc_dummy_dlc
  ASoC: simple_card_utils.c: use asoc_dummy_dlc
  ASoC: soc-topology.c: add comment for Platform/Codec

 include/sound/simple_card_utils.h|  1 -
 include/sound/soc.h  |  1 +
 sound/soc/amd/acp/acp-mach-common.c  | 43 
 sound/soc/atmel/atmel-classd.c   |  8 ++--
 sound/soc/atmel/atmel-pdmic.c|  8 ++--
 sound/soc/fsl/imx-audmix.c   | 14 +++
 sound/soc/fsl/imx-card.c | 11 +
 sound/soc/fsl/imx-rpmsg.c|  3 +-
 sound/soc/fsl/imx-spdif.c|  8 ++--
 sound/soc/generic/simple-card-utils.c|  9 +---
 sound/soc/intel/avs/boards/i2s_test.c|  6 +--
 sound/soc/intel/boards/ehl_rt5660.c  |  8 +---
 sound/soc/intel/boards/skl_hda_dsp_generic.c |  8 +---
 sound/soc/intel/boards/sof_cs42l42.c | 11 +
 sound/soc/intel/boards/sof_es8336.c  | 11 +
 sound/soc/intel/boards/sof_nau8825.c | 11 +
 sound/soc/intel/boards/sof_pcm512x.c |  3 +-
 sound/soc/intel/boards/sof_rt5682.c  | 14 ++-
 sound/soc/intel/boards/sof_sdw.c | 13 +-
 sound/soc/intel/boards/sof_ssp_amp.c | 18 +++-
 sound/soc/meson/axg-card.c   |  8 ++--
 sound/soc/meson/meson-card-utils.c   | 10 +
 sound/soc/qcom/common.c  | 11 +
 sound/soc/soc-topology.c | 22 +-
 sound/soc/soc-utils.c|  7 
 sound/soc/sof/nocodec.c  |  8 ++--
 sound/soc/ti/omap-hdmi.c |  8 ++--
 27 files changed, 89 insertions(+), 194 deletions(-)

-- 
2.25.1



Re: BUG : PowerPC RCU: torture test failed with __stack_chk_fail

2023-04-23 Thread Boqun Feng
On Sat, Apr 22, 2023 at 09:28:39PM +0200, Joel Fernandes wrote:
> On Sat, Apr 22, 2023 at 2:47 PM Zhouyi Zhou  wrote:
> >
> > Dear PowerPC and RCU developers:
> > During the RCU torture test on mainline (on the VM of Opensource Lab
> > of Oregon State University), SRCU-P failed with __stack_chk_fail:
> > [  264.381952][   T99] [c6c7bab0] [c10c67c0]
> > dump_stack_lvl+0x94/0xd8 (unreliable)
> > [  264.383786][   T99] [c6c7bae0] [c014fc94] 
> > panic+0x19c/0x468
> > [  264.385128][   T99] [c6c7bb80] [c10fca24]
> > __stack_chk_fail+0x24/0x30
> > [  264.386610][   T99] [c6c7bbe0] [c02293b4]
> > srcu_gp_start_if_needed+0x5c4/0x5d0
> > [  264.388188][   T99] [c6c7bc70] [c022f7f4]
> > srcu_torture_call+0x34/0x50
> > [  264.389611][   T99] [c6c7bc90] [c022b5e8]
> > rcu_torture_fwd_prog+0x8c8/0xa60
> > [  264.391439][   T99] [c6c7be00] [c018e37c] 
> > kthread+0x15c/0x170
> > [  264.392792][   T99] [c6c7be50] [c000df94]
> > ret_from_kernel_thread+0x5c/0x64
> > The kernel config file can be found in [1].
> > And I write a bash script to accelerate the bug reproducing [2].
> > After a week's debugging, I found the cause of the bug is because the
> > register r10 used to judge for stack overflow is not constant between
> > context switches.
> > The assembly code for srcu_gp_start_if_needed is located at [3]:
> > c0226eb4:   78 6b aa 7d mr  r10,r13
> > c0226eb8:   14 42 29 7d add r9,r9,r8
> > c0226ebc:   ac 04 00 7c hwsync
> > c0226ec0:   10 00 7b 3b addir27,r27,16
> > c0226ec4:   14 da 29 7d add r9,r9,r27
> > c0226ec8:   a8 48 00 7d ldarx   r8,0,r9
> > c0226ecc:   01 00 08 31 addic   r8,r8,1
> > c0226ed0:   ad 49 00 7d stdcx.  r8,0,r9
> > c0226ed4:   f4 ff c2 40 bne-c0226ec8
> > 
> > c0226ed8:   28 00 21 e9 ld  r9,40(r1)
> > c0226edc:   78 0c 4a e9 ld  r10,3192(r10)
> > c0226ee0:   79 52 29 7d xor.r9,r9,r10
> > c0226ee4:   00 00 40 39 li  r10,0
> > c0226ee8:   b8 03 82 40 bne c02272a0
> > 
> > by debugging, I see the r10 is assigned with r13 on c0226eb4,
> > but if there is a context-switch before c0226edc, a false
> > positive will be reported.
> >
> > [1] http://154.220.3.115/logs/0422/configformainline.txt
> > [2] 154.220.3.115/logs/0422/whilebash.sh
> > [3] http://154.220.3.115/logs/0422/srcu_gp_start_if_needed.txt
> >
> > My analysis and debugging may not be correct, but the bug is easily
> > reproducible.
> 
> If this is a bug in the stack smashing protection as you seem to hint,
> I wonder if you see the issue with a specific gcc version and is a
> compiler-specific issue. It's hard to say, but considering this I

Very likely, more asm code from Zhouyi's link:

This is the __srcu_read_unlock_nmisafe(), since "hwsync" is
smp_mb__{after,before}_atomic(), and the following code is first
barrier then atomic, so it's the unlock.

c0226eb4:   78 6b aa 7d mr  r10,r13

^ r13 is the pointer to percpu data on PPC64 kernel, and it's also
the pointer to TLS data for userspace code.

c0226eb8:   14 42 29 7d add r9,r9,r8
c0226ebc:   ac 04 00 7c hwsync
c0226ec0:   10 00 7b 3b addir27,r27,16
c0226ec4:   14 da 29 7d add r9,r9,r27
c0226ec8:   a8 48 00 7d ldarx   r8,0,r9
c0226ecc:   01 00 08 31 addic   r8,r8,1
c0226ed0:   ad 49 00 7d stdcx.  r8,0,r9
c0226ed4:   f4 ff c2 40 bne-c0226ec8 

c0226ed8:   28 00 21 e9 ld  r9,40(r1)
c0226edc:   78 0c 4a e9 ld  r10,3192(r10)

here I think that the compiler is using r10 as an alias to r13, since
for userspace program, it's safe to assume the TLS pointer doesn't
change. However this is not true for kernel percpu pointer.

The real intention here is to compare 40(r1) vs 3192(r13) for stack
guard checking, however since r13 is the percpu pointer in kernel, so
the value of r13 can be changed if the thread gets scheduled to a
different CPU after reading r13 for r10.

__srcu_read_unlock_nmisafe() triggers this issue, because:

* it contains a read from r13
* it locates at the very end of srcu_gp_start_if_needed().

This gives the compiler more opportunity to "optimize" a read from r13
away.

c0226ee0:   79 52 29 7d xor.r9,r9,r10
c0226ee4:   00 00 40 39 li  r10,0
c0226ee8:   b8 03 82 40 bne c02272a0 


As a result, here triggers __stack_chk_fail if mis-match.

If I'm correct, the following should be a workaround:

diff --git 

[PATCH v10 5/5] powerpc/kexec: add crash memory hotplug support

2023-04-23 Thread Sourabh Jain
Extend PowerPC arch crash hotplug handler to support memory hotplug
events. Since elfcorehdr is used to exchange the memory info between the
kernels hence it needs to be recreated to reflect the changes due to
memory hotplug events.

The way memory hotplug events are handled on PowerPC and the notifier
call chain used in generic code to trigger the arch crash handler, the
process to recreate the elfcorehdr is different for memory add and
remove case.

For the memory remove case, the memory change notifier call chain is
triggered first and then the memblock regions are updated. Whereas for the
memory hot add case, the memblock regions are updated before invoking the
memory change notifier call chain.

On PowerPC, memblock regions list is used to prepare the elfcorehdr. In
case of memory hot remove the memblock regions are updated after the
arch crash hotplug handler is triggered, hence an additional step is
taken to ensure that memory ranges used to prepare elfcorehdr do not
include hot removed memory.

When memory is hot removed, it is possible that the memory region count may
increase. So to accommodate a growing number of memory regions, the
elfcorehdr kexec segment is built with additional buffer space.

The changes done here will also work for the kexec_load system call given
that the kexec tool builds the elfcorehdr with additional space to
accommodate future memory regions, as is done for the kexec_file_load
system call in the kernel.

Signed-off-by: Sourabh Jain 
Reviewed-by: Laurent Dufour 
---
 arch/powerpc/include/asm/kexec_ranges.h |  1 +
 arch/powerpc/kexec/core_64.c| 77 +-
 arch/powerpc/kexec/file_load_64.c   | 36 ++-
 arch/powerpc/kexec/ranges.c | 85 +
 4 files changed, 195 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/include/asm/kexec_ranges.h 
b/arch/powerpc/include/asm/kexec_ranges.h
index f83866a19e870..802abf580cf0f 100644
--- a/arch/powerpc/include/asm/kexec_ranges.h
+++ b/arch/powerpc/include/asm/kexec_ranges.h
@@ -7,6 +7,7 @@
 void sort_memory_ranges(struct crash_mem *mrngs, bool merge);
 struct crash_mem *realloc_mem_ranges(struct crash_mem **mem_ranges);
 int add_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size);
+int remove_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size);
 int add_tce_mem_ranges(struct crash_mem **mem_ranges);
 int add_initrd_mem_range(struct crash_mem **mem_ranges);
 #ifdef CONFIG_PPC_64S_HASH_MMU
diff --git a/arch/powerpc/kexec/core_64.c b/arch/powerpc/kexec/core_64.c
index 147ea6288a526..01a764b1c9b07 100644
--- a/arch/powerpc/kexec/core_64.c
+++ b/arch/powerpc/kexec/core_64.c
@@ -19,6 +19,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -547,6 +548,76 @@ int update_cpus_node(void *fdt)
 #undef pr_fmt
 #define pr_fmt(fmt) "crash hp: " fmt
 
+/**
+ * update_crash_elfcorehdr() - Recreate the elfcorehdr and replace it with old
+ *elfcorehdr in the kexec segment array.
+ * @image: the active struct kimage
+ * @arg: struct memory_notify data handler
+ */
+static void update_crash_elfcorehdr(struct kimage *image, struct memory_notify 
*mn)
+{
+   int ret;
+   struct crash_mem *cmem = NULL;
+   struct kexec_segment *ksegment;
+   void *ptr, *mem, *elfbuf = NULL;
+   unsigned long elfsz, memsz, base_addr, size;
+
+   ksegment = >segment[image->elfcorehdr_index];
+   mem = (void *) ksegment->mem;
+   memsz = ksegment->memsz;
+
+   ret = get_crash_memory_ranges();
+   if (ret) {
+   pr_err("Failed to get crash mem range\n");
+   return;
+   }
+
+   /*
+* The hot unplugged memory is not yet removed from crash memory
+* ranges, remove it here.
+*/
+   if (image->hp_action == KEXEC_CRASH_HP_REMOVE_MEMORY) {
+   base_addr = PFN_PHYS(mn->start_pfn);
+   size = mn->nr_pages * PAGE_SIZE;
+   ret = remove_mem_range(, base_addr, size);
+   if (ret) {
+   pr_err("Failed to remove hot-unplugged from crash 
memory ranges.\n");
+   return;
+   }
+   }
+
+   ret = crash_prepare_elf64_headers(cmem, false, , );
+   if (ret) {
+   pr_err("Failed to prepare elf header\n");
+   return;
+   }
+
+   /*
+* It is unlikely that kernel hit this because elfcorehdr kexec
+* segment (memsz) is built with addition space to accommodate growing
+* number of crash memory ranges while loading the kdump kernel. It is
+* Just to avoid any unforeseen case.
+*/
+   if (elfsz > memsz) {
+   pr_err("Updated crash elfcorehdr elfsz %lu > memsz %lu", elfsz, 
memsz);
+   goto out;
+   }
+
+   ptr = __va(mem);
+   if (ptr) {
+   /* Temporarily invalidate the crash image while it is replaced 
*/
+   xchg(_crash_image, NULL);
+
+ 

[PATCH v10 4/5] crash: forward memory_notify args to arch crash hotplug handler

2023-04-23 Thread Sourabh Jain
On PowerPC, memblock regions are used to prepare the elfcorehdr, which
describes the memory regions of the running kernel to the kdump kernel.
Since the notifier used for the memory hotplug crash handler gets
initiated before the update of the memblock region happens (as depicted
below) the newly prepared elfcorehdr still holds the old memory regions.
If the elfcorehdr is prepared with stale memblock regions then the newly
prepared elfcorehdr will still be holding stale memory regions. And dump
collection with stale elfcorehdr will lead to dump collection failure or
incomplete dump collection.

The sequence of actions done on PowerPC when LMB memory is hot removed:

 Initiate memory hot remove
  |
  v
 offline pages
  |
  v
 initiate memory notify call
 chain for MEM_OFFLINE event  <---> Prepare new elfcorehdr and replace
it with old one
  |
  v
 update memblock regions

Such challenges only exist for memory remove case. For the memory add
case the memory regions are updated first and then memory notify calls
the arch crash hotplug handler to update the elfcorehdr.

This patch passes additional information about the hot removed LMB to
the arch crash hotplug handler in the form of memory_notify object.

How will passing memory_notify to the arch crash hotplug handler help?

memory_notify holds the start PFN and page count of the hot removed
memory. From these, the base address and the size of the hot removed
memory can be calculated and used to keep the hot removed memory region
out of the elfcorehdr.

Signed-off-by: Sourabh Jain 
Reviewed-by: Laurent Dufour 
---
 arch/powerpc/include/asm/kexec.h |  2 +-
 arch/powerpc/kexec/core_64.c |  3 ++-
 arch/x86/include/asm/kexec.h |  2 +-
 arch/x86/kernel/crash.c  |  3 ++-
 include/linux/kexec.h|  2 +-
 kernel/crash_core.c  | 14 +++---
 6 files changed, 14 insertions(+), 12 deletions(-)

diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h
index f01ba767af56e..7e811bad5ec92 100644
--- a/arch/powerpc/include/asm/kexec.h
+++ b/arch/powerpc/include/asm/kexec.h
@@ -104,7 +104,7 @@ struct crash_mem;
 int update_cpus_node(void *fdt);
 int get_crash_memory_ranges(struct crash_mem **mem_ranges);
 #if defined(CONFIG_CRASH_HOTPLUG)
-void arch_crash_handle_hotplug_event(struct kimage *image);
+void arch_crash_handle_hotplug_event(struct kimage *image, void *arg);
 #define arch_crash_handle_hotplug_event arch_crash_handle_hotplug_event
 #endif
 #endif
diff --git a/arch/powerpc/kexec/core_64.c b/arch/powerpc/kexec/core_64.c
index 611b89bcea2be..147ea6288a526 100644
--- a/arch/powerpc/kexec/core_64.c
+++ b/arch/powerpc/kexec/core_64.c
@@ -551,10 +551,11 @@ int update_cpus_node(void *fdt)
  * arch_crash_hotplug_handler() - Handle crash CPU/Memory hotplug events to 
update the
  *necessary kexec segments based on the 
hotplug event.
  * @image: the active struct kimage
+ * @arg: struct memory_notify handler for memory add/remove case and NULL for 
CPU case.
  *
  * Update FDT segment to include newly added CPU. No action for CPU remove 
case.
  */
-void arch_crash_handle_hotplug_event(struct kimage *image)
+void arch_crash_handle_hotplug_event(struct kimage *image, void *arg)
 {
void *fdt, *ptr;
unsigned long mem;
diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h
index 1bc852ce347d4..70c3b23b468b6 100644
--- a/arch/x86/include/asm/kexec.h
+++ b/arch/x86/include/asm/kexec.h
@@ -213,7 +213,7 @@ extern crash_vmclear_fn __rcu *crash_vmclear_loaded_vmcss;
 extern void kdump_nmi_shootdown_cpus(void);
 
 #ifdef CONFIG_CRASH_HOTPLUG
-void arch_crash_handle_hotplug_event(struct kimage *image);
+void arch_crash_handle_hotplug_event(struct kimage *image, void *arg);
 #define arch_crash_handle_hotplug_event arch_crash_handle_hotplug_event
 
 #ifdef CONFIG_HOTPLUG_CPU
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index ead602636f3e0..b45d13193b579 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -445,11 +445,12 @@ int crash_load_segments(struct kimage *image)
 /**
  * arch_crash_handle_hotplug_event() - Handle hotplug elfcorehdr changes
  * @image: the active struct kimage
+ * @arg: struct memory_notify handler for memory add/remove case and NULL for 
CPU case.
  *
  * The new elfcorehdr is prepared in a kernel buffer, and then it is
  * written on top of the existing/old elfcorehdr.
  */
-void arch_crash_handle_hotplug_event(struct kimage *image)
+void arch_crash_handle_hotplug_event(struct kimage *image, void *arg)
 {
void *elfbuf = NULL, *old_elfcorehdr;
unsigned long nr_mem_ranges;
diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index 0ac41f48de0b1..69765e6a92d0d 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -506,7 +506,7 @@ static inline void 

[PATCH v10 3/5] powerpc/crash: add crash CPU hotplug support

2023-04-23 Thread Sourabh Jain
Introduce powerpc crash hotplug handler to update the necessary kexec
segments in the kernel on CPU/Memory hotplug events. Currently, these
updates are done by monitoring CPU/Memory hotplug events in userspace.

A common crash hotplug handler is triggered from generic infrastructure
for both CPU/Memory hotplug events. But in this patch, crash updates are
handled only for CPU hotplug events. Support for the crash update on
memory hotplug events is added in upcoming patches.

The elfcorehdr segment is used to exchange the CPU and other
dump-related information between the kernels. Ideally, the elfcorehdr
segment needs to be recreated on CPU hotplug events to reflect the
changes. But on powerpc, the elfcorehdr is built with possible CPUs
hence there is no need to update/recreate the elfcorehdr on CPU hotplug
events.

In addition to elfcorehdr, there is another kexec segment that holds CPU
data on powerpc: the FDT (Flattened Device Tree). During the kdump kernel
boot, the crashing CPU is expected to be present in the FDT; otherwise
the kdump kernel boot fails.

So the only action needed on powerpc to handle a crash CPU hotplug
event is to add hot-added CPUs to the kdump FDT segment, which avoids a
kdump kernel boot failure. For the CPU hot-add event, the FDT segment is
updated with the hot-added CPU. Since there is no need to remove
hot-unplugged CPUs from the FDT segment, no action is taken for the CPU
hot-remove event.

To accommodate a growing number of CPUs, FDT is built with additional
buffer space to ensure that it can hold possible CPU nodes.

The changes done here will also work for the kexec_load system call
given that the kexec tool builds the FDT segment with additional space
to accommodate possible CPU nodes.

Since memory crash hotplug support is not there yet, the crash hotplug
handler simply warns the user and returns on memory hotplug events.

Signed-off-by: Sourabh Jain 
Reviewed-by: Laurent Dufour 
---
 arch/powerpc/include/asm/kexec.h  |  4 ++
 arch/powerpc/kexec/core_64.c  | 61 +++
 arch/powerpc/kexec/elf_64.c   | 12 +-
 arch/powerpc/kexec/file_load_64.c | 14 +++
 4 files changed, 90 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h
index 8090ad7d97d9d..f01ba767af56e 100644
--- a/arch/powerpc/include/asm/kexec.h
+++ b/arch/powerpc/include/asm/kexec.h
@@ -103,6 +103,10 @@ void kexec_copy_flush(struct kimage *image);
 struct crash_mem;
 int update_cpus_node(void *fdt);
 int get_crash_memory_ranges(struct crash_mem **mem_ranges);
+#if defined(CONFIG_CRASH_HOTPLUG)
+void arch_crash_handle_hotplug_event(struct kimage *image);
+#define arch_crash_handle_hotplug_event arch_crash_handle_hotplug_event
+#endif
 #endif
 
 #if defined(CONFIG_CRASH_DUMP) && defined(CONFIG_PPC_RTAS)
diff --git a/arch/powerpc/kexec/core_64.c b/arch/powerpc/kexec/core_64.c
index 0b292f93a74cc..611b89bcea2be 100644
--- a/arch/powerpc/kexec/core_64.c
+++ b/arch/powerpc/kexec/core_64.c
@@ -543,6 +543,67 @@ int update_cpus_node(void *fdt)
return ret;
 }
 
+#if defined(CONFIG_CRASH_HOTPLUG)
+#undef pr_fmt
+#define pr_fmt(fmt) "crash hp: " fmt
+
+/**
+ * arch_crash_hotplug_handler() - Handle crash CPU/Memory hotplug events to 
update the
+ *necessary kexec segments based on the 
hotplug event.
+ * @image: the active struct kimage
+ *
+ * Update FDT segment to include newly added CPU. No action for CPU remove 
case.
+ */
+void arch_crash_handle_hotplug_event(struct kimage *image)
+{
+   void *fdt, *ptr;
+   unsigned long mem;
+   int i, fdt_index = -1;
+   unsigned int hp_action = image->hp_action;
+
+   /*
+* Since the hot-unplugged CPU is already part of crash FDT,
+* no action is needed for CPU remove case.
+*/
+   if (hp_action == KEXEC_CRASH_HP_REMOVE_CPU)
+   return;
+
+   /* crash update on memory hotplug events is not supported yet */
+   if (hp_action == KEXEC_CRASH_HP_REMOVE_MEMORY || hp_action == 
KEXEC_CRASH_HP_ADD_MEMORY) {
+   pr_info_once("Crash update is not supported for memory 
hotplug\n");
+   return;
+   }
+
+   /* Find the FDT segment index in kexec segment array. */
+   for (i = 0; i < image->nr_segments; i++) {
+   mem = image->segment[i].mem;
+   ptr = __va(mem);
+
+   if (ptr && fdt_magic(ptr) == FDT_MAGIC) {
+   fdt_index = i;
+   break;
+   }
+   }
+
+   if (fdt_index < 0) {
+   pr_err("Unable to locate FDT segment.\n");
+   return;
+   }
+
+   fdt = __va((void *)image->segment[fdt_index].mem);
+
+   /* Temporarily invalidate the crash image while it is replaced */
+   xchg(&kexec_crash_image, NULL);
+
+   /* update FDT to reflect changes in CPU resources */
+   if (update_cpus_node(fdt))
+   pr_err("Failed to update crash FDT");
+

[PATCH v10 0/5] PowerPC: In-kernel handling of CPU/Memory hotplug/online/offline events for kdump kernel

2023-04-23 Thread Sourabh Jain
The Problem:

After CPU/Memory hot plug/unplug and online/offline events, the loaded
kdump kernel holds stale information about the system. Dump collection
with a stale kdump kernel might end in a dump capture failure or an
inaccurate dump collection.

Existing solution:
==
The existing solution keeps the kdump kernel up to date by monitoring
CPU/Memory hotplug/online/offline events via a udev rule and triggering a
full kdump kernel reload for every hotplug event.

Shortcomings:

- Leaves a window where kernel crash might not lead to a successful dump
  collection.
- Reloading all kexec components for each hotplug is inefficient.
- udev rules are prone to races if hotplug events are frequent.

More about the issues with the existing solution is posted here:
 - https://lkml.org/lkml/2020/12/14/532
 - https://lists.ozlabs.org/pipermail/linuxppc-dev/2022-February/240254.html

Proposed Solution:
==
Instead of reloading all kexec segments on CPU/Memory hotplug/online/offline
event, this patch series focuses on updating only the relevant kexec segment.
Once the kexec segments are loaded in the kernel reserved area then an
arch-specific hotplug handler will update the relevant kexec segment based on
hotplug event type.

Series Dependencies

This patch series implements the crash hotplug handler on PowerPC. The generic
crash hotplug handler is introduced by https://lkml.org/lkml/2023/4/4/1136 patch
series.

Git tree for testing:
=
The below git tree has this patch series applied on top of dependent patch
series.
https://github.com/sourabhjains/linux/tree/e21-s10

To realise the feature, the kdump udev rule must be updated to avoid
reloading the kdump kernel on CPU/Memory hotplug/online/offline events.

  RHEL: /usr/lib/udev/rules.d/98-kexec.rules

-SUBSYSTEM=="cpu", ACTION=="online", GOTO="kdump_reload_cpu"
-SUBSYSTEM=="memory", ACTION=="online", GOTO="kdump_reload_mem"
-SUBSYSTEM=="memory", ACTION=="offline", GOTO="kdump_reload_mem"
+SUBSYSTEM=="cpu", ATTRS{crash_hotplug}=="1", GOTO="kdump_reload_end"
+SUBSYSTEM=="memory", ATTRS{crash_hotplug}=="1", GOTO="kdump_reload_end"

Note: only kexec_file_load syscall will work. For kexec_load minor changes are
required in kexec tool.

---
Changelog:

v10:
  - Drop the patch that adds fdt_index attribute to struct kimage_arch
Find the fdt segment index when needed.
  - Added more details into commits messages.
  - Rebased onto 6.3.0-rc5

v9:
  - Removed patch to prepare elfcorehdr crash notes for possible CPUs.
The patch is moved to generic patch series that introduces generic
infrastructure for in kernel crash update.
  - Removed patch to pass the hotplug action type to the arch crash
hotplug handler function. The generic patch series has introduced
the hotplug action type in kimage struct.
  - Add detail commit message for better understanding.

v8:
  - Restrict fdt_index initialization to machine_kexec_post_load
so that it works for both kexec_load and kexec_file_load. [3/8] Laurent Dufour

  - Updated the logic to find the number of offline core. [6/8]

  - Changed the logic to find the elfcore program header to accommodate
future memory ranges due to memory hotplug events. [8/8]

v7
  - added a new config to configure this feature
  - pass hotplug action type to arch specific handler

v6
  - Added crash memory hotplug support

v5:
  - Replace CONFIG_CRASH_HOTPLUG with CONFIG_HOTPLUG_CPU.
  - Move fdt segment identification for kexec_load case to load path
instead of crash hotplug handler
  - Keep new attribute defined under kimage_arch to track FDT segment
under CONFIG_HOTPLUG_CPU config.

v4:
  - Update the logic to find the additional space needed for hotadd CPUs post
kexec load. Refer "[RFC v4 PATCH 4/5] powerpc/crash hp: add crash hotplug
support for kexec_file_load" patch to know more about the change.
  - Fix a couple of typos.
  - Replace pr_err to pr_info_once to warn user about memory hotplug
support.
  - In crash hotplug handle exit the for loop if FDT segment is found.

v3
  - Move fdt_index and fdt_index_vaild variables to kimage_arch struct.
  - Rebase patches on top of https://lkml.org/lkml/2022/3/3/674 [v5]
  - Fixed warning reported by checkpatch script

v2:
  - Use generic hotplug handler introduced by 
https://lkml.org/lkml/2022/2/9/1406, a
significant change from v1.

Sourabh Jain (5):
  powerpc/kexec: turn some static helper functions public
  powerpc/crash: introduce a new config option CRASH_HOTPLUG
  powerpc/crash: add crash CPU hotplug support
  crash: forward memory_notify args to arch crash hotplug handler
  powerpc/kexec: add crash memory hotplug support

 arch/powerpc/Kconfig|  12 +
 arch/powerpc/include/asm/kexec.h|  10 +
 arch/powerpc/include/asm/kexec_ranges.h |   1 +
 arch/powerpc/kexec/core_64.c| 301 

[PATCH v10 2/5] powerpc/crash: introduce a new config option CRASH_HOTPLUG

2023-04-23 Thread Sourabh Jain
Due to CPU/Memory hot plug/unplug or online/offline events, the system
resources change. A similar change should be reflected in the loaded kdump
kernel kexec segments that describe the state of the CPU and memory of
the running kernel.

If the kdump kernel kexec segments are not updated after the CPU/Memory
hot plug/unplug or online/offline events and kdump kernel tries to
collect the dump with the stale system resource data then this might
lead to dump collection failure or an inaccurate dump collection.

The current method to keep the kdump kernel kexec segments up to date is
by reloading the complete kdump kernel whenever a CPU/Memory hot
plug/unplug or online/offline event is observed in userspace. Reloading
the kdump kernel for every CPU/Memory hot plug/unplug or online/offline
event is inefficient and creates a large window where the kdump service
is not available. It can be improved by doing in-kernel updates to only
necessary kdump kernel kexec segments which describe CPU and Memory
resources of the running kernel to the kdump kernel.

The kernel changes related to in-kernel updates to the kdump kernel
kexec segments are kept under the CRASH_HOTPLUG config option.

Later in the series, a powerpc crash hotplug handler is introduced to
update the kdump kernel kexec segments on CPU/Memory hotplug events.
This arch-specific handler is triggered from a generic crash handler
that registers with the CPU/Memory add/remove notifiers.

The CRASH_HOTPLUG config option is enabled by default.

Signed-off-by: Sourabh Jain 
Reviewed-by: Laurent Dufour 
---
 arch/powerpc/Kconfig | 12 
 1 file changed, 12 insertions(+)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index a6c4407d3ec83..ac0dc0ffe89b4 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -681,6 +681,18 @@ config CRASH_DUMP
  The same kernel binary can be used as production kernel and dump
  capture kernel.
 
+config CRASH_HOTPLUG
+   bool "In-kernel update to kdump kernel on system configuration changes"
+   default y
+   depends on CRASH_DUMP && (HOTPLUG_CPU || MEMORY_HOTPLUG)
+   help
+ Quick and efficient mechanism to update the kdump kernel in the
+ event of CPU/Memory hot plug/unplug or online/offline events. This
+ approach does the in-kernel update to only necessary kexec segment
+ instead of unload-reload entire kdump kernel from userspace.
+
+ If unsure, say Y.
+
 config FA_DUMP
bool "Firmware-assisted dump"
depends on PPC64 && (PPC_RTAS || PPC_POWERNV)
-- 
2.39.2



[PATCH v10 1/5] powerpc/kexec: turn some static helper functions public

2023-04-23 Thread Sourabh Jain
Move update_cpus_node and get_crash_memory_ranges functions from
kexec/file_load_64.c to kexec/core_64.c to make these functions usable
by other kexec components.

Later in the series, these functions are utilized to do in-kernel update
to kexec segments on CPU/Memory hot plug/unplug or online/offline events
for both kexec_load and kexec_file_load syscalls.

No functional change intended.

Signed-off-by: Sourabh Jain 
Reviewed-by: Laurent Dufour 
---
 arch/powerpc/include/asm/kexec.h  |   6 ++
 arch/powerpc/kexec/core_64.c  | 166 ++
 arch/powerpc/kexec/file_load_64.c | 162 -
 3 files changed, 172 insertions(+), 162 deletions(-)

diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h
index a1ddba01e7d13..8090ad7d97d9d 100644
--- a/arch/powerpc/include/asm/kexec.h
+++ b/arch/powerpc/include/asm/kexec.h
@@ -99,6 +99,12 @@ void relocate_new_kernel(unsigned long indirection_page, 
unsigned long reboot_co
 
 void kexec_copy_flush(struct kimage *image);
 
+#ifdef CONFIG_PPC64
+struct crash_mem;
+int update_cpus_node(void *fdt);
+int get_crash_memory_ranges(struct crash_mem **mem_ranges);
+#endif
+
 #if defined(CONFIG_CRASH_DUMP) && defined(CONFIG_PPC_RTAS)
 void crash_free_reserved_phys_range(unsigned long begin, unsigned long end);
 #define crash_free_reserved_phys_range crash_free_reserved_phys_range
diff --git a/arch/powerpc/kexec/core_64.c b/arch/powerpc/kexec/core_64.c
index a79e28c91e2be..0b292f93a74cc 100644
--- a/arch/powerpc/kexec/core_64.c
+++ b/arch/powerpc/kexec/core_64.c
@@ -17,6 +17,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 
 #include 
 #include 
@@ -30,6 +32,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 
 int machine_kexec_prepare(struct kimage *image)
 {
@@ -377,6 +381,168 @@ void default_machine_kexec(struct kimage *image)
/* NOTREACHED */
 }
 
+/**
+ * get_crash_memory_ranges - Get crash memory ranges. This list includes
+ *   first/crashing kernel's memory regions that
+ *   would be exported via an elfcore.
+ * @mem_ranges:  Range list to add the memory ranges to.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+int get_crash_memory_ranges(struct crash_mem **mem_ranges)
+{
+   phys_addr_t base, end;
+   struct crash_mem *tmem;
+   u64 i;
+   int ret;
+
+   for_each_mem_range(i, &base, &end) {
+   u64 size = end - base;
+
+   /* Skip backup memory region, which needs a separate entry */
+   if (base == BACKUP_SRC_START) {
+   if (size > BACKUP_SRC_SIZE) {
+   base = BACKUP_SRC_END + 1;
+   size -= BACKUP_SRC_SIZE;
+   } else
+   continue;
+   }
+
+   ret = add_mem_range(mem_ranges, base, size);
+   if (ret)
+   goto out;
+
+   /* Try merging adjacent ranges before reallocation attempt */
+   if ((*mem_ranges)->nr_ranges == (*mem_ranges)->max_nr_ranges)
+   sort_memory_ranges(*mem_ranges, true);
+   }
+
+   /* Reallocate memory ranges if there is no space to split ranges */
+   tmem = *mem_ranges;
+   if (tmem && (tmem->nr_ranges == tmem->max_nr_ranges)) {
+   tmem = realloc_mem_ranges(mem_ranges);
+   if (!tmem)
+   goto out;
+   }
+
+   /* Exclude crashkernel region */
+   ret = crash_exclude_mem_range(tmem, crashk_res.start, crashk_res.end);
+   if (ret)
+   goto out;
+
+   /*
+* FIXME: For now, stay in parity with kexec-tools but if RTAS/OPAL
+*regions are exported to save their context at the time of
+*crash, they should actually be backed up just like the
+*first 64K bytes of memory.
+*/
+   ret = add_rtas_mem_range(mem_ranges);
+   if (ret)
+   goto out;
+
+   ret = add_opal_mem_range(mem_ranges);
+   if (ret)
+   goto out;
+
+   /* create a separate program header for the backup region */
+   ret = add_mem_range(mem_ranges, BACKUP_SRC_START, BACKUP_SRC_SIZE);
+   if (ret)
+   goto out;
+
+   sort_memory_ranges(*mem_ranges, false);
+out:
+   if (ret)
+   pr_err("Failed to setup crash memory ranges\n");
+   return ret;
+}
+
+/**
+ * add_node_props - Reads node properties from device node structure and add
+ *  them to fdt.
+ * @fdt:Flattened device tree of the kernel
+ * @node_offset:offset of the node to add a property at
+ * @dn: device node pointer
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+static int add_node_props(void *fdt, int node_offset, const struct device_node 
*dn)
+{
+   int ret = 0;
+   struct property