From: Quanyang Wang <quanyang.w...@windriver.com>

ARM's CCN-504 consists of MN/DT/HN-I/SBSX/HN-F/RN-I/XP components, and the
HN-I/SBSX/HN-F/RN-F/XP components report error events to the MN component.

However, the ccn driver (arm-ccn.c) only registers 2 handlers, which handle
events from DT (pmu) and HN-F (l3 cache edac). So when there is an error
from the HN-I component, the ccn irq handler does not handle it and it
becomes a "nobody cared" irq.

[ 0.184044] irq 15: nobody cared (try booting with the "irqpoll" option)
[ 0.184049] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 
5.10.57-rt47-yocto-preempt-rt #1
[ 0.184054] Hardware name: AXM56xx Victoria (DT)
[ 0.184057] Call trace:
[ 0.184058] dump_backtrace+0x0/0x1a0
[ 0.184069] show_stack+0x24/0x30
[ 0.184075] dump_stack+0xcc/0x108
[ 0.184081] __report_bad_irq+0x54/0xe4
[ 0.184087] note_interrupt+0x2b4/0x300
[ 0.184093] handle_irq_event_percpu+0xb8/0xc0
[ 0.184098] handle_irq_event+0x88/0xf0
[ 0.184102] handle_fasteoi_irq+0xc8/0x2a0
[ 0.184107] __handle_domain_irq+0xbc/0x140
[ 0.184111] gic_handle_irq+0x94/0x160
[ 0.184115] el1_irq+0xcc/0x180

For now I can't figure out why this HN-I error happens, but there are
some clues:
1) The value in the HN-I Error Syndrome 0 register is 0x80000de88111291 and
the value in the HN-I Error Syndrome 1 register is 0x80310232c0. This means
that the error happens when some CPU wants to access 0x80310232c0.
2) The error happens when the kernel calls psci_ops.cpu_on, so the root
cause should be in ATF.

As a workaround, disable the HN-I node's error reporting to the MN node, and
clear the one HN-I error that is still signalled in the ccn irq handler.

Signed-off-by: Quanyang Wang <quanyang.w...@windriver.com>
---
 arch/arm64/boot/dts/intel/axm56xx.dtsi |  4 ++--
 drivers/bus/arm-ccn.c                  | 28 +++++++++++++++++++++++++-
 2 files changed, 29 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/boot/dts/intel/axm56xx.dtsi 
b/arch/arm64/boot/dts/intel/axm56xx.dtsi
index f09cb47183ac..fd22d39d8c19 100644
--- a/arch/arm64/boot/dts/intel/axm56xx.dtsi
+++ b/arch/arm64/boot/dts/intel/axm56xx.dtsi
@@ -98,8 +98,8 @@ soc {
 
                arm_ccn_504: arm_ccn_504@8000000000 {
                        compatible = "arm,ccn-504";
-                       /* CCN_MN only */
-                       reg = <0x80 0x00000000 0 0x10000>;
+                       /* CCN_MN and CCN_NH-I */
+                       reg = <0x80 0x00000000 0 0x100000>;
                        interrupts = <0 256 4>;
                        ranges = <0x0 0x80 0x00000000 0x1000000>;
                        #address-cells = <1>;
diff --git a/drivers/bus/arm-ccn.c b/drivers/bus/arm-ccn.c
index a13c40de9dcf..b860f63d0bbf 100644
--- a/drivers/bus/arm-ccn.c
+++ b/drivers/bus/arm-ccn.c
@@ -30,8 +30,16 @@
 #define CCN_MN_OLY_COMP_LIST_63_0      0x01e0
 #define CCN_MN_ERR_SIG_VAL_63_0                0x0300
 #define CCN_MN_ERR_SIG_VAL_63_0__DT                    BIT(1)
+#define CCN_MN_ERR_SIG_VAL_63_0__HNI           GENMASK(9, 8)
 #define CCN_MN_ERR_SIG_VAL_63_0__HNF                   0xff
 
+#define CCN_HNI_BASE                   0x80000
+#define CCN_HNI_ERR_CLR_REG_H  0x484
+#define CCN_HNI_AUX_CTL_REG            0x500
+#define CCN_HNI_FIRST_ERR_CLR  GENMASK(30, 30)
+#define CCN_HNI_MULTI_ERR_CLR  GENMASK(27, 27)
+#define CCN_HNI_ERROR_ENABLE   GENMASK(3, 2)
+
 LIST_HEAD(arm_ccn_head);
 
 struct arm_ccn {
@@ -130,6 +138,19 @@ static irqreturn_t arm_ccn_irq_handler(int irq, void 
*dev_id)
                                res = er->handler(er->data);
        }
 
+       /*
+        * When psci pwrup secondary CPU, the error from NH-I node will be
+        * reported to MN node. So even though we disable error reporting
+        * from NH-I in arm_ccn_probe, there is still one error need to be
+        * handled.
+        */
+       if (err_or & CCN_MN_ERR_SIG_VAL_63_0__HNI) {
+               err_or &= ~CCN_MN_ERR_SIG_VAL_63_0__HNI;
+               writel((CCN_HNI_FIRST_ERR_CLR | CCN_HNI_MULTI_ERR_CLR),
+                               (ccn->base + CCN_HNI_BASE + 
CCN_HNI_ERR_CLR_REG_H));
+               res |= IRQ_HANDLED;
+       }
+
        /* To my best knowledge for having the interrupt serviced at minimum one
         * has to clear an MN error signal by reading Error Signal Valid
         * regs and deassert the interrupt (INTREQ).
@@ -165,7 +186,7 @@ static int ccn_platform_probe(struct platform_device *pdev)
 {
        int ret;
        struct resource *res;
-       unsigned int irq;
+       unsigned int irq, value;
        struct arm_ccn *ccn;
 
        ccn = devm_kzalloc(&pdev->dev, sizeof(*ccn), GFP_KERNEL);
@@ -197,6 +218,11 @@ static int ccn_platform_probe(struct platform_device *pdev)
                errint_write = secure_errint_write;
        }
 
+       /* Let's inhibit HN-I reporting error to MN */
+       value = readl(ccn->base + CCN_HNI_BASE + CCN_HNI_AUX_CTL_REG);
+       value &= ~CCN_HNI_ERROR_ENABLE;
+       writel(value, ccn->base + CCN_HNI_BASE + CCN_HNI_AUX_CTL_REG);
+
        /* Check if we can use the interrupt */
        errint_write(CCN_MN_ERRINT_STATUS__PMU_EVENTS__DISABLE,
                     ccn->base + CCN_MN_ERRINT_STATUS);
-- 
2.25.1

-=-=-=-=-=-=-=-=-=-=-=-
Links: You receive all messages sent to this group.
View/Reply Online (#10382): 
https://lists.yoctoproject.org/g/linux-yocto/message/10382
Mute This Topic: https://lists.yoctoproject.org/mt/85432028/21656
Group Owner: linux-yocto+ow...@lists.yoctoproject.org
Unsubscribe: https://lists.yoctoproject.org/g/linux-yocto/unsub 
[arch...@mail-archive.com]
-=-=-=-=-=-=-=-=-=-=-=-

Reply via email to