From: Quanyang Wang <quanyang.w...@windriver.com> The ARM's ccn-504 consists of MN/DT/HN-I/SBSX/HN-F/RN-I/XP components, and the HN-I/SBSX/HN-F/RN-F/XP components report error events to the MN component.
But the ccn driver arm-ccn.c registers only 2 handlers, to handle events from DT (pmu) and HN-F (l3 cache edac). So when there is an error from the HN-I component, the ccn irq handler won't handle it and it becomes a "nobody cared" irq. [ 0.184044] irq 15: nobody cared (try booting with the "irqpoll" option) [ 0.184049] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.10.57-rt47-yocto-preempt-rt #1 [ 0.184054] Hardware name: AXM56xx Victoria (DT) [ 0.184057] Call trace: [ 0.184058] dump_backtrace+0x0/0x1a0 [ 0.184069] show_stack+0x24/0x30 [ 0.184075] dump_stack+0xcc/0x108 [ 0.184081] __report_bad_irq+0x54/0xe4 [ 0.184087] note_interrupt+0x2b4/0x300 [ 0.184093] handle_irq_event_percpu+0xb8/0xc0 [ 0.184098] handle_irq_event+0x88/0xf0 [ 0.184102] handle_fasteoi_irq+0xc8/0x2a0 [ 0.184107] __handle_domain_irq+0xbc/0x140 [ 0.184111] gic_handle_irq+0x94/0x160 [ 0.184115] el1_irq+0xcc/0x180 For now I can't figure out why this HN-I error happens, but there are some clues: 1) The value in the HN-I Error Syndrome 0 register is 0x80000de88111291 and the value in the HN-I Error Syndrome 1 register is 0x80310232c0. This means that the error happens when some CPU wants to access 0x80310232c0. 2) The error happens when the kernel calls psci_ops.cpu_on, so the root cause should be in ATF. As a workaround, this patch disables the HN-I node's error reporting to the MN node. 
Signed-off-by: Quanyang Wang <quanyang.w...@windriver.com> --- arch/arm64/boot/dts/intel/axm56xx.dtsi | 4 ++-- drivers/bus/arm-ccn.c | 28 +++++++++++++++++++++++++- 2 files changed, 29 insertions(+), 3 deletions(-) diff --git a/arch/arm64/boot/dts/intel/axm56xx.dtsi b/arch/arm64/boot/dts/intel/axm56xx.dtsi index f09cb47183ac..fd22d39d8c19 100644 --- a/arch/arm64/boot/dts/intel/axm56xx.dtsi +++ b/arch/arm64/boot/dts/intel/axm56xx.dtsi @@ -98,8 +98,8 @@ soc { arm_ccn_504: arm_ccn_504@8000000000 { compatible = "arm,ccn-504"; - /* CCN_MN only */ - reg = <0x80 0x00000000 0 0x10000>; + /* CCN_MN and CCN_NH-I */ + reg = <0x80 0x00000000 0 0x100000>; interrupts = <0 256 4>; ranges = <0x0 0x80 0x00000000 0x1000000>; #address-cells = <1>; diff --git a/drivers/bus/arm-ccn.c b/drivers/bus/arm-ccn.c index a13c40de9dcf..b860f63d0bbf 100644 --- a/drivers/bus/arm-ccn.c +++ b/drivers/bus/arm-ccn.c @@ -30,8 +30,16 @@ #define CCN_MN_OLY_COMP_LIST_63_0 0x01e0 #define CCN_MN_ERR_SIG_VAL_63_0 0x0300 #define CCN_MN_ERR_SIG_VAL_63_0__DT BIT(1) +#define CCN_MN_ERR_SIG_VAL_63_0__HNI GENMASK(9, 8) #define CCN_MN_ERR_SIG_VAL_63_0__HNF 0xff +#define CCN_HNI_BASE 0x80000 +#define CCN_HNI_ERR_CLR_REG_H 0x484 +#define CCN_HNI_AUX_CTL_REG 0x500 +#define CCN_HNI_FIRST_ERR_CLR GENMASK(30, 30) +#define CCN_HNI_MULTI_ERR_CLR GENMASK(27, 27) +#define CCN_HNI_ERROR_ENABLE GENMASK(3, 2) + LIST_HEAD(arm_ccn_head); struct arm_ccn { @@ -130,6 +138,19 @@ static irqreturn_t arm_ccn_irq_handler(int irq, void *dev_id) res = er->handler(er->data); } + /* + * When psci pwrup secondary CPU, the error from NH-I node will be + * reported to MN node. So even though we disable error reporting + * from NH-I in arm_ccn_probe, there is still one error need to be + * handled. 
+ */ + if (err_or & CCN_MN_ERR_SIG_VAL_63_0__HNI) { + err_or &= ~CCN_MN_ERR_SIG_VAL_63_0__HNI; + writel((CCN_HNI_FIRST_ERR_CLR | CCN_HNI_MULTI_ERR_CLR), + (ccn->base + CCN_HNI_BASE + CCN_HNI_ERR_CLR_REG_H)); + res |= IRQ_HANDLED; + } + /* To my best knowledge for having the interrupt serviced at minimum one * has to clear an MN error signal by reading Error Signal Valid * regs and deassert the interrupt (INTREQ). @@ -165,7 +186,7 @@ static int ccn_platform_probe(struct platform_device *pdev) { int ret; struct resource *res; - unsigned int irq; + unsigned int irq, value; struct arm_ccn *ccn; ccn = devm_kzalloc(&pdev->dev, sizeof(*ccn), GFP_KERNEL); @@ -197,6 +218,11 @@ static int ccn_platform_probe(struct platform_device *pdev) errint_write = secure_errint_write; } + /* Let's inhibit HN-I reporting error to MN */ + value = readl(ccn->base + CCN_HNI_BASE + CCN_HNI_AUX_CTL_REG); + value &= ~CCN_HNI_ERROR_ENABLE; + writel(value, ccn->base + CCN_HNI_BASE + CCN_HNI_AUX_CTL_REG); + /* Check if we can use the interrupt */ errint_write(CCN_MN_ERRINT_STATUS__PMU_EVENTS__DISABLE, ccn->base + CCN_MN_ERRINT_STATUS); -- 2.25.1
-=-=-=-=-=-=-=-=-=-=-=- Links: You receive all messages sent to this group. View/Reply Online (#10382): https://lists.yoctoproject.org/g/linux-yocto/message/10382 Mute This Topic: https://lists.yoctoproject.org/mt/85432028/21656 Group Owner: linux-yocto+ow...@lists.yoctoproject.org Unsubscribe: https://lists.yoctoproject.org/g/linux-yocto/unsub [arch...@mail-archive.com] -=-=-=-=-=-=-=-=-=-=-=-