Two bugs in the IRQ handling path:

1) iommu_group reference leak in rocket_job_handle_irq():
   iommu_group_get() takes a reference on the group, but the returned
   pointer is passed straight to iommu_detach_group(), which does not
   drop that reference, and the pointer is never stored so it can
   never be put. Since this runs on every completed job, the reference
   count keeps growing and prevents the group from being freed. Use
   core->iommu_group instead, consistent with rocket_reset().

2) Unsafe hardware register access in shared IRQ handler:
   rocket_job_irq_handler() is registered with IRQF_SHARED but accesses
   hardware registers without checking runtime PM status. If another
   device on the same IRQ line triggers an interrupt while the NPU is
   suspended, register reads return 0xffffffff, spuriously triggering
   WARN_ON macros and falsely returning IRQ_WAKE_THREAD.

   Add pm_runtime_get_if_active() in the hardirq handler to atomically
   verify the device is active before accessing registers. The hardirq
   and threaded handlers each take and release their own runtime PM
   reference, so no reference is leaked when the IRQ core coalesces
   multiple wakeups into a single execution of the thread.

Cc: [email protected]
Fixes: 0810d5ad88a1 ("accel/rocket: Add job submission IOCTL")
Signed-off-by: ZhaoJinming <[email protected]>
---
 drivers/accel/rocket/rocket_job.c | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/drivers/accel/rocket/rocket_job.c b/drivers/accel/rocket/rocket_job.c
index e8a073e22ac2..0ea3b3099704 100644
--- a/drivers/accel/rocket/rocket_job.c
+++ b/drivers/accel/rocket/rocket_job.c
@@ -349,7 +349,7 @@ static void rocket_job_handle_irq(struct rocket_core *core)
                                return;
                        }
 
-                       iommu_detach_group(NULL, iommu_group_get(core->dev));
+                       iommu_detach_group(NULL, core->iommu_group);
                        dma_fence_signal(core->in_flight_job->done_fence);
                        pm_runtime_put_autosuspend(core->dev);
                        core->in_flight_job = NULL;
@@ -420,7 +420,10 @@ static irqreturn_t rocket_job_irq_handler_thread(int irq, void *data)
 {
        struct rocket_core *core = data;
 
-       rocket_job_handle_irq(core);
+       if (pm_runtime_get_if_active(core->dev) == 1) {
+               rocket_job_handle_irq(core);
+               pm_runtime_put(core->dev);
+       }
 
        return IRQ_HANDLED;
 }
@@ -428,16 +431,24 @@ static irqreturn_t rocket_job_irq_handler_thread(int irq, void *data)
 static irqreturn_t rocket_job_irq_handler(int irq, void *data)
 {
        struct rocket_core *core = data;
-       u32 raw_status = rocket_pc_readl(core, INTERRUPT_RAW_STATUS);
+       u32 raw_status;
+
+       if (pm_runtime_get_if_active(core->dev) != 1)
+               return IRQ_NONE;
+
+       raw_status = rocket_pc_readl(core, INTERRUPT_RAW_STATUS);
 
        WARN_ON(raw_status & PC_INTERRUPT_RAW_STATUS_DMA_READ_ERROR);
        WARN_ON(raw_status & PC_INTERRUPT_RAW_STATUS_DMA_WRITE_ERROR);
 
        if (!(raw_status & PC_INTERRUPT_RAW_STATUS_DPU_0 ||
-             raw_status & PC_INTERRUPT_RAW_STATUS_DPU_1))
+             raw_status & PC_INTERRUPT_RAW_STATUS_DPU_1)) {
+               pm_runtime_put(core->dev);
                return IRQ_NONE;
+       }
 
        rocket_pc_writel(core, INTERRUPT_MASK, 0x0);
+       pm_runtime_put(core->dev);
 
        return IRQ_WAKE_THREAD;
 }
-- 
2.20.1

Reply via email to