gfx_v9_0_hw_fini() unconditionally puts priv_reg_irq, priv_inst_irq,
bad_op_irq and cp_ecc_error_irq, but the matching gets in
gfx_v9_0_late_init() and amdgpu_gfx_ras_late_init() may be skipped
on SR-IOV VF, partial late_init failure, or an earlier IP init
failure.  When hw_fini then runs, the unmatched puts underflow the
refcounts and trip the amdgpu_irq_put() WARN:

  WARNING: CPU: 4 PID: 6367 at drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c:676
  RIP: amdgpu_irq_put+0xc6/0xe0 [amdgpu]
  Call Trace:
   gfx_v9_0_hw_fini+0x200/0x9a0 [amdgpu]
   amdgpu_ip_block_hw_fini+0x29/0xc0 [amdgpu]
   amdgpu_device_fini_hw+0x309/0x5f0 [amdgpu]
   amdgpu_driver_unload_kms+0x7c/0x90 [amdgpu]
   amdgpu_pci_remove+0x51/0x90 [amdgpu]

Guard each put with amdgpu_irq_enabled() so hw_fini only releases
IRQs that are currently held.

Fixes: d97b02bb9c7aa ("drm/amdgpu/gfx: disable gfx9 cp_ecc_error_irq only when 
enabling legacy gfx ras")
Cc: [email protected]
Signed-off-by: Yunxiang Li <[email protected]>
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index bf270e605949f..e5a3735d98342 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -4057,11 +4057,14 @@ static int gfx_v9_0_hw_fini(struct amdgpu_ip_block 
*ip_block)
 {
        struct amdgpu_device *adev = ip_block->adev;
 
-       if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
+       if (amdgpu_irq_enabled(adev, &adev->gfx.cp_ecc_error_irq, 0))
                amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
-       amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
-       amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
-       amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0);
+       if (amdgpu_irq_enabled(adev, &adev->gfx.priv_reg_irq, 0))
+               amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
+       if (amdgpu_irq_enabled(adev, &adev->gfx.priv_inst_irq, 0))
+               amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
+       if (amdgpu_irq_enabled(adev, &adev->gfx.bad_op_irq, 0))
+               amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0);
 
        /* DF freeze and kcq disable will fail */
        if (!amdgpu_ras_intr_triggered())
-- 
2.51.2

Reply via email to