with implementation of amdgpu_spm_funcs callbacks for the GFX 9.0 IP block
and wire up the RLC Stream Performance Monitor interrupt
GFX_9_0__SRCID__RLC_STRM_PERF_MONITOR_INTERRUPT into the IRQ framework.

SPM function table (gfx_v9_0_spm_funcs):
  gfx_v9_0_spm_start(adev, 0):
    Programs the KIQ ring to:
    - Read-modify-write RLC_SPM_PERFMON_CNTL to set PERFMON_RING_MODE
      (0x1 = stall-on-full with interrupt at segment threshold).
    - Write CP_PERFMON_CNTL.SPM_PERFMON_STATE =
      CP_PERFMON_STATE_DISABLE_AND_RESET to reset the SPM counters and
      clear wptr to 0; manually resets RLC_SPM_RING_RDPTR to 0 to match.
    - Write RLC_SPM_INT_CNTL = 1 to enable the SPM interrupt.

  gfx_v9_0_spm_stop(adev, 0):
    Programs the KIQ ring to:
    - Set CP_PERFMON_CNTL.SPM_PERFMON_STATE = STOP_COUNTING.
    - Set CP_PERFMON_CNTL.PERFMON_STATE = DISABLE_AND_RESET.
    - Reset RLC_SPM_RING_RDPTR to 0 (wptr is also reset by hardware).

  gfx_v9_0_spm_set_rdptr(adev, 0, rptr):
    Writes rptr to RLC_SPM_RING_RDPTR via KIQ ring.

  gfx_v9_0_set_spm_perfmon_ring_buf(adev, 0, gpu_addr, size):
    Programs the ring buffer via KIQ ring:
    - RLC_SPM_PERFMON_RING_BASE_LO/HI with the GPU address.
    - RLC_SPM_PERFMON_RING_SIZE with the buffer size.
    - RLC_SPM_SEGMENT_THRESHOLD = 1.
    - CP_PERFMON_CNTL = 0 (clear any leftover perfmon state).
    The .set_spm_config_size field reserves 30 DWORDs of KIQ ring space
    per SPM configuration operation.

  gfx_v9_0_spm_funcs is registered in gfx_v9_0_set_spm_funcs(), called
  from gfx_v9_0_early_init() before KIQ and ring function setup.

SPM IRQ handling:
  The SPM interrupt source is registered in gfx_v9_0_sw_init() via
  amdgpu_irq_add_id() with client SOC15_IH_CLIENTID_RLC and source ID
  GFX_9_0__SRCID__RLC_STRM_PERF_MONITOR_INTERRUPT, connected to
  adev->gfx.spm_irq.

  gfx_v9_0_spm_set_interrupt_state(): enables or disables the SPM
  interrupt by writing RLC_SPM_INT_CNTL = 1/0 via direct MMIO.

  gfx_v9_0_spm_irq(): called from the interrupt handler, invokes
  amdgpu_rlc_spm_interrupt(adev, 0) for XCC 0 (gfx9 is single-XCC).

  amdgpu_irq_get(spm_irq) is called in gfx_v9_0_late_init() to enable
  the interrupt at the IRQ framework level. amdgpu_irq_put(spm_irq) is
  called in gfx_v9_0_hw_fini() to disable it.

Signed-off-by: James Zhu <[email protected]>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h |   1 +
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c   | 135 ++++++++++++++++++++++++
 2 files changed, 136 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index 016eed89d6f3..30df02d322b2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -472,6 +472,7 @@ struct amdgpu_gfx {
        struct amdgpu_irq_src           priv_inst_irq;
        struct amdgpu_irq_src           bad_op_irq;
        struct amdgpu_irq_src           cp_ecc_error_irq;
+       struct amdgpu_irq_src           spm_irq;
        struct amdgpu_irq_src           sq_irq;
        struct amdgpu_irq_src           rlc_gc_fed_irq;
        struct sq_work                  sq_work;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 7e9d753f4a80..7435032c358a 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -2275,6 +2275,13 @@ static int gfx_v9_0_sw_init(struct amdgpu_ip_block *ip_block)
        adev->gfx.mec.num_pipe_per_mec = 4;
        adev->gfx.mec.num_queue_per_pipe = 8;
 
+       /* SPM */
+       r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_RLC,
+                             GFX_9_0__SRCID__RLC_STRM_PERF_MONITOR_INTERRUPT,
+                             &adev->gfx.spm_irq);
+       if (r)
+               return r;
+
        /* EOP Event */
        r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
        if (r)
@@ -4056,6 +4063,7 @@ static int gfx_v9_0_hw_fini(struct amdgpu_ip_block *ip_block)
        if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
                amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
        amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
+       amdgpu_irq_put(adev, &adev->gfx.spm_irq, 0);
        amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
        amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0);
 
@@ -4793,6 +4801,95 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
        return r;
 }
 
+static void gfx_v9_0_spm_start(struct amdgpu_device *adev, int xcc_id)
+{
+       struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[0].ring;
+       uint32_t data = 0;
+
+       data = RREG32_SOC15(GC, 0, mmRLC_SPM_PERFMON_CNTL);
+       data |= RLC_SPM_PERFMON_CNTL__PERFMON_RING_MODE_MASK;
+       gfx_v9_0_write_data_to_reg(kiq_ring, 0, false,
+                       SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_PERFMON_CNTL), data);
+
+       data = REG_SET_FIELD(0, CP_PERFMON_CNTL, SPM_PERFMON_STATE,
+                       CP_PERFMON_STATE_DISABLE_AND_RESET);
+       gfx_v9_0_write_data_to_reg(kiq_ring, 0, false,
+                       SOC15_REG_OFFSET(GC, 0, mmCP_PERFMON_CNTL), data);
+
+       /* When SPM is reset, RLC automatically resets wptr to 0.
+        * Manually reset rptr to match this.
+        */
+       gfx_v9_0_write_data_to_reg(kiq_ring, 0, false,
+                       SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_RING_RDPTR), 0);
+
+       gfx_v9_0_write_data_to_reg(kiq_ring, 0, false,
+                       SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_INT_CNTL), 1);
+}
+
+static void gfx_v9_0_spm_stop(struct amdgpu_device *adev, int xcc_id)
+{
+       struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[0].ring;
+       uint32_t data = 0;
+
+       data = REG_SET_FIELD(0, CP_PERFMON_CNTL, SPM_PERFMON_STATE,
+                       CP_PERFMON_STATE_STOP_COUNTING);
+       gfx_v9_0_write_data_to_reg(kiq_ring, 0, false,
+                       SOC15_REG_OFFSET(GC, 0, mmCP_PERFMON_CNTL), data);
+
+       data = REG_SET_FIELD(0, CP_PERFMON_CNTL, PERFMON_STATE,
+                       CP_PERFMON_STATE_DISABLE_AND_RESET);
+       gfx_v9_0_write_data_to_reg(kiq_ring, 0, false,
+                       SOC15_REG_OFFSET(GC, 0, mmCP_PERFMON_CNTL), data);
+
+       /* When SPM is reset, RLC automatically resets wptr to 0.
+        * Manually reset rptr to match this.
+        */
+       gfx_v9_0_write_data_to_reg(kiq_ring, 0, false,
+                       SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_RING_RDPTR), 0);
+}
+
+static void gfx_v9_0_spm_set_rdptr(struct amdgpu_device *adev, int xcc_id, u32 rptr)
+{
+       struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[0].ring;
+
+       gfx_v9_0_write_data_to_reg(kiq_ring, 0, false,
+                       SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_RING_RDPTR), rptr);
+}
+
+static void gfx_v9_0_set_spm_perfmon_ring_buf(struct amdgpu_device *adev,
+                                           int xcc_id, u64 gpu_addr, u32 size)
+{
+       struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[0].ring;
+
+       gfx_v9_0_write_data_to_reg(kiq_ring, 0, false, SOC15_REG_OFFSET(GC, 0,
+                       mmRLC_SPM_PERFMON_RING_BASE_LO), lower_32_bits(gpu_addr));
+
+       gfx_v9_0_write_data_to_reg(kiq_ring, 0, false,
+                       SOC15_REG_OFFSET(GC, 0,
+                               mmRLC_SPM_PERFMON_RING_BASE_HI), upper_32_bits(gpu_addr));
+
+       gfx_v9_0_write_data_to_reg(kiq_ring, 0, false,
+                       SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_PERFMON_RING_SIZE), size);
+       gfx_v9_0_write_data_to_reg(kiq_ring, 0, false,
+                       SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_SEGMENT_THRESHOLD), 0x1);
+
+       gfx_v9_0_write_data_to_reg(kiq_ring, 0, false,
+                       SOC15_REG_OFFSET(GC, 0, mmCP_PERFMON_CNTL), 0);
+}
+
+static const struct amdgpu_spm_funcs gfx_v9_0_spm_funcs = {
+       .start = &gfx_v9_0_spm_start,
+       .stop = &gfx_v9_0_spm_stop,
+       .set_rdptr = &gfx_v9_0_spm_set_rdptr,
+       .set_spm_perfmon_ring_buf = &gfx_v9_0_set_spm_perfmon_ring_buf,
+       .set_spm_config_size = 30,
+};
+
+static void gfx_v9_0_set_spm_funcs(struct amdgpu_device *adev)
+{
+       adev->gfx.spmfuncs = &gfx_v9_0_spm_funcs;
+}
+
 static int gfx_v9_0_early_init(struct amdgpu_ip_block *ip_block)
 {
        struct amdgpu_device *adev = ip_block->adev;
@@ -4807,6 +4904,7 @@ static int gfx_v9_0_early_init(struct amdgpu_ip_block *ip_block)
        adev->gfx.xcc_mask = 1;
        adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
                                          AMDGPU_MAX_COMPUTE_RINGS);
+       gfx_v9_0_set_spm_funcs(adev);
        gfx_v9_0_set_kiq_pm4_funcs(adev);
        gfx_v9_0_set_ring_funcs(adev);
        gfx_v9_0_set_irq_funcs(adev);
@@ -4862,6 +4960,10 @@ static int gfx_v9_0_late_init(struct amdgpu_ip_block *ip_block)
        if (r)
                return r;
 
+       r = amdgpu_irq_get(adev, &adev->gfx.spm_irq, 0);
+       if (r)
+               return r;
+
        r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
        if (r)
                return r;
@@ -7100,6 +7202,32 @@ static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
        gfx_v9_0_query_utc_edc_status(adev, err_data);
 }
 
+static int gfx_v9_0_spm_set_interrupt_state(struct amdgpu_device *adev,
+                                            struct amdgpu_irq_src *src,
+                                            unsigned int type,
+                                            enum amdgpu_interrupt_state state)
+{
+       switch (state) {
+       case AMDGPU_IRQ_STATE_DISABLE:
+               WREG32_SOC15(GC, 0, mmRLC_SPM_INT_CNTL, 0);
+               break;
+       case AMDGPU_IRQ_STATE_ENABLE:
+               WREG32_SOC15(GC, 0, mmRLC_SPM_INT_CNTL, 1);
+               break;
+       default:
+               break;
+       }
+       return 0;
+}
+
+static int gfx_v9_0_spm_irq(struct amdgpu_device *adev,
+                            struct amdgpu_irq_src *source,
+                            struct amdgpu_iv_entry *entry)
+{
+       amdgpu_rlc_spm_interrupt(adev, 0);
+       return 0;
+}
+
 static void gfx_v9_0_emit_wave_limit_cs(struct amdgpu_ring *ring,
                                        uint32_t pipe, bool enable)
 {
@@ -7716,12 +7844,19 @@ static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
        .process = amdgpu_gfx_cp_ecc_error_irq,
 };
 
+static const struct amdgpu_irq_src_funcs gfx_v9_0_spm_irq_funcs = {
+       .set = gfx_v9_0_spm_set_interrupt_state,
+       .process = gfx_v9_0_spm_irq,
+};
 
 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
 {
        adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
        adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;
 
+       adev->gfx.spm_irq.num_types = 1;
+       adev->gfx.spm_irq.funcs = &gfx_v9_0_spm_irq_funcs;
+
        adev->gfx.priv_reg_irq.num_types = 1;
        adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
 
-- 
2.34.1

Reply via email to