If there are multiple instances of the VCN running,
we may end up switching the video profile while another
instance is active because we only take into account
the current instance's submissions. Look at all
outstanding fences for the video profile.
v2: drop early exit in begin_use()
v3: handle possible race between begin_use() and the work handler
Fixes: 3b669df92c85 ("drm/amdgpu/vcn: adjust workload profile
handling")
Reviewed-by: Sathishkumar S <sathishkumar.sundarar...@amd.com> (v1)
Signed-off-by: Alex Deucher <alexander.deuc...@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 40
++++++++++++-------------
drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h | 1 +
2 files changed, 21 insertions(+), 20 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
index 9a76e11d1c184..593c1ddf8819b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -415,19 +415,25 @@ static void
amdgpu_vcn_idle_work_handler(struct work_struct *work)
struct amdgpu_vcn_inst *vcn_inst =
container_of(work, struct amdgpu_vcn_inst,
idle_work.work);
struct amdgpu_device *adev = vcn_inst->adev;
- unsigned int fences = 0, fence[AMDGPU_MAX_VCN_INSTANCES] = {0};
- unsigned int i = vcn_inst->inst, j;
+ unsigned int total_fences = 0,
fence[AMDGPU_MAX_VCN_INSTANCES] = {0};
+ unsigned int i, j;
int r = 0;
- if (adev->vcn.harvest_config & (1 << i))
+ if (adev->vcn.harvest_config & (1 << vcn_inst->inst))
return;
- for (j = 0; j < adev->vcn.inst[i].num_enc_rings; ++j)
- fence[i] +=
amdgpu_fence_count_emitted(&vcn_inst->ring_enc[j]);
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ struct amdgpu_vcn_inst *v = &adev->vcn.inst[i];
+
+ for (j = 0; j < v->num_enc_rings; ++j)
+ fence[i] +=
amdgpu_fence_count_emitted(&v->ring_enc[j]);
+ fence[i] += amdgpu_fence_count_emitted(&v->ring_dec);
+ total_fences += fence[i];
+ }
/* Only set DPG pause for VCN3 or below, VCN4 and above will
be handled by FW */
if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG &&
- !adev->vcn.inst[i].using_unified_queue) {
+ !vcn_inst->using_unified_queue) {
struct dpg_pause_state new_state;
if (fence[i] ||
@@ -436,18 +442,18 @@ static void
amdgpu_vcn_idle_work_handler(struct work_struct *work)
else
new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
- adev->vcn.inst[i].pause_dpg_mode(vcn_inst, &new_state);
+ vcn_inst->pause_dpg_mode(vcn_inst, &new_state);
}
- fence[i] += amdgpu_fence_count_emitted(&vcn_inst->ring_dec);
- fences += fence[i];
-
- if (!fences && !atomic_read(&vcn_inst->total_submission_cnt)) {
+ if (!fence[vcn_inst->inst] &&
!atomic_read(&vcn_inst->total_submission_cnt)) {
+ /* This is specific to this instance */
mutex_lock(&vcn_inst->vcn_pg_lock);
vcn_inst->set_pg_state(vcn_inst, AMD_PG_STATE_GATE);
mutex_unlock(&vcn_inst->vcn_pg_lock);
mutex_lock(&adev->vcn.workload_profile_mutex);
- if (adev->vcn.workload_profile_active) {
+ /* This is global and depends on all VCN instances */
+ if (adev->vcn.workload_profile_active &&
!total_fences &&
+ !atomic_read(&adev->vcn.total_submission_cnt)) {
r = amdgpu_dpm_switch_power_profile(adev,
PP_SMC_POWER_PROFILE_VIDEO,
false);
if (r)
@@ -467,16 +473,10 @@ void amdgpu_vcn_ring_begin_use(struct
amdgpu_ring *ring)
int r = 0;
atomic_inc(&vcn_inst->total_submission_cnt);
+ atomic_inc(&adev->vcn.total_submission_cnt);