On Wed, Aug 13, 2025 at 5:09 PM Sathishkumar S <sathishkumar.sundarar...@amd.com> wrote: > > There is a race condition which leads to dpm video power profile > OFF during active video decode on multi-instance VCN hardware. > Add flags to track ON/OFF vcn instances and check if all > instances are off before turning OFF video power profile. > > v2: > using per instance lock is wrong, use a global lock (David Wu) > > Fixes: 3b669df92c85 ("drm/amdgpu/vcn: adjust workload profile handling") > Signed-off-by: Sathishkumar S <sathishkumar.sundarar...@amd.com> > Signed-off-by: Alex Deucher <alexander.deuc...@amd.com> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 1 - > drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 31 +++++++++------------- > drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h | 7 +++-- > drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c | 18 +++---------- > 4 files changed, 21 insertions(+), 36 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > index f7d7e4748197..69a889b3222a 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > @@ -4444,7 +4444,6 @@ int amdgpu_device_init(struct amdgpu_device *adev, > } > mutex_init(&adev->gfx.userq_sch_mutex); > mutex_init(&adev->gfx.workload_profile_mutex); > - mutex_init(&adev->vcn.workload_profile_mutex);
We already have the global vcn.workload_profile_mutex. Why not just use that? It better describes what it is protecting. > mutex_init(&adev->userq_mutex); > > amdgpu_device_init_apu_flags(adev); > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c > index 9a76e11d1c18..e034baa77977 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c > @@ -102,6 +102,7 @@ int amdgpu_vcn_early_init(struct amdgpu_device *adev, int > i) > adev->vcn.inst[i].adev = adev; > adev->vcn.inst[i].inst = i; > amdgpu_ucode_ip_version_decode(adev, UVD_HWIP, ucode_prefix, > sizeof(ucode_prefix)); > + mutex_init(&adev->vcn.lock); This will get initialized multiple times as this function is called per instance. Better to initialize it in amdgpu_device_init(). > > if (i != 0 && adev->vcn.per_inst_fw) { > r = amdgpu_ucode_request(adev, &adev->vcn.inst[i].fw, > @@ -134,7 +135,6 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev, int i) > int r; > > mutex_init(&adev->vcn.inst[i].vcn1_jpeg1_workaround); > - mutex_init(&adev->vcn.inst[i].vcn_pg_lock); You can probably drop this lock as a separate patch. I don't see any concurrent access where we would need it. > mutex_init(&adev->vcn.inst[i].engine_reset_mutex); > atomic_set(&adev->vcn.inst[i].total_submission_cnt, 0); > INIT_DELAYED_WORK(&adev->vcn.inst[i].idle_work, > amdgpu_vcn_idle_work_handler); > @@ -290,7 +290,6 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev, int i) > if (adev->vcn.reg_list) > amdgpu_vcn_reg_dump_fini(adev); > > - mutex_destroy(&adev->vcn.inst[i].vcn_pg_lock); > mutex_destroy(&adev->vcn.inst[i].vcn1_jpeg1_workaround); > > return 0; > @@ -443,18 +442,18 @@ static void amdgpu_vcn_idle_work_handler(struct > work_struct *work) > fences += fence[i]; > > if (!fences && !atomic_read(&vcn_inst->total_submission_cnt)) { > - mutex_lock(&vcn_inst->vcn_pg_lock); > + mutex_lock(&adev->vcn.lock); > vcn_inst->set_pg_state(vcn_inst, AMD_PG_STATE_GATE); > - mutex_unlock(&vcn_inst->vcn_pg_lock); > - mutex_lock(&adev->vcn.workload_profile_mutex); > - if (adev->vcn.workload_profile_active) { > + adev->vcn.flags &= AMDGPU_VCN_FLAG_VINST_OFF(vcn_inst->inst); > + if (adev->vcn.workload_profile_active && > + !(adev->vcn.flags & > AMDGPU_VCN_FLAG_VINST_MASK(adev->vcn.num_vcn_inst))) { I still don't see how looking at the global emitted fences here would race, but I don't have a strong preference on how we solve this. Alex > r = amdgpu_dpm_switch_power_profile(adev, > PP_SMC_POWER_PROFILE_VIDEO, > false); > if (r) > dev_warn(adev->dev, "(%d) failed to disable > video power profile mode\n", r); > adev->vcn.workload_profile_active = false; > } > - mutex_unlock(&adev->vcn.workload_profile_mutex); > + mutex_unlock(&adev->vcn.lock); > } else { > schedule_delayed_work(&vcn_inst->idle_work, VCN_IDLE_TIMEOUT); > } > @@ -470,14 +469,8 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring) > > cancel_delayed_work_sync(&vcn_inst->idle_work); > > - /* We can safely return early here because we've cancelled the > - * the delayed work so there is no one else to set it to false > - * and we don't care if someone else sets it to true. > - */ > - if (adev->vcn.workload_profile_active) > - goto pg_lock; > + mutex_lock(&adev->vcn.lock); > > - mutex_lock(&adev->vcn.workload_profile_mutex); > if (!adev->vcn.workload_profile_active) { > r = amdgpu_dpm_switch_power_profile(adev, > PP_SMC_POWER_PROFILE_VIDEO, > true); > @@ -485,11 +478,11 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring) > dev_warn(adev->dev, "(%d) failed to switch to video > power profile mode\n", r); > adev->vcn.workload_profile_active = true; > } > - mutex_unlock(&adev->vcn.workload_profile_mutex); > > -pg_lock: > - mutex_lock(&vcn_inst->vcn_pg_lock); > - vcn_inst->set_pg_state(vcn_inst, AMD_PG_STATE_UNGATE); > + if (!(adev->vcn.flags & AMDGPU_VCN_FLAG_VINST_ON(vcn_inst->inst))) { > + vcn_inst->set_pg_state(vcn_inst, AMD_PG_STATE_UNGATE); > + adev->vcn.flags |= AMDGPU_VCN_FLAG_VINST_ON(vcn_inst->inst); > + } > > /* Only set DPG pause for VCN3 or below, VCN4 and above will be > handled by FW */ > if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG && > @@ -514,7 +507,7 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring) > > vcn_inst->pause_dpg_mode(vcn_inst, &new_state); > } > - mutex_unlock(&vcn_inst->vcn_pg_lock); > + mutex_unlock(&adev->vcn.lock); > } > > void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring) > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h > index b3fb1d0e43fc..4457dcc5f9dc 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h > @@ -320,7 +320,6 @@ struct amdgpu_vcn_inst { > uint8_t vcn_config; > uint32_t vcn_codec_disable_mask; > atomic_t total_submission_cnt; > - struct mutex vcn_pg_lock; > enum amd_powergating_state cur_state; > struct delayed_work idle_work; > unsigned fw_version; > @@ -363,9 +362,13 @@ struct amdgpu_vcn { > unsigned fw_version; > > bool workload_profile_active; > - struct mutex workload_profile_mutex; > + struct mutex lock; > u32 reg_count; > const struct amdgpu_hwip_reg_entry *reg_list; > +#define AMDGPU_VCN_FLAG_VINST_MASK(n) (BIT(n+1) - 1) > +#define AMDGPU_VCN_FLAG_VINST_ON(n) (BIT(n)) > +#define AMDGPU_VCN_FLAG_VINST_OFF(n) (~BIT(n)) > + u32 flags; > }; > > struct amdgpu_fw_shared_rb_ptrs_struct { > diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c > b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c > index 3a7c137a83ef..344ab5c4cb18 100644 > --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c > +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c > @@ -147,9 +147,9 @@ static void vcn_v2_5_idle_work_handler(struct work_struct > *work) > } > > if (!fences && !atomic_read(&adev->vcn.inst[0].total_submission_cnt)) > { > + mutex_lock(&adev->vcn.lock); > amdgpu_device_ip_set_powergating_state(adev, > AMD_IP_BLOCK_TYPE_VCN, > AMD_PG_STATE_GATE); > - mutex_lock(&adev->vcn.workload_profile_mutex); > if (adev->vcn.workload_profile_active) { > r = amdgpu_dpm_switch_power_profile(adev, > PP_SMC_POWER_PROFILE_VIDEO, > false); > @@ -157,7 +157,7 @@ static void vcn_v2_5_idle_work_handler(struct work_struct > *work) > dev_warn(adev->dev, "(%d) failed to disable > video power profile mode\n", r); > adev->vcn.workload_profile_active = false; > } > - mutex_unlock(&adev->vcn.workload_profile_mutex); > + mutex_unlock(&adev->vcn.lock); > } else { > schedule_delayed_work(&adev->vcn.inst[0].idle_work, > VCN_IDLE_TIMEOUT); > } > @@ -173,14 +173,7 @@ static void vcn_v2_5_ring_begin_use(struct amdgpu_ring > *ring) > > cancel_delayed_work_sync(&adev->vcn.inst[0].idle_work); > > - /* We can safely return early here because we've cancelled the > - * the delayed work so there is no one else to set it to false > - * and we don't care if someone else sets it to true. > - */ > - if (adev->vcn.workload_profile_active) > - goto pg_lock; > - > - mutex_lock(&adev->vcn.workload_profile_mutex); > + mutex_lock(&adev->vcn.lock); > if (!adev->vcn.workload_profile_active) { > r = amdgpu_dpm_switch_power_profile(adev, > PP_SMC_POWER_PROFILE_VIDEO, > true); > @@ -188,10 +181,7 @@ static void vcn_v2_5_ring_begin_use(struct amdgpu_ring > *ring) > dev_warn(adev->dev, "(%d) failed to switch to video > power profile mode\n", r); > adev->vcn.workload_profile_active = true; > } > - mutex_unlock(&adev->vcn.workload_profile_mutex); > > -pg_lock: > - mutex_lock(&adev->vcn.inst[0].vcn_pg_lock); > amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, > AMD_PG_STATE_UNGATE); > > @@ -217,7 +207,7 @@ static void vcn_v2_5_ring_begin_use(struct amdgpu_ring > *ring) > } > v->pause_dpg_mode(v, &new_state); > } > - mutex_unlock(&adev->vcn.inst[0].vcn_pg_lock); > + mutex_unlock(&adev->vcn.lock); > } > > static void vcn_v2_5_ring_end_use(struct amdgpu_ring *ring) > -- > 2.48.1 >