[PATCH] drm/amdgpu: update ras capability's query based on mem ecc configuration
RAS support capability needs to be updated on top of different memeory ECC enablement, and remove redundant memory ecc check in gmc module for vega20 and arcturus. v2: check HBM ECC enablement and set ras mask accordingly. v3: avoid to invoke atomfirmware interface to query twice. Suggested-by: Hawking Zhang Signed-off-by: Guchun Chen --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 24 - drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 36 ++--- 2 files changed, 32 insertions(+), 28 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 69b02b9d4131..38782add479a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -1738,18 +1738,30 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev, *hw_supported = 0; *supported = 0; - if (amdgpu_sriov_vf(adev) || + if (amdgpu_sriov_vf(adev) || !adev->is_atom_fw || (adev->asic_type != CHIP_VEGA20 && adev->asic_type != CHIP_ARCTURUS)) return; - if (adev->is_atom_fw && - (amdgpu_atomfirmware_mem_ecc_supported(adev) || -amdgpu_atomfirmware_sram_ecc_supported(adev))) - *hw_supported = AMDGPU_RAS_BLOCK_MASK; + if (amdgpu_atomfirmware_mem_ecc_supported(adev)) { + DRM_INFO("HBM ECC is active.\n"); + *hw_supported |= (1 << AMDGPU_RAS_BLOCK__UMC | + 1 << AMDGPU_RAS_BLOCK__DF); + } else + DRM_INFO("HBM ECC is not presented.\n"); + + if (amdgpu_atomfirmware_sram_ecc_supported(adev)) { + DRM_INFO("SRAM ECC is active.\n"); + *hw_supported |= ~(1 << AMDGPU_RAS_BLOCK__UMC | + 1 << AMDGPU_RAS_BLOCK__DF); + } else + DRM_INFO("SRAM ECC is not presented.\n"); + + /* hw_supported needs to be aligned with RAS block mask. */ + *hw_supported &= AMDGPU_RAS_BLOCK_MASK; *supported = amdgpu_ras_enable == 0 ? - 0 : *hw_supported & amdgpu_ras_mask; + 0 : *hw_supported & amdgpu_ras_mask; } int amdgpu_ras_init(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 90216abf14a4..3cc886e96420 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -886,29 +886,21 @@ static int gmc_v9_0_late_init(void *handle) if (r) return r; /* Check if ecc is available */ - if (!amdgpu_sriov_vf(adev)) { - switch (adev->asic_type) { - case CHIP_VEGA10: - case CHIP_VEGA20: - case CHIP_ARCTURUS: - r = amdgpu_atomfirmware_mem_ecc_supported(adev); - if (!r) { - DRM_INFO("ECC is not present.\n"); - if (adev->df.funcs->enable_ecc_force_par_wr_rmw) - adev->df.funcs->enable_ecc_force_par_wr_rmw(adev, false); - } else { - DRM_INFO("ECC is active.\n"); - } + if (!amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_VEGA10)) { + r = amdgpu_atomfirmware_mem_ecc_supported(adev); + if (!r) { + DRM_INFO("ECC is not present.\n"); + if (adev->df.funcs->enable_ecc_force_par_wr_rmw) + adev->df.funcs->enable_ecc_force_par_wr_rmw(adev, false); + } else { + DRM_INFO("ECC is active.\n"); + } - r = amdgpu_atomfirmware_sram_ecc_supported(adev); - if (!r) { - DRM_INFO("SRAM ECC is not present.\n"); - } else { - DRM_INFO("SRAM ECC is active.\n"); - } - break; - default: - break; + r = amdgpu_atomfirmware_sram_ecc_supported(adev); + if (!r) { + DRM_INFO("SRAM ECC is not present.\n"); + } else { + DRM_INFO("SRAM ECC is active.\n"); } } -- 2.17.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
RE: [PATCH 1/2] drm/amdgpu: update ras support capability with different sram ecc configuration
[AMD Public Use] Thanks for your suggestion, Hawking. I will send one patch v3 to target this. Regards, Guchun -Original Message- From: Zhang, Hawking Sent: Thursday, March 12, 2020 11:13 AM To: Chen, Guchun ; amd-gfx@lists.freedesktop.org; Li, Dennis ; Zhou1, Tao ; Clements, John Subject: RE: [PATCH 1/2] drm/amdgpu: update ras support capability with different sram ecc configuration [AMD Official Use Only - Internal Distribution Only] Hi Guchun, It seems to me we still have redundant function call in amdgpu_ras_check_supported. The atomfirmware interfaces are possibly invoked twice? As I listed the steps in last thread, we can assume hw_supported to 0 or 0xfff either. Check HBM ECC first, explicitly indicates it is present or not, and set the DF/UMC bit in hw_supported Check SRAM ECC, explicitly indicates It is present or not, and set other ip blocks masks. After we run all above checks, set the finally ras mask to con->supported. We'd better keep the message consistent as what we had in gmc_v9_0_late. No need to highlight the what IP block get disabled, that should be transparent to users. Regards, Hawking -Original Message- From: Chen, Guchun Sent: Thursday, March 12, 2020 10:55 To: amd-gfx@lists.freedesktop.org; Zhang, Hawking ; Li, Dennis ; Zhou1, Tao ; Clements, John Cc: Chen, Guchun Subject: [PATCH 1/2] drm/amdgpu: update ras support capability with different sram ecc configuration When sram ecc is disabled by vbios, ras initialization process in the corrresponding IPs that suppport sram ecc needs to be skipped. So update ras support capability accordingly on top of this configuration. This capability will block further ras operations to the unsupported IPs. v2: check HBM ECC enablement and set ras mask accordingly. Signed-off-by: Guchun Chen --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 37 +++-- 1 file changed, 35 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 69b02b9d4131..b08226c10d95 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -1748,8 +1748,41 @@ static void amdgpu_ras_check_supported (struct amdgpu_device *adev, amdgpu_atomfirmware_sram_ecc_supported(adev))) *hw_supported = AMDGPU_RAS_BLOCK_MASK; - *supported = amdgpu_ras_enable == 0 ? - 0 : *hw_supported & amdgpu_ras_mask; + /* Both HBM and SRAM ECC are disabled in vbios. */ + if (*hw_supported == 0) { + DRM_INFO("RAS HW support is disabled as HBM" + " and SRAM ECC are not presented.\n"); + return; + } + + if (amdgpu_ras_enable) { + *supported = *hw_supported; + + /* +* When HBM ECC is disabled in vbios, remove +* UMC's and DF's ras support. +*/ + if (!amdgpu_atomfirmware_mem_ecc_supported(adev)) { + DRM_INFO("HBM ECC is disabled and " + "remove UMC and DF ras support.\n"); + *supported &= ~(1 << AMDGPU_RAS_BLOCK__UMC | + 1 << AMDGPU_RAS_BLOCK__DF); + } + + /* +* When sram ecc is disabled in vbios, bypass those IP +* blocks that support sram ecc, and only hold UMC and DF. +*/ + if (!amdgpu_atomfirmware_sram_ecc_supported(adev)) { + DRM_INFO("SRAM ECC is disabled and remove ras support " + "from IPs that support sram ecc.\n"); + *supported &= (1 << AMDGPU_RAS_BLOCK__UMC | + 1 << AMDGPU_RAS_BLOCK__DF); + } + + /* ras support needs to align with module parmeter */ + *supported &= amdgpu_ras_mask; + } } int amdgpu_ras_init(struct amdgpu_device *adev) -- 2.17.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
RE: [PATCH 2/2] drm/amdgpu: remove mem ecc check for vega20 and arcturus
[AMD Official Use Only - Internal Distribution Only] I think we can merge the patch with first one as they are all refine current logic for querying ras capability. Regards, Hawking -Original Message- From: Chen, Guchun Sent: Thursday, March 12, 2020 10:55 To: amd-gfx@lists.freedesktop.org; Zhang, Hawking ; Li, Dennis ; Zhou1, Tao ; Clements, John Cc: Chen, Guchun Subject: [PATCH 2/2] drm/amdgpu: remove mem ecc check for vega20 and arcturus Memory ecc check including HBM and SRAM has been done in ras init function for vega20 and arcturus. So remove it from gmc module, only keep this check for vega10. Suggested-by: Hawking Zhang Signed-off-by: Guchun Chen --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 14 +++--- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 90216abf14a4..9bde66a6b432 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -887,28 +887,20 @@ static int gmc_v9_0_late_init(void *handle) return r; /* Check if ecc is available */ if (!amdgpu_sriov_vf(adev)) { - switch (adev->asic_type) { - case CHIP_VEGA10: - case CHIP_VEGA20: - case CHIP_ARCTURUS: + if (adev->asic_type == CHIP_VEGA10) { r = amdgpu_atomfirmware_mem_ecc_supported(adev); if (!r) { DRM_INFO("ECC is not present.\n"); if (adev->df.funcs->enable_ecc_force_par_wr_rmw) adev->df.funcs->enable_ecc_force_par_wr_rmw(adev, false); - } else { + } else DRM_INFO("ECC is active.\n"); - } r = amdgpu_atomfirmware_sram_ecc_supported(adev); if (!r) { DRM_INFO("SRAM ECC is not present.\n"); - } else { + } else DRM_INFO("SRAM ECC is active.\n"); - } - break; - default: - break; } } -- 2.17.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
RE: [PATCH 1/2] drm/amdgpu: update ras support capability with different sram ecc configuration
[AMD Official Use Only - Internal Distribution Only] Hi Guchun, It seems to me we still have redundant function call in amdgpu_ras_check_supported. The atomfirmware interfaces are possibly invoked twice? As I listed the steps in last thread, we can assume hw_supported to 0 or 0xfff either. Check HBM ECC first, explicitly indicates it is present or not, and set the DF/UMC bit in hw_supported Check SRAM ECC, explicitly indicates It is present or not, and set other ip blocks masks. After we run all above checks, set the finally ras mask to con->supported. We'd better keep the message consistent as what we had in gmc_v9_0_late. No need to highlight the what IP block get disabled, that should be transparent to users. Regards, Hawking -Original Message- From: Chen, Guchun Sent: Thursday, March 12, 2020 10:55 To: amd-gfx@lists.freedesktop.org; Zhang, Hawking ; Li, Dennis ; Zhou1, Tao ; Clements, John Cc: Chen, Guchun Subject: [PATCH 1/2] drm/amdgpu: update ras support capability with different sram ecc configuration When sram ecc is disabled by vbios, ras initialization process in the corrresponding IPs that suppport sram ecc needs to be skipped. So update ras support capability accordingly on top of this configuration. This capability will block further ras operations to the unsupported IPs. v2: check HBM ECC enablement and set ras mask accordingly. Signed-off-by: Guchun Chen --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 37 +++-- 1 file changed, 35 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 69b02b9d4131..b08226c10d95 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -1748,8 +1748,41 @@ static void amdgpu_ras_check_supported (struct amdgpu_device *adev, amdgpu_atomfirmware_sram_ecc_supported(adev))) *hw_supported = AMDGPU_RAS_BLOCK_MASK; - *supported = amdgpu_ras_enable == 0 ? - 0 : *hw_supported & amdgpu_ras_mask; + /* Both HBM and SRAM ECC are disabled in vbios. */ + if (*hw_supported == 0) { + DRM_INFO("RAS HW support is disabled as HBM" + " and SRAM ECC are not presented.\n"); + return; + } + + if (amdgpu_ras_enable) { + *supported = *hw_supported; + + /* +* When HBM ECC is disabled in vbios, remove +* UMC's and DF's ras support. +*/ + if (!amdgpu_atomfirmware_mem_ecc_supported(adev)) { + DRM_INFO("HBM ECC is disabled and " + "remove UMC and DF ras support.\n"); + *supported &= ~(1 << AMDGPU_RAS_BLOCK__UMC | + 1 << AMDGPU_RAS_BLOCK__DF); + } + + /* +* When sram ecc is disabled in vbios, bypass those IP +* blocks that support sram ecc, and only hold UMC and DF. +*/ + if (!amdgpu_atomfirmware_sram_ecc_supported(adev)) { + DRM_INFO("SRAM ECC is disabled and remove ras support " + "from IPs that support sram ecc.\n"); + *supported &= (1 << AMDGPU_RAS_BLOCK__UMC | + 1 << AMDGPU_RAS_BLOCK__DF); + } + + /* ras support needs to align with module parmeter */ + *supported &= amdgpu_ras_mask; + } } int amdgpu_ras_init(struct amdgpu_device *adev) -- 2.17.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
RE: [PATCH] drm/amdgpu: add fbdev suspend/resume on gpu reset
Have not tried Navi14 yet. But likely it can fix that baco failure. At least it can fix the baco issue of Navi10 which is very similar as Navi14's. Regards, Evan -Original Message- From: Yuan, Xiaojie Sent: Wednesday, March 11, 2020 10:10 PM To: Quan, Evan ; amd-gfx@lists.freedesktop.org Subject: Re: [PATCH] drm/amdgpu: add fbdev suspend/resume on gpu reset [AMD Official Use Only - Internal Distribution Only] Hi Evan, Does this patch also fix the baco failure on Navi14 with display connected? BR, Xiaojie From: amd-gfx on behalf of Evan Quan Sent: Wednesday, March 11, 2020 4:18 PM To: amd-gfx@lists.freedesktop.org Cc: Quan, Evan Subject: [PATCH] drm/amdgpu: add fbdev suspend/resume on gpu reset This can fix the baco reset failure seen on Navi10. And this should be a low risk fix as the same sequence is already used for system suspend/resume. Change-Id: Idb4d02c5fcbbd5b7817195ee04c7af34c346a053 Signed-off-by: Evan Quan --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 572eb6ea8eab..a35c89973614 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3935,6 +3935,8 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive, if (r) goto out; + amdgpu_fbdev_set_suspend(tmp_adev, 0); + /* must succeed. */ amdgpu_ras_resume(tmp_adev); @@ -4108,6 +4110,8 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, */ amdgpu_unregister_gpu_instance(tmp_adev); + amdgpu_fbdev_set_suspend(adev, 1); + /* disable ras on ALL IPs */ if (!(in_ras_intr && !use_baco) && amdgpu_device_ip_need_full_reset(tmp_adev)) -- 2.25.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfxdata=02%7C01%7CXiaojie.Yuan%40amd.com%7C3eace080fd0b4f67337e08d7c594e441%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637195115872403614sdata=%2B%2B3xRFOl2Ho%2BRe9VuzqHtZNoVZ3s%2BxIP7xTv6yG11LA%3Dreserved=0 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 1/2] drm/amdgpu: update ras support capability with different sram ecc configuration
When sram ecc is disabled by vbios, ras initialization process in the corrresponding IPs that suppport sram ecc needs to be skipped. So update ras support capability accordingly on top of this configuration. This capability will block further ras operations to the unsupported IPs. v2: check HBM ECC enablement and set ras mask accordingly. Signed-off-by: Guchun Chen --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 37 +++-- 1 file changed, 35 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 69b02b9d4131..b08226c10d95 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -1748,8 +1748,41 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev, amdgpu_atomfirmware_sram_ecc_supported(adev))) *hw_supported = AMDGPU_RAS_BLOCK_MASK; - *supported = amdgpu_ras_enable == 0 ? - 0 : *hw_supported & amdgpu_ras_mask; + /* Both HBM and SRAM ECC are disabled in vbios. */ + if (*hw_supported == 0) { + DRM_INFO("RAS HW support is disabled as HBM" + " and SRAM ECC are not presented.\n"); + return; + } + + if (amdgpu_ras_enable) { + *supported = *hw_supported; + + /* +* When HBM ECC is disabled in vbios, remove +* UMC's and DF's ras support. +*/ + if (!amdgpu_atomfirmware_mem_ecc_supported(adev)) { + DRM_INFO("HBM ECC is disabled and " + "remove UMC and DF ras support.\n"); + *supported &= ~(1 << AMDGPU_RAS_BLOCK__UMC | + 1 << AMDGPU_RAS_BLOCK__DF); + } + + /* +* When sram ecc is disabled in vbios, bypass those IP +* blocks that support sram ecc, and only hold UMC and DF. +*/ + if (!amdgpu_atomfirmware_sram_ecc_supported(adev)) { + DRM_INFO("SRAM ECC is disabled and remove ras support " + "from IPs that support sram ecc.\n"); + *supported &= (1 << AMDGPU_RAS_BLOCK__UMC | + 1 << AMDGPU_RAS_BLOCK__DF); + } + + /* ras support needs to align with module parmeter */ + *supported &= amdgpu_ras_mask; + } } int amdgpu_ras_init(struct amdgpu_device *adev) -- 2.17.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 2/2] drm/amdgpu: remove mem ecc check for vega20 and arcturus
Memory ecc check including HBM and SRAM has been done in ras init function for vega20 and arcturus. So remove it from gmc module, only keep this check for vega10. Suggested-by: Hawking Zhang Signed-off-by: Guchun Chen --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 14 +++--- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 90216abf14a4..9bde66a6b432 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -887,28 +887,20 @@ static int gmc_v9_0_late_init(void *handle) return r; /* Check if ecc is available */ if (!amdgpu_sriov_vf(adev)) { - switch (adev->asic_type) { - case CHIP_VEGA10: - case CHIP_VEGA20: - case CHIP_ARCTURUS: + if (adev->asic_type == CHIP_VEGA10) { r = amdgpu_atomfirmware_mem_ecc_supported(adev); if (!r) { DRM_INFO("ECC is not present.\n"); if (adev->df.funcs->enable_ecc_force_par_wr_rmw) adev->df.funcs->enable_ecc_force_par_wr_rmw(adev, false); - } else { + } else DRM_INFO("ECC is active.\n"); - } r = amdgpu_atomfirmware_sram_ecc_supported(adev); if (!r) { DRM_INFO("SRAM ECC is not present.\n"); - } else { + } else DRM_INFO("SRAM ECC is active.\n"); - } - break; - default: - break; } } -- 2.17.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[pull] amdgpu 5.6 fixes
Hi Dave, Daniel, Fixes for 5.6. The following changes since commit 513dc792d6060d5ef572e43852683097a8420f56: vgacon: Fix a UAF in vgacon_invert_region (2020-03-06 21:06:34 +0100) are available in the Git repository at: git://people.freedesktop.org/~agd5f/linux tags/amd-drm-fixes-5.6-2020-03-11 for you to fetch changes up to 1d2686d417c5998af3817f93be01745b3db57ecd: drm/amdgpu/powerplay: nv1x, renior copy dcn clock settings of watermark to smu during boot up (2020-03-10 17:31:10 -0400) amd-drm-fixes-5.6-2020-03-11: amdgpu: - Update the display watermark bounding box for navi14 - Fix fetching vbios directly from rom on vega20/arcturus - Navi and renoir watermark fixes Hawking Zhang (1): drm/amdgpu: correct ROM_INDEX/DATA offset for VEGA20 Hersen Wu (1): drm/amdgpu/powerplay: nv1x, renior copy dcn clock settings of watermark to smu during boot up Martin Leung (1): drm/amd/display: update soc bb for nv14 drivers/gpu/drm/amd/amdgpu/soc15.c | 25 - .../gpu/drm/amd/display/dc/dcn20/dcn20_resource.c | 114 + drivers/gpu/drm/amd/powerplay/amdgpu_smu.c | 7 +- drivers/gpu/drm/amd/powerplay/navi10_ppt.c | 22 ++-- drivers/gpu/drm/amd/powerplay/renoir_ppt.c | 5 +- 5 files changed, 158 insertions(+), 15 deletions(-) ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH hmm 5/8] mm/hmm: add missing call to hmm_range_need_fault() before returning EFAULT
On 3/11/20 11:35 AM, Jason Gunthorpe wrote: From: Jason Gunthorpe All return paths that do EFAULT must call hmm_range_need_fault() to determine if the user requires this page to be valid. If the page cannot be made valid if the user later requires it, due to vma flags in this case, then the return should be HMM_PFN_ERROR. Fixes: a3e0d41c2b1f ("mm/hmm: improve driver API to work and wait over a range") Signed-off-by: Jason Gunthorpe Reviewed-by: Ralph Campbell --- mm/hmm.c | 19 --- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/mm/hmm.c b/mm/hmm.c index 5f5ccf13dd1e85..e10cd0adba7b37 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -582,18 +582,15 @@ static int hmm_vma_walk_test(unsigned long start, unsigned long end, struct vm_area_struct *vma = walk->vma; /* -* Skip vma ranges that don't have struct page backing them or -* map I/O devices directly. -*/ - if (vma->vm_flags & (VM_IO | VM_PFNMAP | VM_MIXEDMAP)) - return -EFAULT; - - /* +* Skip vma ranges that don't have struct page backing them or map I/O +* devices directly. +* * If the vma does not allow read access, then assume that it does not -* allow write access either. HMM does not support architectures -* that allow write without read. +* allow write access either. HMM does not support architectures that +* allow write without read. */ - if (!(vma->vm_flags & VM_READ)) { + if ((vma->vm_flags & (VM_IO | VM_PFNMAP | VM_MIXEDMAP)) || + !(vma->vm_flags & VM_READ)) { bool fault, write_fault; /* @@ -607,7 +604,7 @@ static int hmm_vma_walk_test(unsigned long start, unsigned long end, if (fault || write_fault) return -EFAULT; - hmm_pfns_fill(start, end, range, HMM_PFN_NONE); + hmm_pfns_fill(start, end, range, HMM_PFN_ERROR); hmm_vma_walk->last = end; /* Skip this vma and continue processing the next vma. */ ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH hmm 4/8] mm/hmm: add missing pfns set to hmm_vma_walk_pmd()
On 3/11/20 11:35 AM, Jason Gunthorpe wrote: From: Jason Gunthorpe All success exit paths from the walker functions must set the pfns array. A migration entry with no required fault is a HMM_PFN_NONE return, just like the pte case. Fixes: d08faca018c4 ("mm/hmm: properly handle migration pmd") Signed-off-by: Jason Gunthorpe Reviewed-by: Ralph Campbell --- mm/hmm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/hmm.c b/mm/hmm.c index 32dcbfd3908315..5f5ccf13dd1e85 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -394,7 +394,7 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp, pmd_migration_entry_wait(walk->mm, pmdp); return -EBUSY; } - return 0; + return hmm_pfns_fill(start, end, range, HMM_PFN_NONE); } else if (!pmd_present(pmd)) return hmm_pfns_fill(start, end, range, HMM_PFN_ERROR); ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH hmm 3/8] mm/hmm: do not call hmm_vma_walk_hole() while holding a spinlock
On 3/11/20 11:35 AM, Jason Gunthorpe wrote: From: Jason Gunthorpe This eventually calls into handle_mm_fault() which is a sleeping function. Release the lock first. hmm_vma_walk_hole() does not touch the contents of the PUD, so it does not need the lock. Fixes: 3afc423632a1 ("mm: pagewalk: add p4d_entry() and pgd_entry()") Cc: Steven Price Signed-off-by: Jason Gunthorpe Reviewed-by: Ralph Campbell --- mm/hmm.c | 14 +++--- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/mm/hmm.c b/mm/hmm.c index 9e8f68eb83287a..32dcbfd3908315 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -473,8 +473,8 @@ static int hmm_vma_walk_pud(pud_t *pudp, unsigned long start, unsigned long end, pud = READ_ONCE(*pudp); if (pud_none(pud)) { - ret = hmm_vma_walk_hole(start, end, -1, walk); - goto out_unlock; + spin_unlock(ptl); + return hmm_vma_walk_hole(start, end, -1, walk); } if (pud_huge(pud) && pud_devmap(pud)) { @@ -483,8 +483,8 @@ static int hmm_vma_walk_pud(pud_t *pudp, unsigned long start, unsigned long end, bool fault, write_fault; if (!pud_present(pud)) { - ret = hmm_vma_walk_hole(start, end, -1, walk); - goto out_unlock; + spin_unlock(ptl); + return hmm_vma_walk_hole(start, end, -1, walk); } i = (addr - range->start) >> PAGE_SHIFT; @@ -495,9 +495,9 @@ static int hmm_vma_walk_pud(pud_t *pudp, unsigned long start, unsigned long end, hmm_range_need_fault(hmm_vma_walk, pfns, npages, cpu_flags, , _fault); if (fault || write_fault) { - ret = hmm_vma_walk_hole_(addr, end, fault, -write_fault, walk); - goto out_unlock; + spin_unlock(ptl); + return hmm_vma_walk_hole_(addr, end, fault, write_fault, + walk); } pfn = pud_pfn(pud) + ((addr & ~PUD_MASK) >> PAGE_SHIFT); ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH hmm 2/8] mm/hmm: don't free the cached pgmap while scanning
On 3/11/20 11:35 AM, Jason Gunthorpe wrote: From: Jason Gunthorpe The pgmap is held in the hmm_vma_walk variable in hope of speeding up future get_dev_pagemap() calls by hitting the same pointer. The algorithm doesn't actually care about how long the pgmap is held for. Move the put of the cached pgmap to after the walk is completed and delete all the other now redundant puts. This solves a possible leak of the reference in hmm_vma_walk_pmd() if a hmm_vma_handle_pte() fails while looping. Fixes: 992de9a8b751 ("mm/hmm: allow to mirror vma of a file on a DAX backed filesystem") Signed-off-by: Jason Gunthorpe Reviewed-by: Ralph Campbell --- mm/hmm.c | 31 +-- 1 file changed, 9 insertions(+), 22 deletions(-) We talked about just deleting this stuff, but I think it makes alot sense for hmm_range_fault() to trigger fault on devmap pages that are not compatible with the caller - so lets just fix the leak on error path for now. diff --git a/mm/hmm.c b/mm/hmm.c index 35f85424176d14..9e8f68eb83287a 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -239,10 +239,6 @@ static int hmm_vma_handle_pmd(struct mm_walk *walk, unsigned long addr, } pfns[i] = hmm_device_entry_from_pfn(range, pfn) | cpu_flags; } - if (hmm_vma_walk->pgmap) { - put_dev_pagemap(hmm_vma_walk->pgmap); - hmm_vma_walk->pgmap = NULL; - } hmm_vma_walk->last = end; return 0; } @@ -360,10 +356,6 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr, return 0; fault: - if (hmm_vma_walk->pgmap) { - put_dev_pagemap(hmm_vma_walk->pgmap); - hmm_vma_walk->pgmap = NULL; - } pte_unmap(ptep); /* Fault any virtual address we were asked to fault */ return hmm_vma_walk_hole_(addr, end, fault, write_fault, walk); @@ -446,16 +438,6 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp, return r; } } - if (hmm_vma_walk->pgmap) { - /* -* We do put_dev_pagemap() here and not in hmm_vma_handle_pte() -* so that we can leverage get_dev_pagemap() optimization which -* will not re-take a reference on a pgmap if we already have -* one. -*/ - put_dev_pagemap(hmm_vma_walk->pgmap); - hmm_vma_walk->pgmap = NULL; - } pte_unmap(ptep - 1); hmm_vma_walk->last = addr; @@ -529,10 +511,6 @@ static int hmm_vma_walk_pud(pud_t *pudp, unsigned long start, unsigned long end, pfns[i] = hmm_device_entry_from_pfn(range, pfn) | cpu_flags; } - if (hmm_vma_walk->pgmap) { - put_dev_pagemap(hmm_vma_walk->pgmap); - hmm_vma_walk->pgmap = NULL; - } hmm_vma_walk->last = end; goto out_unlock; } @@ -694,6 +672,15 @@ long hmm_range_fault(struct hmm_range *range, unsigned int flags) return -EBUSY; ret = walk_page_range(mm, hmm_vma_walk.last, range->end, _walk_ops, _vma_walk); + /* +* A pgmap is kept cached in the hmm_vma_walk to avoid expensive +* searching in the probably common case that the pgmap is the +* same for the entire requested range. +*/ + if (hmm_vma_walk.pgmap) { + put_dev_pagemap(hmm_vma_walk.pgmap); + hmm_vma_walk.pgmap = NULL; + } } while (ret == -EBUSY); if (ret) ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH hmm 8/8] mm/hmm: add missing call to hmm_pte_need_fault in HMM_PFN_SPECIAL handling
On 3/11/20 11:35 AM, Jason Gunthorpe wrote: From: Jason Gunthorpe Currently if a special PTE is encountered hmm_range_fault() immediately returns EFAULT and sets the HMM_PFN_SPECIAL error output (which nothing uses). EFAULT should only be returned after testing with hmm_pte_need_fault(). Also pte_devmap() and pte_special() are exclusive, and there is no need to check IS_ENABLED, pte_special() is stubbed out to return false on unsupported architectures. Fixes: 992de9a8b751 ("mm/hmm: allow to mirror vma of a file on a DAX backed filesystem") Signed-off-by: Jason Gunthorpe Reviewed-by: Ralph Campbell --- mm/hmm.c | 19 --- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/mm/hmm.c b/mm/hmm.c index f61fddf2ef6505..ca33d086bdc190 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -335,16 +335,21 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr, pte_unmap(ptep); return -EBUSY; } - } else if (IS_ENABLED(CONFIG_ARCH_HAS_PTE_SPECIAL) && pte_special(pte)) { - if (!is_zero_pfn(pte_pfn(pte))) { + } + + /* +* Since each architecture defines a struct page for the zero page, just +* fall through and treat it like a normal page. +*/ + if (pte_special(pte) && !is_zero_pfn(pte_pfn(pte))) { + hmm_pte_need_fault(hmm_vma_walk, orig_pfn, 0, , + _fault); + if (fault || write_fault) { pte_unmap(ptep); - *pfn = range->values[HMM_PFN_SPECIAL]; return -EFAULT; } - /* -* Since each architecture defines a struct page for the zero -* page, just fall through and treat it like a normal page. -*/ + *pfn = range->values[HMM_PFN_SPECIAL]; + return 0; } *pfn = hmm_device_entry_from_pfn(range, pte_pfn(pte)) | cpu_flags; ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH hmm 7/8] mm/hmm: return -EFAULT when setting HMM_PFN_ERROR on requested valid pages
On 3/11/20 11:35 AM, Jason Gunthorpe wrote: From: Jason Gunthorpe hmm_range_fault() should never return 0 if the caller requested a valid page, but the pfns output for that page would be HMM_PFN_ERROR. hmm_pte_need_fault() must always be called before setting HMM_PFN_ERROR to detect if the page is in faulting mode or not. Fix two cases in hmm_vma_walk_pmd() and reorganize some of the duplicated code. Fixes: d08faca018c4 ("mm/hmm: properly handle migration pmd") Fixes: da4c3c735ea4 ("mm/hmm/mirror: helper to snapshot CPU page table") Signed-off-by: Jason Gunthorpe Reviewed-by: Ralph Campbell --- mm/hmm.c | 38 +- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/mm/hmm.c b/mm/hmm.c index bf676cfef3e8ee..f61fddf2ef6505 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -363,8 +363,10 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp, { struct hmm_vma_walk *hmm_vma_walk = walk->private; struct hmm_range *range = hmm_vma_walk->range; - uint64_t *pfns = range->pfns; - unsigned long addr = start, i; + uint64_t *pfns = >pfns[(start - range->start) >> PAGE_SHIFT]; + unsigned long npages = (end - start) >> PAGE_SHIFT; + unsigned long addr = start; + bool fault, write_fault; pte_t *ptep; pmd_t pmd; @@ -374,14 +376,6 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp, return hmm_vma_walk_hole(start, end, -1, walk); if (thp_migration_supported() && is_pmd_migration_entry(pmd)) { - bool fault, write_fault; - unsigned long npages; - uint64_t *pfns; - - i = (addr - range->start) >> PAGE_SHIFT; - npages = (end - addr) >> PAGE_SHIFT; - pfns = >pfns[i]; - hmm_range_need_fault(hmm_vma_walk, pfns, npages, 0, , _fault); if (fault || write_fault) { @@ -390,8 +384,15 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp, return -EBUSY; } return hmm_pfns_fill(start, end, range, HMM_PFN_NONE); - } else if (!pmd_present(pmd)) + } + + if (!pmd_present(pmd)) { + hmm_range_need_fault(hmm_vma_walk, pfns, npages, 0, , +_fault); + if (fault || write_fault) + return -EFAULT; return hmm_pfns_fill(start, end, range, HMM_PFN_ERROR); Shouldn't this fill with HMM_PFN_NONE instead of HMM_PFN_ERROR? Otherwise, when a THP is swapped out, you will get a different value than if a PTE is swapped out and you are prefetching/snapshotting. + } if (pmd_devmap(pmd) || pmd_trans_huge(pmd)) { /* @@ -408,8 +409,7 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp, if (!pmd_devmap(pmd) && !pmd_trans_huge(pmd)) goto again; - i = (addr - range->start) >> PAGE_SHIFT; - return hmm_vma_handle_pmd(walk, addr, end, [i], pmd); + return hmm_vma_handle_pmd(walk, addr, end, pfns, pmd); } /* @@ -418,15 +418,19 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp, * entry pointing to pte directory or it is a bad pmd that will not * recover. */ - if (pmd_bad(pmd)) + if (pmd_bad(pmd)) { + hmm_range_need_fault(hmm_vma_walk, pfns, npages, 0, , +_fault); + if (fault || write_fault) + return -EFAULT; return hmm_pfns_fill(start, end, range, HMM_PFN_ERROR); + } ptep = pte_offset_map(pmdp, addr); - i = (addr - range->start) >> PAGE_SHIFT; - for (; addr < end; addr += PAGE_SIZE, ptep++, i++) { + for (; addr < end; addr += PAGE_SIZE, ptep++, pfns++) { int r; - r = hmm_vma_handle_pte(walk, addr, end, pmdp, ptep, [i]); + r = hmm_vma_handle_pte(walk, addr, end, pmdp, ptep, pfns); if (r) { /* hmm_vma_handle_pte() did pte_unmap() */ hmm_vma_walk->last = addr; ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH hmm 6/8] mm/hmm: reorganize how !pte_present is handled in hmm_vma_handle_pte()
On 3/11/20 11:35 AM, Jason Gunthorpe wrote: From: Jason Gunthorpe The intention with this code is to determine if the caller required the pages to be valid, and if so, then take some action to make them valid. The action varies depending on the page type. In all cases, if the caller doesn't ask for the page, then hmm_range_fault() should not return an error. Revise the implementation to be clearer, and fix some bugs: - hmm_pte_need_fault() must always be called before testing fault or write_fault otherwise the defaults of false apply and the if()'s don't work. This was missed on the is_migration_entry() branch - -EFAULT should not be returned unless hmm_pte_need_fault() indicates fault is required - ie snapshotting should not fail. - For !pte_present() the cpu_flags are always 0, except in the special case of is_device_private_entry(), calling pte_to_hmm_pfn_flags() is confusing. Reorganize the flow so that it always follows the pattern of calling hmm_pte_need_fault() and then checking fault || write_fault. Fixes: 2aee09d8c116 ("mm/hmm: change hmm_vma_fault() to allow write fault on page basis") Signed-off-by: Jason Gunthorpe Reviewed-by: Ralph Campbell --- mm/hmm.c | 35 +++ 1 file changed, 15 insertions(+), 20 deletions(-) diff --git a/mm/hmm.c b/mm/hmm.c index e10cd0adba7b37..bf676cfef3e8ee 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -282,15 +282,6 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr, if (!pte_present(pte)) { swp_entry_t entry = pte_to_swp_entry(pte); - if (!non_swap_entry(entry)) { - cpu_flags = pte_to_hmm_pfn_flags(range, pte); - hmm_pte_need_fault(hmm_vma_walk, orig_pfn, cpu_flags, - , _fault); - if (fault || write_fault) - goto fault; - return 0; - } - /* * This is a special swap entry, ignore migration, use * device and report anything else as error. @@ -310,26 +301,30 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr, return 0; } - if (is_migration_entry(entry)) { - if (fault || write_fault) { - pte_unmap(ptep); - hmm_vma_walk->last = addr; - migration_entry_wait(walk->mm, pmdp, addr); - return -EBUSY; - } + hmm_pte_need_fault(hmm_vma_walk, orig_pfn, 0, , + _fault); + if (!fault && !write_fault) return 0; + + if (!non_swap_entry(entry)) + goto fault; + + if (is_migration_entry(entry)) { + pte_unmap(ptep); + hmm_vma_walk->last = addr; + migration_entry_wait(walk->mm, pmdp, addr); + return -EBUSY; } /* Report error for everything else */ pte_unmap(ptep); *pfn = range->values[HMM_PFN_ERROR]; return -EFAULT; - } else { - cpu_flags = pte_to_hmm_pfn_flags(range, pte); - hmm_pte_need_fault(hmm_vma_walk, orig_pfn, cpu_flags, - , _fault); } + cpu_flags = pte_to_hmm_pfn_flags(range, pte); + hmm_pte_need_fault(hmm_vma_walk, orig_pfn, cpu_flags, , + _fault); if (fault || write_fault) goto fault; ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH -next 023/491] AMD KFD: Use fallthrough;
On Wed, 2020-03-11 at 17:50 -0400, Felix Kuehling wrote: > On 2020-03-11 12:51 a.m., Joe Perches wrote: > > Convert the various uses of fallthrough comments to fallthrough; > > > > Done via script > > Link: > > https://lore.kernel.org/lkml/b56602fcf79f849e733e7b521bb0e17895d390fa.1582230379.git.joe.com/ > > The link seems to be broken. This one works: > https://lore.kernel.org/lkml/b56602fcf79f849e733e7b521bb0e17895d390fa.1582230379.git@perches.com/ Thanks. I neglected to use a backslash on the generating script. In the script in 0/491, Link: https://lore.kernel.org/lkml/b56602fcf79f849e733e7b521bb0e17895d390fa.1582230379.git@perches.com/ likely should have been: Link: https://lore.kernel.org/lkml/b56602fcf79f849e733e7b521bb0e17895d390fa.1582230379.git.joe\@perches.com/ ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH hmm 1/8] mm/hmm: add missing unmaps of the ptep during hmm_vma_handle_pte()
On 3/11/20 11:34 AM, Jason Gunthorpe wrote: From: Jason Gunthorpe Many of the direct returns of error skipped doing the pte_unmap(). All non zero exit paths must unmap the pte. The pte_unmap() is split unnaturally like this because some of the error exit paths trigger a sleep and must release the lock before sleeping. Fixes: 992de9a8b751 ("mm/hmm: allow to mirror vma of a file on a DAX backed filesystem") Fixes: 53f5c3f489ec ("mm/hmm: factor out pte and pmd handling to simplify hmm_vma_walk_pmd()") Signed-off-by: Jason Gunthorpe The changes look OK to me but one issue noted below. In any case, you can add: Reviewed-by: Ralph Campbell --- mm/hmm.c | 8 ++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/mm/hmm.c b/mm/hmm.c index 72e5a6d9a41756..35f85424176d14 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -325,6 +325,7 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr, } /* Report error for everything else */ + pte_unmap(ptep); *pfn = range->values[HMM_PFN_ERROR]; return -EFAULT; } else { @@ -339,10 +340,13 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr, if (pte_devmap(pte)) { hmm_vma_walk->pgmap = get_dev_pagemap(pte_pfn(pte), hmm_vma_walk->pgmap); - if (unlikely(!hmm_vma_walk->pgmap)) + if (unlikely(!hmm_vma_walk->pgmap)) { + pte_unmap(ptep); return -EBUSY; + } } else if (IS_ENABLED(CONFIG_ARCH_HAS_PTE_SPECIAL) && pte_special(pte)) { if (!is_zero_pfn(pte_pfn(pte))) { + pte_unmap(ptep); *pfn = range->values[HMM_PFN_SPECIAL]; return -EFAULT; } @@ -437,7 +441,7 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp, r = hmm_vma_handle_pte(walk, addr, end, pmdp, ptep, [i]); if (r) { - /* hmm_vma_handle_pte() did unmap pte directory */ + /* hmm_vma_handle_pte() did pte_unmap() */ hmm_vma_walk->last = addr; return r; } I think there is a case where hmm_vma_handle_pte() is called, a fault is requested, pte_unmap() and hmm_vma_walk_hole_() are called, the latter returns zero (the fault was handled OK), but now the page table is unmapped for successive loop iterations and pte_unmap(ptep - 1) is called at the loop end. Since this isn't an issue on x86_64 but is on x86_32, I can see how it could be missed. ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH -next 023/491] AMD KFD: Use fallthrough;
On 2020-03-11 12:51 a.m., Joe Perches wrote: Convert the various uses of fallthrough comments to fallthrough; Done via script Link: https://lore.kernel.org/lkml/b56602fcf79f849e733e7b521bb0e17895d390fa.1582230379.git.joe.com/ The link seems to be broken. This one works: https://lore.kernel.org/lkml/b56602fcf79f849e733e7b521bb0e17895d390fa.1582230379.git@perches.com/ Signed-off-by: Joe Perches Acked-by: Felix Kuehling --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c index d6549e..6529ca 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c @@ -79,7 +79,7 @@ static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev, dev_warn(adev->dev, "Invalid sdma engine id (%d), using engine id 0\n", engine_id); - /* fall through */ + fallthrough; case 0: sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL; ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] drm/amd/amdgpu: Fix GPR read from debugfs (v2)
On Wed, Mar 11, 2020 at 4:33 PM Tom St Denis wrote: > > The offset into the array was specified in bytes but should > be in terms of 32-bit words. Also prevent large reads that > would also cause a buffer overread. > > v2: Read from correct offset from internal storage buffer. > > Signed-off-by: Tom St Denis > Acked-by: Christian König Reviewed-by: Alex Deucher > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 6 +++--- > 1 file changed, 3 insertions(+), 3 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c > index 00942afc4e13..02bb1be11ffe 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c > @@ -784,11 +784,11 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, > char __user *buf, > ssize_t result = 0; > uint32_t offset, se, sh, cu, wave, simd, thread, bank, *data; > > - if (size & 3 || *pos & 3) > + if (size > 4096 || size & 3 || *pos & 3) > return -EINVAL; > > /* decode offset */ > - offset = *pos & GENMASK_ULL(11, 0); > + offset = (*pos & GENMASK_ULL(11, 0)) >> 2; > se = (*pos & GENMASK_ULL(19, 12)) >> 12; > sh = (*pos & GENMASK_ULL(27, 20)) >> 20; > cu = (*pos & GENMASK_ULL(35, 28)) >> 28; > @@ -826,7 +826,7 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, > char __user *buf, > while (size) { > uint32_t value; > > - value = data[offset++]; > + value = data[result >> 2]; > r = put_user(value, (uint32_t *)buf); > if (r) { > result = r; > -- > 2.24.1 > > ___ > amd-gfx mailing list > amd-gfx@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/amd-gfx ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 1/1] drm/amdgpu: disable gpu_sched load balancer for vcn jobs
On 3/11/20 9:35 PM, Andrey Grodzovsky wrote: On 3/11/20 4:32 PM, Nirmoy wrote: On 3/11/20 9:02 PM, Andrey Grodzovsky wrote: On 3/11/20 4:00 PM, Andrey Grodzovsky wrote: On 3/11/20 4:00 PM, Nirmoy Das wrote: VCN HW doesn't support dynamic load balance on multiple instances for a context. This patch modifies entity's sched_list to a sched_list consist of only one drm scheduler. Signed-off-by: Nirmoy Das --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 4 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 13 + drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 3 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 1 + drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c | 2 ++ drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c | 2 ++ drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c | 2 ++ 8 files changed, 28 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 8304d0c87899..db0eef19c636 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1203,6 +1203,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, union drm_amdgpu_cs *cs) { struct amdgpu_fpriv *fpriv = p->filp->driver_priv; + struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched); struct drm_sched_entity *entity = p->entity; enum drm_sched_priority priority; struct amdgpu_bo_list_entry *e; @@ -1257,6 +1258,9 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, priority = job->base.s_priority; drm_sched_entity_push_job(>base, entity); + if (ring->funcs->no_gpu_sched_loadbalance) + amdgpu_ctx_disable_gpu_sched_load_balance(entity); + Why this needs to be done each time a job is submitted and not once in drm_sched_entity_init (same foramdgpu_job_submit bellow ?) Andrey My bad - not in drm_sched_entity_init but in relevant amdgpu code. Hi Andrey, Do you mean drm_sched_job_init() or after creating VCN entities? Nirmoy I guess after creating the VCN entities (has to be amdgpu specific code) - I just don't get why it needs to be done each time job is submitted, I mean - since you set .no_gpu_sched_loadbalance = true anyway this is always true and so shouldn't you just initialize the VCN entity with a schedulers list consisting of one scheduler and that it ? Assumption: If I understand correctly we shouldn't be doing load balance among VCN jobs in the same context. Christian, James and Leo can clarify that if I am wrong. But we can still do load balance of VNC jobs among multiple contexts. That load balance decision happens in drm_sched_entity_init(). If we initialize VCN entity with one scheduler then all entities irrespective of context gets that one scheduler which means we are not utilizing extra VNC instances. Ideally we should be calling amdgpu_ctx_disable_gpu_sched_load_balance() only once after 1st call of drm_sched_entity_init() of a VCN job. I am not sure how to do that efficiently. Another option might be to copy the logic of drm_sched_entity_get_free_sched() and choose suitable VNC sched at/after VCN entity creation. Regards, Nirmoy Andrey Andrey amdgpu_vm_move_to_lru_tail(p->adev, >vm); ttm_eu_fence_buffer_objects(>ticket, >validated, p->fence); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index fa575bdc03c8..1127e8f77721 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -559,6 +559,19 @@ void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx, } } +/** + * amdgpu_ctx_disable_gpu_sched_load_balance - disable gpu_sched's load balancer + * @entity: entity on which load balancer will be disabled + */ +void amdgpu_ctx_disable_gpu_sched_load_balance(struct drm_sched_entity *entity) +{ + struct drm_gpu_scheduler **scheds = >rq->sched; + + /* disable gpu_sched's job load balancer by assigning only one */ + /* drm scheduler to the entity */ + drm_sched_entity_modify_sched(entity, scheds, 1); +} + int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, struct drm_sched_entity *entity) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h index de490f183af2..3a2f900b8000 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h @@ -90,5 +90,6 @@ void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr); void amdgpu_ctx_init_sched(struct amdgpu_device *adev); +void amdgpu_ctx_disable_gpu_sched_load_balance(struct drm_sched_entity *entity); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 4981e443a884..64dad7ba74da 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -140,6 +140,7 @@
[PATCH v5 1/4] drm/amdgpu/vcn: fix race condition issue for vcn start
Fix race condition issue when multiple vcn starts are called. v2: Removed checking the return value of cancel_delayed_work_sync() to prevent possible races here. v3: Add total_submission_cnt to avoid gate power unexpectedly. v4: Remove extra counter check, and reduce counter before idle work schedule Signed-off-by: James Zhu Reviewed-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 21 ++--- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h | 2 ++ 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index a41272f..6dacf78 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -63,6 +63,8 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) int i, r; INIT_DELAYED_WORK(>vcn.idle_work, amdgpu_vcn_idle_work_handler); + mutex_init(>vcn.vcn_pg_lock); + atomic_set(>vcn.total_submission_cnt, 0); switch (adev->asic_type) { case CHIP_RAVEN: @@ -210,6 +212,7 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev) } release_firmware(adev->vcn.fw); + mutex_destroy(>vcn.vcn_pg_lock); return 0; } @@ -307,7 +310,7 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work) fences += fence[j]; } - if (fences == 0) { + if (!fences && !atomic_read(>vcn.total_submission_cnt)) { amdgpu_gfx_off_ctrl(adev, true); amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, AMD_PG_STATE_GATE); @@ -319,13 +322,14 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work) void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - bool set_clocks = !cancel_delayed_work_sync(>vcn.idle_work); - if (set_clocks) { - amdgpu_gfx_off_ctrl(adev, false); - amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, - AMD_PG_STATE_UNGATE); - } + atomic_inc(>vcn.total_submission_cnt); + cancel_delayed_work_sync(>vcn.idle_work); + + mutex_lock(>vcn.vcn_pg_lock); + amdgpu_gfx_off_ctrl(adev, false); + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, + AMD_PG_STATE_UNGATE); if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG){ struct dpg_pause_state new_state; @@ -345,10 +349,13 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring) adev->vcn.pause_dpg_mode(adev, ring->me, _state); } + mutex_unlock(>vcn.vcn_pg_lock); } void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring) { + atomic_dec(>adev->vcn.total_submission_cnt); + schedule_delayed_work(>adev->vcn.idle_work, VCN_IDLE_TIMEOUT); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index 6fe0573..111c4cc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -200,6 +200,8 @@ struct amdgpu_vcn { struct drm_gpu_scheduler *vcn_dec_sched[AMDGPU_MAX_VCN_INSTANCES]; uint32_t num_vcn_enc_sched; uint32_t num_vcn_dec_sched; + struct mutex vcn_pg_lock; + atomic_t total_submission_cnt; unsignedharvest_config; int (*pause_dpg_mode)(struct amdgpu_device *adev, -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH v5 4/4] drm/amdgpu/vcn2.5: add sync when WPTR/RPTR reset under DPG mode
Add vcn harware and firmware synchronization to fix race condition issue among vcn driver, hardware and firmware under DPG mode. Signed-off-by: James Zhu Reviewed-by: Leo Liu --- drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c | 16 1 file changed, 16 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c index 9b22e2b..9793a75 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c @@ -868,6 +868,10 @@ static int vcn_v2_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo WREG32_SOC15(UVD, inst_idx, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH, upper_32_bits(ring->gpu_addr)); + /* Stall DPG before WPTR/RPTR reset */ + WREG32_P(SOC15_REG_OFFSET(UVD, inst_idx, mmUVD_POWER_STATUS), + UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK, + ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK); /* Initialize the ring buffer's read and write pointers */ WREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_RPTR, 0); @@ -876,6 +880,9 @@ static int vcn_v2_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo ring->wptr = RREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_RPTR); WREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr)); + /* Unstall DPG */ + WREG32_P(SOC15_REG_OFFSET(UVD, inst_idx, mmUVD_POWER_STATUS), + 0, ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK); return 0; } @@ -1389,8 +1396,13 @@ static int vcn_v2_5_pause_dpg_mode(struct amdgpu_device *adev, UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, ret_code); + /* Stall DPG before WPTR/RPTR reset */ + WREG32_P(SOC15_REG_OFFSET(UVD, inst_idx, mmUVD_POWER_STATUS), + UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK, + ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK); /* Restore */ ring = >vcn.inst[inst_idx].ring_enc[0]; + ring->wptr = 0; WREG32_SOC15(UVD, inst_idx, mmUVD_RB_BASE_LO, ring->gpu_addr); WREG32_SOC15(UVD, inst_idx, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); WREG32_SOC15(UVD, inst_idx, mmUVD_RB_SIZE, ring->ring_size / 4); @@ -1398,6 +1410,7 @@ static int vcn_v2_5_pause_dpg_mode(struct amdgpu_device *adev, WREG32_SOC15(UVD, inst_idx, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); ring = >vcn.inst[inst_idx].ring_enc[1]; + ring->wptr = 0; WREG32_SOC15(UVD, inst_idx, mmUVD_RB_BASE_LO2, ring->gpu_addr); WREG32_SOC15(UVD, inst_idx, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); WREG32_SOC15(UVD, inst_idx, mmUVD_RB_SIZE2, ring->ring_size / 4); @@ -1406,6 +1419,9 @@ static int vcn_v2_5_pause_dpg_mode(struct amdgpu_device *adev, WREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_WPTR, RREG32_SOC15(UVD, inst_idx, mmUVD_SCRATCH2) & 0x7FFF); + /* Unstall DPG */ + WREG32_P(SOC15_REG_OFFSET(UVD, inst_idx, mmUVD_POWER_STATUS), + 0, ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK); SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_POWER_STATUS, UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON, UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH v5 3/4] drm/amdgpu/vcn2.0: add sync when WPTR/RPTR reset under DPG mode
Add vcn harware and firmware synchronization to fix race condition issue among vcn driver, hardware and firmware under DPG mode Signed-off-by: James Zhu --- drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c | 18 ++ 1 file changed, 18 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c index f2745fd..415d65c 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c @@ -872,6 +872,11 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect) tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1); WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_CNTL, tmp); + /* Stall DPG before WPTR/RPTR reset */ + WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_POWER_STATUS), + UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK, + ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK); + /* set the write pointer delay */ WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR_CNTL, 0); @@ -894,6 +899,10 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect) WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr)); + /* Unstall DPG */ + WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_POWER_STATUS), + 0, ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK); + return 0; } @@ -1189,8 +1198,13 @@ static int vcn_v2_0_pause_dpg_mode(struct amdgpu_device *adev, UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, ret_code); + /* Stall DPG before WPTR/RPTR reset */ + WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_POWER_STATUS), + UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK, + ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK); /* Restore */ ring = >vcn.inst->ring_enc[0]; + ring->wptr = 0; WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr); WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4); @@ -1198,6 +1212,7 @@ static int vcn_v2_0_pause_dpg_mode(struct amdgpu_device *adev, WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); ring = >vcn.inst->ring_enc[1]; + ring->wptr = 0; WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr); WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4); @@ -1206,6 +1221,9 @@ static int vcn_v2_0_pause_dpg_mode(struct amdgpu_device *adev, WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 0x7FFF); + /* Unstall DPG */ + WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_POWER_STATUS), + 0, ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK); SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON, -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH v5 2/4] drm/amdgpu/vcn: fix race condition issue for dpg unpause mode switch
Couldn't only rely on enc fence to decide switching to dpg unpaude mode. Since a enc thread may not schedule a fence in time during multiple threads running situation. v3: 1. Rename enc_submission_cnt to dpg_enc_submission_cnt 2. Add dpg_enc_submission_cnt check in idle_work_handler v4: Remove extra counter check, and reduce counter before idle work schedule Signed-off-by: James Zhu --- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 32 +--- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h | 1 + 2 files changed, 22 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index 6dacf78..0110610 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -65,6 +65,8 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) INIT_DELAYED_WORK(>vcn.idle_work, amdgpu_vcn_idle_work_handler); mutex_init(>vcn.vcn_pg_lock); atomic_set(>vcn.total_submission_cnt, 0); + for (i = 0; i < adev->vcn.num_vcn_inst; i++) + atomic_set(>vcn.inst[i].dpg_enc_submission_cnt, 0); switch (adev->asic_type) { case CHIP_RAVEN: @@ -298,7 +300,8 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work) if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG){ struct dpg_pause_state new_state; - if (fence[j]) + if (fence[j] || + unlikely(atomic_read(>vcn.inst[j].dpg_enc_submission_cnt))) new_state.fw_based = VCN_DPG_STATE__PAUSE; else new_state.fw_based = VCN_DPG_STATE__UNPAUSE; @@ -333,19 +336,22 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring) if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG){ struct dpg_pause_state new_state; - unsigned int fences = 0; - unsigned int i; - for (i = 0; i < adev->vcn.num_enc_rings; ++i) { - fences += amdgpu_fence_count_emitted(>vcn.inst[ring->me].ring_enc[i]); - } - if (fences) + if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC) { + atomic_inc(>vcn.inst[ring->me].dpg_enc_submission_cnt); new_state.fw_based = VCN_DPG_STATE__PAUSE; - else - new_state.fw_based = VCN_DPG_STATE__UNPAUSE; + } else { + unsigned int fences = 0; + unsigned int i; - if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC) - new_state.fw_based = VCN_DPG_STATE__PAUSE; + for (i = 0; i < adev->vcn.num_enc_rings; ++i) + fences += amdgpu_fence_count_emitted(>vcn.inst[ring->me].ring_enc[i]); + + if (fences || atomic_read(>vcn.inst[ring->me].dpg_enc_submission_cnt)) + new_state.fw_based = VCN_DPG_STATE__PAUSE; + else + new_state.fw_based = VCN_DPG_STATE__UNPAUSE; + } adev->vcn.pause_dpg_mode(adev, ring->me, _state); } @@ -354,6 +360,10 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring) void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring) { + if (ring->adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG && + ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC) + atomic_dec(>adev->vcn.inst[ring->me].dpg_enc_submission_cnt); + atomic_dec(>adev->vcn.total_submission_cnt); schedule_delayed_work(>adev->vcn.idle_work, VCN_IDLE_TIMEOUT); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index 111c4cc..e913de8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -183,6 +183,7 @@ struct amdgpu_vcn_inst { void*dpg_sram_cpu_addr; uint64_tdpg_sram_gpu_addr; uint32_t*dpg_sram_curr_addr; + atomic_tdpg_enc_submission_cnt; }; struct amdgpu_vcn { -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 1/1] drm/amdgpu: disable gpu_sched load balancer for vcn jobs
On 3/11/20 4:32 PM, Nirmoy wrote: On 3/11/20 9:02 PM, Andrey Grodzovsky wrote: On 3/11/20 4:00 PM, Andrey Grodzovsky wrote: On 3/11/20 4:00 PM, Nirmoy Das wrote: VCN HW doesn't support dynamic load balance on multiple instances for a context. This patch modifies entity's sched_list to a sched_list consist of only one drm scheduler. Signed-off-by: Nirmoy Das --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 4 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 13 + drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 3 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 1 + drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c | 2 ++ drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c | 2 ++ drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c | 2 ++ 8 files changed, 28 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 8304d0c87899..db0eef19c636 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1203,6 +1203,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, union drm_amdgpu_cs *cs) { struct amdgpu_fpriv *fpriv = p->filp->driver_priv; + struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched); struct drm_sched_entity *entity = p->entity; enum drm_sched_priority priority; struct amdgpu_bo_list_entry *e; @@ -1257,6 +1258,9 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, priority = job->base.s_priority; drm_sched_entity_push_job(>base, entity); + if (ring->funcs->no_gpu_sched_loadbalance) + amdgpu_ctx_disable_gpu_sched_load_balance(entity); + Why this needs to be done each time a job is submitted and not once in drm_sched_entity_init (same foramdgpu_job_submit bellow ?) Andrey My bad - not in drm_sched_entity_init but in relevant amdgpu code. Hi Andrey, Do you mean drm_sched_job_init() or after creating VCN entities? Nirmoy I guess after creating the VCN entities (has to be amdgpu specific code) - I just don't get why it needs to be done each time job is submitted, I mean - since you set .no_gpu_sched_loadbalance = true anyway this is always true and so shouldn't you just initialize the VCN entity with a schedulers list consisting of one scheduler and that it ? Andrey Andrey amdgpu_vm_move_to_lru_tail(p->adev, >vm); ttm_eu_fence_buffer_objects(>ticket, >validated, p->fence); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index fa575bdc03c8..1127e8f77721 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -559,6 +559,19 @@ void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx, } } +/** + * amdgpu_ctx_disable_gpu_sched_load_balance - disable gpu_sched's load balancer + * @entity: entity on which load balancer will be disabled + */ +void amdgpu_ctx_disable_gpu_sched_load_balance(struct drm_sched_entity *entity) +{ + struct drm_gpu_scheduler **scheds = >rq->sched; + + /* disable gpu_sched's job load balancer by assigning only one */ + /* drm scheduler to the entity */ + drm_sched_entity_modify_sched(entity, scheds, 1); +} + int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, struct drm_sched_entity *entity) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h index de490f183af2..3a2f900b8000 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h @@ -90,5 +90,6 @@ void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr); void amdgpu_ctx_init_sched(struct amdgpu_device *adev); +void amdgpu_ctx_disable_gpu_sched_load_balance(struct drm_sched_entity *entity); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 4981e443a884..64dad7ba74da 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -140,6 +140,7 @@ void amdgpu_job_free(struct amdgpu_job *job) int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity, void *owner, struct dma_fence **f) { + struct amdgpu_ring *ring = to_amdgpu_ring(entity->rq->sched); enum drm_sched_priority priority; int r; @@ -154,6 +155,8 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity, amdgpu_job_free_resources(job); priority = job->base.s_priority; drm_sched_entity_push_job(>base, entity); + if (ring->funcs->no_gpu_sched_loadbalance) + amdgpu_ctx_disable_gpu_sched_load_balance(entity); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 448c76cbf3ed..f78fe1a6912b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++
Re: [PATCH] drm/amd/amdgpu: Fix GPR read from debugfs
Hi Alex, I sent out a v2 of the patch to the list that also addresses the fact we were reading from the wrong offset from the internal buffer. This entry was really only tested with offset==0 which is why this didn't come up until now that people want those trap registers :-) Tom On 2020-03-11 11:16 a.m., Alex Deucher wrote: On Tue, Mar 10, 2020 at 8:53 AM Tom St Denis wrote: The offset into the array was specified in bytes but should be in terms of 32-bit words. Also prevent large reads that would also cause a buffer overread. Signed-off-by: Tom St Denis --- drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index c573edf02afc..e0f4ccd91fd4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -783,11 +783,11 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf, ssize_t result = 0; uint32_t offset, se, sh, cu, wave, simd, thread, bank, *data; - if (size & 3 || *pos & 3) + if (size > 4096 || size & 3 || *pos & 3) Is size in dwords as well? Alex return -EINVAL; /* decode offset */ - offset = *pos & GENMASK_ULL(11, 0); + offset = (*pos & GENMASK_ULL(11, 0)) / 4; se = (*pos & GENMASK_ULL(19, 12)) >> 12; sh = (*pos & GENMASK_ULL(27, 20)) >> 20; cu = (*pos & GENMASK_ULL(35, 28)) >> 28; -- 2.24.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfxdata=02%7C01%7Ctom.stdenis%40amd.com%7C550ca4aaaf084c3d7a9f08d7c5cf2c65%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637195365897948298sdata=YTb2YGBlAlDS%2FffaVOo2Yrej21N%2FJYVpFVIc1rQERWg%3Dreserved=0 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH] drm/amd/amdgpu: Fix GPR read from debugfs (v2)
The offset into the array was specified in bytes but should be in terms of 32-bit words. Also prevent large reads that would also cause a buffer overread. v2: Read from correct offset from internal storage buffer. Signed-off-by: Tom St Denis Acked-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index 00942afc4e13..02bb1be11ffe 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -784,11 +784,11 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf, ssize_t result = 0; uint32_t offset, se, sh, cu, wave, simd, thread, bank, *data; - if (size & 3 || *pos & 3) + if (size > 4096 || size & 3 || *pos & 3) return -EINVAL; /* decode offset */ - offset = *pos & GENMASK_ULL(11, 0); + offset = (*pos & GENMASK_ULL(11, 0)) >> 2; se = (*pos & GENMASK_ULL(19, 12)) >> 12; sh = (*pos & GENMASK_ULL(27, 20)) >> 20; cu = (*pos & GENMASK_ULL(35, 28)) >> 28; @@ -826,7 +826,7 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf, while (size) { uint32_t value; - value = data[offset++]; + value = data[result >> 2]; r = put_user(value, (uint32_t *)buf); if (r) { result = r; -- 2.24.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 1/1] drm/amdgpu: disable gpu_sched load balancer for vcn jobs
On 3/11/20 9:14 PM, James Zhu wrote: On 2020-03-11 4:00 p.m., Nirmoy Das wrote: VCN HW doesn't support dynamic load balance on multiple instances for a context. This patch modifies entity's sched_list to a sched_list consist of only one drm scheduler. Signed-off-by: Nirmoy Das --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 4 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 13 + drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 3 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 1 + drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c | 2 ++ drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c | 2 ++ drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c | 2 ++ 8 files changed, 28 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 8304d0c87899..db0eef19c636 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1203,6 +1203,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, union drm_amdgpu_cs *cs) { struct amdgpu_fpriv *fpriv = p->filp->driver_priv; + struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched); struct drm_sched_entity *entity = p->entity; enum drm_sched_priority priority; struct amdgpu_bo_list_entry *e; @@ -1257,6 +1258,9 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, priority = job->base.s_priority; drm_sched_entity_push_job(>base, entity); + if (ring->funcs->no_gpu_sched_loadbalance) + amdgpu_ctx_disable_gpu_sched_load_balance(entity); Does this mean that only vcn IP instances 0 dec/enc will be scheduled? No, not really. drm_sched_job_init() gets called before amdgpu_ctx_disable_gpu_sched_load_balance(). 1st drm_sched_job_init() call will choose a least loaded VCN instance and that VCN instance will stay for the whole context life. Regards, Nirmoy Best Regards! James + amdgpu_vm_move_to_lru_tail(p->adev, >vm); ttm_eu_fence_buffer_objects(>ticket, >validated, p->fence); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index fa575bdc03c8..1127e8f77721 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -559,6 +559,19 @@ void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx, } } +/** + * amdgpu_ctx_disable_gpu_sched_load_balance - disable gpu_sched's load balancer + * @entity: entity on which load balancer will be disabled + */ +void amdgpu_ctx_disable_gpu_sched_load_balance(struct drm_sched_entity *entity) +{ + struct drm_gpu_scheduler **scheds = >rq->sched; + + /* disable gpu_sched's job load balancer by assigning only one */ + /* drm scheduler to the entity */ + drm_sched_entity_modify_sched(entity, scheds, 1); +} + int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, struct drm_sched_entity *entity) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h index de490f183af2..3a2f900b8000 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h @@ -90,5 +90,6 @@ void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr); void amdgpu_ctx_init_sched(struct amdgpu_device *adev); +void amdgpu_ctx_disable_gpu_sched_load_balance(struct drm_sched_entity *entity); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 4981e443a884..64dad7ba74da 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -140,6 +140,7 @@ void amdgpu_job_free(struct amdgpu_job *job) int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity, void *owner, struct dma_fence **f) { + struct amdgpu_ring *ring = to_amdgpu_ring(entity->rq->sched); enum drm_sched_priority priority; int r; @@ -154,6 +155,8 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity, amdgpu_job_free_resources(job); priority = job->base.s_priority; drm_sched_entity_push_job(>base, entity); + if (ring->funcs->no_gpu_sched_loadbalance) + amdgpu_ctx_disable_gpu_sched_load_balance(entity); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 448c76cbf3ed..f78fe1a6912b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -115,6 +115,7 @@ struct amdgpu_ring_funcs { u32 nop; bool support_64bit_ptrs; bool no_user_fence; + bool no_gpu_sched_loadbalance; unsigned vmhub; unsigned extra_dw; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index
Re: [PATCH 1/1] drm/amdgpu: disable gpu_sched load balancer for vcn jobs
On 3/11/20 9:02 PM, Andrey Grodzovsky wrote: On 3/11/20 4:00 PM, Andrey Grodzovsky wrote: On 3/11/20 4:00 PM, Nirmoy Das wrote: VCN HW doesn't support dynamic load balance on multiple instances for a context. This patch modifies entity's sched_list to a sched_list consist of only one drm scheduler. Signed-off-by: Nirmoy Das --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 4 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 13 + drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 3 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 1 + drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c | 2 ++ drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c | 2 ++ drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c | 2 ++ 8 files changed, 28 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 8304d0c87899..db0eef19c636 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1203,6 +1203,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, union drm_amdgpu_cs *cs) { struct amdgpu_fpriv *fpriv = p->filp->driver_priv; + struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched); struct drm_sched_entity *entity = p->entity; enum drm_sched_priority priority; struct amdgpu_bo_list_entry *e; @@ -1257,6 +1258,9 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, priority = job->base.s_priority; drm_sched_entity_push_job(>base, entity); + if (ring->funcs->no_gpu_sched_loadbalance) + amdgpu_ctx_disable_gpu_sched_load_balance(entity); + Why this needs to be done each time a job is submitted and not once in drm_sched_entity_init (same foramdgpu_job_submit bellow ?) Andrey My bad - not in drm_sched_entity_init but in relevant amdgpu code. Hi Andrey, Do you mean drm_sched_job_init() or after creating VCN entities? Nirmoy Andrey amdgpu_vm_move_to_lru_tail(p->adev, >vm); ttm_eu_fence_buffer_objects(>ticket, >validated, p->fence); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index fa575bdc03c8..1127e8f77721 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -559,6 +559,19 @@ void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx, } } +/** + * amdgpu_ctx_disable_gpu_sched_load_balance - disable gpu_sched's load balancer + * @entity: entity on which load balancer will be disabled + */ +void amdgpu_ctx_disable_gpu_sched_load_balance(struct drm_sched_entity *entity) +{ + struct drm_gpu_scheduler **scheds = >rq->sched; + + /* disable gpu_sched's job load balancer by assigning only one */ + /* drm scheduler to the entity */ + drm_sched_entity_modify_sched(entity, scheds, 1); +} + int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, struct drm_sched_entity *entity) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h index de490f183af2..3a2f900b8000 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h @@ -90,5 +90,6 @@ void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr); void amdgpu_ctx_init_sched(struct amdgpu_device *adev); +void amdgpu_ctx_disable_gpu_sched_load_balance(struct drm_sched_entity *entity); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 4981e443a884..64dad7ba74da 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -140,6 +140,7 @@ void amdgpu_job_free(struct amdgpu_job *job) int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity, void *owner, struct dma_fence **f) { + struct amdgpu_ring *ring = to_amdgpu_ring(entity->rq->sched); enum drm_sched_priority priority; int r; @@ -154,6 +155,8 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity, amdgpu_job_free_resources(job); priority = job->base.s_priority; drm_sched_entity_push_job(>base, entity); + if (ring->funcs->no_gpu_sched_loadbalance) + amdgpu_ctx_disable_gpu_sched_load_balance(entity); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 448c76cbf3ed..f78fe1a6912b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -115,6 +115,7 @@ struct amdgpu_ring_funcs { u32 nop; bool support_64bit_ptrs; bool no_user_fence; + bool no_gpu_sched_loadbalance; unsigned vmhub; unsigned extra_dw; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index
Re: [PATCH 1/1] drm/amdgpu: disable gpu_sched load balancer for vcn jobs
On 2020-03-11 4:00 p.m., Nirmoy Das wrote: VCN HW doesn't support dynamic load balance on multiple instances for a context. This patch modifies entity's sched_list to a sched_list consist of only one drm scheduler. Signed-off-by: Nirmoy Das --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 4 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 13 + drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 3 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 1 + drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c| 2 ++ drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c| 2 ++ drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c| 2 ++ 8 files changed, 28 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 8304d0c87899..db0eef19c636 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1203,6 +1203,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, union drm_amdgpu_cs *cs) { struct amdgpu_fpriv *fpriv = p->filp->driver_priv; + struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched); struct drm_sched_entity *entity = p->entity; enum drm_sched_priority priority; struct amdgpu_bo_list_entry *e; @@ -1257,6 +1258,9 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, priority = job->base.s_priority; drm_sched_entity_push_job(>base, entity); + if (ring->funcs->no_gpu_sched_loadbalance) + amdgpu_ctx_disable_gpu_sched_load_balance(entity); Does this mean that only vcn IP instances 0 dec/enc will be scheduled? Best Regards! James + amdgpu_vm_move_to_lru_tail(p->adev, >vm); ttm_eu_fence_buffer_objects(>ticket, >validated, p->fence); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index fa575bdc03c8..1127e8f77721 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -559,6 +559,19 @@ void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx, } } +/** + * amdgpu_ctx_disable_gpu_sched_load_balance - disable gpu_sched's load balancer + * @entity: entity on which load balancer will be disabled + */ +void amdgpu_ctx_disable_gpu_sched_load_balance(struct drm_sched_entity *entity) +{ + struct drm_gpu_scheduler **scheds = >rq->sched; + + /* disable gpu_sched's job load balancer by assigning only one */ + /* drm scheduler to the entity */ + drm_sched_entity_modify_sched(entity, scheds, 1); +} + int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, struct drm_sched_entity *entity) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h index de490f183af2..3a2f900b8000 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h @@ -90,5 +90,6 @@ void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr); void amdgpu_ctx_init_sched(struct amdgpu_device *adev); +void amdgpu_ctx_disable_gpu_sched_load_balance(struct drm_sched_entity *entity); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 4981e443a884..64dad7ba74da 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -140,6 +140,7 @@ void amdgpu_job_free(struct amdgpu_job *job) int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity, void *owner, struct dma_fence **f) { + struct amdgpu_ring *ring = to_amdgpu_ring(entity->rq->sched); enum drm_sched_priority priority; int r; @@ -154,6 +155,8 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity, amdgpu_job_free_resources(job); priority = job->base.s_priority; drm_sched_entity_push_job(>base, entity); + if (ring->funcs->no_gpu_sched_loadbalance) + amdgpu_ctx_disable_gpu_sched_load_balance(entity); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 448c76cbf3ed..f78fe1a6912b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -115,6 +115,7 @@ struct amdgpu_ring_funcs { u32 nop; boolsupport_64bit_ptrs; boolno_user_fence; + boolno_gpu_sched_loadbalance; unsignedvmhub; unsignedextra_dw; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index 71f61afdc655..749ccdb5fbfb 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -1871,6 +1871,7 @@ static const struct amdgpu_ring_funcs
Re: [PATCH 1/1] drm/amdgpu: disable gpu_sched load balancer for vcn jobs
On 3/11/20 4:00 PM, Andrey Grodzovsky wrote: On 3/11/20 4:00 PM, Nirmoy Das wrote: VCN HW doesn't support dynamic load balance on multiple instances for a context. This patch modifies entity's sched_list to a sched_list consist of only one drm scheduler. Signed-off-by: Nirmoy Das --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 4 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 13 + drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 3 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 1 + drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c | 2 ++ drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c | 2 ++ drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c | 2 ++ 8 files changed, 28 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 8304d0c87899..db0eef19c636 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1203,6 +1203,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, union drm_amdgpu_cs *cs) { struct amdgpu_fpriv *fpriv = p->filp->driver_priv; + struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched); struct drm_sched_entity *entity = p->entity; enum drm_sched_priority priority; struct amdgpu_bo_list_entry *e; @@ -1257,6 +1258,9 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, priority = job->base.s_priority; drm_sched_entity_push_job(>base, entity); + if (ring->funcs->no_gpu_sched_loadbalance) + amdgpu_ctx_disable_gpu_sched_load_balance(entity); + Why this needs to be done each time a job is submitted and not once in drm_sched_entity_init (same foramdgpu_job_submit bellow ?) Andrey My bad - not in drm_sched_entity_init but in relevant amdgpu code. Andrey amdgpu_vm_move_to_lru_tail(p->adev, >vm); ttm_eu_fence_buffer_objects(>ticket, >validated, p->fence); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index fa575bdc03c8..1127e8f77721 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -559,6 +559,19 @@ void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx, } } +/** + * amdgpu_ctx_disable_gpu_sched_load_balance - disable gpu_sched's load balancer + * @entity: entity on which load balancer will be disabled + */ +void amdgpu_ctx_disable_gpu_sched_load_balance(struct drm_sched_entity *entity) +{ + struct drm_gpu_scheduler **scheds = >rq->sched; + + /* disable gpu_sched's job load balancer by assigning only one */ + /* drm scheduler to the entity */ + drm_sched_entity_modify_sched(entity, scheds, 1); +} + int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, struct drm_sched_entity *entity) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h index de490f183af2..3a2f900b8000 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h @@ -90,5 +90,6 @@ void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr); void amdgpu_ctx_init_sched(struct amdgpu_device *adev); +void amdgpu_ctx_disable_gpu_sched_load_balance(struct drm_sched_entity *entity); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 4981e443a884..64dad7ba74da 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -140,6 +140,7 @@ void amdgpu_job_free(struct amdgpu_job *job) int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity, void *owner, struct dma_fence **f) { + struct amdgpu_ring *ring = to_amdgpu_ring(entity->rq->sched); enum drm_sched_priority priority; int r; @@ -154,6 +155,8 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity, amdgpu_job_free_resources(job); priority = job->base.s_priority; drm_sched_entity_push_job(>base, entity); + if (ring->funcs->no_gpu_sched_loadbalance) + amdgpu_ctx_disable_gpu_sched_load_balance(entity); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 448c76cbf3ed..f78fe1a6912b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -115,6 +115,7 @@ struct amdgpu_ring_funcs { u32 nop; bool support_64bit_ptrs; bool no_user_fence; + bool no_gpu_sched_loadbalance; unsigned vmhub; unsigned extra_dw; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index 71f61afdc655..749ccdb5fbfb 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -1871,6 +1871,7 @@ static const
Re: [PATCH 1/1] drm/amdgpu: disable gpu_sched load balancer for vcn jobs
On 3/11/20 4:00 PM, Nirmoy Das wrote: VCN HW doesn't support dynamic load balance on multiple instances for a context. This patch modifies entity's sched_list to a sched_list consist of only one drm scheduler. Signed-off-by: Nirmoy Das --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 4 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 13 + drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 3 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 1 + drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c| 2 ++ drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c| 2 ++ drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c| 2 ++ 8 files changed, 28 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 8304d0c87899..db0eef19c636 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1203,6 +1203,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, union drm_amdgpu_cs *cs) { struct amdgpu_fpriv *fpriv = p->filp->driver_priv; + struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched); struct drm_sched_entity *entity = p->entity; enum drm_sched_priority priority; struct amdgpu_bo_list_entry *e; @@ -1257,6 +1258,9 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, priority = job->base.s_priority; drm_sched_entity_push_job(>base, entity); + if (ring->funcs->no_gpu_sched_loadbalance) + amdgpu_ctx_disable_gpu_sched_load_balance(entity); + Why this needs to be done each time a job is submitted and not once in drm_sched_entity_init (same foramdgpu_job_submit bellow ?) Andrey amdgpu_vm_move_to_lru_tail(p->adev, >vm); ttm_eu_fence_buffer_objects(>ticket, >validated, p->fence); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index fa575bdc03c8..1127e8f77721 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -559,6 +559,19 @@ void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx, } } +/** + * amdgpu_ctx_disable_gpu_sched_load_balance - disable gpu_sched's load balancer + * @entity: entity on which load balancer will be disabled + */ +void amdgpu_ctx_disable_gpu_sched_load_balance(struct drm_sched_entity *entity) +{ + struct drm_gpu_scheduler **scheds = >rq->sched; + + /* disable gpu_sched's job load balancer by assigning only one */ + /* drm scheduler to the entity */ + drm_sched_entity_modify_sched(entity, scheds, 1); +} + int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, struct drm_sched_entity *entity) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h index de490f183af2..3a2f900b8000 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h @@ -90,5 +90,6 @@ void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr); void amdgpu_ctx_init_sched(struct amdgpu_device *adev); +void amdgpu_ctx_disable_gpu_sched_load_balance(struct drm_sched_entity *entity); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 4981e443a884..64dad7ba74da 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -140,6 +140,7 @@ void amdgpu_job_free(struct amdgpu_job *job) int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity, void *owner, struct dma_fence **f) { + struct amdgpu_ring *ring = to_amdgpu_ring(entity->rq->sched); enum drm_sched_priority priority; int r; @@ -154,6 +155,8 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity, amdgpu_job_free_resources(job); priority = job->base.s_priority; drm_sched_entity_push_job(>base, entity); + if (ring->funcs->no_gpu_sched_loadbalance) + amdgpu_ctx_disable_gpu_sched_load_balance(entity); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 448c76cbf3ed..f78fe1a6912b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -115,6 +115,7 @@ struct amdgpu_ring_funcs { u32 nop; boolsupport_64bit_ptrs; boolno_user_fence; + boolno_gpu_sched_loadbalance; unsignedvmhub; unsignedextra_dw; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index 71f61afdc655..749ccdb5fbfb 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -1871,6 +1871,7 @@
[PATCH 1/1] drm/amdgpu: disable gpu_sched load balancer for vcn jobs
VCN HW doesn't support dynamic load balance on multiple instances for a context. This patch modifies entity's sched_list to a sched_list consist of only one drm scheduler. Signed-off-by: Nirmoy Das --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 4 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 13 + drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 3 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 1 + drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c| 2 ++ drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c| 2 ++ drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c| 2 ++ 8 files changed, 28 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 8304d0c87899..db0eef19c636 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1203,6 +1203,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, union drm_amdgpu_cs *cs) { struct amdgpu_fpriv *fpriv = p->filp->driver_priv; + struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched); struct drm_sched_entity *entity = p->entity; enum drm_sched_priority priority; struct amdgpu_bo_list_entry *e; @@ -1257,6 +1258,9 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, priority = job->base.s_priority; drm_sched_entity_push_job(>base, entity); + if (ring->funcs->no_gpu_sched_loadbalance) + amdgpu_ctx_disable_gpu_sched_load_balance(entity); + amdgpu_vm_move_to_lru_tail(p->adev, >vm); ttm_eu_fence_buffer_objects(>ticket, >validated, p->fence); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index fa575bdc03c8..1127e8f77721 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -559,6 +559,19 @@ void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx, } } +/** + * amdgpu_ctx_disable_gpu_sched_load_balance - disable gpu_sched's load balancer + * @entity: entity on which load balancer will be disabled + */ +void amdgpu_ctx_disable_gpu_sched_load_balance(struct drm_sched_entity *entity) +{ + struct drm_gpu_scheduler **scheds = >rq->sched; + + /* disable gpu_sched's job load balancer by assigning only one */ + /* drm scheduler to the entity */ + drm_sched_entity_modify_sched(entity, scheds, 1); +} + int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, struct drm_sched_entity *entity) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h index de490f183af2..3a2f900b8000 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h @@ -90,5 +90,6 @@ void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr); void amdgpu_ctx_init_sched(struct amdgpu_device *adev); +void amdgpu_ctx_disable_gpu_sched_load_balance(struct drm_sched_entity *entity); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 4981e443a884..64dad7ba74da 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -140,6 +140,7 @@ void amdgpu_job_free(struct amdgpu_job *job) int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity, void *owner, struct dma_fence **f) { + struct amdgpu_ring *ring = to_amdgpu_ring(entity->rq->sched); enum drm_sched_priority priority; int r; @@ -154,6 +155,8 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity, amdgpu_job_free_resources(job); priority = job->base.s_priority; drm_sched_entity_push_job(>base, entity); + if (ring->funcs->no_gpu_sched_loadbalance) + amdgpu_ctx_disable_gpu_sched_load_balance(entity); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 448c76cbf3ed..f78fe1a6912b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -115,6 +115,7 @@ struct amdgpu_ring_funcs { u32 nop; boolsupport_64bit_ptrs; boolno_user_fence; + boolno_gpu_sched_loadbalance; unsignedvmhub; unsignedextra_dw; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index 71f61afdc655..749ccdb5fbfb 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -1871,6 +1871,7 @@ static const struct amdgpu_ring_funcs vcn_v1_0_dec_ring_vm_funcs = { .align_mask = 0xf, .support_64bit_ptrs = false, .no_user_fence = true, + .no_gpu_sched_loadbalance =
[PATCH 1/1] drm/amdgpu: disable gpu_sched load balancer for vcn jobs
VCN HW doesn't support dynamic load balance on multiple instances for a context. This patch modifies entity's sched_list to a sched_list consist of only one drm scheduler. Signed-off-by: Nirmoy Das --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 4 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 14 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 3 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 1 + drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c| 2 ++ drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c| 2 ++ drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c| 2 ++ 8 files changed, 29 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 8304d0c87899..db0eef19c636 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1203,6 +1203,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, union drm_amdgpu_cs *cs) { struct amdgpu_fpriv *fpriv = p->filp->driver_priv; + struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched); struct drm_sched_entity *entity = p->entity; enum drm_sched_priority priority; struct amdgpu_bo_list_entry *e; @@ -1257,6 +1258,9 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, priority = job->base.s_priority; drm_sched_entity_push_job(>base, entity); + if (ring->funcs->no_gpu_sched_loadbalance) + amdgpu_ctx_disable_gpu_sched_load_balance(entity); + amdgpu_vm_move_to_lru_tail(p->adev, >vm); ttm_eu_fence_buffer_objects(>ticket, >validated, p->fence); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index fa575bdc03c8..d699207d6266 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -559,6 +559,20 @@ void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx, } } +/** + * amdgpu_ctx_disable_gpu_sched_load_balance - disable gpu_sched's load balancer + * @entity: entity on which load balancer will be disabled + */ +void amdgpu_ctx_disable_gpu_sched_load_balance(struct drm_sched_entity *entity) +{ + struct drm_gpu_scheduler **scheds = >rq->sched; + + /* disable gpu_sched's job load balancer by assigning only one */ + /* drm scheduler to the entity */ + drm_sched_entity_modify_sched(entity, scheds, 1); + +} + int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, struct drm_sched_entity *entity) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h index de490f183af2..3a2f900b8000 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h @@ -90,5 +90,6 @@ void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr); void amdgpu_ctx_init_sched(struct amdgpu_device *adev); +void amdgpu_ctx_disable_gpu_sched_load_balance(struct drm_sched_entity *entity); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 4981e443a884..64dad7ba74da 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -140,6 +140,7 @@ void amdgpu_job_free(struct amdgpu_job *job) int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity, void *owner, struct dma_fence **f) { + struct amdgpu_ring *ring = to_amdgpu_ring(entity->rq->sched); enum drm_sched_priority priority; int r; @@ -154,6 +155,8 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity, amdgpu_job_free_resources(job); priority = job->base.s_priority; drm_sched_entity_push_job(>base, entity); + if (ring->funcs->no_gpu_sched_loadbalance) + amdgpu_ctx_disable_gpu_sched_load_balance(entity); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 448c76cbf3ed..f78fe1a6912b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -115,6 +115,7 @@ struct amdgpu_ring_funcs { u32 nop; boolsupport_64bit_ptrs; boolno_user_fence; + boolno_gpu_sched_loadbalance; unsignedvmhub; unsignedextra_dw; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index 71f61afdc655..749ccdb5fbfb 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -1871,6 +1871,7 @@ static const struct amdgpu_ring_funcs vcn_v1_0_dec_ring_vm_funcs = { .align_mask = 0xf, .support_64bit_ptrs = false, .no_user_fence = true, + .no_gpu_sched_loadbalance =
[PATCH hmm 1/8] mm/hmm: add missing unmaps of the ptep during hmm_vma_handle_pte()
From: Jason Gunthorpe Many of the direct returns of error skipped doing the pte_unmap(). All non zero exit paths must unmap the pte. The pte_unmap() is split unnaturally like this because some of the error exit paths trigger a sleep and must release the lock before sleeping. Fixes: 992de9a8b751 ("mm/hmm: allow to mirror vma of a file on a DAX backed filesystem") Fixes: 53f5c3f489ec ("mm/hmm: factor out pte and pmd handling to simplify hmm_vma_walk_pmd()") Signed-off-by: Jason Gunthorpe --- mm/hmm.c | 8 ++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/mm/hmm.c b/mm/hmm.c index 72e5a6d9a41756..35f85424176d14 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -325,6 +325,7 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr, } /* Report error for everything else */ + pte_unmap(ptep); *pfn = range->values[HMM_PFN_ERROR]; return -EFAULT; } else { @@ -339,10 +340,13 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr, if (pte_devmap(pte)) { hmm_vma_walk->pgmap = get_dev_pagemap(pte_pfn(pte), hmm_vma_walk->pgmap); - if (unlikely(!hmm_vma_walk->pgmap)) + if (unlikely(!hmm_vma_walk->pgmap)) { + pte_unmap(ptep); return -EBUSY; + } } else if (IS_ENABLED(CONFIG_ARCH_HAS_PTE_SPECIAL) && pte_special(pte)) { if (!is_zero_pfn(pte_pfn(pte))) { + pte_unmap(ptep); *pfn = range->values[HMM_PFN_SPECIAL]; return -EFAULT; } @@ -437,7 +441,7 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp, r = hmm_vma_handle_pte(walk, addr, end, pmdp, ptep, [i]); if (r) { - /* hmm_vma_handle_pte() did unmap pte directory */ + /* hmm_vma_handle_pte() did pte_unmap() */ hmm_vma_walk->last = addr; return r; } -- 2.25.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH hmm 7/8] mm/hmm: return -EFAULT when setting HMM_PFN_ERROR on requested valid pages
From: Jason Gunthorpe hmm_range_fault() should never return 0 if the caller requested a valid page, but the pfns output for that page would be HMM_PFN_ERROR. hmm_pte_need_fault() must always be called before setting HMM_PFN_ERROR to detect if the page is in faulting mode or not. Fix two cases in hmm_vma_walk_pmd() and reorganize some of the duplicated code. Fixes: d08faca018c4 ("mm/hmm: properly handle migration pmd") Fixes: da4c3c735ea4 ("mm/hmm/mirror: helper to snapshot CPU page table") Signed-off-by: Jason Gunthorpe --- mm/hmm.c | 38 +- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/mm/hmm.c b/mm/hmm.c index bf676cfef3e8ee..f61fddf2ef6505 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -363,8 +363,10 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp, { struct hmm_vma_walk *hmm_vma_walk = walk->private; struct hmm_range *range = hmm_vma_walk->range; - uint64_t *pfns = range->pfns; - unsigned long addr = start, i; + uint64_t *pfns = >pfns[(start - range->start) >> PAGE_SHIFT]; + unsigned long npages = (end - start) >> PAGE_SHIFT; + unsigned long addr = start; + bool fault, write_fault; pte_t *ptep; pmd_t pmd; @@ -374,14 +376,6 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp, return hmm_vma_walk_hole(start, end, -1, walk); if (thp_migration_supported() && is_pmd_migration_entry(pmd)) { - bool fault, write_fault; - unsigned long npages; - uint64_t *pfns; - - i = (addr - range->start) >> PAGE_SHIFT; - npages = (end - addr) >> PAGE_SHIFT; - pfns = >pfns[i]; - hmm_range_need_fault(hmm_vma_walk, pfns, npages, 0, , _fault); if (fault || write_fault) { @@ -390,8 +384,15 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp, return -EBUSY; } return hmm_pfns_fill(start, end, range, HMM_PFN_NONE); - } else if (!pmd_present(pmd)) + } + + if (!pmd_present(pmd)) { + hmm_range_need_fault(hmm_vma_walk, pfns, npages, 0, , +_fault); + if (fault || write_fault) + return -EFAULT; return hmm_pfns_fill(start, end, range, HMM_PFN_ERROR); + } if (pmd_devmap(pmd) || pmd_trans_huge(pmd)) { /* @@ -408,8 +409,7 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp, if (!pmd_devmap(pmd) && !pmd_trans_huge(pmd)) goto again; - i = (addr - range->start) >> PAGE_SHIFT; - return hmm_vma_handle_pmd(walk, addr, end, [i], pmd); + return hmm_vma_handle_pmd(walk, addr, end, pfns, pmd); } /* @@ -418,15 +418,19 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp, * entry pointing to pte directory or it is a bad pmd that will not * recover. */ - if (pmd_bad(pmd)) + if (pmd_bad(pmd)) { + hmm_range_need_fault(hmm_vma_walk, pfns, npages, 0, , +_fault); + if (fault || write_fault) + return -EFAULT; return hmm_pfns_fill(start, end, range, HMM_PFN_ERROR); + } ptep = pte_offset_map(pmdp, addr); - i = (addr - range->start) >> PAGE_SHIFT; - for (; addr < end; addr += PAGE_SIZE, ptep++, i++) { + for (; addr < end; addr += PAGE_SIZE, ptep++, pfns++) { int r; - r = hmm_vma_handle_pte(walk, addr, end, pmdp, ptep, [i]); + r = hmm_vma_handle_pte(walk, addr, end, pmdp, ptep, pfns); if (r) { /* hmm_vma_handle_pte() did pte_unmap() */ hmm_vma_walk->last = addr; -- 2.25.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH hmm 3/8] mm/hmm: do not call hmm_vma_walk_hole() while holding a spinlock
From: Jason Gunthorpe This eventually calls into handle_mm_fault() which is a sleeping function. Release the lock first. hmm_vma_walk_hole() does not touch the contents of the PUD, so it does not need the lock. Fixes: 3afc423632a1 ("mm: pagewalk: add p4d_entry() and pgd_entry()") Cc: Steven Price Signed-off-by: Jason Gunthorpe --- mm/hmm.c | 14 +++--- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/mm/hmm.c b/mm/hmm.c index 9e8f68eb83287a..32dcbfd3908315 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -473,8 +473,8 @@ static int hmm_vma_walk_pud(pud_t *pudp, unsigned long start, unsigned long end, pud = READ_ONCE(*pudp); if (pud_none(pud)) { - ret = hmm_vma_walk_hole(start, end, -1, walk); - goto out_unlock; + spin_unlock(ptl); + return hmm_vma_walk_hole(start, end, -1, walk); } if (pud_huge(pud) && pud_devmap(pud)) { @@ -483,8 +483,8 @@ static int hmm_vma_walk_pud(pud_t *pudp, unsigned long start, unsigned long end, bool fault, write_fault; if (!pud_present(pud)) { - ret = hmm_vma_walk_hole(start, end, -1, walk); - goto out_unlock; + spin_unlock(ptl); + return hmm_vma_walk_hole(start, end, -1, walk); } i = (addr - range->start) >> PAGE_SHIFT; @@ -495,9 +495,9 @@ static int hmm_vma_walk_pud(pud_t *pudp, unsigned long start, unsigned long end, hmm_range_need_fault(hmm_vma_walk, pfns, npages, cpu_flags, , _fault); if (fault || write_fault) { - ret = hmm_vma_walk_hole_(addr, end, fault, -write_fault, walk); - goto out_unlock; + spin_unlock(ptl); + return hmm_vma_walk_hole_(addr, end, fault, write_fault, + walk); } pfn = pud_pfn(pud) + ((addr & ~PUD_MASK) >> PAGE_SHIFT); -- 2.25.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH hmm 5/8] mm/hmm: add missing call to hmm_range_need_fault() before returning EFAULT
From: Jason Gunthorpe All return paths that do EFAULT must call hmm_range_need_fault() to determine if the user requires this page to be valid. If the page cannot be made valid if the user later requires it, due to vma flags in this case, then the return should be HMM_PFN_ERROR. Fixes: a3e0d41c2b1f ("mm/hmm: improve driver API to work and wait over a range") Signed-off-by: Jason Gunthorpe --- mm/hmm.c | 19 --- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/mm/hmm.c b/mm/hmm.c index 5f5ccf13dd1e85..e10cd0adba7b37 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -582,18 +582,15 @@ static int hmm_vma_walk_test(unsigned long start, unsigned long end, struct vm_area_struct *vma = walk->vma; /* -* Skip vma ranges that don't have struct page backing them or -* map I/O devices directly. -*/ - if (vma->vm_flags & (VM_IO | VM_PFNMAP | VM_MIXEDMAP)) - return -EFAULT; - - /* +* Skip vma ranges that don't have struct page backing them or map I/O +* devices directly. +* * If the vma does not allow read access, then assume that it does not -* allow write access either. HMM does not support architectures -* that allow write without read. +* allow write access either. HMM does not support architectures that +* allow write without read. */ - if (!(vma->vm_flags & VM_READ)) { + if ((vma->vm_flags & (VM_IO | VM_PFNMAP | VM_MIXEDMAP)) || + !(vma->vm_flags & VM_READ)) { bool fault, write_fault; /* @@ -607,7 +604,7 @@ static int hmm_vma_walk_test(unsigned long start, unsigned long end, if (fault || write_fault) return -EFAULT; - hmm_pfns_fill(start, end, range, HMM_PFN_NONE); + hmm_pfns_fill(start, end, range, HMM_PFN_ERROR); hmm_vma_walk->last = end; /* Skip this vma and continue processing the next vma. */ -- 2.25.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH hmm 8/8] mm/hmm: add missing call to hmm_pte_need_fault in HMM_PFN_SPECIAL handling
From: Jason Gunthorpe Currently if a special PTE is encountered hmm_range_fault() immediately returns EFAULT and sets the HMM_PFN_SPECIAL error output (which nothing uses). EFAULT should only be returned after testing with hmm_pte_need_fault(). Also pte_devmap() and pte_special() are exclusive, and there is no need to check IS_ENABLED, pte_special() is stubbed out to return false on unsupported architectures. Fixes: 992de9a8b751 ("mm/hmm: allow to mirror vma of a file on a DAX backed filesystem") Signed-off-by: Jason Gunthorpe --- mm/hmm.c | 19 --- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/mm/hmm.c b/mm/hmm.c index f61fddf2ef6505..ca33d086bdc190 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -335,16 +335,21 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr, pte_unmap(ptep); return -EBUSY; } - } else if (IS_ENABLED(CONFIG_ARCH_HAS_PTE_SPECIAL) && pte_special(pte)) { - if (!is_zero_pfn(pte_pfn(pte))) { + } + + /* +* Since each architecture defines a struct page for the zero page, just +* fall through and treat it like a normal page. +*/ + if (pte_special(pte) && !is_zero_pfn(pte_pfn(pte))) { + hmm_pte_need_fault(hmm_vma_walk, orig_pfn, 0, , + _fault); + if (fault || write_fault) { pte_unmap(ptep); - *pfn = range->values[HMM_PFN_SPECIAL]; return -EFAULT; } - /* -* Since each architecture defines a struct page for the zero -* page, just fall through and treat it like a normal page. -*/ + *pfn = range->values[HMM_PFN_SPECIAL]; + return 0; } *pfn = hmm_device_entry_from_pfn(range, pte_pfn(pte)) | cpu_flags; -- 2.25.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH hmm 4/8] mm/hmm: add missing pfns set to hmm_vma_walk_pmd()
From: Jason Gunthorpe All success exit paths from the walker functions must set the pfns array. A migration entry with no required fault is a HMM_PFN_NONE return, just like the pte case. Fixes: d08faca018c4 ("mm/hmm: properly handle migration pmd") Signed-off-by: Jason Gunthorpe --- mm/hmm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/hmm.c b/mm/hmm.c index 32dcbfd3908315..5f5ccf13dd1e85 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -394,7 +394,7 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp, pmd_migration_entry_wait(walk->mm, pmdp); return -EBUSY; } - return 0; + return hmm_pfns_fill(start, end, range, HMM_PFN_NONE); } else if (!pmd_present(pmd)) return hmm_pfns_fill(start, end, range, HMM_PFN_ERROR); -- 2.25.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH hmm 2/8] mm/hmm: don't free the cached pgmap while scanning
From: Jason Gunthorpe The pgmap is held in the hmm_vma_walk variable in hope of speeding up future get_dev_pagemap() calls by hitting the same pointer. The algorithm doesn't actually care about how long the pgmap is held for. Move the put of the cached pgmap to after the walk is completed and delete all the other now redundant puts. This solves a possible leak of the reference in hmm_vma_walk_pmd() if a hmm_vma_handle_pte() fails while looping. Fixes: 992de9a8b751 ("mm/hmm: allow to mirror vma of a file on a DAX backed filesystem") Signed-off-by: Jason Gunthorpe --- mm/hmm.c | 31 +-- 1 file changed, 9 insertions(+), 22 deletions(-) We talked about just deleting this stuff, but I think it makes alot sense for hmm_range_fault() to trigger fault on devmap pages that are not compatible with the caller - so lets just fix the leak on error path for now. diff --git a/mm/hmm.c b/mm/hmm.c index 35f85424176d14..9e8f68eb83287a 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -239,10 +239,6 @@ static int hmm_vma_handle_pmd(struct mm_walk *walk, unsigned long addr, } pfns[i] = hmm_device_entry_from_pfn(range, pfn) | cpu_flags; } - if (hmm_vma_walk->pgmap) { - put_dev_pagemap(hmm_vma_walk->pgmap); - hmm_vma_walk->pgmap = NULL; - } hmm_vma_walk->last = end; return 0; } @@ -360,10 +356,6 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr, return 0; fault: - if (hmm_vma_walk->pgmap) { - put_dev_pagemap(hmm_vma_walk->pgmap); - hmm_vma_walk->pgmap = NULL; - } pte_unmap(ptep); /* Fault any virtual address we were asked to fault */ return hmm_vma_walk_hole_(addr, end, fault, write_fault, walk); @@ -446,16 +438,6 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp, return r; } } - if (hmm_vma_walk->pgmap) { - /* -* We do put_dev_pagemap() here and not in hmm_vma_handle_pte() -* so that we can leverage get_dev_pagemap() optimization which -* will not re-take a reference on a pgmap if we already have -* one. -*/ - put_dev_pagemap(hmm_vma_walk->pgmap); - hmm_vma_walk->pgmap = NULL; - } pte_unmap(ptep - 1); hmm_vma_walk->last = addr; @@ -529,10 +511,6 @@ static int hmm_vma_walk_pud(pud_t *pudp, unsigned long start, unsigned long end, pfns[i] = hmm_device_entry_from_pfn(range, pfn) | cpu_flags; } - if (hmm_vma_walk->pgmap) { - put_dev_pagemap(hmm_vma_walk->pgmap); - hmm_vma_walk->pgmap = NULL; - } hmm_vma_walk->last = end; goto out_unlock; } @@ -694,6 +672,15 @@ long hmm_range_fault(struct hmm_range *range, unsigned int flags) return -EBUSY; ret = walk_page_range(mm, hmm_vma_walk.last, range->end, _walk_ops, _vma_walk); + /* +* A pgmap is kept cached in the hmm_vma_walk to avoid expensive +* searching in the probably common case that the pgmap is the +* same for the entire requested range. +*/ + if (hmm_vma_walk.pgmap) { + put_dev_pagemap(hmm_vma_walk.pgmap); + hmm_vma_walk.pgmap = NULL; + } } while (ret == -EBUSY); if (ret) -- 2.25.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH hmm 0/8] Various error case bug fixes for hmm_range_fault()
From: Jason Gunthorpe The hmm_range_fault() flow is fairly complicated. The scheme allows the caller to specify if it needs a usable result for each page, or if it only needs the current page table status filled in. This mixture of behavior is useful for a caller that wants to build a 'prefetch around fault' algorithm. Although we have no in-tree users of this capability, I am working on having RDMA ODP work in this manner, and removing these bugs from hmm_range_fault() is a necessary step. The basic principles are: - If the caller did not ask for a VA to be valid then hmm_range_fault() should not fail because of that VA - If 0 is returned from hmm_range_fault() then the entire pfns array contains valid data - HMM_PFN_ERROR is set if faulting fails, or if asking for faulting would fail - A 0 return from hmm_range_fault() does not have HMM_PFN_ERROR in any VA's the caller asked to be valid This series does not get there completely, I have a followup series closing some more complex cases. I tested this series using Ralph's hmm tester he posted a while back, other testing would be appreciated. Jason Gunthorpe (8): mm/hmm: add missing unmaps of the ptep during hmm_vma_handle_pte() mm/hmm: don't free the cached pgmap while scanning mm/hmm: do not call hmm_vma_walk_hole() while holding a spinlock mm/hmm: add missing pfns set to hmm_vma_walk_pmd() mm/hmm: add missing call to hmm_range_need_fault() before returning EFAULT mm/hmm: reorganize how !pte_present is handled in hmm_vma_handle_pte() mm/hmm: return -EFAULT when setting HMM_PFN_ERROR on requested valid pages mm/hmm: add missing call to hmm_pte_need_fault in HMM_PFN_SPECIAL handling mm/hmm.c | 166 ++- 1 file changed, 79 insertions(+), 87 deletions(-) -- 2.25.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH hmm 6/8] mm/hmm: reorganize how !pte_present is handled in hmm_vma_handle_pte()
From: Jason Gunthorpe The intention with this code is to determine if the caller required the pages to be valid, and if so, then take some action to make them valid. The action varies depending on the page type. In all cases, if the caller doesn't ask for the page, then hmm_range_fault() should not return an error. Revise the implementation to be clearer, and fix some bugs: - hmm_pte_need_fault() must always be called before testing fault or write_fault otherwise the defaults of false apply and the if()'s don't work. This was missed on the is_migration_entry() branch - -EFAULT should not be returned unless hmm_pte_need_fault() indicates fault is required - ie snapshotting should not fail. - For !pte_present() the cpu_flags are always 0, except in the special case of is_device_private_entry(), calling pte_to_hmm_pfn_flags() is confusing. Reorganize the flow so that it always follows the pattern of calling hmm_pte_need_fault() and then checking fault || write_fault. Fixes: 2aee09d8c116 ("mm/hmm: change hmm_vma_fault() to allow write fault on page basis") Signed-off-by: Jason Gunthorpe --- mm/hmm.c | 35 +++ 1 file changed, 15 insertions(+), 20 deletions(-) diff --git a/mm/hmm.c b/mm/hmm.c index e10cd0adba7b37..bf676cfef3e8ee 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -282,15 +282,6 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr, if (!pte_present(pte)) { swp_entry_t entry = pte_to_swp_entry(pte); - if (!non_swap_entry(entry)) { - cpu_flags = pte_to_hmm_pfn_flags(range, pte); - hmm_pte_need_fault(hmm_vma_walk, orig_pfn, cpu_flags, - , _fault); - if (fault || write_fault) - goto fault; - return 0; - } - /* * This is a special swap entry, ignore migration, use * device and report anything else as error. @@ -310,26 +301,30 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr, return 0; } - if (is_migration_entry(entry)) { - if (fault || write_fault) { - pte_unmap(ptep); - hmm_vma_walk->last = addr; - migration_entry_wait(walk->mm, pmdp, addr); - return -EBUSY; - } + hmm_pte_need_fault(hmm_vma_walk, orig_pfn, 0, , + _fault); + if (!fault && !write_fault) return 0; + + if (!non_swap_entry(entry)) + goto fault; + + if (is_migration_entry(entry)) { + pte_unmap(ptep); + hmm_vma_walk->last = addr; + migration_entry_wait(walk->mm, pmdp, addr); + return -EBUSY; } /* Report error for everything else */ pte_unmap(ptep); *pfn = range->values[HMM_PFN_ERROR]; return -EFAULT; - } else { - cpu_flags = pte_to_hmm_pfn_flags(range, pte); - hmm_pte_need_fault(hmm_vma_walk, orig_pfn, cpu_flags, - , _fault); } + cpu_flags = pte_to_hmm_pfn_flags(range, pte); + hmm_pte_need_fault(hmm_vma_walk, orig_pfn, cpu_flags, , + _fault); if (fault || write_fault) goto fault; -- 2.25.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [RFC PATCH 2/2] drm/amdgpu: disable gpu load balancer for vcn jobs
On 3/11/20 7:03 PM, Christian König wrote: Am 11.03.20 um 18:18 schrieb Nirmoy Das: VCN HW doesn't support dynamic load balance on multiple instances for a context. This modifies the entity's sched_list to a sched_list consist of only one drm scheduler. Signed-off-by: Nirmoy Das --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 25 + drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h | 2 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 1 + 4 files changed, 29 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 8304d0c87899..00032093d8a9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1257,6 +1257,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, priority = job->base.s_priority; drm_sched_entity_push_job(>base, entity); + amdgpu_ctx_limit_load_balance(entity); amdgpu_vm_move_to_lru_tail(p->adev, >vm); ttm_eu_fence_buffer_objects(>ticket, >validated, p->fence); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index fa575bdc03c8..57b49188306d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -139,6 +139,7 @@ static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, const u32 hw_ip, const if (r) goto error_free_entity; + entity->hw_ip = hw_ip; ctx->entities[hw_ip][ring] = entity; return 0; @@ -559,6 +560,30 @@ void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx, } } +static void limit_vcn_load_balance(struct amdgpu_ctx_entity *centity) +{ + struct drm_gpu_scheduler **scheds = >entity.rq->sched; + + if (drm_sched_entity_num_jobs(>entity) == 1) That check doesn't work correctly, the job might actually already be processed when we hit here. Okay now I know what Andrey meant. I will resend a updated patch. Thanks, Nirmoy + drm_sched_entity_modify_sched(>entity, scheds, 1); Just always update the scheduler here. + +} + +void amdgpu_ctx_limit_load_balance(struct drm_sched_entity *entity) +{ + struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity); + + if (!centity) + return; That check looks superfluous to me. + + switch (centity->hw_ip) { Better get the ring from entity->rq->sched instead. + case AMDGPU_HW_IP_VCN_DEC: + case AMDGPU_HW_IP_VCN_ENC: Maybe better to make that a flag in the ring functions, but this way works as well. Regards, Christian. + limit_vcn_load_balance(centity); + } + +} + int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, struct drm_sched_entity *entity) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h index de490f183af2..d52d8d562d77 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h @@ -33,6 +33,7 @@ struct amdgpu_fpriv; struct amdgpu_ctx_entity { uint64_t sequence; + uint32_t hw_ip; struct drm_sched_entity entity; struct dma_fence *fences[]; }; @@ -90,5 +91,6 @@ void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr); void amdgpu_ctx_init_sched(struct amdgpu_device *adev); +void amdgpu_ctx_limit_load_balance(struct drm_sched_entity *entity); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 4981e443a884..955d12bc89ae 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -154,6 +154,7 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity, amdgpu_job_free_resources(job); priority = job->base.s_priority; drm_sched_entity_push_job(>base, entity); + amdgpu_ctx_limit_load_balance(entity); return 0; } ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [RFC PATCH 2/2] drm/amdgpu: disable gpu load balancer for vcn jobs
Am 11.03.20 um 18:18 schrieb Nirmoy Das: VCN HW doesn't support dynamic load balance on multiple instances for a context. This modifies the entity's sched_list to a sched_list consist of only one drm scheduler. Signed-off-by: Nirmoy Das --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 25 + drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h | 2 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 1 + 4 files changed, 29 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 8304d0c87899..00032093d8a9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1257,6 +1257,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, priority = job->base.s_priority; drm_sched_entity_push_job(>base, entity); + amdgpu_ctx_limit_load_balance(entity); amdgpu_vm_move_to_lru_tail(p->adev, >vm); ttm_eu_fence_buffer_objects(>ticket, >validated, p->fence); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index fa575bdc03c8..57b49188306d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -139,6 +139,7 @@ static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, const u32 hw_ip, const if (r) goto error_free_entity; + entity->hw_ip = hw_ip; ctx->entities[hw_ip][ring] = entity; return 0; @@ -559,6 +560,30 @@ void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx, } } +static void limit_vcn_load_balance(struct amdgpu_ctx_entity *centity) +{ + struct drm_gpu_scheduler **scheds = >entity.rq->sched; + + if (drm_sched_entity_num_jobs(>entity) == 1) That check doesn't work correctly, the job might actually already be processed when we hit here. + drm_sched_entity_modify_sched(>entity, scheds, 1); Just always update the scheduler here. + +} + +void amdgpu_ctx_limit_load_balance(struct drm_sched_entity *entity) +{ + struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity); + + if (!centity) + return; That check looks superfluous to me. + + switch (centity->hw_ip) { Better get the ring from entity->rq->sched instead. + case AMDGPU_HW_IP_VCN_DEC: + case AMDGPU_HW_IP_VCN_ENC: Maybe better to make that a flag in the ring functions, but this way works as well. Regards, Christian. + limit_vcn_load_balance(centity); + } + +} + int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, struct drm_sched_entity *entity) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h index de490f183af2..d52d8d562d77 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h @@ -33,6 +33,7 @@ struct amdgpu_fpriv; struct amdgpu_ctx_entity { uint64_tsequence; + uint32_thw_ip; struct drm_sched_entity entity; struct dma_fence*fences[]; }; @@ -90,5 +91,6 @@ void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr); void amdgpu_ctx_init_sched(struct amdgpu_device *adev); +void amdgpu_ctx_limit_load_balance(struct drm_sched_entity *entity); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 4981e443a884..955d12bc89ae 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -154,6 +154,7 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity, amdgpu_job_free_resources(job); priority = job->base.s_priority; drm_sched_entity_push_job(>base, entity); + amdgpu_ctx_limit_load_balance(entity); return 0; } ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [RFC PATCH 1/2] drm/sched: implement drm_sched_entity_num_jobs
Am 11.03.20 um 18:58 schrieb Nirmoy: On 3/11/20 6:23 PM, Andrey Grodzovsky wrote: On 3/11/20 1:18 PM, Nirmoy Das wrote: Implement drm_sched_entity_num_jobs() so that drm drivers can query number of jobs in an entity. Signed-off-by: Nirmoy Das --- drivers/gpu/drm/scheduler/sched_entity.c | 15 +++ include/drm/gpu_scheduler.h | 1 + 2 files changed, 16 insertions(+) diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c index 90fd9c30ae5a..dfe8216f2d52 100644 --- a/drivers/gpu/drm/scheduler/sched_entity.c +++ b/drivers/gpu/drm/scheduler/sched_entity.c @@ -119,6 +119,21 @@ static bool drm_sched_entity_is_idle(struct drm_sched_entity *entity) return false; } +/** + * drm_sched_entity_num_job - Get number of jobs in the entity typo s/drm_sched_entity_num_job/drm_sched_entity_num_jobs + * + * @entity: scheduler entity + * + * Returns number of jobs in the entity + */ +int drm_sched_entity_num_jobs(struct drm_sched_entity *entity) +{ + if (drm_sched_entity_is_idle(entity)) + return 0; + + return spsc_queue_count(>job_queue); +} What about the jobs which already have been dequeued from job_queue and are in progress in the HW ring but yet to complete - don't they count ? Hi Andrey, I am thinking in terms of software side of the counting because for an entity once a job dequeued, that job is completely lost. I might be wrong here that's why tagged RFC :) My question is rather what do we need that for in the first place? Thanks, Christian. Regards, Nirmoy Andrey +EXPORT_SYMBOL(drm_sched_entity_num_jobs); /** * drm_sched_entity_is_ready - Check if entity is ready * diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h index d8972836d248..b5ceff75cbbe 100644 --- a/include/drm/gpu_scheduler.h +++ b/include/drm/gpu_scheduler.h @@ -341,5 +341,6 @@ void drm_sched_fence_finished(struct drm_sched_fence *fence); unsigned long drm_sched_suspend_timeout(struct drm_gpu_scheduler *sched); void drm_sched_resume_timeout(struct drm_gpu_scheduler *sched, unsigned long remaining); +int drm_sched_entity_num_jobs(struct drm_sched_entity *entity); #endif ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [RFC PATCH 1/2] drm/sched: implement drm_sched_entity_num_jobs
On 3/11/20 6:23 PM, Andrey Grodzovsky wrote: On 3/11/20 1:18 PM, Nirmoy Das wrote: Implement drm_sched_entity_num_jobs() so that drm drivers can query number of jobs in an entity. Signed-off-by: Nirmoy Das --- drivers/gpu/drm/scheduler/sched_entity.c | 15 +++ include/drm/gpu_scheduler.h | 1 + 2 files changed, 16 insertions(+) diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c index 90fd9c30ae5a..dfe8216f2d52 100644 --- a/drivers/gpu/drm/scheduler/sched_entity.c +++ b/drivers/gpu/drm/scheduler/sched_entity.c @@ -119,6 +119,21 @@ static bool drm_sched_entity_is_idle(struct drm_sched_entity *entity) return false; } +/** + * drm_sched_entity_num_job - Get number of jobs in the entity typo s/drm_sched_entity_num_job/drm_sched_entity_num_jobs + * + * @entity: scheduler entity + * + * Returns number of jobs in the entity + */ +int drm_sched_entity_num_jobs(struct drm_sched_entity *entity) +{ + if (drm_sched_entity_is_idle(entity)) + return 0; + + return spsc_queue_count(>job_queue); +} What about the jobs which already have been dequeued from job_queue and are in progress in the HW ring but yet to complete - don't they count ? Hi Andrey, I am thinking in terms of software side of the counting because for an entity once a job dequeued, that job is completely lost. I might be wrong here that's why tagged RFC :) Regards, Nirmoy Andrey +EXPORT_SYMBOL(drm_sched_entity_num_jobs); /** * drm_sched_entity_is_ready - Check if entity is ready * diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h index d8972836d248..b5ceff75cbbe 100644 --- a/include/drm/gpu_scheduler.h +++ b/include/drm/gpu_scheduler.h @@ -341,5 +341,6 @@ void drm_sched_fence_finished(struct drm_sched_fence *fence); unsigned long drm_sched_suspend_timeout(struct drm_gpu_scheduler *sched); void drm_sched_resume_timeout(struct drm_gpu_scheduler *sched, unsigned long remaining); +int drm_sched_entity_num_jobs(struct drm_sched_entity *entity); #endif ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [RFC PATCH 1/2] drm/sched: implement drm_sched_entity_num_jobs
On 3/11/20 1:18 PM, Nirmoy Das wrote: Implement drm_sched_entity_num_jobs() so that drm drivers can query number of jobs in an entity. Signed-off-by: Nirmoy Das --- drivers/gpu/drm/scheduler/sched_entity.c | 15 +++ include/drm/gpu_scheduler.h | 1 + 2 files changed, 16 insertions(+) diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c index 90fd9c30ae5a..dfe8216f2d52 100644 --- a/drivers/gpu/drm/scheduler/sched_entity.c +++ b/drivers/gpu/drm/scheduler/sched_entity.c @@ -119,6 +119,21 @@ static bool drm_sched_entity_is_idle(struct drm_sched_entity *entity) return false; } +/** + * drm_sched_entity_num_job - Get number of jobs in the entity + * + * @entity: scheduler entity + * + * Returns number of jobs in the entity + */ +int drm_sched_entity_num_jobs(struct drm_sched_entity *entity) +{ + if (drm_sched_entity_is_idle(entity)) + return 0; + + return spsc_queue_count(>job_queue); +} What about the jobs which already have been dequeued from job_queue and are in progress in the HW ring but yet to complete - don't they count ? Andrey +EXPORT_SYMBOL(drm_sched_entity_num_jobs); /** * drm_sched_entity_is_ready - Check if entity is ready * diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h index d8972836d248..b5ceff75cbbe 100644 --- a/include/drm/gpu_scheduler.h +++ b/include/drm/gpu_scheduler.h @@ -341,5 +341,6 @@ void drm_sched_fence_finished(struct drm_sched_fence *fence); unsigned long drm_sched_suspend_timeout(struct drm_gpu_scheduler *sched); void drm_sched_resume_timeout(struct drm_gpu_scheduler *sched, unsigned long remaining); +int drm_sched_entity_num_jobs(struct drm_sched_entity *entity); #endif ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[RFC PATCH 1/2] drm/sched: implement drm_sched_entity_num_jobs
Implement drm_sched_entity_num_jobs() so that drm drivers can query number of jobs in an entity. Signed-off-by: Nirmoy Das --- drivers/gpu/drm/scheduler/sched_entity.c | 15 +++ include/drm/gpu_scheduler.h | 1 + 2 files changed, 16 insertions(+) diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c index 90fd9c30ae5a..dfe8216f2d52 100644 --- a/drivers/gpu/drm/scheduler/sched_entity.c +++ b/drivers/gpu/drm/scheduler/sched_entity.c @@ -119,6 +119,21 @@ static bool drm_sched_entity_is_idle(struct drm_sched_entity *entity) return false; } +/** + * drm_sched_entity_num_job - Get number of jobs in the entity + * + * @entity: scheduler entity + * + * Returns number of jobs in the entity + */ +int drm_sched_entity_num_jobs(struct drm_sched_entity *entity) +{ + if (drm_sched_entity_is_idle(entity)) + return 0; + + return spsc_queue_count(>job_queue); +} +EXPORT_SYMBOL(drm_sched_entity_num_jobs); /** * drm_sched_entity_is_ready - Check if entity is ready * diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h index d8972836d248..b5ceff75cbbe 100644 --- a/include/drm/gpu_scheduler.h +++ b/include/drm/gpu_scheduler.h @@ -341,5 +341,6 @@ void drm_sched_fence_finished(struct drm_sched_fence *fence); unsigned long drm_sched_suspend_timeout(struct drm_gpu_scheduler *sched); void drm_sched_resume_timeout(struct drm_gpu_scheduler *sched, unsigned long remaining); +int drm_sched_entity_num_jobs(struct drm_sched_entity *entity); #endif -- 2.25.0 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[RFC PATCH 2/2] drm/amdgpu: disable gpu load balancer for vcn jobs
VCN HW doesn't support dynamic load balance on multiple instances for a context. This modifies the entity's sched_list to a sched_list consist of only one drm scheduler. Signed-off-by: Nirmoy Das --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 25 + drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h | 2 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 1 + 4 files changed, 29 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 8304d0c87899..00032093d8a9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1257,6 +1257,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, priority = job->base.s_priority; drm_sched_entity_push_job(>base, entity); + amdgpu_ctx_limit_load_balance(entity); amdgpu_vm_move_to_lru_tail(p->adev, >vm); ttm_eu_fence_buffer_objects(>ticket, >validated, p->fence); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index fa575bdc03c8..57b49188306d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -139,6 +139,7 @@ static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, const u32 hw_ip, const if (r) goto error_free_entity; + entity->hw_ip = hw_ip; ctx->entities[hw_ip][ring] = entity; return 0; @@ -559,6 +560,30 @@ void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx, } } +static void limit_vcn_load_balance(struct amdgpu_ctx_entity *centity) +{ + struct drm_gpu_scheduler **scheds = >entity.rq->sched; + + if (drm_sched_entity_num_jobs(>entity) == 1) + drm_sched_entity_modify_sched(>entity, scheds, 1); + +} + +void amdgpu_ctx_limit_load_balance(struct drm_sched_entity *entity) +{ + struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity); + + if (!centity) + return; + + switch (centity->hw_ip) { + case AMDGPU_HW_IP_VCN_DEC: + case AMDGPU_HW_IP_VCN_ENC: + limit_vcn_load_balance(centity); + } + +} + int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, struct drm_sched_entity *entity) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h index de490f183af2..d52d8d562d77 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h @@ -33,6 +33,7 @@ struct amdgpu_fpriv; struct amdgpu_ctx_entity { uint64_tsequence; + uint32_thw_ip; struct drm_sched_entity entity; struct dma_fence*fences[]; }; @@ -90,5 +91,6 @@ void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr); void amdgpu_ctx_init_sched(struct amdgpu_device *adev); +void amdgpu_ctx_limit_load_balance(struct drm_sched_entity *entity); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 4981e443a884..955d12bc89ae 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -154,6 +154,7 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity, amdgpu_job_free_resources(job); priority = job->base.s_priority; drm_sched_entity_push_job(>base, entity); + amdgpu_ctx_limit_load_balance(entity); return 0; } -- 2.25.0 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] drm/amd/amdgpu: Fix GPR read from debugfs
On 2020-03-11 11:16 a.m., Alex Deucher wrote: On Tue, Mar 10, 2020 at 8:53 AM Tom St Denis wrote: The offset into the array was specified in bytes but should be in terms of 32-bit words. Also prevent large reads that would also cause a buffer overread. Signed-off-by: Tom St Denis --- drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index c573edf02afc..e0f4ccd91fd4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -783,11 +783,11 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf, ssize_t result = 0; uint32_t offset, se, sh, cu, wave, simd, thread, bank, *data; - if (size & 3 || *pos & 3) + if (size > 4096 || size & 3 || *pos & 3) Is size in dwords as well? Nope it's in bytes (as per the calling convention standards as well as later in the function we subtract 4 from it repeatedly). Tom Alex return -EINVAL; /* decode offset */ - offset = *pos & GENMASK_ULL(11, 0); + offset = (*pos & GENMASK_ULL(11, 0)) / 4; se = (*pos & GENMASK_ULL(19, 12)) >> 12; sh = (*pos & GENMASK_ULL(27, 20)) >> 20; cu = (*pos & GENMASK_ULL(35, 28)) >> 28; -- 2.24.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfxdata=02%7C01%7Ctom.stdenis%40amd.com%7C550ca4aaaf084c3d7a9f08d7c5cf2c65%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637195365897948298sdata=YTb2YGBlAlDS%2FffaVOo2Yrej21N%2FJYVpFVIc1rQERWg%3Dreserved=0 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] drm/amd/amdgpu: Fix GPR read from debugfs
On Tue, Mar 10, 2020 at 8:53 AM Tom St Denis wrote: > > The offset into the array was specified in bytes but should > be in terms of 32-bit words. Also prevent large reads that > would also cause a buffer overread. > > Signed-off-by: Tom St Denis > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 4 ++-- > 1 file changed, 2 insertions(+), 2 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c > index c573edf02afc..e0f4ccd91fd4 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c > @@ -783,11 +783,11 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, > char __user *buf, > ssize_t result = 0; > uint32_t offset, se, sh, cu, wave, simd, thread, bank, *data; > > - if (size & 3 || *pos & 3) > + if (size > 4096 || size & 3 || *pos & 3) Is size in dwords as well? Alex > return -EINVAL; > > /* decode offset */ > - offset = *pos & GENMASK_ULL(11, 0); > + offset = (*pos & GENMASK_ULL(11, 0)) / 4; > se = (*pos & GENMASK_ULL(19, 12)) >> 12; > sh = (*pos & GENMASK_ULL(27, 20)) >> 20; > cu = (*pos & GENMASK_ULL(35, 28)) >> 28; > -- > 2.24.1 > > ___ > amd-gfx mailing list > amd-gfx@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/amd-gfx ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH v5 1/4] drm/amdgpu/vcn: fix race condition issue for vcn start
Am 11.03.20 um 16:04 schrieb James Zhu: Fix race condition issue when multiple vcn starts are called. v2: Removed checking the return value of cancel_delayed_work_sync() to prevent possible races here. v3: Add total_submission_cnt to avoid gate power unexpectedly. v4: Remove extra counter check, and reduce counter before idle work schedule Signed-off-by: James Zhu Reviewed-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 21 ++--- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h | 2 ++ 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index a41272f..6dacf78 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -63,6 +63,8 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) int i, r; INIT_DELAYED_WORK(>vcn.idle_work, amdgpu_vcn_idle_work_handler); + mutex_init(>vcn.vcn_pg_lock); + atomic_set(>vcn.total_submission_cnt, 0); switch (adev->asic_type) { case CHIP_RAVEN: @@ -210,6 +212,7 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev) } release_firmware(adev->vcn.fw); + mutex_destroy(>vcn.vcn_pg_lock); return 0; } @@ -307,7 +310,7 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work) fences += fence[j]; } - if (fences == 0) { + if (!fences && !atomic_read(>vcn.total_submission_cnt)) { amdgpu_gfx_off_ctrl(adev, true); amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, AMD_PG_STATE_GATE); @@ -319,13 +322,14 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work) void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - bool set_clocks = !cancel_delayed_work_sync(>vcn.idle_work); - if (set_clocks) { - amdgpu_gfx_off_ctrl(adev, false); - amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, - AMD_PG_STATE_UNGATE); - } + atomic_inc(>vcn.total_submission_cnt); + cancel_delayed_work_sync(>vcn.idle_work); + + mutex_lock(>vcn.vcn_pg_lock); + amdgpu_gfx_off_ctrl(adev, false); + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, + AMD_PG_STATE_UNGATE); if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { struct dpg_pause_state new_state; @@ -345,10 +349,13 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring) adev->vcn.pause_dpg_mode(adev, ring->me, _state); } + mutex_unlock(>vcn.vcn_pg_lock); } void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring) { + atomic_dec(>adev->vcn.total_submission_cnt); + schedule_delayed_work(>adev->vcn.idle_work, VCN_IDLE_TIMEOUT); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index 6fe0573..111c4cc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -200,6 +200,8 @@ struct amdgpu_vcn { struct drm_gpu_scheduler *vcn_dec_sched[AMDGPU_MAX_VCN_INSTANCES]; uint32_t num_vcn_enc_sched; uint32_t num_vcn_dec_sched; + struct mutex vcn_pg_lock; + atomic_t total_submission_cnt; unsigned harvest_config; int (*pause_dpg_mode)(struct amdgpu_device *adev, ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH v5 1/4] drm/amdgpu/vcn: fix race condition issue for vcn start
Fix race condition issue when multiple vcn starts are called. v2: Removed checking the return value of cancel_delayed_work_sync() to prevent possible races here. v3: Add total_submission_cnt to avoid gate power unexpectedly. v4: Remove extra counter check, and reduce counter before idle work schedule Signed-off-by: James Zhu --- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 21 ++--- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h | 2 ++ 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index a41272f..6dacf78 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -63,6 +63,8 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) int i, r; INIT_DELAYED_WORK(>vcn.idle_work, amdgpu_vcn_idle_work_handler); + mutex_init(>vcn.vcn_pg_lock); + atomic_set(>vcn.total_submission_cnt, 0); switch (adev->asic_type) { case CHIP_RAVEN: @@ -210,6 +212,7 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev) } release_firmware(adev->vcn.fw); + mutex_destroy(>vcn.vcn_pg_lock); return 0; } @@ -307,7 +310,7 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work) fences += fence[j]; } - if (fences == 0) { + if (!fences && !atomic_read(>vcn.total_submission_cnt)) { amdgpu_gfx_off_ctrl(adev, true); amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, AMD_PG_STATE_GATE); @@ -319,13 +322,14 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work) void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - bool set_clocks = !cancel_delayed_work_sync(>vcn.idle_work); - if (set_clocks) { - amdgpu_gfx_off_ctrl(adev, false); - amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, - AMD_PG_STATE_UNGATE); - } + atomic_inc(>vcn.total_submission_cnt); + cancel_delayed_work_sync(>vcn.idle_work); + + mutex_lock(>vcn.vcn_pg_lock); + amdgpu_gfx_off_ctrl(adev, false); + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, + AMD_PG_STATE_UNGATE); if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG){ struct dpg_pause_state new_state; @@ -345,10 +349,13 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring) adev->vcn.pause_dpg_mode(adev, ring->me, _state); } + mutex_unlock(>vcn.vcn_pg_lock); } void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring) { + atomic_dec(>adev->vcn.total_submission_cnt); + schedule_delayed_work(>adev->vcn.idle_work, VCN_IDLE_TIMEOUT); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index 6fe0573..111c4cc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -200,6 +200,8 @@ struct amdgpu_vcn { struct drm_gpu_scheduler *vcn_dec_sched[AMDGPU_MAX_VCN_INSTANCES]; uint32_t num_vcn_enc_sched; uint32_t num_vcn_dec_sched; + struct mutex vcn_pg_lock; + atomic_t total_submission_cnt; unsignedharvest_config; int (*pause_dpg_mode)(struct amdgpu_device *adev, -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH v4 1/4] drm/amdgpu/vcn: fix race condition issue for vcn start
Am 11.03.20 um 15:15 schrieb James Zhu: Fix race condition issue when multiple vcn starts are called. v2: Removed checking the return value of cancel_delayed_work_sync() to prevent possible races here. v3: Add total_submission_cnt to avoid gate power unexpectedly. v4: Remove extra counter check, and reduce counter before idle work schedule Signed-off-by: James Zhu --- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 22 +++--- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h | 2 ++ 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index a41272f..2fa2891 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -63,6 +63,8 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) int i, r; INIT_DELAYED_WORK(>vcn.idle_work, amdgpu_vcn_idle_work_handler); + mutex_init(>vcn.vcn_pg_lock); + atomic_set(>vcn.total_submission_cnt, 0); switch (adev->asic_type) { case CHIP_RAVEN: @@ -210,6 +212,7 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev) } release_firmware(adev->vcn.fw); + mutex_destroy(>vcn.vcn_pg_lock); return 0; } @@ -307,7 +310,8 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work) fences += fence[j]; } - if (fences == 0) { + if (fences == 0 && + likely(atomic_read(>vcn.total_submission_cnt) == 0)) { The indentation here looks off, maybe just write that as !fences && !atomic_read(>vcn.total_submission_cnt). Apart from that looks good to me now, Christian. amdgpu_gfx_off_ctrl(adev, true); amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, AMD_PG_STATE_GATE); @@ -319,13 +323,14 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work) void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - bool set_clocks = !cancel_delayed_work_sync(>vcn.idle_work); - if (set_clocks) { - amdgpu_gfx_off_ctrl(adev, false); - amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, - AMD_PG_STATE_UNGATE); - } + atomic_inc(>vcn.total_submission_cnt); + cancel_delayed_work_sync(>vcn.idle_work); + + mutex_lock(>vcn.vcn_pg_lock); + amdgpu_gfx_off_ctrl(adev, false); + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, + AMD_PG_STATE_UNGATE); if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { struct dpg_pause_state new_state; @@ -345,10 +350,13 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring) adev->vcn.pause_dpg_mode(adev, ring->me, _state); } + mutex_unlock(>vcn.vcn_pg_lock); } void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring) { + atomic_dec(>adev->vcn.total_submission_cnt); + schedule_delayed_work(>adev->vcn.idle_work, VCN_IDLE_TIMEOUT); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index 6fe0573..111c4cc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -200,6 +200,8 @@ struct amdgpu_vcn { struct drm_gpu_scheduler *vcn_dec_sched[AMDGPU_MAX_VCN_INSTANCES]; uint32_t num_vcn_enc_sched; uint32_t num_vcn_dec_sched; + struct mutex vcn_pg_lock; + atomic_t total_submission_cnt; unsigned harvest_config; int (*pause_dpg_mode)(struct amdgpu_device *adev, ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH v4 2/4] drm/amdgpu/vcn: fix race condition issue for dpg unpause mode switch
Couldn't only rely on enc fence to decide switching to dpg unpaude mode. Since a enc thread may not schedule a fence in time during multiple threads running situation. v3: 1. Rename enc_submission_cnt to dpg_enc_submission_cnt 2. Add dpg_enc_submission_cnt check in idle_work_handler v4: Remove extra counter check, and reduce counter before idle work schedule Signed-off-by: James Zhu --- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 32 +--- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h | 1 + 2 files changed, 22 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index 2fa2891..ba28fb9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -65,6 +65,8 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) INIT_DELAYED_WORK(>vcn.idle_work, amdgpu_vcn_idle_work_handler); mutex_init(>vcn.vcn_pg_lock); atomic_set(>vcn.total_submission_cnt, 0); + for (i = 0; i < adev->vcn.num_vcn_inst; i++) + atomic_set(>vcn.inst[i].dpg_enc_submission_cnt, 0); switch (adev->asic_type) { case CHIP_RAVEN: @@ -298,7 +300,8 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work) if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG){ struct dpg_pause_state new_state; - if (fence[j]) + if (fence[j] || + unlikely(atomic_read(>vcn.inst[j].dpg_enc_submission_cnt))) new_state.fw_based = VCN_DPG_STATE__PAUSE; else new_state.fw_based = VCN_DPG_STATE__UNPAUSE; @@ -334,19 +337,22 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring) if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG){ struct dpg_pause_state new_state; - unsigned int fences = 0; - unsigned int i; - for (i = 0; i < adev->vcn.num_enc_rings; ++i) { - fences += amdgpu_fence_count_emitted(>vcn.inst[ring->me].ring_enc[i]); - } - if (fences) + if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC) { + atomic_inc(>vcn.inst[ring->me].dpg_enc_submission_cnt); new_state.fw_based = VCN_DPG_STATE__PAUSE; - else - new_state.fw_based = VCN_DPG_STATE__UNPAUSE; + } else { + unsigned int fences = 0; + unsigned int i; - if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC) - new_state.fw_based = VCN_DPG_STATE__PAUSE; + for (i = 0; i < adev->vcn.num_enc_rings; ++i) + fences += amdgpu_fence_count_emitted(>vcn.inst[ring->me].ring_enc[i]); + + if (fences || atomic_read(>vcn.inst[ring->me].dpg_enc_submission_cnt)) + new_state.fw_based = VCN_DPG_STATE__PAUSE; + else + new_state.fw_based = VCN_DPG_STATE__UNPAUSE; + } adev->vcn.pause_dpg_mode(adev, ring->me, _state); } @@ -355,6 +361,10 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring) void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring) { + if (ring->adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG && + ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC) + atomic_dec(>adev->vcn.inst[ring->me].dpg_enc_submission_cnt); + atomic_dec(>adev->vcn.total_submission_cnt); schedule_delayed_work(>adev->vcn.idle_work, VCN_IDLE_TIMEOUT); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index 111c4cc..e913de8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -183,6 +183,7 @@ struct amdgpu_vcn_inst { void*dpg_sram_cpu_addr; uint64_tdpg_sram_gpu_addr; uint32_t*dpg_sram_curr_addr; + atomic_tdpg_enc_submission_cnt; }; struct amdgpu_vcn { -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH v4 1/4] drm/amdgpu/vcn: fix race condition issue for vcn start
Fix race condition issue when multiple vcn starts are called. v2: Removed checking the return value of cancel_delayed_work_sync() to prevent possible races here. v3: Add total_submission_cnt to avoid gate power unexpectedly. v4: Remove extra counter check, and reduce counter before idle work schedule Signed-off-by: James Zhu --- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 22 +++--- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h | 2 ++ 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index a41272f..2fa2891 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -63,6 +63,8 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) int i, r; INIT_DELAYED_WORK(>vcn.idle_work, amdgpu_vcn_idle_work_handler); + mutex_init(>vcn.vcn_pg_lock); + atomic_set(>vcn.total_submission_cnt, 0); switch (adev->asic_type) { case CHIP_RAVEN: @@ -210,6 +212,7 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev) } release_firmware(adev->vcn.fw); + mutex_destroy(>vcn.vcn_pg_lock); return 0; } @@ -307,7 +310,8 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work) fences += fence[j]; } - if (fences == 0) { + if (fences == 0 && + likely(atomic_read(>vcn.total_submission_cnt) == 0)) { amdgpu_gfx_off_ctrl(adev, true); amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, AMD_PG_STATE_GATE); @@ -319,13 +323,14 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work) void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - bool set_clocks = !cancel_delayed_work_sync(>vcn.idle_work); - if (set_clocks) { - amdgpu_gfx_off_ctrl(adev, false); - amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, - AMD_PG_STATE_UNGATE); - } + atomic_inc(>vcn.total_submission_cnt); + cancel_delayed_work_sync(>vcn.idle_work); + + mutex_lock(>vcn.vcn_pg_lock); + amdgpu_gfx_off_ctrl(adev, false); + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, + AMD_PG_STATE_UNGATE); if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG){ struct dpg_pause_state new_state; @@ -345,10 +350,13 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring) adev->vcn.pause_dpg_mode(adev, ring->me, _state); } + mutex_unlock(>vcn.vcn_pg_lock); } void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring) { + atomic_dec(>adev->vcn.total_submission_cnt); + schedule_delayed_work(>adev->vcn.idle_work, VCN_IDLE_TIMEOUT); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index 6fe0573..111c4cc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -200,6 +200,8 @@ struct amdgpu_vcn { struct drm_gpu_scheduler *vcn_dec_sched[AMDGPU_MAX_VCN_INSTANCES]; uint32_t num_vcn_enc_sched; uint32_t num_vcn_dec_sched; + struct mutex vcn_pg_lock; + atomic_t total_submission_cnt; unsignedharvest_config; int (*pause_dpg_mode)(struct amdgpu_device *adev, -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH v3 1/4] drm/amdgpu/vcn: fix race condition issue for vcn start
On 2020-03-11 7:38 a.m., Christian König wrote: Am 11.03.20 um 12:30 schrieb Zhu, James: [AMD Official Use Only - Internal Distribution Only] ping *From:* Zhu, James *Sent:* Monday, March 9, 2020 12:57 PM *To:* amd-gfx@lists.freedesktop.org *Cc:* Zhu, James ; Koenig, Christian *Subject:* [PATCH v3 1/4] drm/amdgpu/vcn: fix race condition issue for vcn start Fix race condition issue when multiple vcn starts are called. v2: Removed checking the return value of cancel_delayed_work_sync() to prevent possible races here. v3: Add total_submission_cnt to avoid gate power unexpectedly. Signed-off-by: James Zhu --- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 22 +++--- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h | 2 ++ 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index a41272f..6aafda1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -63,6 +63,8 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) int i, r; INIT_DELAYED_WORK(>vcn.idle_work, amdgpu_vcn_idle_work_handler); + mutex_init(>vcn.vcn_pg_lock); + atomic_set(>vcn.total_submission_cnt, 0); switch (adev->asic_type) { case CHIP_RAVEN: @@ -210,6 +212,7 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev) } release_firmware(adev->vcn.fw); + mutex_destroy(>vcn.vcn_pg_lock); return 0; } @@ -307,7 +310,8 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work) fences += fence[j]; } - if (fences == 0) { + if (fences == 0 && + likely(atomic_read(>vcn.total_submission_cnt) == 0)) { amdgpu_gfx_off_ctrl(adev, true); amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, AMD_PG_STATE_GATE); @@ -319,13 +323,14 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work) void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - bool set_clocks = !cancel_delayed_work_sync(>vcn.idle_work); - if (set_clocks) { - amdgpu_gfx_off_ctrl(adev, false); - amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, - AMD_PG_STATE_UNGATE); - } + atomic_inc(>vcn.total_submission_cnt); + cancel_delayed_work_sync(>vcn.idle_work); + + mutex_lock(>vcn.vcn_pg_lock); + amdgpu_gfx_off_ctrl(adev, false); + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, + AMD_PG_STATE_UNGATE); if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { struct dpg_pause_state new_state; @@ -345,11 +350,14 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring) adev->vcn.pause_dpg_mode(adev, ring->me, _state); } + mutex_unlock(>vcn.vcn_pg_lock); } void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring) { schedule_delayed_work(>adev->vcn.idle_work, VCN_IDLE_TIMEOUT); + if (unlikely(atomic_dec_return(>adev->vcn.total_submission_cnt) < 0)) + atomic_set(>adev->vcn.total_submission_cnt, 0); You need to decrement first and then call schedule_delayed_work() otherwise the work could run with the wrong counter. And the extra check for an under run should be superfluous. Thanks! Please check V4 James Regards, Christian. } int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index 6fe0573..111c4cc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -200,6 +200,8 @@ struct amdgpu_vcn { struct drm_gpu_scheduler *vcn_dec_sched[AMDGPU_MAX_VCN_INSTANCES]; uint32_t num_vcn_enc_sched; uint32_t num_vcn_dec_sched; + struct mutex vcn_pg_lock; + atomic_t total_submission_cnt; unsigned harvest_config; int (*pause_dpg_mode)(struct amdgpu_device *adev, -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] drm/amdgpu: add fbdev suspend/resume on gpu reset
[AMD Official Use Only - Internal Distribution Only] Hi Evan, Does this patch also fix the baco failure on Navi14 with display connected? BR, Xiaojie From: amd-gfx on behalf of Evan Quan Sent: Wednesday, March 11, 2020 4:18 PM To: amd-gfx@lists.freedesktop.org Cc: Quan, Evan Subject: [PATCH] drm/amdgpu: add fbdev suspend/resume on gpu reset This can fix the baco reset failure seen on Navi10. And this should be a low risk fix as the same sequence is already used for system suspend/resume. Change-Id: Idb4d02c5fcbbd5b7817195ee04c7af34c346a053 Signed-off-by: Evan Quan --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 572eb6ea8eab..a35c89973614 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3935,6 +3935,8 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive, if (r) goto out; + amdgpu_fbdev_set_suspend(tmp_adev, 0); + /* must succeed. */ amdgpu_ras_resume(tmp_adev); @@ -4108,6 +4110,8 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, */ amdgpu_unregister_gpu_instance(tmp_adev); + amdgpu_fbdev_set_suspend(adev, 1); + /* disable ras on ALL IPs */ if (!(in_ras_intr && !use_baco) && amdgpu_device_ip_need_full_reset(tmp_adev)) -- 2.25.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfxdata=02%7C01%7CXiaojie.Yuan%40amd.com%7C3eace080fd0b4f67337e08d7c594e441%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637195115872403614sdata=%2B%2B3xRFOl2Ho%2BRe9VuzqHtZNoVZ3s%2BxIP7xTv6yG11LA%3Dreserved=0 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] drm/amdgpu: reenable runtime pm on navi12
[AMD Public Use] Reviewed-by: Alex Deucher From: Quan, Evan Sent: Wednesday, March 11, 2020 6:56 AM To: amd-gfx@lists.freedesktop.org Cc: Deucher, Alexander ; Quan, Evan Subject: [PATCH] drm/amdgpu: reenable runtime pm on navi12 The runtime pm is verified as working now on navi12. Change-Id: I20393633678297308c9651237bbfdc854a3cff94 Signed-off-by: Evan Quan --- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 80495652a7c1..e376dc072d42 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -174,8 +174,7 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags) (adev->asic_type >= CHIP_TOPAZ) && (adev->asic_type != CHIP_VEGA10) && (adev->asic_type != CHIP_VEGA20) && -(adev->asic_type != CHIP_ARCTURUS) && -(adev->asic_type != CHIP_NAVI12)) /* enable runpm on VI+ */ +(adev->asic_type != CHIP_ARCTURUS)) /* enable runpm on VI+ */ adev->runpm = true; else if (amdgpu_device_supports_baco(dev) && (amdgpu_runtime_pm > 0)) /* enable runpm if runpm=1 on CI */ -- 2.25.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] drm/amdgpu: add fbdev suspend/resume on gpu reset
[AMD Public Use] Reviewed-by: Alex Deucher From: amd-gfx on behalf of Evan Quan Sent: Wednesday, March 11, 2020 4:18 AM To: amd-gfx@lists.freedesktop.org Cc: Quan, Evan Subject: [PATCH] drm/amdgpu: add fbdev suspend/resume on gpu reset This can fix the baco reset failure seen on Navi10. And this should be a low risk fix as the same sequence is already used for system suspend/resume. Change-Id: Idb4d02c5fcbbd5b7817195ee04c7af34c346a053 Signed-off-by: Evan Quan --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 572eb6ea8eab..a35c89973614 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3935,6 +3935,8 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive, if (r) goto out; + amdgpu_fbdev_set_suspend(tmp_adev, 0); + /* must succeed. */ amdgpu_ras_resume(tmp_adev); @@ -4108,6 +4110,8 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, */ amdgpu_unregister_gpu_instance(tmp_adev); + amdgpu_fbdev_set_suspend(adev, 1); + /* disable ras on ALL IPs */ if (!(in_ras_intr && !use_baco) && amdgpu_device_ip_need_full_reset(tmp_adev)) -- 2.25.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfxdata=02%7C01%7Calexander.deucher%40amd.com%7C3eace080fd0b4f67337e08d7c594e441%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637195115932121013sdata=my797McW90E%2FJKHa30fYgLlzHvnN%2BYQ3%2BqBLwllajJ0%3Dreserved=0 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH v3 1/4] drm/amdgpu/vcn: fix race condition issue for vcn start
Am 11.03.20 um 12:30 schrieb Zhu, James: [AMD Official Use Only - Internal Distribution Only] ping *From:* Zhu, James *Sent:* Monday, March 9, 2020 12:57 PM *To:* amd-gfx@lists.freedesktop.org *Cc:* Zhu, James ; Koenig, Christian *Subject:* [PATCH v3 1/4] drm/amdgpu/vcn: fix race condition issue for vcn start Fix race condition issue when multiple vcn starts are called. v2: Removed checking the return value of cancel_delayed_work_sync() to prevent possible races here. v3: Add total_submission_cnt to avoid gate power unexpectedly. Signed-off-by: James Zhu --- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 22 +++--- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h | 2 ++ 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index a41272f..6aafda1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -63,6 +63,8 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) int i, r; INIT_DELAYED_WORK(>vcn.idle_work, amdgpu_vcn_idle_work_handler); + mutex_init(>vcn.vcn_pg_lock); + atomic_set(>vcn.total_submission_cnt, 0); switch (adev->asic_type) { case CHIP_RAVEN: @@ -210,6 +212,7 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev) } release_firmware(adev->vcn.fw); + mutex_destroy(>vcn.vcn_pg_lock); return 0; } @@ -307,7 +310,8 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work) fences += fence[j]; } - if (fences == 0) { + if (fences == 0 && + likely(atomic_read(>vcn.total_submission_cnt) == 0)) { amdgpu_gfx_off_ctrl(adev, true); amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, AMD_PG_STATE_GATE); @@ -319,13 +323,14 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work) void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - bool set_clocks = !cancel_delayed_work_sync(>vcn.idle_work); - if (set_clocks) { - amdgpu_gfx_off_ctrl(adev, false); - amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, - AMD_PG_STATE_UNGATE); - } + atomic_inc(>vcn.total_submission_cnt); + cancel_delayed_work_sync(>vcn.idle_work); + + mutex_lock(>vcn.vcn_pg_lock); + amdgpu_gfx_off_ctrl(adev, false); + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, + AMD_PG_STATE_UNGATE); if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { struct dpg_pause_state new_state; @@ -345,11 +350,14 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring) adev->vcn.pause_dpg_mode(adev, ring->me, _state); } + mutex_unlock(>vcn.vcn_pg_lock); } void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring) { schedule_delayed_work(>adev->vcn.idle_work, VCN_IDLE_TIMEOUT); + if (unlikely(atomic_dec_return(>adev->vcn.total_submission_cnt) < 0)) + atomic_set(>adev->vcn.total_submission_cnt, 0); You need to decrement first and then call schedule_delayed_work() otherwise the work could run with the wrong counter. And the extra check for an under run should be superfluous. Regards, Christian. } int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index 6fe0573..111c4cc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -200,6 +200,8 @@ struct amdgpu_vcn { struct drm_gpu_scheduler *vcn_dec_sched[AMDGPU_MAX_VCN_INSTANCES]; uint32_t num_vcn_enc_sched; uint32_t num_vcn_dec_sched; + struct mutex vcn_pg_lock; + atomic_t total_submission_cnt; unsigned harvest_config; int (*pause_dpg_mode)(struct amdgpu_device *adev, -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH v3 1/4] drm/amdgpu/vcn: fix race condition issue for vcn start
[AMD Official Use Only - Internal Distribution Only] ping From: Zhu, James Sent: Monday, March 9, 2020 12:57 PM To: amd-gfx@lists.freedesktop.org Cc: Zhu, James ; Koenig, Christian Subject: [PATCH v3 1/4] drm/amdgpu/vcn: fix race condition issue for vcn start Fix race condition issue when multiple vcn starts are called. v2: Removed checking the return value of cancel_delayed_work_sync() to prevent possible races here. v3: Add total_submission_cnt to avoid gate power unexpectedly. Signed-off-by: James Zhu --- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 22 +++--- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h | 2 ++ 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index a41272f..6aafda1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -63,6 +63,8 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) int i, r; INIT_DELAYED_WORK(>vcn.idle_work, amdgpu_vcn_idle_work_handler); + mutex_init(>vcn.vcn_pg_lock); + atomic_set(>vcn.total_submission_cnt, 0); switch (adev->asic_type) { case CHIP_RAVEN: @@ -210,6 +212,7 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev) } release_firmware(adev->vcn.fw); + mutex_destroy(>vcn.vcn_pg_lock); return 0; } @@ -307,7 +310,8 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work) fences += fence[j]; } - if (fences == 0) { + if (fences == 0 && + likely(atomic_read(>vcn.total_submission_cnt) == 0)) { amdgpu_gfx_off_ctrl(adev, true); amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, AMD_PG_STATE_GATE); @@ -319,13 +323,14 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work) void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - bool set_clocks = !cancel_delayed_work_sync(>vcn.idle_work); - if (set_clocks) { - amdgpu_gfx_off_ctrl(adev, false); - amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, - AMD_PG_STATE_UNGATE); - } + atomic_inc(>vcn.total_submission_cnt); + cancel_delayed_work_sync(>vcn.idle_work); + + mutex_lock(>vcn.vcn_pg_lock); + amdgpu_gfx_off_ctrl(adev, false); + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, + AMD_PG_STATE_UNGATE); if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG){ struct dpg_pause_state new_state; @@ -345,11 +350,14 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring) adev->vcn.pause_dpg_mode(adev, ring->me, _state); } + mutex_unlock(>vcn.vcn_pg_lock); } void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring) { schedule_delayed_work(>adev->vcn.idle_work, VCN_IDLE_TIMEOUT); + if (unlikely(atomic_dec_return(>adev->vcn.total_submission_cnt) < 0)) + atomic_set(>adev->vcn.total_submission_cnt, 0); } int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index 6fe0573..111c4cc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -200,6 +200,8 @@ struct amdgpu_vcn { struct drm_gpu_scheduler *vcn_dec_sched[AMDGPU_MAX_VCN_INSTANCES]; uint32_t num_vcn_enc_sched; uint32_t num_vcn_dec_sched; + struct mutex vcn_pg_lock; + atomic_t total_submission_cnt; unsignedharvest_config; int (*pause_dpg_mode)(struct amdgpu_device *adev, -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH] drm/amdgpu: reenable runtime pm on navi12
The runtime pm is verified as working now on navi12. Change-Id: I20393633678297308c9651237bbfdc854a3cff94 Signed-off-by: Evan Quan --- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 80495652a7c1..e376dc072d42 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -174,8 +174,7 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags) (adev->asic_type >= CHIP_TOPAZ) && (adev->asic_type != CHIP_VEGA10) && (adev->asic_type != CHIP_VEGA20) && -(adev->asic_type != CHIP_ARCTURUS) && -(adev->asic_type != CHIP_NAVI12)) /* enable runpm on VI+ */ +(adev->asic_type != CHIP_ARCTURUS)) /* enable runpm on VI+ */ adev->runpm = true; else if (amdgpu_device_supports_baco(dev) && (amdgpu_runtime_pm > 0)) /* enable runpm if runpm=1 on CI */ -- 2.25.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH -next 024/491] AMD DISPLAY CORE: Use fallthrough;
Convert the various uses of fallthrough comments to fallthrough; Done via script Link: https://lore.kernel.org/lkml/b56602fcf79f849e733e7b521bb0e17895d390fa.1582230379.git.joe.com/ Signed-off-by: Joe Perches --- drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c | 4 ++-- drivers/gpu/drm/amd/display/dc/dce/dce_aux.c | 2 +- drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c index 2f1c958..37fa7b 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c @@ -267,7 +267,7 @@ static struct atom_display_object_path_v2 *get_bios_object( && id.enum_id == obj_id.enum_id) return >object_info_tbl.v1_4->display_path[i]; } - /* fall through */ + fallthrough; case OBJECT_TYPE_CONNECTOR: case OBJECT_TYPE_GENERIC: /* Both Generic and Connector Object ID @@ -280,7 +280,7 @@ static struct atom_display_object_path_v2 *get_bios_object( && id.enum_id == obj_id.enum_id) return >object_info_tbl.v1_4->display_path[i]; } - /* fall through */ + fallthrough; default: return NULL; } diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c index 68c4049..743042 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c @@ -645,7 +645,7 @@ bool dce_aux_transfer_with_retries(struct ddc_service *ddc, case AUX_TRANSACTION_REPLY_AUX_DEFER: case AUX_TRANSACTION_REPLY_I2C_OVER_AUX_DEFER: retry_on_defer = true; - /* fall through */ + fallthrough; case AUX_TRANSACTION_REPLY_I2C_OVER_AUX_NACK: if (++aux_defer_retries >= AUX_MAX_DEFER_RETRIES) { goto fail; diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.c b/drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.c index 8aa937f..51481e 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.c @@ -479,7 +479,7 @@ static void program_grph_pixel_format( case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616F: sign = 1; floating = 1; - /* fall through */ + fallthrough; case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616F: /* shouldn't this get float too? */ case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616: grph_depth = 3; -- 2.24.0 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH -next 023/491] AMD KFD: Use fallthrough;
Convert the various uses of fallthrough comments to fallthrough; Done via script Link: https://lore.kernel.org/lkml/b56602fcf79f849e733e7b521bb0e17895d390fa.1582230379.git.joe.com/ Signed-off-by: Joe Perches --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c index d6549e..6529ca 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c @@ -79,7 +79,7 @@ static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev, dev_warn(adev->dev, "Invalid sdma engine id (%d), using engine id 0\n", engine_id); - /* fall through */ + fallthrough; case 0: sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL; -- 2.24.0 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH -next 025/491] AMD POWERPLAY: Use fallthrough;
Convert the various uses of fallthrough comments to fallthrough; Done via script Link: https://lore.kernel.org/lkml/b56602fcf79f849e733e7b521bb0e17895d390fa.1582230379.git.joe.com/ Signed-off-by: Joe Perches --- drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c index bf04cf..fc5236c 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c @@ -1250,7 +1250,7 @@ static void smu7_set_dpm_event_sources(struct pp_hwmgr *hwmgr, uint32_t sources) switch (sources) { default: pr_err("Unknown throttling event sources."); - /* fall through */ + fallthrough; case 0: protection = false; /* src is unused */ @@ -3698,12 +3698,12 @@ static int smu7_request_link_speed_change_before_state_change( data->force_pcie_gen = PP_PCIEGen2; if (current_link_speed == PP_PCIEGen2) break; - /* fall through */ + fallthrough; case PP_PCIEGen2: if (0 == amdgpu_acpi_pcie_performance_request(hwmgr->adev, PCIE_PERF_REQ_GEN2, false)) break; #endif - /* fall through */ + fallthrough; default: data->force_pcie_gen = smu7_get_current_pcie_speed(hwmgr); break; -- 2.24.0 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH -next 000/491] treewide: use fallthrough;
There is a new fallthrough pseudo-keyword macro that can be used to replace the various /* fallthrough */ style comments that are used to indicate a case label code block is intended to fallthrough to the next case label block. See commit 294f69e662d1 ("compiler_attributes.h: Add 'fallthrough' pseudo keyword for switch/case use") These patches are intended to allow clang to detect missing switch/case fallthrough uses. Do a depth-first pass on the MAINTAINERS file and find the various F: pattern files and convert the fallthrough comments to fallthrough; for all files matched by all F: patterns in in each section. Done via the perl script below and the previously posted cvt_fallthrough.pl script. Link: https://lore.kernel.org/lkml/b56602fcf79f849e733e7b521bb0e17895d390fa.1582230379.git.joe.com/ These patches are based on next-20200310 and are available in git://repo.or.cz/linux-2.6/trivial-mods.git in branch 20200310_fallthrough_2 $ cat commit_fallthrough.pl #!/usr/bin/env perl use sort 'stable'; # # Reorder a sorted array so file entries are before directory entries # depends on a trailing / for directories # so: # foo/ # foo/bar.c # becomes # foo/bar.c # foo/ # sub file_before_directory { my ($array_ref) = (@_); my $count = scalar(@$array_ref); for (my $i = 1; $i < $count; $i++) { if (substr(@$array_ref[$i - 1], -1) eq '/' && substr(@$array_ref[$i], 0, length(@$array_ref[$i - 1])) eq @$array_ref[$i - 1]) { my $string = @$array_ref[$i - 1]; @$array_ref[$i - 1] = @$array_ref[$i]; @$array_ref[$i] = $string; } } } sub uniq { my (@parms) = @_; my %saw; @parms = grep(!$saw{$_}++, @parms); return @parms; } # Get all the F: file patterns in MAINTAINERS that could be a .[ch] file my $maintainer_patterns = `grep -P '^F:\\s+' MAINTAINERS`; my @patterns = split('\n', $maintainer_patterns); s/^F:\s*// for @patterns; @patterns = grep(!/^(?:Documentation|tools|scripts)\//, @patterns); @patterns = grep(!/\.(?:dtsi?|rst|config)$/, @patterns); @patterns = sort @patterns; @patterns = sort { $b =~ tr/\//\// cmp $a =~ tr/\//\// } @patterns; file_before_directory(\@patterns); my %sections_done; foreach my $pattern (@patterns) { # Find the files the pattern matches my $pattern_files = `git ls-files -- $pattern`; my @new_patterns = split('\n', $pattern_files); $pattern_files = join(' ', @new_patterns); next if ($pattern_files =~ /^\s*$/); # Find the section the first file matches my $pattern_file = @new_patterns[0]; my $section_output = `./scripts/get_maintainer.pl --nogit --nogit-fallback --sections --pattern-depth=1 $pattern_file`; my @section = split('\n', $section_output); my $section_header = @section[0]; print("Section: <$section_header>\n"); # Skip the section if it's already done print("Already done '$section_header'\n") if ($sections_done{$section_header}); next if ($sections_done{$section_header}++); # Find all the .[ch] files in all F: lines in that section my @new_section; foreach my $line (@section) { last if ($line =~ /^\s*$/); push(@new_section, $line); } @section = grep(/^F:/, @new_section); s/^F:\s*// for @section; @section = grep(!/^(?:Documentation|tools|scripts)\//, @section); @section = grep(!/\.(?:dtsi?|rst|config)$/, @section); @section = sort @section; @section = uniq(@section); my $section_files = join(' ', @section); print("section_files: <$section_files>\n"); next if ($section_files =~ /^\s*$/); my $cvt_files = `git ls-files -- $section_files`; my @files = split('\n', $cvt_files); @files = grep(!/^(?:Documentation|tools|scripts)\//, @files); @files = grep(!/\.(?:dtsi?|rst|config)$/, @files); @files = grep(/\.[ch]$/, @files); @files = sort @files; @files = uniq(@files); $cvt_files = join(' ', @files); print("files: <$cvt_files>\n"); next if (scalar(@files) < 1); # Convert fallthroughs for all [.ch] files in the section print("doing cvt_fallthrough.pl -- $cvt_files\n"); `cvt_fallthrough.pl -- $cvt_files`; # If nothing changed, nothing to commit `git diff-index --quiet HEAD --`; next if (!$?); # Commit the changes my $fh; open($fh, "+>", "cvt_fallthrough.commit_msg") or die "$0: can't create temporary file: $!\n"; print $fh
[PATCH] drm/amdgpu: add fbdev suspend/resume on gpu reset
This can fix the baco reset failure seen on Navi10. And this should be a low risk fix as the same sequence is already used for system suspend/resume. Change-Id: Idb4d02c5fcbbd5b7817195ee04c7af34c346a053 Signed-off-by: Evan Quan --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 572eb6ea8eab..a35c89973614 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3935,6 +3935,8 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive, if (r) goto out; + amdgpu_fbdev_set_suspend(tmp_adev, 0); + /* must succeed. */ amdgpu_ras_resume(tmp_adev); @@ -4108,6 +4110,8 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, */ amdgpu_unregister_gpu_instance(tmp_adev); + amdgpu_fbdev_set_suspend(adev, 1); + /* disable ras on ALL IPs */ if (!(in_ras_intr && !use_baco) && amdgpu_device_ip_need_full_reset(tmp_adev)) -- 2.25.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
RE: [PATCH] drm/amdgpu: use amdgpu_ras.h in amdgpu_debugfs.c
[AMD Public Use] Reviewed-by: Guchun Chen Regards, Guchun -Original Message- From: Stanley.Yang Sent: Wednesday, March 11, 2020 2:38 PM To: amd-gfx@lists.freedesktop.org Cc: Zhang, Hawking ; Chen, Guchun ; Li, Dennis ; Clements, John ; Yang, Stanley Subject: [PATCH] drm/amdgpu: use amdgpu_ras.h in amdgpu_debugfs.c include amdgpu_ras.h head file instead of use extern ras_debugfs_create_all function Signed-off-by: Stanley.Yang Change-Id: I2697250ba67d4deac4371fea05efb68a976f7e5a --- drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index 1d513e4f9934..a9a278f87498 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -34,6 +34,7 @@ #include "amdgpu.h" #include "amdgpu_pm.h" #include "amdgpu_dm_debugfs.h" +#include "amdgpu_ras.h" /** * amdgpu_debugfs_add_files - Add simple debugfs entries @@ -1315,7 +1316,6 @@ DEFINE_SIMPLE_ATTRIBUTE(fops_ib_preempt, NULL, DEFINE_SIMPLE_ATTRIBUTE(fops_sclk_set, NULL, amdgpu_debugfs_sclk_set, "%llu\n"); -extern void amdgpu_ras_debugfs_create_all(struct amdgpu_device *adev); int amdgpu_debugfs_init(struct amdgpu_device *adev) { int r, i; -- 2.17.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 1/5] drm/amdgpu/pm: Use scnprintf() for avoiding potential buffer overflow
BTW, please ignore the subject prefix '[1/5]', which was added mistakenly while extracting a patch from the commit list. This is a single patch. thanks, Takashi On Wed, 11 Mar 2020 08:29:04 +0100, Takashi Iwai wrote: > > Since snprintf() returns the would-be-output size instead of the > actual output size, the succeeding calls may go beyond the given > buffer limit. Fix it by replacing with scnprintf(). > > Also adjust the size argument passed to scnprintf() so that it really > cuts off at the right remaining buffer length. > > Signed-off-by: Takashi Iwai > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c > index bc3cf04a1a94..4a737d074f4b 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c > @@ -448,7 +448,7 @@ static ssize_t amdgpu_get_pp_num_states(struct device > *dev, > > buf_len = snprintf(buf, PAGE_SIZE, "states: %d\n", data.nums); > for (i = 0; i < data.nums; i++) > - buf_len += snprintf(buf + buf_len, PAGE_SIZE, "%d %s\n", i, > + buf_len += scnprintf(buf + buf_len, PAGE_SIZE - buf_len, "%d > %s\n", i, > (data.states[i] == > POWER_STATE_TYPE_INTERNAL_BOOT) ? "boot" : > (data.states[i] == POWER_STATE_TYPE_BATTERY) ? > "battery" : > (data.states[i] == POWER_STATE_TYPE_BALANCED) ? > "balanced" : > -- > 2.16.4 > ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 1/5] drm/amdgpu/pm: Use scnprintf() for avoiding potential buffer overflow
Since snprintf() returns the would-be-output size instead of the actual output size, the succeeding calls may go beyond the given buffer limit. Fix it by replacing with scnprintf(). Also adjust the size argument passed to scnprintf() so that it really cuts off at the right remaining buffer length. Signed-off-by: Takashi Iwai --- drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index bc3cf04a1a94..4a737d074f4b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -448,7 +448,7 @@ static ssize_t amdgpu_get_pp_num_states(struct device *dev, buf_len = snprintf(buf, PAGE_SIZE, "states: %d\n", data.nums); for (i = 0; i < data.nums; i++) - buf_len += snprintf(buf + buf_len, PAGE_SIZE, "%d %s\n", i, + buf_len += scnprintf(buf + buf_len, PAGE_SIZE - buf_len, "%d %s\n", i, (data.states[i] == POWER_STATE_TYPE_INTERNAL_BOOT) ? "boot" : (data.states[i] == POWER_STATE_TYPE_BATTERY) ? "battery" : (data.states[i] == POWER_STATE_TYPE_BALANCED) ? "balanced" : -- 2.16.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
RE: [PATCH] drm/amdgpu: use amdgpu_ras.h in amdgpu_debugfs.c
[AMD Official Use Only - Internal Distribution Only] Reviewed-by: Tao Zhou > -Original Message- > From: amd-gfx On Behalf Of > Stanley.Yang > Sent: 2020年3月11日 14:38 > To: amd-gfx@lists.freedesktop.org > Cc: Yang, Stanley ; Clements, John > ; Li, Dennis ; Chen, > Guchun ; Zhang, Hawking > > Subject: [PATCH] drm/amdgpu: use amdgpu_ras.h in amdgpu_debugfs.c > > include amdgpu_ras.h head file instead of use extern ras_debugfs_create_all > function > > Signed-off-by: Stanley.Yang > Change-Id: I2697250ba67d4deac4371fea05efb68a976f7e5a > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c > index 1d513e4f9934..a9a278f87498 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c > @@ -34,6 +34,7 @@ > #include "amdgpu.h" > #include "amdgpu_pm.h" > #include "amdgpu_dm_debugfs.h" > +#include "amdgpu_ras.h" > > /** > * amdgpu_debugfs_add_files - Add simple debugfs entries @@ -1315,7 > +1316,6 @@ DEFINE_SIMPLE_ATTRIBUTE(fops_ib_preempt, NULL, > DEFINE_SIMPLE_ATTRIBUTE(fops_sclk_set, NULL, > amdgpu_debugfs_sclk_set, "%llu\n"); > > -extern void amdgpu_ras_debugfs_create_all(struct amdgpu_device *adev); > int amdgpu_debugfs_init(struct amdgpu_device *adev) { > int r, i; > -- > 2.17.1 > > ___ > amd-gfx mailing list > amd-gfx@lists.freedesktop.org > https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.f > reedesktop.org%2Fmailman%2Flistinfo%2Famd- > gfxdata=02%7C01%7Ctao.zhou1%40amd.com%7C19bfa057da7740ca9 > 96f08d7c586b564%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C6 > 37195054687594420sdata=ZBvaPrTx9oUoGfltIm3JQEy5htUQYjXZqk0ju > fcc98Q%3Dreserved=0 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH] drm/amdgpu: use amdgpu_ras.h in amdgpu_debugfs.c
include amdgpu_ras.h head file instead of use extern ras_debugfs_create_all function Signed-off-by: Stanley.Yang Change-Id: I2697250ba67d4deac4371fea05efb68a976f7e5a --- drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index 1d513e4f9934..a9a278f87498 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -34,6 +34,7 @@ #include "amdgpu.h" #include "amdgpu_pm.h" #include "amdgpu_dm_debugfs.h" +#include "amdgpu_ras.h" /** * amdgpu_debugfs_add_files - Add simple debugfs entries @@ -1315,7 +1316,6 @@ DEFINE_SIMPLE_ATTRIBUTE(fops_ib_preempt, NULL, DEFINE_SIMPLE_ATTRIBUTE(fops_sclk_set, NULL, amdgpu_debugfs_sclk_set, "%llu\n"); -extern void amdgpu_ras_debugfs_create_all(struct amdgpu_device *adev); int amdgpu_debugfs_init(struct amdgpu_device *adev) { int r, i; -- 2.17.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
RE: [refactor RLCG wreg path 1/2] drm/amdgpu: refactor RLCG access path part 1
Reviewed-by: Yintian Tao -Original Message- From: amd-gfx On Behalf Of Monk Liu Sent: 2020年3月11日 13:58 To: amd-gfx@lists.freedesktop.org Cc: Liu, Monk Subject: [refactor RLCG wreg path 1/2] drm/amdgpu: refactor RLCG access path part 1 what changed: 1)provide new implementation interface for the rlcg access path 2)put SQ_CMD/SQ_IND_INDEX/SQ_IND_DATA to GFX9 RLCG path to align with SRIOV RLCG logic background: we what to clear the code path for WREG32_RLC, to make it only covered and handled by amdgpu_mm_wreg() routine, this way we can let RLCG to serve the register access even through UMR (via debugfs interface) the current implementation cannot achieve that goal because it can only hardcode everywhere, but UMR only pass "offset" as varable to driver tested-by: Monk Liu tested-by: Zhou pengju Signed-off-by: Zhou pengju Signed-off-by: Monk Liu --- drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h | 2 + drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 80 ++- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 177 +++- drivers/gpu/drm/amd/amdgpu/soc15.h | 7 ++ 4 files changed, 264 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h index 52509c2..60bb3e8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h @@ -127,6 +127,8 @@ struct amdgpu_rlc_funcs { void (*reset)(struct amdgpu_device *adev); void (*start)(struct amdgpu_device *adev); void (*update_spm_vmid)(struct amdgpu_device *adev, unsigned vmid); + void (*rlcg_wreg)(struct amdgpu_device *adev, u32 offset, u32 v); + bool (*is_rlcg_access_range)(struct amdgpu_device *adev, uint32_t +reg); }; struct amdgpu_rlc { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 82ef08d..3222cd3 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -224,6 +224,56 @@ static const struct soc15_reg_golden golden_settings_gc_10_1_2[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0x, 0x0080) }; +static const struct soc15_reg_rlcg rlcg_access_gc_10_0[] = { + {SOC15_REG_ENTRY(GC, 0, mmRLC_CSIB_ADDR_HI)}, + {SOC15_REG_ENTRY(GC, 0, mmRLC_CSIB_ADDR_LO)}, + {SOC15_REG_ENTRY(GC, 0, mmRLC_CSIB_LENGTH)}, + {SOC15_REG_ENTRY(GC, 0, mmCP_ME_CNTL)}, }; + +static void gfx_v10_rlcg_wreg(struct amdgpu_device *adev, u32 offset, +u32 v) { + static void *scratch_reg0; + static void *scratch_reg1; + static void *scratch_reg2; + static void *scratch_reg3; + static void *spare_int; + static uint32_t grbm_cntl; + static uint32_t grbm_idx; + uint32_t i = 0; + uint32_t retries = 5; + + scratch_reg0 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG0)*4; + scratch_reg1 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG1)*4; + scratch_reg2 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG2)*4; + scratch_reg3 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG3)*4; + spare_int = adev->rmmio + +(adev->reg_offset[GC_HWIP][0][mmRLC_SPARE_INT_BASE_IDX] + +mmRLC_SPARE_INT)*4; + + grbm_cntl = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_CNTL_BASE_IDX] + mmGRBM_GFX_CNTL; + grbm_idx = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_INDEX_BASE_IDX] + +mmGRBM_GFX_INDEX; + + if (amdgpu_sriov_runtime(adev)) { + pr_err("shoudn't call rlcg write register during runtime\n"); + return; + } + + writel(v, scratch_reg0); + writel(offset | 0x8000, scratch_reg1); + writel(1, spare_int); + for (i = 0; i < retries; i++) { + u32 tmp; + + tmp = readl(scratch_reg1); + if (!(tmp & 0x8000)) + break; + + udelay(10); + } + + if (i >= retries) + pr_err("timeout: rlcg program reg:0x%05x failed !\n", offset); } + static const struct soc15_reg_golden golden_settings_gc_10_1_nv14[] = { /* Pending on emulation bring up */ @@ -4247,6 +4297,32 @@ static void gfx_v10_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid) WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data); } +static bool gfx_v10_0_check_rlcg_range(struct amdgpu_device *adev, + uint32_t offset, + struct soc15_reg_rlcg *entries, int arr_size) { + int i; + uint32_t reg; + + for (i = 0; i < arr_size; i++) { + const struct soc15_reg_rlcg *entry; + + entry = [i]; + reg = adev->reg_offset[entry->hwip][entry->instance][entry->segment] + entry->reg; + if
RE: [refactor RLCG wreg path 2/2] drm/amdgpu: refactor RLCG access path part 2
Reviewed-by: Yintian Tao -Original Message- From: amd-gfx On Behalf Of Monk Liu Sent: 2020年3月11日 13:58 To: amd-gfx@lists.freedesktop.org Cc: Liu, Monk Subject: [refactor RLCG wreg path 2/2] drm/amdgpu: refactor RLCG access path part 2 switch to new RLCG access path, and drop the legacy WREG32_RLC macros tested-by: Monk Liu tested-by: Zhou pengju Signed-off-by: Zhou pengju Signed-off-by: Monk Liu --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 30 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_device.c| 5 ++ drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c| 8 +- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 104 +++--- drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c | 2 +- drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c | 28 +++--- drivers/gpu/drm/amd/amdgpu/soc15.c| 11 +-- drivers/gpu/drm/amd/amdgpu/soc15_common.h | 57 8 files changed, 93 insertions(+), 152 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index df841c2..a21f005 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -105,8 +105,8 @@ void kgd_gfx_v9_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, lock_srbm(kgd, 0, 0, 0, vmid); - WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config); - WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases); + WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config); + WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases); /* APE1 no longer exists on GFX9 */ unlock_srbm(kgd); @@ -242,13 +242,13 @@ int kgd_gfx_v9_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, for (reg = hqd_base; reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++) - WREG32_RLC(reg, mqd_hqd[reg - hqd_base]); + WREG32(reg, mqd_hqd[reg - hqd_base]); /* Activate doorbell logic before triggering WPTR poll. */ data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); - WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), data); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), data); if (wptr) { /* Don't read wptr with get_user because the user @@ -277,25 +277,25 @@ int kgd_gfx_v9_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1); guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32; - WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO), + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO), lower_32_bits(guessed_wptr)); - WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI), + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI), upper_32_bits(guessed_wptr)); - WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR), + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR), lower_32_bits((uintptr_t)wptr)); - WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI), + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI), upper_32_bits((uintptr_t)wptr)); WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1), - (uint32_t)get_queue_mask(adev, pipe_id, queue_id)); + get_queue_mask(adev, pipe_id, queue_id)); } /* Start the EOP fetcher */ - WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_RPTR), + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_RPTR), REG_SET_FIELD(m->cp_hqd_eop_rptr, CP_HQD_EOP_RPTR, INIT_FETCHER, 1)); data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1); - WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), data); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), data); release_queue(kgd); @@ -547,7 +547,7 @@ int kgd_gfx_v9_hqd_destroy(struct kgd_dev *kgd, void *mqd, acquire_queue(kgd, pipe_id, queue_id); if (m->cp_hqd_vmid == 0) - WREG32_FIELD15_RLC(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0); + WREG32_FIELD15(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0); switch (reset_type) { case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN: @@ -561,7 +561,7 @@ int kgd_gfx_v9_hqd_destroy(struct kgd_dev *kgd, void *mqd, break; } - WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), type); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), type);