[PATCH] drm/amdgpu: update ras capability's query based on mem ecc configuration

2020-03-11 Thread Guchun Chen
RAS support capability needs to be updated on top of different
memory ECC enablement, and remove redundant memory ECC check
in gmc module for vega20 and arcturus.

v2: check HBM ECC enablement and set ras mask accordingly.
v3: avoid to invoke atomfirmware interface to query twice.

Suggested-by: Hawking Zhang 
Signed-off-by: Guchun Chen 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 24 -
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c   | 36 ++---
 2 files changed, 32 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 69b02b9d4131..38782add479a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -1738,18 +1738,30 @@ static void amdgpu_ras_check_supported(struct 
amdgpu_device *adev,
*hw_supported = 0;
*supported = 0;
 
-   if (amdgpu_sriov_vf(adev) ||
+   if (amdgpu_sriov_vf(adev) || !adev->is_atom_fw ||
(adev->asic_type != CHIP_VEGA20 &&
 adev->asic_type != CHIP_ARCTURUS))
return;
 
-   if (adev->is_atom_fw &&
-   (amdgpu_atomfirmware_mem_ecc_supported(adev) ||
-amdgpu_atomfirmware_sram_ecc_supported(adev)))
-   *hw_supported = AMDGPU_RAS_BLOCK_MASK;
+   if (amdgpu_atomfirmware_mem_ecc_supported(adev)) {
+   DRM_INFO("HBM ECC is active.\n");
+   *hw_supported |= (1 << AMDGPU_RAS_BLOCK__UMC |
+   1 << AMDGPU_RAS_BLOCK__DF);
+   } else
+   DRM_INFO("HBM ECC is not presented.\n");
+
+   if (amdgpu_atomfirmware_sram_ecc_supported(adev)) {
+   DRM_INFO("SRAM ECC is active.\n");
+   *hw_supported |= ~(1 << AMDGPU_RAS_BLOCK__UMC |
+   1 << AMDGPU_RAS_BLOCK__DF);
+   } else
+   DRM_INFO("SRAM ECC is not presented.\n");
+
+   /* hw_supported needs to be aligned with RAS block mask. */
+   *hw_supported &= AMDGPU_RAS_BLOCK_MASK;
 
*supported = amdgpu_ras_enable == 0 ?
-   0 : *hw_supported & amdgpu_ras_mask;
+   0 : *hw_supported & amdgpu_ras_mask;
 }
 
 int amdgpu_ras_init(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 90216abf14a4..3cc886e96420 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -886,29 +886,21 @@ static int gmc_v9_0_late_init(void *handle)
if (r)
return r;
/* Check if ecc is available */
-   if (!amdgpu_sriov_vf(adev)) {
-   switch (adev->asic_type) {
-   case CHIP_VEGA10:
-   case CHIP_VEGA20:
-   case CHIP_ARCTURUS:
-   r = amdgpu_atomfirmware_mem_ecc_supported(adev);
-   if (!r) {
-   DRM_INFO("ECC is not present.\n");
-   if (adev->df.funcs->enable_ecc_force_par_wr_rmw)
-   
adev->df.funcs->enable_ecc_force_par_wr_rmw(adev, false);
-   } else {
-   DRM_INFO("ECC is active.\n");
-   }
+   if (!amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_VEGA10)) {
+   r = amdgpu_atomfirmware_mem_ecc_supported(adev);
+   if (!r) {
+   DRM_INFO("ECC is not present.\n");
+   if (adev->df.funcs->enable_ecc_force_par_wr_rmw)
+   
adev->df.funcs->enable_ecc_force_par_wr_rmw(adev, false);
+   } else {
+   DRM_INFO("ECC is active.\n");
+   }
 
-   r = amdgpu_atomfirmware_sram_ecc_supported(adev);
-   if (!r) {
-   DRM_INFO("SRAM ECC is not present.\n");
-   } else {
-   DRM_INFO("SRAM ECC is active.\n");
-   }
-   break;
-   default:
-   break;
+   r = amdgpu_atomfirmware_sram_ecc_supported(adev);
+   if (!r) {
+   DRM_INFO("SRAM ECC is not present.\n");
+   } else {
+   DRM_INFO("SRAM ECC is active.\n");
}
}
 
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH 1/2] drm/amdgpu: update ras support capability with different sram ecc configuration

2020-03-11 Thread Chen, Guchun
[AMD Public Use]

Thanks for your suggestion, Hawking.
I will send one patch v3 to target this.

Regards,
Guchun

-Original Message-
From: Zhang, Hawking  
Sent: Thursday, March 12, 2020 11:13 AM
To: Chen, Guchun ; amd-gfx@lists.freedesktop.org; Li, 
Dennis ; Zhou1, Tao ; Clements, John 

Subject: RE: [PATCH 1/2] drm/amdgpu: update ras support capability with 
different sram ecc configuration

[AMD Official Use Only - Internal Distribution Only]

Hi Guchun,

It seems to me we still have redundant function call in 
amdgpu_ras_check_supported. The atomfirmware interfaces are possibly invoked 
twice?

As I listed the steps in the last thread, we can initialize hw_supported to
either 0 or 0xfff.

Check HBM ECC first, explicitly indicate whether it is present or not, and set
the DF/UMC bits in hw_supported. Then check SRAM ECC, explicitly indicate
whether it is present or not, and set the other IP block masks.

After we run all the above checks, set the final ras mask to con->supported.

We'd better keep the messages consistent with what we had in gmc_v9_0_late_init. No
need to highlight which IP blocks get disabled; that should be transparent to
users.

Regards,
Hawking

-Original Message-
From: Chen, Guchun 
Sent: Thursday, March 12, 2020 10:55
To: amd-gfx@lists.freedesktop.org; Zhang, Hawking ; Li, 
Dennis ; Zhou1, Tao ; Clements, John 

Cc: Chen, Guchun 
Subject: [PATCH 1/2] drm/amdgpu: update ras support capability with different 
sram ecc configuration

When sram ecc is disabled by vbios, the ras initialization process in the
corresponding IPs that support sram ecc needs to be skipped. So update the ras
support capability accordingly on top of this configuration. This capability
will block further ras operations to the unsupported IPs.

v2: check HBM ECC enablement and set ras mask accordingly.

Signed-off-by: Guchun Chen 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 37 +++--
 1 file changed, 35 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 69b02b9d4131..b08226c10d95 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -1748,8 +1748,41 @@ static void amdgpu_ras_check_supported (struct 
amdgpu_device *adev,
 amdgpu_atomfirmware_sram_ecc_supported(adev)))
*hw_supported = AMDGPU_RAS_BLOCK_MASK;
 
-   *supported = amdgpu_ras_enable == 0 ?
-   0 : *hw_supported & amdgpu_ras_mask;
+   /* Both HBM and SRAM ECC are disabled in vbios. */
+   if (*hw_supported == 0) {
+   DRM_INFO("RAS HW support is disabled as HBM"
+   " and SRAM ECC are not presented.\n");
+   return;
+   }
+
+   if (amdgpu_ras_enable) {
+   *supported = *hw_supported;
+
+   /*
+* When HBM ECC is disabled in vbios, remove
+* UMC's and DF's ras support.
+*/
+   if (!amdgpu_atomfirmware_mem_ecc_supported(adev)) {
+   DRM_INFO("HBM ECC is disabled and "
+   "remove UMC and DF ras support.\n");
+   *supported &= ~(1 << AMDGPU_RAS_BLOCK__UMC |
+   1 << AMDGPU_RAS_BLOCK__DF);
+   }
+
+   /*
+* When sram ecc is disabled in vbios, bypass those IP
+* blocks that support sram ecc, and only hold UMC and DF.
+*/
+   if (!amdgpu_atomfirmware_sram_ecc_supported(adev)) {
+   DRM_INFO("SRAM ECC is disabled and remove ras support "
+   "from IPs that support sram ecc.\n");
+   *supported &= (1 << AMDGPU_RAS_BLOCK__UMC |
+   1 << AMDGPU_RAS_BLOCK__DF);
+   }
+
+   /* ras support needs to align with module parmeter */
+   *supported &= amdgpu_ras_mask;
+   }
 }
 
 int amdgpu_ras_init(struct amdgpu_device *adev)
--
2.17.1
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH 2/2] drm/amdgpu: remove mem ecc check for vega20 and arcturus

2020-03-11 Thread Zhang, Hawking
[AMD Official Use Only - Internal Distribution Only]

I think we can merge the patch with first one as they are all refine current 
logic for querying ras capability.

Regards,
Hawking

-Original Message-
From: Chen, Guchun  
Sent: Thursday, March 12, 2020 10:55
To: amd-gfx@lists.freedesktop.org; Zhang, Hawking ; Li, 
Dennis ; Zhou1, Tao ; Clements, John 

Cc: Chen, Guchun 
Subject: [PATCH 2/2] drm/amdgpu: remove mem ecc check for vega20 and arcturus

Memory ecc check including HBM and SRAM has been done in ras init function for 
vega20 and arcturus. So remove it from gmc module, only keep this check for 
vega10.

Suggested-by: Hawking Zhang 
Signed-off-by: Guchun Chen 
---
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 14 +++---
 1 file changed, 3 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 90216abf14a4..9bde66a6b432 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -887,28 +887,20 @@ static int gmc_v9_0_late_init(void *handle)
return r;
/* Check if ecc is available */
if (!amdgpu_sriov_vf(adev)) {
-   switch (adev->asic_type) {
-   case CHIP_VEGA10:
-   case CHIP_VEGA20:
-   case CHIP_ARCTURUS:
+   if (adev->asic_type == CHIP_VEGA10) {
r = amdgpu_atomfirmware_mem_ecc_supported(adev);
if (!r) {
DRM_INFO("ECC is not present.\n");
if (adev->df.funcs->enable_ecc_force_par_wr_rmw)

adev->df.funcs->enable_ecc_force_par_wr_rmw(adev, false);
-   } else {
+   } else
DRM_INFO("ECC is active.\n");
-   }
 
r = amdgpu_atomfirmware_sram_ecc_supported(adev);
if (!r) {
DRM_INFO("SRAM ECC is not present.\n");
-   } else {
+   } else
DRM_INFO("SRAM ECC is active.\n");
-   }
-   break;
-   default:
-   break;
}
}
 
--
2.17.1
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH 1/2] drm/amdgpu: update ras support capability with different sram ecc configuration

2020-03-11 Thread Zhang, Hawking
[AMD Official Use Only - Internal Distribution Only]

Hi Guchun,

It seems to me we still have redundant function call in 
amdgpu_ras_check_supported. The atomfirmware interfaces are possibly invoked 
twice?

As I listed the steps in the last thread, we can initialize hw_supported to
either 0 or 0xfff.

Check HBM ECC first, explicitly indicate whether it is present or not, and set the
DF/UMC bits in hw_supported.
Then check SRAM ECC, explicitly indicate whether it is present or not, and set the
other IP block masks.

After we run all the above checks, set the final ras mask to con->supported.

We'd better keep the messages consistent with what we had in gmc_v9_0_late_init. No
need to highlight which IP blocks get disabled; that should be transparent to
users.

Regards,
Hawking

-Original Message-
From: Chen, Guchun  
Sent: Thursday, March 12, 2020 10:55
To: amd-gfx@lists.freedesktop.org; Zhang, Hawking ; Li, 
Dennis ; Zhou1, Tao ; Clements, John 

Cc: Chen, Guchun 
Subject: [PATCH 1/2] drm/amdgpu: update ras support capability with different 
sram ecc configuration

When sram ecc is disabled by vbios, the ras initialization process in the
corresponding IPs that support sram ecc needs to be skipped. So update the ras
support capability accordingly on top of this configuration. This capability
will block further ras operations to the unsupported IPs.

v2: check HBM ECC enablement and set ras mask accordingly.

Signed-off-by: Guchun Chen 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 37 +++--
 1 file changed, 35 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 69b02b9d4131..b08226c10d95 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -1748,8 +1748,41 @@ static void amdgpu_ras_check_supported (struct 
amdgpu_device *adev,
 amdgpu_atomfirmware_sram_ecc_supported(adev)))
*hw_supported = AMDGPU_RAS_BLOCK_MASK;
 
-   *supported = amdgpu_ras_enable == 0 ?
-   0 : *hw_supported & amdgpu_ras_mask;
+   /* Both HBM and SRAM ECC are disabled in vbios. */
+   if (*hw_supported == 0) {
+   DRM_INFO("RAS HW support is disabled as HBM"
+   " and SRAM ECC are not presented.\n");
+   return;
+   }
+
+   if (amdgpu_ras_enable) {
+   *supported = *hw_supported;
+
+   /*
+* When HBM ECC is disabled in vbios, remove
+* UMC's and DF's ras support.
+*/
+   if (!amdgpu_atomfirmware_mem_ecc_supported(adev)) {
+   DRM_INFO("HBM ECC is disabled and "
+   "remove UMC and DF ras support.\n");
+   *supported &= ~(1 << AMDGPU_RAS_BLOCK__UMC |
+   1 << AMDGPU_RAS_BLOCK__DF);
+   }
+
+   /*
+* When sram ecc is disabled in vbios, bypass those IP
+* blocks that support sram ecc, and only hold UMC and DF.
+*/
+   if (!amdgpu_atomfirmware_sram_ecc_supported(adev)) {
+   DRM_INFO("SRAM ECC is disabled and remove ras support "
+   "from IPs that support sram ecc.\n");
+   *supported &= (1 << AMDGPU_RAS_BLOCK__UMC |
+   1 << AMDGPU_RAS_BLOCK__DF);
+   }
+
+   /* ras support needs to align with module parmeter */
+   *supported &= amdgpu_ras_mask;
+   }
 }
 
 int amdgpu_ras_init(struct amdgpu_device *adev)
--
2.17.1
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH] drm/amdgpu: add fbdev suspend/resume on gpu reset

2020-03-11 Thread Quan, Evan
Have not tried Navi14 yet. But likely it can fix that baco failure.
At least it can fix the baco issue of Navi10 which is very similar as Navi14's.

Regards,
Evan
-Original Message-
From: Yuan, Xiaojie  
Sent: Wednesday, March 11, 2020 10:10 PM
To: Quan, Evan ; amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH] drm/amdgpu: add fbdev suspend/resume on gpu reset

[AMD Official Use Only - Internal Distribution Only]

Hi Evan,

Does this patch also fix the baco failure on Navi14 with display connected?

BR,
Xiaojie


From: amd-gfx  on behalf of Evan Quan 

Sent: Wednesday, March 11, 2020 4:18 PM
To: amd-gfx@lists.freedesktop.org
Cc: Quan, Evan
Subject: [PATCH] drm/amdgpu: add fbdev suspend/resume on gpu reset

This can fix the baco reset failure seen on Navi10.
And this should be a low risk fix as the same sequence is already used for 
system suspend/resume.

Change-Id: Idb4d02c5fcbbd5b7817195ee04c7af34c346a053
Signed-off-by: Evan Quan 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 572eb6ea8eab..a35c89973614 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3935,6 +3935,8 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info 
*hive,
if (r)
goto out;

+   amdgpu_fbdev_set_suspend(tmp_adev, 0);
+
/* must succeed. */
amdgpu_ras_resume(tmp_adev);

@@ -4108,6 +4110,8 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 */
amdgpu_unregister_gpu_instance(tmp_adev);

+   amdgpu_fbdev_set_suspend(adev, 1);
+
/* disable ras on ALL IPs */
if (!(in_ras_intr && !use_baco) &&
  amdgpu_device_ip_need_full_reset(tmp_adev))
--
2.25.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfxdata=02%7C01%7CXiaojie.Yuan%40amd.com%7C3eace080fd0b4f67337e08d7c594e441%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637195115872403614sdata=%2B%2B3xRFOl2Ho%2BRe9VuzqHtZNoVZ3s%2BxIP7xTv6yG11LA%3Dreserved=0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 1/2] drm/amdgpu: update ras support capability with different sram ecc configuration

2020-03-11 Thread Guchun Chen
When sram ecc is disabled by vbios, the ras initialization
process in the corresponding IPs that support sram ecc
needs to be skipped. So update the ras support capability
accordingly on top of this configuration. This capability
will block further ras operations to the unsupported IPs.

v2: check HBM ECC enablement and set ras mask accordingly.

Signed-off-by: Guchun Chen 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 37 +++--
 1 file changed, 35 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 69b02b9d4131..b08226c10d95 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -1748,8 +1748,41 @@ static void amdgpu_ras_check_supported(struct 
amdgpu_device *adev,
 amdgpu_atomfirmware_sram_ecc_supported(adev)))
*hw_supported = AMDGPU_RAS_BLOCK_MASK;
 
-   *supported = amdgpu_ras_enable == 0 ?
-   0 : *hw_supported & amdgpu_ras_mask;
+   /* Both HBM and SRAM ECC are disabled in vbios. */
+   if (*hw_supported == 0) {
+   DRM_INFO("RAS HW support is disabled as HBM"
+   " and SRAM ECC are not presented.\n");
+   return;
+   }
+
+   if (amdgpu_ras_enable) {
+   *supported = *hw_supported;
+
+   /*
+* When HBM ECC is disabled in vbios, remove
+* UMC's and DF's ras support.
+*/
+   if (!amdgpu_atomfirmware_mem_ecc_supported(adev)) {
+   DRM_INFO("HBM ECC is disabled and "
+   "remove UMC and DF ras support.\n");
+   *supported &= ~(1 << AMDGPU_RAS_BLOCK__UMC |
+   1 << AMDGPU_RAS_BLOCK__DF);
+   }
+
+   /*
+* When sram ecc is disabled in vbios, bypass those IP
+* blocks that support sram ecc, and only hold UMC and DF.
+*/
+   if (!amdgpu_atomfirmware_sram_ecc_supported(adev)) {
+   DRM_INFO("SRAM ECC is disabled and remove ras support "
+   "from IPs that support sram ecc.\n");
+   *supported &= (1 << AMDGPU_RAS_BLOCK__UMC |
+   1 << AMDGPU_RAS_BLOCK__DF);
+   }
+
+   /* ras support needs to align with module parmeter */
+   *supported &= amdgpu_ras_mask;
+   }
 }
 
 int amdgpu_ras_init(struct amdgpu_device *adev)
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 2/2] drm/amdgpu: remove mem ecc check for vega20 and arcturus

2020-03-11 Thread Guchun Chen
Memory ecc check including HBM and SRAM has been done
in ras init function for vega20 and arcturus. So remove
it from gmc module, only keep this check for vega10.

Suggested-by: Hawking Zhang 
Signed-off-by: Guchun Chen 
---
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 14 +++---
 1 file changed, 3 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 90216abf14a4..9bde66a6b432 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -887,28 +887,20 @@ static int gmc_v9_0_late_init(void *handle)
return r;
/* Check if ecc is available */
if (!amdgpu_sriov_vf(adev)) {
-   switch (adev->asic_type) {
-   case CHIP_VEGA10:
-   case CHIP_VEGA20:
-   case CHIP_ARCTURUS:
+   if (adev->asic_type == CHIP_VEGA10) {
r = amdgpu_atomfirmware_mem_ecc_supported(adev);
if (!r) {
DRM_INFO("ECC is not present.\n");
if (adev->df.funcs->enable_ecc_force_par_wr_rmw)

adev->df.funcs->enable_ecc_force_par_wr_rmw(adev, false);
-   } else {
+   } else
DRM_INFO("ECC is active.\n");
-   }
 
r = amdgpu_atomfirmware_sram_ecc_supported(adev);
if (!r) {
DRM_INFO("SRAM ECC is not present.\n");
-   } else {
+   } else
DRM_INFO("SRAM ECC is active.\n");
-   }
-   break;
-   default:
-   break;
}
}
 
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[pull] amdgpu 5.6 fixes

2020-03-11 Thread Alex Deucher
Hi Dave, Daniel,

Fixes for 5.6.

The following changes since commit 513dc792d6060d5ef572e43852683097a8420f56:

  vgacon: Fix a UAF in vgacon_invert_region (2020-03-06 21:06:34 +0100)

are available in the Git repository at:

  git://people.freedesktop.org/~agd5f/linux tags/amd-drm-fixes-5.6-2020-03-11

for you to fetch changes up to 1d2686d417c5998af3817f93be01745b3db57ecd:

  drm/amdgpu/powerplay: nv1x, renior copy dcn clock settings of watermark to 
smu during boot up (2020-03-10 17:31:10 -0400)


amd-drm-fixes-5.6-2020-03-11:

amdgpu:
- Update the display watermark bounding box for navi14
- Fix fetching vbios directly from rom on vega20/arcturus
- Navi and renoir watermark fixes


Hawking Zhang (1):
  drm/amdgpu: correct ROM_INDEX/DATA offset for VEGA20

Hersen Wu (1):
  drm/amdgpu/powerplay: nv1x, renior copy dcn clock settings of watermark 
to smu during boot up

Martin Leung (1):
  drm/amd/display: update soc bb for nv14

 drivers/gpu/drm/amd/amdgpu/soc15.c |  25 -
 .../gpu/drm/amd/display/dc/dcn20/dcn20_resource.c  | 114 +
 drivers/gpu/drm/amd/powerplay/amdgpu_smu.c |   7 +-
 drivers/gpu/drm/amd/powerplay/navi10_ppt.c |  22 ++--
 drivers/gpu/drm/amd/powerplay/renoir_ppt.c |   5 +-
 5 files changed, 158 insertions(+), 15 deletions(-)
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH hmm 5/8] mm/hmm: add missing call to hmm_range_need_fault() before returning EFAULT

2020-03-11 Thread Ralph Campbell



On 3/11/20 11:35 AM, Jason Gunthorpe wrote:

From: Jason Gunthorpe 

All return paths that do EFAULT must call hmm_range_need_fault() to
determine if the user requires this page to be valid.

If the page cannot be made valid if the user later requires it, due to vma
flags in this case, then the return should be HMM_PFN_ERROR.

Fixes: a3e0d41c2b1f ("mm/hmm: improve driver API to work and wait over a range")
Signed-off-by: Jason Gunthorpe 


Reviewed-by: Ralph Campbell 


---
  mm/hmm.c | 19 ---
  1 file changed, 8 insertions(+), 11 deletions(-)

diff --git a/mm/hmm.c b/mm/hmm.c
index 5f5ccf13dd1e85..e10cd0adba7b37 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -582,18 +582,15 @@ static int hmm_vma_walk_test(unsigned long start, 
unsigned long end,
struct vm_area_struct *vma = walk->vma;
  
  	/*

-* Skip vma ranges that don't have struct page backing them or
-* map I/O devices directly.
-*/
-   if (vma->vm_flags & (VM_IO | VM_PFNMAP | VM_MIXEDMAP))
-   return -EFAULT;
-
-   /*
+* Skip vma ranges that don't have struct page backing them or map I/O
+* devices directly.
+*
 * If the vma does not allow read access, then assume that it does not
-* allow write access either. HMM does not support architectures
-* that allow write without read.
+* allow write access either. HMM does not support architectures that
+* allow write without read.
 */
-   if (!(vma->vm_flags & VM_READ)) {
+   if ((vma->vm_flags & (VM_IO | VM_PFNMAP | VM_MIXEDMAP)) ||
+   !(vma->vm_flags & VM_READ)) {
bool fault, write_fault;
  
  		/*

@@ -607,7 +604,7 @@ static int hmm_vma_walk_test(unsigned long start, unsigned 
long end,
if (fault || write_fault)
return -EFAULT;
  
-		hmm_pfns_fill(start, end, range, HMM_PFN_NONE);

+   hmm_pfns_fill(start, end, range, HMM_PFN_ERROR);
hmm_vma_walk->last = end;
  
  		/* Skip this vma and continue processing the next vma. */



___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH hmm 4/8] mm/hmm: add missing pfns set to hmm_vma_walk_pmd()

2020-03-11 Thread Ralph Campbell



On 3/11/20 11:35 AM, Jason Gunthorpe wrote:

From: Jason Gunthorpe 

All success exit paths from the walker functions must set the pfns array.

A migration entry with no required fault is a HMM_PFN_NONE return, just
like the pte case.

Fixes: d08faca018c4 ("mm/hmm: properly handle migration pmd")
Signed-off-by: Jason Gunthorpe 


Reviewed-by: Ralph Campbell 


---
  mm/hmm.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/hmm.c b/mm/hmm.c
index 32dcbfd3908315..5f5ccf13dd1e85 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -394,7 +394,7 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp,
pmd_migration_entry_wait(walk->mm, pmdp);
return -EBUSY;
}
-   return 0;
+   return hmm_pfns_fill(start, end, range, HMM_PFN_NONE);
} else if (!pmd_present(pmd))
return hmm_pfns_fill(start, end, range, HMM_PFN_ERROR);
  


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH hmm 3/8] mm/hmm: do not call hmm_vma_walk_hole() while holding a spinlock

2020-03-11 Thread Ralph Campbell



On 3/11/20 11:35 AM, Jason Gunthorpe wrote:

From: Jason Gunthorpe 

This eventually calls into handle_mm_fault() which is a sleeping function.
Release the lock first.

hmm_vma_walk_hole() does not touch the contents of the PUD, so it does not
need the lock.

Fixes: 3afc423632a1 ("mm: pagewalk: add p4d_entry() and pgd_entry()")
Cc: Steven Price 
Signed-off-by: Jason Gunthorpe 


Reviewed-by: Ralph Campbell 


---
  mm/hmm.c | 14 +++---
  1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/mm/hmm.c b/mm/hmm.c
index 9e8f68eb83287a..32dcbfd3908315 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -473,8 +473,8 @@ static int hmm_vma_walk_pud(pud_t *pudp, unsigned long 
start, unsigned long end,
  
  	pud = READ_ONCE(*pudp);

if (pud_none(pud)) {
-   ret = hmm_vma_walk_hole(start, end, -1, walk);
-   goto out_unlock;
+   spin_unlock(ptl);
+   return hmm_vma_walk_hole(start, end, -1, walk);
}
  
  	if (pud_huge(pud) && pud_devmap(pud)) {

@@ -483,8 +483,8 @@ static int hmm_vma_walk_pud(pud_t *pudp, unsigned long 
start, unsigned long end,
bool fault, write_fault;
  
  		if (!pud_present(pud)) {

-   ret = hmm_vma_walk_hole(start, end, -1, walk);
-   goto out_unlock;
+   spin_unlock(ptl);
+   return hmm_vma_walk_hole(start, end, -1, walk);
}
  
  		i = (addr - range->start) >> PAGE_SHIFT;

@@ -495,9 +495,9 @@ static int hmm_vma_walk_pud(pud_t *pudp, unsigned long 
start, unsigned long end,
hmm_range_need_fault(hmm_vma_walk, pfns, npages,
 cpu_flags, , _fault);
if (fault || write_fault) {
-   ret = hmm_vma_walk_hole_(addr, end, fault,
-write_fault, walk);
-   goto out_unlock;
+   spin_unlock(ptl);
+   return hmm_vma_walk_hole_(addr, end, fault, write_fault,
+ walk);
}
  
  		pfn = pud_pfn(pud) + ((addr & ~PUD_MASK) >> PAGE_SHIFT);



___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH hmm 2/8] mm/hmm: don't free the cached pgmap while scanning

2020-03-11 Thread Ralph Campbell



On 3/11/20 11:35 AM, Jason Gunthorpe wrote:

From: Jason Gunthorpe 

The pgmap is held in the hmm_vma_walk variable in hope of speeding up
future get_dev_pagemap() calls by hitting the same pointer. The algorithm
doesn't actually care about how long the pgmap is held for.

Move the put of the cached pgmap to after the walk is completed and delete
all the other now redundant puts.

This solves a possible leak of the reference in hmm_vma_walk_pmd() if a
hmm_vma_handle_pte() fails while looping.

Fixes: 992de9a8b751 ("mm/hmm: allow to mirror vma of a file on a DAX backed 
filesystem")
Signed-off-by: Jason Gunthorpe 


Reviewed-by: Ralph Campbell 


---
  mm/hmm.c | 31 +--
  1 file changed, 9 insertions(+), 22 deletions(-)

We talked about just deleting this stuff, but I think it makes alot sense for
hmm_range_fault() to trigger fault on devmap pages that are not compatible
with the caller - so lets just fix the leak on error path for now.

diff --git a/mm/hmm.c b/mm/hmm.c
index 35f85424176d14..9e8f68eb83287a 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -239,10 +239,6 @@ static int hmm_vma_handle_pmd(struct mm_walk *walk, 
unsigned long addr,
}
pfns[i] = hmm_device_entry_from_pfn(range, pfn) | cpu_flags;
}
-   if (hmm_vma_walk->pgmap) {
-   put_dev_pagemap(hmm_vma_walk->pgmap);
-   hmm_vma_walk->pgmap = NULL;
-   }
hmm_vma_walk->last = end;
return 0;
  }
@@ -360,10 +356,6 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, 
unsigned long addr,
return 0;
  
  fault:

-   if (hmm_vma_walk->pgmap) {
-   put_dev_pagemap(hmm_vma_walk->pgmap);
-   hmm_vma_walk->pgmap = NULL;
-   }
pte_unmap(ptep);
/* Fault any virtual address we were asked to fault */
return hmm_vma_walk_hole_(addr, end, fault, write_fault, walk);
@@ -446,16 +438,6 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp,
return r;
}
}
-   if (hmm_vma_walk->pgmap) {
-   /*
-* We do put_dev_pagemap() here and not in hmm_vma_handle_pte()
-* so that we can leverage get_dev_pagemap() optimization which
-* will not re-take a reference on a pgmap if we already have
-* one.
-*/
-   put_dev_pagemap(hmm_vma_walk->pgmap);
-   hmm_vma_walk->pgmap = NULL;
-   }
pte_unmap(ptep - 1);
  
  	hmm_vma_walk->last = addr;

@@ -529,10 +511,6 @@ static int hmm_vma_walk_pud(pud_t *pudp, unsigned long 
start, unsigned long end,
pfns[i] = hmm_device_entry_from_pfn(range, pfn) |
  cpu_flags;
}
-   if (hmm_vma_walk->pgmap) {
-   put_dev_pagemap(hmm_vma_walk->pgmap);
-   hmm_vma_walk->pgmap = NULL;
-   }
hmm_vma_walk->last = end;
goto out_unlock;
}
@@ -694,6 +672,15 @@ long hmm_range_fault(struct hmm_range *range, unsigned int 
flags)
return -EBUSY;
ret = walk_page_range(mm, hmm_vma_walk.last, range->end,
  _walk_ops, _vma_walk);
+   /*
+* A pgmap is kept cached in the hmm_vma_walk to avoid expensive
+* searching in the probably common case that the pgmap is the
+* same for the entire requested range.
+*/
+   if (hmm_vma_walk.pgmap) {
+   put_dev_pagemap(hmm_vma_walk.pgmap);
+   hmm_vma_walk.pgmap = NULL;
+   }
} while (ret == -EBUSY);
  
  	if (ret)



___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH hmm 8/8] mm/hmm: add missing call to hmm_pte_need_fault in HMM_PFN_SPECIAL handling

2020-03-11 Thread Ralph Campbell



On 3/11/20 11:35 AM, Jason Gunthorpe wrote:

From: Jason Gunthorpe 

Currently if a special PTE is encountered hmm_range_fault() immediately
returns EFAULT and sets the HMM_PFN_SPECIAL error output (which nothing
uses).

EFAULT should only be returned after testing with hmm_pte_need_fault().

Also pte_devmap() and pte_special() are exclusive, and there is no need to
check IS_ENABLED, pte_special() is stubbed out to return false on
unsupported architectures.

Fixes: 992de9a8b751 ("mm/hmm: allow to mirror vma of a file on a DAX backed 
filesystem")
Signed-off-by: Jason Gunthorpe 


Reviewed-by: Ralph Campbell 


---
  mm/hmm.c | 19 ---
  1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/mm/hmm.c b/mm/hmm.c
index f61fddf2ef6505..ca33d086bdc190 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -335,16 +335,21 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, 
unsigned long addr,
pte_unmap(ptep);
return -EBUSY;
}
-   } else if (IS_ENABLED(CONFIG_ARCH_HAS_PTE_SPECIAL) && pte_special(pte)) 
{
-   if (!is_zero_pfn(pte_pfn(pte))) {
+   }
+
+   /*
+* Since each architecture defines a struct page for the zero page, just
+* fall through and treat it like a normal page.
+*/
+   if (pte_special(pte) && !is_zero_pfn(pte_pfn(pte))) {
+   hmm_pte_need_fault(hmm_vma_walk, orig_pfn, 0, ,
+  _fault);
+   if (fault || write_fault) {
pte_unmap(ptep);
-   *pfn = range->values[HMM_PFN_SPECIAL];
return -EFAULT;
}
-   /*
-* Since each architecture defines a struct page for the zero
-* page, just fall through and treat it like a normal page.
-*/
+   *pfn = range->values[HMM_PFN_SPECIAL];
+   return 0;
}
  
  	*pfn = hmm_device_entry_from_pfn(range, pte_pfn(pte)) | cpu_flags;



___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH hmm 7/8] mm/hmm: return -EFAULT when setting HMM_PFN_ERROR on requested valid pages

2020-03-11 Thread Ralph Campbell



On 3/11/20 11:35 AM, Jason Gunthorpe wrote:

From: Jason Gunthorpe 

hmm_range_fault() should never return 0 if the caller requested a valid
page, but the pfns output for that page would be HMM_PFN_ERROR.

hmm_pte_need_fault() must always be called before setting HMM_PFN_ERROR to
detect if the page is in faulting mode or not.

Fix two cases in hmm_vma_walk_pmd() and reorganize some of the duplicated
code.

Fixes: d08faca018c4 ("mm/hmm: properly handle migration pmd")
Fixes: da4c3c735ea4 ("mm/hmm/mirror: helper to snapshot CPU page table")
Signed-off-by: Jason Gunthorpe 


Reviewed-by: Ralph Campbell 


---
  mm/hmm.c | 38 +-
  1 file changed, 21 insertions(+), 17 deletions(-)

diff --git a/mm/hmm.c b/mm/hmm.c
index bf676cfef3e8ee..f61fddf2ef6505 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -363,8 +363,10 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp,
  {
struct hmm_vma_walk *hmm_vma_walk = walk->private;
struct hmm_range *range = hmm_vma_walk->range;
-   uint64_t *pfns = range->pfns;
-   unsigned long addr = start, i;
+   uint64_t *pfns = >pfns[(start - range->start) >> PAGE_SHIFT];
+   unsigned long npages = (end - start) >> PAGE_SHIFT;
+   unsigned long addr = start;
+   bool fault, write_fault;
pte_t *ptep;
pmd_t pmd;
  
@@ -374,14 +376,6 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp,

return hmm_vma_walk_hole(start, end, -1, walk);
  
  	if (thp_migration_supported() && is_pmd_migration_entry(pmd)) {

-   bool fault, write_fault;
-   unsigned long npages;
-   uint64_t *pfns;
-
-   i = (addr - range->start) >> PAGE_SHIFT;
-   npages = (end - addr) >> PAGE_SHIFT;
-   pfns = >pfns[i];
-
hmm_range_need_fault(hmm_vma_walk, pfns, npages,
 0, , _fault);
if (fault || write_fault) {
@@ -390,8 +384,15 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp,
return -EBUSY;
}
return hmm_pfns_fill(start, end, range, HMM_PFN_NONE);
-   } else if (!pmd_present(pmd))
+   }
+
+   if (!pmd_present(pmd)) {
+   hmm_range_need_fault(hmm_vma_walk, pfns, npages, 0, ,
+_fault);
+   if (fault || write_fault)
+   return -EFAULT;
return hmm_pfns_fill(start, end, range, HMM_PFN_ERROR);


Shouldn't this fill with HMM_PFN_NONE instead of HMM_PFN_ERROR?
Otherwise, when a THP is swapped out, you will get a different
value than if a PTE is swapped out and you are prefetching/snapshotting.


+   }
  
  	if (pmd_devmap(pmd) || pmd_trans_huge(pmd)) {

/*
@@ -408,8 +409,7 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp,
if (!pmd_devmap(pmd) && !pmd_trans_huge(pmd))
goto again;
  
-		i = (addr - range->start) >> PAGE_SHIFT;

-   return hmm_vma_handle_pmd(walk, addr, end, [i], pmd);
+   return hmm_vma_handle_pmd(walk, addr, end, pfns, pmd);
}
  
  	/*

@@ -418,15 +418,19 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp,
 * entry pointing to pte directory or it is a bad pmd that will not
 * recover.
 */
-   if (pmd_bad(pmd))
+   if (pmd_bad(pmd)) {
+   hmm_range_need_fault(hmm_vma_walk, pfns, npages, 0, ,
+_fault);
+   if (fault || write_fault)
+   return -EFAULT;
return hmm_pfns_fill(start, end, range, HMM_PFN_ERROR);
+   }
  
  	ptep = pte_offset_map(pmdp, addr);

-   i = (addr - range->start) >> PAGE_SHIFT;
-   for (; addr < end; addr += PAGE_SIZE, ptep++, i++) {
+   for (; addr < end; addr += PAGE_SIZE, ptep++, pfns++) {
int r;
  
-		r = hmm_vma_handle_pte(walk, addr, end, pmdp, ptep, [i]);

+   r = hmm_vma_handle_pte(walk, addr, end, pmdp, ptep, pfns);
if (r) {
/* hmm_vma_handle_pte() did pte_unmap() */
hmm_vma_walk->last = addr;


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH hmm 6/8] mm/hmm: reorganize how !pte_present is handled in hmm_vma_handle_pte()

2020-03-11 Thread Ralph Campbell



On 3/11/20 11:35 AM, Jason Gunthorpe wrote:

From: Jason Gunthorpe 

The intention with this code is to determine if the caller required the
pages to be valid, and if so, then take some action to make them valid.
The action varies depending on the page type.

In all cases, if the caller doesn't ask for the page, then
hmm_range_fault() should not return an error.

Revise the implementation to be clearer, and fix some bugs:

  - hmm_pte_need_fault() must always be called before testing fault or
write_fault otherwise the defaults of false apply and the if()'s don't
work. This was missed on the is_migration_entry() branch

  - -EFAULT should not be returned unless hmm_pte_need_fault() indicates
fault is required - ie snapshotting should not fail.

  - For !pte_present() the cpu_flags are always 0, except in the special
case of is_device_private_entry(), calling pte_to_hmm_pfn_flags() is
confusing.

Reorganize the flow so that it always follows the pattern of calling
hmm_pte_need_fault() and then checking fault || write_fault.

Fixes: 2aee09d8c116 ("mm/hmm: change hmm_vma_fault() to allow write fault on page 
basis")
Signed-off-by: Jason Gunthorpe 


Reviewed-by: Ralph Campbell 


---
  mm/hmm.c | 35 +++
  1 file changed, 15 insertions(+), 20 deletions(-)

diff --git a/mm/hmm.c b/mm/hmm.c
index e10cd0adba7b37..bf676cfef3e8ee 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -282,15 +282,6 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, 
unsigned long addr,
if (!pte_present(pte)) {
swp_entry_t entry = pte_to_swp_entry(pte);
  
-		if (!non_swap_entry(entry)) {

-   cpu_flags = pte_to_hmm_pfn_flags(range, pte);
-   hmm_pte_need_fault(hmm_vma_walk, orig_pfn, cpu_flags,
-  , _fault);
-   if (fault || write_fault)
-   goto fault;
-   return 0;
-   }
-
/*
 * This is a special swap entry, ignore migration, use
 * device and report anything else as error.
@@ -310,26 +301,30 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, 
unsigned long addr,
return 0;
}
  
-		if (is_migration_entry(entry)) {

-   if (fault || write_fault) {
-   pte_unmap(ptep);
-   hmm_vma_walk->last = addr;
-   migration_entry_wait(walk->mm, pmdp, addr);
-   return -EBUSY;
-   }
+   hmm_pte_need_fault(hmm_vma_walk, orig_pfn, 0, ,
+  _fault);
+   if (!fault && !write_fault)
return 0;
+
+   if (!non_swap_entry(entry))
+   goto fault;
+
+   if (is_migration_entry(entry)) {
+   pte_unmap(ptep);
+   hmm_vma_walk->last = addr;
+   migration_entry_wait(walk->mm, pmdp, addr);
+   return -EBUSY;
}
  
  		/* Report error for everything else */

pte_unmap(ptep);
*pfn = range->values[HMM_PFN_ERROR];
return -EFAULT;
-   } else {
-   cpu_flags = pte_to_hmm_pfn_flags(range, pte);
-   hmm_pte_need_fault(hmm_vma_walk, orig_pfn, cpu_flags,
-  , _fault);
}
  
+	cpu_flags = pte_to_hmm_pfn_flags(range, pte);

+   hmm_pte_need_fault(hmm_vma_walk, orig_pfn, cpu_flags, ,
+  _fault);
if (fault || write_fault)
goto fault;
  


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH -next 023/491] AMD KFD: Use fallthrough;

2020-03-11 Thread Joe Perches
On Wed, 2020-03-11 at 17:50 -0400, Felix Kuehling wrote:
> On 2020-03-11 12:51 a.m., Joe Perches wrote:
> > Convert the various uses of fallthrough comments to fallthrough;
> > 
> > Done via script
> > Link: 
> > https://lore.kernel.org/lkml/b56602fcf79f849e733e7b521bb0e17895d390fa.1582230379.git.joe.com/
> 
> The link seems to be broken. This one works: 
> https://lore.kernel.org/lkml/b56602fcf79f849e733e7b521bb0e17895d390fa.1582230379.git@perches.com/

Thanks.

I neglected to use a backslash on the generating script.
In the script in 0/491,

Link: 
https://lore.kernel.org/lkml/b56602fcf79f849e733e7b521bb0e17895d390fa.1582230379.git@perches.com/

likely should have been:

Link: 
https://lore.kernel.org/lkml/b56602fcf79f849e733e7b521bb0e17895d390fa.1582230379.git.joe\@perches.com/


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH hmm 1/8] mm/hmm: add missing unmaps of the ptep during hmm_vma_handle_pte()

2020-03-11 Thread Ralph Campbell



On 3/11/20 11:34 AM, Jason Gunthorpe wrote:

From: Jason Gunthorpe 

Many of the direct returns of error skipped doing the pte_unmap(). All non
zero exit paths must unmap the pte.

The pte_unmap() is split unnaturally like this because some of the error
exit paths trigger a sleep and must release the lock before sleeping.

Fixes: 992de9a8b751 ("mm/hmm: allow to mirror vma of a file on a DAX backed 
filesystem")
Fixes: 53f5c3f489ec ("mm/hmm: factor out pte and pmd handling to simplify 
hmm_vma_walk_pmd()")
Signed-off-by: Jason Gunthorpe 


The changes look OK to me but one issue noted below.
In any case, you can add:
Reviewed-by: Ralph Campbell 


---
  mm/hmm.c | 8 ++--
  1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/mm/hmm.c b/mm/hmm.c
index 72e5a6d9a41756..35f85424176d14 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -325,6 +325,7 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, 
unsigned long addr,
}
  
  		/* Report error for everything else */

+   pte_unmap(ptep);
*pfn = range->values[HMM_PFN_ERROR];
return -EFAULT;
} else {
@@ -339,10 +340,13 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, 
unsigned long addr,
if (pte_devmap(pte)) {
hmm_vma_walk->pgmap = get_dev_pagemap(pte_pfn(pte),
  hmm_vma_walk->pgmap);
-   if (unlikely(!hmm_vma_walk->pgmap))
+   if (unlikely(!hmm_vma_walk->pgmap)) {
+   pte_unmap(ptep);
return -EBUSY;
+   }
} else if (IS_ENABLED(CONFIG_ARCH_HAS_PTE_SPECIAL) && pte_special(pte)) 
{
if (!is_zero_pfn(pte_pfn(pte))) {
+   pte_unmap(ptep);
*pfn = range->values[HMM_PFN_SPECIAL];
return -EFAULT;
}
@@ -437,7 +441,7 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp,
  
  		r = hmm_vma_handle_pte(walk, addr, end, pmdp, ptep, [i]);

if (r) {
-   /* hmm_vma_handle_pte() did unmap pte directory */
+   /* hmm_vma_handle_pte() did pte_unmap() */
hmm_vma_walk->last = addr;
return r;
}



I think there is a case where hmm_vma_handle_pte() is called, a fault is 
requested,
pte_unmap() and hmm_vma_walk_hole_() are called, the latter returns zero (the 
fault
was handled OK), but now the page table is unmapped for successive loop 
iterations
and pte_unmap(ptep - 1) is called at the loop end.
Since this isn't an issue on x86_64 but is on x86_32, I can see how it could be 
missed.
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH -next 023/491] AMD KFD: Use fallthrough;

2020-03-11 Thread Felix Kuehling

On 2020-03-11 12:51 a.m., Joe Perches wrote:

Convert the various uses of fallthrough comments to fallthrough;

Done via script
Link: 
https://lore.kernel.org/lkml/b56602fcf79f849e733e7b521bb0e17895d390fa.1582230379.git.joe.com/



The link seems to be broken. This one works: 
https://lore.kernel.org/lkml/b56602fcf79f849e733e7b521bb0e17895d390fa.1582230379.git@perches.com/





Signed-off-by: Joe Perches 



Acked-by: Felix Kuehling 



---
  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
index d6549e..6529ca 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
@@ -79,7 +79,7 @@ static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device 
*adev,
dev_warn(adev->dev,
 "Invalid sdma engine id (%d), using engine id 0\n",
 engine_id);
-   /* fall through */
+   fallthrough;
case 0:
sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA0, 0,
mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL;

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amd/amdgpu: Fix GPR read from debugfs (v2)

2020-03-11 Thread Alex Deucher
On Wed, Mar 11, 2020 at 4:33 PM Tom St Denis  wrote:
>
> The offset into the array was specified in bytes but should
> be in terms of 32-bit words.  Also prevent large reads that
> would also cause a buffer overread.
>
> v2:  Read from correct offset from internal storage buffer.
>
> Signed-off-by: Tom St Denis 
> Acked-by: Christian König 

Reviewed-by: Alex Deucher 

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> index 00942afc4e13..02bb1be11ffe 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> @@ -784,11 +784,11 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, 
> char __user *buf,
> ssize_t result = 0;
> uint32_t offset, se, sh, cu, wave, simd, thread, bank, *data;
>
> -   if (size & 3 || *pos & 3)
> +   if (size > 4096 || size & 3 || *pos & 3)
> return -EINVAL;
>
> /* decode offset */
> -   offset = *pos & GENMASK_ULL(11, 0);
> +   offset = (*pos & GENMASK_ULL(11, 0)) >> 2;
> se = (*pos & GENMASK_ULL(19, 12)) >> 12;
> sh = (*pos & GENMASK_ULL(27, 20)) >> 20;
> cu = (*pos & GENMASK_ULL(35, 28)) >> 28;
> @@ -826,7 +826,7 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, 
> char __user *buf,
> while (size) {
> uint32_t value;
>
> -   value = data[offset++];
> +   value = data[result >> 2];
> r = put_user(value, (uint32_t *)buf);
> if (r) {
> result = r;
> --
> 2.24.1
>
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 1/1] drm/amdgpu: disable gpu_sched load balancer for vcn jobs

2020-03-11 Thread Nirmoy


On 3/11/20 9:35 PM, Andrey Grodzovsky wrote:


On 3/11/20 4:32 PM, Nirmoy wrote:


On 3/11/20 9:02 PM, Andrey Grodzovsky wrote:


On 3/11/20 4:00 PM, Andrey Grodzovsky wrote:


On 3/11/20 4:00 PM, Nirmoy Das wrote:

VCN HW  doesn't support dynamic load balance on multiple
instances for a context. This patch modifies entity's
sched_list to a sched_list consist of only one drm scheduler.

Signed-off-by: Nirmoy Das 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c   |  4 
  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c  | 13 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h  |  1 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_job.c  |  3 +++
  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  1 +
  drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c    |  2 ++
  drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c    |  2 ++
  drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c    |  2 ++
  8 files changed, 28 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c

index 8304d0c87899..db0eef19c636 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -1203,6 +1203,7 @@ static int amdgpu_cs_submit(struct 
amdgpu_cs_parser *p,

  union drm_amdgpu_cs *cs)
  {
  struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
+    struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched);
  struct drm_sched_entity *entity = p->entity;
  enum drm_sched_priority priority;
  struct amdgpu_bo_list_entry *e;
@@ -1257,6 +1258,9 @@ static int amdgpu_cs_submit(struct 
amdgpu_cs_parser *p,

  priority = job->base.s_priority;
  drm_sched_entity_push_job(>base, entity);
  +    if (ring->funcs->no_gpu_sched_loadbalance)
+ amdgpu_ctx_disable_gpu_sched_load_balance(entity);
+



Why this needs to be done each time a job is submitted and not once 
in drm_sched_entity_init (same foramdgpu_job_submit bellow ?)


Andrey



My bad - not in drm_sched_entity_init but in relevant amdgpu code.



Hi Andrey,

Do you mean drm_sched_job_init() or after creating VCN entities?


Nirmoy



I guess after creating the VCN entities (has to be amdgpu specific 
code) - I just don't get why it needs to be done each time job is 
submitted, I mean - since you set .no_gpu_sched_loadbalance = true 
anyway this is always true and so shouldn't you just initialize the 
VCN entity with a schedulers list consisting of one scheduler and that 
it ?



Assumption: If I understand correctly we shouldn't be doing load balance 
among VCN jobs in the same context. Christian, James and Leo can clarify 
that if I am wrong.


But we can still do load balance of VCN jobs among multiple contexts. 
That load balance decision happens in drm_sched_entity_init(). If we 
initialize VCN entity with one scheduler then


all entities irrespective of context get that one scheduler, which means 
we are not utilizing the extra VCN instances.



Ideally we should be calling amdgpu_ctx_disable_gpu_sched_load_balance() 
only once after 1st call of drm_sched_entity_init() of a VCN job. I am 
not sure how to do that efficiently.


Another option might be to copy the logic of 
drm_sched_entity_get_free_sched() and choose a suitable VCN sched at/after 
VCN entity creation.



Regards,

Nirmoy




Andrey






Andrey






amdgpu_vm_move_to_lru_tail(p->adev, >vm);
    ttm_eu_fence_buffer_objects(>ticket, >validated, 
p->fence);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c

index fa575bdc03c8..1127e8f77721 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -559,6 +559,19 @@ void amdgpu_ctx_priority_override(struct 
amdgpu_ctx *ctx,

  }
  }
  +/**
+ * amdgpu_ctx_disable_gpu_sched_load_balance - disable 
gpu_sched's load balancer

+ * @entity: entity on which load balancer will be disabled
+ */
+void amdgpu_ctx_disable_gpu_sched_load_balance(struct 
drm_sched_entity *entity)

+{
+    struct drm_gpu_scheduler **scheds = >rq->sched;
+
+    /* disable gpu_sched's job load balancer by assigning only 
one */

+    /* drm scheduler to the entity */
+    drm_sched_entity_modify_sched(entity, scheds, 1);
+}
+
  int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx,
 struct drm_sched_entity *entity)
  {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h

index de490f183af2..3a2f900b8000 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
@@ -90,5 +90,6 @@ void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr 
*mgr);

    void amdgpu_ctx_init_sched(struct amdgpu_device *adev);
  +void amdgpu_ctx_disable_gpu_sched_load_balance(struct 
drm_sched_entity *entity);

    #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c

index 4981e443a884..64dad7ba74da 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -140,6 +140,7 @@ 

[PATCH v5 1/4] drm/amdgpu/vcn: fix race condition issue for vcn start

2020-03-11 Thread James Zhu
Fix race condition issue when multiple vcn starts are called.

v2: Removed checking the return value of cancel_delayed_work_sync()
to prevent possible races here.

v3: Add total_submission_cnt to avoid gate power unexpectedly.

v4: Remove extra counter check, and reduce counter before idle
work schedule

Signed-off-by: James Zhu 
Reviewed-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 21 ++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h |  2 ++
 2 files changed, 16 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
index a41272f..6dacf78 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -63,6 +63,8 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
int i, r;
 
INIT_DELAYED_WORK(>vcn.idle_work, amdgpu_vcn_idle_work_handler);
+   mutex_init(>vcn.vcn_pg_lock);
+   atomic_set(>vcn.total_submission_cnt, 0);
 
switch (adev->asic_type) {
case CHIP_RAVEN:
@@ -210,6 +212,7 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev)
}
 
release_firmware(adev->vcn.fw);
+   mutex_destroy(>vcn.vcn_pg_lock);
 
return 0;
 }
@@ -307,7 +310,7 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct 
*work)
fences += fence[j];
}
 
-   if (fences == 0) {
+   if (!fences && !atomic_read(>vcn.total_submission_cnt)) {
amdgpu_gfx_off_ctrl(adev, true);
amdgpu_device_ip_set_powergating_state(adev, 
AMD_IP_BLOCK_TYPE_VCN,
   AMD_PG_STATE_GATE);
@@ -319,13 +322,14 @@ static void amdgpu_vcn_idle_work_handler(struct 
work_struct *work)
 void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
 {
struct amdgpu_device *adev = ring->adev;
-   bool set_clocks = !cancel_delayed_work_sync(>vcn.idle_work);
 
-   if (set_clocks) {
-   amdgpu_gfx_off_ctrl(adev, false);
-   amdgpu_device_ip_set_powergating_state(adev, 
AMD_IP_BLOCK_TYPE_VCN,
-  AMD_PG_STATE_UNGATE);
-   }
+   atomic_inc(>vcn.total_submission_cnt);
+   cancel_delayed_work_sync(>vcn.idle_work);
+
+   mutex_lock(>vcn.vcn_pg_lock);
+   amdgpu_gfx_off_ctrl(adev, false);
+   amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
+  AMD_PG_STATE_UNGATE);
 
if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG){
struct dpg_pause_state new_state;
@@ -345,10 +349,13 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
 
adev->vcn.pause_dpg_mode(adev, ring->me, _state);
}
+   mutex_unlock(>vcn.vcn_pg_lock);
 }
 
 void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring)
 {
+   atomic_dec(>adev->vcn.total_submission_cnt);
+
schedule_delayed_work(>adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
index 6fe0573..111c4cc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
@@ -200,6 +200,8 @@ struct amdgpu_vcn {
struct drm_gpu_scheduler *vcn_dec_sched[AMDGPU_MAX_VCN_INSTANCES];
uint32_t num_vcn_enc_sched;
uint32_t num_vcn_dec_sched;
+   struct mutex vcn_pg_lock;
+   atomic_t total_submission_cnt;
 
unsignedharvest_config;
int (*pause_dpg_mode)(struct amdgpu_device *adev,
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH v5 4/4] drm/amdgpu/vcn2.5: add sync when WPTR/RPTR reset under DPG mode

2020-03-11 Thread James Zhu
Add vcn hardware and firmware synchronization to fix a race condition
issue among the vcn driver, hardware and firmware under DPG mode.

Signed-off-by: James Zhu 
Reviewed-by: Leo Liu 
---
 drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c | 16 
 1 file changed, 16 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c 
b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
index 9b22e2b..9793a75 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
@@ -868,6 +868,10 @@ static int vcn_v2_5_start_dpg_mode(struct amdgpu_device 
*adev, int inst_idx, boo
WREG32_SOC15(UVD, inst_idx, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH,
upper_32_bits(ring->gpu_addr));
 
+   /* Stall DPG before WPTR/RPTR reset */
+   WREG32_P(SOC15_REG_OFFSET(UVD, inst_idx, mmUVD_POWER_STATUS),
+   UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK,
+   ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);
/* Initialize the ring buffer's read and write pointers */
WREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_RPTR, 0);
 
@@ -876,6 +880,9 @@ static int vcn_v2_5_start_dpg_mode(struct amdgpu_device 
*adev, int inst_idx, boo
ring->wptr = RREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_RPTR);
WREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_WPTR,
lower_32_bits(ring->wptr));
+   /* Unstall DPG */
+   WREG32_P(SOC15_REG_OFFSET(UVD, inst_idx, mmUVD_POWER_STATUS),
+   0, ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);
 
return 0;
 }
@@ -1389,8 +1396,13 @@ static int vcn_v2_5_pause_dpg_mode(struct amdgpu_device 
*adev,
   UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK,
   
UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, ret_code);
 
+   /* Stall DPG before WPTR/RPTR reset */
+   WREG32_P(SOC15_REG_OFFSET(UVD, inst_idx, 
mmUVD_POWER_STATUS),
+  
UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK,
+  
~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);
/* Restore */
ring = >vcn.inst[inst_idx].ring_enc[0];
+   ring->wptr = 0;
WREG32_SOC15(UVD, inst_idx, mmUVD_RB_BASE_LO, 
ring->gpu_addr);
WREG32_SOC15(UVD, inst_idx, mmUVD_RB_BASE_HI, 
upper_32_bits(ring->gpu_addr));
WREG32_SOC15(UVD, inst_idx, mmUVD_RB_SIZE, 
ring->ring_size / 4);
@@ -1398,6 +1410,7 @@ static int vcn_v2_5_pause_dpg_mode(struct amdgpu_device 
*adev,
WREG32_SOC15(UVD, inst_idx, mmUVD_RB_WPTR, 
lower_32_bits(ring->wptr));
 
ring = >vcn.inst[inst_idx].ring_enc[1];
+   ring->wptr = 0;
WREG32_SOC15(UVD, inst_idx, mmUVD_RB_BASE_LO2, 
ring->gpu_addr);
WREG32_SOC15(UVD, inst_idx, mmUVD_RB_BASE_HI2, 
upper_32_bits(ring->gpu_addr));
WREG32_SOC15(UVD, inst_idx, mmUVD_RB_SIZE2, 
ring->ring_size / 4);
@@ -1406,6 +1419,9 @@ static int vcn_v2_5_pause_dpg_mode(struct amdgpu_device 
*adev,
 
WREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_WPTR,
   RREG32_SOC15(UVD, inst_idx, 
mmUVD_SCRATCH2) & 0x7FFF);
+   /* Unstall DPG */
+   WREG32_P(SOC15_REG_OFFSET(UVD, inst_idx, 
mmUVD_POWER_STATUS),
+  0, 
~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);
 
SOC15_WAIT_ON_RREG(UVD, inst_idx, 
mmUVD_POWER_STATUS,
   UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON, 
UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH v5 3/4] drm/amdgpu/vcn2.0: add sync when WPTR/RPTR reset under DPG mode

2020-03-11 Thread James Zhu
Add vcn hardware and firmware synchronization to fix a race condition
issue among the vcn driver, hardware and firmware under DPG mode.

Signed-off-by: James Zhu 
---
 drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c | 18 ++
 1 file changed, 18 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c 
b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
index f2745fd..415d65c 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
@@ -872,6 +872,11 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_device 
*adev, bool indirect)
tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_CNTL, tmp);
 
+   /* Stall DPG before WPTR/RPTR reset */
+   WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_POWER_STATUS),
+   UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK,
+   ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);
+
/* set the write pointer delay */
WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR_CNTL, 0);
 
@@ -894,6 +899,10 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_device 
*adev, bool indirect)
WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR,
lower_32_bits(ring->wptr));
 
+   /* Unstall DPG */
+   WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_POWER_STATUS),
+   0, ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);
+
return 0;
 }
 
@@ -1189,8 +1198,13 @@ static int vcn_v2_0_pause_dpg_mode(struct amdgpu_device 
*adev,
   UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK,
   
UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, ret_code);
 
+   /* Stall DPG before WPTR/RPTR reset */
+   WREG32_P(SOC15_REG_OFFSET(UVD, 0, 
mmUVD_POWER_STATUS),
+  
UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK,
+  
~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);
/* Restore */
ring = >vcn.inst->ring_enc[0];
+   ring->wptr = 0;
WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, 
ring->gpu_addr);
WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, 
upper_32_bits(ring->gpu_addr));
WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, 
ring->ring_size / 4);
@@ -1198,6 +1212,7 @@ static int vcn_v2_0_pause_dpg_mode(struct amdgpu_device 
*adev,
WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, 
lower_32_bits(ring->wptr));
 
ring = >vcn.inst->ring_enc[1];
+   ring->wptr = 0;
WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, 
ring->gpu_addr);
WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, 
upper_32_bits(ring->gpu_addr));
WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, 
ring->ring_size / 4);
@@ -1206,6 +1221,9 @@ static int vcn_v2_0_pause_dpg_mode(struct amdgpu_device 
*adev,
 
WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR,
   RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) 
& 0x7FFF);
+   /* Unstall DPG */
+   WREG32_P(SOC15_REG_OFFSET(UVD, 0, 
mmUVD_POWER_STATUS),
+  0, 
~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);
 
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
   UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON,
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH v5 2/4] drm/amdgpu/vcn: fix race condition issue for dpg unpause mode switch

2020-03-11 Thread James Zhu
We can't rely only on the enc fence to decide switching to dpg unpause mode,
since an enc thread may not schedule a fence in time when multiple
threads are running.

v3: 1. Rename enc_submission_cnt to dpg_enc_submission_cnt
2. Add dpg_enc_submission_cnt check in idle_work_handler

v4:  Remove extra counter check, and reduce counter before idle
work schedule

Signed-off-by: James Zhu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 32 +---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h |  1 +
 2 files changed, 22 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
index 6dacf78..0110610 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -65,6 +65,8 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
INIT_DELAYED_WORK(>vcn.idle_work, amdgpu_vcn_idle_work_handler);
mutex_init(>vcn.vcn_pg_lock);
atomic_set(>vcn.total_submission_cnt, 0);
+   for (i = 0; i < adev->vcn.num_vcn_inst; i++)
+   atomic_set(>vcn.inst[i].dpg_enc_submission_cnt, 0);
 
switch (adev->asic_type) {
case CHIP_RAVEN:
@@ -298,7 +300,8 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct 
*work)
if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG){
struct dpg_pause_state new_state;
 
-   if (fence[j])
+   if (fence[j] ||
+   
unlikely(atomic_read(>vcn.inst[j].dpg_enc_submission_cnt)))
new_state.fw_based = VCN_DPG_STATE__PAUSE;
else
new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
@@ -333,19 +336,22 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
 
if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG){
struct dpg_pause_state new_state;
-   unsigned int fences = 0;
-   unsigned int i;
 
-   for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
-   fences += 
amdgpu_fence_count_emitted(>vcn.inst[ring->me].ring_enc[i]);
-   }
-   if (fences)
+   if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC) {
+   
atomic_inc(>vcn.inst[ring->me].dpg_enc_submission_cnt);
new_state.fw_based = VCN_DPG_STATE__PAUSE;
-   else
-   new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
+   } else {
+   unsigned int fences = 0;
+   unsigned int i;
 
-   if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC)
-   new_state.fw_based = VCN_DPG_STATE__PAUSE;
+   for (i = 0; i < adev->vcn.num_enc_rings; ++i)
+   fences += 
amdgpu_fence_count_emitted(>vcn.inst[ring->me].ring_enc[i]);
+
+   if (fences || 
atomic_read(>vcn.inst[ring->me].dpg_enc_submission_cnt))
+   new_state.fw_based = VCN_DPG_STATE__PAUSE;
+   else
+   new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
+   }
 
adev->vcn.pause_dpg_mode(adev, ring->me, _state);
}
@@ -354,6 +360,10 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
 
 void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring)
 {
+   if (ring->adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG &&
+   ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC)
+   
atomic_dec(>adev->vcn.inst[ring->me].dpg_enc_submission_cnt);
+
atomic_dec(>adev->vcn.total_submission_cnt);
 
schedule_delayed_work(>adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
index 111c4cc..e913de8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
@@ -183,6 +183,7 @@ struct amdgpu_vcn_inst {
void*dpg_sram_cpu_addr;
uint64_tdpg_sram_gpu_addr;
uint32_t*dpg_sram_curr_addr;
+   atomic_tdpg_enc_submission_cnt;
 };
 
 struct amdgpu_vcn {
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 1/1] drm/amdgpu: disable gpu_sched load balancer for vcn jobs

2020-03-11 Thread Andrey Grodzovsky


On 3/11/20 4:32 PM, Nirmoy wrote:


On 3/11/20 9:02 PM, Andrey Grodzovsky wrote:


On 3/11/20 4:00 PM, Andrey Grodzovsky wrote:


On 3/11/20 4:00 PM, Nirmoy Das wrote:

VCN HW  doesn't support dynamic load balance on multiple
instances for a context. This patch modifies entity's
sched_list to a sched_list consist of only one drm scheduler.

Signed-off-by: Nirmoy Das 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c   |  4 
  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c  | 13 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h  |  1 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_job.c  |  3 +++
  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  1 +
  drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c    |  2 ++
  drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c    |  2 ++
  drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c    |  2 ++
  8 files changed, 28 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c

index 8304d0c87899..db0eef19c636 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -1203,6 +1203,7 @@ static int amdgpu_cs_submit(struct 
amdgpu_cs_parser *p,

  union drm_amdgpu_cs *cs)
  {
  struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
+    struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched);
  struct drm_sched_entity *entity = p->entity;
  enum drm_sched_priority priority;
  struct amdgpu_bo_list_entry *e;
@@ -1257,6 +1258,9 @@ static int amdgpu_cs_submit(struct 
amdgpu_cs_parser *p,

  priority = job->base.s_priority;
  drm_sched_entity_push_job(>base, entity);
  +    if (ring->funcs->no_gpu_sched_loadbalance)
+    amdgpu_ctx_disable_gpu_sched_load_balance(entity);
+



Why does this need to be done each time a job is submitted and not once 
in drm_sched_entity_init (same for amdgpu_job_submit below)?


Andrey



My bad - not in drm_sched_entity_init but in relevant amdgpu code.



Hi Andrey,

Do you mean drm_sched_job_init() or after creating VCN entities?


Nirmoy



I guess after creating the VCN entities (has to be amdgpu specific code) 
- I just don't get why it needs to be done each time job is submitted, I 
mean - since you set .no_gpu_sched_loadbalance = true anyway this is 
always true and so shouldn't you just initialize the VCN entity with a 
schedulers list consisting of one scheduler and that it ?


Andrey






Andrey






amdgpu_vm_move_to_lru_tail(p->adev, >vm);
    ttm_eu_fence_buffer_objects(>ticket, >validated, 
p->fence);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c

index fa575bdc03c8..1127e8f77721 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -559,6 +559,19 @@ void amdgpu_ctx_priority_override(struct 
amdgpu_ctx *ctx,

  }
  }
  +/**
+ * amdgpu_ctx_disable_gpu_sched_load_balance - disable gpu_sched's 
load balancer

+ * @entity: entity on which load balancer will be disabled
+ */
+void amdgpu_ctx_disable_gpu_sched_load_balance(struct 
drm_sched_entity *entity)

+{
+    struct drm_gpu_scheduler **scheds = >rq->sched;
+
+    /* disable gpu_sched's job load balancer by assigning only one */
+    /* drm scheduler to the entity */
+    drm_sched_entity_modify_sched(entity, scheds, 1);
+}
+
  int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx,
 struct drm_sched_entity *entity)
  {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h

index de490f183af2..3a2f900b8000 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
@@ -90,5 +90,6 @@ void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr 
*mgr);

    void amdgpu_ctx_init_sched(struct amdgpu_device *adev);
  +void amdgpu_ctx_disable_gpu_sched_load_balance(struct 
drm_sched_entity *entity);

    #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c

index 4981e443a884..64dad7ba74da 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -140,6 +140,7 @@ void amdgpu_job_free(struct amdgpu_job *job)
  int amdgpu_job_submit(struct amdgpu_job *job, struct 
drm_sched_entity *entity,

    void *owner, struct dma_fence **f)
  {
+    struct amdgpu_ring *ring = to_amdgpu_ring(entity->rq->sched);
  enum drm_sched_priority priority;
  int r;
  @@ -154,6 +155,8 @@ int amdgpu_job_submit(struct amdgpu_job *job, 
struct drm_sched_entity *entity,

  amdgpu_job_free_resources(job);
  priority = job->base.s_priority;
  drm_sched_entity_push_job(>base, entity);
+    if (ring->funcs->no_gpu_sched_loadbalance)
+    amdgpu_ctx_disable_gpu_sched_load_balance(entity);
    return 0;
  }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h

index 448c76cbf3ed..f78fe1a6912b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ 

Re: [PATCH] drm/amd/amdgpu: Fix GPR read from debugfs

2020-03-11 Thread Tom St Denis

Hi Alex,

I sent out a v2 of the patch to the list that also addresses the fact we 
were reading from the wrong offset from the internal buffer.


This entry was really only tested with offset==0 which is why this 
didn't come up until now that people want those trap registers :-)


Tom

On 2020-03-11 11:16 a.m., Alex Deucher wrote:

On Tue, Mar 10, 2020 at 8:53 AM Tom St Denis  wrote:

The offset into the array was specified in bytes but should
be in terms of 32-bit words.  Also prevent large reads that
would also cause a buffer overread.

Signed-off-by: Tom St Denis 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 4 ++--
  1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index c573edf02afc..e0f4ccd91fd4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -783,11 +783,11 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, 
char __user *buf,
 ssize_t result = 0;
 uint32_t offset, se, sh, cu, wave, simd, thread, bank, *data;

-   if (size & 3 || *pos & 3)
+   if (size > 4096 || size & 3 || *pos & 3)

Is size in dwords as well?

Alex


 return -EINVAL;

 /* decode offset */
-   offset = *pos & GENMASK_ULL(11, 0);
+   offset = (*pos & GENMASK_ULL(11, 0)) / 4;
 se = (*pos & GENMASK_ULL(19, 12)) >> 12;
 sh = (*pos & GENMASK_ULL(27, 20)) >> 20;
 cu = (*pos & GENMASK_ULL(35, 28)) >> 28;
--
2.24.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfxdata=02%7C01%7Ctom.stdenis%40amd.com%7C550ca4aaaf084c3d7a9f08d7c5cf2c65%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637195365897948298sdata=YTb2YGBlAlDS%2FffaVOo2Yrej21N%2FJYVpFVIc1rQERWg%3Dreserved=0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amd/amdgpu: Fix GPR read from debugfs (v2)

2020-03-11 Thread Tom St Denis
The offset into the array was specified in bytes but should
be in terms of 32-bit words.  Also prevent large reads that
would also cause a buffer overread.

v2:  Read from correct offset from internal storage buffer.

Signed-off-by: Tom St Denis 
Acked-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index 00942afc4e13..02bb1be11ffe 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -784,11 +784,11 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, 
char __user *buf,
ssize_t result = 0;
uint32_t offset, se, sh, cu, wave, simd, thread, bank, *data;
 
-   if (size & 3 || *pos & 3)
+   if (size > 4096 || size & 3 || *pos & 3)
return -EINVAL;
 
/* decode offset */
-   offset = *pos & GENMASK_ULL(11, 0);
+   offset = (*pos & GENMASK_ULL(11, 0)) >> 2;
se = (*pos & GENMASK_ULL(19, 12)) >> 12;
sh = (*pos & GENMASK_ULL(27, 20)) >> 20;
cu = (*pos & GENMASK_ULL(35, 28)) >> 28;
@@ -826,7 +826,7 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char 
__user *buf,
while (size) {
uint32_t value;
 
-   value = data[offset++];
+   value = data[result >> 2];
r = put_user(value, (uint32_t *)buf);
if (r) {
result = r;
-- 
2.24.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 1/1] drm/amdgpu: disable gpu_sched load balancer for vcn jobs

2020-03-11 Thread Nirmoy


On 3/11/20 9:14 PM, James Zhu wrote:


On 2020-03-11 4:00 p.m., Nirmoy Das wrote:

VCN HW  doesn't support dynamic load balance on multiple
instances for a context. This patch modifies entity's
sched_list to a sched_list consist of only one drm scheduler.

Signed-off-by: Nirmoy Das 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c   |  4 
  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c  | 13 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h  |  1 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_job.c  |  3 +++
  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  1 +
  drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c    |  2 ++
  drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c    |  2 ++
  drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c    |  2 ++
  8 files changed, 28 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c

index 8304d0c87899..db0eef19c636 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -1203,6 +1203,7 @@ static int amdgpu_cs_submit(struct 
amdgpu_cs_parser *p,

  union drm_amdgpu_cs *cs)
  {
  struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
+    struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched);
  struct drm_sched_entity *entity = p->entity;
  enum drm_sched_priority priority;
  struct amdgpu_bo_list_entry *e;
@@ -1257,6 +1258,9 @@ static int amdgpu_cs_submit(struct 
amdgpu_cs_parser *p,

  priority = job->base.s_priority;
  drm_sched_entity_push_job(>base, entity);
  +    if (ring->funcs->no_gpu_sched_loadbalance)
+    amdgpu_ctx_disable_gpu_sched_load_balance(entity);


Does this mean that only VCN IP instance 0 dec/enc will be scheduled?


No, not really.  drm_sched_job_init() gets called before 
amdgpu_ctx_disable_gpu_sched_load_balance().


The first drm_sched_job_init() call will choose the least-loaded VCN instance, 
and that VCN instance will then stay fixed for the whole lifetime of the context.



Regards,

Nirmoy



Best Regards!

James


+
  amdgpu_vm_move_to_lru_tail(p->adev, >vm);
    ttm_eu_fence_buffer_objects(>ticket, >validated, 
p->fence);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c

index fa575bdc03c8..1127e8f77721 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -559,6 +559,19 @@ void amdgpu_ctx_priority_override(struct 
amdgpu_ctx *ctx,

  }
  }
  +/**
+ * amdgpu_ctx_disable_gpu_sched_load_balance - disable gpu_sched's 
load balancer

+ * @entity: entity on which load balancer will be disabled
+ */
+void amdgpu_ctx_disable_gpu_sched_load_balance(struct 
drm_sched_entity *entity)

+{
+    struct drm_gpu_scheduler **scheds = >rq->sched;
+
+    /* disable gpu_sched's job load balancer by assigning only one */
+    /* drm scheduler to the entity */
+    drm_sched_entity_modify_sched(entity, scheds, 1);
+}
+
  int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx,
 struct drm_sched_entity *entity)
  {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h

index de490f183af2..3a2f900b8000 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
@@ -90,5 +90,6 @@ void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr);
    void amdgpu_ctx_init_sched(struct amdgpu_device *adev);
  +void amdgpu_ctx_disable_gpu_sched_load_balance(struct 
drm_sched_entity *entity);

    #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c

index 4981e443a884..64dad7ba74da 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -140,6 +140,7 @@ void amdgpu_job_free(struct amdgpu_job *job)
  int amdgpu_job_submit(struct amdgpu_job *job, struct 
drm_sched_entity *entity,

    void *owner, struct dma_fence **f)
  {
+    struct amdgpu_ring *ring = to_amdgpu_ring(entity->rq->sched);
  enum drm_sched_priority priority;
  int r;
  @@ -154,6 +155,8 @@ int amdgpu_job_submit(struct amdgpu_job *job, 
struct drm_sched_entity *entity,

  amdgpu_job_free_resources(job);
  priority = job->base.s_priority;
  drm_sched_entity_push_job(>base, entity);
+    if (ring->funcs->no_gpu_sched_loadbalance)
+    amdgpu_ctx_disable_gpu_sched_load_balance(entity);
    return 0;
  }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h

index 448c76cbf3ed..f78fe1a6912b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -115,6 +115,7 @@ struct amdgpu_ring_funcs {
  u32    nop;
  bool    support_64bit_ptrs;
  bool    no_user_fence;
+    bool    no_gpu_sched_loadbalance;
  unsigned    vmhub;
  unsigned    extra_dw;
  diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c 
b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c

index 

Re: [PATCH 1/1] drm/amdgpu: disable gpu_sched load balancer for vcn jobs

2020-03-11 Thread Nirmoy


On 3/11/20 9:02 PM, Andrey Grodzovsky wrote:


On 3/11/20 4:00 PM, Andrey Grodzovsky wrote:


On 3/11/20 4:00 PM, Nirmoy Das wrote:

VCN HW  doesn't support dynamic load balance on multiple
instances for a context. This patch modifies entity's
sched_list to a sched_list consist of only one drm scheduler.

Signed-off-by: Nirmoy Das 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c   |  4 
  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c  | 13 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h  |  1 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_job.c  |  3 +++
  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  1 +
  drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c    |  2 ++
  drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c    |  2 ++
  drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c    |  2 ++
  8 files changed, 28 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c

index 8304d0c87899..db0eef19c636 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -1203,6 +1203,7 @@ static int amdgpu_cs_submit(struct 
amdgpu_cs_parser *p,

  union drm_amdgpu_cs *cs)
  {
  struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
+    struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched);
  struct drm_sched_entity *entity = p->entity;
  enum drm_sched_priority priority;
  struct amdgpu_bo_list_entry *e;
@@ -1257,6 +1258,9 @@ static int amdgpu_cs_submit(struct 
amdgpu_cs_parser *p,

  priority = job->base.s_priority;
  drm_sched_entity_push_job(>base, entity);
  +    if (ring->funcs->no_gpu_sched_loadbalance)
+    amdgpu_ctx_disable_gpu_sched_load_balance(entity);
+



Why does this need to be done each time a job is submitted and not once 
in drm_sched_entity_init (same for amdgpu_job_submit below)?


Andrey



My bad - not in drm_sched_entity_init but in relevant amdgpu code.



Hi Andrey,

Do you mean drm_sched_job_init() or after creating VCN entities?


Nirmoy



Andrey






amdgpu_vm_move_to_lru_tail(p->adev, >vm);
    ttm_eu_fence_buffer_objects(>ticket, >validated, 
p->fence);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c

index fa575bdc03c8..1127e8f77721 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -559,6 +559,19 @@ void amdgpu_ctx_priority_override(struct 
amdgpu_ctx *ctx,

  }
  }
  +/**
+ * amdgpu_ctx_disable_gpu_sched_load_balance - disable gpu_sched's 
load balancer

+ * @entity: entity on which load balancer will be disabled
+ */
+void amdgpu_ctx_disable_gpu_sched_load_balance(struct 
drm_sched_entity *entity)

+{
+    struct drm_gpu_scheduler **scheds = >rq->sched;
+
+    /* disable gpu_sched's job load balancer by assigning only one */
+    /* drm scheduler to the entity */
+    drm_sched_entity_modify_sched(entity, scheds, 1);
+}
+
  int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx,
 struct drm_sched_entity *entity)
  {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h

index de490f183af2..3a2f900b8000 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
@@ -90,5 +90,6 @@ void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr);
    void amdgpu_ctx_init_sched(struct amdgpu_device *adev);
  +void amdgpu_ctx_disable_gpu_sched_load_balance(struct 
drm_sched_entity *entity);

    #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c

index 4981e443a884..64dad7ba74da 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -140,6 +140,7 @@ void amdgpu_job_free(struct amdgpu_job *job)
  int amdgpu_job_submit(struct amdgpu_job *job, struct 
drm_sched_entity *entity,

    void *owner, struct dma_fence **f)
  {
+    struct amdgpu_ring *ring = to_amdgpu_ring(entity->rq->sched);
  enum drm_sched_priority priority;
  int r;
  @@ -154,6 +155,8 @@ int amdgpu_job_submit(struct amdgpu_job *job, 
struct drm_sched_entity *entity,

  amdgpu_job_free_resources(job);
  priority = job->base.s_priority;
  drm_sched_entity_push_job(>base, entity);
+    if (ring->funcs->no_gpu_sched_loadbalance)
+    amdgpu_ctx_disable_gpu_sched_load_balance(entity);
    return 0;
  }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h

index 448c76cbf3ed..f78fe1a6912b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -115,6 +115,7 @@ struct amdgpu_ring_funcs {
  u32    nop;
  bool    support_64bit_ptrs;
  bool    no_user_fence;
+    bool    no_gpu_sched_loadbalance;
  unsigned    vmhub;
  unsigned    extra_dw;
  diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c 
b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c

index 

Re: [PATCH 1/1] drm/amdgpu: disable gpu_sched load balancer for vcn jobs

2020-03-11 Thread James Zhu



On 2020-03-11 4:00 p.m., Nirmoy Das wrote:

VCN HW  doesn't support dynamic load balance on multiple
instances for a context. This patch modifies entity's
sched_list to a sched_list consist of only one drm scheduler.

Signed-off-by: Nirmoy Das 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c   |  4 
  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c  | 13 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h  |  1 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_job.c  |  3 +++
  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  1 +
  drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c|  2 ++
  drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c|  2 ++
  drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c|  2 ++
  8 files changed, 28 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 8304d0c87899..db0eef19c636 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -1203,6 +1203,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
union drm_amdgpu_cs *cs)
  {
struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
+   struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched);
struct drm_sched_entity *entity = p->entity;
enum drm_sched_priority priority;
struct amdgpu_bo_list_entry *e;
@@ -1257,6 +1258,9 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
priority = job->base.s_priority;
drm_sched_entity_push_job(>base, entity);
  
+	if (ring->funcs->no_gpu_sched_loadbalance)

+   amdgpu_ctx_disable_gpu_sched_load_balance(entity);


Does this mean that only vcn IP instances 0 dec/enc will be scheduled?

Best Regards!

James


+
amdgpu_vm_move_to_lru_tail(p->adev, >vm);
  
  	ttm_eu_fence_buffer_objects(>ticket, >validated, p->fence);

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index fa575bdc03c8..1127e8f77721 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -559,6 +559,19 @@ void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
}
  }
  
+/**

+ * amdgpu_ctx_disable_gpu_sched_load_balance - disable gpu_sched's load 
balancer
+ * @entity: entity on which load balancer will be disabled
+ */
+void amdgpu_ctx_disable_gpu_sched_load_balance(struct drm_sched_entity *entity)
+{
+   struct drm_gpu_scheduler **scheds = >rq->sched;
+
+   /* disable gpu_sched's job load balancer by assigning only one */
+   /* drm scheduler to the entity */
+   drm_sched_entity_modify_sched(entity, scheds, 1);
+}
+
  int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx,
   struct drm_sched_entity *entity)
  {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
index de490f183af2..3a2f900b8000 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
@@ -90,5 +90,6 @@ void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr);
  
  void amdgpu_ctx_init_sched(struct amdgpu_device *adev);
  
+void amdgpu_ctx_disable_gpu_sched_load_balance(struct drm_sched_entity *entity);
  
  #endif

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 4981e443a884..64dad7ba74da 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -140,6 +140,7 @@ void amdgpu_job_free(struct amdgpu_job *job)
  int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity,
  void *owner, struct dma_fence **f)
  {
+   struct amdgpu_ring *ring = to_amdgpu_ring(entity->rq->sched);
enum drm_sched_priority priority;
int r;
  
@@ -154,6 +155,8 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity,

amdgpu_job_free_resources(job);
priority = job->base.s_priority;
drm_sched_entity_push_job(>base, entity);
+   if (ring->funcs->no_gpu_sched_loadbalance)
+   amdgpu_ctx_disable_gpu_sched_load_balance(entity);
  
  	return 0;

  }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 448c76cbf3ed..f78fe1a6912b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -115,6 +115,7 @@ struct amdgpu_ring_funcs {
u32 nop;
boolsupport_64bit_ptrs;
boolno_user_fence;
+   boolno_gpu_sched_loadbalance;
unsignedvmhub;
unsignedextra_dw;
  
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c

index 71f61afdc655..749ccdb5fbfb 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
@@ -1871,6 +1871,7 @@ static const struct amdgpu_ring_funcs 

Re: [PATCH 1/1] drm/amdgpu: disable gpu_sched load balancer for vcn jobs

2020-03-11 Thread Andrey Grodzovsky


On 3/11/20 4:00 PM, Andrey Grodzovsky wrote:


On 3/11/20 4:00 PM, Nirmoy Das wrote:

VCN HW  doesn't support dynamic load balance on multiple
instances for a context. This patch modifies entity's
sched_list to a sched_list consist of only one drm scheduler.

Signed-off-by: Nirmoy Das 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c   |  4 
  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c  | 13 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h  |  1 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_job.c  |  3 +++
  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  1 +
  drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c    |  2 ++
  drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c    |  2 ++
  drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c    |  2 ++
  8 files changed, 28 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c

index 8304d0c87899..db0eef19c636 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -1203,6 +1203,7 @@ static int amdgpu_cs_submit(struct 
amdgpu_cs_parser *p,

  union drm_amdgpu_cs *cs)
  {
  struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
+    struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched);
  struct drm_sched_entity *entity = p->entity;
  enum drm_sched_priority priority;
  struct amdgpu_bo_list_entry *e;
@@ -1257,6 +1258,9 @@ static int amdgpu_cs_submit(struct 
amdgpu_cs_parser *p,

  priority = job->base.s_priority;
  drm_sched_entity_push_job(>base, entity);
  +    if (ring->funcs->no_gpu_sched_loadbalance)
+    amdgpu_ctx_disable_gpu_sched_load_balance(entity);
+



Why does this need to be done each time a job is submitted and not once in 
drm_sched_entity_init (same for amdgpu_job_submit below)?


Andrey



My bad - not in drm_sched_entity_init but in relevant amdgpu code.

Andrey






amdgpu_vm_move_to_lru_tail(p->adev, >vm);
    ttm_eu_fence_buffer_objects(>ticket, >validated, 
p->fence);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c

index fa575bdc03c8..1127e8f77721 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -559,6 +559,19 @@ void amdgpu_ctx_priority_override(struct 
amdgpu_ctx *ctx,

  }
  }
  +/**
+ * amdgpu_ctx_disable_gpu_sched_load_balance - disable gpu_sched's 
load balancer

+ * @entity: entity on which load balancer will be disabled
+ */
+void amdgpu_ctx_disable_gpu_sched_load_balance(struct 
drm_sched_entity *entity)

+{
+    struct drm_gpu_scheduler **scheds = >rq->sched;
+
+    /* disable gpu_sched's job load balancer by assigning only one */
+    /* drm scheduler to the entity */
+    drm_sched_entity_modify_sched(entity, scheds, 1);
+}
+
  int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx,
 struct drm_sched_entity *entity)
  {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h

index de490f183af2..3a2f900b8000 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
@@ -90,5 +90,6 @@ void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr);
    void amdgpu_ctx_init_sched(struct amdgpu_device *adev);
  +void amdgpu_ctx_disable_gpu_sched_load_balance(struct 
drm_sched_entity *entity);

    #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c

index 4981e443a884..64dad7ba74da 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -140,6 +140,7 @@ void amdgpu_job_free(struct amdgpu_job *job)
  int amdgpu_job_submit(struct amdgpu_job *job, struct 
drm_sched_entity *entity,

    void *owner, struct dma_fence **f)
  {
+    struct amdgpu_ring *ring = to_amdgpu_ring(entity->rq->sched);
  enum drm_sched_priority priority;
  int r;
  @@ -154,6 +155,8 @@ int amdgpu_job_submit(struct amdgpu_job *job, 
struct drm_sched_entity *entity,

  amdgpu_job_free_resources(job);
  priority = job->base.s_priority;
  drm_sched_entity_push_job(>base, entity);
+    if (ring->funcs->no_gpu_sched_loadbalance)
+    amdgpu_ctx_disable_gpu_sched_load_balance(entity);
    return 0;
  }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h

index 448c76cbf3ed..f78fe1a6912b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -115,6 +115,7 @@ struct amdgpu_ring_funcs {
  u32    nop;
  bool    support_64bit_ptrs;
  bool    no_user_fence;
+    bool    no_gpu_sched_loadbalance;
  unsigned    vmhub;
  unsigned    extra_dw;
  diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c 
b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c

index 71f61afdc655..749ccdb5fbfb 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
@@ -1871,6 +1871,7 @@ static const 

Re: [PATCH 1/1] drm/amdgpu: disable gpu_sched load balancer for vcn jobs

2020-03-11 Thread Andrey Grodzovsky



On 3/11/20 4:00 PM, Nirmoy Das wrote:

VCN HW  doesn't support dynamic load balance on multiple
instances for a context. This patch modifies entity's
sched_list to a sched_list consist of only one drm scheduler.

Signed-off-by: Nirmoy Das 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c   |  4 
  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c  | 13 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h  |  1 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_job.c  |  3 +++
  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  1 +
  drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c|  2 ++
  drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c|  2 ++
  drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c|  2 ++
  8 files changed, 28 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 8304d0c87899..db0eef19c636 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -1203,6 +1203,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
union drm_amdgpu_cs *cs)
  {
struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
+   struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched);
struct drm_sched_entity *entity = p->entity;
enum drm_sched_priority priority;
struct amdgpu_bo_list_entry *e;
@@ -1257,6 +1258,9 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
priority = job->base.s_priority;
drm_sched_entity_push_job(>base, entity);
  
+	if (ring->funcs->no_gpu_sched_loadbalance)

+   amdgpu_ctx_disable_gpu_sched_load_balance(entity);
+



Why does this need to be done each time a job is submitted and not once in 
drm_sched_entity_init (same for amdgpu_job_submit below)?


Andrey



amdgpu_vm_move_to_lru_tail(p->adev, >vm);
  
  	ttm_eu_fence_buffer_objects(>ticket, >validated, p->fence);

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index fa575bdc03c8..1127e8f77721 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -559,6 +559,19 @@ void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
}
  }
  
+/**

+ * amdgpu_ctx_disable_gpu_sched_load_balance - disable gpu_sched's load 
balancer
+ * @entity: entity on which load balancer will be disabled
+ */
+void amdgpu_ctx_disable_gpu_sched_load_balance(struct drm_sched_entity *entity)
+{
+   struct drm_gpu_scheduler **scheds = >rq->sched;
+
+   /* disable gpu_sched's job load balancer by assigning only one */
+   /* drm scheduler to the entity */
+   drm_sched_entity_modify_sched(entity, scheds, 1);
+}
+
  int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx,
   struct drm_sched_entity *entity)
  {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
index de490f183af2..3a2f900b8000 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
@@ -90,5 +90,6 @@ void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr);
  
  void amdgpu_ctx_init_sched(struct amdgpu_device *adev);
  
+void amdgpu_ctx_disable_gpu_sched_load_balance(struct drm_sched_entity *entity);
  
  #endif

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 4981e443a884..64dad7ba74da 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -140,6 +140,7 @@ void amdgpu_job_free(struct amdgpu_job *job)
  int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity,
  void *owner, struct dma_fence **f)
  {
+   struct amdgpu_ring *ring = to_amdgpu_ring(entity->rq->sched);
enum drm_sched_priority priority;
int r;
  
@@ -154,6 +155,8 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity,

amdgpu_job_free_resources(job);
priority = job->base.s_priority;
drm_sched_entity_push_job(>base, entity);
+   if (ring->funcs->no_gpu_sched_loadbalance)
+   amdgpu_ctx_disable_gpu_sched_load_balance(entity);
  
  	return 0;

  }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 448c76cbf3ed..f78fe1a6912b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -115,6 +115,7 @@ struct amdgpu_ring_funcs {
u32 nop;
boolsupport_64bit_ptrs;
boolno_user_fence;
+   boolno_gpu_sched_loadbalance;
unsignedvmhub;
unsignedextra_dw;
  
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c

index 71f61afdc655..749ccdb5fbfb 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
@@ -1871,6 +1871,7 @@ 

[PATCH 1/1] drm/amdgpu: disable gpu_sched load balancer for vcn jobs

2020-03-11 Thread Nirmoy Das
VCN HW doesn't support dynamic load balancing across multiple
instances for a context. This patch modifies the entity's
sched_list to a sched_list consisting of only one drm scheduler.

Signed-off-by: Nirmoy Das 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c   |  4 
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c  | 13 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h  |  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.c  |  3 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  1 +
 drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c|  2 ++
 drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c|  2 ++
 drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c|  2 ++
 8 files changed, 28 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 8304d0c87899..db0eef19c636 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -1203,6 +1203,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
union drm_amdgpu_cs *cs)
 {
struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
+   struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched);
struct drm_sched_entity *entity = p->entity;
enum drm_sched_priority priority;
struct amdgpu_bo_list_entry *e;
@@ -1257,6 +1258,9 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
priority = job->base.s_priority;
drm_sched_entity_push_job(&job->base, entity);
 
+   if (ring->funcs->no_gpu_sched_loadbalance)
+   amdgpu_ctx_disable_gpu_sched_load_balance(entity);
+
amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm);
 
ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index fa575bdc03c8..1127e8f77721 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -559,6 +559,19 @@ void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
}
 }
 
+/**
+ * amdgpu_ctx_disable_gpu_sched_load_balance - disable gpu_sched's load 
balancer
+ * @entity: entity on which load balancer will be disabled
+ */
+void amdgpu_ctx_disable_gpu_sched_load_balance(struct drm_sched_entity *entity)
+{
+   struct drm_gpu_scheduler **scheds = &entity->rq->sched;
+
+   /* disable gpu_sched's job load balancer by assigning only one */
+   /* drm scheduler to the entity */
+   drm_sched_entity_modify_sched(entity, scheds, 1);
+}
+
 int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx,
   struct drm_sched_entity *entity)
 {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
index de490f183af2..3a2f900b8000 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
@@ -90,5 +90,6 @@ void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr);
 
 void amdgpu_ctx_init_sched(struct amdgpu_device *adev);
 
+void amdgpu_ctx_disable_gpu_sched_load_balance(struct drm_sched_entity 
*entity);
 
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 4981e443a884..64dad7ba74da 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -140,6 +140,7 @@ void amdgpu_job_free(struct amdgpu_job *job)
 int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity,
  void *owner, struct dma_fence **f)
 {
+   struct amdgpu_ring *ring = to_amdgpu_ring(entity->rq->sched);
enum drm_sched_priority priority;
int r;
 
@@ -154,6 +155,8 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct 
drm_sched_entity *entity,
amdgpu_job_free_resources(job);
priority = job->base.s_priority;
drm_sched_entity_push_job(&job->base, entity);
+   if (ring->funcs->no_gpu_sched_loadbalance)
+   amdgpu_ctx_disable_gpu_sched_load_balance(entity);
 
return 0;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 448c76cbf3ed..f78fe1a6912b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -115,6 +115,7 @@ struct amdgpu_ring_funcs {
u32 nop;
boolsupport_64bit_ptrs;
boolno_user_fence;
+   boolno_gpu_sched_loadbalance;
unsignedvmhub;
unsignedextra_dw;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c 
b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
index 71f61afdc655..749ccdb5fbfb 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
@@ -1871,6 +1871,7 @@ static const struct amdgpu_ring_funcs 
vcn_v1_0_dec_ring_vm_funcs = {
.align_mask = 0xf,
.support_64bit_ptrs = false,
.no_user_fence = true,
+   .no_gpu_sched_loadbalance = true,

[PATCH 1/1] drm/amdgpu: disable gpu_sched load balancer for vcn jobs

2020-03-11 Thread Nirmoy Das
VCN HW doesn't support dynamic load balancing across multiple
instances for a context. This patch modifies the entity's
sched_list to a sched_list consisting of only one drm scheduler.

Signed-off-by: Nirmoy Das 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c   |  4 
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c  | 14 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h  |  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.c  |  3 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  1 +
 drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c|  2 ++
 drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c|  2 ++
 drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c|  2 ++
 8 files changed, 29 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 8304d0c87899..db0eef19c636 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -1203,6 +1203,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
union drm_amdgpu_cs *cs)
 {
struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
+   struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched);
struct drm_sched_entity *entity = p->entity;
enum drm_sched_priority priority;
struct amdgpu_bo_list_entry *e;
@@ -1257,6 +1258,9 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
priority = job->base.s_priority;
drm_sched_entity_push_job(&job->base, entity);
 
+   if (ring->funcs->no_gpu_sched_loadbalance)
+   amdgpu_ctx_disable_gpu_sched_load_balance(entity);
+
amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm);
 
ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index fa575bdc03c8..d699207d6266 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -559,6 +559,20 @@ void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
}
 }
 
+/**
+ * amdgpu_ctx_disable_gpu_sched_load_balance - disable gpu_sched's load 
balancer
+ * @entity: entity on which load balancer will be disabled
+ */
+void amdgpu_ctx_disable_gpu_sched_load_balance(struct drm_sched_entity *entity)
+{
+   struct drm_gpu_scheduler **scheds = &entity->rq->sched;
+
+   /* disable gpu_sched's job load balancer by assigning only one */
+   /* drm scheduler to the entity */
+   drm_sched_entity_modify_sched(entity, scheds, 1);
+
+}
+
 int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx,
   struct drm_sched_entity *entity)
 {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
index de490f183af2..3a2f900b8000 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
@@ -90,5 +90,6 @@ void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr);
 
 void amdgpu_ctx_init_sched(struct amdgpu_device *adev);
 
+void amdgpu_ctx_disable_gpu_sched_load_balance(struct drm_sched_entity 
*entity);
 
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 4981e443a884..64dad7ba74da 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -140,6 +140,7 @@ void amdgpu_job_free(struct amdgpu_job *job)
 int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity,
  void *owner, struct dma_fence **f)
 {
+   struct amdgpu_ring *ring = to_amdgpu_ring(entity->rq->sched);
enum drm_sched_priority priority;
int r;
 
@@ -154,6 +155,8 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct 
drm_sched_entity *entity,
amdgpu_job_free_resources(job);
priority = job->base.s_priority;
drm_sched_entity_push_job(&job->base, entity);
+   if (ring->funcs->no_gpu_sched_loadbalance)
+   amdgpu_ctx_disable_gpu_sched_load_balance(entity);
 
return 0;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 448c76cbf3ed..f78fe1a6912b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -115,6 +115,7 @@ struct amdgpu_ring_funcs {
u32 nop;
boolsupport_64bit_ptrs;
boolno_user_fence;
+   boolno_gpu_sched_loadbalance;
unsignedvmhub;
unsignedextra_dw;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c 
b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
index 71f61afdc655..749ccdb5fbfb 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
@@ -1871,6 +1871,7 @@ static const struct amdgpu_ring_funcs 
vcn_v1_0_dec_ring_vm_funcs = {
.align_mask = 0xf,
.support_64bit_ptrs = false,
.no_user_fence = true,
+   .no_gpu_sched_loadbalance = true,

[PATCH hmm 1/8] mm/hmm: add missing unmaps of the ptep during hmm_vma_handle_pte()

2020-03-11 Thread Jason Gunthorpe
From: Jason Gunthorpe 

Many of the direct returns of error skipped doing the pte_unmap(). All non
zero exit paths must unmap the pte.

The pte_unmap() is split unnaturally like this because some of the error
exit paths trigger a sleep and must release the lock before sleeping.

Fixes: 992de9a8b751 ("mm/hmm: allow to mirror vma of a file on a DAX backed 
filesystem")
Fixes: 53f5c3f489ec ("mm/hmm: factor out pte and pmd handling to simplify 
hmm_vma_walk_pmd()")
Signed-off-by: Jason Gunthorpe 
---
 mm/hmm.c | 8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/mm/hmm.c b/mm/hmm.c
index 72e5a6d9a41756..35f85424176d14 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -325,6 +325,7 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, 
unsigned long addr,
}
 
/* Report error for everything else */
+   pte_unmap(ptep);
*pfn = range->values[HMM_PFN_ERROR];
return -EFAULT;
} else {
@@ -339,10 +340,13 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, 
unsigned long addr,
if (pte_devmap(pte)) {
hmm_vma_walk->pgmap = get_dev_pagemap(pte_pfn(pte),
  hmm_vma_walk->pgmap);
-   if (unlikely(!hmm_vma_walk->pgmap))
+   if (unlikely(!hmm_vma_walk->pgmap)) {
+   pte_unmap(ptep);
return -EBUSY;
+   }
} else if (IS_ENABLED(CONFIG_ARCH_HAS_PTE_SPECIAL) && pte_special(pte)) 
{
if (!is_zero_pfn(pte_pfn(pte))) {
+   pte_unmap(ptep);
*pfn = range->values[HMM_PFN_SPECIAL];
return -EFAULT;
}
@@ -437,7 +441,7 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp,
 
r = hmm_vma_handle_pte(walk, addr, end, pmdp, ptep, &pfns[i]);
if (r) {
-   /* hmm_vma_handle_pte() did unmap pte directory */
+   /* hmm_vma_handle_pte() did pte_unmap() */
hmm_vma_walk->last = addr;
return r;
}
-- 
2.25.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH hmm 7/8] mm/hmm: return -EFAULT when setting HMM_PFN_ERROR on requested valid pages

2020-03-11 Thread Jason Gunthorpe
From: Jason Gunthorpe 

hmm_range_fault() should never return 0 if the caller requested a valid
page, but the pfns output for that page would be HMM_PFN_ERROR.

hmm_pte_need_fault() must always be called before setting HMM_PFN_ERROR to
detect if the page is in faulting mode or not.

Fix two cases in hmm_vma_walk_pmd() and reorganize some of the duplicated
code.

Fixes: d08faca018c4 ("mm/hmm: properly handle migration pmd")
Fixes: da4c3c735ea4 ("mm/hmm/mirror: helper to snapshot CPU page table")
Signed-off-by: Jason Gunthorpe 
---
 mm/hmm.c | 38 +-
 1 file changed, 21 insertions(+), 17 deletions(-)

diff --git a/mm/hmm.c b/mm/hmm.c
index bf676cfef3e8ee..f61fddf2ef6505 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -363,8 +363,10 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp,
 {
struct hmm_vma_walk *hmm_vma_walk = walk->private;
struct hmm_range *range = hmm_vma_walk->range;
-   uint64_t *pfns = range->pfns;
-   unsigned long addr = start, i;
+   uint64_t *pfns = &range->pfns[(start - range->start) >> PAGE_SHIFT];
+   unsigned long npages = (end - start) >> PAGE_SHIFT;
+   unsigned long addr = start;
+   bool fault, write_fault;
pte_t *ptep;
pmd_t pmd;
 
@@ -374,14 +376,6 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp,
return hmm_vma_walk_hole(start, end, -1, walk);
 
if (thp_migration_supported() && is_pmd_migration_entry(pmd)) {
-   bool fault, write_fault;
-   unsigned long npages;
-   uint64_t *pfns;
-
-   i = (addr - range->start) >> PAGE_SHIFT;
-   npages = (end - addr) >> PAGE_SHIFT;
-   pfns = &range->pfns[i];
-
hmm_range_need_fault(hmm_vma_walk, pfns, npages,
 0, &fault, &write_fault);
if (fault || write_fault) {
@@ -390,8 +384,15 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp,
return -EBUSY;
}
return hmm_pfns_fill(start, end, range, HMM_PFN_NONE);
-   } else if (!pmd_present(pmd))
+   }
+
+   if (!pmd_present(pmd)) {
+   hmm_range_need_fault(hmm_vma_walk, pfns, npages, 0, &fault,
+&write_fault);
+   if (fault || write_fault)
+   return -EFAULT;
return hmm_pfns_fill(start, end, range, HMM_PFN_ERROR);
+   }
 
if (pmd_devmap(pmd) || pmd_trans_huge(pmd)) {
/*
@@ -408,8 +409,7 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp,
if (!pmd_devmap(pmd) && !pmd_trans_huge(pmd))
goto again;
 
-   i = (addr - range->start) >> PAGE_SHIFT;
-   return hmm_vma_handle_pmd(walk, addr, end, [i], pmd);
+   return hmm_vma_handle_pmd(walk, addr, end, pfns, pmd);
}
 
/*
@@ -418,15 +418,19 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp,
 * entry pointing to pte directory or it is a bad pmd that will not
 * recover.
 */
-   if (pmd_bad(pmd))
+   if (pmd_bad(pmd)) {
+   hmm_range_need_fault(hmm_vma_walk, pfns, npages, 0, &fault,
+&write_fault);
+   if (fault || write_fault)
+   return -EFAULT;
return hmm_pfns_fill(start, end, range, HMM_PFN_ERROR);
+   }
 
ptep = pte_offset_map(pmdp, addr);
-   i = (addr - range->start) >> PAGE_SHIFT;
-   for (; addr < end; addr += PAGE_SIZE, ptep++, i++) {
+   for (; addr < end; addr += PAGE_SIZE, ptep++, pfns++) {
int r;
 
-   r = hmm_vma_handle_pte(walk, addr, end, pmdp, ptep, [i]);
+   r = hmm_vma_handle_pte(walk, addr, end, pmdp, ptep, pfns);
if (r) {
/* hmm_vma_handle_pte() did pte_unmap() */
hmm_vma_walk->last = addr;
-- 
2.25.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH hmm 3/8] mm/hmm: do not call hmm_vma_walk_hole() while holding a spinlock

2020-03-11 Thread Jason Gunthorpe
From: Jason Gunthorpe 

This eventually calls into handle_mm_fault() which is a sleeping function.
Release the lock first.

hmm_vma_walk_hole() does not touch the contents of the PUD, so it does not
need the lock.

Fixes: 3afc423632a1 ("mm: pagewalk: add p4d_entry() and pgd_entry()")
Cc: Steven Price 
Signed-off-by: Jason Gunthorpe 
---
 mm/hmm.c | 14 +++---
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/mm/hmm.c b/mm/hmm.c
index 9e8f68eb83287a..32dcbfd3908315 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -473,8 +473,8 @@ static int hmm_vma_walk_pud(pud_t *pudp, unsigned long 
start, unsigned long end,
 
pud = READ_ONCE(*pudp);
if (pud_none(pud)) {
-   ret = hmm_vma_walk_hole(start, end, -1, walk);
-   goto out_unlock;
+   spin_unlock(ptl);
+   return hmm_vma_walk_hole(start, end, -1, walk);
}
 
if (pud_huge(pud) && pud_devmap(pud)) {
@@ -483,8 +483,8 @@ static int hmm_vma_walk_pud(pud_t *pudp, unsigned long 
start, unsigned long end,
bool fault, write_fault;
 
if (!pud_present(pud)) {
-   ret = hmm_vma_walk_hole(start, end, -1, walk);
-   goto out_unlock;
+   spin_unlock(ptl);
+   return hmm_vma_walk_hole(start, end, -1, walk);
}
 
i = (addr - range->start) >> PAGE_SHIFT;
@@ -495,9 +495,9 @@ static int hmm_vma_walk_pud(pud_t *pudp, unsigned long 
start, unsigned long end,
hmm_range_need_fault(hmm_vma_walk, pfns, npages,
 cpu_flags, &fault, &write_fault);
if (fault || write_fault) {
-   ret = hmm_vma_walk_hole_(addr, end, fault,
-write_fault, walk);
-   goto out_unlock;
+   spin_unlock(ptl);
+   return hmm_vma_walk_hole_(addr, end, fault, write_fault,
+ walk);
}
 
pfn = pud_pfn(pud) + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
-- 
2.25.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH hmm 5/8] mm/hmm: add missing call to hmm_range_need_fault() before returning EFAULT

2020-03-11 Thread Jason Gunthorpe
From: Jason Gunthorpe 

All return paths that do EFAULT must call hmm_range_need_fault() to
determine if the user requires this page to be valid.

If the page cannot be made valid if the user later requires it, due to vma
flags in this case, then the return should be HMM_PFN_ERROR.

Fixes: a3e0d41c2b1f ("mm/hmm: improve driver API to work and wait over a range")
Signed-off-by: Jason Gunthorpe 
---
 mm/hmm.c | 19 ---
 1 file changed, 8 insertions(+), 11 deletions(-)

diff --git a/mm/hmm.c b/mm/hmm.c
index 5f5ccf13dd1e85..e10cd0adba7b37 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -582,18 +582,15 @@ static int hmm_vma_walk_test(unsigned long start, 
unsigned long end,
struct vm_area_struct *vma = walk->vma;
 
/*
-* Skip vma ranges that don't have struct page backing them or
-* map I/O devices directly.
-*/
-   if (vma->vm_flags & (VM_IO | VM_PFNMAP | VM_MIXEDMAP))
-   return -EFAULT;
-
-   /*
+* Skip vma ranges that don't have struct page backing them or map I/O
+* devices directly.
+*
 * If the vma does not allow read access, then assume that it does not
-* allow write access either. HMM does not support architectures
-* that allow write without read.
+* allow write access either. HMM does not support architectures that
+* allow write without read.
 */
-   if (!(vma->vm_flags & VM_READ)) {
+   if ((vma->vm_flags & (VM_IO | VM_PFNMAP | VM_MIXEDMAP)) ||
+   !(vma->vm_flags & VM_READ)) {
bool fault, write_fault;
 
/*
@@ -607,7 +604,7 @@ static int hmm_vma_walk_test(unsigned long start, unsigned 
long end,
if (fault || write_fault)
return -EFAULT;
 
-   hmm_pfns_fill(start, end, range, HMM_PFN_NONE);
+   hmm_pfns_fill(start, end, range, HMM_PFN_ERROR);
hmm_vma_walk->last = end;
 
/* Skip this vma and continue processing the next vma. */
-- 
2.25.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH hmm 8/8] mm/hmm: add missing call to hmm_pte_need_fault in HMM_PFN_SPECIAL handling

2020-03-11 Thread Jason Gunthorpe
From: Jason Gunthorpe 

Currently if a special PTE is encountered hmm_range_fault() immediately
returns EFAULT and sets the HMM_PFN_SPECIAL error output (which nothing
uses).

EFAULT should only be returned after testing with hmm_pte_need_fault().

Also pte_devmap() and pte_special() are exclusive, and there is no need to
check IS_ENABLED, pte_special() is stubbed out to return false on
unsupported architectures.

Fixes: 992de9a8b751 ("mm/hmm: allow to mirror vma of a file on a DAX backed 
filesystem")
Signed-off-by: Jason Gunthorpe 
---
 mm/hmm.c | 19 ---
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/mm/hmm.c b/mm/hmm.c
index f61fddf2ef6505..ca33d086bdc190 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -335,16 +335,21 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, 
unsigned long addr,
pte_unmap(ptep);
return -EBUSY;
}
-   } else if (IS_ENABLED(CONFIG_ARCH_HAS_PTE_SPECIAL) && pte_special(pte)) 
{
-   if (!is_zero_pfn(pte_pfn(pte))) {
+   }
+
+   /*
+* Since each architecture defines a struct page for the zero page, just
+* fall through and treat it like a normal page.
+*/
+   if (pte_special(pte) && !is_zero_pfn(pte_pfn(pte))) {
+   hmm_pte_need_fault(hmm_vma_walk, orig_pfn, 0, &fault,
+  &write_fault);
+   if (fault || write_fault) {
pte_unmap(ptep);
-   *pfn = range->values[HMM_PFN_SPECIAL];
return -EFAULT;
}
-   /*
-* Since each architecture defines a struct page for the zero
-* page, just fall through and treat it like a normal page.
-*/
+   *pfn = range->values[HMM_PFN_SPECIAL];
+   return 0;
}
 
*pfn = hmm_device_entry_from_pfn(range, pte_pfn(pte)) | cpu_flags;
-- 
2.25.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH hmm 4/8] mm/hmm: add missing pfns set to hmm_vma_walk_pmd()

2020-03-11 Thread Jason Gunthorpe
From: Jason Gunthorpe 

All success exit paths from the walker functions must set the pfns array.

A migration entry with no required fault is a HMM_PFN_NONE return, just
like the pte case.

Fixes: d08faca018c4 ("mm/hmm: properly handle migration pmd")
Signed-off-by: Jason Gunthorpe 
---
 mm/hmm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/hmm.c b/mm/hmm.c
index 32dcbfd3908315..5f5ccf13dd1e85 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -394,7 +394,7 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp,
pmd_migration_entry_wait(walk->mm, pmdp);
return -EBUSY;
}
-   return 0;
+   return hmm_pfns_fill(start, end, range, HMM_PFN_NONE);
} else if (!pmd_present(pmd))
return hmm_pfns_fill(start, end, range, HMM_PFN_ERROR);
 
-- 
2.25.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH hmm 2/8] mm/hmm: don't free the cached pgmap while scanning

2020-03-11 Thread Jason Gunthorpe
From: Jason Gunthorpe 

The pgmap is held in the hmm_vma_walk variable in hope of speeding up
future get_dev_pagemap() calls by hitting the same pointer. The algorithm
doesn't actually care about how long the pgmap is held for.

Move the put of the cached pgmap to after the walk is completed and delete
all the other now redundant puts.

This solves a possible leak of the reference in hmm_vma_walk_pmd() if a
hmm_vma_handle_pte() fails while looping.

Fixes: 992de9a8b751 ("mm/hmm: allow to mirror vma of a file on a DAX backed 
filesystem")
Signed-off-by: Jason Gunthorpe 
---
 mm/hmm.c | 31 +--
 1 file changed, 9 insertions(+), 22 deletions(-)

We talked about just deleting this stuff, but I think it makes a lot of sense for
hmm_range_fault() to trigger fault on devmap pages that are not compatible
with the caller - so lets just fix the leak on error path for now.

diff --git a/mm/hmm.c b/mm/hmm.c
index 35f85424176d14..9e8f68eb83287a 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -239,10 +239,6 @@ static int hmm_vma_handle_pmd(struct mm_walk *walk, 
unsigned long addr,
}
pfns[i] = hmm_device_entry_from_pfn(range, pfn) | cpu_flags;
}
-   if (hmm_vma_walk->pgmap) {
-   put_dev_pagemap(hmm_vma_walk->pgmap);
-   hmm_vma_walk->pgmap = NULL;
-   }
hmm_vma_walk->last = end;
return 0;
 }
@@ -360,10 +356,6 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, 
unsigned long addr,
return 0;
 
 fault:
-   if (hmm_vma_walk->pgmap) {
-   put_dev_pagemap(hmm_vma_walk->pgmap);
-   hmm_vma_walk->pgmap = NULL;
-   }
pte_unmap(ptep);
/* Fault any virtual address we were asked to fault */
return hmm_vma_walk_hole_(addr, end, fault, write_fault, walk);
@@ -446,16 +438,6 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp,
return r;
}
}
-   if (hmm_vma_walk->pgmap) {
-   /*
-* We do put_dev_pagemap() here and not in hmm_vma_handle_pte()
-* so that we can leverage get_dev_pagemap() optimization which
-* will not re-take a reference on a pgmap if we already have
-* one.
-*/
-   put_dev_pagemap(hmm_vma_walk->pgmap);
-   hmm_vma_walk->pgmap = NULL;
-   }
pte_unmap(ptep - 1);
 
hmm_vma_walk->last = addr;
@@ -529,10 +511,6 @@ static int hmm_vma_walk_pud(pud_t *pudp, unsigned long 
start, unsigned long end,
pfns[i] = hmm_device_entry_from_pfn(range, pfn) |
  cpu_flags;
}
-   if (hmm_vma_walk->pgmap) {
-   put_dev_pagemap(hmm_vma_walk->pgmap);
-   hmm_vma_walk->pgmap = NULL;
-   }
hmm_vma_walk->last = end;
goto out_unlock;
}
@@ -694,6 +672,15 @@ long hmm_range_fault(struct hmm_range *range, unsigned int 
flags)
return -EBUSY;
ret = walk_page_range(mm, hmm_vma_walk.last, range->end,
  _walk_ops, _vma_walk);
+   /*
+* A pgmap is kept cached in the hmm_vma_walk to avoid expensive
+* searching in the probably common case that the pgmap is the
+* same for the entire requested range.
+*/
+   if (hmm_vma_walk.pgmap) {
+   put_dev_pagemap(hmm_vma_walk.pgmap);
+   hmm_vma_walk.pgmap = NULL;
+   }
} while (ret == -EBUSY);
 
if (ret)
-- 
2.25.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH hmm 0/8] Various error case bug fixes for hmm_range_fault()

2020-03-11 Thread Jason Gunthorpe
From: Jason Gunthorpe 

The hmm_range_fault() flow is fairly complicated. The scheme allows the
caller to specify if it needs a usable result for each page, or if it only
needs the current page table status filled in. This mixture of behavior is
useful for a caller that wants to build a 'prefetch around fault'
algorithm.

Although we have no in-tree users of this capability, I am working on
having RDMA ODP work in this manner, and removing these bugs from
hmm_range_fault() is a necessary step.

The basic principles are:

 - If the caller did not ask for a VA to be valid then hmm_range_fault()
   should not fail because of that VA

 - If 0 is returned from hmm_range_fault() then the entire pfns array
   contains valid data

 - HMM_PFN_ERROR is set if faulting fails, or if asking for faulting
   would fail

 - A 0 return from hmm_range_fault() does not have HMM_PFN_ERROR in any
   VA's the caller asked to be valid

This series does not get there completely, I have a followup series
closing some more complex cases.

I tested this series using Ralph's hmm tester he posted a while back,
other testing would be appreciated.

Jason Gunthorpe (8):
  mm/hmm: add missing unmaps of the ptep during hmm_vma_handle_pte()
  mm/hmm: don't free the cached pgmap while scanning
  mm/hmm: do not call hmm_vma_walk_hole() while holding a spinlock
  mm/hmm: add missing pfns set to hmm_vma_walk_pmd()
  mm/hmm: add missing call to hmm_range_need_fault() before returning
EFAULT
  mm/hmm: reorganize how !pte_present is handled in hmm_vma_handle_pte()
  mm/hmm: return -EFAULT when setting HMM_PFN_ERROR on requested valid
pages
  mm/hmm: add missing call to hmm_pte_need_fault in HMM_PFN_SPECIAL
handling

 mm/hmm.c | 166 ++-
 1 file changed, 79 insertions(+), 87 deletions(-)

-- 
2.25.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH hmm 6/8] mm/hmm: reorganize how !pte_present is handled in hmm_vma_handle_pte()

2020-03-11 Thread Jason Gunthorpe
From: Jason Gunthorpe 

The intention with this code is to determine if the caller required the
pages to be valid, and if so, then take some action to make them valid.
The action varies depending on the page type.

In all cases, if the caller doesn't ask for the page, then
hmm_range_fault() should not return an error.

Revise the implementation to be clearer, and fix some bugs:

 - hmm_pte_need_fault() must always be called before testing fault or
   write_fault otherwise the defaults of false apply and the if()'s don't
   work. This was missed on the is_migration_entry() branch

 - -EFAULT should not be returned unless hmm_pte_need_fault() indicates
   fault is required - ie snapshotting should not fail.

 - For !pte_present() the cpu_flags are always 0, except in the special
   case of is_device_private_entry(), calling pte_to_hmm_pfn_flags() is
   confusing.

Reorganize the flow so that it always follows the pattern of calling
hmm_pte_need_fault() and then checking fault || write_fault.

Fixes: 2aee09d8c116 ("mm/hmm: change hmm_vma_fault() to allow write fault on 
page basis")
Signed-off-by: Jason Gunthorpe 
---
 mm/hmm.c | 35 +++
 1 file changed, 15 insertions(+), 20 deletions(-)

diff --git a/mm/hmm.c b/mm/hmm.c
index e10cd0adba7b37..bf676cfef3e8ee 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -282,15 +282,6 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, 
unsigned long addr,
if (!pte_present(pte)) {
swp_entry_t entry = pte_to_swp_entry(pte);
 
-   if (!non_swap_entry(entry)) {
-   cpu_flags = pte_to_hmm_pfn_flags(range, pte);
-   hmm_pte_need_fault(hmm_vma_walk, orig_pfn, cpu_flags,
-  &fault, &write_fault);
-   if (fault || write_fault)
-   goto fault;
-   return 0;
-   }
-
/*
 * This is a special swap entry, ignore migration, use
 * device and report anything else as error.
@@ -310,26 +301,30 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, 
unsigned long addr,
return 0;
}
 
-   if (is_migration_entry(entry)) {
-   if (fault || write_fault) {
-   pte_unmap(ptep);
-   hmm_vma_walk->last = addr;
-   migration_entry_wait(walk->mm, pmdp, addr);
-   return -EBUSY;
-   }
+   hmm_pte_need_fault(hmm_vma_walk, orig_pfn, 0, &fault,
+  &write_fault);
+   if (!fault && !write_fault)
return 0;
+
+   if (!non_swap_entry(entry))
+   goto fault;
+
+   if (is_migration_entry(entry)) {
+   pte_unmap(ptep);
+   hmm_vma_walk->last = addr;
+   migration_entry_wait(walk->mm, pmdp, addr);
+   return -EBUSY;
}
 
/* Report error for everything else */
pte_unmap(ptep);
*pfn = range->values[HMM_PFN_ERROR];
return -EFAULT;
-   } else {
-   cpu_flags = pte_to_hmm_pfn_flags(range, pte);
-   hmm_pte_need_fault(hmm_vma_walk, orig_pfn, cpu_flags,
-  &fault, &write_fault);
}
 
+   cpu_flags = pte_to_hmm_pfn_flags(range, pte);
+   hmm_pte_need_fault(hmm_vma_walk, orig_pfn, cpu_flags, &fault,
+  &write_fault);
if (fault || write_fault)
goto fault;
 
-- 
2.25.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [RFC PATCH 2/2] drm/amdgpu: disable gpu load balancer for vcn jobs

2020-03-11 Thread Nirmoy


On 3/11/20 7:03 PM, Christian König wrote:

Am 11.03.20 um 18:18 schrieb Nirmoy Das:

VCN HW doesn't support dynamic load balancing across multiple
instances for a context. This modifies the entity's sched_list
to a sched_list consisting of only one drm scheduler.

Signed-off-by: Nirmoy Das 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  |  1 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 25 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h |  2 ++
  drivers/gpu/drm/amd/amdgpu/amdgpu_job.c |  1 +
  4 files changed, 29 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c

index 8304d0c87899..00032093d8a9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -1257,6 +1257,7 @@ static int amdgpu_cs_submit(struct 
amdgpu_cs_parser *p,

  priority = job->base.s_priority;
  drm_sched_entity_push_job(&job->base, entity);
  +    amdgpu_ctx_limit_load_balance(entity);
  amdgpu_vm_move_to_lru_tail(p->adev, >vm);
    ttm_eu_fence_buffer_objects(>ticket, >validated, 
p->fence);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c

index fa575bdc03c8..57b49188306d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -139,6 +139,7 @@ static int amdgpu_ctx_init_entity(struct 
amdgpu_ctx *ctx, const u32 hw_ip, const

  if (r)
  goto error_free_entity;
  +    entity->hw_ip = hw_ip;
  ctx->entities[hw_ip][ring] = entity;
  return 0;
  @@ -559,6 +560,30 @@ void amdgpu_ctx_priority_override(struct 
amdgpu_ctx *ctx,

  }
  }
  +static void limit_vcn_load_balance(struct amdgpu_ctx_entity *centity)
+{
+    struct drm_gpu_scheduler **scheds = &centity->entity.rq->sched;
+
+    if (drm_sched_entity_num_jobs(&centity->entity) == 1)


That check doesn't work correctly, the job might actually already be 
processed when we hit here.



Okay now I know what Andrey meant. I will resend a updated patch.


Thanks,

Nirmoy





+ drm_sched_entity_modify_sched(&centity->entity, scheds, 1);


Just always update the scheduler here.


+
+}
+
+void amdgpu_ctx_limit_load_balance(struct drm_sched_entity *entity)
+{
+    struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
+
+    if (!centity)
+    return;


That check looks superfluous to me.


+
+    switch (centity->hw_ip) {


Better get the ring from entity->rq->sched instead.


+    case AMDGPU_HW_IP_VCN_DEC:
+    case AMDGPU_HW_IP_VCN_ENC:


Maybe better to make that a flag in the ring functions, but this way 
works as well.


Regards,
Christian.


+    limit_vcn_load_balance(centity);
+    }
+
+}
+
  int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx,
 struct drm_sched_entity *entity)
  {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h

index de490f183af2..d52d8d562d77 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
@@ -33,6 +33,7 @@ struct amdgpu_fpriv;
    struct amdgpu_ctx_entity {
  uint64_t    sequence;
+    uint32_t    hw_ip;
  struct drm_sched_entity    entity;
  struct dma_fence    *fences[];
  };
@@ -90,5 +91,6 @@ void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr);
    void amdgpu_ctx_init_sched(struct amdgpu_device *adev);
  +void amdgpu_ctx_limit_load_balance(struct drm_sched_entity *entity);
    #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c

index 4981e443a884..955d12bc89ae 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -154,6 +154,7 @@ int amdgpu_job_submit(struct amdgpu_job *job, 
struct drm_sched_entity *entity,

  amdgpu_job_free_resources(job);
  priority = job->base.s_priority;
  drm_sched_entity_push_job(>base, entity);
+    amdgpu_ctx_limit_load_balance(entity);
    return 0;
  }



___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [RFC PATCH 2/2] drm/amdgpu: disable gpu load balancer for vcn jobs

2020-03-11 Thread Christian König

Am 11.03.20 um 18:18 schrieb Nirmoy Das:

VCN HW doesn't support dynamic load balancing across multiple
instances for a context. This modifies the entity's sched_list
to a sched_list consisting of only one drm scheduler.

Signed-off-by: Nirmoy Das 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  |  1 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 25 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h |  2 ++
  drivers/gpu/drm/amd/amdgpu/amdgpu_job.c |  1 +
  4 files changed, 29 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 8304d0c87899..00032093d8a9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -1257,6 +1257,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
priority = job->base.s_priority;
drm_sched_entity_push_job(>base, entity);
  
+	amdgpu_ctx_limit_load_balance(entity);

amdgpu_vm_move_to_lru_tail(p->adev, >vm);
  
  	ttm_eu_fence_buffer_objects(>ticket, >validated, p->fence);

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index fa575bdc03c8..57b49188306d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -139,6 +139,7 @@ static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, 
const u32 hw_ip, const
if (r)
goto error_free_entity;
  
+	entity->hw_ip = hw_ip;

ctx->entities[hw_ip][ring] = entity;
return 0;
  
@@ -559,6 +560,30 @@ void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,

}
  }
  
+static void limit_vcn_load_balance(struct amdgpu_ctx_entity *centity)

+{
+   struct drm_gpu_scheduler **scheds = >entity.rq->sched;
+
+   if (drm_sched_entity_num_jobs(>entity) == 1)


That check doesn't work correctly, the job might actually already be 
processed when we hit here.



+   drm_sched_entity_modify_sched(>entity, scheds, 1);


Just always update the scheduler here.


+
+}
+
+void amdgpu_ctx_limit_load_balance(struct drm_sched_entity *entity)
+{
+   struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
+
+   if (!centity)
+   return;


That check looks superfluous to me.


+
+   switch (centity->hw_ip) {


Better get the ring from entity->rq->sched instead.


+   case AMDGPU_HW_IP_VCN_DEC:
+   case AMDGPU_HW_IP_VCN_ENC:


Maybe better to make that a flag in the ring functions, but this way 
works as well.


Regards,
Christian.


+   limit_vcn_load_balance(centity);
+   }
+
+}
+
  int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx,
   struct drm_sched_entity *entity)
  {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
index de490f183af2..d52d8d562d77 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
@@ -33,6 +33,7 @@ struct amdgpu_fpriv;
  
  struct amdgpu_ctx_entity {

uint64_tsequence;
+   uint32_thw_ip;
struct drm_sched_entity entity;
struct dma_fence*fences[];
  };
@@ -90,5 +91,6 @@ void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr);
  
  void amdgpu_ctx_init_sched(struct amdgpu_device *adev);
  
+void amdgpu_ctx_limit_load_balance(struct drm_sched_entity *entity);
  
  #endif

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 4981e443a884..955d12bc89ae 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -154,6 +154,7 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct 
drm_sched_entity *entity,
amdgpu_job_free_resources(job);
priority = job->base.s_priority;
drm_sched_entity_push_job(>base, entity);
+   amdgpu_ctx_limit_load_balance(entity);
  
  	return 0;

  }


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [RFC PATCH 1/2] drm/sched: implement drm_sched_entity_num_jobs

2020-03-11 Thread Christian König

Am 11.03.20 um 18:58 schrieb Nirmoy:


On 3/11/20 6:23 PM, Andrey Grodzovsky wrote:


On 3/11/20 1:18 PM, Nirmoy Das wrote:

Implement drm_sched_entity_num_jobs() so that drm drivers can
query number of jobs in an entity.

Signed-off-by: Nirmoy Das 
---
  drivers/gpu/drm/scheduler/sched_entity.c | 15 +++
  include/drm/gpu_scheduler.h  |  1 +
  2 files changed, 16 insertions(+)

diff --git a/drivers/gpu/drm/scheduler/sched_entity.c 
b/drivers/gpu/drm/scheduler/sched_entity.c

index 90fd9c30ae5a..dfe8216f2d52 100644
--- a/drivers/gpu/drm/scheduler/sched_entity.c
+++ b/drivers/gpu/drm/scheduler/sched_entity.c
@@ -119,6 +119,21 @@ static bool drm_sched_entity_is_idle(struct 
drm_sched_entity *entity)

  return false;
  }
  +/**
+ * drm_sched_entity_num_job - Get number of jobs in the entity



typo s/drm_sched_entity_num_job/drm_sched_entity_num_jobs



+ *
+ * @entity: scheduler entity
+ *
+ * Returns number of jobs in the entity
+ */
+int drm_sched_entity_num_jobs(struct drm_sched_entity *entity)
+{
+    if (drm_sched_entity_is_idle(entity))
+    return 0;
+
+    return spsc_queue_count(>job_queue);
+}



What about the jobs which already have been dequeued from job_queue 
and are in progress in the HW ring but yet to complete - don't they 
count ?


Hi Andrey,

I am thinking in terms of the software side of the counting, because for an
entity, once a job is dequeued, that job is completely lost.


I might be wrong here that's why tagged RFC :)


My question is rather what do we need that for in the first place?

Thanks,
Christian.




Regards,

Nirmoy




Andrey




+EXPORT_SYMBOL(drm_sched_entity_num_jobs);
  /**
   * drm_sched_entity_is_ready - Check if entity is ready
   *
diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index d8972836d248..b5ceff75cbbe 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -341,5 +341,6 @@ void drm_sched_fence_finished(struct 
drm_sched_fence *fence);
  unsigned long drm_sched_suspend_timeout(struct drm_gpu_scheduler 
*sched);

  void drm_sched_resume_timeout(struct drm_gpu_scheduler *sched,
  unsigned long remaining);
+int drm_sched_entity_num_jobs(struct drm_sched_entity *entity);
    #endif

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [RFC PATCH 1/2] drm/sched: implement drm_sched_entity_num_jobs

2020-03-11 Thread Nirmoy


On 3/11/20 6:23 PM, Andrey Grodzovsky wrote:


On 3/11/20 1:18 PM, Nirmoy Das wrote:

Implement drm_sched_entity_num_jobs() so that drm drivers can
query number of jobs in an entity.

Signed-off-by: Nirmoy Das 
---
  drivers/gpu/drm/scheduler/sched_entity.c | 15 +++
  include/drm/gpu_scheduler.h  |  1 +
  2 files changed, 16 insertions(+)

diff --git a/drivers/gpu/drm/scheduler/sched_entity.c 
b/drivers/gpu/drm/scheduler/sched_entity.c

index 90fd9c30ae5a..dfe8216f2d52 100644
--- a/drivers/gpu/drm/scheduler/sched_entity.c
+++ b/drivers/gpu/drm/scheduler/sched_entity.c
@@ -119,6 +119,21 @@ static bool drm_sched_entity_is_idle(struct 
drm_sched_entity *entity)

  return false;
  }
  +/**
+ * drm_sched_entity_num_job - Get number of jobs in the entity



typo s/drm_sched_entity_num_job/drm_sched_entity_num_jobs



+ *
+ * @entity: scheduler entity
+ *
+ * Returns number of jobs in the entity
+ */
+int drm_sched_entity_num_jobs(struct drm_sched_entity *entity)
+{
+    if (drm_sched_entity_is_idle(entity))
+    return 0;
+
+    return spsc_queue_count(>job_queue);
+}



What about the jobs which already have been dequeued from job_queue 
and are in progress in the HW ring but yet to complete - don't they 
count ?


Hi Andrey,

I am thinking in terms of the software side of the counting, because for an
entity, once a job is dequeued, that job is completely lost.


I might be wrong here that's why tagged RFC :)


Regards,

Nirmoy




Andrey




+EXPORT_SYMBOL(drm_sched_entity_num_jobs);
  /**
   * drm_sched_entity_is_ready - Check if entity is ready
   *
diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index d8972836d248..b5ceff75cbbe 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -341,5 +341,6 @@ void drm_sched_fence_finished(struct 
drm_sched_fence *fence);
  unsigned long drm_sched_suspend_timeout(struct drm_gpu_scheduler 
*sched);

  void drm_sched_resume_timeout(struct drm_gpu_scheduler *sched,
  unsigned long remaining);
+int drm_sched_entity_num_jobs(struct drm_sched_entity *entity);
    #endif

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [RFC PATCH 1/2] drm/sched: implement drm_sched_entity_num_jobs

2020-03-11 Thread Andrey Grodzovsky



On 3/11/20 1:18 PM, Nirmoy Das wrote:

Implement drm_sched_entity_num_jobs() so that drm drivers can
query number of jobs in an entity.

Signed-off-by: Nirmoy Das 
---
  drivers/gpu/drm/scheduler/sched_entity.c | 15 +++
  include/drm/gpu_scheduler.h  |  1 +
  2 files changed, 16 insertions(+)

diff --git a/drivers/gpu/drm/scheduler/sched_entity.c 
b/drivers/gpu/drm/scheduler/sched_entity.c
index 90fd9c30ae5a..dfe8216f2d52 100644
--- a/drivers/gpu/drm/scheduler/sched_entity.c
+++ b/drivers/gpu/drm/scheduler/sched_entity.c
@@ -119,6 +119,21 @@ static bool drm_sched_entity_is_idle(struct 
drm_sched_entity *entity)
return false;
  }
  
+/**

+ * drm_sched_entity_num_job - Get number of jobs in the entity
+ *
+ * @entity: scheduler entity
+ *
+ * Returns number of jobs in the entity
+ */
+int drm_sched_entity_num_jobs(struct drm_sched_entity *entity)
+{
+   if (drm_sched_entity_is_idle(entity))
+   return 0;
+
+   return spsc_queue_count(>job_queue);
+}



What about the jobs which already have been dequeued from job_queue and 
are in progress in the HW ring but yet to complete - don't they count ?


Andrey




+EXPORT_SYMBOL(drm_sched_entity_num_jobs);
  /**
   * drm_sched_entity_is_ready - Check if entity is ready
   *
diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index d8972836d248..b5ceff75cbbe 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -341,5 +341,6 @@ void drm_sched_fence_finished(struct drm_sched_fence 
*fence);
  unsigned long drm_sched_suspend_timeout(struct drm_gpu_scheduler *sched);
  void drm_sched_resume_timeout(struct drm_gpu_scheduler *sched,
unsigned long remaining);
+int drm_sched_entity_num_jobs(struct drm_sched_entity *entity);
  
  #endif

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[RFC PATCH 1/2] drm/sched: implement drm_sched_entity_num_jobs

2020-03-11 Thread Nirmoy Das
Implement drm_sched_entity_num_jobs() so that drm drivers can
query number of jobs in an entity.

Signed-off-by: Nirmoy Das 
---
 drivers/gpu/drm/scheduler/sched_entity.c | 15 +++
 include/drm/gpu_scheduler.h  |  1 +
 2 files changed, 16 insertions(+)

diff --git a/drivers/gpu/drm/scheduler/sched_entity.c 
b/drivers/gpu/drm/scheduler/sched_entity.c
index 90fd9c30ae5a..dfe8216f2d52 100644
--- a/drivers/gpu/drm/scheduler/sched_entity.c
+++ b/drivers/gpu/drm/scheduler/sched_entity.c
@@ -119,6 +119,21 @@ static bool drm_sched_entity_is_idle(struct 
drm_sched_entity *entity)
return false;
 }
 
+/**
+ * drm_sched_entity_num_job - Get number of jobs in the entity
+ *
+ * @entity: scheduler entity
+ *
+ * Returns number of jobs in the entity
+ */
+int drm_sched_entity_num_jobs(struct drm_sched_entity *entity)
+{
+   if (drm_sched_entity_is_idle(entity))
+   return 0;
+
+   return spsc_queue_count(>job_queue);
+}
+EXPORT_SYMBOL(drm_sched_entity_num_jobs);
 /**
  * drm_sched_entity_is_ready - Check if entity is ready
  *
diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index d8972836d248..b5ceff75cbbe 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -341,5 +341,6 @@ void drm_sched_fence_finished(struct drm_sched_fence 
*fence);
 unsigned long drm_sched_suspend_timeout(struct drm_gpu_scheduler *sched);
 void drm_sched_resume_timeout(struct drm_gpu_scheduler *sched,
unsigned long remaining);
+int drm_sched_entity_num_jobs(struct drm_sched_entity *entity);
 
 #endif
-- 
2.25.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[RFC PATCH 2/2] drm/amdgpu: disable gpu load balancer for vcn jobs

2020-03-11 Thread Nirmoy Das
VCN HW doesn't support dynamic load balancing across multiple
instances for a context. This modifies the entity's sched_list
to a sched_list consisting of only one drm scheduler.

Signed-off-by: Nirmoy Das 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  |  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 25 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h |  2 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.c |  1 +
 4 files changed, 29 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 8304d0c87899..00032093d8a9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -1257,6 +1257,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
priority = job->base.s_priority;
drm_sched_entity_push_job(>base, entity);
 
+   amdgpu_ctx_limit_load_balance(entity);
amdgpu_vm_move_to_lru_tail(p->adev, >vm);
 
ttm_eu_fence_buffer_objects(>ticket, >validated, p->fence);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index fa575bdc03c8..57b49188306d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -139,6 +139,7 @@ static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, 
const u32 hw_ip, const
if (r)
goto error_free_entity;
 
+   entity->hw_ip = hw_ip;
ctx->entities[hw_ip][ring] = entity;
return 0;
 
@@ -559,6 +560,30 @@ void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
}
 }
 
+static void limit_vcn_load_balance(struct amdgpu_ctx_entity *centity)
+{
+   struct drm_gpu_scheduler **scheds = >entity.rq->sched;
+
+   if (drm_sched_entity_num_jobs(>entity) == 1)
+   drm_sched_entity_modify_sched(>entity, scheds, 1);
+
+}
+
+void amdgpu_ctx_limit_load_balance(struct drm_sched_entity *entity)
+{
+   struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
+
+   if (!centity)
+   return;
+
+   switch (centity->hw_ip) {
+   case AMDGPU_HW_IP_VCN_DEC:
+   case AMDGPU_HW_IP_VCN_ENC:
+   limit_vcn_load_balance(centity);
+   }
+
+}
+
 int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx,
   struct drm_sched_entity *entity)
 {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
index de490f183af2..d52d8d562d77 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
@@ -33,6 +33,7 @@ struct amdgpu_fpriv;
 
 struct amdgpu_ctx_entity {
uint64_tsequence;
+   uint32_thw_ip;
struct drm_sched_entity entity;
struct dma_fence*fences[];
 };
@@ -90,5 +91,6 @@ void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr);
 
 void amdgpu_ctx_init_sched(struct amdgpu_device *adev);
 
+void amdgpu_ctx_limit_load_balance(struct drm_sched_entity *entity);
 
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 4981e443a884..955d12bc89ae 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -154,6 +154,7 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct 
drm_sched_entity *entity,
amdgpu_job_free_resources(job);
priority = job->base.s_priority;
drm_sched_entity_push_job(>base, entity);
+   amdgpu_ctx_limit_load_balance(entity);
 
return 0;
 }
-- 
2.25.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amd/amdgpu: Fix GPR read from debugfs

2020-03-11 Thread Tom St Denis



On 2020-03-11 11:16 a.m., Alex Deucher wrote:

On Tue, Mar 10, 2020 at 8:53 AM Tom St Denis  wrote:

The offset into the array was specified in bytes but should
be in terms of 32-bit words.  Also prevent large reads that
would also cause a buffer overread.

Signed-off-by: Tom St Denis 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 4 ++--
  1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index c573edf02afc..e0f4ccd91fd4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -783,11 +783,11 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, 
char __user *buf,
 ssize_t result = 0;
 uint32_t offset, se, sh, cu, wave, simd, thread, bank, *data;

-   if (size & 3 || *pos & 3)
+   if (size > 4096 || size & 3 || *pos & 3)

Is size in dwords as well?


Nope it's in bytes (as per the calling convention standards as well as 
later in the function we subtract 4 from it repeatedly).



Tom





Alex


 return -EINVAL;

 /* decode offset */
-   offset = *pos & GENMASK_ULL(11, 0);
+   offset = (*pos & GENMASK_ULL(11, 0)) / 4;
 se = (*pos & GENMASK_ULL(19, 12)) >> 12;
 sh = (*pos & GENMASK_ULL(27, 20)) >> 20;
 cu = (*pos & GENMASK_ULL(35, 28)) >> 28;
--
2.24.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfxdata=02%7C01%7Ctom.stdenis%40amd.com%7C550ca4aaaf084c3d7a9f08d7c5cf2c65%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637195365897948298sdata=YTb2YGBlAlDS%2FffaVOo2Yrej21N%2FJYVpFVIc1rQERWg%3Dreserved=0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amd/amdgpu: Fix GPR read from debugfs

2020-03-11 Thread Alex Deucher
On Tue, Mar 10, 2020 at 8:53 AM Tom St Denis  wrote:
>
> The offset into the array was specified in bytes but should
> be in terms of 32-bit words.  Also prevent large reads that
> would also cause a buffer overread.
>
> Signed-off-by: Tom St Denis 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> index c573edf02afc..e0f4ccd91fd4 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> @@ -783,11 +783,11 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, 
> char __user *buf,
> ssize_t result = 0;
> uint32_t offset, se, sh, cu, wave, simd, thread, bank, *data;
>
> -   if (size & 3 || *pos & 3)
> +   if (size > 4096 || size & 3 || *pos & 3)

Is size in dwords as well?

Alex

> return -EINVAL;
>
> /* decode offset */
> -   offset = *pos & GENMASK_ULL(11, 0);
> +   offset = (*pos & GENMASK_ULL(11, 0)) / 4;
> se = (*pos & GENMASK_ULL(19, 12)) >> 12;
> sh = (*pos & GENMASK_ULL(27, 20)) >> 20;
> cu = (*pos & GENMASK_ULL(35, 28)) >> 28;
> --
> 2.24.1
>
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH v5 1/4] drm/amdgpu/vcn: fix race condition issue for vcn start

2020-03-11 Thread Christian König

Am 11.03.20 um 16:04 schrieb James Zhu:

Fix race condition issue when multiple vcn starts are called.

v2: Removed checking the return value of cancel_delayed_work_sync()
to prevent possible races here.

v3: Add total_submission_cnt to avoid gate power unexpectedly.

v4: Remove extra counter check, and reduce counter before idle
work schedule

Signed-off-by: James Zhu 


Reviewed-by: Christian König 


---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 21 ++---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h |  2 ++
  2 files changed, 16 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
index a41272f..6dacf78 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -63,6 +63,8 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
int i, r;
  
  	INIT_DELAYED_WORK(>vcn.idle_work, amdgpu_vcn_idle_work_handler);

+   mutex_init(>vcn.vcn_pg_lock);
+   atomic_set(>vcn.total_submission_cnt, 0);
  
  	switch (adev->asic_type) {

case CHIP_RAVEN:
@@ -210,6 +212,7 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev)
}
  
  	release_firmware(adev->vcn.fw);

+   mutex_destroy(>vcn.vcn_pg_lock);
  
  	return 0;

  }
@@ -307,7 +310,7 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct 
*work)
fences += fence[j];
}
  
-	if (fences == 0) {

+   if (!fences && !atomic_read(>vcn.total_submission_cnt)) {
amdgpu_gfx_off_ctrl(adev, true);
amdgpu_device_ip_set_powergating_state(adev, 
AMD_IP_BLOCK_TYPE_VCN,
   AMD_PG_STATE_GATE);
@@ -319,13 +322,14 @@ static void amdgpu_vcn_idle_work_handler(struct 
work_struct *work)
  void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
  {
struct amdgpu_device *adev = ring->adev;
-   bool set_clocks = !cancel_delayed_work_sync(>vcn.idle_work);
  
-	if (set_clocks) {

-   amdgpu_gfx_off_ctrl(adev, false);
-   amdgpu_device_ip_set_powergating_state(adev, 
AMD_IP_BLOCK_TYPE_VCN,
-  AMD_PG_STATE_UNGATE);
-   }
+   atomic_inc(>vcn.total_submission_cnt);
+   cancel_delayed_work_sync(>vcn.idle_work);
+
+   mutex_lock(>vcn.vcn_pg_lock);
+   amdgpu_gfx_off_ctrl(adev, false);
+   amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
+  AMD_PG_STATE_UNGATE);
  
  	if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)	{

struct dpg_pause_state new_state;
@@ -345,10 +349,13 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
  
  		adev->vcn.pause_dpg_mode(adev, ring->me, _state);

}
+   mutex_unlock(>vcn.vcn_pg_lock);
  }
  
  void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring)

  {
+   atomic_dec(>adev->vcn.total_submission_cnt);
+
schedule_delayed_work(>adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
  }
  
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h

index 6fe0573..111c4cc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
@@ -200,6 +200,8 @@ struct amdgpu_vcn {
struct drm_gpu_scheduler *vcn_dec_sched[AMDGPU_MAX_VCN_INSTANCES];
uint32_t num_vcn_enc_sched;
uint32_t num_vcn_dec_sched;
+   struct mutex vcn_pg_lock;
+   atomic_t total_submission_cnt;
  
  	unsigned	harvest_config;

int (*pause_dpg_mode)(struct amdgpu_device *adev,


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH v5 1/4] drm/amdgpu/vcn: fix race condition issue for vcn start

2020-03-11 Thread James Zhu
Fix race condition issue when multiple vcn starts are called.

v2: Removed checking the return value of cancel_delayed_work_sync()
to prevent possible races here.

v3: Add total_submission_cnt to avoid gate power unexpectedly.

v4: Remove extra counter check, and reduce counter before idle
work schedule

Signed-off-by: James Zhu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 21 ++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h |  2 ++
 2 files changed, 16 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
index a41272f..6dacf78 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -63,6 +63,8 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
int i, r;
 
INIT_DELAYED_WORK(>vcn.idle_work, amdgpu_vcn_idle_work_handler);
+   mutex_init(>vcn.vcn_pg_lock);
+   atomic_set(>vcn.total_submission_cnt, 0);
 
switch (adev->asic_type) {
case CHIP_RAVEN:
@@ -210,6 +212,7 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev)
}
 
release_firmware(adev->vcn.fw);
+   mutex_destroy(>vcn.vcn_pg_lock);
 
return 0;
 }
@@ -307,7 +310,7 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct 
*work)
fences += fence[j];
}
 
-   if (fences == 0) {
+   if (!fences && !atomic_read(>vcn.total_submission_cnt)) {
amdgpu_gfx_off_ctrl(adev, true);
amdgpu_device_ip_set_powergating_state(adev, 
AMD_IP_BLOCK_TYPE_VCN,
   AMD_PG_STATE_GATE);
@@ -319,13 +322,14 @@ static void amdgpu_vcn_idle_work_handler(struct 
work_struct *work)
 void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
 {
struct amdgpu_device *adev = ring->adev;
-   bool set_clocks = !cancel_delayed_work_sync(>vcn.idle_work);
 
-   if (set_clocks) {
-   amdgpu_gfx_off_ctrl(adev, false);
-   amdgpu_device_ip_set_powergating_state(adev, 
AMD_IP_BLOCK_TYPE_VCN,
-  AMD_PG_STATE_UNGATE);
-   }
+   atomic_inc(>vcn.total_submission_cnt);
+   cancel_delayed_work_sync(>vcn.idle_work);
+
+   mutex_lock(>vcn.vcn_pg_lock);
+   amdgpu_gfx_off_ctrl(adev, false);
+   amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
+  AMD_PG_STATE_UNGATE);
 
if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG){
struct dpg_pause_state new_state;
@@ -345,10 +349,13 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
 
adev->vcn.pause_dpg_mode(adev, ring->me, _state);
}
+   mutex_unlock(>vcn.vcn_pg_lock);
 }
 
 void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring)
 {
+   atomic_dec(>adev->vcn.total_submission_cnt);
+
schedule_delayed_work(>adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
index 6fe0573..111c4cc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
@@ -200,6 +200,8 @@ struct amdgpu_vcn {
struct drm_gpu_scheduler *vcn_dec_sched[AMDGPU_MAX_VCN_INSTANCES];
uint32_t num_vcn_enc_sched;
uint32_t num_vcn_dec_sched;
+   struct mutex vcn_pg_lock;
+   atomic_t total_submission_cnt;
 
unsignedharvest_config;
int (*pause_dpg_mode)(struct amdgpu_device *adev,
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH v4 1/4] drm/amdgpu/vcn: fix race condition issue for vcn start

2020-03-11 Thread Christian König

Am 11.03.20 um 15:15 schrieb James Zhu:

Fix race condition issue when multiple vcn starts are called.

v2: Removed checking the return value of cancel_delayed_work_sync()
to prevent possible races here.

v3: Add total_submission_cnt to avoid gate power unexpectedly.

v4: Remove extra counter check, and reduce counter before idle
work schedule

Signed-off-by: James Zhu 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 22 +++---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h |  2 ++
  2 files changed, 17 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
index a41272f..2fa2891 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -63,6 +63,8 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
int i, r;
  
  	INIT_DELAYED_WORK(>vcn.idle_work, amdgpu_vcn_idle_work_handler);

+   mutex_init(>vcn.vcn_pg_lock);
+   atomic_set(>vcn.total_submission_cnt, 0);
  
  	switch (adev->asic_type) {

case CHIP_RAVEN:
@@ -210,6 +212,7 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev)
}
  
  	release_firmware(adev->vcn.fw);

+   mutex_destroy(>vcn.vcn_pg_lock);
  
  	return 0;

  }
@@ -307,7 +310,8 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct 
*work)
fences += fence[j];
}
  
-	if (fences == 0) {

+   if (fences == 0 &&
+   likely(atomic_read(>vcn.total_submission_cnt) == 0)) {


The indentation here looks off, maybe just write that as !fences && 
!atomic_read(>vcn.total_submission_cnt).


Apart from that looks good to me now,
Christian.


amdgpu_gfx_off_ctrl(adev, true);
amdgpu_device_ip_set_powergating_state(adev, 
AMD_IP_BLOCK_TYPE_VCN,
   AMD_PG_STATE_GATE);
@@ -319,13 +323,14 @@ static void amdgpu_vcn_idle_work_handler(struct 
work_struct *work)
  void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
  {
struct amdgpu_device *adev = ring->adev;
-   bool set_clocks = !cancel_delayed_work_sync(>vcn.idle_work);
  
-	if (set_clocks) {

-   amdgpu_gfx_off_ctrl(adev, false);
-   amdgpu_device_ip_set_powergating_state(adev, 
AMD_IP_BLOCK_TYPE_VCN,
-  AMD_PG_STATE_UNGATE);
-   }
+   atomic_inc(>vcn.total_submission_cnt);
+   cancel_delayed_work_sync(>vcn.idle_work);
+
+   mutex_lock(>vcn.vcn_pg_lock);
+   amdgpu_gfx_off_ctrl(adev, false);
+   amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
+  AMD_PG_STATE_UNGATE);
  
  	if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)	{

struct dpg_pause_state new_state;
@@ -345,10 +350,13 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
  
  		adev->vcn.pause_dpg_mode(adev, ring->me, _state);

}
+   mutex_unlock(>vcn.vcn_pg_lock);
  }
  
  void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring)

  {
+   atomic_dec(>adev->vcn.total_submission_cnt);
+
schedule_delayed_work(>adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
  }
  
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h

index 6fe0573..111c4cc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
@@ -200,6 +200,8 @@ struct amdgpu_vcn {
struct drm_gpu_scheduler *vcn_dec_sched[AMDGPU_MAX_VCN_INSTANCES];
uint32_t num_vcn_enc_sched;
uint32_t num_vcn_dec_sched;
+   struct mutex vcn_pg_lock;
+   atomic_t total_submission_cnt;
  
  	unsigned	harvest_config;

int (*pause_dpg_mode)(struct amdgpu_device *adev,


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH v4 2/4] drm/amdgpu/vcn: fix race condition issue for dpg unpause mode switch

2020-03-11 Thread James Zhu
We can't rely only on the enc fence to decide switching to dpg unpause mode,
since an enc thread may not schedule a fence in time when multiple
threads are running.

v3: 1. Rename enc_submission_cnt to dpg_enc_submission_cnt
2. Add dpg_enc_submission_cnt check in idle_work_handler

v4:  Remove extra counter check, and reduce counter before idle
work schedule

Signed-off-by: James Zhu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 32 +---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h |  1 +
 2 files changed, 22 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
index 2fa2891..ba28fb9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -65,6 +65,8 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
INIT_DELAYED_WORK(>vcn.idle_work, amdgpu_vcn_idle_work_handler);
mutex_init(>vcn.vcn_pg_lock);
atomic_set(>vcn.total_submission_cnt, 0);
+   for (i = 0; i < adev->vcn.num_vcn_inst; i++)
+   atomic_set(>vcn.inst[i].dpg_enc_submission_cnt, 0);
 
switch (adev->asic_type) {
case CHIP_RAVEN:
@@ -298,7 +300,8 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct 
*work)
if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG){
struct dpg_pause_state new_state;
 
-   if (fence[j])
+   if (fence[j] ||
+   
unlikely(atomic_read(>vcn.inst[j].dpg_enc_submission_cnt)))
new_state.fw_based = VCN_DPG_STATE__PAUSE;
else
new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
@@ -334,19 +337,22 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
 
if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG){
struct dpg_pause_state new_state;
-   unsigned int fences = 0;
-   unsigned int i;
 
-   for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
-   fences += 
amdgpu_fence_count_emitted(>vcn.inst[ring->me].ring_enc[i]);
-   }
-   if (fences)
+   if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC) {
+   
atomic_inc(>vcn.inst[ring->me].dpg_enc_submission_cnt);
new_state.fw_based = VCN_DPG_STATE__PAUSE;
-   else
-   new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
+   } else {
+   unsigned int fences = 0;
+   unsigned int i;
 
-   if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC)
-   new_state.fw_based = VCN_DPG_STATE__PAUSE;
+   for (i = 0; i < adev->vcn.num_enc_rings; ++i)
+   fences += 
amdgpu_fence_count_emitted(>vcn.inst[ring->me].ring_enc[i]);
+
+   if (fences || 
atomic_read(>vcn.inst[ring->me].dpg_enc_submission_cnt))
+   new_state.fw_based = VCN_DPG_STATE__PAUSE;
+   else
+   new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
+   }
 
adev->vcn.pause_dpg_mode(adev, ring->me, _state);
}
@@ -355,6 +361,10 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
 
 void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring)
 {
+   if (ring->adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG &&
+   ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC)
+   
atomic_dec(>adev->vcn.inst[ring->me].dpg_enc_submission_cnt);
+
atomic_dec(>adev->vcn.total_submission_cnt);
 
schedule_delayed_work(>adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
index 111c4cc..e913de8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
@@ -183,6 +183,7 @@ struct amdgpu_vcn_inst {
void*dpg_sram_cpu_addr;
uint64_tdpg_sram_gpu_addr;
uint32_t*dpg_sram_curr_addr;
+   atomic_tdpg_enc_submission_cnt;
 };
 
 struct amdgpu_vcn {
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH v4 1/4] drm/amdgpu/vcn: fix race condition issue for vcn start

2020-03-11 Thread James Zhu
Fix race condition issue when multiple vcn starts are called.

v2: Removed checking the return value of cancel_delayed_work_sync()
to prevent possible races here.

v3: Add total_submission_cnt to avoid gate power unexpectedly.

v4: Remove extra counter check, and reduce counter before idle
work schedule

Signed-off-by: James Zhu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 22 +++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h |  2 ++
 2 files changed, 17 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
index a41272f..2fa2891 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -63,6 +63,8 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
int i, r;
 
INIT_DELAYED_WORK(>vcn.idle_work, amdgpu_vcn_idle_work_handler);
+   mutex_init(>vcn.vcn_pg_lock);
+   atomic_set(>vcn.total_submission_cnt, 0);
 
switch (adev->asic_type) {
case CHIP_RAVEN:
@@ -210,6 +212,7 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev)
}
 
release_firmware(adev->vcn.fw);
+   mutex_destroy(>vcn.vcn_pg_lock);
 
return 0;
 }
@@ -307,7 +310,8 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct 
*work)
fences += fence[j];
}
 
-   if (fences == 0) {
+   if (fences == 0 &&
+   likely(atomic_read(>vcn.total_submission_cnt) == 0)) {
amdgpu_gfx_off_ctrl(adev, true);
amdgpu_device_ip_set_powergating_state(adev, 
AMD_IP_BLOCK_TYPE_VCN,
   AMD_PG_STATE_GATE);
@@ -319,13 +323,14 @@ static void amdgpu_vcn_idle_work_handler(struct 
work_struct *work)
 void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
 {
struct amdgpu_device *adev = ring->adev;
-   bool set_clocks = !cancel_delayed_work_sync(>vcn.idle_work);
 
-   if (set_clocks) {
-   amdgpu_gfx_off_ctrl(adev, false);
-   amdgpu_device_ip_set_powergating_state(adev, 
AMD_IP_BLOCK_TYPE_VCN,
-  AMD_PG_STATE_UNGATE);
-   }
+   atomic_inc(>vcn.total_submission_cnt);
+   cancel_delayed_work_sync(>vcn.idle_work);
+
+   mutex_lock(>vcn.vcn_pg_lock);
+   amdgpu_gfx_off_ctrl(adev, false);
+   amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
+  AMD_PG_STATE_UNGATE);
 
if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG){
struct dpg_pause_state new_state;
@@ -345,10 +350,13 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
 
adev->vcn.pause_dpg_mode(adev, ring->me, _state);
}
+   mutex_unlock(>vcn.vcn_pg_lock);
 }
 
 void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring)
 {
+   atomic_dec(>adev->vcn.total_submission_cnt);
+
schedule_delayed_work(>adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
index 6fe0573..111c4cc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
@@ -200,6 +200,8 @@ struct amdgpu_vcn {
struct drm_gpu_scheduler *vcn_dec_sched[AMDGPU_MAX_VCN_INSTANCES];
uint32_t num_vcn_enc_sched;
uint32_t num_vcn_dec_sched;
+   struct mutex vcn_pg_lock;
+   atomic_t total_submission_cnt;
 
unsignedharvest_config;
int (*pause_dpg_mode)(struct amdgpu_device *adev,
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH v3 1/4] drm/amdgpu/vcn: fix race condition issue for vcn start

2020-03-11 Thread James Zhu


On 2020-03-11 7:38 a.m., Christian König wrote:

Am 11.03.20 um 12:30 schrieb Zhu, James:


[AMD Official Use Only - Internal Distribution Only]


ping


*From:* Zhu, James 
*Sent:* Monday, March 9, 2020 12:57 PM
*To:* amd-gfx@lists.freedesktop.org 
*Cc:* Zhu, James ; Koenig, Christian 

*Subject:* [PATCH v3 1/4] drm/amdgpu/vcn: fix race condition issue 
for vcn start

Fix race condition issue when multiple vcn starts are called.

v2: Removed checking the return value of cancel_delayed_work_sync()
to prevent possible races here.

v3: Add total_submission_cnt to avoid gate power unexpectedly.

Signed-off-by: James Zhu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 22 +++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h |  2 ++
 2 files changed, 17 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c

index a41272f..6aafda1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -63,6 +63,8 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
 int i, r;

INIT_DELAYED_WORK(>vcn.idle_work, amdgpu_vcn_idle_work_handler);
+ mutex_init(>vcn.vcn_pg_lock);
+ atomic_set(>vcn.total_submission_cnt, 0);

 switch (adev->asic_type) {
 case CHIP_RAVEN:
@@ -210,6 +212,7 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev)
 }

 release_firmware(adev->vcn.fw);
+ mutex_destroy(>vcn.vcn_pg_lock);

 return 0;
 }
@@ -307,7 +310,8 @@ static void amdgpu_vcn_idle_work_handler(struct 
work_struct *work)

 fences += fence[j];
 }

-   if (fences == 0) {
+   if (fences == 0 &&
+ likely(atomic_read(>vcn.total_submission_cnt) == 0)) {
 amdgpu_gfx_off_ctrl(adev, true);
amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
    AMD_PG_STATE_GATE);
@@ -319,13 +323,14 @@ static void amdgpu_vcn_idle_work_handler(struct 
work_struct *work)

 void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
 {
 struct amdgpu_device *adev = ring->adev;
-   bool set_clocks = 
!cancel_delayed_work_sync(>vcn.idle_work);


-   if (set_clocks) {
-   amdgpu_gfx_off_ctrl(adev, false);
- amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
-  AMD_PG_STATE_UNGATE);
-   }
+ atomic_inc(>vcn.total_submission_cnt);
+ cancel_delayed_work_sync(>vcn.idle_work);
+
+ mutex_lock(>vcn.vcn_pg_lock);
+   amdgpu_gfx_off_ctrl(adev, false);
+ amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
+  AMD_PG_STATE_UNGATE);

 if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)    {
 struct dpg_pause_state new_state;
@@ -345,11 +350,14 @@ void amdgpu_vcn_ring_begin_use(struct 
amdgpu_ring *ring)


adev->vcn.pause_dpg_mode(adev, ring->me, _state);
 }
+ mutex_unlock(>vcn.vcn_pg_lock);
 }

 void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring)
 {
schedule_delayed_work(>adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
+   if 
(unlikely(atomic_dec_return(>adev->vcn.total_submission_cnt) < 0))

+ atomic_set(>adev->vcn.total_submission_cnt, 0);


You need to decrement first and then call schedule_delayed_work() 
otherwise the work could run with the wrong counter.


And the extra check for an under run should be superfluous.


Thanks! Please check V4

James



Regards,
Christian.


 }

 int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h

index 6fe0573..111c4cc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
@@ -200,6 +200,8 @@ struct amdgpu_vcn {
 struct drm_gpu_scheduler 
*vcn_dec_sched[AMDGPU_MAX_VCN_INSTANCES];

 uint32_t num_vcn_enc_sched;
 uint32_t num_vcn_dec_sched;
+   struct mutex vcn_pg_lock;
+   atomic_t total_submission_cnt;

 unsigned    harvest_config;
 int (*pause_dpg_mode)(struct amdgpu_device *adev,
--
2.7.4



___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amdgpu: add fbdev suspend/resume on gpu reset

2020-03-11 Thread Yuan, Xiaojie
[AMD Official Use Only - Internal Distribution Only]

Hi Evan,

Does this patch also fix the baco failure on Navi14 with display connected?

BR,
Xiaojie


From: amd-gfx  on behalf of Evan Quan 

Sent: Wednesday, March 11, 2020 4:18 PM
To: amd-gfx@lists.freedesktop.org
Cc: Quan, Evan
Subject: [PATCH] drm/amdgpu: add fbdev suspend/resume on gpu reset

This can fix the baco reset failure seen on Navi10.
And this should be a low risk fix as the same sequence
is already used for system suspend/resume.

Change-Id: Idb4d02c5fcbbd5b7817195ee04c7af34c346a053
Signed-off-by: Evan Quan 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 572eb6ea8eab..a35c89973614 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3935,6 +3935,8 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info 
*hive,
if (r)
goto out;

+   amdgpu_fbdev_set_suspend(tmp_adev, 0);
+
/* must succeed. */
amdgpu_ras_resume(tmp_adev);

@@ -4108,6 +4110,8 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 */
amdgpu_unregister_gpu_instance(tmp_adev);

+   amdgpu_fbdev_set_suspend(adev, 1);
+
/* disable ras on ALL IPs */
if (!(in_ras_intr && !use_baco) &&
  amdgpu_device_ip_need_full_reset(tmp_adev))
--
2.25.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfxdata=02%7C01%7CXiaojie.Yuan%40amd.com%7C3eace080fd0b4f67337e08d7c594e441%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637195115872403614sdata=%2B%2B3xRFOl2Ho%2BRe9VuzqHtZNoVZ3s%2BxIP7xTv6yG11LA%3Dreserved=0
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amdgpu: reenable runtime pm on navi12

2020-03-11 Thread Deucher, Alexander
[AMD Public Use]

Reviewed-by: Alex Deucher 

From: Quan, Evan 
Sent: Wednesday, March 11, 2020 6:56 AM
To: amd-gfx@lists.freedesktop.org 
Cc: Deucher, Alexander ; Quan, Evan 

Subject: [PATCH] drm/amdgpu: reenable runtime pm on navi12

The runtime pm is verified as working now
on navi12.

Change-Id: I20393633678297308c9651237bbfdc854a3cff94
Signed-off-by: Evan Quan 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 80495652a7c1..e376dc072d42 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -174,8 +174,7 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned 
long flags)
  (adev->asic_type >= CHIP_TOPAZ) &&
  (adev->asic_type != CHIP_VEGA10) &&
  (adev->asic_type != CHIP_VEGA20) &&
-(adev->asic_type != CHIP_ARCTURUS) &&
-(adev->asic_type != CHIP_NAVI12)) /* enable runpm on VI+ */
+(adev->asic_type != CHIP_ARCTURUS)) /* enable runpm on VI+ */
 adev->runpm = true;
 else if (amdgpu_device_supports_baco(dev) &&
  (amdgpu_runtime_pm > 0))  /* enable runpm if runpm=1 on CI */
--
2.25.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amdgpu: add fbdev suspend/resume on gpu reset

2020-03-11 Thread Deucher, Alexander
[AMD Public Use]

Reviewed-by: Alex Deucher 


From: amd-gfx  on behalf of Evan Quan 

Sent: Wednesday, March 11, 2020 4:18 AM
To: amd-gfx@lists.freedesktop.org 
Cc: Quan, Evan 
Subject: [PATCH] drm/amdgpu: add fbdev suspend/resume on gpu reset

This can fix the baco reset failure seen on Navi10.
And this should be a low risk fix as the same sequence
is already used for system suspend/resume.

Change-Id: Idb4d02c5fcbbd5b7817195ee04c7af34c346a053
Signed-off-by: Evan Quan 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 572eb6ea8eab..a35c89973614 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3935,6 +3935,8 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info 
*hive,
 if (r)
 goto out;

+   amdgpu_fbdev_set_suspend(tmp_adev, 0);
+
 /* must succeed. */
 amdgpu_ras_resume(tmp_adev);

@@ -4108,6 +4110,8 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
  */
 amdgpu_unregister_gpu_instance(tmp_adev);

+   amdgpu_fbdev_set_suspend(adev, 1);
+
 /* disable ras on ALL IPs */
 if (!(in_ras_intr && !use_baco) &&
   amdgpu_device_ip_need_full_reset(tmp_adev))
--
2.25.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfxdata=02%7C01%7Calexander.deucher%40amd.com%7C3eace080fd0b4f67337e08d7c594e441%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637195115932121013sdata=my797McW90E%2FJKHa30fYgLlzHvnN%2BYQ3%2BqBLwllajJ0%3Dreserved=0
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH v3 1/4] drm/amdgpu/vcn: fix race condition issue for vcn start

2020-03-11 Thread Christian König

Am 11.03.20 um 12:30 schrieb Zhu, James:


[AMD Official Use Only - Internal Distribution Only]


ping


*From:* Zhu, James 
*Sent:* Monday, March 9, 2020 12:57 PM
*To:* amd-gfx@lists.freedesktop.org 
*Cc:* Zhu, James ; Koenig, Christian 

*Subject:* [PATCH v3 1/4] drm/amdgpu/vcn: fix race condition issue for 
vcn start

Fix race condition issue when multiple vcn starts are called.

v2: Removed checking the return value of cancel_delayed_work_sync()
to prevent possible races here.

v3: Add total_submission_cnt to avoid gate power unexpectedly.

Signed-off-by: James Zhu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 22 +++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h |  2 ++
 2 files changed, 17 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c

index a41272f..6aafda1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -63,6 +63,8 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
 int i, r;

INIT_DELAYED_WORK(>vcn.idle_work, amdgpu_vcn_idle_work_handler);
+   mutex_init(>vcn.vcn_pg_lock);
+ atomic_set(>vcn.total_submission_cnt, 0);

 switch (adev->asic_type) {
 case CHIP_RAVEN:
@@ -210,6 +212,7 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev)
 }

 release_firmware(adev->vcn.fw);
+ mutex_destroy(>vcn.vcn_pg_lock);

 return 0;
 }
@@ -307,7 +310,8 @@ static void amdgpu_vcn_idle_work_handler(struct 
work_struct *work)

 fences += fence[j];
 }

-   if (fences == 0) {
+   if (fences == 0 &&
+ likely(atomic_read(>vcn.total_submission_cnt) == 0)) {
 amdgpu_gfx_off_ctrl(adev, true);
amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
    AMD_PG_STATE_GATE);
@@ -319,13 +323,14 @@ static void amdgpu_vcn_idle_work_handler(struct 
work_struct *work)

 void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
 {
 struct amdgpu_device *adev = ring->adev;
-   bool set_clocks = !cancel_delayed_work_sync(>vcn.idle_work);

-   if (set_clocks) {
-   amdgpu_gfx_off_ctrl(adev, false);
- amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
-  AMD_PG_STATE_UNGATE);
-   }
+ atomic_inc(>vcn.total_submission_cnt);
+ cancel_delayed_work_sync(>vcn.idle_work);
+
+   mutex_lock(>vcn.vcn_pg_lock);
+   amdgpu_gfx_off_ctrl(adev, false);
+ amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
+  AMD_PG_STATE_UNGATE);

 if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)    {
 struct dpg_pause_state new_state;
@@ -345,11 +350,14 @@ void amdgpu_vcn_ring_begin_use(struct 
amdgpu_ring *ring)


 adev->vcn.pause_dpg_mode(adev, ring->me, _state);
 }
+ mutex_unlock(>vcn.vcn_pg_lock);
 }

 void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring)
 {
schedule_delayed_work(>adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
+   if 
(unlikely(atomic_dec_return(>adev->vcn.total_submission_cnt) < 0))

+ atomic_set(>adev->vcn.total_submission_cnt, 0);


You need to decrement first and then call schedule_delayed_work() 
otherwise the work could run with the wrong counter.


And the extra check for an under run should be superfluous.

Regards,
Christian.


 }

 int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h

index 6fe0573..111c4cc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
@@ -200,6 +200,8 @@ struct amdgpu_vcn {
 struct drm_gpu_scheduler 
*vcn_dec_sched[AMDGPU_MAX_VCN_INSTANCES];

 uint32_t num_vcn_enc_sched;
 uint32_t num_vcn_dec_sched;
+   struct mutex vcn_pg_lock;
+   atomic_t total_submission_cnt;

 unsigned    harvest_config;
 int (*pause_dpg_mode)(struct amdgpu_device *adev,
--
2.7.4



___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH v3 1/4] drm/amdgpu/vcn: fix race condition issue for vcn start

2020-03-11 Thread Zhu, James
[AMD Official Use Only - Internal Distribution Only]

ping


From: Zhu, James 
Sent: Monday, March 9, 2020 12:57 PM
To: amd-gfx@lists.freedesktop.org 
Cc: Zhu, James ; Koenig, Christian 
Subject: [PATCH v3 1/4] drm/amdgpu/vcn: fix race condition issue for vcn start

Fix race condition issue when multiple vcn starts are called.

v2: Removed checking the return value of cancel_delayed_work_sync()
to prevent possible races here.

v3: Add total_submission_cnt to avoid gate power unexpectedly.

Signed-off-by: James Zhu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 22 +++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h |  2 ++
 2 files changed, 17 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
index a41272f..6aafda1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -63,6 +63,8 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
 int i, r;

 INIT_DELAYED_WORK(>vcn.idle_work, amdgpu_vcn_idle_work_handler);
+   mutex_init(>vcn.vcn_pg_lock);
+   atomic_set(>vcn.total_submission_cnt, 0);

 switch (adev->asic_type) {
 case CHIP_RAVEN:
@@ -210,6 +212,7 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev)
 }

 release_firmware(adev->vcn.fw);
+   mutex_destroy(>vcn.vcn_pg_lock);

 return 0;
 }
@@ -307,7 +310,8 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct 
*work)
 fences += fence[j];
 }

-   if (fences == 0) {
+   if (fences == 0 &&
+   likely(atomic_read(>vcn.total_submission_cnt) == 0)) {
 amdgpu_gfx_off_ctrl(adev, true);
 amdgpu_device_ip_set_powergating_state(adev, 
AMD_IP_BLOCK_TYPE_VCN,
AMD_PG_STATE_GATE);
@@ -319,13 +323,14 @@ static void amdgpu_vcn_idle_work_handler(struct 
work_struct *work)
 void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
 {
 struct amdgpu_device *adev = ring->adev;
-   bool set_clocks = !cancel_delayed_work_sync(>vcn.idle_work);

-   if (set_clocks) {
-   amdgpu_gfx_off_ctrl(adev, false);
-   amdgpu_device_ip_set_powergating_state(adev, 
AMD_IP_BLOCK_TYPE_VCN,
-  AMD_PG_STATE_UNGATE);
-   }
+   atomic_inc(>vcn.total_submission_cnt);
+   cancel_delayed_work_sync(>vcn.idle_work);
+
+   mutex_lock(>vcn.vcn_pg_lock);
+   amdgpu_gfx_off_ctrl(adev, false);
+   amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
+  AMD_PG_STATE_UNGATE);

 if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG){
 struct dpg_pause_state new_state;
@@ -345,11 +350,14 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)

 adev->vcn.pause_dpg_mode(adev, ring->me, _state);
 }
+   mutex_unlock(>vcn.vcn_pg_lock);
 }

 void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring)
 {
 schedule_delayed_work(>adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
+   if (unlikely(atomic_dec_return(>adev->vcn.total_submission_cnt) < 
0))
+   atomic_set(>adev->vcn.total_submission_cnt, 0);
 }

 int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
index 6fe0573..111c4cc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
@@ -200,6 +200,8 @@ struct amdgpu_vcn {
 struct drm_gpu_scheduler *vcn_dec_sched[AMDGPU_MAX_VCN_INSTANCES];
 uint32_t num_vcn_enc_sched;
 uint32_t num_vcn_dec_sched;
+   struct mutex vcn_pg_lock;
+   atomic_t total_submission_cnt;

 unsignedharvest_config;
 int (*pause_dpg_mode)(struct amdgpu_device *adev,
--
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amdgpu: reenable runtime pm on navi12

2020-03-11 Thread Evan Quan
The runtime pm is verified as working now
on navi12.

Change-Id: I20393633678297308c9651237bbfdc854a3cff94
Signed-off-by: Evan Quan 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 80495652a7c1..e376dc072d42 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -174,8 +174,7 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned 
long flags)
 (adev->asic_type >= CHIP_TOPAZ) &&
 (adev->asic_type != CHIP_VEGA10) &&
 (adev->asic_type != CHIP_VEGA20) &&
-(adev->asic_type != CHIP_ARCTURUS) &&
-(adev->asic_type != CHIP_NAVI12)) /* enable runpm on VI+ */
+(adev->asic_type != CHIP_ARCTURUS)) /* enable runpm on VI+ */
adev->runpm = true;
else if (amdgpu_device_supports_baco(dev) &&
 (amdgpu_runtime_pm > 0))  /* enable runpm if runpm=1 on CI */
-- 
2.25.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH -next 024/491] AMD DISPLAY CORE: Use fallthrough;

2020-03-11 Thread Joe Perches
Convert the various uses of fallthrough comments to fallthrough;

Done via script
Link: 
https://lore.kernel.org/lkml/b56602fcf79f849e733e7b521bb0e17895d390fa.1582230379.git.joe.com/

Signed-off-by: Joe Perches 
---
 drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c | 4 ++--
 drivers/gpu/drm/amd/display/dc/dce/dce_aux.c   | 2 +-
 drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.c | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c 
b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
index 2f1c958..37fa7b 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
+++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
@@ -267,7 +267,7 @@ static struct atom_display_object_path_v2 *get_bios_object(
&& id.enum_id == obj_id.enum_id)
return 
>object_info_tbl.v1_4->display_path[i];
}
-   /* fall through */
+   fallthrough;
case OBJECT_TYPE_CONNECTOR:
case OBJECT_TYPE_GENERIC:
/* Both Generic and Connector Object ID
@@ -280,7 +280,7 @@ static struct atom_display_object_path_v2 *get_bios_object(
&& id.enum_id == obj_id.enum_id)
return 
>object_info_tbl.v1_4->display_path[i];
}
-   /* fall through */
+   fallthrough;
default:
return NULL;
}
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c 
b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c
index 68c4049..743042 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c
@@ -645,7 +645,7 @@ bool dce_aux_transfer_with_retries(struct ddc_service *ddc,
case AUX_TRANSACTION_REPLY_AUX_DEFER:
case AUX_TRANSACTION_REPLY_I2C_OVER_AUX_DEFER:
retry_on_defer = true;
-   /* fall through */
+   fallthrough;
case AUX_TRANSACTION_REPLY_I2C_OVER_AUX_NACK:
if (++aux_defer_retries >= 
AUX_MAX_DEFER_RETRIES) {
goto fail;
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.c 
b/drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.c
index 8aa937f..51481e 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.c
@@ -479,7 +479,7 @@ static void program_grph_pixel_format(
case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616F:
sign = 1;
floating = 1;
-   /* fall through */
+   fallthrough;
case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616F: /* shouldn't this get 
float too? */
case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616:
grph_depth = 3;
-- 
2.24.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH -next 023/491] AMD KFD: Use fallthrough;

2020-03-11 Thread Joe Perches
Convert the various uses of fallthrough comments to fallthrough;

Done via script
Link: 
https://lore.kernel.org/lkml/b56602fcf79f849e733e7b521bb0e17895d390fa.1582230379.git.joe.com/

Signed-off-by: Joe Perches 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
index d6549e..6529ca 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
@@ -79,7 +79,7 @@ static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device 
*adev,
dev_warn(adev->dev,
 "Invalid sdma engine id (%d), using engine id 0\n",
 engine_id);
-   /* fall through */
+   fallthrough;
case 0:
sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA0, 0,
mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL;
-- 
2.24.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH -next 025/491] AMD POWERPLAY: Use fallthrough;

2020-03-11 Thread Joe Perches
Convert the various uses of fallthrough comments to fallthrough;

Done via script
Link: 
https://lore.kernel.org/lkml/b56602fcf79f849e733e7b521bb0e17895d390fa.1582230379.git.joe.com/

Signed-off-by: Joe Perches 
---
 drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c 
b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
index bf04cf..fc5236c 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
@@ -1250,7 +1250,7 @@ static void smu7_set_dpm_event_sources(struct pp_hwmgr 
*hwmgr, uint32_t sources)
switch (sources) {
default:
pr_err("Unknown throttling event sources.");
-   /* fall through */
+   fallthrough;
case 0:
protection = false;
/* src is unused */
@@ -3698,12 +3698,12 @@ static int 
smu7_request_link_speed_change_before_state_change(
data->force_pcie_gen = PP_PCIEGen2;
if (current_link_speed == PP_PCIEGen2)
break;
-   /* fall through */
+   fallthrough;
case PP_PCIEGen2:
if (0 == 
amdgpu_acpi_pcie_performance_request(hwmgr->adev, PCIE_PERF_REQ_GEN2, false))
break;
 #endif
-   /* fall through */
+   fallthrough;
default:
data->force_pcie_gen = 
smu7_get_current_pcie_speed(hwmgr);
break;
-- 
2.24.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH -next 000/491] treewide: use fallthrough;

2020-03-11 Thread Joe Perches
There is a new fallthrough pseudo-keyword macro that can be used
to replace the various /* fallthrough */ style comments that are
used to indicate a case label code block is intended to fallthrough
to the next case label block.

See commit 294f69e662d1 ("compiler_attributes.h: Add 'fallthrough'
pseudo keyword for switch/case use")

These patches are intended to allow clang to detect missing
switch/case fallthrough uses.

Do a depth-first pass on the MAINTAINERS file and find the various
F: pattern files and convert the fallthrough comments to fallthrough;
for all files matched by all F: patterns in each section.

Done via the perl script below and the previously posted
cvt_fallthrough.pl script.

Link: 
https://lore.kernel.org/lkml/b56602fcf79f849e733e7b521bb0e17895d390fa.1582230379.git.joe.com/

These patches are based on next-20200310 and are available in

git://repo.or.cz/linux-2.6/trivial-mods.git in branch 20200310_fallthrough_2

$ cat commit_fallthrough.pl
#!/usr/bin/env perl

use sort 'stable';

#
# Reorder a sorted array so file entries are before directory entries
# depends on a trailing / for directories
# so:
#   foo/
#   foo/bar.c
# becomes
#   foo/bar.c
#   foo/
#
sub file_before_directory {
my ($array_ref) = (@_);

my $count = scalar(@$array_ref);

for (my $i = 1; $i < $count; $i++) {
if (substr(@$array_ref[$i - 1], -1) eq '/' &&
substr(@$array_ref[$i], 0, length(@$array_ref[$i - 1])) eq 
@$array_ref[$i - 1]) {
my $string = @$array_ref[$i - 1];
@$array_ref[$i - 1] = @$array_ref[$i];
@$array_ref[$i] = $string;
}
}
}

sub uniq {
my (@parms) = @_;

my %saw;
@parms = grep(!$saw{$_}++, @parms);

return @parms;
}

# Get all the F: file patterns in MAINTAINERS that could be a .[ch] file
my $maintainer_patterns = `grep -P '^F:\\s+' MAINTAINERS`;
my @patterns = split('\n', $maintainer_patterns);
s/^F:\s*// for @patterns;
# Drop non-driver trees and non-source file types up front.
@patterns = grep(!/^(?:Documentation|tools|scripts)\//, @patterns);
@patterns = grep(!/\.(?:dtsi?|rst|config)$/, @patterns);
@patterns = sort @patterns;
# NOTE(review): tr/\//\// yields the count of '/' in the pattern, but the
# counts are compared with the string operator 'cmp' rather than '<=>' —
# presumably '<=>' (numeric, deepest-path-first) was intended; confirm.
@patterns = sort { $b =~ tr/\//\// cmp $a =~ tr/\//\// } @patterns;
file_before_directory(\@patterns);

# Tracks get_maintainer section headers already processed, so each
# section is converted and committed at most once.
my %sections_done;

foreach my $pattern (@patterns) {

# Find the files the pattern matches
my $pattern_files = `git ls-files -- $pattern`;
my @new_patterns = split('\n', $pattern_files);
$pattern_files = join(' ', @new_patterns);
next if ($pattern_files =~ /^\s*$/);

# Find the section the first file matches
# NOTE(review): @new_patterns[0] is a one-element slice; idiomatic Perl
# would be $new_patterns[0] (same value here, but warns under 'use warnings').
my $pattern_file = @new_patterns[0];
my $section_output = `./scripts/get_maintainer.pl --nogit --nogit-fallback 
--sections --pattern-depth=1 $pattern_file`;
my @section = split('\n', $section_output);
my $section_header = @section[0];

print("Section: <$section_header>\n");

# Skip the section if it's already done
print("Already done '$section_header'\n") if 
($sections_done{$section_header});
next if ($sections_done{$section_header}++);

# Find all the .[ch] files in all F: lines in that section
# (get_maintainer emits sections separated by blank lines; keep only
# the first section's lines).
my @new_section;
foreach my $line (@section) {
last if ($line =~ /^\s*$/);
push(@new_section, $line);
}
@section = grep(/^F:/, @new_section);
s/^F:\s*// for @section;

# Apply the same tree/file-type filters as above to the section's patterns.
@section = grep(!/^(?:Documentation|tools|scripts)\//, @section);
@section = grep(!/\.(?:dtsi?|rst|config)$/, @section);
@section = sort @section;
@section = uniq(@section);

my $section_files = join(' ', @section);

print("section_files: <$section_files>\n");

next if ($section_files =~ /^\s*$/);

# Expand the section's patterns into concrete tracked files.
my $cvt_files = `git ls-files -- $section_files`;
my @files = split('\n', $cvt_files);

@files = grep(!/^(?:Documentation|tools|scripts)\//, @files);
@files = grep(!/\.(?:dtsi?|rst|config)$/, @files);
@files = grep(/\.[ch]$/, @files);
@files = sort @files;
@files = uniq(@files);

$cvt_files = join(' ', @files);
print("files: <$cvt_files>\n");

next if (scalar(@files) < 1);

# Convert fallthroughs for all [.ch] files in the section
print("doing cvt_fallthrough.pl -- $cvt_files\n");

`cvt_fallthrough.pl -- $cvt_files`;

# If nothing changed, nothing to commit
# ($? is the exit status of the backtick command; 0 means no diff.)
`git diff-index --quiet HEAD --`;
next if (!$?);

# Commit the changes
my $fh;

open($fh, "+>", "cvt_fallthrough.commit_msg") or die "$0: can't create 
temporary file: $!\n";
print $fh 

[PATCH] drm/amdgpu: add fbdev suspend/resume on gpu reset

2020-03-11 Thread Evan Quan
This can fix the baco reset failure seen on Navi10.
And this should be a low risk fix as the same sequence
is already used for system suspend/resume.

Change-Id: Idb4d02c5fcbbd5b7817195ee04c7af34c346a053
Signed-off-by: Evan Quan 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 572eb6ea8eab..a35c89973614 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3935,6 +3935,8 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info 
*hive,
if (r)
goto out;
 
+   amdgpu_fbdev_set_suspend(tmp_adev, 0);
+
/* must succeed. */
amdgpu_ras_resume(tmp_adev);
 
@@ -4108,6 +4110,8 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 */
amdgpu_unregister_gpu_instance(tmp_adev);
 
+   amdgpu_fbdev_set_suspend(adev, 1);
+
/* disable ras on ALL IPs */
if (!(in_ras_intr && !use_baco) &&
  amdgpu_device_ip_need_full_reset(tmp_adev))
-- 
2.25.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH] drm/amdgpu: use amdgpu_ras.h in amdgpu_debugfs.c

2020-03-11 Thread Chen, Guchun
[AMD Public Use]

Reviewed-by: Guchun Chen 


Regards,
Guchun

-Original Message-
From: Stanley.Yang  
Sent: Wednesday, March 11, 2020 2:38 PM
To: amd-gfx@lists.freedesktop.org
Cc: Zhang, Hawking ; Chen, Guchun ; 
Li, Dennis ; Clements, John ; Yang, 
Stanley 
Subject: [PATCH] drm/amdgpu: use amdgpu_ras.h in amdgpu_debugfs.c

Include the amdgpu_ras.h header file instead of using an extern declaration of 
the ras_debugfs_create_all function.

Signed-off-by: Stanley.Yang 
Change-Id: I2697250ba67d4deac4371fea05efb68a976f7e5a
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index 1d513e4f9934..a9a278f87498 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -34,6 +34,7 @@
 #include "amdgpu.h"
 #include "amdgpu_pm.h"
 #include "amdgpu_dm_debugfs.h"
+#include "amdgpu_ras.h"
 
 /**
  * amdgpu_debugfs_add_files - Add simple debugfs entries @@ -1315,7 +1316,6 @@ 
DEFINE_SIMPLE_ATTRIBUTE(fops_ib_preempt, NULL,  
DEFINE_SIMPLE_ATTRIBUTE(fops_sclk_set, NULL,
amdgpu_debugfs_sclk_set, "%llu\n");
 
-extern void amdgpu_ras_debugfs_create_all(struct amdgpu_device *adev);  int 
amdgpu_debugfs_init(struct amdgpu_device *adev)  {
int r, i;
--
2.17.1
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 1/5] drm/amdgpu/pm: Use scnprintf() for avoiding potential buffer overflow

2020-03-11 Thread Takashi Iwai
BTW, please ignore the subject prefix '[1/5]', which was added
mistakenly while extracting a patch from the commit list.
This is a single patch.


thanks,

Takashi

On Wed, 11 Mar 2020 08:29:04 +0100,
Takashi Iwai wrote:
> 
> Since snprintf() returns the would-be-output size instead of the
> actual output size, the succeeding calls may go beyond the given
> buffer limit.  Fix it by replacing with scnprintf().
> 
> Also adjust the size argument passed to scnprintf() so that it really
> cuts off at the right remaining buffer length.
> 
> Signed-off-by: Takashi Iwai 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
> index bc3cf04a1a94..4a737d074f4b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
> @@ -448,7 +448,7 @@ static ssize_t amdgpu_get_pp_num_states(struct device 
> *dev,
>  
>   buf_len = snprintf(buf, PAGE_SIZE, "states: %d\n", data.nums);
>   for (i = 0; i < data.nums; i++)
> - buf_len += snprintf(buf + buf_len, PAGE_SIZE, "%d %s\n", i,
> + buf_len += scnprintf(buf + buf_len, PAGE_SIZE - buf_len, "%d 
> %s\n", i,
>   (data.states[i] == 
> POWER_STATE_TYPE_INTERNAL_BOOT) ? "boot" :
>   (data.states[i] == POWER_STATE_TYPE_BATTERY) ? 
> "battery" :
>   (data.states[i] == POWER_STATE_TYPE_BALANCED) ? 
> "balanced" :
> -- 
> 2.16.4
> 
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 1/5] drm/amdgpu/pm: Use scnprintf() for avoiding potential buffer overflow

2020-03-11 Thread Takashi Iwai
Since snprintf() returns the would-be-output size instead of the
actual output size, the succeeding calls may go beyond the given
buffer limit.  Fix it by replacing with scnprintf().

Also adjust the size argument passed to scnprintf() so that it really
cuts off at the right remaining buffer length.

Signed-off-by: Takashi Iwai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
index bc3cf04a1a94..4a737d074f4b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
@@ -448,7 +448,7 @@ static ssize_t amdgpu_get_pp_num_states(struct device *dev,
 
buf_len = snprintf(buf, PAGE_SIZE, "states: %d\n", data.nums);
for (i = 0; i < data.nums; i++)
-   buf_len += snprintf(buf + buf_len, PAGE_SIZE, "%d %s\n", i,
+   buf_len += scnprintf(buf + buf_len, PAGE_SIZE - buf_len, "%d 
%s\n", i,
(data.states[i] == 
POWER_STATE_TYPE_INTERNAL_BOOT) ? "boot" :
(data.states[i] == POWER_STATE_TYPE_BATTERY) ? 
"battery" :
(data.states[i] == POWER_STATE_TYPE_BALANCED) ? 
"balanced" :
-- 
2.16.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH] drm/amdgpu: use amdgpu_ras.h in amdgpu_debugfs.c

2020-03-11 Thread Zhou1, Tao
[AMD Official Use Only - Internal Distribution Only]

Reviewed-by: Tao Zhou 

> -Original Message-
> From: amd-gfx  On Behalf Of
> Stanley.Yang
> Sent: 2020年3月11日 14:38
> To: amd-gfx@lists.freedesktop.org
> Cc: Yang, Stanley ; Clements, John
> ; Li, Dennis ; Chen,
> Guchun ; Zhang, Hawking
> 
> Subject: [PATCH] drm/amdgpu: use amdgpu_ras.h in amdgpu_debugfs.c
> 
> include amdgpu_ras.h head file instead of use extern ras_debugfs_create_all
> function
> 
> Signed-off-by: Stanley.Yang 
> Change-Id: I2697250ba67d4deac4371fea05efb68a976f7e5a
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> index 1d513e4f9934..a9a278f87498 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> @@ -34,6 +34,7 @@
>  #include "amdgpu.h"
>  #include "amdgpu_pm.h"
>  #include "amdgpu_dm_debugfs.h"
> +#include "amdgpu_ras.h"
> 
>  /**
>   * amdgpu_debugfs_add_files - Add simple debugfs entries @@ -1315,7
> +1316,6 @@ DEFINE_SIMPLE_ATTRIBUTE(fops_ib_preempt, NULL,
> DEFINE_SIMPLE_ATTRIBUTE(fops_sclk_set, NULL,
>   amdgpu_debugfs_sclk_set, "%llu\n");
> 
> -extern void amdgpu_ras_debugfs_create_all(struct amdgpu_device *adev);
> int amdgpu_debugfs_init(struct amdgpu_device *adev)  {
>   int r, i;
> --
> 2.17.1
> 
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.f
> reedesktop.org%2Fmailman%2Flistinfo%2Famd-
> gfxdata=02%7C01%7Ctao.zhou1%40amd.com%7C19bfa057da7740ca9
> 96f08d7c586b564%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C6
> 37195054687594420sdata=ZBvaPrTx9oUoGfltIm3JQEy5htUQYjXZqk0ju
> fcc98Q%3Dreserved=0
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amdgpu: use amdgpu_ras.h in amdgpu_debugfs.c

2020-03-11 Thread Stanley . Yang
include amdgpu_ras.h head file instead of use extern
ras_debugfs_create_all function

Signed-off-by: Stanley.Yang 
Change-Id: I2697250ba67d4deac4371fea05efb68a976f7e5a
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index 1d513e4f9934..a9a278f87498 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -34,6 +34,7 @@
 #include "amdgpu.h"
 #include "amdgpu_pm.h"
 #include "amdgpu_dm_debugfs.h"
+#include "amdgpu_ras.h"
 
 /**
  * amdgpu_debugfs_add_files - Add simple debugfs entries
@@ -1315,7 +1316,6 @@ DEFINE_SIMPLE_ATTRIBUTE(fops_ib_preempt, NULL,
 DEFINE_SIMPLE_ATTRIBUTE(fops_sclk_set, NULL,
amdgpu_debugfs_sclk_set, "%llu\n");
 
-extern void amdgpu_ras_debugfs_create_all(struct amdgpu_device *adev);
 int amdgpu_debugfs_init(struct amdgpu_device *adev)
 {
int r, i;
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [refactor RLCG wreg path 1/2] drm/amdgpu: refactor RLCG access path part 1

2020-03-11 Thread Tao, Yintian
Reviewed-by: Yintian Tao

-Original Message-
From: amd-gfx  On Behalf Of Monk Liu
Sent: 2020年3月11日 13:58
To: amd-gfx@lists.freedesktop.org
Cc: Liu, Monk 
Subject: [refactor RLCG wreg path 1/2] drm/amdgpu: refactor RLCG access path 
part 1

what changed:
1)provide new implementation interface for the rlcg access path 2)put 
SQ_CMD/SQ_IND_INDEX/SQ_IND_DATA to GFX9 RLCG path to align with SRIOV RLCG logic

background:
we want to clean up the code path for WREG32_RLC so that it is covered and 
handled only by the amdgpu_mm_wreg() routine. This way RLCG can serve 
register accesses even through UMR (via the debugfs interface). The current 
implementation cannot achieve that goal because the access path is hardcoded 
everywhere, while UMR only passes "offset" as a variable to the driver.

tested-by: Monk Liu 
tested-by: Zhou pengju 
Signed-off-by: Zhou pengju 
Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h |   2 +
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c  |  80 ++-
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c   | 177 +++-
 drivers/gpu/drm/amd/amdgpu/soc15.h  |   7 ++
 4 files changed, 264 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h
index 52509c2..60bb3e8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h
@@ -127,6 +127,8 @@ struct amdgpu_rlc_funcs {
void (*reset)(struct amdgpu_device *adev);
void (*start)(struct amdgpu_device *adev);
void (*update_spm_vmid)(struct amdgpu_device *adev, unsigned vmid);
+   void (*rlcg_wreg)(struct amdgpu_device *adev, u32 offset, u32 v);
+   bool (*is_rlcg_access_range)(struct amdgpu_device *adev, uint32_t 
+reg);
 };
 
 struct amdgpu_rlc {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 82ef08d..3222cd3 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -224,6 +224,56 @@ static const struct soc15_reg_golden 
golden_settings_gc_10_1_2[] =
SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0x, 0x0080)  };
 
+static const struct soc15_reg_rlcg rlcg_access_gc_10_0[] = {
+   {SOC15_REG_ENTRY(GC, 0, mmRLC_CSIB_ADDR_HI)},
+   {SOC15_REG_ENTRY(GC, 0, mmRLC_CSIB_ADDR_LO)},
+   {SOC15_REG_ENTRY(GC, 0, mmRLC_CSIB_LENGTH)},
+   {SOC15_REG_ENTRY(GC, 0, mmCP_ME_CNTL)}, };
+
+static void gfx_v10_rlcg_wreg(struct amdgpu_device *adev, u32 offset, 
+u32 v) {
+   static void *scratch_reg0;
+   static void *scratch_reg1;
+   static void *scratch_reg2;
+   static void *scratch_reg3;
+   static void *spare_int;
+   static uint32_t grbm_cntl;
+   static uint32_t grbm_idx;
+   uint32_t i = 0;
+   uint32_t retries = 5;
+
+   scratch_reg0 = adev->rmmio + 
(adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG0)*4;
+   scratch_reg1 = adev->rmmio + 
(adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG1)*4;
+   scratch_reg2 = adev->rmmio + 
(adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG2)*4;
+   scratch_reg3 = adev->rmmio + 
(adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG3)*4;
+   spare_int = adev->rmmio + 
+(adev->reg_offset[GC_HWIP][0][mmRLC_SPARE_INT_BASE_IDX] + 
+mmRLC_SPARE_INT)*4;
+
+   grbm_cntl = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_CNTL_BASE_IDX] + 
mmGRBM_GFX_CNTL;
+   grbm_idx = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_INDEX_BASE_IDX] + 
+mmGRBM_GFX_INDEX;
+
+   if (amdgpu_sriov_runtime(adev)) {
+   pr_err("shoudn't call rlcg write register during runtime\n");
+   return;
+   }
+
+   writel(v, scratch_reg0);
+   writel(offset | 0x8000, scratch_reg1);
+   writel(1, spare_int);
+   for (i = 0; i < retries; i++) {
+   u32 tmp;
+
+   tmp = readl(scratch_reg1);
+   if (!(tmp & 0x8000))
+   break;
+
+   udelay(10);
+   }
+
+   if (i >= retries)
+   pr_err("timeout: rlcg program reg:0x%05x failed !\n", offset); }
+
 static const struct soc15_reg_golden golden_settings_gc_10_1_nv14[] =  {
/* Pending on emulation bring up */
@@ -4247,6 +4297,32 @@ static void gfx_v10_0_update_spm_vmid(struct 
amdgpu_device *adev, unsigned vmid)
WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data);  }
 
+static bool gfx_v10_0_check_rlcg_range(struct amdgpu_device *adev,
+   uint32_t offset,
+   struct soc15_reg_rlcg *entries, int 
arr_size) {
+   int i;
+   uint32_t reg;
+
+   for (i = 0; i < arr_size; i++) {
+   const struct soc15_reg_rlcg *entry;
+
+   entry = [i];
+   reg = 
adev->reg_offset[entry->hwip][entry->instance][entry->segment] + entry->reg;
+   if 

RE: [refactor RLCG wreg path 2/2] drm/amdgpu: refactor RLCG access path part 2

2020-03-11 Thread Tao, Yintian
Reviewed-by: Yintian Tao

-Original Message-
From: amd-gfx  On Behalf Of Monk Liu
Sent: 2020年3月11日 13:58
To: amd-gfx@lists.freedesktop.org
Cc: Liu, Monk 
Subject: [refactor RLCG wreg path 2/2] drm/amdgpu: refactor RLCG access path 
part 2

switch to new RLCG access path, and drop the legacy WREG32_RLC macros

tested-by: Monk Liu 
tested-by: Zhou pengju 
Signed-off-by: Zhou pengju 
Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c |  30 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c|   5 ++
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c|   8 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 104 +++---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c |   2 +-
 drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c  |  28 +++---
 drivers/gpu/drm/amd/amdgpu/soc15.c|  11 +--
 drivers/gpu/drm/amd/amdgpu/soc15_common.h |  57 
 8 files changed, 93 insertions(+), 152 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index df841c2..a21f005 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -105,8 +105,8 @@ void kgd_gfx_v9_program_sh_mem_settings(struct kgd_dev 
*kgd, uint32_t vmid,
 
lock_srbm(kgd, 0, 0, 0, vmid);
 
-   WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config);
-   WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases);
+   WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config);
+   WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases);
/* APE1 no longer exists on GFX9 */
 
unlock_srbm(kgd);
@@ -242,13 +242,13 @@ int kgd_gfx_v9_hqd_load(struct kgd_dev *kgd, void *mqd, 
uint32_t pipe_id,
 
for (reg = hqd_base;
 reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
-   WREG32_RLC(reg, mqd_hqd[reg - hqd_base]);
+   WREG32(reg, mqd_hqd[reg - hqd_base]);
 
 
/* Activate doorbell logic before triggering WPTR poll. */
data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
 CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
-   WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), data);
+   WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), data);
 
if (wptr) {
/* Don't read wptr with get_user because the user @@ -277,25 
+277,25 @@ int kgd_gfx_v9_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t 
pipe_id,
guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1);
guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32;
 
-   WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO),
+   WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO),
   lower_32_bits(guessed_wptr));
-   WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI),
+   WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI),
   upper_32_bits(guessed_wptr));
-   WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
+   WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
   lower_32_bits((uintptr_t)wptr));
-   WREG32_RLC(SOC15_REG_OFFSET(GC, 0, 
mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
+   WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
   upper_32_bits((uintptr_t)wptr));
WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1),
-  (uint32_t)get_queue_mask(adev, pipe_id, queue_id));
+  get_queue_mask(adev, pipe_id, queue_id));
}
 
/* Start the EOP fetcher */
-   WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_RPTR),
+   WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_RPTR),
   REG_SET_FIELD(m->cp_hqd_eop_rptr,
 CP_HQD_EOP_RPTR, INIT_FETCHER, 1));
 
data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
-   WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), data);
+   WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), data);
 
release_queue(kgd);
 
@@ -547,7 +547,7 @@ int kgd_gfx_v9_hqd_destroy(struct kgd_dev *kgd, void *mqd,
acquire_queue(kgd, pipe_id, queue_id);
 
if (m->cp_hqd_vmid == 0)
-   WREG32_FIELD15_RLC(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0);
+   WREG32_FIELD15(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0);
 
switch (reset_type) {
case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN:
@@ -561,7 +561,7 @@ int kgd_gfx_v9_hqd_destroy(struct kgd_dev *kgd, void *mqd,
break;
}
 
-   WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), type);
+   WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), type);