date:20240506

RE: [PATCH] drm/amdgpu: change log level

2024-05-06 Thread Wang, Yang(Kevin)

Reviewed-by: Yang Wang 

Best Regards,
Kevin

-Original Message-
From: Chai, Thomas  
Sent: 2024年5月7日 14:27
To: amd-gfx@lists.freedesktop.org
Cc: Zhang, Hawking ; Zhou1, Tao ; Li, 
Candice ; Wang, Yang(Kevin) ; Yang, 
Stanley ; Chai, Thomas 
Subject: [PATCH] drm/amdgpu: change log level

Change the log level of the RAS page-retirement and UMC v12 status
messages from dev_info to dev_dbg.

Signed-off-by: YiPeng Chai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 4 ++--
 drivers/gpu/drm/amd/amdgpu/umc_v12_0.c  | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index fbc0d9854873..7acf43582ca7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -2907,7 +2907,7 @@ static int amdgpu_ras_page_retirement_thread(void *param)
 
ras_block = poison_msg.block;
 
-   dev_info(adev->dev, "Start processing ras block %s(%d)\n",
+   dev_dbg(adev->dev, "Start processing ras block %s(%d)\n",
ras_block_str(ras_block), ras_block);
 
if (ras_block == AMDGPU_RAS_BLOCK__UMC) {
@@ -2929,7 +2929,7 @@ static int amdgpu_ras_page_retirement_thread(void *param)
poison_creation_is_handled = false;
}
 #else
-dev_info(adev->dev, "Start processing page retirement. request:%d\n",
+   dev_dbg(adev->dev, "Start processing page retirement. 
request:%d\n",
 atomic_read(&con->page_retirement_req_cnt));
 
 amdgpu_umc_bad_page_polling_timeout(adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c 
b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
index bfe61d86ee6c..94fa6c37b7eb 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
@@ -71,7 +71,7 @@ static void umc_v12_0_reset_error_count(struct amdgpu_device 
*adev)
 
 bool umc_v12_0_is_deferred_error(struct amdgpu_device *adev, uint64_t 
mc_umc_status)
 {
-   dev_info(adev->dev,
+   dev_dbg(adev->dev,
"MCA_UMC_STATUS(0x%llx): Val:%llu, Poison:%llu, Deferred:%llu, 
PCC:%llu, UC:%llu, TCC:%llu\n",
mc_umc_status,
REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val), 
@@ -575,7 +575,7 @@ static int umc_v12_0_update_ecc_status(struct amdgpu_device 
*adev,
err_addr = REG_GET_FIELD(addr,
MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);
 
-   dev_info(adev->dev,
+   dev_dbg(adev->dev,
"UMC:IPID:0x%llx, socket:%llu, aid:%llu, inst:%llu, ch:%llu, 
err_addr:0x%llx\n",
ipid,
MCA_IPID_2_SOCKET_ID(ipid),
--
2.34.1

[PATCH] drm/amdgpu: change log level

2024-05-06 Thread YiPeng Chai

Change the log level of the RAS page-retirement and UMC v12 status
messages from dev_info to dev_dbg.

Signed-off-by: YiPeng Chai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 4 ++--
 drivers/gpu/drm/amd/amdgpu/umc_v12_0.c  | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index fbc0d9854873..7acf43582ca7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -2907,7 +2907,7 @@ static int amdgpu_ras_page_retirement_thread(void *param)
 
ras_block = poison_msg.block;
 
-   dev_info(adev->dev, "Start processing ras block %s(%d)\n",
+   dev_dbg(adev->dev, "Start processing ras block %s(%d)\n",
ras_block_str(ras_block), ras_block);
 
if (ras_block == AMDGPU_RAS_BLOCK__UMC) {
@@ -2929,7 +2929,7 @@ static int amdgpu_ras_page_retirement_thread(void *param)
poison_creation_is_handled = false;
}
 #else
-dev_info(adev->dev, "Start processing page retirement. request:%d\n",
+   dev_dbg(adev->dev, "Start processing page retirement. 
request:%d\n",
 atomic_read(&con->page_retirement_req_cnt));
 
 amdgpu_umc_bad_page_polling_timeout(adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c 
b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
index bfe61d86ee6c..94fa6c37b7eb 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
@@ -71,7 +71,7 @@ static void umc_v12_0_reset_error_count(struct amdgpu_device 
*adev)
 
 bool umc_v12_0_is_deferred_error(struct amdgpu_device *adev, uint64_t 
mc_umc_status)
 {
-   dev_info(adev->dev,
+   dev_dbg(adev->dev,
"MCA_UMC_STATUS(0x%llx): Val:%llu, Poison:%llu, Deferred:%llu, 
PCC:%llu, UC:%llu, TCC:%llu\n",
mc_umc_status,
REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val),
@@ -575,7 +575,7 @@ static int umc_v12_0_update_ecc_status(struct amdgpu_device 
*adev,
err_addr = REG_GET_FIELD(addr,
MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);
 
-   dev_info(adev->dev,
+   dev_dbg(adev->dev,
"UMC:IPID:0x%llx, socket:%llu, aid:%llu, inst:%llu, ch:%llu, 
err_addr:0x%llx\n",
ipid,
MCA_IPID_2_SOCKET_ID(ipid),
-- 
2.34.1

Re: [PATCH v1 5/5] drm/ci: update xfails for the new testlist

2024-05-06 Thread Vignesh Raman


Hi Dmitry,

On 30/04/24 15:45, Dmitry Baryshkov wrote:

On Tue, Apr 30, 2024 at 02:41:21PM +0530, Vignesh Raman wrote:

Now the testlist is used from IGT build, so update
xfails with the new testlist.

Signed-off-by: Vignesh Raman 
---
  .../gpu/drm/ci/xfails/amdgpu-stoney-fails.txt | 47 +++
  .../drm/ci/xfails/amdgpu-stoney-flakes.txt|  8 +-
  .../gpu/drm/ci/xfails/amdgpu-stoney-skips.txt | 15 
  drivers/gpu/drm/ci/xfails/i915-amly-fails.txt | 22 -
  .../gpu/drm/ci/xfails/i915-amly-flakes.txt|  8 ++
  drivers/gpu/drm/ci/xfails/i915-amly-skips.txt |  8 ++
  drivers/gpu/drm/ci/xfails/i915-apl-fails.txt  | 45 +-
  drivers/gpu/drm/ci/xfails/i915-apl-flakes.txt |  5 ++
  drivers/gpu/drm/ci/xfails/i915-apl-skips.txt  | 12 +++
  drivers/gpu/drm/ci/xfails/i915-cml-fails.txt  | 26 +-
  drivers/gpu/drm/ci/xfails/i915-cml-flakes.txt |  6 ++
  drivers/gpu/drm/ci/xfails/i915-cml-skips.txt  |  8 ++
  drivers/gpu/drm/ci/xfails/i915-glk-fails.txt  | 28 +--
  drivers/gpu/drm/ci/xfails/i915-glk-skips.txt  | 12 +++
  drivers/gpu/drm/ci/xfails/i915-kbl-fails.txt  | 39 -
  drivers/gpu/drm/ci/xfails/i915-kbl-flakes.txt | 10 ++-
  drivers/gpu/drm/ci/xfails/i915-kbl-skips.txt  | 21 +
  drivers/gpu/drm/ci/xfails/i915-tgl-fails.txt  | 75 +
  drivers/gpu/drm/ci/xfails/i915-tgl-skips.txt  | 13 +++
  drivers/gpu/drm/ci/xfails/i915-whl-fails.txt  | 46 +--
  drivers/gpu/drm/ci/xfails/i915-whl-skips.txt  |  8 ++
  .../drm/ci/xfails/mediatek-mt8173-fails.txt   | 47 +++
  .../drm/ci/xfails/mediatek-mt8183-fails.txt   | 17 +---
  .../drm/ci/xfails/mediatek-mt8183-flakes.txt  |  5 ++
  .../gpu/drm/ci/xfails/meson-g12b-fails.txt| 20 +
  .../gpu/drm/ci/xfails/meson-g12b-flakes.txt   |  5 ++
  .../gpu/drm/ci/xfails/msm-apq8016-fails.txt   | 26 ++
  .../gpu/drm/ci/xfails/msm-apq8016-flakes.txt  |  5 ++
  .../gpu/drm/ci/xfails/msm-apq8096-fails.txt   |  5 +-
  .../gpu/drm/ci/xfails/msm-apq8096-flakes.txt  |  5 ++
  .../gpu/drm/ci/xfails/msm-apq8096-skips.txt   | 67 +++
  .../msm-sc7180-trogdor-kingoftown-fails.txt   | 34 
  .../msm-sc7180-trogdor-kingoftown-flakes.txt  |  5 ++
  ...sm-sc7180-trogdor-lazor-limozeen-fails.txt | 34 
  ...m-sc7180-trogdor-lazor-limozeen-flakes.txt |  5 ++
  .../gpu/drm/ci/xfails/msm-sdm845-fails.txt| 75 -
  .../gpu/drm/ci/xfails/msm-sdm845-flakes.txt   | 26 ++
  .../drm/ci/xfails/rockchip-rk3288-fails.txt   | 54 
  .../drm/ci/xfails/rockchip-rk3399-fails.txt   | 80 ++
  .../drm/ci/xfails/rockchip-rk3399-flakes.txt  |  7 --
  .../drm/ci/xfails/virtio_gpu-none-fails.txt   | 82 +--
  .../drm/ci/xfails/virtio_gpu-none-skips.txt   |  3 +
  42 files changed, 574 insertions(+), 495 deletions(-)
  create mode 100644 drivers/gpu/drm/ci/xfails/i915-amly-flakes.txt
  create mode 100644 drivers/gpu/drm/ci/xfails/i915-apl-flakes.txt
  create mode 100644 drivers/gpu/drm/ci/xfails/i915-cml-flakes.txt
  create mode 100644 drivers/gpu/drm/ci/xfails/mediatek-mt8183-flakes.txt
  create mode 100644 drivers/gpu/drm/ci/xfails/meson-g12b-flakes.txt
  create mode 100644 drivers/gpu/drm/ci/xfails/msm-apq8016-flakes.txt
  create mode 100644 drivers/gpu/drm/ci/xfails/msm-apq8096-flakes.txt
  create mode 100644 
drivers/gpu/drm/ci/xfails/msm-sc7180-trogdor-kingoftown-flakes.txt
  create mode 100644 
drivers/gpu/drm/ci/xfails/msm-sc7180-trogdor-lazor-limozeen-flakes.txt
  delete mode 100644 drivers/gpu/drm/ci/xfails/rockchip-rk3288-fails.txt
  delete mode 100644 drivers/gpu/drm/ci/xfails/rockchip-rk3399-flakes.txt



[skipped]


diff --git a/drivers/gpu/drm/ci/xfails/msm-apq8016-fails.txt 
b/drivers/gpu/drm/ci/xfails/msm-apq8016-fails.txt
index 44a5c62dedad..96e9faf0e607 100644
--- a/drivers/gpu/drm/ci/xfails/msm-apq8016-fails.txt
+++ b/drivers/gpu/drm/ci/xfails/msm-apq8016-fails.txt
@@ -1,19 +1,9 @@
+core_setmaster_vs_auth,Fail
+device_reset,Fail
+dumb_buffer,Fail


This doesn't look correct, core tests should be passing.


  kms_3d,Fail
-kms_addfb_basic@addfb25-bad-modifier,Fail
-kms_cursor_legacy@all-pipes-forked-bo,Fail
-kms_cursor_legacy@all-pipes-forked-move,Fail
-kms_cursor_legacy@all-pipes-single-bo,Fail
-kms_cursor_legacy@all-pipes-single-move,Fail
-kms_cursor_legacy@all-pipes-torture-bo,Fail
-kms_cursor_legacy@all-pipes-torture-move,Fail
-kms_cursor_legacy@pipe-A-forked-bo,Fail
-kms_cursor_legacy@pipe-A-forked-move,Fail
-kms_cursor_legacy@pipe-A-single-bo,Fail
-kms_cursor_legacy@pipe-A-single-move,Fail
-kms_cursor_legacy@pipe-A-torture-bo,Fail
-kms_cursor_legacy@pipe-A-torture-move,Fail
-kms_force_connector_basic@force-edid,Fail
-kms_hdmi_inject@inject-4k,Fail
-kms_selftest@drm_format,Timeout
-kms_selftest@drm_format_helper,Timeout


Fine, kms_cursor_legacy tests were migrated to -flakes. But what
happened with the rest of the failures?

-msm_mapping@ring,Fail
+kms_force_connector_basic,Fail
+kms_lease,Fail
+msm_mapping,Fail
+msm_submit,Fail
+

Re: [PATCH v1 4/5] drm/ci: skip driver specific tests

2024-05-06 Thread Vignesh Raman


Hi Helen,

On 30/04/24 20:01, Helen Koike wrote:



On 30/04/2024 06:11, Vignesh Raman wrote:

Skip driver specific tests and skip kms tests for
panfrost driver since it is not a kms driver.

Signed-off-by: Vignesh Raman 
---
  .../gpu/drm/ci/xfails/amdgpu-stoney-skips.txt   | 14 +-
  drivers/gpu/drm/ci/xfails/i915-amly-skips.txt   | 14 +-
  drivers/gpu/drm/ci/xfails/i915-apl-skips.txt    | 14 +-
  drivers/gpu/drm/ci/xfails/i915-cml-skips.txt    | 12 
  drivers/gpu/drm/ci/xfails/i915-glk-skips.txt    | 14 +-
  drivers/gpu/drm/ci/xfails/i915-kbl-skips.txt    | 14 +-
  drivers/gpu/drm/ci/xfails/i915-tgl-skips.txt    | 14 +-
  drivers/gpu/drm/ci/xfails/i915-whl-skips.txt    | 14 +-
  .../gpu/drm/ci/xfails/mediatek-mt8173-skips.txt | 12 
  .../gpu/drm/ci/xfails/mediatek-mt8183-skips.txt | 14 ++
  drivers/gpu/drm/ci/xfails/meson-g12b-skips.txt  | 14 ++
  drivers/gpu/drm/ci/xfails/msm-apq8016-skips.txt | 14 ++
  drivers/gpu/drm/ci/xfails/msm-apq8096-skips.txt | 14 +-
  .../msm-sc7180-trogdor-kingoftown-skips.txt | 15 +++
  .../msm-sc7180-trogdor-lazor-limozeen-skips.txt | 15 +++
  drivers/gpu/drm/ci/xfails/msm-sdm845-skips.txt  | 15 +++
  .../gpu/drm/ci/xfails/rockchip-rk3288-skips.txt | 17 -
  .../gpu/drm/ci/xfails/rockchip-rk3399-skips.txt | 15 +++
  .../gpu/drm/ci/xfails/virtio_gpu-none-skips.txt | 15 ++-
  19 files changed, 260 insertions(+), 10 deletions(-)
  create mode 100644 drivers/gpu/drm/ci/xfails/mediatek-mt8173-skips.txt
  create mode 100644 drivers/gpu/drm/ci/xfails/mediatek-mt8183-skips.txt
  create mode 100644 drivers/gpu/drm/ci/xfails/meson-g12b-skips.txt
  create mode 100644 drivers/gpu/drm/ci/xfails/msm-apq8016-skips.txt

diff --git a/drivers/gpu/drm/ci/xfails/amdgpu-stoney-skips.txt 
b/drivers/gpu/drm/ci/xfails/amdgpu-stoney-skips.txt

index e2c538a0f954..70e2f925d06f 100644
--- a/drivers/gpu/drm/ci/xfails/amdgpu-stoney-skips.txt
+++ b/drivers/gpu/drm/ci/xfails/amdgpu-stoney-skips.txt
@@ -1,2 +1,14 @@
  # Suspend to RAM seems to be broken on this machine
-.*suspend.*
\ No newline at end of file
+.*suspend.*
+
+# Skip driver specific tests
+msm_.*
+nouveau_.*
+panfrost_.*
+^v3d.*
+^vc4.*
+^vmwgfx*
+
+# Skip intel specific tests
+gem_.*
+i915_.*
diff --git a/drivers/gpu/drm/ci/xfails/i915-amly-skips.txt 
b/drivers/gpu/drm/ci/xfails/i915-amly-skips.txt

index fe55540a3f9a..59f8acfaa5ba 100644
--- a/drivers/gpu/drm/ci/xfails/i915-amly-skips.txt
+++ b/drivers/gpu/drm/ci/xfails/i915-amly-skips.txt
@@ -1,4 +1,16 @@
  # Suspend to RAM seems to be broken on this machine
  .*suspend.*
  # This is generating kernel oops with divide error
-kms_plane_scaling@invalid-parameters
\ No newline at end of file
+kms_plane_scaling@invalid-parameters
+
+# Skip driver specific tests
+^amdgpu.*
+msm_.*
+nouveau_.*
+panfrost_.*
+^v3d.*
+^vc4.*
+^vmwgfx*
+
+# GEM tests takes ~1000 hours, so skip it
+gem_.*
diff --git a/drivers/gpu/drm/ci/xfails/i915-apl-skips.txt 
b/drivers/gpu/drm/ci/xfails/i915-apl-skips.txt

index 3430b215c06e..d97bc038b63a 100644
--- a/drivers/gpu/drm/ci/xfails/i915-apl-skips.txt
+++ b/drivers/gpu/drm/ci/xfails/i915-apl-skips.txt
@@ -3,4 +3,16 @@
  # This is generating kernel oops with divide error
  kms_plane_scaling@invalid-parameters
  # This is cascading issues
-kms_3d
\ No newline at end of file
+kms_3d
+
+# Skip driver specific tests
+^amdgpu.*
+msm_.*
+nouveau_.*
+panfrost_.*
+^v3d.*
+^vc4.*
+^vmwgfx*
+
+# GEM tests takes ~1000 hours, so skip it
+gem_.*
diff --git a/drivers/gpu/drm/ci/xfails/i915-cml-skips.txt 
b/drivers/gpu/drm/ci/xfails/i915-cml-skips.txt

index 6d3d7ddc377f..92c0ffee8283 100644
--- a/drivers/gpu/drm/ci/xfails/i915-cml-skips.txt
+++ b/drivers/gpu/drm/ci/xfails/i915-cml-skips.txt
@@ -1,2 +1,14 @@
  # This is generating kernel oops with divide error
  kms_plane_scaling@invalid-parameters
+
+# Skip driver specific tests
+^amdgpu.*
+msm_.*
+nouveau_.*
+panfrost_.*
+^v3d.*
+^vc4.*
+^vmwgfx*
+
+# GEM tests takes ~1000 hours, so skip it
+gem_.*
diff --git a/drivers/gpu/drm/ci/xfails/i915-glk-skips.txt 
b/drivers/gpu/drm/ci/xfails/i915-glk-skips.txt

index 4c7d00ce14bc..a168722caf13 100644
--- a/drivers/gpu/drm/ci/xfails/i915-glk-skips.txt
+++ b/drivers/gpu/drm/ci/xfails/i915-glk-skips.txt
@@ -2,4 +2,16 @@
  .*suspend.*
  # This is generating kernel oops with divide error
-kms_plane_scaling@invalid-parameters
\ No newline at end of file
+kms_plane_scaling@invalid-parameters
+
+# Skip driver specific tests
+^amdgpu.*
+msm_.*
+nouveau_.*
+panfrost_.*
+^v3d.*
+^vc4.*
+^vmwgfx*
+
+# GEM tests takes ~1000 hours, so skip it
+gem_.*
diff --git a/drivers/gpu/drm/ci/xfails/i915-kbl-skips.txt 
b/drivers/gpu/drm/ci/xfails/i915-kbl-skips.txt

index 4c7d00ce14bc..a168722caf13 100644
--- a/drivers/gpu/drm/ci/xfails/i915-kbl-skips.txt
+++ b/drivers/gpu/drm/ci/xf

Re: [PATCH] drm/amdkfd: Ensure gpu_id is unique

2024-05-06 Thread Lazar, Lijo




On 5/4/2024 3:36 AM, Harish Kasiviswanathan wrote:
> gpu_id needs to be unique for user space to identify GPUs via KFD
> interface. In the current implementation there is a very small
> probability of having non unique gpu_ids.
> 
> v2: Add check to confirm if gpu_id is unique. If not unique, find one
> Changed commit header to reflect the above
> 
> Signed-off-by: Harish Kasiviswanathan 
> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 26 ++-
>  1 file changed, 25 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
> index b93913934b03..01d4c2e10c6d 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
> @@ -1095,6 +1095,8 @@ static uint32_t kfd_generate_gpu_id(struct kfd_node 
> *gpu)
>   uint32_t hashout;
>   uint32_t buf[8];
>   uint64_t local_mem_size;
> + struct kfd_topology_device *dev;
> + bool is_unique;
>   int i;
>  
>   if (!gpu)
> @@ -1115,6 +1117,28 @@ static uint32_t kfd_generate_gpu_id(struct kfd_node 
> *gpu)
>   for (i = 0, hashout = 0; i < 8; i++)
>   hashout ^= hash_32(buf[i], KFD_GPU_ID_HASH_WIDTH);

Instead of this, suggest to replace this with crc16(). That has a better
chance to avoid collision as it takes into account the whole contents of
the buffer. It may work better than combining hashes with XOR.

Thanks,
Lijo

>  
> + /* hash generated could be non-unique. Check if it is unique.
> +  * If not unique increment till unique one is found. In case
> +  * of overflow, restart from 1
> + */
> + down_read(&topology_lock);
> + do {
> + is_unique = true;
> + list_for_each_entry(dev, &topology_device_list, list) {
> + if (dev->gpu && dev->gpu_id == hashout) {
> + is_unique = false;
> + break;
> + }
> + }
> + if (unlikely(!is_unique)) {
> + hashout = (hashout + 1) &
> +   ((1 << KFD_GPU_ID_HASH_WIDTH) - 1);
> + if (!hashout)
> + hashout = 1;
> + }
> + } while (!is_unique);
> + up_read(&topology_lock);
> +
>   return hashout;
>  }
>  /* kfd_assign_gpu - Attach @gpu to the correct kfd topology device. If
> @@ -1946,7 +1970,6 @@ int kfd_topology_add_device(struct kfd_node *gpu)
>   struct amdgpu_gfx_config *gfx_info = &gpu->adev->gfx.config;
>   struct amdgpu_cu_info *cu_info = &gpu->adev->gfx.cu_info;
>  
> - gpu_id = kfd_generate_gpu_id(gpu);
>   if (gpu->xcp && !gpu->xcp->ddev) {
>   dev_warn(gpu->adev->dev,
>"Won't add GPU to topology since it has no drm node 
> assigned.");
> @@ -1969,6 +1992,7 @@ int kfd_topology_add_device(struct kfd_node *gpu)
>   if (res)
>   return res;
>  
> + gpu_id = kfd_generate_gpu_id(gpu);
>   dev->gpu_id = gpu_id;
>   gpu->id = gpu_id;
>

Re: [PATCH v1 2/5] drm/ci: generate testlist from build

2024-05-06 Thread Vignesh Raman


Hi Dmitry,

On 30/04/24 15:47, Dmitry Baryshkov wrote:

On Tue, Apr 30, 2024 at 02:41:18PM +0530, Vignesh Raman wrote:

Stop vendoring the testlist into the kernel. Instead, use the
testlist from the IGT build to ensure we do not miss renamed
or newly added tests.

Signed-off-by: Vignesh Raman 
---
  drivers/gpu/drm/ci/build-igt.sh  |   23 +
  drivers/gpu/drm/ci/igt_runner.sh |9 +-
  drivers/gpu/drm/ci/testlist.txt  | 2761 --
  3 files changed, 28 insertions(+), 2765 deletions(-)
  delete mode 100644 drivers/gpu/drm/ci/testlist.txt

diff --git a/drivers/gpu/drm/ci/build-igt.sh b/drivers/gpu/drm/ci/build-igt.sh
index 500fa4f5c30a..cedc62baba1e 100644
--- a/drivers/gpu/drm/ci/build-igt.sh
+++ b/drivers/gpu/drm/ci/build-igt.sh
@@ -26,6 +26,29 @@ meson build $MESON_OPTIONS $EXTRA_MESON_ARGS
  ninja -C build -j${FDO_CI_CONCURRENT:-4} || ninja -C build -j 1
  ninja -C build install
  
+set +ex

+export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/igt/lib64
+while read -r line; do
+if [ "$line" = "TESTLIST" ] || [ "$line" = "END TESTLIST" ]; then
+continue
+fi
+
+tests=$(echo "$line" | tr ' ' '\n')
+
+for test in $tests; do
+output=$(/igt/libexec/igt-gpu-tools/"$test" --list-subtests)
+
+if [ -z "$output" ]; then
+echo "$test"
+else
+echo "$output" | while read -r subtest; do
+echo "$test@$subtest"
+done
+fi
+done
+done < /igt/libexec/igt-gpu-tools/test-list.txt > 
/igt/libexec/igt-gpu-tools/testlist.txt
+set -ex


Is the list in sync between x86 and arm/arm64 IGT builds? Is there a
chance of having a safety net here?


We need to handle arm/arm64 cases also. IGT is not generating test-list 
for arm and it is fixed now with 
https://gitlab.freedesktop.org/drm/igt-gpu-tools/-/commit/1cf83083f855894dd287d9cf84bbcc2952b52d02


Will uprev IGT to latest commit to include this fix. Thanks.

Regards,
Vignesh


+
  mkdir -p artifacts/
  tar -cf artifacts/igt.tar /igt
  
diff --git a/drivers/gpu/drm/ci/igt_runner.sh b/drivers/gpu/drm/ci/igt_runner.sh

index f1a08b9b146f..20026612a9bd 100755
--- a/drivers/gpu/drm/ci/igt_runner.sh
+++ b/drivers/gpu/drm/ci/igt_runner.sh
@@ -59,25 +59,26 @@ fi
  
  curl -L --retry 4 -f --retry-all-errors --retry-delay 60 -s ${FDO_HTTP_CACHE_URI:-}$PIPELINE_ARTIFACTS_BASE/$ARCH/igt.tar.gz | tar --zstd -v -x -C /
  
+TESTLIST="/igt/libexec/igt-gpu-tools/testlist.txt"
  
  # If the job is parallel at the gitab job level, take the corresponding fraction

  # of the caselist.
  if [ -n "$CI_NODE_INDEX" ]; then
-sed -ni $CI_NODE_INDEX~$CI_NODE_TOTAL"p" /install/testlist.txt
+sed -ni $CI_NODE_INDEX~$CI_NODE_TOTAL"p" $TESTLIST
  fi
  
  # core_getversion checks if the driver is loaded and probed correctly

  # so run it in all shards
-if ! grep -q "core_getversion" /install/testlist.txt; then
+if ! grep -q "core_getversion" $TESTLIST; then
  # Add the line to the file
-echo "core_getversion" >> /install/testlist.txt
+echo "core_getversion" >> $TESTLIST
  fi
  
  set +e

  igt-runner \
  run \
  --igt-folder /igt/libexec/igt-gpu-tools \
---caselist /install/testlist.txt \
+--caselist $TESTLIST \
  --output /results \
  $IGT_SKIPS \
  $IGT_FLAKES \

RE: [PATCH 2/2] drm/amd/pm: enable UMD Pstate profile level for renoir

2024-05-06 Thread Huang, Tim

[AMD Official Use Only - General]

> -Original Message-
> From: Jesse Zhang 
> Sent: Tuesday, May 7, 2024 11:43 AM
> To: amd-gfx@lists.freedesktop.org
> Cc: Deucher, Alexander ; Koenig, Christian
> ; Huang, Tim ; Zhang,
> Jesse(Jie) ; Zhang, Jesse(Jie) 
> Subject: [PATCH 2/2] drm/amd/pm: enable UMD Pstate profile level for renoir
>
> This patch enable UMD Pstates profile
> level for the renoir_set_performance_level interface.
>
>  -profile_min_sclk
>  -profile_min_fclk
>
> Signed-off-by: Jesse Zhang 
> Suggested-by: Tim Huang 
> ---
>  .../gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c   | 58 +++
>  1 file changed, 48 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c
> b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c
> index 8908bbb3ff1f..e56b7afb5b78 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c
> +++ b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c
> @@ -928,11 +928,55 @@ static int renoir_set_peak_clock_by_device(struct
> smu_context *smu)
>   return ret;
>  }
>
> +static int renior_set_dpm_profile_freq(struct smu_context *smu,
> +   enum amd_dpm_forced_level level,
> +   enum smu_clk_type clk_type) {
> +   int ret = 0;
> +   uint32_t sclk = 0, socclk = 0, fclk = 0;
> +
> +   switch (clk_type) {
> +   case SMU_GFXCLK:
> +   case SMU_SCLK:
> +   sclk = RENOIR_UMD_PSTATE_GFXCLK;
> +   if (level == AMD_DPM_FORCED_LEVEL_PROFILE_PEAK)
> +   renoir_get_dpm_ultimate_freq(smu, SMU_SCLK, NULL, 
> &sclk);
> +   else if (level == AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK)
> +   renoir_get_dpm_ultimate_freq(smu, SMU_SCLK, &sclk, 
> NULL);
> +   break;
> +   case SMU_SOCCLK:
> +   socclk = RENOIR_UMD_PSTATE_SOCCLK;
> +   if (level == AMD_DPM_FORCED_LEVEL_PROFILE_PEAK)
> +   renoir_get_dpm_ultimate_freq(smu, SMU_SOCCLK, NULL,
> &socclk);
> +   break;
> +   case SMU_FCLK:
We should add case SMU_MCLK here. With this fixed, you can add my RB.

Reviewed-by: Tim Huang 



> +   fclk = RENOIR_UMD_PSTATE_FCLK;
> +   if (level == AMD_DPM_FORCED_LEVEL_PROFILE_PEAK)
> +   renoir_get_dpm_ultimate_freq(smu, SMU_FCLK, NULL, 
> &fclk);
> +   else if (level == AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK)
> +   renoir_get_dpm_ultimate_freq(smu, SMU_FCLK, &fclk, 
> NULL);
> +   break;
> +   default:
> +   ret = -EINVAL;
> +   break;
> +   }
> +
> +   if (sclk)
> +   ret = smu_v12_0_set_soft_freq_limited_range(smu,
> + SMU_SCLK, sclk, sclk);
> +
> +   if (socclk)
> +   ret = smu_v12_0_set_soft_freq_limited_range(smu,
> + SMU_SOCCLK, socclk, socclk);
> +
> +   if (fclk)
> +   ret = smu_v12_0_set_soft_freq_limited_range(smu,
> + SMU_FCLK, fclk, fclk);
> +
> +   return ret;
> +}
> +
>  static int renoir_set_performance_level(struct smu_context *smu,
>   enum amd_dpm_forced_level level)
>  {
>   int ret = 0;
> - uint32_t sclk_mask, mclk_mask, soc_mask;
>
>   switch (level) {
>   case AMD_DPM_FORCED_LEVEL_HIGH:
> @@ -1012,15 +1056,9 @@ static int renoir_set_performance_level(struct
> smu_context *smu,
>   smu->gfx_actual_hard_min_freq = smu-
> >gfx_default_hard_min_freq;
>   smu->gfx_actual_soft_max_freq = smu-
> >gfx_default_soft_max_freq;
>
> - ret = renoir_get_profiling_clk_mask(smu, level,
> - &sclk_mask,
> - &mclk_mask,
> - &soc_mask);
> - if (ret)
> - return ret;
> - renoir_force_clk_levels(smu, SMU_SCLK, 1 << sclk_mask);
> - renoir_force_clk_levels(smu, SMU_MCLK, 1 << mclk_mask);
> - renoir_force_clk_levels(smu, SMU_SOCCLK, 1 << soc_mask);
> + renior_set_dpm_profile_freq(smu, level, SMU_SCLK);
> + renior_set_dpm_profile_freq(smu, level, SMU_MCLK);
> + renior_set_dpm_profile_freq(smu, level, SMU_SOCCLK);
>   break;
>   case AMD_DPM_FORCED_LEVEL_PROFILE_PEAK:
>   smu->gfx_actual_hard_min_freq = smu-
> >gfx_default_hard_min_freq;
> --
> 2.25.1

[PATCH 2/2] drm/amd/pm: enable UMD Pstate profile level for renoir

2024-05-06 Thread Jesse Zhang

This patch enables the UMD Pstate profile
levels for the renoir_set_performance_level interface.

 -profile_min_sclk
 -profile_min_fclk

Signed-off-by: Jesse Zhang 
Suggested-by: Tim Huang 
---
 .../gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c   | 58 +++
 1 file changed, 48 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c
index 8908bbb3ff1f..e56b7afb5b78 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c
@@ -928,11 +928,55 @@ static int renoir_set_peak_clock_by_device(struct 
smu_context *smu)
return ret;
 }
 
+static int renior_set_dpm_profile_freq(struct smu_context *smu,
+   enum amd_dpm_forced_level level,
+   enum smu_clk_type clk_type)
+{
+   int ret = 0;
+   uint32_t sclk = 0, socclk = 0, fclk = 0;
+
+   switch (clk_type) {
+   case SMU_GFXCLK:
+   case SMU_SCLK:
+   sclk = RENOIR_UMD_PSTATE_GFXCLK;
+   if (level == AMD_DPM_FORCED_LEVEL_PROFILE_PEAK)
+   renoir_get_dpm_ultimate_freq(smu, SMU_SCLK, NULL, 
&sclk);
+   else if (level == AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK)
+   renoir_get_dpm_ultimate_freq(smu, SMU_SCLK, &sclk, 
NULL);
+   break;
+   case SMU_SOCCLK:
+   socclk = RENOIR_UMD_PSTATE_SOCCLK;
+   if (level == AMD_DPM_FORCED_LEVEL_PROFILE_PEAK)
+   renoir_get_dpm_ultimate_freq(smu, SMU_SOCCLK, NULL, 
&socclk);
+   break;
+   case SMU_FCLK:
+   fclk = RENOIR_UMD_PSTATE_FCLK;
+   if (level == AMD_DPM_FORCED_LEVEL_PROFILE_PEAK)
+   renoir_get_dpm_ultimate_freq(smu, SMU_FCLK, NULL, 
&fclk);
+   else if (level == AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK)
+   renoir_get_dpm_ultimate_freq(smu, SMU_FCLK, &fclk, 
NULL);
+   break;
+   default:
+   ret = -EINVAL;
+   break;
+   }
+
+   if (sclk)
+   ret = smu_v12_0_set_soft_freq_limited_range(smu, SMU_SCLK, 
sclk, sclk);
+
+   if (socclk)
+   ret = smu_v12_0_set_soft_freq_limited_range(smu, SMU_SOCCLK, 
socclk, socclk);
+
+   if (fclk)
+   ret = smu_v12_0_set_soft_freq_limited_range(smu, SMU_FCLK, 
fclk, fclk);
+
+   return ret;
+}
+
 static int renoir_set_performance_level(struct smu_context *smu,
enum amd_dpm_forced_level level)
 {
int ret = 0;
-   uint32_t sclk_mask, mclk_mask, soc_mask;
 
switch (level) {
case AMD_DPM_FORCED_LEVEL_HIGH:
@@ -1012,15 +1056,9 @@ static int renoir_set_performance_level(struct 
smu_context *smu,
smu->gfx_actual_hard_min_freq = smu->gfx_default_hard_min_freq;
smu->gfx_actual_soft_max_freq = smu->gfx_default_soft_max_freq;
 
-   ret = renoir_get_profiling_clk_mask(smu, level,
-   &sclk_mask,
-   &mclk_mask,
-   &soc_mask);
-   if (ret)
-   return ret;
-   renoir_force_clk_levels(smu, SMU_SCLK, 1 << sclk_mask);
-   renoir_force_clk_levels(smu, SMU_MCLK, 1 << mclk_mask);
-   renoir_force_clk_levels(smu, SMU_SOCCLK, 1 << soc_mask);
+   renior_set_dpm_profile_freq(smu, level, SMU_SCLK);
+   renior_set_dpm_profile_freq(smu, level, SMU_MCLK);
+   renior_set_dpm_profile_freq(smu, level, SMU_SOCCLK);
break;
case AMD_DPM_FORCED_LEVEL_PROFILE_PEAK:
smu->gfx_actual_hard_min_freq = smu->gfx_default_hard_min_freq;
-- 
2.25.1

[PATCH 1/2] drm/amd/pm: revert the commit 576bffd10d01

2024-05-06 Thread Jesse Zhang

Revert commit 576bffd10d01; an updated patch will follow.

Signed-off-by: Jesse Zhang 
---
 .../gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c   | 32 +++
 1 file changed, 5 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c
index 36a49cfc22e4..8908bbb3ff1f 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c
@@ -835,20 +835,10 @@ static int renoir_force_clk_levels(struct smu_context 
*smu,
ret = renoir_get_dpm_clk_limited(smu, clk_type, soft_max_level, 
&max_freq);
if (ret)
return ret;
-/* =  0: min_freq
- * =  1: UMD_PSTATE_CLK
- * >= 2: max_freq
- */
-   ret = smu_cmn_send_smc_msg_with_param(smu, 
SMU_MSG_SetSoftMaxSocclkByFreq,
-   soft_max_level == 0 ? 
min_freq :
-   soft_max_level == 1 ? 
RENOIR_UMD_PSTATE_SOCCLK : max_freq,
-   NULL);
+   ret = smu_cmn_send_smc_msg_with_param(smu, 
SMU_MSG_SetSoftMaxSocclkByFreq, max_freq, NULL);
if (ret)
return ret;
-   ret = smu_cmn_send_smc_msg_with_param(smu, 
SMU_MSG_SetHardMinSocclkByFreq,
-   soft_min_level == 0 ? 
min_freq :
-   soft_min_level == 1 ? 
RENOIR_UMD_PSTATE_SOCCLK : max_freq,
-   NULL);
+   ret = smu_cmn_send_smc_msg_with_param(smu, 
SMU_MSG_SetHardMinSocclkByFreq, min_freq, NULL);
if (ret)
return ret;
break;
@@ -860,21 +850,10 @@ static int renoir_force_clk_levels(struct smu_context 
*smu,
ret = renoir_get_dpm_clk_limited(smu, clk_type, soft_max_level, 
&max_freq);
if (ret)
return ret;
-   /* mclk levels are in reverse order
-* =  0: max_freq
-* =  1: UMD_PSTATE_CLK
-* >= 2: min_freq
-*/
-   ret = smu_cmn_send_smc_msg_with_param(smu, 
SMU_MSG_SetSoftMaxFclkByFreq,
-   soft_max_level >= 2 ? 
min_freq :
-   soft_max_level == 1 ? 
RENOIR_UMD_PSTATE_FCLK : max_freq,
-   NULL);
+   ret = smu_cmn_send_smc_msg_with_param(smu, 
SMU_MSG_SetSoftMaxFclkByFreq, max_freq, NULL);
if (ret)
return ret;
-   ret = smu_cmn_send_smc_msg_with_param(smu, 
SMU_MSG_SetHardMinFclkByFreq,
-   soft_min_level >= 2  ? 
min_freq :
-   soft_min_level == 1 ? 
RENOIR_UMD_PSTATE_SOCCLK : max_freq,
-   NULL);
+   ret = smu_cmn_send_smc_msg_with_param(smu, 
SMU_MSG_SetHardMinFclkByFreq, min_freq, NULL);
if (ret)
return ret;
break;
@@ -953,8 +932,7 @@ static int renoir_set_performance_level(struct smu_context 
*smu,
enum amd_dpm_forced_level level)
 {
int ret = 0;
-   /* default mask is UMD PSTATE CLK */
-   uint32_t sclk_mask = 1, mclk_mask = 1, soc_mask = 1;
+   uint32_t sclk_mask, mclk_mask, soc_mask;
 
switch (level) {
case AMD_DPM_FORCED_LEVEL_HIGH:
-- 
2.25.1

[PATCH 1/2] drm/amd/pm: revert the commit 576bffd10d01

2024-05-06 Thread Jesse Zhang

This patch is not needed; an updated patch will follow.

Signed-off-by: Jesse Zhang 
---
 .../gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c   | 32 +++
 1 file changed, 5 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c
index 36a49cfc22e4..8908bbb3ff1f 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c
@@ -835,20 +835,10 @@ static int renoir_force_clk_levels(struct smu_context 
*smu,
ret = renoir_get_dpm_clk_limited(smu, clk_type, soft_max_level, 
&max_freq);
if (ret)
return ret;
-/* =  0: min_freq
- * =  1: UMD_PSTATE_CLK
- * >= 2: max_freq
- */
-   ret = smu_cmn_send_smc_msg_with_param(smu, 
SMU_MSG_SetSoftMaxSocclkByFreq,
-   soft_max_level == 0 ? 
min_freq :
-   soft_max_level == 1 ? 
RENOIR_UMD_PSTATE_SOCCLK : max_freq,
-   NULL);
+   ret = smu_cmn_send_smc_msg_with_param(smu, 
SMU_MSG_SetSoftMaxSocclkByFreq, max_freq, NULL);
if (ret)
return ret;
-   ret = smu_cmn_send_smc_msg_with_param(smu, 
SMU_MSG_SetHardMinSocclkByFreq,
-   soft_min_level == 0 ? 
min_freq :
-   soft_min_level == 1 ? 
RENOIR_UMD_PSTATE_SOCCLK : max_freq,
-   NULL);
+   ret = smu_cmn_send_smc_msg_with_param(smu, 
SMU_MSG_SetHardMinSocclkByFreq, min_freq, NULL);
if (ret)
return ret;
break;
@@ -860,21 +850,10 @@ static int renoir_force_clk_levels(struct smu_context 
*smu,
ret = renoir_get_dpm_clk_limited(smu, clk_type, soft_max_level, 
&max_freq);
if (ret)
return ret;
-   /* mclk levels are in reverse order
-* =  0: max_freq
-* =  1: UMD_PSTATE_CLK
-* >= 2: min_freq
-*/
-   ret = smu_cmn_send_smc_msg_with_param(smu, 
SMU_MSG_SetSoftMaxFclkByFreq,
-   soft_max_level >= 2 ? 
min_freq :
-   soft_max_level == 1 ? 
RENOIR_UMD_PSTATE_FCLK : max_freq,
-   NULL);
+   ret = smu_cmn_send_smc_msg_with_param(smu, 
SMU_MSG_SetSoftMaxFclkByFreq, max_freq, NULL);
if (ret)
return ret;
-   ret = smu_cmn_send_smc_msg_with_param(smu, 
SMU_MSG_SetHardMinFclkByFreq,
-   soft_min_level >= 2  ? 
min_freq :
-   soft_min_level == 1 ? 
RENOIR_UMD_PSTATE_SOCCLK : max_freq,
-   NULL);
+   ret = smu_cmn_send_smc_msg_with_param(smu, 
SMU_MSG_SetHardMinFclkByFreq, min_freq, NULL);
if (ret)
return ret;
break;
@@ -953,8 +932,7 @@ static int renoir_set_performance_level(struct smu_context 
*smu,
enum amd_dpm_forced_level level)
 {
int ret = 0;
-   /* default mask is UMD PSTATE CLK */
-   uint32_t sclk_mask = 1, mclk_mask = 1, soc_mask = 1;
+   uint32_t sclk_mask, mclk_mask, soc_mask;
 
switch (level) {
case AMD_DPM_FORCED_LEVEL_HIGH:
-- 
2.25.1

RE: [PATCH] drm/amdgpu: Fix out-of-bounds read of df_v1_7_channel_number

2024-05-06 Thread Huang, Tim

[Public]

Reviewed-by: Tim Huang 

Best Regards,
Tim Huang



> -Original Message-
> From: amd-gfx  On Behalf Of Ma Jun
> Sent: Tuesday, May 7, 2024 11:19 AM
> To: amd-gfx@lists.freedesktop.org; Koenig, Christian
> ; Deucher, Alexander
> 
> Cc: Ma, Jun 
> Subject: [PATCH] drm/amdgpu: Fix out-of-bounds read of
> df_v1_7_channel_number
>
> Check the fb_channel_number range to avoid the array out-of-bounds read error
>
> Signed-off-by: Ma Jun 
> ---
>  drivers/gpu/drm/amd/amdgpu/df_v1_7.c | 2 ++
>  1 file changed, 2 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/df_v1_7.c
> b/drivers/gpu/drm/amd/amdgpu/df_v1_7.c
> index 5dfab802..cd298556f7a6 100644
> --- a/drivers/gpu/drm/amd/amdgpu/df_v1_7.c
> +++ b/drivers/gpu/drm/amd/amdgpu/df_v1_7.c
> @@ -70,6 +70,8 @@ static u32 df_v1_7_get_hbm_channel_number(struct
> amdgpu_device *adev)
>   int fb_channel_number;
>
>   fb_channel_number = adev->df.funcs->get_fb_channel_number(adev);
> + if (fb_channel_number >= ARRAY_SIZE(df_v1_7_channel_number))
> + fb_channel_number = 0;
>
>   return df_v1_7_channel_number[fb_channel_number];
>  }
> --
> 2.34.1

RE: [PATCH] drm/amdgpu: fix RAS unload driver issue in SRIOV

2024-05-06 Thread Zhang, Hawking

[AMD Official Use Only - General]

Reviewed-by: Hawking Zhang 

Regards,
Hawking
-Original Message-
From: Wang, Yang(Kevin) 
Sent: Tuesday, May 7, 2024 10:50
To: amd-gfx@lists.freedesktop.org
Cc: Zhang, Hawking ; Zhou1, Tao ; Li, 
Candice 
Subject: [PATCH] drm/amdgpu: fix RAS unload driver issue in SRIOV

Fix a null pointer issue when unloading the driver in SRIOV mode.

Move the SRIOV early return in amdgpu_ras_late_init() below the ACA/MCA setup
so that the amdgpu_mca/aca_xxx_init() related functions are initialized properly.

Signed-off-by: Yang Wang 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 14 --
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 36509fa9fecf..36deac3b1440 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -3611,10 +3611,6 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev)
struct amdgpu_ras_block_object *obj;
int r;

-   /* Guest side doesn't need init ras feature */
-   if (amdgpu_sriov_vf(adev))
-   return 0;
-
amdgpu_ras_event_mgr_init(adev);

if (amdgpu_aca_is_enabled(adev)) {
@@ -3625,7 +3621,8 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev)
if (r)
return r;

-   amdgpu_ras_set_aca_debug_mode(adev, false);
+   if (!amdgpu_sriov_vf(adev))
+   amdgpu_ras_set_aca_debug_mode(adev, false);
} else {
if (amdgpu_in_reset(adev))
r = amdgpu_mca_reset(adev);
@@ -3634,9 +3631,14 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev)
if (r)
return r;

-   amdgpu_ras_set_mca_debug_mode(adev, false);
+   if (!amdgpu_sriov_vf(adev))
+   amdgpu_ras_set_mca_debug_mode(adev, false);
}

+   /* Guest side doesn't need init ras feature */
+   if (amdgpu_sriov_vf(adev))
+   return 0;
+
list_for_each_entry_safe(node, tmp, &adev->ras_list, node) {
obj = node->ras_obj;
if (!obj) {
--
2.34.1

[PATCH] drm/amdgpu: Fix out-of-bounds read of df_v1_7_channel_number

2024-05-06 Thread Ma Jun

Check the fb_channel_number range to avoid the array out-of-bounds
read error

Signed-off-by: Ma Jun 
---
 drivers/gpu/drm/amd/amdgpu/df_v1_7.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/df_v1_7.c 
b/drivers/gpu/drm/amd/amdgpu/df_v1_7.c
index 5dfab802..cd298556f7a6 100644
--- a/drivers/gpu/drm/amd/amdgpu/df_v1_7.c
+++ b/drivers/gpu/drm/amd/amdgpu/df_v1_7.c
@@ -70,6 +70,8 @@ static u32 df_v1_7_get_hbm_channel_number(struct 
amdgpu_device *adev)
int fb_channel_number;
 
fb_channel_number = adev->df.funcs->get_fb_channel_number(adev);
+   if (fb_channel_number >= ARRAY_SIZE(df_v1_7_channel_number))
+   fb_channel_number = 0;
 
return df_v1_7_channel_number[fb_channel_number];
 }
-- 
2.34.1

Re: [PATCH] drm/amdgpu: Assign correct bits for SDMA HDP flush

2024-05-06 Thread Lazar, Lijo




On 5/7/2024 6:00 AM, Harry Wentland wrote:
> This patch is causing crashes of Manor Lords on my Navi 21 on the 6.8.9
> stable kernel. It leads to an assertion failure in wine:
> 
> File: ../src-wine/dlls/winevulkan/loader_thunks.c
> Line: 3621
> 
> Expression "!status && vkEndCommandBuffer""
> 
> This happens both with radv and amdvlk. It starts happening on v6.8.8
> with this patch. The previous patch (drm/amdgpu/sdma5.2: use legacy HDP
> flush for SDMA2/3) is fine.
> 

This patch affects only Aqua Vanjaram SoCs and shouldn't affect any
NV series part (unless something is going badly wrong and the NV series
ends up on this code path).

.emit_hdp_flush = sdma_v4_4_2_ring_emit_hdp_flush,

Thanks,
Lijo

> Harry
> 
> On 2024-04-17 09:19, Alex Deucher wrote:
>> On Wed, Apr 17, 2024 at 8:07 AM Lijo Lazar  wrote:
>>>
>>> HDP Flush request bit can be kept unique per AID, and doesn't need to be
>>> unique SOC-wide. Assign only bits 10-13 for SDMA v4.4.2.
>>>
>>> Signed-off-by: Lijo Lazar 
>>
>> Acked-by: Alex Deucher 
>>
>>> ---
>>>   drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 3 ++-
>>>   1 file changed, 2 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
>>> b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
>>> index f8e2cd514493..09e45ef16c0d 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
>>> @@ -368,7 +368,8 @@ static void
>>> sdma_v4_4_2_ring_emit_hdp_flush(struct amdgpu_ring *ring)
>>>  u32 ref_and_mask = 0;
>>>  const struct nbio_hdp_flush_reg *nbio_hf_reg =
>>> adev->nbio.hdp_flush_reg;
>>>
>>> -   ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0 << ring->me;
>>> +   ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0
>>> +  << (ring->me % adev->sdma.num_inst_per_aid);
>>>
>>>  sdma_v4_4_2_wait_reg_mem(ring, 0, 1,
>>>    
>>> adev->nbio.funcs->get_hdp_flush_done_offset(adev),
>>> -- 
>>> 2.25.1
>>>

[PATCH] drm/amdgpu: fix RAS unload driver issue in SRIOV

2024-05-06 Thread Yang Wang

Fix a null pointer issue when unloading the driver in SRIOV mode.

Move the SRIOV early return in amdgpu_ras_late_init() below the ACA/MCA setup
so that the amdgpu_mca/aca_xxx_init() related functions are initialized properly.

Signed-off-by: Yang Wang 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 14 --
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 36509fa9fecf..36deac3b1440 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -3611,10 +3611,6 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev)
struct amdgpu_ras_block_object *obj;
int r;
 
-   /* Guest side doesn't need init ras feature */
-   if (amdgpu_sriov_vf(adev))
-   return 0;
-
amdgpu_ras_event_mgr_init(adev);
 
if (amdgpu_aca_is_enabled(adev)) {
@@ -3625,7 +3621,8 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev)
if (r)
return r;
 
-   amdgpu_ras_set_aca_debug_mode(adev, false);
+   if (!amdgpu_sriov_vf(adev))
+   amdgpu_ras_set_aca_debug_mode(adev, false);
} else {
if (amdgpu_in_reset(adev))
r = amdgpu_mca_reset(adev);
@@ -3634,9 +3631,14 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev)
if (r)
return r;
 
-   amdgpu_ras_set_mca_debug_mode(adev, false);
+   if (!amdgpu_sriov_vf(adev))
+   amdgpu_ras_set_mca_debug_mode(adev, false);
}
 
+   /* Guest side doesn't need init ras feature */
+   if (amdgpu_sriov_vf(adev))
+   return 0;
+
list_for_each_entry_safe(node, tmp, &adev->ras_list, node) {
obj = node->ras_obj;
if (!obj) {
-- 
2.34.1

RE: [PATCH] drm/amd/pm: fix the uninitialized scalar variable warning

2024-05-06 Thread Huang, Tim

[AMD Official Use Only - General]

Hi Jesse,

> -Original Message-
> From: Zhang, Jesse(Jie) 
> Sent: Monday, May 6, 2024 2:21 PM
> To: Zhang, Jesse(Jie) ; amd-gfx@lists.freedesktop.org
> Cc: Deucher, Alexander ; Koenig, Christian
> ; Huang, Tim 
> Subject: RE: [PATCH] drm/amd/pm: fix the uninitialized scalar variable warning
>
> [AMD Official Use Only - General]
>
> Ping ...
>
> -Original Message-
> From: Jesse Zhang 
> Sent: Tuesday, April 30, 2024 3:14 PM
> To: amd-gfx@lists.freedesktop.org
> Cc: Deucher, Alexander ; Koenig, Christian
> ; Huang, Tim ; Zhang,
> Jesse(Jie) ; Zhang, Jesse(Jie) 
> Subject: [PATCH] drm/amd/pm: fix the uninitialized scalar variable warning
>
> Fix warning for using uninitialized values sclk_mask, mclk_mask and soc_mask.
> v2:Set default variable to UMD PSTATE(Tim Huang)
>
> Signed-off-by: Jesse Zhang 
> ---
>  .../gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c   | 32 ---
>  1 file changed, 27 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c
> b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c
> index 8908bbb3ff1f..36a49cfc22e4 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c
> +++ b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c
> @@ -835,10 +835,20 @@ static int renoir_force_clk_levels(struct smu_context
> *smu,
> ret = renoir_get_dpm_clk_limited(smu, clk_type, 
> soft_max_level,
> &max_freq);
> if (ret)
> return ret;
> -   ret = smu_cmn_send_smc_msg_with_param(smu,
> SMU_MSG_SetSoftMaxSocclkByFreq, max_freq, NULL);
> +/* =  0: min_freq
> + * =  1: UMD_PSTATE_CLK
> + * >= 2: max_freq
> + */
> +   ret = smu_cmn_send_smc_msg_with_param(smu,
> SMU_MSG_SetSoftMaxSocclkByFreq,
> +   soft_max_level == 0 ? 
> min_freq :
> +   soft_max_level == 1 ?
> RENOIR_UMD_PSTATE_SOCCLK : max_freq,
> +   NULL);
> if (ret)
> return ret;
> -   ret = smu_cmn_send_smc_msg_with_param(smu,
> SMU_MSG_SetHardMinSocclkByFreq, min_freq, NULL);
> +   ret = smu_cmn_send_smc_msg_with_param(smu,
> SMU_MSG_SetHardMinSocclkByFreq,
> +   soft_min_level == 0 ? 
> min_freq :
> +   soft_min_level == 1 ?
> RENOIR_UMD_PSTATE_SOCCLK : max_freq,
> +   NULL);
> if (ret)
> return ret;
> break;
> @@ -850,10 +860,21 @@ static int renoir_force_clk_levels(struct smu_context
> *smu,
> ret = renoir_get_dpm_clk_limited(smu, clk_type, 
> soft_max_level,
> &max_freq);
> if (ret)
> return ret;
> -   ret = smu_cmn_send_smc_msg_with_param(smu,
> SMU_MSG_SetSoftMaxFclkByFreq, max_freq, NULL);
> +   /* mclk levels are in reverse order
> +* =  0: max_freq
> +* =  1: UMD_PSTATE_CLK
> +* >= 2: min_freq
> +*/
> +   ret = smu_cmn_send_smc_msg_with_param(smu,
> SMU_MSG_SetSoftMaxFclkByFreq,
> +   soft_max_level >= 2 ? 
> min_freq :
> +   soft_max_level == 1 ?
> RENOIR_UMD_PSTATE_FCLK : max_freq,
> +   NULL);
> if (ret)
> return ret;
> -   ret = smu_cmn_send_smc_msg_with_param(smu,
> SMU_MSG_SetHardMinFclkByFreq, min_freq, NULL);
> +   ret = smu_cmn_send_smc_msg_with_param(smu,
> SMU_MSG_SetHardMinFclkByFreq,
> +   soft_min_level >= 2  
> ? min_freq :
> +   soft_min_level == 1 ?
> RENOIR_UMD_PSTATE_SOCCLK : max_freq,
> +   NULL);

It's not the fault of your patch. The original implementation may not set the
correct min frequency for MCLK when the performance level is set to
PROFILE_MIN_MCLK.
For that case, we should make min_freq = max_freq =
clk_table->FClocks[NUM_FCLK_DPM_LEVELS-1].Freq.

Tim

> if (ret)
> return ret;
> break;
> @@ -932,7 +953,8 @@ static int renoir_set_performance_level(struct
> smu_context *smu,
> enum amd_dpm_forced_level level)  {
> int ret = 0;
> -   uint32_t sclk_mask, mclk_mask, soc_mask;
> +   /* default mask is UMD PSTATE CLK */
> +   uint32_t sclk_mask = 1, mclk_mask = 1, soc_mask = 1;
>
> switch (level) {
>

RE: [PATCH] drm/amdgpu: ignoring unsupported ras blocks when MCA bank dispatches

2024-05-06 Thread Li, Candice

[AMD Official Use Only - General]

Reviewed-by: Candice Li 



Thanks,
Candice

-Original Message-
From: Wang, Yang(Kevin) 
Sent: Tuesday, May 7, 2024 9:36 AM
To: Wang, Yang(Kevin) ; amd-gfx@lists.freedesktop.org
Cc: Zhang, Hawking ; Zhou1, Tao ; Li, 
Candice ; Zhou, Hao (Claire) 
Subject: RE: [PATCH] drm/amdgpu: ignoring unsupported ras blocks when MCA bank 
dispatches

Ping...

Best Regards,
Kevin

-Original Message-
From: amd-gfx  On Behalf Of Yang Wang
Sent: 2024年5月6日 14:47
To: amd-gfx@lists.freedesktop.org
Cc: Zhang, Hawking ; Zhou1, Tao ; Li, 
Candice 
Subject: [PATCH] drm/amdgpu: ignoring unsupported ras blocks when MCA bank 
dispatches

This patch fixes incorrect parsing of error counts.
When a UE triggers a GPU reset, the driver attempts to parse all possible
RAS blocks.
For RAS blocks that are not supported by the current ASIC, the driver should
ignore this error.

Signed-off-by: Yang Wang 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c
index a7736aa58ba9..0c9a271c5028 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c
@@ -415,7 +415,7 @@ static int amdgpu_mca_dispatch_mca_set(struct amdgpu_device 
*adev, enum amdgpu_r

count = 0;
ret = amdgpu_mca_smu_parse_mca_error_count(adev, blk, type, 
entry, &count);
-   if (ret)
+   if (ret && ret != -EOPNOTSUPP)
return ret;

if (!count)
--
2.34.1

RE: [PATCH] drm/amdgpu: ignoring unsupported ras blocks when MCA bank dispatches

2024-05-06 Thread Wang, Yang(Kevin)

Ping...

Best Regards,
Kevin

-Original Message-
From: amd-gfx  On Behalf Of Yang Wang
Sent: 2024年5月6日 14:47
To: amd-gfx@lists.freedesktop.org
Cc: Zhang, Hawking ; Zhou1, Tao ; Li, 
Candice 
Subject: [PATCH] drm/amdgpu: ignoring unsupported ras blocks when MCA bank 
dispatches

This patch fixes incorrect parsing of error counts.
When a UE triggers a GPU reset, the driver attempts to parse all possible
RAS blocks.
For RAS blocks that are not supported by the current ASIC, the driver should
ignore this error.

Signed-off-by: Yang Wang 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c
index a7736aa58ba9..0c9a271c5028 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c
@@ -415,7 +415,7 @@ static int amdgpu_mca_dispatch_mca_set(struct amdgpu_device 
*adev, enum amdgpu_r
 
count = 0;
ret = amdgpu_mca_smu_parse_mca_error_count(adev, blk, type, 
entry, &count);
-   if (ret)
+   if (ret && ret != -EOPNOTSUPP)
return ret;
 
if (!count)
-- 
2.34.1

Re: [PATCH] drm/amdgpu: Assign correct bits for SDMA HDP flush

2024-05-06 Thread Harry Wentland

This patch is causing crashes of Manor Lords on my Navi 21 on the 6.8.9 
stable kernel. It leads to an assertion failure in wine:


File: ../src-wine/dlls/winevulkan/loader_thunks.c
Line: 3621

Expression "!status && vkEndCommandBuffer""

This happens both with radv and amdvlk. It starts happening on v6.8.8 
with this patch. The previous patch (drm/amdgpu/sdma5.2: use legacy HDP 
flush for SDMA2/3) is fine.


Harry

On 2024-04-17 09:19, Alex Deucher wrote:

On Wed, Apr 17, 2024 at 8:07 AM Lijo Lazar  wrote:


HDP Flush request bit can be kept unique per AID, and doesn't need to be
unique SOC-wide. Assign only bits 10-13 for SDMA v4.4.2.

Signed-off-by: Lijo Lazar 


Acked-by: Alex Deucher 


---
  drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 3 ++-
  1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c 
b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
index f8e2cd514493..09e45ef16c0d 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
@@ -368,7 +368,8 @@ static void sdma_v4_4_2_ring_emit_hdp_flush(struct 
amdgpu_ring *ring)
 u32 ref_and_mask = 0;
 const struct nbio_hdp_flush_reg *nbio_hf_reg = 
adev->nbio.hdp_flush_reg;

-   ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0 << ring->me;
+   ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0
+  << (ring->me % adev->sdma.num_inst_per_aid);

 sdma_v4_4_2_wait_reg_mem(ring, 0, 1,

adev->nbio.funcs->get_hdp_flush_done_offset(adev),
--
2.25.1

Re: [PATCH] drm/amdkfd: Ensure gpu_id is unique

2024-05-06 Thread Felix Kuehling


On 2024-05-06 17:10, Harish Kasiviswanathan wrote:

On 2024-05-06 16:30, Felix Kuehling wrote:

On 2024-05-03 18:06, Harish Kasiviswanathan wrote:

gpu_id needs to be unique for user space to identify GPUs via KFD
interface. In the current implementation there is a very small
probability of having non unique gpu_ids.

v2: Add check to confirm if gpu_id is unique. If not unique, find one
  Changed commit header to reflect the above

Signed-off-by: Harish Kasiviswanathan 
---
   drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 26 ++-
   1 file changed, 25 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index b93913934b03..01d4c2e10c6d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -1095,6 +1095,8 @@ static uint32_t kfd_generate_gpu_id(struct kfd_node *gpu)
   uint32_t hashout;
   uint32_t buf[8];
   uint64_t local_mem_size;
+    struct kfd_topology_device *dev;
+    bool is_unique;
   int i;
     if (!gpu)
@@ -1115,6 +1117,28 @@ static uint32_t kfd_generate_gpu_id(struct kfd_node *gpu)
   for (i = 0, hashout = 0; i < 8; i++)
   hashout ^= hash_32(buf[i], KFD_GPU_ID_HASH_WIDTH);
   +    /* hash generated could be non-unique. Check if it is unique.
+ * If not unique increment till unique one is found. In case
+ * of overflow, restart from 1
+    */
+    down_read(&topology_lock);
+    do {
+    is_unique = true;
+    list_for_each_entry(dev, &topology_device_list, list) {
+    if (dev->gpu && dev->gpu_id == hashout) {
+    is_unique = false;
+    break;
+    }
+    }
+    if (unlikely(!is_unique)) {
+    hashout = (hashout + 1) &
+  ((1 << KFD_GPU_ID_HASH_WIDTH) - 1);
+    if (!hashout)
+    hashout = 1;

This doesn't catch the case that hashout was 0 before incrementing it, and was 
found to be unique.

I didn't actively think about this case when I sent the patch out. However, we 
don't have gpu_id to be 0. There are places where gpu_id=0 means it is CPU node


I think we make that assumption in a few places, both in kernel mode and 
user mode, e.g.:


struct kfd_process_device *kfd_process_device_data_by_id(struct kfd_process *p, 
uint32_t gpu_id)
{
int i;

if (gpu_id) {
for (i = 0; i < p->n_pdds; i++) {
struct kfd_process_device *pdd = p->pdds[i];

if (pdd->user_gpu_id == gpu_id)
return pdd;
}
}
return NULL;
}

Or in the Thunk in hsaKmtGetNodeProperties:

/* For CPU only node don't add any additional GPU memory banks. */
if (gpu_id) {
uint64_t base, limit;
if (is_dgpu)
NodeProperties->NumMemoryBanks += NUM_OF_DGPU_HEAPS;
else
NodeProperties->NumMemoryBanks += NUM_OF_IGPU_HEAPS;
if (fmm_get_aperture_base_and_limit(FMM_MMIO, gpu_id, &base,
&limit) == HSAKMT_STATUS_SUCCESS)
NodeProperties->NumMemoryBanks += 1;
}

Regards,
  Felix





Regards,
   Felix



+    }
+    } while (!is_unique);
+    up_read(&topology_lock);
+
   return hashout;
   }
   /* kfd_assign_gpu - Attach @gpu to the correct kfd topology device. If
@@ -1946,7 +1970,6 @@ int kfd_topology_add_device(struct kfd_node *gpu)
   struct amdgpu_gfx_config *gfx_info = &gpu->adev->gfx.config;
   struct amdgpu_cu_info *cu_info = &gpu->adev->gfx.cu_info;
   -    gpu_id = kfd_generate_gpu_id(gpu);
   if (gpu->xcp && !gpu->xcp->ddev) {
   dev_warn(gpu->adev->dev,
    "Won't add GPU to topology since it has no drm node assigned.");
@@ -1969,6 +1992,7 @@ int kfd_topology_add_device(struct kfd_node *gpu)
   if (res)
   return res;
   +    gpu_id = kfd_generate_gpu_id(gpu);
   dev->gpu_id = gpu_id;
   gpu->id = gpu_id;

Re: [PATCH] drm/amdkfd: Ensure gpu_id is unique

2024-05-06 Thread Harish Kasiviswanathan




On 2024-05-06 16:30, Felix Kuehling wrote:
> 
> On 2024-05-03 18:06, Harish Kasiviswanathan wrote:
>> gpu_id needs to be unique for user space to identify GPUs via KFD
>> interface. In the current implementation there is a very small
>> probability of having non unique gpu_ids.
>>
>> v2: Add check to confirm if gpu_id is unique. If not unique, find one
>>  Changed commit header to reflect the above
>>
>> Signed-off-by: Harish Kasiviswanathan 
>> ---
>>   drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 26 ++-
>>   1 file changed, 25 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c 
>> b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
>> index b93913934b03..01d4c2e10c6d 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
>> @@ -1095,6 +1095,8 @@ static uint32_t kfd_generate_gpu_id(struct kfd_node 
>> *gpu)
>>   uint32_t hashout;
>>   uint32_t buf[8];
>>   uint64_t local_mem_size;
>> +    struct kfd_topology_device *dev;
>> +    bool is_unique;
>>   int i;
>>     if (!gpu)
>> @@ -1115,6 +1117,28 @@ static uint32_t kfd_generate_gpu_id(struct kfd_node 
>> *gpu)
>>   for (i = 0, hashout = 0; i < 8; i++)
>>   hashout ^= hash_32(buf[i], KFD_GPU_ID_HASH_WIDTH);
>>   +    /* hash generated could be non-unique. Check if it is unique.
>> + * If not unique increment till unique one is found. In case
>> + * of overflow, restart from 1
>> +    */
>> +    down_read(&topology_lock);
>> +    do {
>> +    is_unique = true;
>> +    list_for_each_entry(dev, &topology_device_list, list) {
>> +    if (dev->gpu && dev->gpu_id == hashout) {
>> +    is_unique = false;
>> +    break;
>> +    }
>> +    }
>> +    if (unlikely(!is_unique)) {
>> +    hashout = (hashout + 1) &
>> +  ((1 << KFD_GPU_ID_HASH_WIDTH) - 1);
>> +    if (!hashout)
>> +    hashout = 1;
> 
> This doesn't catch the case that hashout was 0 before incrementing it, and 
> was found to be unique.

I didn't actively think about this case when I sent the patch out. However, we 
don't have gpu_id to be 0. There are places where gpu_id=0 means it is CPU node

> 
> Regards,
>   Felix
> 
> 
>> +    }
>> +    } while (!is_unique);
>> +    up_read(&topology_lock);
>> +
>>   return hashout;
>>   }
>>   /* kfd_assign_gpu - Attach @gpu to the correct kfd topology device. If
>> @@ -1946,7 +1970,6 @@ int kfd_topology_add_device(struct kfd_node *gpu)
>>   struct amdgpu_gfx_config *gfx_info = &gpu->adev->gfx.config;
>>   struct amdgpu_cu_info *cu_info = &gpu->adev->gfx.cu_info;
>>   -    gpu_id = kfd_generate_gpu_id(gpu);
>>   if (gpu->xcp && !gpu->xcp->ddev) {
>>   dev_warn(gpu->adev->dev,
>>    "Won't add GPU to topology since it has no drm node 
>> assigned.");
>> @@ -1969,6 +1992,7 @@ int kfd_topology_add_device(struct kfd_node *gpu)
>>   if (res)
>>   return res;
>>   +    gpu_id = kfd_generate_gpu_id(gpu);
>>   dev->gpu_id = gpu_id;
>>   gpu->id = gpu_id;
>>

Re: [PATCH] drm/amdkfd: Ensure gpu_id is unique

2024-05-06 Thread Felix Kuehling




On 2024-05-03 18:06, Harish Kasiviswanathan wrote:

gpu_id needs to be unique for user space to identify GPUs via KFD
interface. In the current implementation there is a very small
probability of having non unique gpu_ids.

v2: Add check to confirm if gpu_id is unique. If not unique, find one
 Changed commit header to reflect the above

Signed-off-by: Harish Kasiviswanathan 
---
  drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 26 ++-
  1 file changed, 25 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index b93913934b03..01d4c2e10c6d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -1095,6 +1095,8 @@ static uint32_t kfd_generate_gpu_id(struct kfd_node *gpu)
uint32_t hashout;
uint32_t buf[8];
uint64_t local_mem_size;
+   struct kfd_topology_device *dev;
+   bool is_unique;
int i;
  
  	if (!gpu)

@@ -1115,6 +1117,28 @@ static uint32_t kfd_generate_gpu_id(struct kfd_node *gpu)
for (i = 0, hashout = 0; i < 8; i++)
hashout ^= hash_32(buf[i], KFD_GPU_ID_HASH_WIDTH);
  
+	/* hash generated could be non-unique. Check if it is unique.

+* If not unique increment till unique one is found. In case
+* of overflow, restart from 1
+   */
+   down_read(&topology_lock);
+   do {
+   is_unique = true;
+   list_for_each_entry(dev, &topology_device_list, list) {
+   if (dev->gpu && dev->gpu_id == hashout) {
+   is_unique = false;
+   break;
+   }
+   }
+   if (unlikely(!is_unique)) {
+   hashout = (hashout + 1) &
+ ((1 << KFD_GPU_ID_HASH_WIDTH) - 1);
+   if (!hashout)
+   hashout = 1;


This doesn't catch the case that hashout was 0 before incrementing it, 
and was found to be unique.


Regards,
  Felix



+   }
+   } while (!is_unique);
+   up_read(&topology_lock);
+
return hashout;
  }
  /* kfd_assign_gpu - Attach @gpu to the correct kfd topology device. If
@@ -1946,7 +1970,6 @@ int kfd_topology_add_device(struct kfd_node *gpu)
struct amdgpu_gfx_config *gfx_info = &gpu->adev->gfx.config;
struct amdgpu_cu_info *cu_info = &gpu->adev->gfx.cu_info;
  
-	gpu_id = kfd_generate_gpu_id(gpu);

if (gpu->xcp && !gpu->xcp->ddev) {
dev_warn(gpu->adev->dev,
 "Won't add GPU to topology since it has no drm node 
assigned.");
@@ -1969,6 +1992,7 @@ int kfd_topology_add_device(struct kfd_node *gpu)
if (res)
return res;
  
+	gpu_id = kfd_generate_gpu_id(gpu);

dev->gpu_id = gpu_id;
gpu->id = gpu_id;

Re: [PATCH] drm/amdkfd: Refactor kfd CRIU into its own file

2024-05-06 Thread Felix Kuehling




On 2024-05-06 15:20, David Francis wrote:

The kfd CRIU code takes up about a thousand lines
in the kfd_chardev file; move it to its own file.

No functional change intended.

Signed-off-by: David Francis 
---
  drivers/gpu/drm/amd/amdkfd/Makefile  |   1 +
  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 972 +-
  drivers/gpu/drm/amd/amdkfd/kfd_criu.c| 989 +++
  drivers/gpu/drm/amd/amdkfd/kfd_criu.h|  50 ++
  4 files changed, 1046 insertions(+), 966 deletions(-)
  create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_criu.c
  create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_criu.h

diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile 
b/drivers/gpu/drm/amd/amdkfd/Makefile
index 0d3d8972240d..e06af4073ac5 100644
--- a/drivers/gpu/drm/amd/amdkfd/Makefile
+++ b/drivers/gpu/drm/amd/amdkfd/Makefile
@@ -32,6 +32,7 @@ AMDKFD_FILES  := $(AMDKFD_PATH)/kfd_module.o \
$(AMDKFD_PATH)/kfd_flat_memory.o \
$(AMDKFD_PATH)/kfd_process.o \
$(AMDKFD_PATH)/kfd_queue.o \
+   $(AMDKFD_PATH)/kfd_criu.o \


Any particular reason for adding this in the middle and not the end?



$(AMDKFD_PATH)/kfd_mqd_manager.o \
$(AMDKFD_PATH)/kfd_mqd_manager_cik.o \
$(AMDKFD_PATH)/kfd_mqd_manager_vi.o \
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 6b713fb0b818..e6e44a199a93 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -45,6 +45,7 @@


Can you remove #include  and "amdgpu_dma_buf.h" here? 
Or is it still needed by something else left in kfd_chardev.c?


Other than that, this patch is

Reviewed-by: Felix Kuehling 



  #include "kfd_smi_events.h"
  #include "amdgpu_dma_buf.h"
  #include "kfd_debug.h"
+#include "kfd_criu.h"
  
  static long kfd_ioctl(struct file *, unsigned int, unsigned long);

  static int kfd_open(struct inode *, struct file *);
@@ -1751,967 +1752,6 @@ static int kfd_ioctl_svm(struct file *filep, struct 
kfd_process *p, void *data)
  }
  #endif
  
-static int criu_checkpoint_process(struct kfd_process *p,

-uint8_t __user *user_priv_data,
-uint64_t *priv_offset)
-{
-   struct kfd_criu_process_priv_data process_priv;
-   int ret;
-
-   memset(&process_priv, 0, sizeof(process_priv));
-
-   process_priv.version = KFD_CRIU_PRIV_VERSION;
-   /* For CR, we don't consider negative xnack mode which is used for
-* querying without changing it, here 0 simply means disabled and 1
-* means enabled so retry for finding a valid PTE.
-*/
-   process_priv.xnack_mode = p->xnack_enabled ? 1 : 0;
-
-   ret = copy_to_user(user_priv_data + *priv_offset,
-   &process_priv, sizeof(process_priv));
-
-   if (ret) {
-   pr_err("Failed to copy process information to user\n");
-   ret = -EFAULT;
-   }
-
-   *priv_offset += sizeof(process_priv);
-   return ret;
-}
-
-static int criu_checkpoint_devices(struct kfd_process *p,
-uint32_t num_devices,
-uint8_t __user *user_addr,
-uint8_t __user *user_priv_data,
-uint64_t *priv_offset)
-{
-   struct kfd_criu_device_priv_data *device_priv = NULL;
-   struct kfd_criu_device_bucket *device_buckets = NULL;
-   int ret = 0, i;
-
-   device_buckets = kvzalloc(num_devices * sizeof(*device_buckets), 
GFP_KERNEL);
-   if (!device_buckets) {
-   ret = -ENOMEM;
-   goto exit;
-   }
-
-   device_priv = kvzalloc(num_devices * sizeof(*device_priv), GFP_KERNEL);
-   if (!device_priv) {
-   ret = -ENOMEM;
-   goto exit;
-   }
-
-   for (i = 0; i < num_devices; i++) {
-   struct kfd_process_device *pdd = p->pdds[i];
-
-   device_buckets[i].user_gpu_id = pdd->user_gpu_id;
-   device_buckets[i].actual_gpu_id = pdd->dev->id;
-
-   /*
-* priv_data does not contain useful information for now and is 
reserved for
-* future use, so we do not set its contents.
-*/
-   }
-
-   ret = copy_to_user(user_addr, device_buckets, num_devices * 
sizeof(*device_buckets));
-   if (ret) {
-   pr_err("Failed to copy device information to user\n");
-   ret = -EFAULT;
-   goto exit;
-   }
-
-   ret = copy_to_user(user_priv_data + *priv_offset,
-  device_priv,
-  num_devices * sizeof(*device_priv));
-   if (ret) {
-   pr_err("Failed to copy device information to user\n");
-   ret = -EFAULT;
-   }
-   *priv_offset += num_devices * sizeof(*device_priv);
-
-exit:
-   kvfree(device_buckets)

Re: [PATCH] drm/amdkfd: Remove arbitrary timeout for hmm_range_fault

2024-05-06 Thread Felix Kuehling




On 2024-05-01 18:56, Philip Yang wrote:

On systems with khugepaged enabled and use cases with THP buffers,
hmm_range_fault may take more than 15 seconds to return -EBUSY. The arbitrary
timeout value is not accurate and causes memory allocation failures.

Remove the arbitrary timeout value and return EAGAIN to the application if
hmm_range_fault returns EBUSY; userspace libdrm and Thunk will then call the
ioctl again.

Log the EAGAIN case as a debug message, since it is not an error.

Signed-off-by: Philip Yang 


Assuming this passes your stress testing without CPU stall warnings, 
this patch is


Reviewed-by: Felix Kuehling 



---
  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c |  5 -
  drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c  | 12 +++-
  drivers/gpu/drm/amd/amdkfd/kfd_svm.c |  5 +
  3 files changed, 8 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 54198c3928c7..02696c2102f1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -1087,7 +1087,10 @@ static int init_user_pages(struct kgd_mem *mem, uint64_t 
user_addr,
  
  	ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages, &range);

if (ret) {
-   pr_err("%s: Failed to get user pages: %d\n", __func__, ret);
+   if (ret == -EAGAIN)
+   pr_debug("Failed to get user pages, try again\n");
+   else
+   pr_err("%s: Failed to get user pages: %d\n", __func__, 
ret);
goto unregister_out;
}
  
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c

index 431ec72655ec..e36fede7f74c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c
@@ -202,20 +202,12 @@ int amdgpu_hmm_range_get_pages(struct 
mmu_interval_notifier *notifier,
pr_debug("hmm range: start = 0x%lx, end = 0x%lx",
hmm_range->start, hmm_range->end);
  
-		/* Assuming 64MB takes maximum 1 second to fault page address */

-   timeout = max((hmm_range->end - hmm_range->start) >> 26, 1UL);
-   timeout *= HMM_RANGE_DEFAULT_TIMEOUT;
-   timeout = jiffies + msecs_to_jiffies(timeout);
+   timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
  
  retry:

hmm_range->notifier_seq = mmu_interval_read_begin(notifier);
r = hmm_range_fault(hmm_range);
if (unlikely(r)) {
-   schedule();
-   /*
-* FIXME: This timeout should encompass the retry from
-* mmu_interval_read_retry() as well.
-*/
if (r == -EBUSY && !time_after(jiffies, timeout))
goto retry;
goto out_free_pfns;
@@ -247,6 +239,8 @@ int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier 
*notifier,
  out_free_range:
kfree(hmm_range);
  
+	if (r == -EBUSY)

+   r = -EAGAIN;
return r;
  }
  
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c

index 94f83be2232d..e7040f809f33 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -1670,11 +1670,8 @@ static int svm_range_validate_and_map(struct mm_struct 
*mm,
   readonly, owner, NULL,
   &hmm_range);
WRITE_ONCE(p->svms.faulting_task, NULL);
-   if (r) {
+   if (r)
pr_debug("failed %d to get svm range pages\n", 
r);
-   if (r == -EBUSY)
-   r = -EAGAIN;
-   }
} else {
r = -EFAULT;
}

Re: [PATCH 2/2] drm/amd/amdgpu: use the default reset for ras recovery

2024-05-06 Thread Alex Deucher

On Mon, Apr 29, 2024 at 4:07 AM Kenneth Feng  wrote:
>
> use the default reset for ras recovery
>
> Signed-off-by: Kenneth Feng 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 7 +++
>  1 file changed, 7 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> index a037e8fba29f..f92b2c4f0d5c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> @@ -2437,6 +2437,7 @@ static void amdgpu_ras_do_recovery(struct work_struct 
> *work)
> struct amdgpu_device *adev = ras->adev;
> struct list_head device_list, *device_list_handle =  NULL;
> struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
> +   int save_reset_method = amdgpu_reset_method;
>
> if (hive) {
> atomic_set(&hive->ras_recovery, 1);
> @@ -2501,7 +2502,13 @@ static void amdgpu_ras_do_recovery(struct work_struct 
> *work)
> }
> }
>
> +   if (amdgpu_gpu_recovery == 2)
> +   amdgpu_reset_method = -1;
> +
> amdgpu_device_gpu_recover(ras->adev, NULL, &reset_context);
> +
> +   if (amdgpu_gpu_recovery == 2)
> +   amdgpu_reset_method = save_reset_method;

This is racy.  amdgpu_gpu_recovery is a global variable and will be
referenced by all of the AMD GPUs in the system that are using amdgpu.
To handle this properly, we should store the selected reset method in
the adev structure and set that based on the module parameter at
driver bind time.  Then at runtime if we need to change the reset
method, we can change the device specific one in adev.  Maybe it would
be better to have two variables in adev.  E.g., default_reset_method
and override_reset_method.  In cases where we have to use the default
method, we can use that.  In other cases, we can use the override
method.

Alex

> }
> atomic_set(&ras->in_recovery, 0);
> if (hive) {
> --
> 2.34.1
>

[PATCH] drm/amdkfd: Refactor kfd CRIU into its own file

2024-05-06 Thread David Francis

The kfd CRIU code takes up about a thousand lines
in the kfd_chardev file; move it to its own file.

No functional change intended.

Signed-off-by: David Francis 
---
 drivers/gpu/drm/amd/amdkfd/Makefile  |   1 +
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 972 +-
 drivers/gpu/drm/amd/amdkfd/kfd_criu.c| 989 +++
 drivers/gpu/drm/amd/amdkfd/kfd_criu.h|  50 ++
 4 files changed, 1046 insertions(+), 966 deletions(-)
 create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_criu.c
 create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_criu.h

diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile 
b/drivers/gpu/drm/amd/amdkfd/Makefile
index 0d3d8972240d..e06af4073ac5 100644
--- a/drivers/gpu/drm/amd/amdkfd/Makefile
+++ b/drivers/gpu/drm/amd/amdkfd/Makefile
@@ -32,6 +32,7 @@ AMDKFD_FILES  := $(AMDKFD_PATH)/kfd_module.o \
$(AMDKFD_PATH)/kfd_flat_memory.o \
$(AMDKFD_PATH)/kfd_process.o \
$(AMDKFD_PATH)/kfd_queue.o \
+   $(AMDKFD_PATH)/kfd_criu.o \
$(AMDKFD_PATH)/kfd_mqd_manager.o \
$(AMDKFD_PATH)/kfd_mqd_manager_cik.o \
$(AMDKFD_PATH)/kfd_mqd_manager_vi.o \
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 6b713fb0b818..e6e44a199a93 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -45,6 +45,7 @@
 #include "kfd_smi_events.h"
 #include "amdgpu_dma_buf.h"
 #include "kfd_debug.h"
+#include "kfd_criu.h"
 
 static long kfd_ioctl(struct file *, unsigned int, unsigned long);
 static int kfd_open(struct inode *, struct file *);
@@ -1751,967 +1752,6 @@ static int kfd_ioctl_svm(struct file *filep, struct 
kfd_process *p, void *data)
 }
 #endif
 
-static int criu_checkpoint_process(struct kfd_process *p,
-uint8_t __user *user_priv_data,
-uint64_t *priv_offset)
-{
-   struct kfd_criu_process_priv_data process_priv;
-   int ret;
-
-   memset(&process_priv, 0, sizeof(process_priv));
-
-   process_priv.version = KFD_CRIU_PRIV_VERSION;
-   /* For CR, we don't consider negative xnack mode which is used for
-* querying without changing it, here 0 simply means disabled and 1
-* means enabled so retry for finding a valid PTE.
-*/
-   process_priv.xnack_mode = p->xnack_enabled ? 1 : 0;
-
-   ret = copy_to_user(user_priv_data + *priv_offset,
-   &process_priv, sizeof(process_priv));
-
-   if (ret) {
-   pr_err("Failed to copy process information to user\n");
-   ret = -EFAULT;
-   }
-
-   *priv_offset += sizeof(process_priv);
-   return ret;
-}
-
-static int criu_checkpoint_devices(struct kfd_process *p,
-uint32_t num_devices,
-uint8_t __user *user_addr,
-uint8_t __user *user_priv_data,
-uint64_t *priv_offset)
-{
-   struct kfd_criu_device_priv_data *device_priv = NULL;
-   struct kfd_criu_device_bucket *device_buckets = NULL;
-   int ret = 0, i;
-
-   device_buckets = kvzalloc(num_devices * sizeof(*device_buckets), 
GFP_KERNEL);
-   if (!device_buckets) {
-   ret = -ENOMEM;
-   goto exit;
-   }
-
-   device_priv = kvzalloc(num_devices * sizeof(*device_priv), GFP_KERNEL);
-   if (!device_priv) {
-   ret = -ENOMEM;
-   goto exit;
-   }
-
-   for (i = 0; i < num_devices; i++) {
-   struct kfd_process_device *pdd = p->pdds[i];
-
-   device_buckets[i].user_gpu_id = pdd->user_gpu_id;
-   device_buckets[i].actual_gpu_id = pdd->dev->id;
-
-   /*
-* priv_data does not contain useful information for now and is 
reserved for
-* future use, so we do not set its contents.
-*/
-   }
-
-   ret = copy_to_user(user_addr, device_buckets, num_devices * 
sizeof(*device_buckets));
-   if (ret) {
-   pr_err("Failed to copy device information to user\n");
-   ret = -EFAULT;
-   goto exit;
-   }
-
-   ret = copy_to_user(user_priv_data + *priv_offset,
-  device_priv,
-  num_devices * sizeof(*device_priv));
-   if (ret) {
-   pr_err("Failed to copy device information to user\n");
-   ret = -EFAULT;
-   }
-   *priv_offset += num_devices * sizeof(*device_priv);
-
-exit:
-   kvfree(device_buckets);
-   kvfree(device_priv);
-   return ret;
-}
-
-static uint32_t get_process_num_bos(struct kfd_process *p)
-{
-   uint32_t num_of_bos = 0;
-   int i;
-
-   /* Run over all PDDs of the process */
-   for (i = 0; i < p->n_pdds; i++) {
-   struct kfd_process_device *pdd = p->pdds[i];
-

[PATCH 06/14] drm/amdgpu: add set_reg_remap callback for NBIO 7.11

2024-05-06 Thread Alex Deucher

This will be used to consolidate the register remap offset
configuration and fix HDP flushes on systems with non-4K pages.

Reviewed-by: Felix Kuehling 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c | 15 +++
 1 file changed, 15 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c 
b/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c
index 05020141c0aeb..7a9adfda5814a 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c
@@ -352,6 +352,20 @@ static void nbio_v7_11_get_clockgating_state(struct 
amdgpu_device *adev,
*flags |= AMD_CG_SUPPORT_BIF_LS;
 }
 
+#define MMIO_REG_HOLE_OFFSET (0x8 - PAGE_SIZE)
+
+static void nbio_v7_11_set_reg_remap(struct amdgpu_device *adev)
+{
+   if (!amdgpu_sriov_vf(adev) && (PAGE_SIZE <= 4096)) {
+   adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET;
+   adev->rmmio_remap.bus_addr = adev->rmmio_base + 
MMIO_REG_HOLE_OFFSET;
+   } else {
+   adev->rmmio_remap.reg_offset =
+   SOC15_REG_OFFSET(NBIO, 0, 
regBIF_BX_PF1_HDP_MEM_COHERENCY_FLUSH_CNTL) << 2;
+   adev->rmmio_remap.bus_addr = 0;
+   }
+}
+
 const struct amdgpu_nbio_funcs nbio_v7_11_funcs = {
.get_hdp_flush_req_offset = nbio_v7_11_get_hdp_flush_req_offset,
.get_hdp_flush_done_offset = nbio_v7_11_get_hdp_flush_done_offset,
@@ -374,4 +388,5 @@ const struct amdgpu_nbio_funcs nbio_v7_11_funcs = {
.ih_control = nbio_v7_11_ih_control,
.init_registers = nbio_v7_11_init_registers,
.remap_hdp_registers = nbio_v7_11_remap_hdp_registers,
+   .set_reg_remap = nbio_v7_11_set_reg_remap,
 };
-- 
2.44.0

[PATCH 13/14] drm/amdgpu/nv: use common nbio callback to set remap offset

2024-05-06 Thread Alex Deucher

This fixes HDP flushes on systems with non-4K pages.

Reviewed-by: Felix Kuehling 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c | 4 
 drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c | 4 
 drivers/gpu/drm/amd/amdgpu/nv.c| 6 +-
 3 files changed, 1 insertion(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c 
b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c
index 41ae0a6f9db86..fa479dfa1ec15 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c
@@ -339,10 +339,6 @@ static void nbio_v2_3_init_registers(struct amdgpu_device 
*adev)
 
if (def != data)
WREG32_PCIE(smnPCIE_CONFIG_CNTL, data);
-
-   if (amdgpu_sriov_vf(adev))
-   adev->rmmio_remap.reg_offset = SOC15_REG_OFFSET(NBIO, 0,
-   mmBIF_BX_DEV0_EPF0_VF0_HDP_MEM_COHERENCY_FLUSH_CNTL) << 
2;
 }
 
 #define NAVI10_PCIE__LC_L0S_INACTIVITY_DEFAULT 0x // off by 
default, no gains over L1
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c 
b/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c
index 52774a096350e..a766e2d90cd00 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c
@@ -402,10 +402,6 @@ static void nbio_v7_2_init_registers(struct amdgpu_device 
*adev)
WREG32_SOC15(NBIO, 0, regRCC_DEV2_EPF0_STRAP2, data);
break;
}
-
-   if (amdgpu_sriov_vf(adev))
-   adev->rmmio_remap.reg_offset = SOC15_REG_OFFSET(NBIO, 0,
-   regBIF_BX_PF0_HDP_MEM_COHERENCY_FLUSH_CNTL) << 2;
 }
 
 #define MMIO_REG_HOLE_OFFSET (0x8 - PAGE_SIZE)
diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c
index 12e54047bf795..7e30a89fe03ef 100644
--- a/drivers/gpu/drm/amd/amdgpu/nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/nv.c
@@ -637,13 +637,9 @@ static const struct amdgpu_asic_funcs nv_asic_funcs = {
 
 static int nv_common_early_init(void *handle)
 {
-#define MMIO_REG_HOLE_OFFSET (0x8 - PAGE_SIZE)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-   if (!amdgpu_sriov_vf(adev)) {
-   adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET;
-   adev->rmmio_remap.bus_addr = adev->rmmio_base + 
MMIO_REG_HOLE_OFFSET;
-   }
+   adev->nbio.funcs->set_reg_remap(adev);
adev->smc_rreg = NULL;
adev->smc_wreg = NULL;
adev->pcie_rreg = &amdgpu_device_indirect_rreg;
-- 
2.44.0

[PATCH 14/14] drm/amdgpu/soc21: use common nbio callback to set remap offset

2024-05-06 Thread Alex Deucher

This fixes HDP flushes on systems with non-4K pages.

Reviewed-by: Felix Kuehling 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c | 3 ---
 drivers/gpu/drm/amd/amdgpu/soc21.c | 4 +---
 2 files changed, 1 insertion(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c 
b/drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c
index da731a6f6c063..a54052dea8bf5 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c
@@ -345,9 +345,6 @@ static void nbio_v4_3_init_registers(struct amdgpu_device 
*adev)
data &= ~RCC_DEV0_EPF2_STRAP2__STRAP_NO_SOFT_RESET_DEV0_F2_MASK;
WREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF2_STRAP2, data);
}
-   if (amdgpu_sriov_vf(adev))
-   adev->rmmio_remap.reg_offset = SOC15_REG_OFFSET(NBIO, 0,
-   regBIF_BX_DEV0_EPF0_VF0_HDP_MEM_COHERENCY_FLUSH_CNTL) 
<< 2;
 }
 
 static u32 nbio_v4_3_get_rom_offset(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c 
b/drivers/gpu/drm/amd/amdgpu/soc21.c
index fb67974675719..15845ecca7c79 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc21.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc21.c
@@ -557,11 +557,9 @@ static const struct amdgpu_asic_funcs soc21_asic_funcs = {
 
 static int soc21_common_early_init(void *handle)
 {
-#define MMIO_REG_HOLE_OFFSET (0x8 - PAGE_SIZE)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-   adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET;
-   adev->rmmio_remap.bus_addr = adev->rmmio_base + MMIO_REG_HOLE_OFFSET;
+   adev->nbio.funcs->set_reg_remap(adev);
adev->smc_rreg = NULL;
adev->smc_wreg = NULL;
adev->pcie_rreg = &amdgpu_device_indirect_rreg;
-- 
2.44.0

[PATCH 12/14] drm/amdgpu/soc15: use common nbio callback to set remap offset

2024-05-06 Thread Alex Deucher

This fixes HDP flushes on systems with non-4K pages.

Reviewed-by: Felix Kuehling 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c |  4 
 drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c |  3 ---
 drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c |  4 
 drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c |  6 --
 drivers/gpu/drm/amd/amdgpu/soc15.c | 11 +--
 5 files changed, 1 insertion(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c 
b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c
index 5dc8663a29e95..34180c6070dd2 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c
@@ -276,10 +276,6 @@ static void nbio_v6_1_init_registers(struct amdgpu_device 
*adev)
 
if (def != data)
WREG32_PCIE(smnPCIE_CI_CNTL, data);
-
-   if (amdgpu_sriov_vf(adev))
-   adev->rmmio_remap.reg_offset = SOC15_REG_OFFSET(NBIO, 0,
-   mmBIF_BX_DEV0_EPF0_VF0_HDP_MEM_COHERENCY_FLUSH_CNTL) << 
2;
 }
 
 #ifdef CONFIG_PCIEASPM
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c 
b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
index 3dd743ebbc02d..b1b57dcc5a737 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
@@ -273,9 +273,6 @@ const struct nbio_hdp_flush_reg nbio_v7_0_hdp_flush_reg = {
 
 static void nbio_v7_0_init_registers(struct amdgpu_device *adev)
 {
-   if (amdgpu_sriov_vf(adev))
-   adev->rmmio_remap.reg_offset =
-   SOC15_REG_OFFSET(NBIO, 0, 
mmHDP_MEM_COHERENCY_FLUSH_CNTL) << 2;
 }
 
 #define MMIO_REG_HOLE_OFFSET (0x8 - PAGE_SIZE)
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c 
b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
index b684eb519d2a9..fe18df10daaa3 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
@@ -343,10 +343,6 @@ static void nbio_v7_4_init_registers(struct amdgpu_device 
*adev)
 {
uint32_t baco_cntl;
 
-   if (amdgpu_sriov_vf(adev))
-   adev->rmmio_remap.reg_offset = SOC15_REG_OFFSET(NBIO, 0,
-   mmBIF_BX_DEV0_EPF0_VF0_HDP_MEM_COHERENCY_FLUSH_CNTL) << 
2;
-
if (amdgpu_ip_version(adev, NBIO_HWIP, 0) == IP_VERSION(7, 4, 4) &&
!amdgpu_sriov_vf(adev)) {
baco_cntl = RREG32_SOC15(NBIO, 0, mmBACO_CNTL);
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c 
b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c
index c2e78294c4fdc..d1bd79bbae532 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c
@@ -422,12 +422,6 @@ static void nbio_v7_9_init_registers(struct amdgpu_device 
*adev)
u32 inst_mask;
int i;
 
-   if (amdgpu_sriov_vf(adev))
-   adev->rmmio_remap.reg_offset =
-   SOC15_REG_OFFSET(
-   NBIO, 0,
-   
regBIF_BX_DEV0_EPF0_VF0_HDP_MEM_COHERENCY_FLUSH_CNTL)
-   << 2;
WREG32_SOC15(NBIO, 0, regXCC_DOORBELL_FENCE,
0xff & ~(adev->gfx.xcc_mask));
 
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c 
b/drivers/gpu/drm/amd/amdgpu/soc15.c
index 3a39a7d311891..c757d39c767a2 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -931,13 +931,9 @@ static const struct amdgpu_asic_funcs 
aqua_vanjaram_asic_funcs =
 
 static int soc15_common_early_init(void *handle)
 {
-#define MMIO_REG_HOLE_OFFSET (0x8 - PAGE_SIZE)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-   if (!amdgpu_sriov_vf(adev)) {
-   adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET;
-   adev->rmmio_remap.bus_addr = adev->rmmio_base + 
MMIO_REG_HOLE_OFFSET;
-   }
+   adev->nbio.funcs->set_reg_remap(adev);
adev->smc_rreg = NULL;
adev->smc_wreg = NULL;
adev->pcie_rreg = &amdgpu_device_indirect_rreg;
@@ -1188,11 +1184,6 @@ static int soc15_common_early_init(void *handle)
AMD_PG_SUPPORT_JPEG;
/*TODO: need a new external_rev_id for GC 9.4.4? */
adev->external_rev_id = adev->rev_id + 0x46;
-   /* GC 9.4.3 uses MMIO register region hole at a different 
offset */
-   if (!amdgpu_sriov_vf(adev)) {
-   adev->rmmio_remap.reg_offset = 0x1A000;
-   adev->rmmio_remap.bus_addr = adev->rmmio_base + 0x1A000;
-   }
break;
default:
/* FIXME: not supported yet */
-- 
2.44.0

[PATCH 08/14] drm/amdgpu: add set_reg_remap callback for NBIO 2.3

2024-05-06 Thread Alex Deucher

This will be used to consolidate the register remap offset
configuration and fix HDP flushes on systems with non-4K pages.

Reviewed-by: Felix Kuehling 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c | 15 +++
 1 file changed, 15 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c 
b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c
index df218d5ca775c..41ae0a6f9db86 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c
@@ -553,6 +553,20 @@ static void nbio_v2_3_clear_doorbell_interrupt(struct 
amdgpu_device *adev)
}
 }
 
+#define MMIO_REG_HOLE_OFFSET (0x8 - PAGE_SIZE)
+
+static void nbio_v2_3_set_reg_remap(struct amdgpu_device *adev)
+{
+   if (!amdgpu_sriov_vf(adev) && (PAGE_SIZE <= 4096)) {
+   adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET;
+   adev->rmmio_remap.bus_addr = adev->rmmio_base + 
MMIO_REG_HOLE_OFFSET;
+   } else {
+   adev->rmmio_remap.reg_offset = SOC15_REG_OFFSET(NBIO, 0,
+   mmBIF_BX_DEV0_EPF0_VF0_HDP_MEM_COHERENCY_FLUSH_CNTL) << 
2;
+   adev->rmmio_remap.bus_addr = 0;
+   }
+}
+
 const struct amdgpu_nbio_funcs nbio_v2_3_funcs = {
.get_hdp_flush_req_offset = nbio_v2_3_get_hdp_flush_req_offset,
.get_hdp_flush_done_offset = nbio_v2_3_get_hdp_flush_done_offset,
@@ -577,4 +591,5 @@ const struct amdgpu_nbio_funcs nbio_v2_3_funcs = {
.apply_lc_spc_mode_wa = nbio_v2_3_apply_lc_spc_mode_wa,
.apply_l1_link_width_reconfig_wa = 
nbio_v2_3_apply_l1_link_width_reconfig_wa,
.clear_doorbell_interrupt = nbio_v2_3_clear_doorbell_interrupt,
+   .set_reg_remap = nbio_v2_3_set_reg_remap,
 };
-- 
2.44.0

[PATCH 09/14] drm/amdgpu: add set_reg_remap callback for NBIO 4.3

2024-05-06 Thread Alex Deucher

This will be used to consolidate the register remap offset
configuration and fix HDP flushes on systems with non-4K pages.

Reviewed-by: Felix Kuehling 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c | 16 
 1 file changed, 16 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c 
b/drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c
index a3622897e3fe3..da731a6f6c063 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c
@@ -475,6 +475,20 @@ static void nbio_v4_3_program_aspm(struct amdgpu_device 
*adev)
 #endif
 }
 
+#define MMIO_REG_HOLE_OFFSET (0x8 - PAGE_SIZE)
+
+static void nbio_v4_3_set_reg_remap(struct amdgpu_device *adev)
+{
+   if (!amdgpu_sriov_vf(adev) && (PAGE_SIZE <= 4096)) {
+   adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET;
+   adev->rmmio_remap.bus_addr = adev->rmmio_base + 
MMIO_REG_HOLE_OFFSET;
+   } else {
+   adev->rmmio_remap.reg_offset = SOC15_REG_OFFSET(NBIO, 0,
+   regBIF_BX_DEV0_EPF0_VF0_HDP_MEM_COHERENCY_FLUSH_CNTL) 
<< 2;
+   adev->rmmio_remap.bus_addr = 0;
+   }
+}
+
 const struct amdgpu_nbio_funcs nbio_v4_3_funcs = {
.get_hdp_flush_req_offset = nbio_v4_3_get_hdp_flush_req_offset,
.get_hdp_flush_done_offset = nbio_v4_3_get_hdp_flush_done_offset,
@@ -497,6 +511,7 @@ const struct amdgpu_nbio_funcs nbio_v4_3_funcs = {
.remap_hdp_registers = nbio_v4_3_remap_hdp_registers,
.get_rom_offset = nbio_v4_3_get_rom_offset,
.program_aspm = nbio_v4_3_program_aspm,
+   .set_reg_remap = nbio_v4_3_set_reg_remap,
 };
 
 
@@ -541,6 +556,7 @@ const struct amdgpu_nbio_funcs nbio_v4_3_sriov_funcs = {
.init_registers = nbio_v4_3_init_registers,
.remap_hdp_registers = nbio_v4_3_remap_hdp_registers,
.get_rom_offset = nbio_v4_3_get_rom_offset,
+   .set_reg_remap = nbio_v4_3_set_reg_remap,
 };
 
 static int nbio_v4_3_set_ras_err_event_athub_irq_state(struct amdgpu_device 
*adev,
-- 
2.44.0

[PATCH 11/14] drm/amdgpu: add set_reg_remap callback for NBIF 6.3.1

2024-05-06 Thread Alex Deucher

This will be used to consolidate the register remap offset
configuration and fix HDP flushes on systems with non-4K pages.

Reviewed-by: Felix Kuehling 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.c | 16 
 1 file changed, 16 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.c 
b/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.c
index 96ed00ac81acf..fe64c04ee20b1 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.c
@@ -424,6 +424,20 @@ static void nbif_v6_3_1_program_aspm(struct amdgpu_device 
*adev)
 #endif
 }
 
+#define MMIO_REG_HOLE_OFFSET (0x8 - PAGE_SIZE)
+
+static void nbif_v6_3_1_set_reg_remap(struct amdgpu_device *adev)
+{
+   if (!amdgpu_sriov_vf(adev) && (PAGE_SIZE <= 4096)) {
+   adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET;
+   adev->rmmio_remap.bus_addr = adev->rmmio_base + 
MMIO_REG_HOLE_OFFSET;
+   } else {
+   adev->rmmio_remap.reg_offset = SOC15_REG_OFFSET(NBIO, 0,
+   regBIF_BX_PF0_HDP_MEM_COHERENCY_FLUSH_CNTL) << 2;
+   adev->rmmio_remap.bus_addr = 0;
+   }
+}
+
 const struct amdgpu_nbio_funcs nbif_v6_3_1_funcs = {
.get_hdp_flush_req_offset = nbif_v6_3_1_get_hdp_flush_req_offset,
.get_hdp_flush_done_offset = nbif_v6_3_1_get_hdp_flush_done_offset,
@@ -446,6 +460,7 @@ const struct amdgpu_nbio_funcs nbif_v6_3_1_funcs = {
.remap_hdp_registers = nbif_v6_3_1_remap_hdp_registers,
.get_rom_offset = nbif_v6_3_1_get_rom_offset,
.program_aspm = nbif_v6_3_1_program_aspm,
+   .set_reg_remap = nbif_v6_3_1_set_reg_remap,
 };
 
 
@@ -492,4 +507,5 @@ const struct amdgpu_nbio_funcs nbif_v6_3_1_sriov_funcs = {
.init_registers = nbif_v6_3_1_init_registers,
.remap_hdp_registers = nbif_v6_3_1_remap_hdp_registers,
.get_rom_offset = nbif_v6_3_1_get_rom_offset,
+   .set_reg_remap = nbif_v6_3_1_set_reg_remap,
 };
-- 
2.44.0

[PATCH 10/14] drm/amdgpu: add set_reg_remap callback for NBIO 7.7

2024-05-06 Thread Alex Deucher

This will be used to consolidate the register remap offset
configuration and fix HDP flushes on systems with non-4K pages.

Reviewed-by: Felix Kuehling 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c | 16 
 1 file changed, 16 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c 
b/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c
index 4df1055e640a4..fb37e354a9d5c 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c
@@ -324,6 +324,21 @@ static void nbio_v7_7_get_clockgating_state(struct 
amdgpu_device *adev,
*flags |= AMD_CG_SUPPORT_BIF_LS;
 }
 
+#define MMIO_REG_HOLE_OFFSET (0x8 - PAGE_SIZE)
+
+static void nbio_v7_7_set_reg_remap(struct amdgpu_device *adev)
+{
+   if (!amdgpu_sriov_vf(adev) && (PAGE_SIZE <= 4096)) {
+   adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET;
+   adev->rmmio_remap.bus_addr = adev->rmmio_base + 
MMIO_REG_HOLE_OFFSET;
+   } else {
+   adev->rmmio_remap.reg_offset =
+   SOC15_REG_OFFSET(NBIO, 0,
+
regBIF_BX_PF0_HDP_MEM_COHERENCY_FLUSH_CNTL) << 2;
+   adev->rmmio_remap.bus_addr = 0;
+   }
+}
+
 const struct amdgpu_nbio_funcs nbio_v7_7_funcs = {
.get_hdp_flush_req_offset = nbio_v7_7_get_hdp_flush_req_offset,
.get_hdp_flush_done_offset = nbio_v7_7_get_hdp_flush_done_offset,
@@ -345,4 +360,5 @@ const struct amdgpu_nbio_funcs nbio_v7_7_funcs = {
.ih_control = nbio_v7_7_ih_control,
.init_registers = nbio_v7_7_init_registers,
.remap_hdp_registers = nbio_v7_7_remap_hdp_registers,
+   .set_reg_remap = nbio_v7_7_set_reg_remap,
 };
-- 
2.44.0

[PATCH 07/14] drm/amdgpu: add set_reg_remap callback for NBIO 7.2

2024-05-06 Thread Alex Deucher

This will be used to consolidate the register remap offset
configuration and fix HDP flushes on systems with non-4K pages.

Reviewed-by: Felix Kuehling 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c | 16 
 1 file changed, 16 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c 
b/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c
index e962821ae6a11..52774a096350e 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c
@@ -408,6 +408,21 @@ static void nbio_v7_2_init_registers(struct amdgpu_device 
*adev)
regBIF_BX_PF0_HDP_MEM_COHERENCY_FLUSH_CNTL) << 2;
 }
 
+#define MMIO_REG_HOLE_OFFSET (0x8 - PAGE_SIZE)
+
+static void nbio_v7_2_set_reg_remap(struct amdgpu_device *adev)
+{
+   if (!amdgpu_sriov_vf(adev) && (PAGE_SIZE <= 4096)) {
+   adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET;
+   adev->rmmio_remap.bus_addr = adev->rmmio_base + 
MMIO_REG_HOLE_OFFSET;
+   } else {
+   adev->rmmio_remap.reg_offset =
+   SOC15_REG_OFFSET(NBIO, 0,
+
regBIF_BX_PF0_HDP_MEM_COHERENCY_FLUSH_CNTL) << 2;
+   adev->rmmio_remap.bus_addr = 0;
+   }
+}
+
 const struct amdgpu_nbio_funcs nbio_v7_2_funcs = {
.get_hdp_flush_req_offset = nbio_v7_2_get_hdp_flush_req_offset,
.get_hdp_flush_done_offset = nbio_v7_2_get_hdp_flush_done_offset,
@@ -429,4 +444,5 @@ const struct amdgpu_nbio_funcs nbio_v7_2_funcs = {
.ih_control = nbio_v7_2_ih_control,
.init_registers = nbio_v7_2_init_registers,
.remap_hdp_registers = nbio_v7_2_remap_hdp_registers,
+   .set_reg_remap = nbio_v7_2_set_reg_remap,
 };
-- 
2.44.0

[PATCH 03/14] drm/amdgpu: add set_reg_remap callback for NBIO 7.0

2024-05-06 Thread Alex Deucher

This will be used to consolidate the register remap offset
configuration and fix HDP flushes on systems with non-4K pages.

Reviewed-by: Felix Kuehling 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c | 15 +++
 1 file changed, 15 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c 
b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
index aa0326d00c724..3dd743ebbc02d 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
@@ -278,6 +278,20 @@ static void nbio_v7_0_init_registers(struct amdgpu_device 
*adev)
SOC15_REG_OFFSET(NBIO, 0, 
mmHDP_MEM_COHERENCY_FLUSH_CNTL) << 2;
 }
 
+#define MMIO_REG_HOLE_OFFSET (0x8 - PAGE_SIZE)
+
+static void nbio_v7_0_set_reg_remap(struct amdgpu_device *adev)
+{
+   if (!amdgpu_sriov_vf(adev) && (PAGE_SIZE <= 4096)) {
+   adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET;
+   adev->rmmio_remap.bus_addr = adev->rmmio_base + 
MMIO_REG_HOLE_OFFSET;
+   } else {
+   adev->rmmio_remap.reg_offset =
+   SOC15_REG_OFFSET(NBIO, 0, 
mmHDP_MEM_COHERENCY_FLUSH_CNTL) << 2;
+   adev->rmmio_remap.bus_addr = 0;
+   }
+}
+
 const struct amdgpu_nbio_funcs nbio_v7_0_funcs = {
.get_hdp_flush_req_offset = nbio_v7_0_get_hdp_flush_req_offset,
.get_hdp_flush_done_offset = nbio_v7_0_get_hdp_flush_done_offset,
@@ -297,4 +311,5 @@ const struct amdgpu_nbio_funcs nbio_v7_0_funcs = {
.ih_control = nbio_v7_0_ih_control,
.init_registers = nbio_v7_0_init_registers,
.remap_hdp_registers = nbio_v7_0_remap_hdp_registers,
+   .set_reg_remap = nbio_v7_0_set_reg_remap,
 };
-- 
2.44.0

[PATCH 05/14] drm/amdgpu: add set_reg_remap callback for NBIO 7.9

2024-05-06 Thread Alex Deucher

This will be used to consolidate the register remap offset
configuration and fix HDP flushes on systems with non-4K pages.

Reviewed-by: Felix Kuehling 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c | 18 ++
 1 file changed, 18 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c 
b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c
index 40d1e209eab7a..c2e78294c4fdc 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c
@@ -475,6 +475,23 @@ static u64 nbio_v7_9_get_pcie_replay_count(struct 
amdgpu_device *adev)
return (nak_r + nak_g);
 }
 
+#define MMIO_REG_HOLE_OFFSET 0x1A000
+
+static void nbio_v7_9_set_reg_remap(struct amdgpu_device *adev)
+{
+   if (!amdgpu_sriov_vf(adev) && (PAGE_SIZE <= 4096)) {
+   adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET;
+   adev->rmmio_remap.bus_addr = adev->rmmio_base + 
MMIO_REG_HOLE_OFFSET;
+   } else {
+   adev->rmmio_remap.reg_offset =
+   SOC15_REG_OFFSET(
+   NBIO, 0,
+   
regBIF_BX_DEV0_EPF0_VF0_HDP_MEM_COHERENCY_FLUSH_CNTL)
+   << 2;
+   adev->rmmio_remap.bus_addr = 0;
+   }
+}
+
 const struct amdgpu_nbio_funcs nbio_v7_9_funcs = {
.get_hdp_flush_req_offset = nbio_v7_9_get_hdp_flush_req_offset,
.get_hdp_flush_done_offset = nbio_v7_9_get_hdp_flush_done_offset,
@@ -499,6 +516,7 @@ const struct amdgpu_nbio_funcs nbio_v7_9_funcs = {
.get_memory_partition_mode = nbio_v7_9_get_memory_partition_mode,
.init_registers = nbio_v7_9_init_registers,
.get_pcie_replay_count = nbio_v7_9_get_pcie_replay_count,
+   .set_reg_remap = nbio_v7_9_set_reg_remap,
 };
 
 static void nbio_v7_9_query_ras_error_count(struct amdgpu_device *adev,
-- 
2.44.0

[PATCH 02/14] drm/amdgpu: add set_reg_remap callback for NBIO 6.1

2024-05-06 Thread Alex Deucher

This will be used to consolidate the register remap offset
configuration and fix HDP flushes on systems with non-4K pages.

Reviewed-by: Felix Kuehling 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c | 18 +-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c 
b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c
index 37615a77287bc..5dc8663a29e95 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c
@@ -394,6 +394,21 @@ static void nbio_v6_1_program_aspm(struct amdgpu_device 
*adev)
 #endif
 }
 
+#define MMIO_REG_HOLE_OFFSET (0x8 - PAGE_SIZE)
+
+static void nbio_v6_1_set_reg_remap(struct amdgpu_device *adev)
+{
+   if (!amdgpu_sriov_vf(adev) && (PAGE_SIZE <= 4096)) {
+   adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET;
+   adev->rmmio_remap.bus_addr = adev->rmmio_base + 
MMIO_REG_HOLE_OFFSET;
+   } else {
+   adev->rmmio_remap.reg_offset =
+   SOC15_REG_OFFSET(NBIO, 0,
+
mmBIF_BX_DEV0_EPF0_VF0_HDP_MEM_COHERENCY_FLUSH_CNTL) << 2;
+   adev->rmmio_remap.bus_addr = 0;
+   }
+}
+
 const struct amdgpu_nbio_funcs nbio_v6_1_funcs = {
.get_hdp_flush_req_offset = nbio_v6_1_get_hdp_flush_req_offset,
.get_hdp_flush_done_offset = nbio_v6_1_get_hdp_flush_done_offset,
@@ -412,5 +427,6 @@ const struct amdgpu_nbio_funcs nbio_v6_1_funcs = {
.ih_control = nbio_v6_1_ih_control,
.init_registers = nbio_v6_1_init_registers,
.remap_hdp_registers = nbio_v6_1_remap_hdp_registers,
-   .program_aspm =  nbio_v6_1_program_aspm,
+   .program_aspm = nbio_v6_1_program_aspm,
+   .set_reg_remap = nbio_v6_1_set_reg_remap,
 };
-- 
2.44.0

[PATCH 04/14] drm/amdgpu: add set_reg_remap callback for NBIO 7.4

2024-05-06 Thread Alex Deucher

This will be used to consolidate the register remap offset
configuration and fix HDP flushes on systems with non-4K pages.

Reviewed-by: Felix Kuehling 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c | 16 
 1 file changed, 16 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c 
b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
index 19986ff6a48d7..b684eb519d2a9 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
@@ -792,6 +792,21 @@ static void nbio_v7_4_program_aspm(struct amdgpu_device 
*adev)
 #endif
 }
 
+#define MMIO_REG_HOLE_OFFSET (0x8 - PAGE_SIZE)
+
+static void nbio_v7_4_set_reg_remap(struct amdgpu_device *adev)
+{
+   if (!amdgpu_sriov_vf(adev) && (PAGE_SIZE <= 4096)) {
+   adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET;
+   adev->rmmio_remap.bus_addr = adev->rmmio_base + 
MMIO_REG_HOLE_OFFSET;
+   } else {
+   adev->rmmio_remap.reg_offset =
+   SOC15_REG_OFFSET(NBIO, 0,
+
mmBIF_BX_DEV0_EPF0_VF0_HDP_MEM_COHERENCY_FLUSH_CNTL) << 2;
+   adev->rmmio_remap.bus_addr = 0;
+   }
+}
+
 const struct amdgpu_nbio_funcs nbio_v7_4_funcs = {
.get_hdp_flush_req_offset = nbio_v7_4_get_hdp_flush_req_offset,
.get_hdp_flush_done_offset = nbio_v7_4_get_hdp_flush_done_offset,
@@ -813,4 +828,5 @@ const struct amdgpu_nbio_funcs nbio_v7_4_funcs = {
.init_registers = nbio_v7_4_init_registers,
.remap_hdp_registers = nbio_v7_4_remap_hdp_registers,
.program_aspm =  nbio_v7_4_program_aspm,
+   .set_reg_remap = nbio_v7_4_set_reg_remap,
 };
-- 
2.44.0

[PATCH 01/14] drm/amdgpu: add nbio set_reg_remap helper

2024-05-06 Thread Alex Deucher

Will be used to consolidate reg remap settings and fix HDP
flushes on systems with non-4K pages.

Reviewed-by: Felix Kuehling 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h
index 7b8c03be1d9e7..f61d117b0cafe 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h
@@ -102,6 +102,7 @@ struct amdgpu_nbio_funcs {
u32 (*get_memory_partition_mode)(struct amdgpu_device *adev,
 u32 *supp_modes);
u64 (*get_pcie_replay_count)(struct amdgpu_device *adev);
+   void (*set_reg_remap)(struct amdgpu_device *adev);
 };
 
 struct amdgpu_nbio {
-- 
2.44.0

[PATCH] drm/amdkfd: don't allow mapping the MMIO HDP page with large pages

2024-05-06 Thread Alex Deucher

We don't get the right offset in that case.  The GPU has
an unused 4K area of the register BAR space into which you can
remap registers.  We remap the HDP flush registers into this
space to allow userspace (CPU or GPU) to flush the HDP when it
updates VRAM.  However, on systems with >4K pages, we end up
exposing PAGE_SIZE of MMIO space.

Fixes: d8e408a82704 ("drm/amdkfd: Expose HDP registers to user space")
Reviewed-by: Felix Kuehling 
Signed-off-by: Alex Deucher 
Cc: sta...@vger.kernel.org
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 6b713fb0b818f..fdf171ad4a3c6 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1144,7 +1144,7 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file 
*filep,
goto err_unlock;
}
offset = dev->adev->rmmio_remap.bus_addr;
-   if (!offset) {
+   if (!offset || (PAGE_SIZE > 4096)) {
err = -ENOMEM;
goto err_unlock;
}
@@ -2312,7 +2312,7 @@ static int criu_restore_memory_of_gpu(struct 
kfd_process_device *pdd,
return -EINVAL;
}
offset = pdd->dev->adev->rmmio_remap.bus_addr;
-   if (!offset) {
+   if (!offset || (PAGE_SIZE > 4096)) {
pr_err("amdgpu_amdkfd_get_mmio_remap_phys_addr 
failed\n");
return -ENOMEM;
}
@@ -3354,6 +3354,9 @@ static int kfd_mmio_mmap(struct kfd_node *dev, struct 
kfd_process *process,
if (vma->vm_end - vma->vm_start != PAGE_SIZE)
return -EINVAL;
 
+   if (PAGE_SIZE > 4096)
+   return -EINVAL;
+
address = dev->adev->rmmio_remap.bus_addr;
 
vm_flags_set(vma, VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE |
-- 
2.44.0

[PATCH] drm/amdkfd: Reconcile the definition and use of oem_id in struct kfd_topology_device

2024-05-06 Thread Michael Chen

Currently oem_id is defined as uint8_t[6] and cast to uint64_t* in some use
cases. This leads code scanners to complain about out-of-bounds access. Redefine
it in a union to enforce 8-byte size and alignment and avoid potential issues.

Signed-off-by: Michael Chen 
Reviewed-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdkfd/kfd_crat.h | 2 --
 drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 3 +--
 drivers/gpu/drm/amd/amdkfd/kfd_topology.h | 6 +-
 3 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h
index 300634b9f668..a8ca7ecb6d27 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h
@@ -42,8 +42,6 @@
 #define CRAT_OEMTABLEID_LENGTH 8
 #define CRAT_RESERVED_LENGTH   6
 
-#define CRAT_OEMID_64BIT_MASK ((1ULL << (CRAT_OEMID_LENGTH * 8)) - 1)
-
 /* Compute Unit flags */
 #define COMPUTE_UNIT_CPU   (1 << 0)  /* Create Virtual CRAT for CPU */
 #define COMPUTE_UNIT_GPU   (1 << 1)  /* Create Virtual CRAT for GPU */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index c51f131eaa2f..f7fa0cb18482 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -958,8 +958,7 @@ static void kfd_update_system_properties(void)
dev = list_last_entry(&topology_device_list,
struct kfd_topology_device, list);
if (dev) {
-   sys_props.platform_id =
-   (*((uint64_t *)dev->oem_id)) & CRAT_OEMID_64BIT_MASK;
+   sys_props.platform_id = dev->oem_id64;
sys_props.platform_oem = *((uint64_t *)dev->oem_table_id);
sys_props.platform_rev = dev->oem_revision;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
index 27386ce9a021..b3e79ce3bd35 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
@@ -154,7 +154,11 @@ struct kfd_topology_device {
struct attributeattr_gpuid;
struct attributeattr_name;
struct attributeattr_props;
-   uint8_t oem_id[CRAT_OEMID_LENGTH];
+   
+   union {
+   uint8_t oem_id[CRAT_OEMID_LENGTH];
+   uint64_toem_id64;
+   };
uint8_t oem_table_id[CRAT_OEMTABLEID_LENGTH];
uint32_toem_revision;
 };
-- 
2.34.1

[PATCH 5/5] drm/amdgpu: Use drm_print_memory_stats helper from fdinfo

2024-05-06 Thread Tvrtko Ursulin

From: Tvrtko Ursulin 

Convert fdinfo memory stats to use the common drm_print_memory_stats
helper.

This achieves alignment with the common keys as documented in
drm-usage-stats.rst, adding specifically drm-total- key the driver was
missing until now.

Additionally I made the code stop skipping total size for objects which
currently do not have a backing store, and I added resident and active
reporting.

Legacy keys have been preserved, with the outlook of potentially
removing only the drm-memory- keys when the time is right.

The example output now looks like this:

 pos:   0
 flags: 0212
 mnt_id:24
 ino:   1204
 drm-driver:amdgpu
 drm-client-id: 30
 drm-pdev:  :04:00.0
 pasid: 32785
 drm-total-cpu: 0
 drm-shared-cpu:0
 drm-active-cpu:0
 drm-resident-cpu:  0
 drm-total-gtt: 22 MiB
 drm-shared-gtt:0
 drm-active-gtt:10 MiB
 drm-resident-gtt:  22 MiB
 drm-total-vram:544428 KiB
 drm-shared-vram:   13584 KiB
 drm-active-vram:   260356 KiB
 drm-resident-vram: 544428 KiB
 drm-memory-vram:   544428 KiB
 drm-memory-gtt:22528 KiB
 drm-memory-cpu:0 KiB
 amd-memory-visible-vram:   544428 KiB
 amd-evicted-vram:  0 KiB
 amd-evicted-visible-vram:  0 KiB
 amd-requested-vram:544428 KiB
 amd-requested-visible-vram:90112 KiB
 amd-requested-gtt: 22528 KiB
 drm-engine-gfx:39456116802 ns

Signed-off-by: Tvrtko Ursulin 
Cc: Alex Deucher 
Cc: Christian König 
Cc: Daniel Vetter 
Cc: Rob Clark 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c | 47 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 94 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 35 +++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h|  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 20 ++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  3 +-
 6 files changed, 116 insertions(+), 84 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c
index c7df7fa3459f..1422ea642ad0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c
@@ -59,18 +59,21 @@ void amdgpu_show_fdinfo(struct drm_printer *p, struct 
drm_file *file)
struct amdgpu_fpriv *fpriv = file->driver_priv;
struct amdgpu_vm *vm = &fpriv->vm;
 
-   struct amdgpu_mem_stats stats;
+   struct amdgpu_mem_stats stats[__AMDGPU_PL_LAST + 1] = { };
ktime_t usage[AMDGPU_HW_IP_NUM];
-   unsigned int hw_ip;
+   const char *pl_name[] = {
+   [TTM_PL_VRAM] = "vram",
+   [TTM_PL_TT] = "gtt",
+   [TTM_PL_SYSTEM] = "cpu",
+   };
+   unsigned int hw_ip, i;
int ret;
 
-   memset(&stats, 0, sizeof(stats));
-
ret = amdgpu_bo_reserve(vm->root.bo, false);
if (ret)
return;
 
-   amdgpu_vm_get_memory(vm, &stats);
+   amdgpu_vm_get_memory(vm, stats, ARRAY_SIZE(stats));
amdgpu_bo_unreserve(vm->root.bo);
 
amdgpu_ctx_mgr_usage(&fpriv->ctx_mgr, usage);
@@ -82,24 +85,34 @@ void amdgpu_show_fdinfo(struct drm_printer *p, struct 
drm_file *file)
 */
 
drm_printf(p, "pasid:\t%u\n", fpriv->vm.pasid);
-   drm_printf(p, "drm-memory-vram:\t%llu KiB\n", stats.vram/1024UL);
-   drm_printf(p, "drm-memory-gtt: \t%llu KiB\n", stats.gtt/1024UL);
-   drm_printf(p, "drm-memory-cpu: \t%llu KiB\n", stats.cpu/1024UL);
+
+   for (i = 0; i < TTM_PL_PRIV; i++)
+   drm_print_memory_stats(p,
+  &stats[i].drm,
+  DRM_GEM_OBJECT_RESIDENT,
+  pl_name[i]);
+
+   /* Legacy amdgpu keys, alias to drm-total-memory-: */
+   drm_printf(p, "drm-memory-vram:\t%llu KiB\n",
+  stats[TTM_PL_VRAM].total/1024UL);
+   drm_printf(p, "drm-memory-gtt: \t%llu KiB\n",
+  stats[TTM_PL_TT].total/1024UL);
+   drm_printf(p, "drm-memory-cpu: \t%llu KiB\n",
+  stats[TTM_PL_SYSTEM].total/1024UL);
+
+   /* Amdgpu specific memory accounting keys: */
drm_printf(p, "amd-memory-visible-vram:\t%llu KiB\n",
-  stats.visible_vram/1024UL);
+  stats[TTM_PL_VRAM].visible/1024UL);
drm_printf(p, "amd-evicted-vram:\t%llu KiB\n",
-  stats.evicted_vram/1024UL);
+  stats[TTM_PL_VRAM].evicted/1024UL);
drm_printf(p, "amd-evicted-visible-vram:\t%llu KiB\n",
-  stats.evicted_visible_vram/1024UL);
+  stats[TTM_PL_VRAM].evicted_visible/1024UL);
drm_printf(p, "amd-requested-vram:\t%llu KiB\n",
-  stats.requested_vram/1024UL);
+  stats[TTM_PL_VRAM].requested/1024UL);
drm_printf(p, "amd-requested-visible-vram:\t%llu KiB\n",
-  stats.requested_visible_vram/1024UL);
+  stats[TTM_PL_VRA

[PATCH 1/5] drm/amdgpu: Add amdgpu_bo_is_vm_bo helper

2024-05-06 Thread Tvrtko Ursulin

From: Tvrtko Ursulin 

Help code readability by replacing a bunch of:

bo->tbo.base.resv == vm->root.bo->tbo.base.resv

With:

amdgpu_vm_is_bo_always_valid(vm, bo)

No functional changes.

v2:
 * Rename helper and move to amdgpu_vm. (Christian)

v3:
 * Use Christian's kerneldoc.

v4:
 * Fixed logic inversion in amdgpu_vm_bo_get_memory.

Signed-off-by: Tvrtko Ursulin 
Cc: Christian König 
Reviewed-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 41 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  |  2 ++
 3 files changed, 29 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 67c234bcf89f..e698d65e9508 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -174,7 +174,7 @@ static int amdgpu_gem_object_open(struct drm_gem_object 
*obj,
return -EPERM;
 
if (abo->flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID &&
-   abo->tbo.base.resv != vm->root.bo->tbo.base.resv)
+   !amdgpu_vm_is_bo_always_valid(vm, abo))
return -EPERM;
 
r = amdgpu_bo_reserve(abo, false);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 4e2391c83d7c..9ccb9814874c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -333,7 +333,7 @@ void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
base->next = bo->vm_bo;
bo->vm_bo = base;
 
-   if (bo->tbo.base.resv != vm->root.bo->tbo.base.resv)
+   if (!amdgpu_vm_is_bo_always_valid(vm, bo))
return;
 
dma_resv_assert_held(vm->root.bo->tbo.base.resv);
@@ -1101,13 +1101,13 @@ static void amdgpu_vm_bo_get_memory(struct amdgpu_bo_va 
*bo_va,
 * For now ignore BOs which are currently locked and potentially
 * changing their location.
 */
-   if (bo->tbo.base.resv != vm->root.bo->tbo.base.resv &&
+   if (!amdgpu_vm_is_bo_always_valid(vm, bo) &&
!dma_resv_trylock(bo->tbo.base.resv))
return;
 
amdgpu_bo_get_memory(bo, stats);
-   if (bo->tbo.base.resv != vm->root.bo->tbo.base.resv)
-   dma_resv_unlock(bo->tbo.base.resv);
+   if (!amdgpu_vm_is_bo_always_valid(vm, bo))
+   dma_resv_unlock(bo->tbo.base.resv);
 }
 
 void amdgpu_vm_get_memory(struct amdgpu_vm *vm,
@@ -1203,8 +1203,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, 
struct amdgpu_bo_va *bo_va,
uncached = false;
}
 
-   if (clear || (bo && bo->tbo.base.resv ==
- vm->root.bo->tbo.base.resv))
+   if (clear || amdgpu_vm_is_bo_always_valid(vm, bo))
last_update = &vm->last_update;
else
last_update = &bo_va->last_pt_update;
@@ -1246,7 +1245,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, 
struct amdgpu_bo_va *bo_va,
 * the evicted list so that it gets validated again on the
 * next command submission.
 */
-   if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv) {
+   if (amdgpu_vm_is_bo_always_valid(vm, bo)) {
uint32_t mem_type = bo->tbo.resource->mem_type;
 
if (!(bo->preferred_domains &
@@ -1640,10 +1639,9 @@ static void amdgpu_vm_bo_insert_map(struct amdgpu_device 
*adev,
if (mapping->flags & AMDGPU_PTE_PRT)
amdgpu_vm_prt_get(adev);
 
-   if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv &&
-   !bo_va->base.moved) {
+   if (amdgpu_vm_is_bo_always_valid(vm, bo) && !bo_va->base.moved)
amdgpu_vm_bo_moved(&bo_va->base);
-   }
+
trace_amdgpu_vm_bo_map(bo_va, mapping);
 }
 
@@ -1942,7 +1940,7 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device 
*adev,
if (before->flags & AMDGPU_PTE_PRT)
amdgpu_vm_prt_get(adev);
 
-   if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv &&
+   if (amdgpu_vm_is_bo_always_valid(vm, bo) &&
!before->bo_va->base.moved)
amdgpu_vm_bo_moved(&before->bo_va->base);
} else {
@@ -1957,7 +1955,7 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device 
*adev,
if (after->flags & AMDGPU_PTE_PRT)
amdgpu_vm_prt_get(adev);
 
-   if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv &&
+   if (amdgpu_vm_is_bo_always_valid(vm, bo) &&
!after->bo_va->base.moved)
amdgpu_vm_bo_moved(&after->bo_va->base);
} else {
@@ -2037,7 +2035,7 @@ void amdgpu_vm_bo_del(struct amdgpu_device *adev,
 
if (bo) {
dma_resv_assert_held(bo->tbo.base.resv);
-   if (bo->tbo.base.resv == vm->root.bo->tbo.base.resv)
+   if (amd

[PATCH 3/5] drm/amdgpu: Describe all object placements in debugfs

2024-05-06 Thread Tvrtko Ursulin

From: Tvrtko Ursulin 

Accurately show all placements when describing objects in debugfs, instead
of bunching them up under the 'CPU' placement.

Signed-off-by: Tvrtko Ursulin 
Cc: Christian König 
Cc: Felix Kuehling 
Reviewed-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 15 +++
 1 file changed, 15 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 4f9073dd19eb..fa5227a4aac2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -1612,6 +1612,21 @@ u64 amdgpu_bo_print_info(int id, struct amdgpu_bo *bo, 
struct seq_file *m)
case TTM_PL_TT:
placement = "GTT";
break;
+   case AMDGPU_PL_GDS:
+   placement = "GDS";
+   break;
+   case AMDGPU_PL_GWS:
+   placement = "GWS";
+   break;
+   case AMDGPU_PL_OA:
+   placement = "OA";
+   break;
+   case AMDGPU_PL_PREEMPT:
+   placement = "PREEMPTIBLE";
+   break;
+   case AMDGPU_PL_DOORBELL:
+   placement = "DOORBELL";
+   break;
case TTM_PL_SYSTEM:
default:
placement = "CPU";
-- 
2.44.0

[PATCH 2/5] drm/amdgpu: Reduce mem_type to domain double indirection

2024-05-06 Thread Tvrtko Ursulin

From: Tvrtko Ursulin 

All memory domains apart from AMDGPU_GEM_DOMAIN_GTT map 1:1 to TTM
placements. The former can be either AMDGPU_PL_PREEMPT or TTM_PL_TT,
depending on AMDGPU_GEM_CREATE_PREEMPTIBLE.

Simplify a few places in the code which convert the TTM placement into
a domain by checking against the current placement directly.

In the conversion AMDGPU_PL_PREEMPT does not have to be handled
because amdgpu_mem_type_to_domain() cannot return that value anyway.

v2:
 * Remove AMDGPU_PL_PREEMPT handling.

v3:
 * Rebase.

Signed-off-by: Tvrtko Ursulin 
Reviewed-by: Christian König  # v1
Reviewed-by: Felix Kuehling  # v2
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c |  3 +--
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c  | 29 +
 2 files changed, 13 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
index 055ba2ea4c12..0b3b10d21952 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
@@ -165,8 +165,7 @@ static struct sg_table *amdgpu_dma_buf_map(struct 
dma_buf_attachment *attach,
if (r)
return ERR_PTR(r);
 
-   } else if (!(amdgpu_mem_type_to_domain(bo->tbo.resource->mem_type) &
-AMDGPU_GEM_DOMAIN_GTT)) {
+   } else if (bo->tbo.resource->mem_type != TTM_PL_TT) {
return ERR_PTR(-EBUSY);
}
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 8d8c39be6129..4f9073dd19eb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -983,12 +983,11 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 
domain,
 
ttm_bo_pin(&bo->tbo);
 
-   domain = amdgpu_mem_type_to_domain(bo->tbo.resource->mem_type);
-   if (domain == AMDGPU_GEM_DOMAIN_VRAM) {
+   if (bo->tbo.resource->mem_type == TTM_PL_VRAM) {
atomic64_add(amdgpu_bo_size(bo), &adev->vram_pin_size);
atomic64_add(amdgpu_vram_mgr_bo_visible_size(bo),
 &adev->visible_pin_size);
-   } else if (domain == AMDGPU_GEM_DOMAIN_GTT) {
+   } else if (bo->tbo.resource->mem_type == TTM_PL_TT) {
atomic64_add(amdgpu_bo_size(bo), &adev->gart_pin_size);
}
 
@@ -1293,7 +1292,6 @@ void amdgpu_bo_get_memory(struct amdgpu_bo *bo,
struct ttm_resource *res = bo->tbo.resource;
uint64_t size = amdgpu_bo_size(bo);
struct drm_gem_object *obj;
-   unsigned int domain;
bool shared;
 
/* Abort if the BO doesn't currently have a backing store */
@@ -1303,21 +1301,20 @@ void amdgpu_bo_get_memory(struct amdgpu_bo *bo,
obj = &bo->tbo.base;
shared = drm_gem_object_is_shared_for_memory_stats(obj);
 
-   domain = amdgpu_mem_type_to_domain(res->mem_type);
-   switch (domain) {
-   case AMDGPU_GEM_DOMAIN_VRAM:
+   switch (res->mem_type) {
+   case TTM_PL_VRAM:
stats->vram += size;
-   if (amdgpu_res_cpu_visible(adev, bo->tbo.resource))
+   if (amdgpu_res_cpu_visible(adev, res))
stats->visible_vram += size;
if (shared)
stats->vram_shared += size;
break;
-   case AMDGPU_GEM_DOMAIN_GTT:
+   case TTM_PL_TT:
stats->gtt += size;
if (shared)
stats->gtt_shared += size;
break;
-   case AMDGPU_GEM_DOMAIN_CPU:
+   case TTM_PL_SYSTEM:
default:
stats->cpu += size;
if (shared)
@@ -1330,7 +1327,7 @@ void amdgpu_bo_get_memory(struct amdgpu_bo *bo,
if (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
stats->requested_visible_vram += size;
 
-   if (domain != AMDGPU_GEM_DOMAIN_VRAM) {
+   if (res->mem_type != TTM_PL_VRAM) {
stats->evicted_vram += size;
if (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
stats->evicted_visible_vram += size;
@@ -1604,20 +1601,18 @@ u64 amdgpu_bo_print_info(int id, struct amdgpu_bo *bo, 
struct seq_file *m)
u64 size;
 
if (dma_resv_trylock(bo->tbo.base.resv)) {
-   unsigned int domain;
 
-   domain = amdgpu_mem_type_to_domain(bo->tbo.resource->mem_type);
-   switch (domain) {
-   case AMDGPU_GEM_DOMAIN_VRAM:
+   switch (bo->tbo.resource->mem_type) {
+   case TTM_PL_VRAM:
if (amdgpu_res_cpu_visible(adev, bo->tbo.resource))
placement = "VRAM VISIBLE";
else
placement = "VRAM";
break;
-   case AMDGPU_GEM_DOMAIN_GTT:
+

[PATCH 4/5] Documentation/gpu: Document the situation with unqualified drm-memory-

2024-05-06 Thread Tvrtko Ursulin

From: Tvrtko Ursulin 

Currently it is not well defined what is drm-memory- compared to other
categories.

In practice the only driver which emits these keys is amdgpu, and in them
it exposes the total memory use (including shared).

To prevent any confusion, document that drm-memory- is deprecated and an
alias to drm-total-memory-.

While at it also clarify that the reserved sub-string 'memory' refers to
the memory region component, and also clarify the intended semantics of
other memory categories.

v2:
 * Also mark drm-memory- as deprecated.
 * Add some more text describing memory categories. (Alex)

Signed-off-by: Tvrtko Ursulin 
Cc: Alex Deucher 
Cc: Christian König 
Cc: Rob Clark 
---
 Documentation/gpu/drm-usage-stats.rst | 25 ++---
 1 file changed, 22 insertions(+), 3 deletions(-)

diff --git a/Documentation/gpu/drm-usage-stats.rst 
b/Documentation/gpu/drm-usage-stats.rst
index 6dc299343b48..7fee316c9582 100644
--- a/Documentation/gpu/drm-usage-stats.rst
+++ b/Documentation/gpu/drm-usage-stats.rst
@@ -128,7 +128,9 @@ Memory
 
 Each possible memory type which can be used to store buffer objects by the
 GPU in question shall be given a stable and unique name to be returned as the
-string here.  The name "memory" is reserved to refer to normal system memory.
+string here.
+
+The region name "memory" is reserved to refer to normal system memory.
 
 Value shall reflect the amount of storage currently consumed by the buffer
 objects belong to this client, in the respective memory region.
@@ -136,6 +138,9 @@ objects belong to this client, in the respective memory 
region.
 Default unit shall be bytes with optional unit specifiers of 'KiB' or 'MiB'
 indicating kibi- or mebi-bytes.
 
+This key is deprecated and is an alias for drm-total-. Only one of the
+two should be present.
+
 - drm-shared-:  [KiB|MiB]
 
 The total size of buffers that are shared with another file (e.g., have more
@@ -143,20 +148,34 @@ than a single handle).
 
 - drm-total-:  [KiB|MiB]
 
-The total size of buffers that including shared and private memory.
+The total size of all created buffers including shared and private memory. The
+backing store for the buffers does not have to be currently instantiated to be
+counted under this category.
+
+This is an alias for drm-memory- and only one of the two should be
+present.
 
 - drm-resident-:  [KiB|MiB]
 
-The total size of buffers that are resident in the specified region.
+The total size of buffers that are resident (have their backing store present 
or
+instantiated) in the specified region.
 
 - drm-purgeable-:  [KiB|MiB]
 
 The total size of buffers that are purgeable.
 
+For example drivers which implement a form of 'madvise' like functionality can
+here count buffers which have instantiated backing store, but have been marked
+with an equivalent of MADV_DONTNEED.
+
 - drm-active-:  [KiB|MiB]
 
 The total size of buffers that are active on one or more engines.
 
+One practical example of this can be presence of unsignaled fences in an GEM
+buffer reservation object. Therefore the active category is a subset of
+resident.
+
 Implementation Details
 ==
 
-- 
2.44.0

[PATCH 0/5] Fdinfo memory stats clarification and amdgpu refactor

2024-05-06 Thread Tvrtko Ursulin

From: Tvrtko Ursulin 

Following on from the recent discussion from:
  
https://lore.kernel.org/dri-devel/ZjjTaeZYNqVSj2y-@phenom.ffwll.local/T/#mb8105d8d7de055d3068975d27c428525e55cff84

I included three old patches to start with, since the last in the series
depends on the first, plus the first had a bug which is now fixed.

Apart from extending drm-usage-stats.rst with some more information on what
various keys should contain, on a suggestion from Alex, in the last patch I also
attempt to refactor amdgpu to use DRM common drm_print_memory_stats.

There are perhaps a couple discussion points there so see what you think please.

Cc: Alex Deucher 
Cc: Christian König 
Cc: Daniel Vetter 
Cc: Rob Clark 

Tvrtko Ursulin (5):
  drm/amdgpu: Add amdgpu_bo_is_vm_bo helper
  drm/amdgpu: Reduce mem_type to domain double indirection
  drm/amdgpu: Describe all object placements in debugfs
  Documentation/gpu: Document the situation with unqualified drm-memory-
  drm/amdgpu: Use drm_print_memory_stats helper from fdinfo

 Documentation/gpu/drm-usage-stats.rst   |  25 +++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c |   3 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c  |  47 +---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c |   2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c  | 126 +---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.h  |  35 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h |   1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  |  61 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  |   5 +-
 9 files changed, 189 insertions(+), 116 deletions(-)

-- 
2.44.0

Re: [PATCH 1/2] drm/amdgpu: Fix uninitialized variable warning in amdgpu_info_ioctl

2024-05-06 Thread Alex Deucher

On Mon, May 6, 2024 at 6:22 AM Ma Jun  wrote:
>
> Check the return value of amdgpu_xcp_get_inst_details, otherwise we
> may use an uninitialized variable inst_mask
>
> Signed-off-by: Ma Jun 

Acked-by: Alex Deucher 

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 11 +--
>  1 file changed, 9 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> index a0ea6fe8d060..977cde6d1362 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> @@ -623,25 +623,32 @@ int amdgpu_info_ioctl(struct drm_device *dev, void 
> *data, struct drm_file *filp)
> switch (type) {
> case AMD_IP_BLOCK_TYPE_GFX:
> ret = amdgpu_xcp_get_inst_details(xcp, 
> AMDGPU_XCP_GFX, &inst_mask);
> +   if (ret)
> +   return ret;
> count = hweight32(inst_mask);
> break;
> case AMD_IP_BLOCK_TYPE_SDMA:
> ret = amdgpu_xcp_get_inst_details(xcp, 
> AMDGPU_XCP_SDMA, &inst_mask);
> +   if (ret)
> +   return ret;
> count = hweight32(inst_mask);
> break;
> case AMD_IP_BLOCK_TYPE_JPEG:
> ret = amdgpu_xcp_get_inst_details(xcp, 
> AMDGPU_XCP_VCN, &inst_mask);
> +   if (ret)
> +   return ret;
> count = hweight32(inst_mask) * 
> adev->jpeg.num_jpeg_rings;
> break;
> case AMD_IP_BLOCK_TYPE_VCN:
> ret = amdgpu_xcp_get_inst_details(xcp, 
> AMDGPU_XCP_VCN, &inst_mask);
> +   if (ret)
> +   return ret;
> count = hweight32(inst_mask);
> break;
> default:
> return -EINVAL;
> }
> -   if (ret)
> -   return ret;
> +
> return copy_to_user(out, &count, min(size, 4u)) ? 
> -EFAULT : 0;
> }
>
> --
> 2.34.1
>

Re: [PATCH 3/3] drm/amdgpu: fix uninitialized variable warning for jpeg_v4

2024-05-06 Thread Alex Deucher

On Mon, May 6, 2024 at 2:32 AM Tim Huang  wrote:
>
> Clear warning that using uninitialized variable r.
>
> Signed-off-by: Tim Huang 

Series is:
Reviewed-by: Alex Deucher 

> ---
>  drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c 
> b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c
> index da6bb9022b80..4c8f9772437b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c
> +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c
> @@ -187,7 +187,7 @@ static int jpeg_v4_0_5_hw_init(void *handle)
>  {
> struct amdgpu_device *adev = (struct amdgpu_device *)handle;
> struct amdgpu_ring *ring;
> -   int r, i;
> +   int i, r = 0;
>
> // TODO: Enable ring test with DPG support
> if (adev->pg_flags & AMD_PG_SUPPORT_JPEG_DPG) {
> --
> 2.39.2
>

Re: [PATCH 1/2] drm/amdgpu: fix ucode out-of-bounds read warning

2024-05-06 Thread Alex Deucher

On Mon, May 6, 2024 at 5:26 AM Tim Huang  wrote:
>
> Clear warning that read ucode[] may out-of-bounds.
>
> Signed-off-by: Tim Huang 

Reviewed-by: Alex Deucher 

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c | 3 +++
>  1 file changed, 3 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
> index b8280be6225d..c3d89088123d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
> @@ -213,6 +213,9 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device 
> *cgs_device,
> struct amdgpu_firmware_info *ucode;
>
> id = fw_type_convert(cgs_device, type);
> +   if (id >= AMDGPU_UCODE_ID_MAXIMUM)
> +   return -EINVAL;
> +
> ucode = &adev->firmware.ucode[id];
> if (ucode->fw == NULL)
> return -EINVAL;
> --
> 2.39.2
>

Re: Error in amd driver?

2024-05-06 Thread Alex Deucher

On Mon, May 6, 2024 at 6:00 AM Borislav Petkov  wrote:
>
> + amd-gfx@lists.freedesktop.org
>
> On Sun, May 05, 2024 at 09:59:22PM +0300, Tranton Baddy wrote:
> > I have this in my dmesg since version 6.8.6, not sure when it appeared. Is 
> > amdgpu driver has bug?

Should be fixed in:
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=d3a9331a6591e9df64791e076f6591f440af51c3

Alex

> > [   64.253144] 
> > ==
> > [   64.253162] BUG: KFENCE: use-after-free read in 
> > amdgpu_bo_move+0x51f/0x7a0
> >
> > [   64.253183] Use-after-free read at 0x671c48dd (in kfence-#111):
> > [   64.253192]  amdgpu_bo_move+0x51f/0x7a0
> > [   64.253202]  ttm_bo_handle_move_mem+0xcf/0x180
> > [   64.253211]  ttm_mem_evict_first+0x1c5/0x500
> > [   64.253218]  ttm_resource_manager_evict_all+0xa3/0x1e0
> > [   64.253228]  amdgpu_device_prepare+0x66/0x110
> > [   64.253237]  amdgpu_pmops_runtime_suspend+0xbe/0x1c0
> > [   64.253248]  pci_pm_runtime_suspend+0x74/0x200
> > [   64.253259]  vga_switcheroo_runtime_suspend+0x21/0xb0
> > [   64.253268]  __rpm_callback+0x5f/0x190
> > [   64.253277]  rpm_callback+0x7f/0x90
> > [   64.253283]  rpm_suspend+0x120/0x6a0
> > [   64.253290]  pm_runtime_work+0x9c/0xa0
> > [   64.253297]  process_one_work+0x164/0x330
> > [   64.253310]  worker_thread+0x302/0x430
> > [   64.253320]  kthread+0xe4/0x110
> > [   64.253329]  ret_from_fork+0x4c/0x60
> > [   64.253341]  ret_from_fork_asm+0x1b/0x30
> >
> > [   64.253353] kfence-#111: 0xd018cf03-0x34e821d1, size=96, 
> > cache=kmalloc-96
> >
> > [   64.253363] allocated by task 152 on cpu 3 at 64.248952s:
> > [   64.253418]  kmalloc_trace+0x283/0x340
> > [   64.253427]  amdgpu_vram_mgr_new+0x8f/0x3f0
> > [   64.253435]  ttm_resource_alloc+0x39/0x90
> > [   64.253444]  ttm_bo_mem_space+0xa4/0x260
> > [   64.253450]  ttm_mem_evict_first+0x18a/0x500
> > [   64.253456]  ttm_resource_manager_evict_all+0xa3/0x1e0
> > [   64.253465]  amdgpu_device_prepare+0x66/0x110
> > [   64.253472]  amdgpu_pmops_runtime_suspend+0xbe/0x1c0
> > [   64.253481]  pci_pm_runtime_suspend+0x74/0x200
> > [   64.253489]  vga_switcheroo_runtime_suspend+0x21/0xb0
> > [   64.253496]  __rpm_callback+0x5f/0x190
> > [   64.253503]  rpm_callback+0x7f/0x90
> > [   64.253509]  rpm_suspend+0x120/0x6a0
> > [   64.253516]  pm_runtime_work+0x9c/0xa0
> > [   64.253523]  process_one_work+0x164/0x330
> > [   64.253532]  worker_thread+0x302/0x430
> > [   64.253542]  kthread+0xe4/0x110
> > [   64.253550]  ret_from_fork+0x4c/0x60
> > [   64.253559]  ret_from_fork_asm+0x1b/0x30
> >
> > [   64.253570] freed by task 152 on cpu 3 at 64.253117s:
> > [   64.253582]  ttm_resource_free+0x67/0x90
> > [   64.253591]  ttm_bo_move_accel_cleanup+0x247/0x2e0
> > [   64.253598]  amdgpu_bo_move+0x1bd/0x7a0
> > [   64.253605]  ttm_bo_handle_move_mem+0xcf/0x180
> > [   64.253612]  ttm_mem_evict_first+0x1c5/0x500
> > [   64.253618]  ttm_resource_manager_evict_all+0xa3/0x1e0
> > [   64.253626]  amdgpu_device_prepare+0x66/0x110
> > [   64.253634]  amdgpu_pmops_runtime_suspend+0xbe/0x1c0
> > [   64.253642]  pci_pm_runtime_suspend+0x74/0x200
> > [   64.253650]  vga_switcheroo_runtime_suspend+0x21/0xb0
> > [   64.253658]  __rpm_callback+0x5f/0x190
> > [   64.253664]  rpm_callback+0x7f/0x90
> > [   64.253671]  rpm_suspend+0x120/0x6a0
> > [   64.253677]  pm_runtime_work+0x9c/0xa0
> > [   64.253684]  process_one_work+0x164/0x330
> > [   64.253693]  worker_thread+0x302/0x430
> > [   64.253703]  kthread+0xe4/0x110
> > [   64.253711]  ret_from_fork+0x4c/0x60
> > [   64.253723]  ret_from_fork_asm+0x1b/0x30
> >
> > [   64.253735] CPU: 3 PID: 152 Comm: kworker/3:2 Tainted: P   OE
> >   6.8.9 #3 e7323d0d25f89e853881fc823e59523bdcc577c6
> > [   64.253756] Hardware name: Hewlett-Packard HP Pavilion Notebook /80B9, 
> > BIOS F.54 05/27/2019
> > [   64.253761] Workqueue: pm pm_runtime_work
> > [   64.253771] 
> > ==
> >
>
> --
> Regards/Gruss,
> Boris.
>
> https://people.kernel.org/tglx/notes-about-netiquette

Re: [PATCH 2/2] drm/amdgpu: fix mc_data out-of-bounds read warning

2024-05-06 Thread Alex Deucher

On Mon, May 6, 2024 at 5:37 AM Tim Huang  wrote:
>
> Clear the warning that the read of mc_data[i-1] may be out-of-bounds.
>
> Signed-off-by: Tim Huang 

Reviewed-by: Alex Deucher 

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c | 2 ++
>  1 file changed, 2 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
> index 52b12c1718eb..7dc102f0bc1d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
> @@ -1484,6 +1484,8 @@ int amdgpu_atombios_init_mc_reg_table(struct 
> amdgpu_device *adev,
>   
>   (u32)le32_to_cpu(*((u32 *)reg_data + j));
> j++;
> } else if 
> ((reg_table->mc_reg_address[i].pre_reg_data & LOW_NIBBLE_MASK) == 
> DATA_EQU_PREV) {
> +   if (i 
> == 0)
> + 
>   continue;
> 
> reg_table->mc_reg_table_entry[num_ranges].mc_data[i] =
>   
>   reg_table->mc_reg_table_entry[num_ranges].mc_data[i - 1];
> }
> --
> 2.39.2
>

[PATCH] drm/amdgpu: Fix the BO release clear memory warning

2024-05-06 Thread Arunpravin Paneer Selvam

This happens when amdgpu_bo_release_notify runs
before amdgpu_ttm_set_buffer_funcs_status has set the buffer
funcs to enabled.

Check that the buffer funcs are enabled before filling the
buffer memory.

Log snip:
[6.036477] [drm:amdgpu_fill_buffer [amdgpu]] *ERROR* Trying to clear memory 
with ring turned off.
[6.036667] [ cut here ]
[6.036668] WARNING: CPU: 3 PID: 370 at 
drivers/gpu/drm/amd/amdgpu/amdgpu_object.c:1355 
amdgpu_bo_release_notify+0x201/0x220 [amdgpu]
[6.036767] Modules linked in: hid_generic amdgpu(+) amdxcp drm_exec 
gpu_sched drm_buddy i2c_algo_bit usbhid drm_suballoc_helper drm_display_helper 
hid sd_mod cec rc_core drm_ttm_helper ahci ttm nvme libahci drm_kms_helper 
nvme_core r8169 xhci_pci libata t10_pi xhci_hcd realtek crc32_pclmul 
crc64_rocksoft mdio_devres crc64 drm crc32c_intel scsi_mod usbcore thunderbolt 
crc_t10dif i2c_piix4 libphy crct10dif_generic crct10dif_pclmul crct10dif_common 
scsi_common usb_common video wmi gpio_amdpt gpio_generic button
[6.036793] CPU: 3 PID: 370 Comm: (udev-worker) Not tainted 6.8.7-dirty #1
[6.036795] Hardware name: ASRock X670E Taichi/X670E Taichi, BIOS 2.10 
03/26/2024
[6.036796] RIP: 0010:amdgpu_bo_release_notify+0x201/0x220 [amdgpu]
[6.036891] Code: 0b e9 af fe ff ff 48 ba ff ff ff ff ff ff ff 7f 31 f6 4c 
89 e7 e8 7f 2f 7a d8 eb 98 e8 18 28 7a d8 eb b2 0f 0b e9 58 fe ff ff <0f> 0b eb 
a7 be 03 00 00 00 e8 e1 89 4e d8 eb 9b e8 aa 4d ad d8 66
[6.036892] RSP: 0018:bbe140d1f638 EFLAGS: 00010282
[6.036894] RAX: ffea RBX: 90cba9e4e858 RCX: 90dabde38c28
[6.036895] RDX:  RSI: dfff RDI: 0001
[6.036896] RBP: 90cba980ef40 R08:  R09: bbe140d1f3c0
[6.036896] R10: bbe140d1f3b8 R11: 0003 R12: 90cba9e4e800
[6.036897] R13: 90cba9e4e958 R14: 90cba980ef40 R15: 0258
[6.036898] FS:  7f2bd1679d00() GS:90da7e2c() 
knlGS:
[6.036899] CS:  0010 DS:  ES:  CR0: 80050033
[6.036900] CR2: 55a9b0f7299d CR3: 00011bb6e000 CR4: 00750ef0
[6.036901] PKRU: 5554
[6.036901] Call Trace:
[6.036903]  
[6.036904]  ? amdgpu_bo_release_notify+0x201/0x220 [amdgpu]
[6.036998]  ? __warn+0x81/0x130
[6.037002]  ? amdgpu_bo_release_notify+0x201/0x220 [amdgpu]
[6.037095]  ? report_bug+0x171/0x1a0
[6.037099]  ? handle_bug+0x3c/0x80
[6.037101]  ? exc_invalid_op+0x17/0x70
[6.037103]  ? asm_exc_invalid_op+0x1a/0x20
[6.037107]  ? amdgpu_bo_release_notify+0x201/0x220 [amdgpu]
[6.037199]  ? amdgpu_bo_release_notify+0x14a/0x220 [amdgpu]
[6.037292]  ttm_bo_release+0xff/0x2e0 [ttm]
[6.037297]  ? srso_alias_return_thunk+0x5/0xfbef5
[6.037299]  ? srso_alias_return_thunk+0x5/0xfbef5
[6.037301]  ? ttm_resource_move_to_lru_tail+0x140/0x1e0 [ttm]
[6.037306]  amdgpu_bo_free_kernel+0xcb/0x120 [amdgpu]
[6.037399]  dm_helpers_free_gpu_mem+0x41/0x80 [amdgpu]
[6.037544]  dcn315_clk_mgr_construct+0x198/0x7e0 [amdgpu]
[6.037692]  dc_clk_mgr_create+0x16e/0x5f0 [amdgpu]
[6.037826]  dc_create+0x28a/0x650 [amdgpu]
[6.037958]  amdgpu_dm_init.isra.0+0x2d5/0x1ec0 [amdgpu]
[6.038085]  ? srso_alias_return_thunk+0x5/0xfbef5
[6.038087]  ? prb_read_valid+0x1b/0x30
[6.038089]  ? srso_alias_return_thunk+0x5/0xfbef5
[6.038090]  ? console_unlock+0x78/0x120
[6.038092]  ? srso_alias_return_thunk+0x5/0xfbef5
[6.038094]  ? vprintk_emit+0x175/0x2c0
[6.038095]  ? srso_alias_return_thunk+0x5/0xfbef5
[6.038097]  ? srso_alias_return_thunk+0x5/0xfbef5
[6.038098]  ? dev_printk_emit+0xa5/0xd0
[6.038104]  dm_hw_init+0x12/0x30 [amdgpu]
[6.038209]  amdgpu_device_init+0x1e50/0x2500 [amdgpu]
[6.038308]  ? srso_alias_return_thunk+0x5/0xfbef5
[6.038310]  ? srso_alias_return_thunk+0x5/0xfbef5
[6.038313]  amdgpu_driver_load_kms+0x19/0x190 [amdgpu]
[6.038409]  amdgpu_pci_probe+0x18b/0x510 [amdgpu]
[6.038505]  local_pci_probe+0x42/0xa0
[6.038508]  pci_device_probe+0xc7/0x240
[6.038510]  really_probe+0x19b/0x3e0
[6.038513]  ? __pfx___driver_attach+0x10/0x10
[6.038514]  __driver_probe_device+0x78/0x160
[6.038516]  driver_probe_device+0x1f/0x90
[6.038517]  __driver_attach+0xd2/0x1c0
[6.038519]  bus_for_each_dev+0x85/0xd0
[6.038521]  bus_add_driver+0x116/0x220
[6.038523]  driver_register+0x59/0x100
[6.038525]  ? __pfx_amdgpu_init+0x10/0x10 [amdgpu]
[6.038618]  do_one_initcall+0x58/0x320
[6.038621]  do_init_module+0x60/0x230
[6.038624]  init_module_from_file+0x89/0xe0
[6.038628]  idempotent_init_module+0x120/0x2b0
[6.038630]  __x64_sys_finit_module+0x5e/0xb0
[6.038632]  do_syscall_64+0x84/0x1a0
[6.038634]  ? do_syscall_64+0x90/0x1a0
[6.038635]  ? srso_alias_return_thunk+0x5/0xfbef5
[6.038637]  ? do_syscall_64+0x90/0x1a0
[6.038638]  ? srso_alias_r

[PATCH] drm/buddy: Fix the range bias clear memory allocation issue

2024-05-06 Thread Arunpravin Paneer Selvam

Problem statement: During system boot, when an application requests a
bulk volume of cleared range bias memory while clear_avail is zero, we
don't fall back to the normal allocation method, because an unnecessary
clear_avail check prevents the fallback. This leads to fb allocation
failure, after which the system becomes unresponsive.

Solution: Remove the unnecessary clear_avail check in the range bias
allocation function.

Signed-off-by: Arunpravin Paneer Selvam 
Fixes: 96950929eb23 ("drm/buddy: Implement tracking clear page feature")
---
 drivers/gpu/drm/drm_buddy.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index 284ebae71cc4..831929ac95eb 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -574,7 +574,7 @@ __drm_buddy_alloc_range_bias(struct drm_buddy *mm,
 
block = __alloc_range_bias(mm, start, end, order,
   flags, fallback);
-   if (IS_ERR(block) && mm->clear_avail)
+   if (IS_ERR(block))
return __alloc_range_bias(mm, start, end, order,
  flags, !fallback);
 
-- 
2.25.1

Re: [PATCH] Documentation/gpu: Document the situation with unqualified drm-memory-

2024-05-06 Thread Daniel Vetter

On Fri, May 03, 2024 at 06:06:03PM +0100, Tvrtko Ursulin wrote:
> 
> On 03/05/2024 16:58, Alex Deucher wrote:
> > On Fri, May 3, 2024 at 11:33 AM Daniel Vetter  wrote:
> > > 
> > > On Fri, May 03, 2024 at 01:58:38PM +0100, Tvrtko Ursulin wrote:
> > > > 
> > > > [And I forgot dri-devel.. doing well!]
> > > > 
> > > > On 03/05/2024 13:40, Tvrtko Ursulin wrote:
> > > > > 
> > > > > [Correcting Christian's email]
> > > > > 
> > > > > On 03/05/2024 13:36, Tvrtko Ursulin wrote:
> > > > > > From: Tvrtko Ursulin 
> > > > > > 
> > > > > > Currently it is not well defined what is drm-memory- compared to 
> > > > > > other
> > > > > > categories.
> > > > > > 
> > > > > > In practice the only driver which emits these keys is amdgpu and in 
> > > > > > them
> > > > > > exposes the total memory use (including shared).
> > > > > > 
> > > > > > Document that drm-memory- and drm-total-memory- are aliases to
> > > > > > prevent any
> > > > > > confusion in the future.
> > > > > > 
> > > > > > While at it also clarify that the reserved sub-string 'memory' 
> > > > > > refers to
> > > > > > the memory region component.
> > > > > > 
> > > > > > Signed-off-by: Tvrtko Ursulin 
> > > > > > Cc: Alex Deucher 
> > > > > > Cc: Christian König 
> > > > > 
> > > > > Mea culpa, I copied the mistake from
> > > > > 77d17c4cd0bf52eacfad88e63e8932eb45d643c5. :)
> > > > > 
> > > > > Regards,
> > > > > 
> > > > > Tvrtko
> > > > > 
> > > > > > Cc: Rob Clark 
> > > > > > ---
> > > > > >Documentation/gpu/drm-usage-stats.rst | 10 +-
> > > > > >1 file changed, 9 insertions(+), 1 deletion(-)
> > > > > > 
> > > > > > diff --git a/Documentation/gpu/drm-usage-stats.rst
> > > > > > b/Documentation/gpu/drm-usage-stats.rst
> > > > > > index 6dc299343b48..ef5c0a0aa477 100644
> > > > > > --- a/Documentation/gpu/drm-usage-stats.rst
> > > > > > +++ b/Documentation/gpu/drm-usage-stats.rst
> > > > > > @@ -128,7 +128,9 @@ Memory
> > > > > >Each possible memory type which can be used to store buffer
> > > > > > objects by the
> > > > > >GPU in question shall be given a stable and unique name to be
> > > > > > returned as the
> > > > > > -string here.  The name "memory" is reserved to refer to normal
> > > > > > system memory.
> > > > > > +string here.
> > > > > > +
> > > > > > +The region name "memory" is reserved to refer to normal system 
> > > > > > memory.
> > > > > >Value shall reflect the amount of storage currently consumed by
> > > > > > the buffer
> > > > > >objects belong to this client, in the respective memory region.
> > > > > > @@ -136,6 +138,9 @@ objects belong to this client, in the respective
> > > > > > memory region.
> > > > > >Default unit shall be bytes with optional unit specifiers of 
> > > > > > 'KiB'
> > > > > > or 'MiB'
> > > > > >indicating kibi- or mebi-bytes.
> > > > > > +This is an alias for drm-total- and only one of the two
> > > > > > should be
> > > > > > +present.
> > > 
> > > This feels a bit awkward and seems to needlessly complicate fdinfo uapi.
> > > 
> > > - Could we just patch amdgpu to follow everyone else, and avoid the
> > >special case? If there's no tool that relies on the special amdgpu
> > >prefix then that would be a lot easier.
> > > 
> > > - If that's not on the table, could we make everyone (with a suitable
> > >helper or something) just print both variants, so that we again have
> > >    consistent fdinfo output? Or does that break a different set of existing
> > >    tools?
> > > 
> > > - Finally maybe could we get away with fixing amd by adding the common
> > >format there, deprecating the old, fixing the tools that would break 
> > > and
> > >then maybe if we're lucky, remove the old one from amdgpu in a year or
> > >so?
> > 
> > I'm not really understanding what amdgpu is doing wrong.  It seems to
> > be following the documentation.  Is the idea that we would like to
> > deprecate drm-memory- in favor of drm-total-?
> > If that's the case, I think the 3rd option is probably the best.  We
> > have a lot of tools and customers using this.  It would have also been
> > nice to have "memory" in the string for the newer ones to avoid
> > conflicts with other things that might be a total or shared in the
> > future, but I guess that ship has sailed.  We should also note that
> > drm-memory- is deprecated.  While we are here, maybe we should
> > clarify the semantics of resident, purgeable, and active.  For
> > example, isn't resident just a duplicate of total?  If the memory was
> > not resident, it would be in a different region.
> 
> Amdgpu isn't doing anything wrong. It just appears when the format was
> discussed no one noticed (me included) that the two keys are not clearly
> described. And it looks like there also wasn't a plan to handle the unclear
> duality in the future.

Yeah I didn't want to imply that amdgpu did anything wrong, just that if we
have a spec where everyone does one thing, except one driver, that's a
really unfortunate situation that wi

[PATCH] drm/amdkfd: Check debug trap enable before write dbg_ev_file

2024-05-06 Thread Lin . Cao

In interrupt context, the write to dbg_ev_file is deferred to a work queue.
As a result, the write to dbg_ev_file can execute after debug_trap_disable,
which causes a NULL pointer access.
v2: cancel work "debug_event_workarea" before setting dbg_ev_file to NULL.

Signed-off-by: Lin.Cao 
---
 drivers/gpu/drm/amd/amdkfd/kfd_debug.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
index d889e3545120..6c2f6a26c479 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
@@ -103,7 +103,8 @@ void debug_event_write_work_handler(struct work_struct 
*work)
struct kfd_process,
debug_event_workarea);
 
-   kernel_write(process->dbg_ev_file, &write_data, 1, &pos);
+   if (process->debug_trap_enabled && process->dbg_ev_file)
+   kernel_write(process->dbg_ev_file, &write_data, 1, &pos);
 }
 
 /* update process/device/queue exception status, write to descriptor
@@ -645,6 +646,7 @@ int kfd_dbg_trap_disable(struct kfd_process *target)
else if (target->runtime_info.runtime_state != 
DEBUG_RUNTIME_STATE_DISABLED)
target->runtime_info.runtime_state = 
DEBUG_RUNTIME_STATE_ENABLED;
 
+   cancel_work_sync(&target->debug_event_workarea);
fput(target->dbg_ev_file);
target->dbg_ev_file = NULL;
 
-- 
2.25.1

Re: Error in amd driver?

2024-05-06 Thread Borislav Petkov

+ amd-gfx@lists.freedesktop.org

On Sun, May 05, 2024 at 09:59:22PM +0300, Tranton Baddy wrote:
> I have had this in my dmesg since version 6.8.6, not sure when it appeared. Does 
> the amdgpu driver have a bug?
> [   64.253144] 
> ==
> [   64.253162] BUG: KFENCE: use-after-free read in amdgpu_bo_move+0x51f/0x7a0
> 
> [   64.253183] Use-after-free read at 0x671c48dd (in kfence-#111):
> [   64.253192]  amdgpu_bo_move+0x51f/0x7a0
> [   64.253202]  ttm_bo_handle_move_mem+0xcf/0x180
> [   64.253211]  ttm_mem_evict_first+0x1c5/0x500
> [   64.253218]  ttm_resource_manager_evict_all+0xa3/0x1e0
> [   64.253228]  amdgpu_device_prepare+0x66/0x110
> [   64.253237]  amdgpu_pmops_runtime_suspend+0xbe/0x1c0
> [   64.253248]  pci_pm_runtime_suspend+0x74/0x200
> [   64.253259]  vga_switcheroo_runtime_suspend+0x21/0xb0
> [   64.253268]  __rpm_callback+0x5f/0x190
> [   64.253277]  rpm_callback+0x7f/0x90
> [   64.253283]  rpm_suspend+0x120/0x6a0
> [   64.253290]  pm_runtime_work+0x9c/0xa0
> [   64.253297]  process_one_work+0x164/0x330
> [   64.253310]  worker_thread+0x302/0x430
> [   64.253320]  kthread+0xe4/0x110
> [   64.253329]  ret_from_fork+0x4c/0x60
> [   64.253341]  ret_from_fork_asm+0x1b/0x30
> 
> [   64.253353] kfence-#111: 0xd018cf03-0x34e821d1, size=96, 
> cache=kmalloc-96
> 
> [   64.253363] allocated by task 152 on cpu 3 at 64.248952s:
> [   64.253418]  kmalloc_trace+0x283/0x340
> [   64.253427]  amdgpu_vram_mgr_new+0x8f/0x3f0
> [   64.253435]  ttm_resource_alloc+0x39/0x90
> [   64.253444]  ttm_bo_mem_space+0xa4/0x260
> [   64.253450]  ttm_mem_evict_first+0x18a/0x500
> [   64.253456]  ttm_resource_manager_evict_all+0xa3/0x1e0
> [   64.253465]  amdgpu_device_prepare+0x66/0x110
> [   64.253472]  amdgpu_pmops_runtime_suspend+0xbe/0x1c0
> [   64.253481]  pci_pm_runtime_suspend+0x74/0x200
> [   64.253489]  vga_switcheroo_runtime_suspend+0x21/0xb0
> [   64.253496]  __rpm_callback+0x5f/0x190
> [   64.253503]  rpm_callback+0x7f/0x90
> [   64.253509]  rpm_suspend+0x120/0x6a0
> [   64.253516]  pm_runtime_work+0x9c/0xa0
> [   64.253523]  process_one_work+0x164/0x330
> [   64.253532]  worker_thread+0x302/0x430
> [   64.253542]  kthread+0xe4/0x110
> [   64.253550]  ret_from_fork+0x4c/0x60
> [   64.253559]  ret_from_fork_asm+0x1b/0x30
> 
> [   64.253570] freed by task 152 on cpu 3 at 64.253117s:
> [   64.253582]  ttm_resource_free+0x67/0x90
> [   64.253591]  ttm_bo_move_accel_cleanup+0x247/0x2e0
> [   64.253598]  amdgpu_bo_move+0x1bd/0x7a0
> [   64.253605]  ttm_bo_handle_move_mem+0xcf/0x180
> [   64.253612]  ttm_mem_evict_first+0x1c5/0x500
> [   64.253618]  ttm_resource_manager_evict_all+0xa3/0x1e0
> [   64.253626]  amdgpu_device_prepare+0x66/0x110
> [   64.253634]  amdgpu_pmops_runtime_suspend+0xbe/0x1c0
> [   64.253642]  pci_pm_runtime_suspend+0x74/0x200
> [   64.253650]  vga_switcheroo_runtime_suspend+0x21/0xb0
> [   64.253658]  __rpm_callback+0x5f/0x190
> [   64.253664]  rpm_callback+0x7f/0x90
> [   64.253671]  rpm_suspend+0x120/0x6a0
> [   64.253677]  pm_runtime_work+0x9c/0xa0
> [   64.253684]  process_one_work+0x164/0x330
> [   64.253693]  worker_thread+0x302/0x430
> [   64.253703]  kthread+0xe4/0x110
> [   64.253711]  ret_from_fork+0x4c/0x60
> [   64.253723]  ret_from_fork_asm+0x1b/0x30
> 
> [   64.253735] CPU: 3 PID: 152 Comm: kworker/3:2 Tainted: P   OE  
> 6.8.9 #3 e7323d0d25f89e853881fc823e59523bdcc577c6
> [   64.253756] Hardware name: Hewlett-Packard HP Pavilion Notebook /80B9, 
> BIOS F.54 05/27/2019
> [   64.253761] Workqueue: pm pm_runtime_work
> [   64.253771] 
> ==
> 

-- 
Regards/Gruss,
Boris.

https://people.kernel.org/tglx/notes-about-netiquette

Re: [PATCH] Documentation/gpu: Document the situation with unqualified drm-memory-

2024-05-06 Thread Tvrtko Ursulin




On 03/05/2024 16:58, Alex Deucher wrote:

On Fri, May 3, 2024 at 11:33 AM Daniel Vetter  wrote:


On Fri, May 03, 2024 at 01:58:38PM +0100, Tvrtko Ursulin wrote:


[And I forgot dri-devel.. doing well!]

On 03/05/2024 13:40, Tvrtko Ursulin wrote:


[Correcting Christian's email]

On 03/05/2024 13:36, Tvrtko Ursulin wrote:

From: Tvrtko Ursulin 

Currently it is not well defined what is drm-memory- compared to other
categories.

In practice the only driver which emits these keys is amdgpu and in them
exposes the total memory use (including shared).

Document that drm-memory- and drm-total-memory- are aliases to
prevent any
confusion in the future.

While at it also clarify that the reserved sub-string 'memory' refers to
the memory region component.

Signed-off-by: Tvrtko Ursulin 
Cc: Alex Deucher 
Cc: Christian König 


Mea culpa, I copied the mistake from
77d17c4cd0bf52eacfad88e63e8932eb45d643c5. :)

Regards,

Tvrtko


Cc: Rob Clark 
---
   Documentation/gpu/drm-usage-stats.rst | 10 +-
   1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/Documentation/gpu/drm-usage-stats.rst
b/Documentation/gpu/drm-usage-stats.rst
index 6dc299343b48..ef5c0a0aa477 100644
--- a/Documentation/gpu/drm-usage-stats.rst
+++ b/Documentation/gpu/drm-usage-stats.rst
@@ -128,7 +128,9 @@ Memory
   Each possible memory type which can be used to store buffer
objects by the
   GPU in question shall be given a stable and unique name to be
returned as the
-string here.  The name "memory" is reserved to refer to normal
system memory.
+string here.
+
+The region name "memory" is reserved to refer to normal system memory.
   Value shall reflect the amount of storage currently consumed by
the buffer
   objects belong to this client, in the respective memory region.
@@ -136,6 +138,9 @@ objects belong to this client, in the respective
memory region.
   Default unit shall be bytes with optional unit specifiers of 'KiB'
or 'MiB'
   indicating kibi- or mebi-bytes.
+This is an alias for drm-total- and only one of the two
should be
+present.


This feels a bit awkward and seems to needlessly complicate fdinfo uapi.

- Could we just patch amdgpu to follow everyone else, and avoid the
   special case? If there's no tool that relies on the special amdgpu
   prefix then that would be a lot easier.

- If that's not on the table, could we make everyone (with a suitable
   helper or something) just print both variants, so that we again have
   consistent fdinfo output? Or does that break a different set of existing
   tools?

- Finally maybe could we get away with fixing amd by adding the common
   format there, deprecating the old, fixing the tools that would break and
   then maybe if we're lucky, remove the old one from amdgpu in a year or
   so?


I'm not really understanding what amdgpu is doing wrong.  It seems to
be following the documentation.  Is the idea that we would like to
deprecate drm-memory- in favor of drm-total-?
If that's the case, I think the 3rd option is probably the best.  We
have a lot of tools and customers using this.  It would have also been
nice to have "memory" in the string for the newer ones to avoid
conflicts with other things that might be a total or shared in the
future, but I guess that ship has sailed.  We should also note that
drm-memory- is deprecated.  While we are here, maybe we should
clarify the semantics of resident, purgeable, and active.  For
example, isn't resident just a duplicate of total?  If the memory was
not resident, it would be in a different region.


Amdgpu isn't doing anything wrong. It just appears when the format was 
discussed no one noticed (me included) that the two keys are not clearly 
described. And it looks like there also wasn't a plan to handle the unclear 
duality in the future.


For me deprecating sounds fine, the 3rd option. I understand we would 
only make amdgpu emit both sets of keys and then remove drm-memory- in 
due time.


With regards to key naming, yeah, memory in the name would have been 
nice. We had a lot of discussion on this topic but ship has indeed 
sailed. It is probably workable for anything new that might come to add 
their prefix. As long as it does not clash with the memory categories it 
should be fine.


In terms of resident semantics, think of it as VIRT vs RES in top(1). It 
is for drivers which allocate backing store lazily, on first use.


Purgeable is for drivers which have a form of MADV_DONTNEED ie. 
currently have backing store but userspace has indicated it can be 
dropped without preserving the content on memory pressure.


Active is when reservation object says there is activity on the buffer.

Regards,

Tvrtko



Alex



Uapi that's "either do $foo or on this one driver, do $bar" is just
guaranteed to fragment the ecosystem, so imo that should be the absolute
last resort.
-Sima


+
   - drm-shared-:  [KiB|MiB]
   The total size of buffers that are shared with another file (e.g.,
have more
@@ -145,6 +150,9 @@ than a single handle).
   The

Re: [RFC 5/5] drm/amdgpu: Only show VRAM in fdinfo if it exists

2024-05-06 Thread Tvrtko Ursulin




On 03/05/2024 14:47, Alex Deucher wrote:

On Fri, May 3, 2024 at 3:50 AM Tvrtko Ursulin  wrote:



On 02/05/2024 14:16, Christian König wrote:

Am 30.04.24 um 19:27 schrieb Tvrtko Ursulin:

From: Tvrtko Ursulin 

Do not emit the key-value pairs if the VRAM does not exist ie. VRAM
placement is not valid and accessible.


Yeah, that's unfortunately rather misleading.

Even APUs have VRAM or rather stolen system memory which is managed by
the graphics driver.

We only have a single compute model which really doesn't have VRAM at all.


Hm what is misleading and how more precisely? :) Maybe in other words,
if is_app_apu is not the right criteria to know when TTM_PL_VRAM is
impossible, what is? Is the compute model you mention the only thing
which sets is_app_apu and uses the dummy vram manager?


Probably better to check if adev->gmc.real_vram_size is non-0.


Hmm "real VRAM" - will that handle APUs correctly?

I am looking at this:

if (!adev->gmc.is_app_apu) {
man->func = &amdgpu_vram_mgr_func;

err = drm_buddy_init(&mgr->mm, man->size, PAGE_SIZE);
if (err)
return err;
} else {
man->func = &amdgpu_dummy_vram_mgr_func;
DRM_INFO("Setup dummy vram mgr\n");
}

And assuming that unless the dummy manager is used, TTM_PL_VRAM will be 
valid. Wrong assumption?


Regards,

Tvrtko



Alex



Regards,

Tvrtko


Regards,
Christian.



Signed-off-by: Tvrtko Ursulin 
---
   drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c | 29 +-
   1 file changed, 17 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c
index a09944104c41..603a5c010f5d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c
@@ -83,25 +83,30 @@ void amdgpu_show_fdinfo(struct drm_printer *p,
struct drm_file *file)
*/
   drm_printf(p, "pasid:\t%u\n", fpriv->vm.pasid);
-drm_printf(p, "drm-memory-vram:\t%llu KiB\n", stats.vram/1024UL);
   drm_printf(p, "drm-memory-gtt: \t%llu KiB\n", stats.gtt/1024UL);
   drm_printf(p, "drm-memory-cpu: \t%llu KiB\n", stats.cpu/1024UL);
-drm_printf(p, "amd-memory-visible-vram:\t%llu KiB\n",
-   stats.visible_vram/1024UL);
-drm_printf(p, "amd-evicted-vram:\t%llu KiB\n",
-   stats.evicted_vram/1024UL);
-drm_printf(p, "amd-evicted-visible-vram:\t%llu KiB\n",
-   stats.evicted_visible_vram/1024UL);
-drm_printf(p, "amd-requested-vram:\t%llu KiB\n",
-   stats.requested_vram/1024UL);
-drm_printf(p, "amd-requested-visible-vram:\t%llu KiB\n",
-   stats.requested_visible_vram/1024UL);
   drm_printf(p, "amd-requested-gtt:\t%llu KiB\n",
  stats.requested_gtt/1024UL);
-drm_printf(p, "drm-shared-vram:\t%llu KiB\n",
stats.vram_shared/1024UL);
   drm_printf(p, "drm-shared-gtt:\t%llu KiB\n",
stats.gtt_shared/1024UL);
   drm_printf(p, "drm-shared-cpu:\t%llu KiB\n",
stats.cpu_shared/1024UL);
+if (!adev->gmc.is_app_apu) {
+drm_printf(p, "drm-memory-vram:\t%llu KiB\n",
+   stats.vram/1024UL);
+drm_printf(p, "amd-memory-visible-vram:\t%llu KiB\n",
+   stats.visible_vram/1024UL);
+drm_printf(p, "amd-evicted-vram:\t%llu KiB\n",
+   stats.evicted_vram/1024UL);
+drm_printf(p, "amd-evicted-visible-vram:\t%llu KiB\n",
+   stats.evicted_visible_vram/1024UL);
+drm_printf(p, "amd-requested-vram:\t%llu KiB\n",
+   stats.requested_vram/1024UL);
+drm_printf(p, "amd-requested-visible-vram:\t%llu KiB\n",
+   stats.requested_visible_vram/1024UL);
+drm_printf(p, "drm-shared-vram:\t%llu KiB\n",
+   stats.vram_shared/1024UL);
+}
+
   for (hw_ip = 0; hw_ip < AMDGPU_HW_IP_NUM; ++hw_ip) {
   if (!usage[hw_ip])
   continue;

Re: [PATCH] Documentation/gpu: Document the situation with unqualified drm-memory-

2024-05-06 Thread Tvrtko Ursulin




On 03/05/2024 14:39, Alex Deucher wrote:

On Fri, May 3, 2024 at 8:58 AM Tvrtko Ursulin  wrote:



[And I forgot dri-devel.. doing well!]

On 03/05/2024 13:40, Tvrtko Ursulin wrote:


[Correcting Christian's email]

On 03/05/2024 13:36, Tvrtko Ursulin wrote:

From: Tvrtko Ursulin 

Currently it is not well defined what is drm-memory- compared to other
categories.

In practice the only driver which emits these keys is amdgpu and in them
exposes the total memory use (including shared).

Document that drm-memory- and drm-total-memory- are aliases to prevent
any
confusion in the future.

While at it also clarify that the reserved sub-string 'memory' refers to
the memory region component.

Signed-off-by: Tvrtko Ursulin 
Cc: Alex Deucher 
Cc: Christian König 


Mea culpa, I copied the mistake from
77d17c4cd0bf52eacfad88e63e8932eb45d643c5. :)



I'm not following.  What is the mistake from that commit?


Just the spelling of Christian's last name in the email address, nothing 
in the code itself. I failed to spot both that when copying the email 
for git commit, and also failed to cc dri-devel so I am having a bad day.


Regards,

Tvrtko




Regards,

Tvrtko


Cc: Rob Clark 
---
   Documentation/gpu/drm-usage-stats.rst | 10 +-
   1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/Documentation/gpu/drm-usage-stats.rst
b/Documentation/gpu/drm-usage-stats.rst
index 6dc299343b48..ef5c0a0aa477 100644
--- a/Documentation/gpu/drm-usage-stats.rst
+++ b/Documentation/gpu/drm-usage-stats.rst
@@ -128,7 +128,9 @@ Memory
   Each possible memory type which can be used to store buffer objects
by the
   GPU in question shall be given a stable and unique name to be
returned as the
-string here.  The name "memory" is reserved to refer to normal system
memory.
+string here.
+
+The region name "memory" is reserved to refer to normal system memory.


Is this supposed to mean drm-memory-memory?  That was my impression,
but that seems sort of weird.  Maybe we should just drop that
sentence.

Alex


   Value shall reflect the amount of storage currently consumed by the
buffer
   objects belong to this client, in the respective memory region.
@@ -136,6 +138,9 @@ objects belong to this client, in the respective
memory region.
   Default unit shall be bytes with optional unit specifiers of 'KiB'
or 'MiB'
   indicating kibi- or mebi-bytes.
+This is an alias for drm-total- and only one of the two
should be
+present.
+
   - drm-shared-:  [KiB|MiB]
   The total size of buffers that are shared with another file (e.g.,
have more
@@ -145,6 +150,9 @@ than a single handle).
   The total size of buffers that including shared and private memory.
+This is an alias for drm-memory- and only one of the two
should be
+present.
+
   - drm-resident-:  [KiB|MiB]
   The total size of buffers that are resident in the specified region.

Re: [PATCH] drm/amd/pm: Fix error code in vega10_hwmgr_backend_init()

2024-05-06 Thread Markus Elfring

> Return -EINVAL on error instead of success.  Also on the success path,
> return a literal zero instead of "return result;"

What do you think about omitting the initialisation of the variable “result”
in another update step?

Regards,
Markus

[PATCH v2 02/12] drm/gma500: Make I2C terminology more inclusive

2024-05-06 Thread Easwar Hariharan

I2C v7, SMBus 3.2, and I3C 1.1.1 specifications have replaced "master/slave"
with more appropriate terms. Inspired by and following on to Wolfram's
series to fix drivers/i2c/[1], fix the terminology for users of
I2C_ALGOBIT bitbanging interface, now that the approved verbiage exists
in the specification.

Compile tested, no functionality changes intended

[1]: 
https://lore.kernel.org/all/20240322132619.6389-1-wsa+rene...@sang-engineering.com/

Acked-by: Thomas Zimmermann 
Signed-off-by: Easwar Hariharan 
---
 drivers/gpu/drm/gma500/cdv_intel_lvds.c |  2 +-
 drivers/gpu/drm/gma500/intel_bios.c | 22 ++---
 drivers/gpu/drm/gma500/intel_bios.h |  4 ++--
 drivers/gpu/drm/gma500/intel_gmbus.c|  2 +-
 drivers/gpu/drm/gma500/psb_drv.h|  2 +-
 drivers/gpu/drm/gma500/psb_intel_drv.h  |  2 +-
 drivers/gpu/drm/gma500/psb_intel_lvds.c |  4 ++--
 drivers/gpu/drm/gma500/psb_intel_sdvo.c | 26 -
 8 files changed, 32 insertions(+), 32 deletions(-)

diff --git a/drivers/gpu/drm/gma500/cdv_intel_lvds.c 
b/drivers/gpu/drm/gma500/cdv_intel_lvds.c
index f08a6803dc18..c7652a02b42e 100644
--- a/drivers/gpu/drm/gma500/cdv_intel_lvds.c
+++ b/drivers/gpu/drm/gma500/cdv_intel_lvds.c
@@ -565,7 +565,7 @@ void cdv_intel_lvds_init(struct drm_device *dev,
dev->dev, "I2C bus registration failed.\n");
goto err_encoder_cleanup;
}
-   gma_encoder->i2c_bus->slave_addr = 0x2C;
+   gma_encoder->i2c_bus->target_addr = 0x2C;
dev_priv->lvds_i2c_bus = gma_encoder->i2c_bus;
 
/*
diff --git a/drivers/gpu/drm/gma500/intel_bios.c 
b/drivers/gpu/drm/gma500/intel_bios.c
index 8245b5603d2c..d5924ca3ed05 100644
--- a/drivers/gpu/drm/gma500/intel_bios.c
+++ b/drivers/gpu/drm/gma500/intel_bios.c
@@ -14,8 +14,8 @@
 #include "psb_intel_drv.h"
 #include "psb_intel_reg.h"
 
-#defineSLAVE_ADDR1 0x70
-#defineSLAVE_ADDR2 0x72
+#defineTARGET_ADDR10x70
+#defineTARGET_ADDR20x72
 
 static void *find_section(struct bdb_header *bdb, int section_id)
 {
@@ -357,10 +357,10 @@ parse_sdvo_device_mapping(struct drm_psb_private 
*dev_priv,
/* skip the device block if device type is invalid */
continue;
}
-   if (p_child->slave_addr != SLAVE_ADDR1 &&
-   p_child->slave_addr != SLAVE_ADDR2) {
+   if (p_child->target_addr != TARGET_ADDR1 &&
+   p_child->target_addr != TARGET_ADDR2) {
/*
-* If the slave address is neither 0x70 nor 0x72,
+* If the target address is neither 0x70 nor 0x72,
 * it is not a SDVO device. Skip it.
 */
continue;
@@ -371,22 +371,22 @@ parse_sdvo_device_mapping(struct drm_psb_private 
*dev_priv,
DRM_DEBUG_KMS("Incorrect SDVO port. Skip it\n");
continue;
}
-   DRM_DEBUG_KMS("the SDVO device with slave addr %2x is found on"
+   DRM_DEBUG_KMS("the SDVO device with target addr %2x is found on"
" %s port\n",
-   p_child->slave_addr,
+   p_child->target_addr,
(p_child->dvo_port == DEVICE_PORT_DVOB) ?
"SDVOB" : "SDVOC");
p_mapping = &(dev_priv->sdvo_mappings[p_child->dvo_port - 1]);
if (!p_mapping->initialized) {
p_mapping->dvo_port = p_child->dvo_port;
-   p_mapping->slave_addr = p_child->slave_addr;
+   p_mapping->target_addr = p_child->target_addr;
p_mapping->dvo_wiring = p_child->dvo_wiring;
p_mapping->ddc_pin = p_child->ddc_pin;
p_mapping->i2c_pin = p_child->i2c_pin;
p_mapping->initialized = 1;
DRM_DEBUG_KMS("SDVO device: dvo=%x, addr=%x, wiring=%d, 
ddc_pin=%d, i2c_pin=%d\n",
  p_mapping->dvo_port,
- p_mapping->slave_addr,
+ p_mapping->target_addr,
  p_mapping->dvo_wiring,
  p_mapping->ddc_pin,
  p_mapping->i2c_pin);
@@ -394,10 +394,10 @@ parse_sdvo_device_mapping(struct drm_psb_private 
*dev_priv,
DRM_DEBUG_KMS("Maybe one SDVO port is shared by "
 "two SDVO device.\n");
}
-   if (p_child->slave2_addr) {
+   if (p_child->target2_addr) {
/* Maybe this is a SDVO device with multiple inputs */
/* And the

[PATCH v2 09/12] media: cx23885: Make I2C terminology more inclusive

2024-05-06 Thread Easwar Hariharan

I2C v7, SMBus 3.2, and I3C 1.1.1 specifications have replaced "master/slave"
with more appropriate terms. Inspired by and following on to Wolfram's
series to fix drivers/i2c/[1], fix the terminology for users of
I2C_ALGOBIT bitbanging interface, now that the approved verbiage exists
in the specification.

Compile tested, no functionality changes intended

[1]: 
https://lore.kernel.org/all/20240322132619.6389-1-wsa+rene...@sang-engineering.com/

Signed-off-by: Easwar Hariharan 
---
 drivers/media/pci/cx23885/cx23885-core.c | 6 +++---
 drivers/media/pci/cx23885/cx23885-f300.c | 8 
 drivers/media/pci/cx23885/cx23885-i2c.c  | 6 +++---
 drivers/media/pci/cx23885/cx23885.h  | 2 +-
 4 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/drivers/media/pci/cx23885/cx23885-core.c 
b/drivers/media/pci/cx23885/cx23885-core.c
index c8705d786cdd..0adbdf529cec 100644
--- a/drivers/media/pci/cx23885/cx23885-core.c
+++ b/drivers/media/pci/cx23885/cx23885-core.c
@@ -942,7 +942,7 @@ static int cx23885_dev_setup(struct cx23885_dev *dev)
dev->pci_slot = PCI_SLOT(dev->pci->devfn);
cx23885_irq_add(dev, 0x001f00);
 
-   /* External Master 1 Bus */
+   /* External Controller 1 Bus */
dev->i2c_bus[0].nr = 0;
dev->i2c_bus[0].dev = dev;
dev->i2c_bus[0].reg_stat  = I2C1_STAT;
@@ -952,7 +952,7 @@ static int cx23885_dev_setup(struct cx23885_dev *dev)
dev->i2c_bus[0].reg_wdata = I2C1_WDATA;
dev->i2c_bus[0].i2c_period = (0x9d << 24); /* 100kHz */
 
-   /* External Master 2 Bus */
+   /* External Controller 2 Bus */
dev->i2c_bus[1].nr = 1;
dev->i2c_bus[1].dev = dev;
dev->i2c_bus[1].reg_stat  = I2C2_STAT;
@@ -962,7 +962,7 @@ static int cx23885_dev_setup(struct cx23885_dev *dev)
dev->i2c_bus[1].reg_wdata = I2C2_WDATA;
dev->i2c_bus[1].i2c_period = (0x9d << 24); /* 100kHz */
 
-   /* Internal Master 3 Bus */
+   /* Internal Controller 3 Bus */
dev->i2c_bus[2].nr = 2;
dev->i2c_bus[2].dev = dev;
dev->i2c_bus[2].reg_stat  = I2C3_STAT;
diff --git a/drivers/media/pci/cx23885/cx23885-f300.c 
b/drivers/media/pci/cx23885/cx23885-f300.c
index ac1c434e8e24..2ef7454e0f61 100644
--- a/drivers/media/pci/cx23885/cx23885-f300.c
+++ b/drivers/media/pci/cx23885/cx23885-f300.c
@@ -92,7 +92,7 @@ static u8 f300_xfer(struct dvb_frontend *fe, u8 *buf)
f300_set_line(dev, F300_RESET, 0);/* begin to send data */
msleep(1);
 
-   f300_send_byte(dev, 0xe0);/* the slave address is 0xe0, write */
+   f300_send_byte(dev, 0xe0);/* the target address is 0xe0, write */
msleep(1);
 
temp = buf[0];
@@ -112,10 +112,10 @@ static u8 f300_xfer(struct dvb_frontend *fe, u8 *buf)
}
 
if (i > 7) {
-   pr_err("%s: timeout, the slave no response\n",
+   pr_err("%s: timeout, the target no response\n",
__func__);
-   ret = 1; /* timeout, the slave no response */
-   } else { /* the slave not busy, prepare for getting data */
+   ret = 1; /* timeout, the target no response */
+   } else { /* the target not busy, prepare for getting data */
f300_set_line(dev, F300_RESET, 0);/*ready...*/
msleep(1);
f300_send_byte(dev, 0xe1);/* 0xe1 is Read */
diff --git a/drivers/media/pci/cx23885/cx23885-i2c.c 
b/drivers/media/pci/cx23885/cx23885-i2c.c
index f51fad33dc04..ddafeccb2b0a 100644
--- a/drivers/media/pci/cx23885/cx23885-i2c.c
+++ b/drivers/media/pci/cx23885/cx23885-i2c.c
@@ -34,7 +34,7 @@ MODULE_PARM_DESC(i2c_scan, "scan i2c bus at insmod time");
 #define I2C_EXTEND  (1 << 3)
 #define I2C_NOSTOP  (1 << 4)
 
-static inline int i2c_slave_did_ack(struct i2c_adapter *i2c_adap)
+static inline int i2c_target_did_ack(struct i2c_adapter *i2c_adap)
 {
struct cx23885_i2c *bus = i2c_adap->algo_data;
struct cx23885_dev *dev = bus->dev;
@@ -84,7 +84,7 @@ static int i2c_sendbytes(struct i2c_adapter *i2c_adap,
cx_write(bus->reg_ctrl, bus->i2c_period | (1 << 2));
if (!i2c_wait_done(i2c_adap))
return -EIO;
-   if (!i2c_slave_did_ack(i2c_adap))
+   if (!i2c_target_did_ack(i2c_adap))
return -ENXIO;
 
dprintk(1, "%s() returns 0\n", __func__);
@@ -163,7 +163,7 @@ static int i2c_readbytes(struct i2c_adapter *i2c_adap,
cx_write(bus->reg_ctrl, bus->i2c_period | (1 << 2) | 1);
if (!i2c_wait_done(i2c_adap))
return -EIO;
-   if (!i2c_slave_did_ack(i2c_adap))
+   if (!i2c_target_did_ack(i2c_adap))
return -ENXIO;
 
 
diff --git a/drivers/media/pci/cx23885/cx23885.h 
b/drivers/media/pci/cx23885/cx23885.h
index 349462ee2c48..c2d7a95933d5 100644
--- a/drivers/media/pci/cx23885/cx23885.h
+++ b/drivers/media/pci/

[PATCH v2 03/12] drm/i915: Make I2C terminology more inclusive

2024-05-06 Thread Easwar Hariharan

I2C v7, SMBus 3.2, and I3C 1.1.1 specifications have replaced "master/slave"
with more appropriate terms. Inspired by and following on to Wolfram's
series to fix drivers/i2c/[1], fix the terminology for users of
I2C_ALGOBIT bitbanging interface, now that the approved verbiage exists
in the specification.

Compile tested, no functionality changes intended

[1]: 
https://lore.kernel.org/all/20240322132619.6389-1-wsa+rene...@sang-engineering.com/

Reviewed-by: Rodrigo Vivi 
Acked-by: Rodrigo Vivi 
Acked-by: Zhi Wang 
Signed-off-by: Easwar Hariharan 
---
 drivers/gpu/drm/i915/display/dvo_ch7017.c | 14 -
 drivers/gpu/drm/i915/display/dvo_ch7xxx.c | 18 +--
 drivers/gpu/drm/i915/display/dvo_ivch.c   | 16 +-
 drivers/gpu/drm/i915/display/dvo_ns2501.c | 18 +--
 drivers/gpu/drm/i915/display/dvo_sil164.c | 18 +--
 drivers/gpu/drm/i915/display/dvo_tfp410.c | 18 +--
 drivers/gpu/drm/i915/display/intel_bios.c | 22 +++---
 drivers/gpu/drm/i915/display/intel_ddi.c  |  2 +-
 .../gpu/drm/i915/display/intel_display_core.h |  2 +-
 drivers/gpu/drm/i915/display/intel_dsi.h  |  2 +-
 drivers/gpu/drm/i915/display/intel_dsi_vbt.c  | 20 ++---
 drivers/gpu/drm/i915/display/intel_dvo.c  | 14 -
 drivers/gpu/drm/i915/display/intel_dvo_dev.h  |  2 +-
 drivers/gpu/drm/i915/display/intel_gmbus.c|  4 +--
 drivers/gpu/drm/i915/display/intel_sdvo.c | 30 +--
 drivers/gpu/drm/i915/display/intel_vbt_defs.h |  4 +--
 drivers/gpu/drm/i915/gvt/edid.c   | 28 -
 drivers/gpu/drm/i915/gvt/edid.h   |  4 +--
 drivers/gpu/drm/i915/gvt/opregion.c   |  2 +-
 19 files changed, 119 insertions(+), 119 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/dvo_ch7017.c 
b/drivers/gpu/drm/i915/display/dvo_ch7017.c
index d0c3880d7f80..493e730c685b 100644
--- a/drivers/gpu/drm/i915/display/dvo_ch7017.c
+++ b/drivers/gpu/drm/i915/display/dvo_ch7017.c
@@ -170,13 +170,13 @@ static bool ch7017_read(struct intel_dvo_device *dvo, u8 
addr, u8 *val)
 {
struct i2c_msg msgs[] = {
{
-   .addr = dvo->slave_addr,
+   .addr = dvo->target_addr,
.flags = 0,
.len = 1,
.buf = &addr,
},
{
-   .addr = dvo->slave_addr,
+   .addr = dvo->target_addr,
.flags = I2C_M_RD,
.len = 1,
.buf = val,
@@ -189,7 +189,7 @@ static bool ch7017_write(struct intel_dvo_device *dvo, u8 
addr, u8 val)
 {
u8 buf[2] = { addr, val };
struct i2c_msg msg = {
-   .addr = dvo->slave_addr,
+   .addr = dvo->target_addr,
.flags = 0,
.len = 2,
.buf = buf,
@@ -197,7 +197,7 @@ static bool ch7017_write(struct intel_dvo_device *dvo, u8 
addr, u8 val)
return i2c_transfer(dvo->i2c_bus, &msg, 1) == 1;
 }
 
-/** Probes for a CH7017 on the given bus and slave address. */
+/** Probes for a CH7017 on the given bus and target address. */
 static bool ch7017_init(struct intel_dvo_device *dvo,
struct i2c_adapter *adapter)
 {
@@ -227,13 +227,13 @@ static bool ch7017_init(struct intel_dvo_device *dvo,
break;
default:
DRM_DEBUG_KMS("ch701x not detected, got %d: from %s "
- "slave %d.\n",
- val, adapter->name, dvo->slave_addr);
+ "target %d.\n",
+ val, adapter->name, dvo->target_addr);
goto fail;
}
 
DRM_DEBUG_KMS("%s detected on %s, addr %d\n",
- str, adapter->name, dvo->slave_addr);
+ str, adapter->name, dvo->target_addr);
return true;
 
 fail:
diff --git a/drivers/gpu/drm/i915/display/dvo_ch7xxx.c 
b/drivers/gpu/drm/i915/display/dvo_ch7xxx.c
index 2e8e85da5a40..534b8544e0a4 100644
--- a/drivers/gpu/drm/i915/display/dvo_ch7xxx.c
+++ b/drivers/gpu/drm/i915/display/dvo_ch7xxx.c
@@ -153,13 +153,13 @@ static bool ch7xxx_readb(struct intel_dvo_device *dvo, 
int addr, u8 *ch)
 
struct i2c_msg msgs[] = {
{
-   .addr = dvo->slave_addr,
+   .addr = dvo->target_addr,
.flags = 0,
.len = 1,
.buf = out_buf,
},
{
-   .addr = dvo->slave_addr,
+   .addr = dvo->target_addr,
.flags = I2C_M_RD,
.len = 1,
.buf = in_buf,
@@ -176,7 +176,7 @@ static bool ch7xxx_readb(struct intel_dvo_device *dvo, int 
addr, u8 *ch)
 
if (!ch7xxx->quiet) {
DRM_DEBUG_KMS(

[PATCH v2 01/12] drm/amdgpu, drm/radeon: Make I2C terminology more inclusive

2024-05-06 Thread Easwar Hariharan

I2C v7, SMBus 3.2, and I3C 1.1.1 specifications have replaced "master/slave"
with more appropriate terms. Inspired by and following on to Wolfram's
series to fix drivers/i2c/[1], fix the terminology for users of
I2C_ALGOBIT bitbanging interface, now that the approved verbiage exists
in the specification.

Compile tested, no functionality changes intended

[1]: 
https://lore.kernel.org/all/20240322132619.6389-1-wsa+rene...@sang-engineering.com/

Signed-off-by: Easwar Hariharan 
---
 .../gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c  |  8 +++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c   | 10 +++
 drivers/gpu/drm/amd/amdgpu/atombios_i2c.c |  8 +++---
 drivers/gpu/drm/amd/amdgpu/atombios_i2c.h |  2 +-
 drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c| 20 ++---
 .../gpu/drm/amd/display/dc/bios/bios_parser.c |  2 +-
 .../drm/amd/display/dc/bios/bios_parser2.c|  2 +-
 .../drm/amd/display/dc/core/dc_link_exports.c |  4 +--
 drivers/gpu/drm/amd/display/dc/dc.h   |  2 +-
 drivers/gpu/drm/amd/display/dc/dce/dce_i2c.c  |  4 +--
 .../display/include/grph_object_ctrl_defs.h   |  2 +-
 drivers/gpu/drm/amd/include/atombios.h|  2 +-
 drivers/gpu/drm/amd/include/atomfirmware.h| 26 -
 .../powerplay/hwmgr/vega20_processpptables.c  |  4 +--
 .../amd/pm/powerplay/inc/smu11_driver_if.h|  2 +-
 .../inc/pmfw_if/smu11_driver_if_arcturus.h|  2 +-
 .../inc/pmfw_if/smu11_driver_if_navi10.h  |  2 +-
 .../pmfw_if/smu11_driver_if_sienna_cichlid.h  |  2 +-
 .../inc/pmfw_if/smu13_driver_if_aldebaran.h   |  2 +-
 .../inc/pmfw_if/smu13_driver_if_v13_0_0.h |  2 +-
 .../inc/pmfw_if/smu13_driver_if_v13_0_7.h |  2 +-
 .../gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c |  4 +--
 .../amd/pm/swsmu/smu11/sienna_cichlid_ppt.c   |  8 +++---
 drivers/gpu/drm/radeon/atombios.h | 16 +--
 drivers/gpu/drm/radeon/atombios_i2c.c |  4 +--
 drivers/gpu/drm/radeon/radeon_combios.c   | 28 +--
 drivers/gpu/drm/radeon/radeon_i2c.c   | 10 +++
 drivers/gpu/drm/radeon/radeon_mode.h  |  6 ++--
 28 files changed, 93 insertions(+), 93 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
index 6857c586ded7..37f50fc5d496 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
@@ -614,7 +614,7 @@ bool amdgpu_atomfirmware_ras_rom_addr(struct amdgpu_device 
*adev,
if ((frev == 3 && crev >= 4) || (frev > 3)) {
firmware_info = (union firmware_info *)
(mode_info->atom_context->bios + data_offset);
-   /* The ras_rom_i2c_slave_addr should ideally
+   /* The ras_rom_i2c_target_addr should ideally
 * be a 19-bit EEPROM address, which would be
 * used as is by the driver; see top of
 * amdgpu_eeprom.c.
@@ -625,13 +625,13 @@ bool amdgpu_atomfirmware_ras_rom_addr(struct 
amdgpu_device *adev,
 * leave the check for the pointer.
 *
 * The reason this works right now is because
-* ras_rom_i2c_slave_addr contains the EEPROM
+* ras_rom_i2c_target_addr contains the EEPROM
 * device type qualifier 1010b in the top 4
 * bits.
 */
-   if (firmware_info->v34.ras_rom_i2c_slave_addr) {
+   if (firmware_info->v34.ras_rom_i2c_target_addr) {
if (i2c_address)
-   *i2c_address = 
firmware_info->v34.ras_rom_i2c_slave_addr;
+   *i2c_address = 
firmware_info->v34.ras_rom_i2c_target_addr;
return true;
}
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c
index d79cb13e1aa8..070049c92e2b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c
@@ -280,7 +280,7 @@ amdgpu_i2c_lookup(struct amdgpu_device *adev,
 }
 
 static void amdgpu_i2c_get_byte(struct amdgpu_i2c_chan *i2c_bus,
-u8 slave_addr,
+u8 target_addr,
 u8 addr,
 u8 *val)
 {
@@ -288,13 +288,13 @@ static void amdgpu_i2c_get_byte(struct amdgpu_i2c_chan 
*i2c_bus,
u8 in_buf[2];
struct i2c_msg msgs[] = {
{
-   .addr = slave_addr,
+   .addr = target_addr,
.flags = 0,
.len = 1,
.buf = out_buf,
},
{
-

[PATCH v2 06/12] media: cx18: Make I2C terminology more inclusive

2024-05-06 Thread Easwar Hariharan

I2C v7, SMBus 3.2, and I3C 1.1.1 specifications have replaced "master/slave"
with more appropriate terms. Inspired by and following on to Wolfram's
series to fix drivers/i2c/[1], fix the terminology for users of
I2C_ALGOBIT bitbanging interface, now that the approved verbiage exists
in the specification.

I2S specification has also updated the terms in v.3 to use "controller"
and "target" respectively. Make those changes in the relevant spaces as
well.

Compile tested, no functionality changes intended

[1]: 
https://lore.kernel.org/all/20240322132619.6389-1-wsa+rene...@sang-engineering.com/

Signed-off-by: Easwar Hariharan 
---
 drivers/media/pci/cx18/cx18-av-firmware.c | 8 
 drivers/media/pci/cx18/cx18-cards.c   | 6 +++---
 drivers/media/pci/cx18/cx18-cards.h   | 4 ++--
 drivers/media/pci/cx18/cx18-gpio.c| 6 +++---
 4 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/drivers/media/pci/cx18/cx18-av-firmware.c 
b/drivers/media/pci/cx18/cx18-av-firmware.c
index 61aeb8c9af7f..906e0b33cffc 100644
--- a/drivers/media/pci/cx18/cx18-av-firmware.c
+++ b/drivers/media/pci/cx18/cx18-av-firmware.c
@@ -140,22 +140,22 @@ int cx18_av_loadfw(struct cx18 *cx)
cx18_av_and_or4(cx, CXADEC_PIN_CTRL1, ~0, 0x78000);
 
/* Audio input control 1 set to Sony mode */
-   /* Audio output input 2 is 0 for slave operation input */
+   /* Audio output input 2 is 0 for target operation input */
/* 0xC4000914[5]: 0 = left sample on WS=0, 1 = left sample on WS=1 */
/* 0xC4000914[7]: 0 = Philips mode, 1 = Sony mode (1st SCK rising edge
   after WS transition for first bit of audio word. */
cx18_av_write4(cx, CXADEC_I2S_IN_CTL, 0x00A0);
 
/* Audio output control 1 is set to Sony mode */
-   /* Audio output control 2 is set to 1 for master mode */
+   /* Audio output control 2 is set to 1 for controller mode */
/* 0xC4000918[5]: 0 = left sample on WS=0, 1 = left sample on WS=1 */
/* 0xC4000918[7]: 0 = Philips mode, 1 = Sony mode (1st SCK rising edge
   after WS transition for first bit of audio word. */
-   /* 0xC4000918[8]: 0 = slave operation, 1 = master (SCK_OUT and WS_OUT
+   /* 0xC4000918[8]: 0 = target operation, 1 = controller (SCK_OUT and 
WS_OUT
   are generated) */
cx18_av_write4(cx, CXADEC_I2S_OUT_CTL, 0x01A0);
 
-   /* set alt I2s master clock to /0x16 and enable alt divider i2s
+   /* set alt I2s controller clock to /0x16 and enable alt divider i2s
   passthrough */
cx18_av_write4(cx, CXADEC_PIN_CFG3, 0x5600B687);
 
diff --git a/drivers/media/pci/cx18/cx18-cards.c 
b/drivers/media/pci/cx18/cx18-cards.c
index f5a30959a367..4a04bc984578 100644
--- a/drivers/media/pci/cx18/cx18-cards.c
+++ b/drivers/media/pci/cx18/cx18-cards.c
@@ -82,7 +82,7 @@ static const struct cx18_card cx18_card_hvr1600_esmt = {
},
.gpio_init.initial_value = 0x3001,
.gpio_init.direction = 0x3001,
-   .gpio_i2c_slave_reset = {
+   .gpio_i2c_target_reset = {
.active_lo_mask = 0x3001,
.msecs_asserted = 10,
.msecs_recovery = 40,
@@ -129,7 +129,7 @@ static const struct cx18_card cx18_card_hvr1600_s5h1411 = {
},
.gpio_init.initial_value = 0x3801,
.gpio_init.direction = 0x3801,
-   .gpio_i2c_slave_reset = {
+   .gpio_i2c_target_reset = {
.active_lo_mask = 0x3801,
.msecs_asserted = 10,
.msecs_recovery = 40,
@@ -176,7 +176,7 @@ static const struct cx18_card cx18_card_hvr1600_samsung = {
},
.gpio_init.initial_value = 0x3001,
.gpio_init.direction = 0x3001,
-   .gpio_i2c_slave_reset = {
+   .gpio_i2c_target_reset = {
.active_lo_mask = 0x3001,
.msecs_asserted = 10,
.msecs_recovery = 40,
diff --git a/drivers/media/pci/cx18/cx18-cards.h 
b/drivers/media/pci/cx18/cx18-cards.h
index ae9cf5bfdd59..a886ff735e89 100644
--- a/drivers/media/pci/cx18/cx18-cards.h
+++ b/drivers/media/pci/cx18/cx18-cards.h
@@ -69,7 +69,7 @@ struct cx18_gpio_init { /* set initial GPIO DIR and OUT 
values */
u32 initial_value;
 };
 
-struct cx18_gpio_i2c_slave_reset {
+struct cx18_gpio_i2c_target_reset {
u32 active_lo_mask; /* GPIO outputs that reset i2c chips when low */
u32 active_hi_mask; /* GPIO outputs that reset i2c chips when high */
int msecs_asserted; /* time period reset must remain asserted */
@@ -121,7 +121,7 @@ struct cx18_card {
/* GPIO card-specific settings */
u8 xceive_pin;  /* XCeive tuner GPIO reset pin */
struct cx18_gpio_initgpio_init;
-   struct cx18_gpio_i2c_slave_reset gpio_i2c_slave_reset;
+   struct cx18_gpio_i2c_target_reset gpio_i2c_target_reset;
struct cx18_gpio_audio_inputgpio_audio_input;
 
struct cx18_card_tuner tuners[CX18_CARD_MAX_TUNERS];
diff --

[PATCH v2 07/12] media: cx25821: Make I2C terminology more inclusive

2024-05-06 Thread Easwar Hariharan

I2C v7, SMBus 3.2, and I3C 1.1.1 specifications have replaced "master/slave"
with more appropriate terms. Inspired by and following on to Wolfram's
series to fix drivers/i2c/[1], fix the terminology for users of
I2C_ALGOBIT bitbanging interface, now that the approved verbiage exists
in the specification.

Compile tested, no functionality changes intended

[1]: 
https://lore.kernel.org/all/20240322132619.6389-1-wsa+rene...@sang-engineering.com/

Signed-off-by: Easwar Hariharan 
---
 drivers/media/pci/cx25821/cx25821-core.c | 2 +-
 drivers/media/pci/cx25821/cx25821-i2c.c  | 6 +++---
 drivers/media/pci/cx25821/cx25821-medusa-video.c | 2 +-
 drivers/media/pci/cx25821/cx25821.h  | 2 +-
 4 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/media/pci/cx25821/cx25821-core.c 
b/drivers/media/pci/cx25821/cx25821-core.c
index 6627fa9166d3..a9af18910c1f 100644
--- a/drivers/media/pci/cx25821/cx25821-core.c
+++ b/drivers/media/pci/cx25821/cx25821-core.c
@@ -877,7 +877,7 @@ static int cx25821_dev_setup(struct cx25821_dev *dev)
dev->pci_slot = PCI_SLOT(dev->pci->devfn);
dev->pci_irqmask = 0x001f00;
 
-   /* External Master 1 Bus */
+   /* External Controller 1 Bus */
dev->i2c_bus[0].nr = 0;
dev->i2c_bus[0].dev = dev;
dev->i2c_bus[0].reg_stat = I2C1_STAT;
diff --git a/drivers/media/pci/cx25821/cx25821-i2c.c 
b/drivers/media/pci/cx25821/cx25821-i2c.c
index 0ef4cd6528a0..f3322dd2 100644
--- a/drivers/media/pci/cx25821/cx25821-i2c.c
+++ b/drivers/media/pci/cx25821/cx25821-i2c.c
@@ -33,7 +33,7 @@ do {  
\
 #define I2C_EXTEND  (1 << 3)
 #define I2C_NOSTOP  (1 << 4)
 
-static inline int i2c_slave_did_ack(struct i2c_adapter *i2c_adap)
+static inline int i2c_target_did_ack(struct i2c_adapter *i2c_adap)
 {
struct cx25821_i2c *bus = i2c_adap->algo_data;
struct cx25821_dev *dev = bus->dev;
@@ -85,7 +85,7 @@ static int i2c_sendbytes(struct i2c_adapter *i2c_adap,
if (!i2c_wait_done(i2c_adap))
return -EIO;
 
-   if (!i2c_slave_did_ack(i2c_adap))
+   if (!i2c_target_did_ack(i2c_adap))
return -EIO;
 
dprintk(1, "%s(): returns 0\n", __func__);
@@ -174,7 +174,7 @@ static int i2c_readbytes(struct i2c_adapter *i2c_adap,
cx_write(bus->reg_ctrl, bus->i2c_period | (1 << 2) | 1);
if (!i2c_wait_done(i2c_adap))
return -EIO;
-   if (!i2c_slave_did_ack(i2c_adap))
+   if (!i2c_target_did_ack(i2c_adap))
return -EIO;
 
dprintk(1, "%s(): returns 0\n", __func__);
diff --git a/drivers/media/pci/cx25821/cx25821-medusa-video.c 
b/drivers/media/pci/cx25821/cx25821-medusa-video.c
index f0a1ac77f048..67a18add6ed3 100644
--- a/drivers/media/pci/cx25821/cx25821-medusa-video.c
+++ b/drivers/media/pci/cx25821/cx25821-medusa-video.c
@@ -659,7 +659,7 @@ int medusa_video_init(struct cx25821_dev *dev)
if (ret_val < 0)
goto error;
 
-   /* Turn off Master source switch enable */
+   /* Turn off Controller source switch enable */
value = cx25821_i2c_read(&dev->i2c_bus[0], MON_A_CTRL, &tmp);
value &= 0xFFDF;
ret_val = cx25821_i2c_write(&dev->i2c_bus[0], MON_A_CTRL, value);
diff --git a/drivers/media/pci/cx25821/cx25821.h 
b/drivers/media/pci/cx25821/cx25821.h
index 3aa7604fb944..e96be9127467 100644
--- a/drivers/media/pci/cx25821/cx25821.h
+++ b/drivers/media/pci/cx25821/cx25821.h
@@ -234,7 +234,7 @@ struct cx25821_dev {
 
u32 clk_freq;
 
-   /* I2C adapters: Master 1 & 2 (External) & Master 3 (Internal only) */
+   /* I2C adapters: Controller 1 & 2 (External) & Controller 3 (Internal 
only) */
struct cx25821_i2c i2c_bus[3];
 
int nr;
-- 
2.34.1

[PATCH] drm/amdgpu: delete unnecessary check

2024-05-06 Thread Dan Carpenter

The "ret" variable is zero.  No need to check.

Signed-off-by: Dan Carpenter 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index a037e8fba29f..4d50fb039509 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -2807,7 +2807,7 @@ static void amdgpu_ras_do_page_retirement(struct 
work_struct *work)
 static void amdgpu_ras_poison_creation_handler(struct amdgpu_device *adev,
uint32_t timeout_ms)
 {
-   int ret = 0;
+   int ret;
struct ras_ecc_log_info *ecc_log;
struct ras_query_if info;
uint32_t timeout = timeout_ms;
@@ -2836,8 +2836,7 @@ static void amdgpu_ras_poison_creation_handler(struct 
amdgpu_device *adev,
return;
}
 
-   if (!ret)
-   schedule_delayed_work(&ras->page_retirement_dwork, 0);
+   schedule_delayed_work(&ras->page_retirement_dwork, 0);
 }
 
 static int amdgpu_ras_poison_consumption_handler(struct amdgpu_device *adev,
-- 
2.43.0

[PATCH v2 04/12] media: au0828: Make I2C terminology more inclusive

2024-05-06 Thread Easwar Hariharan

I2C v7, SMBus 3.2, and I3C 1.1.1 specifications have replaced "master/slave"
with more appropriate terms. Inspired by and following on to Wolfram's
series to fix drivers/i2c/[1], fix the terminology for users of
I2C_ALGOBIT bitbanging interface, now that the approved verbiage exists
in the specification.

Compile tested, no functionality changes intended

[1]: 
https://lore.kernel.org/all/20240322132619.6389-1-wsa+rene...@sang-engineering.com/

Signed-off-by: Easwar Hariharan 
---
 drivers/media/usb/au0828/au0828-i2c.c   | 4 ++--
 drivers/media/usb/au0828/au0828-input.c | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/media/usb/au0828/au0828-i2c.c 
b/drivers/media/usb/au0828/au0828-i2c.c
index 749f90d73b5b..743cb44f52aa 100644
--- a/drivers/media/usb/au0828/au0828-i2c.c
+++ b/drivers/media/usb/au0828/au0828-i2c.c
@@ -23,7 +23,7 @@ MODULE_PARM_DESC(i2c_scan, "scan i2c bus at insmod time");
 #define I2C_WAIT_DELAY 25
 #define I2C_WAIT_RETRY 1000
 
-static inline int i2c_slave_did_read_ack(struct i2c_adapter *i2c_adap)
+static inline int i2c_target_did_read_ack(struct i2c_adapter *i2c_adap)
 {
struct au0828_dev *dev = i2c_adap->algo_data;
return au0828_read(dev, AU0828_I2C_STATUS_201) &
@@ -35,7 +35,7 @@ static int i2c_wait_read_ack(struct i2c_adapter *i2c_adap)
int count;
 
for (count = 0; count < I2C_WAIT_RETRY; count++) {
-   if (!i2c_slave_did_read_ack(i2c_adap))
+   if (!i2c_target_did_read_ack(i2c_adap))
break;
udelay(I2C_WAIT_DELAY);
}
diff --git a/drivers/media/usb/au0828/au0828-input.c 
b/drivers/media/usb/au0828/au0828-input.c
index 3d3368202cd0..6c9e5ea795f2 100644
--- a/drivers/media/usb/au0828/au0828-input.c
+++ b/drivers/media/usb/au0828/au0828-input.c
@@ -30,7 +30,7 @@ struct au0828_rc {
int polling;
struct delayed_work work;
 
-   /* i2c slave address of external device (if used) */
+   /* i2c target address of external device (if used) */
u16 i2c_dev_addr;
 
int  (*get_key_i2c)(struct au0828_rc *ir);
-- 
2.34.1

[bug report] drm/amdkfd: mark GFX12 system and peer GPU memory mappings as MTYPE_NC

2024-05-06 Thread Dan Carpenter

Hello Sreekant Somasekharan,

This is a semi-automatic email about new static checker warnings.

Commit 628e1ace2379 ("drm/amdkfd: mark GFX12 system and peer GPU
memory mappings as MTYPE_NC") from Mar 26, 2024, leads to the
following Smatch complaint:

drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c:518 gmc_v12_0_get_vm_pte()
warn: variable dereferenced before check 'bo' (see line 500)

drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
   499  struct amdgpu_bo *bo = mapping->bo_va->base.bo;
   500  struct amdgpu_device *bo_adev = amdgpu_ttm_adev(bo->tbo.bdev);

   501  bool coherent = bo->flags & AMDGPU_GEM_CREATE_COHERENT;
^
   502  bool is_system = bo->tbo.resource->mem_type == TTM_PL_SYSTEM;
 ^^^
The patch adds unchecked dereferences.

   503  
   504  
   505  *flags &= ~AMDGPU_PTE_EXECUTABLE;
   506  *flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE;
   507  
   508  *flags &= ~AMDGPU_PTE_MTYPE_GFX12_MASK;
   509  *flags |= (mapping->flags & AMDGPU_PTE_MTYPE_GFX12_MASK);
   510  
   511  if (mapping->flags & AMDGPU_PTE_PRT_GFX12) {
   512  *flags |= AMDGPU_PTE_PRT_GFX12;
   513  *flags |= AMDGPU_PTE_SNOOPED;
   514  *flags |= AMDGPU_PTE_SYSTEM;
   515  *flags &= ~AMDGPU_PTE_VALID;
   516  }
   517  
   518  if (bo && bo->flags & (AMDGPU_GEM_CREATE_COHERENT |
^^
But previously we assumed bo could be NULL.

   519 AMDGPU_GEM_CREATE_UNCACHED))
   520  *flags = (*flags & ~AMDGPU_PTE_MTYPE_GFX12_MASK) |

regards,
dan carpenter

[PATCH v2 08/12] media: ivtv: Make I2C terminology more inclusive

2024-05-06 Thread Easwar Hariharan

I2C v7, SMBus 3.2, and I3C 1.1.1 specifications have replaced "master/slave"
with more appropriate terms. Inspired by and following on to Wolfram's
series to fix drivers/i2c/[1], fix the terminology for users of
I2C_ALGOBIT bitbanging interface, now that the approved verbiage exists
in the specification.

Compile tested, no functionality changes intended

[1]: 
https://lore.kernel.org/all/20240322132619.6389-1-wsa+rene...@sang-engineering.com/

Signed-off-by: Easwar Hariharan 
---
 drivers/media/pci/ivtv/ivtv-i2c.c | 20 ++--
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/media/pci/ivtv/ivtv-i2c.c 
b/drivers/media/pci/ivtv/ivtv-i2c.c
index c052c57c6dce..a22c7caa92f7 100644
--- a/drivers/media/pci/ivtv/ivtv-i2c.c
+++ b/drivers/media/pci/ivtv/ivtv-i2c.c
@@ -33,14 +33,14 @@
 Some more general comments about what we are doing:
 
 The i2c bus is a 2 wire serial bus, with clock (SCL) and data (SDA)
-lines.  To communicate on the bus (as a master, we don't act as a slave),
+lines.  To communicate on the bus (as a controller, we don't act as a 
target),
 we first initiate a start condition (ivtv_start).  We then write the
 address of the device that we want to communicate with, along with a flag
-that indicates whether this is a read or a write.  The slave then issues
+that indicates whether this is a read or a write.  The target then issues
 an ACK signal (ivtv_ack), which tells us that it is ready for reading /
 writing.  We then proceed with reading or writing (ivtv_read/ivtv_write),
 and finally issue a stop condition (ivtv_stop) to make the bus available
-to other masters.
+to other controllers.
 
 There is an additional form of transaction where a write may be
 immediately followed by a read.  In this case, there is no intervening
@@ -379,7 +379,7 @@ static int ivtv_waitsda(struct ivtv *itv, int val)
return 0;
 }
 
-/* Wait for the slave to issue an ACK */
+/* Wait for the target to issue an ACK */
 static int ivtv_ack(struct ivtv *itv)
 {
int ret = 0;
@@ -396,7 +396,7 @@ static int ivtv_ack(struct ivtv *itv)
ivtv_scldelay(itv);
ivtv_setscl(itv, 1);
if (!ivtv_waitsda(itv, 0)) {
-   IVTV_DEBUG_I2C("Slave did not ack\n");
+   IVTV_DEBUG_I2C("Target did not ack\n");
ret = -EREMOTEIO;
}
ivtv_setscl(itv, 0);
@@ -407,7 +407,7 @@ static int ivtv_ack(struct ivtv *itv)
return ret;
 }
 
-/* Write a single byte to the i2c bus and wait for the slave to ACK */
+/* Write a single byte to the i2c bus and wait for the target to ACK */
 static int ivtv_sendbyte(struct ivtv *itv, unsigned char byte)
 {
int i, bit;
@@ -427,7 +427,7 @@ static int ivtv_sendbyte(struct ivtv *itv, unsigned char 
byte)
}
ivtv_setscl(itv, 1);
if (!ivtv_waitscl(itv, 1)) {
-   IVTV_DEBUG_I2C("Slave not ready for bit\n");
+   IVTV_DEBUG_I2C("Target not ready for bit\n");
return -EREMOTEIO;
}
}
@@ -471,7 +471,7 @@ static int ivtv_readbyte(struct ivtv *itv, unsigned char 
*byte, int nack)
return 0;
 }
 
-/* Issue a start condition on the i2c bus to alert slaves to prepare for
+/* Issue a start condition on the i2c bus to alert targets to prepare for
an address write */
 static int ivtv_start(struct ivtv *itv)
 {
@@ -534,7 +534,7 @@ static int ivtv_stop(struct ivtv *itv)
return 0;
 }
 
-/* Write a message to the given i2c slave.  do_stop may be 0 to prevent
+/* Write a message to the given i2c target.  do_stop may be 0 to prevent
issuing the i2c stop condition (when following with a read) */
 static int ivtv_write(struct ivtv *itv, unsigned char addr, unsigned char 
*data, u32 len, int do_stop)
 {
@@ -558,7 +558,7 @@ static int ivtv_write(struct ivtv *itv, unsigned char addr, 
unsigned char *data,
return ret;
 }
 
-/* Read data from the given i2c slave.  A stop condition is always issued. */
+/* Read data from the given i2c target.  A stop condition is always issued. */
 static int ivtv_read(struct ivtv *itv, unsigned char addr, unsigned char 
*data, u32 len)
 {
int retry, ret = -EREMOTEIO;
-- 
2.34.1

[PATCH v2 11/12] fbdev/smscufx: Make I2C terminology more inclusive

2024-05-06 Thread Easwar Hariharan

I2C v7, SMBus 3.2, and I3C 1.1.1 specifications have replaced "master/slave"
with more appropriate terms. Inspired by and following on to Wolfram's
series to fix drivers/i2c/[1], fix the terminology for users of
I2C_ALGOBIT bitbanging interface, now that the approved verbiage exists
in the specification.

Compile tested, no functionality changes intended

[1]: 
https://lore.kernel.org/all/20240322132619.6389-1-wsa+rene...@sang-engineering.com/

Signed-off-by: Easwar Hariharan 
---
 drivers/video/fbdev/smscufx.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/video/fbdev/smscufx.c b/drivers/video/fbdev/smscufx.c
index 35d682b110c4..5f0dd01fd834 100644
--- a/drivers/video/fbdev/smscufx.c
+++ b/drivers/video/fbdev/smscufx.c
@@ -1292,7 +1292,7 @@ static int ufx_realloc_framebuffer(struct ufx_data *dev, 
struct fb_info *info)
return 0;
 }
 
-/* sets up I2C Controller for 100 Kbps, std. speed, 7-bit addr, master,
+/* sets up DDC channel for 100 Kbps, std. speed, 7-bit addr, controller mode,
  * restart enabled, but no start byte, enable controller */
 static int ufx_i2c_init(struct ufx_data *dev)
 {
@@ -1321,7 +1321,7 @@ static int ufx_i2c_init(struct ufx_data *dev)
/* 7-bit (not 10-bit) addressing */
tmp &= ~(0x10);
 
-   /* enable restart conditions and master mode */
+   /* enable restart conditions and controller mode */
tmp |= 0x21;
 
status = ufx_reg_write(dev, 0x1000, tmp);
-- 
2.34.1

Re: [PATCH v1 12/12] fbdev/viafb: Make I2C terminology more inclusive

2024-05-06 Thread Easwar Hariharan

On 5/3/2024 12:39 AM, Thomas Zimmermann wrote:
> Hi
> 
> Am 03.05.24 um 00:26 schrieb Easwar Hariharan:
>> On 5/2/2024 3:46 AM, Thomas Zimmermann wrote:
>>>
>>> Am 30.04.24 um 19:38 schrieb Easwar Hariharan:
>>>> I2C v7, SMBus 3.2, and I3C 1.1.1 specifications have replaced
>>>> "master/slave"
>>>> with more appropriate terms. Inspired by and following on to Wolfram's
>>>> series to fix drivers/i2c/[1], fix the terminology for users of
>>>> I2C_ALGOBIT bitbanging interface, now that the approved verbiage exists
>>>> in the specification.
>>>>
>>>> Compile tested, no functionality changes intended
>>>>
>>>> [1]:
>>>> https://lore.kernel.org/all/20240322132619.6389-1-wsa+rene...@sang-engineering.com/
>>>>
>>>> Signed-off-by: Easwar Hariharan
>>> Acked-by: Thomas Zimmermann 
>>>
>> Thanks for the ack! I had been addressing feedback as I got it on the v0 
>> series, and it seems
>> I missed out on updating viafb and smscufx to spec-compliant 
>> controller/target terminology like
>> the v0->v1 changelog calls out before posting v1.
>>
>> For smscufx, I feel phrasing the following line (as an example)
>>
>>> -/* sets up I2C Controller for 100 Kbps, std. speed, 7-bit addr, host,
>>> +/* sets up I2C Controller for 100 Kbps, std. speed, 7-bit addr, 
>>> *controller*,
>> would actually impact readability negatively, so I propose to leave smscufx 
>> as is.
> 
> Why? I don't see much of a difference.
> 
>>
>> For viafb, I propose making it compliant with the spec using the 
>> controller/target terminology and
>> posting a v2 respin (which I can send out as soon as you say) and ask you to 
>> review again.
>>
>> What do you think?
> 
> I think we should adopt the spec's language everywhere. That makes it 
> possible to grep the spec for terms used in the source code. Using 'host' in 
> smscufx appears to introduce yet another term. If you are worried about using 
> 'I2C controller' and 'controller' in the same sentence, you can replace 'I2C 
> controller' with 'DDC channel'. That's even more precise about the purpose of 
> this code.

Great, thanks! That was exactly my concern, I will fix up smscufx and send a v2.

Thanks,
Easwar

[PATCH v2 10/12] sfc: falcon: Make I2C terminology more inclusive

2024-05-06 Thread Easwar Hariharan

I2C v7, SMBus 3.2, and I3C 1.1.1 specifications have replaced "master/slave"
with more appropriate terms. Inspired by and following on to Wolfram's
series to fix drivers/i2c/[1], fix the terminology for users of
I2C_ALGOBIT bitbanging interface, now that the approved verbiage exists
in the specification.

Compile tested, no functionality changes intended

[1]: 
https://lore.kernel.org/all/20240322132619.6389-1-wsa+rene...@sang-engineering.com/

Reviewed-by: Martin Habets 
Reviewed-by: Simon Horman 
Signed-off-by: Easwar Hariharan 
---
 drivers/net/ethernet/sfc/falcon/falcon.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/sfc/falcon/falcon.c 
b/drivers/net/ethernet/sfc/falcon/falcon.c
index 7a1c9337081b..36114ce88034 100644
--- a/drivers/net/ethernet/sfc/falcon/falcon.c
+++ b/drivers/net/ethernet/sfc/falcon/falcon.c
@@ -367,7 +367,7 @@ static const struct i2c_algo_bit_data 
falcon_i2c_bit_operations = {
.getsda = falcon_getsda,
.getscl = falcon_getscl,
.udelay = 5,
-   /* Wait up to 50 ms for slave to let us pull SCL high */
+   /* Wait up to 50 ms for target to let us pull SCL high */
.timeout= DIV_ROUND_UP(HZ, 20),
 };
 
-- 
2.34.1

[bug report] drm/amd/display: Do cursor programming with rest of pipe

2024-05-06 Thread Dan Carpenter

Hello Harry Wentland,

Commit 66eba12a5482 ("drm/amd/display: Do cursor programming with
rest of pipe") from Mar 15, 2024 (linux-next), leads to the following
Smatch static checker warning:

drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm.c:8433 
amdgpu_dm_update_cursor()
error: we previously assumed 'afb' could be null (see line 8388)

drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm.c
8379 static void amdgpu_dm_update_cursor(struct drm_plane *plane,
8380 struct drm_plane_state 
*old_plane_state,
8381 struct dc_stream_update *update)
8382 {
8383 struct amdgpu_device *adev = drm_to_adev(plane->dev);
8384 struct amdgpu_framebuffer *afb = 
to_amdgpu_framebuffer(plane->state->fb);
8385 struct drm_crtc *crtc = afb ? plane->state->crtc : 
old_plane_state->crtc;
 ^

8386 struct dm_crtc_state *crtc_state = crtc ? 
to_dm_crtc_state(crtc->state) : NULL;
8387 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
8388 uint64_t address = afb ? afb->address : 0;
^
Checks for NULL

8389 struct dc_cursor_position position = {0};
8390 struct dc_cursor_attributes attributes;
8391 int ret;
8392 
8393 if (!plane->state->fb && !old_plane_state->fb)
8394 return;
8395 
8396 drm_dbg_atomic(plane->dev, "crtc_id=%d with size %d to %d\n",
8397amdgpu_crtc->crtc_id, plane->state->crtc_w,
8398plane->state->crtc_h);
8399 
8400 ret = amdgpu_dm_plane_get_cursor_position(plane, crtc, 
&position);
8401 if (ret)
8402 return;
8403 
8404 if (!position.enable) {
8405 /* turn off cursor */
8406 if (crtc_state && crtc_state->stream) {
8407 
dc_stream_set_cursor_position(crtc_state->stream,
8408   &position);
8409 update->cursor_position = 
&crtc_state->stream->cursor_position;
8410 }
8411 return;
8412 }
8413 
8414 amdgpu_crtc->cursor_width = plane->state->crtc_w;
8415 amdgpu_crtc->cursor_height = plane->state->crtc_h;
8416 
8417 memset(&attributes, 0, sizeof(attributes));
8418 attributes.address.high_part = upper_32_bits(address);
8419 attributes.address.low_part  = lower_32_bits(address);
8420 attributes.width = plane->state->crtc_w;
8421 attributes.height= plane->state->crtc_h;
8422 attributes.color_format  = 
CURSOR_MODE_COLOR_PRE_MULTIPLIED_ALPHA;
8423 attributes.rotation_angle= 0;
8424 attributes.attribute_flags.value = 0;
8425 
8426 /* Enable cursor degamma ROM on DCN3+ for implicit sRGB 
degamma in DRM
8427  * legacy gamma setup.
8428  */
8429 if (crtc_state->cm_is_degamma_srgb &&
8430 adev->dm.dc->caps.color.dpp.gamma_corr)
8431 attributes.attribute_flags.bits.ENABLE_CURSOR_DEGAMMA 
= 1;
8432 
--> 8433 attributes.pitch = afb->base.pitches[0] / 
afb->base.format->cpp[0];
^  ^
Unchecked dereferences

8434 
8435 if (crtc_state->stream) {
8436 if 
(!dc_stream_set_cursor_attributes(crtc_state->stream,
8437  &attributes))
8438 DRM_ERROR("DC failed to set cursor 
attributes\n");
8439 
8440 update->cursor_attributes = 
&crtc_state->stream->cursor_attributes;
8441 
8442 if (!dc_stream_set_cursor_position(crtc_state->stream,
8443&position))
8444 DRM_ERROR("DC failed to set cursor 
position\n");
8445 
8446 update->cursor_position = 
&crtc_state->stream->cursor_position;
8447 }
8448 }

regards,
dan carpenter

[bug report] drm/amdgpu: Add sdma v7_0 ip block support (v7)

2024-05-06 Thread Dan Carpenter

Hello Likun Gao,

Commit b412351e91bd ("drm/amdgpu: Add sdma v7_0 ip block support
(v7)") from Jul 4, 2023 (linux-next), leads to the following Smatch
static checker warning:

drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c:171 sdma_v7_0_ring_set_wptr()
warn: duplicate check '*is_queue_unmap' (previous on line 161)

drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
140 static void sdma_v7_0_ring_set_wptr(struct amdgpu_ring *ring)
141 {
142 struct amdgpu_device *adev = ring->adev;
143 uint32_t *wptr_saved;
144 uint32_t *is_queue_unmap;
145 uint64_t aggregated_db_index;
146 uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_DMA].mqd_size;
147 
148 DRM_DEBUG("Setting write pointer\n");
149 
150 if (ring->is_mes_queue) {
151 wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size);
152 is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size +

Set here

153   sizeof(uint32_t));
154 aggregated_db_index =
155 amdgpu_mes_get_aggregated_doorbell_index(adev,
156  ring->hw_prio);
157 
158 atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
159  ring->wptr << 2);
160 *wptr_saved = ring->wptr << 2;
161 if (*is_queue_unmap) {
^^^
Checked here

162 WDOORBELL64(aggregated_db_index, ring->wptr << 
2);
163 DRM_DEBUG("calling WDOORBELL64(0x%08x, 
0x%016llx)\n",
164 ring->doorbell_index, 
ring->wptr << 2);
165 WDOORBELL64(ring->doorbell_index, ring->wptr << 
2);
166 } else {
167 DRM_DEBUG("calling WDOORBELL64(0x%08x, 
0x%016llx)\n",
168 ring->doorbell_index, 
ring->wptr << 2);
169 WDOORBELL64(ring->doorbell_index, ring->wptr << 
2);
170 
--> 171 if (*is_queue_unmap)
^^^
This is dead code.  We know it's false.

172 WDOORBELL64(aggregated_db_index,
173 ring->wptr << 2);
174 }
175 } else {
176 if (ring->use_doorbell) {
177 DRM_DEBUG("Using doorbell -- "
178   "wptr_offs == 0x%08x "

regards,
dan carpenter

[PATCH] drm/amd/pm: Fix error code in vega10_hwmgr_backend_init()

2024-05-06 Thread Dan Carpenter

Return -EINVAL on error instead of success.  Also on the success path,
return a literal zero instead of "return result;"

Fixes: e098bc9612c2 ("drm/amd/pm: optimize the power related source code 
layout")
Signed-off-by: Dan Carpenter 
---
 drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c 
b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c
index 37c915d7723c..9b9f8615070a 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c
+++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c
@@ -924,7 +924,7 @@ static int vega10_hwmgr_backend_init(struct pp_hwmgr *hwmgr)
 
data->total_active_cus = adev->gfx.cu_info.number;
if (!hwmgr->not_vf)
-   return result;
+   return -EINVAL;
 
/* Setup default Overdrive Fan control settings */
data->odn_fan_table.target_fan_speed =
@@ -947,7 +947,7 @@ static int vega10_hwmgr_backend_init(struct pp_hwmgr *hwmgr)
"Mem Channel Index Exceeded maximum!",
return -EINVAL);
 
-   return result;
+   return 0;
 }
 
 static int vega10_init_sclk_threshold(struct pp_hwmgr *hwmgr)
-- 
2.43.0

[bug report] drm/amd/display: Separate setting and programming of cursor

2024-05-06 Thread Dan Carpenter

Hello Harry Wentland,

Commit f63f86b5affc ("drm/amd/display: Separate setting and
programming of cursor") from Mar 15, 2024 (linux-next), leads to the
following Smatch static checker warning:

drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc_stream.c:398 
dc_stream_program_cursor_position()
error: we previously assumed 'stream' could be null (see line 397)

drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc_stream.c
389 bool dc_stream_program_cursor_position(
390 struct dc_stream_state *stream,
391 const struct dc_cursor_position *position)
392 {
393 struct dc *dc;
394 bool reset_idle_optimizations = false;
395 const struct dc_cursor_position *old_position;
396 
397 old_position = stream ? &stream->cursor_position : NULL;
   
The patch adds a NULL check

--> 398 dc = stream->ctx->dc;
 
The old code didn't check

399 
400 if (dc_stream_set_cursor_position(stream, position)) {
401 dc_z10_restore(dc);
402 
403 /* disable idle optimizations if enabling cursor */
404 if (dc->idle_optimizations_allowed &&
405 (!old_position->enable || 
dc->debug.exit_idle_opt_for_cursor_updates) &&
406 position->enable) {
407 dc_allow_idle_optimizations(dc, false);

regards,
dan carpenter

[PATCH v2 05/12] media: cobalt: Make I2C terminology more inclusive

2024-05-06 Thread Easwar Hariharan

I2C v7, SMBus 3.2, and I3C 1.1.1 specifications have replaced "master/slave"
with more appropriate terms. Inspired by and following on to Wolfram's
series to fix drivers/i2c/[1], fix the terminology for users of
I2C_ALGOBIT bitbanging interface, now that the approved verbiage exists
in the specification.

Compile tested, no functionality changes intended

[1]: 
https://lore.kernel.org/all/20240322132619.6389-1-wsa+rene...@sang-engineering.com/

Signed-off-by: Easwar Hariharan 
---
 drivers/media/pci/cobalt/cobalt-i2c.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/media/pci/cobalt/cobalt-i2c.c 
b/drivers/media/pci/cobalt/cobalt-i2c.c
index 10c9ee33f73e..011130aef2ca 100644
--- a/drivers/media/pci/cobalt/cobalt-i2c.c
+++ b/drivers/media/pci/cobalt/cobalt-i2c.c
@@ -45,10 +45,10 @@ struct cobalt_i2c_regs {
 /* I2C stop condition */
 #define M00018_CR_BITMAP_STO_MSK   (1 << 6)
 
-/* I2C read from slave */
+/* I2C read from target */
 #define M00018_CR_BITMAP_RD_MSK(1 << 5)
 
-/* I2C write to slave */
+/* I2C write to target */
 #define M00018_CR_BITMAP_WR_MSK(1 << 4)
 
 /* I2C ack */
@@ -59,7 +59,7 @@ struct cobalt_i2c_regs {
 
 /* SR[7:0] - Status register */
 
-/* Receive acknowledge from slave */
+/* Receive acknowledge from target */
 #define M00018_SR_BITMAP_RXACK_MSK (1 << 7)
 
 /* Busy, I2C bus busy (as defined by start / stop bits) */
-- 
2.34.1

Re: [PATCH v2 03/12] drm/i915: Make I2C terminology more inclusive

2024-05-06 Thread Easwar Hariharan

On 5/3/2024 12:34 PM, Rodrigo Vivi wrote:
> On Fri, May 03, 2024 at 06:13:24PM +0000, Easwar Hariharan wrote:
>> I2C v7, SMBus 3.2, and I3C 1.1.1 specifications have replaced "master/slave"
>> with more appropriate terms. Inspired by and following on to Wolfram's
>> series to fix drivers/i2c/[1], fix the terminology for users of
>> I2C_ALGOBIT bitbanging interface, now that the approved verbiage exists
>> in the specification.
>>
>> Compile tested, no functionality changes intended
>>
>> [1]: 
>> https://lore.kernel.org/all/20240322132619.6389-1-wsa+rene...@sang-engineering.com/
>>
>> Reviewed-by: Rodrigo Vivi 
>> Acked-by: Rodrigo Vivi 
> 
> It looks like the ack is not needed since we are merging this through
> drm-intel-next. But I'm planning to merge this only after seeing the
> main drivers/i2c accepting the new terminology. So we don't have a
> risk of that getting push back and new names there and we having
> to rename it once again.

Just to be explicit, did you want me to remove the Acked-by in v3, or will you 
when you pull
the patch into drm-intel-next?

> 
> (more below)
> 
>> Acked-by: Zhi Wang 
>> Signed-off-by: Easwar Hariharan 
> 
> Cc: Jani Nikula 
> 
> Jani, what bits were you concerned that were not necessarily i2c?
> Although they are not necessarily/directly i2c, I believe they
> are related and could benefit from the massive single shot rename.
> or do you have any better split to suggest here?
> 
> (more below)
> 
>> ---
>>  drivers/gpu/drm/i915/display/dvo_ch7017.c | 14 -
>>  drivers/gpu/drm/i915/display/dvo_ch7xxx.c | 18 +--
>>  drivers/gpu/drm/i915/display/dvo_ivch.c   | 16 +-
>>  drivers/gpu/drm/i915/display/dvo_ns2501.c | 18 +--
>>  drivers/gpu/drm/i915/display/dvo_sil164.c | 18 +--
>>  drivers/gpu/drm/i915/display/dvo_tfp410.c | 18 +--
>>  drivers/gpu/drm/i915/display/intel_bios.c | 22 +++---
>>  drivers/gpu/drm/i915/display/intel_ddi.c  |  2 +-
>>  .../gpu/drm/i915/display/intel_display_core.h |  2 +-
>>  drivers/gpu/drm/i915/display/intel_dsi.h  |  2 +-
>>  drivers/gpu/drm/i915/display/intel_dsi_vbt.c  | 20 ++---
>>  drivers/gpu/drm/i915/display/intel_dvo.c  | 14 -
>>  drivers/gpu/drm/i915/display/intel_dvo_dev.h  |  2 +-
>>  drivers/gpu/drm/i915/display/intel_gmbus.c|  4 +--
>>  drivers/gpu/drm/i915/display/intel_sdvo.c | 30 +--
>>  drivers/gpu/drm/i915/display/intel_vbt_defs.h |  4 +--
>>  drivers/gpu/drm/i915/gvt/edid.c   | 28 -
>>  drivers/gpu/drm/i915/gvt/edid.h   |  4 +--
>>  drivers/gpu/drm/i915/gvt/opregion.c   |  2 +-
>>  19 files changed, 119 insertions(+), 119 deletions(-)
>>



>> diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c 
>> b/drivers/gpu/drm/i915/display/intel_ddi.c
>> index c17462b4c2ac..64db211148a8 100644
>> --- a/drivers/gpu/drm/i915/display/intel_ddi.c
>> +++ b/drivers/gpu/drm/i915/display/intel_ddi.c
>> @@ -4332,7 +4332,7 @@ static int intel_ddi_compute_config_late(struct 
>> intel_encoder *encoder,
>>  
>> connector->tile_group->id);
>>  
>>  /*
>> - * EDP Transcoders cannot be ensalved
>> + * EDP Transcoders cannot be slaves
> 
>  ^ here
> perhaps you meant 'targeted' ?
> 
>>   * make them a master always when present



This is not actually I2C related as far as I could tell when I was making the 
change, so this was more of a typo fix.

If we want to improve this, a quick check with the eDP v1.5a spec suggests 
using primary/secondary instead,
though in a global fashion rather than specifically for eDP transcoders. There 
is also source/sink terminology
in the spec related to DP encoders.

Which would be a more acceptable change here?

Thanks,
Easwar

[PATCH v2 12/12] fbdev/viafb: Make I2C terminology more inclusive

2024-05-06 Thread Easwar Hariharan

I2C v7, SMBus 3.2, and I3C 1.1.1 specifications have replaced "master/slave"
with more appropriate terms. Inspired by and following on to Wolfram's
series to fix drivers/i2c/[1], fix the terminology for users of
I2C_ALGOBIT bitbanging interface, now that the approved verbiage exists
in the specification.

Compile tested, no functionality changes intended

[1]: 
https://lore.kernel.org/all/20240322132619.6389-1-wsa+rene...@sang-engineering.com/

Signed-off-by: Easwar Hariharan 
---
 drivers/video/fbdev/via/chip.h|  8 
 drivers/video/fbdev/via/dvi.c | 24 
 drivers/video/fbdev/via/lcd.c |  6 +++---
 drivers/video/fbdev/via/via_aux.h |  2 +-
 drivers/video/fbdev/via/via_i2c.c | 12 ++--
 drivers/video/fbdev/via/vt1636.c  |  6 +++---
 6 files changed, 29 insertions(+), 29 deletions(-)

diff --git a/drivers/video/fbdev/via/chip.h b/drivers/video/fbdev/via/chip.h
index f0a19cbcb9e5..f81af13630e2 100644
--- a/drivers/video/fbdev/via/chip.h
+++ b/drivers/video/fbdev/via/chip.h
@@ -69,7 +69,7 @@
 #define VT1632_TMDS 0x01
 #define INTEGRATED_TMDS 0x42
 
-/* Definition TMDS Trasmitter I2C Slave Address */
+/* Definition TMDS Trasmitter I2C Target Address */
 #define VT1632_TMDS_I2C_ADDR0x10
 
 /**/
@@ -88,21 +88,21 @@
 #define TX_DATA_DDR_MODE0x04
 #define TX_DATA_SDR_MODE0x08
 
-/* Definition LVDS Trasmitter I2C Slave Address */
+/* Definition LVDS Trasmitter I2C Target Address */
 #define VT1631_LVDS_I2C_ADDR0x70
 #define VT3271_LVDS_I2C_ADDR0x80
 #define VT1636_LVDS_I2C_ADDR0x80
 
 struct tmds_chip_information {
int tmds_chip_name;
-   int tmds_chip_slave_addr;
+   int tmds_chip_target_addr;
int output_interface;
int i2c_port;
 };
 
 struct lvds_chip_information {
int lvds_chip_name;
-   int lvds_chip_slave_addr;
+   int lvds_chip_target_addr;
int output_interface;
int i2c_port;
 };
diff --git a/drivers/video/fbdev/via/dvi.c b/drivers/video/fbdev/via/dvi.c
index 13147e3066eb..27990a73bfa3 100644
--- a/drivers/video/fbdev/via/dvi.c
+++ b/drivers/video/fbdev/via/dvi.c
@@ -70,7 +70,7 @@ bool viafb_tmds_trasmitter_identify(void)
/* Check for VT1632: */
viaparinfo->chip_info->tmds_chip_info.tmds_chip_name = VT1632_TMDS;
viaparinfo->chip_info->
-   tmds_chip_info.tmds_chip_slave_addr = VT1632_TMDS_I2C_ADDR;
+   tmds_chip_info.tmds_chip_target_addr = VT1632_TMDS_I2C_ADDR;
viaparinfo->chip_info->tmds_chip_info.i2c_port = VIA_PORT_31;
if (check_tmds_chip(VT1632_DEVICE_ID_REG, VT1632_DEVICE_ID)) {
/*
@@ -128,14 +128,14 @@ bool viafb_tmds_trasmitter_identify(void)
viaparinfo->chip_info->
tmds_chip_info.tmds_chip_name = NON_TMDS_TRANSMITTER;
viaparinfo->chip_info->tmds_chip_info.
-   tmds_chip_slave_addr = VT1632_TMDS_I2C_ADDR;
+   tmds_chip_target_addr = VT1632_TMDS_I2C_ADDR;
return false;
 }
 
 static void tmds_register_write(int index, u8 data)
 {
viafb_i2c_writebyte(viaparinfo->chip_info->tmds_chip_info.i2c_port,
-   
viaparinfo->chip_info->tmds_chip_info.tmds_chip_slave_addr,
+   
viaparinfo->chip_info->tmds_chip_info.tmds_chip_target_addr,
index, data);
 }
 
@@ -144,7 +144,7 @@ static int tmds_register_read(int index)
u8 data;
 
viafb_i2c_readbyte(viaparinfo->chip_info->tmds_chip_info.i2c_port,
-  (u8) 
viaparinfo->chip_info->tmds_chip_info.tmds_chip_slave_addr,
+  (u8) 
viaparinfo->chip_info->tmds_chip_info.tmds_chip_target_addr,
   (u8) index, &data);
return data;
 }
@@ -152,7 +152,7 @@ static int tmds_register_read(int index)
 static int tmds_register_read_bytes(int index, u8 *buff, int buff_len)
 {
viafb_i2c_readbytes(viaparinfo->chip_info->tmds_chip_info.i2c_port,
-   (u8) 
viaparinfo->chip_info->tmds_chip_info.tmds_chip_slave_addr,
+   (u8) 
viaparinfo->chip_info->tmds_chip_info.tmds_chip_target_addr,
(u8) index, buff, buff_len);
return 0;
 }
@@ -256,14 +256,14 @@ static int viafb_dvi_query_EDID(void)
 
DEBUG_MSG(KERN_INFO "viafb_dvi_query_EDID!!\n");
 
-   restore = viaparinfo->chip_info->tmds_chip_info.tmds_chip_slave_addr;
-   viaparinfo->chip_info->tmds_chip_info.tmds_chip_slave_addr = 0xA0;
+   restore = viaparinfo->chip_info->tmds_chip_info.tmds_chip_target_addr;
+   viaparinfo->chip_info->tmds_chip_info.tmds_chip_target_addr = 0xA0;
 
data0 = (u8) tmds_register_read(0x00);
data1 = (u8) tmds_register_read(0x01);
if ((data0 == 0) && (data1 == 0xFF)) {
viaparinfo->chip_info->
-

[PATCH v2 00/12] Make I2C terminology more inclusive for I2C Algobit and consumers

2024-05-06 Thread Easwar Hariharan

I2C v7, SMBus 3.2, and I3C 1.1.1 specifications have replaced "master/slave"
with more appropriate terms. Inspired by and following on to Wolfram's
series to fix drivers/i2c/[1], fix the terminology for users of the
I2C_ALGOBIT bitbanging interface, now that the approved verbiage exists
in the specification.

Compile tested, no functionality changes intended

Please chime in with your opinions and suggestions.

This series is based on 3d25a941ea50 ("Merge tag 'block-6.9-20240503' of 
git://git.kernel.dk/linux")

[1]:
https://lore.kernel.org/all/20240322132619.6389-1-wsa+rene...@sang-engineering.com/


changelog:
v1->v2:
- v1 link: 
https://lore.kernel.org/all/20240430173812.1423757-1-eahar...@linux.microsoft.com/
 
- Switch to specification verbiage master->controller, slave->target,
  drop usage of host/client [Thomas]
- Pick up Reviewed-bys and Acked-bys from Rodrigo, Zhi, and Thomas [gma500, 
i915]
- Fix up some straggler master/slave terms in amdgpu, cx25821, ivtv,
  cx23885

v0->v1:
- v0 link: 
https://lore.kernel.org/all/20240329170038.3863998-1-eahar...@linux.microsoft.com/
- Drop drivers/infiniband patches [Leon, Dennis]
- Switch to specification verbiage master->controller, slave->target,
  drop usage of client [Andi, Ville, Jani, Christian]
- Add I3C specification version in commit messages [Andi]
- Pick up Reviewed-bys from Martin and Simon [sfc]
- Drop i2c/treewide patch to make this series independent from Wolfram's
  ([1]) [Wolfram]
- Split away drm/nouveau patch to allow expansion into non-I2C
  non-inclusive terms


Easwar Hariharan (12):
  drm/amdgpu, drm/radeon: Make I2C terminology more inclusive
  drm/gma500: Make I2C terminology more inclusive
  drm/i915: Make I2C terminology more inclusive
  media: au0828: Make I2C terminology more inclusive
  media: cobalt: Make I2C terminology more inclusive
  media: cx18: Make I2C terminology more inclusive
  media: cx25821: Make I2C terminology more inclusive
  media: ivtv: Make I2C terminology more inclusive
  media: cx23885: Make I2C terminology more inclusive
  sfc: falcon: Make I2C terminology more inclusive
  fbdev/smscufx: Make I2C terminology more inclusive
  fbdev/viafb: Make I2C terminology more inclusive

 .../gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c  |  8 ++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c   | 10 +++
 drivers/gpu/drm/amd/amdgpu/atombios_i2c.c |  8 ++---
 drivers/gpu/drm/amd/amdgpu/atombios_i2c.h |  2 +-
 drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c| 20 ++---
 .../gpu/drm/amd/display/dc/bios/bios_parser.c |  2 +-
 .../drm/amd/display/dc/bios/bios_parser2.c|  2 +-
 .../drm/amd/display/dc/core/dc_link_exports.c |  4 +--
 drivers/gpu/drm/amd/display/dc/dc.h   |  2 +-
 drivers/gpu/drm/amd/display/dc/dce/dce_i2c.c  |  4 +--
 .../display/include/grph_object_ctrl_defs.h   |  2 +-
 drivers/gpu/drm/amd/include/atombios.h|  2 +-
 drivers/gpu/drm/amd/include/atomfirmware.h| 26 
 .../powerplay/hwmgr/vega20_processpptables.c  |  4 +--
 .../amd/pm/powerplay/inc/smu11_driver_if.h|  2 +-
 .../inc/pmfw_if/smu11_driver_if_arcturus.h|  2 +-
 .../inc/pmfw_if/smu11_driver_if_navi10.h  |  2 +-
 .../pmfw_if/smu11_driver_if_sienna_cichlid.h  |  2 +-
 .../inc/pmfw_if/smu13_driver_if_aldebaran.h   |  2 +-
 .../inc/pmfw_if/smu13_driver_if_v13_0_0.h |  2 +-
 .../inc/pmfw_if/smu13_driver_if_v13_0_7.h |  2 +-
 .../gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c |  4 +--
 .../amd/pm/swsmu/smu11/sienna_cichlid_ppt.c   |  8 ++---
 drivers/gpu/drm/gma500/cdv_intel_lvds.c   |  2 +-
 drivers/gpu/drm/gma500/intel_bios.c   | 22 +++---
 drivers/gpu/drm/gma500/intel_bios.h   |  4 +--
 drivers/gpu/drm/gma500/intel_gmbus.c  |  2 +-
 drivers/gpu/drm/gma500/psb_drv.h  |  2 +-
 drivers/gpu/drm/gma500/psb_intel_drv.h|  2 +-
 drivers/gpu/drm/gma500/psb_intel_lvds.c   |  4 +--
 drivers/gpu/drm/gma500/psb_intel_sdvo.c   | 26 
 drivers/gpu/drm/i915/display/dvo_ch7017.c | 14 -
 drivers/gpu/drm/i915/display/dvo_ch7xxx.c | 18 +--
 drivers/gpu/drm/i915/display/dvo_ivch.c   | 16 +-
 drivers/gpu/drm/i915/display/dvo_ns2501.c | 18 +--
 drivers/gpu/drm/i915/display/dvo_sil164.c | 18 +--
 drivers/gpu/drm/i915/display/dvo_tfp410.c | 18 +--
 drivers/gpu/drm/i915/display/intel_bios.c | 22 +++---
 drivers/gpu/drm/i915/display/intel_ddi.c  |  2 +-
 .../gpu/drm/i915/display/intel_display_core.h |  2 +-
 drivers/gpu/drm/i915/display/intel_dsi.h  |  2 +-
 drivers/gpu/drm/i915/display/intel_dsi_vbt.c  | 20 ++---
 drivers/gpu/drm/i915/display/intel_dvo.c  | 14 -
 drivers/gpu/drm/i915/display/intel_dvo_dev.h  |  2 +-
 drivers/gpu/drm/i915/display/intel_gmbus.c|  4 +--
 drivers/gpu/drm/i915/display/intel_sdvo.c | 30 +--
 drivers/gpu/drm/i915/display/intel_vbt_defs.h |  4 +--
 drivers/gpu/drm/i915/

[PATCH 2/2] drm/amdgpu/pm: Fix the param type of set_power_profile_mode

2024-05-06 Thread Ma Jun

The .set_power_profile_mode function needs an array as its input
parameter, so define the variable workload as an array to fix
the Coverity warning below.

"Passing &workload to function hwmgr->hwmgr_func->set_power_profile_mode
which uses it as an array. This might corrupt or misinterpret adjacent
memory locations"

Signed-off-by: Ma Jun 
---
 drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c |  8 
 drivers/gpu/drm/amd/pm/powerplay/hwmgr/pp_psm.c  |  8 
 drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c| 16 
 3 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c 
b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c
index 1c40a362d5ab..a71c6117d7e5 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c
+++ b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c
@@ -929,7 +929,7 @@ static int pp_dpm_switch_power_profile(void *handle,
enum PP_SMC_POWER_PROFILE type, bool en)
 {
struct pp_hwmgr *hwmgr = handle;
-   long workload;
+   long workload[1];
uint32_t index;
 
if (!hwmgr || !hwmgr->pm_en)
@@ -947,12 +947,12 @@ static int pp_dpm_switch_power_profile(void *handle,
hwmgr->workload_mask &= ~(1 << hwmgr->workload_prority[type]);
index = fls(hwmgr->workload_mask);
index = index > 0 && index <= Workload_Policy_Max ? index - 1 : 
0;
-   workload = hwmgr->workload_setting[index];
+   workload[0] = hwmgr->workload_setting[index];
} else {
hwmgr->workload_mask |= (1 << hwmgr->workload_prority[type]);
index = fls(hwmgr->workload_mask);
index = index <= Workload_Policy_Max ? index - 1 : 0;
-   workload = hwmgr->workload_setting[index];
+   workload[0] = hwmgr->workload_setting[index];
}
 
if (type == PP_SMC_POWER_PROFILE_COMPUTE &&
@@ -962,7 +962,7 @@ static int pp_dpm_switch_power_profile(void *handle,
}
 
if (hwmgr->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL)
-   hwmgr->hwmgr_func->set_power_profile_mode(hwmgr, &workload, 0);
+   hwmgr->hwmgr_func->set_power_profile_mode(hwmgr, workload, 0);
 
return 0;
 }
diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/pp_psm.c 
b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/pp_psm.c
index 1d829402cd2e..f4bd8e9357e2 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/pp_psm.c
+++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/pp_psm.c
@@ -269,7 +269,7 @@ int psm_adjust_power_state_dynamic(struct pp_hwmgr *hwmgr, 
bool skip_display_set
struct pp_power_state *new_ps)
 {
uint32_t index;
-   long workload;
+   long workload[1];
 
if (hwmgr->not_vf) {
if (!skip_display_settings)
@@ -294,10 +294,10 @@ int psm_adjust_power_state_dynamic(struct pp_hwmgr 
*hwmgr, bool skip_display_set
if (hwmgr->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL) {
index = fls(hwmgr->workload_mask);
index = index > 0 && index <= Workload_Policy_Max ? index - 1 : 
0;
-   workload = hwmgr->workload_setting[index];
+   workload[0] = hwmgr->workload_setting[index];
 
-   if (hwmgr->power_profile_mode != workload && 
hwmgr->hwmgr_func->set_power_profile_mode)
-   hwmgr->hwmgr_func->set_power_profile_mode(hwmgr, 
&workload, 0);
+   if (hwmgr->power_profile_mode != workload[0] && 
hwmgr->hwmgr_func->set_power_profile_mode)
+   hwmgr->hwmgr_func->set_power_profile_mode(hwmgr, 
workload, 0);
}
 
return 0;
diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c 
b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
index dc0d3a9f1d6d..e61aa4418d44 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
@@ -2210,7 +2210,7 @@ static int smu_adjust_power_state_dynamic(struct 
smu_context *smu,
 {
int ret = 0;
int index = 0;
-   long workload;
+   long workload[1];
struct smu_dpm_context *smu_dpm_ctx = &(smu->smu_dpm);
 
if (!skip_display_settings) {
@@ -2250,10 +2250,10 @@ static int smu_adjust_power_state_dynamic(struct 
smu_context *smu,
smu_dpm_ctx->dpm_level != 
AMD_DPM_FORCED_LEVEL_PERF_DETERMINISM) {
index = fls(smu->workload_mask);
index = index > 0 && index <= WORKLOAD_POLICY_MAX ? index - 1 : 
0;
-   workload = smu->workload_setting[index];
+   workload[0] = smu->workload_setting[index];
 
-   if (smu->power_profile_mode != workload)
-   smu_bump_power_profile_mode(smu, &workload, 0);
+   if (smu->power_profile_mode != workload[0])
+   smu_bump_power_profile_mode(smu, workload, 0);
}
 
return ret;
@@ -2303,7 +2303,7 @@ static int smu_switc

[PATCH 1/2] drm/amdgpu: Fix uninitialized variable warning in amdgpu_info_ioctl

2024-05-06 Thread Ma Jun

Check the return value of amdgpu_xcp_get_inst_details, otherwise we
may use an uninitialized variable inst_mask

Signed-off-by: Ma Jun 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 11 +--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index a0ea6fe8d060..977cde6d1362 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -623,25 +623,32 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, 
struct drm_file *filp)
switch (type) {
case AMD_IP_BLOCK_TYPE_GFX:
ret = amdgpu_xcp_get_inst_details(xcp, 
AMDGPU_XCP_GFX, &inst_mask);
+   if (ret)
+   return ret;
count = hweight32(inst_mask);
break;
case AMD_IP_BLOCK_TYPE_SDMA:
ret = amdgpu_xcp_get_inst_details(xcp, 
AMDGPU_XCP_SDMA, &inst_mask);
+   if (ret)
+   return ret;
count = hweight32(inst_mask);
break;
case AMD_IP_BLOCK_TYPE_JPEG:
ret = amdgpu_xcp_get_inst_details(xcp, 
AMDGPU_XCP_VCN, &inst_mask);
+   if (ret)
+   return ret;
count = hweight32(inst_mask) * 
adev->jpeg.num_jpeg_rings;
break;
case AMD_IP_BLOCK_TYPE_VCN:
ret = amdgpu_xcp_get_inst_details(xcp, 
AMDGPU_XCP_VCN, &inst_mask);
+   if (ret)
+   return ret;
count = hweight32(inst_mask);
break;
default:
return -EINVAL;
}
-   if (ret)
-   return ret;
+
return copy_to_user(out, &count, min(size, 4u)) ? 
-EFAULT : 0;
}
 
-- 
2.34.1

[PATCH 2/2] drm/amdgpu: fix mc_data out-of-bounds read warning

2024-05-06 Thread Tim Huang

Clear the warning that the read of mc_data[i-1] may be out of bounds.

Signed-off-by: Tim Huang 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
index 52b12c1718eb..7dc102f0bc1d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
@@ -1484,6 +1484,8 @@ int amdgpu_atombios_init_mc_reg_table(struct 
amdgpu_device *adev,

(u32)le32_to_cpu(*((u32 *)reg_data + j));
j++;
} else if 
((reg_table->mc_reg_address[i].pre_reg_data & LOW_NIBBLE_MASK) == 
DATA_EQU_PREV) {
+   if (i 
== 0)
+   
continue;

reg_table->mc_reg_table_entry[num_ranges].mc_data[i] =

reg_table->mc_reg_table_entry[num_ranges].mc_data[i - 1];
}
-- 
2.39.2

[PATCH 1/2] drm/amdgpu: fix ucode out-of-bounds read warning

2024-05-06 Thread Tim Huang

Clear the warning that reading ucode[] may be out of bounds.

Signed-off-by: Tim Huang 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
index b8280be6225d..c3d89088123d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
@@ -213,6 +213,9 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device 
*cgs_device,
struct amdgpu_firmware_info *ucode;
 
id = fw_type_convert(cgs_device, type);
+   if (id >= AMDGPU_UCODE_ID_MAXIMUM)
+   return -EINVAL;
+
ucode = &adev->firmware.ucode[id];
if (ucode->fw == NULL)
return -EINVAL;
-- 
2.39.2

Re: [PATCH v2 12/12] fbdev/viafb: Make I2C terminology more inclusive

2024-05-06 Thread Thomas Zimmermann





Am 03.05.24 um 20:13 schrieb Easwar Hariharan:

I2C v7, SMBus 3.2, and I3C 1.1.1 specifications have replaced "master/slave"
with more appropriate terms. Inspired by and following on to Wolfram's
series to fix drivers/i2c/[1], fix the terminology for users of
I2C_ALGOBIT bitbanging interface, now that the approved verbiage exists
in the specification.

Compile tested, no functionality changes intended

[1]: 
https://lore.kernel.org/all/20240322132619.6389-1-wsa+rene...@sang-engineering.com/

Signed-off-by: Easwar Hariharan 


Acked-by: Thomas Zimmermann 


---
  drivers/video/fbdev/via/chip.h|  8 
  drivers/video/fbdev/via/dvi.c | 24 
  drivers/video/fbdev/via/lcd.c |  6 +++---
  drivers/video/fbdev/via/via_aux.h |  2 +-
  drivers/video/fbdev/via/via_i2c.c | 12 ++--
  drivers/video/fbdev/via/vt1636.c  |  6 +++---
  6 files changed, 29 insertions(+), 29 deletions(-)

diff --git a/drivers/video/fbdev/via/chip.h b/drivers/video/fbdev/via/chip.h
index f0a19cbcb9e5..f81af13630e2 100644
--- a/drivers/video/fbdev/via/chip.h
+++ b/drivers/video/fbdev/via/chip.h
@@ -69,7 +69,7 @@
  #define VT1632_TMDS 0x01
  #define INTEGRATED_TMDS 0x42
  
-/* Definition TMDS Trasmitter I2C Slave Address */

+/* Definition TMDS Trasmitter I2C Target Address */
  #define VT1632_TMDS_I2C_ADDR0x10
  
  /**/

@@ -88,21 +88,21 @@
  #define TX_DATA_DDR_MODE0x04
  #define TX_DATA_SDR_MODE0x08
  
-/* Definition LVDS Trasmitter I2C Slave Address */

+/* Definition LVDS Trasmitter I2C Target Address */
  #define VT1631_LVDS_I2C_ADDR0x70
  #define VT3271_LVDS_I2C_ADDR0x80
  #define VT1636_LVDS_I2C_ADDR0x80
  
  struct tmds_chip_information {

int tmds_chip_name;
-   int tmds_chip_slave_addr;
+   int tmds_chip_target_addr;
int output_interface;
int i2c_port;
  };
  
  struct lvds_chip_information {

int lvds_chip_name;
-   int lvds_chip_slave_addr;
+   int lvds_chip_target_addr;
int output_interface;
int i2c_port;
  };
diff --git a/drivers/video/fbdev/via/dvi.c b/drivers/video/fbdev/via/dvi.c
index 13147e3066eb..27990a73bfa3 100644
--- a/drivers/video/fbdev/via/dvi.c
+++ b/drivers/video/fbdev/via/dvi.c
@@ -70,7 +70,7 @@ bool viafb_tmds_trasmitter_identify(void)
/* Check for VT1632: */
viaparinfo->chip_info->tmds_chip_info.tmds_chip_name = VT1632_TMDS;
viaparinfo->chip_info->
-   tmds_chip_info.tmds_chip_slave_addr = VT1632_TMDS_I2C_ADDR;
+   tmds_chip_info.tmds_chip_target_addr = VT1632_TMDS_I2C_ADDR;
viaparinfo->chip_info->tmds_chip_info.i2c_port = VIA_PORT_31;
if (check_tmds_chip(VT1632_DEVICE_ID_REG, VT1632_DEVICE_ID)) {
/*
@@ -128,14 +128,14 @@ bool viafb_tmds_trasmitter_identify(void)
viaparinfo->chip_info->
tmds_chip_info.tmds_chip_name = NON_TMDS_TRANSMITTER;
viaparinfo->chip_info->tmds_chip_info.
-   tmds_chip_slave_addr = VT1632_TMDS_I2C_ADDR;
+   tmds_chip_target_addr = VT1632_TMDS_I2C_ADDR;
return false;
  }
  
  static void tmds_register_write(int index, u8 data)

  {
viafb_i2c_writebyte(viaparinfo->chip_info->tmds_chip_info.i2c_port,
-   
viaparinfo->chip_info->tmds_chip_info.tmds_chip_slave_addr,
+   
viaparinfo->chip_info->tmds_chip_info.tmds_chip_target_addr,
index, data);
  }
  
@@ -144,7 +144,7 @@ static int tmds_register_read(int index)

u8 data;
  
  	viafb_i2c_readbyte(viaparinfo->chip_info->tmds_chip_info.i2c_port,

-  (u8) 
viaparinfo->chip_info->tmds_chip_info.tmds_chip_slave_addr,
+  (u8) 
viaparinfo->chip_info->tmds_chip_info.tmds_chip_target_addr,
   (u8) index, &data);
return data;
  }
@@ -152,7 +152,7 @@ static int tmds_register_read(int index)
  static int tmds_register_read_bytes(int index, u8 *buff, int buff_len)
  {
viafb_i2c_readbytes(viaparinfo->chip_info->tmds_chip_info.i2c_port,
-   (u8) 
viaparinfo->chip_info->tmds_chip_info.tmds_chip_slave_addr,
+   (u8) 
viaparinfo->chip_info->tmds_chip_info.tmds_chip_target_addr,
(u8) index, buff, buff_len);
return 0;
  }
@@ -256,14 +256,14 @@ static int viafb_dvi_query_EDID(void)
  
  	DEBUG_MSG(KERN_INFO "viafb_dvi_query_EDID!!\n");
  
-	restore = viaparinfo->chip_info->tmds_chip_info.tmds_chip_slave_addr;

-   viaparinfo->chip_info->tmds_chip_info.tmds_chip_slave_addr = 0xA0;
+   restore = viaparinfo->chip_info->tmds_chip_info.tmds_chip_target_addr;
+   viaparinfo->chip_info->tmds_chip_info.tmds_chip_target_addr = 0xA0;
  
  	data0 = (u8) tmds_register_read(0x00);

data1 = (u8) tmds_register_read(0x0

Re: [PATCH v2 11/12] fbdev/smscufx: Make I2C terminology more inclusive

2024-05-06 Thread Thomas Zimmermann





Am 03.05.24 um 20:13 schrieb Easwar Hariharan:

I2C v7, SMBus 3.2, and I3C 1.1.1 specifications have replaced "master/slave"
with more appropriate terms. Inspired by and following on to Wolfram's
series to fix drivers/i2c/[1], fix the terminology for users of
I2C_ALGOBIT bitbanging interface, now that the approved verbiage exists
in the specification.

Compile tested, no functionality changes intended

[1]: 
https://lore.kernel.org/all/20240322132619.6389-1-wsa+rene...@sang-engineering.com/

Signed-off-by: Easwar Hariharan 


Acked-by: Thomas Zimmermann 


---
  drivers/video/fbdev/smscufx.c | 4 ++--
  1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/video/fbdev/smscufx.c b/drivers/video/fbdev/smscufx.c
index 35d682b110c4..5f0dd01fd834 100644
--- a/drivers/video/fbdev/smscufx.c
+++ b/drivers/video/fbdev/smscufx.c
@@ -1292,7 +1292,7 @@ static int ufx_realloc_framebuffer(struct ufx_data *dev, 
struct fb_info *info)
return 0;
  }
  
-/* sets up I2C Controller for 100 Kbps, std. speed, 7-bit addr, master,

+/* sets up DDC channel for 100 Kbps, std. speed, 7-bit addr, controller mode,
   * restart enabled, but no start byte, enable controller */
  static int ufx_i2c_init(struct ufx_data *dev)
  {
@@ -1321,7 +1321,7 @@ static int ufx_i2c_init(struct ufx_data *dev)
/* 7-bit (not 10-bit) addressing */
tmp &= ~(0x10);
  
-	/* enable restart conditions and master mode */

+   /* enable restart conditions and controller mode */
tmp |= 0x21;
  
  	status = ufx_reg_write(dev, 0x1000, tmp);


--
--
Thomas Zimmermann
Graphics Driver Developer
SUSE Software Solutions Germany GmbH
Frankenstrasse 146, 90461 Nuernberg, Germany
GF: Ivo Totev, Andrew Myers, Andrew McDonald, Boudien Moerman
HRB 36809 (AG Nuernberg)

RE: [PATCH v2] drm/amdgpu: Fix out-of-bounds write warning

2024-05-06 Thread Huang, Tim

[Public]

Reviewed-by: Tim Huang 

Best Regards,
Tim Huang



> -Original Message-
> From: amd-gfx  On Behalf Of Ma Jun
> Sent: Monday, May 6, 2024 1:59 PM
> To: amd-gfx@lists.freedesktop.org; Koenig, Christian
> ; Deucher, Alexander
> 
> Cc: Ma, Jun 
> Subject: [PATCH v2] drm/amdgpu: Fix out-of-bounds write warning
>
> Check the ring type value to fix the out-of-bounds write warning
>
> Signed-off-by: Ma Jun 
> Suggested-by: Christian König 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> index 15c240656470..ad49cecb20b8 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> @@ -352,7 +352,7 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct
> amdgpu_ring *ring,
>   ring->max_dw = max_dw;
>   ring->hw_prio = hw_prio;
>
> - if (!ring->no_scheduler) {
> + if (!ring->no_scheduler && ring->funcs->type < AMDGPU_HW_IP_NUM)
> {
>   hw_ip = ring->funcs->type;
>   num_sched = &adev->gpu_sched[hw_ip][hw_prio].num_scheds;
>   adev->gpu_sched[hw_ip][hw_prio].sched[(*num_sched)++] =
> --
> 2.34.1

Re: [PATCH v2 03/12] drm/i915: Make I2C terminology more inclusive

2024-05-06 Thread Andi Shyti

Hi,

On Fri, May 03, 2024 at 03:34:12PM -0400, Rodrigo Vivi wrote:
> On Fri, May 03, 2024 at 06:13:24PM +, Easwar Hariharan wrote:
> > I2C v7, SMBus 3.2, and I3C 1.1.1 specifications have replaced "master/slave"
> > with more appropriate terms. Inspired by and following on to Wolfram's
> > series to fix drivers/i2c/[1], fix the terminology for users of
> > I2C_ALGOBIT bitbanging interface, now that the approved verbiage exists
> > in the specification.
> > 
> > Compile tested, no functionality changes intended
> > 
> > [1]: 
> > https://lore.kernel.org/all/20240322132619.6389-1-wsa+rene...@sang-engineering.com/
> > 
> > Reviewed-by: Rodrigo Vivi 
> > Acked-by: Rodrigo Vivi 
> 
> It looks like the ack is not needed since we are merging this through
> drm-intel-next. But I'm planning to merge this only after seeing the
> main drivers/i2c accepting the new terminology. So we don't have a
> risk of that getting push back and new names there and we having
> to rename it once again.

I basically agree with this patch (without the eDP part).

But the documentation is still not updated and I think we need to
hold until Wolfram has done that.

In any case, it's good to anticipate the reviews.

Thanks Easwar,
Andi

Re: [PATCH v2 03/12] drm/i915: Make I2C terminology more inclusive

2024-05-06 Thread Jani Nikula

On Fri, 03 May 2024, Rodrigo Vivi  wrote:
> On Fri, May 03, 2024 at 02:04:15PM -0700, Easwar Hariharan wrote:
>> On 5/3/2024 12:34 PM, Rodrigo Vivi wrote:
>> > On Fri, May 03, 2024 at 06:13:24PM +, Easwar Hariharan wrote:
>> >> I2C v7, SMBus 3.2, and I3C 1.1.1 specifications have replaced 
>> >> "master/slave"
>> >> with more appropriate terms. Inspired by and following on to Wolfram's
>> >> series to fix drivers/i2c/[1], fix the terminology for users of
>> >> I2C_ALGOBIT bitbanging interface, now that the approved verbiage exists
>> >> in the specification.
>> >>
>> >> Compile tested, no functionality changes intended
>> >>
>> >> [1]: 
>> >> https://lore.kernel.org/all/20240322132619.6389-1-wsa+rene...@sang-engineering.com/
>> >>
>> >> Reviewed-by: Rodrigo Vivi 
>> >> Acked-by: Rodrigo Vivi 
>> > 
>> > It looks like the ack is not needed since we are merging this through
>> > drm-intel-next. But I'm planning to merge this only after seeing the
>> > main drivers/i2c accepting the new terminology. So we don't have a
>> > risk of that getting push back and new names there and we having
>> > to rename it once again.
>> 
>> Just to be explicit, did you want me to remove the Acked-by in v3, or will 
>> you when you pull
>> the patch into drm-intel-next?
>> 
>> > 
>> > (more below)
>> > 
>> >> Acked-by: Zhi Wang 
>> >> Signed-off-by: Easwar Hariharan 
>> > 
>> > Cc: Jani Nikula 
>> > 
>> > Jani, what bits were you concerned that were not necessarily i2c?
>> > I believe although not necessarily/directly i2c, I believe they
>> > are related and could benefit from the massive single shot rename.
>> > or do you have any better split to suggest here?
>> > 
>> > (more below)
>> > 
>> >> ---
>> >>  drivers/gpu/drm/i915/display/dvo_ch7017.c | 14 -
>> >>  drivers/gpu/drm/i915/display/dvo_ch7xxx.c | 18 +--
>> >>  drivers/gpu/drm/i915/display/dvo_ivch.c   | 16 +-
>> >>  drivers/gpu/drm/i915/display/dvo_ns2501.c | 18 +--
>> >>  drivers/gpu/drm/i915/display/dvo_sil164.c | 18 +--
>> >>  drivers/gpu/drm/i915/display/dvo_tfp410.c | 18 +--
>> >>  drivers/gpu/drm/i915/display/intel_bios.c | 22 +++---
>> >>  drivers/gpu/drm/i915/display/intel_ddi.c  |  2 +-
>> >>  .../gpu/drm/i915/display/intel_display_core.h |  2 +-
>> >>  drivers/gpu/drm/i915/display/intel_dsi.h  |  2 +-
>> >>  drivers/gpu/drm/i915/display/intel_dsi_vbt.c  | 20 ++---
>> >>  drivers/gpu/drm/i915/display/intel_dvo.c  | 14 -
>> >>  drivers/gpu/drm/i915/display/intel_dvo_dev.h  |  2 +-
>> >>  drivers/gpu/drm/i915/display/intel_gmbus.c|  4 +--
>> >>  drivers/gpu/drm/i915/display/intel_sdvo.c | 30 +--
>> >>  drivers/gpu/drm/i915/display/intel_vbt_defs.h |  4 +--
>> >>  drivers/gpu/drm/i915/gvt/edid.c   | 28 -
>> >>  drivers/gpu/drm/i915/gvt/edid.h   |  4 +--
>> >>  drivers/gpu/drm/i915/gvt/opregion.c   |  2 +-
>> >>  19 files changed, 119 insertions(+), 119 deletions(-)
>> >>
>> 
>> 
>> 
>> >> diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c 
>> >> b/drivers/gpu/drm/i915/display/intel_ddi.c
>> >> index c17462b4c2ac..64db211148a8 100644
>> >> --- a/drivers/gpu/drm/i915/display/intel_ddi.c
>> >> +++ b/drivers/gpu/drm/i915/display/intel_ddi.c
>> >> @@ -4332,7 +4332,7 @@ static int intel_ddi_compute_config_late(struct 
>> >> intel_encoder *encoder,
>> >>   
>> >> connector->tile_group->id);
>> >>  
>> >>   /*
>> >> -  * EDP Transcoders cannot be ensalved
>> >> +  * EDP Transcoders cannot be slaves
>> > 
>> >  ^ here
>> > perhaps you meant 'targeted' ?
>> > 
>> >>* make them a master always when present
>> 
>> 
>> 
>> This is not actually I2C related as far as I could tell when I was making 
>> the change, so this was more of a typo fix.
>> 
>> If we want to improve this, a quick check with the eDP v1.5a spec suggests 
>> using primary/secondary instead,
>> though in a global fashion rather than specifically for eDP transcoders. 
>> There is also source/sink terminology
>> in the spec related to DP encoders.
>> 
>> Which would be a more acceptable change here?
>
> hmmm probably better to split the patches and align with the spec naming 
> where it applies.
> and with i2c name where it applies.

Yeah this one is completely unrelated to i2c and aux, and what the eDP
spec says is irrelevant here. This should follow Intel hw specs.

BR,
Jani.



-- 
Jani Nikula, Intel

Re: [PATCH 2/2] drm/amd/amdgpu: use the default reset for ras recovery

2024-05-06 Thread Zhang, GuoQing (Sam)

[AMD Official Use Only - General]

Hi @Deucher, Alexander and @Koenig, 
Christian

Could you help review this patch?
Without this patch, when a customer sets the `reset_method=3` modprobe param to use 
mode2 reset, ras recovery will also use mode2 reset and skip mode1 reset.
When an ECC error happens, the GPU can’t be recovered with mode2 reset, and because mode1 reset 
is skipped, this will cause a GPU reset failure.

This patch is to always use mode1 reset for ras recovery (ECC error) when 
setting `reset_method=3`.

Thanks
Sam

From: Feng, Kenneth 
Date: Monday, April 29, 2024 at 16:15
To: Feng, Kenneth , amd-gfx@lists.freedesktop.org 
, Zhang, GuoQing (Sam) 
Cc: Zhang, Owen(SRDC) , Aldabagh, Maad 
, Ma, Qing (Mark) 
Subject: RE: [PATCH 2/2] drm/amd/amdgpu: use the default reset for ras recovery
[AMD Official Use Only - General]

+@Zhang, GuoQing (Sam)

-Original Message-
From: Kenneth Feng 
Sent: Monday, April 29, 2024 3:32 PM
To: amd-gfx@lists.freedesktop.org
Cc: Zhang, Owen(SRDC) ; Aldabagh, Maad 
; Ma, Qing (Mark) ; Feng, Kenneth 

Subject: [PATCH 2/2] drm/amd/amdgpu: use the default reset for ras recovery

use the default reset for ras recovery

Signed-off-by: Kenneth Feng 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index a037e8fba29f..f92b2c4f0d5c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -2437,6 +2437,7 @@ static void amdgpu_ras_do_recovery(struct work_struct 
*work)
struct amdgpu_device *adev = ras->adev;
struct list_head device_list, *device_list_handle =  NULL;
struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
+   int save_reset_method = amdgpu_reset_method;

if (hive) {
atomic_set(&hive->ras_recovery, 1);
@@ -2501,7 +2502,13 @@ static void amdgpu_ras_do_recovery(struct work_struct 
*work)
}
}

+   if (amdgpu_gpu_recovery == 2)
+   amdgpu_reset_method = -1;
+
amdgpu_device_gpu_recover(ras->adev, NULL, &reset_context);
+
+   if (amdgpu_gpu_recovery == 2)
+   amdgpu_reset_method = save_reset_method;
}
atomic_set(&ras->in_recovery, 0);
if (hive) {
--
2.34.1

Re: [PATCH 1/2] drm/amd/amdgpu: customized the reset to skip soft recovery

2024-05-06 Thread Zhang, GuoQing (Sam)

[AMD Official Use Only - General]

Hi @Deucher, Alexander and @Koenig, 
Christian

Could you help review this patch? The customer needs this toggle to turn off 
software recovery and use mode2 reset directly. This toggle is also needed when 
we test mode2 reset using the quark tool.

Thanks
Sam

From: Feng, Kenneth 
Date: Monday, April 29, 2024 at 16:14
To: Feng, Kenneth , amd-gfx@lists.freedesktop.org 
, Zhang, GuoQing (Sam) 
Cc: Zhang, Owen(SRDC) , Aldabagh, Maad 
, Ma, Qing (Mark) 
Subject: RE: [PATCH 1/2] drm/amd/amdgpu: customized the reset to skip soft 
recovery
[AMD Official Use Only - General]

+@Zhang, GuoQing (Sam)

-Original Message-
From: Kenneth Feng 
Sent: Monday, April 29, 2024 3:32 PM
To: amd-gfx@lists.freedesktop.org
Cc: Zhang, Owen(SRDC) ; Aldabagh, Maad 
; Ma, Qing (Mark) ; Feng, Kenneth 

Subject: [PATCH 1/2] drm/amd/amdgpu: customized the reset to skip soft recovery

customized the reset to skip soft recovery

Signed-off-by: Kenneth Feng 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 2 +-  
drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index ea14f1c8f430..65c3a387fec7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -548,7 +548,7 @@ module_param_named(compute_multipipe, 
amdgpu_compute_multipipe, int, 0444);
  * DOC: gpu_recovery (int)
  * Set to enable GPU recovery mechanism (1 = enable, 0 = disable). The default 
is -1 (auto, disabled except SRIOV).
  */
-MODULE_PARM_DESC(gpu_recovery, "Enable GPU recovery mechanism, (1 = enable, 0 
= disable, -1 = auto)");
+MODULE_PARM_DESC(gpu_recovery, "Enable GPU recovery mechanism, (1 =
+enable, 0 = disable, -1 = auto, 2 = customized)");
 module_param_named(gpu_recovery, amdgpu_gpu_recovery, int, 0444);

 /**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index e4742b65032d..5c75993698a5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -51,7 +51,7 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct 
drm_sched_job *s_job)

adev->job_hang = true;

-   if (amdgpu_gpu_recovery &&
+   if (amdgpu_gpu_recovery && amdgpu_gpu_recovery != 2 &&
amdgpu_ring_soft_recovery(ring, job->vmid, s_job->s_fence->parent)) 
{
DRM_ERROR("ring %s timeout, but soft recovered\n",
  s_job->sched->name);
--
2.34.1

94 matches

Mail list logo