date:20230810

[PATCH] drm/amd/pm: bump SMU v13.0.5 driver_if header version

2023-08-10 Thread Tim Huang

Align the SMU driver interface version with PMFW to
suppress the version mismatch message on driver loading.

Signed-off-by: Tim Huang 
---
 .../gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_5.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_5.h 
b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_5.h
index 7589faa0232d..779c2524806c 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_5.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_5.h
@@ -23,7 +23,7 @@
 #ifndef __SMU13_DRIVER_IF_V13_0_5_H__
 #define __SMU13_DRIVER_IF_V13_0_5_H__
 
-#define SMU13_0_5_DRIVER_IF_VERSION 4
+#define SMU13_0_5_DRIVER_IF_VERSION 5
 
 // Throttler Status Bitmask
 #define THROTTLER_STATUS_BIT_SPL0
@@ -103,7 +103,6 @@ typedef struct {
   uint16_t ThrottlerStatus;
 
   uint16_t CurrentSocketPower;  //[mW]
-  uint16_t spare1;
 } SmuMetrics_t;
 
 //Freq in MHz
-- 
2.34.1

[PATCH 3/4] drm/amdgpu: Add bootloader wait for PSP v13

2023-08-10 Thread Lijo Lazar

Implement the wait-for-bootloader callback for PSP v13.0 ASICs. Only
ASICs with PSP v13.0.6 need an additional check of the VBIOS mailbox
status.

Signed-off-by: Lijo Lazar 
Reviewed-by: Hawking Zhang 
Reviewed-by: Asad Kamal 
Tested-by: Asad Kamal 
---
 drivers/gpu/drm/amd/amdgpu/psp_v13_0.c | 28 --
 1 file changed, 26 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c 
b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
index 10b17bd5aebe..d2a88bc630d2 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
@@ -133,12 +133,35 @@ static bool psp_v13_0_is_sos_alive(struct psp_context 
*psp)
return sol_reg != 0x0;
 }
 
+static int psp_v13_0_wait_for_vmbx_ready(struct psp_context *psp)
+{
+   struct amdgpu_device *adev = psp->adev;
+   int retry_loop, ret;
+
+   for (retry_loop = 0; retry_loop < 70; retry_loop++) {
+   /* Wait for bootloader to signify that is
+  ready having bit 31 of C2PMSG_33 set to 1 */
+   ret = psp_wait_for(
+   psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_33),
+   0x8000, 0x, false);
+
+   if (ret == 0)
+   break;
+   }
+
+   if (ret)
+   dev_warn(adev->dev, "Bootloader wait timed out");
+
+   return ret;
+}
+
 static int psp_v13_0_wait_for_bootloader(struct psp_context *psp)
 {
struct amdgpu_device *adev = psp->adev;
+   int retry_loop, ret;
 
-   int ret;
-   int retry_loop;
+   if (adev->ip_versions[MP0_HWIP][0] == IP_VERSION(13, 0, 6))
+   psp_v13_0_wait_for_vmbx_ready(psp);
 
/* Wait for bootloader to signify that it is ready having bit 31 of
 * C2PMSG_35 set to 1. All other bits are expected to be cleared.
@@ -714,6 +737,7 @@ static int psp_v13_0_fatal_error_recovery_quirk(struct 
psp_context *psp)
 
 static const struct psp_funcs psp_v13_0_funcs = {
.init_microcode = psp_v13_0_init_microcode,
+   .wait_for_bootloader = psp_v13_0_wait_for_bootloader,
.bootloader_load_kdb = psp_v13_0_bootloader_load_kdb,
.bootloader_load_spl = psp_v13_0_bootloader_load_spl,
.bootloader_load_sysdrv = psp_v13_0_bootloader_load_sysdrv,
-- 
2.25.1

[PATCH 4/4] drm/amdgpu: Add SMU v13.0.6 default reset methods

2023-08-10 Thread Lijo Lazar

For APUs with SMU v13.0.6, mode-2 reset is kept as default and for
others mode-1 is the default reset method.

Signed-off-by: Lijo Lazar 
Reviewed-by: Hawking Zhang 
Reviewed-by: Asad Kamal 
Tested-by: Asad Kamal 
---
 drivers/gpu/drm/amd/amdgpu/soc15.c   | 4 +++-
 drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 3 +--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c 
b/drivers/gpu/drm/amd/amdgpu/soc15.c
index c45721ca916e..f5be40d7ba36 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -559,8 +559,10 @@ soc15_asic_reset_method(struct amdgpu_device *adev)
 */
if (amdgpu_gpu_recovery == 4 || amdgpu_gpu_recovery == 5)
return AMD_RESET_METHOD_MODE2;
+   else if (!(adev->flags & AMD_IS_APU))
+   return AMD_RESET_METHOD_MODE1;
else
-   return AMD_RESET_METHOD_NONE;
+   return AMD_RESET_METHOD_MODE2;
default:
break;
}
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
index 362acbb242bb..552a739533ea 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
@@ -2096,8 +2096,7 @@ static int smu_v13_0_6_mode1_reset(struct smu_context 
*smu)
 
 static bool smu_v13_0_6_is_mode1_reset_supported(struct smu_context *smu)
 {
-   /* TODO: Enable this when FW support is added */
-   return false;
+   return true;
 }
 
 static bool smu_v13_0_6_is_mode2_reset_supported(struct smu_context *smu)
-- 
2.25.1

[PATCH 2/4] drm/amdgpu: Add bootloader status check

2023-08-10 Thread Lijo Lazar

Add a function to wait until the bootloader has reached a steady state.

Signed-off-by: Lijo Lazar 
Reviewed-by: Hawking Zhang 
Reviewed-by: Asad Kamal 
Tested-by: Asad Kamal 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 17 ++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c| 11 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h|  3 +++
 3 files changed, 28 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 6809bf7dae57..9061d79cd387 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -885,13 +885,20 @@ static void amdgpu_block_invalid_wreg(struct 
amdgpu_device *adev,
  */
 static int amdgpu_device_asic_init(struct amdgpu_device *adev)
 {
+   int ret;
+
amdgpu_asic_pre_asic_init(adev);
 
if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3) ||
-   adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 0))
-   return amdgpu_atomfirmware_asic_init(adev, true);
-   else
+   adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 0)) {
+   amdgpu_psp_wait_for_bootloader(adev);
+   ret = amdgpu_atomfirmware_asic_init(adev, true);
+   return ret;
+   } else {
return amdgpu_atom_asic_init(adev->mode_info.atom_context);
+   }
+
+   return 0;
 }
 
 /**
@@ -4702,6 +4709,9 @@ int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
dev_err(adev->dev, "GPU mode1 reset failed\n");
 
amdgpu_device_load_pci_state(adev->pdev);
+   ret = amdgpu_psp_wait_for_bootloader(adev);
+   if (ret)
+   return ret;
 
/* wait for asic to come out of reset */
for (i = 0; i < adev->usec_timeout; i++) {
@@ -4713,6 +4723,7 @@ int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
}
 
amdgpu_atombios_scratch_regs_engine_hung(adev, false);
+
return ret;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 8fdca54bb8a1..429ef212c1f2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -2078,6 +2078,17 @@ int psp_securedisplay_invoke(struct psp_context *psp, 
uint32_t ta_cmd_id)
 }
 /* SECUREDISPLAY end */
 
+int amdgpu_psp_wait_for_bootloader(struct amdgpu_device *adev)
+{
+   struct psp_context *psp = >psp;
+   int ret = 0;
+
+   if (!amdgpu_sriov_vf(adev) && psp->funcs && 
psp->funcs->wait_for_bootloader != NULL)
+   ret = psp->funcs->wait_for_bootloader(psp);
+
+   return ret;
+}
+
 static int psp_hw_start(struct psp_context *psp)
 {
struct amdgpu_device *adev = psp->adev;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
index 3384eb94fde0..3e67ed63e638 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
@@ -109,6 +109,7 @@ enum psp_reg_prog_id {
 
 struct psp_funcs {
int (*init_microcode)(struct psp_context *psp);
+   int (*wait_for_bootloader)(struct psp_context *psp);
int (*bootloader_load_kdb)(struct psp_context *psp);
int (*bootloader_load_spl)(struct psp_context *psp);
int (*bootloader_load_sysdrv)(struct psp_context *psp);
@@ -533,4 +534,6 @@ int psp_spatial_partition(struct psp_context *psp, int 
mode);
 
 int is_psp_fw_valid(struct psp_bin_desc bin);
 
+int amdgpu_psp_wait_for_bootloader(struct amdgpu_device *adev);
+
 #endif
-- 
2.25.1

[PATCH 1/4] drm/amdgpu: Unset baco dummy mode on nbio v7.9

2023-08-10 Thread Lijo Lazar

BACO dummy mode could be set under reset conditions, and that affects
framebuffer access. Check whether BACO dummy mode is set and, if so, unset it.

Signed-off-by: Lijo Lazar 
Signed-off-by: Le Ma 
Reviewed-by: Hawking Zhang 
Reviewed-by: Asad Kamal 
Tested-by: Asad Kamal 
---
 drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c | 18 ++
 1 file changed, 18 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c 
b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c
index 9ea072374cb7..f85eec05d218 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c
@@ -437,6 +437,24 @@ static void nbio_v7_9_init_registers(struct amdgpu_device 
*adev)
XCC_DOORBELL_FENCE__SHUB_SLV_MODE_MASK);
 
}
+
+   if (!amdgpu_sriov_vf(adev)) {
+   u32 baco_cntl;
+   for_each_inst(i, adev->aid_mask) {
+   baco_cntl = RREG32_SOC15(NBIO, i, regBIF_BX0_BACO_CNTL);
+   if (baco_cntl & (BIF_BX0_BACO_CNTL__BACO_DUMMY_EN_MASK |
+BIF_BX0_BACO_CNTL__BACO_EN_MASK)) {
+   baco_cntl &= ~(
+   BIF_BX0_BACO_CNTL__BACO_DUMMY_EN_MASK |
+   BIF_BX0_BACO_CNTL__BACO_EN_MASK);
+   dev_dbg(adev->dev,
+   "Unsetting baco dummy mode %x",
+   baco_cntl);
+   WREG32_SOC15(NBIO, i, regBIF_BX0_BACO_CNTL,
+baco_cntl);
+   }
+   }
+   }
 }
 
 static u64 nbio_v7_9_get_pcie_replay_count(struct amdgpu_device *adev)
-- 
2.25.1

[PATCH 0/4] Add mode-1 reset support for SMU v13.0.6

2023-08-10 Thread Lijo Lazar

The series adds mode-1 reset support to SMU v13.0.6 SOCs. For now, it's used as
the default reset method on dGPUs with SMU v13.0.6.  Presently, reset takes
longer than expected. Hence a method is added to check the bootloader status
after reset. It checks whether the bootloader has set the mailbox-ready bit,
which indicates that the bootloader has completed initialization.

Lijo Lazar (4):
  drm/amdgpu: Unset baco dummy mode on nbio v7.9
  drm/amdgpu: Add bootloader status check
  drm/amdgpu: Add bootloader wait for PSP v13
  drm/amdgpu: Add SMU v13.0.6 default reset methods

 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c| 17 +--
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c   | 11 
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h   |  3 ++
 drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c| 18 
 drivers/gpu/drm/amd/amdgpu/psp_v13_0.c| 28 +--
 drivers/gpu/drm/amd/amdgpu/soc15.c|  4 ++-
 .../drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c  |  3 +-
 7 files changed, 76 insertions(+), 8 deletions(-)

-- 
2.25.1

RE: [PATCH] drm/amd/pm: add unique_id for gc 11.0.3

2023-08-10 Thread Xu, Feifei

[AMD Official Use Only - General]

Reviewed-by: Feifei Xu 

-Original Message-
From: amd-gfx  On Behalf Of Kenneth Feng
Sent: Friday, August 11, 2023 12:28 PM
To: amd-gfx@lists.freedesktop.org
Cc: Feng, Kenneth 
Subject: [PATCH] drm/amd/pm: add unique_id for gc 11.0.3

drm/amd/pm: add unique_id for gc 11.0.3

Signed-off-by: Kenneth Feng 
---
 drivers/gpu/drm/amd/pm/amdgpu_pm.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c 
b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
index 5aed023f7402..c69701da94ea 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
@@ -2076,6 +2076,7 @@ static int default_attr_update(struct amdgpu_device 
*adev, struct amdgpu_device_
case IP_VERSION(11, 0, 0):
case IP_VERSION(11, 0, 1):
case IP_VERSION(11, 0, 2):
+   case IP_VERSION(11, 0, 3):
*states = ATTR_STATE_SUPPORTED;
break;
default:
--
2.34.1

[PATCH] drm/amd/pm: add unique_id for gc 11.0.3

2023-08-10 Thread Kenneth Feng

drm/amd/pm: add unique_id for gc 11.0.3

Signed-off-by: Kenneth Feng 
---
 drivers/gpu/drm/amd/pm/amdgpu_pm.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c 
b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
index 5aed023f7402..c69701da94ea 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
@@ -2076,6 +2076,7 @@ static int default_attr_update(struct amdgpu_device 
*adev, struct amdgpu_device_
case IP_VERSION(11, 0, 0):
case IP_VERSION(11, 0, 1):
case IP_VERSION(11, 0, 2):
+   case IP_VERSION(11, 0, 3):
*states = ATTR_STATE_SUPPORTED;
break;
default:
-- 
2.34.1

[PATCH] drm/amdgpu: Add memory vendor information

2023-08-10 Thread Lijo Lazar

For ASICs with GC v9.4.3, determine the memory vendor information from the
scratch register.

Signed-off-by: Lijo Lazar 
Reviewed-by: Hawking Zhang 
---
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 26 ++
 1 file changed, 18 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 880460cd3239..f9a5a2c0573e 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -1998,6 +1998,19 @@ static int gmc_v9_0_init_mem_ranges(struct amdgpu_device 
*adev)
return 0;
 }
 
+static void gmc_v9_4_3_init_vram_info(struct amdgpu_device *adev)
+{
+   static const u32 regBIF_BIOS_SCRATCH_4 = 0x50;
+   u32 vram_info;
+
+   if (!amdgpu_sriov_vf(adev)) {
+   vram_info = RREG32(regBIF_BIOS_SCRATCH_4);
+   adev->gmc.vram_vendor = vram_info & 0xF;
+   }
+   adev->gmc.vram_type = AMDGPU_VRAM_TYPE_HBM;
+   adev->gmc.vram_width = 128 * 64;
+}
+
 static int gmc_v9_0_sw_init(void *handle)
 {
int r, vram_width = 0, vram_type = 0, vram_vendor = 0, dma_addr_bits;
@@ -2010,15 +2023,12 @@ static int gmc_v9_0_sw_init(void *handle)
 
spin_lock_init(>gmc.invalidate_lock);
 
-   if (!(adev->bios) || adev->gmc.is_app_apu) {
+   if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3)) {
+   gmc_v9_4_3_init_vram_info(adev);
+   } else if (!adev->bios) {
if (adev->flags & AMD_IS_APU) {
-   if (adev->gmc.is_app_apu) {
-   adev->gmc.vram_type = AMDGPU_VRAM_TYPE_HBM;
-   adev->gmc.vram_width = 128 * 64;
-   } else {
-   adev->gmc.vram_type = AMDGPU_VRAM_TYPE_DDR4;
-   adev->gmc.vram_width = 64 * 64;
-   }
+   adev->gmc.vram_type = AMDGPU_VRAM_TYPE_DDR4;
+   adev->gmc.vram_width = 64 * 64;
} else {
adev->gmc.vram_type = AMDGPU_VRAM_TYPE_HBM;
adev->gmc.vram_width = 128 * 64;
-- 
2.25.1

[PATCH 9/9] drm/amd: Hide unsupported power attributes

2023-08-10 Thread Mario Limonciello

Some ASICs offer only one type of power attribute, so in the visible
callback check whether each attribute is supported and hide it if it is
not.

Signed-off-by: Mario Limonciello 
---
 drivers/gpu/drm/amd/pm/amdgpu_pm.c | 9 +
 1 file changed, 9 insertions(+)

diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c 
b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
index 8133d968f75b9..5b1d73b00ef73 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
@@ -3179,6 +3179,7 @@ static umode_t hwmon_attributes_visible(struct kobject 
*kobj,
struct amdgpu_device *adev = dev_get_drvdata(dev);
umode_t effective_mode = attr->mode;
uint32_t gc_ver = adev->ip_versions[GC_HWIP][0];
+   uint32_t tmp;
 
/* under multi-vf mode, the hwmon attributes are all not supported */
if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
@@ -3264,6 +3265,14 @@ static umode_t hwmon_attributes_visible(struct kobject 
*kobj,
(attr == _dev_attr_power1_average.dev_attr.attr))
return 0;
 
+   /* not all products support both average and instantaneous */
+   if (attr == _dev_attr_power1_average.dev_attr.attr &&
+   amdgpu_hwmon_get_sensor_generic(adev, 
AMDGPU_PP_SENSOR_GPU_AVG_POWER, (void *)) == -EOPNOTSUPP)
+   return 0;
+   if (attr == _dev_attr_power1_input.dev_attr.attr &&
+   amdgpu_hwmon_get_sensor_generic(adev, 
AMDGPU_PP_SENSOR_GPU_INPUT_POWER, (void *)) == -EOPNOTSUPP)
+   return 0;
+
/* hide max/min values if we can't both query and manage the fan */
if (((amdgpu_dpm_set_fan_speed_pwm(adev, U32_MAX) == -EOPNOTSUPP) &&
  (amdgpu_dpm_get_fan_speed_pwm(adev, NULL) == -EOPNOTSUPP) &&
-- 
2.34.1

[PATCH 7/9] drm/amd: Show both power attributes for vega20

2023-08-10 Thread Mario Limonciello

Vega20 can offer average power in some versions of the PMFW and current
power in others.

Signed-off-by: Mario Limonciello 
---
 .../drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c  | 18 +-
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c 
b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c
index b6dd7f8daf725..ad40395be6028 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c
+++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c
@@ -2129,7 +2129,7 @@ static int vega20_get_metrics_table(struct pp_hwmgr 
*hwmgr,
return ret;
 }
 
-static int vega20_get_gpu_power(struct pp_hwmgr *hwmgr,
+static int vega20_get_gpu_power(struct pp_hwmgr *hwmgr, int idx,
uint32_t *query)
 {
int ret = 0;
@@ -2140,10 +2140,17 @@ static int vega20_get_gpu_power(struct pp_hwmgr *hwmgr,
return ret;
 
/* For the 40.46 release, they changed the value name */
-   if (hwmgr->smu_version == 0x282e00)
-   *query = metrics_table.AverageSocketPower << 8;
-   else
+   switch (idx) {
+   case AMDGPU_PP_SENSOR_GPU_POWER:
+   if (hwmgr->smu_version == 0x282e00)
+   *query = metrics_table.AverageSocketPower << 8;
+   else
+   ret = -EOPNOTSUPP;
+   break;
+   case AMDGPU_PP_SENSOR_GPU_INPUT_POWER:
*query = metrics_table.CurrSocketPower << 8;
+   break;
+   }
 
return ret;
 }
@@ -2253,9 +2260,10 @@ static int vega20_read_sensor(struct pp_hwmgr *hwmgr, 
int idx,
*((uint32_t *)value) = data->vce_power_gated ? 0 : 1;
*size = 4;
break;
+   case AMDGPU_PP_SENSOR_GPU_POWER:
case AMDGPU_PP_SENSOR_GPU_INPUT_POWER:
*size = 16;
-   ret = vega20_get_gpu_power(hwmgr, (uint32_t *)value);
+   ret = vega20_get_gpu_power(hwmgr, idx, (uint32_t *)value);
break;
case AMDGPU_PP_SENSOR_VDDGFX:
val_vid = (RREG32_SOC15(SMUIO, 0, mmSMUSVI0_TEL_PLANE0) &
-- 
2.34.1

[PATCH 8/9] drm/amd: Rename AMDGPU_PP_SENSOR_GPU_POWER

2023-08-10 Thread Mario Limonciello

Use the clearer name `AMDGPU_PP_SENSOR_GPU_AVG_POWER` instead.

Signed-off-by: Mario Limonciello 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 2 +-
 drivers/gpu/drm/amd/include/kgd_pp_interface.h  | 2 +-
 drivers/gpu/drm/amd/pm/amdgpu_pm.c  | 4 ++--
 drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c   | 4 ++--
 drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c   | 2 +-
 drivers/gpu/drm/amd/pm/swsmu/smu11/cyan_skillfish_ppt.c | 2 +-
 drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c | 2 +-
 drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c | 2 +-
 drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c| 2 +-
 drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c | 1 +
 drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c  | 1 +
 drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c| 2 +-
 drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c| 2 +-
 drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_5_ppt.c| 1 +
 drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c| 1 +
 drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c| 2 +-
 drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c| 1 +
 17 files changed, 19 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 631c5ab3f7dc5..99f4df133ed3e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -1019,7 +1019,7 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, 
struct drm_file *filp)
case AMDGPU_INFO_SENSOR_GPU_AVG_POWER:
/* get average GPU power */
if (amdgpu_dpm_read_sensor(adev,
-  AMDGPU_PP_SENSOR_GPU_POWER,
+  
AMDGPU_PP_SENSOR_GPU_AVG_POWER,
   (void *), _size)) {
return -EINVAL;
}
diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h 
b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
index 6582cdf2736f3..84c5224d994c4 100644
--- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
@@ -132,7 +132,7 @@ enum amd_pp_sensors {
AMDGPU_PP_SENSOR_MEM_TEMP,
AMDGPU_PP_SENSOR_VCE_POWER,
AMDGPU_PP_SENSOR_UVD_POWER,
-   AMDGPU_PP_SENSOR_GPU_POWER,
+   AMDGPU_PP_SENSOR_GPU_AVG_POWER,
AMDGPU_PP_SENSOR_GPU_INPUT_POWER,
AMDGPU_PP_SENSOR_SS_APU_SHARE,
AMDGPU_PP_SENSOR_SS_DGPU_SHARE,
diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c 
b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
index bb42851592291..8133d968f75b9 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
@@ -2796,7 +2796,7 @@ static ssize_t amdgpu_hwmon_show_power_avg(struct device 
*dev,
 {
unsigned int val;
 
-   val = amdgpu_hwmon_get_power(dev, AMDGPU_PP_SENSOR_GPU_POWER);
+   val = amdgpu_hwmon_get_power(dev, AMDGPU_PP_SENSOR_GPU_AVG_POWER);
if (val < 0)
return val;
 
@@ -3460,7 +3460,7 @@ static int amdgpu_debugfs_pm_info_pp(struct seq_file *m, 
struct amdgpu_device *a
if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VDDNB, (void 
*), ))
seq_printf(m, "\t%u mV (VDDNB)\n", value);
size = sizeof(uint32_t);
-   if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_POWER, (void 
*), ))
+   if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_AVG_POWER, (void 
*), ))
seq_printf(m, "\t%u.%u W (average GPU)\n", query >> 8, query & 
0xff);
size = sizeof(value);
seq_printf(m, "\n");
diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c 
b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c
index ad40395be6028..3b33af30eb0fb 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c
+++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c
@@ -2141,7 +2141,7 @@ static int vega20_get_gpu_power(struct pp_hwmgr *hwmgr, 
int idx,
 
/* For the 40.46 release, they changed the value name */
switch (idx) {
-   case AMDGPU_PP_SENSOR_GPU_POWER:
+   case AMDGPU_PP_SENSOR_GPU_AVG_POWER:
if (hwmgr->smu_version == 0x282e00)
*query = metrics_table.AverageSocketPower << 8;
else
@@ -2260,7 +2260,7 @@ static int vega20_read_sensor(struct pp_hwmgr *hwmgr, int 
idx,
*((uint32_t *)value) = data->vce_power_gated ? 0 : 1;
*size = 4;
break;
-   case AMDGPU_PP_SENSOR_GPU_POWER:
+   case AMDGPU_PP_SENSOR_GPU_AVG_POWER:
case AMDGPU_PP_SENSOR_GPU_INPUT_POWER:
*size = 16;
ret = vega20_get_gpu_power(hwmgr, idx, (uint32_t *)value);
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c

[PATCH 4/9] drm/amd: Fix SMU 13.0.4/13.0.11 GPU metrics average power

2023-08-10 Thread Mario Limonciello

The average power for the GPU metrics sysfs file contains the input
power, not the average power.  The member that is set is called average
power, though, so set it to the correct value.

Signed-off-by: Mario Limonciello 
---
 drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c
index a4e87984645ec..0951659299c15 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c
@@ -257,7 +257,7 @@ static ssize_t smu_v13_0_4_get_gpu_metrics(struct 
smu_context *smu,
gpu_metrics->average_gfx_activity = metrics.GfxActivity;
gpu_metrics->average_mm_activity = metrics.UvdActivity;
 
-   gpu_metrics->average_socket_power = metrics.CurrentSocketPower;
+   gpu_metrics->average_socket_power = metrics.AverageSocketPower;
gpu_metrics->average_gfx_power = metrics.Power[0];
gpu_metrics->average_soc_power = metrics.Power[1];
memcpy(_metrics->average_core_power[0],
-- 
2.34.1

[PATCH 5/9] drm/amd: Drop unnecessary helper for aldebaran

2023-08-10 Thread Mario Limonciello

aldebaran_get_gpu_power() is called from only one place and just calls
aldebaran_get_smu_metrics_data(), so drop the helper.

Signed-off-by: Mario Limonciello 
---
 .../gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c| 15 +++
 1 file changed, 3 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
index 4e5043b6ce40b..0dbb1a1c25d24 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
@@ -1094,16 +1094,6 @@ static int aldebaran_get_current_activity_percent(struct 
smu_context *smu,
return ret;
 }
 
-static int aldebaran_get_gpu_power(struct smu_context *smu, uint32_t *value)
-{
-   if (!value)
-   return -EINVAL;
-
-   return aldebaran_get_smu_metrics_data(smu,
- METRICS_AVERAGE_SOCKETPOWER,
- value);
-}
-
 static int aldebaran_thermal_get_temperature(struct smu_context *smu,
 enum amd_pp_sensors sensor,
 uint32_t *value)
@@ -1157,8 +1147,9 @@ static int aldebaran_read_sensor(struct smu_context *smu,
 (uint32_t *)data);
*size = 4;
break;
-   case AMDGPU_PP_SENSOR_GPU_POWER:
-   ret = aldebaran_get_gpu_power(smu, (uint32_t *)data);
+   ret = aldebaran_get_smu_metrics_data(smu,
+
METRICS_AVERAGE_SOCKETPOWER,
+(uint32_t *)data);
*size = 4;
break;
case AMDGPU_PP_SENSOR_HOTSPOT_TEMP:
-- 
2.34.1

[PATCH 3/9] drm/amd: Introduce `AMDGPU_PP_SENSOR_GPU_INPUT_POWER`

2023-08-10 Thread Mario Limonciello

Some GPUs have been overloading average power values and input power
values. To disambiguate these, introduce a new
`AMDGPU_PP_SENSOR_GPU_INPUT_POWER` and update the GPUs that report input
power to use it instead of average power.

Signed-off-by: Mario Limonciello 
---
 drivers/gpu/drm/amd/include/kgd_pp_interface.h  |  1 +
 drivers/gpu/drm/amd/pm/amdgpu_pm.c  |  2 +-
 .../gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c |  2 +-
 .../drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c   |  2 +-
 .../drm/amd/pm/powerplay/hwmgr/vega12_hwmgr.c   |  2 +-
 .../drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c   |  2 +-
 drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h   |  1 +
 .../gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c   |  1 +
 .../drm/amd/pm/swsmu/smu11/cyan_skillfish_ppt.c | 12 +++-
 drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c |  1 +
 .../drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c |  1 +
 .../gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c| 10 ++
 drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c |  6 +++---
 .../gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c  |  1 +
 .../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c|  1 +
 .../drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c|  9 +
 .../drm/amd/pm/swsmu/smu13/smu_v13_0_5_ppt.c|  6 +++---
 .../drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c| 17 +
 .../drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c|  1 +
 .../drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c|  6 +++---
 20 files changed, 57 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h 
b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
index 90989405eddcf..6582cdf2736f3 100644
--- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
@@ -133,6 +133,7 @@ enum amd_pp_sensors {
AMDGPU_PP_SENSOR_VCE_POWER,
AMDGPU_PP_SENSOR_UVD_POWER,
AMDGPU_PP_SENSOR_GPU_POWER,
+   AMDGPU_PP_SENSOR_GPU_INPUT_POWER,
AMDGPU_PP_SENSOR_SS_APU_SHARE,
AMDGPU_PP_SENSOR_SS_DGPU_SHARE,
AMDGPU_PP_SENSOR_STABLE_PSTATE_SCLK,
diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c 
b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
index 816f034cfe3f1..bb42851592291 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
@@ -2809,7 +2809,7 @@ static ssize_t amdgpu_hwmon_show_power_input(struct 
device *dev,
 {
unsigned int val;
 
-   val = amdgpu_hwmon_get_power(dev, AMDGPU_PP_SENSOR_GPU_POWER);
+   val = amdgpu_hwmon_get_power(dev, AMDGPU_PP_SENSOR_GPU_INPUT_POWER);
if (val < 0)
return val;
 
diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c 
b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c
index c91b2a3966cdb..5a2371484a58c 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c
+++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c
@@ -4039,7 +4039,7 @@ static int smu7_read_sensor(struct pp_hwmgr *hwmgr, int 
idx,
*((uint32_t *)value) = data->vce_power_gated ? 0 : 1;
*size = 4;
return 0;
-   case AMDGPU_PP_SENSOR_GPU_POWER:
+   case AMDGPU_PP_SENSOR_GPU_INPUT_POWER:
return smu7_get_gpu_power(hwmgr, (uint32_t *)value);
case AMDGPU_PP_SENSOR_VDDGFX:
if ((data->vr_config & VRCONF_VDDGFX_MASK) ==
diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c 
b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c
index 52ae6fa2d2a6d..6d6bc6a380b36 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c
+++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c
@@ -3966,7 +3966,7 @@ static int vega10_read_sensor(struct pp_hwmgr *hwmgr, int 
idx,
*((uint32_t *)value) = data->vce_power_gated ? 0 : 1;
*size = 4;
break;
-   case AMDGPU_PP_SENSOR_GPU_POWER:
+   case AMDGPU_PP_SENSOR_GPU_INPUT_POWER:
ret = vega10_get_gpu_power(hwmgr, (uint32_t *)value);
break;
case AMDGPU_PP_SENSOR_VDDGFX:
diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega12_hwmgr.c 
b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega12_hwmgr.c
index 4bd573d815ff5..460067933de2e 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega12_hwmgr.c
+++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega12_hwmgr.c
@@ -1529,7 +1529,7 @@ static int vega12_read_sensor(struct pp_hwmgr *hwmgr, int 
idx,
*((uint32_t *)value) = data->vce_power_gated ? 0 : 1;
*size = 4;
break;
-   case AMDGPU_PP_SENSOR_GPU_POWER:
+   case AMDGPU_PP_SENSOR_GPU_INPUT_POWER:
ret = vega12_get_gpu_power(hwmgr, (uint32_t *)value);
if (!ret)
*size = 4;
diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c 
b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c
index 492ca33637d6f..b6dd7f8daf725 100644
---

[PATCH 6/9] drm/amd: Fix the return for average power on aldebaran

2023-08-10 Thread Mario Limonciello

Aldebaran can only return average socket power for the first die.
The other dies return 0.  Instead of returning a bad value, return
-EOPNOTSUPP so that the attribute will be hidden.

Signed-off-by: Mario Limonciello 
---
 drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
index 0dbb1a1c25d24..1341363ab01a8 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
@@ -625,9 +625,10 @@ static int aldebaran_get_smu_metrics_data(struct 
smu_context *smu,
break;
case METRICS_AVERAGE_SOCKETPOWER:
/* Valid power data is available only from primary die */
-   *value = aldebaran_is_primary(smu) ?
-metrics->AverageSocketPower << 8 :
-0;
+   if (aldebaran_is_primary(smu))
+   *value = metrics->AverageSocketPower << 8;
+   else
+   ret = -EOPNOTSUPP;
break;
case METRICS_TEMPERATURE_EDGE:
*value = metrics->TemperatureEdge *
-- 
2.34.1

[PATCH 1/9] drm/amd: Add amdgpu_hwmon_get_sensor_generic()

2023-08-10 Thread Mario Limonciello

Many sensor functions have a lot of boilerplate checks.  Move these
into a generic amdgpu_hwmon_get_sensor_generic() instead.

No intended functional changes.

Signed-off-by: Mario Limonciello 
---
 drivers/gpu/drm/amd/pm/amdgpu_pm.c | 321 -
 1 file changed, 88 insertions(+), 233 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c 
b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
index 5aed023f74022..c0eda9bf09824 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
@@ -1467,6 +1467,32 @@ static ssize_t amdgpu_set_pp_power_profile_mode(struct 
device *dev,
return -EINVAL;
 }
 
+static unsigned int amdgpu_hwmon_get_sensor_generic(struct amdgpu_device *adev,
+   enum amd_pp_sensors sensor,
+   void *query)
+{
+   int r, size = sizeof(uint32_t);
+
+   if (amdgpu_in_reset(adev))
+   return -EPERM;
+   if (adev->in_suspend && !adev->in_runpm)
+   return -EPERM;
+
+   r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
+   if (r < 0) {
+   pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+   return r;
+   }
+
+   /* get the sensor value */
+   r = amdgpu_dpm_read_sensor(adev, sensor, query, );
+
+   pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
+   pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+
+   return r;
+}
+
 /**
  * DOC: gpu_busy_percent
  *
@@ -1481,26 +1507,10 @@ static ssize_t amdgpu_get_gpu_busy_percent(struct 
device *dev,
 {
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = drm_to_adev(ddev);
-   int r, value, size = sizeof(value);
-
-   if (amdgpu_in_reset(adev))
-   return -EPERM;
-   if (adev->in_suspend && !adev->in_runpm)
-   return -EPERM;
-
-   r = pm_runtime_get_sync(ddev->dev);
-   if (r < 0) {
-   pm_runtime_put_autosuspend(ddev->dev);
-   return r;
-   }
-
-   /* read the IP busy sensor */
-   r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_LOAD,
-  (void *)&value, &size);
-
-   pm_runtime_mark_last_busy(ddev->dev);
-   pm_runtime_put_autosuspend(ddev->dev);
+   unsigned int value;
+   int r;
 
+   r = amdgpu_hwmon_get_sensor_generic(adev, AMDGPU_PP_SENSOR_GPU_LOAD, 
&value);
if (r)
return r;
 
@@ -1521,26 +1531,10 @@ static ssize_t amdgpu_get_mem_busy_percent(struct 
device *dev,
 {
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = drm_to_adev(ddev);
-   int r, value, size = sizeof(value);
-
-   if (amdgpu_in_reset(adev))
-   return -EPERM;
-   if (adev->in_suspend && !adev->in_runpm)
-   return -EPERM;
-
-   r = pm_runtime_get_sync(ddev->dev);
-   if (r < 0) {
-   pm_runtime_put_autosuspend(ddev->dev);
-   return r;
-   }
-
-   /* read the IP busy sensor */
-   r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MEM_LOAD,
-  (void *)&value, &size);
-
-   pm_runtime_mark_last_busy(ddev->dev);
-   pm_runtime_put_autosuspend(ddev->dev);
+   unsigned int value;
+   int r;
 
+   r = amdgpu_hwmon_get_sensor_generic(adev, AMDGPU_PP_SENSOR_MEM_LOAD, 
&value);
if (r)
return r;
 
@@ -1814,45 +1808,15 @@ static ssize_t amdgpu_get_gpu_metrics(struct device 
*dev,
return size;
 }
 
-static int amdgpu_device_read_powershift(struct amdgpu_device *adev,
-   uint32_t *ss_power, bool 
dgpu_share)
-{
-   struct drm_device *ddev = adev_to_drm(adev);
-   uint32_t size;
-   int r = 0;
-
-   if (amdgpu_in_reset(adev))
-   return -EPERM;
-   if (adev->in_suspend && !adev->in_runpm)
-   return -EPERM;
-
-   r = pm_runtime_get_sync(ddev->dev);
-   if (r < 0) {
-   pm_runtime_put_autosuspend(ddev->dev);
-   return r;
-   }
-
-   if (dgpu_share)
-   r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_SS_DGPU_SHARE,
-  (void *)ss_power, &size);
-   else
-   r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_SS_APU_SHARE,
-  (void *)ss_power, &size);
-
-   pm_runtime_mark_last_busy(ddev->dev);
-   pm_runtime_put_autosuspend(ddev->dev);
-   return r;
-}
-
 static int amdgpu_show_powershift_percent(struct device *dev,
-   char *buf, bool dgpu_share)
+   char *buf, enum amd_pp_sensors sensor)
 {
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = drm_to_adev(ddev);
uint32_t ss_power;
int r = 0, i;
 
-   r =

[PATCH 2/9] drm/amd: Add a new hwmon attribute for instantaneous power

2023-08-10 Thread Mario Limonciello

Some GPUs provide support for current power, some average power,
and some both.  To be able to support all these combinations,
introduce a new attribute.

Signed-off-by: Mario Limonciello 
---
 drivers/gpu/drm/amd/pm/amdgpu_pm.c | 17 +
 1 file changed, 17 insertions(+)

diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c 
b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
index c0eda9bf09824..816f034cfe3f1 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
@@ -2803,6 +2803,19 @@ static ssize_t amdgpu_hwmon_show_power_avg(struct device 
*dev,
return sysfs_emit(buf, "%u\n", val);
 }
 
+static ssize_t amdgpu_hwmon_show_power_input(struct device *dev,
+struct device_attribute *attr,
+char *buf)
+{
+   unsigned int val;
+
+   val = amdgpu_hwmon_get_power(dev, AMDGPU_PP_SENSOR_GPU_POWER);
+   if (val < 0)
+   return val;
+
+   return sysfs_emit(buf, "%u\n", val);
+}
+
 static ssize_t amdgpu_hwmon_show_power_cap_min(struct device *dev,
 struct device_attribute *attr,
 char *buf)
@@ -3023,6 +3036,8 @@ static ssize_t amdgpu_hwmon_show_mclk_label(struct device 
*dev,
  *
  * - power1_average: average power used by the SoC in microWatts.  On APUs 
this includes the CPU.
  *
+ * - power1_input: instantaneous power used by the SoC in microWatts.  On APUs 
this includes the CPU.
+ *
  * - power1_cap_min: minimum cap supported in microWatts
  *
  * - power1_cap_max: maximum cap supported in microWatts
@@ -3091,6 +3106,7 @@ static SENSOR_DEVICE_ATTR(in0_label, S_IRUGO, 
amdgpu_hwmon_show_vddgfx_label, NU
 static SENSOR_DEVICE_ATTR(in1_input, S_IRUGO, amdgpu_hwmon_show_vddnb, NULL, 
0);
 static SENSOR_DEVICE_ATTR(in1_label, S_IRUGO, amdgpu_hwmon_show_vddnb_label, 
NULL, 0);
 static SENSOR_DEVICE_ATTR(power1_average, S_IRUGO, 
amdgpu_hwmon_show_power_avg, NULL, 0);
+static SENSOR_DEVICE_ATTR(power1_input, S_IRUGO, 
amdgpu_hwmon_show_power_input, NULL, 0);
 static SENSOR_DEVICE_ATTR(power1_cap_max, S_IRUGO, 
amdgpu_hwmon_show_power_cap_max, NULL, 0);
 static SENSOR_DEVICE_ATTR(power1_cap_min, S_IRUGO, 
amdgpu_hwmon_show_power_cap_min, NULL, 0);
 static SENSOR_DEVICE_ATTR(power1_cap, S_IRUGO | S_IWUSR, 
amdgpu_hwmon_show_power_cap, amdgpu_hwmon_set_power_cap, 0);
@@ -3137,6 +3153,7 @@ static struct attribute *hwmon_attributes[] = {
&sensor_dev_attr_in1_input.dev_attr.attr,
&sensor_dev_attr_in1_label.dev_attr.attr,
&sensor_dev_attr_power1_average.dev_attr.attr,
+   &sensor_dev_attr_power1_input.dev_attr.attr,
&sensor_dev_attr_power1_cap_max.dev_attr.attr,
&sensor_dev_attr_power1_cap_min.dev_attr.attr,
&sensor_dev_attr_power1_cap.dev_attr.attr,
-- 
2.34.1

RE: [PATCH] drm/amdkfd: fix address watch clearing bug for gfx v9.4.2

2023-08-10 Thread Kim, Jonathan

[Public]

Sounds good.

I'd also change:

>> KFD currently relies on MEC FW to clear tcp watch control
>> register by sending MAP_PROCESS packet with 0 of field
>> tcp_watch_cntl to HWS, but if the queue is suspended, the
>> packet will not be sent and the previous value will be
>> left on the register, that will affect the following apps.
>> So the solution is to clear the register as gfx v9 in KFD.

To something like:

KFD currently relies on MEC FW to clear tcp watch control
register on UNMAP_QUEUES.  Due to a FW bug, MEC does not
do this.
So the solution is to clear the register as gfx v9 in KFD.

With those fixed, this patch is Reviewed-by: Jonathan Kim 

Hopefully we can get away with this since every watch instance register is 
supposed to be 1-1 to a process ...
And that there are no race scenarios with trailing exceptions on dynamic watch 
point address changes ...

Thanks,

Jon

> -Original Message-
> From: Huang, JinHuiEric 
> Sent: Thursday, August 10, 2023 6:31 PM
> To: Kim, Jonathan ; Kuehling, Felix
> ; amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH] drm/amdkfd: fix address watch clearing bug for gfx v9.4.2
>
> I will change title to "drm/amdkfd: workaround address watch clearing
> bug for gfx v9.4.2". is it OK?
>
> Regards,
> Eric
>
> On 2023-08-10 18:25, Kim, Jonathan wrote:
> > [Public]
> >
> > Yeah this is a recent bug so this workaround is new.  More rigorous tests
> revealed this is probably a miss on the FW side.  We explicitly requested
> UNMAP_QUEUES unconditionally invalidate watch controls during the
> beginning of design to prevent any watch point racing.
> >
> > Note GFX11 MES calls are different on the surface but under the hood it's
> the same (registers get invalidated on unmap then get updated on map.
> Only difference it's at the queue level).
> >
> > I'm fine with this solution but I think it'd be good to describe this as a
> workaround somewhere (as opposed to a driver issue) so that folks aren't
> scratching their heads later on looking at code for GFX11 and up and
> wondering why we don't nuke the control setting with the KFD for those
> devices.
> >
> > Thanks,
> >
> > Jon
> >
> >> -Original Message-
> >> From: Kuehling, Felix 
> >> Sent: Thursday, August 10, 2023 5:56 PM
> >> To: Huang, JinHuiEric ; Kim, Jonathan
> >> ; amd-gfx@lists.freedesktop.org
> >> Subject: Re: [PATCH] drm/amdkfd: fix address watch clearing bug for gfx
> v9.4.2
> >>
> >> I think Jon is suggesting that the UNMAP_QUEUES command should clear
> the
> >> address watch registers. Requesting such a change from the HWS team
> >> may take a long time.
> >>
> >> That said, when was this workaround implemented and reviewed? Did I
> >> review it as part of Jon's debugger upstreaming patch series? Or did
> >> this come later? This patch only enables the workaround for v9.4.2.
> >>
> >> Regards,
> >> Felix
> >>
> >>
> >> On 2023-08-10 17:52, Eric Huang wrote:
> >>> The problem is the queue is suspended before clearing address watch
> >>> call in KFD, there is not queue preemption and queue resume after
> >>> clearing call, and the test ends. So there is not chance to send
> >>> MAP_PROCESS to HWS. At this point FW has nothing to do. We have
> >>> several test FWs from Tej, none of them works, so I recalled the
> >>> kernel debug log and found out the problem.
> >>>
> >>> GFX11 has different scheduler, when calling clear address watch, KFD
> >>> directly sends the MES_MISC_OP_SET_SHADER_DEBUGGER to MES, it
> >> doesn't
> >>> consider if the queue is suspended. So GFX11 doesn't have this issue.
> >>>
> >>> Regards,
> >>> Eric
> >>>
> >>> On 2023-08-10 17:27, Kim, Jonathan wrote:
>  [AMD Official Use Only - General]
> 
>  This is a strange solution because the MEC should set watch controls
>  as non-valid automatically on queue preemption to avoid this kind of
>  issue in the first place by design.  MAP_PROCESS on resume will take
>  whatever the driver requests.
>  GFX11 has no issue with letting the HWS do this.
> 
>  Are we sure we're not working around some HWS bug?
> 
>  Thanks,
> 
>  Jon
> 
> > -Original Message-
> > From: Kuehling, Felix 
> > Sent: Thursday, August 10, 2023 5:03 PM
> > To: Huang, JinHuiEric ; amd-
> > g...@lists.freedesktop.org
> > Cc: Kim, Jonathan 
> > Subject: Re: [PATCH] drm/amdkfd: fix address watch clearing bug for
> > gfx v9.4.2
> >
> > I think amdgpu_amdkfd_gc_9_4_3.c needs a similar fix. But maybe a
> bit
> > different because it needs to support multiple XCCs.
> >
> > That said, this patch is
> >
> > Reviewed-by: Felix Kuehling 
> >
> >
> > On 2023-08-10 16:47, Eric Huang wrote:
> >> KFD currently relies on MEC FW to clear tcp watch control
> >> register by sending MAP_PROCESS packet with 0 of field
> >> tcp_watch_cntl to HWS, but if the queue is suspended, the
> >> packet will

Re: [PATCH] drm/amdkfd: fix address watch clearing bug for gfx v9.4.2

2023-08-10 Thread Eric Huang

I will change title to "drm/amdkfd: workaround address watch clearing 
bug for gfx v9.4.2". is it OK?


Regards,
Eric

On 2023-08-10 18:25, Kim, Jonathan wrote:

[Public]

Yeah this is a recent bug so this workaround is new.  More rigorous tests 
revealed this is probably a miss on the FW side.  We explicitly requested 
UNMAP_QUEUES unconditionally invalidate watch controls during the beginning of 
design to prevent any watch point racing.

Note GFX11 MES calls are different on the surface but under the hood it's the 
same (registers get invalidated on unmap then get updated on map.  Only 
difference it's at the queue level).

I'm fine with this solution but I think it'd be good to describe this as a 
workaround somewhere (as opposed to a driver issue) so that folks aren't 
scratching their heads later on looking at code for GFX11 and up and wondering 
why we don't nuke the control setting with the KFD for those devices.

Thanks,

Jon


-Original Message-
From: Kuehling, Felix 
Sent: Thursday, August 10, 2023 5:56 PM
To: Huang, JinHuiEric ; Kim, Jonathan
; amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH] drm/amdkfd: fix address watch clearing bug for gfx v9.4.2

I think Jon is suggesting that the UNMAP_QUEUES command should clear the
address watch registers. Requesting such a change from the HWS team
may take a long time.

That said, when was this workaround implemented and reviewed? Did I
review it as part of Jon's debugger upstreaming patch series? Or did
this come later? This patch only enables the workaround for v9.4.2.

Regards,
Felix


On 2023-08-10 17:52, Eric Huang wrote:

The problem is the queue is suspended before clearing address watch
call in KFD, there is not queue preemption and queue resume after
clearing call, and the test ends. So there is not chance to send
MAP_PROCESS to HWS. At this point FW has nothing to do. We have
several test FWs from Tej, none of them works, so I recalled the
kernel debug log and found out the problem.

GFX11 has different scheduler, when calling clear address watch, KFD
directly sends the MES_MISC_OP_SET_SHADER_DEBUGGER to MES, it

doesn't

consider if the queue is suspended. So GFX11 doesn't have this issue.

Regards,
Eric

On 2023-08-10 17:27, Kim, Jonathan wrote:

[AMD Official Use Only - General]

This is a strange solution because the MEC should set watch controls
as non-valid automatically on queue preemption to avoid this kind of
issue in the first place by design.  MAP_PROCESS on resume will take
whatever the driver requests.
GFX11 has no issue with letting the HWS do this.

Are we sure we're not working around some HWS bug?

Thanks,

Jon


-Original Message-
From: Kuehling, Felix 
Sent: Thursday, August 10, 2023 5:03 PM
To: Huang, JinHuiEric ; amd-
g...@lists.freedesktop.org
Cc: Kim, Jonathan 
Subject: Re: [PATCH] drm/amdkfd: fix address watch clearing bug for
gfx v9.4.2

I think amdgpu_amdkfd_gc_9_4_3.c needs a similar fix. But maybe a bit
different because it needs to support multiple XCCs.

That said, this patch is

Reviewed-by: Felix Kuehling 


On 2023-08-10 16:47, Eric Huang wrote:

KFD currently relies on MEC FW to clear tcp watch control
register by sending MAP_PROCESS packet with 0 of field
tcp_watch_cntl to HWS, but if the queue is suspended, the
packet will not be sent and the previous value will be
left on the register, that will affect the following apps.
So the solution is to clear the register as gfx v9 in KFD.

Signed-off-by: Eric Huang 
---
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c | 8 +-

--

1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c

b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c

index e2fed6edbdd0..aff08321e976 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
@@ -163,12 +163,6 @@ static uint32_t

kgd_gfx_aldebaran_set_address_watch(

  return watch_address_cntl;
}

-static uint32_t kgd_gfx_aldebaran_clear_address_watch(struct

amdgpu_device *adev,

- uint32_t watch_id)
-{
-   return 0;
-}
-
const struct kfd2kgd_calls aldebaran_kfd2kgd = {
  .program_sh_mem_settings =

kgd_gfx_v9_program_sh_mem_settings,

  .set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping,
@@ -193,7 +187,7 @@ const struct kfd2kgd_calls aldebaran_kfd2kgd =

{

  .set_wave_launch_trap_override =

kgd_aldebaran_set_wave_launch_trap_override,

  .set_wave_launch_mode = kgd_aldebaran_set_wave_launch_mode,
  .set_address_watch = kgd_gfx_aldebaran_set_address_watch,
-   .clear_address_watch = kgd_gfx_aldebaran_clear_address_watch,
+   .clear_address_watch = kgd_gfx_v9_clear_address_watch,
  .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
  .build_grace_period_packet_info =

kgd_gfx_v9_build_grace_period_packet_info,

  .program_trap_handler_settings =

kgd_gfx_v9_program_trap_handler_settings,

Re: [PATCH] drm/amdkfd: fix address watch clearing bug for gfx v9.4.2

2023-08-10 Thread Eric Huang

No UNMAP_QUEUES command is sent for queue preemption because 
the queue is already suspended and the test is close to its end. Function 
unmap_queue_cpsch will do nothing after that.


The workaround is new and only for gfx v9.4.2, because debugger tests 
has changed to check if all address watch points are correctly set, i.e. 
test A sets more than one watchpoint and leave, the following test B 
only sets one watchpoint, and test A's setting will cause more than one 
watchpoint event, so test B check out and report error on second or 
third watchpoint not set by itself.


Regards,
Eric

On 2023-08-10 17:56, Felix Kuehling wrote:
I think Jon is suggesting that the UNMAP_QUEUES command should clear 
the address watch registers. Requesting such a change from the HWS 
team may take a long time.


That said, when was this workaround implemented and reviewed? Did I 
review it as part of Jon's debugger upstreaming patch series? Or did 
this come later? This patch only enables the workaround for v9.4.2.


Regards,
  Felix


On 2023-08-10 17:52, Eric Huang wrote:
The problem is the queue is suspended before clearing address watch 
call in KFD, there is not queue preemption and queue resume after 
clearing call, and the test ends. So there is not chance to send 
MAP_PROCESS to HWS. At this point FW has nothing to do. We have 
several test FWs from Tej, none of them works, so I recalled the 
kernel debug log and found out the problem.


GFX11 has different scheduler, when calling clear address watch, KFD 
directly sends the MES_MISC_OP_SET_SHADER_DEBUGGER to MES, it doesn't 
consider if the queue is suspended. So GFX11 doesn't have this issue.


Regards,
Eric

On 2023-08-10 17:27, Kim, Jonathan wrote:

[AMD Official Use Only - General]

This is a strange solution because the MEC should set watch controls 
as non-valid automatically on queue preemption to avoid this kind of 
issue in the first place by design. MAP_PROCESS on resume will take 
whatever the driver requests.

GFX11 has no issue with letting the HWS do this.

Are we sure we're not working around some HWS bug?

Thanks,

Jon


-Original Message-
From: Kuehling, Felix 
Sent: Thursday, August 10, 2023 5:03 PM
To: Huang, JinHuiEric ; amd-
g...@lists.freedesktop.org
Cc: Kim, Jonathan 
Subject: Re: [PATCH] drm/amdkfd: fix address watch clearing bug for 
gfx v9.4.2


I think amdgpu_amdkfd_gc_9_4_3.c needs a similar fix. But maybe a bit
different because it needs to support multiple XCCs.

That said, this patch is

Reviewed-by: Felix Kuehling 


On 2023-08-10 16:47, Eric Huang wrote:

KFD currently relies on MEC FW to clear tcp watch control
register by sending MAP_PROCESS packet with 0 of field
tcp_watch_cntl to HWS, but if the queue is suspended, the
packet will not be sent and the previous value will be
left on the register, that will affect the following apps.
So the solution is to clear the register as gfx v9 in KFD.

Signed-off-by: Eric Huang 
---
   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c | 8 +---
   1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c

b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c

index e2fed6edbdd0..aff08321e976 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
@@ -163,12 +163,6 @@ static uint32_t

kgd_gfx_aldebaran_set_address_watch(

 return watch_address_cntl;
   }

-static uint32_t kgd_gfx_aldebaran_clear_address_watch(struct

amdgpu_device *adev,

- uint32_t watch_id)
-{
-   return 0;
-}
-
   const struct kfd2kgd_calls aldebaran_kfd2kgd = {
 .program_sh_mem_settings =

kgd_gfx_v9_program_sh_mem_settings,

 .set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping,
@@ -193,7 +187,7 @@ const struct kfd2kgd_calls aldebaran_kfd2kgd = {
 .set_wave_launch_trap_override =

kgd_aldebaran_set_wave_launch_trap_override,

 .set_wave_launch_mode = kgd_aldebaran_set_wave_launch_mode,
 .set_address_watch = kgd_gfx_aldebaran_set_address_watch,
-   .clear_address_watch = kgd_gfx_aldebaran_clear_address_watch,
+   .clear_address_watch = kgd_gfx_v9_clear_address_watch,
 .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
 .build_grace_period_packet_info =

kgd_gfx_v9_build_grace_period_packet_info,

 .program_trap_handler_settings =

kgd_gfx_v9_program_trap_handler_settings,

RE: [PATCH] drm/amdkfd: fix address watch clearing bug for gfx v9.4.2

2023-08-10 Thread Kim, Jonathan

[Public]

Yeah this is a recent bug so this workaround is new.  More rigorous tests 
revealed this is probably a miss on the FW side.  We explicitly requested 
UNMAP_QUEUES unconditionally invalidate watch controls during the beginning of 
design to prevent any watch point racing.

Note GFX11 MES calls are different on the surface but under the hood it's the 
same (registers get invalidated on unmap then get updated on map.  Only 
difference it's at the queue level).

I'm fine with this solution but I think it'd be good to describe this as a 
workaround somewhere (as opposed to a driver issue) so that folks aren't 
scratching their heads later on looking at code for GFX11 and up and wondering 
why we don't nuke the control setting with the KFD for those devices.

Thanks,

Jon

> -Original Message-
> From: Kuehling, Felix 
> Sent: Thursday, August 10, 2023 5:56 PM
> To: Huang, JinHuiEric ; Kim, Jonathan
> ; amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH] drm/amdkfd: fix address watch clearing bug for gfx v9.4.2
>
> I think Jon is suggesting that the UNMAP_QUEUES command should clear the
> address watch registers. Requesting such a change from the HWS team
> may take a long time.
>
> That said, when was this workaround implemented and reviewed? Did I
> review it as part of Jon's debugger upstreaming patch series? Or did
> this come later? This patch only enables the workaround for v9.4.2.
>
> Regards,
>Felix
>
>
> On 2023-08-10 17:52, Eric Huang wrote:
> > The problem is the queue is suspended before clearing address watch
> > call in KFD, there is not queue preemption and queue resume after
> > clearing call, and the test ends. So there is not chance to send
> > MAP_PROCESS to HWS. At this point FW has nothing to do. We have
> > several test FWs from Tej, none of them works, so I recalled the
> > kernel debug log and found out the problem.
> >
> > GFX11 has different scheduler, when calling clear address watch, KFD
> > directly sends the MES_MISC_OP_SET_SHADER_DEBUGGER to MES, it
> doesn't
> > consider if the queue is suspended. So GFX11 doesn't have this issue.
> >
> > Regards,
> > Eric
> >
> > On 2023-08-10 17:27, Kim, Jonathan wrote:
> >> [AMD Official Use Only - General]
> >>
> >> This is a strange solution because the MEC should set watch controls
> >> as non-valid automatically on queue preemption to avoid this kind of
> >> issue in the first place by design.  MAP_PROCESS on resume will take
> >> whatever the driver requests.
> >> GFX11 has no issue with letting the HWS do this.
> >>
> >> Are we sure we're not working around some HWS bug?
> >>
> >> Thanks,
> >>
> >> Jon
> >>
> >>> -Original Message-
> >>> From: Kuehling, Felix 
> >>> Sent: Thursday, August 10, 2023 5:03 PM
> >>> To: Huang, JinHuiEric ; amd-
> >>> g...@lists.freedesktop.org
> >>> Cc: Kim, Jonathan 
> >>> Subject: Re: [PATCH] drm/amdkfd: fix address watch clearing bug for
> >>> gfx v9.4.2
> >>>
> >>> I think amdgpu_amdkfd_gc_9_4_3.c needs a similar fix. But maybe a bit
> >>> different because it needs to support multiple XCCs.
> >>>
> >>> That said, this patch is
> >>>
> >>> Reviewed-by: Felix Kuehling 
> >>>
> >>>
> >>> On 2023-08-10 16:47, Eric Huang wrote:
>  KFD currently relies on MEC FW to clear tcp watch control
>  register by sending MAP_PROCESS packet with 0 of field
>  tcp_watch_cntl to HWS, but if the queue is suspended, the
>  packet will not be sent and the previous value will be
>  left on the register, that will affect the following apps.
>  So the solution is to clear the register as gfx v9 in KFD.
> 
>  Signed-off-by: Eric Huang 
>  ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c | 8 +-
> --
> 1 file changed, 1 insertion(+), 7 deletions(-)
> 
>  diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
> >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
>  index e2fed6edbdd0..aff08321e976 100644
>  --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
>  +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
>  @@ -163,12 +163,6 @@ static uint32_t
> >>> kgd_gfx_aldebaran_set_address_watch(
>   return watch_address_cntl;
> }
> 
>  -static uint32_t kgd_gfx_aldebaran_clear_address_watch(struct
> >>> amdgpu_device *adev,
>  - uint32_t watch_id)
>  -{
>  -   return 0;
>  -}
>  -
> const struct kfd2kgd_calls aldebaran_kfd2kgd = {
>   .program_sh_mem_settings =
> >>> kgd_gfx_v9_program_sh_mem_settings,
>   .set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping,
>  @@ -193,7 +187,7 @@ const struct kfd2kgd_calls aldebaran_kfd2kgd =
> {
>   .set_wave_launch_trap_override =
> >>> kgd_aldebaran_set_wave_launch_trap_override,
>   .set_wave_launch_mode = kgd_aldebaran_set_wave_launch_mode,
>   .set_address_watch = kgd_gfx_aldebaran_set_address_watch,
>  -   .clear_address_watch =

Re: [PATCH] drm/amdkfd: fix address watch clearing bug for gfx v9.4.2

2023-08-10 Thread Felix Kuehling

I think Jon is suggesting that the UNMAP_QUEUES command should clear the 
address watch registers. Requesting such a change from the HWS team 
may take a long time.


That said, when was this workaround implemented and reviewed? Did I 
review it as part of Jon's debugger upstreaming patch series? Or did 
this come later? This patch only enables the workaround for v9.4.2.


Regards,
  Felix


On 2023-08-10 17:52, Eric Huang wrote:
The problem is the queue is suspended before clearing address watch 
call in KFD, there is not queue preemption and queue resume after 
clearing call, and the test ends. So there is not chance to send 
MAP_PROCESS to HWS. At this point FW has nothing to do. We have 
several test FWs from Tej, none of them works, so I recalled the 
kernel debug log and found out the problem.


GFX11 has different scheduler, when calling clear address watch, KFD 
directly sends the MES_MISC_OP_SET_SHADER_DEBUGGER to MES, it doesn't 
consider if the queue is suspended. So GFX11 doesn't have this issue.


Regards,
Eric

On 2023-08-10 17:27, Kim, Jonathan wrote:

[AMD Official Use Only - General]

This is a strange solution because the MEC should set watch controls 
as non-valid automatically on queue preemption to avoid this kind of 
issue in the first place by design.  MAP_PROCESS on resume will take 
whatever the driver requests.

GFX11 has no issue with letting the HWS do this.

Are we sure we're not working around some HWS bug?

Thanks,

Jon


-Original Message-
From: Kuehling, Felix 
Sent: Thursday, August 10, 2023 5:03 PM
To: Huang, JinHuiEric ; amd-
g...@lists.freedesktop.org
Cc: Kim, Jonathan 
Subject: Re: [PATCH] drm/amdkfd: fix address watch clearing bug for 
gfx v9.4.2


I think amdgpu_amdkfd_gc_9_4_3.c needs a similar fix. But maybe a bit
different because it needs to support multiple XCCs.

That said, this patch is

Reviewed-by: Felix Kuehling 


On 2023-08-10 16:47, Eric Huang wrote:

KFD currently relies on MEC FW to clear tcp watch control
register by sending MAP_PROCESS packet with 0 of field
tcp_watch_cntl to HWS, but if the queue is suspended, the
packet will not be sent and the previous value will be
left on the register, that will affect the following apps.
So the solution is to clear the register as gfx v9 in KFD.

Signed-off-by: Eric Huang 
---
   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c | 8 +---
   1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c

b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c

index e2fed6edbdd0..aff08321e976 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
@@ -163,12 +163,6 @@ static uint32_t

kgd_gfx_aldebaran_set_address_watch(

 return watch_address_cntl;
   }

-static uint32_t kgd_gfx_aldebaran_clear_address_watch(struct

amdgpu_device *adev,

- uint32_t watch_id)
-{
-   return 0;
-}
-
   const struct kfd2kgd_calls aldebaran_kfd2kgd = {
 .program_sh_mem_settings =

kgd_gfx_v9_program_sh_mem_settings,

 .set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping,
@@ -193,7 +187,7 @@ const struct kfd2kgd_calls aldebaran_kfd2kgd = {
 .set_wave_launch_trap_override =

kgd_aldebaran_set_wave_launch_trap_override,

 .set_wave_launch_mode = kgd_aldebaran_set_wave_launch_mode,
 .set_address_watch = kgd_gfx_aldebaran_set_address_watch,
-   .clear_address_watch = kgd_gfx_aldebaran_clear_address_watch,
+   .clear_address_watch = kgd_gfx_v9_clear_address_watch,
 .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
 .build_grace_period_packet_info =

kgd_gfx_v9_build_grace_period_packet_info,

 .program_trap_handler_settings =

kgd_gfx_v9_program_trap_handler_settings,

Re: [PATCH] drm/amdkfd: fix address watch clearing bug for gfx v9.4.2

2023-08-10 Thread Eric Huang

The problem is the queue is suspended before clearing address watch call 
in KFD, there is not queue preemption and queue resume after clearing 
call, and the test ends. So there is not chance to send MAP_PROCESS to 
HWS. At this point FW has nothing to do. We have several test FWs from 
Tej, none of them works, so I recalled the kernel debug log and found 
out the problem.


GFX11 has different scheduler, when calling clear address watch, KFD 
directly sends the MES_MISC_OP_SET_SHADER_DEBUGGER to MES, it doesn't 
consider if the queue is suspended. So GFX11 doesn't have this issue.


Regards,
Eric

On 2023-08-10 17:27, Kim, Jonathan wrote:

[AMD Official Use Only - General]

This is a strange solution because the MEC should set watch controls as 
non-valid automatically on queue preemption to avoid this kind of issue in the 
first place by design.  MAP_PROCESS on resume will take whatever the driver 
requests.
GFX11 has no issue with letting the HWS do this.

Are we sure we're not working around some HWS bug?

Thanks,

Jon


-Original Message-
From: Kuehling, Felix 
Sent: Thursday, August 10, 2023 5:03 PM
To: Huang, JinHuiEric ; amd-
g...@lists.freedesktop.org
Cc: Kim, Jonathan 
Subject: Re: [PATCH] drm/amdkfd: fix address watch clearing bug for gfx v9.4.2

I think amdgpu_amdkfd_gc_9_4_3.c needs a similar fix. But maybe a bit
different because it needs to support multiple XCCs.

That said, this patch is

Reviewed-by: Felix Kuehling 


On 2023-08-10 16:47, Eric Huang wrote:

KFD currently relies on MEC FW to clear tcp watch control
register by sending MAP_PROCESS packet with 0 of field
tcp_watch_cntl to HWS, but if the queue is suspended, the
packet will not be sent and the previous value will be
left on the register, that will affect the following apps.
So the solution is to clear the register as gfx v9 in KFD.

Signed-off-by: Eric Huang 
---
   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c | 8 +---
   1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c

b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c

index e2fed6edbdd0..aff08321e976 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
@@ -163,12 +163,6 @@ static uint32_t

kgd_gfx_aldebaran_set_address_watch(

 return watch_address_cntl;
   }

-static uint32_t kgd_gfx_aldebaran_clear_address_watch(struct

amdgpu_device *adev,

- uint32_t watch_id)
-{
-   return 0;
-}
-
   const struct kfd2kgd_calls aldebaran_kfd2kgd = {
 .program_sh_mem_settings =

kgd_gfx_v9_program_sh_mem_settings,

 .set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping,
@@ -193,7 +187,7 @@ const struct kfd2kgd_calls aldebaran_kfd2kgd = {
 .set_wave_launch_trap_override =

kgd_aldebaran_set_wave_launch_trap_override,

 .set_wave_launch_mode = kgd_aldebaran_set_wave_launch_mode,
 .set_address_watch = kgd_gfx_aldebaran_set_address_watch,
-   .clear_address_watch = kgd_gfx_aldebaran_clear_address_watch,
+   .clear_address_watch = kgd_gfx_v9_clear_address_watch,
 .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
 .build_grace_period_packet_info =

kgd_gfx_v9_build_grace_period_packet_info,

 .program_trap_handler_settings =

kgd_gfx_v9_program_trap_handler_settings,

RE: [PATCH] drm/amdkfd: fix address watch clearing bug for gfx v9.4.2

2023-08-10 Thread Kim, Jonathan

[AMD Official Use Only - General]

This is a strange solution because the MEC should set watch controls as 
non-valid automatically on queue preemption to avoid this kind of issue in the 
first place by design.  MAP_PROCESS on resume will take whatever the driver 
requests.
GFX11 has no issue with letting the HWS do this.

Are we sure we're not working around some HWS bug?

Thanks,

Jon

> -----Original Message-----
> From: Kuehling, Felix 
> Sent: Thursday, August 10, 2023 5:03 PM
> To: Huang, JinHuiEric ; amd-
> g...@lists.freedesktop.org
> Cc: Kim, Jonathan 
> Subject: Re: [PATCH] drm/amdkfd: fix address watch clearing bug for gfx v9.4.2
>
> I think amdgpu_amdkfd_gc_9_4_3.c needs a similar fix. But maybe a bit
> different because it needs to support multiple XCCs.
>
> That said, this patch is
>
> Reviewed-by: Felix Kuehling 
>
>
> On 2023-08-10 16:47, Eric Huang wrote:
> > KFD currently relies on MEC FW to clear tcp watch control
> > register by sending MAP_PROCESS packet with 0 of field
> > tcp_watch_cntl to HWS, but if the queue is suspended, the
> > packet will not be sent and the previous value will be
> > left on the register, that will affect the following apps.
> > So the solution is to clear the register as gfx v9 in KFD.
> >
> > Signed-off-by: Eric Huang 
> > ---
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c | 8 +---
> >   1 file changed, 1 insertion(+), 7 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
> > index e2fed6edbdd0..aff08321e976 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
> > @@ -163,12 +163,6 @@ static uint32_t
> kgd_gfx_aldebaran_set_address_watch(
> > return watch_address_cntl;
> >   }
> >
> > -static uint32_t kgd_gfx_aldebaran_clear_address_watch(struct
> amdgpu_device *adev,
> > - uint32_t watch_id)
> > -{
> > -   return 0;
> > -}
> > -
> >   const struct kfd2kgd_calls aldebaran_kfd2kgd = {
> > .program_sh_mem_settings =
> kgd_gfx_v9_program_sh_mem_settings,
> > .set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping,
> > @@ -193,7 +187,7 @@ const struct kfd2kgd_calls aldebaran_kfd2kgd = {
> > .set_wave_launch_trap_override =
> kgd_aldebaran_set_wave_launch_trap_override,
> > .set_wave_launch_mode = kgd_aldebaran_set_wave_launch_mode,
> > .set_address_watch = kgd_gfx_aldebaran_set_address_watch,
> > -   .clear_address_watch = kgd_gfx_aldebaran_clear_address_watch,
> > +   .clear_address_watch = kgd_gfx_v9_clear_address_watch,
> > .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
> > .build_grace_period_packet_info =
> kgd_gfx_v9_build_grace_period_packet_info,
> > .program_trap_handler_settings =
> kgd_gfx_v9_program_trap_handler_settings,

Re: [PATCH] drm/amdkfd: fix address watch clearing bug for gfx v9.4.2

2023-08-10 Thread Eric Huang


Yes. I will send out the fix for gc v9.4.3 later. Thanks for your review.

Eric

On 2023-08-10 17:02, Felix Kuehling wrote:
I think amdgpu_amdkfd_gc_9_4_3.c needs a similar fix. But maybe a bit 
different because it needs to support multiple XCCs.


That said, this patch is

Reviewed-by: Felix Kuehling 


On 2023-08-10 16:47, Eric Huang wrote:

KFD currently relies on MEC FW to clear tcp watch control
register by sending MAP_PROCESS packet with 0 of field
tcp_watch_cntl to HWS, but if the queue is suspended, the
packet will not be sent and the previous value will be
left on the register, that will affect the following apps.
So the solution is to clear the register as gfx v9 in KFD.

Signed-off-by: Eric Huang 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c | 8 +---
  1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c

index e2fed6edbdd0..aff08321e976 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
@@ -163,12 +163,6 @@ static uint32_t 
kgd_gfx_aldebaran_set_address_watch(

  return watch_address_cntl;
  }
  -static uint32_t kgd_gfx_aldebaran_clear_address_watch(struct 
amdgpu_device *adev,

-  uint32_t watch_id)
-{
-    return 0;
-}
-
  const struct kfd2kgd_calls aldebaran_kfd2kgd = {
  .program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings,
  .set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping,
@@ -193,7 +187,7 @@ const struct kfd2kgd_calls aldebaran_kfd2kgd = {
  .set_wave_launch_trap_override = 
kgd_aldebaran_set_wave_launch_trap_override,

  .set_wave_launch_mode = kgd_aldebaran_set_wave_launch_mode,
  .set_address_watch = kgd_gfx_aldebaran_set_address_watch,
-    .clear_address_watch = kgd_gfx_aldebaran_clear_address_watch,
+    .clear_address_watch = kgd_gfx_v9_clear_address_watch,
  .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
  .build_grace_period_packet_info = 
kgd_gfx_v9_build_grace_period_packet_info,
  .program_trap_handler_settings = 
kgd_gfx_v9_program_trap_handler_settings,

Re: [PATCH] drm/amdkfd: fix address watch clearing bug for gfx v9.4.2

2023-08-10 Thread Felix Kuehling

I think amdgpu_amdkfd_gc_9_4_3.c needs a similar fix. But maybe a bit 
different because it needs to support multiple XCCs.


That said, this patch is

Reviewed-by: Felix Kuehling 


On 2023-08-10 16:47, Eric Huang wrote:

KFD currently relies on MEC FW to clear tcp watch control
register by sending MAP_PROCESS packet with 0 of field
tcp_watch_cntl to HWS, but if the queue is suspended, the
packet will not be sent and the previous value will be
left on the register, that will affect the following apps.
So the solution is to clear the register as gfx v9 in KFD.

Signed-off-by: Eric Huang 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c | 8 +---
  1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
index e2fed6edbdd0..aff08321e976 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
@@ -163,12 +163,6 @@ static uint32_t kgd_gfx_aldebaran_set_address_watch(
return watch_address_cntl;
  }
  
-static uint32_t kgd_gfx_aldebaran_clear_address_watch(struct amdgpu_device *adev,

- uint32_t watch_id)
-{
-   return 0;
-}
-
  const struct kfd2kgd_calls aldebaran_kfd2kgd = {
.program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings,
.set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping,
@@ -193,7 +187,7 @@ const struct kfd2kgd_calls aldebaran_kfd2kgd = {
.set_wave_launch_trap_override = 
kgd_aldebaran_set_wave_launch_trap_override,
.set_wave_launch_mode = kgd_aldebaran_set_wave_launch_mode,
.set_address_watch = kgd_gfx_aldebaran_set_address_watch,
-   .clear_address_watch = kgd_gfx_aldebaran_clear_address_watch,
+   .clear_address_watch = kgd_gfx_v9_clear_address_watch,
.get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
.build_grace_period_packet_info = 
kgd_gfx_v9_build_grace_period_packet_info,
.program_trap_handler_settings = 
kgd_gfx_v9_program_trap_handler_settings,

[PATCH] drm/amdkfd: fix address watch clearing bug for gfx v9.4.2

2023-08-10 Thread Eric Huang

KFD currently relies on MEC FW to clear tcp watch control
register by sending MAP_PROCESS packet with 0 of field
tcp_watch_cntl to HWS, but if the queue is suspended, the
packet will not be sent and the previous value will be
left on the register, that will affect the following apps.
So the solution is to clear the register as gfx v9 in KFD.

Signed-off-by: Eric Huang 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c | 8 +---
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
index e2fed6edbdd0..aff08321e976 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
@@ -163,12 +163,6 @@ static uint32_t kgd_gfx_aldebaran_set_address_watch(
return watch_address_cntl;
 }
 
-static uint32_t kgd_gfx_aldebaran_clear_address_watch(struct amdgpu_device 
*adev,
- uint32_t watch_id)
-{
-   return 0;
-}
-
 const struct kfd2kgd_calls aldebaran_kfd2kgd = {
.program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings,
.set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping,
@@ -193,7 +187,7 @@ const struct kfd2kgd_calls aldebaran_kfd2kgd = {
.set_wave_launch_trap_override = 
kgd_aldebaran_set_wave_launch_trap_override,
.set_wave_launch_mode = kgd_aldebaran_set_wave_launch_mode,
.set_address_watch = kgd_gfx_aldebaran_set_address_watch,
-   .clear_address_watch = kgd_gfx_aldebaran_clear_address_watch,
+   .clear_address_watch = kgd_gfx_v9_clear_address_watch,
.get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
.build_grace_period_packet_info = 
kgd_gfx_v9_build_grace_period_packet_info,
.program_trap_handler_settings = 
kgd_gfx_v9_program_trap_handler_settings,
-- 
2.34.1

Re: [PATCH] drm/amdkfd: fix double assign skip process context clear

2023-08-10 Thread Felix Kuehling


On 2023-08-10 15:03, Jonathan Kim wrote:

Remove redundant assignment when skipping process ctx clear.

Signed-off-by: Jonathan Kim 


Reviewed-by: Felix Kuehling 



---
  drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 1 -
  1 file changed, 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index aa5091f18681..89c2bfcb36ce 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -227,7 +227,6 @@ static int add_queue_mes(struct device_queue_manager *dqm, 
struct queue *q,
queue_input.tba_addr = qpd->tba_addr;
queue_input.tma_addr = qpd->tma_addr;
queue_input.trap_en = !kfd_dbg_has_cwsr_workaround(q->device);
-   queue_input.skip_process_ctx_clear = 
qpd->pqm->process->debug_trap_enabled;
queue_input.skip_process_ctx_clear = 
qpd->pqm->process->debug_trap_enabled ||
 
kfd_dbg_has_ttmps_always_setup(q->device);

Re: [PATCH v3 0/4] Add GPU page fault query interface

2023-08-10 Thread Alex Deucher

Ping?

On Thu, Jul 27, 2023 at 2:11 PM Alex Deucher  wrote:
>
> This patch set adds support for an application to query GPU
> page faults.  It's useful for debugging and there are
> vulkan extensions that could make use of this.  Preliminary
> user space code which uses this can be found here:
> https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23238
> https://gitlab.freedesktop.org/mesa/drm/-/merge_requests/298
>
> Note, that I made a small change to the vmhub definition to
> decouple it from how the kernel tracks vmhubs so that we have
> a consistent user view even if we decide to add more vmhubs
> like we recently did for gfx 9.4.3.
>
> I've also pushed the changes to:
> https://gitlab.freedesktop.org/agd5f/linux/-/commits/gpu_fault_info_ioctl
>
> Open question, currently we just expose the raw GPU fault status
> register value for each GPU so UMDs need GPU specific knowledge to decode
> it, although it's largely the same across generations.  One option would be to
> translate to a generic GPU independent fault status.  Opinions?
>
> v2:
> - Fix spelling typos noted by Guchun
> v3:
> - Add locking in IOCTL query
> - Only update cache if fault status is valid
>
> Alex Deucher (4):
>   drm/amdgpu: add cached GPU fault structure to vm struct
>   drm/amdgpu: cache gpuvm fault information for gmc7+
>   drm/amdgpu: add new INFO ioctl query for the last GPU page fault
>   drm/amdgpu: refine fault cache updates
>
>  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c |  3 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 20 ++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 50 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  | 31 +--
>  drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c  |  3 ++
>  drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c  |  3 ++
>  drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c   |  3 ++
>  drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c   |  3 ++
>  drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c   | 11 --
>  include/uapi/drm/amdgpu_drm.h   | 16 
>  10 files changed, 135 insertions(+), 8 deletions(-)
>
> --
> 2.41.0
>

[RESEND v3 5/5] drm/amdgpu: Create version number for coredumps

2023-08-10 Thread André Almeida

Even if there's nothing currently parsing amdgpu's coredump files, if
we eventually have such tools they will be glad to find a version field
to properly read the file.

Create a version number to be displayed on top of coredump file, to be
incremented when the file format or content get changed.

Signed-off-by: André Almeida 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c | 1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h | 3 +++
 2 files changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
index b02b56193447..202c101772a8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
@@ -192,6 +192,7 @@ static ssize_t amdgpu_devcoredump_read(char *buffer, loff_t 
offset,
p = drm_coredump_printer();
 
drm_printf(, " AMDGPU Device Coredump \n");
+   drm_printf(, "version: " AMDGPU_COREDUMP_VERSION "\n");
drm_printf(, "kernel: " UTS_RELEASE "\n");
drm_printf(, "module: " KBUILD_MODNAME "\n");
drm_printf(, "time: %lld.%09ld\n", coredump->reset_time.tv_sec, 
coredump->reset_time.tv_nsec);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
index 362954521721..7b6767ca8127 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
@@ -88,6 +88,9 @@ struct amdgpu_reset_domain {
 };
 
 #ifdef CONFIG_DEV_COREDUMP
+
+#define AMDGPU_COREDUMP_VERSION "1"
+
 struct amdgpu_coredump_info {
struct amdgpu_device*adev;
struct amdgpu_task_info reset_task_info;
-- 
2.41.0

[RESEND v3 4/5] drm/amdgpu: Move coredump code to amdgpu_reset file

2023-08-10 Thread André Almeida

Given that we use coredump just for device resets, move its functions
and structs to a more semantic file, the amdgpu_reset.{c, h}.

Signed-off-by: André Almeida 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h|  9 ---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 80 --
 drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c  | 78 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h  | 11 +++
 4 files changed, 89 insertions(+), 89 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 0d560b713948..314b06cddc39 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1100,15 +1100,6 @@ struct amdgpu_device {
uint32_taid_mask;
 };
 
-#ifdef CONFIG_DEV_COREDUMP
-struct amdgpu_coredump_info {
-   struct amdgpu_device*adev;
-   struct amdgpu_task_info reset_task_info;
-   struct timespec64   reset_time;
-   boolreset_vram_lost;
-};
-#endif
-
 static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev)
 {
return container_of(ddev, struct amdgpu_device, ddev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 419b6336de64..9706f608723a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -32,8 +32,6 @@
 #include 
 #include 
 #include 
-#include 
-#include 
 #include 
 #include 
 
@@ -4799,84 +4797,6 @@ static int amdgpu_reset_reg_dumps(struct amdgpu_device 
*adev)
return 0;
 }
 
-#ifndef CONFIG_DEV_COREDUMP
-static void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost,
-   struct amdgpu_reset_context *reset_context)
-{
-}
-#else
-static ssize_t amdgpu_devcoredump_read(char *buffer, loff_t offset,
-   size_t count, void *data, size_t datalen)
-{
-   struct drm_printer p;
-   struct amdgpu_coredump_info *coredump = data;
-   struct drm_print_iterator iter;
-   int i;
-
-   iter.data = buffer;
-   iter.offset = 0;
-   iter.start = offset;
-   iter.remain = count;
-
-   p = drm_coredump_printer();
-
-   drm_printf(, " AMDGPU Device Coredump \n");
-   drm_printf(, "kernel: " UTS_RELEASE "\n");
-   drm_printf(, "module: " KBUILD_MODNAME "\n");
-   drm_printf(, "time: %lld.%09ld\n", coredump->reset_time.tv_sec, 
coredump->reset_time.tv_nsec);
-   if (coredump->reset_task_info.pid)
-   drm_printf(, "process_name: %s PID: %d\n",
-  coredump->reset_task_info.process_name,
-  coredump->reset_task_info.pid);
-
-   if (coredump->reset_vram_lost)
-   drm_printf(, "VRAM is lost due to GPU reset!\n");
-   if (coredump->adev->num_regs) {
-   drm_printf(, "AMDGPU register dumps:\nOffset: Value:\n");
-
-   for (i = 0; i < coredump->adev->num_regs; i++)
-   drm_printf(, "0x%08x: 0x%08x\n",
-  coredump->adev->reset_dump_reg_list[i],
-  coredump->adev->reset_dump_reg_value[i]);
-   }
-
-   return count - iter.remain;
-}
-
-static void amdgpu_devcoredump_free(void *data)
-{
-   kfree(data);
-}
-
-static void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost,
-   struct amdgpu_reset_context *reset_context)
-{
-   struct amdgpu_coredump_info *coredump;
-   struct drm_device *dev = adev_to_drm(adev);
-
-   coredump = kmalloc(sizeof(*coredump), GFP_NOWAIT);
-
-   if (!coredump) {
-   DRM_ERROR("%s: failed to allocate memory for coredump\n", 
__func__);
-   return;
-   }
-
-   memset(coredump, 0, sizeof(*coredump));
-
-   coredump->reset_vram_lost = vram_lost;
-
-   if (reset_context->job && reset_context->job->vm)
-   coredump->reset_task_info = reset_context->job->vm->task_info;
-
-   coredump->adev = adev;
-
-   ktime_get_ts64(>reset_time);
-
-   dev_coredumpm(dev->dev, THIS_MODULE, coredump, 0, GFP_NOWAIT,
- amdgpu_devcoredump_read, amdgpu_devcoredump_free);
-}
-#endif
-
 int amdgpu_do_asic_reset(struct list_head *device_list_handle,
 struct amdgpu_reset_context *reset_context)
 {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
index 5fed06ffcc6b..b02b56193447 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
@@ -21,6 +21,9 @@
  *
  */
 
+#include 
+#include 
+
 #include "amdgpu_reset.h"
 #include "aldebaran.h"
 #include "sienna_cichlid.h"
@@ -167,5 +170,80 @@ void amdgpu_device_unlock_reset_domain(struct 
amdgpu_reset_domain *reset_domain)
up_write(_domain->sem);
 }
 
+#ifndef CONFIG_DEV_COREDUMP
+void

[RESEND v3 3/5] drm/amdgpu: Rework coredump to use memory dynamically

2023-08-10 Thread André Almeida

Instead of storing coredump information inside amdgpu_device struct,
move it to a proper separate struct and allocate it dynamically. This
will make it easier to further expand the logged information.

Signed-off-by: André Almeida 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h| 14 +++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 65 ++
 2 files changed, 51 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 9c6a332261ab..0d560b713948 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1088,11 +1088,6 @@ struct amdgpu_device {
uint32_t*reset_dump_reg_list;
uint32_t*reset_dump_reg_value;
int num_regs;
-#ifdef CONFIG_DEV_COREDUMP
-   struct amdgpu_task_info reset_task_info;
-   boolreset_vram_lost;
-   struct timespec64   reset_time;
-#endif
 
boolscpm_enabled;
uint32_tscpm_status;
@@ -1105,6 +1100,15 @@ struct amdgpu_device {
uint32_taid_mask;
 };
 
+#ifdef CONFIG_DEV_COREDUMP
+struct amdgpu_coredump_info {
+   struct amdgpu_device*adev;
+   struct amdgpu_task_info reset_task_info;
+   struct timespec64   reset_time;
+   boolreset_vram_lost;
+};
+#endif
+
 static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev)
 {
return container_of(ddev, struct amdgpu_device, ddev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index bf4781551f88..419b6336de64 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4799,12 +4799,17 @@ static int amdgpu_reset_reg_dumps(struct amdgpu_device 
*adev)
return 0;
 }
 
-#ifdef CONFIG_DEV_COREDUMP
+#ifndef CONFIG_DEV_COREDUMP
+static void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost,
+   struct amdgpu_reset_context *reset_context)
+{
+}
+#else
 static ssize_t amdgpu_devcoredump_read(char *buffer, loff_t offset,
size_t count, void *data, size_t datalen)
 {
struct drm_printer p;
-   struct amdgpu_device *adev = data;
+   struct amdgpu_coredump_info *coredump = data;
struct drm_print_iterator iter;
int i;
 
@@ -4818,21 +4823,21 @@ static ssize_t amdgpu_devcoredump_read(char *buffer, 
loff_t offset,
drm_printf(, " AMDGPU Device Coredump \n");
drm_printf(, "kernel: " UTS_RELEASE "\n");
drm_printf(, "module: " KBUILD_MODNAME "\n");
-   drm_printf(, "time: %lld.%09ld\n", adev->reset_time.tv_sec, 
adev->reset_time.tv_nsec);
-   if (adev->reset_task_info.pid)
+   drm_printf(, "time: %lld.%09ld\n", coredump->reset_time.tv_sec, 
coredump->reset_time.tv_nsec);
+   if (coredump->reset_task_info.pid)
drm_printf(, "process_name: %s PID: %d\n",
-  adev->reset_task_info.process_name,
-  adev->reset_task_info.pid);
+  coredump->reset_task_info.process_name,
+  coredump->reset_task_info.pid);
 
-   if (adev->reset_vram_lost)
+   if (coredump->reset_vram_lost)
drm_printf(, "VRAM is lost due to GPU reset!\n");
-   if (adev->num_regs) {
+   if (coredump->adev->num_regs) {
drm_printf(, "AMDGPU register dumps:\nOffset: Value:\n");
 
-   for (i = 0; i < adev->num_regs; i++)
+   for (i = 0; i < coredump->adev->num_regs; i++)
drm_printf(, "0x%08x: 0x%08x\n",
-  adev->reset_dump_reg_list[i],
-  adev->reset_dump_reg_value[i]);
+  coredump->adev->reset_dump_reg_list[i],
+  coredump->adev->reset_dump_reg_value[i]);
}
 
return count - iter.remain;
@@ -4840,14 +4845,34 @@ static ssize_t amdgpu_devcoredump_read(char *buffer, 
loff_t offset,
 
 static void amdgpu_devcoredump_free(void *data)
 {
+   kfree(data);
 }
 
-static void amdgpu_reset_capture_coredumpm(struct amdgpu_device *adev)
+static void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost,
+   struct amdgpu_reset_context *reset_context)
 {
+   struct amdgpu_coredump_info *coredump;
struct drm_device *dev = adev_to_drm(adev);
 
-   ktime_get_ts64(>reset_time);
-   dev_coredumpm(dev->dev, THIS_MODULE, adev, 0, GFP_NOWAIT,
+   coredump = kmalloc(sizeof(*coredump), GFP_NOWAIT);
+
+   if (!coredump) {
+   DRM_ERROR("%s: failed to allocate memory for coredump\n", 
__func__);
+

[RESEND v3 2/5] drm/amdgpu: Allocate coredump memory in a nonblocking way

2023-08-10 Thread André Almeida

During a GPU reset, a normal memory allocation could block to reclaim
memory. Given that coredump is a best-effort mechanism, it shouldn't
disturb the reset path. Change its memory allocation flag to a
nonblocking one.

Signed-off-by: André Almeida 
Reviewed-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index aa171db68639..bf4781551f88 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4847,7 +4847,7 @@ static void amdgpu_reset_capture_coredumpm(struct 
amdgpu_device *adev)
struct drm_device *dev = adev_to_drm(adev);
 
ktime_get_ts64(>reset_time);
-   dev_coredumpm(dev->dev, THIS_MODULE, adev, 0, GFP_KERNEL,
+   dev_coredumpm(dev->dev, THIS_MODULE, adev, 0, GFP_NOWAIT,
  amdgpu_devcoredump_read, amdgpu_devcoredump_free);
 }
 #endif
-- 
2.41.0

[RESEND v3 1/5] drm/amdgpu: Create a module param to disable soft recovery

2023-08-10 Thread André Almeida

Create a module parameter to disable soft recoveries on amdgpu, making
every recovery go through the device reset path. This option makes it
easier to force device resets for testing and debugging purposes.

Signed-off-by: André Almeida 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h  | 1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c  | 9 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 6 +-
 3 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 2e3c7c15cb8e..9c6a332261ab 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -189,6 +189,7 @@ extern uint amdgpu_force_long_training;
 extern int amdgpu_lbpw;
 extern int amdgpu_compute_multipipe;
 extern int amdgpu_gpu_recovery;
+extern bool amdgpu_soft_recovery;
 extern int amdgpu_emu_mode;
 extern uint amdgpu_smu_memory_pool_size;
 extern int amdgpu_smu_pptable_id;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 0fec81d6a7df..27e7fa36cc60 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -163,6 +163,7 @@ uint amdgpu_force_long_training;
 int amdgpu_lbpw = -1;
 int amdgpu_compute_multipipe = -1;
 int amdgpu_gpu_recovery = -1; /* auto */
+bool amdgpu_soft_recovery = true;
 int amdgpu_emu_mode;
 uint amdgpu_smu_memory_pool_size;
 int amdgpu_smu_pptable_id = -1;
@@ -538,6 +539,14 @@ module_param_named(compute_multipipe, 
amdgpu_compute_multipipe, int, 0444);
 MODULE_PARM_DESC(gpu_recovery, "Enable GPU recovery mechanism, (1 = enable, 0 
= disable, -1 = auto)");
 module_param_named(gpu_recovery, amdgpu_gpu_recovery, int, 0444);
 
+/**
+ * DOC: gpu_soft_recovery (bool)
+ * Set true to allow the driver to try soft recoveries if a job get stuck. Set
+ * to false to always force a GPU reset during recovery.
+ */
+MODULE_PARM_DESC(gpu_soft_recovery, "Enable GPU soft recovery mechanism 
(default: true)");
+module_param_named(gpu_soft_recovery, amdgpu_soft_recovery, bool, 0644);
+
 /**
  * DOC: emu_mode (int)
  * Set value 1 to enable emulation mode. This is only needed when running on 
an emulator. The default is 0 (disabled).
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 80d6e132e409..40678d9fb17e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -434,8 +434,12 @@ bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, 
unsigned int vmid,
   struct dma_fence *fence)
 {
unsigned long flags;
+   ktime_t deadline;
 
-   ktime_t deadline = ktime_add_us(ktime_get(), 1);
+   if (!amdgpu_soft_recovery)
+   return false;
+
+   deadline = ktime_add_us(ktime_get(), 1);
 
if (amdgpu_sriov_vf(ring->adev) || !ring->funcs->soft_recovery || 
!fence)
return false;
-- 
2.41.0

[RESEND v3 0/5] drm/amdgpu: Add new reset option and rework coredump

2023-08-10 Thread André Almeida

Hi,

The goal of this patchset is to improve debugging device resets on amdgpu.

The first patch creates a new module parameter to disable soft recoveries,
ensuring every recovery goes through the full device reset, making it easier to
generate resets from userspace tools like [0] and [1]. This is important to
validate how the stack behaves on resets, from end-to-end.

The last patches are a rework to alloc devcoredump dynamically and to move it to
a better source file.

I have dropped the patches that add more information to devcoredump for now,
until I figure out a better way to do so, like storing the IB address in the
fence.

Thanks,
André

[0] https://gitlab.freedesktop.org/andrealmeid/gpu-timeout
[1] https://github.com/andrealmeid/vulkan-triangle-v1

Changelog:

v2: 
https://lore.kernel.org/dri-devel/20230713213242.680944-1-andrealm...@igalia.com/
- Drop the IB and ring patch
- Drop patch that limited information from kernel threads
- Add patch to move coredump to amdgpu_reset

v1: 
https://lore.kernel.org/dri-devel/20230711213501.526237-1-andrealm...@igalia.com/
 - Drop "Mark contexts guilty for causing soft recoveries" patch
 - Use GFP_NOWAIT for devcoredump allocation

André Almeida (5):
  drm/amdgpu: Create a module param to disable soft recovery
  drm/amdgpu: Allocate coredump memory in a nonblocking way
  drm/amdgpu: Rework coredump to use memory dynamically
  drm/amdgpu: Move coredump code to amdgpu_reset file
  drm/amdgpu: Create version number for coredumps

 drivers/gpu/drm/amd/amdgpu/amdgpu.h|  6 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 67 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c|  9 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c  | 79 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h  | 14 
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c   |  6 +-
 6 files changed, 111 insertions(+), 70 deletions(-)

-- 
2.41.0

[PATCH] drm/amdkfd: fix double assign skip process context clear

2023-08-10 Thread Jonathan Kim

Remove redundant assignment when skipping process ctx clear.

Signed-off-by: Jonathan Kim 
---
 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index aa5091f18681..89c2bfcb36ce 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -227,7 +227,6 @@ static int add_queue_mes(struct device_queue_manager *dqm, 
struct queue *q,
queue_input.tba_addr = qpd->tba_addr;
queue_input.tma_addr = qpd->tma_addr;
queue_input.trap_en = !kfd_dbg_has_cwsr_workaround(q->device);
-   queue_input.skip_process_ctx_clear = 
qpd->pqm->process->debug_trap_enabled;
queue_input.skip_process_ctx_clear = 
qpd->pqm->process->debug_trap_enabled ||
 
kfd_dbg_has_ttmps_always_setup(q->device);
 
-- 
2.25.1

Re: [PATCH] drm/amdgpu: Keep reset handlers shared

2023-08-10 Thread Lazar, Lijo





On 8/10/2023 8:41 PM, Christian König wrote:

Am 10.08.23 um 13:44 schrieb Lijo Lazar:

Instead of maintaining a list per device, keep the reset handlers common
per ASIC family. A pointer to the list of handlers is maintained in
reset control.


Why should this be beneficial?
There is a global reset handler object for each type of reset for a 
particular ASIC family. Each device has a reset control which holds a 
reference to these handlers.


Earlier, the handler used to be a list object. This creates trouble when 
there are multiple devices of the same ASIC family - the same global 
object gets added to the reset control of each device and that corrupts the list.


Keeping an array of reset handlers and having the reset control holding 
a reference to the array of handlers makes it simpler.


Thanks,
Lijo


Christian.



Signed-off-by: Lijo Lazar 
---
  drivers/gpu/drm/amd/amdgpu/aldebaran.c  | 19 +++
  drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c   |  8 
  drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h   | 16 
  drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c | 20 +++-
  drivers/gpu/drm/amd/amdgpu/smu_v13_0_10.c   | 19 +++
  5 files changed, 45 insertions(+), 37 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/aldebaran.c 
b/drivers/gpu/drm/amd/amdgpu/aldebaran.c

index 2b97b8a96fb4..82e1c83a7ccc 100644
--- a/drivers/gpu/drm/amd/amdgpu/aldebaran.c
+++ b/drivers/gpu/drm/amd/amdgpu/aldebaran.c
@@ -48,20 +48,19 @@ aldebaran_get_reset_handler(struct 
amdgpu_reset_control *reset_ctl,

  {
  struct amdgpu_reset_handler *handler;
  struct amdgpu_device *adev = (struct amdgpu_device 
*)reset_ctl->handle;

+    int i;
  if (reset_context->method != AMD_RESET_METHOD_NONE) {
  dev_dbg(adev->dev, "Getting reset handler for method %d\n",
  reset_context->method);
-    list_for_each_entry(handler, _ctl->reset_handlers,
- handler_list) {
+    for_each_handler(i, handler, reset_ctl) {
  if (handler->reset_method == reset_context->method)
  return handler;
  }
  }
  if (aldebaran_is_mode2_default(reset_ctl)) {
-    list_for_each_entry(handler, _ctl->reset_handlers,
- handler_list) {
+    for_each_handler(i, handler, reset_ctl)    {
  if (handler->reset_method == AMD_RESET_METHOD_MODE2) {
  reset_context->method = AMD_RESET_METHOD_MODE2;
  return handler;
@@ -124,9 +123,9 @@ static void aldebaran_async_reset(struct 
work_struct *work)

  struct amdgpu_reset_control *reset_ctl =
  container_of(work, struct amdgpu_reset_control, reset_work);
  struct amdgpu_device *adev = (struct amdgpu_device 
*)reset_ctl->handle;

+    int i;
-    list_for_each_entry(handler, _ctl->reset_handlers,
- handler_list) {
+    for_each_handler(i, handler, reset_ctl)    {
  if (handler->reset_method == reset_ctl->active_reset) {
  dev_dbg(adev->dev, "Resetting device\n");
  handler->do_reset(adev);
@@ -395,6 +394,11 @@ static struct amdgpu_reset_handler 
aldebaran_mode2_handler = {

  .do_reset    = aldebaran_mode2_reset,
  };
+static struct amdgpu_reset_handler
+    *aldebaran_rst_handlers[AMDGPU_RESET_MAX_HANDLERS] = {
+    _mode2_handler,
+    };
+
  int aldebaran_reset_init(struct amdgpu_device *adev)
  {
  struct amdgpu_reset_control *reset_ctl;
@@ -408,10 +412,9 @@ int aldebaran_reset_init(struct amdgpu_device *adev)
  reset_ctl->active_reset = AMD_RESET_METHOD_NONE;
  reset_ctl->get_reset_handler = aldebaran_get_reset_handler;
-    INIT_LIST_HEAD(_ctl->reset_handlers);
  INIT_WORK(_ctl->reset_work, reset_ctl->async_reset);
  /* Only mode2 is handled through reset control now */
-    amdgpu_reset_add_handler(reset_ctl, _mode2_handler);
+    reset_ctl->reset_handlers = _rst_handlers;
  adev->reset_cntl = reset_ctl;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c

index 5fed06ffcc6b..02d874799c16 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
@@ -26,14 +26,6 @@
  #include "sienna_cichlid.h"
  #include "smu_v13_0_10.h"
-int amdgpu_reset_add_handler(struct amdgpu_reset_control *reset_ctl,
- struct amdgpu_reset_handler *handler)
-{
-    /* TODO: Check if handler exists? */
-    list_add_tail(>handler_list, _ctl->reset_handlers);
-    return 0;
-}
-
  int amdgpu_reset_init(struct amdgpu_device *adev)
  {
  int ret = 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h

index f4a501ff87d9..471d789b33a5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
@@ -26,6 +26,8 @@
  #include "amdgpu.h"
+#define AMDGPU_RESET_MAX_HANDLERS 5
+
  enum AMDGPU_RESET_FLAGS {
  AMDGPU_NEED_FULL_RESET

[PATCH] drm/amdkfd: ratelimited SQ interrupt messages

2023-08-10 Thread Harish Kasiviswanathan

No functional change. Use the ratelimited versions of pr_debug() and
pr_warn() to avoid overflowing the dmesg buffer.

Signed-off-by: Harish Kasiviswanathan 
---
 drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c | 6 +++---
 drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c | 6 +++---
 drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c  | 6 +++---
 3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c
index c7991e07b6be..a7697ec8188e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c
@@ -268,7 +268,7 @@ static void event_interrupt_wq_v10(struct kfd_node *dev,
SQ_INTERRUPT_WORD_WAVE_CTXID1, 
ENCODING);
switch (encoding) {
case SQ_INTERRUPT_WORD_ENCODING_AUTO:
-   pr_debug(
+   pr_debug_ratelimited(
"sq_intr: auto, se %d, ttrace %d, wlt 
%d, ttrac_buf0_full %d, ttrac_buf1_full %d, ttrace_utc_err %d\n",
REG_GET_FIELD(context_id1, 
SQ_INTERRUPT_WORD_AUTO_CTXID1,
SE_ID),
@@ -284,7 +284,7 @@ static void event_interrupt_wq_v10(struct kfd_node *dev,

THREAD_TRACE_UTC_ERROR));
break;
case SQ_INTERRUPT_WORD_ENCODING_INST:
-   pr_debug("sq_intr: inst, se %d, data 0x%x, sa 
%d, priv %d, wave_id %d, simd_id %d, wgp_id %d\n",
+   pr_debug_ratelimited("sq_intr: inst, se %d, 
data 0x%x, sa %d, priv %d, wave_id %d, simd_id %d, wgp_id %d\n",
REG_GET_FIELD(context_id1, 
SQ_INTERRUPT_WORD_WAVE_CTXID1,
SE_ID),
REG_GET_FIELD(context_id0, 
SQ_INTERRUPT_WORD_WAVE_CTXID0,
@@ -310,7 +310,7 @@ static void event_interrupt_wq_v10(struct kfd_node *dev,
case SQ_INTERRUPT_WORD_ENCODING_ERROR:
sq_intr_err_type = REG_GET_FIELD(context_id0, 
KFD_CTXID0,
ERR_TYPE);
-   pr_warn("sq_intr: error, se %d, data 0x%x, sa 
%d, priv %d, wave_id %d, simd_id %d, wgp_id %d, err_type %d\n",
+   pr_warn_ratelimited("sq_intr: error, se %d, 
data 0x%x, sa %d, priv %d, wave_id %d, simd_id %d, wgp_id %d, err_type %d\n",
REG_GET_FIELD(context_id1, 
SQ_INTERRUPT_WORD_WAVE_CTXID1,
SE_ID),
REG_GET_FIELD(context_id0, 
SQ_INTERRUPT_WORD_WAVE_CTXID0,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c
index f933bd231fb9..2a65792fd116 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c
@@ -150,7 +150,7 @@ enum SQ_INTERRUPT_ERROR_TYPE {
 
 static void print_sq_intr_info_auto(uint32_t context_id0, uint32_t context_id1)
 {
-   pr_debug(
+   pr_debug_ratelimited(
"sq_intr: auto, ttrace %d, wlt %d, ttrace_buf_full %d, reg_tms 
%d, cmd_tms %d, host_cmd_ovf %d, host_reg_ovf %d, immed_ovf %d, ttrace_utc_err 
%d\n",
REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, 
THREAD_TRACE),
REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, WLT),
@@ -165,7 +165,7 @@ static void print_sq_intr_info_auto(uint32_t context_id0, 
uint32_t context_id1)
 
 static void print_sq_intr_info_inst(uint32_t context_id0, uint32_t context_id1)
 {
-   pr_debug(
+   pr_debug_ratelimited(
"sq_intr: inst, data 0x%08x, sh %d, priv %d, wave_id %d, 
simd_id %d, wgp_id %d\n",
REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, DATA),
REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, 
SH_ID),
@@ -177,7 +177,7 @@ static void print_sq_intr_info_inst(uint32_t context_id0, 
uint32_t context_id1)
 
 static void print_sq_intr_info_error(uint32_t context_id0, uint32_t 
context_id1)
 {
-   pr_warn(
+   pr_warn_ratelimited(
"sq_intr: error, detail 0x%08x, type %d, sh %d, priv %d, 
wave_id %d, simd_id %d, wgp_id %d\n",
REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0, 
DETAIL),
REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0, 
TYPE),
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
index f0731a6a5306..02695ccd22d6 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
+++

[PATCH v2 34/34] drm/amd/display: Use 3x4 CTM for plane CTM

2023-08-10 Thread Melissa Wen

From: Joshua Ashton 

Signed-off-by: Joshua Ashton 
Signed-off-by: Melissa Wen 
---
 .../amd/display/amdgpu_dm/amdgpu_dm_color.c   | 32 +--
 .../amd/display/amdgpu_dm/amdgpu_dm_plane.c   |  2 +-
 include/uapi/drm/drm_mode.h   |  8 +
 3 files changed, 38 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index 7ff329101fd4..0a51af44efd5 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -412,6 +412,32 @@ static void __drm_ctm_to_dc_matrix(const struct 
drm_color_ctm *ctm,
}
 }
 
+/**
+ * __drm_ctm2_to_dc_matrix - converts a DRM CTM2 to a DC CSC float matrix
+ * @ctm: DRM color transformation matrix
+ * @matrix: DC CSC float matrix
+ *
+ * The matrix needs to be a 3x4 (12 entry) matrix.
+ */
+static void __drm_ctm2_to_dc_matrix(const struct drm_color_ctm2 *ctm,
+  struct fixed31_32 *matrix)
+{
+   int i;
+
+   /*
+* DRM gives a 3x3 matrix, but DC wants 3x4. Assuming we're operating
+* with homogeneous coordinates, augment the matrix with 0's.
+*
+* The format provided is S31.32, using signed-magnitude representation.
+* Our fixed31_32 is also S31.32, but is using 2's complement. We have
+* to convert from signed-magnitude to 2's complement.
+*/
+   for (i = 0; i < 12; i++) {
+   /* gamut_remap_matrix[i] = ctm[i - floor(i/4)] */
+   matrix[i] = dc_fixpt_from_s3132(ctm->matrix[i]);
+   }
+}
+
 /**
  * __set_legacy_tf - Calculates the legacy transfer function
  * @func: transfer function
@@ -1159,7 +1185,7 @@ int amdgpu_dm_update_plane_color_mgmt(struct 
dm_crtc_state *crtc,
 {
struct amdgpu_device *adev = drm_to_adev(crtc->base.state->dev);
struct dm_plane_state *dm_plane_state = to_dm_plane_state(plane_state);
-   struct drm_color_ctm *ctm = NULL;
+   struct drm_color_ctm2 *ctm = NULL;
struct dc_color_caps *color_caps = NULL;
bool has_crtc_cm_degamma;
int ret;
@@ -1213,7 +1239,7 @@ int amdgpu_dm_update_plane_color_mgmt(struct 
dm_crtc_state *crtc,
 
/* Setup CRTC CTM. */
if (dm_plane_state->ctm) {
-   ctm = (struct drm_color_ctm *)dm_plane_state->ctm->data;
+   ctm = (struct drm_color_ctm2 *)dm_plane_state->ctm->data;
 
/*
 * So far, if we have both plane and CRTC CTM, plane CTM takes
@@ -1224,7 +1250,7 @@ int amdgpu_dm_update_plane_color_mgmt(struct 
dm_crtc_state *crtc,
 * provide support for both DPP and MPC matrix at the same
 * time.
 */
-   __drm_ctm_to_dc_matrix(ctm, 
dc_plane_state->gamut_remap_matrix.matrix);
+   __drm_ctm2_to_dc_matrix(ctm, 
dc_plane_state->gamut_remap_matrix.matrix);
 
dc_plane_state->gamut_remap_matrix.enable_remap = true;
dc_plane_state->input_csc_color_matrix.enable_adjustment = 
false;
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
index 0b1081c690cb..27962a3d30f5 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
@@ -1543,7 +1543,7 @@ dm_atomic_plane_set_property(struct drm_plane *plane,
ret = drm_property_replace_blob_from_id(plane->dev,
_plane_state->ctm,
val,
-   sizeof(struct 
drm_color_ctm), -1,
+   sizeof(struct 
drm_color_ctm2), -1,
);
dm_plane_state->base.color_mgmt_changed |= replaced;
return ret;
diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h
index 46becedf5b2f..402288133e4c 100644
--- a/include/uapi/drm/drm_mode.h
+++ b/include/uapi/drm/drm_mode.h
@@ -838,6 +838,14 @@ struct drm_color_ctm {
__u64 matrix[9];
 };
 
+struct drm_color_ctm2 {
+   /*
+* Conversion matrix in S31.32 sign-magnitude
+* (not two's complement!) format.
+*/
+   __u64 matrix[12];
+};
+
 struct drm_color_lut {
/*
 * Values are mapped linearly to 0.0 - 1.0 range, with 0x0 == 0.0 and
-- 
2.40.1

[PATCH v2 33/34] drm/amd/display: add plane CTM support

2023-08-10 Thread Melissa Wen

Map the plane CTM driver-specific property to DC plane, instead of DC
stream. The remaining steps to program DPP block are already implemented
on DC shared-code.

Signed-off-by: Melissa Wen 
---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c |  1 +
 .../amd/display/amdgpu_dm/amdgpu_dm_color.c   | 25 +++
 2 files changed, 26 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index dfe61c5ed49e..f239410234b3 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -9578,6 +9578,7 @@ static bool should_reset_plane(struct drm_atomic_state 
*state,
if (dm_old_other_state->degamma_tf != 
dm_new_other_state->degamma_tf ||
dm_old_other_state->degamma_lut != 
dm_new_other_state->degamma_lut ||
dm_old_other_state->hdr_mult != 
dm_new_other_state->hdr_mult ||
+   dm_old_other_state->ctm != dm_new_other_state->ctm ||
dm_old_other_state->shaper_lut != 
dm_new_other_state->shaper_lut ||
dm_old_other_state->shaper_tf != 
dm_new_other_state->shaper_tf ||
dm_old_other_state->lut3d != dm_new_other_state->lut3d ||
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index 86a918ab82be..7ff329101fd4 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -1158,6 +1158,8 @@ int amdgpu_dm_update_plane_color_mgmt(struct 
dm_crtc_state *crtc,
  struct dc_plane_state *dc_plane_state)
 {
struct amdgpu_device *adev = drm_to_adev(crtc->base.state->dev);
+   struct dm_plane_state *dm_plane_state = to_dm_plane_state(plane_state);
+   struct drm_color_ctm *ctm = NULL;
struct dc_color_caps *color_caps = NULL;
bool has_crtc_cm_degamma;
int ret;
@@ -1209,6 +1211,29 @@ int amdgpu_dm_update_plane_color_mgmt(struct 
dm_crtc_state *crtc,
return ret;
}
 
+   /* Setup CRTC CTM. */
+   if (dm_plane_state->ctm) {
+   ctm = (struct drm_color_ctm *)dm_plane_state->ctm->data;
+
+   /*
+* So far, if we have both plane and CRTC CTM, plane CTM takes
+* the priority and we discard data for CRTC CTM, as
+* implemented in dcn10_program_gamut_remap().  However, we
+* have MPC gamut_remap_matrix from DCN3 family, therefore we
+* can remap MPC programing of the matrix to MPC block and
+* provide support for both DPP and MPC matrix at the same
+* time.
+*/
+   __drm_ctm_to_dc_matrix(ctm, 
dc_plane_state->gamut_remap_matrix.matrix);
+
+   dc_plane_state->gamut_remap_matrix.enable_remap = true;
+   dc_plane_state->input_csc_color_matrix.enable_adjustment = 
false;
+   } else {
+   /* Bypass CTM. */
+   dc_plane_state->gamut_remap_matrix.enable_remap = false;
+   dc_plane_state->input_csc_color_matrix.enable_adjustment = 
false;
+   }
+
return amdgpu_dm_plane_set_color_properties(plane_state,
dc_plane_state, color_caps);
 }
-- 
2.40.1

[PATCH v2 32/34] drm/amd/display: add plane CTM driver-specific property

2023-08-10 Thread Melissa Wen

Plane CTM for pre-blending color space conversion. Only enable
driver-specific plane CTM property on drivers that support both pre- and
post-blending gamut remap matrix, i.e., DCN3+ family. Otherwise it
conflicts with the DRM CRTC CTM property.

Signed-off-by: Melissa Wen 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h  |  2 ++
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h |  7 +++
 .../amd/display/amdgpu_dm/amdgpu_dm_color.c   |  7 +++
 .../amd/display/amdgpu_dm/amdgpu_dm_plane.c   | 20 +++
 4 files changed, 36 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
index abb871a912d7..84bf501b02f4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
@@ -363,6 +363,8 @@ struct amdgpu_mode_info {
 * @plane_hdr_mult_property:
 */
struct drm_property *plane_hdr_mult_property;
+
+   struct drm_property *plane_ctm_property;
/**
 * @shaper_lut_property: Plane property to set pre-blending shaper LUT
 * that converts color content before 3D LUT.
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
index 095f39f04210..6252ee912a63 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
@@ -769,6 +769,13 @@ struct dm_plane_state {
 * S31.32 sign-magnitude.
 */
__u64 hdr_mult;
+   /**
+* @ctm:
+*
+* Color transformation matrix. See drm_crtc_enable_color_mgmt(). The
+* blob (if not NULL) is a  drm_color_ctm.
+*/
+   struct drm_property_blob *ctm;
/**
 * @shaper_lut: shaper lookup table blob. The blob (if not NULL) is an
 * array of  drm_color_lut.
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index 4356846a2bce..86a918ab82be 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -218,6 +218,13 @@ amdgpu_dm_create_color_properties(struct amdgpu_device 
*adev)
return -ENOMEM;
adev->mode_info.plane_hdr_mult_property = prop;
 
+   prop = drm_property_create(adev_to_drm(adev),
+  DRM_MODE_PROP_BLOB,
+  "AMD_PLANE_CTM", 0);
+   if (!prop)
+   return -ENOMEM;
+   adev->mode_info.plane_ctm_property = prop;
+
prop = drm_property_create(adev_to_drm(adev),
   DRM_MODE_PROP_BLOB,
   "AMD_PLANE_SHAPER_LUT", 0);
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
index 3fd57de7c5be..0b1081c690cb 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
@@ -1355,6 +1355,8 @@ dm_drm_plane_duplicate_state(struct drm_plane *plane)
 
if (dm_plane_state->degamma_lut)
drm_property_blob_get(dm_plane_state->degamma_lut);
+   if (dm_plane_state->ctm)
+   drm_property_blob_get(dm_plane_state->ctm);
if (dm_plane_state->shaper_lut)
drm_property_blob_get(dm_plane_state->shaper_lut);
if (dm_plane_state->lut3d)
@@ -1436,6 +1438,8 @@ static void dm_drm_plane_destroy_state(struct drm_plane 
*plane,
 
if (dm_plane_state->degamma_lut)
drm_property_blob_put(dm_plane_state->degamma_lut);
+   if (dm_plane_state->ctm)
+   drm_property_blob_put(dm_plane_state->ctm);
if (dm_plane_state->lut3d)
drm_property_blob_put(dm_plane_state->lut3d);
if (dm_plane_state->shaper_lut)
@@ -1473,6 +1477,11 @@ dm_atomic_plane_attach_color_mgmt_properties(struct 
amdgpu_display_manager *dm,
   dm->adev->mode_info.plane_hdr_mult_property,
   AMDGPU_HDR_MULT_DEFAULT);
 
+   /* Only enable plane CTM if both DPP and MPC gamut remap is available. 
*/
+   if (dm->dc->caps.color.mpc.gamut_remap)
+   drm_object_attach_property(>base,
+  
dm->adev->mode_info.plane_ctm_property, 0);
+
if (dpp_color_caps.hw_3d_lut) {
drm_object_attach_property(>base,
   mode_info.plane_shaper_lut_property, 
0);
@@ -1530,6 +1539,14 @@ dm_atomic_plane_set_property(struct drm_plane *plane,
dm_plane_state->hdr_mult = val;
dm_plane_state->base.color_mgmt_changed = 1;
}
+   } else if (property == adev->mode_info.plane_ctm_property) {
+   ret = drm_property_replace_blob_from_id(plane->dev,
+

[PATCH v2 29/34] drm/amd/display: allow newer DC hardware to use degamma ROM for PQ/HLG

2023-08-10 Thread Melissa Wen

From: Joshua Ashton 

Need to funnel the color caps through to these functions so it can check
that the hardware is capable.

v2:
- remove redundant color caps assignment on plane degamma map (Harry)
- pass color caps to degamma params

Signed-off-by: Joshua Ashton 
Signed-off-by: Melissa Wen 
---
 .../amd/display/amdgpu_dm/amdgpu_dm_color.c   | 35 ---
 1 file changed, 22 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index f638e5b3a70b..4356846a2bce 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -538,6 +538,7 @@ static int amdgpu_dm_set_atomic_regamma(struct 
dc_stream_state *stream,
 /**
  * __set_input_tf - calculates the input transfer function based on expected
  * input space.
+ * @caps: dc color capabilities
  * @func: transfer function
  * @lut: lookup table that defines the color space
  * @lut_size: size of respective lut.
@@ -545,7 +546,7 @@ static int amdgpu_dm_set_atomic_regamma(struct 
dc_stream_state *stream,
  * Returns:
  * 0 in case of success. -ENOMEM if fails.
  */
-static int __set_input_tf(struct dc_transfer_func *func,
+static int __set_input_tf(struct dc_color_caps *caps, struct dc_transfer_func 
*func,
  const struct drm_color_lut *lut, uint32_t lut_size)
 {
struct dc_gamma *gamma = NULL;
@@ -562,7 +563,7 @@ static int __set_input_tf(struct dc_transfer_func *func,
__drm_lut_to_dc_gamma(lut, gamma, false);
}
 
-   res = mod_color_calculate_degamma_params(NULL, func, gamma, gamma != 
NULL);
+   res = mod_color_calculate_degamma_params(caps, func, gamma, gamma != 
NULL);
 
if (gamma)
dc_gamma_release();
@@ -725,7 +726,7 @@ static int amdgpu_dm_atomic_blend_lut(const struct 
drm_color_lut *blend_lut,
func_blend->tf = tf;
func_blend->sdr_ref_white_level = SDR_WHITE_LEVEL_INIT_VALUE;
 
-   ret = __set_input_tf(func_blend, blend_lut, blend_size);
+   ret = __set_input_tf(NULL, func_blend, blend_lut, blend_size);
} else {
func_blend->type = TF_TYPE_BYPASS;
func_blend->tf = TRANSFER_FUNCTION_LINEAR;
@@ -950,7 +951,8 @@ int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state 
*crtc)
 
 static int
 map_crtc_degamma_to_dc_plane(struct dm_crtc_state *crtc,
-struct dc_plane_state *dc_plane_state)
+struct dc_plane_state *dc_plane_state,
+struct dc_color_caps *caps)
 {
const struct drm_color_lut *degamma_lut;
enum dc_transfer_func_predefined tf = TRANSFER_FUNCTION_SRGB;
@@ -1005,7 +1007,7 @@ map_crtc_degamma_to_dc_plane(struct dm_crtc_state *crtc,
dc_plane_state->in_transfer_func->tf =
TRANSFER_FUNCTION_LINEAR;
 
-   r = __set_input_tf(dc_plane_state->in_transfer_func,
+   r = __set_input_tf(caps, dc_plane_state->in_transfer_func,
   degamma_lut, degamma_size);
if (r)
return r;
@@ -1018,7 +1020,7 @@ map_crtc_degamma_to_dc_plane(struct dm_crtc_state *crtc,
dc_plane_state->in_transfer_func->tf = tf;
 
if (tf != TRANSFER_FUNCTION_SRGB &&
-   !mod_color_calculate_degamma_params(NULL,
+   !mod_color_calculate_degamma_params(caps,

dc_plane_state->in_transfer_func,
NULL, false))
return -ENOMEM;
@@ -1029,7 +1031,8 @@ map_crtc_degamma_to_dc_plane(struct dm_crtc_state *crtc,
 
 static int
 __set_dm_plane_degamma(struct drm_plane_state *plane_state,
-  struct dc_plane_state *dc_plane_state)
+  struct dc_plane_state *dc_plane_state,
+  struct dc_color_caps *color_caps)
 {
struct dm_plane_state *dm_plane_state = to_dm_plane_state(plane_state);
const struct drm_color_lut *degamma_lut;
@@ -1060,7 +1063,7 @@ __set_dm_plane_degamma(struct drm_plane_state 
*plane_state,
dc_plane_state->in_transfer_func->type =
TF_TYPE_DISTRIBUTED_POINTS;
 
-   ret = __set_input_tf(dc_plane_state->in_transfer_func,
+   ret = __set_input_tf(color_caps, 
dc_plane_state->in_transfer_func,
 degamma_lut, degamma_size);
if (ret)
return ret;
@@ -1068,7 +1071,7 @@ __set_dm_plane_degamma(struct drm_plane_state 
*plane_state,
dc_plane_state->in_transfer_func->type =
TF_TYPE_PREDEFINED;
 
-   if (!mod_color_calculate_degamma_params(NULL,
+

[PATCH v2 31/34] drm/amd/display: set stream gamut remap matrix to MPC for DCN301

2023-08-10 Thread Melissa Wen

dc->caps.color.mpc.gamut_remap says there is a post-blending color block
for gamut remap matrix for DCN3 HW family and newer versions. However,
those drivers still follow the DCN10 programming, which remaps the stream
gamut_remap_matrix to DPP (pre-blending).

To enable pre-blending and post-blending gamut_remap matrix support at
the same time, set the stream gamut_remap to MPC and the plane gamut_remap to
DPP for DCN301, which supports both.

It was tested using IGT KMS color tests for DRM CRTC CTM property and it
preserves test results.

Signed-off-by: Melissa Wen 
---
 .../drm/amd/display/dc/dcn30/dcn30_hwseq.c| 37 +++
 .../drm/amd/display/dc/dcn30/dcn30_hwseq.h|  3 ++
 .../drm/amd/display/dc/dcn301/dcn301_init.c   |  2 +-
 3 files changed, 41 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c 
b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c
index 4cd4ae07d73d..4fb4e9ec03f1 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c
@@ -186,6 +186,43 @@ bool dcn30_set_input_transfer_func(struct dc *dc,
return result;
 }
 
+void dcn30_program_gamut_remap(struct pipe_ctx *pipe_ctx)
+{
+   int i = 0;
+   struct dpp_grph_csc_adjustment dpp_adjust;
+   struct mpc_grph_gamut_adjustment mpc_adjust;
+   int mpcc_id = pipe_ctx->plane_res.hubp->inst;
+   struct mpc *mpc = pipe_ctx->stream_res.opp->ctx->dc->res_pool->mpc;
+
+   memset(_adjust, 0, sizeof(dpp_adjust));
+   dpp_adjust.gamut_adjust_type = GRAPHICS_GAMUT_ADJUST_TYPE_BYPASS;
+
+   if (pipe_ctx->plane_state &&
+   pipe_ctx->plane_state->gamut_remap_matrix.enable_remap == true) {
+   dpp_adjust.gamut_adjust_type = GRAPHICS_GAMUT_ADJUST_TYPE_SW;
+   for (i = 0; i < CSC_TEMPERATURE_MATRIX_SIZE; i++)
+   dpp_adjust.temperature_matrix[i] =
+   
pipe_ctx->plane_state->gamut_remap_matrix.matrix[i];
+   }
+
+   
pipe_ctx->plane_res.dpp->funcs->dpp_set_gamut_remap(pipe_ctx->plane_res.dpp,
+   _adjust);
+
+   memset(_adjust, 0, sizeof(mpc_adjust));
+   mpc_adjust.gamut_adjust_type = GRAPHICS_GAMUT_ADJUST_TYPE_BYPASS;
+
+   if (pipe_ctx->top_pipe == NULL) {
+   if (pipe_ctx->stream->gamut_remap_matrix.enable_remap == true) {
+   mpc_adjust.gamut_adjust_type = 
GRAPHICS_GAMUT_ADJUST_TYPE_SW;
+   for (i = 0; i < CSC_TEMPERATURE_MATRIX_SIZE; i++)
+   mpc_adjust.temperature_matrix[i] =
+   
pipe_ctx->stream->gamut_remap_matrix.matrix[i];
+   }
+   }
+
+   mpc->funcs->set_gamut_remap(mpc, mpcc_id, _adjust);
+}
+
 bool dcn30_set_output_transfer_func(struct dc *dc,
struct pipe_ctx *pipe_ctx,
const struct dc_stream_state *stream)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.h 
b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.h
index a24a8e33a3d2..cb34ca932a5f 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.h
@@ -58,6 +58,9 @@ bool dcn30_set_blend_lut(struct pipe_ctx *pipe_ctx,
 bool dcn30_set_input_transfer_func(struct dc *dc,
struct pipe_ctx *pipe_ctx,
const struct dc_plane_state *plane_state);
+
+void dcn30_program_gamut_remap(struct pipe_ctx *pipe_ctx);
+
 bool dcn30_set_output_transfer_func(struct dc *dc,
struct pipe_ctx *pipe_ctx,
const struct dc_stream_state *stream);
diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_init.c 
b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_init.c
index 257df8660b4c..81fd50ee97c3 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_init.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_init.c
@@ -33,7 +33,7 @@
 #include "dcn301_init.h"
 
 static const struct hw_sequencer_funcs dcn301_funcs = {
-   .program_gamut_remap = dcn10_program_gamut_remap,
+   .program_gamut_remap = dcn30_program_gamut_remap,
.init_hw = dcn10_init_hw,
.power_down_on_boot = dcn10_power_down_on_boot,
.apply_ctx_to_hw = dce110_apply_ctx_to_hw,
-- 
2.40.1

[PATCH v2 30/34] drm/amd/display: copy 3D LUT settings from crtc state to stream_update

2023-08-10 Thread Melissa Wen

From: Joshua Ashton 

When committing planes, we copy color mgmt resources to the stream state.
Do the same for shaper and 3D LUTs.

Reviewed-by: Harry Wentland 
Signed-off-by: Joshua Ashton 
Co-developed-by: Melissa Wen 
Signed-off-by: Melissa Wen 
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index d37269a2179e..dfe61c5ed49e 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -8274,6 +8274,10 @@ static void amdgpu_dm_commit_planes(struct 
drm_atomic_state *state,
_state->stream->csc_color_matrix;
bundle->stream_update.out_transfer_func =
acrtc_state->stream->out_transfer_func;
+   bundle->stream_update.lut3d_func =
+   (struct dc_3dlut *) 
acrtc_state->stream->lut3d_func;
+   bundle->stream_update.func_shaper =
+   (struct dc_transfer_func *) 
acrtc_state->stream->func_shaper;
}
 
acrtc_state->stream->abm_level = acrtc_state->abm_level;
-- 
2.40.1

[PATCH v2 26/34] drm/amd/display: add plane 3D LUT support

2023-08-10 Thread Melissa Wen

Wire up DC 3D LUT to DM plane color management (pre-blending). On AMD
display HW, 3D LUT comes after a shaper curve and we always have to
program a shaper curve to delinearize or normalize the color space
before applying a 3D LUT (since we have a reduced number of LUT
entries).

In this version, the default values of the 3D LUT for size and bit_depth are
17x17x17 and 12-bit, but we already provide here a more generic
mechanism to program other supported values (9x9x9 size and 10-bit).

v2:
- started with plane 3D LUT instead of CRTC 3D LUT support

Reviewed-by: Harry Wentland  (v1)
Signed-off-by: Melissa Wen 
---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c |  1 +
 .../amd/display/amdgpu_dm/amdgpu_dm_color.c   | 96 ++-
 2 files changed, 94 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 347ecff2c920..025a7eb5c8aa 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -8062,6 +8062,7 @@ static void amdgpu_dm_commit_planes(struct 
drm_atomic_state *state,

bundle->surface_updates[planes_count].gamut_remap_matrix = 
_plane->gamut_remap_matrix;
bundle->surface_updates[planes_count].hdr_mult = 
dc_plane->hdr_mult;
bundle->surface_updates[planes_count].func_shaper = 
dc_plane->in_shaper_func;
+   bundle->surface_updates[planes_count].lut3d_func = 
dc_plane->lut3d_func;
}
 
amdgpu_dm_plane_fill_dc_scaling_info(dm->adev, new_plane_state,
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index 90ec09ca4118..58c4797f506e 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -598,6 +598,85 @@ amdgpu_tf_to_dc_tf(enum amdgpu_transfer_function tf)
}
 }
 
+static void __to_dc_lut3d_color(struct dc_rgb *rgb,
+   const struct drm_color_lut lut,
+   int bit_precision)
+{
+   rgb->red = drm_color_lut_extract(lut.red, bit_precision);
+   rgb->green = drm_color_lut_extract(lut.green, bit_precision);
+   rgb->blue  = drm_color_lut_extract(lut.blue, bit_precision);
+}
+
+static void __drm_3dlut_to_dc_3dlut(const struct drm_color_lut *lut,
+   uint32_t lut3d_size,
+   struct tetrahedral_params *params,
+   bool use_tetrahedral_9,
+   int bit_depth)
+{
+   struct dc_rgb *lut0;
+   struct dc_rgb *lut1;
+   struct dc_rgb *lut2;
+   struct dc_rgb *lut3;
+   int lut_i, i;
+
+
+   if (use_tetrahedral_9) {
+   lut0 = params->tetrahedral_9.lut0;
+   lut1 = params->tetrahedral_9.lut1;
+   lut2 = params->tetrahedral_9.lut2;
+   lut3 = params->tetrahedral_9.lut3;
+   } else {
+   lut0 = params->tetrahedral_17.lut0;
+   lut1 = params->tetrahedral_17.lut1;
+   lut2 = params->tetrahedral_17.lut2;
+   lut3 = params->tetrahedral_17.lut3;
+   }
+
+   for (lut_i = 0, i = 0; i < lut3d_size - 4; lut_i++, i += 4) {
+   /* We should consider the 3dlut RGB values are distributed
+* along four arrays lut0-3 where the first sizes 1229 and the
+* other 1228. The bit depth supported for 3dlut channel is
+* 12-bit, but DC also supports 10-bit.
+*
+* TODO: improve color pipeline API to enable the userspace set
+* bit depth and 3D LUT size/stride, as specified by VA-API.
+*/
+   __to_dc_lut3d_color([lut_i], lut[i], bit_depth);
+   __to_dc_lut3d_color([lut_i], lut[i + 1], bit_depth);
+   __to_dc_lut3d_color([lut_i], lut[i + 2], bit_depth);
+   __to_dc_lut3d_color([lut_i], lut[i + 3], bit_depth);
+   }
+   /* lut0 has 1229 points (lut_size/4 + 1) */
+   __to_dc_lut3d_color([lut_i], lut[i], bit_depth);
+}
+
+/* amdgpu_dm_atomic_lut3d - set DRM 3D LUT to DC stream
+ * @drm_lut3d: DRM CRTC (user) 3D LUT
+ * @drm_lut3d_size: size of 3D LUT
+ * @lut3d: DC 3D LUT
+ *
+ * Map DRM CRTC 3D LUT to DC 3D LUT and all necessary bits to program it
+ * on DCN MPC accordingly.
+ */
+static void amdgpu_dm_atomic_lut3d(const struct drm_color_lut *drm_lut,
+  uint32_t drm_lut3d_size,
+  struct dc_3dlut *lut)
+{
+   if (!drm_lut3d_size) {
+   lut->state.bits.initialized = 0;
+   } else {
+   /* Stride and bit depth are not programmable by API yet.
+* Therefore, only supports 17x17x17 3D LUT (12-bit).

[PATCH v2 28/34] drm/amd/display: add plane blend LUT and TF support

2023-08-10 Thread Melissa Wen

From: Joshua Ashton 

Map plane blend properties to DPP blend gamma. Plane blend is a
post-3D LUT curve that linearizes color space for blending. It may be
defined by a user-blob LUT and/or predefined transfer function. As
hardcoded curve (ROM) is not supported on blend gamma, we use AMD color
module to fill parameters when setting non-linear TF with empty LUT.

v2:
- rename DRM TFs to AMDGPU TFs

Reviewed-by: Harry Wentland 
Signed-off-by: Joshua Ashton 
Signed-off-by: Melissa Wen 
---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c |  1 +
 .../amd/display/amdgpu_dm/amdgpu_dm_color.c   | 55 +--
 2 files changed, 52 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 025a7eb5c8aa..d37269a2179e 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -8063,6 +8063,7 @@ static void amdgpu_dm_commit_planes(struct 
drm_atomic_state *state,
bundle->surface_updates[planes_count].hdr_mult = 
dc_plane->hdr_mult;
bundle->surface_updates[planes_count].func_shaper = 
dc_plane->in_shaper_func;
bundle->surface_updates[planes_count].lut3d_func = 
dc_plane->lut3d_func;
+   bundle->surface_updates[planes_count].blend_tf = 
dc_plane->blend_tf;
}
 
amdgpu_dm_plane_fill_dc_scaling_info(dm->adev, new_plane_state,
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index 2c96501d2fc0..f638e5b3a70b 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -706,6 +706,34 @@ static int amdgpu_dm_atomic_shaper_lut(const struct 
drm_color_lut *shaper_lut,
return ret;
 }
 
+static int amdgpu_dm_atomic_blend_lut(const struct drm_color_lut *blend_lut,
+  bool has_rom,
+  enum dc_transfer_func_predefined tf,
+  uint32_t blend_size,
+  struct dc_transfer_func *func_blend)
+{
+   int ret = 0;
+
+   if (blend_size || tf != TRANSFER_FUNCTION_LINEAR) {
+   /* DRM plane gamma LUT or TF means we are linearizing color
+* space before blending (similar to degamma programming). As
+* we don't have hardcoded curve support, or we use AMD color
+* module to fill the parameters that will be translated to HW
+* points.
+*/
+   func_blend->type = TF_TYPE_DISTRIBUTED_POINTS;
+   func_blend->tf = tf;
+   func_blend->sdr_ref_white_level = SDR_WHITE_LEVEL_INIT_VALUE;
+
+   ret = __set_input_tf(func_blend, blend_lut, blend_size);
+   } else {
+   func_blend->type = TF_TYPE_BYPASS;
+   func_blend->tf = TRANSFER_FUNCTION_LINEAR;
+   }
+
+   return ret;
+}
+
 /* amdgpu_dm_lut3d_size - get expected size according to hw color caps
  * @adev: amdgpu device
  * @lut_size: default size
@@ -1053,8 +1081,9 @@ amdgpu_dm_plane_set_color_properties(struct 
drm_plane_state *plane_state,
 {
struct dm_plane_state *dm_plane_state = to_dm_plane_state(plane_state);
enum amdgpu_transfer_function shaper_tf = 
AMDGPU_TRANSFER_FUNCTION_DEFAULT;
-   const struct drm_color_lut *shaper_lut, *lut3d;
-   uint32_t shaper_size, lut3d_size;
+   enum amdgpu_transfer_function blend_tf = 
AMDGPU_TRANSFER_FUNCTION_DEFAULT;
+   const struct drm_color_lut *shaper_lut, *lut3d, *blend_lut;
+   uint32_t shaper_size, lut3d_size, blend_size;
int ret;
 
/* We have nothing to do here, return */
@@ -1074,12 +1103,30 @@ amdgpu_dm_plane_set_color_properties(struct 
drm_plane_state *plane_state,
  amdgpu_tf_to_dc_tf(shaper_tf),
  shaper_size,
  dc_plane_state->in_shaper_func);
-   if (ret)
+   if (ret) {
drm_dbg_kms(plane_state->plane->dev,
"setting plane %d shaper LUT failed.\n",
plane_state->plane->index);
 
-   return ret;
+   return ret;
+   }
+
+   blend_tf = dm_plane_state->blend_tf;
+   blend_lut = __extract_blob_lut(dm_plane_state->blend_lut, _size);
+   blend_size = blend_lut != NULL ? blend_size : 0;
+
+   ret = amdgpu_dm_atomic_blend_lut(blend_lut, false,
+amdgpu_tf_to_dc_tf(blend_tf),
+blend_size, dc_plane_state->blend_tf);
+   if (ret) {
+   drm_dbg_kms(plane_state->plane->dev,
+   "setting plane %d gamma lut

[PATCH v2 24/34] drm/amd/display: add plane shaper LUT support

2023-08-10 Thread Melissa Wen

Map DC shaper LUT to DM plane color management. Shaper LUT can be used
to delinearize and/or normalize the color space for computational
efficiency and achieving specific visual styles. If a plane degamma is
applied to linearize the color space, a custom shaper 1D LUT can be used
just before applying 3D LUT.

v2:
- use DPP color caps to verify plane 3D LUT support
- add debug message if shaper LUT programming fails

Signed-off-by: Melissa Wen 
---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c |   1 +
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h |   2 +
 .../amd/display/amdgpu_dm/amdgpu_dm_color.c   | 108 +-
 3 files changed, 107 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 9fcc169fb87b..347ecff2c920 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -8061,6 +8061,7 @@ static void amdgpu_dm_commit_planes(struct 
drm_atomic_state *state,
bundle->surface_updates[planes_count].in_transfer_func 
= dc_plane->in_transfer_func;

bundle->surface_updates[planes_count].gamut_remap_matrix = 
_plane->gamut_remap_matrix;
bundle->surface_updates[planes_count].hdr_mult = 
dc_plane->hdr_mult;
+   bundle->surface_updates[planes_count].func_shaper = 
dc_plane->in_shaper_func;
}
 
amdgpu_dm_plane_fill_dc_scaling_info(dm->adev, new_plane_state,
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
index 23e3984f17fb..095f39f04210 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
@@ -893,6 +893,8 @@ void amdgpu_dm_trigger_timing_sync(struct drm_device *dev);
 /* 3D LUT max size is 17x17x17 */
 #define MAX_COLOR_3DLUT_ENTRIES 4913
 #define MAX_COLOR_3DLUT_BITDEPTH 12
+int amdgpu_dm_verify_lut3d_size(struct amdgpu_device *adev,
+   struct drm_plane_state *plane_state);
 /* 1D LUT size */
 #define MAX_COLOR_LUT_ENTRIES 4096
 /* Legacy gamm LUT users such as X doesn't like large LUT sizes */
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index 15f7304d8f33..958bb5a5644d 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -598,6 +598,74 @@ amdgpu_tf_to_dc_tf(enum amdgpu_transfer_function tf)
}
 }
 
+static int amdgpu_dm_atomic_shaper_lut(const struct drm_color_lut *shaper_lut,
+  uint32_t shaper_size,
+  struct dc_transfer_func *func_shaper)
+{
+   int ret = 0;
+
+   if (shaper_size) {
+   /* If DRM shaper LUT is set, we assume a linear color space
+* (linearized by DRM degamma 1D LUT or not)
+*/
+   func_shaper->type = TF_TYPE_DISTRIBUTED_POINTS;
+   func_shaper->tf = TRANSFER_FUNCTION_LINEAR;
+
+   ret = __set_output_tf(func_shaper, shaper_lut, shaper_size, 
false);
+   } else {
+   func_shaper->type = TF_TYPE_BYPASS;
+   func_shaper->tf = TRANSFER_FUNCTION_LINEAR;
+   }
+
+   return ret;
+}
+
+/* amdgpu_dm_lut3d_size - get expected size according to hw color caps
+ * @adev: amdgpu device
+ * @lut_size: default size
+ *
+ * Return:
+ * lut_size if DC 3D LUT is supported, zero otherwise.
+ */
+static uint32_t amdgpu_dm_get_lut3d_size(struct amdgpu_device *adev,
+uint32_t lut_size)
+{
+   return adev->dm.dc->caps.color.dpp.hw_3d_lut ? lut_size : 0;
+}
+
+/**
+ * amdgpu_dm_verify_lut3d_size - verifies if 3D LUT is supported and if DRM 3D
+ * LUT matches the hw supported size
+ * @adev: amdgpu device
+ * @crtc_state: the DRM CRTC state
+ *
+ * Verifies if post-blending (MPC) 3D LUT is supported by the HW (DCN 3.0 or
+ * newer) and if the DRM 3D LUT matches the supported size.
+ *
+ * Returns:
+ * 0 on success. -EINVAL if lut size are invalid.
+ */
+int amdgpu_dm_verify_lut3d_size(struct amdgpu_device *adev,
+   struct drm_plane_state *plane_state)
+{
+   struct dm_plane_state *dm_plane_state = to_dm_plane_state(plane_state);
+   const struct drm_color_lut *shaper = NULL;
+   uint32_t exp_size, size;
+
+   /* shaper LUT is only available if 3D LUT color caps*/
+   exp_size = amdgpu_dm_get_lut3d_size(adev, MAX_COLOR_LUT_ENTRIES);
+   shaper = __extract_blob_lut(dm_plane_state->shaper_lut, );
+
+   if (shaper && size != exp_size) {
+   drm_dbg(>ddev,
+   "Invalid Shaper LUT size. Should be %u but got %u.\n",
+   exp_size, size);
+   return

[PATCH v2 27/34] drm/amd/display: handle empty LUTs in __set_input_tf

2023-08-10 Thread Melissa Wen

From: Joshua Ashton 

Unlike degamma, blend gamma doesn't support hardcoded curve
(predefined/ROM), but we can use AMD color module to fill blend gamma
parameters when we have non-linear plane gamma TF without plane gamma
LUT. The regular degamma path doesn't hit this.

Reviewed-by: Harry Wentland 
Signed-off-by: Joshua Ashton 
Signed-off-by: Melissa Wen 
---
 .../amd/display/amdgpu_dm/amdgpu_dm_color.c   | 20 +++
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index 58c4797f506e..2c96501d2fc0 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -551,17 +551,21 @@ static int __set_input_tf(struct dc_transfer_func *func,
struct dc_gamma *gamma = NULL;
bool res;
 
-   gamma = dc_create_gamma();
-   if (!gamma)
-   return -ENOMEM;
+   if (lut_size) {
+   gamma = dc_create_gamma();
+   if (!gamma)
+   return -ENOMEM;
 
-   gamma->type = GAMMA_CUSTOM;
-   gamma->num_entries = lut_size;
+   gamma->type = GAMMA_CUSTOM;
+   gamma->num_entries = lut_size;
 
-   __drm_lut_to_dc_gamma(lut, gamma, false);
+   __drm_lut_to_dc_gamma(lut, gamma, false);
+   }
 
-   res = mod_color_calculate_degamma_params(NULL, func, gamma, true);
-   dc_gamma_release();
+   res = mod_color_calculate_degamma_params(NULL, func, gamma, gamma != 
NULL);
+
+   if (gamma)
+   dc_gamma_release();
 
return res ? 0 : -ENOMEM;
 }
-- 
2.40.1

[PATCH v2 25/34] drm/amd/display: add plane shaper TF support

2023-08-10 Thread Melissa Wen

Enable usage of predefined transfer func in addition to shaper 1D LUT.
That means we can save some complexity by just setting a predefined
curve, instead of programming a custom curve when preparing color space
for applying 3D LUT.

Reviewed-by: Harry Wentland 
Signed-off-by: Melissa Wen 
---
 .../drm/amd/display/amdgpu_dm/amdgpu_dm_color.c   | 15 +++
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index 958bb5a5644d..90ec09ca4118 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -599,19 +599,22 @@ amdgpu_tf_to_dc_tf(enum amdgpu_transfer_function tf)
 }
 
 static int amdgpu_dm_atomic_shaper_lut(const struct drm_color_lut *shaper_lut,
+  bool has_rom,
+  enum dc_transfer_func_predefined tf,
   uint32_t shaper_size,
   struct dc_transfer_func *func_shaper)
 {
int ret = 0;
 
-   if (shaper_size) {
+   if (shaper_size || tf != TRANSFER_FUNCTION_LINEAR) {
/* If DRM shaper LUT is set, we assume a linear color space
 * (linearized by DRM degamma 1D LUT or not)
 */
func_shaper->type = TF_TYPE_DISTRIBUTED_POINTS;
-   func_shaper->tf = TRANSFER_FUNCTION_LINEAR;
+   func_shaper->tf = tf;
+   func_shaper->sdr_ref_white_level = SDR_WHITE_LEVEL_INIT_VALUE;
 
-   ret = __set_output_tf(func_shaper, shaper_lut, shaper_size, 
false);
+   ret = __set_output_tf(func_shaper, shaper_lut, shaper_size, 
has_rom);
} else {
func_shaper->type = TF_TYPE_BYPASS;
func_shaper->tf = TRANSFER_FUNCTION_LINEAR;
@@ -958,6 +961,7 @@ amdgpu_dm_plane_set_color_properties(struct drm_plane_state 
*plane_state,
 struct dc_plane_state *dc_plane_state)
 {
struct dm_plane_state *dm_plane_state = to_dm_plane_state(plane_state);
+   enum amdgpu_transfer_function shaper_tf = 
AMDGPU_TRANSFER_FUNCTION_DEFAULT;
const struct drm_color_lut *shaper_lut;
uint32_t shaper_size;
int ret;
@@ -970,8 +974,11 @@ amdgpu_dm_plane_set_color_properties(struct 
drm_plane_state *plane_state,
 
shaper_lut = __extract_blob_lut(dm_plane_state->shaper_lut, 
_size);
shaper_size = shaper_lut != NULL ? shaper_size : 0;
+   shaper_tf = dm_plane_state->shaper_tf;
 
-   ret = amdgpu_dm_atomic_shaper_lut(shaper_lut, shaper_size,
+   ret = amdgpu_dm_atomic_shaper_lut(shaper_lut, false,
+ amdgpu_tf_to_dc_tf(shaper_tf),
+ shaper_size,
  dc_plane_state->in_shaper_func);
if (ret)
drm_dbg_kms(plane_state->plane->dev,
-- 
2.40.1

[PATCH v2 22/34] drm/amd/display: add dc_fixpt_from_s3132 helper

2023-08-10 Thread Melissa Wen

From: Joshua Ashton 

Detach value translation from CTM to reuse it for programming HDR
multiplier property.

Reviewed-by: Harry Wentland 
Signed-off-by: Joshua Ashton 
Signed-off-by: Melissa Wen 
---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c  |  8 +---
 drivers/gpu/drm/amd/display/include/fixed31_32.h | 12 
 2 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index 5659f88d1f2c..db771c895720 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -383,7 +383,6 @@ static void __drm_lut_to_dc_gamma(const struct 
drm_color_lut *lut,
 static void __drm_ctm_to_dc_matrix(const struct drm_color_ctm *ctm,
   struct fixed31_32 *matrix)
 {
-   int64_t val;
int i;
 
/*
@@ -402,12 +401,7 @@ static void __drm_ctm_to_dc_matrix(const struct 
drm_color_ctm *ctm,
}
 
/* gamut_remap_matrix[i] = ctm[i - floor(i/4)] */
-   val = ctm->matrix[i - (i / 4)];
-   /* If negative, convert to 2's complement. */
-   if (val & (1ULL << 63))
-   val = -(val & ~(1ULL << 63));
-
-   matrix[i].value = val;
+   matrix[i] = dc_fixpt_from_s3132(ctm->matrix[i - (i / 4)]);
}
 }
 
diff --git a/drivers/gpu/drm/amd/display/include/fixed31_32.h 
b/drivers/gpu/drm/amd/display/include/fixed31_32.h
index d4cf7ead1d87..84da1dd34efd 100644
--- a/drivers/gpu/drm/amd/display/include/fixed31_32.h
+++ b/drivers/gpu/drm/amd/display/include/fixed31_32.h
@@ -69,6 +69,18 @@ static const struct fixed31_32 dc_fixpt_epsilon = { 1LL };
 static const struct fixed31_32 dc_fixpt_half = { 0x8000LL };
 static const struct fixed31_32 dc_fixpt_one = { 0x1LL };
 
+static inline struct fixed31_32 dc_fixpt_from_s3132(__u64 x)
+{
+   struct fixed31_32 val;
+
+   /* If negative, convert to 2's complement. */
+   if (x & (1ULL << 63))
+   x = -(x & ~(1ULL << 63));
+
+   val.value = x;
+   return val;
+}
+
 /*
  * @brief
  * Initialization routines
-- 
2.40.1

[PATCH v2 23/34] drm/amd/display: add HDR multiplier support

2023-08-10 Thread Melissa Wen

From: Joshua Ashton 

With `dc_fixpt_from_s3132()` translation, we can just use it to set
hdr_mult.

Reviewed-by: Harry Wentland 
Signed-off-by: Joshua Ashton 
Signed-off-by: Melissa Wen 
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c   | 1 +
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c | 3 +++
 2 files changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 3e5aa1e46662..9fcc169fb87b 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -8060,6 +8060,7 @@ static void amdgpu_dm_commit_planes(struct 
drm_atomic_state *state,
bundle->surface_updates[planes_count].gamma = 
dc_plane->gamma_correction;
bundle->surface_updates[planes_count].in_transfer_func 
= dc_plane->in_transfer_func;

bundle->surface_updates[planes_count].gamut_remap_matrix = 
_plane->gamut_remap_matrix;
+   bundle->surface_updates[planes_count].hdr_mult = 
dc_plane->hdr_mult;
}
 
amdgpu_dm_plane_fill_dc_scaling_info(dm->adev, new_plane_state,
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index db771c895720..15f7304d8f33 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -902,6 +902,7 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state 
*crtc,
  struct drm_plane_state *plane_state,
  struct dc_plane_state *dc_plane_state)
 {
+   struct dm_plane_state *dm_plane_state = to_dm_plane_state(plane_state);
bool has_crtc_cm_degamma;
int ret;
 
@@ -912,6 +913,8 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state 
*crtc,
/* After, we start to update values according to color props */
has_crtc_cm_degamma = (crtc->cm_has_degamma || 
crtc->cm_is_degamma_srgb);
 
+   dc_plane_state->hdr_mult = 
dc_fixpt_from_s3132(dm_plane_state->hdr_mult);
+
ret = __set_dm_plane_degamma(plane_state, dc_plane_state);
if (ret == -ENOMEM)
return ret;
-- 
2.40.1

[PATCH v2 19/34] drm/amd/display: decouple steps for mapping CRTC degamma to DC plane

2023-08-10 Thread Melissa Wen

The next patch adds pre-blending degamma to AMD color mgmt pipeline, but
pre-blending degamma caps (DPP) is currently in use to provide DRM CRTC
atomic degamma or implicit degamma on legacy gamma. Detach degamma usage
regarding CRTC color properties to manage plane and CRTC color
correction combinations.

Reviewed-by: Harry Wentland 
Signed-off-by: Melissa Wen 
---
 .../amd/display/amdgpu_dm/amdgpu_dm_color.c   | 59 +--
 1 file changed, 41 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index 68e9f2c62f2e..74eb02655d96 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -764,20 +764,9 @@ int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state 
*crtc)
return 0;
 }
 
-/**
- * amdgpu_dm_update_plane_color_mgmt: Maps DRM color management to DC plane.
- * @crtc: amdgpu_dm crtc state
- * @dc_plane_state: target DC surface
- *
- * Update the underlying dc_stream_state's input transfer function (ITF) in
- * preparation for hardware commit. The transfer function used depends on
- * the preparation done on the stream for color management.
- *
- * Returns:
- * 0 on success. -ENOMEM if mem allocation fails.
- */
-int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc,
- struct dc_plane_state *dc_plane_state)
+static int
+map_crtc_degamma_to_dc_plane(struct dm_crtc_state *crtc,
+struct dc_plane_state *dc_plane_state)
 {
const struct drm_color_lut *degamma_lut;
enum dc_transfer_func_predefined tf = TRANSFER_FUNCTION_SRGB;
@@ -800,8 +789,7 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state 
*crtc,
 _size);
ASSERT(degamma_size == MAX_COLOR_LUT_ENTRIES);
 
-   dc_plane_state->in_transfer_func->type =
-   TF_TYPE_DISTRIBUTED_POINTS;
+   dc_plane_state->in_transfer_func->type = 
TF_TYPE_DISTRIBUTED_POINTS;
 
/*
 * This case isn't fully correct, but also fairly
@@ -837,7 +825,7 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state 
*crtc,
   degamma_lut, degamma_size);
if (r)
return r;
-   } else if (crtc->cm_is_degamma_srgb) {
+   } else {
/*
 * For legacy gamma support we need the regamma input
 * in linear space. Assume that the input is sRGB.
@@ -847,8 +835,43 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state 
*crtc,
 
if (tf != TRANSFER_FUNCTION_SRGB &&
!mod_color_calculate_degamma_params(NULL,
-   dc_plane_state->in_transfer_func, NULL, false))
+   
dc_plane_state->in_transfer_func,
+   NULL, false))
return -ENOMEM;
+   }
+
+   return 0;
+}
+
+/**
+ * amdgpu_dm_update_plane_color_mgmt: Maps DRM color management to DC plane.
+ * @crtc: amdgpu_dm crtc state
+ * @dc_plane_state: target DC surface
+ *
+ * Update the underlying dc_stream_state's input transfer function (ITF) in
+ * preparation for hardware commit. The transfer function used depends on
+ * the preparation done on the stream for color management.
+ *
+ * Returns:
+ * 0 on success. -ENOMEM if mem allocation fails.
+ */
+int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc,
+ struct dc_plane_state *dc_plane_state)
+{
+   bool has_crtc_cm_degamma;
+   int ret;
+
+   has_crtc_cm_degamma = (crtc->cm_has_degamma || 
crtc->cm_is_degamma_srgb);
+   if (has_crtc_cm_degamma){
+   /* AMD HW doesn't have post-blending degamma caps. When DRM
+* CRTC atomic degamma is set, we maps it to DPP degamma block
+* (pre-blending) or, on legacy gamma, we use DPP degamma to
+* linearize (implicit degamma) from sRGB/BT709 according to
+* the input space.
+*/
+   ret = map_crtc_degamma_to_dc_plane(crtc, dc_plane_state);
+   if (ret)
+   return ret;
} else {
/* ...Otherwise we can just bypass the DGM block. */
dc_plane_state->in_transfer_func->type = TF_TYPE_BYPASS;
-- 
2.40.1

[PATCH v2 20/34] drm/amd/display: add plane degamma TF and LUT support

2023-08-10 Thread Melissa Wen

From: Joshua Ashton 

Set DC plane with user degamma LUT or predefined TF from driver-specific
plane color properties. If plane and CRTC degamma are set at the same
time, plane degamma has priority.  That means, we only set CRTC degamma
if we don't have plane degamma LUT or TF to configure. We return -EINVAL
if we don't have plane degamma settings, so we can continue and check
CRTC degamma.

Reviewed-by: Harry Wentland 
Signed-off-by: Joshua Ashton 
Signed-off-by: Melissa Wen 
---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c |  4 +-
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h |  1 +
 .../amd/display/amdgpu_dm/amdgpu_dm_color.c   | 70 +--
 3 files changed, 69 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 78fdd0b95ae8..3e5aa1e46662 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -5009,7 +5009,9 @@ static int fill_dc_plane_attributes(struct amdgpu_device 
*adev,
 * Always set input transfer function, since plane state is refreshed
 * every time.
 */
-   ret = amdgpu_dm_update_plane_color_mgmt(dm_crtc_state, dc_plane_state);
+   ret = amdgpu_dm_update_plane_color_mgmt(dm_crtc_state,
+   plane_state,
+   dc_plane_state);
if (ret)
return ret;
 
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
index 51471675c298..23e3984f17fb 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
@@ -903,6 +903,7 @@ int amdgpu_dm_create_color_properties(struct amdgpu_device 
*adev);
 int amdgpu_dm_verify_lut_sizes(const struct drm_crtc_state *crtc_state);
 int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc);
 int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc,
+ struct drm_plane_state *plane_state,
  struct dc_plane_state *dc_plane_state);
 
 void amdgpu_dm_update_connector_after_detect(
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index 74eb02655d96..d019a091b08e 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -843,9 +843,58 @@ map_crtc_degamma_to_dc_plane(struct dm_crtc_state *crtc,
return 0;
 }
 
+static int
+__set_dm_plane_degamma(struct drm_plane_state *plane_state,
+  struct dc_plane_state *dc_plane_state)
+{
+   struct dm_plane_state *dm_plane_state = to_dm_plane_state(plane_state);
+   const struct drm_color_lut *degamma_lut;
+   enum amdgpu_transfer_function tf = AMDGPU_TRANSFER_FUNCTION_DEFAULT;
+   uint32_t degamma_size;
+   bool has_degamma_lut;
+   int ret;
+
+   degamma_lut = __extract_blob_lut(dm_plane_state->degamma_lut,
+_size);
+
+   has_degamma_lut = degamma_lut &&
+ !__is_lut_linear(degamma_lut, degamma_size);
+
+   tf = dm_plane_state->degamma_tf;
+
+   /* If we don't have plane degamma LUT nor TF to set on DC, we have
+* nothing to do here, return.
+*/
+   if (!has_degamma_lut && tf == AMDGPU_TRANSFER_FUNCTION_DEFAULT)
+   return -EINVAL;
+
+   dc_plane_state->in_transfer_func->tf = amdgpu_tf_to_dc_tf(tf);
+
+   if (has_degamma_lut) {
+   ASSERT(degamma_size == MAX_COLOR_LUT_ENTRIES);
+
+   dc_plane_state->in_transfer_func->type =
+   TF_TYPE_DISTRIBUTED_POINTS;
+
+   ret = __set_input_tf(dc_plane_state->in_transfer_func,
+degamma_lut, degamma_size);
+   if (ret)
+   return ret;
+   } else {
+   dc_plane_state->in_transfer_func->type =
+   TF_TYPE_PREDEFINED;
+
+   if (!mod_color_calculate_degamma_params(NULL,
+   dc_plane_state->in_transfer_func, NULL, false))
+   return -ENOMEM;
+   }
+   return 0;
+}
+
 /**
  * amdgpu_dm_update_plane_color_mgmt: Maps DRM color management to DC plane.
  * @crtc: amdgpu_dm crtc state
+ * @plane_state: DRM plane state
  * @dc_plane_state: target DC surface
  *
  * Update the underlying dc_stream_state's input transfer function (ITF) in
@@ -856,13 +905,28 @@ map_crtc_degamma_to_dc_plane(struct dm_crtc_state *crtc,
  * 0 on success. -ENOMEM if mem allocation fails.
  */
 int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc,
+ struct drm_plane_state *plane_state,
  struct

[PATCH v2 21/34] drm/amd/display: reject atomic commit if setting both plane and CRTC degamma

2023-08-10 Thread Melissa Wen

DC only has pre-blending degamma caps (plane/DPP) that is currently in
use for CRTC/post-blending degamma, so that we don't have HW caps to
perform plane and CRTC degamma at the same time. Reject atomic updates
when userspace sets both plane and CRTC degamma properties.

Reviewed-by: Harry Wentland 
Signed-off-by: Melissa Wen 
---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c | 13 -
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index d019a091b08e..5659f88d1f2c 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -919,9 +919,20 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state 
*crtc,
has_crtc_cm_degamma = (crtc->cm_has_degamma || 
crtc->cm_is_degamma_srgb);
 
ret = __set_dm_plane_degamma(plane_state, dc_plane_state);
-   if (ret != -EINVAL)
+   if (ret == -ENOMEM)
return ret;
 
+   /* We only have one degamma block available (pre-blending) for the
+* whole color correction pipeline, so that we can't actually perform
+* plane and CRTC degamma at the same time. Explicitly reject atomic
+* updates when userspace sets both plane and CRTC degamma properties.
+*/
+   if (has_crtc_cm_degamma && ret != -EINVAL){
+   drm_dbg_kms(crtc->base.crtc->dev,
+   "doesn't support plane and CRTC degamma at the same 
time\n");
+   return -EINVAL;
+   }
+
/* If we are here, it means we don't have plane degamma settings, check
 * if we have CRTC degamma waiting for mapping to pre-blending degamma
 * block
-- 
2.40.1

[PATCH v2 14/34] drm/amd/display: add comments to describe DM crtc color mgmt behavior

2023-08-10 Thread Melissa Wen

Describe some expected behavior of the AMD DM color mgmt programming.

Signed-off-by: Melissa Wen 
---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c | 13 -
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index 841e0391f7fb..0a9aa162d4a0 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -640,12 +640,23 @@ int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state 
*crtc)
stream->out_transfer_func->type = TF_TYPE_DISTRIBUTED_POINTS;
stream->out_transfer_func->tf = TRANSFER_FUNCTION_SRGB;
 
+   /* Note: although we pass has_rom as parameter here, we never
+* actually use ROM because the color module only takes the ROM
+* path if transfer_func->type == PREDEFINED.
+*
+* See more in mod_color_calculate_regamma_params()
+*/
r = __set_legacy_tf(stream->out_transfer_func, regamma_lut,
regamma_size, has_rom);
if (r)
return r;
} else if (has_regamma) {
-   /* If atomic regamma, CRTC RGM goes into RGM LUT. */
+   /* CRTC RGM goes into RGM LUT.
+*
+* Note: there is no implicit sRGB regamma here. We are using
+* degamma calculation from color module to calculate the curve
+* from a linear base.
+*/
stream->out_transfer_func->type = TF_TYPE_DISTRIBUTED_POINTS;
stream->out_transfer_func->tf = TRANSFER_FUNCTION_LINEAR;
 
-- 
2.40.1

[PATCH v2 18/34] drm/amd/display: mark plane as needing reset if color props change

2023-08-10 Thread Melissa Wen

From: Joshua Ashton 

We should reset a plane state if at least one of the color management
properties differs between the old and new state.

Reviewed-by: Harry Wentland 
Signed-off-by: Joshua Ashton 
Co-developed-by: Melissa Wen 
Signed-off-by: Melissa Wen 
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 15 +++
 1 file changed, 15 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 272974b88cda..78fdd0b95ae8 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -9524,6 +9524,10 @@ static bool should_reset_plane(struct drm_atomic_state 
*state,
 */
for_each_oldnew_plane_in_state(state, other, old_other_state, 
new_other_state, i) {
struct amdgpu_framebuffer *old_afb, *new_afb;
+   struct dm_plane_state *dm_new_other_state, *dm_old_other_state;
+
+   dm_new_other_state = to_dm_plane_state(new_other_state);
+   dm_old_other_state = to_dm_plane_state(old_other_state);
 
if (other->type == DRM_PLANE_TYPE_CURSOR)
continue;
@@ -9560,6 +9564,17 @@ static bool should_reset_plane(struct drm_atomic_state 
*state,
old_other_state->color_encoding != 
new_other_state->color_encoding)
return true;
 
+   /* HDR/Transfer Function changes. */
+   if (dm_old_other_state->degamma_tf != 
dm_new_other_state->degamma_tf ||
+   dm_old_other_state->degamma_lut != 
dm_new_other_state->degamma_lut ||
+   dm_old_other_state->hdr_mult != 
dm_new_other_state->hdr_mult ||
+   dm_old_other_state->shaper_lut != 
dm_new_other_state->shaper_lut ||
+   dm_old_other_state->shaper_tf != 
dm_new_other_state->shaper_tf ||
+   dm_old_other_state->lut3d != dm_new_other_state->lut3d ||
+   dm_old_other_state->blend_lut != 
dm_new_other_state->blend_lut ||
+   dm_old_other_state->blend_tf != 
dm_new_other_state->blend_tf)
+   return true;
+
/* Framebuffer checks fall at the end. */
if (!old_other_state->fb || !new_other_state->fb)
continue;
-- 
2.40.1

[PATCH v2 17/34] drm/amd/display: set sdr_ref_white_level to 80 for out_transfer_func

2023-08-10 Thread Melissa Wen

From: Joshua Ashton 

Otherwise this is just initialized to 0. This needs to actually have a
value so that compute_curve can work for PQ EOTF.

Reviewed-by: Harry Wentland 
Signed-off-by: Joshua Ashton 
Co-developed-by: Melissa Wen 
Signed-off-by: Melissa Wen 
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index 0188e82d1fdd..68e9f2c62f2e 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -72,6 +72,7 @@
  */
 
 #define MAX_DRM_LUT_VALUE 0x
+#define SDR_WHITE_LEVEL_INIT_VALUE 80
 
 /**
  * amdgpu_dm_init_color_mod - Initialize the color module.
@@ -525,6 +526,7 @@ static int amdgpu_dm_set_atomic_regamma(struct 
dc_stream_state *stream,
 */
out_tf->type = TF_TYPE_DISTRIBUTED_POINTS;
out_tf->tf = tf;
+   out_tf->sdr_ref_white_level = SDR_WHITE_LEVEL_INIT_VALUE;
 
ret = __set_output_tf(out_tf, regamma_lut, regamma_size, 
has_rom);
} else {
-- 
2.40.1

[PATCH v2 16/34] drm/amd/display: add CRTC gamma TF support

2023-08-10 Thread Melissa Wen

From: Joshua Ashton 

Add predefined transfer function programming. There is no pre-blending
out gamma ROM, but we can use AMD color modules to program LUT
parameters from a pre-defined TF and an empty regamma LUT (or bump up
LUT parameters with pre-defined TF setup).

v2:
- update crtc color mgmt if regamma TF differs between states (Joshua)
- map inverse EOTF to DC transfer function (Melissa)

Signed-off-by: Joshua Ashton 
Co-developed-by: Melissa Wen 
Signed-off-by: Melissa Wen 
---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c |  1 +
 .../amd/display/amdgpu_dm/amdgpu_dm_color.c   | 73 +++
 2 files changed, 58 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 77b4d671a9e0..272974b88cda 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -9456,6 +9456,7 @@ static int dm_update_crtc_state(struct 
amdgpu_display_manager *dm,
 * when a modeset is needed, to ensure it gets reprogrammed.
 */
if (dm_new_crtc_state->base.color_mgmt_changed ||
+   dm_old_crtc_state->regamma_tf != dm_new_crtc_state->regamma_tf ||
drm_atomic_crtc_needs_modeset(new_crtc_state)) {
ret = amdgpu_dm_update_crtc_color_mgmt(dm_new_crtc_state);
if (ret)
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index c0bf55416b4d..0188e82d1fdd 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -468,16 +468,18 @@ static int __set_output_tf(struct dc_transfer_func *func,
struct calculate_buffer cal_buffer = {0};
bool res;
 
-   ASSERT(lut && lut_size == MAX_COLOR_LUT_ENTRIES);
-
cal_buffer.buffer_index = -1;
 
-   gamma = dc_create_gamma();
-   if (!gamma)
-   return -ENOMEM;
+   if (lut_size) {
+   ASSERT(lut && lut_size == MAX_COLOR_LUT_ENTRIES);
 
-   gamma->num_entries = lut_size;
-   __drm_lut_to_dc_gamma(lut, gamma, false);
+   gamma = dc_create_gamma();
+   if (!gamma)
+   return -ENOMEM;
+
+   gamma->num_entries = lut_size;
+   __drm_lut_to_dc_gamma(lut, gamma, false);
+   }
 
if (func->tf == TRANSFER_FUNCTION_LINEAR) {
/*
@@ -485,32 +487,36 @@ static int __set_output_tf(struct dc_transfer_func *func,
 * on top of a linear input. But degamma params can be used
 * instead to simulate this.
 */
-   gamma->type = GAMMA_CUSTOM;
+   if (gamma)
+   gamma->type = GAMMA_CUSTOM;
res = mod_color_calculate_degamma_params(NULL, func,
-   gamma, true);
+gamma, gamma != NULL);
} else {
/*
 * Assume sRGB. The actual mapping will depend on whether the
 * input was legacy or not.
 */
-   gamma->type = GAMMA_CS_TFM_1D;
-   res = mod_color_calculate_regamma_params(func, gamma, false,
+   if (gamma)
+   gamma->type = GAMMA_CS_TFM_1D;
+   res = mod_color_calculate_regamma_params(func, gamma, gamma != 
NULL,
 has_rom, NULL, 
_buffer);
}
 
-   dc_gamma_release();
+   if (gamma)
+   dc_gamma_release();
 
return res ? 0 : -ENOMEM;
 }
 
 static int amdgpu_dm_set_atomic_regamma(struct dc_stream_state *stream,
const struct drm_color_lut *regamma_lut,
-   uint32_t regamma_size, bool has_rom)
+   uint32_t regamma_size, bool has_rom,
+   enum dc_transfer_func_predefined tf)
 {
struct dc_transfer_func *out_tf = stream->out_transfer_func;
int ret = 0;
 
-   if (regamma_size) {
+   if (regamma_size || tf != TRANSFER_FUNCTION_LINEAR) {
/* CRTC RGM goes into RGM LUT.
 *
 * Note: there is no implicit sRGB regamma here. We are using
@@ -518,7 +524,7 @@ static int amdgpu_dm_set_atomic_regamma(struct 
dc_stream_state *stream,
 * from a linear base.
 */
out_tf->type = TF_TYPE_DISTRIBUTED_POINTS;
-   out_tf->tf = TRANSFER_FUNCTION_LINEAR;
+   out_tf->tf = tf;
 
ret = __set_output_tf(out_tf, regamma_lut, regamma_size, 
has_rom);
} else {
@@ -564,6 +570,38 @@ static int __set_input_tf(struct dc_transfer_func *func,
return res ? 0 : -ENOMEM;

[PATCH v2 15/34] drm/amd/display: encapsulate atomic regamma operation

2023-08-10 Thread Melissa Wen

We will wire up MPC 3D LUT to DM CRTC color pipeline in the next patch,
but so far, only for atomic interface. By checking
set_output_transfer_func in DC drivers with MPC 3D LUT support, we can
verify that regamma is only programmed when 3D LUT programming fails. As
a groundwork to introduce 3D LUT programming and better understand each
step, detach atomic regamma programming from the crtc color updating
code.

Reviewed-by: Harry Wentland 
Signed-off-by: Melissa Wen 
---
 .../amd/display/amdgpu_dm/amdgpu_dm_color.c   | 53 ---
 1 file changed, 34 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index 0a9aa162d4a0..c0bf55416b4d 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -503,6 +503,36 @@ static int __set_output_tf(struct dc_transfer_func *func,
return res ? 0 : -ENOMEM;
 }
 
+static int amdgpu_dm_set_atomic_regamma(struct dc_stream_state *stream,
+   const struct drm_color_lut *regamma_lut,
+   uint32_t regamma_size, bool has_rom)
+{
+   struct dc_transfer_func *out_tf = stream->out_transfer_func;
+   int ret = 0;
+
+   if (regamma_size) {
+   /* CRTC RGM goes into RGM LUT.
+*
+* Note: there is no implicit sRGB regamma here. We are using
+* degamma calculation from color module to calculate the curve
+* from a linear base.
+*/
+   out_tf->type = TF_TYPE_DISTRIBUTED_POINTS;
+   out_tf->tf = TRANSFER_FUNCTION_LINEAR;
+
+   ret = __set_output_tf(out_tf, regamma_lut, regamma_size, 
has_rom);
+   } else {
+   /*
+* No CRTC RGM means we can just put the block into bypass
+* since we don't have any plane level adjustments using it.
+*/
+   out_tf->type = TF_TYPE_BYPASS;
+   out_tf->tf = TRANSFER_FUNCTION_LINEAR;
+   }
+
+   return ret;
+}
+
 /**
  * __set_input_tf - calculates the input transfer function based on expected
  * input space.
@@ -650,27 +680,12 @@ int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state 
*crtc)
regamma_size, has_rom);
if (r)
return r;
-   } else if (has_regamma) {
-   /* CRTC RGM goes into RGM LUT.
-*
-* Note: there is no implicit sRGB regamma here. We are using
-* degamma calculation from color module to calculate the curve
-* from a linear base.
-*/
-   stream->out_transfer_func->type = TF_TYPE_DISTRIBUTED_POINTS;
-   stream->out_transfer_func->tf = TRANSFER_FUNCTION_LINEAR;
-
-   r = __set_output_tf(stream->out_transfer_func, regamma_lut,
-   regamma_size, has_rom);
+   } else {
+   regamma_size = has_regamma ? regamma_size : 0;
+   r = amdgpu_dm_set_atomic_regamma(stream, regamma_lut,
+regamma_size, has_rom);
if (r)
return r;
-   } else {
-   /*
-* No CRTC RGM means we can just put the block into bypass
-* since we don't have any plane level adjustments using it.
-*/
-   stream->out_transfer_func->type = TF_TYPE_BYPASS;
-   stream->out_transfer_func->tf = TRANSFER_FUNCTION_LINEAR;
}
 
/*
-- 
2.40.1

[PATCH v2 12/34] drm/amd/display: add plane blend LUT and TF driver-specific properties

2023-08-10 Thread Melissa Wen

From: Joshua Ashton 

Blend 1D LUT or a pre-defined transfer function can be set to linearize
content before blending, so that it's positioned just before blending
planes in the AMD color mgmt pipeline, and after 3D LUT (non-linear
space). Shaper and Blend LUTs are 1D LUTs that sandwich 3D LUT. Drivers
should advertise blend properties according to HW caps.

Signed-off-by: Joshua Ashton 
Signed-off-by: Melissa Wen 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h  | 18 ++
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 12 +++
 .../amd/display/amdgpu_dm/amdgpu_dm_color.c   | 21 +++
 .../amd/display/amdgpu_dm/amdgpu_dm_plane.c   | 36 +++
 4 files changed, 87 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
index 4fb164204ee6..fd0b7047d56b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
@@ -389,6 +389,24 @@ struct amdgpu_mode_info {
 * size of 3D LUT as supported by the driver (read-only).
 */
struct drm_property *plane_lut3d_size_property;
+   /**
+* @plane_blend_lut_property: Plane property for output gamma before
+* blending. Userspace set a blend LUT to convert colors after 3D LUT
+* conversion. It works as a post-3D LUT 1D LUT, with shaper LUT, they
+* are sandwiching 3D LUT with two 1D LUT.
+*/
+   struct drm_property *plane_blend_lut_property;
+   /**
+* @plane_blend_lut_size_property: Plane property to define the max
+* size of blend LUT as supported by the driver (read-only).
+*/
+   struct drm_property *plane_blend_lut_size_property;
+   /**
+* @plane_blend_tf_property: Plane property to set a predefined
+* transfer function for pre-blending blend (after applying 3D LUT)
+* with or without LUT.
+*/
+   struct drm_property *plane_blend_tf_property;
 };
 
 #define AMDGPU_MAX_BL_LEVEL 0xFF
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
index 6b6c2980f0af..b6fa271ab0dd 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
@@ -785,6 +785,18 @@ struct dm_plane_state {
 *  drm_color_lut.
 */
struct drm_property_blob *lut3d;
+   /**
+* @blend_lut: blend lut lookup table blob. The blob (if not NULL) is an
+* array of  drm_color_lut.
+*/
+   struct drm_property_blob *blend_lut;
+   /**
+* @blend_tf:
+*
+* Pre-defined transfer function for converting plane pixel data before
+* applying blend LUT.
+*/
+   enum amdgpu_transfer_function blend_tf;
 };
 
 struct dm_crtc_state {
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index fbcee717bf0a..2d64332e6b80 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -252,6 +252,27 @@ amdgpu_dm_create_color_properties(struct amdgpu_device 
*adev)
return -ENOMEM;
adev->mode_info.plane_lut3d_size_property = prop;
 
+   prop = drm_property_create(adev_to_drm(adev),
+  DRM_MODE_PROP_BLOB,
+  "AMD_PLANE_BLEND_LUT", 0);
+   if (!prop)
+   return -ENOMEM;
+   adev->mode_info.plane_blend_lut_property = prop;
+
+   prop = drm_property_create_range(adev_to_drm(adev),
+DRM_MODE_PROP_IMMUTABLE,
+"AMD_PLANE_BLEND_LUT_SIZE", 0, 
UINT_MAX);
+   if (!prop)
+   return -ENOMEM;
+   adev->mode_info.plane_blend_lut_size_property = prop;
+
+   prop = amdgpu_create_tf_property(adev_to_drm(adev),
+"AMD_PLANE_BLEND_TF",
+amdgpu_eotf);
+   if (!prop)
+   return -ENOMEM;
+   adev->mode_info.plane_blend_tf_property = prop;
+
return 0;
 }
 #endif
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
index 8d6ddf19bb87..3fd57de7c5be 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
@@ -1333,6 +1333,7 @@ static void dm_drm_plane_reset(struct drm_plane *plane)
amdgpu_state->degamma_tf = AMDGPU_TRANSFER_FUNCTION_DEFAULT;
amdgpu_state->hdr_mult = AMDGPU_HDR_MULT_DEFAULT;
amdgpu_state->shaper_tf = AMDGPU_TRANSFER_FUNCTION_DEFAULT;
+   amdgpu_state->blend_tf = AMDGPU_TRANSFER_FUNCTION_DEFAULT;
 }
 
 static struct drm_plane_state *
@@ -1358,10 +1359,13 @@ dm_drm_plane_duplicate_state(struct drm_plane *plane)

[PATCH v2 13/34] drm/amd/display: add CRTC gamma TF driver-specific property

2023-08-10 Thread Melissa Wen

Add AMD pre-defined transfer function property to default DRM CRTC
gamma to convert to wire encoding with or without a user gamma LUT.

v2:
- enable CRTC prop in the end of driver-specific prop sequence
- define inverse EOTFs as supported regamma TFs
- reword driver-specific function doc to remove shaper/3D LUT

Co-developed-by: Joshua Ashton 
Signed-off-by: Joshua Ashton 
Signed-off-by: Melissa Wen 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h  |  5 ++
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h |  8 +++
 .../amd/display/amdgpu_dm/amdgpu_dm_color.c   |  7 ++
 .../amd/display/amdgpu_dm/amdgpu_dm_crtc.c| 72 +++
 4 files changed, 92 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
index fd0b7047d56b..abb871a912d7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
@@ -407,6 +407,11 @@ struct amdgpu_mode_info {
 * with or without LUT.
 */
struct drm_property *plane_blend_tf_property;
+   /* @regamma_tf_property: Transfer function for CRTC regamma
+* (post-blending). Possible values are defined by `enum
+* amdgpu_transfer_function`.
+*/
+   struct drm_property *regamma_tf_property;
 };
 
 #define AMDGPU_MAX_BL_LEVEL 0xFF
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
index b6fa271ab0dd..51471675c298 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
@@ -821,6 +821,14 @@ struct dm_crtc_state {
struct dc_info_packet vrr_infopacket;
 
int abm_level;
+
+/**
+* @regamma_tf:
+*
+* Pre-defined transfer function for converting internal FB -> wire
+* encoding.
+*/
+   enum amdgpu_transfer_function regamma_tf;
 };
 
 #define to_dm_crtc_state(x) container_of(x, struct dm_crtc_state, base)
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index 2d64332e6b80..841e0391f7fb 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -273,6 +273,13 @@ amdgpu_dm_create_color_properties(struct amdgpu_device 
*adev)
return -ENOMEM;
adev->mode_info.plane_blend_tf_property = prop;
 
+   prop = amdgpu_create_tf_property(adev_to_drm(adev),
+"AMD_CRTC_REGAMMA_TF",
+amdgpu_inv_eotf);
+   if (!prop)
+   return -ENOMEM;
+   adev->mode_info.regamma_tf_property = prop;
+
return 0;
 }
 #endif
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c
index 440fc0869a34..d746f0aa0f11 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c
@@ -253,6 +253,7 @@ static struct drm_crtc_state 
*dm_crtc_duplicate_state(struct drm_crtc *crtc)
state->freesync_config = cur->freesync_config;
state->cm_has_degamma = cur->cm_has_degamma;
state->cm_is_degamma_srgb = cur->cm_is_degamma_srgb;
+   state->regamma_tf = cur->regamma_tf;
state->crc_skip_count = cur->crc_skip_count;
state->mpo_requested = cur->mpo_requested;
/* TODO Duplicate dc_stream after objects are stream object is 
flattened */
@@ -289,6 +290,70 @@ static int amdgpu_dm_crtc_late_register(struct drm_crtc 
*crtc)
 }
 #endif
 
+#ifdef AMD_PRIVATE_COLOR
+/**
+ * dm_crtc_additional_color_mgmt - enable additional color properties
+ * @crtc: DRM CRTC
+ *
+ * This function lets the driver enable post-blending CRTC regamma transfer
+ * function property in addition to DRM CRTC gamma LUT. Default value means
+ * linear transfer function, which is the default CRTC gamma LUT behaviour
+ * without this property.
+ */
+static void
+dm_crtc_additional_color_mgmt(struct drm_crtc *crtc)
+{
+   struct amdgpu_device *adev = drm_to_adev(crtc->dev);
+
+   if(adev->dm.dc->caps.color.mpc.ogam_ram)
+   drm_object_attach_property(>base,
+  adev->mode_info.regamma_tf_property,
+  AMDGPU_TRANSFER_FUNCTION_DEFAULT);
+}
+
+static int
+amdgpu_dm_atomic_crtc_set_property(struct drm_crtc *crtc,
+  struct drm_crtc_state *state,
+  struct drm_property *property,
+  uint64_t val)
+{
+   struct amdgpu_device *adev = drm_to_adev(crtc->dev);
+   struct dm_crtc_state *acrtc_state = to_dm_crtc_state(state);
+
+   if (property == adev->mode_info.regamma_tf_property) {
+   if (acrtc_state->regamma_tf != val) {
+

[PATCH v2 10/34] drm/amd/display: add plane 3D LUT driver-specific properties

2023-08-10 Thread Melissa Wen

Add 3D LUT property for plane gamma correction using a 3D lookup table.
Since a 3D LUT has a limited number of entries in each dimension we want
to use them in an optimal fashion. This means using the 3D LUT in a
colorspace that is optimized for human vision, such as sRGB, PQ, or
another non-linear space. Therefore, userspace may need one 1D LUT
(shaper) before it to delinearize content and another 1D LUT after 3D
LUT (blend) to linearize content again for blending. The next patches
add these 1D LUTs to the plane color mgmt pipeline.

Signed-off-by: Melissa Wen 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h  | 10 
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h |  9 
 .../amd/display/amdgpu_dm/amdgpu_dm_color.c   | 14 +++
 .../amd/display/amdgpu_dm/amdgpu_dm_plane.c   | 23 +++
 4 files changed, 56 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
index 66bae0eed80c..730a88236501 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
@@ -363,6 +363,16 @@ struct amdgpu_mode_info {
 * @plane_hdr_mult_property:
 */
struct drm_property *plane_hdr_mult_property;
+   /**
+* @plane_lut3d_property: Plane property for gamma correction using a
+* 3D LUT (pre-blending).
+*/
+   struct drm_property *plane_lut3d_property;
+   /**
+* @plane_lut3d_size_property: Plane property to define the max
+* size of 3D LUT as supported by the driver (read-only).
+*/
+   struct drm_property *plane_lut3d_size_property;
 };
 
 #define AMDGPU_MAX_BL_LEVEL 0xFF
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
index 44f17ac11a5f..deea90212e31 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
@@ -769,6 +769,11 @@ struct dm_plane_state {
 * S31.32 sign-magnitude.
 */
__u64 hdr_mult;
+   /**
+* @lut3d: 3D lookup table blob. The blob (if not NULL) is an array of
+*  drm_color_lut.
+*/
+   struct drm_property_blob *lut3d;
 };
 
 struct dm_crtc_state {
@@ -854,6 +859,10 @@ void amdgpu_dm_update_freesync_caps(struct drm_connector 
*connector,
 
 void amdgpu_dm_trigger_timing_sync(struct drm_device *dev);
 
+/* 3D LUT max size is 17x17x17 */
+#define MAX_COLOR_3DLUT_ENTRIES 4913
+#define MAX_COLOR_3DLUT_BITDEPTH 12
+/* 1D LUT size */
 #define MAX_COLOR_LUT_ENTRIES 4096
 /* Legacy gamm LUT users such as X doesn't like large LUT sizes */
 #define MAX_COLOR_LEGACY_LUT_ENTRIES 256
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index b891aaf5f7c1..7e6d4df99a0c 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -209,6 +209,20 @@ amdgpu_dm_create_color_properties(struct amdgpu_device 
*adev)
return -ENOMEM;
adev->mode_info.plane_hdr_mult_property = prop;
 
+   prop = drm_property_create(adev_to_drm(adev),
+  DRM_MODE_PROP_BLOB,
+  "AMD_PLANE_LUT3D", 0);
+   if (!prop)
+   return -ENOMEM;
+   adev->mode_info.plane_lut3d_property = prop;
+
+   prop = drm_property_create_range(adev_to_drm(adev),
+DRM_MODE_PROP_IMMUTABLE,
+"AMD_PLANE_LUT3D_SIZE", 0, UINT_MAX);
+   if (!prop)
+   return -ENOMEM;
+   adev->mode_info.plane_lut3d_size_property = prop;
+
return 0;
 }
 #endif
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
index ab7f0332c431..882391f7add6 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
@@ -1353,6 +1353,8 @@ dm_drm_plane_duplicate_state(struct drm_plane *plane)
 
if (dm_plane_state->degamma_lut)
drm_property_blob_get(dm_plane_state->degamma_lut);
+   if (dm_plane_state->lut3d)
+   drm_property_blob_get(dm_plane_state->lut3d);
 
dm_plane_state->degamma_tf = old_dm_plane_state->degamma_tf;
dm_plane_state->hdr_mult = old_dm_plane_state->hdr_mult;
@@ -1426,6 +1428,8 @@ static void dm_drm_plane_destroy_state(struct drm_plane 
*plane,
 
if (dm_plane_state->degamma_lut)
drm_property_blob_put(dm_plane_state->degamma_lut);
+   if (dm_plane_state->lut3d)
+   drm_property_blob_put(dm_plane_state->lut3d);
 
if (dm_plane_state->dc_state)
dc_plane_state_release(dm_plane_state->dc_state);
@@ -1456,6 +1460,14 @@ dm_atomic_plane_attach_color_mgmt_properties(struct

[PATCH v2 11/34] drm/amd/display: add plane shaper LUT and TF driver-specific properties

2023-08-10 Thread Melissa Wen

On AMD HW, 3D LUT always assumes a preceding shaper 1D LUT used for
delinearizing and/or normalizing the color space before applying a 3D
LUT. Add pre-defined transfer function to enable delinearizing content
with or without shaper LUT, where AMD color module calculates the
resulting shaper curve. We apply an inverse EOTF to go from linear values
to encoded values. If we are already in a non-linear space and/or don't
need to normalize values, we can bypass shaper LUT with a linear
transfer function that is also the default TF value.

v2:
- squash commits for shaper LUT and shaper TF
- define inverse EOTF as supported shaper TFs

Signed-off-by: Melissa Wen 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h  | 16 ++
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 11 +++
 .../amd/display/amdgpu_dm/amdgpu_dm_color.c   | 29 +
 .../amd/display/amdgpu_dm/amdgpu_dm_plane.c   | 32 +++
 4 files changed, 88 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
index 730a88236501..4fb164204ee6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
@@ -363,6 +363,22 @@ struct amdgpu_mode_info {
 * @plane_hdr_mult_property:
 */
struct drm_property *plane_hdr_mult_property;
+   /**
+* @plane_shaper_lut_property: Plane property to set pre-blending shaper LUT
+* that converts color content before 3D LUT.
+*/
+   struct drm_property *plane_shaper_lut_property;
+   /**
+* @plane_shaper_lut_size_property: Plane property for the size of
+* pre-blending shaper LUT as supported by the driver (read-only).
+*/
+   struct drm_property *plane_shaper_lut_size_property;
+   /**
+* @plane_shaper_tf_property: Plane property to set a predefined
+* transfer function for pre-blending shaper (before applying 3D LUT)
+* with or without LUT.
+*/
+   struct drm_property *plane_shaper_tf_property;
/**
 * @plane_lut3d_property: Plane property for gamma correction using a
 * 3D LUT (pre-blending).
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
index deea90212e31..6b6c2980f0af 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
@@ -769,6 +769,17 @@ struct dm_plane_state {
 * S31.32 sign-magnitude.
 */
__u64 hdr_mult;
+   /**
+* @shaper_lut: shaper lookup table blob. The blob (if not NULL) is an
+* array of  drm_color_lut.
+*/
+   struct drm_property_blob *shaper_lut;
+   /**
+* @shaper_tf:
+*
+* Predefined transfer function to delinearize color space.
+*/
+   enum amdgpu_transfer_function shaper_tf;
/**
 * @lut3d: 3D lookup table blob. The blob (if not NULL) is an array of
 *  drm_color_lut.
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index 7e6d4df99a0c..fbcee717bf0a 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -151,6 +151,14 @@ static const u32 amdgpu_eotf =
BIT(AMDGPU_TRANSFER_FUNCTION_GAMMA24_EOTF) |
BIT(AMDGPU_TRANSFER_FUNCTION_GAMMA26_EOTF);
 
+static const u32 amdgpu_inv_eotf =
+   BIT(AMDGPU_TRANSFER_FUNCTION_SRGB_INV_EOTF) |
+   BIT(AMDGPU_TRANSFER_FUNCTION_BT709_INV_EOTF) |
+   BIT(AMDGPU_TRANSFER_FUNCTION_PQ_INV_EOTF) |
+   BIT(AMDGPU_TRANSFER_FUNCTION_GAMMA22_INV_EOTF) |
+   BIT(AMDGPU_TRANSFER_FUNCTION_GAMMA24_INV_EOTF) |
+   BIT(AMDGPU_TRANSFER_FUNCTION_GAMMA26_INV_EOTF);
+
 static struct drm_property *
 amdgpu_create_tf_property(struct drm_device *dev,
  const char *name,
@@ -209,6 +217,27 @@ amdgpu_dm_create_color_properties(struct amdgpu_device 
*adev)
return -ENOMEM;
adev->mode_info.plane_hdr_mult_property = prop;
 
+   prop = drm_property_create(adev_to_drm(adev),
+  DRM_MODE_PROP_BLOB,
+  "AMD_PLANE_SHAPER_LUT", 0);
+   if (!prop)
+   return -ENOMEM;
+   adev->mode_info.plane_shaper_lut_property = prop;
+
+   prop = drm_property_create_range(adev_to_drm(adev),
+DRM_MODE_PROP_IMMUTABLE,
+"AMD_PLANE_SHAPER_LUT_SIZE", 0, 
UINT_MAX);
+   if (!prop)
+   return -ENOMEM;
+   adev->mode_info.plane_shaper_lut_size_property = prop;
+
+   prop = amdgpu_create_tf_property(adev_to_drm(adev),
+"AMD_PLANE_SHAPER_TF",
+amdgpu_inv_eotf);
+   if (!prop)
+   return

[PATCH v2 09/34] drm/amd/display: add plane HDR multiplier driver-specific property

2023-08-10 Thread Melissa Wen

From: Joshua Ashton 

Multiplier to 'gain' the plane. When PQ is decoded using the fixed func
transfer function to the internal FP16 fb, 1.0 -> 80 nits (on AMD at
least). When sRGB is decoded, 1.0 -> 1.0. Therefore, 1.0 multiplier = 80
nits for SDR content. So if you want 203 nits for SDR content, pass in
(203.0 / 80.0).

Signed-off-by: Joshua Ashton 
Co-developed-by: Melissa Wen 
Signed-off-by: Melissa Wen 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h   |  4 
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h  | 14 ++
 .../drm/amd/display/amdgpu_dm/amdgpu_dm_color.c|  6 ++
 .../drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c| 13 +
 4 files changed, 37 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
index 6ef958a14e16..66bae0eed80c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
@@ -359,6 +359,10 @@ struct amdgpu_mode_info {
 * to go from scanout/encoded values to linear values.
 */
struct drm_property *plane_degamma_tf_property;
+   /**
+* @plane_hdr_mult_property:
+*/
+   struct drm_property *plane_hdr_mult_property;
 };
 
 #define AMDGPU_MAX_BL_LEVEL 0xFF
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
index f6251ed89684..44f17ac11a5f 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
@@ -54,6 +54,9 @@
 #define HDMI_AMD_VENDOR_SPECIFIC_DATA_BLOCK_IEEE_REGISTRATION_ID 0x1A
 #define AMD_VSDB_VERSION_3_FEATURECAP_REPLAYMODE 0x40
 #define HDMI_AMD_VENDOR_SPECIFIC_DATA_BLOCK_VERSION_3 0x3
+
+#define AMDGPU_HDR_MULT_DEFAULT (0x1LL)
+
 /*
 #include "include/amdgpu_dal_power_if.h"
 #include "amdgpu_dm_irq.h"
@@ -755,6 +758,17 @@ struct dm_plane_state {
 * linearize.
 */
enum amdgpu_transfer_function degamma_tf;
+   /**
+* @hdr_mult:
+*
+* Multiplier to 'gain' the plane.  When PQ is decoded using the fixed
+* func transfer function to the internal FP16 fb, 1.0 -> 80 nits (on
+* AMD at least). When sRGB is decoded, 1.0 -> 1.0, obviously.
+* Therefore, 1.0 multiplier = 80 nits for SDR content.  So if you
+* want, 203 nits for SDR content, pass in (203.0 / 80.0).  Format is
+* S31.32 sign-magnitude.
+*/
+   __u64 hdr_mult;
 };
 
 struct dm_crtc_state {
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index 7f13bcdaf016..b891aaf5f7c1 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -203,6 +203,12 @@ amdgpu_dm_create_color_properties(struct amdgpu_device 
*adev)
return -ENOMEM;
adev->mode_info.plane_degamma_tf_property = prop;
 
+   prop = drm_property_create_range(adev_to_drm(adev),
+0, "AMD_PLANE_HDR_MULT", 0, U64_MAX);
+   if (!prop)
+   return -ENOMEM;
+   adev->mode_info.plane_hdr_mult_property = prop;
+
return 0;
 }
 #endif
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
index 0a955abb1abf..ab7f0332c431 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
@@ -1331,6 +1331,7 @@ static void dm_drm_plane_reset(struct drm_plane *plane)
 
__drm_atomic_helper_plane_reset(plane, _state->base);
amdgpu_state->degamma_tf = AMDGPU_TRANSFER_FUNCTION_DEFAULT;
+   amdgpu_state->hdr_mult = AMDGPU_HDR_MULT_DEFAULT;
 }
 
 static struct drm_plane_state *
@@ -1354,6 +1355,7 @@ dm_drm_plane_duplicate_state(struct drm_plane *plane)
drm_property_blob_get(dm_plane_state->degamma_lut);
 
dm_plane_state->degamma_tf = old_dm_plane_state->degamma_tf;
+   dm_plane_state->hdr_mult = old_dm_plane_state->hdr_mult;
 
return _plane_state->base;
 }
@@ -1450,6 +1452,10 @@ dm_atomic_plane_attach_color_mgmt_properties(struct 
amdgpu_display_manager *dm,
   
dm->adev->mode_info.plane_degamma_tf_property,
   AMDGPU_TRANSFER_FUNCTION_DEFAULT);
}
+   /* HDR MULT is always available */
+   drm_object_attach_property(>base,
+  dm->adev->mode_info.plane_hdr_mult_property,
+  AMDGPU_HDR_MULT_DEFAULT);
 }
 
 static int
@@ -1476,6 +1482,11 @@ dm_atomic_plane_set_property(struct drm_plane *plane,
dm_plane_state->degamma_tf = val;
dm_plane_state->base.color_mgmt_changed = 1;
}
+   } else if (property ==

[PATCH v2 07/34] drm/amd/display: explicitly define EOTF and inverse EOTF

2023-08-10 Thread Melissa Wen

Instead of relying on color block names to get the transfer function
intention regarding encoding pixel's luminance, define supported
Electro-Optical Transfer Functions (EOTFs) and inverse EOTFs, which
include pure gamma or standardized transfer functions.

Suggested-by: Harry Wentland 
Signed-off-by: Melissa Wen 
---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 19 +++--
 .../amd/display/amdgpu_dm/amdgpu_dm_color.c   | 69 +++
 2 files changed, 67 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
index c749c9cb3d94..f6251ed89684 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
@@ -718,14 +718,21 @@ extern const struct amdgpu_ip_block_version dm_ip_block;
 
 enum amdgpu_transfer_function {
AMDGPU_TRANSFER_FUNCTION_DEFAULT,
-   AMDGPU_TRANSFER_FUNCTION_SRGB,
-   AMDGPU_TRANSFER_FUNCTION_BT709,
-   AMDGPU_TRANSFER_FUNCTION_PQ,
+   AMDGPU_TRANSFER_FUNCTION_SRGB_EOTF,
+   AMDGPU_TRANSFER_FUNCTION_BT709_EOTF,
+   AMDGPU_TRANSFER_FUNCTION_PQ_EOTF,
AMDGPU_TRANSFER_FUNCTION_LINEAR,
AMDGPU_TRANSFER_FUNCTION_UNITY,
-   AMDGPU_TRANSFER_FUNCTION_GAMMA22,
-   AMDGPU_TRANSFER_FUNCTION_GAMMA24,
-   AMDGPU_TRANSFER_FUNCTION_GAMMA26,
+   AMDGPU_TRANSFER_FUNCTION_GAMMA22_EOTF,
+   AMDGPU_TRANSFER_FUNCTION_GAMMA24_EOTF,
+   AMDGPU_TRANSFER_FUNCTION_GAMMA26_EOTF,
+   AMDGPU_TRANSFER_FUNCTION_SRGB_INV_EOTF,
+   AMDGPU_TRANSFER_FUNCTION_BT709_INV_EOTF,
+   AMDGPU_TRANSFER_FUNCTION_PQ_INV_EOTF,
+   AMDGPU_TRANSFER_FUNCTION_GAMMA22_INV_EOTF,
+   AMDGPU_TRANSFER_FUNCTION_GAMMA24_INV_EOTF,
+   AMDGPU_TRANSFER_FUNCTION_GAMMA26_INV_EOTF,
+AMDGPU_TRANSFER_FUNCTION_COUNT
 };
 
 struct dm_plane_state {
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index 56ce008b9095..cc2187c0879a 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -85,18 +85,59 @@ void amdgpu_dm_init_color_mod(void)
 }
 
 #ifdef AMD_PRIVATE_COLOR
-static const struct drm_prop_enum_list amdgpu_transfer_function_enum_list[] = {
-   { AMDGPU_TRANSFER_FUNCTION_DEFAULT, "Default" },
-   { AMDGPU_TRANSFER_FUNCTION_SRGB, "sRGB" },
-   { AMDGPU_TRANSFER_FUNCTION_BT709, "BT.709" },
-   { AMDGPU_TRANSFER_FUNCTION_PQ, "PQ (Perceptual Quantizer)" },
-   { AMDGPU_TRANSFER_FUNCTION_LINEAR, "Linear" },
-   { AMDGPU_TRANSFER_FUNCTION_UNITY, "Unity" },
-   { AMDGPU_TRANSFER_FUNCTION_GAMMA22, "Gamma 2.2" },
-   { AMDGPU_TRANSFER_FUNCTION_GAMMA24, "Gamma 2.4" },
-   { AMDGPU_TRANSFER_FUNCTION_GAMMA26, "Gamma 2.6" },
+static const char * const
+amdgpu_transfer_function_names[] = {
+   [AMDGPU_TRANSFER_FUNCTION_DEFAULT]  = "Default",
+   [AMDGPU_TRANSFER_FUNCTION_LINEAR]   = "Linear",
+   [AMDGPU_TRANSFER_FUNCTION_UNITY]= "Unity",
+   [AMDGPU_TRANSFER_FUNCTION_SRGB_EOTF]= "sRGB EOTF",
+   [AMDGPU_TRANSFER_FUNCTION_BT709_EOTF]   = "BT.709 EOTF",
+   [AMDGPU_TRANSFER_FUNCTION_PQ_EOTF]  = "PQ EOTF",
+   [AMDGPU_TRANSFER_FUNCTION_GAMMA22_EOTF] = "Gamma 2.2 EOTF",
+   [AMDGPU_TRANSFER_FUNCTION_GAMMA24_EOTF] = "Gamma 2.4 EOTF",
+   [AMDGPU_TRANSFER_FUNCTION_GAMMA26_EOTF] = "Gamma 2.6 EOTF",
+   [AMDGPU_TRANSFER_FUNCTION_SRGB_INV_EOTF]= "sRGB inv_EOTF",
+   [AMDGPU_TRANSFER_FUNCTION_BT709_INV_EOTF]   = "BT.709 inv_EOTF",
+   [AMDGPU_TRANSFER_FUNCTION_PQ_INV_EOTF]  = "PQ inv_EOTF",
+   [AMDGPU_TRANSFER_FUNCTION_GAMMA22_INV_EOTF] = "Gamma 2.2 inv_EOTF",
+   [AMDGPU_TRANSFER_FUNCTION_GAMMA24_INV_EOTF] = "Gamma 2.4 inv_EOTF",
+   [AMDGPU_TRANSFER_FUNCTION_GAMMA26_INV_EOTF] = "Gamma 2.6 inv_EOTF",
 };
 
+static const u32 amdgpu_eotf =
+   BIT(AMDGPU_TRANSFER_FUNCTION_SRGB_EOTF) |
+   BIT(AMDGPU_TRANSFER_FUNCTION_BT709_EOTF) |
+   BIT(AMDGPU_TRANSFER_FUNCTION_PQ_EOTF) |
+   BIT(AMDGPU_TRANSFER_FUNCTION_GAMMA22_EOTF) |
+   BIT(AMDGPU_TRANSFER_FUNCTION_GAMMA24_EOTF) |
+   BIT(AMDGPU_TRANSFER_FUNCTION_GAMMA26_EOTF);
+
+static struct drm_property *
+amdgpu_create_tf_property(struct drm_device *dev,
+ const char *name,
+ u32 supported_tf)
+{
+   u32 transfer_functions = supported_tf |
+BIT(AMDGPU_TRANSFER_FUNCTION_DEFAULT) |
+BIT(AMDGPU_TRANSFER_FUNCTION_LINEAR) |
+BIT(AMDGPU_TRANSFER_FUNCTION_UNITY);
+   struct drm_prop_enum_list enum_list[AMDGPU_TRANSFER_FUNCTION_COUNT];
+   int i, len;
+
+   len = 0;
+   for (i = 0; i < AMDGPU_TRANSFER_FUNCTION_COUNT;

[PATCH v2 08/34] drm/amd/display: document AMDGPU pre-defined transfer functions

2023-08-10 Thread Melissa Wen

Brief documentation about pre-defined transfer function usage on AMD
display driver and standardized EOTFs and inverse EOTFs.

Co-developed-by: Harry Wentland 
Signed-off-by: Harry Wentland 
Signed-off-by: Melissa Wen 
---
 .../amd/display/amdgpu_dm/amdgpu_dm_color.c   | 39 +++
 1 file changed, 39 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index cc2187c0879a..7f13bcdaf016 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -85,6 +85,45 @@ void amdgpu_dm_init_color_mod(void)
 }
 
 #ifdef AMD_PRIVATE_COLOR
+/* Pre-defined Transfer Functions (TF)
+ *
+ * AMD driver supports pre-defined mathematical functions for transferring
+ * between encoded values and optical/linear space. Depending on HW color caps,
+ * ROMs and curves built by the AMD color module support these transforms.
+ *
+ * The driver-specific color implementation exposes properties for pre-blending
+ * degamma TF, shaper TF (before 3D LUT), and blend(dpp.ogam) TF and
+ * post-blending regamma (mpc.ogam) TF. However, only pre-blending degamma
+ * supports ROM curves. AMD color module uses pre-defined coefficients to build
+ * curves for the other blocks. What can be done by each color block is
+ * described by struct dpp_color_caps and struct mpc_color_caps.
+ *
+ * AMD driver-specific color API exposes the following pre-defined transfer
+ * functions:
+ *
+ * - Linear/Unity: linear/identity relationship between pixel value and
+ *   luminance value;
+ * - Gamma 2.2, Gamma 2.4, Gamma 2.6: pure gamma functions;
+ * - sRGB: 2.4 gamma with small initial linear section as standardized by IEC
+ *   61966-2-1:1999;
+ * - BT.709 (BT.1886): 2.4 gamma with differences in the dark end of the scale.
+ *   Used in HD-TV and standardized by ITU-R BT.1886;
+ * - PQ (Perceptual Quantizer): used for HDR display, allows luminance range
+ *   capability of 0 to 10,000 nits; standardized by SMPTE ST 2084.
+ *
+ * In the driver-specific API, color block names attached to TF properties
+ * suggest the intention regarding non-linear encoding pixel's luminance
+ * values. As some newer encodings don't use gamma curve, we make encoding and
+ * decoding explicit by defining an enum list of transfer functions supported
+ * in terms of EOTF and inverse EOTF, where:
+ *
+ * - EOTF (electro-optical transfer function): is the transfer function to go
+ *   from the encoded value to an optical (linear) value. De-gamma functions
+ *   traditionally do this.
+ * - Inverse EOTF (simply the inverse of the EOTF): is usually intended to go
+ *   from an optical/linear space (which might have been used for blending)
+ *   back to the encoded values. Gamma functions traditionally do this. 
+ */
 static const char * const
 amdgpu_transfer_function_names[] = {
[AMDGPU_TRANSFER_FUNCTION_DEFAULT]  = "Default",
-- 
2.40.1

[PATCH v2 06/34] drm/amd/display: add plane degamma TF driver-specific property

2023-08-10 Thread Melissa Wen

From: Joshua Ashton 

Allow userspace to tell the kernel driver the input space so that it
uses the correct predefined transfer function (TF) to linearize
content with or without LUT.

v2:
- rename TF enum prefix from DRM_ to AMDGPU_ (Harry)
- remove HLG TF

Signed-off-by: Joshua Ashton 
Co-developed-by: Melissa Wen 
Signed-off-by: Melissa Wen 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h  |  5 +
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 19 +
 .../amd/display/amdgpu_dm/amdgpu_dm_color.c   | 21 +++
 .../amd/display/amdgpu_dm/amdgpu_dm_plane.c   | 19 +++--
 4 files changed, 62 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
index ec4621deac8c..6ef958a14e16 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
@@ -354,6 +354,11 @@ struct amdgpu_mode_info {
 * size of degamma LUT as supported by the driver (read-only).
 */
struct drm_property *plane_degamma_lut_size_property;
+   /**
+* @plane_degamma_tf_property: Plane pre-defined transfer function to
+* go from scanout/encoded values to linear values.
+*/
+   struct drm_property *plane_degamma_tf_property;
 };
 
 #define AMDGPU_MAX_BL_LEVEL 0xFF
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
index f0343bbf0fe1..c749c9cb3d94 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
@@ -716,6 +716,18 @@ static inline void amdgpu_dm_set_mst_status(uint8_t 
*status,
 
 extern const struct amdgpu_ip_block_version dm_ip_block;
 
+enum amdgpu_transfer_function {
+   AMDGPU_TRANSFER_FUNCTION_DEFAULT,
+   AMDGPU_TRANSFER_FUNCTION_SRGB,
+   AMDGPU_TRANSFER_FUNCTION_BT709,
+   AMDGPU_TRANSFER_FUNCTION_PQ,
+   AMDGPU_TRANSFER_FUNCTION_LINEAR,
+   AMDGPU_TRANSFER_FUNCTION_UNITY,
+   AMDGPU_TRANSFER_FUNCTION_GAMMA22,
+   AMDGPU_TRANSFER_FUNCTION_GAMMA24,
+   AMDGPU_TRANSFER_FUNCTION_GAMMA26,
+};
+
 struct dm_plane_state {
struct drm_plane_state base;
struct dc_plane_state *dc_state;
@@ -729,6 +741,13 @@ struct dm_plane_state {
 * The blob (if not NULL) is an array of &struct drm_color_lut.
 */
struct drm_property_blob *degamma_lut;
+   /**
+* @degamma_tf:
+*
+* Predefined transfer function to tell DC driver the input space to
+* linearize.
+*/
+   enum amdgpu_transfer_function degamma_tf;
 };
 
 struct dm_crtc_state {
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index cf175b86ba80..56ce008b9095 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -85,6 +85,18 @@ void amdgpu_dm_init_color_mod(void)
 }
 
 #ifdef AMD_PRIVATE_COLOR
+static const struct drm_prop_enum_list amdgpu_transfer_function_enum_list[] = {
+   { AMDGPU_TRANSFER_FUNCTION_DEFAULT, "Default" },
+   { AMDGPU_TRANSFER_FUNCTION_SRGB, "sRGB" },
+   { AMDGPU_TRANSFER_FUNCTION_BT709, "BT.709" },
+   { AMDGPU_TRANSFER_FUNCTION_PQ, "PQ (Perceptual Quantizer)" },
+   { AMDGPU_TRANSFER_FUNCTION_LINEAR, "Linear" },
+   { AMDGPU_TRANSFER_FUNCTION_UNITY, "Unity" },
+   { AMDGPU_TRANSFER_FUNCTION_GAMMA22, "Gamma 2.2" },
+   { AMDGPU_TRANSFER_FUNCTION_GAMMA24, "Gamma 2.4" },
+   { AMDGPU_TRANSFER_FUNCTION_GAMMA26, "Gamma 2.6" },
+};
+
 int
 amdgpu_dm_create_color_properties(struct amdgpu_device *adev)
 {
@@ -104,6 +116,15 @@ amdgpu_dm_create_color_properties(struct amdgpu_device 
*adev)
return -ENOMEM;
adev->mode_info.plane_degamma_lut_size_property = prop;
 
+   prop = drm_property_create_enum(adev_to_drm(adev),
+   DRM_MODE_PROP_ENUM,
+   "AMD_PLANE_DEGAMMA_TF",
+   amdgpu_transfer_function_enum_list,
+   
ARRAY_SIZE(amdgpu_transfer_function_enum_list));
+   if (!prop)
+   return -ENOMEM;
+   adev->mode_info.plane_degamma_tf_property = prop;
+
return 0;
 }
 #endif
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
index 488012d1558d..0a955abb1abf 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
@@ -1326,8 +1326,11 @@ static void dm_drm_plane_reset(struct drm_plane *plane)
amdgpu_state = kzalloc(sizeof(*amdgpu_state), GFP_KERNEL);
WARN_ON(amdgpu_state == NULL);
 
-   if (amdgpu_state)
-   __drm_atomic_helper_plane_reset(plane, &amdgpu_state->base);
+   if

[PATCH v2 04/34] drm/drm_plane: track color mgmt changes per plane

2023-08-10 Thread Melissa Wen

We will add color mgmt properties to DRM planes in the next patches and
we want to track when one of these properties changes to define atomic
commit behaviors. Using a similar approach from CRTC color props, we set
a color_mgmt_changed boolean whenever a plane color prop changes.

Reviewed-by: Harry Wentland 
Signed-off-by: Melissa Wen 
---
 drivers/gpu/drm/drm_atomic.c  | 1 +
 drivers/gpu/drm/drm_atomic_state_helper.c | 1 +
 include/drm/drm_plane.h   | 7 +++
 3 files changed, 9 insertions(+)

diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c
index 11f3a130f6f4..30aa3248bb0d 100644
--- a/drivers/gpu/drm/drm_atomic.c
+++ b/drivers/gpu/drm/drm_atomic.c
@@ -724,6 +724,7 @@ static void drm_atomic_plane_print_state(struct drm_printer 
*p,
   drm_get_color_encoding_name(state->color_encoding));
drm_printf(p, "\tcolor-range=%s\n",
   drm_get_color_range_name(state->color_range));
+   drm_printf(p, "\tcolor_mgmt_changed=%d\n", state->color_mgmt_changed);
 
if (plane->funcs->atomic_print_state)
plane->funcs->atomic_print_state(p, state);
diff --git a/drivers/gpu/drm/drm_atomic_state_helper.c 
b/drivers/gpu/drm/drm_atomic_state_helper.c
index 784e63d70a42..25bb0859fda7 100644
--- a/drivers/gpu/drm/drm_atomic_state_helper.c
+++ b/drivers/gpu/drm/drm_atomic_state_helper.c
@@ -338,6 +338,7 @@ void __drm_atomic_helper_plane_duplicate_state(struct 
drm_plane *plane,
state->fence = NULL;
state->commit = NULL;
state->fb_damage_clips = NULL;
+   state->color_mgmt_changed = false;
 }
 EXPORT_SYMBOL(__drm_atomic_helper_plane_duplicate_state);
 
diff --git a/include/drm/drm_plane.h b/include/drm/drm_plane.h
index 51291983ea44..52c3287da0da 100644
--- a/include/drm/drm_plane.h
+++ b/include/drm/drm_plane.h
@@ -237,6 +237,13 @@ struct drm_plane_state {
 
/** @state: backpointer to global drm_atomic_state */
struct drm_atomic_state *state;
+
+   /**
+* @color_mgmt_changed: Color management properties have changed. Used
+* by the atomic helpers and drivers to steer the atomic commit control
+* flow.
+*/
+   bool color_mgmt_changed : 1;
 };
 
 static inline struct drm_rect
-- 
2.40.1

[PATCH v2 05/34] drm/amd/display: add driver-specific property for plane degamma LUT

2023-08-10 Thread Melissa Wen

Hook up driver-specific atomic operations for managing AMD color
properties. Create AMD driver-specific color management properties
and attach them according to HW capabilities defined by `struct
dc_color_caps`.

First, add the plane degamma LUT properties, i.e. the user blob and its
size. We will add more plane color properties in the next patches. In
addition, we define AMD_PRIVATE_COLOR to guard these driver-specific
plane properties.

Plane degamma can be used to linearize input space for arithmetical
operations that are more accurate when applied in linear color.

v2:
- update degamma LUT prop description
- move private color operations from amdgpu_display to amdgpu_dm_color

Co-developed-by: Joshua Ashton 
Signed-off-by: Joshua Ashton 
Signed-off-by: Melissa Wen 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h  | 11 +++
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c |  5 ++
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 11 +++
 .../amd/display/amdgpu_dm/amdgpu_dm_color.c   | 24 ++
 .../amd/display/amdgpu_dm/amdgpu_dm_plane.c   | 81 +++
 5 files changed, 132 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
index 32fe05c810c6..ec4621deac8c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
@@ -343,6 +343,17 @@ struct amdgpu_mode_info {
int disp_priority;
const struct amdgpu_display_funcs *funcs;
const enum drm_plane_type *plane_type;
+
+   /* Driver-private color mgmt props */
+
+   /* @plane_degamma_lut_property: Plane property to set a degamma LUT to
+* convert input space before blending.
+*/
+   struct drm_property *plane_degamma_lut_property;
+   /* @plane_degamma_lut_size_property: Plane property to define the max
+* size of degamma LUT as supported by the driver (read-only).
+*/
+   struct drm_property *plane_degamma_lut_size_property;
 };
 
 #define AMDGPU_MAX_BL_LEVEL 0xFF
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index dffa584410a5..77b4d671a9e0 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -3949,6 +3949,11 @@ static int amdgpu_dm_mode_config_init(struct 
amdgpu_device *adev)
return r;
}
 
+#ifdef AMD_PRIVATE_COLOR
+   if (amdgpu_dm_create_color_properties(adev))
+   return -ENOMEM;
+#endif
+
r = amdgpu_dm_audio_init(adev);
if (r) {
dc_release_state(state->context);
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
index a2d34be82613..f0343bbf0fe1 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
@@ -719,6 +719,16 @@ extern const struct amdgpu_ip_block_version dm_ip_block;
 struct dm_plane_state {
struct drm_plane_state base;
struct dc_plane_state *dc_state;
+
+   /* Plane color mgmt */
+   /**
+* @degamma_lut:
+*
+* 1D LUT for mapping framebuffer/plane pixel data before sampling or
+* blending operations. It's usually applied to linearize input space.
+* The blob (if not NULL) is an array of &struct drm_color_lut.
+*/
+   struct drm_property_blob *degamma_lut;
 };
 
 struct dm_crtc_state {
@@ -809,6 +819,7 @@ void amdgpu_dm_trigger_timing_sync(struct drm_device *dev);
 #define MAX_COLOR_LEGACY_LUT_ENTRIES 256
 
 void amdgpu_dm_init_color_mod(void);
+int amdgpu_dm_create_color_properties(struct amdgpu_device *adev);
 int amdgpu_dm_verify_lut_sizes(const struct drm_crtc_state *crtc_state);
 int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc);
 int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc,
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index a4cb23d059bd..cf175b86ba80 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -84,6 +84,30 @@ void amdgpu_dm_init_color_mod(void)
setup_x_points_distribution();
 }
 
+#ifdef AMD_PRIVATE_COLOR
+int
+amdgpu_dm_create_color_properties(struct amdgpu_device *adev)
+{
+   struct drm_property *prop;
+
+   prop = drm_property_create(adev_to_drm(adev),
+  DRM_MODE_PROP_BLOB,
+  "AMD_PLANE_DEGAMMA_LUT", 0);
+   if (!prop)
+   return -ENOMEM;
+   adev->mode_info.plane_degamma_lut_property = prop;
+
+   prop = drm_property_create_range(adev_to_drm(adev),
+DRM_MODE_PROP_IMMUTABLE,
+"AMD_PLANE_DEGAMMA_LUT_SIZE", 0, 
UINT_MAX);
+   if (!prop)
+

[PATCH v2 03/34] drm/drm_property: make replace_property_blob_from_id a DRM helper

2023-08-10 Thread Melissa Wen

Place it in drm_property where drm_property_replace_blob and
drm_property_lookup_blob live. Then we can use the DRM helper for
driver-specific KMS properties too.

Reviewed-by: Harry Wentland 
Reviewed-by: Liviu Dudau 
Signed-off-by: Melissa Wen 
---
 drivers/gpu/drm/arm/malidp_crtc.c |  2 +-
 drivers/gpu/drm/drm_atomic_uapi.c | 43 ---
 drivers/gpu/drm/drm_property.c| 49 +++
 include/drm/drm_property.h|  6 
 4 files changed, 61 insertions(+), 39 deletions(-)

diff --git a/drivers/gpu/drm/arm/malidp_crtc.c 
b/drivers/gpu/drm/arm/malidp_crtc.c
index dc01c43f6193..d72c22dcf685 100644
--- a/drivers/gpu/drm/arm/malidp_crtc.c
+++ b/drivers/gpu/drm/arm/malidp_crtc.c
@@ -221,7 +221,7 @@ static int malidp_crtc_atomic_check_ctm(struct drm_crtc 
*crtc,
 
/*
 * The size of the ctm is checked in
-* drm_atomic_replace_property_blob_from_id.
+* drm_property_replace_blob_from_id.
 */
ctm = (struct drm_color_ctm *)state->ctm->data;
for (i = 0; i < ARRAY_SIZE(ctm->matrix); ++i) {
diff --git a/drivers/gpu/drm/drm_atomic_uapi.c 
b/drivers/gpu/drm/drm_atomic_uapi.c
index d867e7f9f2cd..a6a9ee5086dd 100644
--- a/drivers/gpu/drm/drm_atomic_uapi.c
+++ b/drivers/gpu/drm/drm_atomic_uapi.c
@@ -362,39 +362,6 @@ static s32 __user *get_out_fence_for_connector(struct 
drm_atomic_state *state,
return fence_ptr;
 }
 
-static int
-drm_atomic_replace_property_blob_from_id(struct drm_device *dev,
-struct drm_property_blob **blob,
-uint64_t blob_id,
-ssize_t expected_size,
-ssize_t expected_elem_size,
-bool *replaced)
-{
-   struct drm_property_blob *new_blob = NULL;
-
-   if (blob_id != 0) {
-   new_blob = drm_property_lookup_blob(dev, blob_id);
-   if (new_blob == NULL)
-   return -EINVAL;
-
-   if (expected_size > 0 &&
-   new_blob->length != expected_size) {
-   drm_property_blob_put(new_blob);
-   return -EINVAL;
-   }
-   if (expected_elem_size > 0 &&
-   new_blob->length % expected_elem_size != 0) {
-   drm_property_blob_put(new_blob);
-   return -EINVAL;
-   }
-   }
-
-   *replaced |= drm_property_replace_blob(blob, new_blob);
-   drm_property_blob_put(new_blob);
-
-   return 0;
-}
-
 static int drm_atomic_crtc_set_property(struct drm_crtc *crtc,
struct drm_crtc_state *state, struct drm_property *property,
uint64_t val)
@@ -415,7 +382,7 @@ static int drm_atomic_crtc_set_property(struct drm_crtc 
*crtc,
} else if (property == config->prop_vrr_enabled) {
state->vrr_enabled = val;
} else if (property == config->degamma_lut_property) {
-   ret = drm_atomic_replace_property_blob_from_id(dev,
+   ret = drm_property_replace_blob_from_id(dev,
&state->degamma_lut,
val,
-1, sizeof(struct drm_color_lut),
@@ -423,7 +390,7 @@ static int drm_atomic_crtc_set_property(struct drm_crtc 
*crtc,
state->color_mgmt_changed |= replaced;
return ret;
} else if (property == config->ctm_property) {
-   ret = drm_atomic_replace_property_blob_from_id(dev,
+   ret = drm_property_replace_blob_from_id(dev,
&state->ctm,
val,
sizeof(struct drm_color_ctm), -1,
@@ -431,7 +398,7 @@ static int drm_atomic_crtc_set_property(struct drm_crtc 
*crtc,
state->color_mgmt_changed |= replaced;
return ret;
} else if (property == config->gamma_lut_property) {
-   ret = drm_atomic_replace_property_blob_from_id(dev,
+   ret = drm_property_replace_blob_from_id(dev,
&state->gamma_lut,
val,
-1, sizeof(struct drm_color_lut),
@@ -563,7 +530,7 @@ static int drm_atomic_plane_set_property(struct drm_plane 
*plane,
} else if (property == plane->color_range_property) {
state->color_range = val;
} else if (property == config->prop_fb_damage_clips) {
-   ret = drm_atomic_replace_property_blob_from_id(dev,
+   ret = drm_property_replace_blob_from_id(dev,
&state->fb_damage_clips,
val,
-1,
@@ -729,7 +696,7 @@ static int

[PATCH v2 02/34] drm/drm_mode_object: increase max objects to accommodate new color props

2023-08-10 Thread Melissa Wen

DRM_OBJECT_MAX_PROPERTY limits the number of properties to be attached
and we are increasing that value every time we add a new property (generic
or driver-specific).

In this series, we are adding 13 new KMS driver-specific properties for
AMD color management:
- CRTC Gamma enumerated Transfer Function
- Plane: Degamma LUT+size+TF, HDR multiplier, shaper LUT+size+TF, 3D
  LUT+size, blend LUT+size+TF (12)

Therefore, just increase DRM_OBJECT_MAX_PROPERTY to a number (64) that
accommodates these new properties and gives some room for others,
to avoid changing this number every time we add a new KMS property.

Reviewed-by: Harry Wentland 
Reviewed-by: Simon Ser 
Signed-off-by: Melissa Wen 
---
 include/drm/drm_mode_object.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/drm/drm_mode_object.h b/include/drm/drm_mode_object.h
index 912f1e415685..08d7a7f0188f 100644
--- a/include/drm/drm_mode_object.h
+++ b/include/drm/drm_mode_object.h
@@ -60,7 +60,7 @@ struct drm_mode_object {
void (*free_cb)(struct kref *kref);
 };
 
-#define DRM_OBJECT_MAX_PROPERTY 24
+#define DRM_OBJECT_MAX_PROPERTY 64
 /**
  * struct drm_object_properties - property tracking for &drm_mode_object
  */
-- 
2.40.1

[PATCH v2 00/34] drm/amd/display: add AMD driver-specific properties for color mgmt

2023-08-10 Thread Melissa Wen

Hi all,

Here is the next version of our work to enable AMD driver-specific color
management properties [1][2]. This series is a collection of
contributions from Joshua, Harry, and me to enhance the AMD KMS color
pipeline for Steam Deck/SteamOS by exposing additional pre-blending and
post-blending color capabilities from those available in the current DRM
KMS API[3].

The userspace case here is Gamescope which is the compositor for
SteamOS. Gamescope is already using these features to implement its
color management pipeline [4].

In this version, I try to address all concerns shared in the previous
one, i.e.:
- Replace DRM_ by AMDGPU_ prefix for transfer function enumeration; 
- Explicitly define EOTFs and inverse EOTFs and set props accordingly;
- Document pre-defined transfer functions;
- Remove misleading comments;
- Remove post-blending/MPC shaper and 3D LUT support;
- Move driver-specific property operations from amdgpu_display.c to
  amdgpu_dm_color.c;
- Reset planes if any color props change;
- Nits/small fixes;

Bearing in mind the complexity of color concepts, I believe there is a
high chance of some misunderstanding from my side when defining EOTFs
and documenting pre-defined TFs. So, reviews are very important and
welcome (thanks in advance). FWIW, I added Harry as a co-developer of
this TF documentation since I based it on his description of EOTF/inv_EOTF
and previous documentation work [5]. Let me know if there is a better
way for credits.

Two DC patches were already applied and, therefore, removed from the
series. I added r-b according to previous feedback. We also add plane
CTM to driver-specific properties. As a result, this is the updated list
of all driver-specific color properties exposed by this series:

- plane degamma LUT and pre-defined TF;
- plane HDR multiplier;
- plane CTM 3x4;
- plane shaper LUT and pre-defined TF;
- plane 3D LUT;
- plane blend LUT and pre-defined TF;
- CRTC gamma pre-defined TF;

Remember you can find the AMD HW color capabilities documented here:
https://dri.freedesktop.org/docs/drm/gpu/amdgpu/display/display-manager.html#color-management-properties

Worth mentioning that the pre-blending degamma block can use ROM curves
for some pre-defined TFs, but the other blocks use the AMD color module
to calculate this curve considering pre-defined coefficients.

We need changes on DC gamut remap matrix to support the plane and CRTC
CTM on drivers that support both. I've sent a previous patch to apply
these changes to all DCN3+ families [6]. Here I use the same changes but
limited to DCN301. Just let me know if you prefer the previous/expanded
version.

Finally, this is the Linux/AMD color management API before and after
blending with the driver-specific properties:

+--+
|   PLANE  |
|  |
|  ++  |
|  | AMD Degamma|  |
|  ||  |
|  | EOTF | 1D LUT  |  |
|  ++---+  |
|   |  |
|  +v---+  |
|  |AMD HDR |  |
|  |Multiply|  |
|  ++---+  |
|   |  |
|  +v---+  |
|  |  AMD CTM (3x4) |  |
|  ++---+  |
|   |  |
|  +v---+  |
|  | AMD Shaper |  |
|  ||  |
|  | inv_EOTF | |  |
|  | Custom 1D LUT  |  |
|  ++---+  |
|   |  |
|  +v---+  |
|  |   AMD 3D LUT   |  |
|  |   17^3/12-bit  |  |
|  ++---+  |
|   |  |
|  +v---+  |
|  | AMD Blend  |  |
|  ||  |
|  | EOTF | 1D LUT  |  |
|  ++---+  |
|   |  |
++--v-++
||  Blending  ||
++--+-++
|CRTC   |  |
|   |  |
|   +---v---+  |
|   | DRM Degamma   |  |
|   |   |  |
|   | Custom 1D LUT |  |
|   +---+---+  |
|   |  |
|   +---v---+  |
|   | DRM CTM (3x3) |  |
|   +---+---+  |
|   |  |
|   +---v---+  |
|   | DRM Gamma |  |
|   |   |  |
|   | Custom 1D LUT |  |
|   +---+  |
|   | *AMD Gamma|  |
|   |   inv_EOTF|  |
|   +---+  |
|  |
+--+

Let me know your thoughts.

Best Regards,

Melissa Wen

[1] https://lore.kernel.org/dri-devel/20230423141051.702990-1-m...@igalia.com
[2] https://lore.kernel.org/dri-devel/20230523221520.3115570-1-m...@igalia.com
[3] 
https://github.com/ValveSoftware/gamescope/blob/master/src/docs/Steam%20Deck%20Display%20Pipeline.png
[4] https://github.com/ValveSoftware/gamescope
[5] 
https://lore.kernel.org/dri-devel/20210730204134.21769-1-harry.wentl...@amd.com
[6] https://lore.kernel.org/dri-devel/20230721132431.692158-1-m...@igalia.com


Harry Wentland (1):
  drm/amd/display: fix segment distribution for linear LUTs

Joshua Ashton (14):
  drm/amd/display: add plane degamma TF driver-specific property
  drm/amd/display: add

[PATCH v2 01/34] drm/amd/display: fix segment distribution for linear LUTs

2023-08-10 Thread Melissa Wen

From: Harry Wentland 

The region and segment calculation was incapable of dealing
with regions of more than 16 segments. We first fix this.

Now that we can support regions up to 256 elements we can
define a better segment distribution for near-linear LUTs
for our maximum of 256 HW-supported points.

With these changes an "identity" LUT looks visually
indistinguishable from bypass and allows us to use
our 3DLUT.

Signed-off-by: Harry Wentland 
Signed-off-by: Melissa Wen 
---
 .../amd/display/dc/dcn10/dcn10_cm_common.c| 93 +++
 1 file changed, 75 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c 
b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c
index 3538973bd0c6..04b2e04b68f3 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c
@@ -349,20 +349,37 @@ bool cm_helper_translate_curve_to_hw_format(struct 
dc_context *ctx,
 * segment is from 2^-10 to 2^1
 * There are less than 256 points, for optimization
 */
-   seg_distr[0] = 3;
-   seg_distr[1] = 4;
-   seg_distr[2] = 4;
-   seg_distr[3] = 4;
-   seg_distr[4] = 4;
-   seg_distr[5] = 4;
-   seg_distr[6] = 4;
-   seg_distr[7] = 4;
-   seg_distr[8] = 4;
-   seg_distr[9] = 4;
-   seg_distr[10] = 1;
+   if (output_tf->tf == TRANSFER_FUNCTION_LINEAR) {
+   seg_distr[0] = 0; /* 2 */
+   seg_distr[1] = 1; /* 4 */
+   seg_distr[2] = 2; /* 4 */
+   seg_distr[3] = 3; /* 8 */
+   seg_distr[4] = 4; /* 16 */
+   seg_distr[5] = 5; /* 32 */
+   seg_distr[6] = 6; /* 64 */
+   seg_distr[7] = 7; /* 128 */
+
+   region_start = -8;
+   region_end = 1;
+   } else {
+   seg_distr[0] = 3; /* 8 */
+   seg_distr[1] = 4; /* 16 */
+   seg_distr[2] = 4;
+   seg_distr[3] = 4;
+   seg_distr[4] = 4;
+   seg_distr[5] = 4;
+   seg_distr[6] = 4;
+   seg_distr[7] = 4;
+   seg_distr[8] = 4;
+   seg_distr[9] = 4;
+   seg_distr[10] = 1; /* 2 */
+   /* total = 8*16 + 8 + 64 + 2 = */
+
+   region_start = -10;
+   region_end = 1;
+   }
+
 
-   region_start = -10;
-   region_end = 1;
}
 
for (i = region_end - region_start; i < MAX_REGIONS_NUMBER ; i++)
@@ -375,16 +392,56 @@ bool cm_helper_translate_curve_to_hw_format(struct 
dc_context *ctx,
 
j = 0;
for (k = 0; k < (region_end - region_start); k++) {
-   increment = NUMBER_SW_SEGMENTS / (1 << seg_distr[k]);
+   /*
+* We're using an ugly-ish hack here. Our HW allows for
+* 256 segments per region but SW_SEGMENTS is 16.
+* SW_SEGMENTS has some undocumented relationship to
+* the number of points in the tf_pts struct, which
+* is 512, unlike what's suggested by TRANSFER_FUNC_POINTS.
+*
+* In order to work past this dilemma we'll scale our
+* increment by (1 << 4) and then do the inverse (>> 4)
+* when accessing the elements in tf_pts.
+*
+* TODO: find a better way using SW_SEGMENTS and
+*   TRANSFER_FUNC_POINTS definitions
+*/
+   increment = (NUMBER_SW_SEGMENTS << 4) / (1 << seg_distr[k]);
start_index = (region_start + k + MAX_LOW_POINT) *
NUMBER_SW_SEGMENTS;
-   for (i = start_index; i < start_index + NUMBER_SW_SEGMENTS;
+   for (i = (start_index << 4); i < (start_index << 4) + 
(NUMBER_SW_SEGMENTS << 4);
i += increment) {
+   struct fixed31_32 in_plus_one, in;
+   struct fixed31_32 value, red_value, green_value, 
blue_value;
+   uint32_t t = i & 0xf;
+
if (j == hw_points - 1)
break;
-   rgb_resulted[j].red = output_tf->tf_pts.red[i];
-   rgb_resulted[j].green = output_tf->tf_pts.green[i];
-   rgb_resulted[j].blue = output_tf->tf_pts.blue[i];
+
+   in_plus_one = output_tf->tf_pts.red[(i >> 4) + 1];
+   in = output_tf->tf_pts.red[i >> 4];
+   value = dc_fixpt_sub(in_plus_one, in);
+

Re: [PATCH V8 1/9] drivers core: Add support for Wifi band RF mitigations

2023-08-10 Thread Randy Dunlap




On 8/10/23 00:37, Evan Quan wrote:
> diff --git a/Documentation/admin-guide/kernel-parameters.txt 
> b/Documentation/admin-guide/kernel-parameters.txt
> index a1457995fd41..21f73a0bbd0b 100644
> --- a/Documentation/admin-guide/kernel-parameters.txt
> +++ b/Documentation/admin-guide/kernel-parameters.txt
> @@ -7152,3 +7152,12 @@
>   xmon commands.
>   off xmon is disabled.
>  
> + wbrf=   [KNL]
> + Format: { on | auto | off }
> + Controls if WBRF features should be enabled or disabled
> + forcely. Default is auto.

"forcely" is not a word. "forcedly" is a word, but it's not used very much
AFAIK.
I would probably write "Controls if WBRF features should be forced on or off."

> + on  Force enable the WBRF features.
> + autoUp to the system to do proper checks to
> + determine the WBRF features should be enabled
> + or not.
> + off Force disable the WBRF features.

-- 
~Randy

Re: [PATCH] drm/amdgpu: don't allow userspace to create a doorbell BO

2023-08-10 Thread Alex Deucher

Ping?

On Wed, Aug 9, 2023 at 3:10 PM Alex Deucher  wrote:
>
> We need the domains in amdgpu_drm.h for the kernel driver to manage
> the pool, but we don't want userspace using it until the code
> is ready.  So reject for now.
>
> Signed-off-by: Alex Deucher 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 4 
>  1 file changed, 4 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> index 693b1fd1191a..ca4d2d430e28 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> @@ -289,6 +289,10 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void 
> *data,
> uint32_t handle, initial_domain;
> int r;
>
> +   /* reject DOORBELLs until userspace code to use it is available */
> +   if (args->in.domains & AMDGPU_GEM_DOMAIN_DOORBELL)
> +   return -EINVAL;
> +
> /* reject invalid gem flags */
> if (flags & ~(AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
>   AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
> --
> 2.41.0
>

Re: [PATCH] drm/amdgpu: Keep reset handlers shared

2023-08-10 Thread Christian König


Am 10.08.23 um 13:44 schrieb Lijo Lazar:

Instead of maintaining a list per device, keep the reset handlers common
per ASIC family. A pointer to the list of handlers is maintained in
reset control.


Why should this be beneficial?

Christian.



Signed-off-by: Lijo Lazar 
---
  drivers/gpu/drm/amd/amdgpu/aldebaran.c  | 19 +++
  drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c   |  8 
  drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h   | 16 
  drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c | 20 +++-
  drivers/gpu/drm/amd/amdgpu/smu_v13_0_10.c   | 19 +++
  5 files changed, 45 insertions(+), 37 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/aldebaran.c 
b/drivers/gpu/drm/amd/amdgpu/aldebaran.c
index 2b97b8a96fb4..82e1c83a7ccc 100644
--- a/drivers/gpu/drm/amd/amdgpu/aldebaran.c
+++ b/drivers/gpu/drm/amd/amdgpu/aldebaran.c
@@ -48,20 +48,19 @@ aldebaran_get_reset_handler(struct amdgpu_reset_control 
*reset_ctl,
  {
struct amdgpu_reset_handler *handler;
struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle;
+   int i;
  
  	if (reset_context->method != AMD_RESET_METHOD_NONE) {

dev_dbg(adev->dev, "Getting reset handler for method %d\n",
reset_context->method);
-   list_for_each_entry(handler, &reset_ctl->reset_handlers,
-handler_list) {
+   for_each_handler(i, handler, reset_ctl) {
if (handler->reset_method == reset_context->method)
return handler;
}
}
  
  	if (aldebaran_is_mode2_default(reset_ctl)) {

-   list_for_each_entry(handler, &reset_ctl->reset_handlers,
-handler_list) {
+   for_each_handler(i, handler, reset_ctl) {
if (handler->reset_method == AMD_RESET_METHOD_MODE2) {
reset_context->method = AMD_RESET_METHOD_MODE2;
return handler;
@@ -124,9 +123,9 @@ static void aldebaran_async_reset(struct work_struct *work)
struct amdgpu_reset_control *reset_ctl =
container_of(work, struct amdgpu_reset_control, reset_work);
struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle;
+   int i;
  
-	list_for_each_entry(handler, &reset_ctl->reset_handlers,

-handler_list) {
+   for_each_handler(i, handler, reset_ctl) {
if (handler->reset_method == reset_ctl->active_reset) {
dev_dbg(adev->dev, "Resetting device\n");
handler->do_reset(adev);
@@ -395,6 +394,11 @@ static struct amdgpu_reset_handler aldebaran_mode2_handler 
= {
.do_reset   = aldebaran_mode2_reset,
  };
  
+static struct amdgpu_reset_handler

+   *aldebaran_rst_handlers[AMDGPU_RESET_MAX_HANDLERS] = {
+   &aldebaran_mode2_handler,
+   };
+
  int aldebaran_reset_init(struct amdgpu_device *adev)
  {
struct amdgpu_reset_control *reset_ctl;
@@ -408,10 +412,9 @@ int aldebaran_reset_init(struct amdgpu_device *adev)
reset_ctl->active_reset = AMD_RESET_METHOD_NONE;
reset_ctl->get_reset_handler = aldebaran_get_reset_handler;
  
-	INIT_LIST_HEAD(&reset_ctl->reset_handlers);

INIT_WORK(&reset_ctl->reset_work, reset_ctl->async_reset);
/* Only mode2 is handled through reset control now */
-   amdgpu_reset_add_handler(reset_ctl, &aldebaran_mode2_handler);
+   reset_ctl->reset_handlers = &aldebaran_rst_handlers;
  
  	adev->reset_cntl = reset_ctl;
  
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c

index 5fed06ffcc6b..02d874799c16 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
@@ -26,14 +26,6 @@
  #include "sienna_cichlid.h"
  #include "smu_v13_0_10.h"
  
-int amdgpu_reset_add_handler(struct amdgpu_reset_control *reset_ctl,

-struct amdgpu_reset_handler *handler)
-{
-   /* TODO: Check if handler exists? */
-   list_add_tail(&handler->handler_list, &reset_ctl->reset_handlers);
-   return 0;
-}
-
  int amdgpu_reset_init(struct amdgpu_device *adev)
  {
int ret = 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
index f4a501ff87d9..471d789b33a5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
@@ -26,6 +26,8 @@
  
  #include "amdgpu.h"
  
+#define AMDGPU_RESET_MAX_HANDLERS 5

+
  enum AMDGPU_RESET_FLAGS {
  
  	AMDGPU_NEED_FULL_RESET = 0,

@@ -44,7 +46,6 @@ struct amdgpu_reset_context {
  
  struct amdgpu_reset_handler {

enum amd_reset_method reset_method;
-   struct list_head handler_list;
int (*prepare_env)(struct amdgpu_reset_control *reset_ctl,
   struct amdgpu_reset_context *context);

Re: [PATCH V8 3/9] cfg80211: expose nl80211_chan_width_to_mhz for wide sharing

2023-08-10 Thread Jeff Johnson


On 8/10/2023 12:37 AM, Evan Quan wrote:

The newly added WBRF feature needs this interface for channel
width calculation.

Signed-off-by: Evan Quan 
---
  include/net/cfg80211.h | 8 
  net/wireless/chan.c| 3 ++-
  2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 7c7d03aa9d06..f50508e295db 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -920,6 +920,14 @@ const struct cfg80211_chan_def *
  cfg80211_chandef_compatible(const struct cfg80211_chan_def *chandef1,
const struct cfg80211_chan_def *chandef2);
  
+/**

+ * nl80211_chan_width_to_mhz - get the channel width in Mhz
+ * @chan_width: the channel width from &enum nl80211_chan_width
+ * Return: channel width in Mhz if the chan_width from &enum nl80211_chan_width
+ * is valid. -1 otherwise.


SI nit: s/Mhz/MHz/ in both places


+ */
+int nl80211_chan_width_to_mhz(enum nl80211_chan_width chan_width);
+
  /**
   * cfg80211_chandef_valid - check if a channel definition is valid
   * @chandef: the channel definition to check
diff --git a/net/wireless/chan.c b/net/wireless/chan.c
index 0b7e81db383d..227db04eac42 100644
--- a/net/wireless/chan.c
+++ b/net/wireless/chan.c
@@ -141,7 +141,7 @@ static bool cfg80211_edmg_chandef_valid(const struct 
cfg80211_chan_def *chandef)
return true;
  }
  
-static int nl80211_chan_width_to_mhz(enum nl80211_chan_width chan_width)

+int nl80211_chan_width_to_mhz(enum nl80211_chan_width chan_width)
  {
int mhz;
  
@@ -190,6 +190,7 @@ static int nl80211_chan_width_to_mhz(enum nl80211_chan_width chan_width)

}
return mhz;
  }
+EXPORT_SYMBOL(nl80211_chan_width_to_mhz);
  
  static int cfg80211_chandef_get_width(const struct cfg80211_chan_def *c)

  {

RE: [PATCH] drm/amd/pm: Fix temperature unit of SMU v13.0.6

2023-08-10 Thread Wang, Yang(Kevin)

[AMD Official Use Only - General]

Reviewed-by: Yang Wang 

Best Regards,
Kevin

-Original Message-
From: amd-gfx  On Behalf Of Lijo Lazar
Sent: Thursday, August 10, 2023 6:46 PM
To: amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander ; Ma, Le ; 
Kamal, Asad ; Zhang, Hawking 
Subject: [PATCH] drm/amd/pm: Fix temperature unit of SMU v13.0.6

Temperature needs to be reported in millidegree Celsius.

Signed-off-by: Lijo Lazar 
---
 drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 9 ++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
index 244e6d31560d..da8ba1aa57eb 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
@@ -708,16 +708,19 @@ static int smu_v13_0_6_get_smu_metrics_data(struct 
smu_context *smu,
*value = SMUQ10_TO_UINT(metrics->SocketPower) << 8;
break;
case METRICS_TEMPERATURE_HOTSPOT:
-   *value = SMUQ10_TO_UINT(metrics->MaxSocketTemperature);
+   *value = SMUQ10_TO_UINT(metrics->MaxSocketTemperature) *
+SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;
break;
case METRICS_TEMPERATURE_MEM:
-   *value = SMUQ10_TO_UINT(metrics->MaxHbmTemperature);
+   *value = SMUQ10_TO_UINT(metrics->MaxHbmTemperature) *
+SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;
break;
/* This is the max of all VRs and not just SOC VR.
 * No need to define another data type for the same.
 */
case METRICS_TEMPERATURE_VRSOC:
-   *value = SMUQ10_TO_UINT(metrics->MaxVrTemperature);
+   *value = SMUQ10_TO_UINT(metrics->MaxVrTemperature) *
+SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;
break;
default:
*value = UINT_MAX;
--
2.25.1

Re: next-20230726 and later - crash in radeon module during init

2023-08-10 Thread Valdis Klētnieks

On Thu, 10 Aug 2023 05:35:02 -0400, "Valdis Klētnieks" said:

> I am seeing the following consistent crash at boot:

> Some quick digging indicates the most likely culprit is:
>
> commit cbd0606e6a776bf2ba10d4a6957bb7628c0da947
> Author: Srinivasan Shanmugam 
> Date:   Thu Jul 20 15:39:24 2023 +0530
>
> drm/radeon: Prefer dev_* variant over printk

Nevermind - I see it was already reverted...


pgpEGB0i_V8uk.pgp
Description: PGP signature

Re: [PATCH] video/hdmi: convert *_infoframe_init() functions to void

2023-08-10 Thread Nikita Zhandarovich

Hello,

On 8/10/23 01:13, Maxime Ripard wrote:
> Hi,
> 
> On Tue, Aug 08, 2023 at 11:02:45AM -0700, Nikita Zhandarovich wrote:
>> Four hdmi_*_infoframe_init() functions that initialize different
>> types of hdmi infoframes only return the default 0 value, contrary to
>> their descriptions. Yet these functions are still unnecessarily checked
>> against possible errors in case of failure.
>>
>> Remove redundant error checks in calls to following functions:
>> - hdmi_spd_infoframe_init
>> - hdmi_audio_infoframe_init
>> - hdmi_vendor_infoframe_init
>> - hdmi_drm_infoframe_init
>> Also, convert these functions to 'void' and fix their descriptions.
> 
> I'm not sure what value it actually adds. None of them return any
> errors, but very well might if we started to be a bit serious about it.
> 
> Since the error handling is already there, then I'd rather leave it
> there.

There is definitely no particular urgency to this change.

Since these functions don't perform anything complex and aren't updated
regularly, my main goal was to remove unnecessary (at the moment) checks
and fix up their somewhat misleading descriptions. Cleaning up, in other
words. But I understand your point of view.

If you don't think this patch is warranted at this point, I totally
understand.

> 
>> Fixes: 2c676f378edb ("[media] hdmi: added unpack and logging functions for 
>> InfoFrames")
> 
> I'm confused about that part. What does it fix exactly?
> 
> Maxime

I added the 'Fixes:' tag mostly as a requirement for patch's
description. Once again, it doesn't "fix" anything broken as much as it
cleans up stuff.

Best regards,
Nikita

[PATCH -next] drm/amdgpu: Remove duplicated includes

2023-08-10 Thread GUO Zihua

Remove duplicated includes in amdgpu_amdkfd_gpuvm.c and amdgpu_ttm.c.
Resolves checkincludes message.

Signed-off-by: GUO Zihua 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 1 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c  | 1 -
 2 files changed, 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index a136fba9f29b..7d6daf8d2bfa 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -39,7 +39,6 @@
 #include "amdgpu_xgmi.h"
 #include "kfd_priv.h"
 #include "kfd_smi_events.h"
-#include 
 
 /* Userptr restore delay, just long enough to allow consecutive VM
  * changes to accumulate
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index dea848bb55c1..f5aa1362c818 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -49,7 +49,6 @@
 #include 
 
 #include 
-#include 
 
 #include "amdgpu.h"
 #include "amdgpu_object.h"
-- 
2.17.1

next-20230726 and later - crash in radeon module during init

2023-08-10 Thread Valdis Klētnieks

I am seeing the following consistent crash at boot:

[   61.211213][  T819] [drm] radeon kernel modesetting enabled.
[   61.584870][  T819] vga_switcheroo: detected switching method 
\_SB_.PCI0.GFX0.ATPX handle
[   61.667507][  T819] ATPX version 1, functions 0x0033
[   61.748228][  T819] general protection fault, probably for non-canonical 
address 0x54080068930549a0: 0000 [#1] PREEMPT SMP
[   61.829840][  T819] CPU: 3 PID: 819 Comm: (udev-worker) Tainted: G  
I T  6.5.0-rc4-next-20230804 #58 5cce04b101a5bb4a6c0368bfff037f6f096b3d3e
[   61.911411][  T819] Hardware name: Dell Inc. Inspiron 5559/052K07, BIOS 
1.9.0 09/07/2020
[   61.993285][  T819] RIP: 0010:strnlen+0x21/0x40
[   62.074885][  T819] Code: 90 90 90 90 90 90 90 90 90 f3 0f 1e fa 55 48 89 e5 
48 8d 14 37 31 c0 48 85 f6 74 16 48 89 f8 eb 09 48 83 c0 01 48 39 c2 74 0e <80> 
38 00 75 f2 48 29 f8 5d c3
cc cc cc cc 48 89 d0 5d 48 29 f8 c3
[   62.156529][  T819] RSP: 0018:a310419979b8 EFLAGS: 00010202
[   62.318407][  T819] RAX: 54080068930549a0 RBX: a31041997a20 RCX: 

[   62.400015][  T819] RDX: 54080068930549b0 RSI: 0010 RDI: 
54080068930549a0
[   62.481624][  T819] RBP: a310419979b8 R08: 937b85579990 R09: 
a31041997ad8
[   62.563644][  T819] R10: 937b86ddae00 R11:  R12: 
54080068930549a0
[   62.645194][  T819] R13: 937b814291b8 R14: 0001 R15: 
a31041997b81
[   62.726753][  T819] FS:  7efd50479600() GS:937ef2e0() 
knlGS:
[   62.808312][  T819] CS:  0010 DS:  ES:  CR0: 80050033
[   62.889830][  T819] CR2: 7f125d30ee70 CR3: 000105644002 CR4: 
003706e0
[   62.971390][  T819] Call Trace:
[   63.052954][  T819]  
[   63.134501][  T819]  ? show_regs+0x64/0x70
[   63.216058][  T819]  ? die_addr+0x36/0x90
[   63.297594][  T819]  ? exc_general_protection+0x1c1/0x440
[   63.379112][  T819]  ? asm_exc_general_protection+0x2b/0x30
[   63.460650][  T819]  ? strnlen+0x21/0x40
[   63.542209][  T819]  set_dev_info+0x40/0x170
[   63.623762][  T819]  dev_printk_emit+0xa8/0xe0
[   63.705308][  T819]  __dev_printk+0x34/0x80
[   63.786806][  T819]  _dev_info+0x7a/0xa0
[   63.868304][  T819]  radeon_atpx_validate.constprop.0.isra.0+0xbc/0x100 
[radeon f030e9a708043a486415a94978106b28cd7cb9a2]
[   63.949952][  T819]  radeon_atpx_detect+0x17b/0x190 [radeon 
f030e9a708043a486415a94978106b28cd7cb9a2]
[   64.031547][  T819]  ? __pfx_radeon_module_init+0x10/0x10 [radeon 
f030e9a708043a486415a94978106b28cd7cb9a2]
[   64.113102][  T819]  radeon_register_atpx_handler+0xd/0x30 [radeon 
f030e9a708043a486415a94978106b28cd7cb9a2]
[   64.194721][  T819]  radeon_module_init+0x84/0xff0 [radeon 
f030e9a708043a486415a94978106b28cd7cb9a2]
[   64.276365][  T819]  do_one_initcall+0x86/0x380
[   64.357865][  T819]  do_init_module+0x63/0x220
[   64.439342][  T819]  load_module+0x99d/0xa90

Some quick digging indicates the most likely culprit is:

commit cbd0606e6a776bf2ba10d4a6957bb7628c0da947
Author: Srinivasan Shanmugam 
Date:   Thu Jul 20 15:39:24 2023 +0530

drm/radeon: Prefer dev_* variant over printk

Changed from pr_err/info to dev_* variants so that
we get better debug info when there are multiple GPUs
in the system.

Looks like this is the failure point because 'dev' is trashed:

+   dev_info(dev, "ATPX Hybrid Graphics\n");

But I admit I don't know the ACPI stuff well enough to see what, if
anything, is wrong with this:

+   struct acpi_device *adev = container_of(atpx->handle, struct 
acpi_device, handle);
+   struct device *dev = &adev->dev;

Any ideas?



pgpggeCbCBvoT.pgp
Description: PGP signature

[PATCH] drm/amdgpu: Keep reset handlers shared

2023-08-10 Thread Lijo Lazar

Instead of maintaining a list per device, keep the reset handlers common
per ASIC family. A pointer to the list of handlers is maintained in
reset control.

Signed-off-by: Lijo Lazar 
---
 drivers/gpu/drm/amd/amdgpu/aldebaran.c  | 19 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c   |  8 
 drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h   | 16 
 drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c | 20 +++-
 drivers/gpu/drm/amd/amdgpu/smu_v13_0_10.c   | 19 +++
 5 files changed, 45 insertions(+), 37 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/aldebaran.c 
b/drivers/gpu/drm/amd/amdgpu/aldebaran.c
index 2b97b8a96fb4..82e1c83a7ccc 100644
--- a/drivers/gpu/drm/amd/amdgpu/aldebaran.c
+++ b/drivers/gpu/drm/amd/amdgpu/aldebaran.c
@@ -48,20 +48,19 @@ aldebaran_get_reset_handler(struct amdgpu_reset_control 
*reset_ctl,
 {
struct amdgpu_reset_handler *handler;
struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle;
+   int i;
 
if (reset_context->method != AMD_RESET_METHOD_NONE) {
dev_dbg(adev->dev, "Getting reset handler for method %d\n",
reset_context->method);
-   list_for_each_entry(handler, &reset_ctl->reset_handlers,
-handler_list) {
+   for_each_handler(i, handler, reset_ctl) {
if (handler->reset_method == reset_context->method)
return handler;
}
}
 
if (aldebaran_is_mode2_default(reset_ctl)) {
-   list_for_each_entry(handler, &reset_ctl->reset_handlers,
-handler_list) {
+   for_each_handler(i, handler, reset_ctl) {
if (handler->reset_method == AMD_RESET_METHOD_MODE2) {
reset_context->method = AMD_RESET_METHOD_MODE2;
return handler;
@@ -124,9 +123,9 @@ static void aldebaran_async_reset(struct work_struct *work)
struct amdgpu_reset_control *reset_ctl =
container_of(work, struct amdgpu_reset_control, reset_work);
struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle;
+   int i;
 
-   list_for_each_entry(handler, &reset_ctl->reset_handlers,
-handler_list) {
+   for_each_handler(i, handler, reset_ctl) {
if (handler->reset_method == reset_ctl->active_reset) {
dev_dbg(adev->dev, "Resetting device\n");
handler->do_reset(adev);
@@ -395,6 +394,11 @@ static struct amdgpu_reset_handler aldebaran_mode2_handler 
= {
.do_reset   = aldebaran_mode2_reset,
 };
 
+static struct amdgpu_reset_handler
+   *aldebaran_rst_handlers[AMDGPU_RESET_MAX_HANDLERS] = {
+   &aldebaran_mode2_handler,
+   };
+
 int aldebaran_reset_init(struct amdgpu_device *adev)
 {
struct amdgpu_reset_control *reset_ctl;
@@ -408,10 +412,9 @@ int aldebaran_reset_init(struct amdgpu_device *adev)
reset_ctl->active_reset = AMD_RESET_METHOD_NONE;
reset_ctl->get_reset_handler = aldebaran_get_reset_handler;
 
-   INIT_LIST_HEAD(&reset_ctl->reset_handlers);
INIT_WORK(&reset_ctl->reset_work, reset_ctl->async_reset);
/* Only mode2 is handled through reset control now */
-   amdgpu_reset_add_handler(reset_ctl, &aldebaran_mode2_handler);
+   reset_ctl->reset_handlers = &aldebaran_rst_handlers;
 
adev->reset_cntl = reset_ctl;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
index 5fed06ffcc6b..02d874799c16 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
@@ -26,14 +26,6 @@
 #include "sienna_cichlid.h"
 #include "smu_v13_0_10.h"
 
-int amdgpu_reset_add_handler(struct amdgpu_reset_control *reset_ctl,
-struct amdgpu_reset_handler *handler)
-{
-   /* TODO: Check if handler exists? */
-   list_add_tail(&handler->handler_list, &reset_ctl->reset_handlers);
-   return 0;
-}
-
 int amdgpu_reset_init(struct amdgpu_device *adev)
 {
int ret = 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
index f4a501ff87d9..471d789b33a5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
@@ -26,6 +26,8 @@
 
 #include "amdgpu.h"
 
+#define AMDGPU_RESET_MAX_HANDLERS 5
+
 enum AMDGPU_RESET_FLAGS {
 
AMDGPU_NEED_FULL_RESET = 0,
@@ -44,7 +46,6 @@ struct amdgpu_reset_context {
 
 struct amdgpu_reset_handler {
enum amd_reset_method reset_method;
-   struct list_head handler_list;
int (*prepare_env)(struct amdgpu_reset_control *reset_ctl,
   struct amdgpu_reset_context *context);
int (*prepare_hwcontext)(struct amdgpu_reset_control *reset_ctl,
@@ -63,7 +64,8 @@ struct

[PATCH] drm/amd/pm: Fix temperature unit of SMU v13.0.6

2023-08-10 Thread Lijo Lazar

Temperature needs to be reported in millidegree Celsius.

Signed-off-by: Lijo Lazar 
---
 drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 9 ++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
index 244e6d31560d..da8ba1aa57eb 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
@@ -708,16 +708,19 @@ static int smu_v13_0_6_get_smu_metrics_data(struct 
smu_context *smu,
*value = SMUQ10_TO_UINT(metrics->SocketPower) << 8;
break;
case METRICS_TEMPERATURE_HOTSPOT:
-   *value = SMUQ10_TO_UINT(metrics->MaxSocketTemperature);
+   *value = SMUQ10_TO_UINT(metrics->MaxSocketTemperature) *
+SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;
break;
case METRICS_TEMPERATURE_MEM:
-   *value = SMUQ10_TO_UINT(metrics->MaxHbmTemperature);
+   *value = SMUQ10_TO_UINT(metrics->MaxHbmTemperature) *
+SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;
break;
/* This is the max of all VRs and not just SOC VR.
 * No need to define another data type for the same.
 */
case METRICS_TEMPERATURE_VRSOC:
-   *value = SMUQ10_TO_UINT(metrics->MaxVrTemperature);
+   *value = SMUQ10_TO_UINT(metrics->MaxVrTemperature) *
+SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;
break;
default:
*value = UINT_MAX;
-- 
2.25.1

RE: [PATCH] drm/amdgpu: Add I2C EEPROM support on smu v13_0_6

2023-08-10 Thread Wang, Yang(Kevin)

[AMD Official Use Only - General]

Reviewed-by: Yang Wang 

Best Regards,
Kevin

-Original Message-
From: amd-gfx  On Behalf Of Candice Li
Sent: Thursday, August 10, 2023 4:23 PM
To: amd-gfx@lists.freedesktop.org
Cc: Li, Candice 
Subject: [PATCH] drm/amdgpu: Add I2C EEPROM support on smu v13_0_6

Support I2C EEPROM on smu v13_0_6.

Signed-off-by: Candice Li 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
index 4287743e121245..27fb9b640011c2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
@@ -159,6 +159,7 @@ static bool __is_ras_eeprom_supported(struct amdgpu_device 
*adev)
case IP_VERSION(13, 0, 0):
case IP_VERSION(13, 0, 2): /* Aldebaran */
case IP_VERSION(13, 0, 10):
+   case IP_VERSION(13, 0, 6):
return true;
default:
return false;
@@ -213,6 +214,7 @@ static bool __get_eeprom_i2c_addr(struct amdgpu_device 
*adev,
return true;
case IP_VERSION(13, 0, 0):
case IP_VERSION(13, 0, 10):
+   case IP_VERSION(13, 0, 6):
control->i2c_address = EEPROM_I2C_MADDR_4;
return true;
default:
--
2.25.1

[PATCH v2] drm/amdgpu: Add I2C EEPROM support on smu v13_0_6

2023-08-10 Thread Candice Li

Support I2C EEPROM on smu v13_0_6.

v2: Move IP_VERSION(13, 0, 6) ahead of IP_VERSION(13, 0, 10).

Signed-off-by: Candice Li 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
index 4287743e121245..4764d2171f92e9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
@@ -158,6 +158,7 @@ static bool __is_ras_eeprom_supported(struct amdgpu_device 
*adev)
case IP_VERSION(11, 0, 7): /* Sienna cichlid */
case IP_VERSION(13, 0, 0):
case IP_VERSION(13, 0, 2): /* Aldebaran */
+   case IP_VERSION(13, 0, 6):
case IP_VERSION(13, 0, 10):
return true;
default:
@@ -212,6 +213,7 @@ static bool __get_eeprom_i2c_addr(struct amdgpu_device 
*adev,
control->i2c_address = EEPROM_I2C_MADDR_0;
return true;
case IP_VERSION(13, 0, 0):
+   case IP_VERSION(13, 0, 6):
case IP_VERSION(13, 0, 10):
control->i2c_address = EEPROM_I2C_MADDR_4;
return true;
-- 
2.25.1

RE: [PATCH] drm/amd/pm: Add vclk and dclk sysnode for GC 9.4.3

2023-08-10 Thread Chen, Guchun

[Public]

> -Original Message-
> From: amd-gfx  On Behalf Of Asad
> Kamal
> Sent: Wednesday, August 9, 2023 5:10 PM
> To: amd-gfx@lists.freedesktop.org
> Cc: Lazar, Lijo ; Kamal, Asad ;
> Zhang, Hawking 
> Subject: [PATCH] drm/amd/pm: Add vclk and dclk sysnode for GC 9.4.3
>
> Expose sysfs vclk and dclk entries for GC version 9.4.3
>
> Signed-off-by: Asad Kamal 
> Reviewed-by: Lijo Lazar 
> ---
>  drivers/gpu/drm/amd/pm/amdgpu_pm.c | 6 --
>  1 file changed, 4 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
> b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
> index 3922dd274f30..e4183d6d7f39 100644
> --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
> +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
> @@ -2095,7 +2095,8 @@ static int default_attr_update(struct
> amdgpu_device *adev, struct amdgpu_device_
> gc_ver == IP_VERSION(10, 1, 2) ||
> gc_ver == IP_VERSION(11, 0, 0) ||
> gc_ver == IP_VERSION(11, 0, 2) ||
> -   gc_ver == IP_VERSION(11, 0, 3)))
> +   gc_ver == IP_VERSION(11, 0, 3) ||
> +   gc_ver == IP_VERSION(9, 4, 3)))

I suggest putting IP_VERSION(9, 4, 3) before the IP_VERSION(10, ...) entries to
keep the list in numerical order.

>   *states = ATTR_STATE_UNSUPPORTED;
>   } else if (DEVICE_ATTR_IS(pp_dpm_vclk1)) {
>   if (!((gc_ver == IP_VERSION(10, 3, 1) || @@ -2109,7 +2110,8
> @@ static int default_attr_update(struct amdgpu_device *adev, struct
> amdgpu_device_
> gc_ver == IP_VERSION(10, 1, 2) ||
> gc_ver == IP_VERSION(11, 0, 0) ||
> gc_ver == IP_VERSION(11, 0, 2) ||
> -   gc_ver == IP_VERSION(11, 0, 3)))
> +   gc_ver == IP_VERSION(11, 0, 3) ||
> +   gc_ver == IP_VERSION(9, 4, 3)))

Same as above.

Regards,
Guchun

>   *states = ATTR_STATE_UNSUPPORTED;
>   } else if (DEVICE_ATTR_IS(pp_dpm_dclk1)) {
>   if (!((gc_ver == IP_VERSION(10, 3, 1) ||
> --
> 2.34.1

[PATCH] drm/amdgpu: Add I2C EEPROM support on smu v13_0_6

2023-08-10 Thread Candice Li

Support I2C EEPROM on smu v13_0_6.

Signed-off-by: Candice Li 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
index 4287743e121245..27fb9b640011c2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
@@ -159,6 +159,7 @@ static bool __is_ras_eeprom_supported(struct amdgpu_device 
*adev)
case IP_VERSION(13, 0, 0):
case IP_VERSION(13, 0, 2): /* Aldebaran */
case IP_VERSION(13, 0, 10):
+   case IP_VERSION(13, 0, 6):
return true;
default:
return false;
@@ -213,6 +214,7 @@ static bool __get_eeprom_i2c_addr(struct amdgpu_device 
*adev,
return true;
case IP_VERSION(13, 0, 0):
case IP_VERSION(13, 0, 10):
+   case IP_VERSION(13, 0, 6):
control->i2c_address = EEPROM_I2C_MADDR_4;
return true;
default:
-- 
2.25.1

Re: [PATCH] video/hdmi: convert *_infoframe_init() functions to void

2023-08-10 Thread Maxime Ripard

Hi,

On Tue, Aug 08, 2023 at 11:02:45AM -0700, Nikita Zhandarovich wrote:
> Four hdmi_*_infoframe_init() functions that initialize different
> types of hdmi infoframes only return the default 0 value, contrary to
> their descriptions. Yet these functions are still unnecessarily checked
> against possible errors in case of failure.
> 
> Remove redundant error checks in calls to following functions:
> - hdmi_spd_infoframe_init
> - hdmi_audio_infoframe_init
> - hdmi_vendor_infoframe_init
> - hdmi_drm_infoframe_init
> Also, convert these functions to 'void' and fix their descriptions.

I'm not sure what value it actually adds. None of them return any
errors, but very well might if we started to be a bit serious about it.

Since the error handling is already there, then I'd rather leave it
there.

> Fixes: 2c676f378edb ("[media] hdmi: added unpack and logging functions for 
> InfoFrames")

I'm confused about that part. What does it fix exactly?

Maxime


signature.asc
Description: PGP signature

[PATCH V8 9/9] drm/amd/pm: enable Wifi RFI mitigation feature support for SMU13.0.7

2023-08-10 Thread Evan Quan

Fulfill the SMU13.0.7 support for Wifi RFI mitigation feature.

Signed-off-by: Evan Quan 
Reviewed-by: Mario Limonciello 
---
 .../drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c  | 59 +++
 1 file changed, 59 insertions(+)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
index b1f0937ccade..d02fe284b05d 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
@@ -126,6 +126,7 @@ static struct cmn2asic_msg_mapping 
smu_v13_0_7_message_map[SMU_MSG_MAX_COUNT] =
MSG_MAP(AllowGpo,   PPSMC_MSG_SetGpoAllow,  
 0),
MSG_MAP(GetPptLimit,PPSMC_MSG_GetPptLimit,  
   0),
MSG_MAP(NotifyPowerSource,  PPSMC_MSG_NotifyPowerSource,
   0),
+   MSG_MAP(EnableUCLKShadow,   PPSMC_MSG_EnableUCLKShadow, 
   0),
 };
 
 static struct cmn2asic_mapping smu_v13_0_7_clk_map[SMU_CLK_COUNT] = {
@@ -207,6 +208,7 @@ static struct cmn2asic_mapping 
smu_v13_0_7_table_map[SMU_TABLE_COUNT] = {
TAB_MAP(ACTIVITY_MONITOR_COEFF),
[SMU_TABLE_COMBO_PPTABLE] = {1, TABLE_COMBO_PPTABLE},
TAB_MAP(OVERDRIVE),
+   TAB_MAP(WIFIBAND),
 };
 
 static struct cmn2asic_mapping smu_v13_0_7_pwr_src_map[SMU_POWER_SOURCE_COUNT] 
= {
@@ -497,6 +499,9 @@ static int smu_v13_0_7_tables_init(struct smu_context *smu)
   AMDGPU_GEM_DOMAIN_VRAM);
SMU_TABLE_INIT(tables, SMU_TABLE_COMBO_PPTABLE, 
MP0_MP1_DATA_REGION_SIZE_COMBOPPTABLE,
PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM);
+   SMU_TABLE_INIT(tables, SMU_TABLE_WIFIBAND,
+  sizeof(WifiBandEntryTable_t), PAGE_SIZE,
+  AMDGPU_GEM_DOMAIN_VRAM);
 
smu_table->metrics_table = kzalloc(sizeof(SmuMetricsExternal_t), 
GFP_KERNEL);
if (!smu_table->metrics_table)
@@ -2173,6 +2178,57 @@ static int smu_v13_0_7_set_df_cstate(struct smu_context 
*smu,
   NULL);
 }
 
+static bool smu_v13_0_7_wbrf_support_check(struct smu_context *smu)
+{
+   return smu->smc_fw_version > 0x00524600;
+}
+
+static int smu_v13_0_7_set_wbrf_exclusion_ranges(struct smu_context *smu,
+struct exclusion_range 
*exclusion_ranges)
+{
+   WifiBandEntryTable_t wifi_bands;
+   int valid_entries = 0;
+   int ret, i;
+
+   memset(&wifi_bands, 0, sizeof(wifi_bands));
+   for (i = 0; i < ARRAY_SIZE(wifi_bands.WifiBandEntry); i++) {
+   if (!exclusion_ranges[i].start &&
+   !exclusion_ranges[i].end)
+   break;
+
+   /* PMFW expects the inputs to be in Mhz unit */
+   wifi_bands.WifiBandEntry[valid_entries].LowFreq =
+   DIV_ROUND_DOWN_ULL(exclusion_ranges[i].start, 
HZ_IN_MHZ);
+   wifi_bands.WifiBandEntry[valid_entries++].HighFreq =
+   DIV_ROUND_UP_ULL(exclusion_ranges[i].end, HZ_IN_MHZ);
+   }
+   wifi_bands.WifiBandEntryNum = valid_entries;
+
+   /*
+* Per confirm with PMFW team, WifiBandEntryNum = 0 is a valid setting.
+* Considering the scenarios below:
+* - At first the wifi device adds an exclusion range e.g. (2400,2500) 
to
+*   BIOS and our driver gets notified. We will set WifiBandEntryNum = 1
+*   and pass the WifiBandEntry (2400, 2500) to PMFW.
+*
+* - Later the wifi device removes the wifiband list added above and
+*   our driver gets notified again. At this time, driver will set
+*   WifiBandEntryNum = 0 and pass an empty WifiBandEntry list to PMFW.
+*   - PMFW may still need to do some uclk shadow update(e.g. switching
+* from shadow clock back to primary clock) on receiving this.
+*/
+
+   ret = smu_cmn_update_table(smu,
+  SMU_TABLE_WIFIBAND,
+  0,
+  (void *)(&wifi_bands),
+  true);
+   if (ret)
+   dev_err(smu->adev->dev, "Failed to set wifiband!");
+
+   return ret;
+}
+
 static const struct pptable_funcs smu_v13_0_7_ppt_funcs = {
.get_allowed_feature_mask = smu_v13_0_7_get_allowed_feature_mask,
.set_default_dpm_table = smu_v13_0_7_set_default_dpm_table,
@@ -2241,6 +2297,9 @@ static const struct pptable_funcs smu_v13_0_7_ppt_funcs = 
{
.set_mp1_state = smu_v13_0_7_set_mp1_state,
.set_df_cstate = smu_v13_0_7_set_df_cstate,
.gpo_control = smu_v13_0_gpo_control,
+   .is_asic_wbrf_supported = smu_v13_0_7_wbrf_support_check,
+   .enable_uclk_shadow = smu_v13_0_enable_uclk_shadow,
+   .set_wbrf_exclusion_ranges = smu_v13_0_7_set_wbrf_exclusion_ranges,
 };
 
 void smu_v13_0_7_set_ppt_funcs(struct smu_context *smu)
--

[PATCH V8 8/9] drm/amd/pm: enable Wifi RFI mitigation feature support for SMU13.0.0

2023-08-10 Thread Evan Quan

Fulfill the SMU13.0.0 support for Wifi RFI mitigation feature.

Signed-off-by: Evan Quan 
Reviewed-by: Mario Limonciello 
---
 drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h |  3 +
 drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h  |  3 +-
 drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h  |  3 +
 .../gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c|  9 +++
 .../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c  | 60 +++
 5 files changed, 77 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h 
b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
index 4d5cb1b511e5..54e76d6e66ce 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
@@ -325,6 +325,7 @@ enum smu_table_id
SMU_TABLE_PACE,
SMU_TABLE_ECCINFO,
SMU_TABLE_COMBO_PPTABLE,
+   SMU_TABLE_WIFIBAND,
SMU_TABLE_COUNT,
 };
 
@@ -1501,6 +1502,8 @@ enum smu_baco_seq {
 __dst_size);  \
 })
 
+#define HZ_IN_MHZ  1000000U
+
 #if !defined(SWSMU_CODE_LAYER_L2) && !defined(SWSMU_CODE_LAYER_L3) && 
!defined(SWSMU_CODE_LAYER_L4)
 int smu_get_power_limit(void *handle,
uint32_t *limit,
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h 
b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h
index 297b70b9388f..5bbb60289a79 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h
@@ -245,7 +245,8 @@
__SMU_DUMMY_MAP(AllowGpo),  \
__SMU_DUMMY_MAP(Mode2Reset),\
__SMU_DUMMY_MAP(RequestI2cTransaction), \
-   __SMU_DUMMY_MAP(GetMetricsTable),
+   __SMU_DUMMY_MAP(GetMetricsTable), \
+   __SMU_DUMMY_MAP(EnableUCLKShadow),
 
 #undef __SMU_DUMMY_MAP
 #define __SMU_DUMMY_MAP(type)  SMU_MSG_##type
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h 
b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
index 355c156d871a..dd70b56aa71e 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
@@ -299,5 +299,8 @@ int smu_v13_0_update_pcie_parameters(struct smu_context 
*smu,
 uint32_t pcie_gen_cap,
 uint32_t pcie_width_cap);
 
+int smu_v13_0_enable_uclk_shadow(struct smu_context *smu,
+bool enablement);
+
 #endif
 #endif
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
index 9b62b45ebb7f..6a5cb582aa92 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
@@ -2472,3 +2472,12 @@ int smu_v13_0_update_pcie_parameters(struct smu_context 
*smu,
 
return 0;
 }
+
+int smu_v13_0_enable_uclk_shadow(struct smu_context *smu,
+bool enablement)
+{
+   return smu_cmn_send_smc_msg_with_param(smu,
+  SMU_MSG_EnableUCLKShadow,
+  enablement,
+  NULL);
+}
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
index 3d188616ba24..fd3ac18653ed 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
@@ -154,6 +154,7 @@ static struct cmn2asic_msg_mapping 
smu_v13_0_0_message_map[SMU_MSG_MAX_COUNT] =
MSG_MAP(AllowGpo,   PPSMC_MSG_SetGpoAllow,  
 0),
MSG_MAP(AllowIHHostInterrupt,   PPSMC_MSG_AllowIHHostInterrupt, 
  0),
MSG_MAP(ReenableAcDcInterrupt,  
PPSMC_MSG_ReenableAcDcInterrupt,   0),
+   MSG_MAP(EnableUCLKShadow,   PPSMC_MSG_EnableUCLKShadow, 
   0),
 };
 
 static struct cmn2asic_mapping smu_v13_0_0_clk_map[SMU_CLK_COUNT] = {
@@ -237,6 +238,7 @@ static struct cmn2asic_mapping 
smu_v13_0_0_table_map[SMU_TABLE_COUNT] = {
TAB_MAP(I2C_COMMANDS),
TAB_MAP(ECCINFO),
TAB_MAP(OVERDRIVE),
+   TAB_MAP(WIFIBAND),
 };
 
 static struct cmn2asic_mapping smu_v13_0_0_pwr_src_map[SMU_POWER_SOURCE_COUNT] 
= {
@@ -481,6 +483,9 @@ static int smu_v13_0_0_tables_init(struct smu_context *smu)
PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM);
SMU_TABLE_INIT(tables, SMU_TABLE_ECCINFO, sizeof(EccInfoTable_t),
PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM);
+   SMU_TABLE_INIT(tables, SMU_TABLE_WIFIBAND,
+  sizeof(WifiBandEntryTable_t), PAGE_SIZE,
+  AMDGPU_GEM_DOMAIN_VRAM);
 
smu_table->metrics_table = kzalloc(sizeof(SmuMetricsExternal_t), 
GFP_KERNEL);
if (!smu_table->metrics_table)
@@ -2593,6 +2598,58 @@ static ssize_t smu_v13_0_0_get_ecc_info(struct 
smu_context *smu,
return ret;
 }
 
+static bool

[PATCH V8 6/9] drm/amd/pm: setup the framework to support Wifi RFI mitigation feature

2023-08-10 Thread Evan Quan

With the WBRF feature supported, amdgpu, as a driver responding to the
reported frequencies, is able to do shadow pstate switching to mitigate
possible interference between its (G-)DDR memory clocks and the local radio
module frequency bands used by Wifi 6/6e/7.

Signed-off-by: Evan Quan 
Reviewed-by: Mario Limonciello 
--
v1->v2:
  - update the prompt for feature support(Lijo)
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h   |   2 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c   |  17 ++
 drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 194 ++
 drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h |  23 +++
 drivers/gpu/drm/amd/pm/swsmu/smu_internal.h   |   3 +
 5 files changed, 239 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index a3b86b86dc47..2bfc9111ab00 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -247,6 +247,8 @@ extern int amdgpu_sg_display;
 
 extern int amdgpu_user_partt_mode;
 
+extern int amdgpu_wbrf;
+
 #define AMDGPU_VM_MAX_NUM_CTX  4096
 #define AMDGPU_SG_THRESHOLD(256*1024*1024)
 #define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS 3000
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 0593ef8fe0a6..1c574bd3b60d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -195,6 +195,7 @@ int amdgpu_use_xgmi_p2p = 1;
 int amdgpu_vcnfw_log;
 int amdgpu_sg_display = -1; /* auto */
 int amdgpu_user_partt_mode = AMDGPU_AUTO_COMPUTE_PARTITION_MODE;
+int amdgpu_wbrf = -1;
 
 static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work);
 
@@ -981,6 +982,22 @@ module_param_named(user_partt_mode, 
amdgpu_user_partt_mode, uint, 0444);
 module_param(enforce_isolation, bool, 0444);
 MODULE_PARM_DESC(enforce_isolation, "enforce process isolation between 
graphics and compute . enforce_isolation = on");
 
+/**
+ * DOC: wbrf (int)
+ * Enable Wifi RFI interference mitigation feature.
+ * Due to electrical and mechanical constraints there may be likely 
interference of
+ * relatively high-powered harmonics of the (G-)DDR memory clocks with local 
radio
+ * module frequency bands used by Wifi 6/6e/7. To mitigate the possible RFI 
interference,
+ * with this feature enabled, PMFW will use either “shadowed P-State” or 
“P-State” based
+ * on active list of frequencies in-use (to be avoided) as part of initial 
setting or
+ * P-state transition. However, there may be potential performance impact with 
this
+ * feature enabled.
+ * (0 = disabled, 1 = enabled, -1 = auto (default setting, will be enabled if 
supported))
+ */
+MODULE_PARM_DESC(wbrf,
+   "Enable Wifi RFI interference mitigation (0 = disabled, 1 = enabled, -1 
= auto(default)");
+module_param_named(wbrf, amdgpu_wbrf, int, 0444);
+
 /* These devices are not supported by amdgpu.
  * They are supported by the mach64, r128, radeon drivers
  */
diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c 
b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
index ce41a8309582..163037bd6d16 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
@@ -1228,6 +1228,173 @@ static int smu_get_thermal_temperature_range(struct 
smu_context *smu)
return ret;
 }
 
+/**
+ * smu_wbrf_handle_exclusion_ranges - consume the wbrf exclusion ranges
+ *
+ * @smu: smu_context pointer
+ *
+ * Retrieve the wbrf exclusion ranges and send them to PMFW for proper 
handling.
+ * Returns 0 on success, error on failure.
+ */
+static int smu_wbrf_handle_exclusion_ranges(struct smu_context *smu)
+{
+   struct wbrf_ranges_out wbrf_exclusion = {0};
+   struct exclusion_range *wifi_bands = wbrf_exclusion.band_list;
+   struct amdgpu_device *adev = smu->adev;
+   uint64_t start, end;
+   int ret, i, j;
+
+   ret = wbrf_retrieve_exclusions(adev->dev, _exclusion);
+   if (ret) {
+   dev_err(adev->dev, "Failed to retrieve exclusion ranges!\n");
+   return ret;
+   }
+
+   /*
+* The exclusion ranges array we got might be filled with holes and 
duplicate
+* entries. For example:
+* {(2400, 2500), (0, 0), (6882, 6962), (2400, 2500), (0, 0), (6117, 
6189), (0, 0)...}
+* We need to do some sortups to eliminate those holes and duplicate 
entries.
+* Expected output: {(2400, 2500), (6117, 6189), (6882, 6962), (0, 
0)...}
+*/
+   for (i = 0; i < MAX_NUM_OF_WBRF_RANGES; i++) {
+   start = wifi_bands[i].start;
+   end = wifi_bands[i].end;
+
+   /* get the last valid entry to fill the intermediate hole */
+   if (!start && !end) {
+   for (j = MAX_NUM_OF_WBRF_RANGES - 1; j > i; j--)
+   if (wifi_bands[j].start &&
+   wifi_bands[j].end)
+   break;

[PATCH V8 7/9] drm/amd/pm: add flood detection for wbrf events

2023-08-10 Thread Evan Quan

Defer the handling of wbrf events to a delayed work item, to protect PMFW
from being overloaded by a flood of events.

Signed-off-by: Evan Quan 
Reviewed-by: Mario Limonciello 
---
 drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 31 +++
 drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h |  7 +
 2 files changed, 32 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c 
b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
index 163037bd6d16..aa5e1123ac0a 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
@@ -1318,7 +1318,8 @@ static int smu_wbrf_event_handler(struct notifier_block 
*nb,
 
switch (action) {
case WBRF_CHANGED:
-   smu_wbrf_handle_exclusion_ranges(smu);
+   schedule_delayed_work(>wbrf_delayed_work,
+ 
msecs_to_jiffies(SMU_WBRF_EVENT_HANDLING_PACE));
break;
default:
return NOTIFY_DONE;
@@ -1327,6 +1328,21 @@ static int smu_wbrf_event_handler(struct notifier_block 
*nb,
return NOTIFY_OK;
 }
 
+/**
+ * smu_wbrf_delayed_work_handler - callback on delayed work timer expired
+ *
+ * @work: struct work_struct pointer
+ *
+ * Flood is over and driver will consume the latest exclusion ranges.
+ */
+static void smu_wbrf_delayed_work_handler(struct work_struct *work)
+{
+   struct smu_context *smu =
+   container_of(work, struct smu_context, wbrf_delayed_work.work);
+
+   smu_wbrf_handle_exclusion_ranges(smu);
+}
+
 /**
  * smu_wbrf_support_check - check wbrf support
  *
@@ -1357,12 +1373,14 @@ static void smu_wbrf_support_check(struct smu_context 
*smu)
  */
 static int smu_wbrf_init(struct smu_context *smu)
 {
-   struct amdgpu_device *adev = smu->adev;
int ret;
 
if (!smu->wbrf_supported)
return 0;
 
+   INIT_DELAYED_WORK(>wbrf_delayed_work,
+ smu_wbrf_delayed_work_handler);
+
smu->wbrf_notifier.notifier_call = smu_wbrf_event_handler;
ret = wbrf_register_notifier(>wbrf_notifier);
if (ret)
@@ -1373,11 +1391,10 @@ static int smu_wbrf_init(struct smu_context *smu)
 * before our driver loaded. To make sure our driver
 * is awared of those exclusion ranges.
 */
-   ret = smu_wbrf_handle_exclusion_ranges(smu);
-   if (ret)
-   dev_err(adev->dev, "Failed to handle wbrf exclusion ranges\n");
+   schedule_delayed_work(>wbrf_delayed_work,
+ msecs_to_jiffies(SMU_WBRF_EVENT_HANDLING_PACE));
 
-   return ret;
+   return 0;
 }
 
 /**
@@ -1393,6 +1410,8 @@ static void smu_wbrf_fini(struct smu_context *smu)
return;
 
wbrf_unregister_notifier(>wbrf_notifier);
+
+   cancel_delayed_work_sync(>wbrf_delayed_work);
 }
 
 static int smu_smc_hw_setup(struct smu_context *smu)
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h 
b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
index 244297979f92..4d5cb1b511e5 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
@@ -480,6 +480,12 @@ struct stb_context {
 
 #define WORKLOAD_POLICY_MAX 7
 
+/*
+ * Configure wbrf event handling pace as there can be only one
+ * event processed every SMU_WBRF_EVENT_HANDLING_PACE ms.
+ */
+#define SMU_WBRF_EVENT_HANDLING_PACE   10
+
 struct smu_context
 {
struct amdgpu_device*adev;
@@ -581,6 +587,7 @@ struct smu_context
/* data structures for wbrf feature support */
boolwbrf_supported;
struct notifier_block   wbrf_notifier;
+   struct delayed_work wbrf_delayed_work;
 };
 
 struct i2c_adapter;
-- 
2.34.1

[PATCH V8 5/9] drm/amd/pm: update driver_if and ppsmc headers for coming wbrf feature

2023-08-10 Thread Evan Quan

Add the data structures and message definitions needed to support the Wifi
RFI mitigation feature.

Signed-off-by: Evan Quan 
Reviewed-by: Mario Limonciello 
---
 .../pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h | 14 +-
 .../pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h | 14 +-
 .../amd/pm/swsmu/inc/pmfw_if/smu_v13_0_0_ppsmc.h   |  3 ++-
 .../amd/pm/swsmu/inc/pmfw_if/smu_v13_0_7_ppsmc.h   |  3 ++-
 4 files changed, 30 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h 
b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h
index 9dd1ed5b8940..e481407b6584 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h
@@ -391,6 +391,17 @@ typedef struct {
   EccInfo_t  EccInfo[24];
 } EccInfoTable_t;
 
+typedef struct {
+  uint16_t LowFreq;
+  uint16_t HighFreq;
+} WifiOneBand_t;
+
+typedef struct {
+  uint32_t WifiBandEntryNum;
+  WifiOneBand_tWifiBandEntry[11];
+  uint32_t MmHubPadding[8];
+} WifiBandEntryTable_t;
+
 //D3HOT sequences
 typedef enum {
   BACO_SEQUENCE,
@@ -1615,7 +1626,8 @@ typedef struct {
 #define TABLE_I2C_COMMANDS9
 #define TABLE_DRIVER_INFO 10
 #define TABLE_ECCINFO 11
-#define TABLE_COUNT   12
+#define TABLE_WIFIBAND12
+#define TABLE_COUNT   13
 
 //IH Interupt ID
 #define IH_INTERRUPT_ID_TO_DRIVER   0xFE
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h 
b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h
index 62b7c0daff68..1530ca002c6c 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h
@@ -392,6 +392,17 @@ typedef struct {
   EccInfo_t  EccInfo[24];
 } EccInfoTable_t;
 
+typedef struct {
+  uint16_t LowFreq;
+  uint16_t HighFreq;
+} WifiOneBand_t;
+
+typedef struct {
+  uint32_t WifiBandEntryNum;
+  WifiOneBand_tWifiBandEntry[11];
+  uint32_t MmHubPadding[8];
+} WifiBandEntryTable_t;
+
 //D3HOT sequences
 typedef enum {
   BACO_SEQUENCE,
@@ -1605,7 +1616,8 @@ typedef struct {
 #define TABLE_I2C_COMMANDS9
 #define TABLE_DRIVER_INFO 10
 #define TABLE_ECCINFO 11
-#define TABLE_COUNT   12
+#define TABLE_WIFIBAND12
+#define TABLE_COUNT   13
 
 //IH Interupt ID
 #define IH_INTERRUPT_ID_TO_DRIVER   0xFE
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_0_ppsmc.h 
b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_0_ppsmc.h
index 10cff75b44d5..c98cc32d11bd 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_0_ppsmc.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_0_ppsmc.h
@@ -138,7 +138,8 @@
 #define PPSMC_MSG_SetBadMemoryPagesRetiredFlagsPerChannel 0x4A
 #define PPSMC_MSG_SetPriorityDeltaGain   0x4B
 #define PPSMC_MSG_AllowIHHostInterrupt   0x4C
-#define PPSMC_Message_Count  0x4D
+#define PPSMC_MSG_EnableUCLKShadow   0x51
+#define PPSMC_Message_Count  0x52
 
 //Debug Dump Message
 #define DEBUGSMC_MSG_TestMessage0x1
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_7_ppsmc.h 
b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_7_ppsmc.h
index 6aaefca9b595..a6bf9cdd130e 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_7_ppsmc.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_7_ppsmc.h
@@ -134,6 +134,7 @@
 #define PPSMC_MSG_SetBadMemoryPagesRetiredFlagsPerChannel 0x4A
 #define PPSMC_MSG_SetPriorityDeltaGain   0x4B
 #define PPSMC_MSG_AllowIHHostInterrupt   0x4C
-#define PPSMC_Message_Count  0x4D
+#define PPSMC_MSG_EnableUCLKShadow   0x51
+#define PPSMC_Message_Count  0x52
 
 #endif
-- 
2.34.1

[PATCH V8 4/9] wifi: mac80211: Add support for WBRF features

2023-08-10 Thread Evan Quan

To support the WBRF mechanism, Wifi adapters used in the system must
register the frequencies in use (or unregister the frequencies no longer
in use) via the dedicated calls, so that other drivers responding to those
frequencies can take proper actions to mitigate possible interference.

Co-developed-by: Mario Limonciello 
Signed-off-by: Mario Limonciello 
Co-developed-by: Evan Quan 
Signed-off-by: Evan Quan 
--
v1->v2:
  - place the new added member(`wbrf_supported`) in
ieee80211_local(Johannes)
  - handle chandefs change scenario properly(Johannes)
  - some minor fixes around code sharing and possible invalid input
checks(Johannes)
v2->v3:
  - drop unnecessary input checks and intermediate APIs(Mario)
  - Separate some mac80211 common code(Mario, Johannes)
v3->v4:
  - some minor fixes around return values(Johannes)
---
 include/linux/ieee80211.h  |   1 +
 net/mac80211/Makefile  |   2 +
 net/mac80211/chan.c|   9 
 net/mac80211/ieee80211_i.h |   9 
 net/mac80211/main.c|   2 +
 net/mac80211/wbrf.c| 103 +
 6 files changed, 126 insertions(+)
 create mode 100644 net/mac80211/wbrf.c

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 4b998090898e..f995d06da87f 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -4335,6 +4335,7 @@ static inline int ieee80211_get_tdls_action(struct 
sk_buff *skb, u32 hdr_size)
 /* convert frequencies */
 #define MHZ_TO_KHZ(freq) ((freq) * 1000)
 #define KHZ_TO_MHZ(freq) ((freq) / 1000)
+#define KHZ_TO_HZ(freq)  ((freq) * 1000)
 #define PR_KHZ(f) KHZ_TO_MHZ(f), f % 1000
 #define KHZ_F "%d.%03d"
 
diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile
index b8de44da1fb8..d46c36f55fd3 100644
--- a/net/mac80211/Makefile
+++ b/net/mac80211/Makefile
@@ -65,4 +65,6 @@ rc80211_minstrel-$(CONFIG_MAC80211_DEBUGFS) += \
 
 mac80211-$(CONFIG_MAC80211_RC_MINSTREL) += $(rc80211_minstrel-y)
 
+mac80211-y += wbrf.o
+
 ccflags-y += -DDEBUG
diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c
index 68952752b599..458469c224ae 100644
--- a/net/mac80211/chan.c
+++ b/net/mac80211/chan.c
@@ -506,11 +506,16 @@ static void _ieee80211_change_chanctx(struct 
ieee80211_local *local,
 
WARN_ON(!cfg80211_chandef_compatible(>conf.def, chandef));
 
+   ieee80211_remove_wbrf(local, >conf.def);
+
ctx->conf.def = *chandef;
 
/* check if min chanctx also changed */
changed = IEEE80211_CHANCTX_CHANGE_WIDTH |
  _ieee80211_recalc_chanctx_min_def(local, ctx, rsvd_for);
+
+   ieee80211_add_wbrf(local, >conf.def);
+
drv_change_chanctx(local, ctx, changed);
 
if (!local->use_chanctx) {
@@ -668,6 +673,8 @@ static int ieee80211_add_chanctx(struct ieee80211_local 
*local,
lockdep_assert_held(>mtx);
lockdep_assert_held(>chanctx_mtx);
 
+   ieee80211_add_wbrf(local, >conf.def);
+
if (!local->use_chanctx)
local->hw.conf.radar_enabled = ctx->conf.radar_enabled;
 
@@ -748,6 +755,8 @@ static void ieee80211_del_chanctx(struct ieee80211_local 
*local,
}
 
ieee80211_recalc_idle(local);
+
+   ieee80211_remove_wbrf(local, >conf.def);
 }
 
 static void ieee80211_free_chanctx(struct ieee80211_local *local,
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 91633a0b723e..719f2c892132 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -1600,6 +1600,8 @@ struct ieee80211_local {
 
/* extended capabilities provided by mac80211 */
u8 ext_capa[8];
+
+   bool wbrf_supported;
 };
 
 static inline struct ieee80211_sub_if_data *
@@ -2638,4 +2640,11 @@ ieee80211_eht_cap_ie_to_sta_eht_cap(struct 
ieee80211_sub_if_data *sdata,
const struct ieee80211_eht_cap_elem 
*eht_cap_ie_elem,
u8 eht_cap_len,
struct link_sta_info *link_sta);
+
+void ieee80211_check_wbrf_support(struct ieee80211_local *local);
+void ieee80211_add_wbrf(struct ieee80211_local *local,
+   struct cfg80211_chan_def *chandef);
+void ieee80211_remove_wbrf(struct ieee80211_local *local,
+  struct cfg80211_chan_def *chandef);
+
 #endif /* IEEE80211_I_H */
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 24315d7b3126..b20bdaac84db 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -1396,6 +1396,8 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
debugfs_hw_add(local);
rate_control_add_debugfs(local);
 
+   ieee80211_check_wbrf_support(local);
+
rtnl_lock();
wiphy_lock(hw->wiphy);
 
diff --git a/net/mac80211/wbrf.c b/net/mac80211/wbrf.c
new file mode 100644
index ..7ddb29d128b1
--- /dev/null
+++ b/net/mac80211/wbrf.c
@@ -0,0 +1,103 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Wifi Band Exclusion Interface for WWAN
+ * Copyright (C)

[PATCH V8 3/9] cfg80211: expose nl80211_chan_width_to_mhz for wide sharing

2023-08-10 Thread Evan Quan

The newly added WBRF feature needs this interface for channel
width calculation.

Signed-off-by: Evan Quan 
---
 include/net/cfg80211.h | 8 
 net/wireless/chan.c| 3 ++-
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 7c7d03aa9d06..f50508e295db 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -920,6 +920,14 @@ const struct cfg80211_chan_def *
 cfg80211_chandef_compatible(const struct cfg80211_chan_def *chandef1,
const struct cfg80211_chan_def *chandef2);
 
+/**
+ * nl80211_chan_width_to_mhz - get the channel width in Mhz
+ * @chan_width: the channel width from  nl80211_chan_width
+ * Return: channel width in Mhz if the chan_width from  nl80211_chan_width
+ * is valid. -1 otherwise.
+ */
+int nl80211_chan_width_to_mhz(enum nl80211_chan_width chan_width);
+
 /**
  * cfg80211_chandef_valid - check if a channel definition is valid
  * @chandef: the channel definition to check
diff --git a/net/wireless/chan.c b/net/wireless/chan.c
index 0b7e81db383d..227db04eac42 100644
--- a/net/wireless/chan.c
+++ b/net/wireless/chan.c
@@ -141,7 +141,7 @@ static bool cfg80211_edmg_chandef_valid(const struct 
cfg80211_chan_def *chandef)
return true;
 }
 
-static int nl80211_chan_width_to_mhz(enum nl80211_chan_width chan_width)
+int nl80211_chan_width_to_mhz(enum nl80211_chan_width chan_width)
 {
int mhz;
 
@@ -190,6 +190,7 @@ static int nl80211_chan_width_to_mhz(enum 
nl80211_chan_width chan_width)
}
return mhz;
 }
+EXPORT_SYMBOL(nl80211_chan_width_to_mhz);
 
 static int cfg80211_chandef_get_width(const struct cfg80211_chan_def *c)
 {
-- 
2.34.1

[PATCH V8 2/9] drivers core: add ACPI based WBRF mechanism introduced by AMD

2023-08-10 Thread Evan Quan

AMD has introduced an ACPI-based mechanism to support WBRF on some
platforms with an AMD dGPU and WLAN. It requires support from a BIOS equipped
with the necessary AML implementations and from the dGPU firmware.

For systems without the ACPI mechanism, or with solutions still under
development, users can fall back to the generic WBRF solution to diagnose
potential interference issues.

For a platform that is not equipped with the necessary AMD ACPI
implementations but has CONFIG_WBRF_AMD_ACPI built as 'y', the generic WBRF
solution is used as a fallback if `wbrf` is set to "on".

Co-developed-by: Mario Limonciello 
Signed-off-by: Mario Limonciello 
Co-developed-by: Evan Quan 
Signed-off-by: Evan Quan 
--
v4->v5:
  - promote this to be a more generic solution with input argument taking
`struct device` and provide better scalability to support non-ACPI
scenarios(Andrew)
  - update the APIs naming and some other minor fixes(Rafael)
v5->v6:
  - make the code more readable and some other fixes(Andrew)
v6->v8:
  - drop CONFIG_WBRF_GENERIC(Mario)
  - add `wbrf` kernel parameter for policy control(Mario)
---
 drivers/acpi/Makefile |   2 +
 drivers/acpi/amd_wbrf.c   | 294 ++
 drivers/base/Kconfig  |  20 +++
 drivers/base/wbrf.c   | 135 +---
 include/linux/acpi_amd_wbrf.h |  25 +++
 5 files changed, 452 insertions(+), 24 deletions(-)
 create mode 100644 drivers/acpi/amd_wbrf.c
 create mode 100644 include/linux/acpi_amd_wbrf.h

diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile
index 3fc5a0d54f6e..9185d16e4495 100644
--- a/drivers/acpi/Makefile
+++ b/drivers/acpi/Makefile
@@ -133,3 +133,5 @@ obj-$(CONFIG_ARM64) += arm64/
 obj-$(CONFIG_ACPI_VIOT)+= viot.o
 
 obj-$(CONFIG_RISCV)+= riscv/
+
+obj-$(CONFIG_WBRF_AMD_ACPI)+= amd_wbrf.o
diff --git a/drivers/acpi/amd_wbrf.c b/drivers/acpi/amd_wbrf.c
new file mode 100644
index ..a3390d91cbea
--- /dev/null
+++ b/drivers/acpi/amd_wbrf.c
@@ -0,0 +1,294 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Wifi Band Exclusion Interface (AMD ACPI Implementation)
+ * Copyright (C) 2023 Advanced Micro Devices
+ *
+ */
+
+#include 
+#include 
+
+#define ACPI_AMD_WBRF_METHOD   "\\WBRF"
+
+/*
+ * Functions bit vector for WBRF method
+ *
+ * Bit 0: Supported for any functions other than function 0.
+ * Bit 1: Function 1 (Add / Remove frequency) is supported.
+ * Bit 2: Function 2 (Get frequency list) is supported.
+ */
+#define WBRF_ENABLED   0x0
+#define WBRF_RECORD0x1
+#define WBRF_RETRIEVE  0x2
+
+/* record actions */
+#define WBRF_RECORD_ADD0x0
+#define WBRF_RECORD_REMOVE 0x1
+
+#define WBRF_REVISION  0x1
+
+/*
+ * The data structure used for WBRF_RETRIEVE is not natually aligned.
+ * And unfortunately the design has been settled down.
+ */
+struct amd_wbrf_ranges_out {
+   u32 num_of_ranges;
+   struct exclusion_range  band_list[MAX_NUM_OF_WBRF_RANGES];
+} __packed;
+
+static const guid_t wifi_acpi_dsm_guid =
+   GUID_INIT(0x7b7656cf, 0xdc3d, 0x4c1c,
+ 0x83, 0xe9, 0x66, 0xe7, 0x21, 0xde, 0x30, 0x70);
+
+static int wbrf_dsm(struct acpi_device *adev, u8 fn,
+   union acpi_object *argv4,
+   union acpi_object **out)
+{
+   union acpi_object *obj;
+   int rc;
+
+   obj = acpi_evaluate_dsm(adev->handle, _acpi_dsm_guid,
+   WBRF_REVISION, fn, argv4);
+   if (!obj)
+   return -ENXIO;
+
+   switch (obj->type) {
+   case ACPI_TYPE_BUFFER:
+   *out = obj;
+   return 0;
+
+   case ACPI_TYPE_INTEGER:
+   rc =  obj->integer.value ? -EINVAL : 0;
+   break;
+
+   default:
+   rc = -EOPNOTSUPP;
+   }
+
+   ACPI_FREE(obj);
+
+   return rc;
+}
+
+static int wbrf_record(struct acpi_device *adev, uint8_t action,
+  struct wbrf_ranges_in *in)
+{
+   union acpi_object argv4;
+   union acpi_object *tmp;
+   u32 num_of_ranges = 0;
+   u32 num_of_elements;
+   u32 arg_idx = 0;
+   u32 loop_idx;
+   int ret;
+
+   if (!in)
+   return -EINVAL;
+
+   for (loop_idx = 0; loop_idx < ARRAY_SIZE(in->band_list);
+loop_idx++)
+   if (in->band_list[loop_idx].start &&
+   in->band_list[loop_idx].end)
+   num_of_ranges++;
+
+   /*
+* Every range comes with two end points(start and end) and
+* each of them is accounted as an element. Meanwhile the range
+* count and action type are accounted as an element each.
+* So, the total element count = 2 * num_of_ranges + 1 + 1.
+*/
+   num_of_elements = 2 * num_of_ranges + 1 + 1;
+
+   tmp = kcalloc(num_of_elements, sizeof(*tmp), GFP_KERNEL);
+   if (!tmp)
+

[PATCH V8 1/9] drivers core: Add support for Wifi band RF mitigations

2023-08-10 Thread Evan Quan

Due to electrical and mechanical constraints in certain platform designs,
relatively high-powered harmonics of the (G-)DDR memory clocks may
interfere with the local radio module frequency bands used by Wifi 6/6e/7.

To mitigate this, AMD has introduced a mechanism that devices can use to
notify active use of particular frequencies so that other devices can make
relative internal adjustments as necessary to avoid this resonance.

For a device to support this, the expected flow for device drivers or
subsystems is as follows:

Drivers/subsystems contributing frequencies:

1) During probe, check `wbrf_supported_producer` to see if WBRF supported
   for the device.
2) If adding frequencies, then call `wbrf_add_exclusion` with the
   start and end ranges of the frequencies.
3) If removing frequencies, then call `wbrf_remove_exclusion` with
   start and end ranges of the frequencies.

Drivers/subsystems responding to frequencies:

1) During probe, check `wbrf_supported_consumer` to see if WBRF is supported
   for the device.
2) Call the `wbrf_register_notifier` to register for notifications of
   frequency changes from other devices.
3) Call `wbrf_retrieve_exclusions` to retrieve the current exclusion
   ranges on receiving a notification, and respond accordingly.

In addition, a kernel parameter `wbrf`, defaulting to "auto", is
introduced to specify the policy.
  - With `wbrf=on`, the WBRF features are forcibly enabled.
  - With `wbrf=off`, the WBRF features are forcibly disabled.
  - With `wbrf=auto`, it is up to the system to do proper checks
to determine whether the WBRF features should be enabled.

Co-developed-by: Mario Limonciello 
Signed-off-by: Mario Limonciello 
Co-developed-by: Evan Quan 
Signed-off-by: Evan Quan 
--
v4->v5:
  - promote this to be a more generic solution with input argument taking
`struct device` and provide better scalability to support non-ACPI
scenarios(Andrew)
  - update the APIs naming and some other minor fixes(Rafael)
v6->v7:
  - revised the `struct wbrf_ranges_out` to be naturally aligned(Andrew)
  - revised some code comments(Andrew)
---
 .../admin-guide/kernel-parameters.txt |   9 +
 drivers/base/Makefile |   1 +
 drivers/base/wbrf.c   | 280 ++
 include/linux/wbrf.h  |  47 +++
 4 files changed, 337 insertions(+)
 create mode 100644 drivers/base/wbrf.c
 create mode 100644 include/linux/wbrf.h

diff --git a/Documentation/admin-guide/kernel-parameters.txt 
b/Documentation/admin-guide/kernel-parameters.txt
index a1457995fd41..21f73a0bbd0b 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -7152,3 +7152,12 @@
xmon commands.
off xmon is disabled.
 
+   wbrf=   [KNL]
+   Format: { on | auto | off }
+   Controls whether WBRF features are forcibly enabled or
+   disabled. Default is auto.
+   on  Force enable the WBRF features.
+   autoUp to the system to do proper checks to
+   determine the WBRF features should be enabled
+   or not.
+   off Force disable the WBRF features.
diff --git a/drivers/base/Makefile b/drivers/base/Makefile
index 3079bfe53d04..7b3cef898c19 100644
--- a/drivers/base/Makefile
+++ b/drivers/base/Makefile
@@ -26,6 +26,7 @@ obj-$(CONFIG_GENERIC_MSI_IRQ) += platform-msi.o
 obj-$(CONFIG_GENERIC_ARCH_TOPOLOGY) += arch_topology.o
 obj-$(CONFIG_GENERIC_ARCH_NUMA) += arch_numa.o
 obj-$(CONFIG_ACPI) += physical_location.o
+obj-y  += wbrf.o
 
 obj-y  += test/
 
diff --git a/drivers/base/wbrf.c b/drivers/base/wbrf.c
new file mode 100644
index ..678f245c12c6
--- /dev/null
+++ b/drivers/base/wbrf.c
@@ -0,0 +1,280 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Wifi Band Exclusion Interface
+ * Copyright (C) 2023 Advanced Micro Devices
+ *
+ */
+
+#include 
+
+static BLOCKING_NOTIFIER_HEAD(wbrf_chain_head);
+static DEFINE_MUTEX(wbrf_mutex);
+static enum WBRF_POLICY_MODE {
+   WBRF_POLICY_FORCE_DISABLE,
+   WBRF_POLICY_AUTO,
+   WBRF_POLICY_FORCE_ENABLE,
+} wbrf_policy = WBRF_POLICY_AUTO;
+
+static int __init parse_wbrf_policy_mode(char *p)
+{
+   if (!strncmp(p, "auto", 4))
+   wbrf_policy = WBRF_POLICY_AUTO;
+   else if (!strncmp(p, "on", 2))
+   wbrf_policy = WBRF_POLICY_FORCE_ENABLE;
+   else if (!strncmp(p, "off", 3))
+   wbrf_policy = WBRF_POLICY_FORCE_DISABLE;
+   else
+   return -EINVAL;
+
+   return 0;
+}
+early_param("wbrf", parse_wbrf_policy_mode);
+
+static struct exclusion_range_pool {
+   struct exclusion_range  band_list[MAX_NUM_OF_WBRF_RANGES];
+   u64

[PATCH V8 0/9] Enable Wifi RFI interference mitigation feature support

2023-08-10 Thread Evan Quan

Due to electrical and mechanical constraints in certain platform designs,
relatively high-powered harmonics of the (G-)DDR memory clocks may interfere
with the local radio module frequency bands used by Wifi 6/6e/7. To mitigate
possible RFI interference, producers can advertise the frequencies in use and
consumers can use this information to avoid using these frequencies for
sensitive features.

The whole patch set is based on Linux 6.5-rc5. A brief overview of the
patches follows:
Patch1 - 2:  Core functionality setup for WBRF feature support
Patch3 - 4:  Bring WBRF support to wifi subsystem.
Patch5 - 9:  Bring WBRF support to AMD graphics driver.

Evan Quan (9):
  drivers core: Add support for Wifi band RF mitigations
  drivers core: add ACPI based WBRF mechanism introduced by AMD
  cfg80211: expose nl80211_chan_width_to_mhz for wide sharing
  wifi: mac80211: Add support for WBRF features
  drm/amd/pm: update driver_if and ppsmc headers for coming wbrf feature
  drm/amd/pm: setup the framework to support Wifi RFI mitigation feature
  drm/amd/pm: add flood detection for wbrf events
  drm/amd/pm: enable Wifi RFI mitigation feature support for SMU13.0.0
  drm/amd/pm: enable Wifi RFI mitigation feature support for SMU13.0.7

 .../admin-guide/kernel-parameters.txt |   9 +
 drivers/acpi/Makefile |   2 +
 drivers/acpi/amd_wbrf.c   | 294 ++
 drivers/base/Kconfig  |  20 +
 drivers/base/Makefile |   1 +
 drivers/base/wbrf.c   | 367 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu.h   |   2 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c   |  17 +
 drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 213 ++
 drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h |  33 ++
 .../inc/pmfw_if/smu13_driver_if_v13_0_0.h |  14 +-
 .../inc/pmfw_if/smu13_driver_if_v13_0_7.h |  14 +-
 .../pm/swsmu/inc/pmfw_if/smu_v13_0_0_ppsmc.h  |   3 +-
 .../pm/swsmu/inc/pmfw_if/smu_v13_0_7_ppsmc.h  |   3 +-
 drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h  |   3 +-
 drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h  |   3 +
 .../gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c|   9 +
 .../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c  |  60 +++
 .../drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c  |  59 +++
 drivers/gpu/drm/amd/pm/swsmu/smu_internal.h   |   3 +
 include/linux/acpi_amd_wbrf.h |  25 ++
 include/linux/ieee80211.h |   1 +
 include/linux/wbrf.h  |  47 +++
 include/net/cfg80211.h|   8 +
 net/mac80211/Makefile |   2 +
 net/mac80211/chan.c   |   9 +
 net/mac80211/ieee80211_i.h|   9 +
 net/mac80211/main.c   |   2 +
 net/mac80211/wbrf.c   | 103 +
 net/wireless/chan.c   |   3 +-
 30 files changed, 1332 insertions(+), 6 deletions(-)
 create mode 100644 drivers/acpi/amd_wbrf.c
 create mode 100644 drivers/base/wbrf.c
 create mode 100644 include/linux/acpi_amd_wbrf.h
 create mode 100644 include/linux/wbrf.h
 create mode 100644 net/mac80211/wbrf.c

-- 
2.34.1

1 2 >

1 - 100 of 101 matches

Mail list logo