[PATCH v2] drm/amd/pm: Allow setting max UCLK on SMU v13.0.6

2024-02-08 Thread Lijo Lazar
Allow reducing max UCLK in MANUAL performance level. New UCLK value
should be less than the max DPM level UCLK level value.

Ex:
echo manual > "/sys/bus/pci/devices/.../power_dpm_force_performance_level"
echo m 1 900 > "/sys/bus/pci/devices/.../pp_od_clk_voltage"
echo c > "/sys/bus/pci/devices/.../pp_od_clk_voltage"

Signed-off-by: Lijo Lazar 
---
v2:
On switching perf level to auto, restore GFX and UCLK levels only if 
needed.

 .../drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c  | 122 +++---
 1 file changed, 102 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
index 03873d784be6..6e8a7eb1864d 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
@@ -1578,6 +1578,8 @@ static int smu_v13_0_6_set_performance_level(struct 
smu_context *smu,
struct smu_13_0_dpm_context *dpm_context = smu_dpm->dpm_context;
struct smu_13_0_dpm_table *gfx_table =
&dpm_context->dpm_tables.gfx_table;
+   struct smu_13_0_dpm_table *uclk_table =
+   &dpm_context->dpm_tables.uclk_table;
struct smu_umd_pstate_table *pstate_table = &smu->pstate_table;
int ret;
 
@@ -1593,17 +1595,27 @@ static int smu_v13_0_6_set_performance_level(struct 
smu_context *smu,
return 0;
 
case AMD_DPM_FORCED_LEVEL_AUTO:
-   if ((gfx_table->min == pstate_table->gfxclk_pstate.curr.min) &&
-   (gfx_table->max == pstate_table->gfxclk_pstate.curr.max))
-   return 0;
+   if ((gfx_table->min != pstate_table->gfxclk_pstate.curr.min) ||
+   (gfx_table->max != pstate_table->gfxclk_pstate.curr.max)) {
+   ret = smu_v13_0_6_set_gfx_soft_freq_limited_range(
+   smu, gfx_table->min, gfx_table->max);
+   if (ret)
+   return ret;
 
-   ret = smu_v13_0_6_set_gfx_soft_freq_limited_range(
-   smu, gfx_table->min, gfx_table->max);
-   if (ret)
-   return ret;
+   pstate_table->gfxclk_pstate.curr.min = gfx_table->min;
+   pstate_table->gfxclk_pstate.curr.max = gfx_table->max;
+   }
+
+   if (uclk_table->max != pstate_table->uclk_pstate.curr.max) {
+   /* Min UCLK is not expected to be changed */
+   ret = smu_v13_0_set_soft_freq_limited_range(
+   smu, SMU_UCLK, 0, uclk_table->max);
+   if (ret)
+   return ret;
+   pstate_table->uclk_pstate.curr.max = uclk_table->max;
+   }
+   pstate_table->uclk_pstate.custom.max = 0;
 
-   pstate_table->gfxclk_pstate.curr.min = gfx_table->min;
-   pstate_table->gfxclk_pstate.curr.max = gfx_table->max;
return 0;
case AMD_DPM_FORCED_LEVEL_MANUAL:
return 0;
@@ -1626,7 +1638,8 @@ static int smu_v13_0_6_set_soft_freq_limited_range(struct 
smu_context *smu,
uint32_t max_clk;
int ret = 0;
 
-   if (clk_type != SMU_GFXCLK && clk_type != SMU_SCLK)
+   if (clk_type != SMU_GFXCLK && clk_type != SMU_SCLK &&
+   clk_type != SMU_UCLK)
return -EINVAL;
 
if ((smu_dpm->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL) &&
@@ -1636,18 +1649,31 @@ static int 
smu_v13_0_6_set_soft_freq_limited_range(struct smu_context *smu,
if (smu_dpm->dpm_level == AMD_DPM_FORCED_LEVEL_MANUAL) {
if (min >= max) {
dev_err(smu->adev->dev,
-   "Minimum GFX clk should be less than the 
maximum allowed clock\n");
+   "Minimum clk should be less than the maximum 
allowed clock\n");
return -EINVAL;
}
 
-   if ((min == pstate_table->gfxclk_pstate.curr.min) &&
-   (max == pstate_table->gfxclk_pstate.curr.max))
-   return 0;
+   if (clk_type == SMU_GFXCLK) {
+   if ((min == pstate_table->gfxclk_pstate.curr.min) &&
+   (max == pstate_table->gfxclk_pstate.curr.max))
+   return 0;
 
-   ret = smu_v13_0_6_set_gfx_soft_freq_limited_range(smu, min, 
max);
-   if (!ret) {
-   pstate_table->gfxclk_pstate.curr.min = min;
-   pstate_table->gfxclk_pstate.curr.max = max;
+   ret = smu_v13_0_6_set_gfx_soft_freq_limited_range(
+   smu, min, max);
+   if (!ret) {
+   pstate_table->gfxclk_pstate.curr.min = min;
+   

Re: [PATCH] drm/amd/pm: Allow setting max UCLK on SMU v13.0.6

2024-02-08 Thread Lazar, Lijo
Sending another one, please ignore.

Thanks,
Lijo

On 2/9/2024 12:04 PM, Lijo Lazar wrote:
> Allow reducing max UCLK in MANUAL performance level. New UCLK value
> should be less than the max DPM level UCLK level value.
> 
> Ex:
> echo manual > "/sys/bus/pci/devices/.../power_dpm_force_performance_level"
> echo m 1 900 > "/sys/bus/pci/devices/.../pp_od_clk_voltage"
> echo c > "/sys/bus/pci/devices/.../pp_od_clk_voltage"
> 
> Signed-off-by: Lijo Lazar 
> ---
>  .../drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c  | 103 --
>  1 file changed, 92 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c 
> b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
> index 03873d784be6..9929981ff6c5 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
> +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
> @@ -1578,6 +1578,8 @@ static int smu_v13_0_6_set_performance_level(struct 
> smu_context *smu,
>   struct smu_13_0_dpm_context *dpm_context = smu_dpm->dpm_context;
>   struct smu_13_0_dpm_table *gfx_table =
>   &dpm_context->dpm_tables.gfx_table;
> + struct smu_13_0_dpm_table *uclk_table =
> + &dpm_context->dpm_tables.uclk_table;
>   struct smu_umd_pstate_table *pstate_table = &smu->pstate_table;
>   int ret;
>  
> @@ -1604,6 +1606,15 @@ static int smu_v13_0_6_set_performance_level(struct 
> smu_context *smu,
>  
>   pstate_table->gfxclk_pstate.curr.min = gfx_table->min;
>   pstate_table->gfxclk_pstate.curr.max = gfx_table->max;
> +
> + /* Min UCLK is not expected to be changed */
> + ret = smu_v13_0_set_soft_freq_limited_range(smu, SMU_UCLK, 0,
> + uclk_table->max);
> + if (ret)
> + return ret;
> + pstate_table->uclk_pstate.curr.max = uclk_table->max;
> + pstate_table->uclk_pstate.custom.max = 0;
> +
>   return 0;
>   case AMD_DPM_FORCED_LEVEL_MANUAL:
>   return 0;
> @@ -1626,7 +1637,8 @@ static int 
> smu_v13_0_6_set_soft_freq_limited_range(struct smu_context *smu,
>   uint32_t max_clk;
>   int ret = 0;
>  
> - if (clk_type != SMU_GFXCLK && clk_type != SMU_SCLK)
> + if (clk_type != SMU_GFXCLK && clk_type != SMU_SCLK &&
> + clk_type != SMU_UCLK)
>   return -EINVAL;
>  
>   if ((smu_dpm->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL) &&
> @@ -1636,18 +1648,31 @@ static int 
> smu_v13_0_6_set_soft_freq_limited_range(struct smu_context *smu,
>   if (smu_dpm->dpm_level == AMD_DPM_FORCED_LEVEL_MANUAL) {
>   if (min >= max) {
>   dev_err(smu->adev->dev,
> - "Minimum GFX clk should be less than the 
> maximum allowed clock\n");
> + "Minimum clk should be less than the maximum 
> allowed clock\n");
>   return -EINVAL;
>   }
>  
> - if ((min == pstate_table->gfxclk_pstate.curr.min) &&
> - (max == pstate_table->gfxclk_pstate.curr.max))
> - return 0;
> + if (clk_type == SMU_GFXCLK) {
> + if ((min == pstate_table->gfxclk_pstate.curr.min) &&
> + (max == pstate_table->gfxclk_pstate.curr.max))
> + return 0;
>  
> - ret = smu_v13_0_6_set_gfx_soft_freq_limited_range(smu, min, 
> max);
> - if (!ret) {
> - pstate_table->gfxclk_pstate.curr.min = min;
> - pstate_table->gfxclk_pstate.curr.max = max;
> + ret = smu_v13_0_6_set_gfx_soft_freq_limited_range(
> + smu, min, max);
> + if (!ret) {
> + pstate_table->gfxclk_pstate.curr.min = min;
> + pstate_table->gfxclk_pstate.curr.max = max;
> + }
> + }
> +
> + if (clk_type == SMU_UCLK) {
> + if (max == pstate_table->uclk_pstate.curr.max)
> + return 0;
> + /* Only max clock limiting is allowed for UCLK */
> + ret = smu_v13_0_set_soft_freq_limited_range(
> + smu, SMU_UCLK, 0, max);
> + if (!ret)
> + pstate_table->uclk_pstate.curr.max = max;
>   }
>  
>   return ret;
> @@ -1740,6 +1765,40 @@ static int smu_v13_0_6_usr_edit_dpm_table(struct 
> smu_context *smu,
>   return -EINVAL;
>   }
>   break;
> + case PP_OD_EDIT_MCLK_VDDC_TABLE:
> + if (size != 2) {
> + dev_err(smu->adev->dev,
> + "Input parameter number not correct\n");
> + return -EINVAL;
> + }
> +
> + 

[PATCH] drm/amd/pm: Allow setting max UCLK on SMU v13.0.6

2024-02-08 Thread Lijo Lazar
Allow reducing max UCLK in MANUAL performance level. New UCLK value
should be less than the max DPM level UCLK level value.

Ex:
echo manual > "/sys/bus/pci/devices/.../power_dpm_force_performance_level"
echo m 1 900 > "/sys/bus/pci/devices/.../pp_od_clk_voltage"
echo c > "/sys/bus/pci/devices/.../pp_od_clk_voltage"

Signed-off-by: Lijo Lazar 
---
 .../drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c  | 103 --
 1 file changed, 92 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
index 03873d784be6..9929981ff6c5 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
@@ -1578,6 +1578,8 @@ static int smu_v13_0_6_set_performance_level(struct 
smu_context *smu,
struct smu_13_0_dpm_context *dpm_context = smu_dpm->dpm_context;
struct smu_13_0_dpm_table *gfx_table =
&dpm_context->dpm_tables.gfx_table;
+   struct smu_13_0_dpm_table *uclk_table =
+   &dpm_context->dpm_tables.uclk_table;
struct smu_umd_pstate_table *pstate_table = &smu->pstate_table;
int ret;
 
@@ -1604,6 +1606,15 @@ static int smu_v13_0_6_set_performance_level(struct 
smu_context *smu,
 
pstate_table->gfxclk_pstate.curr.min = gfx_table->min;
pstate_table->gfxclk_pstate.curr.max = gfx_table->max;
+
+   /* Min UCLK is not expected to be changed */
+   ret = smu_v13_0_set_soft_freq_limited_range(smu, SMU_UCLK, 0,
+   uclk_table->max);
+   if (ret)
+   return ret;
+   pstate_table->uclk_pstate.curr.max = uclk_table->max;
+   pstate_table->uclk_pstate.custom.max = 0;
+
return 0;
case AMD_DPM_FORCED_LEVEL_MANUAL:
return 0;
@@ -1626,7 +1637,8 @@ static int smu_v13_0_6_set_soft_freq_limited_range(struct 
smu_context *smu,
uint32_t max_clk;
int ret = 0;
 
-   if (clk_type != SMU_GFXCLK && clk_type != SMU_SCLK)
+   if (clk_type != SMU_GFXCLK && clk_type != SMU_SCLK &&
+   clk_type != SMU_UCLK)
return -EINVAL;
 
if ((smu_dpm->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL) &&
@@ -1636,18 +1648,31 @@ static int 
smu_v13_0_6_set_soft_freq_limited_range(struct smu_context *smu,
if (smu_dpm->dpm_level == AMD_DPM_FORCED_LEVEL_MANUAL) {
if (min >= max) {
dev_err(smu->adev->dev,
-   "Minimum GFX clk should be less than the 
maximum allowed clock\n");
+   "Minimum clk should be less than the maximum 
allowed clock\n");
return -EINVAL;
}
 
-   if ((min == pstate_table->gfxclk_pstate.curr.min) &&
-   (max == pstate_table->gfxclk_pstate.curr.max))
-   return 0;
+   if (clk_type == SMU_GFXCLK) {
+   if ((min == pstate_table->gfxclk_pstate.curr.min) &&
+   (max == pstate_table->gfxclk_pstate.curr.max))
+   return 0;
 
-   ret = smu_v13_0_6_set_gfx_soft_freq_limited_range(smu, min, 
max);
-   if (!ret) {
-   pstate_table->gfxclk_pstate.curr.min = min;
-   pstate_table->gfxclk_pstate.curr.max = max;
+   ret = smu_v13_0_6_set_gfx_soft_freq_limited_range(
+   smu, min, max);
+   if (!ret) {
+   pstate_table->gfxclk_pstate.curr.min = min;
+   pstate_table->gfxclk_pstate.curr.max = max;
+   }
+   }
+
+   if (clk_type == SMU_UCLK) {
+   if (max == pstate_table->uclk_pstate.curr.max)
+   return 0;
+   /* Only max clock limiting is allowed for UCLK */
+   ret = smu_v13_0_set_soft_freq_limited_range(
+   smu, SMU_UCLK, 0, max);
+   if (!ret)
+   pstate_table->uclk_pstate.curr.max = max;
}
 
return ret;
@@ -1740,6 +1765,40 @@ static int smu_v13_0_6_usr_edit_dpm_table(struct 
smu_context *smu,
return -EINVAL;
}
break;
+   case PP_OD_EDIT_MCLK_VDDC_TABLE:
+   if (size != 2) {
+   dev_err(smu->adev->dev,
+   "Input parameter number not correct\n");
+   return -EINVAL;
+   }
+
+   if (!smu_cmn_feature_is_enabled(smu,
+   SMU_FEATURE_DPM_UCLK_BIT)) {
+   dev_warn(smu->adev->dev,
+ 

RE: [PATCH] drm/amdgpu/psp: update define to better align with its meaning

2024-02-08 Thread Zhang, Hawking
[AMD Official Use Only - General]

Reviewed-by: Hawking Zhang 

Regards,
Hawking
-Original Message-
From: amd-gfx  On Behalf Of Alex Deucher
Sent: Friday, February 9, 2024 05:15
To: amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander 
Subject: [PATCH] drm/amdgpu/psp: update define to better align with its meaning

MEM_TRAINING_ENCROACHED_SIZE is for BIST training data.  It's not memory type 
specific.

Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h | 2 +-  
drivers/gpu/drm/amd/amdgpu/psp_v11_0.c  | 2 +-  
drivers/gpu/drm/amd/amdgpu/psp_v13_0.c  | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
index 9951bdd022de..47ffaa796264 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
@@ -203,7 +203,7 @@ struct psp_ras_context {
 #define GDDR6_MEM_TRAINING_DATA_SIZE_IN_BYTES  0x1000
 #define GDDR6_MEM_TRAINING_OFFSET  0x8000
 /*Define the VRAM size that will be encroached by BIST training.*/
-#define GDDR6_MEM_TRAINING_ENCROACHED_SIZE 0x200
+#define BIST_MEM_TRAINING_ENCROACHED_SIZE  0x200

 enum psp_memory_training_init_flag {
PSP_MEM_TRAIN_NOT_SUPPORT   = 0x0,
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
index efa37e3b7931..2395f1856962 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
@@ -506,7 +506,7 @@ static int psp_v11_0_memory_training(struct psp_context 
*psp, uint32_t ops)
 * before training, and restore it after training to avoid
 * VRAM corruption.
 */
-   sz = GDDR6_MEM_TRAINING_ENCROACHED_SIZE;
+   sz = BIST_MEM_TRAINING_ENCROACHED_SIZE;

if (adev->gmc.visible_vram_size < sz || 
!adev->mman.aper_base_kaddr) {
DRM_ERROR("visible_vram_size %llx or aper_base_kaddr %p 
is not initialized.\n", diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c 
b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
index 722b6066ce07..0e4329640ecb 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
@@ -561,7 +561,7 @@ static int psp_v13_0_memory_training(struct psp_context 
*psp, uint32_t ops)
 * before training, and restore it after training to avoid
 * VRAM corruption.
 */
-   sz = GDDR6_MEM_TRAINING_ENCROACHED_SIZE;
+   sz = BIST_MEM_TRAINING_ENCROACHED_SIZE;

if (adev->gmc.visible_vram_size < sz || 
!adev->mman.aper_base_kaddr) {
dev_err(adev->dev, "visible_vram_size %llx or 
aper_base_kaddr %p is not initialized.\n",
--
2.42.0



Re: [PATCH] drm/amdgpu: make damage clips support configurable

2024-02-08 Thread Mario Limonciello

On 2/8/2024 16:11, Hamza Mahfooz wrote:

We have observed that there are quite a number of PSR-SU panels on the
market that are unable to keep up with what user space throws at them,
resulting in hangs and random black screens. So, make damage clips
support configurable and disable it by default for PSR-SU displays.

Cc: Mario Limonciello 
Signed-off-by: Hamza Mahfooz 


I think this with this patch in place you could also revert

571c2fa26aa6 ("drm/amd/display: Disable PSR-SU on Parade 0803 TCON again")

One minor nit below otherwise LGTM.

Reviewed-by: Mario Limonciello 

---
  drivers/gpu/drm/amd/amdgpu/amdgpu.h   |  1 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c   | 13 +
  drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c |  7 +++
  3 files changed, 21 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 312dfaec7b4a..1291b8eb9dff 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -198,6 +198,7 @@ extern uint amdgpu_dc_debug_mask;
  extern uint amdgpu_dc_visual_confirm;
  extern uint amdgpu_dm_abm_level;
  extern int amdgpu_backlight;
+extern int amdgpu_damage_clips;
  extern struct amdgpu_mgpu_info mgpu_info;
  extern int amdgpu_ras_enable;
  extern uint amdgpu_ras_mask;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 161ecf9b4174..2b75aeb8280f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -211,6 +211,7 @@ int amdgpu_seamless = -1; /* auto */
  uint amdgpu_debug_mask;
  int amdgpu_agp = -1; /* auto */
  int amdgpu_wbrf = -1;
+int amdgpu_damage_clips = -1; /* auto */
  
  static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work);
  
@@ -859,6 +860,18 @@ int amdgpu_backlight = -1;

  MODULE_PARM_DESC(backlight, "Backlight control (0 = pwm, 1 = aux, -1 auto 
(default))");
  module_param_named(backlight, amdgpu_backlight, bint, 0444);
  
+/**

+ * DOC: damageclips (int)
+ * Enable or disable damage clips support. If damage clips support is disabled,
+ * we will force full frame updates, irrespective of what user space sends to
+ * us.
+ *
+ * Defaults to -1.


I think it would be better if this documentation explained what the 
values are (IE what -1 really means).



+ */
+MODULE_PARM_DESC(damageclips,
+"Damage clips support (0 = disable, 1 = enable, -1 auto 
(default))");
+module_param_named(damageclips, amdgpu_damage_clips, int, 0444);
+
  /**
   * DOC: tmz (int)
   * Trusted Memory Zone (TMZ) is a method to protect data being written
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index b3a5e730be24..fbe2aa40c21a 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -5253,6 +5253,7 @@ static void fill_dc_dirty_rects(struct drm_plane *plane,
struct drm_plane_state *new_plane_state,
struct drm_crtc_state *crtc_state,
struct dc_flip_addrs *flip_addrs,
+   bool is_psr_su,
bool *dirty_regions_changed)
  {
struct dm_crtc_state *dm_crtc_state = to_dm_crtc_state(crtc_state);
@@ -5277,6 +5278,10 @@ static void fill_dc_dirty_rects(struct drm_plane *plane,
num_clips = drm_plane_get_damage_clips_count(new_plane_state);
clips = drm_plane_get_damage_clips(new_plane_state);
  
+	if (num_clips && (!amdgpu_damage_clips || (amdgpu_damage_clips < 0 &&

+  is_psr_su)))
+   goto ffu;
+
if (!dm_crtc_state->mpo_requested) {
if (!num_clips || num_clips > DC_MAX_DIRTY_RECTS)
goto ffu;
@@ -8411,6 +8416,8 @@ static void amdgpu_dm_commit_planes(struct 
drm_atomic_state *state,
fill_dc_dirty_rects(plane, old_plane_state,
new_plane_state, new_crtc_state,
&bundle->flip_addrs[planes_count],
+   
acrtc_state->stream->link->psr_settings.psr_version ==
+   DC_PSR_VERSION_SU_1,
_rects_changed);
  
  			/*




[PATCH] drm/amdgpu: make damage clips support configurable

2024-02-08 Thread Hamza Mahfooz
We have observed that there are quite a number of PSR-SU panels on the
market that are unable to keep up with what user space throws at them,
resulting in hangs and random black screens. So, make damage clips
support configurable and disable it by default for PSR-SU displays.

Cc: Mario Limonciello 
Signed-off-by: Hamza Mahfooz 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h   |  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c   | 13 +
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c |  7 +++
 3 files changed, 21 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 312dfaec7b4a..1291b8eb9dff 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -198,6 +198,7 @@ extern uint amdgpu_dc_debug_mask;
 extern uint amdgpu_dc_visual_confirm;
 extern uint amdgpu_dm_abm_level;
 extern int amdgpu_backlight;
+extern int amdgpu_damage_clips;
 extern struct amdgpu_mgpu_info mgpu_info;
 extern int amdgpu_ras_enable;
 extern uint amdgpu_ras_mask;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 161ecf9b4174..2b75aeb8280f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -211,6 +211,7 @@ int amdgpu_seamless = -1; /* auto */
 uint amdgpu_debug_mask;
 int amdgpu_agp = -1; /* auto */
 int amdgpu_wbrf = -1;
+int amdgpu_damage_clips = -1; /* auto */
 
 static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work);
 
@@ -859,6 +860,18 @@ int amdgpu_backlight = -1;
 MODULE_PARM_DESC(backlight, "Backlight control (0 = pwm, 1 = aux, -1 auto 
(default))");
 module_param_named(backlight, amdgpu_backlight, bint, 0444);
 
+/**
+ * DOC: damageclips (int)
+ * Enable or disable damage clips support. If damage clips support is disabled,
+ * we will force full frame updates, irrespective of what user space sends to
+ * us.
+ *
+ * Defaults to -1.
+ */
+MODULE_PARM_DESC(damageclips,
+"Damage clips support (0 = disable, 1 = enable, -1 auto 
(default))");
+module_param_named(damageclips, amdgpu_damage_clips, int, 0444);
+
 /**
  * DOC: tmz (int)
  * Trusted Memory Zone (TMZ) is a method to protect data being written
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index b3a5e730be24..fbe2aa40c21a 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -5253,6 +5253,7 @@ static void fill_dc_dirty_rects(struct drm_plane *plane,
struct drm_plane_state *new_plane_state,
struct drm_crtc_state *crtc_state,
struct dc_flip_addrs *flip_addrs,
+   bool is_psr_su,
bool *dirty_regions_changed)
 {
struct dm_crtc_state *dm_crtc_state = to_dm_crtc_state(crtc_state);
@@ -5277,6 +5278,10 @@ static void fill_dc_dirty_rects(struct drm_plane *plane,
num_clips = drm_plane_get_damage_clips_count(new_plane_state);
clips = drm_plane_get_damage_clips(new_plane_state);
 
+   if (num_clips && (!amdgpu_damage_clips || (amdgpu_damage_clips < 0 &&
+  is_psr_su)))
+   goto ffu;
+
if (!dm_crtc_state->mpo_requested) {
if (!num_clips || num_clips > DC_MAX_DIRTY_RECTS)
goto ffu;
@@ -8411,6 +8416,8 @@ static void amdgpu_dm_commit_planes(struct 
drm_atomic_state *state,
fill_dc_dirty_rects(plane, old_plane_state,
new_plane_state, new_crtc_state,
&bundle->flip_addrs[planes_count],
+   
acrtc_state->stream->link->psr_settings.psr_version ==
+   DC_PSR_VERSION_SU_1,
_rects_changed);
 
/*
-- 
2.43.0



[PATCH] drm/amdgpu/psp: update define to better align with its meaning

2024-02-08 Thread Alex Deucher
MEM_TRAINING_ENCROACHED_SIZE is for BIST training data.  It's
not memory type specific.

Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h | 2 +-
 drivers/gpu/drm/amd/amdgpu/psp_v11_0.c  | 2 +-
 drivers/gpu/drm/amd/amdgpu/psp_v13_0.c  | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
index 9951bdd022de..47ffaa796264 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
@@ -203,7 +203,7 @@ struct psp_ras_context {
 #define GDDR6_MEM_TRAINING_DATA_SIZE_IN_BYTES  0x1000
 #define GDDR6_MEM_TRAINING_OFFSET  0x8000
 /*Define the VRAM size that will be encroached by BIST training.*/
-#define GDDR6_MEM_TRAINING_ENCROACHED_SIZE 0x200
+#define BIST_MEM_TRAINING_ENCROACHED_SIZE  0x200
 
 enum psp_memory_training_init_flag {
PSP_MEM_TRAIN_NOT_SUPPORT   = 0x0,
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
index efa37e3b7931..2395f1856962 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
@@ -506,7 +506,7 @@ static int psp_v11_0_memory_training(struct psp_context 
*psp, uint32_t ops)
 * before training, and restore it after training to avoid
 * VRAM corruption.
 */
-   sz = GDDR6_MEM_TRAINING_ENCROACHED_SIZE;
+   sz = BIST_MEM_TRAINING_ENCROACHED_SIZE;
 
if (adev->gmc.visible_vram_size < sz || 
!adev->mman.aper_base_kaddr) {
DRM_ERROR("visible_vram_size %llx or aper_base_kaddr %p 
is not initialized.\n",
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c 
b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
index 722b6066ce07..0e4329640ecb 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
@@ -561,7 +561,7 @@ static int psp_v13_0_memory_training(struct psp_context 
*psp, uint32_t ops)
 * before training, and restore it after training to avoid
 * VRAM corruption.
 */
-   sz = GDDR6_MEM_TRAINING_ENCROACHED_SIZE;
+   sz = BIST_MEM_TRAINING_ENCROACHED_SIZE;
 
if (adev->gmc.visible_vram_size < sz || 
!adev->mman.aper_base_kaddr) {
dev_err(adev->dev, "visible_vram_size %llx or 
aper_base_kaddr %p is not initialized.\n",
-- 
2.42.0



[PATCH v4 3/3] drm/amdgpu: Make it possible to async flip overlay planes

2024-02-08 Thread André Almeida
amdgpu can handle async flips on overlay planes, so mark it as true
during the plane initialization.

Signed-off-by: André Almeida 
---
v4: new patch

 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
index 8a4c40b4c27e..dc5392c08a87 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
@@ -1708,6 +1708,7 @@ int amdgpu_dm_plane_init(struct amdgpu_display_manager 
*dm,
} else if (plane->type == DRM_PLANE_TYPE_OVERLAY) {
unsigned int zpos = 1 + drm_plane_index(plane);
drm_plane_create_zpos_property(plane, zpos, 1, 254);
+   plane->async_flip = true;
} else if (plane->type == DRM_PLANE_TYPE_CURSOR) {
drm_plane_create_zpos_immutable_property(plane, 255);
}
-- 
2.43.0



[PATCH v4 2/3] drm: Allow drivers to choose plane types to async flip

2024-02-08 Thread André Almeida
Different planes may have different capabilities of doing async flips,
so create a field to let drivers allow async flip per plane type.

Signed-off-by: André Almeida 
---
v4: new patch

 drivers/gpu/drm/drm_atomic_uapi.c | 4 ++--
 drivers/gpu/drm/drm_plane.c   | 3 +++
 include/drm/drm_plane.h   | 5 +
 3 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/drm_atomic_uapi.c 
b/drivers/gpu/drm/drm_atomic_uapi.c
index 1eecfb9e240d..5c66289188be 100644
--- a/drivers/gpu/drm/drm_atomic_uapi.c
+++ b/drivers/gpu/drm/drm_atomic_uapi.c
@@ -1075,9 +1075,9 @@ int drm_atomic_set_property(struct drm_atomic_state 
*state,
break;
}
 
-   if (async_flip && plane_state->plane->type != 
DRM_PLANE_TYPE_PRIMARY) {
+   if (async_flip && !plane_state->plane->async_flip) {
drm_dbg_atomic(prop->dev,
-  "[OBJECT:%d] Only primary planes can be 
changed during async flip\n",
+  "[OBJECT:%d] This type of plane cannot 
be changed during async flip\n",
   obj->id);
ret = -EINVAL;
break;
diff --git a/drivers/gpu/drm/drm_plane.c b/drivers/gpu/drm/drm_plane.c
index 672c655c7a8e..71ada690222a 100644
--- a/drivers/gpu/drm/drm_plane.c
+++ b/drivers/gpu/drm/drm_plane.c
@@ -366,6 +366,9 @@ static int __drm_universal_plane_init(struct drm_device 
*dev,
 
drm_modeset_lock_init(&plane->mutex);
 
+   if (type == DRM_PLANE_TYPE_PRIMARY)
+   plane->async_flip = true;
+
plane->base.properties = &plane->properties;
plane->dev = dev;
plane->funcs = funcs;
diff --git a/include/drm/drm_plane.h b/include/drm/drm_plane.h
index 641fe298052d..5e9efb7321ac 100644
--- a/include/drm/drm_plane.h
+++ b/include/drm/drm_plane.h
@@ -779,6 +779,11 @@ struct drm_plane {
 * @hotspot_y_property: property to set mouse hotspot y offset.
 */
struct drm_property *hotspot_y_property;
+
+   /**
+* @async_flip: indicates if a plane can do async flips
+*/
+   bool async_flip;
 };
 
 #define obj_to_plane(x) container_of(x, struct drm_plane, base)
-- 
2.43.0



[PATCH v4 0/3] drm/atomic: Allow drivers to write their own plane check for async

2024-02-08 Thread André Almeida
Hi,

AMD hardware can do async flips with overlay planes, so this patchset does a
small redesign to allow drivers to choose per plane type if they can or cannot
do async flips.

It also allows async commits with IN_FENCE_ID in any driver.

Changes from v3:
- Major patchset redesign 
v3: https://lore.kernel.org/lkml/20240128212515.630345-1-andrealm...@igalia.com/

Changes from v2:
 - Allow IN_FENCE_ID for any driver
 - Allow overlay planes again
v2: https://lore.kernel.org/lkml/20240119181235.255060-1-andrealm...@igalia.com/

Changes from v1:
 - Drop overlay planes option for now
v1: 
https://lore.kernel.org/dri-devel/20240116045159.1015510-1-andrealm...@igalia.com/

André Almeida (3):
  drm/atomic: Allow userspace to use explicit sync with atomic async
flips
  drm: Allow drivers to choose plane types to async flip
  drm/amdgpu: Make it possible to async flip overlay planes

 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c | 1 +
 drivers/gpu/drm/drm_atomic_uapi.c   | 8 +---
 drivers/gpu/drm/drm_plane.c | 3 +++
 include/drm/drm_plane.h | 5 +
 4 files changed, 14 insertions(+), 3 deletions(-)

-- 
2.43.0



[PATCH v4 1/3] drm/atomic: Allow userspace to use explicit sync with atomic async flips

2024-02-08 Thread André Almeida
Allow userspace to use explicit synchronization with atomic async flips.
That means that the flip will wait for some hardware fence, and then
will flip as soon as possible (async) in regard of the vblank.

Signed-off-by: André Almeida 
---
v4: no changes

 drivers/gpu/drm/drm_atomic_uapi.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/drm_atomic_uapi.c 
b/drivers/gpu/drm/drm_atomic_uapi.c
index 29d4940188d4..1eecfb9e240d 100644
--- a/drivers/gpu/drm/drm_atomic_uapi.c
+++ b/drivers/gpu/drm/drm_atomic_uapi.c
@@ -1066,7 +1066,9 @@ int drm_atomic_set_property(struct drm_atomic_state 
*state,
break;
}
 
-   if (async_flip && prop != config->prop_fb_id) {
+   if (async_flip &&
+   prop != config->prop_fb_id &&
+   prop != config->prop_in_fence_fd) {
ret = drm_atomic_plane_get_property(plane, plane_state,
prop, &old_val);
ret = drm_atomic_check_prop_changes(ret, old_val, 
prop_value, prop);
-- 
2.43.0



Re: [Patch v2] drm/amdkfd: update SIMD distribution algo for GFXIP 9.4.2 onwards

2024-02-08 Thread Felix Kuehling



On 2024-02-08 15:01, Bhardwaj, Rajneesh wrote:


On 2/8/2024 2:41 PM, Felix Kuehling wrote:


On 2024-02-07 23:14, Rajneesh Bhardwaj wrote:
In certain cooperative group dispatch scenarios the default SPI 
resource

allocation may cause reduced per-CU workgroup occupancy. Set
COMPUTE_RESOURCE_LIMITS.FORCE_SIMD_DIST=1 to mitigate soft hang
scenarios.

Suggested-by: Joseph Greathouse 
Signed-off-by: Rajneesh Bhardwaj 
---
* Found a bug in the previous reviewed version
https://lists.freedesktop.org/archives/amd-gfx/2024-February/104101.html 


   since the q->is_gws is unset for keeping the count.
* updated pqm_set_gws to pass minfo holding gws state for the active
   queues and use that to apply the FORCE_SIMD_DIST_MASK.

  drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c    | 4 
  drivers/gpu/drm/amd/amdkfd/kfd_priv.h  | 1 +
  drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 4 +++-
  3 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c

index 42d881809dc7..0b71db4c96b5 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
@@ -303,6 +303,10 @@ static void update_mqd(struct mqd_manager *mm, 
void *mqd,

  update_cu_mask(mm, mqd, minfo, 0);
  set_priority(m, q);
  +    if (minfo && KFD_GC_VERSION(mm->dev) >= IP_VERSION(9, 4, 2))
+    m->compute_resource_limits = minfo->gws ?
+    COMPUTE_RESOURCE_LIMITS__FORCE_SIMD_DIST_MASK : 0;
+


This looks OK because we don't set anything else in 
m->compute_resource_limits. If that ever changes, we have to be more 
careful here to not wipe out other fields in that register.



Yes, Should I change it to below and send a v3?

 m->compute_resource_limits |= minfo->gws ?
    COMPUTE_RESOURCE_LIMITS__FORCE_SIMD_DIST_MASK : 0;


I think you need to do

if (minfo->gws)
m->compute_resource_limits |= 
COMPUTE_RESOURCE_LIMITS__FORCE_SIMD_DIST_MASK;
else
m->compute_resource_limits &= 
~COMPUTE_RESOURCE_LIMITS__FORCE_SIMD_DIST_MASK;

That way you can clear the resource limit when GWS is disable for the queue.









  q->is_active = QUEUE_IS_ACTIVE(*q);
  }
  diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h

index 677281c0793e..f4b327a2d4a8 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -542,6 +542,7 @@ struct mqd_update_info {
  } cu_mask;
  };
  enum mqd_update_flag update_flag;
+    bool gws;


Instead of adding a new bool, can we add a flag to mqd_update_flag?


Maybe, I initially thought about it but then I chose the bool approach 
since  those debug flags are generic KFD non per-Asic flags while this 
bool is per-Asic request so I felt they didn't fit together. On the 
other hand, those flags and this bool are both quirks anyways so maybe 
they can be together.   Please let me know your preference.


I'd prefer to use the flags. They are currently used for a GFX11 quirk, 
now we can add another flag for a GFX9 quirk.


The GFX11 code currently has an implicit assumption that no other flags 
exist. That would need to be fixed:


if (has_wa_flag) {
-   uint32_t wa_mask = minfo->update_flag == 
UPDATE_FLAG_DBG_WA_ENABLE ?
+   uint32_t wa_mask = (minfo->update_flag & 
UPDATE_FLAG_DBG_WA_ENABLE) ?
0x : 0x;

Regards,
  Felix







Looks good to me otherwise.

Regards,
  Felix



  };
    /**
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c

index 43eff221eae5..5416a110ced9 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -95,6 +95,7 @@ void kfd_process_dequeue_from_device(struct 
kfd_process_device *pdd)

  int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid,
  void *gws)
  {
+    struct mqd_update_info minfo = {0};
  struct kfd_node *dev = NULL;
  struct process_queue_node *pqn;
  struct kfd_process_device *pdd;
@@ -146,9 +147,10 @@ int pqm_set_gws(struct process_queue_manager 
*pqm, unsigned int qid,

  }
    pdd->qpd.num_gws = gws ? dev->adev->gds.gws_size : 0;
+    minfo.gws = !!gws;
    return 
pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm,

-    pqn->q, NULL);
+    pqn->q, );
  }
    void kfd_process_dequeue_from_all_devices(struct kfd_process *p)


[PATCH 3/4] drm/amdgpu/jpeg5: Enable doorbell

2024-02-08 Thread Alex Deucher
From: Sonny Jiang 

Add doorbell for JPEG5

Signed-off-by: Sonny Jiang 
Reviewed-by: Leo Liu 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c | 9 -
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c 
b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c
index 71c28cc06605..e70200f97555 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c
@@ -85,7 +85,7 @@ static int jpeg_v5_0_0_sw_init(void *handle)
return r;
 
ring = adev->jpeg.inst->ring_dec;
-   ring->use_doorbell = false;
+   ring->use_doorbell = true;
ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1;
ring->vm_hub = AMDGPU_MMHUB0(0);
 
@@ -134,6 +134,13 @@ static int jpeg_v5_0_0_hw_init(void *handle)
struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec;
int r;
 
+   adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
+   (adev->doorbell_index.vcn.vcn_ring0_1 << 1), 0);
+
+   WREG32_SOC15(VCN, 0, regVCN_JPEG_DB_CTRL,
+   ring->doorbell_index << VCN_JPEG_DB_CTRL__OFFSET__SHIFT 
|
+   VCN_JPEG_DB_CTRL__EN_MASK);
+
r = amdgpu_ring_test_helper(ring);
if (r)
return r;
-- 
2.42.0



[PATCH 4/4] drm/amdgpu: Add jpeg_v5_0_0 ip block support

2024-02-08 Thread Alex Deucher
From: Sonny Jiang 

Enable support for jpeg_v5_0_0 ip  block.

Signed-off-by: Sonny Jiang 
Reviewed-by: Leo Liu 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
index 399428054c0d..2b6e0eb5aa3f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -97,6 +97,7 @@
 #include "smuio_v13_0_3.h"
 #include "smuio_v13_0_6.h"
 #include "vcn_v5_0_0.h"
+#include "jpeg_v5_0_0.h"
 
 #include "amdgpu_vpe.h"
 
@@ -2135,6 +2136,7 @@ static int amdgpu_discovery_set_mm_ip_blocks(struct 
amdgpu_device *adev)
break;
case IP_VERSION(5, 0, 0):
amdgpu_device_ip_block_add(adev, _v5_0_0_ip_block);
+   amdgpu_device_ip_block_add(adev, _v5_0_0_ip_block);
break;
default:
dev_err(adev->dev,
-- 
2.42.0



[PATCH 1/4] drm/amdgpu: Add JPEG5 support

2024-02-08 Thread Alex Deucher
From: Sonny Jiang 

Add support for JPEG5

Signed-off-by: Sonny Jiang 
Reviewed-by: Leo Liu 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/Makefile  |   3 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c |   2 +
 drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c |  16 +-
 drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h |  15 +
 drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c | 555 +++
 drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.h |  29 ++
 6 files changed, 611 insertions(+), 9 deletions(-)
 create mode 100644 drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c
 create mode 100644 drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.h

diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile 
b/drivers/gpu/drm/amd/amdgpu/Makefile
index 84c1baf8f3cb..daccbc11176e 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -217,7 +217,8 @@ amdgpu-y += \
jpeg_v3_0.o \
jpeg_v4_0.o \
jpeg_v4_0_3.o \
-   jpeg_v4_0_5.o
+   jpeg_v4_0_5.o \
+   jpeg_v5_0_0.o
 
 # add VPE block
 amdgpu-y += \
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c
index ab70395a0022..6df99cb00d9a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c
@@ -247,6 +247,8 @@ int amdgpu_jpeg_dec_ring_test_ib(struct amdgpu_ring *ring, 
long timeout)
if (tmp == 0xDEADBEEF)
break;
udelay(1);
+   if (amdgpu_emu_mode == 1)
+   udelay(10);
}
 
if (i >= adev->usec_timeout)
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c 
b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
index 165448bed6c9..ab17dacd2310 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
@@ -650,7 +650,7 @@ static void jpeg_v4_0_3_dec_ring_set_wptr(struct 
amdgpu_ring *ring)
  *
  * Write a start command to the ring.
  */
-static void jpeg_v4_0_3_dec_ring_insert_start(struct amdgpu_ring *ring)
+void jpeg_v4_0_3_dec_ring_insert_start(struct amdgpu_ring *ring)
 {
if (!amdgpu_sriov_vf(ring->adev)) {
amdgpu_ring_write(ring, 
PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
@@ -670,7 +670,7 @@ static void jpeg_v4_0_3_dec_ring_insert_start(struct 
amdgpu_ring *ring)
  *
  * Write a end command to the ring.
  */
-static void jpeg_v4_0_3_dec_ring_insert_end(struct amdgpu_ring *ring)
+void jpeg_v4_0_3_dec_ring_insert_end(struct amdgpu_ring *ring)
 {
if (!amdgpu_sriov_vf(ring->adev)) {
amdgpu_ring_write(ring, 
PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
@@ -693,7 +693,7 @@ static void jpeg_v4_0_3_dec_ring_insert_end(struct 
amdgpu_ring *ring)
  *
  * Write a fence and a trap command to the ring.
  */
-static void jpeg_v4_0_3_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 
addr, u64 seq,
+void jpeg_v4_0_3_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 
seq,
unsigned int flags)
 {
WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
@@ -762,7 +762,7 @@ static void jpeg_v4_0_3_dec_ring_emit_fence(struct 
amdgpu_ring *ring, u64 addr,
  *
  * Write ring commands to execute the indirect buffer.
  */
-static void jpeg_v4_0_3_dec_ring_emit_ib(struct amdgpu_ring *ring,
+void jpeg_v4_0_3_dec_ring_emit_ib(struct amdgpu_ring *ring,
struct amdgpu_job *job,
struct amdgpu_ib *ib,
uint32_t flags)
@@ -813,7 +813,7 @@ static void jpeg_v4_0_3_dec_ring_emit_ib(struct amdgpu_ring 
*ring,
amdgpu_ring_write(ring, 0x2);
 }
 
-static void jpeg_v4_0_3_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, 
uint32_t reg,
+void jpeg_v4_0_3_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
uint32_t val, uint32_t mask)
 {
uint32_t reg_offset = (reg << 2);
@@ -840,7 +840,7 @@ static void jpeg_v4_0_3_dec_ring_emit_reg_wait(struct 
amdgpu_ring *ring, uint32_
amdgpu_ring_write(ring, mask);
 }
 
-static void jpeg_v4_0_3_dec_ring_emit_vm_flush(struct amdgpu_ring *ring,
+void jpeg_v4_0_3_dec_ring_emit_vm_flush(struct amdgpu_ring *ring,
unsigned int vmid, uint64_t pd_addr)
 {
struct amdgpu_vmhub *hub = >adev->vmhub[ring->vm_hub];
@@ -855,7 +855,7 @@ static void jpeg_v4_0_3_dec_ring_emit_vm_flush(struct 
amdgpu_ring *ring,
jpeg_v4_0_3_dec_ring_emit_reg_wait(ring, data0, data1, mask);
 }
 
-static void jpeg_v4_0_3_dec_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t 
reg, uint32_t val)
+void jpeg_v4_0_3_dec_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, 
uint32_t val)
 {
uint32_t reg_offset = (reg << 2);
 
@@ -873,7 +873,7 @@ static void jpeg_v4_0_3_dec_ring_emit_wreg(struct 
amdgpu_ring *ring, uint32_t re
amdgpu_ring_write(ring, val);
 }
 
-static void 

[PATCH 2/4] drm/amdgpu/jpeg5: add power gating support

2024-02-08 Thread Alex Deucher
From: Sonny Jiang 

Add PG support for JPEG5

Signed-off-by: Sonny Jiang 
Reviewed-by: Leo Liu 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c 
b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c
index fbc987e299f8..71c28cc06605 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c
@@ -261,6 +261,14 @@ static int jpeg_v5_0_0_enable_static_power_gating(struct 
amdgpu_device *adev)
UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK,
~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);
 
+   if (adev->pg_flags & AMD_PG_SUPPORT_JPEG) {
+   WREG32(SOC15_REG_OFFSET(JPEG, 0, regUVD_IPX_DLDO_CONFIG),
+   2 << UVD_IPX_DLDO_CONFIG__ONO1_PWR_CONFIG__SHIFT);
+   SOC15_WAIT_ON_RREG(JPEG, 0, regUVD_IPX_DLDO_STATUS,
+   1 << UVD_IPX_DLDO_STATUS__ONO1_PWR_STATUS__SHIFT,
+   UVD_IPX_DLDO_STATUS__ONO1_PWR_STATUS_MASK);
+   }
+
return 0;
 }
 
-- 
2.42.0



[PATCH 0/4] JPEG 5.0 Support

2024-02-08 Thread Alex Deucher
This patch set adds support for JPEG 5.0.x.

Sonny Jiang (4):
  drm/amdgpu: Add JPEG5 support
  drm/amdgpu/jpeg5: add power gating support
  drm/amdgpu/jpeg5: Enable doorbell
  drm/amdgpu: Add jpeg_v5_0_0 ip block support

 drivers/gpu/drm/amd/amdgpu/Makefile   |   3 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c |   2 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c  |   2 +
 drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c  |  16 +-
 drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h  |  15 +
 drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c  | 570 ++
 drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.h  |  29 +
 7 files changed, 628 insertions(+), 9 deletions(-)
 create mode 100644 drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c
 create mode 100644 drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.h

-- 
2.42.0



Re: [Patch v2] drm/amdkfd: update SIMD distribution algo for GFXIP 9.4.2 onwards

2024-02-08 Thread Bhardwaj, Rajneesh



On 2/8/2024 2:41 PM, Felix Kuehling wrote:


On 2024-02-07 23:14, Rajneesh Bhardwaj wrote:

In certain cooperative group dispatch scenarios the default SPI resource
allocation may cause reduced per-CU workgroup occupancy. Set
COMPUTE_RESOURCE_LIMITS.FORCE_SIMD_DIST=1 to mitigate soft hang
scenarios.

Suggested-by: Joseph Greathouse 
Signed-off-by: Rajneesh Bhardwaj 
---
* Found a bug in the previous reviewed version
https://lists.freedesktop.org/archives/amd-gfx/2024-February/104101.html
   since the q->is_gws is unset for keeping the count.
* updated pqm_set_gws to pass minfo holding gws state for the active
   queues and use that to apply the FORCE_SIMD_DIST_MASK.

  drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c    | 4 
  drivers/gpu/drm/amd/amdkfd/kfd_priv.h  | 1 +
  drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 4 +++-
  3 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c

index 42d881809dc7..0b71db4c96b5 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
@@ -303,6 +303,10 @@ static void update_mqd(struct mqd_manager *mm, 
void *mqd,

  update_cu_mask(mm, mqd, minfo, 0);
  set_priority(m, q);
  +    if (minfo && KFD_GC_VERSION(mm->dev) >= IP_VERSION(9, 4, 2))
+    m->compute_resource_limits = minfo->gws ?
+    COMPUTE_RESOURCE_LIMITS__FORCE_SIMD_DIST_MASK : 0;
+


This looks OK because we don't set anything else in 
m->compute_resource_limits. If that ever changes, we have to be more 
careful here to not wipe out other fields in that register.



Yes, Should I change it to below and send a v3?

 m->compute_resource_limits |= minfo->gws ?
    COMPUTE_RESOURCE_LIMITS__FORCE_SIMD_DIST_MASK : 0;






  q->is_active = QUEUE_IS_ACTIVE(*q);
  }
  diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h

index 677281c0793e..f4b327a2d4a8 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -542,6 +542,7 @@ struct mqd_update_info {
  } cu_mask;
  };
  enum mqd_update_flag update_flag;
+    bool gws;


Instead of adding a new bool, can we add a flag to mqd_update_flag?


Maybe, I initially thought about it but then I chose the bool approach 
since  those debug flags are generic KFD non per-Asic flags while this 
bool is per-Asic request so I felt they didn't fit together. On the 
other hand, those flags and this bool are both quirks anyways so maybe 
they can be together.   Please let me know your preference.





Looks good to me otherwise.

Regards,
  Felix



  };
    /**
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c

index 43eff221eae5..5416a110ced9 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -95,6 +95,7 @@ void kfd_process_dequeue_from_device(struct 
kfd_process_device *pdd)

  int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid,
  void *gws)
  {
+    struct mqd_update_info minfo = {0};
  struct kfd_node *dev = NULL;
  struct process_queue_node *pqn;
  struct kfd_process_device *pdd;
@@ -146,9 +147,10 @@ int pqm_set_gws(struct process_queue_manager 
*pqm, unsigned int qid,

  }
    pdd->qpd.num_gws = gws ? dev->adev->gds.gws_size : 0;
+    minfo.gws = !!gws;
    return pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm,
-    pqn->q, NULL);
+    pqn->q, );
  }
    void kfd_process_dequeue_from_all_devices(struct kfd_process *p)


[PATCH 3/4] drm/amdgpu: add VCN_5_0_0 IP block support

2024-02-08 Thread Alex Deucher
From: Sonny Jiang 

Add VCN_5_0_0 IP init, ring functions, DPG support.

v2: squash in warning fixes (Alex)
v3: squash in block and ring init, boot, doorbell enablement,
DPG support (Alex)

Signed-off-by: Sonny Jiang 
Reviewed-by: Leo Liu 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/Makefile |1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h |   42 +
 drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c | 1339 +++
 drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.h |   37 +
 4 files changed, 1419 insertions(+)
 create mode 100644 drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c
 create mode 100644 drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.h

diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile 
b/drivers/gpu/drm/amd/amdgpu/Makefile
index 87022325bbf7..84c1baf8f3cb 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -209,6 +209,7 @@ amdgpu-y += \
vcn_v4_0.o \
vcn_v4_0_3.o \
vcn_v4_0_5.o \
+   vcn_v5_0_0.o \
amdgpu_jpeg.o \
jpeg_v1_0.o \
jpeg_v2_0.o \
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
index 514c98ea144f..1985f71b4373 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
@@ -160,6 +160,48 @@
}   
  \
} while (0)
 
+#define SOC24_DPG_MODE_OFFSET(ip, inst_idx, reg)   
\
+   ({  
\
+   uint32_t internal_reg_offset, addr; 
\
+   bool video_range, aon_range;\
+   
\
+   addr = (adev->reg_offset[ip##_HWIP][inst_idx][reg##_BASE_IDX] + 
reg);   \
+   addr <<= 2; 
\
+   video_range = 0xF & addr) >= (VCN_VID_SOC_ADDRESS)) &&  
\
+   ((0xF & addr) < ((VCN_VID_SOC_ADDRESS + 
0x2600);\
+   aon_range   = 0xF & addr) >= (VCN_AON_SOC_ADDRESS)) &&  
\
+   ((0xF & addr) < ((VCN_AON_SOC_ADDRESS + 
0x600); \
+   if (video_range)
\
+   internal_reg_offset = ((0xF & addr) - 
(VCN_VID_SOC_ADDRESS) +   \
+   (VCN_VID_IP_ADDRESS));  
\
+   else if (aon_range) 
\
+   internal_reg_offset = ((0xF & addr) - 
(VCN_AON_SOC_ADDRESS) +   \
+   (VCN_AON_IP_ADDRESS));  
\
+   else
\
+   internal_reg_offset = (0xF & addr); 
\
+   
\
+   internal_reg_offset >>= 2;  
\
+   })
+
+#define WREG32_SOC24_DPG_MODE(inst_idx, offset, value, mask_en, indirect)  
\
+   do {
\
+   if (!indirect) {
\
+   WREG32_SOC15(VCN, GET_INST(VCN, inst_idx),  
\
+regUVD_DPG_LMA_DATA, value);   
\
+   WREG32_SOC15(   
\
+   VCN, GET_INST(VCN, inst_idx),   
\
+   regUVD_DPG_LMA_CTL, 
\
+   (0x1 << UVD_DPG_LMA_CTL__READ_WRITE__SHIFT |
\
+mask_en << UVD_DPG_LMA_CTL__MASK_EN__SHIFT |   
\
+offset << 
UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT));   \
+   } else {
\
+   *adev->vcn.inst[inst_idx].dpg_sram_curr_addr++ =
\
+   offset; 
\
+   *adev->vcn.inst[inst_idx].dpg_sram_curr_addr++ =
\
+   value;  
\
+   }  

[PATCH 0/4] VCN 5.0 Support

2024-02-08 Thread Alex Deucher
This patch set adds support for VCN (Video Codec Next) 5.0.x.  VCN
is the multimedia codec IP in AMD SoCs.

Patch 1 adds new registers and is too large for the mailing list so
it has been omitted.

Hawking Zhang (1):
  drm/amdgpu: Add vcn v5_0_0 ip headers (v5)

Sonny Jiang (3):
  drm/amdgpu: add VCN_5_0_0 firmware support
  drm/amdgpu: add VCN_5_0_0 IP block support
  amdgpu/drm: Add vcn_v5_0_0_ip_block support

 drivers/gpu/drm/amd/amdgpu/Makefile   |1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c |4 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c   |2 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h   |   42 +
 drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c   | 1339 +++
 drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.h   |   37 +
 .../include/asic_reg/vcn/vcn_5_0_0_offset.h   | 1672 
 .../include/asic_reg/vcn/vcn_5_0_0_sh_mask.h  | 7627 +
 8 files changed, 10724 insertions(+)
 create mode 100644 drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c
 create mode 100644 drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.h
 create mode 100644 drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_5_0_0_offset.h
 create mode 100644 drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_5_0_0_sh_mask.h

-- 
2.42.0



[PATCH 4/4] amdgpu/drm: Add vcn_v5_0_0_ip_block support

2024-02-08 Thread Alex Deucher
From: Sonny Jiang 

Enable support for vcn_v5_0_0_ip_block

Signed-off-by: Sonny Jiang 
Reviewed-by: Leo Liu 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
index 59530fe36b6b..399428054c0d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -96,6 +96,7 @@
 #include "smuio_v13_0.h"
 #include "smuio_v13_0_3.h"
 #include "smuio_v13_0_6.h"
+#include "vcn_v5_0_0.h"
 
 #include "amdgpu_vpe.h"
 
@@ -2132,6 +2133,9 @@ static int amdgpu_discovery_set_mm_ip_blocks(struct 
amdgpu_device *adev)
amdgpu_device_ip_block_add(adev, _v4_0_5_ip_block);
amdgpu_device_ip_block_add(adev, _v4_0_5_ip_block);
break;
+   case IP_VERSION(5, 0, 0):
+   amdgpu_device_ip_block_add(adev, _v5_0_0_ip_block);
+   break;
default:
dev_err(adev->dev,
"Failed to add vcn/jpeg ip 
block(UVD_HWIP:0x%x)\n",
-- 
2.42.0



[PATCH 2/4] drm/amdgpu: add VCN_5_0_0 firmware support

2024-02-08 Thread Alex Deucher
From: Sonny Jiang 

Add VCN5_0_0 firmware support

Signed-off-by: Sonny Jiang 
Reviewed-by: Leo Liu 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
index f300d4a4457d..eb2a88991206 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -59,6 +59,7 @@
 #define FIRMWARE_VCN4_0_3  "amdgpu/vcn_4_0_3.bin"
 #define FIRMWARE_VCN4_0_4  "amdgpu/vcn_4_0_4.bin"
 #define FIRMWARE_VCN4_0_5  "amdgpu/vcn_4_0_5.bin"
+#define FIRMWARE_VCN5_0_0  "amdgpu/vcn_5_0_0.bin"
 
 MODULE_FIRMWARE(FIRMWARE_RAVEN);
 MODULE_FIRMWARE(FIRMWARE_PICASSO);
@@ -82,6 +83,7 @@ MODULE_FIRMWARE(FIRMWARE_VCN4_0_2);
 MODULE_FIRMWARE(FIRMWARE_VCN4_0_3);
 MODULE_FIRMWARE(FIRMWARE_VCN4_0_4);
 MODULE_FIRMWARE(FIRMWARE_VCN4_0_5);
+MODULE_FIRMWARE(FIRMWARE_VCN5_0_0);
 
 static void amdgpu_vcn_idle_work_handler(struct work_struct *work);
 
-- 
2.42.0



Re: [Patch v2] drm/amdkfd: update SIMD distribution algo for GFXIP 9.4.2 onwards

2024-02-08 Thread Felix Kuehling



On 2024-02-07 23:14, Rajneesh Bhardwaj wrote:

In certain cooperative group dispatch scenarios the default SPI resource
allocation may cause reduced per-CU workgroup occupancy. Set
COMPUTE_RESOURCE_LIMITS.FORCE_SIMD_DIST=1 to mitigate soft hang
scenarios.

Suggested-by: Joseph Greathouse 
Signed-off-by: Rajneesh Bhardwaj 
---
* Found a bug in the previous reviewed version
   https://lists.freedesktop.org/archives/amd-gfx/2024-February/104101.html
   since the q->is_gws is unset for keeping the count.
* updated pqm_set_gws to pass minfo holding gws state for the active
   queues and use that to apply the FORCE_SIMD_DIST_MASK.

  drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c| 4 
  drivers/gpu/drm/amd/amdkfd/kfd_priv.h  | 1 +
  drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 4 +++-
  3 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
index 42d881809dc7..0b71db4c96b5 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
@@ -303,6 +303,10 @@ static void update_mqd(struct mqd_manager *mm, void *mqd,
update_cu_mask(mm, mqd, minfo, 0);
set_priority(m, q);
  
+	if (minfo && KFD_GC_VERSION(mm->dev) >= IP_VERSION(9, 4, 2))

+   m->compute_resource_limits = minfo->gws ?
+   COMPUTE_RESOURCE_LIMITS__FORCE_SIMD_DIST_MASK : 0;
+


This looks OK because we don't set anything else in 
m->compute_resource_limits. If that ever changes, we have to be more 
careful here to not wipe out other fields in that register.




q->is_active = QUEUE_IS_ACTIVE(*q);
  }
  
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h

index 677281c0793e..f4b327a2d4a8 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -542,6 +542,7 @@ struct mqd_update_info {
} cu_mask;
};
enum mqd_update_flag update_flag;
+   bool gws;


Instead of adding a new bool, can we add a flag to mqd_update_flag?

Looks good to me otherwise.

Regards,
  Felix



  };
  
  /**

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index 43eff221eae5..5416a110ced9 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -95,6 +95,7 @@ void kfd_process_dequeue_from_device(struct 
kfd_process_device *pdd)
  int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid,
void *gws)
  {
+   struct mqd_update_info minfo = {0};
struct kfd_node *dev = NULL;
struct process_queue_node *pqn;
struct kfd_process_device *pdd;
@@ -146,9 +147,10 @@ int pqm_set_gws(struct process_queue_manager *pqm, 
unsigned int qid,
}
  
  	pdd->qpd.num_gws = gws ? dev->adev->gds.gws_size : 0;

+   minfo.gws = !!gws;
  
  	return pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm,

-   pqn->q, NULL);
+   pqn->q, );
  }
  
  void kfd_process_dequeue_from_all_devices(struct kfd_process *p)


Re: [PATCH v4 1/3] drm: Add drm_get_acpi_edid() helper

2024-02-08 Thread Mario Limonciello

On 2/8/2024 08:31, Jani Nikula wrote:

On Thu, 08 Feb 2024, Maxime Ripard  wrote:

On Thu, Feb 08, 2024 at 11:57:11AM +0200, Jani Nikula wrote:

On Wed, 07 Feb 2024, Mario Limonciello  wrote:

Some manufacturers have intentionally put an EDID that differs from
the EDID on the internal panel on laptops.  Drivers can call this
helper to attempt to fetch the EDID from the BIOS's ACPI _DDC method.

Signed-off-by: Mario Limonciello 
---
  drivers/gpu/drm/Kconfig|  5 +++
  drivers/gpu/drm/drm_edid.c | 77 ++
  include/drm/drm_edid.h |  1 +
  3 files changed, 83 insertions(+)

diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index 6ec33d36f3a4..ec2bb71e8b36 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -21,6 +21,11 @@ menuconfig DRM
select KCMP
select VIDEO_CMDLINE
select VIDEO_NOMODESET
+   select ACPI_VIDEO if ACPI
+   select BACKLIGHT_CLASS_DEVICE if ACPI
+   select INPUT if ACPI
+   select X86_PLATFORM_DEVICES if ACPI && X86
+   select ACPI_WMI if ACPI && X86


I think I'll defer to drm maintainers on whether this is okay or
something to be avoided.


It's pretty much the same thing that all the other drivers do right now.
Because the symbol is now used by DRM it needs to do this.

Patch 3 in this version of the series tears it out from all the drivers.





help
  Kernel-level support for the Direct Rendering Infrastructure (DRI)
  introduced in XFree86 4.0. If you say Y here, you need to select
diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c
index 923c4423151c..c649b4f9fd8e 100644
--- a/drivers/gpu/drm/drm_edid.c
+++ b/drivers/gpu/drm/drm_edid.c
@@ -28,6 +28,7 @@
   * DEALINGS IN THE SOFTWARE.
   */
  
+#include 

  #include 
  #include 
  #include 
@@ -2188,6 +2189,49 @@ drm_do_probe_ddc_edid(void *data, u8 *buf, unsigned int 
block, size_t len)
return ret == xfers ? 0 : -1;
  }
  
+/**

+ * drm_do_probe_acpi_edid() - get EDID information via ACPI _DDC
+ * @data: struct drm_device
+ * @buf: EDID data buffer to be filled
+ * @block: 128 byte EDID block to start fetching from
+ * @len: EDID data buffer length to fetch
+ *
+ * Try to fetch EDID information by calling acpi_video_get_edid() function.
+ *
+ * Return: 0 on success or error code on failure.
+ */
+static int
+drm_do_probe_acpi_edid(void *data, u8 *buf, unsigned int block, size_t len)
+{
+   struct drm_device *ddev = data;
+   struct acpi_device *acpidev = ACPI_COMPANION(ddev->dev);
+   unsigned char start = block * EDID_LENGTH;
+   void *edid;
+   int r;
+
+   if (!acpidev)
+   return -ENODEV;
+
+   /* fetch the entire edid from BIOS */
+   r = acpi_video_get_edid(acpidev, ACPI_VIDEO_DISPLAY_LCD, -1, );
+   if (r < 0) {
+   DRM_DEBUG_KMS("Failed to get EDID from ACPI: %d\n", r);
+   return -EINVAL;
+   }
+   if (len > r || start > r || start + len > r) {
+   r = -EINVAL;
+   goto cleanup;
+   }
+
+   memcpy(buf, edid + start, len);
+   r = 0;
+
+cleanup:
+   kfree(edid);
+
+   return r;
+}
+
  static void connector_bad_edid(struct drm_connector *connector,
   const struct edid *edid, int num_blocks)
  {
@@ -2643,6 +2687,39 @@ struct edid *drm_get_edid(struct drm_connector 
*connector,
  }
  EXPORT_SYMBOL(drm_get_edid);
  
+/**

+ * drm_get_acpi_edid - get EDID data, if available


I'd prefer all the new EDID API to be named drm_edid_*. Makes a clean
break from the old API, and is more consistent.

So perhaps drm_edid_read_acpi() to be in line with all the other struct
drm_edid based EDID reading functions.



Roger that.  Even if it ends up not being exported out will rename as such.


+ * @connector: connector we're probing
+ *
+ * Use the BIOS to attempt to grab EDID data if possible.
+ *
+ * The returned pointer must be freed using drm_edid_free().
+ *
+ * Return: Pointer to valid EDID or NULL if we couldn't find any.
+ */
+const struct drm_edid *drm_get_acpi_edid(struct drm_connector *connector)
+{
+   const struct drm_edid *drm_edid;
+
+   switch (connector->connector_type) {
+   case DRM_MODE_CONNECTOR_LVDS:
+   case DRM_MODE_CONNECTOR_eDP:
+   break;
+   default:
+   return NULL;
+   }
+
+   if (connector->force == DRM_FORCE_OFF)
+   return NULL;
+
+   drm_edid = drm_edid_read_custom(connector, drm_do_probe_acpi_edid, 
connector->dev);
+
+   /* Note: Do *not* call connector updates here. */
+
+   return drm_edid;
+}
+EXPORT_SYMBOL(drm_get_acpi_edid);


Why shouldn't we use the BIOS/UEFI to retrieve them if it's available?

I guess what I'm asking is why should we make this an exported function
that drivers would have to call explicitly, instead of just making it
part of the usual EDID retrieval interface.


Two main questions:

What if the 

Re: [PATCH v4 2/3] drm/amd: Stop evicting resources on APUs in suspend

2024-02-08 Thread Alex Deucher
On Thu, Feb 8, 2024 at 1:48 AM Mario Limonciello
 wrote:
>
> commit 5095d5418193 ("drm/amd: Evict resources during PM ops prepare()
> callback") intentionally moved the eviction of resources to earlier in
> the suspend process, but this introduced a subtle change that it occurs
> before adev->in_s0ix or adev->in_s3 are set. This meant that APUs
> actually started to evict resources at suspend time as well.
>
> Explicitly set s0ix or s3 in the prepare() stage, and unset them if the
> prepare() stage failed.
>
> Reported-by: Jürg Billeter 
> Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3132#note_2271038
> Fixes: 5095d5418193 ("drm/amd: Evict resources during PM ops prepare() 
> callback")
> Signed-off-by: Mario Limonciello 

Acked-by: Alex Deucher 

> ---
> v3->v4:
> * New function to set s0ix/s3 and explicitly unset in cleanup
> v2->v3:
> * Whitespace
> v1->v2:
> * Add and use new in_prepare member
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu.h|  2 ++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c   | 15 +++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 11 +--
>  3 files changed, 26 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 3d8a48f46b01..f6c38a974bae 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -1547,9 +1547,11 @@ static inline int 
> amdgpu_acpi_smart_shift_update(struct drm_device *dev,
>  #if defined(CONFIG_ACPI) && defined(CONFIG_SUSPEND)
>  bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev);
>  bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev);
> +void amdgpu_choose_low_power_state(struct amdgpu_device *adev);
>  #else
>  static inline bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev) { 
> return false; }
>  static inline bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev) { 
> return false; }
> +static void amdgpu_choose_low_power_state(struct amdgpu_device *adev) { }
>  #endif
>
>  #if defined(CONFIG_DRM_AMD_DC)
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
> index 2deebece810e..cc21ed67a330 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
> @@ -1519,4 +1519,19 @@ bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device 
> *adev)
>  #endif /* CONFIG_AMD_PMC */
>  }
>
> +/**
> + * amdgpu_choose_low_power_state
> + *
> + * @adev: amdgpu_device pointer
> + *
> + * Choose the target low power state for the GPU
> + */
> +void amdgpu_choose_low_power_state(struct amdgpu_device *adev)
> +{
> +   if (amdgpu_acpi_is_s0ix_active(adev))
> +   adev->in_s0ix = true;
> +   else if (amdgpu_acpi_is_s3_active(adev))
> +   adev->in_s3 = true;
> +}
> +
>  #endif /* CONFIG_SUSPEND */
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 2bc460cb993d..dab03865c827 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -4518,13 +4518,15 @@ int amdgpu_device_prepare(struct drm_device *dev)
> struct amdgpu_device *adev = drm_to_adev(dev);
> int i, r;
>
> +   amdgpu_choose_low_power_state(adev);
> +
> if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
> return 0;
>
> /* Evict the majority of BOs before starting suspend sequence */
> r = amdgpu_device_evict_resources(adev);
> if (r)
> -   return r;
> +   goto unprepare;
>
> for (i = 0; i < adev->num_ip_blocks; i++) {
> if (!adev->ip_blocks[i].status.valid)
> @@ -4533,10 +4535,15 @@ int amdgpu_device_prepare(struct drm_device *dev)
> continue;
> r = adev->ip_blocks[i].version->funcs->prepare_suspend((void 
> *)adev);
> if (r)
> -   return r;
> +   goto unprepare;
> }
>
> return 0;
> +
> +unprepare:
> +   adev->in_s0ix = adev->in_s3 = false;
> +
> +   return r;
>  }
>
>  /**
> --
> 2.34.1
>


[pull] amdgpu drm-fixes-6.8

2024-02-08 Thread Alex Deucher
Hi Dave, Sima,

Fixes for 6.8.

The following changes since commit d0399da9fb5f8e3d897b9776bffee2d3bfe20210:

  drm/sched: Re-queue run job worker when drm_sched_entity_pop_job() returns 
NULL (2024-02-06 12:47:43 +1000)

are available in the Git repository at:

  https://gitlab.freedesktop.org/agd5f/linux.git 
tags/amd-drm-fixes-6.8-2024-02-08

for you to fetch changes up to 534c8a5b9d5d41d30cdcac93cfa1bca5e17be009:

  drm/amdgpu: Fix HDP flush for VFs on nbio v7.9 (2024-02-07 18:30:11 -0500)


amd-drm-fixes-6.8-2024-02-08:

amdgpu:
- Misc NULL/bounds check fixes
- ODM pipe policy fix
- Aborted suspend fixes
- JPEG 4.0.5 fix
- DCN 3.5 fixes
- PSP fix
- DP MST fix
- Phantom pipe fix
- VRAM vendor fix
- Clang fix
- SR-IOV fix


Alvin Lee (1):
  drm/amd/display: Update phantom pipe enable / disable sequence

Fangzhi Zuo (1):
  drm/amd/display: Fix MST Null Ptr for RV

Li Ma (1):
  drm/amdgpu: remove asymmetrical irq disabling in jpeg 4.0.5 suspend

Lijo Lazar (2):
  drm/amdgpu: Avoid fetching VRAM vendor info
  drm/amdgpu: Fix HDP flush for VFs on nbio v7.9

Mario Limonciello (1):
  drm/amd/display: Clear phantom stream count and plane count

Nathan Chancellor (1):
  drm/amd/display: Increase frame-larger-than for all display_mode_vba files

Nicholas Kazlauskas (1):
  drm/amd/display: Increase eval/entry delay for DCN35

Prike Liang (2):
  drm/amdgpu: skip to program GFXDEC registers for suspend abort
  drm/amdgpu: reset gpu for s3 suspend abort case

Rodrigo Siqueira (1):
  drm/amd/display: Disable ODM by default for DCN35

Srinivasan Shanmugam (3):
  drm/amd/display: Fix 'panel_cntl' could be null in 
'dcn21_set_backlight_level()'
  drm/amd/display: Add NULL test for 'timing generator' in 
'dcn21_set_pipe()'
  drm/amd/display: Implement bounds check for stream encoder creation in 
DCN301

Stanley.Yang (1):
  drm/amdgpu: Fix shared buff copy to user

Wenjing Liu (1):
  drm/amd/display: set odm_combine_policy based on context in dcn32 resource

 drivers/gpu/drm/amd/amdgpu/amdgpu.h|  2 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c|  2 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c |  2 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c  |  8 +++
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c  |  8 ---
 drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c |  9 ---
 drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c   | 10 ---
 drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c |  6 ++
 drivers/gpu/drm/amd/amdgpu/soc15.c | 22 +++
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c  | 12 ++--
 drivers/gpu/drm/amd/display/dc/core/dc.c   |  4 +-
 drivers/gpu/drm/amd/display/dc/core/dc_state.c |  3 +
 drivers/gpu/drm/amd/display/dc/dml/Makefile|  6 +-
 .../gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c   | 15 +++--
 .../drm/amd/display/dc/hwss/dce110/dce110_hwseq.c  |  4 +-
 .../drm/amd/display/dc/hwss/dce110/dce110_hwseq.h  |  4 ++
 .../drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c|  2 +-
 .../drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.h|  4 ++
 .../drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.c| 63 +-
 .../drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c| 76 +++---
 .../drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.h|  2 +
 .../gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c |  3 +
 drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h |  1 +
 .../drm/amd/display/dc/hwss/hw_sequencer_private.h |  7 ++
 drivers/gpu/drm/amd/display/dc/inc/resource.h  | 20 +++---
 .../display/dc/resource/dcn301/dcn301_resource.c   |  2 +-
 .../amd/display/dc/resource/dcn32/dcn32_resource.c | 16 -
 .../amd/display/dc/resource/dcn35/dcn35_resource.c |  5 +-
 28 files changed, 220 insertions(+), 98 deletions(-)


Re: [PATCH v5 1/3] drm/buddy: Implement tracking clear page feature

2024-02-08 Thread Arunpravin Paneer Selvam




On 1/31/2024 11:52 PM, Matthew Auld wrote:

On 30/01/2024 19:48, Arunpravin Paneer Selvam wrote:

- Add tracking clear page feature.

- Driver should enable the DRM_BUDDY_CLEARED flag if it
   successfully clears the blocks in the free path. On the other hand,
   DRM buddy marks each block as cleared.

- Track the available cleared pages size

- If driver requests cleared memory we prefer cleared memory
   but fallback to uncleared if we can't find the cleared blocks.
   when driver requests uncleared memory we try to use uncleared but
   fallback to cleared memory if necessary.

- When a block gets freed we clear it and mark the freed block as 
cleared,

   when there are buddies which are cleared as well we can merge them.
   Otherwise, we prefer to keep the blocks as separated.

v1: (Christian)
   - Depends on the flag check DRM_BUDDY_CLEARED, enable the block as
 cleared. Else, reset the clear flag for each block in the list.

   - For merging the 2 cleared blocks compare as below,
 drm_buddy_is_clear(block) != drm_buddy_is_clear(buddy)

Signed-off-by: Arunpravin Paneer Selvam 


Suggested-by: Christian König 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  |   6 +-
  drivers/gpu/drm/drm_buddy.c   | 169 +++---
  drivers/gpu/drm/i915/i915_ttm_buddy_manager.c |   6 +-
  drivers/gpu/drm/tests/drm_buddy_test.c    |  10 +-
  include/drm/drm_buddy.h   |  18 +-
  5 files changed, 168 insertions(+), 41 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c

index 08916538a615..d0e199cc8f17 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -556,7 +556,7 @@ static int amdgpu_vram_mgr_new(struct 
ttm_resource_manager *man,

  return 0;
    error_free_blocks:
-    drm_buddy_free_list(mm, &vres->blocks);
+    drm_buddy_free_list(mm, &vres->blocks, 0);
  mutex_unlock(&mgr->lock);
  error_fini:
  ttm_resource_fini(man, >base);
@@ -589,7 +589,7 @@ static void amdgpu_vram_mgr_del(struct 
ttm_resource_manager *man,

    amdgpu_vram_mgr_do_reserve(man);
  -    drm_buddy_free_list(mm, &vres->blocks);
+    drm_buddy_free_list(mm, &vres->blocks, 0);
  mutex_unlock(&mgr->lock);
    atomic64_sub(vis_usage, >vis_usage);
@@ -897,7 +897,7 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device 
*adev)

  kfree(rsv);
    list_for_each_entry_safe(rsv, temp, >reserved_pages, 
blocks) {

-    drm_buddy_free_list(&mgr->mm, &rsv->allocated);
+    drm_buddy_free_list(&mgr->mm, &rsv->allocated, 0);
  kfree(rsv);
  }
  if (!adev->gmc.is_app_apu)
diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index f57e6d74fb0e..d44172f23f05 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -57,6 +57,16 @@ static void list_insert_sorted(struct drm_buddy *mm,
  __list_add(&block->link, node->link.prev, &node->link);
  }
  +static void clear_reset(struct drm_buddy_block *block)
+{
+    block->header &= ~DRM_BUDDY_HEADER_CLEAR;
+}
+
+static void mark_cleared(struct drm_buddy_block *block)
+{
+    block->header |= DRM_BUDDY_HEADER_CLEAR;
+}
+
  static void mark_allocated(struct drm_buddy_block *block)
  {
  block->header &= ~DRM_BUDDY_HEADER_STATE;
@@ -223,6 +233,12 @@ static int split_block(struct drm_buddy *mm,
  mark_free(mm, block->left);
  mark_free(mm, block->right);
  +    if (drm_buddy_block_is_clear(block)) {
+    mark_cleared(block->left);
+    mark_cleared(block->right);
+    clear_reset(block);
+    }
+
  mark_split(block);
    return 0;
@@ -273,6 +289,13 @@ static void __drm_buddy_free(struct drm_buddy *mm,
  if (!drm_buddy_block_is_free(buddy))
  break;
  +    if (drm_buddy_block_is_clear(block) !=
+    drm_buddy_block_is_clear(buddy))
+    break;
+
+    if (drm_buddy_block_is_clear(block))
+    mark_cleared(parent);
+
  list_del(&buddy->link);
    drm_block_free(mm, block);
@@ -295,6 +318,9 @@ void drm_buddy_free_block(struct drm_buddy *mm,
  {
  BUG_ON(!drm_buddy_block_is_allocated(block));
  mm->avail += drm_buddy_block_size(mm, block);
+    if (drm_buddy_block_is_clear(block))
+    mm->clear_avail += drm_buddy_block_size(mm, block);
+
  __drm_buddy_free(mm, block);
  }
  EXPORT_SYMBOL(drm_buddy_free_block);
@@ -305,10 +331,20 @@ EXPORT_SYMBOL(drm_buddy_free_block);
   * @mm: DRM buddy manager
   * @objects: input list head to free blocks
   */
-void drm_buddy_free_list(struct drm_buddy *mm, struct list_head 
*objects)

+void drm_buddy_free_list(struct drm_buddy *mm,
+ struct list_head *objects,
+ unsigned long flags)
  {
  struct drm_buddy_block *block, *on;
  +    if (flags & DRM_BUDDY_CLEARED) {
+    list_for_each_entry(block, objects, link)
+    mark_cleared(block);
+    } else {
+    list_for_each_entry(block, objects, link)
+    

Re: [PATCH v4 3/3] drm/amd: Drop unneeded functions to check if s3/s0ix active

2024-02-08 Thread Christian König




Am 08.02.24 um 16:04 schrieb Mario Limonciello:

On 2/8/2024 00:54, Christian König wrote:

Am 08.02.24 um 06:52 schrieb Mario Limonciello:

amdgpu_acpi_is_s0ix_active() and amdgpu_acpi_is_s0ix_active() aren't
needed to be checked multiple times in a suspend cycle. Checking and
setting up policy one time in the prepare() callback is sufficient.


Mhm, looking at amdgpu_acpi_is_s3_active() I think we should not 
cache that in a variable in the first place.


Just calling the function all the time to check the state should be 
sufficient, or do we then run into any state transition problems?


I think calling to check it each time is perfectly fine, it should 
never change once the sequence starts until it's over.


If the first 2 patches look OK, I'd like to get those merged so we can 
fix the regressions.  I'll do another series that can drop the cached 
parameters.


Feel free to add my Acked-by to the series, but for ACPI stuff I would 
wait for Alex to take a look as well.


Regards,
Christian.





Regards,
Christian.



Signed-off-by: Mario Limonciello 
---
v4: New patch
---
  drivers/gpu/drm/amd/amdgpu/amdgpu.h  |  4 
  drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c |  7 +++
  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c  | 17 ++---
  3 files changed, 5 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h

index f6c38a974bae..53823539eba5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1545,12 +1545,8 @@ static inline int 
amdgpu_acpi_smart_shift_update(struct drm_device *dev,

  #endif
  #if defined(CONFIG_ACPI) && defined(CONFIG_SUSPEND)
-bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev);
-bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev);
  void amdgpu_choose_low_power_state(struct amdgpu_device *adev);
  #else
-static inline bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device 
*adev) { return false; }
-static inline bool amdgpu_acpi_is_s3_active(struct amdgpu_device 
*adev) { return false; }
  static void amdgpu_choose_low_power_state(struct amdgpu_device 
*adev) { }

  #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c

index cc21ed67a330..1d58728f8c3f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
@@ -1366,8 +1366,7 @@ bool amdgpu_acpi_should_gpu_reset(struct 
amdgpu_device *adev)
  adev->gfx.imu.funcs) /* Not need to do mode2 reset for IMU 
enabled APUs */

  return false;
-    if ((adev->flags & AMD_IS_APU) &&
-    amdgpu_acpi_is_s3_active(adev))
+    if ((adev->flags & AMD_IS_APU) && adev->in_s3)
  return false;
  if (amdgpu_sriov_vf(adev))
@@ -1472,7 +1471,7 @@ void amdgpu_acpi_release(void)
   *
   * returns true if supported, false if not.
   */
-bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev)
+static inline bool amdgpu_acpi_is_s3_active(struct amdgpu_device 
*adev)

  {
  return !(adev->flags & AMD_IS_APU) ||
  (pm_suspend_target_state == PM_SUSPEND_MEM);
@@ -1485,7 +1484,7 @@ bool amdgpu_acpi_is_s3_active(struct 
amdgpu_device *adev)

   *
   * returns true if supported, false if not.
   */
-bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev)
+static inline bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device 
*adev)

  {
  if (!(adev->flags & AMD_IS_APU) ||
  (pm_suspend_target_state != PM_SUSPEND_TO_IDLE))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c

index 971acf01bea6..2bc4c5bb9b5a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -2456,13 +2456,6 @@ static int amdgpu_pmops_prepare(struct device 
*dev)

  pm_runtime_suspended(dev))
  return 1;
-    /* if we will not support s3 or s2i for the device
- *  then skip suspend
- */
-    if (!amdgpu_acpi_is_s0ix_active(adev) &&
-    !amdgpu_acpi_is_s3_active(adev))
-    return 1;
-
  return amdgpu_device_prepare(drm_dev);
  }
@@ -2476,10 +2469,6 @@ static int amdgpu_pmops_suspend(struct device 
*dev)

  struct drm_device *drm_dev = dev_get_drvdata(dev);
  struct amdgpu_device *adev = drm_to_adev(drm_dev);
-    if (amdgpu_acpi_is_s0ix_active(adev))
-    adev->in_s0ix = true;
-    else if (amdgpu_acpi_is_s3_active(adev))
-    adev->in_s3 = true;
  if (!adev->in_s0ix && !adev->in_s3)
  return 0;
  return amdgpu_device_suspend(drm_dev, true);
@@ -2510,10 +2499,8 @@ static int amdgpu_pmops_resume(struct device 
*dev)

  adev->no_hw_access = true;
  r = amdgpu_device_resume(drm_dev, true);
-    if (amdgpu_acpi_is_s0ix_active(adev))
-    adev->in_s0ix = false;
-    else
-    adev->in_s3 = false;
+    adev->in_s0ix = adev->in_s3 = false;
+
  return r;
  }








[PATCH v6 3/3] drm/buddy: Add defragmentation support

2024-02-08 Thread Arunpravin Paneer Selvam
Add a function to support defragmentation.

v1: Defragment the memory beginning from min_order
till the required memory space is available.

Signed-off-by: Arunpravin Paneer Selvam 
Suggested-by: Matthew Auld 
---
 drivers/gpu/drm/drm_buddy.c | 67 +++--
 include/drm/drm_buddy.h |  3 ++
 2 files changed, 59 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index 33ad0cfbd54c..fac423d2cb73 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -276,10 +276,12 @@ drm_get_buddy(struct drm_buddy_block *block)
 }
 EXPORT_SYMBOL(drm_get_buddy);
 
-static void __drm_buddy_free(struct drm_buddy *mm,
-struct drm_buddy_block *block)
+static unsigned int __drm_buddy_free(struct drm_buddy *mm,
+struct drm_buddy_block *block,
+bool defrag)
 {
struct drm_buddy_block *parent;
+   unsigned int order;
 
while ((parent = block->parent)) {
struct drm_buddy_block *buddy;
@@ -289,12 +291,14 @@ static void __drm_buddy_free(struct drm_buddy *mm,
if (!drm_buddy_block_is_free(buddy))
break;
 
-   if (drm_buddy_block_is_clear(block) !=
-   drm_buddy_block_is_clear(buddy))
-   break;
+   if (!defrag) {
+   if (drm_buddy_block_is_clear(block) !=
+   drm_buddy_block_is_clear(buddy))
+   break;
 
-   if (drm_buddy_block_is_clear(block))
-   mark_cleared(parent);
+   if (drm_buddy_block_is_clear(block))
+   mark_cleared(parent);
+   }
 
list_del(&buddy->link);
 
@@ -304,8 +308,49 @@ static void __drm_buddy_free(struct drm_buddy *mm,
block = parent;
}
 
+   order = drm_buddy_block_order(block);
mark_free(mm, block);
+
+   return order;
+}
+
+/**
+ * drm_buddy_defrag - Defragmentation routine
+ *
+ * @mm: DRM buddy manager
+ * @min_order: minimum order in the freelist to begin
+ * the defragmentation process
+ *
+ * Driver calls the defragmentation function when the
+ * requested memory allocation returns -ENOSPC.
+ */
+void drm_buddy_defrag(struct drm_buddy *mm,
+ unsigned int min_order)
+{
+   struct drm_buddy_block *block;
+   struct list_head *list;
+   unsigned int order;
+   int i;
+
+   if (min_order > mm->max_order)
+   return;
+
+   for (i = min_order - 1; i >= 0; i--) {
+   list = &mm->free_list[i];
+   if (list_empty(list))
+   continue;
+
+   list_for_each_entry_reverse(block, list, link) {
+   if (!block->parent)
+   continue;
+
+   order = __drm_buddy_free(mm, block, 1);
+   if (order >= min_order)
+   return;
+   }
+   }
 }
+EXPORT_SYMBOL(drm_buddy_defrag);
 
 /**
  * drm_buddy_free_block - free a block
@@ -321,7 +366,7 @@ void drm_buddy_free_block(struct drm_buddy *mm,
if (drm_buddy_block_is_clear(block))
mm->clear_avail += drm_buddy_block_size(mm, block);
 
-   __drm_buddy_free(mm, block);
+   __drm_buddy_free(mm, block, 0);
 }
 EXPORT_SYMBOL(drm_buddy_free_block);
 
@@ -470,7 +515,7 @@ __alloc_range_bias(struct drm_buddy *mm,
if (buddy &&
(drm_buddy_block_is_free(block) &&
 drm_buddy_block_is_free(buddy)))
-   __drm_buddy_free(mm, block);
+   __drm_buddy_free(mm, block, 0);
return ERR_PTR(err);
 }
 
@@ -588,7 +633,7 @@ alloc_from_freelist(struct drm_buddy *mm,
 
 err_undo:
if (tmp != order)
-   __drm_buddy_free(mm, block);
+   __drm_buddy_free(mm, block, 0);
return ERR_PTR(err);
 }
 
@@ -668,7 +713,7 @@ static int __alloc_range(struct drm_buddy *mm,
if (buddy &&
(drm_buddy_block_is_free(block) &&
 drm_buddy_block_is_free(buddy)))
-   __drm_buddy_free(mm, block);
+   __drm_buddy_free(mm, block, 0);
 
 err_free:
if (err == -ENOSPC && total_allocated_on_err) {
diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h
index d81c596dfa38..d0f63e7b5915 100644
--- a/include/drm/drm_buddy.h
+++ b/include/drm/drm_buddy.h
@@ -166,6 +166,9 @@ void drm_buddy_free_list(struct drm_buddy *mm,
 struct list_head *objects,
 unsigned int flags);
 
+void drm_buddy_defrag(struct drm_buddy *mm,
+ unsigned int min_order);
+
 void drm_buddy_print(struct drm_buddy *mm, struct drm_printer *p);
 void drm_buddy_block_print(struct drm_buddy *mm,
   struct 

[PATCH v6 2/3] drm/amdgpu: Enable clear page functionality

2024-02-08 Thread Arunpravin Paneer Selvam
Add clear page support in vram memory region.

v1:(Christian)
  - Dont handle clear page as TTM flag since when moving the BO back
in from GTT again we don't need that.
  - Make a specialized version of amdgpu_fill_buffer() which only
clears the VRAM areas which are not already cleared
  - Drop the TTM_PL_FLAG_WIPE_ON_RELEASE check in
amdgpu_object.c

v2:
  - Modify the function name amdgpu_ttm_* (Alex)
  - Drop the delayed parameter (Christian)
  - handle amdgpu_res_cleared() just above the size
calculation (Christian)
  - Use AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE for clearing the buffers
in the free path to properly wait for fences etc.. (Christian)

v3:(Christian)
  - Remove buffer clear code in VRAM manager instead change the
AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE handling to set
the DRM_BUDDY_CLEARED flag.
  - Remove ! from amdgpu_res_cleared() check.

Signed-off-by: Arunpravin Paneer Selvam 
Suggested-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c| 22 ---
 .../gpu/drm/amd/amdgpu/amdgpu_res_cursor.h| 25 
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c   | 61 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h   |  5 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  |  6 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h  |  5 ++
 6 files changed, 111 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index b671b0665492..c673e070199f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -39,6 +39,7 @@
 #include "amdgpu.h"
 #include "amdgpu_trace.h"
 #include "amdgpu_amdkfd.h"
+#include "amdgpu_vram_mgr.h"
 
 /**
  * DOC: amdgpu_object
@@ -595,8 +596,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
if (!amdgpu_bo_support_uswc(bo->flags))
bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC;
 
-   if (adev->ras_enabled)
-   bo->flags |= AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
+   bo->flags |= AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
 
bo->tbo.bdev = >mman.bdev;
if (bp->domain & (AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA |
@@ -626,15 +626,17 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
 
if (bp->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED &&
bo->tbo.resource->mem_type == TTM_PL_VRAM) {
-   struct dma_fence *fence;
+   struct dma_fence *fence = NULL;
 
-   r = amdgpu_fill_buffer(bo, 0, bo->tbo.base.resv, , true);
+   r = amdgpu_ttm_clear_buffer(bo, bo->tbo.base.resv, );
if (unlikely(r))
goto fail_unreserve;
 
-   dma_resv_add_fence(bo->tbo.base.resv, fence,
-  DMA_RESV_USAGE_KERNEL);
-   dma_fence_put(fence);
+   if (fence) {
+   dma_resv_add_fence(bo->tbo.base.resv, fence,
+  DMA_RESV_USAGE_KERNEL);
+   dma_fence_put(fence);
+   }
}
if (!bp->resv)
amdgpu_bo_unreserve(bo);
@@ -1348,8 +1350,12 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object 
*bo)
if (WARN_ON_ONCE(!dma_resv_trylock(bo->base.resv)))
return;
 
-   r = amdgpu_fill_buffer(abo, AMDGPU_POISON, bo->base.resv, , true);
+   r = amdgpu_fill_buffer(abo, 0, bo->base.resv, , true);
if (!WARN_ON(r)) {
+   struct amdgpu_vram_mgr_resource *vres;
+
+   vres = to_amdgpu_vram_mgr_resource(bo->resource);
+   vres->flags |= DRM_BUDDY_CLEARED;
amdgpu_bo_fence(abo, fence, false);
dma_fence_put(fence);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
index 381101d2bf05..50fcd86e1033 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
@@ -164,4 +164,29 @@ static inline void amdgpu_res_next(struct 
amdgpu_res_cursor *cur, uint64_t size)
}
 }
 
+/**
+ * amdgpu_res_cleared - check if blocks are cleared
+ *
+ * @cur: the cursor to extract the block
+ *
+ * Check if the @cur block is cleared
+ */
+static inline bool amdgpu_res_cleared(struct amdgpu_res_cursor *cur)
+{
+   struct drm_buddy_block *block;
+
+   switch (cur->mem_type) {
+   case TTM_PL_VRAM:
+   block = cur->node;
+
+   if (!amdgpu_vram_mgr_is_cleared(block))
+   return false;
+   break;
+   default:
+   return false;
+   }
+
+   return true;
+}
+
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 8722beba494e..bcbffe909b47 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -378,11 +378,15 

[PATCH v6 1/3] drm/buddy: Implement tracking clear page feature

2024-02-08 Thread Arunpravin Paneer Selvam
- Add tracking clear page feature.

- Driver should enable the DRM_BUDDY_CLEARED flag if it
  successfully clears the blocks in the free path. On the other hand,
  DRM buddy marks each block as cleared.

- Track the available cleared pages size

- If driver requests cleared memory we prefer cleared memory
  but fallback to uncleared if we can't find the cleared blocks.
  when driver requests uncleared memory we try to use uncleared but
  fallback to cleared memory if necessary.

- When a block gets freed we clear it and mark the freed block as cleared,
  when there are buddies which are cleared as well we can merge them.
  Otherwise, we prefer to keep the blocks as separated.

v1: (Christian)
  - Depends on the flag check DRM_BUDDY_CLEARED, enable the block as
cleared. Else, reset the clear flag for each block in the list.

  - For merging the 2 cleared blocks compare as below,
drm_buddy_is_clear(block) != drm_buddy_is_clear(buddy)

v2: (Matthew)
  - Add a wrapper drm_buddy_free_list_internal for the freeing of blocks
operation within drm buddy.
  - Write a macro block_incompatible() to allocate the required blocks.
  - Update the xe driver for the drm_buddy_free_list change in arguments.

Signed-off-by: Arunpravin Paneer Selvam 
Signed-off-by: Matthew Auld 
Suggested-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  |   6 +-
 drivers/gpu/drm/drm_buddy.c   | 192 ++
 drivers/gpu/drm/i915/i915_ttm_buddy_manager.c |   6 +-
 drivers/gpu/drm/tests/drm_buddy_test.c|  10 +-
 drivers/gpu/drm/xe/xe_ttm_vram_mgr.c  |   4 +-
 include/drm/drm_buddy.h   |  18 +-
 6 files changed, 187 insertions(+), 49 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index 8db880244324..c0c851409241 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -571,7 +571,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager 
*man,
return 0;
 
 error_free_blocks:
-   drm_buddy_free_list(mm, &vres->blocks);
+   drm_buddy_free_list(mm, &vres->blocks, 0);
	mutex_unlock(&mgr->lock);
 error_fini:
ttm_resource_fini(man, >base);
@@ -604,7 +604,7 @@ static void amdgpu_vram_mgr_del(struct ttm_resource_manager 
*man,
 
amdgpu_vram_mgr_do_reserve(man);
 
-   drm_buddy_free_list(mm, &vres->blocks);
+   drm_buddy_free_list(mm, &vres->blocks, 0);
	mutex_unlock(&mgr->lock);
 
atomic64_sub(vis_usage, >vis_usage);
@@ -912,7 +912,7 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device *adev)
kfree(rsv);
 
list_for_each_entry_safe(rsv, temp, >reserved_pages, blocks) {
-   drm_buddy_free_list(&mgr->mm, &rsv->allocated);
+   drm_buddy_free_list(&mgr->mm, &rsv->allocated, 0);
kfree(rsv);
}
if (!adev->gmc.is_app_apu)
diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index f57e6d74fb0e..33ad0cfbd54c 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -57,6 +57,16 @@ static void list_insert_sorted(struct drm_buddy *mm,
	__list_add(&block->link, node->link.prev, &node->link);
 }
 
+static void clear_reset(struct drm_buddy_block *block)
+{
+   block->header &= ~DRM_BUDDY_HEADER_CLEAR;
+}
+
+static void mark_cleared(struct drm_buddy_block *block)
+{
+   block->header |= DRM_BUDDY_HEADER_CLEAR;
+}
+
 static void mark_allocated(struct drm_buddy_block *block)
 {
block->header &= ~DRM_BUDDY_HEADER_STATE;
@@ -223,6 +233,12 @@ static int split_block(struct drm_buddy *mm,
mark_free(mm, block->left);
mark_free(mm, block->right);
 
+   if (drm_buddy_block_is_clear(block)) {
+   mark_cleared(block->left);
+   mark_cleared(block->right);
+   clear_reset(block);
+   }
+
mark_split(block);
 
return 0;
@@ -273,6 +289,13 @@ static void __drm_buddy_free(struct drm_buddy *mm,
if (!drm_buddy_block_is_free(buddy))
break;
 
+   if (drm_buddy_block_is_clear(block) !=
+   drm_buddy_block_is_clear(buddy))
+   break;
+
+   if (drm_buddy_block_is_clear(block))
+   mark_cleared(parent);
+
	list_del(&buddy->link);
 
drm_block_free(mm, block);
@@ -295,26 +318,61 @@ void drm_buddy_free_block(struct drm_buddy *mm,
 {
BUG_ON(!drm_buddy_block_is_allocated(block));
mm->avail += drm_buddy_block_size(mm, block);
+   if (drm_buddy_block_is_clear(block))
+   mm->clear_avail += drm_buddy_block_size(mm, block);
+
__drm_buddy_free(mm, block);
 }
 EXPORT_SYMBOL(drm_buddy_free_block);
 
-/**
- * drm_buddy_free_list - free blocks
- *
- * @mm: DRM buddy manager
- * @objects: input list head to free blocks
- */
-void drm_buddy_free_list(struct drm_buddy *mm, struct list_head *objects)

Re: [PATCH v3 1/2] drm/buddy: Implement tracking clear page feature

2024-02-08 Thread Arunpravin Paneer Selvam




On 1/31/2024 11:59 PM, Matthew Auld wrote:

On 30/01/2024 20:30, Arunpravin Paneer Selvam wrote:

Hi Matthew,

On 12/21/2023 12:51 AM, Matthew Auld wrote:

Hi,

On 14/12/2023 13:42, Arunpravin Paneer Selvam wrote:

- Add tracking clear page feature.

- Driver should enable the DRM_BUDDY_CLEARED flag if it
   successfully clears the blocks in the free path. On the other hand,
   DRM buddy marks each block as cleared.

- Track the available cleared pages size

- If driver requests cleared memory we prefer cleared memory
   but fallback to uncleared if we can't find the cleared blocks.
   when driver requests uncleared memory we try to use uncleared but
   fallback to cleared memory if necessary.

- When a block gets freed we clear it and mark the freed block as 
cleared,

   when there are buddies which are cleared as well we can merge them.
   Otherwise, we prefer to keep the blocks as separated.


I was not involved, but it looks like we have also tried enabling 
the clear-on-free idea for VRAM in i915 and then also tracking that 
in the allocator, however that work unfortunately is not upstream. 
The code is open source though: 
https://github.com/intel-gpu/intel-gpu-i915-backports/blob/backport/main/drivers/gpu/drm/i915/i915_buddy.c#L300 



It looks like some of the design differences there are having two 
separate free lists, so mm->clean and mm->dirty (sounds reasonable 
to me). And also the inclusion of a de-fragmentation routine, since 
buddy blocks are now not always merged back, we might choose to run 
the defrag in some cases, which also sounds reasonable. IIRC in 
amdgpu userspace can control the page-size for an allocation, so 
perhaps you would want to run it first if the allocation fails, 
before trying to evict stuff?
I checked the clear-on-free idea implemented in i915. In amdgpu 
version, we are clearing all the blocks in amdgpu free routine and 
DRM buddy expects only the DRM_BUDDY_CLEARED flag. Basically, we are 
keeping the cleared blocks ready to be allocated when the user 
request for the cleared memory. We observed that this improves the 
performance on games and resolves the stutter issues as well. I see 
i915 active fences part does the same job for i915. Could we move 
this part into i915 free routine and set the DRM_BUDDY_CLEARED flag.


On de-fragmentation , I have included a function which can be called 
at places where we get -ENOSPC. This routine will merge back the 
clear and dirty blocks together to form a larger block of requested 
size. I am wondering where we could use this routine as for the 
non-contiguous memory we have the fallback method and for the 
contiguous memory we have the try harder method which searches 
through the tree.


Don't you also want to call it from your vram manager when the 
requested page size is something large, before trying to evict stuff? 
That could now fail due to fragmention IIUC. Or am I misreading 
amdgpu_vram_mgr_new()?
Yes you are right, we can call the defragmentation routine from VRAM 
manager when there is an allocation failure.


Thanks,
Arun




I agree we can have 2 lists (clear list and dirty list) and this 
would reduce the search iterations. But we need to handle the 2 lists 
design in all the functions which might require more time for testing 
on all platforms. Could we just go ahead with 1 list (free list) for 
now and I am going to take up this work as my next task.


Sounds good.



Thanks,
Arun.




v1: (Christian)
   - Depends on the flag check DRM_BUDDY_CLEARED, enable the block as
 cleared. Else, reset the clear flag for each block in the list.

   - For merging the 2 cleared blocks compare as below,
 drm_buddy_is_clear(block) != drm_buddy_is_clear(buddy)

Signed-off-by: Arunpravin Paneer Selvam 


Suggested-by: Christian König 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  |   6 +-
  drivers/gpu/drm/drm_buddy.c   | 169 
+++---

  drivers/gpu/drm/i915/i915_ttm_buddy_manager.c |   6 +-
  drivers/gpu/drm/tests/drm_buddy_test.c    |  10 +-
  include/drm/drm_buddy.h   |  18 +-
  5 files changed, 168 insertions(+), 41 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c

index 08916538a615..d0e199cc8f17 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -556,7 +556,7 @@ static int amdgpu_vram_mgr_new(struct 
ttm_resource_manager *man,

  return 0;
    error_free_blocks:
-    drm_buddy_free_list(mm, >blocks);
+    drm_buddy_free_list(mm, >blocks, 0);
  mutex_unlock(>lock);
  error_fini:
  ttm_resource_fini(man, >base);
@@ -589,7 +589,7 @@ static void amdgpu_vram_mgr_del(struct 
ttm_resource_manager *man,

    amdgpu_vram_mgr_do_reserve(man);
  -    drm_buddy_free_list(mm, >blocks);
+    drm_buddy_free_list(mm, >blocks, 0);
  mutex_unlock(>lock);
    atomic64_sub(vis_usage, >vis_usage);
@@ -897,7 +897,7 @@ 

Re: [PATCH v4 3/3] drm/amd: Drop unneeded functions to check if s3/s0ix active

2024-02-08 Thread Mario Limonciello

On 2/8/2024 00:54, Christian König wrote:

Am 08.02.24 um 06:52 schrieb Mario Limonciello:

amdgpu_acpi_is_s0ix_active() and amdgpu_acpi_is_s3_active() aren't
needed to be checked multiple times in a suspend cycle.  Checking and
setting up policy one time in the prepare() callback is sufficient.


Mhm, looking at amdgpu_acpi_is_s3_active() I think we should not cache 
that in a variable in the first place.


Just calling the function all the time to check the state should be 
sufficient, or do we then run into any state transition problems?


I think calling to check it each time is perfectly fine, it should never 
change once the sequence starts until it's over.


If the first 2 patches look OK, I'd like to get those merged so we can 
fix the regressions.  I'll do another series that can drop the cached 
parameters.




Regards,
Christian.



Signed-off-by: Mario Limonciello 
---
v4: New patch
---
  drivers/gpu/drm/amd/amdgpu/amdgpu.h  |  4 
  drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c |  7 +++
  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c  | 17 ++---
  3 files changed, 5 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h

index f6c38a974bae..53823539eba5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1545,12 +1545,8 @@ static inline int 
amdgpu_acpi_smart_shift_update(struct drm_device *dev,

  #endif
  #if defined(CONFIG_ACPI) && defined(CONFIG_SUSPEND)
-bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev);
-bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev);
  void amdgpu_choose_low_power_state(struct amdgpu_device *adev);
  #else
-static inline bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device 
*adev) { return false; }
-static inline bool amdgpu_acpi_is_s3_active(struct amdgpu_device 
*adev) { return false; }
  static void amdgpu_choose_low_power_state(struct amdgpu_device 
*adev) { }

  #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c

index cc21ed67a330..1d58728f8c3f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
@@ -1366,8 +1366,7 @@ bool amdgpu_acpi_should_gpu_reset(struct 
amdgpu_device *adev)
  adev->gfx.imu.funcs) /* Not need to do mode2 reset for IMU 
enabled APUs */

  return false;
-    if ((adev->flags & AMD_IS_APU) &&
-    amdgpu_acpi_is_s3_active(adev))
+    if ((adev->flags & AMD_IS_APU) && adev->in_s3)
  return false;
  if (amdgpu_sriov_vf(adev))
@@ -1472,7 +1471,7 @@ void amdgpu_acpi_release(void)
   *
   * returns true if supported, false if not.
   */
-bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev)
+static inline bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev)
  {
  return !(adev->flags & AMD_IS_APU) ||
  (pm_suspend_target_state == PM_SUSPEND_MEM);
@@ -1485,7 +1484,7 @@ bool amdgpu_acpi_is_s3_active(struct 
amdgpu_device *adev)

   *
   * returns true if supported, false if not.
   */
-bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev)
+static inline bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device 
*adev)

  {
  if (!(adev->flags & AMD_IS_APU) ||
  (pm_suspend_target_state != PM_SUSPEND_TO_IDLE))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c

index 971acf01bea6..2bc4c5bb9b5a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -2456,13 +2456,6 @@ static int amdgpu_pmops_prepare(struct device 
*dev)

  pm_runtime_suspended(dev))
  return 1;
-    /* if we will not support s3 or s2i for the device
- *  then skip suspend
- */
-    if (!amdgpu_acpi_is_s0ix_active(adev) &&
-    !amdgpu_acpi_is_s3_active(adev))
-    return 1;
-
  return amdgpu_device_prepare(drm_dev);
  }
@@ -2476,10 +2469,6 @@ static int amdgpu_pmops_suspend(struct device 
*dev)

  struct drm_device *drm_dev = dev_get_drvdata(dev);
  struct amdgpu_device *adev = drm_to_adev(drm_dev);
-    if (amdgpu_acpi_is_s0ix_active(adev))
-    adev->in_s0ix = true;
-    else if (amdgpu_acpi_is_s3_active(adev))
-    adev->in_s3 = true;
  if (!adev->in_s0ix && !adev->in_s3)
  return 0;
  return amdgpu_device_suspend(drm_dev, true);
@@ -2510,10 +2499,8 @@ static int amdgpu_pmops_resume(struct device *dev)
  adev->no_hw_access = true;
  r = amdgpu_device_resume(drm_dev, true);
-    if (amdgpu_acpi_is_s0ix_active(adev))
-    adev->in_s0ix = false;
-    else
-    adev->in_s3 = false;
+    adev->in_s0ix = adev->in_s3 = false;
+
  return r;
  }






Re: [PATCH] drm/buddy: Fix alloc_range() error handling code

2024-02-08 Thread Matthew Auld

On 08/02/2024 14:17, Matthew Auld wrote:

On 08/02/2024 13:47, Arunpravin Paneer Selvam wrote:

Hi Matthew,

On 2/8/2024 7:00 PM, Matthew Auld wrote:

On 07/02/2024 17:44, Arunpravin Paneer Selvam wrote:

Few users have observed display corruption when they boot
the machine to KDE Plasma or playing games. We have root
caused the problem that whenever alloc_range() couldn't
find the required memory blocks the function was returning
SUCCESS in some of the corner cases.


Can you please give an example here?

In the try hard contiguous allocation, for example the requested 
memory is 1024 pages,
it might go and pick the highest and last block (of size 512 pages) in 
the freelist where
there are no more space exist in the total address range. In this kind 
of corner case,
alloc_range was returning success though the allocated size is less 
than the requested size.
Hence in try_hard_contiguous_allocation, we will not proceed to the 
LHS allocation and
we return only with the RHS allocation having only the 512 pages of 
allocation. This
leads to display corruption in many use cases (I think mainly when 
requested for contiguous huge buffer)

mainly on APU platforms.


Ok, I guess other thing is doing:

lhs_offset = drm_buddy_block_offset(block) - lhs_size;

I presume it's possible for block_offset < lhs_size here, which might be 
funny?


I think would also be good to add some basic unit test here:
https://patchwork.freedesktop.org/patch/577497/?series=129671=1

Test passes with your patch, and ofc fails without it.

Just the question of the lhs_offset above,
Reviewed-by: Matthew Auld 





Thanks,
Arun.


The right approach would be if the total allocated size
is less than the required size, the function should
return -ENOSPC.

Gitlab ticket link - 
https://gitlab.freedesktop.org/drm/amd/-/issues/3097

Fixes: 0a1844bf0b53 ("drm/buddy: Improve contiguous memory allocation")
Signed-off-by: Arunpravin Paneer Selvam 


Tested-by: Mario Limonciello 
---
  drivers/gpu/drm/drm_buddy.c | 6 ++
  1 file changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index f57e6d74fb0e..c1a99bf4dffd 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -539,6 +539,12 @@ static int __alloc_range(struct drm_buddy *mm,
  } while (1);
    list_splice_tail(, blocks);
+
+    if (total_allocated < size) {
+    err = -ENOSPC;
+    goto err_free;
+    }
+
  return 0;
    err_undo:




Re: [PATCH] drm/amd/pm: denote S to the deep sleep clock

2024-02-08 Thread Alex Deucher
On Thu, Feb 8, 2024 at 12:34 AM Kenneth Feng  wrote:
>
> denote S to the deep sleep clock for the clock output on smu 
> v13.0.0/v13.0.7/v13.0.10
>

Would be nice to fix up smu 11 as well at some point.

Alex


> Signed-off-by: Kenneth Feng 
> ---
>  .../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c  | 27 +--
>  .../drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c  | 27 +--
>  2 files changed, 38 insertions(+), 16 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c 
> b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
> index 2e7518f4ae1a..fd33646970a4 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
> +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
> @@ -109,6 +109,7 @@
>  #define PP_OD_FEATURE_FAN_MINIMUM_PWM  10
>
>  #define LINK_SPEED_MAX 3
> +#define SMU_13_0_0_DSCLK_THRESHOLD 100
>
>  static struct cmn2asic_msg_mapping 
> smu_v13_0_0_message_map[SMU_MSG_MAX_COUNT] = {
> MSG_MAP(TestMessage,PPSMC_MSG_TestMessage,
>  1),
> @@ -1269,20 +1270,30 @@ static int smu_v13_0_0_print_clk_levels(struct 
> smu_context *smu,
>  *   - level 0 -> min clock freq
>  *   - level 1 -> max clock freq
>  * And the current clock frequency can be any value 
> between them.
> -* So, if the current clock frequency is not at level 
> 0 or level 1,
> -* we will fake it as three dpm levels:
> +* So, if the current clock frequency is lower than 
> level 0,
> +* we will denote it to S:
> +*   - level S -> current actual clock freq
>  *   - level 0 -> min clock freq
> -*   - level 1 -> current actual clock freq
> -*   - level 2 -> max clock freq
> +*   - level 1 -> max clock freq
>  */
> if ((single_dpm_table->dpm_levels[0].value != 
> curr_freq) &&
> -(single_dpm_table->dpm_levels[1].value != 
> curr_freq)) {
> +(single_dpm_table->dpm_levels[1].value != 
> curr_freq) &&
> +(curr_freq < SMU_13_0_0_DSCLK_THRESHOLD)) {
> +   size += sysfs_emit_at(buf, size, "S: %uMhz 
> *\n",
> +   curr_freq);
> size += sysfs_emit_at(buf, size, "0: %uMhz\n",
> 
> single_dpm_table->dpm_levels[0].value);
> -   size += sysfs_emit_at(buf, size, "1: %uMhz 
> *\n",
> -   curr_freq);
> -   size += sysfs_emit_at(buf, size, "2: %uMhz\n",
> +   size += sysfs_emit_at(buf, size, "1: %uMhz\n",
> 
> single_dpm_table->dpm_levels[1].value);
> +   } else if ((single_dpm_table->dpm_levels[0].value != 
> curr_freq) &&
> +   
> (single_dpm_table->dpm_levels[1].value != curr_freq)) {
> +   size += sysfs_emit_at(buf, size, "0: %uMhz 
> %s\n",
> +   
> single_dpm_table->dpm_levels[0].value,
> +   
> single_dpm_table->dpm_levels[0].value == curr_freq ? "*" : "");
> +   size += sysfs_emit_at(buf, size, "1: %uMhz 
> *\n", curr_freq);
> +   size += sysfs_emit_at(buf, size, "2: %uMhz 
> %s\n",
> +   
> single_dpm_table->dpm_levels[1].value,
> +   
> single_dpm_table->dpm_levels[1].value == curr_freq ? "*" : "");
> } else {
> size += sysfs_emit_at(buf, size, "0: %uMhz 
> %s\n",
> 
> single_dpm_table->dpm_levels[0].value,
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c 
> b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
> index 0ffdb58af74e..2ecebad7a9cb 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
> +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
> @@ -85,6 +85,7 @@
>  #define PP_OD_FEATURE_FAN_MINIMUM_PWM  10
>
>  #define LINK_SPEED_MAX 3
> +#define SMU_13_0_7_DSCLK_THRESHOLD 100
>
>  static struct cmn2asic_msg_mapping 
> smu_v13_0_7_message_map[SMU_MSG_MAX_COUNT] = {
> MSG_MAP(TestMessage,PPSMC_MSG_TestMessage,
>  1),
> @@ -1258,20 +1259,30 @@ static int smu_v13_0_7_print_clk_levels(struct 
> 

Re: Re: [PATCH v4 1/3] drm: Add drm_get_acpi_edid() helper

2024-02-08 Thread Jani Nikula
On Thu, 08 Feb 2024, Maxime Ripard  wrote:
> On Thu, Feb 08, 2024 at 11:57:11AM +0200, Jani Nikula wrote:
>> On Wed, 07 Feb 2024, Mario Limonciello  wrote:
>> > Some manufacturers have intentionally put an EDID that differs from
>> > the EDID on the internal panel on laptops.  Drivers can call this
>> > helper to attempt to fetch the EDID from the BIOS's ACPI _DDC method.
>> >
>> > Signed-off-by: Mario Limonciello 
>> > ---
>> >  drivers/gpu/drm/Kconfig|  5 +++
>> >  drivers/gpu/drm/drm_edid.c | 77 ++
>> >  include/drm/drm_edid.h |  1 +
>> >  3 files changed, 83 insertions(+)
>> >
>> > diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
>> > index 6ec33d36f3a4..ec2bb71e8b36 100644
>> > --- a/drivers/gpu/drm/Kconfig
>> > +++ b/drivers/gpu/drm/Kconfig
>> > @@ -21,6 +21,11 @@ menuconfig DRM
>> >select KCMP
>> >select VIDEO_CMDLINE
>> >select VIDEO_NOMODESET
>> > +  select ACPI_VIDEO if ACPI
>> > +  select BACKLIGHT_CLASS_DEVICE if ACPI
>> > +  select INPUT if ACPI
>> > +  select X86_PLATFORM_DEVICES if ACPI && X86
>> > +  select ACPI_WMI if ACPI && X86
>> 
>> I think I'll defer to drm maintainers on whether this is okay or
>> something to be avoided.
>> 
>> 
>> >help
>> >  Kernel-level support for the Direct Rendering Infrastructure (DRI)
>> >  introduced in XFree86 4.0. If you say Y here, you need to select
>> > diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c
>> > index 923c4423151c..c649b4f9fd8e 100644
>> > --- a/drivers/gpu/drm/drm_edid.c
>> > +++ b/drivers/gpu/drm/drm_edid.c
>> > @@ -28,6 +28,7 @@
>> >   * DEALINGS IN THE SOFTWARE.
>> >   */
>> >  
>> > +#include 
>> >  #include 
>> >  #include 
>> >  #include 
>> > @@ -2188,6 +2189,49 @@ drm_do_probe_ddc_edid(void *data, u8 *buf, unsigned 
>> > int block, size_t len)
>> >return ret == xfers ? 0 : -1;
>> >  }
>> >  
>> > +/**
>> > + * drm_do_probe_acpi_edid() - get EDID information via ACPI _DDC
>> > + * @data: struct drm_device
>> > + * @buf: EDID data buffer to be filled
>> > + * @block: 128 byte EDID block to start fetching from
>> > + * @len: EDID data buffer length to fetch
>> > + *
>> > + * Try to fetch EDID information by calling acpi_video_get_edid() 
>> > function.
>> > + *
>> > + * Return: 0 on success or error code on failure.
>> > + */
>> > +static int
>> > +drm_do_probe_acpi_edid(void *data, u8 *buf, unsigned int block, size_t 
>> > len)
>> > +{
>> > +  struct drm_device *ddev = data;
>> > +  struct acpi_device *acpidev = ACPI_COMPANION(ddev->dev);
>> > +  unsigned char start = block * EDID_LENGTH;
>> > +  void *edid;
>> > +  int r;
>> > +
>> > +  if (!acpidev)
>> > +  return -ENODEV;
>> > +
>> > +  /* fetch the entire edid from BIOS */
>> > +  r = acpi_video_get_edid(acpidev, ACPI_VIDEO_DISPLAY_LCD, -1, );
>> > +  if (r < 0) {
>> > +  DRM_DEBUG_KMS("Failed to get EDID from ACPI: %d\n", r);
>> > +  return -EINVAL;
>> > +  }
>> > +  if (len > r || start > r || start + len > r) {
>> > +  r = -EINVAL;
>> > +  goto cleanup;
>> > +  }
>> > +
>> > +  memcpy(buf, edid + start, len);
>> > +  r = 0;
>> > +
>> > +cleanup:
>> > +  kfree(edid);
>> > +
>> > +  return r;
>> > +}
>> > +
>> >  static void connector_bad_edid(struct drm_connector *connector,
>> >   const struct edid *edid, int num_blocks)
>> >  {
>> > @@ -2643,6 +2687,39 @@ struct edid *drm_get_edid(struct drm_connector 
>> > *connector,
>> >  }
>> >  EXPORT_SYMBOL(drm_get_edid);
>> >  
>> > +/**
>> > + * drm_get_acpi_edid - get EDID data, if available
>> 
>> I'd prefer all the new EDID API to be named drm_edid_*. Makes a clean
>> break from the old API, and is more consistent.
>> 
>> So perhaps drm_edid_read_acpi() to be in line with all the other struct
>> drm_edid based EDID reading functions.
>> 
>> > + * @connector: connector we're probing
>> > + *
>> > + * Use the BIOS to attempt to grab EDID data if possible.
>> > + *
>> > + * The returned pointer must be freed using drm_edid_free().
>> > + *
>> > + * Return: Pointer to valid EDID or NULL if we couldn't find any.
>> > + */
>> > +const struct drm_edid *drm_get_acpi_edid(struct drm_connector *connector)
>> > +{
>> > +  const struct drm_edid *drm_edid;
>> > +
>> > +  switch (connector->connector_type) {
>> > +  case DRM_MODE_CONNECTOR_LVDS:
>> > +  case DRM_MODE_CONNECTOR_eDP:
>> > +  break;
>> > +  default:
>> > +  return NULL;
>> > +  }
>> > +
>> > +  if (connector->force == DRM_FORCE_OFF)
>> > +  return NULL;
>> > +
>> > +  drm_edid = drm_edid_read_custom(connector, drm_do_probe_acpi_edid, 
>> > connector->dev);
>> > +
>> > +  /* Note: Do *not* call connector updates here. */
>> > +
>> > +  return drm_edid;
>> > +}
>> > +EXPORT_SYMBOL(drm_get_acpi_edid);
>
> Why shouldn't we use the BIOS/UEFI to retrieve them if it's available?
>
> I guess what I'm asking is why should we make this an exported function
> that drivers would have to call explicitly, 

Re: [PATCH] drm/buddy: Fix alloc_range() error handling code

2024-02-08 Thread Matthew Auld

On 08/02/2024 13:47, Arunpravin Paneer Selvam wrote:

Hi Matthew,

On 2/8/2024 7:00 PM, Matthew Auld wrote:

On 07/02/2024 17:44, Arunpravin Paneer Selvam wrote:

Few users have observed display corruption when they boot
the machine to KDE Plasma or playing games. We have root
caused the problem that whenever alloc_range() couldn't
find the required memory blocks the function was returning
SUCCESS in some of the corner cases.


Can you please give an example here?

In the try hard contiguous allocation, for example the requested memory 
is 1024 pages,
it might go and pick the highest and last block (of size 512 pages) in 
the freelist where
there are no more space exist in the total address range. In this kind 
of corner case,
alloc_range was returning success though the allocated size is less than 
the requested size.
Hence in try_hard_contiguous_allocation, we will not proceed to the LHS 
allocation and
we return only with the RHS allocation having only the 512 pages of 
allocation. This
leads to display corruption in many use cases (I think mainly when 
requested for contiguous huge buffer)

mainly on APU platforms.


Ok, I guess other thing is doing:

lhs_offset = drm_buddy_block_offset(block) - lhs_size;

I presume it's possible for block_offset < lhs_size here, which might be 
funny?




Thanks,
Arun.


The right approach would be if the total allocated size
is less than the required size, the function should
return -ENOSPC.

Gitlab ticket link - 
https://gitlab.freedesktop.org/drm/amd/-/issues/3097

Fixes: 0a1844bf0b53 ("drm/buddy: Improve contiguous memory allocation")
Signed-off-by: Arunpravin Paneer Selvam 


Tested-by: Mario Limonciello 
---
  drivers/gpu/drm/drm_buddy.c | 6 ++
  1 file changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index f57e6d74fb0e..c1a99bf4dffd 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -539,6 +539,12 @@ static int __alloc_range(struct drm_buddy *mm,
  } while (1);
    list_splice_tail(, blocks);
+
+    if (total_allocated < size) {
+    err = -ENOSPC;
+    goto err_free;
+    }
+
  return 0;
    err_undo:




[PATCH] drm/amd/display: Fix missing NULL check in dcn21_set_backlight_level()

2024-02-08 Thread Nikita Zhandarovich
On the off chance 'panel_cntl' ends up being not properly initialized,
dcn21_set_backlight_level() may hit NULL pointer dereference while
changing embedded panel backlight levels.

Prevent this issue by using some of the existing checks for the
similar purpose. At the same time clean up redundant tests for
NULL in 'abm'.

Found by Linux Verification Center (linuxtesting.org) with static
analysis tool SVACE.

Fixes: 6f0ef80a00ad ("drm/amd/display: Fix ABM pipe/backlight issues when 
change backlight")
Signed-off-by: Nikita Zhandarovich 
---
 drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.c | 15 ---
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.c 
b/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.c
index 8e88dcaf88f5..2b1b580541a8 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.c
@@ -247,7 +247,7 @@ bool dcn21_set_backlight_level(struct pipe_ctx *pipe_ctx,
if (abm != NULL) {
uint32_t otg_inst = pipe_ctx->stream_res.tg->inst;
 
-   if (abm && panel_cntl) {
+   if (panel_cntl) {
if (abm->funcs && abm->funcs->set_pipe_ex) {
abm->funcs->set_pipe_ex(abm,
otg_inst,
@@ -261,15 +261,16 @@ bool dcn21_set_backlight_level(struct pipe_ctx *pipe_ctx,
panel_cntl->inst,

panel_cntl->pwrseq_inst);
}
+
+   if (abm->funcs && abm->funcs->set_backlight_level_pwm)
+   abm->funcs->set_backlight_level_pwm(abm, 
backlight_pwm_u16_16,
+   frame_ramp, 0, panel_cntl->inst);
+   else
+   dmub_abm_set_backlight(dc, 
backlight_pwm_u16_16, frame_ramp,
+   panel_cntl->inst);
}
}
 
-   if (abm && abm->funcs && abm->funcs->set_backlight_level_pwm)
-   abm->funcs->set_backlight_level_pwm(abm, backlight_pwm_u16_16,
-   frame_ramp, 0, panel_cntl->inst);
-   else
-   dmub_abm_set_backlight(dc, backlight_pwm_u16_16, frame_ramp, 
panel_cntl->inst);
-
return true;
 }
 


Re: [PATCH] drm/buddy: Fix alloc_range() error handling code

2024-02-08 Thread Mario Limonciello

On 2/8/2024 06:06, Arunpravin Paneer Selvam wrote:

Hi Christian,

On 2/8/2024 12:27 PM, Christian König wrote:

Am 07.02.24 um 18:44 schrieb Arunpravin Paneer Selvam:

Few users have observed display corruption when they boot
the machine to KDE Plasma or playing games. We have root
caused the problem that whenever alloc_range() couldn't
find the required memory blocks the function was returning
SUCCESS in some of the corner cases.

The right approach would be if the total allocated size
is less than the required size, the function should
return -ENOSPC.

Gitlab ticket link - 
https://gitlab.freedesktop.org/drm/amd/-/issues/3097


Syntax should be "Closes: $URL"


Fixes: 0a1844bf0b53 ("drm/buddy: Improve contiguous memory allocation")
Signed-off-by: Arunpravin Paneer Selvam 


Tested-by: Mario Limonciello 


Acked-by: Christian König 

CC: stable.. ?

I will check and add the stable kernel version.


Should be 6.7.



Thanks,
Arun.



---
  drivers/gpu/drm/drm_buddy.c | 6 ++
  1 file changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index f57e6d74fb0e..c1a99bf4dffd 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -539,6 +539,12 @@ static int __alloc_range(struct drm_buddy *mm,
  } while (1);
    list_splice_tail(, blocks);
+
+    if (total_allocated < size) {
+    err = -ENOSPC;
+    goto err_free;
+    }
+
  return 0;
    err_undo:








Re: [PATCH] drm/buddy: Fix alloc_range() error handling code

2024-02-08 Thread Arunpravin Paneer Selvam

Hi Matthew,

On 2/8/2024 7:00 PM, Matthew Auld wrote:

On 07/02/2024 17:44, Arunpravin Paneer Selvam wrote:

Few users have observed display corruption when they boot
the machine to KDE Plasma or playing games. We have root
caused the problem that whenever alloc_range() couldn't
find the required memory blocks the function was returning
SUCCESS in some of the corner cases.


Can you please give an example here?

In the try hard contiguous allocation, for example the requested memory 
is 1024 pages,
it might go and pick the highest and last block (of size 512 pages) in 
the freelist where
there are no more space exist in the total address range. In this kind 
of corner case,
alloc_range was returning success though the allocated size is less than 
the requested size.
Hence in try_hard_contiguous_allocation, we will not proceed to the LHS 
allocation and
we return only with the RHS allocation having only the 512 pages of 
allocation. This
leads to display corruption in many use cases (I think mainly when 
requested for contiguous huge buffer)

mainly on APU platforms.

Thanks,
Arun.


The right approach would be if the total allocated size
is less than the required size, the function should
return -ENOSPC.

Gitlab ticket link - 
https://gitlab.freedesktop.org/drm/amd/-/issues/3097

Fixes: 0a1844bf0b53 ("drm/buddy: Improve contiguous memory allocation")
Signed-off-by: Arunpravin Paneer Selvam 


Tested-by: Mario Limonciello 
---
  drivers/gpu/drm/drm_buddy.c | 6 ++
  1 file changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index f57e6d74fb0e..c1a99bf4dffd 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -539,6 +539,12 @@ static int __alloc_range(struct drm_buddy *mm,
  } while (1);
    list_splice_tail(, blocks);
+
+    if (total_allocated < size) {
+    err = -ENOSPC;
+    goto err_free;
+    }
+
  return 0;
    err_undo:




Re: [PATCH] drm/buddy: Fix alloc_range() error handling code

2024-02-08 Thread Matthew Auld

On 07/02/2024 17:44, Arunpravin Paneer Selvam wrote:

Few users have observed display corruption when they boot
the machine to KDE Plasma or playing games. We have root
caused the problem that whenever alloc_range() couldn't
find the required memory blocks the function was returning
SUCCESS in some of the corner cases.


Can you please give an example here?



The right approach would be if the total allocated size
is less than the required size, the function should
return -ENOSPC.

Gitlab ticket link - https://gitlab.freedesktop.org/drm/amd/-/issues/3097
Fixes: 0a1844bf0b53 ("drm/buddy: Improve contiguous memory allocation")
Signed-off-by: Arunpravin Paneer Selvam 
Tested-by: Mario Limonciello 
---
  drivers/gpu/drm/drm_buddy.c | 6 ++
  1 file changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index f57e6d74fb0e..c1a99bf4dffd 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -539,6 +539,12 @@ static int __alloc_range(struct drm_buddy *mm,
} while (1);
  
  	list_splice_tail(, blocks);

+
+   if (total_allocated < size) {
+   err = -ENOSPC;
+   goto err_free;
+   }
+
return 0;
  
  err_undo:


Re: Re: [PATCH v4 1/3] drm: Add drm_get_acpi_edid() helper

2024-02-08 Thread Maxime Ripard
On Thu, Feb 08, 2024 at 11:57:11AM +0200, Jani Nikula wrote:
> On Wed, 07 Feb 2024, Mario Limonciello  wrote:
> > Some manufacturers have intentionally put an EDID that differs from
> > the EDID on the internal panel on laptops.  Drivers can call this
> > helper to attempt to fetch the EDID from the BIOS's ACPI _DDC method.
> >
> > Signed-off-by: Mario Limonciello 
> > ---
> >  drivers/gpu/drm/Kconfig|  5 +++
> >  drivers/gpu/drm/drm_edid.c | 77 ++
> >  include/drm/drm_edid.h |  1 +
> >  3 files changed, 83 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
> > index 6ec33d36f3a4..ec2bb71e8b36 100644
> > --- a/drivers/gpu/drm/Kconfig
> > +++ b/drivers/gpu/drm/Kconfig
> > @@ -21,6 +21,11 @@ menuconfig DRM
> > select KCMP
> > select VIDEO_CMDLINE
> > select VIDEO_NOMODESET
> > +   select ACPI_VIDEO if ACPI
> > +   select BACKLIGHT_CLASS_DEVICE if ACPI
> > +   select INPUT if ACPI
> > +   select X86_PLATFORM_DEVICES if ACPI && X86
> > +   select ACPI_WMI if ACPI && X86
> 
> I think I'll defer to drm maintainers on whether this is okay or
> something to be avoided.
> 
> 
> > help
> >   Kernel-level support for the Direct Rendering Infrastructure (DRI)
> >   introduced in XFree86 4.0. If you say Y here, you need to select
> > diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c
> > index 923c4423151c..c649b4f9fd8e 100644
> > --- a/drivers/gpu/drm/drm_edid.c
> > +++ b/drivers/gpu/drm/drm_edid.c
> > @@ -28,6 +28,7 @@
> >   * DEALINGS IN THE SOFTWARE.
> >   */
> >  
> > +#include 
> >  #include 
> >  #include 
> >  #include 
> > @@ -2188,6 +2189,49 @@ drm_do_probe_ddc_edid(void *data, u8 *buf, unsigned 
> > int block, size_t len)
> > return ret == xfers ? 0 : -1;
> >  }
> >  
> > +/**
> > + * drm_do_probe_acpi_edid() - get EDID information via ACPI _DDC
> > + * @data: struct drm_device
> > + * @buf: EDID data buffer to be filled
> > + * @block: 128 byte EDID block to start fetching from
> > + * @len: EDID data buffer length to fetch
> > + *
> > + * Try to fetch EDID information by calling acpi_video_get_edid() function.
> > + *
> > + * Return: 0 on success or error code on failure.
> > + */
> > +static int
> > +drm_do_probe_acpi_edid(void *data, u8 *buf, unsigned int block, size_t len)
> > +{
> > +   struct drm_device *ddev = data;
> > +   struct acpi_device *acpidev = ACPI_COMPANION(ddev->dev);
> > +   unsigned char start = block * EDID_LENGTH;
> > +   void *edid;
> > +   int r;
> > +
> > +   if (!acpidev)
> > +   return -ENODEV;
> > +
> > +   /* fetch the entire edid from BIOS */
> > +   r = acpi_video_get_edid(acpidev, ACPI_VIDEO_DISPLAY_LCD, -1, );
> > +   if (r < 0) {
> > +   DRM_DEBUG_KMS("Failed to get EDID from ACPI: %d\n", r);
> > +   return -EINVAL;
> > +   }
> > +   if (len > r || start > r || start + len > r) {
> > +   r = -EINVAL;
> > +   goto cleanup;
> > +   }
> > +
> > +   memcpy(buf, edid + start, len);
> > +   r = 0;
> > +
> > +cleanup:
> > +   kfree(edid);
> > +
> > +   return r;
> > +}
> > +
> >  static void connector_bad_edid(struct drm_connector *connector,
> >const struct edid *edid, int num_blocks)
> >  {
> > @@ -2643,6 +2687,39 @@ struct edid *drm_get_edid(struct drm_connector 
> > *connector,
> >  }
> >  EXPORT_SYMBOL(drm_get_edid);
> >  
> > +/**
> > + * drm_get_acpi_edid - get EDID data, if available
> 
> I'd prefer all the new EDID API to be named drm_edid_*. Makes a clean
> break from the old API, and is more consistent.
> 
> So perhaps drm_edid_read_acpi() to be in line with all the other struct
> drm_edid based EDID reading functions.
> 
> > + * @connector: connector we're probing
> > + *
> > + * Use the BIOS to attempt to grab EDID data if possible.
> > + *
> > + * The returned pointer must be freed using drm_edid_free().
> > + *
> > + * Return: Pointer to valid EDID or NULL if we couldn't find any.
> > + */
> > +const struct drm_edid *drm_get_acpi_edid(struct drm_connector *connector)
> > +{
> > +   const struct drm_edid *drm_edid;
> > +
> > +   switch (connector->connector_type) {
> > +   case DRM_MODE_CONNECTOR_LVDS:
> > +   case DRM_MODE_CONNECTOR_eDP:
> > +   break;
> > +   default:
> > +   return NULL;
> > +   }
> > +
> > +   if (connector->force == DRM_FORCE_OFF)
> > +   return NULL;
> > +
> > +   drm_edid = drm_edid_read_custom(connector, drm_do_probe_acpi_edid, 
> > connector->dev);
> > +
> > +   /* Note: Do *not* call connector updates here. */
> > +
> > +   return drm_edid;
> > +}
> > +EXPORT_SYMBOL(drm_get_acpi_edid);

Why shouldn't we use the BIOS/UEFI to retrieve them if it's available?

I guess what I'm asking is why should we make this an exported function
that drivers would have to call explicitly, instead of just making it
part of the usual EDID retrieval interface.

Maxime


signature.asc
Description: PGP signature


Re: [PATCH] drm/buddy: Fix alloc_range() error handling code

2024-02-08 Thread Arunpravin Paneer Selvam

Hi Christian,

On 2/8/2024 12:27 PM, Christian König wrote:

Am 07.02.24 um 18:44 schrieb Arunpravin Paneer Selvam:

A few users have observed display corruption when they boot
the machine into KDE Plasma or play games. We have root-caused
the problem: whenever alloc_range() couldn't find the required
memory blocks, the function was returning SUCCESS in some of
the corner cases.

The right approach would be: if the total allocated size
is less than the required size, the function should
return -ENOSPC.

Gitlab ticket link - 
https://gitlab.freedesktop.org/drm/amd/-/issues/3097

Fixes: 0a1844bf0b53 ("drm/buddy: Improve contiguous memory allocation")
Signed-off-by: Arunpravin Paneer Selvam 


Tested-by: Mario Limonciello 


Acked-by: Christian König 

CC: stable.. ?

I will check and add the stable kernel version.

Thanks,
Arun.



---
  drivers/gpu/drm/drm_buddy.c | 6 ++
  1 file changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index f57e6d74fb0e..c1a99bf4dffd 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -539,6 +539,12 @@ static int __alloc_range(struct drm_buddy *mm,
  } while (1);
    list_splice_tail(, blocks);
+
+    if (total_allocated < size) {
+    err = -ENOSPC;
+    goto err_free;
+    }
+
  return 0;
    err_undo:






Re: [PATCH v4 1/3] drm: Add drm_get_acpi_edid() helper

2024-02-08 Thread Jani Nikula
On Wed, 07 Feb 2024, Mario Limonciello  wrote:
> Some manufacturers have intentionally put an EDID that differs from
> the EDID on the internal panel on laptops.  Drivers can call this
> helper to attempt to fetch the EDID from the BIOS's ACPI _DDC method.
>
> Signed-off-by: Mario Limonciello 
> ---
>  drivers/gpu/drm/Kconfig|  5 +++
>  drivers/gpu/drm/drm_edid.c | 77 ++
>  include/drm/drm_edid.h |  1 +
>  3 files changed, 83 insertions(+)
>
> diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
> index 6ec33d36f3a4..ec2bb71e8b36 100644
> --- a/drivers/gpu/drm/Kconfig
> +++ b/drivers/gpu/drm/Kconfig
> @@ -21,6 +21,11 @@ menuconfig DRM
>   select KCMP
>   select VIDEO_CMDLINE
>   select VIDEO_NOMODESET
> + select ACPI_VIDEO if ACPI
> + select BACKLIGHT_CLASS_DEVICE if ACPI
> + select INPUT if ACPI
> + select X86_PLATFORM_DEVICES if ACPI && X86
> + select ACPI_WMI if ACPI && X86

I think I'll defer to drm maintainers on whether this is okay or
something to be avoided.


>   help
> Kernel-level support for the Direct Rendering Infrastructure (DRI)
> introduced in XFree86 4.0. If you say Y here, you need to select
> diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c
> index 923c4423151c..c649b4f9fd8e 100644
> --- a/drivers/gpu/drm/drm_edid.c
> +++ b/drivers/gpu/drm/drm_edid.c
> @@ -28,6 +28,7 @@
>   * DEALINGS IN THE SOFTWARE.
>   */
>  
> +#include 
>  #include 
>  #include 
>  #include 
> @@ -2188,6 +2189,49 @@ drm_do_probe_ddc_edid(void *data, u8 *buf, unsigned 
> int block, size_t len)
>   return ret == xfers ? 0 : -1;
>  }
>  
> +/**
> + * drm_do_probe_acpi_edid() - get EDID information via ACPI _DDC
> + * @data: struct drm_device
> + * @buf: EDID data buffer to be filled
> + * @block: 128 byte EDID block to start fetching from
> + * @len: EDID data buffer length to fetch
> + *
> + * Try to fetch EDID information by calling acpi_video_get_edid() function.
> + *
> + * Return: 0 on success or error code on failure.
> + */
> +static int
> +drm_do_probe_acpi_edid(void *data, u8 *buf, unsigned int block, size_t len)
> +{
> + struct drm_device *ddev = data;
> + struct acpi_device *acpidev = ACPI_COMPANION(ddev->dev);
> + unsigned char start = block * EDID_LENGTH;
> + void *edid;
> + int r;
> +
> + if (!acpidev)
> + return -ENODEV;
> +
> + /* fetch the entire edid from BIOS */
> + r = acpi_video_get_edid(acpidev, ACPI_VIDEO_DISPLAY_LCD, -1, );
> + if (r < 0) {
> + DRM_DEBUG_KMS("Failed to get EDID from ACPI: %d\n", r);
> + return -EINVAL;
> + }
> + if (len > r || start > r || start + len > r) {
> + r = -EINVAL;
> + goto cleanup;
> + }
> +
> + memcpy(buf, edid + start, len);
> + r = 0;
> +
> +cleanup:
> + kfree(edid);
> +
> + return r;
> +}
> +
>  static void connector_bad_edid(struct drm_connector *connector,
>  const struct edid *edid, int num_blocks)
>  {
> @@ -2643,6 +2687,39 @@ struct edid *drm_get_edid(struct drm_connector 
> *connector,
>  }
>  EXPORT_SYMBOL(drm_get_edid);
>  
> +/**
> + * drm_get_acpi_edid - get EDID data, if available

I'd prefer all the new EDID API to be named drm_edid_*. Makes a clean
break from the old API, and is more consistent.

So perhaps drm_edid_read_acpi() to be in line with all the other struct
drm_edid based EDID reading functions.

> + * @connector: connector we're probing
> + *
> + * Use the BIOS to attempt to grab EDID data if possible.
> + *
> + * The returned pointer must be freed using drm_edid_free().
> + *
> + * Return: Pointer to valid EDID or NULL if we couldn't find any.
> + */
> +const struct drm_edid *drm_get_acpi_edid(struct drm_connector *connector)
> +{
> + const struct drm_edid *drm_edid;
> +
> + switch (connector->connector_type) {
> + case DRM_MODE_CONNECTOR_LVDS:
> + case DRM_MODE_CONNECTOR_eDP:
> + break;
> + default:
> + return NULL;
> + }
> +
> + if (connector->force == DRM_FORCE_OFF)
> + return NULL;
> +
> + drm_edid = drm_edid_read_custom(connector, drm_do_probe_acpi_edid, 
> connector->dev);
> +
> + /* Note: Do *not* call connector updates here. */
> +
> + return drm_edid;
> +}
> +EXPORT_SYMBOL(drm_get_acpi_edid);
> +
>  /**
>   * drm_edid_read_custom - Read EDID data using given EDID block read function
>   * @connector: Connector to use
> diff --git a/include/drm/drm_edid.h b/include/drm/drm_edid.h
> index 7923bc00dc7a..ca41be289fc6 100644
> --- a/include/drm/drm_edid.h
> +++ b/include/drm/drm_edid.h
> @@ -410,6 +410,7 @@ struct edid *drm_do_get_edid(struct drm_connector 
> *connector,
>   void *data);
>  struct edid *drm_get_edid(struct drm_connector *connector,
> struct i2c_adapter *adapter);
> +const struct drm_edid *drm_get_acpi_edid(struct drm_connector