[PATCH v2 4/4] drm/amd/pm: Add sysfs attribute to get pm log

2023-10-05 Thread Lijo Lazar
Add sysfs attribute to read power management log. A snapshot is
captured to the buffer when the attribute is read.

Signed-off-by: Lijo Lazar 
---

v2: Pass PAGE_SIZE as the max size of input buffer

 drivers/gpu/drm/amd/pm/amdgpu_pm.c | 40 ++
 1 file changed, 40 insertions(+)

diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c 
b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
index 4c65a2fac028..5a1d21c52672 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
@@ -1794,6 +1794,44 @@ static ssize_t amdgpu_set_apu_thermal_cap(struct device 
*dev,
return count;
 }
 
+static int amdgpu_pmlog_attr_update(struct amdgpu_device *adev,
+   struct amdgpu_device_attr *attr,
+   uint32_t mask,
+   enum amdgpu_device_attr_states *states)
+{
+   if (amdgpu_dpm_get_pm_log(adev, NULL, 0) == -EOPNOTSUPP)
+   *states = ATTR_STATE_UNSUPPORTED;
+
+   return 0;
+}
+
+static ssize_t amdgpu_get_pmlog(struct device *dev,
+   struct device_attribute *attr, char *buf)
+{
+   struct drm_device *ddev = dev_get_drvdata(dev);
+   struct amdgpu_device *adev = drm_to_adev(ddev);
+   ssize_t size = 0;
+   int ret;
+
+   if (amdgpu_in_reset(adev))
+   return -EPERM;
+   if (adev->in_suspend && !adev->in_runpm)
+   return -EPERM;
+
+   ret = pm_runtime_get_sync(ddev->dev);
+   if (ret < 0) {
+   pm_runtime_put_autosuspend(ddev->dev);
+   return ret;
+   }
+
+   size = amdgpu_dpm_get_pm_log(adev, buf, PAGE_SIZE);
+
+   pm_runtime_mark_last_busy(ddev->dev);
+   pm_runtime_put_autosuspend(ddev->dev);
+
+   return size;
+}
+
 /**
  * DOC: gpu_metrics
  *
@@ -2091,6 +2129,8 @@ static struct amdgpu_device_attr amdgpu_device_attrs[] = {
AMDGPU_DEVICE_ATTR_RW(smartshift_bias,  
ATTR_FLAG_BASIC,
  .attr_update = ss_bias_attr_update),
AMDGPU_DEVICE_ATTR_RW(xgmi_plpd_policy, 
ATTR_FLAG_BASIC),
+   AMDGPU_DEVICE_ATTR_RO(pmlog,
ATTR_FLAG_BASIC,
+ .attr_update = amdgpu_pmlog_attr_update),
 };
 
 static int default_attr_update(struct amdgpu_device *adev, struct 
amdgpu_device_attr *attr,
-- 
2.25.1



[PATCH v2 3/4] drm/amd/pm: Add pm log support to SMU v13.0.6

2023-10-05 Thread Lijo Lazar
Add support to fetch PM log sample from SMU v13.0.6

Signed-off-by: Lijo Lazar 
---

v2: Check if input buffer has enough space to copy log data

 drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h |  1 +
 .../pm/swsmu/inc/pmfw_if/smu_v13_0_6_ppsmc.h  |  4 +-
 drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h  |  4 +-
 .../drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c  | 59 +++
 4 files changed, 66 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h 
b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
index 0d84fb9640a6..01bc92875f3e 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
@@ -253,6 +253,7 @@ struct smu_table {
uint64_t mc_address;
void *cpu_addr;
struct amdgpu_bo *bo;
+   uint32_t version;
 };
 
 enum smu_perf_level_designation {
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_ppsmc.h 
b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_ppsmc.h
index 509e3cd483fb..891d03327ffa 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_ppsmc.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_ppsmc.h
@@ -91,7 +91,9 @@
 #define PPSMC_MSG_QueryValidMcaCeCount  0x3A
 #define PPSMC_MSG_McaBankCeDumpDW   0x3B
 #define PPSMC_MSG_SelectPLPDMode0x40
-#define PPSMC_Message_Count 0x41
+#define PPSMC_MSG_PmLogReadSample   0x41
+#define PPSMC_MSG_PmLogGetTableVersion  0x42
+#define PPSMC_Message_Count 0x43
 
 //PPSMC Reset Types for driver msg argument
 #define PPSMC_RESET_TYPE_DRIVER_MODE_1_RESET0x1
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h 
b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h
index 4850e48bbef5..6ea9adabe30f 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h
@@ -253,7 +253,9 @@
__SMU_DUMMY_MAP(QueryValidMcaCeCount),  \
__SMU_DUMMY_MAP(McaBankDumpDW), \
__SMU_DUMMY_MAP(McaBankCeDumpDW),   \
-   __SMU_DUMMY_MAP(SelectPLPDMode),
+   __SMU_DUMMY_MAP(SelectPLPDMode),\
+   __SMU_DUMMY_MAP(PmLogGetTableVersion),  \
+   __SMU_DUMMY_MAP(PmLogReadSample),
 
 #undef __SMU_DUMMY_MAP
 #define __SMU_DUMMY_MAP(type)  SMU_MSG_##type
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
index bf01a23f399a..e5f84d8dec80 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
@@ -172,6 +172,8 @@ static const struct cmn2asic_msg_mapping 
smu_v13_0_6_message_map[SMU_MSG_MAX_COU
MSG_MAP(McaBankDumpDW,   PPSMC_MSG_McaBankDumpDW,   
0),
MSG_MAP(McaBankCeDumpDW, PPSMC_MSG_McaBankCeDumpDW, 
0),
MSG_MAP(SelectPLPDMode,  PPSMC_MSG_SelectPLPDMode,  
0),
+   MSG_MAP(PmLogGetTableVersion,
PPSMC_MSG_PmLogGetTableVersion,0),
+   MSG_MAP(PmLogReadSample, PPSMC_MSG_PmLogReadSample, 
0),
 };
 
 static const struct cmn2asic_mapping smu_v13_0_6_clk_map[SMU_CLK_COUNT] = {
@@ -337,6 +339,61 @@ static int smu_v13_0_6_get_allowed_feature_mask(struct 
smu_context *smu,
return 0;
 }
 
+static int smu_v13_0_6_setup_pm_log(struct smu_context *smu)
+{
+   struct smu_table_context *smu_tbl_ctxt = >smu_table;
+   struct smu_table *table = _tbl_ctxt->tables[SMU_TABLE_PMSTATUSLOG];
+   uint32_t pmlog_version;
+   int ret;
+
+   if (!table->size)
+   return 0;
+
+   ret = smu_cmn_send_smc_msg(smu, SMU_MSG_PmLogGetTableVersion,
+  _version);
+   if (ret)
+   return ret;
+
+   table->version = pmlog_version;
+
+   return 0;
+}
+
+static ssize_t smu_v13_0_6_get_pm_log(struct smu_context *smu, void *log,
+ size_t max_size)
+{
+   struct smu_table_context *smu_tbl_ctxt = >smu_table;
+   struct smu_table *table = _tbl_ctxt->tables[SMU_TABLE_PMSTATUSLOG];
+   struct amdgpu_pmlog *pm_log = log;
+   uint32_t pmfw_version, log_size;
+   int ret;
+
+   if (smu->adev->flags & AMD_IS_APU)
+   return -EOPNOTSUPP;
+
+   if (!pm_log || !max_size)
+   return -EINVAL;
+
+   smu_cmn_get_smc_version(smu, NULL, _version);
+   ret = smu_cmn_send_smc_msg(smu, SMU_MSG_PmLogReadSample, _size);
+   if (ret)
+   return ret;
+
+   if (max_size < (log_size + sizeof(pm_log->common_header)))
+   return -EOVERFLOW;
+
+   amdgpu_asic_invalidate_hdp(smu->adev, NULL);
+   memcpy(pm_log->data, table->cpu_addr, log_size);
+
+   memset(_log->common_header, 0, sizeof(pm_log->common_header));
+   

[PATCH v2 1/4] drm/amdgpu: add pmlog structure definition

2023-10-05 Thread Lijo Lazar
From: Alex Deucher 

Define the pmlog structures to be exposed via sysfs.

Signed-off-by: Alex Deucher 
Signed-off-by: Lijo Lazar 
---
 drivers/gpu/drm/amd/include/kgd_pp_interface.h | 15 +++
 1 file changed, 15 insertions(+)

diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h 
b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
index e0bb6d39f0c3..9905228fd89c 100644
--- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
@@ -980,4 +980,19 @@ struct gpu_metrics_v2_4 {
uint16_taverage_soc_current;
uint16_taverage_gfx_current;
 };
+
+struct amdgpu_pmlog_header {
+   uint16_t structure_size;
+   uint16_t pad;
+   uint32_t mp1_ip_discovery_version;
+   uint32_t pmfw_version;
+   uint32_t pmlog_version;
+};
+
+struct amdgpu_pmlog {
+   struct amdgpu_pmlog_header common_header;
+
+   uint8_t data[];
+};
+
 #endif
-- 
2.25.1



[PATCH v2 2/4] drm/amd/pm: Add support to fetch pm log sample

2023-10-05 Thread Lijo Lazar
Add API support to fetch a snapshot of power management log from PMFW.

Signed-off-by: Lijo Lazar 
---

v2: Add max size of input buffer to take care of overflows

 drivers/gpu/drm/amd/include/kgd_pp_interface.h |  1 +
 drivers/gpu/drm/amd/pm/amdgpu_dpm.c| 16 
 drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h| 11 +++
 drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c  | 14 ++
 drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h  |  8 
 5 files changed, 50 insertions(+)

diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h 
b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
index 9905228fd89c..01eaafafd3c3 100644
--- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
@@ -426,6 +426,7 @@ struct amd_pm_funcs {
int (*set_df_cstate)(void *handle, enum pp_df_cstate state);
int (*set_xgmi_pstate)(void *handle, uint32_t pstate);
ssize_t (*get_gpu_metrics)(void *handle, void **table);
+   ssize_t (*get_pm_log)(void *handle, void *pmlog, size_t size);
int (*set_watermarks_for_clock_ranges)(void *handle,
   struct pp_smu_wm_range_sets 
*ranges);
int (*display_disable_memory_clock_switch)(void *handle,
diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c 
b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
index 1b17a71ed45e..1db899485309 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
@@ -1300,6 +1300,22 @@ int amdgpu_dpm_get_gpu_metrics(struct amdgpu_device 
*adev, void **table)
return ret;
 }
 
+ssize_t amdgpu_dpm_get_pm_log(struct amdgpu_device *adev, void *pm_log,
+ size_t size)
+{
+   const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
+   int ret = 0;
+
+   if (!pp_funcs->get_pm_log)
+   return 0;
+
+   mutex_lock(>pm.mutex);
+   ret = pp_funcs->get_pm_log(adev->powerplay.pp_handle, pm_log, size);
+   mutex_unlock(>pm.mutex);
+
+   return ret;
+}
+
 int amdgpu_dpm_get_fan_control_mode(struct amdgpu_device *adev,
uint32_t *fan_mode)
 {
diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h 
b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
index feccd2a7120d..ea2c1cc9c7b0 100644
--- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
+++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
@@ -511,6 +511,17 @@ int amdgpu_dpm_get_power_profile_mode(struct amdgpu_device 
*adev,
 int amdgpu_dpm_set_power_profile_mode(struct amdgpu_device *adev,
  long *input, uint32_t size);
 int amdgpu_dpm_get_gpu_metrics(struct amdgpu_device *adev, void **table);
+
+/**
+ * @get_pm_log: Get one snapshot of power management log from PMFW. The sample
+ * is copied to pmlog buffer. It's expected to be allocated by the caller. Max
+ * size expected for a log sample is 4096 bytes.
+ *
+ * Return: Actual size of the log
+ */
+ssize_t amdgpu_dpm_get_pm_log(struct amdgpu_device *adev, void *pmlog,
+ size_t size);
+
 int amdgpu_dpm_get_fan_control_mode(struct amdgpu_device *adev,
uint32_t *fan_mode);
 int amdgpu_dpm_set_fan_speed_pwm(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c 
b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
index 99750c182279..73f3e7915d23 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
@@ -3090,6 +3090,19 @@ static ssize_t smu_sys_get_gpu_metrics(void *handle, 
void **table)
return smu->ppt_funcs->get_gpu_metrics(smu, table);
 }
 
+static ssize_t smu_sys_get_pm_log(void *handle, void *pm_log, size_t size)
+{
+   struct smu_context *smu = handle;
+
+   if (!smu->pm_enabled || !smu->adev->pm.dpm_enabled)
+   return -EOPNOTSUPP;
+
+   if (!smu->ppt_funcs->get_pm_log)
+   return -EOPNOTSUPP;
+
+   return smu->ppt_funcs->get_pm_log(smu, pm_log, size);
+}
+
 static int smu_enable_mgpu_fan_boost(void *handle)
 {
struct smu_context *smu = handle;
@@ -3231,6 +3244,7 @@ static const struct amd_pm_funcs swsmu_pm_funcs = {
.set_df_cstate= smu_set_df_cstate,
.set_xgmi_pstate  = smu_set_xgmi_pstate,
.get_gpu_metrics  = smu_sys_get_gpu_metrics,
+   .get_pm_log   = smu_sys_get_pm_log,
.set_watermarks_for_clock_ranges = 
smu_set_watermarks_for_clock_ranges,
.display_disable_memory_clock_switch = 
smu_display_disable_memory_clock_switch,
.get_max_sustainable_clocks_by_dc= 
smu_get_max_sustainable_clocks_by_dc,
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h 
b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
index f3cab5e633a7..0d84fb9640a6 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
@@ 

[PATCH] drm/amdgpu: Increase IP discovery region size

2023-10-05 Thread Lijo Lazar
IP discovery region has increased to > 8K on some SOCs. Maximum reserve
size is up to 12K, but not used. For now increase to 10K.

Signed-off-by: Lijo Lazar 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h
index 3a2f347bd50d..4d03cd5b3410 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h
@@ -24,7 +24,7 @@
 #ifndef __AMDGPU_DISCOVERY__
 #define __AMDGPU_DISCOVERY__
 
-#define DISCOVERY_TMR_SIZE  (8 << 10)
+#define DISCOVERY_TMR_SIZE  (10 << 10)
 #define DISCOVERY_TMR_OFFSET(64 << 10)
 
 void amdgpu_discovery_fini(struct amdgpu_device *adev);
-- 
2.25.1



Re: [PATCH] drm/amdgpu: Annotate struct amdgpu_bo_list with __counted_by

2023-10-05 Thread Alex Deucher
Applied.  Thanks!

Alex

On Thu, Oct 5, 2023 at 10:32 AM Christian König
 wrote:
>
> Am 04.10.23 um 01:29 schrieb Kees Cook:
> > Prepare for the coming implementation by GCC and Clang of the __counted_by
> > attribute. Flexible array members annotated with __counted_by can have
> > their accesses bounds-checked at run-time via CONFIG_UBSAN_BOUNDS (for
> > array indexing) and CONFIG_FORTIFY_SOURCE (for strcpy/memcpy-family
> > functions).
> >
> > As found with Coccinelle[1], add __counted_by for struct amdgpu_bo_list.
> > Additionally, since the element count member must be set before accessing
> > the annotated flexible array member, move its initialization earlier.
> >
> > Cc: Alex Deucher 
> > Cc: "Christian König" 
> > Cc: "Pan, Xinhui" 
> > Cc: David Airlie 
> > Cc: Daniel Vetter 
> > Cc: "Gustavo A. R. Silva" 
> > Cc: Luben Tuikov 
> > Cc: Christophe JAILLET 
> > Cc: Felix Kuehling 
> > Cc: amd-gfx@lists.freedesktop.org
> > Cc: dri-de...@lists.freedesktop.org
> > Cc: linux-harden...@vger.kernel.org
> > Link: 
> > https://github.com/kees/kernel-tools/blob/trunk/coccinelle/examples/counted_by.cocci
> >  [1]
> > Signed-off-by: Kees Cook 
>
> Reviewed-by: Christian König 
>
> > ---
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c | 2 +-
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h | 2 +-
> >   2 files changed, 2 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c 
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
> > index 6f5b641b631e..781e5c5ce04d 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
> > @@ -84,6 +84,7 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, 
> > struct drm_file *filp,
> >
> >   kref_init(>refcount);
> >
> > + list->num_entries = num_entries;
> >   array = list->entries;
> >
> >   for (i = 0; i < num_entries; ++i) {
> > @@ -129,7 +130,6 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, 
> > struct drm_file *filp,
> >   }
> >
> >   list->first_userptr = first_userptr;
> > - list->num_entries = num_entries;
> >   sort(array, last_entry, sizeof(struct amdgpu_bo_list_entry),
> >amdgpu_bo_list_entry_cmp, NULL);
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h 
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
> > index 6a703be45d04..555cd6d877c3 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
> > @@ -56,7 +56,7 @@ struct amdgpu_bo_list {
> >*/
> >   struct mutex bo_list_mutex;
> >
> > - struct amdgpu_bo_list_entry entries[];
> > + struct amdgpu_bo_list_entry entries[] __counted_by(num_entries);
> >   };
> >
> >   int amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id,
>



Re: [PATCH v2 1/2] usb: typec: ucsi: Use GET_CAPABILITY attributes data to set power supply scope

2023-10-05 Thread Sebastian Reichel
Hi,

On Thu, Oct 05, 2023 at 12:52:29PM -0500, Mario Limonciello wrote:
> On some OEM systems, adding a W7900 dGPU triggers RAS errors and hangs
> at a black screen on startup.  This issue occurs only if `ucsi_acpi` has
> loaded before `amdgpu` has loaded.  The reason for this failure is that
> `amdgpu` uses power_supply_is_system_supplied() to determine if running
> on AC or DC power at startup. If this value is reported incorrectly the
> dGPU will also be programmed incorrectly and trigger errors.
> 
> power_supply_is_system_supplied() reports the wrong value because UCSI
> power supplies provided as part of the system don't properly report the
> scope as "DEVICE" scope (not powering the system).
> 
> In order to fix this issue check the capabilities reported from the UCSI
> power supply to ensure that it supports charging a battery and that it can
> be powered by AC.  Mark the scope accordingly.
> 
> Fixes: a7fbfd44c020 ("usb: typec: ucsi: Mark dGPUs as DEVICE scope")
> Link: 
> https://www.intel.com/content/www/us/en/products/docs/io/universal-serial-bus/usb-type-c-ucsi-spec.html
>  p28
> Signed-off-by: Mario Limonciello 
> ---
> Cc: Kai-Heng Feng 
> Cc: Alex Deucher >
> Cc: Richard Gong 
> ---
>  drivers/usb/typec/ucsi/psy.c | 9 +
>  1 file changed, 9 insertions(+)
> 
> diff --git a/drivers/usb/typec/ucsi/psy.c b/drivers/usb/typec/ucsi/psy.c
> index 384b42267f1f..b35c6e07911e 100644
> --- a/drivers/usb/typec/ucsi/psy.c
> +++ b/drivers/usb/typec/ucsi/psy.c
> @@ -37,6 +37,15 @@ static int ucsi_psy_get_scope(struct ucsi_connector *con,
>   struct device *dev = con->ucsi->dev;
>  
>   device_property_read_u8(dev, "scope", );
> + if (scope == POWER_SUPPLY_SCOPE_UNKNOWN) {
> + u32 mask = UCSI_CAP_ATTR_POWER_AC_SUPPLY |
> +UCSI_CAP_ATTR_BATTERY_CHARGING;
> +
> + if (con->ucsi->cap.attributes & mask)
> + scope = POWER_SUPPLY_SCOPE_SYSTEM;
> + else
> + scope = POWER_SUPPLY_SCOPE_DEVICE;
> + }
>   val->intval = scope;
>   return 0;
>  }

Reviewed-by: Sebastian Reichel 

-- Sebastian


signature.asc
Description: PGP signature


Re: [PATCH 2/3] power: supply: Don't count 'unknown' scope power supplies

2023-10-05 Thread Mario Limonciello

On 10/4/2023 18:10, Sebastian Reichel wrote:

Hi,

On Sun, Oct 01, 2023 at 07:00:11PM -0500, Mario Limonciello wrote:

Let me try to add more detail.

This is an OEM system that has 3 USB type C ports.  It's an Intel system,
but this doesn't matter for the issue.
* when ucsi_acpi is not loaded there are no power supplies in the system and
it reports power_supply_is_system_supplied() as AC.
* When ucsi_acpi is loaded 3 power supplies will be registered.
power_supply_is_system_supplied() reports as DC.

Now when you add in a Navi3x AMD dGPU to the system the power supplies don't
change.  This particular dGPU model doesn't contain a USB-C port, so there
is no UCSI power supply registered.

As amdgpu is loaded it looks at device initialization whether the system is
powered by AC or DC.  Here is how it looks:

https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/tree/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c?h=linux-6.5.y#n3834

On the OEM system if amdgpu loads before the ucsi_acpi driver (such as in
the initramfs) then the right value is returned for
power_supply_is_system_supplied() - AC.

If amdgpu is loaded after the ucsi_acpi driver, the wrong value is returned
for power_supply_is_system_supplied() - DC.

This value is very important to set up the dGPU properly.  If the wrong
value is returned, the wrong value will be notified to the hardware and the
hardware will not behave properly.  On the OEM system this is a "black
screen" at bootup along with RAS errors emitted by the dGPU.

With no changes to a malfunctioning kernel or initramfs binaries I can add
modprobe.blacklist=ucsi_acpi to kernel command line avoid registering those
3 power supplies and the system behaves properly.

So I think it's inappropriate for "UNKNOWN" scope power supplies to be
registered and treated as system supplies, at least as it pertains to
power_supply_is_system_supplied().


So the main issue is, that the ucsi_acpi registers a bunch of
power-supply chargers with unknown scope on a desktop systems
and that results in the system assumed to be supplied from battery.

The problem with your change is, that many of the charger drivers
don't set a scope at all (and thus report unknown scope). Those
obviously should not be skipped. Probably most of these drivers
could be changed to properly set the scope, but it needs to be
checked on a case-by-case basis. With your current patch they would
regress in the oposite direction of your use-case.

Ideally ucsi is changed to properly describe the scope, but I
suppose this information is not available in ACPI?

Assuming that the above are not solvable easily, my idea would be to
only count the number of POWER_SUPPLY_TYPE_BATTERY device, which have
!POWER_SUPPLY_SCOPE_DEVICE and exit early if there are none.
Basically change __power_supply_is_system_supplied(), so that it
looks like this:

...
if (!psy->desc->get_property(psy, POWER_SUPPLY_PROP_SCOPE, ))
if (ret.intval == POWER_SUPPLY_SCOPE_DEVICE)
return 0;

if (psy->desc->type == POWER_SUPPLY_TYPE_BATTERY)
(*count)++;
 else
if (!psy->desc->get_property(psy, POWER_SUPPLY_PROP_ONLINE,
))
return ret.intval;
...

That should work in both cases.



I tested both your suggestion as well as modifying UCSI driver to set 
the scope.  Both worked.


I've sent out v2 modifying the scope for UCSI driver.  If for some 
reason that ends up not working out we can revert to your generic 
suggestion.


https://lore.kernel.org/linux-usb/20231005175230.232764-1-mario.limoncie...@amd.com/T/#m9543f1f2c3767c0e88135c2e3f15ced65cfdf004


-- Sebastian


   drivers/power/supply/power_supply_core.c | 2 +-
   1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/power/supply/power_supply_core.c 
b/drivers/power/supply/power_supply_core.c
index d325e6dbc770..3de6e6d00815 100644
--- a/drivers/power/supply/power_supply_core.c
+++ b/drivers/power/supply/power_supply_core.c
@@ -349,7 +349,7 @@ static int __power_supply_is_system_supplied(struct device 
*dev, void *data)
unsigned int *count = data;
if (!psy->desc->get_property(psy, POWER_SUPPLY_PROP_SCOPE, ))
-   if (ret.intval == POWER_SUPPLY_SCOPE_DEVICE)
+   if (ret.intval != POWER_SUPPLY_SCOPE_SYSTEM)
return 0;
(*count)++;
--
2.34.1







Re: [PATCH v4 1/1] drm/amdkfd: get doorbell's absolute offset based on the db_size

2023-10-05 Thread Felix Kuehling

On 2023-10-05 13:20, Arvind Yadav wrote:

Here, db_size in bytes is added to find the doorbell's
absolute offset for both 32-bit and 64-bit doorbell sizes,
so that the doorbell offset will be aligned based on the
doorbell size.

v2:
- Addressed the review comment from Felix.
v3:
- Adding doorbell_size as parameter to get db absolute offset.
v4:
   Squash the two patches into one.

Cc: Christian Koenig 
Cc: Alex Deucher 
Signed-off-by: Shashank Sharma 
Signed-off-by: Arvind Yadav 


Reviewed-by: Felix Kuehling 



---
  drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h|  5 +++--
  drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell_mgr.c| 13 +
  .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c   |  3 ++-
  drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c   | 10 --
  .../gpu/drm/amd/amdkfd/kfd_process_queue_manager.c  |  3 ++-
  5 files changed, 24 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
index 09f6727e7c73..4a8b33f55f6b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
@@ -357,8 +357,9 @@ int amdgpu_doorbell_init(struct amdgpu_device *adev);
  void amdgpu_doorbell_fini(struct amdgpu_device *adev);
  int amdgpu_doorbell_create_kernel_doorbells(struct amdgpu_device *adev);
  uint32_t amdgpu_doorbell_index_on_bar(struct amdgpu_device *adev,
-  struct amdgpu_bo *db_bo,
-  uint32_t doorbell_index);
+ struct amdgpu_bo *db_bo,
+ uint32_t doorbell_index,
+ uint32_t db_size);
  
  #define RDOORBELL32(index) amdgpu_mm_rdoorbell(adev, (index))

  #define WDOORBELL32(index, v) amdgpu_mm_wdoorbell(adev, (index), (v))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell_mgr.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell_mgr.c
index da4be0bbb446..6690f5a72f4d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell_mgr.c
@@ -114,19 +114,24 @@ void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, 
u32 index, u64 v)
   * @adev: amdgpu_device pointer
   * @db_bo: doorbell object's bo
   * @db_index: doorbell relative index in this doorbell object
+ * @db_size: doorbell size is in byte
   *
   * returns doorbell's absolute index in BAR
   */
  uint32_t amdgpu_doorbell_index_on_bar(struct amdgpu_device *adev,
-  struct amdgpu_bo *db_bo,
-  uint32_t doorbell_index)
+ struct amdgpu_bo *db_bo,
+ uint32_t doorbell_index,
+ uint32_t db_size)
  {
int db_bo_offset;
  
  	db_bo_offset = amdgpu_bo_gpu_offset_no_check(db_bo);
  
-	/* doorbell index is 32 bit but doorbell's size is 64-bit, so *2 */

-   return db_bo_offset / sizeof(u32) + doorbell_index * 2;
+   /* doorbell index is 32 bit but doorbell's size can be 32 bit
+* or 64 bit, so *db_size(in byte)/4 for alignment.
+*/
+   return db_bo_offset / sizeof(u32) + doorbell_index *
+  DIV_ROUND_UP(db_size, 4);
  }
  
  /**

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 0d3d538b64eb..e07652e72496 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -407,7 +407,8 @@ static int allocate_doorbell(struct qcm_process_device *qpd,
  
  	q->properties.doorbell_off = amdgpu_doorbell_index_on_bar(dev->adev,

  
qpd->proc_doorbells,
- 
q->doorbell_id);
+ 
q->doorbell_id,
+ 
dev->kfd->device_info.doorbell_size);
return 0;
  }
  
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c

index 7b38537c7c99..05c74887fd6f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
@@ -161,7 +161,10 @@ void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
if (inx >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)
return NULL;
  
-	*doorbell_off = amdgpu_doorbell_index_on_bar(kfd->adev, kfd->doorbells, inx);

+   *doorbell_off = amdgpu_doorbell_index_on_bar(kfd->adev,
+kfd->doorbells,
+inx,
+
kfd->device_info.doorbell_size);
inx *= 2;
  
  	pr_debug("Get kernel queue doorbell\n"

@@ -240,7 +243,10 

Re: [PATCH 3/3] drm/amdkfd: Check bitmap_mapped flag to skip retry fault

2023-10-05 Thread Philip Yang

  


On 2023-10-02 13:08, Chen, Xiaogang
  wrote:


  
  On 9/29/2023 9:11 AM, Philip Yang wrote:
  
  Caution: This message originated from an
External Source. Use proper caution when opening attachments,
clicking links, or responding.



Use bitmap_mapped flag to check if range already mapped to the
specific

GPU, to skip the retry fault from different page of the same
range.


Remove prange validate_timestamp which is not accurate for
multiple

GPUs.


Signed-off-by: Philip Yang 

---

  drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 24


  drivers/gpu/drm/amd/amdkfd/kfd_svm.h |  1 -

  2 files changed, 8 insertions(+), 17 deletions(-)


diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c

index ac65bf25c685..5e063d902a46 100644

--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c

+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c

@@ -43,10 +43,6 @@


  #define AMDGPU_SVM_RANGE_RESTORE_DELAY_MS 1


-/* Long enough to ensure no retry fault comes after svm range
is restored and

- * page table is updated.

- */

-#define AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING   (2UL *
NSEC_PER_MSEC)

  #if IS_ENABLED(CONFIG_DYNAMIC_DEBUG)

  #define dynamic_svm_range_dump(svms) \

 _dynamic_func_call_no_desc("svm_range_dump",
svm_range_debug_dump, svms)

@@ -365,7 +361,6 @@ svm_range *svm_range_new(struct
svm_range_list *svms, uint64_t start,

 INIT_LIST_HEAD(>deferred_list);

 INIT_LIST_HEAD(>child_list);

 atomic_set(>invalid, 0);

-   prange->validate_timestamp = 0;

 mutex_init(>migrate_mutex);

 mutex_init(>lock);


@@ -1876,8 +1871,6 @@ static int
svm_range_validate_and_map(struct mm_struct *mm,

 }


 svm_range_unreserve_bos(ctx);

-   if (!r)

-   prange->validate_timestamp =
ktime_get_boottime();


  free_ctx:

 kfree(ctx);

@@ -3162,15 +3155,6 @@ svm_range_restore_pages(struct
amdgpu_device *adev, unsigned int pasid,

 goto out_unlock_range;

 }


-   /* skip duplicate vm fault on different pages of same
range */

-   if (ktime_before(timestamp,
ktime_add_ns(prange->validate_timestamp,

-  
AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING))) {

-   pr_debug("svms 0x%p [0x%lx %lx] already
restored\n",

-    svms, prange->start,
prange->last);

-   r = 0;

-   goto out_unlock_range;

-   }

-

 /* __do_munmap removed VMA, return success as we are
handling stale

  * retry fault.

  */

@@ -3196,6 +3180,14 @@ svm_range_restore_pages(struct
amdgpu_device *adev, unsigned int pasid,

 goto out_unlock_range;

 }


+   /* skip duplicate vm fault on different pages of same
range */

  
  
  I think the following call means if the prange->granularity
  range that the addr is in is mapped on gpuidex already, not
  different pages of same range.
  

yes, the comment should be updated to "skip duplicate vm fault on
different pages of same granularity range"

  
  Regards
  
  
  Xiaogang
  
  
  +   if
(svm_range_partial_mapped_dev(gpuidx, prange, addr, addr)) {

+   pr_debug("svms 0x%p [0x%lx %lx] already restored
on gpu %d\n",

+    svms, prange->start,
prange->last, gpuidx);

+   r = 0;

+   goto out_unlock_range;

+   }

+

 pr_debug("svms %p [0x%lx 0x%lx] best restore 0x%x,
actual loc 0x%x\n",

  svms, prange->start, prange->last,
best_loc,
 

[linux-next:master] BUILD REGRESSION 7d730f1bf6f39ece2d9f3ae682f12e5b593d534d

2023-10-05 Thread kernel test robot
tree/branch: 
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git master
branch HEAD: 7d730f1bf6f39ece2d9f3ae682f12e5b593d534d  Add linux-next specific 
files for 20231005

Error/Warning reports:

https://lore.kernel.org/oe-kbuild-all/202309122047.cri9yjrq-...@intel.com
https://lore.kernel.org/oe-kbuild-all/202309192314.vbsjiim5-...@intel.com
https://lore.kernel.org/oe-kbuild-all/202309212121.cul1ptra-...@intel.com
https://lore.kernel.org/oe-kbuild-all/202309212339.hxhbu2f1-...@intel.com
https://lore.kernel.org/oe-kbuild-all/202309221945.uwcq56zg-...@intel.com
https://lore.kernel.org/oe-kbuild-all/202310041744.d34giv9v-...@intel.com
https://lore.kernel.org/oe-kbuild-all/202310042215.w9pg3rqs-...@intel.com
https://lore.kernel.org/oe-kbuild-all/202310051547.40nm4sif-...@intel.com
https://lore.kernel.org/oe-kbuild-all/202310052201.anvbpgpr-...@intel.com

Error/Warning: (recently discovered and may have been fixed)

Documentation/gpu/amdgpu/thermal:43: ./drivers/gpu/drm/amd/pm/amdgpu_pm.c:988: 
WARNING: Unexpected indentation.
arch/x86/include/asm/string_32.h:150:25: warning: '__builtin_memcpy' writing 3 
bytes into a region of size 0 overflows the destination [-Wstringop-overflow=]
drivers/cpufreq/sti-cpufreq.c:215:50: warning: '%d' directive output may be 
truncated writing between 1 and 10 bytes into a region of size 2 
[-Wformat-truncation=]
drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c:274: warning: Function parameter or 
member 'gart_placement' not described in 'amdgpu_gmc_gart_location'
fs/bcachefs/bcachefs_format.h:215:25: warning: 'p' offset 3 in 'struct bkey' 
isn't aligned to 4 [-Wpacked-not-aligned]
fs/bcachefs/bcachefs_format.h:217:25: warning: 'version' offset 27 in 'struct 
bkey' isn't aligned to 4 [-Wpacked-not-aligned]
fs/gfs2/inode.c:1876:14: sparse:struct gfs2_glock *
fs/gfs2/inode.c:1876:14: sparse:struct gfs2_glock [noderef] __rcu *
fs/gfs2/super.c:1543:17: sparse:struct gfs2_glock *
fs/gfs2/super.c:1543:17: sparse:struct gfs2_glock [noderef] __rcu *
include/linux/fortify-string.h:57:33: warning: writing 8 bytes into a region of 
size 0 [-Wstringop-overflow=]
kernel/bpf/helpers.c:1906:19: warning: no previous declaration for 
'bpf_percpu_obj_new_impl' [-Wmissing-declarations]
kernel/bpf/helpers.c:1942:18: warning: no previous declaration for 
'bpf_percpu_obj_drop_impl' [-Wmissing-declarations]
kernel/bpf/helpers.c:2477:18: warning: no previous declaration for 'bpf_throw' 
[-Wmissing-declarations]

Unverified Error/Warning (likely false positive, please contact us if 
interested):

Documentation/devicetree/bindings/mfd/qcom-pm8xxx.yaml:
arch/x86/kvm/x86.c:8891 x86_emulate_instruction() warn: missing error code? 'r'
drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c:209 amdgpu_mca_smu_get_mca_entry() 
warn: variable dereferenced before check 'mca_funcs' (see line 200)
drivers/gpu/drm/i915/display/intel_psr.c:3185 i915_psr_sink_status_show() 
error: uninitialized symbol 'error_status'.
drivers/gpu/drm/i915/display/intel_tc.c:327 mtl_tc_port_get_max_lane_count() 
error: uninitialized symbol 'pin_mask'.
fs/exfat/namei.c:393 exfat_find_empty_entry() error: uninitialized symbol 
'last_clu'.
fs/ntfs3/bitmap.c:663 wnd_init() warn: Please consider using kvcalloc instead 
of kvmalloc_array
fs/ntfs3/super.c:466:23: sparse: sparse: unknown escape sequence: '\%'
lib/kunit/executor_test.c:39 parse_filter_test() error: double free of 
'filter.suite_glob'
lib/kunit/executor_test.c:40 parse_filter_test() error: double free of 
'filter.test_glob'
scripts/mod/modpost.c:1437:14: warning: passing 'typeof (rela->r_offset) *' 
(aka 'const unsigned long *') to parameter of type 'void *' discards qualifiers 
[-Wincompatible-pointer-types-discards-qualifiers]
scripts/mod/modpost.c:1440:11: warning: passing 'typeof (rela->r_addend) *' 
(aka 'const long *') to parameter of type 'void *' discards qualifiers 
[-Wincompatible-pointer-types-discards-qualifiers]
scripts/mod/modpost.c:1472:14: warning: passing 'typeof (rel->r_offset) *' (aka 
'const unsigned long *') to parameter of type 'void *' discards qualifiers 
[-Wincompatible-pointer-types-discards-qualifiers]
{standard input}:1127: Error: unknown .loc sub-directive `is_stm'

Error/Warning ids grouped by kconfigs:

gcc_recent_errors
|-- arc-allmodconfig
|   |-- 
drivers-gpu-drm-amd-amdgpu-amdgpu_gmc.c:warning:Function-parameter-or-member-gart_placement-not-described-in-amdgpu_gmc_gart_location
|   |-- 
fs-bcachefs-bcachefs_format.h:warning:p-offset-in-struct-bkey-isn-t-aligned-to
|   `-- 
fs-bcachefs-bcachefs_format.h:warning:version-offset-in-struct-bkey-isn-t-aligned-to
|-- arc-allyesconfig
|   |-- 
drivers-gpu-drm-amd-amdgpu-amdgpu_gmc.c:warning:Function-parameter-or-member-gart_placement-not-described-in-amdgpu_gmc_gart_location
|   |-- 
fs-bcachefs-bcachefs_format.h:warning:p-offset-in-struct-bkey-isn-t-aligned-to
|   `-- 
fs-bcachefs-bcachefs_format.h:warning:version-offset-in-struct-bkey-isn-t-aligned-to
|-- arm-allmodconfig
|   `--

Re: [PATCH 2/3] amd/amdkfd: Unmap range from GPUs based on granularity

2023-10-05 Thread Philip Yang

  


On 2023-10-02 15:27, Felix Kuehling
  wrote:


  
  On 2023-09-29 10:11, Philip Yang
wrote:
  
  
Align unmap range start and last address to granularity boundary.
Skip unmap if range is already unmapped from GPUs.
  
  This only handles unmap due to MMU notifiers with XNACK on.
What about svm_range_unmap_from_cpu?
  

unmap_from_cpu is going to remove the range; we cannot align the range
based on granularity, so we still split the prange and unmap the exact
range from the GPU.

   
  Regards,
  Felix
  
  
  
This also solve the rocgdb CWSR migration related issue.

Signed-off-by: Philip Yang 
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 35 
 1 file changed, 31 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 626e0dd4ec79..ac65bf25c685 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -2004,6 +2004,26 @@ static void svm_range_restore_work(struct work_struct *work)
 	mmput(mm);
 }
 
+static unsigned long
+svm_range_align_start(struct svm_range *prange, unsigned long start)
+{
+	unsigned long start_align;
+
+	start_align = ALIGN_DOWN(start, 1UL << prange->granularity);
+	start_align = max_t(unsigned long, start_align, prange->start);
+	return start_align;
+}
+
+static unsigned long
+svm_range_align_last(struct svm_range *prange, unsigned long last)
+{
+	unsigned long last_align;
+
+	last_align = ALIGN(last, 1UL << prange->granularity) - 1;
  
  I think this should be
  	last_align = ALIGN(last + 1, 1UL << prange->granularity) - 1;

  Otherwise you're off by one granule when (last & (1UL
<< prange->granularity)) == 0.
  
  
  
  
+	last_align = min_t(unsigned long, last_align, prange->last);
+	return last_align;
+}
+
 /**
  * svm_range_evict - evict svm range
  * @prange: svm range structure
@@ -2078,6 +2098,12 @@ svm_range_evict(struct svm_range *prange, struct mm_struct *mm,
 		unsigned long s, l;
 		uint32_t trigger;
 
+		if (!svm_range_partial_mapped(prange, start, last)) {
+			pr_debug("svms 0x%p [0x%lx 0x%lx] unmapped already\n",
+prange->svms, start, last);
+			return 0;
+		}
+
 		if (event == MMU_NOTIFY_MIGRATE)
 			trigger = KFD_SVM_UNMAP_TRIGGER_MMU_NOTIFY_MIGRATE;
 		else
@@ -2085,16 +2111,17 @@ svm_range_evict(struct svm_range *prange, struct mm_struct *mm,
 
 		pr_debug("invalidate unmap svms 0x%p [0x%lx 0x%lx] from GPUs\n",
 			 prange->svms, start, last);
+
 		list_for_each_entry(pchild, >child_list, child_list) {
 			mutex_lock_nested(>lock, 1);
-			s = max(start, pchild->start);
-			l = min(last, pchild->last);
+			s = svm_range_align_start(pchild, start);
+			l = svm_range_align_last(pchild, last);
 			if (l >= s)
 svm_range_unmap_from_gpus(pchild, s, l, trigger);
 			mutex_unlock(>lock);
 		}
-		s = max(start, prange->start);
-		l = min(last, prange->last);
+		s = svm_range_align_start(prange, start);
+		l = svm_range_align_last(prange, last);
 		if (l >= s)
 			svm_range_unmap_from_gpus(prange, s, l, trigger);
 	}

  

  



Re: [PATCH 2/3] amd/amdkfd: Unmap range from GPUs based on granularity

2023-10-05 Thread Philip Yang

  


On 2023-10-02 13:06, Chen, Xiaogang
  wrote:


  
  On 9/29/2023 9:11 AM, Philip Yang wrote:
  
  Caution: This message originated from an
External Source. Use proper caution when opening attachments,
clicking links, or responding.



Align unmap range start and last address to granularity
boundary.

Skip unmap if range is already unmapped from GPUs.


This also solve the rocgdb CWSR migration related issue.


Signed-off-by: Philip Yang 

---

  drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 35


  1 file changed, 31 insertions(+), 4 deletions(-)


diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c

index 626e0dd4ec79..ac65bf25c685 100644

--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c

+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c

@@ -2004,6 +2004,26 @@ static void svm_range_restore_work(struct
work_struct *work)

 mmput(mm);

  }


+static unsigned long

+svm_range_align_start(struct svm_range *prange, unsigned long
start)

+{

+   unsigned long start_align;

+

+   start_align = ALIGN_DOWN(start, 1UL <<
prange->granularity);

+   start_align = max_t(unsigned long, start_align,
prange->start);

+   return start_align;

+}

+

+static unsigned long

+svm_range_align_last(struct svm_range *prange, unsigned long
last)

+{

+   unsigned long last_align;

+

+   last_align = ALIGN(last, 1UL <<
prange->granularity) - 1;

  
  
  should be ALIGN(last + 1, 1UL << prange->granularity) -
  1;? Here last is included last page number.
  

yes, you are right, if evicting range [0, 0x200], we should unmap
range [0x0, 0x3ff].

  
  Regards
  
  
  Xiaogang
  
  
  +   last_align = min_t(unsigned long,
last_align, prange->last);

+   return last_align;

+}

+

  /**

   * svm_range_evict - evict svm range

   * @prange: svm range structure

@@ -2078,6 +2098,12 @@ svm_range_evict(struct svm_range *prange,
struct mm_struct *mm,

 unsigned long s, l;

 uint32_t trigger;


+   if (!svm_range_partial_mapped(prange, start,
last)) {

+   pr_debug("svms 0x%p [0x%lx 0x%lx]
unmapped already\n",

+   prange->svms, start, last);

+   return 0;

+   }

+

 if (event == MMU_NOTIFY_MIGRATE)

 trigger =
KFD_SVM_UNMAP_TRIGGER_MMU_NOTIFY_MIGRATE;

 else

@@ -2085,16 +2111,17 @@ svm_range_evict(struct svm_range
*prange, struct mm_struct *mm,


 pr_debug("invalidate unmap svms 0x%p [0x%lx
0x%lx] from GPUs\n",

  prange->svms, start, last);

+

 list_for_each_entry(pchild,
>child_list, child_list) {

 mutex_lock_nested(>lock,
1);

-   s = max(start, pchild->start);

-   l = min(last, pchild->last);

+   s = svm_range_align_start(pchild,
start);

+   l = svm_range_align_last(pchild, last);

 if (l >= s)


svm_range_unmap_from_gpus(pchild, s, l, trigger);

 mutex_unlock(>lock);

 }

-   s = max(start, prange->start);

-   l = min(last, prange->last);

+   s = svm_range_align_start(prange, start);

+   l = svm_range_align_last(prange, last);

 if (l >= s)

 svm_range_unmap_from_gpus(prange, s, l,
trigger);

Re: [PATCH 1/3] amd/amdkfd: Add granularity bitmap mapped to gpu flag

2023-10-05 Thread Philip Yang

  


On 2023-10-02 14:35, Felix Kuehling
  wrote:


  
  
  
  On 2023-09-29 10:11, Philip Yang
wrote:
  
  
Replace prange->mapped_to_gpu with prange->bitmap_mapped[], which is
based on prange granularity, updated when map to GPUS or unmap from
GPUs, to optimize multiple GPU map, unmap and retry fault recover.

svm_range_is_mapped is false only if no partial range mapping on any
GPUs.

Split the bitmap_mapped when unmap from cpu to split the prange.

Signed-off-by: Philip Yang 
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 218 ++-
 drivers/gpu/drm/amd/amdkfd/kfd_svm.h |   4 +-
 2 files changed, 184 insertions(+), 38 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 040dc32ad475..626e0dd4ec79 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -292,12 +292,12 @@ static void svm_range_free(struct svm_range *prange, bool do_unmap)
 	KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, 0);
 	}
 
-	/* free dma_addr array for each gpu */
+	/* free dma_addr array, bitmap_mapped for each gpu */
 	for (gpuidx = 0; gpuidx < MAX_GPU_INSTANCE; gpuidx++) {
-		if (prange->dma_addr[gpuidx]) {
+		if (prange->dma_addr[gpuidx])
 			kvfree(prange->dma_addr[gpuidx]);
-prange->dma_addr[gpuidx] = NULL;
-		}
+		if (prange->bitmap_mapped[gpuidx])
+			bitmap_free(prange->bitmap_mapped[gpuidx]);
 	}
 
 	mutex_destroy(>lock);
@@ -323,19 +323,38 @@ svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start,
 	uint64_t size = last - start + 1;
 	struct svm_range *prange;
 	struct kfd_process *p;
-
-	prange = kzalloc(sizeof(*prange), GFP_KERNEL);
-	if (!prange)
-		return NULL;
+	unsigned int nbits;
+	uint32_t gpuidx;
 
 	p = container_of(svms, struct kfd_process, svms);
 	if (!p->xnack_enabled && update_mem_usage &&
 	amdgpu_amdkfd_reserve_mem_limit(NULL, size << PAGE_SHIFT,
 KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, 0)) {
 		pr_info("SVM mapping failed, exceeds resident system memory limit\n");
-		kfree(prange);
 		return NULL;
 	}
+
+	prange = kzalloc(sizeof(*prange), GFP_KERNEL);
+	if (!prange)
+		return NULL;
+
+	svm_range_set_default_attributes(>preferred_loc,
+	 >prefetch_loc,
+	 >granularity, >flags);
+
+	nbits = svm_range_mapped_nbits(size, prange->granularity);
+	pr_debug("prange 0x%p [0x%llx 0x%llx] bitmap_mapped nbits %d\n", prange,
+		 start, last, nbits);
+	for_each_set_bit(gpuidx, p->svms.bitmap_supported, p->n_pdds) {
+		prange->bitmap_mapped[gpuidx] = bitmap_zalloc(nbits, GFP_KERNEL);
+		if (!prange->bitmap_mapped[gpuidx]) {
+			while (gpuidx--)
+bitmap_free(prange->bitmap_mapped[gpuidx]);
+			kfree(prange);
+			return NULL;
+		}
+	}
+
 	prange->npages = size;
 	prange->svms = svms;
 	prange->start = start;
@@ -354,10 +373,6 @@ svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start,
 		bitmap_copy(prange->bitmap_access, svms->bitmap_supported,
 			MAX_GPU_INSTANCE);
 
-	svm_range_set_default_attributes(>preferred_loc,
-	 >prefetch_loc,
-	 >granularity, >flags);
-
 	pr_debug("svms 0x%p [0x%llx 0x%llx]\n", svms, start, last);
 
 	return prange;
@@ -972,6 +987,48 @@ svm_range_split_nodes(struct svm_range *new, struct svm_range *old,
 	return 0;
 }
 
+static int
+svm_range_split_bitmap_mapped(struct svm_range *new, struct svm_range *old,
+			  uint64_t start, uint64_t last)
+{
+	struct kfd_process *p = container_of(new->svms, struct kfd_process, svms);
+	unsigned int nbits, old_nbits, old_nbits2;
+	unsigned long *bits;
+	uint32_t gpuidx;
+
+	nbits = svm_range_mapped_nbits(new->npages, new->granularity);
+	old_nbits = svm_range_mapped_nbits(old->npages, old->granularity);
+	old_nbits2 = svm_range_mapped_nbits(last - start + 1, old->granularity);
  
  This may be off by one if start and last are not aligned on
granularity boundaries. I think you need to calculate the index
for each of start and last and subtract the indices. E.g.
granularity = 9, start = 511, last = 512. last - start + 1 is 2
and the division tells you you need one bit. But this range
touches two different granules, so you need two bits.
  

right, thanks, will check granularity boundary to calculate nbits.

   
  
  
  
+
+	pr_debug("old 0x%p [0x%lx 0x%lx] => [0x%llx 0x%llx] nbits %d => %d\n",
+		 old, old->start, old->last, start, last, old_nbits, old_nbits2);
+	pr_debug("new 0x%p [0x%lx 0x%lx] nbits %d\n", new, new->start, new->last,
+		 nbits);
+
+	for_each_set_bit(gpuidx, p->svms.bitmap_supported, p->n_pdds) {
+		bits = bitmap_alloc(old_nbits2, GFP_KERNEL);
+		if (!bits)
+			return -ENOMEM;
+
+		if (start == old->start) {
+			bitmap_shift_right(new->bitmap_mapped[gpuidx],
+	   old->bitmap_mapped[gpuidx],
+	   old_nbits2, old_nbits);
+			bitmap_shift_right(bits, old->bitmap_mapped[gpuidx], 0,
+	   old_nbits2);
  
  

Re: [PATCH v2 2/2] Revert "drm/amd/pm: workaround for the wrong ac power detection on smu 13.0.0"

2023-10-05 Thread Alex Deucher
On Thu, Oct 5, 2023 at 3:13 PM Greg Kroah-Hartman
 wrote:
>
> On Thu, Oct 05, 2023 at 12:52:30PM -0500, Mario Limonciello wrote:
> > This reverts commit 0e5e1a84f0b8c814d502a135824244127fed8f23.
> >
> > Reviewed-by: Alex Deucher 
> > Signed-off-by: Mario Limonciello 
>
> No explanation as to why this needs to be reverted?  And does this need
> to be backported anywhere?

This patch ultimately never went upstream, but there was some
confusion about whether it did or not.  It can be ignored.

Alex


Re: [PATCH v2 2/2] Revert "drm/amd/pm: workaround for the wrong ac power detection on smu 13.0.0"

2023-10-05 Thread Mario Limonciello

On 10/5/2023 14:12, Greg Kroah-Hartman wrote:

On Thu, Oct 05, 2023 at 12:52:30PM -0500, Mario Limonciello wrote:

This reverts commit 0e5e1a84f0b8c814d502a135824244127fed8f23.

Reviewed-by: Alex Deucher 
Signed-off-by: Mario Limonciello 


No explanation as to why this needs to be reverted?  And does this need
to be backported anywhere?

thanks,

greg k-h


No need to be backported anywhere.  The commit is only in 
amd-staging-drm-next right now.


I think it's up to whether Alex includes the workaround commit in the 
final 6.7 pull request.  If he does, then yeah this could use a larger 
write up to explain why it went in and out.


I was sort of thinking we could land both commits amd-staging-drm-next 
and then when Alex did the pull request the workaround commit just 
wouldn't be part of the 6.7 PR since it's a no-op with the revert.


Re: [PATCH v2 1/2] usb: typec: ucsi: Use GET_CAPABILITY attributes data to set power supply scope

2023-10-05 Thread Greg Kroah-Hartman
On Thu, Oct 05, 2023 at 12:52:29PM -0500, Mario Limonciello wrote:
> On some OEM systems, adding a W7900 dGPU triggers RAS errors and hangs
> at a black screen on startup.  This issue occurs only if `ucsi_acpi` has
> loaded before `amdgpu` has loaded.  The reason for this failure is that
> `amdgpu` uses power_supply_is_system_supplied() to determine if running
> on AC or DC power at startup. If this value is reported incorrectly the
> dGPU will also be programmed incorrectly and trigger errors.
> 
> power_supply_is_system_supplied() reports the wrong value because UCSI
> power supplies provided as part of the system don't properly report the
> scope as "DEVICE" scope (not powering the system).
> 
> In order to fix this issue check the capabilities reported from the UCSI
> power supply to ensure that it supports charging a battery and that it can
> be powered by AC.  Mark the scope accordingly.
> 
> Fixes: a7fbfd44c020 ("usb: typec: ucsi: Mark dGPUs as DEVICE scope")
> Link: 
> https://www.intel.com/content/www/us/en/products/docs/io/universal-serial-bus/usb-type-c-ucsi-spec.html
>  p28
> Signed-off-by: Mario Limonciello 
> ---
> Cc: Kai-Heng Feng 
> Cc: Alex Deucher >
> Cc: Richard Gong 
> ---
>  drivers/usb/typec/ucsi/psy.c | 9 +
>  1 file changed, 9 insertions(+)
> 
> diff --git a/drivers/usb/typec/ucsi/psy.c b/drivers/usb/typec/ucsi/psy.c
> index 384b42267f1f..b35c6e07911e 100644
> --- a/drivers/usb/typec/ucsi/psy.c
> +++ b/drivers/usb/typec/ucsi/psy.c
> @@ -37,6 +37,15 @@ static int ucsi_psy_get_scope(struct ucsi_connector *con,
>   struct device *dev = con->ucsi->dev;
>  
>   device_property_read_u8(dev, "scope", );
> + if (scope == POWER_SUPPLY_SCOPE_UNKNOWN) {
> + u32 mask = UCSI_CAP_ATTR_POWER_AC_SUPPLY |
> +UCSI_CAP_ATTR_BATTERY_CHARGING;
> +
> + if (con->ucsi->cap.attributes & mask)
> + scope = POWER_SUPPLY_SCOPE_SYSTEM;
> + else
> + scope = POWER_SUPPLY_SCOPE_DEVICE;
> + }
>   val->intval = scope;
>   return 0;
>  }
> -- 
> 2.34.1
> 
> 

Hi,

This is the friendly patch-bot of Greg Kroah-Hartman.  You have sent him
a patch that has triggered this response.  He used to manually respond
to these common problems, but in order to save his sanity (he kept
writing the same thing over and over, yet to different people), I was
created.  Hopefully you will not take offence and will fix the problem
in your patch and resubmit it so that it can be accepted into the Linux
kernel tree.

You are receiving this message because of the following common error(s)
as indicated below:

- You have marked a patch with a "Fixes:" tag for a commit that is in an
  older released kernel, yet you do not have a cc: stable line in the
  signed-off-by area at all, which means that the patch will not be
  applied to any older kernel releases.  To properly fix this, please
  follow the documented rules in the
  Documentation/process/stable-kernel-rules.rst file for how to resolve
  this.

If you wish to discuss this problem further, or you have questions about
how to resolve this issue, please feel free to respond to this email and
Greg will reply once he has dug out from the pending patches received
from other developers.

thanks,

greg k-h's patch email bot


Re: [PATCH v2 2/2] Revert "drm/amd/pm: workaround for the wrong ac power detection on smu 13.0.0"

2023-10-05 Thread Greg Kroah-Hartman
On Thu, Oct 05, 2023 at 12:52:30PM -0500, Mario Limonciello wrote:
> This reverts commit 0e5e1a84f0b8c814d502a135824244127fed8f23.
> 
> Reviewed-by: Alex Deucher 
> Signed-off-by: Mario Limonciello 

No explanation as to why this needs to be reverted?  And does this need
to be backported anywhere?

thanks,

greg k-h


[PATCH v2 1/2] usb: typec: ucsi: Use GET_CAPABILITY attributes data to set power supply scope

2023-10-05 Thread Mario Limonciello
On some OEM systems, adding a W7900 dGPU triggers RAS errors and hangs
at a black screen on startup.  This issue occurs only if `ucsi_acpi` has
loaded before `amdgpu` has loaded.  The reason for this failure is that
`amdgpu` uses power_supply_is_system_supplied() to determine if running
on AC or DC power at startup. If this value is reported incorrectly the
dGPU will also be programmed incorrectly and trigger errors.

power_supply_is_system_supplied() reports the wrong value because UCSI
power supplies provided as part of the system don't properly report the
scope as "DEVICE" scope (not powering the system).

In order to fix this issue check the capabilities reported from the UCSI
power supply to ensure that it supports charging a battery and that it can
be powered by AC.  Mark the scope accordingly.

Fixes: a7fbfd44c020 ("usb: typec: ucsi: Mark dGPUs as DEVICE scope")
Link: 
https://www.intel.com/content/www/us/en/products/docs/io/universal-serial-bus/usb-type-c-ucsi-spec.html
 p28
Signed-off-by: Mario Limonciello 
---
Cc: Kai-Heng Feng 
Cc: Alex Deucher >
Cc: Richard Gong 
---
 drivers/usb/typec/ucsi/psy.c | 9 +
 1 file changed, 9 insertions(+)

diff --git a/drivers/usb/typec/ucsi/psy.c b/drivers/usb/typec/ucsi/psy.c
index 384b42267f1f..b35c6e07911e 100644
--- a/drivers/usb/typec/ucsi/psy.c
+++ b/drivers/usb/typec/ucsi/psy.c
@@ -37,6 +37,15 @@ static int ucsi_psy_get_scope(struct ucsi_connector *con,
struct device *dev = con->ucsi->dev;
 
device_property_read_u8(dev, "scope", );
+   if (scope == POWER_SUPPLY_SCOPE_UNKNOWN) {
+   u32 mask = UCSI_CAP_ATTR_POWER_AC_SUPPLY |
+  UCSI_CAP_ATTR_BATTERY_CHARGING;
+
+   if (con->ucsi->cap.attributes & mask)
+   scope = POWER_SUPPLY_SCOPE_SYSTEM;
+   else
+   scope = POWER_SUPPLY_SCOPE_DEVICE;
+   }
val->intval = scope;
return 0;
 }
-- 
2.34.1



[PATCH v2 2/2] Revert "drm/amd/pm: workaround for the wrong ac power detection on smu 13.0.0"

2023-10-05 Thread Mario Limonciello
This reverts commit 0e5e1a84f0b8c814d502a135824244127fed8f23.

Reviewed-by: Alex Deucher 
Signed-off-by: Mario Limonciello 
---
 drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c   | 3 ++-
 drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
index 08cb9f8ce64e..9b62b45ebb7f 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
@@ -1026,7 +1026,8 @@ static int smu_v13_0_process_pending_interrupt(struct 
smu_context *smu)
 {
int ret = 0;
 
-   if (smu_cmn_feature_is_enabled(smu, SMU_FEATURE_ACDC_BIT))
+   if (smu->dc_controlled_by_gpio &&
+   smu_cmn_feature_is_enabled(smu, SMU_FEATURE_ACDC_BIT))
ret = smu_v13_0_allow_ih_interrupt(smu);
 
return ret;
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
index 07df5be063e2..0fb6be11a0cc 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
@@ -2662,6 +2662,7 @@ static const struct pptable_funcs smu_v13_0_0_ppt_funcs = 
{
.enable_mgpu_fan_boost = smu_v13_0_0_enable_mgpu_fan_boost,
.get_power_limit = smu_v13_0_0_get_power_limit,
.set_power_limit = smu_v13_0_set_power_limit,
+   .set_power_source = smu_v13_0_set_power_source,
.get_power_profile_mode = smu_v13_0_0_get_power_profile_mode,
.set_power_profile_mode = smu_v13_0_0_set_power_profile_mode,
.run_btc = smu_v13_0_run_btc,
-- 
2.34.1



[PATCH v2 0/2] Fix Navi3x boot and hotplug problems

2023-10-05 Thread Mario Limonciello
On some OEM systems multiple navi3x dGPUS are triggering RAS errors
and BACO errors.

These errors come from elements of the OEM system that weren't part of
original test environment.  This series addresses those problems.

NOTE: Although this series touches two subsystems, I would prefer to
take this all through DRM because there is a workaround in
amd-staging-drm-next that I would like to be reverted at the same
time as picking up the fix.

v1->v2:
 * Drop _PR3 patch from series, it was cherry-picked and is on its way
   to 6.6-rcX already.
 * Rather than changing global policy, fix the problematic power supply
   driver.
v1: 
https://lore.kernel.org/linux-pm/20230926225955.386553-1-mario.limoncie...@amd.com/

Mario Limonciello (2):
  usb: typec: ucsi: Use GET_CAPABILITY attributes data to set power
supply scope
  Revert "drm/amd/pm: workaround for the wrong ac power detection on smu
13.0.0"

 drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c   |  3 ++-
 drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c |  1 +
 drivers/usb/typec/ucsi/psy.c | 10 ++
 3 files changed, 13 insertions(+), 1 deletion(-)

-- 
2.34.1



Re: [PATCH 1/3] amd/amdkfd: Add granularity bitmap mapped to gpu flag

2023-10-05 Thread Philip Yang

  


On 2023-10-02 13:02, Chen, Xiaogang
  wrote:


  
  On 9/29/2023 9:11 AM, Philip Yang wrote:
  
  Caution: This message originated from an
External Source. Use proper caution when opening attachments,
clicking links, or responding.



Replace prange->mapped_to_gpu with
prange->bitmap_mapped[], which is

based on prange granularity, updated when map to GPUS or unmap
from

GPUs, to optimize multiple GPU map, unmap and retry fault
recover.


svm_range_is_mapped is false only if no partial range mapping on
any

GPUs.


Split the bitmap_mapped when unmap from cpu to split the prange.


Signed-off-by: Philip Yang 

---

  drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 218
++-

  drivers/gpu/drm/amd/amdkfd/kfd_svm.h |   4 +-

  2 files changed, 184 insertions(+), 38 deletions(-)


diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c

index 040dc32ad475..626e0dd4ec79 100644

--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c

+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c

@@ -292,12 +292,12 @@ static void svm_range_free(struct
svm_range *prange, bool do_unmap)


KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, 0);

 }


-   /* free dma_addr array for each gpu */

+   /* free dma_addr array, bitmap_mapped for each gpu */

 for (gpuidx = 0; gpuidx < MAX_GPU_INSTANCE;
gpuidx++) {

-   if (prange->dma_addr[gpuidx]) {

+   if (prange->dma_addr[gpuidx])

 kvfree(prange->dma_addr[gpuidx]);

-   prange->dma_addr[gpuidx] =
NULL;

-   }

+   if (prange->bitmap_mapped[gpuidx])

+  
bitmap_free(prange->bitmap_mapped[gpuidx]);

 }


 mutex_destroy(>lock);

@@ -323,19 +323,38 @@ svm_range *svm_range_new(struct
svm_range_list *svms, uint64_t start,

 uint64_t size = last - start + 1;

 struct svm_range *prange;

 struct kfd_process *p;

-

-   prange = kzalloc(sizeof(*prange), GFP_KERNEL);

-   if (!prange)

-   return NULL;

+   unsigned int nbits;

+   uint32_t gpuidx;


 p = container_of(svms, struct kfd_process, svms);

 if (!p->xnack_enabled && update_mem_usage
&&

 amdgpu_amdkfd_reserve_mem_limit(NULL, size <<
PAGE_SHIFT,


KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, 0)) {

 pr_info("SVM mapping failed, exceeds resident
system memory limit\n");

-   kfree(prange);

 return NULL;

 }

+

+   prange = kzalloc(sizeof(*prange), GFP_KERNEL);

+   if (!prange)

+   return NULL;

+

+  
svm_range_set_default_attributes(>preferred_loc,

+   
>prefetch_loc,

+   
>granularity, >flags);

+

+   nbits = svm_range_mapped_nbits(size,
prange->granularity);

+   pr_debug("prange 0x%p [0x%llx 0x%llx] bitmap_mapped
nbits %d\n", prange,

+    start, last, nbits);

+   for_each_set_bit(gpuidx, p->svms.bitmap_supported,
p->n_pdds) {

+   prange->bitmap_mapped[gpuidx] =
bitmap_zalloc(nbits, GFP_KERNEL);

+   if (!prange->bitmap_mapped[gpuidx]) {

+   while (gpuidx--)

+  
bitmap_free(prange->bitmap_mapped[gpuidx]);

+   kfree(prange);

+   return NULL;

+   }

+   }

+

 

[PATCH v4 1/1] drm/amdkfd: get doorbell's absolute offset based on the db_size

2023-10-05 Thread Arvind Yadav
Here, add db_size (in bytes) to find the doorbell's
absolute offset for both 32-bit and 64-bit doorbell sizes,
so that the doorbell offset will be aligned based on the
doorbell size.

v2:
- Addressed the review comment from Felix.
v3:
- Adding doorbell_size as parameter to get db absolute offset.
v4:
  Squash the two patches into one.

Cc: Christian Koenig 
Cc: Alex Deucher 
Signed-off-by: Shashank Sharma 
Signed-off-by: Arvind Yadav 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h|  5 +++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell_mgr.c| 13 +
 .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c   |  3 ++-
 drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c   | 10 --
 .../gpu/drm/amd/amdkfd/kfd_process_queue_manager.c  |  3 ++-
 5 files changed, 24 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
index 09f6727e7c73..4a8b33f55f6b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
@@ -357,8 +357,9 @@ int amdgpu_doorbell_init(struct amdgpu_device *adev);
 void amdgpu_doorbell_fini(struct amdgpu_device *adev);
 int amdgpu_doorbell_create_kernel_doorbells(struct amdgpu_device *adev);
 uint32_t amdgpu_doorbell_index_on_bar(struct amdgpu_device *adev,
-  struct amdgpu_bo *db_bo,
-  uint32_t doorbell_index);
+ struct amdgpu_bo *db_bo,
+ uint32_t doorbell_index,
+ uint32_t db_size);
 
 #define RDOORBELL32(index) amdgpu_mm_rdoorbell(adev, (index))
 #define WDOORBELL32(index, v) amdgpu_mm_wdoorbell(adev, (index), (v))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell_mgr.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell_mgr.c
index da4be0bbb446..6690f5a72f4d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell_mgr.c
@@ -114,19 +114,24 @@ void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, 
u32 index, u64 v)
  * @adev: amdgpu_device pointer
  * @db_bo: doorbell object's bo
  * @db_index: doorbell relative index in this doorbell object
+ * @db_size: doorbell size is in byte
  *
  * returns doorbell's absolute index in BAR
  */
 uint32_t amdgpu_doorbell_index_on_bar(struct amdgpu_device *adev,
-  struct amdgpu_bo *db_bo,
-  uint32_t doorbell_index)
+ struct amdgpu_bo *db_bo,
+ uint32_t doorbell_index,
+ uint32_t db_size)
 {
int db_bo_offset;
 
db_bo_offset = amdgpu_bo_gpu_offset_no_check(db_bo);
 
-   /* doorbell index is 32 bit but doorbell's size is 64-bit, so *2 */
-   return db_bo_offset / sizeof(u32) + doorbell_index * 2;
+   /* doorbell index is 32 bit but doorbell's size can be 32 bit
+* or 64 bit, so *db_size(in byte)/4 for alignment.
+*/
+   return db_bo_offset / sizeof(u32) + doorbell_index *
+  DIV_ROUND_UP(db_size, 4);
 }
 
 /**
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 0d3d538b64eb..e07652e72496 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -407,7 +407,8 @@ static int allocate_doorbell(struct qcm_process_device *qpd,
 
q->properties.doorbell_off = amdgpu_doorbell_index_on_bar(dev->adev,
  
qpd->proc_doorbells,
- 
q->doorbell_id);
+ 
q->doorbell_id,
+ 
dev->kfd->device_info.doorbell_size);
return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
index 7b38537c7c99..05c74887fd6f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
@@ -161,7 +161,10 @@ void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
if (inx >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)
return NULL;
 
-   *doorbell_off = amdgpu_doorbell_index_on_bar(kfd->adev, kfd->doorbells, 
inx);
+   *doorbell_off = amdgpu_doorbell_index_on_bar(kfd->adev,
+kfd->doorbells,
+inx,
+
kfd->device_info.doorbell_size);
inx *= 2;
 
pr_debug("Get kernel queue doorbell\n"
@@ -240,7 +243,10 @@ phys_addr_t kfd_get_process_doorbells(struct 
kfd_process_device *pdd)

[PATCH v4 0/1] drm/amdkfd: Fix unaligned doorbell absolute offset for gfx8

2023-10-05 Thread Arvind Yadav
On older chips, the absolute doorbell offset within
the doorbell page is based on the queue ID.
KFD is using queue ID and doorbell size to get an
absolute doorbell offset in userspace.

Here, adding db_size in bytes to find the doorbell's
absolute offset for both 32-bit and 64-bit doorbell sizes.
So that doorbell offset will be aligned based on the doorbell
size.

v2:
- Addressed the review comment from Felix.

v3:
- Adding doorbell_size as parameter to get db absolute offset.  

v4:
  Squash the two patches into one.

Arvind Yadav (1):
  drm/amdkfd: get doorbell's absolute offset based on the db_size

 drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h|  5 +++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell_mgr.c| 13 +
 .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c   |  3 ++-
 drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c   | 10 --
 .../gpu/drm/amd/amdkfd/kfd_process_queue_manager.c  |  3 ++-
 5 files changed, 24 insertions(+), 10 deletions(-)

-- 
2.34.1



[PATCH v4 32/32] drm/amd/display: Add 3x4 CTM support for plane CTM

2023-10-05 Thread Melissa Wen
From: Joshua Ashton 

Create drm_color_ctm_3x4 to support 3x4-dimension plane CTM matrix and
convert DRM CTM to DC CSC float matrix.

v3:
- rename ctm2 to ctm_3x4 (Harry)

Reviewed-by: Harry Wentland 
Signed-off-by: Joshua Ashton 
---
 .../amd/display/amdgpu_dm/amdgpu_dm_color.c   | 28 +--
 .../amd/display/amdgpu_dm/amdgpu_dm_plane.c   |  2 +-
 include/uapi/drm/drm_mode.h   |  8 ++
 3 files changed, 34 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index bc9dd75e8881..655c18c9a2d7 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -433,6 +433,28 @@ static void __drm_ctm_to_dc_matrix(const struct 
drm_color_ctm *ctm,
}
 }
 
+/**
+ * __drm_ctm_3x4_to_dc_matrix - converts a DRM CTM 3x4 to a DC CSC float matrix
+ * @ctm: DRM color transformation matrix with 3x4 dimensions
+ * @matrix: DC CSC float matrix
+ *
+ * The matrix needs to be a 3x4 (12 entry) matrix.
+ */
+static void __drm_ctm_3x4_to_dc_matrix(const struct drm_color_ctm_3x4 *ctm,
+  struct fixed31_32 *matrix)
+{
+   int i;
+
+   /* The format provided is S31.32, using signed-magnitude representation.
+* Our fixed31_32 is also S31.32, but is using 2's complement. We have
+* to convert from signed-magnitude to 2's complement.
+*/
+   for (i = 0; i < 12; i++) {
+   /* gamut_remap_matrix[i] = ctm[i - floor(i/4)] */
+   matrix[i] = dc_fixpt_from_s3132(ctm->matrix[i]);
+   }
+}
+
 /**
  * __set_legacy_tf - Calculates the legacy transfer function
  * @func: transfer function
@@ -1176,7 +1198,7 @@ int amdgpu_dm_update_plane_color_mgmt(struct 
dm_crtc_state *crtc,
 {
struct amdgpu_device *adev = drm_to_adev(crtc->base.state->dev);
struct dm_plane_state *dm_plane_state = to_dm_plane_state(plane_state);
-   struct drm_color_ctm *ctm = NULL;
+   struct drm_color_ctm_3x4 *ctm = NULL;
struct dc_color_caps *color_caps = NULL;
bool has_crtc_cm_degamma;
int ret;
@@ -1231,7 +1253,7 @@ int amdgpu_dm_update_plane_color_mgmt(struct 
dm_crtc_state *crtc,
 
/* Setup CRTC CTM. */
if (dm_plane_state->ctm) {
-   ctm = (struct drm_color_ctm *)dm_plane_state->ctm->data;
+   ctm = (struct drm_color_ctm_3x4 *)dm_plane_state->ctm->data;
/*
 * DCN2 and older don't support both pre-blending and
 * post-blending gamut remap. For this HW family, if we have
@@ -1243,7 +1265,7 @@ int amdgpu_dm_update_plane_color_mgmt(struct 
dm_crtc_state *crtc,
 * mapping CRTC CTM to MPC and keeping plane CTM setup at DPP,
 * as it's done by dcn30_program_gamut_remap().
 */
-   __drm_ctm_to_dc_matrix(ctm, 
dc_plane_state->gamut_remap_matrix.matrix);
+   __drm_ctm_3x4_to_dc_matrix(ctm, 
dc_plane_state->gamut_remap_matrix.matrix);
 
dc_plane_state->gamut_remap_matrix.enable_remap = true;
dc_plane_state->input_csc_color_matrix.enable_adjustment = 
false;
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
index d9537d9bf18c..a3935c56189b 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
@@ -1549,7 +1549,7 @@ dm_atomic_plane_set_property(struct drm_plane *plane,
ret = drm_property_replace_blob_from_id(plane->dev,
_plane_state->ctm,
val,
-   sizeof(struct 
drm_color_ctm), -1,
+   sizeof(struct 
drm_color_ctm_3x4), -1,
);
dm_plane_state->base.color_mgmt_changed |= replaced;
return ret;
diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h
index 46becedf5b2f..a811d24e8ed5 100644
--- a/include/uapi/drm/drm_mode.h
+++ b/include/uapi/drm/drm_mode.h
@@ -838,6 +838,14 @@ struct drm_color_ctm {
__u64 matrix[9];
 };
 
+struct drm_color_ctm_3x4 {
+   /*
+* Conversion matrix with 3x4 dimensions in S31.32 sign-magnitude
+* (not two's complement!) format.
+*/
+   __u64 matrix[12];
+};
+
 struct drm_color_lut {
/*
 * Values are mapped linearly to 0.0 - 1.0 range, with 0x0 == 0.0 and
-- 
2.40.1



[PATCH v4 31/32] drm/amd/display: add plane CTM support

2023-10-05 Thread Melissa Wen
Map the plane CTM driver-specific property to DC plane, instead of DC
stream. The remaining steps to program DPP block are already implemented
on DC shared-code.

v3:
- fix comment about plane and CRTC CTMs priorities (Harry)

Reviewed-by: Harry Wentland 
Signed-off-by: Melissa Wen 
---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c |  1 +
 .../amd/display/amdgpu_dm/amdgpu_dm_color.c   | 26 +++
 2 files changed, 27 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 5e64eda6ed11..7de67b5ab6e9 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -9998,6 +9998,7 @@ static bool should_reset_plane(struct drm_atomic_state 
*state,
if (dm_old_other_state->degamma_tf != 
dm_new_other_state->degamma_tf ||
dm_old_other_state->degamma_lut != 
dm_new_other_state->degamma_lut ||
dm_old_other_state->hdr_mult != 
dm_new_other_state->hdr_mult ||
+   dm_old_other_state->ctm != dm_new_other_state->ctm ||
dm_old_other_state->shaper_lut != 
dm_new_other_state->shaper_lut ||
dm_old_other_state->shaper_tf != 
dm_new_other_state->shaper_tf ||
dm_old_other_state->lut3d != dm_new_other_state->lut3d ||
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index 41c5926ca068..bc9dd75e8881 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -1175,6 +1175,8 @@ int amdgpu_dm_update_plane_color_mgmt(struct 
dm_crtc_state *crtc,
  struct dc_plane_state *dc_plane_state)
 {
struct amdgpu_device *adev = drm_to_adev(crtc->base.state->dev);
+   struct dm_plane_state *dm_plane_state = to_dm_plane_state(plane_state);
+   struct drm_color_ctm *ctm = NULL;
struct dc_color_caps *color_caps = NULL;
bool has_crtc_cm_degamma;
int ret;
@@ -1227,5 +1229,29 @@ int amdgpu_dm_update_plane_color_mgmt(struct 
dm_crtc_state *crtc,
return ret;
}
 
+   /* Setup CRTC CTM. */
+   if (dm_plane_state->ctm) {
+   ctm = (struct drm_color_ctm *)dm_plane_state->ctm->data;
+   /*
+* DCN2 and older don't support both pre-blending and
+* post-blending gamut remap. For this HW family, if we have
+* the plane and CRTC CTMs simultaneously, CRTC CTM takes
+* priority, and we discard plane CTM, as implemented in
+* dcn10_program_gamut_remap(). However, DCN3+ has DPP
+* (pre-blending) and MPC (post-blending) `gamut remap` blocks;
+* therefore, we can program plane and CRTC CTMs together by
+* mapping CRTC CTM to MPC and keeping plane CTM setup at DPP,
+* as it's done by dcn30_program_gamut_remap().
+*/
+   __drm_ctm_to_dc_matrix(ctm, 
dc_plane_state->gamut_remap_matrix.matrix);
+
+   dc_plane_state->gamut_remap_matrix.enable_remap = true;
+   dc_plane_state->input_csc_color_matrix.enable_adjustment = 
false;
+   } else {
+   /* Bypass CTM. */
+   dc_plane_state->gamut_remap_matrix.enable_remap = false;
+   dc_plane_state->input_csc_color_matrix.enable_adjustment = 
false;
+   }
+
return amdgpu_dm_plane_set_color_properties(plane_state, 
dc_plane_state);
 }
-- 
2.40.1



[PATCH v4 30/32] drm/amd/display: add plane CTM driver-specific property

2023-10-05 Thread Melissa Wen
Plane CTM for pre-blending color space conversion. Only enable
driver-specific plane CTM property on drivers that support both pre- and
post-blending gamut remap matrix, i.e., DCN3+ family. Otherwise it
conflicts with the DRM CRTC CTM property.

Reviewed-by: Harry Wentland 
Signed-off-by: Melissa Wen 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h  |  2 ++
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h |  7 +++
 .../amd/display/amdgpu_dm/amdgpu_dm_color.c   |  7 +++
 .../amd/display/amdgpu_dm/amdgpu_dm_plane.c   | 20 +++
 4 files changed, 36 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
index 071cc10bfd90..1347022ce57d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
@@ -363,6 +363,8 @@ struct amdgpu_mode_info {
 * @plane_hdr_mult_property:
 */
struct drm_property *plane_hdr_mult_property;
+
+   struct drm_property *plane_ctm_property;
/**
 * @shaper_lut_property: Plane property to set pre-blending shaper LUT
 * that converts color content before 3D LUT. If
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
index 219efa7fe181..c9cd2e5f79ae 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
@@ -784,6 +784,13 @@ struct dm_plane_state {
 * TF is needed for any subsequent linear-to-non-linear transforms.
 */
__u64 hdr_mult;
+   /**
+* @ctm:
+*
+* Color transformation matrix. The blob (if not NULL) is a 
+* drm_color_ctm_3x4.
+*/
+   struct drm_property_blob *ctm;
/**
 * @shaper_lut: shaper lookup table blob. The blob (if not NULL) is an
 * array of  drm_color_lut.
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index 251b5f14bd89..41c5926ca068 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -239,6 +239,13 @@ amdgpu_dm_create_color_properties(struct amdgpu_device 
*adev)
return -ENOMEM;
adev->mode_info.plane_hdr_mult_property = prop;
 
+   prop = drm_property_create(adev_to_drm(adev),
+  DRM_MODE_PROP_BLOB,
+  "AMD_PLANE_CTM", 0);
+   if (!prop)
+   return -ENOMEM;
+   adev->mode_info.plane_ctm_property = prop;
+
prop = drm_property_create(adev_to_drm(adev),
   DRM_MODE_PROP_BLOB,
   "AMD_PLANE_SHAPER_LUT", 0);
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
index f1070ca7076a..d9537d9bf18c 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
@@ -1361,6 +1361,8 @@ dm_drm_plane_duplicate_state(struct drm_plane *plane)
 
if (dm_plane_state->degamma_lut)
drm_property_blob_get(dm_plane_state->degamma_lut);
+   if (dm_plane_state->ctm)
+   drm_property_blob_get(dm_plane_state->ctm);
if (dm_plane_state->shaper_lut)
drm_property_blob_get(dm_plane_state->shaper_lut);
if (dm_plane_state->lut3d)
@@ -1442,6 +1444,8 @@ static void dm_drm_plane_destroy_state(struct drm_plane 
*plane,
 
if (dm_plane_state->degamma_lut)
drm_property_blob_put(dm_plane_state->degamma_lut);
+   if (dm_plane_state->ctm)
+   drm_property_blob_put(dm_plane_state->ctm);
if (dm_plane_state->lut3d)
drm_property_blob_put(dm_plane_state->lut3d);
if (dm_plane_state->shaper_lut)
@@ -1479,6 +1483,11 @@ dm_atomic_plane_attach_color_mgmt_properties(struct 
amdgpu_display_manager *dm,
   dm->adev->mode_info.plane_hdr_mult_property,
   AMDGPU_HDR_MULT_DEFAULT);
 
+   /* Only enable plane CTM if both DPP and MPC gamut remap is available. 
*/
+   if (dm->dc->caps.color.mpc.gamut_remap)
+   drm_object_attach_property(>base,
+  
dm->adev->mode_info.plane_ctm_property, 0);
+
if (dpp_color_caps.hw_3d_lut) {
drm_object_attach_property(>base,
   mode_info.plane_shaper_lut_property, 
0);
@@ -1536,6 +1545,14 @@ dm_atomic_plane_set_property(struct drm_plane *plane,
dm_plane_state->hdr_mult = val;
dm_plane_state->base.color_mgmt_changed = 1;
}
+   } else if (property == adev->mode_info.plane_ctm_property) {
+   ret = 

[PATCH v4 29/32] drm/amd/display: copy 3D LUT settings from crtc state to stream_update

2023-10-05 Thread Melissa Wen
From: Joshua Ashton 

When committing planes, we copy color mgmt resources to the stream state.
Do the same for shaper and 3D LUTs.

Reviewed-by: Harry Wentland 
Signed-off-by: Joshua Ashton 
Co-developed-by: Melissa Wen 
Signed-off-by: Melissa Wen 
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 4b4181447df9..5e64eda6ed11 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -8501,6 +8501,10 @@ static void amdgpu_dm_commit_planes(struct 
drm_atomic_state *state,
_state->stream->csc_color_matrix;
bundle->stream_update.out_transfer_func =
acrtc_state->stream->out_transfer_func;
+   bundle->stream_update.lut3d_func =
+   (struct dc_3dlut *) 
acrtc_state->stream->lut3d_func;
+   bundle->stream_update.func_shaper =
+   (struct dc_transfer_func *) 
acrtc_state->stream->func_shaper;
}
 
acrtc_state->stream->abm_level = acrtc_state->abm_level;
-- 
2.40.1



[PATCH v4 28/32] drm/amd/display: allow newer DC hardware to use degamma ROM for PQ/HLG

2023-10-05 Thread Melissa Wen
From: Joshua Ashton 

Need to funnel the color caps through to these functions so it can check
that the hardware is capable.

v2:
- remove redundant color caps assignment on plane degamma map (Harry)
- pass color caps to degamma params

v3:
- remove unused color_caps parameter from set_color_properties (Harry)

Reviewed-by: Harry Wentland 
Signed-off-by: Joshua Ashton 
Signed-off-by: Melissa Wen 
---
 .../amd/display/amdgpu_dm/amdgpu_dm_color.c   | 29 ---
 1 file changed, 18 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index 0909ed5639bf..251b5f14bd89 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -564,6 +564,7 @@ static int amdgpu_dm_set_atomic_regamma(struct 
dc_stream_state *stream,
 /**
  * __set_input_tf - calculates the input transfer function based on expected
  * input space.
+ * @caps: dc color capabilities
  * @func: transfer function
  * @lut: lookup table that defines the color space
  * @lut_size: size of respective lut.
@@ -571,7 +572,7 @@ static int amdgpu_dm_set_atomic_regamma(struct 
dc_stream_state *stream,
  * Returns:
  * 0 in case of success. -ENOMEM if fails.
  */
-static int __set_input_tf(struct dc_transfer_func *func,
+static int __set_input_tf(struct dc_color_caps *caps, struct dc_transfer_func 
*func,
  const struct drm_color_lut *lut, uint32_t lut_size)
 {
struct dc_gamma *gamma = NULL;
@@ -588,7 +589,7 @@ static int __set_input_tf(struct dc_transfer_func *func,
__drm_lut_to_dc_gamma(lut, gamma, false);
}
 
-   res = mod_color_calculate_degamma_params(NULL, func, gamma, gamma != 
NULL);
+   res = mod_color_calculate_degamma_params(caps, func, gamma, gamma != 
NULL);
 
if (gamma)
dc_gamma_release();
@@ -752,7 +753,7 @@ static int amdgpu_dm_atomic_blend_lut(const struct 
drm_color_lut *blend_lut,
func_blend->tf = tf;
func_blend->sdr_ref_white_level = SDR_WHITE_LEVEL_INIT_VALUE;
 
-   ret = __set_input_tf(func_blend, blend_lut, blend_size);
+   ret = __set_input_tf(NULL, func_blend, blend_lut, blend_size);
} else {
func_blend->type = TF_TYPE_BYPASS;
func_blend->tf = TRANSFER_FUNCTION_LINEAR;
@@ -968,7 +969,8 @@ int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state 
*crtc)
 
 static int
 map_crtc_degamma_to_dc_plane(struct dm_crtc_state *crtc,
-struct dc_plane_state *dc_plane_state)
+struct dc_plane_state *dc_plane_state,
+struct dc_color_caps *caps)
 {
const struct drm_color_lut *degamma_lut;
enum dc_transfer_func_predefined tf = TRANSFER_FUNCTION_SRGB;
@@ -1023,7 +1025,7 @@ map_crtc_degamma_to_dc_plane(struct dm_crtc_state *crtc,
dc_plane_state->in_transfer_func->tf =
TRANSFER_FUNCTION_LINEAR;
 
-   r = __set_input_tf(dc_plane_state->in_transfer_func,
+   r = __set_input_tf(caps, dc_plane_state->in_transfer_func,
   degamma_lut, degamma_size);
if (r)
return r;
@@ -1036,7 +1038,7 @@ map_crtc_degamma_to_dc_plane(struct dm_crtc_state *crtc,
dc_plane_state->in_transfer_func->tf = tf;
 
if (tf != TRANSFER_FUNCTION_SRGB &&
-   !mod_color_calculate_degamma_params(NULL,
+   !mod_color_calculate_degamma_params(caps,

dc_plane_state->in_transfer_func,
NULL, false))
return -ENOMEM;
@@ -1047,7 +1049,8 @@ map_crtc_degamma_to_dc_plane(struct dm_crtc_state *crtc,
 
 static int
 __set_dm_plane_degamma(struct drm_plane_state *plane_state,
-  struct dc_plane_state *dc_plane_state)
+  struct dc_plane_state *dc_plane_state,
+  struct dc_color_caps *color_caps)
 {
struct dm_plane_state *dm_plane_state = to_dm_plane_state(plane_state);
const struct drm_color_lut *degamma_lut;
@@ -1078,7 +1081,7 @@ __set_dm_plane_degamma(struct drm_plane_state 
*plane_state,
dc_plane_state->in_transfer_func->type =
TF_TYPE_DISTRIBUTED_POINTS;
 
-   ret = __set_input_tf(dc_plane_state->in_transfer_func,
+   ret = __set_input_tf(color_caps, 
dc_plane_state->in_transfer_func,
 degamma_lut, degamma_size);
if (ret)
return ret;
@@ -1086,7 +1089,7 @@ __set_dm_plane_degamma(struct drm_plane_state 
*plane_state,
dc_plane_state->in_transfer_func->type =
   

[PATCH v4 27/32] drm/amd/display: add plane blend LUT and TF support

2023-10-05 Thread Melissa Wen
From: Joshua Ashton 

Map plane blend properties to DPP blend gamma. Plane blend is a
post-3D LUT curve that linearizes color space for blending. It may be
defined by a user-blob LUT and/or predefined transfer function. As
hardcoded curve (ROM) is not supported on blend gamma, we use AMD color
module to fill parameters when setting non-linear TF with empty LUT.

v2:
- rename DRM TFs to AMDGPU TFs

Reviewed-by: Harry Wentland 
Signed-off-by: Joshua Ashton 
Signed-off-by: Melissa Wen 
---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c |  1 +
 .../amd/display/amdgpu_dm/amdgpu_dm_color.c   | 56 +--
 2 files changed, 53 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 26d19159bd79..4b4181447df9 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -8289,6 +8289,7 @@ static void amdgpu_dm_commit_planes(struct 
drm_atomic_state *state,
bundle->surface_updates[planes_count].hdr_mult = 
dc_plane->hdr_mult;
bundle->surface_updates[planes_count].func_shaper = 
dc_plane->in_shaper_func;
bundle->surface_updates[planes_count].lut3d_func = 
dc_plane->lut3d_func;
+   bundle->surface_updates[planes_count].blend_tf = 
dc_plane->blend_tf;
}
 
amdgpu_dm_plane_fill_dc_scaling_info(dm->adev, new_plane_state,
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index 25e9aa147e00..0909ed5639bf 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -732,6 +732,35 @@ static int amdgpu_dm_atomic_shaper_lut(const struct 
drm_color_lut *shaper_lut,
return ret;
 }
 
+static int amdgpu_dm_atomic_blend_lut(const struct drm_color_lut *blend_lut,
+  bool has_rom,
+  enum dc_transfer_func_predefined tf,
+  uint32_t blend_size,
+  struct dc_transfer_func *func_blend)
+{
+   int ret = 0;
+
+   if (blend_size || tf != TRANSFER_FUNCTION_LINEAR) {
+   /*
+* DRM plane gamma LUT or TF means we are linearizing color
+* space before blending (similar to degamma programming). As
+* we don't have hardcoded curve support, or we use AMD color
+* module to fill the parameters that will be translated to HW
+* points.
+*/
+   func_blend->type = TF_TYPE_DISTRIBUTED_POINTS;
+   func_blend->tf = tf;
+   func_blend->sdr_ref_white_level = SDR_WHITE_LEVEL_INIT_VALUE;
+
+   ret = __set_input_tf(func_blend, blend_lut, blend_size);
+   } else {
+   func_blend->type = TF_TYPE_BYPASS;
+   func_blend->tf = TRANSFER_FUNCTION_LINEAR;
+   }
+
+   return ret;
+}
+
 /**
  * amdgpu_dm_verify_lut3d_size - verifies if 3D LUT is supported and if user
  * shaper and 3D LUTs match the hw supported size
@@ -1070,8 +1099,9 @@ amdgpu_dm_plane_set_color_properties(struct 
drm_plane_state *plane_state,
 {
struct dm_plane_state *dm_plane_state = to_dm_plane_state(plane_state);
enum amdgpu_transfer_function shaper_tf = 
AMDGPU_TRANSFER_FUNCTION_DEFAULT;
-   const struct drm_color_lut *shaper_lut, *lut3d;
-   uint32_t shaper_size, lut3d_size;
+   enum amdgpu_transfer_function blend_tf = 
AMDGPU_TRANSFER_FUNCTION_DEFAULT;
+   const struct drm_color_lut *shaper_lut, *lut3d, *blend_lut;
+   uint32_t shaper_size, lut3d_size, blend_size;
int ret;
 
/* We have nothing to do here, return */
@@ -1091,12 +1121,30 @@ amdgpu_dm_plane_set_color_properties(struct 
drm_plane_state *plane_state,
  amdgpu_tf_to_dc_tf(shaper_tf),
  shaper_size,
  dc_plane_state->in_shaper_func);
-   if (ret)
+   if (ret) {
drm_dbg_kms(plane_state->plane->dev,
"setting plane %d shaper LUT failed.\n",
plane_state->plane->index);
 
-   return ret;
+   return ret;
+   }
+
+   blend_tf = dm_plane_state->blend_tf;
+   blend_lut = __extract_blob_lut(dm_plane_state->blend_lut, _size);
+   blend_size = blend_lut != NULL ? blend_size : 0;
+
+   ret = amdgpu_dm_atomic_blend_lut(blend_lut, false,
+amdgpu_tf_to_dc_tf(blend_tf),
+blend_size, dc_plane_state->blend_tf);
+   if (ret) {
+   drm_dbg_kms(plane_state->plane->dev,
+   

[PATCH v4 25/32] drm/amd/display: add plane 3D LUT support

2023-10-05 Thread Melissa Wen
Wire up DC 3D LUT to DM plane color management (pre-blending). On AMD
display HW, 3D LUT comes after a shaper curve and we always have to
program a shaper curve to delinearize or normalize the color space
before applying a 3D LUT (since we have a reduced number of LUT
entries).

In this version, the default values of 3D LUT for size and bit_depth are
17x17x17 and 12-bit, but we already provide here a more generic
mechanisms to program other supported values (9x9x9 size and 10-bit).

v2:
- started with plane 3D LUT instead of CRTC 3D LUT support

v4:
- lut3d_size is the max dimension size instead of # of entries

Reviewed-by: Harry Wentland 
Signed-off-by: Melissa Wen 
---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c |   1 +
 .../amd/display/amdgpu_dm/amdgpu_dm_color.c   | 102 +-
 2 files changed, 99 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index c79cd98d1228..26d19159bd79 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -8288,6 +8288,7 @@ static void amdgpu_dm_commit_planes(struct 
drm_atomic_state *state,

bundle->surface_updates[planes_count].gamut_remap_matrix = 
_plane->gamut_remap_matrix;
bundle->surface_updates[planes_count].hdr_mult = 
dc_plane->hdr_mult;
bundle->surface_updates[planes_count].func_shaper = 
dc_plane->in_shaper_func;
+   bundle->surface_updates[planes_count].lut3d_func = 
dc_plane->lut3d_func;
}
 
amdgpu_dm_plane_fill_dc_scaling_info(dm->adev, new_plane_state,
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index 0e65bf0a886e..9c38291c42a2 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -622,6 +622,86 @@ amdgpu_tf_to_dc_tf(enum amdgpu_transfer_function tf)
}
 }
 
+static void __to_dc_lut3d_color(struct dc_rgb *rgb,
+   const struct drm_color_lut lut,
+   int bit_precision)
+{
+   rgb->red = drm_color_lut_extract(lut.red, bit_precision);
+   rgb->green = drm_color_lut_extract(lut.green, bit_precision);
+   rgb->blue  = drm_color_lut_extract(lut.blue, bit_precision);
+}
+
+static void __drm_3dlut_to_dc_3dlut(const struct drm_color_lut *lut,
+   uint32_t lut3d_size,
+   struct tetrahedral_params *params,
+   bool use_tetrahedral_9,
+   int bit_depth)
+{
+   struct dc_rgb *lut0;
+   struct dc_rgb *lut1;
+   struct dc_rgb *lut2;
+   struct dc_rgb *lut3;
+   int lut_i, i;
+
+
+   if (use_tetrahedral_9) {
+   lut0 = params->tetrahedral_9.lut0;
+   lut1 = params->tetrahedral_9.lut1;
+   lut2 = params->tetrahedral_9.lut2;
+   lut3 = params->tetrahedral_9.lut3;
+   } else {
+   lut0 = params->tetrahedral_17.lut0;
+   lut1 = params->tetrahedral_17.lut1;
+   lut2 = params->tetrahedral_17.lut2;
+   lut3 = params->tetrahedral_17.lut3;
+   }
+
+   for (lut_i = 0, i = 0; i < lut3d_size - 4; lut_i++, i += 4) {
+   /*
+* We should consider the 3D LUT RGB values are distributed
+* along four arrays lut0-3 where the first sizes 1229 and the
+* other 1228. The bit depth supported for 3dlut channel is
+* 12-bit, but DC also supports 10-bit.
+*
+* TODO: improve color pipeline API to enable the userspace set
+* bit depth and 3D LUT size/stride, as specified by VA-API.
+*/
+   __to_dc_lut3d_color([lut_i], lut[i], bit_depth);
+   __to_dc_lut3d_color([lut_i], lut[i + 1], bit_depth);
+   __to_dc_lut3d_color([lut_i], lut[i + 2], bit_depth);
+   __to_dc_lut3d_color([lut_i], lut[i + 3], bit_depth);
+   }
+   /* lut0 has 1229 points (lut_size/4 + 1) */
+   __to_dc_lut3d_color([lut_i], lut[i], bit_depth);
+}
+
+/* amdgpu_dm_atomic_lut3d - set DRM 3D LUT to DC stream
+ * @drm_lut3d: user 3D LUT
+ * @drm_lut3d_size: size of 3D LUT
+ * @lut3d: DC 3D LUT
+ *
+ * Map user 3D LUT data to DC 3D LUT and all necessary bits to program it
+ * on DCN accordingly.
+ */
+static void amdgpu_dm_atomic_lut3d(const struct drm_color_lut *drm_lut3d,
+  uint32_t drm_lut3d_size,
+  struct dc_3dlut *lut)
+{
+   if (!drm_lut3d_size) {
+   lut->state.bits.initialized = 0;
+   } else {
+   /* Stride and bit depth are not programmable by API 

[PATCH v4 26/32] drm/amd/display: handle empty LUTs in __set_input_tf

2023-10-05 Thread Melissa Wen
From: Joshua Ashton 

Unlike degamma, blend gamma doesn't support hardcoded curve
(predefined/ROM), but we can use AMD color module to fill blend gamma
parameters when we have non-linear plane gamma TF without plane gamma
LUT. The regular degamma path doesn't hit this.

Reviewed-by: Harry Wentland 
Signed-off-by: Joshua Ashton 
Signed-off-by: Melissa Wen 
---
 .../amd/display/amdgpu_dm/amdgpu_dm_color.c   | 20 +++
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index 9c38291c42a2..25e9aa147e00 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -577,17 +577,21 @@ static int __set_input_tf(struct dc_transfer_func *func,
struct dc_gamma *gamma = NULL;
bool res;
 
-   gamma = dc_create_gamma();
-   if (!gamma)
-   return -ENOMEM;
+   if (lut_size) {
+   gamma = dc_create_gamma();
+   if (!gamma)
+   return -ENOMEM;
 
-   gamma->type = GAMMA_CUSTOM;
-   gamma->num_entries = lut_size;
+   gamma->type = GAMMA_CUSTOM;
+   gamma->num_entries = lut_size;
 
-   __drm_lut_to_dc_gamma(lut, gamma, false);
+   __drm_lut_to_dc_gamma(lut, gamma, false);
+   }
 
-   res = mod_color_calculate_degamma_params(NULL, func, gamma, true);
-   dc_gamma_release();
+   res = mod_color_calculate_degamma_params(NULL, func, gamma, gamma != 
NULL);
+
+   if (gamma)
+   dc_gamma_release();
 
return res ? 0 : -ENOMEM;
 }
-- 
2.40.1



[PATCH v4 24/32] drm/amd/display: add plane shaper TF support

2023-10-05 Thread Melissa Wen
Enable usage of predefined transfer func in addition to shaper 1D LUT.
That means we can save some complexity by just setting a predefined
curve, instead of programming a custom curve when preparing color space
for applying 3D LUT.

Reviewed-by: Harry Wentland 
Signed-off-by: Melissa Wen 
---
 .../drm/amd/display/amdgpu_dm/amdgpu_dm_color.c   | 15 +++
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index 1d18f447f387..0e65bf0a886e 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -623,20 +623,23 @@ amdgpu_tf_to_dc_tf(enum amdgpu_transfer_function tf)
 }
 
 static int amdgpu_dm_atomic_shaper_lut(const struct drm_color_lut *shaper_lut,
+  bool has_rom,
+  enum dc_transfer_func_predefined tf,
   uint32_t shaper_size,
   struct dc_transfer_func *func_shaper)
 {
int ret = 0;
 
-   if (shaper_size) {
+   if (shaper_size || tf != TRANSFER_FUNCTION_LINEAR) {
/*
 * If user shaper LUT is set, we assume a linear color space
 * (linearized by degamma 1D LUT or not).
 */
func_shaper->type = TF_TYPE_DISTRIBUTED_POINTS;
-   func_shaper->tf = TRANSFER_FUNCTION_LINEAR;
+   func_shaper->tf = tf;
+   func_shaper->sdr_ref_white_level = SDR_WHITE_LEVEL_INIT_VALUE;
 
-   ret = __set_output_tf(func_shaper, shaper_lut, shaper_size, 
false);
+   ret = __set_output_tf(func_shaper, shaper_lut, shaper_size, 
has_rom);
} else {
func_shaper->type = TF_TYPE_BYPASS;
func_shaper->tf = TRANSFER_FUNCTION_LINEAR;
@@ -971,6 +974,7 @@ amdgpu_dm_plane_set_color_properties(struct drm_plane_state 
*plane_state,
 struct dc_plane_state *dc_plane_state)
 {
struct dm_plane_state *dm_plane_state = to_dm_plane_state(plane_state);
+   enum amdgpu_transfer_function shaper_tf = 
AMDGPU_TRANSFER_FUNCTION_DEFAULT;
const struct drm_color_lut *shaper_lut;
uint32_t shaper_size;
int ret;
@@ -983,8 +987,11 @@ amdgpu_dm_plane_set_color_properties(struct 
drm_plane_state *plane_state,
 
shaper_lut = __extract_blob_lut(dm_plane_state->shaper_lut, 
_size);
shaper_size = shaper_lut != NULL ? shaper_size : 0;
+   shaper_tf = dm_plane_state->shaper_tf;
 
-   ret = amdgpu_dm_atomic_shaper_lut(shaper_lut, shaper_size,
+   ret = amdgpu_dm_atomic_shaper_lut(shaper_lut, false,
+ amdgpu_tf_to_dc_tf(shaper_tf),
+ shaper_size,
  dc_plane_state->in_shaper_func);
if (ret)
drm_dbg_kms(plane_state->plane->dev,
-- 
2.40.1



[PATCH v4 23/32] drm/amd/display: add plane shaper LUT support

2023-10-05 Thread Melissa Wen
Map DC shaper LUT to DM plane color management. Shaper LUT can be used
to delinearize and/or normalize the color space for computational
efficiency and achieving specific visual styles. If a plane degamma is
applied to linearize the color space, a custom shaper 1D LUT can be used
just before applying 3D LUT.

v2:
- use DPP color caps to verify plane 3D LUT support
- add debug message if shaper LUT programming fails

v4:
- remove helper to check 3D LUT color caps (Harry)
- update desc of lut3d-setup helper from MPC to DPP

Reviewed-by: Harry Wentland 
Signed-off-by: Melissa Wen 
---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c |  1 +
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h |  2 +
 .../amd/display/amdgpu_dm/amdgpu_dm_color.c   | 97 ++-
 3 files changed, 96 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index ded52dd780c8..c79cd98d1228 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -8287,6 +8287,7 @@ static void amdgpu_dm_commit_planes(struct 
drm_atomic_state *state,
bundle->surface_updates[planes_count].in_transfer_func 
= dc_plane->in_transfer_func;

bundle->surface_updates[planes_count].gamut_remap_matrix = 
_plane->gamut_remap_matrix;
bundle->surface_updates[planes_count].hdr_mult = 
dc_plane->hdr_mult;
+   bundle->surface_updates[planes_count].func_shaper = 
dc_plane->in_shaper_func;
}
 
amdgpu_dm_plane_fill_dc_scaling_info(dm->adev, new_plane_state,
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
index c5ec6e4f15c2..219efa7fe181 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
@@ -908,6 +908,8 @@ void amdgpu_dm_trigger_timing_sync(struct drm_device *dev);
 /* 3D LUT max size is 17x17x17 (4913 entries) */
 #define MAX_COLOR_3DLUT_SIZE 17
 #define MAX_COLOR_3DLUT_BITDEPTH 12
+int amdgpu_dm_verify_lut3d_size(struct amdgpu_device *adev,
+   struct drm_plane_state *plane_state);
 /* 1D LUT size */
 #define MAX_COLOR_LUT_ENTRIES 4096
 /* Legacy gamm LUT users such as X doesn't like large LUT sizes */
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index 849e07dd436a..1d18f447f387 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -622,6 +622,63 @@ amdgpu_tf_to_dc_tf(enum amdgpu_transfer_function tf)
}
 }
 
+static int amdgpu_dm_atomic_shaper_lut(const struct drm_color_lut *shaper_lut,
+  uint32_t shaper_size,
+  struct dc_transfer_func *func_shaper)
+{
+   int ret = 0;
+
+   if (shaper_size) {
+   /*
+* If user shaper LUT is set, we assume a linear color space
+* (linearized by degamma 1D LUT or not).
+*/
+   func_shaper->type = TF_TYPE_DISTRIBUTED_POINTS;
+   func_shaper->tf = TRANSFER_FUNCTION_LINEAR;
+
+   ret = __set_output_tf(func_shaper, shaper_lut, shaper_size, 
false);
+   } else {
+   func_shaper->type = TF_TYPE_BYPASS;
+   func_shaper->tf = TRANSFER_FUNCTION_LINEAR;
+   }
+
+   return ret;
+}
+
+/**
+ * amdgpu_dm_verify_lut3d_size - verifies if 3D LUT is supported and if user
+ * shaper and 3D LUTs match the hw supported size
+ * @adev: amdgpu device
+ * @crtc_state: the DRM CRTC state
+ *
+ * Verifies if pre-blending (DPP) 3D LUT is supported by the HW (DCN 2.0 or
+ * newer) and if the user shaper and 3D LUTs match the supported size.
+ *
+ * Returns:
+ * 0 on success. -EINVAL if lut size are invalid.
+ */
+int amdgpu_dm_verify_lut3d_size(struct amdgpu_device *adev,
+   struct drm_plane_state *plane_state)
+{
+   struct dm_plane_state *dm_plane_state = to_dm_plane_state(plane_state);
+   const struct drm_color_lut *shaper = NULL;
+   uint32_t exp_size, size;
+   bool has_3dlut = adev->dm.dc->caps.color.dpp.hw_3d_lut;
+
+   /* shaper LUT is only available if 3D LUT color caps */
+   exp_size = has_3dlut ? MAX_COLOR_LUT_ENTRIES : 0;
+   shaper = __extract_blob_lut(dm_plane_state->shaper_lut, );
+
+   if (shaper && size != exp_size) {
+   drm_dbg(>ddev,
+   "Invalid Shaper LUT size. Should be %u but got %u.\n",
+   exp_size, size);
+   return -EINVAL;
+   }
+
+   return 0;
+}
+
 /**
  * amdgpu_dm_verify_lut_sizes - verifies if DRM luts match the hw supported 
sizes
  * @crtc_state: the DRM CRTC state
@@ -909,6 

[PATCH v4 20/32] drm/amd/display: reject atomic commit if setting both plane and CRTC degamma

2023-10-05 Thread Melissa Wen
DC only has pre-blending degamma caps (plane/DPP) that is currently in
use for CRTC/post-blending degamma, so that we don't have HW caps to
perform plane and CRTC degamma at the same time. Reject atomic updates
when userspace sets both plane and CRTC degamma properties.

Reviewed-by: Harry Wentland 
Signed-off-by: Melissa Wen 
---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c | 13 -
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index 6acc9ebc52da..354ab46894d2 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -943,9 +943,20 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state 
*crtc,
has_crtc_cm_degamma = (crtc->cm_has_degamma || 
crtc->cm_is_degamma_srgb);
 
ret = __set_dm_plane_degamma(plane_state, dc_plane_state);
-   if (ret != -EINVAL)
+   if (ret == -ENOMEM)
return ret;
 
+   /* We only have one degamma block available (pre-blending) for the
+* whole color correction pipeline, so that we can't actually perform
+* plane and CRTC degamma at the same time. Explicitly reject atomic
+* updates when userspace sets both plane and CRTC degamma properties.
+*/
+   if (has_crtc_cm_degamma && ret != -EINVAL){
+   drm_dbg_kms(crtc->base.crtc->dev,
+   "doesn't support plane and CRTC degamma at the same 
time\n");
+   return -EINVAL;
+   }
+
/* If we are here, it means we don't have plane degamma settings, check
 * if we have CRTC degamma waiting for mapping to pre-blending degamma
 * block
-- 
2.40.1



[PATCH v4 22/32] drm/amd/display: add HDR multiplier support

2023-10-05 Thread Melissa Wen
From: Joshua Ashton 

With `dc_fixpt_from_s3132()` translation, we can just use it to set
hdr_mult.

Reviewed-by: Harry Wentland 
Signed-off-by: Joshua Ashton 
Signed-off-by: Melissa Wen 
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c   | 1 +
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c | 3 +++
 2 files changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index a8a1690b7322..ded52dd780c8 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -8286,6 +8286,7 @@ static void amdgpu_dm_commit_planes(struct 
drm_atomic_state *state,
bundle->surface_updates[planes_count].gamma = 
dc_plane->gamma_correction;
bundle->surface_updates[planes_count].in_transfer_func 
= dc_plane->in_transfer_func;

bundle->surface_updates[planes_count].gamut_remap_matrix = 
_plane->gamut_remap_matrix;
+   bundle->surface_updates[planes_count].hdr_mult = 
dc_plane->hdr_mult;
}
 
amdgpu_dm_plane_fill_dc_scaling_info(dm->adev, new_plane_state,
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index 599bba566226..849e07dd436a 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -926,6 +926,7 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state 
*crtc,
  struct drm_plane_state *plane_state,
  struct dc_plane_state *dc_plane_state)
 {
+   struct dm_plane_state *dm_plane_state = to_dm_plane_state(plane_state);
bool has_crtc_cm_degamma;
int ret;
 
@@ -936,6 +937,8 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state 
*crtc,
/* After, we start to update values according to color props */
has_crtc_cm_degamma = (crtc->cm_has_degamma || 
crtc->cm_is_degamma_srgb);
 
+   dc_plane_state->hdr_mult = 
dc_fixpt_from_s3132(dm_plane_state->hdr_mult);
+
ret = __set_dm_plane_degamma(plane_state, dc_plane_state);
if (ret == -ENOMEM)
return ret;
-- 
2.40.1



[PATCH v4 21/32] drm/amd/display: add dc_fixpt_from_s3132 helper

2023-10-05 Thread Melissa Wen
From: Joshua Ashton 

Detach value translation from CTM to reuse it for programming HDR
multiplier property.

Reviewed-by: Harry Wentland 
Signed-off-by: Joshua Ashton 
Signed-off-by: Melissa Wen 
---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c  |  8 +---
 drivers/gpu/drm/amd/display/include/fixed31_32.h | 12 
 2 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index 354ab46894d2..599bba566226 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -404,7 +404,6 @@ static void __drm_lut_to_dc_gamma(const struct 
drm_color_lut *lut,
 static void __drm_ctm_to_dc_matrix(const struct drm_color_ctm *ctm,
   struct fixed31_32 *matrix)
 {
-   int64_t val;
int i;
 
/*
@@ -423,12 +422,7 @@ static void __drm_ctm_to_dc_matrix(const struct 
drm_color_ctm *ctm,
}
 
/* gamut_remap_matrix[i] = ctm[i - floor(i/4)] */
-   val = ctm->matrix[i - (i / 4)];
-   /* If negative, convert to 2's complement. */
-   if (val & (1ULL << 63))
-   val = -(val & ~(1ULL << 63));
-
-   matrix[i].value = val;
+   matrix[i] = dc_fixpt_from_s3132(ctm->matrix[i - (i / 4)]);
}
 }
 
diff --git a/drivers/gpu/drm/amd/display/include/fixed31_32.h 
b/drivers/gpu/drm/amd/display/include/fixed31_32.h
index d4cf7ead1d87..84da1dd34efd 100644
--- a/drivers/gpu/drm/amd/display/include/fixed31_32.h
+++ b/drivers/gpu/drm/amd/display/include/fixed31_32.h
@@ -69,6 +69,18 @@ static const struct fixed31_32 dc_fixpt_epsilon = { 1LL };
 static const struct fixed31_32 dc_fixpt_half = { 0x8000LL };
 static const struct fixed31_32 dc_fixpt_one = { 0x1LL };
 
+static inline struct fixed31_32 dc_fixpt_from_s3132(__u64 x)
+{
+   struct fixed31_32 val;
+
+   /* If negative, convert to 2's complement. */
+   if (x & (1ULL << 63))
+   x = -(x & ~(1ULL << 63));
+
+   val.value = x;
+   return val;
+}
+
 /*
  * @brief
  * Initialization routines
-- 
2.40.1



[PATCH v4 19/32] drm/amd/display: add plane degamma TF and LUT support

2023-10-05 Thread Melissa Wen
From: Joshua Ashton 

Set DC plane with user degamma LUT or predefined TF from driver-specific
plane color properties. If plane and CRTC degamma are set at the same
time, plane degamma has priority. That means, we only set CRTC degamma
if we don't have plane degamma LUT or TF to configure. We return -EINVAL
if we don't have plane degamma settings, so we can continue and check
CRTC degamma.

Reviewed-by: Harry Wentland 
Signed-off-by: Joshua Ashton 
Signed-off-by: Melissa Wen 
---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c |  4 +-
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h |  1 +
 .../amd/display/amdgpu_dm/amdgpu_dm_color.c   | 70 +--
 3 files changed, 69 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index d44fd8cb6edf..a8a1690b7322 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -5191,7 +5191,9 @@ static int fill_dc_plane_attributes(struct amdgpu_device 
*adev,
 * Always set input transfer function, since plane state is refreshed
 * every time.
 */
-   ret = amdgpu_dm_update_plane_color_mgmt(dm_crtc_state, dc_plane_state);
+   ret = amdgpu_dm_update_plane_color_mgmt(dm_crtc_state,
+   plane_state,
+   dc_plane_state);
if (ret)
return ret;
 
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
index c138457ff12e..c5ec6e4f15c2 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
@@ -918,6 +918,7 @@ int amdgpu_dm_create_color_properties(struct amdgpu_device 
*adev);
 int amdgpu_dm_verify_lut_sizes(const struct drm_crtc_state *crtc_state);
 int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc);
 int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc,
+ struct drm_plane_state *plane_state,
  struct dc_plane_state *dc_plane_state);
 
 void amdgpu_dm_update_connector_after_detect(
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index ffdf493b8ef2..6acc9ebc52da 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -867,9 +867,58 @@ map_crtc_degamma_to_dc_plane(struct dm_crtc_state *crtc,
return 0;
 }
 
+static int
+__set_dm_plane_degamma(struct drm_plane_state *plane_state,
+  struct dc_plane_state *dc_plane_state)
+{
+   struct dm_plane_state *dm_plane_state = to_dm_plane_state(plane_state);
+   const struct drm_color_lut *degamma_lut;
+   enum amdgpu_transfer_function tf = AMDGPU_TRANSFER_FUNCTION_DEFAULT;
+   uint32_t degamma_size;
+   bool has_degamma_lut;
+   int ret;
+
+   degamma_lut = __extract_blob_lut(dm_plane_state->degamma_lut,
+_size);
+
+   has_degamma_lut = degamma_lut &&
+ !__is_lut_linear(degamma_lut, degamma_size);
+
+   tf = dm_plane_state->degamma_tf;
+
+   /* If we don't have plane degamma LUT nor TF to set on DC, we have
+* nothing to do here, return.
+*/
+   if (!has_degamma_lut && tf == AMDGPU_TRANSFER_FUNCTION_DEFAULT)
+   return -EINVAL;
+
+   dc_plane_state->in_transfer_func->tf = amdgpu_tf_to_dc_tf(tf);
+
+   if (has_degamma_lut) {
+   ASSERT(degamma_size == MAX_COLOR_LUT_ENTRIES);
+
+   dc_plane_state->in_transfer_func->type =
+   TF_TYPE_DISTRIBUTED_POINTS;
+
+   ret = __set_input_tf(dc_plane_state->in_transfer_func,
+degamma_lut, degamma_size);
+   if (ret)
+   return ret;
+   } else {
+   dc_plane_state->in_transfer_func->type =
+   TF_TYPE_PREDEFINED;
+
+   if (!mod_color_calculate_degamma_params(NULL,
+   dc_plane_state->in_transfer_func, NULL, false))
+   return -ENOMEM;
+   }
+   return 0;
+}
+
 /**
  * amdgpu_dm_update_plane_color_mgmt: Maps DRM color management to DC plane.
  * @crtc: amdgpu_dm crtc state
+ * @plane_state: DRM plane state
  * @dc_plane_state: target DC surface
  *
  * Update the underlying dc_stream_state's input transfer function (ITF) in
@@ -880,13 +929,28 @@ map_crtc_degamma_to_dc_plane(struct dm_crtc_state *crtc,
  * 0 on success. -ENOMEM if mem allocation fails.
  */
 int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc,
+ struct drm_plane_state *plane_state,
  struct 

[PATCH v4 18/32] drm/amd/display: decouple steps for mapping CRTC degamma to DC plane

2023-10-05 Thread Melissa Wen
The next patch adds pre-blending degamma to AMD color mgmt pipeline, but
pre-blending degamma caps (DPP) is currently in use to provide DRM CRTC
atomic degamma or implicit degamma on legacy gamma. Detach degamma usage
regarding CRTC color properties to manage plane and CRTC color
correction combinations.

Reviewed-by: Harry Wentland 
Signed-off-by: Melissa Wen 
---
 .../amd/display/amdgpu_dm/amdgpu_dm_color.c   | 60 +--
 1 file changed, 42 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index e81263d60b7d..ffdf493b8ef2 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -788,20 +788,9 @@ int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state 
*crtc)
return 0;
 }
 
-/**
- * amdgpu_dm_update_plane_color_mgmt: Maps DRM color management to DC plane.
- * @crtc: amdgpu_dm crtc state
- * @dc_plane_state: target DC surface
- *
- * Update the underlying dc_stream_state's input transfer function (ITF) in
- * preparation for hardware commit. The transfer function used depends on
- * the preparation done on the stream for color management.
- *
- * Returns:
- * 0 on success. -ENOMEM if mem allocation fails.
- */
-int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc,
- struct dc_plane_state *dc_plane_state)
+static int
+map_crtc_degamma_to_dc_plane(struct dm_crtc_state *crtc,
+struct dc_plane_state *dc_plane_state)
 {
const struct drm_color_lut *degamma_lut;
enum dc_transfer_func_predefined tf = TRANSFER_FUNCTION_SRGB;
@@ -824,8 +813,7 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state 
*crtc,
 _size);
ASSERT(degamma_size == MAX_COLOR_LUT_ENTRIES);
 
-   dc_plane_state->in_transfer_func->type =
-   TF_TYPE_DISTRIBUTED_POINTS;
+   dc_plane_state->in_transfer_func->type = 
TF_TYPE_DISTRIBUTED_POINTS;
 
/*
 * This case isn't fully correct, but also fairly
@@ -861,7 +849,7 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state 
*crtc,
   degamma_lut, degamma_size);
if (r)
return r;
-   } else if (crtc->cm_is_degamma_srgb) {
+   } else {
/*
 * For legacy gamma support we need the regamma input
 * in linear space. Assume that the input is sRGB.
@@ -871,8 +859,44 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state 
*crtc,
 
if (tf != TRANSFER_FUNCTION_SRGB &&
!mod_color_calculate_degamma_params(NULL,
-   dc_plane_state->in_transfer_func, NULL, false))
+   
dc_plane_state->in_transfer_func,
+   NULL, false))
return -ENOMEM;
+   }
+
+   return 0;
+}
+
+/**
+ * amdgpu_dm_update_plane_color_mgmt: Maps DRM color management to DC plane.
+ * @crtc: amdgpu_dm crtc state
+ * @dc_plane_state: target DC surface
+ *
+ * Update the underlying dc_stream_state's input transfer function (ITF) in
+ * preparation for hardware commit. The transfer function used depends on
+ * the preparation done on the stream for color management.
+ *
+ * Returns:
+ * 0 on success. -ENOMEM if mem allocation fails.
+ */
+int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc,
+ struct dc_plane_state *dc_plane_state)
+{
+   bool has_crtc_cm_degamma;
+   int ret;
+
+   has_crtc_cm_degamma = (crtc->cm_has_degamma || 
crtc->cm_is_degamma_srgb);
+   if (has_crtc_cm_degamma){
+   /*
+* AMD HW doesn't have post-blending degamma caps. When DRM
+* CRTC atomic degamma is set, we maps it to DPP degamma block
+* (pre-blending) or, on legacy gamma, we use DPP degamma to
+* linearize (implicit degamma) from sRGB/BT709 according to
+* the input space.
+*/
+   ret = map_crtc_degamma_to_dc_plane(crtc, dc_plane_state);
+   if (ret)
+   return ret;
} else {
/* ...Otherwise we can just bypass the DGM block. */
dc_plane_state->in_transfer_func->type = TF_TYPE_BYPASS;
-- 
2.40.1



[PATCH v4 16/32] drm/amd/display: set sdr_ref_white_level to 80 for out_transfer_func

2023-10-05 Thread Melissa Wen
From: Joshua Ashton 

Otherwise this is just initialized to 0. This needs to actually have a
value so that compute_curve can work for PQ EOTF.

Reviewed-by: Harry Wentland 
Signed-off-by: Joshua Ashton 
Co-developed-by: Melissa Wen 
Signed-off-by: Melissa Wen 
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index 782adb8bea43..e81263d60b7d 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -72,6 +72,7 @@
  */
 
 #define MAX_DRM_LUT_VALUE 0x
+#define SDR_WHITE_LEVEL_INIT_VALUE 80
 
 /**
  * amdgpu_dm_init_color_mod - Initialize the color module.
@@ -551,6 +552,7 @@ static int amdgpu_dm_set_atomic_regamma(struct 
dc_stream_state *stream,
 */
out_tf->type = TF_TYPE_DISTRIBUTED_POINTS;
out_tf->tf = tf;
+   out_tf->sdr_ref_white_level = SDR_WHITE_LEVEL_INIT_VALUE;
 
ret = __set_output_tf(out_tf, regamma_lut, regamma_size, 
has_rom);
} else {
-- 
2.40.1



[PATCH v4 13/32] drm/amd/display: add comments to describe DM crtc color mgmt behavior

2023-10-05 Thread Melissa Wen
Describe some expected behavior of the AMD DM color mgmt programming.

Reviewed-by: Harry Wentland 
Signed-off-by: Melissa Wen 
---
 .../drm/amd/display/amdgpu_dm/amdgpu_dm_color.c  | 16 ++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index 2ecfa0e886e8..2b2826a1d855 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -660,13 +660,25 @@ int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state 
*crtc)
crtc->cm_is_degamma_srgb = true;
stream->out_transfer_func->type = TF_TYPE_DISTRIBUTED_POINTS;
stream->out_transfer_func->tf = TRANSFER_FUNCTION_SRGB;
-
+   /*
+* Note: although we pass has_rom as parameter here, we never
+* actually use ROM because the color module only takes the ROM
+* path if transfer_func->type == PREDEFINED.
+*
+* See more in mod_color_calculate_regamma_params()
+*/
r = __set_legacy_tf(stream->out_transfer_func, regamma_lut,
regamma_size, has_rom);
if (r)
return r;
} else if (has_regamma) {
-   /* If atomic regamma, CRTC RGM goes into RGM LUT. */
+   /*
+* CRTC RGM goes into RGM LUT.
+*
+* Note: there is no implicit sRGB regamma here. We are using
+* degamma calculation from color module to calculate the curve
+* from a linear base.
+*/
stream->out_transfer_func->type = TF_TYPE_DISTRIBUTED_POINTS;
stream->out_transfer_func->tf = TRANSFER_FUNCTION_LINEAR;
 
-- 
2.40.1



[PATCH v4 14/32] drm/amd/display: encapsulate atomic regamma operation

2023-10-05 Thread Melissa Wen
We will wire up MPC 3D LUT to DM CRTC color pipeline in the next patch,
but so far, only for atomic interface. By checking
set_output_transfer_func in DC drivers with MPC 3D LUT support, we can
verify that regamma is only programmed when 3D LUT programming fails. As
a groundwork to introduce 3D LUT programming and better understand each
step, detach atomic regamma programming from the CRTC color updating
code.

Reviewed-by: Harry Wentland 
Signed-off-by: Melissa Wen 
---
 .../amd/display/amdgpu_dm/amdgpu_dm_color.c   | 55 ---
 1 file changed, 35 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index 2b2826a1d855..0487fb715945 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -524,6 +524,37 @@ static int __set_output_tf(struct dc_transfer_func *func,
return res ? 0 : -ENOMEM;
 }
 
+static int amdgpu_dm_set_atomic_regamma(struct dc_stream_state *stream,
+   const struct drm_color_lut *regamma_lut,
+   uint32_t regamma_size, bool has_rom)
+{
+   struct dc_transfer_func *out_tf = stream->out_transfer_func;
+   int ret = 0;
+
+   if (regamma_size) {
+   /*
+* CRTC RGM goes into RGM LUT.
+*
+* Note: there is no implicit sRGB regamma here. We are using
+* degamma calculation from color module to calculate the curve
+* from a linear base.
+*/
+   out_tf->type = TF_TYPE_DISTRIBUTED_POINTS;
+   out_tf->tf = TRANSFER_FUNCTION_LINEAR;
+
+   ret = __set_output_tf(out_tf, regamma_lut, regamma_size, 
has_rom);
+   } else {
+   /*
+* No CRTC RGM means we can just put the block into bypass
+* since we don't have any plane level adjustments using it.
+*/
+   out_tf->type = TF_TYPE_BYPASS;
+   out_tf->tf = TRANSFER_FUNCTION_LINEAR;
+   }
+
+   return ret;
+}
+
 /**
  * __set_input_tf - calculates the input transfer function based on expected
  * input space.
@@ -671,28 +702,12 @@ int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state 
*crtc)
regamma_size, has_rom);
if (r)
return r;
-   } else if (has_regamma) {
-   /*
-* CRTC RGM goes into RGM LUT.
-*
-* Note: there is no implicit sRGB regamma here. We are using
-* degamma calculation from color module to calculate the curve
-* from a linear base.
-*/
-   stream->out_transfer_func->type = TF_TYPE_DISTRIBUTED_POINTS;
-   stream->out_transfer_func->tf = TRANSFER_FUNCTION_LINEAR;
-
-   r = __set_output_tf(stream->out_transfer_func, regamma_lut,
-   regamma_size, has_rom);
+   } else {
+   regamma_size = has_regamma ? regamma_size : 0;
+   r = amdgpu_dm_set_atomic_regamma(stream, regamma_lut,
+regamma_size, has_rom);
if (r)
return r;
-   } else {
-   /*
-* No CRTC RGM means we can just put the block into bypass
-* since we don't have any plane level adjustments using it.
-*/
-   stream->out_transfer_func->type = TF_TYPE_BYPASS;
-   stream->out_transfer_func->tf = TRANSFER_FUNCTION_LINEAR;
}
 
/*
-- 
2.40.1



[PATCH v4 17/32] drm/amd/display: mark plane as needing reset if color props change

2023-10-05 Thread Melissa Wen
From: Joshua Ashton 

We should reset a plane state if at least one of the color management
properties differs from old and new state.

Reviewed-by: Harry Wentland 
Signed-off-by: Joshua Ashton 
Co-developed-by: Melissa Wen 
Signed-off-by: Melissa Wen 
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 15 +++
 1 file changed, 15 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 846dbeddd0fb..d44fd8cb6edf 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -9944,6 +9944,10 @@ static bool should_reset_plane(struct drm_atomic_state 
*state,
 */
for_each_oldnew_plane_in_state(state, other, old_other_state, 
new_other_state, i) {
struct amdgpu_framebuffer *old_afb, *new_afb;
+   struct dm_plane_state *dm_new_other_state, *dm_old_other_state;
+
+   dm_new_other_state = to_dm_plane_state(new_other_state);
+   dm_old_other_state = to_dm_plane_state(old_other_state);
 
if (other->type == DRM_PLANE_TYPE_CURSOR)
continue;
@@ -9980,6 +9984,17 @@ static bool should_reset_plane(struct drm_atomic_state 
*state,
old_other_state->color_encoding != 
new_other_state->color_encoding)
return true;
 
+   /* HDR/Transfer Function changes. */
+   if (dm_old_other_state->degamma_tf != 
dm_new_other_state->degamma_tf ||
+   dm_old_other_state->degamma_lut != 
dm_new_other_state->degamma_lut ||
+   dm_old_other_state->hdr_mult != 
dm_new_other_state->hdr_mult ||
+   dm_old_other_state->shaper_lut != 
dm_new_other_state->shaper_lut ||
+   dm_old_other_state->shaper_tf != 
dm_new_other_state->shaper_tf ||
+   dm_old_other_state->lut3d != dm_new_other_state->lut3d ||
+   dm_old_other_state->blend_lut != 
dm_new_other_state->blend_lut ||
+   dm_old_other_state->blend_tf != 
dm_new_other_state->blend_tf)
+   return true;
+
/* Framebuffer checks fall at the end. */
if (!old_other_state->fb || !new_other_state->fb)
continue;
-- 
2.40.1



[PATCH v4 12/32] drm/amd/display: add CRTC gamma TF driver-specific property

2023-10-05 Thread Melissa Wen
Add AMD pre-defined transfer function property to default DRM CRTC gamma
to convert to wire encoding with or without a user gamma LUT. There is
no post-blending regamma ROM for pre-defined TF. When setting Gamma TF
(!= Identity) and LUT at the same time, the color module will combine
the pre-defined TF and the custom LUT values into the LUT that's
actually programmed.

v2:
- enable CRTC prop in the end of driver-specific prop sequence
- define inverse EOTFs as supported regamma TFs
- reword driver-specific function doc to remove shaper/3D LUT

v3:
- spell out TF+LUT behavior in the commit and comments (Harry)

Co-developed-by: Joshua Ashton 
Signed-off-by: Joshua Ashton 
Signed-off-by: Melissa Wen 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h  |  7 ++
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h |  8 +++
 .../amd/display/amdgpu_dm/amdgpu_dm_color.c   |  7 ++
 .../amd/display/amdgpu_dm/amdgpu_dm_crtc.c| 72 +++
 4 files changed, 94 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
index dee35d208493..071cc10bfd90 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
@@ -424,6 +424,13 @@ struct amdgpu_mode_info {
 * from a combination of pre-defined TF and the custom 1D LUT).
 */
struct drm_property *plane_blend_tf_property;
+   /* @regamma_tf_property: Transfer function for CRTC regamma
+* (post-blending). Possible values are defined by `enum
+* amdgpu_transfer_function`. There is no regamma ROM, but we can use
+* AMD color modules to program LUT parameters from predefined TF (or
+* from a combination of pre-defined TF and the custom 1D LUT).
+*/
+   struct drm_property *regamma_tf_property;
 };
 
 #define AMDGPU_MAX_BL_LEVEL 0xFF
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
index 1b96c742d747..c138457ff12e 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
@@ -836,6 +836,14 @@ struct dm_crtc_state {
struct dc_info_packet vrr_infopacket;
 
int abm_level;
+
+/**
+* @regamma_tf:
+*
+* Pre-defined transfer function for converting internal FB -> wire
+* encoding.
+*/
+   enum amdgpu_transfer_function regamma_tf;
 };
 
 #define to_dm_crtc_state(x) container_of(x, struct dm_crtc_state, base)
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index 82c554662faa..2ecfa0e886e8 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -294,6 +294,13 @@ amdgpu_dm_create_color_properties(struct amdgpu_device 
*adev)
return -ENOMEM;
adev->mode_info.plane_blend_tf_property = prop;
 
+   prop = amdgpu_create_tf_property(adev_to_drm(adev),
+"AMD_CRTC_REGAMMA_TF",
+amdgpu_inv_eotf);
+   if (!prop)
+   return -ENOMEM;
+   adev->mode_info.regamma_tf_property = prop;
+
return 0;
 }
 #endif
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c
index 440fc0869a34..d746f0aa0f11 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c
@@ -253,6 +253,7 @@ static struct drm_crtc_state 
*dm_crtc_duplicate_state(struct drm_crtc *crtc)
state->freesync_config = cur->freesync_config;
state->cm_has_degamma = cur->cm_has_degamma;
state->cm_is_degamma_srgb = cur->cm_is_degamma_srgb;
+   state->regamma_tf = cur->regamma_tf;
state->crc_skip_count = cur->crc_skip_count;
state->mpo_requested = cur->mpo_requested;
/* TODO Duplicate dc_stream after objects are stream object is 
flattened */
@@ -289,6 +290,70 @@ static int amdgpu_dm_crtc_late_register(struct drm_crtc 
*crtc)
 }
 #endif
 
+#ifdef AMD_PRIVATE_COLOR
+/**
+ * drm_crtc_additional_color_mgmt - enable additional color properties
+ * @crtc: DRM CRTC
+ *
+ * This function lets the driver enable post-blending CRTC regamma transfer
+ * function property in addition to DRM CRTC gamma LUT. Default value means
+ * linear transfer function, which is the default CRTC gamma LUT behaviour
+ * without this property.
+ */
+static void
+dm_crtc_additional_color_mgmt(struct drm_crtc *crtc)
+{
+   struct amdgpu_device *adev = drm_to_adev(crtc->dev);
+
+   if(adev->dm.dc->caps.color.mpc.ogam_ram)
+   drm_object_attach_property(&crtc->base,
+  adev->mode_info.regamma_tf_property,
+  AMDGPU_TRANSFER_FUNCTION_DEFAULT);
+}
+
+static 

[PATCH v4 15/32] drm/amd/display: add CRTC gamma TF support

2023-10-05 Thread Melissa Wen
From: Joshua Ashton 

Add predefined transfer function programming. There is no post-blending
out gamma ROM for hardcoded curves, but we can use AMD color modules to
program LUT parameters from pre-defined coefficients and an empty
regamma LUT (or bump up LUT parameters with pre-defined TF values).

v2:
- update crtc color mgmt if regamma TF differs between states (Joshua)
- map inverse EOTF to DC transfer function (Melissa)

v3:
- update AMDGPU TF list

v4:
- update comment regarding regamma behavior

Reviewed-by: Harry Wentland 
Signed-off-by: Joshua Ashton 
Co-developed-by: Melissa Wen 
Signed-off-by: Melissa Wen 
---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c |  1 +
 .../amd/display/amdgpu_dm/amdgpu_dm_color.c   | 77 +++
 2 files changed, 61 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 7c55b1c0ac5c..846dbeddd0fb 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -9876,6 +9876,7 @@ static int dm_update_crtc_state(struct 
amdgpu_display_manager *dm,
 * when a modeset is needed, to ensure it gets reprogrammed.
 */
if (dm_new_crtc_state->base.color_mgmt_changed ||
+   dm_old_crtc_state->regamma_tf != dm_new_crtc_state->regamma_tf ||
drm_atomic_crtc_needs_modeset(new_crtc_state)) {
ret = amdgpu_dm_update_crtc_color_mgmt(dm_new_crtc_state);
if (ret)
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index 0487fb715945..782adb8bea43 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -489,16 +489,18 @@ static int __set_output_tf(struct dc_transfer_func *func,
struct calculate_buffer cal_buffer = {0};
bool res;
 
-   ASSERT(lut && lut_size == MAX_COLOR_LUT_ENTRIES);
-
cal_buffer.buffer_index = -1;
 
-   gamma = dc_create_gamma();
-   if (!gamma)
-   return -ENOMEM;
+   if (lut_size) {
+   ASSERT(lut && lut_size == MAX_COLOR_LUT_ENTRIES);
 
-   gamma->num_entries = lut_size;
-   __drm_lut_to_dc_gamma(lut, gamma, false);
+   gamma = dc_create_gamma();
+   if (!gamma)
+   return -ENOMEM;
+
+   gamma->num_entries = lut_size;
+   __drm_lut_to_dc_gamma(lut, gamma, false);
+   }
 
if (func->tf == TRANSFER_FUNCTION_LINEAR) {
/*
@@ -506,41 +508,49 @@ static int __set_output_tf(struct dc_transfer_func *func,
 * on top of a linear input. But degamma params can be used
 * instead to simulate this.
 */
-   gamma->type = GAMMA_CUSTOM;
+   if (gamma)
+   gamma->type = GAMMA_CUSTOM;
res = mod_color_calculate_degamma_params(NULL, func,
-   gamma, true);
+gamma, gamma != NULL);
} else {
/*
 * Assume sRGB. The actual mapping will depend on whether the
 * input was legacy or not.
 */
-   gamma->type = GAMMA_CS_TFM_1D;
-   res = mod_color_calculate_regamma_params(func, gamma, false,
+   if (gamma)
+   gamma->type = GAMMA_CS_TFM_1D;
+   res = mod_color_calculate_regamma_params(func, gamma, gamma != 
NULL,
 has_rom, NULL, 
&cal_buffer);
}
 
-   dc_gamma_release(&gamma);
+   if (gamma)
+   dc_gamma_release(&gamma);
 
return res ? 0 : -ENOMEM;
 }
 
 static int amdgpu_dm_set_atomic_regamma(struct dc_stream_state *stream,
const struct drm_color_lut *regamma_lut,
-   uint32_t regamma_size, bool has_rom)
+   uint32_t regamma_size, bool has_rom,
+   enum dc_transfer_func_predefined tf)
 {
struct dc_transfer_func *out_tf = stream->out_transfer_func;
int ret = 0;
 
-   if (regamma_size) {
+   if (regamma_size || tf != TRANSFER_FUNCTION_LINEAR) {
/*
 * CRTC RGM goes into RGM LUT.
 *
 * Note: there is no implicit sRGB regamma here. We are using
 * degamma calculation from color module to calculate the curve
-* from a linear base.
+* from a linear base if gamma TF is not set. However, if gamma
+* TF (!= Linear) and LUT are set at the same time, we will use
+* regamma calculation, and the color module will combine the
+ 

[PATCH v4 11/32] drm/amd/display: add plane blend LUT and TF driver-specific properties

2023-10-05 Thread Melissa Wen
From: Joshua Ashton 

Blend 1D LUT or a pre-defined transfer function (TF) can be set to
linearize content before blending, so that it's positioned just before
blending planes in the AMD color mgmt pipeline, and after 3D LUT
(non-linear space). Shaper and Blend LUTs are 1D LUTs that sandwich 3D
LUT. Drivers should advertise blend properties according to HW caps.

There is no blend ROM for pre-defined TF. When setting blend TF (!=
Identity) and LUT at the same time, the color module will combine the
pre-defined TF and the custom LUT values into the LUT that's actually
programmed.

v3:
- spell out TF+LUT behavior in the commit and comments (Harry)

Signed-off-by: Joshua Ashton 
Signed-off-by: Melissa Wen 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h  | 22 
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 12 +++
 .../amd/display/amdgpu_dm/amdgpu_dm_color.c   | 21 +++
 .../amd/display/amdgpu_dm/amdgpu_dm_plane.c   | 36 +++
 4 files changed, 91 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
index af70db4f6b4b..dee35d208493 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
@@ -402,6 +402,28 @@ struct amdgpu_mode_info {
 * entries for 3D LUT array is the 3D LUT size cubed;
 */
struct drm_property *plane_lut3d_size_property;
+   /**
+* @plane_blend_lut_property: Plane property for output gamma before
+* blending. Userspace sets a blend LUT to convert colors after 3D LUT
+* conversion. It works as a post-3DLUT 1D LUT. With shaper LUT, they
+* are sandwiching 3D LUT with two 1D LUT. If plane_blend_tf_property
+* != Identity TF, AMD color module will combine the user LUT values
+* with pre-defined TF into the LUT parameters to be programmed.
+*/
+   struct drm_property *plane_blend_lut_property;
+   /**
+* @plane_blend_lut_size_property: Plane property to define the max
+* size of blend LUT as supported by the driver (read-only).
+*/
+   struct drm_property *plane_blend_lut_size_property;
+   /**
+* @plane_blend_tf_property: Plane property to set a predefined
+* transfer function for pre-blending blend/out_gamma (after applying
+* 3D LUT) with or without LUT. There is no blend ROM, but we can use
+* AMD color modules to program LUT parameters from predefined TF (or
+* from a combination of pre-defined TF and the custom 1D LUT).
+*/
+   struct drm_property *plane_blend_tf_property;
 };
 
 #define AMDGPU_MAX_BL_LEVEL 0xFF
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
index 0e2a04a3caf3..1b96c742d747 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
@@ -800,6 +800,18 @@ struct dm_plane_state {
 *  drm_color_lut.
 */
struct drm_property_blob *lut3d;
+   /**
+* @blend_lut: blend lut lookup table blob. The blob (if not NULL) is an
+* array of  drm_color_lut.
+*/
+   struct drm_property_blob *blend_lut;
+   /**
+* @blend_tf:
+*
+* Pre-defined transfer function for converting plane pixel data before
+* applying blend LUT.
+*/
+   enum amdgpu_transfer_function blend_tf;
 };
 
 struct dm_crtc_state {
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index d3c7f9a13a61..82c554662faa 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -273,6 +273,27 @@ amdgpu_dm_create_color_properties(struct amdgpu_device 
*adev)
return -ENOMEM;
adev->mode_info.plane_lut3d_size_property = prop;
 
+   prop = drm_property_create(adev_to_drm(adev),
+  DRM_MODE_PROP_BLOB,
+  "AMD_PLANE_BLEND_LUT", 0);
+   if (!prop)
+   return -ENOMEM;
+   adev->mode_info.plane_blend_lut_property = prop;
+
+   prop = drm_property_create_range(adev_to_drm(adev),
+DRM_MODE_PROP_IMMUTABLE,
+"AMD_PLANE_BLEND_LUT_SIZE", 0, 
UINT_MAX);
+   if (!prop)
+   return -ENOMEM;
+   adev->mode_info.plane_blend_lut_size_property = prop;
+
+   prop = amdgpu_create_tf_property(adev_to_drm(adev),
+"AMD_PLANE_BLEND_TF",
+amdgpu_eotf);
+   if (!prop)
+   return -ENOMEM;
+   adev->mode_info.plane_blend_tf_property = prop;
+
return 0;
 }
 #endif
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c 

[PATCH v4 10/32] drm/amd/display: add plane shaper LUT and TF driver-specific properties

2023-10-05 Thread Melissa Wen
On AMD HW, 3D LUT always assumes a preceding shaper 1D LUT used for
delinearizing and/or normalizing the color space before applying a 3D
LUT. Add pre-defined transfer function to enable delinearizing content
with or without shaper LUT, where AMD color module calculates the
resulted shaper curve. We apply an inverse EOTF to go from linear
values to encoded values. If we are already in a non-linear space and/or
don't need to normalize values, we can bypass shaper LUT with a linear
transfer function that is also the default TF value.

There is no shaper ROM. When setting shaper TF (!= Identity) and LUT at
the same time, the color module will combine the pre-defined TF and the
custom LUT values into the LUT that's actually programmed.

v2:
- squash commits for shaper LUT and shaper TF
- define inverse EOTF as supported shaper TFs

v3:
- spell out TF+LUT behavior in the commit and comments (Harry)
- replace BT709 EOTF by inv OETF

Signed-off-by: Melissa Wen 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h  | 21 
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 11 +++
 .../amd/display/amdgpu_dm/amdgpu_dm_color.c   | 29 +
 .../amd/display/amdgpu_dm/amdgpu_dm_plane.c   | 32 +++
 4 files changed, 93 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
index f7adaa52c23f..af70db4f6b4b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
@@ -363,6 +363,27 @@ struct amdgpu_mode_info {
 * @plane_hdr_mult_property:
 */
struct drm_property *plane_hdr_mult_property;
+   /**
+* @plane_shaper_lut_property: Plane property to set pre-blending shaper LUT
+* that converts color content before 3D LUT. If
+* plane_shaper_tf_property != Identity TF, AMD color module will
+* combine the user LUT values with pre-defined TF into the LUT
+* parameters to be programmed.
+*/
+   struct drm_property *plane_shaper_lut_property;
+   /**
+* @plane_shaper_lut_size_property: Plane property for the size of
+* pre-blending shaper LUT as supported by the driver (read-only).
+*/
+   struct drm_property *plane_shaper_lut_size_property;
+   /**
+* @plane_shaper_tf_property: Plane property to set a predefined
+* transfer function for pre-blending shaper (before applying 3D LUT)
+* with or without LUT. There is no shaper ROM, but we can use AMD
+* color modules to program LUT parameters from predefined TF (or
+* from a combination of pre-defined TF and the custom 1D LUT).
+*/
+   struct drm_property *plane_shaper_tf_property;
/**
 * @plane_lut3d_property: Plane property for color transformation using
 * a 3D LUT (pre-blending), a three-dimensional array where each
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
index 7a2350c62cf1..0e2a04a3caf3 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
@@ -784,6 +784,17 @@ struct dm_plane_state {
 * TF is needed for any subsequent linear-to-non-linear transforms.
 */
__u64 hdr_mult;
+   /**
+* @shaper_lut: shaper lookup table blob. The blob (if not NULL) is an
+* array of  drm_color_lut.
+*/
+   struct drm_property_blob *shaper_lut;
+   /**
+* @shaper_tf:
+*
+* Predefined transfer function to delinearize color space.
+*/
+   enum amdgpu_transfer_function shaper_tf;
/**
 * @lut3d: 3D lookup table blob. The blob (if not NULL) is an array of
 *  drm_color_lut.
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index 011f2f9ec890..d3c7f9a13a61 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -173,6 +173,14 @@ static const u32 amdgpu_eotf =
BIT(AMDGPU_TRANSFER_FUNCTION_GAMMA24_EOTF) |
BIT(AMDGPU_TRANSFER_FUNCTION_GAMMA26_EOTF);
 
+static const u32 amdgpu_inv_eotf =
+   BIT(AMDGPU_TRANSFER_FUNCTION_SRGB_INV_EOTF) |
+   BIT(AMDGPU_TRANSFER_FUNCTION_BT709_OETF) |
+   BIT(AMDGPU_TRANSFER_FUNCTION_PQ_INV_EOTF) |
+   BIT(AMDGPU_TRANSFER_FUNCTION_GAMMA22_INV_EOTF) |
+   BIT(AMDGPU_TRANSFER_FUNCTION_GAMMA24_INV_EOTF) |
+   BIT(AMDGPU_TRANSFER_FUNCTION_GAMMA26_INV_EOTF);
+
 static struct drm_property *
 amdgpu_create_tf_property(struct drm_device *dev,
  const char *name,
@@ -230,6 +238,27 @@ amdgpu_dm_create_color_properties(struct amdgpu_device 
*adev)
return -ENOMEM;
adev->mode_info.plane_hdr_mult_property = prop;
 
+   prop = drm_property_create(adev_to_drm(adev),
+ 

[PATCH v4 09/32] drm/amd/display: add plane 3D LUT driver-specific properties

2023-10-05 Thread Melissa Wen
Add 3D LUT property for plane color transformations using a 3D lookup
table. 3D LUT allows for highly accurate and complex color
transformations and is suitable to adjust the balance between color
channels. It's also more complex to manage and requires more
computational resources.

Since a 3D LUT has a limited number of entries in each dimension we want
to use them in an optimal fashion. This means using the 3D LUT in a
colorspace that is optimized for human vision, such as sRGB, PQ, or
another non-linear space. Therefore, userspace may need one 1D LUT
(shaper) before it to delinearize content and another 1D LUT after 3D
LUT (blend) to linearize content again for blending. The next patches
add these 1D LUTs to the plane color mgmt pipeline.

v3:
- improve commit message about 3D LUT
- describe the 3D LUT entries and size (Harry)

v4:
- advertise 3D LUT max size as the size of a single-dimension

Signed-off-by: Melissa Wen 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h  | 18 +++
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h |  9 
 .../amd/display/amdgpu_dm/amdgpu_dm_color.c   | 14 +++
 .../amd/display/amdgpu_dm/amdgpu_dm_plane.c   | 23 +++
 4 files changed, 64 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
index 62044d41da75..f7adaa52c23f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
@@ -363,6 +363,24 @@ struct amdgpu_mode_info {
 * @plane_hdr_mult_property:
 */
struct drm_property *plane_hdr_mult_property;
+   /**
+* @plane_lut3d_property: Plane property for color transformation using
+* a 3D LUT (pre-blending), a three-dimensional array where each
+* element is an RGB triplet. Each dimension has a size of the cubed
+* root of lut3d_size. The array contains samples from the approximated
+* function. On AMD, values between samples are estimated by
+* tetrahedral interpolation. The array is accessed with three indices,
+* one for each input dimension (color channel), blue being the
+* outermost dimension, red the innermost.
+*/
+   struct drm_property *plane_lut3d_property;
+   /**
+* @plane_lut3d_size_property: Plane property to define the max
+* size of 3D LUT as supported by the driver (read-only). The max size
+* is the max size of one dimension and, therefore, the max number of
+* entries for 3D LUT array is the 3D LUT size cubed;
+*/
+   struct drm_property *plane_lut3d_size_property;
 };
 
 #define AMDGPU_MAX_BL_LEVEL 0xFF
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
index bb2ce843369d..7a2350c62cf1 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
@@ -784,6 +784,11 @@ struct dm_plane_state {
 * TF is needed for any subsequent linear-to-non-linear transforms.
 */
__u64 hdr_mult;
+   /**
+* @lut3d: 3D lookup table blob. The blob (if not NULL) is an array of
+*  drm_color_lut.
+*/
+   struct drm_property_blob *lut3d;
 };
 
 struct dm_crtc_state {
@@ -869,6 +874,10 @@ void amdgpu_dm_update_freesync_caps(struct drm_connector 
*connector,
 
 void amdgpu_dm_trigger_timing_sync(struct drm_device *dev);
 
+/* 3D LUT max size is 17x17x17 (4913 entries) */
+#define MAX_COLOR_3DLUT_SIZE 17
+#define MAX_COLOR_3DLUT_BITDEPTH 12
+/* 1D LUT size */
 #define MAX_COLOR_LUT_ENTRIES 4096
 /* Legacy gamm LUT users such as X doesn't like large LUT sizes */
 #define MAX_COLOR_LEGACY_LUT_ENTRIES 256
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index caf49a044ab4..011f2f9ec890 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -230,6 +230,20 @@ amdgpu_dm_create_color_properties(struct amdgpu_device 
*adev)
return -ENOMEM;
adev->mode_info.plane_hdr_mult_property = prop;
 
+   prop = drm_property_create(adev_to_drm(adev),
+  DRM_MODE_PROP_BLOB,
+  "AMD_PLANE_LUT3D", 0);
+   if (!prop)
+   return -ENOMEM;
+   adev->mode_info.plane_lut3d_property = prop;
+
+   prop = drm_property_create_range(adev_to_drm(adev),
+DRM_MODE_PROP_IMMUTABLE,
+"AMD_PLANE_LUT3D_SIZE", 0, UINT_MAX);
+   if (!prop)
+   return -ENOMEM;
+   adev->mode_info.plane_lut3d_size_property = prop;
+
return 0;
 }
 #endif
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
index 

[PATCH v4 08/32] drm/amd/display: add plane HDR multiplier driver-specific property

2023-10-05 Thread Melissa Wen
From: Joshua Ashton 

Multiplier to 'gain' the plane. When PQ is decoded using the fixed func
transfer function to the internal FP16 fb, 1.0 -> 80 nits (on AMD at
least) When sRGB is decoded, 1.0 -> 1.0.  Therefore, 1.0 multiplier = 80
nits for SDR content. So if you want, 203 nits for SDR content, pass in
(203.0 / 80.0).

v4:
- comment about the PQ TF need for L-to-NL (from Harry's review)

Signed-off-by: Joshua Ashton 
Co-developed-by: Melissa Wen 
Signed-off-by: Melissa Wen 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h|  4 
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h   | 17 +
 .../drm/amd/display/amdgpu_dm/amdgpu_dm_color.c |  6 ++
 .../drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c | 13 +
 4 files changed, 40 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
index 9b6fab86c6c3..62044d41da75 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
@@ -359,6 +359,10 @@ struct amdgpu_mode_info {
 * to go from scanout/encoded values to linear values.
 */
struct drm_property *plane_degamma_tf_property;
+   /**
+* @plane_hdr_mult_property:
+*/
+   struct drm_property *plane_hdr_mult_property;
 };
 
 #define AMDGPU_MAX_BL_LEVEL 0xFF
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
index fc4f188d397e..bb2ce843369d 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
@@ -55,6 +55,9 @@
 #define HDMI_AMD_VENDOR_SPECIFIC_DATA_BLOCK_IEEE_REGISTRATION_ID 0x1A
 #define AMD_VSDB_VERSION_3_FEATURECAP_REPLAYMODE 0x40
 #define HDMI_AMD_VENDOR_SPECIFIC_DATA_BLOCK_VERSION_3 0x3
+
+#define AMDGPU_HDR_MULT_DEFAULT (0x1LL)
+
 /*
 #include "include/amdgpu_dal_power_if.h"
 #include "amdgpu_dm_irq.h"
@@ -767,6 +770,20 @@ struct dm_plane_state {
 * linearize.
 */
enum amdgpu_transfer_function degamma_tf;
+   /**
+* @hdr_mult:
+*
+* Multiplier to 'gain' the plane.  When PQ is decoded using the fixed
+* func transfer function to the internal FP16 fb, 1.0 -> 80 nits (on
+* AMD at least). When sRGB is decoded, 1.0 -> 1.0, obviously.
+* Therefore, 1.0 multiplier = 80 nits for SDR content.  So if you
+* want, 203 nits for SDR content, pass in (203.0 / 80.0).  Format is
+* S31.32 sign-magnitude.
+*
+* HDR multiplier can have a wide range beyond [0.0, 1.0]. This means that PQ
+* TF is needed for any subsequent linear-to-non-linear transforms.
+*/
+   __u64 hdr_mult;
 };
 
 struct dm_crtc_state {
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index d5dbd20a6766..caf49a044ab4 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -224,6 +224,12 @@ amdgpu_dm_create_color_properties(struct amdgpu_device 
*adev)
return -ENOMEM;
adev->mode_info.plane_degamma_tf_property = prop;
 
+   prop = drm_property_create_range(adev_to_drm(adev),
+0, "AMD_PLANE_HDR_MULT", 0, U64_MAX);
+   if (!prop)
+   return -ENOMEM;
+   adev->mode_info.plane_hdr_mult_property = prop;
+
return 0;
 }
 #endif
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
index 04af6db8cffd..ae64d4b73360 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
@@ -1337,6 +1337,7 @@ static void dm_drm_plane_reset(struct drm_plane *plane)
 
__drm_atomic_helper_plane_reset(plane, &amdgpu_state->base);
amdgpu_state->degamma_tf = AMDGPU_TRANSFER_FUNCTION_DEFAULT;
+   amdgpu_state->hdr_mult = AMDGPU_HDR_MULT_DEFAULT;
 }
 
 static struct drm_plane_state *
@@ -1360,6 +1361,7 @@ dm_drm_plane_duplicate_state(struct drm_plane *plane)
drm_property_blob_get(dm_plane_state->degamma_lut);
 
dm_plane_state->degamma_tf = old_dm_plane_state->degamma_tf;
+   dm_plane_state->hdr_mult = old_dm_plane_state->hdr_mult;
 
return &dm_plane_state->base;
 }
@@ -1456,6 +1458,10 @@ dm_atomic_plane_attach_color_mgmt_properties(struct 
amdgpu_display_manager *dm,
   
dm->adev->mode_info.plane_degamma_tf_property,
   AMDGPU_TRANSFER_FUNCTION_DEFAULT);
}
+   /* HDR MULT is always available */
+   drm_object_attach_property(&plane->base,
+  dm->adev->mode_info.plane_hdr_mult_property,
+  AMDGPU_HDR_MULT_DEFAULT);
 }
 
 static int
@@ -1482,6 +1488,11 @@ 

[PATCH v4 07/32] drm/amd/display: document AMDGPU pre-defined transfer functions

2023-10-05 Thread Melissa Wen
Brief documentation about pre-defined transfer function usage on AMD
display driver and standardized EOTFs and inverse EOTFs.

v3:
- Document BT709 OETF (Pekka)
- Fix description of sRGB and pure power funcs (Pekka)

v4:
- Add description of linear and non-linear forms (Harry)

Co-developed-by: Harry Wentland 
Signed-off-by: Harry Wentland 
Signed-off-by: Melissa Wen 
---
 .../amd/display/amdgpu_dm/amdgpu_dm_color.c   | 62 +++
 1 file changed, 62 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index d03bdb010e8b..d5dbd20a6766 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -85,6 +85,68 @@ void amdgpu_dm_init_color_mod(void)
 }
 
 #ifdef AMD_PRIVATE_COLOR
+/* Pre-defined Transfer Functions (TF)
+ *
+ * AMD driver supports pre-defined mathematical functions for transferring
+ * between encoded values and optical/linear space. Depending on HW color caps,
+ * ROMs and curves built by the AMD color module support these transforms.
+ *
+ * The driver-specific color implementation exposes properties for pre-blending
+ * degamma TF, shaper TF (before 3D LUT), and blend(dpp.ogam) TF and
+ * post-blending regamma (mpc.ogam) TF. However, only pre-blending degamma
+ * supports ROM curves. AMD color module uses pre-defined coefficients to build
+ * curves for the other blocks. What can be done by each color block is
+ * described by struct dpp_color_caps and struct mpc_color_caps.
+ *
+ * AMD driver-specific color API exposes the following pre-defined transfer
+ * functions:
+ *
+ * - Identity: linear/identity relationship between pixel value and
+ *   luminance value;
+ * - Gamma 2.2, Gamma 2.4, Gamma 2.6: pure power functions;
+ * - sRGB 2.4: The piece-wise transfer function from IEC 61966-2-1:1999;
+ * - BT.709: has a linear segment in the bottom part and then a power function
+ *   with a 0.45 (~1/2.22) gamma for the rest of the range; standardized by
+ *   ITU-R BT.709-6;
+ * - PQ (Perceptual Quantizer): used for HDR display, allows luminance range
+ *   capability of 0 to 10,000 nits; standardized by SMPTE ST 2084.
+ *
+ * The AMD color model is designed with an assumption that SDR (sRGB, BT.709,
+ * Gamma 2.2, etc.) peak white maps (normalized to 1.0 FP) to 80 nits in the PQ
+ * system. This has the implication that PQ EOTF (non-linear to linear) maps to
+ * [0.0..125.0] where 125.0 = 10,000 nits / 80 nits.
+ *
+ * Non-linear and linear forms are described in the table below:
+ *
+ * ┌───┬─┬──┐
+ * │   │ Non-linear  │   Linear │
+ * ├───┼─┼──┤
+ * │  sRGB │ UNORM or [0.0, 1.0] │ [0.0, 1.0]   │
+ * ├───┼─┼──┤
+ * │ BT709 │ UNORM or [0.0, 1.0] │ [0.0, 1.0]   │
+ * ├───┼─┼──┤
+ * │ Gamma 2.x │ UNORM or [0.0, 1.0] │ [0.0, 1.0]   │
+ * ├───┼─┼──┤
+ * │PQ │ UNORM or FP16 CCCS* │ [0.0, 125.0] │
+ * ├───┼─┼──┤
+ * │  Identity │ UNORM or FP16 CCCS* │ [0.0, 1.0] or CCCS** │
+ * └───┴─┴──┘
+ * * CCCS: Windows canonical composition color space
+ * ** Respectively
+ *
+ * In the driver-specific API, color block names attached to TF properties
+ * suggest the intention regarding non-linear encoding pixel's luminance
+ * values. As some newer encodings don't use gamma curve, we make encoding and
+ * decoding explicit by defining an enum list of transfer functions supported
+ * in terms of EOTF and inverse EOTF, where:
+ *
+ * - EOTF (electro-optical transfer function): is the transfer function to go
+ *   from the encoded value to an optical (linear) value. De-gamma functions
+ *   traditionally do this.
+ * - Inverse EOTF (simply the inverse of the EOTF): is usually intended to go
+ *   from an optical/linear space (which might have been used for blending)
+ *   back to the encoded values. Gamma functions traditionally do this.
+ */
 static const char * const
 amdgpu_transfer_function_names[] = {
[AMDGPU_TRANSFER_FUNCTION_DEFAULT]  = "Default",
-- 
2.40.1



[PATCH v4 05/32] drm/amd/display: add plane degamma TF driver-specific property

2023-10-05 Thread Melissa Wen
From: Joshua Ashton 

Allow userspace to tell the kernel driver the input space and,
therefore, uses correct predefined transfer function (TF) to go from
encoded values to linear values.

v2:
- rename TF enum prefix from DRM_ to AMDGPU_ (Harry)
- remove HLG TF

Reviewed-by: Harry Wentland 
Signed-off-by: Joshua Ashton 
Co-developed-by: Melissa Wen 
Signed-off-by: Melissa Wen 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h  |  5 +
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 19 +
 .../amd/display/amdgpu_dm/amdgpu_dm_color.c   | 21 +++
 .../amd/display/amdgpu_dm/amdgpu_dm_plane.c   | 19 +++--
 4 files changed, 62 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
index 2d00802b3265..9b6fab86c6c3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
@@ -354,6 +354,11 @@ struct amdgpu_mode_info {
 * size of degamma LUT as supported by the driver (read-only).
 */
struct drm_property *plane_degamma_lut_size_property;
+   /**
+* @plane_degamma_tf_property: Plane pre-defined transfer function
+* to go from scanout/encoded values to linear values.
+*/
+   struct drm_property *plane_degamma_tf_property;
 };
 
 #define AMDGPU_MAX_BL_LEVEL 0xFF
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
index 0f565469b4b5..4c4cdf7fc6be 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
@@ -724,6 +724,18 @@ struct amdgpu_dm_wb_connector {
 
 extern const struct amdgpu_ip_block_version dm_ip_block;
 
+enum amdgpu_transfer_function {
+   AMDGPU_TRANSFER_FUNCTION_DEFAULT,
+   AMDGPU_TRANSFER_FUNCTION_SRGB,
+   AMDGPU_TRANSFER_FUNCTION_BT709,
+   AMDGPU_TRANSFER_FUNCTION_PQ,
+   AMDGPU_TRANSFER_FUNCTION_LINEAR,
+   AMDGPU_TRANSFER_FUNCTION_UNITY,
+   AMDGPU_TRANSFER_FUNCTION_GAMMA22,
+   AMDGPU_TRANSFER_FUNCTION_GAMMA24,
+   AMDGPU_TRANSFER_FUNCTION_GAMMA26,
+};
+
 struct dm_plane_state {
struct drm_plane_state base;
struct dc_plane_state *dc_state;
@@ -737,6 +749,13 @@ struct dm_plane_state {
 * The blob (if not NULL) is an array of  drm_color_lut.
 */
struct drm_property_blob *degamma_lut;
+   /**
+* @degamma_tf:
+*
+* Predefined transfer function to tell DC driver the input space to
+* linearize.
+*/
+   enum amdgpu_transfer_function degamma_tf;
 };
 
 struct dm_crtc_state {
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index cf175b86ba80..56ce008b9095 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -85,6 +85,18 @@ void amdgpu_dm_init_color_mod(void)
 }
 
 #ifdef AMD_PRIVATE_COLOR
+static const struct drm_prop_enum_list amdgpu_transfer_function_enum_list[] = {
+   { AMDGPU_TRANSFER_FUNCTION_DEFAULT, "Default" },
+   { AMDGPU_TRANSFER_FUNCTION_SRGB, "sRGB" },
+   { AMDGPU_TRANSFER_FUNCTION_BT709, "BT.709" },
+   { AMDGPU_TRANSFER_FUNCTION_PQ, "PQ (Perceptual Quantizer)" },
+   { AMDGPU_TRANSFER_FUNCTION_LINEAR, "Linear" },
+   { AMDGPU_TRANSFER_FUNCTION_UNITY, "Unity" },
+   { AMDGPU_TRANSFER_FUNCTION_GAMMA22, "Gamma 2.2" },
+   { AMDGPU_TRANSFER_FUNCTION_GAMMA24, "Gamma 2.4" },
+   { AMDGPU_TRANSFER_FUNCTION_GAMMA26, "Gamma 2.6" },
+};
+
 int
 amdgpu_dm_create_color_properties(struct amdgpu_device *adev)
 {
@@ -104,6 +116,15 @@ amdgpu_dm_create_color_properties(struct amdgpu_device 
*adev)
return -ENOMEM;
adev->mode_info.plane_degamma_lut_size_property = prop;
 
+   prop = drm_property_create_enum(adev_to_drm(adev),
+   DRM_MODE_PROP_ENUM,
+   "AMD_PLANE_DEGAMMA_TF",
+   amdgpu_transfer_function_enum_list,
+   
ARRAY_SIZE(amdgpu_transfer_function_enum_list));
+   if (!prop)
+   return -ENOMEM;
+   adev->mode_info.plane_degamma_tf_property = prop;
+
return 0;
 }
 #endif
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
index 69357a8ae887..04af6db8cffd 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
@@ -1332,8 +1332,11 @@ static void dm_drm_plane_reset(struct drm_plane *plane)
amdgpu_state = kzalloc(sizeof(*amdgpu_state), GFP_KERNEL);
WARN_ON(amdgpu_state == NULL);
 
-   if (amdgpu_state)
-   __drm_atomic_helper_plane_reset(plane, _state->base);
+   if 

[PATCH v4 06/32] drm/amd/display: explicitly define EOTF and inverse EOTF

2023-10-05 Thread Melissa Wen
Instead of relying on color block names to get the transfer function
intention regarding the encoding of a pixel's luminance, define supported
Electro-Optical Transfer Functions (EOTFs) and inverse EOTFs, which
include pure gamma or standardized transfer functions.

v3:
- squash linear and unity TFs to identity (Pekka)
- define the right TFs for BT.709 (Pekka and Harry)
- add comment about AMD TF coefficients

Suggested-by: Harry Wentland 
Signed-off-by: Melissa Wen 
---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 27 +---
 .../amd/display/amdgpu_dm/amdgpu_dm_color.c   | 67 ++-
 2 files changed, 71 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
index 4c4cdf7fc6be..fc4f188d397e 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
@@ -724,16 +724,27 @@ struct amdgpu_dm_wb_connector {
 
 extern const struct amdgpu_ip_block_version dm_ip_block;
 
+/* enum amdgpu_transfer_function: pre-defined transfer function supported by 
AMD.
+ *
+ * It includes standardized transfer functions and pure power functions. The
+ * transfer function coefficients are available at modules/color/color_gamma.c
+ */
 enum amdgpu_transfer_function {
AMDGPU_TRANSFER_FUNCTION_DEFAULT,
-   AMDGPU_TRANSFER_FUNCTION_SRGB,
-   AMDGPU_TRANSFER_FUNCTION_BT709,
-   AMDGPU_TRANSFER_FUNCTION_PQ,
-   AMDGPU_TRANSFER_FUNCTION_LINEAR,
-   AMDGPU_TRANSFER_FUNCTION_UNITY,
-   AMDGPU_TRANSFER_FUNCTION_GAMMA22,
-   AMDGPU_TRANSFER_FUNCTION_GAMMA24,
-   AMDGPU_TRANSFER_FUNCTION_GAMMA26,
+   AMDGPU_TRANSFER_FUNCTION_SRGB_EOTF,
+   AMDGPU_TRANSFER_FUNCTION_BT709_INV_OETF,
+   AMDGPU_TRANSFER_FUNCTION_PQ_EOTF,
+   AMDGPU_TRANSFER_FUNCTION_IDENTITY,
+   AMDGPU_TRANSFER_FUNCTION_GAMMA22_EOTF,
+   AMDGPU_TRANSFER_FUNCTION_GAMMA24_EOTF,
+   AMDGPU_TRANSFER_FUNCTION_GAMMA26_EOTF,
+   AMDGPU_TRANSFER_FUNCTION_SRGB_INV_EOTF,
+   AMDGPU_TRANSFER_FUNCTION_BT709_OETF,
+   AMDGPU_TRANSFER_FUNCTION_PQ_INV_EOTF,
+   AMDGPU_TRANSFER_FUNCTION_GAMMA22_INV_EOTF,
+   AMDGPU_TRANSFER_FUNCTION_GAMMA24_INV_EOTF,
+   AMDGPU_TRANSFER_FUNCTION_GAMMA26_INV_EOTF,
+AMDGPU_TRANSFER_FUNCTION_COUNT
 };
 
 struct dm_plane_state {
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index 56ce008b9095..d03bdb010e8b 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -85,18 +85,57 @@ void amdgpu_dm_init_color_mod(void)
 }
 
 #ifdef AMD_PRIVATE_COLOR
-static const struct drm_prop_enum_list amdgpu_transfer_function_enum_list[] = {
-   { AMDGPU_TRANSFER_FUNCTION_DEFAULT, "Default" },
-   { AMDGPU_TRANSFER_FUNCTION_SRGB, "sRGB" },
-   { AMDGPU_TRANSFER_FUNCTION_BT709, "BT.709" },
-   { AMDGPU_TRANSFER_FUNCTION_PQ, "PQ (Perceptual Quantizer)" },
-   { AMDGPU_TRANSFER_FUNCTION_LINEAR, "Linear" },
-   { AMDGPU_TRANSFER_FUNCTION_UNITY, "Unity" },
-   { AMDGPU_TRANSFER_FUNCTION_GAMMA22, "Gamma 2.2" },
-   { AMDGPU_TRANSFER_FUNCTION_GAMMA24, "Gamma 2.4" },
-   { AMDGPU_TRANSFER_FUNCTION_GAMMA26, "Gamma 2.6" },
+static const char * const
+amdgpu_transfer_function_names[] = {
+   [AMDGPU_TRANSFER_FUNCTION_DEFAULT]  = "Default",
+   [AMDGPU_TRANSFER_FUNCTION_IDENTITY] = "Identity",
+   [AMDGPU_TRANSFER_FUNCTION_SRGB_EOTF]= "sRGB EOTF",
+   [AMDGPU_TRANSFER_FUNCTION_BT709_INV_OETF]   = "BT.709 inv_OETF",
+   [AMDGPU_TRANSFER_FUNCTION_PQ_EOTF]  = "PQ EOTF",
+   [AMDGPU_TRANSFER_FUNCTION_GAMMA22_EOTF] = "Gamma 2.2 EOTF",
+   [AMDGPU_TRANSFER_FUNCTION_GAMMA24_EOTF] = "Gamma 2.4 EOTF",
+   [AMDGPU_TRANSFER_FUNCTION_GAMMA26_EOTF] = "Gamma 2.6 EOTF",
+   [AMDGPU_TRANSFER_FUNCTION_SRGB_INV_EOTF]= "sRGB inv_EOTF",
+   [AMDGPU_TRANSFER_FUNCTION_BT709_OETF]   = "BT.709 OETF",
+   [AMDGPU_TRANSFER_FUNCTION_PQ_INV_EOTF]  = "PQ inv_EOTF",
+   [AMDGPU_TRANSFER_FUNCTION_GAMMA22_INV_EOTF] = "Gamma 2.2 inv_EOTF",
+   [AMDGPU_TRANSFER_FUNCTION_GAMMA24_INV_EOTF] = "Gamma 2.4 inv_EOTF",
+   [AMDGPU_TRANSFER_FUNCTION_GAMMA26_INV_EOTF] = "Gamma 2.6 inv_EOTF",
 };
 
+static const u32 amdgpu_eotf =
+   BIT(AMDGPU_TRANSFER_FUNCTION_SRGB_EOTF) |
+   BIT(AMDGPU_TRANSFER_FUNCTION_BT709_INV_OETF) |
+   BIT(AMDGPU_TRANSFER_FUNCTION_PQ_EOTF) |
+   BIT(AMDGPU_TRANSFER_FUNCTION_GAMMA22_EOTF) |
+   BIT(AMDGPU_TRANSFER_FUNCTION_GAMMA24_EOTF) |
+   BIT(AMDGPU_TRANSFER_FUNCTION_GAMMA26_EOTF);
+
+static struct drm_property *
+amdgpu_create_tf_property(struct drm_device *dev,
+ const char *name,
+ u32 supported_tf)
+{
+   u32 

[PATCH v4 00/32] drm/amd/display: add AMD driver-specific properties for color mgmt

2023-10-05 Thread Melissa Wen
Hello,

Just another iteration for AMD driver-specific color properties.
Basically, addressing comments from the previous version.

Recap: this series extends the current KMS color management API with AMD
driver-specific properties to enhance the color management support on
AMD Steam Deck. The key additions to the color pipeline include:

- plane degamma LUT and pre-defined TF;
- plane HDR multiplier;
- plane CTM 3x4;
- plane shaper LUT and pre-defined TF;
- plane 3D LUT;
- plane blend LUT and pre-defined TF;
- CRTC gamma pre-defined TF;

You can find the AMD HW color capabilities documented here:
https://dri.freedesktop.org/docs/drm/gpu/amdgpu/display/display-manager.html#color-management-properties

The userspace case is Gamescope[1], the compositor for SteamOS.
Gamescope has already adopted AMD driver-specific properties to
implement comprehensive color management support, including gamut
mapping, HDR rendering, SDR on HDR, HDR on SDR. Using these features in
the SteamOS 3.5[2] users can expect a significantly enhanced visual
experience. 

You can find a brief overview of the Steam Deck color pipeline here:
https://github.com/ValveSoftware/gamescope/blob/master/src/docs/Steam%20Deck%20Display%20Pipeline.png

Changes from:

[RFC] https://lore.kernel.org/dri-devel/20230423141051.702990-1-m...@igalia.com
- Remove KConfig and guard properties with `AMD_PRIVATE_COLOR`;
- Remove properties for post-blending/CRTC shaper TF+LUT and 3D LUT;
- Use color caps to improve the support of pre-defined curve;

[v1] https://lore.kernel.org/dri-devel/20230523221520.3115570-1-m...@igalia.com
- Replace DRM_ by AMDGPU_ prefix for transfer function (TF) enum; 
- Explicitly define EOTFs and inverse EOTFs and set props accordingly;
- Document pre-defined transfer functions;
- Remove HLG transfer function from supported TFs;
- Remove misleading comments;
- Remove post-blending shaper TF+LUT and 3D LUT support;
- Move driver-specific property operations from amdgpu_display.c to
  amdgpu_dm_color.c;
- Reset planes if any color props change;
- Add plane CTM 3x4 support;
- Removed two DC fixes already applied upstream;

[v2] https://lore.kernel.org/dri-devel/20230810160314.48225-1-m...@igalia.com
- Many documentation fixes: BT.709 OETF, description of sRGB and pure
  power functions, TF+1D LUT behavior;
- Rename CTM2 to CTM 3x4 and fix misleading comment about DC gamut remap;
- Squash `Linear` and `Unity` TF in `Identity`;
- Remove the `MPC gamut remap` patch already applied upstream[3];
- Remove outdated delta segmentation fix;
- Nits/small fixes;

[v3] https://lore.kernel.org/amd-gfx/20230925194932.1329483-1-m...@igalia.com
- Add table to describe value range in linear and non-linear forms
- Comment the PQ TF need after HDR multiplier
- Advertise the 3D LUT size as the size of a single-dimension (read-only)
- remove function to check expected size from 3DLUT caps
- cleanup comments

It's worth noting that driver-specific properties are guarded by
`AMD_PRIVATE_COLOR`. So, finally, this is the color management API when
driver-specific properties are enabled:

+--+
|   PLANE  |
|  |
|  ++  |
|  | AMD Degamma|  |
|  ||  |
|  | EOTF | 1D LUT  |  |
|  ++---+  |
|   |  |
|  +v---+  |
|  |AMD HDR |  |
|  |Multiply|  |
|  ++---+  |
|   |  |
|  +v---+  |
|  |  AMD CTM (3x4) |  |
|  ++---+  |
|   |  |
|  +v---+  |
|  | AMD Shaper |  |
|  ||  |
|  | inv_EOTF | |  |
|  | Custom 1D LUT  |  |
|  ++---+  |
|   |  |
|  +v---+  |
|  |   AMD 3D LUT   |  |
|  |   17^3/12-bit  |  |
|  ++---+  |
|   |  |
|  +v---+  |
|  | AMD Blend  |  |
|  ||  |
|  | EOTF | 1D LUT  |  |
|  ++---+  |
|   |  |
++--v-++
||  Blending  ||
++--+-++
|CRTC   |  |
|   |  |
|   +---v---+  |
|   | DRM Degamma   |  |
|   |   |  |
|   | Custom 1D LUT |  |
|   +---+---+  |
|   |  |
|   +---v---+  |
|   | DRM CTM (3x3) |  |
|   +---+---+  |
|   |  |
|   +---v---+  |
|   | DRM Gamma |  |
|   |   |  |
|   | Custom 1D LUT |  |
|   +---+  |
|   | *AMD Gamma|  |
|   |   inv_EOTF|  |
|   +---+  |
|  |
+--+

Please, let us know your thoughts.

Best Regards,

Melissa Wen

[1] https://github.com/ValveSoftware/gamescope
[2] https://store.steampowered.com/news/app/1675200/view/3686804163591367815
[3] https://lore.kernel.org/dri-devel/20230721132431.692158-1-m...@igalia.com

Joshua Ashton (14):
  drm/amd/display: add plane degamma TF driver-specific property
  drm/amd/display: add plane 

[PATCH v4 03/32] drm/drm_plane: track color mgmt changes per plane

2023-10-05 Thread Melissa Wen
We will add color mgmt properties to DRM planes in the next patches and
we want to track when one of these properties changes to define atomic
commit behaviors. Using a similar approach from CRTC color props, we set
a color_mgmt_changed boolean whenever a plane color prop changes.

Reviewed-by: Harry Wentland 
Signed-off-by: Melissa Wen 
---
 drivers/gpu/drm/drm_atomic.c  | 1 +
 drivers/gpu/drm/drm_atomic_state_helper.c | 1 +
 include/drm/drm_plane.h   | 7 +++
 3 files changed, 9 insertions(+)

diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c
index 2c454568a607..2925371d230d 100644
--- a/drivers/gpu/drm/drm_atomic.c
+++ b/drivers/gpu/drm/drm_atomic.c
@@ -724,6 +724,7 @@ static void drm_atomic_plane_print_state(struct drm_printer 
*p,
   drm_get_color_encoding_name(state->color_encoding));
drm_printf(p, "\tcolor-range=%s\n",
   drm_get_color_range_name(state->color_range));
+   drm_printf(p, "\tcolor_mgmt_changed=%d\n", state->color_mgmt_changed);
 
if (plane->funcs->atomic_print_state)
plane->funcs->atomic_print_state(p, state);
diff --git a/drivers/gpu/drm/drm_atomic_state_helper.c 
b/drivers/gpu/drm/drm_atomic_state_helper.c
index 784e63d70a42..25bb0859fda7 100644
--- a/drivers/gpu/drm/drm_atomic_state_helper.c
+++ b/drivers/gpu/drm/drm_atomic_state_helper.c
@@ -338,6 +338,7 @@ void __drm_atomic_helper_plane_duplicate_state(struct 
drm_plane *plane,
state->fence = NULL;
state->commit = NULL;
state->fb_damage_clips = NULL;
+   state->color_mgmt_changed = false;
 }
 EXPORT_SYMBOL(__drm_atomic_helper_plane_duplicate_state);
 
diff --git a/include/drm/drm_plane.h b/include/drm/drm_plane.h
index 51291983ea44..52c3287da0da 100644
--- a/include/drm/drm_plane.h
+++ b/include/drm/drm_plane.h
@@ -237,6 +237,13 @@ struct drm_plane_state {
 
/** @state: backpointer to global drm_atomic_state */
struct drm_atomic_state *state;
+
+   /**
+* @color_mgmt_changed: Color management properties have changed. Used
+* by the atomic helpers and drivers to steer the atomic commit control
+* flow.
+*/
+   bool color_mgmt_changed : 1;
 };
 
 static inline struct drm_rect
-- 
2.40.1



[PATCH v4 04/32] drm/amd/display: add driver-specific property for plane degamma LUT

2023-10-05 Thread Melissa Wen
Hook up driver-specific atomic operations for managing AMD color
properties. Create AMD driver-specific color management properties
and attach them according to HW capabilities defined by `struct
dc_color_caps`.

First add plane degamma LUT properties, which means the user blob and its
size. We will add more plane color properties in the next patches. In
addition, we define AMD_PRIVATE_COLOR to guard these driver-specific
plane properties.

Plane degamma can be used to linearize input space for arithmetical
operations that are more accurate when applied in linear color.

v2:
- update degamma LUT prop description
- move private color operations from amdgpu_display to amdgpu_dm_color

Reviewed-by: Harry Wentland 
Co-developed-by: Joshua Ashton 
Signed-off-by: Joshua Ashton 
Signed-off-by: Melissa Wen 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h  | 11 +++
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c |  5 ++
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 11 +++
 .../amd/display/amdgpu_dm/amdgpu_dm_color.c   | 24 ++
 .../amd/display/amdgpu_dm/amdgpu_dm_plane.c   | 81 +++
 5 files changed, 132 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
index d8083972e393..2d00802b3265 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
@@ -343,6 +343,17 @@ struct amdgpu_mode_info {
int disp_priority;
const struct amdgpu_display_funcs *funcs;
const enum drm_plane_type *plane_type;
+
+   /* Driver-private color mgmt props */
+
+   /* @plane_degamma_lut_property: Plane property to set a degamma LUT to
+* convert input space before blending.
+*/
+   struct drm_property *plane_degamma_lut_property;
+   /* @plane_degamma_lut_size_property: Plane property to define the max
+* size of degamma LUT as supported by the driver (read-only).
+*/
+   struct drm_property *plane_degamma_lut_size_property;
 };
 
 #define AMDGPU_MAX_BL_LEVEL 0xFF
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index a59a11ae42db..7c55b1c0ac5c 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -4106,6 +4106,11 @@ static int amdgpu_dm_mode_config_init(struct 
amdgpu_device *adev)
return r;
}
 
+#ifdef AMD_PRIVATE_COLOR
+   if (amdgpu_dm_create_color_properties(adev))
+   return -ENOMEM;
+#endif
+
r = amdgpu_dm_audio_init(adev);
if (r) {
dc_release_state(state->context);
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
index b16613082bc3..0f565469b4b5 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
@@ -727,6 +727,16 @@ extern const struct amdgpu_ip_block_version dm_ip_block;
 struct dm_plane_state {
struct drm_plane_state base;
struct dc_plane_state *dc_state;
+
+   /* Plane color mgmt */
+   /**
+* @degamma_lut:
+*
+* 1D LUT for mapping framebuffer/plane pixel data before sampling or
+* blending operations. It's usually applied to linearize input space.
+* The blob (if not NULL) is an array of  drm_color_lut.
+*/
+   struct drm_property_blob *degamma_lut;
 };
 
 struct dm_crtc_state {
@@ -817,6 +827,7 @@ void amdgpu_dm_trigger_timing_sync(struct drm_device *dev);
 #define MAX_COLOR_LEGACY_LUT_ENTRIES 256
 
 void amdgpu_dm_init_color_mod(void);
+int amdgpu_dm_create_color_properties(struct amdgpu_device *adev);
 int amdgpu_dm_verify_lut_sizes(const struct drm_crtc_state *crtc_state);
 int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc);
 int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc,
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index a4cb23d059bd..cf175b86ba80 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -84,6 +84,30 @@ void amdgpu_dm_init_color_mod(void)
setup_x_points_distribution();
 }
 
+#ifdef AMD_PRIVATE_COLOR
+int
+amdgpu_dm_create_color_properties(struct amdgpu_device *adev)
+{
+   struct drm_property *prop;
+
+   prop = drm_property_create(adev_to_drm(adev),
+  DRM_MODE_PROP_BLOB,
+  "AMD_PLANE_DEGAMMA_LUT", 0);
+   if (!prop)
+   return -ENOMEM;
+   adev->mode_info.plane_degamma_lut_property = prop;
+
+   prop = drm_property_create_range(adev_to_drm(adev),
+DRM_MODE_PROP_IMMUTABLE,
+"AMD_PLANE_DEGAMMA_LUT_SIZE", 0, 

[PATCH v4 02/32] drm/drm_property: make replace_property_blob_from_id a DRM helper

2023-10-05 Thread Melissa Wen
Place it in drm_property where drm_property_replace_blob and
drm_property_lookup_blob live. Then we can use the DRM helper for
driver-specific KMS properties too.

Reviewed-by: Harry Wentland 
Reviewed-by: Liviu Dudau 
Signed-off-by: Melissa Wen 
---
 drivers/gpu/drm/arm/malidp_crtc.c |  2 +-
 drivers/gpu/drm/drm_atomic_uapi.c | 43 ---
 drivers/gpu/drm/drm_property.c| 49 +++
 include/drm/drm_property.h|  6 
 4 files changed, 61 insertions(+), 39 deletions(-)

diff --git a/drivers/gpu/drm/arm/malidp_crtc.c 
b/drivers/gpu/drm/arm/malidp_crtc.c
index dc01c43f6193..d72c22dcf685 100644
--- a/drivers/gpu/drm/arm/malidp_crtc.c
+++ b/drivers/gpu/drm/arm/malidp_crtc.c
@@ -221,7 +221,7 @@ static int malidp_crtc_atomic_check_ctm(struct drm_crtc 
*crtc,
 
/*
 * The size of the ctm is checked in
-* drm_atomic_replace_property_blob_from_id.
+* drm_property_replace_blob_from_id.
 */
ctm = (struct drm_color_ctm *)state->ctm->data;
for (i = 0; i < ARRAY_SIZE(ctm->matrix); ++i) {
diff --git a/drivers/gpu/drm/drm_atomic_uapi.c 
b/drivers/gpu/drm/drm_atomic_uapi.c
index d867e7f9f2cd..a6a9ee5086dd 100644
--- a/drivers/gpu/drm/drm_atomic_uapi.c
+++ b/drivers/gpu/drm/drm_atomic_uapi.c
@@ -362,39 +362,6 @@ static s32 __user *get_out_fence_for_connector(struct 
drm_atomic_state *state,
return fence_ptr;
 }
 
-static int
-drm_atomic_replace_property_blob_from_id(struct drm_device *dev,
-struct drm_property_blob **blob,
-uint64_t blob_id,
-ssize_t expected_size,
-ssize_t expected_elem_size,
-bool *replaced)
-{
-   struct drm_property_blob *new_blob = NULL;
-
-   if (blob_id != 0) {
-   new_blob = drm_property_lookup_blob(dev, blob_id);
-   if (new_blob == NULL)
-   return -EINVAL;
-
-   if (expected_size > 0 &&
-   new_blob->length != expected_size) {
-   drm_property_blob_put(new_blob);
-   return -EINVAL;
-   }
-   if (expected_elem_size > 0 &&
-   new_blob->length % expected_elem_size != 0) {
-   drm_property_blob_put(new_blob);
-   return -EINVAL;
-   }
-   }
-
-   *replaced |= drm_property_replace_blob(blob, new_blob);
-   drm_property_blob_put(new_blob);
-
-   return 0;
-}
-
 static int drm_atomic_crtc_set_property(struct drm_crtc *crtc,
struct drm_crtc_state *state, struct drm_property *property,
uint64_t val)
@@ -415,7 +382,7 @@ static int drm_atomic_crtc_set_property(struct drm_crtc 
*crtc,
} else if (property == config->prop_vrr_enabled) {
state->vrr_enabled = val;
} else if (property == config->degamma_lut_property) {
-   ret = drm_atomic_replace_property_blob_from_id(dev,
+   ret = drm_property_replace_blob_from_id(dev,
>degamma_lut,
val,
-1, sizeof(struct drm_color_lut),
@@ -423,7 +390,7 @@ static int drm_atomic_crtc_set_property(struct drm_crtc 
*crtc,
state->color_mgmt_changed |= replaced;
return ret;
} else if (property == config->ctm_property) {
-   ret = drm_atomic_replace_property_blob_from_id(dev,
+   ret = drm_property_replace_blob_from_id(dev,
>ctm,
val,
sizeof(struct drm_color_ctm), -1,
@@ -431,7 +398,7 @@ static int drm_atomic_crtc_set_property(struct drm_crtc 
*crtc,
state->color_mgmt_changed |= replaced;
return ret;
} else if (property == config->gamma_lut_property) {
-   ret = drm_atomic_replace_property_blob_from_id(dev,
+   ret = drm_property_replace_blob_from_id(dev,
>gamma_lut,
val,
-1, sizeof(struct drm_color_lut),
@@ -563,7 +530,7 @@ static int drm_atomic_plane_set_property(struct drm_plane 
*plane,
} else if (property == plane->color_range_property) {
state->color_range = val;
} else if (property == config->prop_fb_damage_clips) {
-   ret = drm_atomic_replace_property_blob_from_id(dev,
+   ret = drm_property_replace_blob_from_id(dev,
>fb_damage_clips,
val,
-1,
@@ -729,7 +696,7 @@ static int 

[PATCH v4 01/32] drm/drm_mode_object: increase max objects to accommodate new color props

2023-10-05 Thread Melissa Wen
DRM_OBJECT_MAX_PROPERTY limits the number of properties to be attached
and we are increasing that value every time we add a new property (generic
or driver-specific).

In this series, we are adding 13 new KMS driver-specific properties for
AMD color management:
- CRTC Gamma enumerated Transfer Function
- Plane: Degamma LUT+size+TF, HDR multiplier, shaper LUT+size+TF, 3D
  LUT+size, blend LUT+size+TF (12)

Therefore, just increase DRM_OBJECT_MAX_PROPERTY to a number (64) that
accommodates these new properties and gives some room for others,
avoiding changing this number every time we add a new KMS property.

Reviewed-by: Harry Wentland 
Reviewed-by: Simon Ser 
Signed-off-by: Melissa Wen 
---
 include/drm/drm_mode_object.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/drm/drm_mode_object.h b/include/drm/drm_mode_object.h
index 912f1e415685..08d7a7f0188f 100644
--- a/include/drm/drm_mode_object.h
+++ b/include/drm/drm_mode_object.h
@@ -60,7 +60,7 @@ struct drm_mode_object {
void (*free_cb)(struct kref *kref);
 };
 
-#define DRM_OBJECT_MAX_PROPERTY 24
+#define DRM_OBJECT_MAX_PROPERTY 64
 /**
  * struct drm_object_properties - property tracking for _mode_object
  */
-- 
2.40.1



Re: [PATCH 0/9] drm: Annotate structs with __counted_by

2023-10-05 Thread Kees Cook
On Thu, Oct 05, 2023 at 11:42:38AM +0200, Christian König wrote:
> Am 02.10.23 um 20:22 schrieb Kees Cook:
> > On Mon, Oct 02, 2023 at 08:11:41PM +0200, Christian König wrote:
> > > Am 02.10.23 um 20:08 schrieb Kees Cook:
> > > > On Mon, Oct 02, 2023 at 08:01:57PM +0200, Christian König wrote:
> > > > > Am 02.10.23 um 18:53 schrieb Kees Cook:
> > > > > > On Mon, Oct 02, 2023 at 11:06:19AM -0400, Alex Deucher wrote:
> > > > > > > On Mon, Oct 2, 2023 at 5:20 AM Christian König
> > > > > > >  wrote:
> > > > > > > > Am 29.09.23 um 21:33 schrieb Kees Cook:
> > > > > > > > > On Fri, 22 Sep 2023 10:32:05 -0700, Kees Cook wrote:
> > > > > > > > > > This is a batch of patches touching drm for preparing for 
> > > > > > > > > > the coming
> > > > > > > > > > implementation by GCC and Clang of the __counted_by 
> > > > > > > > > > attribute. Flexible
> > > > > > > > > > array members annotated with __counted_by can have their 
> > > > > > > > > > accesses
> > > > > > > > > > bounds-checked at run-time checking via CONFIG_UBSAN_BOUNDS 
> > > > > > > > > > (for array
> > > > > > > > > > indexing) and CONFIG_FORTIFY_SOURCE (for 
> > > > > > > > > > strcpy/memcpy-family functions).
> > > > > > > > > > 
> > > > > > > > > > As found with Coccinelle[1], add __counted_by to structs 
> > > > > > > > > > that would
> > > > > > > > > > benefit from the annotation.
> > > > > > > > > > 
> > > > > > > > > > [...]
> > > > > > > > > Since this got Acks, I figure I should carry it in my tree. 
> > > > > > > > > Let me know
> > > > > > > > > if this should go via drm instead.
> > > > > > > > > 
> > > > > > > > > Applied to for-next/hardening, thanks!
> > > > > > > > > 
> > > > > > > > > [1/9] drm/amd/pm: Annotate struct 
> > > > > > > > > smu10_voltage_dependency_table with __counted_by
> > > > > > > > >   https://git.kernel.org/kees/c/a6046ac659d6
> > > > > > > > STOP! In a follow up discussion Alex and I figured out that 
> > > > > > > > this won't work.
> > > > > > I'm so confused; from the discussion I saw that Alex said both 
> > > > > > instances
> > > > > > were false positives?
> > > > > > 
> > > > > > > > The value in the structure is byte swapped based on some 
> > > > > > > > firmware
> > > > > > > > endianness which not necessary matches the CPU endianness.
> > > > > > > SMU10 is APU only so the endianess of the SMU firmware and the CPU
> > > > > > > will always match.
> > > > > > Which I think is what is being said here?
> > > > > > 
> > > > > > > > Please revert that one from going upstream if it's already on 
> > > > > > > > it's way.
> > > > > > > > 
> > > > > > > > And because of those reasons I strongly think that patches like 
> > > > > > > > this
> > > > > > > > should go through the DRM tree :)
> > > > > > Sure, that's fine -- please let me know. It was others Acked/etc. 
> > > > > > Who
> > > > > > should carry these patches?
> > > > > Probably best if the relevant maintainer pick them up individually.
> > > > > 
> > > > > Some of those structures are filled in by firmware/hardware and only 
> > > > > the
> > > > > maintainers can judge if that value actually matches what the compiler
> > > > > needs.
> > > > > 
> > > > > We have cases where individual bits are used as flags or when the 
> > > > > size is
> > > > > byte swapped etc...
> > > > > 
> > > > > Even Alex and I didn't immediately say how and where that field is 
> > > > > actually
> > > > > used and had to dig that up. That's where the confusion came from.
> > > > Okay, I've dropped them all from my tree. Several had Acks/Reviews, so
> > > > hopefully those can get picked up for the DRM tree?
> > > I will pick those up to go through drm-misc-next.
> > > 
> > > Going to ping maintainers once more when I'm not sure if stuff is correct 
> > > or
> > > not.
> > Sounds great; thanks!
> 
> I wasn't 100% sure for the VC4 patch, but pushed the whole set to
> drm-misc-next anyway.
> 
> This also means that the patches are now auto merged into the drm-tip
> integration branch and should any build or unit test go boom we should
> notice immediately and can revert it pretty easily.

Thanks very much; I'll keep an eye out for any reports.

-- 
Kees Cook


Re: [PATCH] drm/amd/display: Fix mst hub unplug warning

2023-10-05 Thread Alex Deucher
On Thu, Oct 5, 2023 at 4:04 AM Wayne Lin  wrote:
>
> [Why]
> Unplug mst hub will cause warning. That's because
> dm_helpers_construct_old_payload() is changed to be called after
> payload removement from dc link.
>
> In dm_helpers_construct_old_payload(), We refer to the vcpi in
> payload allocation table of dc link to construct the old payload
> and payload is no longer in the table when we call the function
> now.
>
> [How]
> Refer to the mst_state to construct the number of time slot for old
> payload now. Note that dm_helpers_construct_old_payload() is just
> a quick workaround before and we are going to abandon it soon.
>
> Fixes: 5aa1dfcdf0a4 ("drm/mst: Refactor the flow for payload 
> allocation/removement")
> Reviewed-by: Jerry Zuo 
> Signed-off-by: Wayne Lin 

Pushed to drm-misc-next.  Thanks!

Alex

> ---
>  .../amd/display/amdgpu_dm/amdgpu_dm_helpers.c | 38 +--
>  1 file changed, 18 insertions(+), 20 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c 
> b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
> index baf7e5254fb3..2f94bcf128c0 100644
> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
> @@ -204,15 +204,16 @@ void dm_helpers_dp_update_branch_info(
>  {}
>
>  static void dm_helpers_construct_old_payload(
> -   struct dc_link *link,
> -   int pbn_per_slot,
> +   struct drm_dp_mst_topology_mgr *mgr,
> +   struct drm_dp_mst_topology_state *mst_state,
> struct drm_dp_mst_atomic_payload *new_payload,
> struct drm_dp_mst_atomic_payload *old_payload)
>  {
> -   struct link_mst_stream_allocation_table current_link_table =
> -   
> link->mst_stream_alloc_table;
> -   struct link_mst_stream_allocation *dc_alloc;
> -   int i;
> +   struct drm_dp_mst_atomic_payload *pos;
> +   int pbn_per_slot = mst_state->pbn_div;
> +   u8 next_payload_vc_start = mgr->next_start_slot;
> +   u8 payload_vc_start = new_payload->vc_start_slot;
> +   u8 allocated_time_slots;
>
> *old_payload = *new_payload;
>
> @@ -221,20 +222,17 @@ static void dm_helpers_construct_old_payload(
>  * struct drm_dp_mst_atomic_payload are don't care fields
>  * while calling drm_dp_remove_payload_part2()
>  */
> -   for (i = 0; i < current_link_table.stream_count; i++) {
> -   dc_alloc =
> -   _link_table.stream_allocations[i];
> -
> -   if (dc_alloc->vcp_id == new_payload->vcpi) {
> -   old_payload->time_slots = dc_alloc->slot_count;
> -   old_payload->pbn = dc_alloc->slot_count * 
> pbn_per_slot;
> -   break;
> -   }
> +   list_for_each_entry(pos, _state->payloads, next) {
> +   if (pos != new_payload &&
> +   pos->vc_start_slot > payload_vc_start &&
> +   pos->vc_start_slot < next_payload_vc_start)
> +   next_payload_vc_start = pos->vc_start_slot;
> }
>
> -   /* make sure there is an old payload*/
> -   ASSERT(i != current_link_table.stream_count);
> +   allocated_time_slots = next_payload_vc_start - payload_vc_start;
>
> +   old_payload->time_slots = allocated_time_slots;
> +   old_payload->pbn = allocated_time_slots * pbn_per_slot;
>  }
>
>  /*
> @@ -272,8 +270,8 @@ bool dm_helpers_dp_mst_write_payload_allocation_table(
> drm_dp_add_payload_part1(mst_mgr, mst_state, new_payload);
> } else {
> /* construct old payload by VCPI*/
> -   dm_helpers_construct_old_payload(stream->link, 
> mst_state->pbn_div,
> -   new_payload, _payload);
> +   dm_helpers_construct_old_payload(mst_mgr, mst_state,
> +new_payload, _payload);
> target_payload = _payload;
>
> drm_dp_remove_payload_part1(mst_mgr, mst_state, new_payload);
> @@ -366,7 +364,7 @@ bool dm_helpers_dp_mst_send_payload_allocation(
> if (enable) {
> ret = drm_dp_add_payload_part2(mst_mgr, 
> mst_state->base.state, new_payload);
> } else {
> -   dm_helpers_construct_old_payload(stream->link, 
> mst_state->pbn_div,
> +   dm_helpers_construct_old_payload(mst_mgr, mst_state,
>  new_payload, _payload);
> drm_dp_remove_payload_part2(mst_mgr, mst_state, _payload, 
> new_payload);
> }
> --
> 2.37.3
>


Re: [PATCH] drm/amdgpu: Enable SMU 13.0.0 optimizations when ROCm is active (v2)

2023-10-05 Thread Alex Deucher
On Wed, Oct 4, 2023 at 11:47 PM Zhang, Hawking  wrote:
>
> [AMD Official Use Only - General]
>
> Hmm... thinking about it more, will it override the profile mode/workload for 
> 0xC8 or 0xCC SKU as well. In another words, does it mean the pmfw fix is 
> general to all the 13_0_0 SKUs.

Yes, my understanding is that this should apply to all skus.

Alex

>
> Other than that, the patch looks good to me.
>
> Regards,
> Hawking
>
> -Original Message-
> From: amd-gfx  On Behalf Of Zhang, 
> Hawking
> Sent: Thursday, October 5, 2023 11:32
> To: Deucher, Alexander ; 
> amd-gfx@lists.freedesktop.org
> Cc: Deucher, Alexander ; Liu, Kun 
> 
> Subject: RE: [PATCH] drm/amdgpu: Enable SMU 13.0.0 optimizations when ROCm is 
> active (v2)
>
> [AMD Official Use Only - General]
>
> [AMD Official Use Only - General]
>
> Reviewed-by: Hawking Zhang 
>
> Regards,
> Hawking
> -Original Message-
> From: amd-gfx  On Behalf Of Alex 
> Deucher
> Sent: Wednesday, October 4, 2023 23:34
> To: amd-gfx@lists.freedesktop.org
> Cc: Deucher, Alexander ; Liu, Kun 
> 
> Subject: [PATCH] drm/amdgpu: Enable SMU 13.0.0 optimizations when ROCm is 
> active (v2)
>
> From: Kun Liu 
>
> When ROCm is active enable additional SMU 13.0.0 optimizations.
> This reuses the unused powersave profile on PMFW.
>
> v2: move to the swsmu code since we need both bits active in
> the workload mask.
>
> Signed-off-by: Alex Deucher 
> ---
>  .../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c| 17 -
>  1 file changed, 16 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c 
> b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
> index 684b4e01fac2..83035fb1839a 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
> +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
> @@ -2447,6 +2447,7 @@ static int smu_v13_0_0_set_power_profile_mode(struct 
> smu_context *smu,
> DpmActivityMonitorCoeffInt_t *activity_monitor =
> &(activity_monitor_external.DpmActivityMonitorCoeffInt);
> int workload_type, ret = 0;
> +   u32 workload_mask;
>
> smu->power_profile_mode = input[size];
>
> @@ -2536,9 +2537,23 @@ static int smu_v13_0_0_set_power_profile_mode(struct 
> smu_context *smu,
> if (workload_type < 0)
> return -EINVAL;
>
> +   workload_mask = 1 << workload_type;
> +
> +   /* Add optimizations for SMU13.0.0.  Reuse the power saving profile */
> +   if (smu->power_profile_mode == PP_SMC_POWER_PROFILE_COMPUTE &&
> +   (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 
> 0)) &&
> +   ((smu->adev->pm.fw_version == 0x004e6601) ||
> +(smu->adev->pm.fw_version >= 0x004e7300))) {
> +   workload_type = smu_cmn_to_asic_specific_index(smu,
> +  
> CMN2ASIC_MAPPING_WORKLOAD,
> +  
> PP_SMC_POWER_PROFILE_POWERSAVING);
> +   if (workload_type >= 0)
> +   workload_mask |= 1 << workload_type;
> +   }
> +
> return smu_cmn_send_smc_msg_with_param(smu,
>SMU_MSG_SetWorkloadMask,
> -  1 << workload_type,
> +  workload_mask,
>NULL);  }
>
> --
> 2.41.0
>


Re: [PATCH v4] drm/amdkfd: Use partial migrations in GPU page faults

2023-10-05 Thread Chen, Xiaogang



On 10/5/2023 8:25 AM, Philip Yang wrote:


Sorry for the late reply, just notice 2 other issues:

1. function svm_range_split_by_granularity can be removed now.


yes, the code has been sent to gerrit and merged. Will do it next time.


2. svm_range_restore_pages should map the partial range to GPUs after 
partial migration.


I think partial mapping is the next step after partial migration is done. I 
have been thinking about partial mapping. Will submit a new patch to address it.


Regards

Xiaogang


Regards,

Philip

On 2023-10-03 19:31, Xiaogang.Chen wrote:

From: Xiaogang Chen

This patch implements partial migration in gpu page fault according to migration
granularity(default 2MB) and not split svm range in cpu page fault handling.
A svm range may include pages from both system ram and vram of one gpu now.
These changes are expected to improve migration performance and reduce mmu
callback and TLB flush workloads.

Signed-off-by: Xiaogang Chen
---
  drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 156 +--
  drivers/gpu/drm/amd/amdkfd/kfd_migrate.h |   6 +-
  drivers/gpu/drm/amd/amdkfd/kfd_svm.c |  83 +---
  drivers/gpu/drm/amd/amdkfd/kfd_svm.h |   6 +-
  4 files changed, 162 insertions(+), 89 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index 6c25dab051d5..6a059e4aff86 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -442,10 +442,10 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct 
svm_range *prange,
goto out_free;
}
if (cpages != npages)
-   pr_debug("partial migration, 0x%lx/0x%llx pages migrated\n",
+   pr_debug("partial migration, 0x%lx/0x%llx pages collected\n",
 cpages, npages);
else
-   pr_debug("0x%lx pages migrated\n", cpages);
+   pr_debug("0x%lx pages collected\n", cpages);
  
  	r = svm_migrate_copy_to_vram(node, prange, , , scratch, ttm_res_offset);

migrate_vma_pages();
@@ -479,6 +479,8 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct 
svm_range *prange,
   * svm_migrate_ram_to_vram - migrate svm range from system to device
   * @prange: range structure
   * @best_loc: the device to migrate to
+ * @start_mgr: start page to migrate
+ * @last_mgr: last page to migrate
   * @mm: the process mm structure
   * @trigger: reason of migration
   *
@@ -489,6 +491,7 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct 
svm_range *prange,
   */
  static int
  svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
+   unsigned long start_mgr, unsigned long last_mgr,
struct mm_struct *mm, uint32_t trigger)
  {
unsigned long addr, start, end;
@@ -498,23 +501,30 @@ svm_migrate_ram_to_vram(struct svm_range *prange, 
uint32_t best_loc,
unsigned long cpages = 0;
long r = 0;
  
-	if (prange->actual_loc == best_loc) {

-   pr_debug("svms 0x%p [0x%lx 0x%lx] already on best_loc 0x%x\n",
-prange->svms, prange->start, prange->last, best_loc);
+   if (!best_loc) {
+   pr_debug("svms 0x%p [0x%lx 0x%lx] migrate to sys ram\n",
+   prange->svms, start_mgr, last_mgr);
return 0;
}
  
+	if (start_mgr < prange->start || last_mgr > prange->last) {

+   pr_debug("range [0x%lx 0x%lx] out prange [0x%lx 0x%lx]\n",
+start_mgr, last_mgr, prange->start, 
prange->last);
+   return -EFAULT;
+   }
+
node = svm_range_get_node_by_id(prange, best_loc);
if (!node) {
pr_debug("failed to get kfd node by id 0x%x\n", best_loc);
return -ENODEV;
}
  
-	pr_debug("svms 0x%p [0x%lx 0x%lx] to gpu 0x%x\n", prange->svms,

-prange->start, prange->last, best_loc);
+   pr_debug("svms 0x%p [0x%lx 0x%lx] in [0x%lx 0x%lx] to gpu 0x%x\n",
+   prange->svms, start_mgr, last_mgr, prange->start, prange->last,
+   best_loc);
  
-	start = prange->start << PAGE_SHIFT;

-   end = (prange->last + 1) << PAGE_SHIFT;
+   start = start_mgr << PAGE_SHIFT;
+   end = (last_mgr + 1) << PAGE_SHIFT;
  
  	r = svm_range_vram_node_new(node, prange, true);

if (r) {
@@ -544,8 +554,11 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t 
best_loc,
  
  	if (cpages) {

prange->actual_loc = best_loc;
-   svm_range_dma_unmap(prange);
-   } else {
+   prange->vram_pages = prange->vram_pages + cpages;
+   } else if (!prange->actual_loc) {
+   /* if no page migrated and all pages from prange are at
+* sys ram drop svm_bo got from svm_range_vram_node_new
+*/
svm_range_vram_node_free(prange);
}
  
@@ -663,19 +676,19 @@ 

Re: [PATCH v4 1/3] drm/amd: Evict resources during PM ops prepare() callback

2023-10-05 Thread Mario Limonciello

On 10/5/2023 09:39, Christian König wrote:

Am 04.10.23 um 19:18 schrieb Mario Limonciello:

Linux PM core has a prepare() callback run before suspend.

If the system is under high memory pressure, the resources may need
to be evicted into swap instead.  If the storage backing for swap
is offlined during the suspend() step then such a call may fail.

So duplicate this step into prepare() to move evict majority of
resources while leaving all existing steps that put the GPU into a
low power state in suspend().

Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2362
Signed-off-by: Mario Limonciello 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu.h    |  1 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 26 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c    |  7 +++---
  3 files changed, 30 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h

index d23fb4b5ad95..6643d0ed6b1b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1413,6 +1413,7 @@ void amdgpu_driver_postclose_kms(struct 
drm_device *dev,

  void amdgpu_driver_release_kms(struct drm_device *dev);
  int amdgpu_device_ip_suspend(struct amdgpu_device *adev);
+int amdgpu_device_prepare(struct drm_device *dev);
  int amdgpu_device_suspend(struct drm_device *dev, bool fbcon);
  int amdgpu_device_resume(struct drm_device *dev, bool fbcon);
  u32 amdgpu_get_vblank_counter_kms(struct drm_crtc *crtc);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c

index bad2b5577e96..67acee569c08 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4259,6 +4259,31 @@ static int amdgpu_device_evict_resources(struct 
amdgpu_device *adev)

  /*
   * Suspend & resume.
   */
+/**
+ * amdgpu_device_prepare - prepare for device suspend
+ *
+ * @dev: drm dev pointer
+ *
+ * Prepare to put the hw in the suspend state (all asics).
+ * Returns 0 for success or an error on failure.
+ * Called at driver suspend.
+ */
+int amdgpu_device_prepare(struct drm_device *dev)
+{
+    struct amdgpu_device *adev = drm_to_adev(dev);
+    int r;
+
+    if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
+    return 0;
+
+    /* Evict the majority of BOs before starting suspend sequence */
+    r = amdgpu_device_evict_resources(adev);
+    if (r)
+    return r;
+
+    return 0;
+}
+
  /**
   * amdgpu_device_suspend - initiate device suspend
   *
@@ -4279,7 +4304,6 @@ int amdgpu_device_suspend(struct drm_device 
*dev, bool fbcon)

  adev->in_suspend = true;
-    /* Evict the majority of BOs before grabbing the full access */
  r = amdgpu_device_evict_resources(adev);
  if (r)
  return r;


I would just completely drop this extra amdgpu_device_evict_resources() 
call now.


We have a second call which is used to evacuate firmware etc... after 
the hw has been shut down. That one can't move, but also shouldn't 
allocate that much memory.




The problem is that amdgpu_device_suspend() is also called from 
amdgpu_switcheroo_set_state() as well as a bunch of pmops sequences that 
I don't expect call prepare() like poweroff().


I would think we still want to evict resources at the beginning of 
amdgpu_device_suspend() for all of those.


So it's an extra call for the prepare() path but it should be harmless.


Regards,
Christian.

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c

index e3471293846f..175167582db0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -2425,8 +2425,9 @@ static int amdgpu_pmops_prepare(struct device *dev)
  /* Return a positive number here so
   * DPM_FLAG_SMART_SUSPEND works properly
   */
-    if (amdgpu_device_supports_boco(drm_dev))
-    return pm_runtime_suspended(dev);
+    if (amdgpu_device_supports_boco(drm_dev) &&
+    pm_runtime_suspended(dev))
+    return 1;
  /* if we will not support s3 or s2i for the device
   *  then skip suspend
@@ -2435,7 +2436,7 @@ static int amdgpu_pmops_prepare(struct device *dev)
  !amdgpu_acpi_is_s3_active(adev))
  return 1;
-    return 0;
+    return amdgpu_device_prepare(drm_dev);
  }
  static void amdgpu_pmops_complete(struct device *dev)






Re: [PATCH v4 1/3] drm/amd: Evict resources during PM ops prepare() callback

2023-10-05 Thread Christian König

Am 04.10.23 um 19:18 schrieb Mario Limonciello:

Linux PM core has a prepare() callback run before suspend.

If the system is under high memory pressure, the resources may need
to be evicted into swap instead.  If the storage backing for swap
is offlined during the suspend() step then such a call may fail.

So duplicate this step into prepare() to move evict majority of
resources while leaving all existing steps that put the GPU into a
low power state in suspend().

Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2362
Signed-off-by: Mario Limonciello 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu.h|  1 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 26 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c|  7 +++---
  3 files changed, 30 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index d23fb4b5ad95..6643d0ed6b1b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1413,6 +1413,7 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
  void amdgpu_driver_release_kms(struct drm_device *dev);
  
  int amdgpu_device_ip_suspend(struct amdgpu_device *adev);

+int amdgpu_device_prepare(struct drm_device *dev);
  int amdgpu_device_suspend(struct drm_device *dev, bool fbcon);
  int amdgpu_device_resume(struct drm_device *dev, bool fbcon);
  u32 amdgpu_get_vblank_counter_kms(struct drm_crtc *crtc);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index bad2b5577e96..67acee569c08 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4259,6 +4259,31 @@ static int amdgpu_device_evict_resources(struct 
amdgpu_device *adev)
  /*
   * Suspend & resume.
   */
+/**
+ * amdgpu_device_prepare - prepare for device suspend
+ *
+ * @dev: drm dev pointer
+ *
+ * Prepare to put the hw in the suspend state (all asics).
+ * Returns 0 for success or an error on failure.
+ * Called at driver suspend.
+ */
+int amdgpu_device_prepare(struct drm_device *dev)
+{
+   struct amdgpu_device *adev = drm_to_adev(dev);
+   int r;
+
+   if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
+   return 0;
+
+   /* Evict the majority of BOs before starting suspend sequence */
+   r = amdgpu_device_evict_resources(adev);
+   if (r)
+   return r;
+
+   return 0;
+}
+
  /**
   * amdgpu_device_suspend - initiate device suspend
   *
@@ -4279,7 +4304,6 @@ int amdgpu_device_suspend(struct drm_device *dev, bool 
fbcon)
  
  	adev->in_suspend = true;
  
-	/* Evict the majority of BOs before grabbing the full access */

r = amdgpu_device_evict_resources(adev);
if (r)
return r;


I would just completely drop this extra amdgpu_device_evict_resources() 
call now.


We have a second call which is used to evacuate firmware etc... after 
the hw has been shut down. That one can't move, but also shouldn't 
allocate that much memory.


Regards,
Christian.


diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index e3471293846f..175167582db0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -2425,8 +2425,9 @@ static int amdgpu_pmops_prepare(struct device *dev)
/* Return a positive number here so
 * DPM_FLAG_SMART_SUSPEND works properly
 */
-   if (amdgpu_device_supports_boco(drm_dev))
-   return pm_runtime_suspended(dev);
+   if (amdgpu_device_supports_boco(drm_dev) &&
+   pm_runtime_suspended(dev))
+   return 1;
  
  	/* if we will not support s3 or s2i for the device

 *  then skip suspend
@@ -2435,7 +2436,7 @@ static int amdgpu_pmops_prepare(struct device *dev)
!amdgpu_acpi_is_s3_active(adev))
return 1;
  
-	return 0;

+   return amdgpu_device_prepare(drm_dev);
  }
  
  static void amdgpu_pmops_complete(struct device *dev)




Re: [PATCH v4 2/3] drm/amd/display: Destroy DC context while keeping DML

2023-10-05 Thread Mario Limonciello

On 10/5/2023 09:27, Alex Deucher wrote:

On Wed, Oct 4, 2023 at 1:37 PM Mario Limonciello
 wrote:


If there is memory pressure at suspend time then dynamically
allocating a large structure as part of DC suspend code will
fail.

Instead re-use the same structure and clear all members except
those that should be maintained.

Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2362
Signed-off-by: Mario Limonciello 
---
  drivers/gpu/drm/amd/display/dc/core/dc.c  | 25 ---
  .../gpu/drm/amd/display/dc/core/dc_resource.c | 12 +
  2 files changed, 12 insertions(+), 25 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c 
b/drivers/gpu/drm/amd/display/dc/core/dc.c
index 39e291a467e2..cb8c7c5a8807 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -4728,9 +4728,6 @@ bool dc_set_power_state(
 struct dc *dc,
 enum dc_acpi_cm_power_state power_state)
  {
-   struct kref refcount;
-   struct display_mode_lib *dml;
-
 if (!dc->current_state)
 return true;

@@ -4750,30 +4747,8 @@ bool dc_set_power_state(
 break;
 default:
 ASSERT(dc->current_state->stream_count == 0);
-   /* Zero out the current context so that on resume we start with
-* clean state, and dc hw programming optimizations will not
-* cause any trouble.
-*/
-   dml = kzalloc(sizeof(struct display_mode_lib),
-   GFP_KERNEL);
-
-   ASSERT(dml);
-   if (!dml)
-   return false;
-
-   /* Preserve refcount */
-   refcount = dc->current_state->refcount;
-   /* Preserve display mode lib */
-   memcpy(dml, >current_state->bw_ctx.dml, sizeof(struct 
display_mode_lib));

 dc_resource_state_destruct(dc->current_state);
-   memset(dc->current_state, 0,
-   sizeof(*dc->current_state));
-
-   dc->current_state->refcount = refcount;
-   dc->current_state->bw_ctx.dml = *dml;


The dml dance seems a bit weird.  I guess it's here because
dc_resource_state_destruct() might change it?  Can we safely drop
this?  If we do need it, we could pre-allocate a dml structure and use
that.


The dml structure is huge, so I think it's sub-optimal to have two 
copies of it.  That's why I aimed to just destroy everything else except 
it instead.


The only reason it's "safe" to drop the whole above stuff is because of 
"threading the needle" of what dc_resource_state_destruct() does.


In the earlier version I had a mistake to miss clearing the scratch 
variable and it caused some IGT failures.


This probably needs to be double checked with the DML2 series landing as 
well to make sure it didn't get caught in the middle.




Alex


-
-   kfree(dml);

 break;
 }
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c 
b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
index aa7b5db83644..e487c966c118 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
@@ -4350,6 +4350,18 @@ void dc_resource_state_destruct(struct dc_state *context)
 context->streams[i] = NULL;
 }
 context->stream_count = 0;
+   context->stream_mask = 0;
+   memset(>res_ctx, 0, sizeof(context->res_ctx));
+   memset(>pp_display_cfg, 0, sizeof(context->pp_display_cfg));
+   memset(>dcn_bw_vars, 0, sizeof(context->dcn_bw_vars));
+   context->clk_mgr = NULL;
+   memset(>bw_ctx.bw, 0, sizeof(context->bw_ctx.bw));
+   memset(context->block_sequence, 0, sizeof(context->block_sequence));
+   context->block_sequence_steps = 0;
+   memset(context->dc_dmub_cmd, 0, sizeof(context->dc_dmub_cmd));
+   context->dmub_cmd_count = 0;
+   memset(>perf_params, 0, sizeof(context->perf_params));
+   memset(>scratch, 0, sizeof(context->scratch));
  }

  void dc_resource_state_copy_construct(
--
2.34.1





Re: [PATCH] drm/amdgpu: update ib start and size alignment

2023-10-05 Thread Christian König

Am 05.10.23 um 15:49 schrieb boyuan.zh...@amd.com:

From: Boyuan Zhang 

Update IB starting address alignment and size alignment with correct values
for decode and encode IPs.

Decode IB starting address alignment: 256 bytes
Decode IB size alignment: 64 bytes
Encode IB starting address alignment: 256 bytes
Encode IB size alignment: 4 bytes

Also bump amdgpu driver version for this update.

Signed-off-by: Boyuan Zhang 


Reviewed-by: Christian König 


---
  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c |  3 ++-
  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 22 +++---
  2 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index e3471293846f..9e345d503a47 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -113,9 +113,10 @@
   *gl1c_cache_size, gl2c_cache_size, mall_size, 
enabled_rb_pipes_mask_hi
   *   3.53.0 - Support for GFX11 CP GFX shadowing
   *   3.54.0 - Add AMDGPU_CTX_QUERY2_FLAGS_RESET_IN_PROGRESS support
+ *   3.55.0 - Update IB start address and size alignment for decode and encode
   */
  #define KMS_DRIVER_MAJOR  3
-#define KMS_DRIVER_MINOR   54
+#define KMS_DRIVER_MINOR   55
  #define KMS_DRIVER_PATCHLEVEL 0
  
  /*

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 081bd28e2443..96db51765a6c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -447,7 +447,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
if (adev->uvd.inst[i].ring.sched.ready)
++num_rings;
}
-   ib_start_alignment = 64;
+   ib_start_alignment = 256;
ib_size_alignment = 64;
break;
case AMDGPU_HW_IP_VCE:
@@ -455,8 +455,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
for (i = 0; i < adev->vce.num_rings; i++)
if (adev->vce.ring[i].sched.ready)
++num_rings;
-   ib_start_alignment = 4;
-   ib_size_alignment = 1;
+   ib_start_alignment = 256;
+   ib_size_alignment = 4;
break;
case AMDGPU_HW_IP_UVD_ENC:
type = AMD_IP_BLOCK_TYPE_UVD;
@@ -468,8 +468,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
if (adev->uvd.inst[i].ring_enc[j].sched.ready)
++num_rings;
}
-   ib_start_alignment = 64;
-   ib_size_alignment = 64;
+   ib_start_alignment = 256;
+   ib_size_alignment = 4;
break;
case AMDGPU_HW_IP_VCN_DEC:
type = AMD_IP_BLOCK_TYPE_VCN;
@@ -480,8 +480,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
if (adev->vcn.inst[i].ring_dec.sched.ready)
++num_rings;
}
-   ib_start_alignment = 16;
-   ib_size_alignment = 16;
+   ib_start_alignment = 256;
+   ib_size_alignment = 64;
break;
case AMDGPU_HW_IP_VCN_ENC:
type = AMD_IP_BLOCK_TYPE_VCN;
@@ -493,8 +493,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
if (adev->vcn.inst[i].ring_enc[j].sched.ready)
++num_rings;
}
-   ib_start_alignment = 64;
-   ib_size_alignment = 1;
+   ib_start_alignment = 256;
+   ib_size_alignment = 4;
break;
case AMDGPU_HW_IP_VCN_JPEG:
type = (amdgpu_device_ip_get_ip_block(adev, 
AMD_IP_BLOCK_TYPE_JPEG)) ?
@@ -508,8 +508,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
if (adev->jpeg.inst[i].ring_dec[j].sched.ready)
++num_rings;
}
-   ib_start_alignment = 16;
-   ib_size_alignment = 16;
+   ib_start_alignment = 256;
+   ib_size_alignment = 64;
break;
case AMDGPU_HW_IP_VPE:
type = AMD_IP_BLOCK_TYPE_VPE;




Re: [PATCH v6 7/9] drm/amdgpu: map wptr BO into GART

2023-10-05 Thread Christian König

Am 04.10.23 um 23:34 schrieb Felix Kuehling:


On 2023-09-18 06:32, Christian König wrote:

Am 08.09.23 um 18:04 schrieb Shashank Sharma:

To support oversubscription, MES FW expects WPTR BOs to
be mapped into GART, before they are submitted to usermode
queues. This patch adds a function for the same.

V4: fix the wptr value before mapping lookup (Bas, Christian).
V5: Addressed review comments from Christian:
 - Either pin object or allocate from GART, but not both.
 - All the handling must be done with the VM locks held.

Cc: Alex Deucher 
Cc: Christian Koenig 
Signed-off-by: Shashank Sharma 
Signed-off-by: Arvind Yadav 
---
  drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c    | 81 
+++

  .../gpu/drm/amd/include/amdgpu_userqueue.h    |  1 +
  2 files changed, 82 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c

index e266674e0d44..c0eb622dfc37 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -6427,6 +6427,79 @@ const struct amdgpu_ip_block_version 
gfx_v11_0_ip_block =

  .funcs = _v11_0_ip_funcs,
  };
  +static int
+gfx_v11_0_map_gtt_bo_to_gart(struct amdgpu_device *adev, struct 
amdgpu_bo *bo)

+{
+    int ret;
+
+    ret = amdgpu_bo_reserve(bo, true);
+    if (ret) {
+    DRM_ERROR("Failed to reserve bo. ret %d\n", ret);
+    goto err_reserve_bo_failed;
+    }
+
+    ret = amdgpu_ttm_alloc_gart(>tbo);
+    if (ret) {
+    DRM_ERROR("Failed to bind bo to GART. ret %d\n", ret);
+    goto err_map_bo_gart_failed;
+    }
+
+    amdgpu_bo_unreserve(bo);


The GART mapping can become invalid as soon as you unlock the BOs.

You need to attach an eviction fence for this to work correctly.


Don't you need an eviction fence on the WPTR BO regardless of the GTT 
mapping?


Yeah, indeed :)

Long story short we need a general eviction fence handling for BOs.

Regards,
Christian.



Regards,
  Felix





+    bo = amdgpu_bo_ref(bo);
+
+    return 0;
+
+err_map_bo_gart_failed:
+    amdgpu_bo_unreserve(bo);
+err_reserve_bo_failed:
+    return ret;
+}
+
+static int
+gfx_v11_0_create_wptr_mapping(struct amdgpu_device *adev,
+  struct amdgpu_usermode_queue *queue,
+  uint64_t wptr)
+{
+    struct amdgpu_bo_va_mapping *wptr_mapping;
+    struct amdgpu_vm *wptr_vm;
+    struct amdgpu_bo *wptr_bo = NULL;
+    int ret;
+
+    mutex_lock(>vm->eviction_lock);


Never ever touch the eviction lock outside of the VM code! That lock 
is completely unrelated to what you do here.



+    wptr_vm = queue->vm;
+    ret = amdgpu_bo_reserve(wptr_vm->root.bo, false);
+    if (ret)
+    goto unlock;
+
+    wptr &= AMDGPU_GMC_HOLE_MASK;
+    wptr_mapping = amdgpu_vm_bo_lookup_mapping(wptr_vm, wptr >> 
PAGE_SHIFT);

+    amdgpu_bo_unreserve(wptr_vm->root.bo);
+    if (!wptr_mapping) {
+    DRM_ERROR("Failed to lookup wptr bo\n");
+    ret = -EINVAL;
+    goto unlock;
+    }
+
+    wptr_bo = wptr_mapping->bo_va->base.bo;
+    if (wptr_bo->tbo.base.size > PAGE_SIZE) {
+    DRM_ERROR("Requested GART mapping for wptr bo larger than 
one page\n");

+    ret = -EINVAL;
+    goto unlock;
+    }


We probably also want to enforce that this BO is a per VM BO.


+
+    ret = gfx_v11_0_map_gtt_bo_to_gart(adev, wptr_bo);
+    if (ret) {
+    DRM_ERROR("Failed to map wptr bo to GART\n");
+    goto unlock;
+    }
+
+    queue->wptr_mc_addr = wptr_bo->tbo.resource->start << PAGE_SHIFT;


This needs to be amdgpu_bo_gpu_offset() instead.

Regards,
Christian.


+
+unlock:
+    mutex_unlock(>vm->eviction_lock);
+    return ret;
+}
+
  static void gfx_v11_0_userq_unmap(struct amdgpu_userq_mgr *uq_mgr,
    struct amdgpu_usermode_queue *queue)
  {
@@ -6475,6 +6548,7 @@ static int gfx_v11_0_userq_map(struct 
amdgpu_userq_mgr *uq_mgr,

  queue_input.queue_size = userq_props->queue_size >> 2;
  queue_input.doorbell_offset = userq_props->doorbell_index;
  queue_input.page_table_base_addr = 
amdgpu_gmc_pd_addr(queue->vm->root.bo);

+    queue_input.wptr_mc_addr = queue->wptr_mc_addr;
    amdgpu_mes_lock(>mes);
  r = adev->mes.funcs->add_hw_queue(>mes, _input);
@@ -6601,6 +6675,13 @@ static int gfx_v11_0_userq_mqd_create(struct 
amdgpu_userq_mgr *uq_mgr,

  goto free_mqd;
  }
  +    /* FW expects WPTR BOs to be mapped into GART */
+    r = gfx_v11_0_create_wptr_mapping(adev, queue, 
userq_props.wptr_gpu_addr);

+    if (r) {
+    DRM_ERROR("Failed to create WPTR mapping\n");
+    goto free_ctx;
+    }
+
  /* Map userqueue into FW using MES */
  r = gfx_v11_0_userq_map(uq_mgr, queue, _props);
  if (r) {
diff --git a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h 
b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h

index 34e20daa06c8..ae155de62560 100644
--- a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h
+++ b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h
@@ -39,6 +39,7 @@ struct 

Re: [PATCH] drm/amdgpu: Annotate struct amdgpu_bo_list with __counted_by

2023-10-05 Thread Christian König

Am 04.10.23 um 01:29 schrieb Kees Cook:

Prepare for the coming implementation by GCC and Clang of the __counted_by
attribute. Flexible array members annotated with __counted_by can have
their accesses bounds-checked at run-time via CONFIG_UBSAN_BOUNDS (for
array indexing) and CONFIG_FORTIFY_SOURCE (for strcpy/memcpy-family
functions).

As found with Coccinelle[1], add __counted_by for struct amdgpu_bo_list.
Additionally, since the element count member must be set before accessing
the annotated flexible array member, move its initialization earlier.

Cc: Alex Deucher 
Cc: "Christian König" 
Cc: "Pan, Xinhui" 
Cc: David Airlie 
Cc: Daniel Vetter 
Cc: "Gustavo A. R. Silva" 
Cc: Luben Tuikov 
Cc: Christophe JAILLET 
Cc: Felix Kuehling 
Cc: amd-gfx@lists.freedesktop.org
Cc: dri-de...@lists.freedesktop.org
Cc: linux-harden...@vger.kernel.org
Link: 
https://github.com/kees/kernel-tools/blob/trunk/coccinelle/examples/counted_by.cocci
 [1]
Signed-off-by: Kees Cook 


Reviewed-by: Christian König 


---
  drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c | 2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h | 2 +-
  2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
index 6f5b641b631e..781e5c5ce04d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
@@ -84,6 +84,7 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, struct 
drm_file *filp,
  
  	kref_init(>refcount);
  
+	list->num_entries = num_entries;

array = list->entries;
  
  	for (i = 0; i < num_entries; ++i) {

@@ -129,7 +130,6 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, 
struct drm_file *filp,
}
  
  	list->first_userptr = first_userptr;

-   list->num_entries = num_entries;
sort(array, last_entry, sizeof(struct amdgpu_bo_list_entry),
 amdgpu_bo_list_entry_cmp, NULL);
  
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h

index 6a703be45d04..555cd6d877c3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
@@ -56,7 +56,7 @@ struct amdgpu_bo_list {
 */
struct mutex bo_list_mutex;
  
-	struct amdgpu_bo_list_entry entries[];

+   struct amdgpu_bo_list_entry entries[] __counted_by(num_entries);
  };
  
  int amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id,




Re: [PATCH] drm/amdgpu: update ib start and size alignment

2023-10-05 Thread Alex Deucher
On Thu, Oct 5, 2023 at 10:17 AM  wrote:
>
> From: Boyuan Zhang 
>
> Update IB starting address alignment and size alignment with correct values
> for decode and encode IPs.
>
> Decode IB starting address alignment: 256 bytes
> Decode IB size alignment: 64 bytes
> Encode IB starting address alignment: 256 bytes
> Encode IB size alignment: 4 bytes
>
> Also bump amdgpu driver version for this update.
>
> Signed-off-by: Boyuan Zhang 

Acked-by: Alex Deucher 

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c |  3 ++-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 22 +++---
>  2 files changed, 13 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> index e3471293846f..9e345d503a47 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> @@ -113,9 +113,10 @@
>   *gl1c_cache_size, gl2c_cache_size, mall_size, 
> enabled_rb_pipes_mask_hi
>   *   3.53.0 - Support for GFX11 CP GFX shadowing
>   *   3.54.0 - Add AMDGPU_CTX_QUERY2_FLAGS_RESET_IN_PROGRESS support
> + *   3.55.0 - Update IB start address and size alignment for decode and 
> encode
>   */
>  #define KMS_DRIVER_MAJOR   3
> -#define KMS_DRIVER_MINOR   54
> +#define KMS_DRIVER_MINOR   55
>  #define KMS_DRIVER_PATCHLEVEL  0
>
>  /*
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> index 081bd28e2443..96db51765a6c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> @@ -447,7 +447,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
> if (adev->uvd.inst[i].ring.sched.ready)
> ++num_rings;
> }
> -   ib_start_alignment = 64;
> +   ib_start_alignment = 256;
> ib_size_alignment = 64;
> break;
> case AMDGPU_HW_IP_VCE:
> @@ -455,8 +455,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
> for (i = 0; i < adev->vce.num_rings; i++)
> if (adev->vce.ring[i].sched.ready)
> ++num_rings;
> -   ib_start_alignment = 4;
> -   ib_size_alignment = 1;
> +   ib_start_alignment = 256;
> +   ib_size_alignment = 4;
> break;
> case AMDGPU_HW_IP_UVD_ENC:
> type = AMD_IP_BLOCK_TYPE_UVD;
> @@ -468,8 +468,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
> if (adev->uvd.inst[i].ring_enc[j].sched.ready)
> ++num_rings;
> }
> -   ib_start_alignment = 64;
> -   ib_size_alignment = 64;
> +   ib_start_alignment = 256;
> +   ib_size_alignment = 4;
> break;
> case AMDGPU_HW_IP_VCN_DEC:
> type = AMD_IP_BLOCK_TYPE_VCN;
> @@ -480,8 +480,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
> if (adev->vcn.inst[i].ring_dec.sched.ready)
> ++num_rings;
> }
> -   ib_start_alignment = 16;
> -   ib_size_alignment = 16;
> +   ib_start_alignment = 256;
> +   ib_size_alignment = 64;
> break;
> case AMDGPU_HW_IP_VCN_ENC:
> type = AMD_IP_BLOCK_TYPE_VCN;
> @@ -493,8 +493,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
> if (adev->vcn.inst[i].ring_enc[j].sched.ready)
> ++num_rings;
> }
> -   ib_start_alignment = 64;
> -   ib_size_alignment = 1;
> +   ib_start_alignment = 256;
> +   ib_size_alignment = 4;
> break;
> case AMDGPU_HW_IP_VCN_JPEG:
> type = (amdgpu_device_ip_get_ip_block(adev, 
> AMD_IP_BLOCK_TYPE_JPEG)) ?
> @@ -508,8 +508,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
> if 
> (adev->jpeg.inst[i].ring_dec[j].sched.ready)
> ++num_rings;
> }
> -   ib_start_alignment = 16;
> -   ib_size_alignment = 16;
> +   ib_start_alignment = 256;
> +   ib_size_alignment = 64;
> break;
> case AMDGPU_HW_IP_VPE:
> type = AMD_IP_BLOCK_TYPE_VPE;
> --
> 2.34.1
>


Re: [PATCH v4 3/3] drm/amd/display: make dc_set_power_state() return type `void` again

2023-10-05 Thread Alex Deucher
On Wed, Oct 4, 2023 at 1:27 PM Mario Limonciello
 wrote:
>
> As dc_set_power_state() no longer allocates memory, it's not necessary
> to have return types and check return code as it can't fail anymore.
>
> Change it back to `void`.
>
> Signed-off-by: Mario Limonciello 

Reviewed-by: Alex Deucher 

> ---
>  .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c   | 17 +
>  drivers/gpu/drm/amd/display/dc/core/dc.c|  6 ++
>  drivers/gpu/drm/amd/display/dc/dc.h |  2 +-
>  3 files changed, 8 insertions(+), 17 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
> b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> index a59a11ae42db..df9d9437f149 100644
> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> @@ -2685,11 +2685,6 @@ static void hpd_rx_irq_work_suspend(struct 
> amdgpu_display_manager *dm)
> }
>  }
>
> -static int dm_set_power_state(struct dc *dc, enum dc_acpi_cm_power_state 
> power_state)
> -{
> -   return dc_set_power_state(dc, power_state) ? 0 : -ENOMEM;
> -}
> -
>  static int dm_suspend(void *handle)
>  {
> struct amdgpu_device *adev = handle;
> @@ -2723,7 +2718,9 @@ static int dm_suspend(void *handle)
>
> hpd_rx_irq_work_suspend(dm);
>
> -   return dm_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D3);
> +   dc_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D3);
> +
> +   return 0;
>  }
>
>  struct drm_connector *
> @@ -2917,9 +2914,7 @@ static int dm_resume(void *handle)
> if (r)
> DRM_ERROR("DMUB interface failed to initialize: 
> status=%d\n", r);
>
> -   r = dm_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D0);
> -   if (r)
> -   return r;
> +   dc_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D0);
>
> dc_resume(dm->dc);
>
> @@ -2969,9 +2964,7 @@ static int dm_resume(void *handle)
> }
>
> /* power on hardware */
> -   r = dm_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D0);
> -   if (r)
> -   return r;
> +dc_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D0);
>
> /* program HPD filter */
> dc_resume(dm->dc);
> diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c 
> b/drivers/gpu/drm/amd/display/dc/core/dc.c
> index cb8c7c5a8807..2645d59dc58e 100644
> --- a/drivers/gpu/drm/amd/display/dc/core/dc.c
> +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
> @@ -4724,12 +4724,12 @@ void dc_power_down_on_boot(struct dc *dc)
> dc->hwss.power_down_on_boot(dc);
>  }
>
> -bool dc_set_power_state(
> +void dc_set_power_state(
> struct dc *dc,
> enum dc_acpi_cm_power_state power_state)
>  {
> if (!dc->current_state)
> -   return true;
> +   return;
>
> switch (power_state) {
> case DC_ACPI_CM_POWER_STATE_D0:
> @@ -4752,8 +4752,6 @@ bool dc_set_power_state(
>
> break;
> }
> -
> -   return true;
>  }
>
>  void dc_resume(struct dc *dc)
> diff --git a/drivers/gpu/drm/amd/display/dc/dc.h 
> b/drivers/gpu/drm/amd/display/dc/dc.h
> index b140eb240ad7..b6002b11a745 100644
> --- a/drivers/gpu/drm/amd/display/dc/dc.h
> +++ b/drivers/gpu/drm/amd/display/dc/dc.h
> @@ -2330,7 +2330,7 @@ void dc_notify_vsync_int_state(struct dc *dc, struct 
> dc_stream_state *stream, bo
>
>  /* Power Interfaces */
>
> -bool dc_set_power_state(
> +void dc_set_power_state(
> struct dc *dc,
> enum dc_acpi_cm_power_state power_state);
>  void dc_resume(struct dc *dc);
> --
> 2.34.1
>


Re: [PATCH v4 2/3] drm/amd/display: Destroy DC context while keeping DML

2023-10-05 Thread Alex Deucher
On Wed, Oct 4, 2023 at 1:37 PM Mario Limonciello
 wrote:
>
> If there is memory pressure at suspend time then dynamically
> allocating a large structure as part of DC suspend code will
> fail.
>
> Instead re-use the same structure and clear all members except
> those that should be maintained.
>
> Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2362
> Signed-off-by: Mario Limonciello 
> ---
>  drivers/gpu/drm/amd/display/dc/core/dc.c  | 25 ---
>  .../gpu/drm/amd/display/dc/core/dc_resource.c | 12 +
>  2 files changed, 12 insertions(+), 25 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c 
> b/drivers/gpu/drm/amd/display/dc/core/dc.c
> index 39e291a467e2..cb8c7c5a8807 100644
> --- a/drivers/gpu/drm/amd/display/dc/core/dc.c
> +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
> @@ -4728,9 +4728,6 @@ bool dc_set_power_state(
> struct dc *dc,
> enum dc_acpi_cm_power_state power_state)
>  {
> -   struct kref refcount;
> -   struct display_mode_lib *dml;
> -
> if (!dc->current_state)
> return true;
>
> @@ -4750,30 +4747,8 @@ bool dc_set_power_state(
> break;
> default:
> ASSERT(dc->current_state->stream_count == 0);
> -   /* Zero out the current context so that on resume we start 
> with
> -* clean state, and dc hw programming optimizations will not
> -* cause any trouble.
> -*/
> -   dml = kzalloc(sizeof(struct display_mode_lib),
> -   GFP_KERNEL);
> -
> -   ASSERT(dml);
> -   if (!dml)
> -   return false;
> -
> -   /* Preserve refcount */
> -   refcount = dc->current_state->refcount;
> -   /* Preserve display mode lib */
> -   memcpy(dml, >current_state->bw_ctx.dml, sizeof(struct 
> display_mode_lib));
>
> dc_resource_state_destruct(dc->current_state);
> -   memset(dc->current_state, 0,
> -   sizeof(*dc->current_state));
> -
> -   dc->current_state->refcount = refcount;
> -   dc->current_state->bw_ctx.dml = *dml;

The dml dance seems a bit weird.  I guess it's here because
dc_resource_state_destruct() might change it?  Can we safely drop
this?  If we do need it, we could pre-allocate a dml structure and use
that.

Alex

> -
> -   kfree(dml);
>
> break;
> }
> diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c 
> b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
> index aa7b5db83644..e487c966c118 100644
> --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
> +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
> @@ -4350,6 +4350,18 @@ void dc_resource_state_destruct(struct dc_state 
> *context)
> context->streams[i] = NULL;
> }
> context->stream_count = 0;
> +   context->stream_mask = 0;
> +   memset(>res_ctx, 0, sizeof(context->res_ctx));
> +   memset(>pp_display_cfg, 0, sizeof(context->pp_display_cfg));
> +   memset(>dcn_bw_vars, 0, sizeof(context->dcn_bw_vars));
> +   context->clk_mgr = NULL;
> +   memset(>bw_ctx.bw, 0, sizeof(context->bw_ctx.bw));
> +   memset(context->block_sequence, 0, sizeof(context->block_sequence));
> +   context->block_sequence_steps = 0;
> +   memset(context->dc_dmub_cmd, 0, sizeof(context->dc_dmub_cmd));
> +   context->dmub_cmd_count = 0;
> +   memset(>perf_params, 0, sizeof(context->perf_params));
> +   memset(>scratch, 0, sizeof(context->scratch));
>  }
>
>  void dc_resource_state_copy_construct(
> --
> 2.34.1
>


Re: [PATCH v4 1/3] drm/amd: Evict resources during PM ops prepare() callback

2023-10-05 Thread Alex Deucher
On Wed, Oct 4, 2023 at 1:37 PM Mario Limonciello
 wrote:
>
> Linux PM core has a prepare() callback run before suspend.
>
> If the system is under high memory pressure, the resources may need
> to be evicted into swap instead.  If the storage backing for swap
> is offlined during the suspend() step then such a call may fail.
>
> So duplicate this step into prepare() to evict the majority of
> resources there, while leaving all existing steps that put the GPU into
> a low power state in suspend().
>
> Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2362
> Signed-off-by: Mario Limonciello 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu.h|  1 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 26 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c|  7 +++---
>  3 files changed, 30 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index d23fb4b5ad95..6643d0ed6b1b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -1413,6 +1413,7 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
>  void amdgpu_driver_release_kms(struct drm_device *dev);
>
>  int amdgpu_device_ip_suspend(struct amdgpu_device *adev);
> +int amdgpu_device_prepare(struct drm_device *dev);
>  int amdgpu_device_suspend(struct drm_device *dev, bool fbcon);
>  int amdgpu_device_resume(struct drm_device *dev, bool fbcon);
>  u32 amdgpu_get_vblank_counter_kms(struct drm_crtc *crtc);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index bad2b5577e96..67acee569c08 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -4259,6 +4259,31 @@ static int amdgpu_device_evict_resources(struct 
> amdgpu_device *adev)
>  /*
>   * Suspend & resume.
>   */
> +/**
> + * amdgpu_device_prepare - prepare for device suspend
> + *
> + * @dev: drm dev pointer
> + *
> + * Prepare to put the hw in the suspend state (all asics).
> + * Returns 0 for success or an error on failure.
> + * Called at driver suspend.
> + */
> +int amdgpu_device_prepare(struct drm_device *dev)
> +{
> +   struct amdgpu_device *adev = drm_to_adev(dev);
> +   int r;
> +
> +   if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
> +   return 0;
> +
> +   /* Evict the majority of BOs before starting suspend sequence */
> +   r = amdgpu_device_evict_resources(adev);
> +   if (r)
> +   return r;
> +
> +   return 0;
> +}
> +
>  /**
>   * amdgpu_device_suspend - initiate device suspend
>   *
> @@ -4279,7 +4304,6 @@ int amdgpu_device_suspend(struct drm_device *dev, bool 
> fbcon)
>
> adev->in_suspend = true;
>
> -   /* Evict the majority of BOs before grabbing the full access */
> r = amdgpu_device_evict_resources(adev);
> if (r)
> return r;

Might want to add a note that this is likely a noop in the normal
suspend case and is just here to handle the case where
amdgpu_device_suspend() is called outside of the normal pmops
framework.
Other than that, the patch is:
Reviewed-by: Alex Deucher 

> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> index e3471293846f..175167582db0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> @@ -2425,8 +2425,9 @@ static int amdgpu_pmops_prepare(struct device *dev)
> /* Return a positive number here so
>  * DPM_FLAG_SMART_SUSPEND works properly
>  */
> -   if (amdgpu_device_supports_boco(drm_dev))
> -   return pm_runtime_suspended(dev);
> +   if (amdgpu_device_supports_boco(drm_dev) &&
> +   pm_runtime_suspended(dev))
> +   return 1;
>
> /* if we will not support s3 or s2i for the device
>  *  then skip suspend
> @@ -2435,7 +2436,7 @@ static int amdgpu_pmops_prepare(struct device *dev)
> !amdgpu_acpi_is_s3_active(adev))
> return 1;
>
> -   return 0;
> +   return amdgpu_device_prepare(drm_dev);
>  }
>
>  static void amdgpu_pmops_complete(struct device *dev)
> --
> 2.34.1
>


[PATCH] drm/amdgpu: update ib start and size alignment

2023-10-05 Thread boyuan.zhang
From: Boyuan Zhang 

Update IB starting address alignment and size alignment with correct values
for decode and encode IPs.

Decode IB starting address alignment: 256 bytes
Decode IB size alignment: 64 bytes
Encode IB starting address alignment: 256 bytes
Encode IB size alignment: 4 bytes

Also bump amdgpu driver version for this update.

Signed-off-by: Boyuan Zhang 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c |  3 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 22 +++---
 2 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index e3471293846f..9e345d503a47 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -113,9 +113,10 @@
  *gl1c_cache_size, gl2c_cache_size, mall_size, 
enabled_rb_pipes_mask_hi
  *   3.53.0 - Support for GFX11 CP GFX shadowing
  *   3.54.0 - Add AMDGPU_CTX_QUERY2_FLAGS_RESET_IN_PROGRESS support
+ *   3.55.0 - Update IB start address and size alignment for decode and encode
  */
 #define KMS_DRIVER_MAJOR   3
-#define KMS_DRIVER_MINOR   54
+#define KMS_DRIVER_MINOR   55
 #define KMS_DRIVER_PATCHLEVEL  0
 
 /*
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 081bd28e2443..96db51765a6c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -447,7 +447,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
if (adev->uvd.inst[i].ring.sched.ready)
++num_rings;
}
-   ib_start_alignment = 64;
+   ib_start_alignment = 256;
ib_size_alignment = 64;
break;
case AMDGPU_HW_IP_VCE:
@@ -455,8 +455,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
for (i = 0; i < adev->vce.num_rings; i++)
if (adev->vce.ring[i].sched.ready)
++num_rings;
-   ib_start_alignment = 4;
-   ib_size_alignment = 1;
+   ib_start_alignment = 256;
+   ib_size_alignment = 4;
break;
case AMDGPU_HW_IP_UVD_ENC:
type = AMD_IP_BLOCK_TYPE_UVD;
@@ -468,8 +468,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
if (adev->uvd.inst[i].ring_enc[j].sched.ready)
++num_rings;
}
-   ib_start_alignment = 64;
-   ib_size_alignment = 64;
+   ib_start_alignment = 256;
+   ib_size_alignment = 4;
break;
case AMDGPU_HW_IP_VCN_DEC:
type = AMD_IP_BLOCK_TYPE_VCN;
@@ -480,8 +480,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
if (adev->vcn.inst[i].ring_dec.sched.ready)
++num_rings;
}
-   ib_start_alignment = 16;
-   ib_size_alignment = 16;
+   ib_start_alignment = 256;
+   ib_size_alignment = 64;
break;
case AMDGPU_HW_IP_VCN_ENC:
type = AMD_IP_BLOCK_TYPE_VCN;
@@ -493,8 +493,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
if (adev->vcn.inst[i].ring_enc[j].sched.ready)
++num_rings;
}
-   ib_start_alignment = 64;
-   ib_size_alignment = 1;
+   ib_start_alignment = 256;
+   ib_size_alignment = 4;
break;
case AMDGPU_HW_IP_VCN_JPEG:
type = (amdgpu_device_ip_get_ip_block(adev, 
AMD_IP_BLOCK_TYPE_JPEG)) ?
@@ -508,8 +508,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
if (adev->jpeg.inst[i].ring_dec[j].sched.ready)
++num_rings;
}
-   ib_start_alignment = 16;
-   ib_size_alignment = 16;
+   ib_start_alignment = 256;
+   ib_size_alignment = 64;
break;
case AMDGPU_HW_IP_VPE:
type = AMD_IP_BLOCK_TYPE_VPE;
-- 
2.34.1



Re: [PATCH 2/2] drm/radeon: Fix UBSAN array-index-out-of-bounds for Radeon HD 5430

2023-10-05 Thread Alex Deucher
On Thu, Oct 5, 2023 at 12:42 AM Mario Limonciello
 wrote:
>
> For pptable structs that use flexible array sizes, use flexible arrays.
>
> Suggested-by: Felix Held 
> Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2894
> Signed-off-by: Mario Limonciello 

Series is:
Acked-by: Alex Deucher 

> ---
>  drivers/gpu/drm/radeon/pptable.h | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/radeon/pptable.h 
> b/drivers/gpu/drm/radeon/pptable.h
> index 4c2eec49dadc..94947229888b 100644
> --- a/drivers/gpu/drm/radeon/pptable.h
> +++ b/drivers/gpu/drm/radeon/pptable.h
> @@ -74,7 +74,7 @@ typedef struct _ATOM_PPLIB_THERMALCONTROLLER
>  typedef struct _ATOM_PPLIB_STATE
>  {
>  UCHAR ucNonClockStateIndex;
> -UCHAR ucClockStateIndices[1]; // variable-sized
> +UCHAR ucClockStateIndices[]; // variable-sized
>  } ATOM_PPLIB_STATE;
>
>
> --
> 2.34.1
>


Re: [PATCH] drm/amd: Fix UBSAN array-index-out-of-bounds for Polaris and Tonga

2023-10-05 Thread Mario Limonciello

On 10/4/2023 16:50, Alex Deucher wrote:

On Wed, Oct 4, 2023 at 5:42 PM Mario Limonciello
 wrote:


For pptable structs that use flexible array sizes, use flexible arrays.

Link: https://bugs.launchpad.net/ubuntu/+source/linux/+bug/2036742
Signed-off-by: Mario Limonciello 


Acked-by: Alex Deucher 


Thanks, can you also review the similar (but different) series of the 
other variable arrays?


https://lore.kernel.org/amd-gfx/20231004202253.182540-1-mario.limoncie...@amd.com/T/#t




---
 From this bug report there are more to fix
  .../gpu/drm/amd/pm/powerplay/hwmgr/pptable_v1_0.h| 12 ++--
  1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/pptable_v1_0.h 
b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/pptable_v1_0.h
index 57bca1e81d3a..9fcad69a9f34 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/pptable_v1_0.h
+++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/pptable_v1_0.h
@@ -164,7 +164,7 @@ typedef struct _ATOM_Tonga_State {
  typedef struct _ATOM_Tonga_State_Array {
 UCHAR ucRevId;
 UCHAR ucNumEntries; /* Number of entries. */
-   ATOM_Tonga_State entries[1];/* Dynamically allocate entries. */
+   ATOM_Tonga_State entries[]; /* Dynamically allocate entries. */
  } ATOM_Tonga_State_Array;

  typedef struct _ATOM_Tonga_MCLK_Dependency_Record {
@@ -210,7 +210,7 @@ typedef struct _ATOM_Polaris_SCLK_Dependency_Record {
  typedef struct _ATOM_Polaris_SCLK_Dependency_Table {
 UCHAR ucRevId;
 UCHAR ucNumEntries;
 /* Number of entries. */
-   ATOM_Polaris_SCLK_Dependency_Record entries[1]; 
 /* Dynamically allocate entries. */
+   ATOM_Polaris_SCLK_Dependency_Record entries[];  
 /* Dynamically allocate entries. */
  } ATOM_Polaris_SCLK_Dependency_Table;

  typedef struct _ATOM_Tonga_PCIE_Record {
@@ -222,7 +222,7 @@ typedef struct _ATOM_Tonga_PCIE_Record {
  typedef struct _ATOM_Tonga_PCIE_Table {
 UCHAR ucRevId;
 UCHAR ucNumEntries;
 /* Number of entries. */
-   ATOM_Tonga_PCIE_Record entries[1];  
/* Dynamically allocate entries. */
+   ATOM_Tonga_PCIE_Record entries[];   
/* Dynamically allocate entries. */
  } ATOM_Tonga_PCIE_Table;

  typedef struct _ATOM_Polaris10_PCIE_Record {
@@ -235,7 +235,7 @@ typedef struct _ATOM_Polaris10_PCIE_Record {
  typedef struct _ATOM_Polaris10_PCIE_Table {
 UCHAR ucRevId;
 UCHAR ucNumEntries; /* Number 
of entries. */
-   ATOM_Polaris10_PCIE_Record entries[1];  /* 
Dynamically allocate entries. */
+   ATOM_Polaris10_PCIE_Record entries[];  /* 
Dynamically allocate entries. */
  } ATOM_Polaris10_PCIE_Table;


@@ -252,7 +252,7 @@ typedef struct _ATOM_Tonga_MM_Dependency_Record {
  typedef struct _ATOM_Tonga_MM_Dependency_Table {
 UCHAR ucRevId;
 UCHAR ucNumEntries;
 /* Number of entries. */
-   ATOM_Tonga_MM_Dependency_Record entries[1];/* 
Dynamically allocate entries. */
+   ATOM_Tonga_MM_Dependency_Record entries[]; /* 
Dynamically allocate entries. */
  } ATOM_Tonga_MM_Dependency_Table;

  typedef struct _ATOM_Tonga_Voltage_Lookup_Record {
@@ -265,7 +265,7 @@ typedef struct _ATOM_Tonga_Voltage_Lookup_Record {
  typedef struct _ATOM_Tonga_Voltage_Lookup_Table {
 UCHAR ucRevId;
 UCHAR ucNumEntries;
 /* Number of entries. */
-   ATOM_Tonga_Voltage_Lookup_Record entries[1];
/* Dynamically allocate entries. */
+   ATOM_Tonga_Voltage_Lookup_Record entries[]; 
/* Dynamically allocate entries. */
  } ATOM_Tonga_Voltage_Lookup_Table;

  typedef struct _ATOM_Tonga_Fan_Table {
--
2.34.1





Re: [PATCH v3 1/4] drm/amd: Add support for prepare() and complete() callbacks

2023-10-05 Thread Mario Limonciello

On 10/5/2023 07:35, Christian König wrote:

Am 04.10.23 um 05:39 schrieb Mario Limonciello:

On 10/3/2023 16:22, Deucher, Alexander wrote:

[Public]


-Original Message-
From: Limonciello, Mario 
Sent: Tuesday, October 3, 2023 5:17 PM
To: Deucher, Alexander ; amd-
g...@lists.freedesktop.org
Cc: Wentland, Harry 
Subject: Re: [PATCH v3 1/4] drm/amd: Add support for prepare() and
complete() callbacks

On 10/3/2023 16:11, Deucher, Alexander wrote:

[Public]


-Original Message-
From: amd-gfx  On Behalf Of
Mario Limonciello
Sent: Tuesday, October 3, 2023 4:55 PM
To: amd-gfx@lists.freedesktop.org
Cc: Wentland, Harry ; Limonciello, Mario

Subject: [PATCH v3 1/4] drm/amd: Add support for prepare() and
complete() callbacks

Linux PM core has a prepare() callback run before suspend and
complete() callback ran after resume() for devices to use.  Add
plumbing to bring
prepare() to amdgpu.

The idea with the new vfuncs for amdgpu is that all IP blocks that do
memory allocations during suspend should do the allocation from this
call instead of the suspend() callback.

By moving the allocations to prepare() the system suspend will be
failed before any IP block has done any suspend code.

If the suspend fails, then do any cleanups in the complete() 
callback.


Signed-off-by: Mario Limonciello 
---
   drivers/gpu/drm/amd/amdgpu/amdgpu.h    |  2 ++
   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 39
--
   drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c    | 11 +++---
   3 files changed, 46 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 73e825d20259..5d651552822c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1415,6 +1415,8 @@ void amdgpu_driver_postclose_kms(struct
drm_device *dev,  void amdgpu_driver_release_kms(struct drm_device
*dev);

   int amdgpu_device_ip_suspend(struct amdgpu_device *adev);
+int amdgpu_device_prepare(struct drm_device *dev); void
+amdgpu_device_complete(struct drm_device *dev);
   int amdgpu_device_suspend(struct drm_device *dev, bool fbcon); int
amdgpu_device_resume(struct drm_device *dev, bool fbcon);
   u32 amdgpu_get_vblank_counter_kms(struct drm_crtc *crtc); diff
--git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index bad2b5577e96..f53cf675c3ce 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4259,6 +4259,43 @@ static int
amdgpu_device_evict_resources(struct
amdgpu_device *adev)
   /*
    * Suspend & resume.
    */
+/**
+ * amdgpu_device_prepare - prepare for device suspend
+ *
+ * @dev: drm dev pointer
+ *
+ * Prepare to put the hw in the suspend state (all asics).
+ * Returns 0 for success or an error on failure.
+ * Called at driver suspend.
+ */
+int amdgpu_device_prepare(struct drm_device *dev) {
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ int r;
+
+ if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
+ return 0;
+
+ adev->in_suspend = true;
+
+ return 0;
+}
+
+/**
+ * amdgpu_device_complete - complete the device after resume
+ *
+ * @dev: drm dev pointer
+ *
+ * Clean up any actions that the prepare step did.
+ * Called after driver resume.
+ */
+void amdgpu_device_complete(struct drm_device *dev) {
+ struct amdgpu_device *adev = drm_to_adev(dev);
+
+ adev->in_suspend = false;
+}
+
   /**
    * amdgpu_device_suspend - initiate device suspend
    *
@@ -4277,8 +4314,6 @@ int amdgpu_device_suspend(struct drm_device
*dev, bool fbcon)
    if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
    return 0;

- adev->in_suspend = true;
-


We also set this to false in amdgpu_device_resume() so that should 
be fixed

up as well.  But, I'm not sure we want to move this out of
amdgpu_device_suspend().  There are places we use
amdgpu_device_suspend/resume() outside of pmops that also rely on these
being set.  Those places may need to be fixed up if we do. IIRC, the 
switcheroo

code uses this.

The big reason that I moved it from suspend() to prepare() was so that
amdgpu_device_evict_resources() was called with the context of it 
being set.


My thought process:
0) prepare() sets all the time
1) If prepare() fails complete() clears it.
2) If prepare() succeeds it remains set for suspend()
3) If suspend() succeeds it gets cleared at resume()
4) If resume() failed for some reason, it's cleared by complete().

Does it actually matter that it's set while evicting resources?


Shouldn't matter for evicting resources.  We even have debugfs nodes 
you can access to forcibly evict resources at runtime for testing 
memory pressure.


Then in that case I think what I'll do is put an extra call for 
amdgpu_device_evict_resources() in the prepare callback.


It shouldn't do any harm to call three times in the suspend sequence 
instead of two.


Yeah, I think you should move the first call 

Re: [PATCH v4] drm/amdkfd: Use partial migrations in GPU page faults

2023-10-05 Thread Philip Yang

  
Sorry for the late reply, just noticed 2 other issues:

1. function svm_range_split_by_granularity can be removed now.
2. svm_range_restore_pages should map partial range to GPUs after
  partial migration.
Regards,
Philip

On 2023-10-03 19:31, Xiaogang.Chen
  wrote:


  From: Xiaogang Chen 

This patch implements partial migration in gpu page fault according to migration
granularity(default 2MB) and not split svm range in cpu page fault handling.
A svm range may include pages from both system ram and vram of one gpu now.
These chagnes are expected to improve migration performance and reduce mmu
callback and TLB flush workloads.

Signed-off-by: Xiaogang Chen
---
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 156 +--
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.h |   6 +-
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c |  83 +---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.h |   6 +-
 4 files changed, 162 insertions(+), 89 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index 6c25dab051d5..6a059e4aff86 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -442,10 +442,10 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange,
 		goto out_free;
 	}
 	if (cpages != npages)
-		pr_debug("partial migration, 0x%lx/0x%llx pages migrated\n",
+		pr_debug("partial migration, 0x%lx/0x%llx pages collected\n",
 			 cpages, npages);
 	else
-		pr_debug("0x%lx pages migrated\n", cpages);
+		pr_debug("0x%lx pages collected\n", cpages);
 
 	r = svm_migrate_copy_to_vram(node, prange, , , scratch, ttm_res_offset);
 	migrate_vma_pages();
@@ -479,6 +479,8 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange,
  * svm_migrate_ram_to_vram - migrate svm range from system to device
  * @prange: range structure
  * @best_loc: the device to migrate to
+ * @start_mgr: start page to migrate
+ * @last_mgr: last page to migrate
  * @mm: the process mm structure
  * @trigger: reason of migration
  *
@@ -489,6 +491,7 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange,
  */
 static int
 svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
+			unsigned long start_mgr, unsigned long last_mgr,
 			struct mm_struct *mm, uint32_t trigger)
 {
 	unsigned long addr, start, end;
@@ -498,23 +501,30 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
 	unsigned long cpages = 0;
 	long r = 0;
 
-	if (prange->actual_loc == best_loc) {
-		pr_debug("svms 0x%p [0x%lx 0x%lx] already on best_loc 0x%x\n",
-			 prange->svms, prange->start, prange->last, best_loc);
+	if (!best_loc) {
+		pr_debug("svms 0x%p [0x%lx 0x%lx] migrate to sys ram\n",
+			prange->svms, start_mgr, last_mgr);
 		return 0;
 	}
 
+	if (start_mgr < prange->start || last_mgr > prange->last) {
+		pr_debug("range [0x%lx 0x%lx] out prange [0x%lx 0x%lx]\n",
+ start_mgr, last_mgr, prange->start, prange->last);
+		return -EFAULT;
+	}
+
 	node = svm_range_get_node_by_id(prange, best_loc);
 	if (!node) {
 		pr_debug("failed to get kfd node by id 0x%x\n", best_loc);
 		return -ENODEV;
 	}
 
-	pr_debug("svms 0x%p [0x%lx 0x%lx] to gpu 0x%x\n", prange->svms,
-		 prange->start, prange->last, best_loc);
+	pr_debug("svms 0x%p [0x%lx 0x%lx] in [0x%lx 0x%lx] to gpu 0x%x\n",
+		prange->svms, start_mgr, last_mgr, prange->start, prange->last,
+		best_loc);
 
-	start = prange->start << PAGE_SHIFT;
-	end = (prange->last + 1) << PAGE_SHIFT;
+	start = start_mgr << PAGE_SHIFT;
+	end = (last_mgr + 1) << PAGE_SHIFT;
 
 	r = svm_range_vram_node_new(node, prange, true);
 	if (r) {
@@ -544,8 +554,11 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
 
 	if (cpages) {
 		prange->actual_loc = best_loc;
-		svm_range_dma_unmap(prange);
-	} else {
+		prange->vram_pages = prange->vram_pages + cpages;
+	} else if (!prange->actual_loc) {
+		/* if no page migrated and all pages from prange are at
+		 * sys ram drop svm_bo got from svm_range_vram_node_new
+		 */
 		svm_range_vram_node_free(prange);
 	}
 
@@ -663,19 +676,19 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
  * Context: Process context, caller hold mmap read lock, prange->migrate_mutex
  *
  * Return:
- *   0 - success with all pages migrated
  *   negative values - indicate error
- *   positive values - partial migration, number of pages not migrated
+ *   positive values or zero - number of pages got migrated
  */
 static long
 svm_migrate_vma_to_ram(struct kfd_node *node, struct svm_range *prange,
-		   struct vm_area_struct *vma, uint64_t start, uint64_t end,
-		   uint32_t trigger, struct page *fault_page)
+			struct vm_area_struct *vma, uint64_t start, uint64_t end,
+			uint32_t trigger, struct page *fault_page)
 {
 	struct kfd_process *p = container_of(prange->svms, struct kfd_process, svms);
 	uint64_t npages = (end - 

Re: [PATCH 1/5] drm/amd/display: Remove migrate_en/dis from dc_fpu_begin().

2023-10-05 Thread Sebastian Andrzej Siewior
On 2023-10-04 08:10:35 [-0400], Hamza Mahfooz wrote:
> I did some digging, and it seems like the intention of that patch was to
> fix the following splat:
> 
> WARNING: CPU: 5 PID: 1062 at
> drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/dc_fpu.c:71
> dc_assert_fp_enabled+0x1a/0x30 [amdgpu]
> [...]
> CPU: 5 PID: 1062 Comm: Xorg Tainted: G   OE 5.15.0-56-generic

So it has hard to look this up with a upstream v5.15 kernel since the
dcn32_populate_dml_pipes_from_context() was introduced in v6.0-rc1.
Judging from v6.0-rc1 I don't see how that warning could occur other
than using dc_assert_fp_enabled() without invoking DC_FP_START first.

> Hamza

Sebastian


Re: [PATCH 0/5] drm/amd/display: Remove migrate-disable and move memory allocation.

2023-10-05 Thread Sebastian Andrzej Siewior
On 2023-10-04 08:44:58 [-0400], Harry Wentland wrote:
> CI passed.
> 
> Series is
> Acked-by: Harry Wentland 

Thank you.

> Harry

Sebastian


Re: [PATCH v2 1/5] drm/amdgpu: Move package type enum to amdgpu_smuio

2023-10-05 Thread Christian König

Am 04.10.23 um 09:39 schrieb Lijo Lazar:

Move definition of package type to amdgpu_smuio header and add new
package types for CEM and OAM.

Signed-off-by: Lijo Lazar 


Reviewed-by: Christian König 


---

v2: Move definition to amdgpu_smuio.h instead of amdgpu.h (Christian/Hawking)

  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h   | 5 -
  drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h | 7 +++
  2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index 42ac6d1bf9ca..7088c5015675 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -69,11 +69,6 @@ enum amdgpu_gfx_partition {
  
  #define NUM_XCC(x) hweight16(x)
  
-enum amdgpu_pkg_type {

-   AMDGPU_PKG_TYPE_APU = 2,
-   AMDGPU_PKG_TYPE_UNKNOWN,
-};
-
  enum amdgpu_gfx_ras_mem_id_type {
AMDGPU_GFX_CP_MEM = 0,
AMDGPU_GFX_GCEA_MEM,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h
index 89c38d864471..5910d50ac74d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h
@@ -23,6 +23,13 @@
  #ifndef __AMDGPU_SMUIO_H__
  #define __AMDGPU_SMUIO_H__
  
+enum amdgpu_pkg_type {

+   AMDGPU_PKG_TYPE_APU = 2,
+   AMDGPU_PKG_TYPE_CEM = 3,
+   AMDGPU_PKG_TYPE_OAM = 4,
+   AMDGPU_PKG_TYPE_UNKNOWN,
+};
+
  struct amdgpu_smuio_funcs {
u32 (*get_rom_index_offset)(struct amdgpu_device *adev);
u32 (*get_rom_data_offset)(struct amdgpu_device *adev);




Re: [PATCH v3 1/4] drm/amd: Add support for prepare() and complete() callbacks

2023-10-05 Thread Christian König

Am 04.10.23 um 05:39 schrieb Mario Limonciello:

On 10/3/2023 16:22, Deucher, Alexander wrote:

[Public]


-Original Message-
From: Limonciello, Mario 
Sent: Tuesday, October 3, 2023 5:17 PM
To: Deucher, Alexander ; amd-
g...@lists.freedesktop.org
Cc: Wentland, Harry 
Subject: Re: [PATCH v3 1/4] drm/amd: Add support for prepare() and
complete() callbacks

On 10/3/2023 16:11, Deucher, Alexander wrote:

[Public]


-Original Message-
From: amd-gfx  On Behalf Of
Mario Limonciello
Sent: Tuesday, October 3, 2023 4:55 PM
To: amd-gfx@lists.freedesktop.org
Cc: Wentland, Harry ; Limonciello, Mario

Subject: [PATCH v3 1/4] drm/amd: Add support for prepare() and
complete() callbacks

Linux PM core has a prepare() callback run before suspend and
complete() callback ran after resume() for devices to use.  Add
plumbing to bring
prepare() to amdgpu.

The idea with the new vfuncs for amdgpu is that all IP blocks that do
memory allocations during suspend should do the allocation from this
call instead of the suspend() callback.

By moving the allocations to prepare() the system suspend will be
failed before any IP block has done any suspend code.

If the suspend fails, then do any cleanups in the complete() 
callback.


Signed-off-by: Mario Limonciello 
---
   drivers/gpu/drm/amd/amdgpu/amdgpu.h    |  2 ++
   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 39
--
   drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c    | 11 +++---
   3 files changed, 46 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 73e825d20259..5d651552822c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1415,6 +1415,8 @@ void amdgpu_driver_postclose_kms(struct
drm_device *dev,  void amdgpu_driver_release_kms(struct drm_device
*dev);

   int amdgpu_device_ip_suspend(struct amdgpu_device *adev);
+int amdgpu_device_prepare(struct drm_device *dev); void
+amdgpu_device_complete(struct drm_device *dev);
   int amdgpu_device_suspend(struct drm_device *dev, bool fbcon);  
int

amdgpu_device_resume(struct drm_device *dev, bool fbcon);
   u32 amdgpu_get_vblank_counter_kms(struct drm_crtc *crtc); diff
--git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index bad2b5577e96..f53cf675c3ce 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4259,6 +4259,43 @@ static int
amdgpu_device_evict_resources(struct
amdgpu_device *adev)
   /*
    * Suspend & resume.
    */
+/**
+ * amdgpu_device_prepare - prepare for device suspend
+ *
+ * @dev: drm dev pointer
+ *
+ * Prepare to put the hw in the suspend state (all asics).
+ * Returns 0 for success or an error on failure.
+ * Called at driver suspend.
+ */
+int amdgpu_device_prepare(struct drm_device *dev) {
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ int r;
+
+ if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
+ return 0;
+
+ adev->in_suspend = true;
+
+ return 0;
+}
+
+/**
+ * amdgpu_device_complete - complete the device after resume
+ *
+ * @dev: drm dev pointer
+ *
+ * Clean up any actions that the prepare step did.
+ * Called after driver resume.
+ */
+void amdgpu_device_complete(struct drm_device *dev) {
+ struct amdgpu_device *adev = drm_to_adev(dev);
+
+ adev->in_suspend = false;
+}
+
   /**
    * amdgpu_device_suspend - initiate device suspend
    *
@@ -4277,8 +4314,6 @@ int amdgpu_device_suspend(struct drm_device
*dev, bool fbcon)
    if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
    return 0;

- adev->in_suspend = true;
-


We also set this to false in amdgpu_device_resume() so that should 
be fixed

up as well.  But, I'm not sure we want to move this out of
amdgpu_device_suspend().  There are places we use
amdgpu_device_suspend/resume() outside of pmops that also rely on these
being set.  Those places may need to be fixed up if we do. IIRC, the 
switcheroo

code uses this.

The big reason that I moved it from suspend() to prepare() was so that
amdgpu_device_evict_resources() was called with the context of it 
being set.


My thought process:
0) prepare() sets all the time
1) If prepare() fails complete() clears it.
2) If prepare() succeeds it remains set for suspend()
3) If suspend() succeeds it gets cleared at resume()
4) If resume() failed for some reason, it's cleared by complete().

Does it actually matter that it's set while evicting resources?


Shouldn't matter for evicting resources.  We even have debugfs nodes 
you can access to forcibly evict resources at runtime for testing 
memory pressure.


Then in that case I think what I'll do is put an extra call for 
amdgpu_device_evict_resources() in the prepare callback.


It shouldn't do any harm to call it three times in the suspend sequence 
instead of two.


Yeah, I think you should move the first call from suspend to prepare.

Evacuating VRAM 

Re: [PATCH v6 1/9] drm/amdgpu: UAPI for user queue management

2023-10-05 Thread Shashank Sharma



On 04/10/2023 23:23, Felix Kuehling wrote:


On 2023-09-08 12:04, Shashank Sharma wrote:

From: Alex Deucher 

This patch introduces a new UAPI/IOCTL for the usermode graphics
queue. The userspace app will fill this structure and request
the graphics driver to add a graphics work queue for it. The
output of this UAPI is a queue id.

This UAPI maps the queue into GPU, so the graphics app can start
submitting work to the queue as soon as the call returns.

V2: Addressed review comments from Alex and Christian
 - Make the doorbell offset's comment clearer
 - Change the output parameter name to queue_id

V3: Integration with doorbell manager

V4:
 - Updated the UAPI doc (Pierre-Eric)
 - Created a Union for engine specific MQDs (Alex)
 - Added Christian's R-B
V5:
 - Add variables for GDS and CSA in MQD structure (Alex)
 - Make MQD data a ptr-size pair instead of union (Alex)

Cc: Alex Deucher 
Cc: Christian Koenig 
Reviewed-by: Christian König 
Signed-off-by: Alex Deucher 
Signed-off-by: Shashank Sharma 
---
  include/uapi/drm/amdgpu_drm.h | 110 ++
  1 file changed, 110 insertions(+)

diff --git a/include/uapi/drm/amdgpu_drm.h 
b/include/uapi/drm/amdgpu_drm.h

index 79b14828d542..627b4a38c855 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -54,6 +54,7 @@ extern "C" {
  #define DRM_AMDGPU_VM    0x13
  #define DRM_AMDGPU_FENCE_TO_HANDLE    0x14
  #define DRM_AMDGPU_SCHED    0x15
+#define DRM_AMDGPU_USERQ    0x16
    #define DRM_IOCTL_AMDGPU_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + 
DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
  #define DRM_IOCTL_AMDGPU_GEM_MMAP    DRM_IOWR(DRM_COMMAND_BASE + 
DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)

@@ -71,6 +72,7 @@ extern "C" {
  #define DRM_IOCTL_AMDGPU_VM    DRM_IOWR(DRM_COMMAND_BASE + 
DRM_AMDGPU_VM, union drm_amdgpu_vm)
  #define DRM_IOCTL_AMDGPU_FENCE_TO_HANDLE DRM_IOWR(DRM_COMMAND_BASE 
+ DRM_AMDGPU_FENCE_TO_HANDLE, union drm_amdgpu_fence_to_handle)
  #define DRM_IOCTL_AMDGPU_SCHED    DRM_IOW(DRM_COMMAND_BASE + 
DRM_AMDGPU_SCHED, union drm_amdgpu_sched)
+#define DRM_IOCTL_AMDGPU_USERQ    DRM_IOW(DRM_COMMAND_BASE + 
DRM_AMDGPU_USERQ, union drm_amdgpu_userq)

    /**
   * DOC: memory domains
@@ -304,6 +306,114 @@ union drm_amdgpu_ctx {
  union drm_amdgpu_ctx_out out;
  };
  +/* user queue IOCTL */
+#define AMDGPU_USERQ_OP_CREATE    1
+#define AMDGPU_USERQ_OP_FREE    2
+
+/* Flag to indicate secure buffer related workload, unused for now */
+#define AMDGPU_USERQ_MQD_FLAGS_SECURE    (1 << 0)
+/* Flag to indicate AQL workload, unused for now */
+#define AMDGPU_USERQ_MQD_FLAGS_AQL    (1 << 1)
+
+/*
+ * MQD (memory queue descriptor) is a set of parameters which allow


I find the term MQD misleading. For the firmware the MQD is a very 
different data structure from what you are defining here. It's a 
persistent data structure in kernel address space (VMID0) that is 
shared between the driver and the firmware that gets loaded or updated 
when queues are mapped or unmapped. I'd want to avoid confusing the 
firmware MQD with this structure.


I agree, I can change the name to something else like 
userq_properties_gfx_v11 or something similar


- Shashank


Regards,
  Felix



+ * the GPU to uniquely define and identify a usermode queue. This
+ * structure defines the MQD for GFX-V11 IP ver 0.
+ */
+struct drm_amdgpu_userq_mqd_gfx_v11_0 {
+    /**
+ * @queue_va: Virtual address of the GPU memory which holds the 
queue

+ * object. The queue holds the workload packets.
+ */
+    __u64   queue_va;
+    /**
+ * @queue_size: Size of the queue in bytes, this needs to be 
256-byte

+ * aligned.
+ */
+    __u64   queue_size;
+    /**
+ * @rptr_va : Virtual address of the GPU memory which holds the 
ring RPTR.
+ * This object must be at least 8 byte in size and aligned to 
8-byte offset.

+ */
+    __u64   rptr_va;
+    /**
+ * @wptr_va : Virtual address of the GPU memory which holds the 
ring WPTR.
+ * This object must be at least 8 byte in size and aligned to 
8-byte offset.

+ *
+ * Queue, RPTR and WPTR can come from the same object, as long 
as the size

+ * and alignment related requirements are met.
+ */
+    __u64   wptr_va;
+    /**
+ * @shadow_va: Virtual address of the GPU memory to hold the 
shadow buffer.
+ * This must be a from a separate GPU object, and must be at 
least 4-page

+ * sized.
+ */
+    __u64   shadow_va;
+    /**
+ * @gds_va: Virtual address of the GPU memory to hold the GDS 
buffer.
+ * This must be a from a separate GPU object, and must be at 
least 1-page

+ * sized.
+ */
+    __u64   gds_va;
+    /**
+ * @csa_va: Virtual address of the GPU memory to hold the CSA 
buffer.
+ * This must be a from a separate GPU object, and must be at 
least 1-page

+ * sized.
+ */
+    __u64   csa_va;
+};
+
+struct drm_amdgpu_userq_in {

Re: [PATCH v6 7/9] drm/amdgpu: map wptr BO into GART

2023-10-05 Thread Shashank Sharma

Hey Felix,

On 04/10/2023 23:34, Felix Kuehling wrote:


On 2023-09-18 06:32, Christian König wrote:

Am 08.09.23 um 18:04 schrieb Shashank Sharma:

To support oversubscription, MES FW expects WPTR BOs to
be mapped into GART, before they are submitted to usermode
queues. This patch adds a function for the same.

V4: fix the wptr value before mapping lookup (Bas, Christian).
V5: Addressed review comments from Christian:
 - Either pin object or allocate from GART, but not both.
 - All the handling must be done with the VM locks held.

Cc: Alex Deucher 
Cc: Christian Koenig 
Signed-off-by: Shashank Sharma 
Signed-off-by: Arvind Yadav 
---
  drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c    | 81 
+++

  .../gpu/drm/amd/include/amdgpu_userqueue.h    |  1 +
  2 files changed, 82 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c

index e266674e0d44..c0eb622dfc37 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -6427,6 +6427,79 @@ const struct amdgpu_ip_block_version 
gfx_v11_0_ip_block =

  .funcs = _v11_0_ip_funcs,
  };
  +static int
+gfx_v11_0_map_gtt_bo_to_gart(struct amdgpu_device *adev, struct 
amdgpu_bo *bo)

+{
+    int ret;
+
+    ret = amdgpu_bo_reserve(bo, true);
+    if (ret) {
+    DRM_ERROR("Failed to reserve bo. ret %d\n", ret);
+    goto err_reserve_bo_failed;
+    }
+
+    ret = amdgpu_ttm_alloc_gart(>tbo);
+    if (ret) {
+    DRM_ERROR("Failed to bind bo to GART. ret %d\n", ret);
+    goto err_map_bo_gart_failed;
+    }
+
+    amdgpu_bo_unreserve(bo);


The GART mapping can become invalid as soon as you unlock the BOs.

You need to attach an eviction fence for this to work correctly.


Don't you need an eviction fence on the WPTR BO regardless of the GTT 
mapping?


Yes, Christian also mentioned this in this iteration, I have implemented 
the basic eviction fence for [V7], I will publish it soon.


- Shashank



Regards,
  Felix





+    bo = amdgpu_bo_ref(bo);
+
+    return 0;
+
+err_map_bo_gart_failed:
+    amdgpu_bo_unreserve(bo);
+err_reserve_bo_failed:
+    return ret;
+}
+
+static int
+gfx_v11_0_create_wptr_mapping(struct amdgpu_device *adev,
+  struct amdgpu_usermode_queue *queue,
+  uint64_t wptr)
+{
+    struct amdgpu_bo_va_mapping *wptr_mapping;
+    struct amdgpu_vm *wptr_vm;
+    struct amdgpu_bo *wptr_bo = NULL;
+    int ret;
+
+    mutex_lock(>vm->eviction_lock);


Never ever touch the eviction lock outside of the VM code! That lock 
is completely unrelated to what you do here.



+    wptr_vm = queue->vm;
+    ret = amdgpu_bo_reserve(wptr_vm->root.bo, false);
+    if (ret)
+    goto unlock;
+
+    wptr &= AMDGPU_GMC_HOLE_MASK;
+    wptr_mapping = amdgpu_vm_bo_lookup_mapping(wptr_vm, wptr >> 
PAGE_SHIFT);

+    amdgpu_bo_unreserve(wptr_vm->root.bo);
+    if (!wptr_mapping) {
+    DRM_ERROR("Failed to lookup wptr bo\n");
+    ret = -EINVAL;
+    goto unlock;
+    }
+
+    wptr_bo = wptr_mapping->bo_va->base.bo;
+    if (wptr_bo->tbo.base.size > PAGE_SIZE) {
+    DRM_ERROR("Requested GART mapping for wptr bo larger than 
one page\n");

+    ret = -EINVAL;
+    goto unlock;
+    }


We probably also want to enforce that this BO is a per VM BO.


+
+    ret = gfx_v11_0_map_gtt_bo_to_gart(adev, wptr_bo);
+    if (ret) {
+    DRM_ERROR("Failed to map wptr bo to GART\n");
+    goto unlock;
+    }
+
+    queue->wptr_mc_addr = wptr_bo->tbo.resource->start << PAGE_SHIFT;


This needs to be amdgpu_bo_gpu_offset() instead.

Regards,
Christian.


+
+unlock:
+    mutex_unlock(>vm->eviction_lock);
+    return ret;
+}
+
  static void gfx_v11_0_userq_unmap(struct amdgpu_userq_mgr *uq_mgr,
    struct amdgpu_usermode_queue *queue)
  {
@@ -6475,6 +6548,7 @@ static int gfx_v11_0_userq_map(struct 
amdgpu_userq_mgr *uq_mgr,

  queue_input.queue_size = userq_props->queue_size >> 2;
  queue_input.doorbell_offset = userq_props->doorbell_index;
  queue_input.page_table_base_addr = 
amdgpu_gmc_pd_addr(queue->vm->root.bo);

+    queue_input.wptr_mc_addr = queue->wptr_mc_addr;
    amdgpu_mes_lock(>mes);
  r = adev->mes.funcs->add_hw_queue(>mes, _input);
@@ -6601,6 +6675,13 @@ static int gfx_v11_0_userq_mqd_create(struct 
amdgpu_userq_mgr *uq_mgr,

  goto free_mqd;
  }
  +    /* FW expects WPTR BOs to be mapped into GART */
+    r = gfx_v11_0_create_wptr_mapping(adev, queue, 
userq_props.wptr_gpu_addr);

+    if (r) {
+    DRM_ERROR("Failed to create WPTR mapping\n");
+    goto free_ctx;
+    }
+
  /* Map userqueue into FW using MES */
  r = gfx_v11_0_userq_map(uq_mgr, queue, _props);
  if (r) {
diff --git a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h 
b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h

index 34e20daa06c8..ae155de62560 100644
--- a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h
+++ 

Re: [PATCH 0/9] drm: Annotate structs with __counted_by

2023-10-05 Thread Christian König

Am 02.10.23 um 20:22 schrieb Kees Cook:

On Mon, Oct 02, 2023 at 08:11:41PM +0200, Christian König wrote:

Am 02.10.23 um 20:08 schrieb Kees Cook:

On Mon, Oct 02, 2023 at 08:01:57PM +0200, Christian König wrote:

Am 02.10.23 um 18:53 schrieb Kees Cook:

On Mon, Oct 02, 2023 at 11:06:19AM -0400, Alex Deucher wrote:

On Mon, Oct 2, 2023 at 5:20 AM Christian König
 wrote:

Am 29.09.23 um 21:33 schrieb Kees Cook:

On Fri, 22 Sep 2023 10:32:05 -0700, Kees Cook wrote:

This is a batch of patches touching drm for preparing for the coming
implementation by GCC and Clang of the __counted_by attribute. Flexible
array members annotated with __counted_by can have their accesses
bounds-checked at run-time checking via CONFIG_UBSAN_BOUNDS (for array
indexing) and CONFIG_FORTIFY_SOURCE (for strcpy/memcpy-family functions).

As found with Coccinelle[1], add __counted_by to structs that would
benefit from the annotation.

[...]

Since this got Acks, I figure I should carry it in my tree. Let me know
if this should go via drm instead.

Applied to for-next/hardening, thanks!

[1/9] drm/amd/pm: Annotate struct smu10_voltage_dependency_table with 
__counted_by
  https://git.kernel.org/kees/c/a6046ac659d6

STOP! In a follow up discussion Alex and I figured out that this won't work.

I'm so confused; from the discussion I saw that Alex said both instances
were false positives?


The value in the structure is byte swapped based on some firmware
endianness which does not necessarily match the CPU endianness.

SMU10 is APU only so the endianness of the SMU firmware and the CPU
will always match.

Which I think is what is being said here?


Please revert that one from going upstream if it's already on it's way.

And because of those reasons I strongly think that patches like this
should go through the DRM tree :)

Sure, that's fine -- please let me know. It was others Acked/etc. Who
should carry these patches?

Probably best if the relevant maintainer pick them up individually.

Some of those structures are filled in by firmware/hardware and only the
maintainers can judge if that value actually matches what the compiler
needs.

We have cases where individual bits are used as flags or when the size is
byte swapped etc...

Even Alex and I didn't immediately say how and where that field is actually
used and had to dig that up. That's where the confusion came from.

Okay, I've dropped them all from my tree. Several had Acks/Reviews, so
hopefully those can get picked up for the DRM tree?

I will pick those up to go through drm-misc-next.

Going to ping maintainers once more when I'm not sure if stuff is correct or
not.

Sounds great; thanks!


I wasn't 100% sure for the VC4 patch, but pushed the whole set to 
drm-misc-next anyway.


This also means that the patches are now auto merged into the drm-tip 
integration branch and should any build or unit test go boom we should 
notice immediately and can revert it pretty easily.


Thanks,
Christian.



-Kees





[PATCH] drm/amd/display: Fix mst hub unplug warning

2023-10-05 Thread Wayne Lin
[Why]
Unplugging the mst hub will cause a warning. That's because
dm_helpers_construct_old_payload() is changed to be called after
payload removal from the dc link.

In dm_helpers_construct_old_payload(), we refer to the vcpi in the
payload allocation table of the dc link to construct the old payload,
and the payload is no longer in the table when we call the function
now.

[How]
Refer to the mst_state to construct the number of time slots for the
old payload now. Note that dm_helpers_construct_old_payload() was just
a quick workaround before and we are going to abandon it soon.

Fixes: 5aa1dfcdf0a4 ("drm/mst: Refactor the flow for payload 
allocation/removement")
Reviewed-by: Jerry Zuo 
Signed-off-by: Wayne Lin 
---
 .../amd/display/amdgpu_dm/amdgpu_dm_helpers.c | 38 +--
 1 file changed, 18 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
index baf7e5254fb3..2f94bcf128c0 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
@@ -204,15 +204,16 @@ void dm_helpers_dp_update_branch_info(
 {}
 
 static void dm_helpers_construct_old_payload(
-   struct dc_link *link,
-   int pbn_per_slot,
+   struct drm_dp_mst_topology_mgr *mgr,
+   struct drm_dp_mst_topology_state *mst_state,
struct drm_dp_mst_atomic_payload *new_payload,
struct drm_dp_mst_atomic_payload *old_payload)
 {
-   struct link_mst_stream_allocation_table current_link_table =
-   
link->mst_stream_alloc_table;
-   struct link_mst_stream_allocation *dc_alloc;
-   int i;
+   struct drm_dp_mst_atomic_payload *pos;
+   int pbn_per_slot = mst_state->pbn_div;
+   u8 next_payload_vc_start = mgr->next_start_slot;
+   u8 payload_vc_start = new_payload->vc_start_slot;
+   u8 allocated_time_slots;
 
*old_payload = *new_payload;
 
@@ -221,20 +222,17 @@ static void dm_helpers_construct_old_payload(
 * struct drm_dp_mst_atomic_payload are don't care fields
 * while calling drm_dp_remove_payload_part2()
 */
-   for (i = 0; i < current_link_table.stream_count; i++) {
-   dc_alloc =
-   _link_table.stream_allocations[i];
-
-   if (dc_alloc->vcp_id == new_payload->vcpi) {
-   old_payload->time_slots = dc_alloc->slot_count;
-   old_payload->pbn = dc_alloc->slot_count * pbn_per_slot;
-   break;
-   }
+   list_for_each_entry(pos, _state->payloads, next) {
+   if (pos != new_payload &&
+   pos->vc_start_slot > payload_vc_start &&
+   pos->vc_start_slot < next_payload_vc_start)
+   next_payload_vc_start = pos->vc_start_slot;
}
 
-   /* make sure there is an old payload*/
-   ASSERT(i != current_link_table.stream_count);
+   allocated_time_slots = next_payload_vc_start - payload_vc_start;
 
+   old_payload->time_slots = allocated_time_slots;
+   old_payload->pbn = allocated_time_slots * pbn_per_slot;
 }
 
 /*
@@ -272,8 +270,8 @@ bool dm_helpers_dp_mst_write_payload_allocation_table(
drm_dp_add_payload_part1(mst_mgr, mst_state, new_payload);
} else {
/* construct old payload by VCPI*/
-   dm_helpers_construct_old_payload(stream->link, 
mst_state->pbn_div,
-   new_payload, _payload);
+   dm_helpers_construct_old_payload(mst_mgr, mst_state,
+new_payload, _payload);
target_payload = _payload;
 
drm_dp_remove_payload_part1(mst_mgr, mst_state, new_payload);
@@ -366,7 +364,7 @@ bool dm_helpers_dp_mst_send_payload_allocation(
if (enable) {
ret = drm_dp_add_payload_part2(mst_mgr, mst_state->base.state, 
new_payload);
} else {
-   dm_helpers_construct_old_payload(stream->link, 
mst_state->pbn_div,
+   dm_helpers_construct_old_payload(mst_mgr, mst_state,
 new_payload, _payload);
drm_dp_remove_payload_part2(mst_mgr, mst_state, _payload, 
new_payload);
}
-- 
2.37.3