[PATCH v2 4/4] drm/amd/pm: Add sysfs attribute to get pm log
Add sysfs attribute to read power management log. A snapshot is captured to the buffer when the attribute is read. Signed-off-by: Lijo Lazar --- v2: Pass PAGE_SIZE as the max size of input buffer drivers/gpu/drm/amd/pm/amdgpu_pm.c | 40 ++ 1 file changed, 40 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index 4c65a2fac028..5a1d21c52672 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -1794,6 +1794,44 @@ static ssize_t amdgpu_set_apu_thermal_cap(struct device *dev, return count; } +static int amdgpu_pmlog_attr_update(struct amdgpu_device *adev, + struct amdgpu_device_attr *attr, + uint32_t mask, + enum amdgpu_device_attr_states *states) +{ + if (amdgpu_dpm_get_pm_log(adev, NULL, 0) == -EOPNOTSUPP) + *states = ATTR_STATE_UNSUPPORTED; + + return 0; +} + +static ssize_t amdgpu_get_pmlog(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(ddev); + ssize_t size = 0; + int ret; + + if (amdgpu_in_reset(adev)) + return -EPERM; + if (adev->in_suspend && !adev->in_runpm) + return -EPERM; + + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) { + pm_runtime_put_autosuspend(ddev->dev); + return ret; + } + + size = amdgpu_dpm_get_pm_log(adev, buf, PAGE_SIZE); + + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + + return size; +} + /** * DOC: gpu_metrics * @@ -2091,6 +2129,8 @@ static struct amdgpu_device_attr amdgpu_device_attrs[] = { AMDGPU_DEVICE_ATTR_RW(smartshift_bias, ATTR_FLAG_BASIC, .attr_update = ss_bias_attr_update), AMDGPU_DEVICE_ATTR_RW(xgmi_plpd_policy, ATTR_FLAG_BASIC), + AMDGPU_DEVICE_ATTR_RO(pmlog, ATTR_FLAG_BASIC, + .attr_update = amdgpu_pmlog_attr_update), }; static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_attr *attr, -- 2.25.1
[PATCH v2 3/4] drm/amd/pm: Add pm log support to SMU v13.0.6
Add support to fetch PM log sample from SMU v13.0.6 Signed-off-by: Lijo Lazar --- v2: Check if input buffer has enough space to copy log data drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h | 1 + .../pm/swsmu/inc/pmfw_if/smu_v13_0_6_ppsmc.h | 4 +- drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h | 4 +- .../drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 59 +++ 4 files changed, 66 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h index 0d84fb9640a6..01bc92875f3e 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h @@ -253,6 +253,7 @@ struct smu_table { uint64_t mc_address; void *cpu_addr; struct amdgpu_bo *bo; + uint32_t version; }; enum smu_perf_level_designation { diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_ppsmc.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_ppsmc.h index 509e3cd483fb..891d03327ffa 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_ppsmc.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_ppsmc.h @@ -91,7 +91,9 @@ #define PPSMC_MSG_QueryValidMcaCeCount 0x3A #define PPSMC_MSG_McaBankCeDumpDW 0x3B #define PPSMC_MSG_SelectPLPDMode0x40 -#define PPSMC_Message_Count 0x41 +#define PPSMC_MSG_PmLogReadSample 0x41 +#define PPSMC_MSG_PmLogGetTableVersion 0x42 +#define PPSMC_Message_Count 0x43 //PPSMC Reset Types for driver msg argument #define PPSMC_RESET_TYPE_DRIVER_MODE_1_RESET0x1 diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h index 4850e48bbef5..6ea9adabe30f 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h @@ -253,7 +253,9 @@ __SMU_DUMMY_MAP(QueryValidMcaCeCount), \ __SMU_DUMMY_MAP(McaBankDumpDW), \ __SMU_DUMMY_MAP(McaBankCeDumpDW), \ - __SMU_DUMMY_MAP(SelectPLPDMode), + __SMU_DUMMY_MAP(SelectPLPDMode),\ + __SMU_DUMMY_MAP(PmLogGetTableVersion), \ + 
__SMU_DUMMY_MAP(PmLogReadSample), #undef __SMU_DUMMY_MAP #define __SMU_DUMMY_MAP(type) SMU_MSG_##type diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index bf01a23f399a..e5f84d8dec80 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -172,6 +172,8 @@ static const struct cmn2asic_msg_mapping smu_v13_0_6_message_map[SMU_MSG_MAX_COU MSG_MAP(McaBankDumpDW, PPSMC_MSG_McaBankDumpDW, 0), MSG_MAP(McaBankCeDumpDW, PPSMC_MSG_McaBankCeDumpDW, 0), MSG_MAP(SelectPLPDMode, PPSMC_MSG_SelectPLPDMode, 0), + MSG_MAP(PmLogGetTableVersion, PPSMC_MSG_PmLogGetTableVersion,0), + MSG_MAP(PmLogReadSample, PPSMC_MSG_PmLogReadSample, 0), }; static const struct cmn2asic_mapping smu_v13_0_6_clk_map[SMU_CLK_COUNT] = { @@ -337,6 +339,61 @@ static int smu_v13_0_6_get_allowed_feature_mask(struct smu_context *smu, return 0; } +static int smu_v13_0_6_setup_pm_log(struct smu_context *smu) +{ + struct smu_table_context *smu_tbl_ctxt = &smu->smu_table; + struct smu_table *table = &smu_tbl_ctxt->tables[SMU_TABLE_PMSTATUSLOG]; + uint32_t pmlog_version; + int ret; + + if (!table->size) + return 0; + + ret = smu_cmn_send_smc_msg(smu, SMU_MSG_PmLogGetTableVersion, + &pmlog_version); + if (ret) + return ret; + + table->version = pmlog_version; + + return 0; +} + +static ssize_t smu_v13_0_6_get_pm_log(struct smu_context *smu, void *log, + size_t max_size) +{ + struct smu_table_context *smu_tbl_ctxt = &smu->smu_table; + struct smu_table *table = &smu_tbl_ctxt->tables[SMU_TABLE_PMSTATUSLOG]; + struct amdgpu_pmlog *pm_log = log; + uint32_t pmfw_version, log_size; + int ret; + + if (smu->adev->flags & AMD_IS_APU) + return -EOPNOTSUPP; + + if (!pm_log || !max_size) + return -EINVAL; + + smu_cmn_get_smc_version(smu, NULL, &pmfw_version); + ret = smu_cmn_send_smc_msg(smu, SMU_MSG_PmLogReadSample, &log_size); + if (ret) + return ret; + + if (max_size < (log_size + sizeof(pm_log->common_header)))
return -EOVERFLOW; + + amdgpu_asic_invalidate_hdp(smu->adev, NULL); + memcpy(pm_log->data, table->cpu_addr, log_size); + + memset(&pm_log->common_header, 0, sizeof(pm_log->common_header)); +
[PATCH v2 1/4] drm/amdgpu: add pmlog structure definition
From: Alex Deucher Define the pmlog structures to be exposed via sysfs. Signed-off-by: Alex Deucher Signed-off-by: Lijo Lazar --- drivers/gpu/drm/amd/include/kgd_pp_interface.h | 15 +++ 1 file changed, 15 insertions(+) diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h index e0bb6d39f0c3..9905228fd89c 100644 --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h @@ -980,4 +980,19 @@ struct gpu_metrics_v2_4 { uint16_taverage_soc_current; uint16_taverage_gfx_current; }; + +struct amdgpu_pmlog_header { + uint16_t structure_size; + uint16_t pad; + uint32_t mp1_ip_discovery_version; + uint32_t pmfw_version; + uint32_t pmlog_version; +}; + +struct amdgpu_pmlog { + struct amdgpu_pmlog_header common_header; + + uint8_t data[]; +}; + #endif -- 2.25.1
[PATCH v2 2/4] drm/amd/pm: Add support to fetch pm log sample
Add API support to fetch a snapshot of power management log from PMFW. Signed-off-by: Lijo Lazar --- v2: Add max size of input buffer to take care of overflows drivers/gpu/drm/amd/include/kgd_pp_interface.h | 1 + drivers/gpu/drm/amd/pm/amdgpu_dpm.c| 16 drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h| 11 +++ drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 14 ++ drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h | 8 5 files changed, 50 insertions(+) diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h index 9905228fd89c..01eaafafd3c3 100644 --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h @@ -426,6 +426,7 @@ struct amd_pm_funcs { int (*set_df_cstate)(void *handle, enum pp_df_cstate state); int (*set_xgmi_pstate)(void *handle, uint32_t pstate); ssize_t (*get_gpu_metrics)(void *handle, void **table); + ssize_t (*get_pm_log)(void *handle, void *pmlog, size_t size); int (*set_watermarks_for_clock_ranges)(void *handle, struct pp_smu_wm_range_sets *ranges); int (*display_disable_memory_clock_switch)(void *handle, diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c index 1b17a71ed45e..1db899485309 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c @@ -1300,6 +1300,22 @@ int amdgpu_dpm_get_gpu_metrics(struct amdgpu_device *adev, void **table) return ret; } +ssize_t amdgpu_dpm_get_pm_log(struct amdgpu_device *adev, void *pm_log, + size_t size) +{ + const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; + int ret = 0; + + if (!pp_funcs->get_pm_log) + return 0; + + mutex_lock(&adev->pm.mutex); + ret = pp_funcs->get_pm_log(adev->powerplay.pp_handle, pm_log, size); + mutex_unlock(&adev->pm.mutex); + + return ret; +} + int amdgpu_dpm_get_fan_control_mode(struct amdgpu_device *adev, uint32_t *fan_mode) { diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h index 
feccd2a7120d..ea2c1cc9c7b0 100644 --- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h +++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h @@ -511,6 +511,17 @@ int amdgpu_dpm_get_power_profile_mode(struct amdgpu_device *adev, int amdgpu_dpm_set_power_profile_mode(struct amdgpu_device *adev, long *input, uint32_t size); int amdgpu_dpm_get_gpu_metrics(struct amdgpu_device *adev, void **table); + +/** + * @get_pm_log: Get one snapshot of power management log from PMFW. The sample + * is copied to pmlog buffer. It's expected to be allocated by the caller. Max + * size expected for a log sample is 4096 bytes. + * + * Return: Actual size of the log + */ +ssize_t amdgpu_dpm_get_pm_log(struct amdgpu_device *adev, void *pmlog, + size_t size); + int amdgpu_dpm_get_fan_control_mode(struct amdgpu_device *adev, uint32_t *fan_mode); int amdgpu_dpm_set_fan_speed_pwm(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index 99750c182279..73f3e7915d23 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -3090,6 +3090,19 @@ static ssize_t smu_sys_get_gpu_metrics(void *handle, void **table) return smu->ppt_funcs->get_gpu_metrics(smu, table); } +static ssize_t smu_sys_get_pm_log(void *handle, void *pm_log, size_t size) +{ + struct smu_context *smu = handle; + + if (!smu->pm_enabled || !smu->adev->pm.dpm_enabled) + return -EOPNOTSUPP; + + if (!smu->ppt_funcs->get_pm_log) + return -EOPNOTSUPP; + + return smu->ppt_funcs->get_pm_log(smu, pm_log, size); +} + static int smu_enable_mgpu_fan_boost(void *handle) { struct smu_context *smu = handle; @@ -3231,6 +3244,7 @@ static const struct amd_pm_funcs swsmu_pm_funcs = { .set_df_cstate= smu_set_df_cstate, .set_xgmi_pstate = smu_set_xgmi_pstate, .get_gpu_metrics = smu_sys_get_gpu_metrics, + .get_pm_log = smu_sys_get_pm_log, .set_watermarks_for_clock_ranges = smu_set_watermarks_for_clock_ranges, .display_disable_memory_clock_switch = 
smu_display_disable_memory_clock_switch, .get_max_sustainable_clocks_by_dc= smu_get_max_sustainable_clocks_by_dc, diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h index f3cab5e633a7..0d84fb9640a6 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h @@
[PATCH] drm/amdgpu: Increase IP discovery region size
IP discovery region has increased to > 8K on some SOCs. Maximum reserve size is up to 12K, but not used. For now increase to 10K. Signed-off-by: Lijo Lazar --- drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h index 3a2f347bd50d..4d03cd5b3410 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h @@ -24,7 +24,7 @@ #ifndef __AMDGPU_DISCOVERY__ #define __AMDGPU_DISCOVERY__ -#define DISCOVERY_TMR_SIZE (8 << 10) +#define DISCOVERY_TMR_SIZE (10 << 10) #define DISCOVERY_TMR_OFFSET(64 << 10) void amdgpu_discovery_fini(struct amdgpu_device *adev); -- 2.25.1
Re: [PATCH] drm/amdgpu: Annotate struct amdgpu_bo_list with __counted_by
Applied. Thanks! Alex On Thu, Oct 5, 2023 at 10:32 AM Christian König wrote: > > Am 04.10.23 um 01:29 schrieb Kees Cook: > > Prepare for the coming implementation by GCC and Clang of the __counted_by > > attribute. Flexible array members annotated with __counted_by can have > > their accesses bounds-checked at run-time via CONFIG_UBSAN_BOUNDS (for > > array indexing) and CONFIG_FORTIFY_SOURCE (for strcpy/memcpy-family > > functions). > > > > As found with Coccinelle[1], add __counted_by for struct amdgpu_bo_list. > > Additionally, since the element count member must be set before accessing > > the annotated flexible array member, move its initialization earlier. > > > > Cc: Alex Deucher > > Cc: "Christian König" > > Cc: "Pan, Xinhui" > > Cc: David Airlie > > Cc: Daniel Vetter > > Cc: "Gustavo A. R. Silva" > > Cc: Luben Tuikov > > Cc: Christophe JAILLET > > Cc: Felix Kuehling > > Cc: amd-gfx@lists.freedesktop.org > > Cc: dri-de...@lists.freedesktop.org > > Cc: linux-harden...@vger.kernel.org > > Link: > > https://github.com/kees/kernel-tools/blob/trunk/coccinelle/examples/counted_by.cocci > > [1] > > Signed-off-by: Kees Cook > > Reviewed-by: Christian König > > > --- > > drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c | 2 +- > > drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h | 2 +- > > 2 files changed, 2 insertions(+), 2 deletions(-) > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c > > index 6f5b641b631e..781e5c5ce04d 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c > > @@ -84,6 +84,7 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, > > struct drm_file *filp, > > > > kref_init(>refcount); > > > > + list->num_entries = num_entries; > > array = list->entries; > > > > for (i = 0; i < num_entries; ++i) { > > @@ -129,7 +130,6 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, > > struct drm_file *filp, > > } > > > > 
list->first_userptr = first_userptr; > > - list->num_entries = num_entries; > > sort(array, last_entry, sizeof(struct amdgpu_bo_list_entry), > >amdgpu_bo_list_entry_cmp, NULL); > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h > > index 6a703be45d04..555cd6d877c3 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h > > @@ -56,7 +56,7 @@ struct amdgpu_bo_list { > >*/ > > struct mutex bo_list_mutex; > > > > - struct amdgpu_bo_list_entry entries[]; > > + struct amdgpu_bo_list_entry entries[] __counted_by(num_entries); > > }; > > > > int amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id, >
Re: [PATCH v2 1/2] usb: typec: ucsi: Use GET_CAPABILITY attributes data to set power supply scope
Hi, On Thu, Oct 05, 2023 at 12:52:29PM -0500, Mario Limonciello wrote: > On some OEM systems, adding a W7900 dGPU triggers RAS errors and hangs > at a black screen on startup. This issue occurs only if `ucsi_acpi` has > loaded before `amdgpu` has loaded. The reason for this failure is that > `amdgpu` uses power_supply_is_system_supplied() to determine if running > on AC or DC power at startup. If this value is reported incorrectly the > dGPU will also be programmed incorrectly and trigger errors. > > power_supply_is_system_supplied() reports the wrong value because UCSI > power supplies provided as part of the system don't properly report the > scope as "DEVICE" scope (not powering the system). > > In order to fix this issue check the capabilities reported from the UCSI > power supply to ensure that it supports charging a battery and that it can > be powered by AC. Mark the scope accordingly. > > Fixes: a7fbfd44c020 ("usb: typec: ucsi: Mark dGPUs as DEVICE scope") > Link: > https://www.intel.com/content/www/us/en/products/docs/io/universal-serial-bus/usb-type-c-ucsi-spec.html > p28 > Signed-off-by: Mario Limonciello > --- > Cc: Kai-Heng Feng > Cc: Alex Deucher > > Cc: Richard Gong > --- > drivers/usb/typec/ucsi/psy.c | 9 + > 1 file changed, 9 insertions(+) > > diff --git a/drivers/usb/typec/ucsi/psy.c b/drivers/usb/typec/ucsi/psy.c > index 384b42267f1f..b35c6e07911e 100644 > --- a/drivers/usb/typec/ucsi/psy.c > +++ b/drivers/usb/typec/ucsi/psy.c > @@ -37,6 +37,15 @@ static int ucsi_psy_get_scope(struct ucsi_connector *con, > struct device *dev = con->ucsi->dev; > > device_property_read_u8(dev, "scope", ); > + if (scope == POWER_SUPPLY_SCOPE_UNKNOWN) { > + u32 mask = UCSI_CAP_ATTR_POWER_AC_SUPPLY | > +UCSI_CAP_ATTR_BATTERY_CHARGING; > + > + if (con->ucsi->cap.attributes & mask) > + scope = POWER_SUPPLY_SCOPE_SYSTEM; > + else > + scope = POWER_SUPPLY_SCOPE_DEVICE; > + } > val->intval = scope; > return 0; > } Reviewed-by: Sebastian Reichel -- Sebastian signature.asc 
Description: PGP signature
Re: [PATCH 2/3] power: supply: Don't count 'unknown' scope power supplies
On 10/4/2023 18:10, Sebastian Reichel wrote: Hi, On Sun, Oct 01, 2023 at 07:00:11PM -0500, Mario Limonciello wrote: Let me try to add more detail. This is an OEM system that has 3 USB type C ports. It's an Intel system, but this doesn't matter for the issue. * when ucsi_acpi is not loaded there are no power supplies in the system and it reports power_supply_is_system_supplied() as AC. * When ucsi_acpi is loaded 3 power supplies will be registered. power_supply_is_system_supplied() reports as DC. Now when you add in a Navi3x AMD dGPU to the system the power supplies don't change. This particular dGPU model doesn't contain a USB-C port, so there is no UCSI power supply registered. As amdgpu is loaded it looks at device initialization whether the system is powered by AC or DC. Here is how it looks: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/tree/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c?h=linux-6.5.y#n3834 On the OEM system if amdgpu loads before the ucsi_acpi driver (such as in the initramfs) then the right value is returned for power_supply_is_system_supplied() - AC. If amdgpu is loaded after the ucsi_acpi driver, the wrong value is returned for power_supply_is_system_supplied() - DC. This value is very important to set up the dGPU properly. If the wrong value is returned, the wrong value will be notified to the hardware and the hardware will not behave properly. On the OEM system this is a "black screen" at bootup along with RAS errors emitted by the dGPU. With no changes to a malfunctioning kernel or initramfs binaries I can add modprobe.blacklist=ucsi_acpi to kernel command line avoid registering those 3 power supplies and the system behaves properly. So I think it's inappropriate for "UNKNOWN" scope power supplies to be registered and treated as system supplies, at least as it pertains to power_supply_is_system_supplied(). 
So the main issue is, that the ucsi_acpi registers a bunch of power-supply chargers with unknown scope on a desktop systems and that results in the system assumed to be supplied from battery. The problem with your change is, that many of the charger drivers don't set a scope at all (and thus report unknown scope). Those obviously should not be skipped. Probably most of these drivers could be changed to properly set the scope, but it needs to be checked on a case-by-case basis. With your current patch they would regress in the oposite direction of your use-case. Ideally ucsi is changed to properly describe the scope, but I suppose this information is not available in ACPI? Assuming that the above are not solvable easily, my idea would be to only count the number of POWER_SUPPLY_TYPE_BATTERY device, which have !POWER_SUPPLY_SCOPE_DEVICE and exit early if there are none. Basically change __power_supply_is_system_supplied(), so that it looks like this: ... if (!psy->desc->get_property(psy, POWER_SUPPLY_PROP_SCOPE, )) if (ret.intval == POWER_SUPPLY_SCOPE_DEVICE) return 0; if (psy->desc->type == POWER_SUPPLY_TYPE_BATTERY) (*count)++; else if (!psy->desc->get_property(psy, POWER_SUPPLY_PROP_ONLINE, )) return ret.intval; ... That should work in both cases. I tested both your suggestion as well as modifying UCSI driver to set the scope. Both worked. I've sent out v2 modifying the scope for UCSI driver. If for some reason that ends up not working out we can revert to your generic suggestion. 
https://lore.kernel.org/linux-usb/20231005175230.232764-1-mario.limoncie...@amd.com/T/#m9543f1f2c3767c0e88135c2e3f15ced65cfdf004 -- Sebastian drivers/power/supply/power_supply_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/power/supply/power_supply_core.c b/drivers/power/supply/power_supply_core.c index d325e6dbc770..3de6e6d00815 100644 --- a/drivers/power/supply/power_supply_core.c +++ b/drivers/power/supply/power_supply_core.c @@ -349,7 +349,7 @@ static int __power_supply_is_system_supplied(struct device *dev, void *data) unsigned int *count = data; if (!psy->desc->get_property(psy, POWER_SUPPLY_PROP_SCOPE, )) - if (ret.intval == POWER_SUPPLY_SCOPE_DEVICE) + if (ret.intval != POWER_SUPPLY_SCOPE_SYSTEM) return 0; (*count)++; -- 2.34.1
Re: [PATCH v4 1/1] drm/amdkfd: get doorbell's absolute offset based on the db_size
On 2023-10-05 13:20, Arvind Yadav wrote: Here, Adding db_size in byte to find the doorbell's absolute offset for both 32-bit and 64-bit doorbell sizes. So that doorbell offset will be aligned based on the doorbell size. v2: - Addressed the review comment from Felix. v3: - Adding doorbell_size as parameter to get db absolute offset. v4: Squash the two patches into one. Cc: Christian Koenig Cc: Alex Deucher Signed-off-by: Shashank Sharma Signed-off-by: Arvind Yadav Reviewed-by: Felix Kuehling --- drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h| 5 +++-- drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell_mgr.c| 13 + .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 3 ++- drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c | 10 -- .../gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 3 ++- 5 files changed, 24 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h index 09f6727e7c73..4a8b33f55f6b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h @@ -357,8 +357,9 @@ int amdgpu_doorbell_init(struct amdgpu_device *adev); void amdgpu_doorbell_fini(struct amdgpu_device *adev); int amdgpu_doorbell_create_kernel_doorbells(struct amdgpu_device *adev); uint32_t amdgpu_doorbell_index_on_bar(struct amdgpu_device *adev, - struct amdgpu_bo *db_bo, - uint32_t doorbell_index); + struct amdgpu_bo *db_bo, + uint32_t doorbell_index, + uint32_t db_size); #define RDOORBELL32(index) amdgpu_mm_rdoorbell(adev, (index)) #define WDOORBELL32(index, v) amdgpu_mm_wdoorbell(adev, (index), (v)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell_mgr.c index da4be0bbb446..6690f5a72f4d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell_mgr.c @@ -114,19 +114,24 @@ void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v) * @adev: amdgpu_device pointer * 
@db_bo: doorbell object's bo * @db_index: doorbell relative index in this doorbell object + * @db_size: doorbell size is in byte * * returns doorbell's absolute index in BAR */ uint32_t amdgpu_doorbell_index_on_bar(struct amdgpu_device *adev, - struct amdgpu_bo *db_bo, - uint32_t doorbell_index) + struct amdgpu_bo *db_bo, + uint32_t doorbell_index, + uint32_t db_size) { int db_bo_offset; db_bo_offset = amdgpu_bo_gpu_offset_no_check(db_bo); - /* doorbell index is 32 bit but doorbell's size is 64-bit, so *2 */ - return db_bo_offset / sizeof(u32) + doorbell_index * 2; + /* doorbell index is 32 bit but doorbell's size can be 32 bit +* or 64 bit, so *db_size(in byte)/4 for alignment. +*/ + return db_bo_offset / sizeof(u32) + doorbell_index * + DIV_ROUND_UP(db_size, 4); } /** diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 0d3d538b64eb..e07652e72496 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -407,7 +407,8 @@ static int allocate_doorbell(struct qcm_process_device *qpd, q->properties.doorbell_off = amdgpu_doorbell_index_on_bar(dev->adev, qpd->proc_doorbells, - q->doorbell_id); + q->doorbell_id, + dev->kfd->device_info.doorbell_size); return 0; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c index 7b38537c7c99..05c74887fd6f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c @@ -161,7 +161,10 @@ void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd, if (inx >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) return NULL; - *doorbell_off = amdgpu_doorbell_index_on_bar(kfd->adev, kfd->doorbells, inx); + *doorbell_off = amdgpu_doorbell_index_on_bar(kfd->adev, +kfd->doorbells, +inx, + kfd->device_info.doorbell_size); inx *= 2; pr_debug("Get kernel queue doorbell\n" @@ -240,7 +243,10
Re: [PATCH 3/3] drm/amdkfd: Check bitmap_mapped flag to skip retry fault
On 2023-10-02 13:08, Chen, Xiaogang wrote: On 9/29/2023 9:11 AM, Philip Yang wrote: Caution: This message originated from an External Source. Use proper caution when opening attachments, clicking links, or responding. Use bitmap_mapped flag to check if range already mapped to the specific GPU, to skip the retry fault from different page of the same range. Remove prange validate_timestamp which is not accurate for multiple GPUs. Signed-off-by: Philip Yang --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 24 drivers/gpu/drm/amd/amdkfd/kfd_svm.h | 1 - 2 files changed, 8 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index ac65bf25c685..5e063d902a46 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -43,10 +43,6 @@ #define AMDGPU_SVM_RANGE_RESTORE_DELAY_MS 1 -/* Long enough to ensure no retry fault comes after svm range is restored and - * page table is updated. - */ -#define AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING (2UL * NSEC_PER_MSEC) #if IS_ENABLED(CONFIG_DYNAMIC_DEBUG) #define dynamic_svm_range_dump(svms) \ _dynamic_func_call_no_desc("svm_range_dump", svm_range_debug_dump, svms) @@ -365,7 +361,6 @@ svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start, INIT_LIST_HEAD(>deferred_list); INIT_LIST_HEAD(>child_list); atomic_set(>invalid, 0); - prange->validate_timestamp = 0; mutex_init(>migrate_mutex); mutex_init(>lock); @@ -1876,8 +1871,6 @@ static int svm_range_validate_and_map(struct mm_struct *mm, } svm_range_unreserve_bos(ctx); - if (!r) - prange->validate_timestamp = ktime_get_boottime(); free_ctx: kfree(ctx); @@ -3162,15 +3155,6 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, goto out_unlock_range; } - /* skip duplicate vm fault on different pages of same range */ - if (ktime_before(timestamp, ktime_add_ns(prange->validate_timestamp, - AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING))) { - pr_debug("svms 0x%p [0x%lx %lx] 
already restored\n", - svms, prange->start, prange->last); - r = 0; - goto out_unlock_range; - } - /* __do_munmap removed VMA, return success as we are handling stale * retry fault. */ @@ -3196,6 +3180,14 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, goto out_unlock_range; } + /* skip duplicate vm fault on different pages of same range */ I think the following call means if the prange->granularity range that the addr is in is mapped on gpuidex already, not different pages of same range. yes, the comment should update to "skip duplicate vm fault on different pages of same granularity range" Regards Xiaogang + if (svm_range_partial_mapped_dev(gpuidx, prange, addr, addr)) { + pr_debug("svms 0x%p [0x%lx %lx] already restored on gpu %d\n", + svms, prange->start, prange->last, gpuidx); + r = 0; + goto out_unlock_range; + } + pr_debug("svms %p [0x%lx 0x%lx] best restore 0x%x, actual loc 0x%x\n", svms, prange->start, prange->last, best_loc,
[linux-next:master] BUILD REGRESSION 7d730f1bf6f39ece2d9f3ae682f12e5b593d534d
tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git master branch HEAD: 7d730f1bf6f39ece2d9f3ae682f12e5b593d534d Add linux-next specific files for 20231005 Error/Warning reports: https://lore.kernel.org/oe-kbuild-all/202309122047.cri9yjrq-...@intel.com https://lore.kernel.org/oe-kbuild-all/202309192314.vbsjiim5-...@intel.com https://lore.kernel.org/oe-kbuild-all/202309212121.cul1ptra-...@intel.com https://lore.kernel.org/oe-kbuild-all/202309212339.hxhbu2f1-...@intel.com https://lore.kernel.org/oe-kbuild-all/202309221945.uwcq56zg-...@intel.com https://lore.kernel.org/oe-kbuild-all/202310041744.d34giv9v-...@intel.com https://lore.kernel.org/oe-kbuild-all/202310042215.w9pg3rqs-...@intel.com https://lore.kernel.org/oe-kbuild-all/202310051547.40nm4sif-...@intel.com https://lore.kernel.org/oe-kbuild-all/202310052201.anvbpgpr-...@intel.com Error/Warning: (recently discovered and may have been fixed) Documentation/gpu/amdgpu/thermal:43: ./drivers/gpu/drm/amd/pm/amdgpu_pm.c:988: WARNING: Unexpected indentation. 
arch/x86/include/asm/string_32.h:150:25: warning: '__builtin_memcpy' writing 3 bytes into a region of size 0 overflows the destination [-Wstringop-overflow=] drivers/cpufreq/sti-cpufreq.c:215:50: warning: '%d' directive output may be truncated writing between 1 and 10 bytes into a region of size 2 [-Wformat-truncation=] drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c:274: warning: Function parameter or member 'gart_placement' not described in 'amdgpu_gmc_gart_location' fs/bcachefs/bcachefs_format.h:215:25: warning: 'p' offset 3 in 'struct bkey' isn't aligned to 4 [-Wpacked-not-aligned] fs/bcachefs/bcachefs_format.h:217:25: warning: 'version' offset 27 in 'struct bkey' isn't aligned to 4 [-Wpacked-not-aligned] fs/gfs2/inode.c:1876:14: sparse:struct gfs2_glock * fs/gfs2/inode.c:1876:14: sparse:struct gfs2_glock [noderef] __rcu * fs/gfs2/super.c:1543:17: sparse:struct gfs2_glock * fs/gfs2/super.c:1543:17: sparse:struct gfs2_glock [noderef] __rcu * include/linux/fortify-string.h:57:33: warning: writing 8 bytes into a region of size 0 [-Wstringop-overflow=] kernel/bpf/helpers.c:1906:19: warning: no previous declaration for 'bpf_percpu_obj_new_impl' [-Wmissing-declarations] kernel/bpf/helpers.c:1942:18: warning: no previous declaration for 'bpf_percpu_obj_drop_impl' [-Wmissing-declarations] kernel/bpf/helpers.c:2477:18: warning: no previous declaration for 'bpf_throw' [-Wmissing-declarations] Unverified Error/Warning (likely false positive, please contact us if interested): Documentation/devicetree/bindings/mfd/qcom-pm8xxx.yaml: arch/x86/kvm/x86.c:8891 x86_emulate_instruction() warn: missing error code? 'r' drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c:209 amdgpu_mca_smu_get_mca_entry() warn: variable dereferenced before check 'mca_funcs' (see line 200) drivers/gpu/drm/i915/display/intel_psr.c:3185 i915_psr_sink_status_show() error: uninitialized symbol 'error_status'. drivers/gpu/drm/i915/display/intel_tc.c:327 mtl_tc_port_get_max_lane_count() error: uninitialized symbol 'pin_mask'. 
fs/exfat/namei.c:393 exfat_find_empty_entry() error: uninitialized symbol 'last_clu'. fs/ntfs3/bitmap.c:663 wnd_init() warn: Please consider using kvcalloc instead of kvmalloc_array fs/ntfs3/super.c:466:23: sparse: sparse: unknown escape sequence: '\%' lib/kunit/executor_test.c:39 parse_filter_test() error: double free of 'filter.suite_glob' lib/kunit/executor_test.c:40 parse_filter_test() error: double free of 'filter.test_glob' scripts/mod/modpost.c:1437:14: warning: passing 'typeof (rela->r_offset) *' (aka 'const unsigned long *') to parameter of type 'void *' discards qualifiers [-Wincompatible-pointer-types-discards-qualifiers] scripts/mod/modpost.c:1440:11: warning: passing 'typeof (rela->r_addend) *' (aka 'const long *') to parameter of type 'void *' discards qualifiers [-Wincompatible-pointer-types-discards-qualifiers] scripts/mod/modpost.c:1472:14: warning: passing 'typeof (rel->r_offset) *' (aka 'const unsigned long *') to parameter of type 'void *' discards qualifiers [-Wincompatible-pointer-types-discards-qualifiers] {standard input}:1127: Error: unknown .loc sub-directive `is_stm' Error/Warning ids grouped by kconfigs: gcc_recent_errors |-- arc-allmodconfig | |-- drivers-gpu-drm-amd-amdgpu-amdgpu_gmc.c:warning:Function-parameter-or-member-gart_placement-not-described-in-amdgpu_gmc_gart_location | |-- fs-bcachefs-bcachefs_format.h:warning:p-offset-in-struct-bkey-isn-t-aligned-to | `-- fs-bcachefs-bcachefs_format.h:warning:version-offset-in-struct-bkey-isn-t-aligned-to |-- arc-allyesconfig | |-- drivers-gpu-drm-amd-amdgpu-amdgpu_gmc.c:warning:Function-parameter-or-member-gart_placement-not-described-in-amdgpu_gmc_gart_location | |-- fs-bcachefs-bcachefs_format.h:warning:p-offset-in-struct-bkey-isn-t-aligned-to | `-- fs-bcachefs-bcachefs_format.h:warning:version-offset-in-struct-bkey-isn-t-aligned-to |-- arm-allmodconfig | `--
Re: [PATCH 2/3] amd/amdkfd: Unmap range from GPUs based on granularity
On 2023-10-02 15:27, Felix Kuehling wrote: On 2023-09-29 10:11, Philip Yang wrote: Align unmap range start and last address to granularity boundary. Skip unmap if range is already unmapped from GPUs. This only handles unmap due to MMU notifiers with XNACK on. What about svm_range_unmap_from_cpu? unmap_from_cpu is going to remove the range, we cannot align range based on granularity, still split the prange and unmap from GPU the exact range . Regards, Felix This also solve the rocgdb CWSR migration related issue. Signed-off-by: Philip Yang --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 35 1 file changed, 31 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 626e0dd4ec79..ac65bf25c685 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -2004,6 +2004,26 @@ static void svm_range_restore_work(struct work_struct *work) mmput(mm); } +static unsigned long +svm_range_align_start(struct svm_range *prange, unsigned long start) +{ + unsigned long start_align; + + start_align = ALIGN_DOWN(start, 1UL << prange->granularity); + start_align = max_t(unsigned long, start_align, prange->start); + return start_align; +} + +static unsigned long +svm_range_align_last(struct svm_range *prange, unsigned long last) +{ + unsigned long last_align; + + last_align = ALIGN(last, 1UL << prange->granularity) - 1; I think this should be last_align = ALIGN(last + 1, 1UL << prange->granularity) - 1; Otherwise you're off by one granule when (last & (1UL << prange->granularity)) == 0. 
+ last_align = min_t(unsigned long, last_align, prange->last); + return last_align; +} + /** * svm_range_evict - evict svm range * @prange: svm range structure @@ -2078,6 +2098,12 @@ svm_range_evict(struct svm_range *prange, struct mm_struct *mm, unsigned long s, l; uint32_t trigger; + if (!svm_range_partial_mapped(prange, start, last)) { + pr_debug("svms 0x%p [0x%lx 0x%lx] unmapped already\n", +prange->svms, start, last); + return 0; + } + if (event == MMU_NOTIFY_MIGRATE) trigger = KFD_SVM_UNMAP_TRIGGER_MMU_NOTIFY_MIGRATE; else @@ -2085,16 +2111,17 @@ svm_range_evict(struct svm_range *prange, struct mm_struct *mm, pr_debug("invalidate unmap svms 0x%p [0x%lx 0x%lx] from GPUs\n", prange->svms, start, last); + list_for_each_entry(pchild, >child_list, child_list) { mutex_lock_nested(>lock, 1); - s = max(start, pchild->start); - l = min(last, pchild->last); + s = svm_range_align_start(pchild, start); + l = svm_range_align_last(pchild, last); if (l >= s) svm_range_unmap_from_gpus(pchild, s, l, trigger); mutex_unlock(>lock); } - s = max(start, prange->start); - l = min(last, prange->last); + s = svm_range_align_start(prange, start); + l = svm_range_align_last(prange, last); if (l >= s) svm_range_unmap_from_gpus(prange, s, l, trigger); }
Re: [PATCH 2/3] amd/amdkfd: Unmap range from GPUs based on granularity
On 2023-10-02 13:06, Chen, Xiaogang wrote: On 9/29/2023 9:11 AM, Philip Yang wrote: Caution: This message originated from an External Source. Use proper caution when opening attachments, clicking links, or responding. Align unmap range start and last address to granularity boundary. Skip unmap if range is already unmapped from GPUs. This also solve the rocgdb CWSR migration related issue. Signed-off-by: Philip Yang --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 35 1 file changed, 31 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 626e0dd4ec79..ac65bf25c685 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -2004,6 +2004,26 @@ static void svm_range_restore_work(struct work_struct *work) mmput(mm); } +static unsigned long +svm_range_align_start(struct svm_range *prange, unsigned long start) +{ + unsigned long start_align; + + start_align = ALIGN_DOWN(start, 1UL << prange->granularity); + start_align = max_t(unsigned long, start_align, prange->start); + return start_align; +} + +static unsigned long +svm_range_align_last(struct svm_range *prange, unsigned long last) +{ + unsigned long last_align; + + last_align = ALIGN(last, 1UL << prange->granularity) - 1; should be ALIGN(last + 1, 1UL << prange->granularity) - 1;? Here last is included last page number. yes, you are right, if evicting range [0, 0x200], we should unmap range [0x, 0x3ff]. 
Regards Xiaogang + last_align = min_t(unsigned long, last_align, prange->last); + return last_align; +} + /** * svm_range_evict - evict svm range * @prange: svm range structure @@ -2078,6 +2098,12 @@ svm_range_evict(struct svm_range *prange, struct mm_struct *mm, unsigned long s, l; uint32_t trigger; + if (!svm_range_partial_mapped(prange, start, last)) { + pr_debug("svms 0x%p [0x%lx 0x%lx] unmapped already\n", + prange->svms, start, last); + return 0; + } + if (event == MMU_NOTIFY_MIGRATE) trigger = KFD_SVM_UNMAP_TRIGGER_MMU_NOTIFY_MIGRATE; else @@ -2085,16 +2111,17 @@ svm_range_evict(struct svm_range *prange, struct mm_struct *mm, pr_debug("invalidate unmap svms 0x%p [0x%lx 0x%lx] from GPUs\n", prange->svms, start, last); + list_for_each_entry(pchild, >child_list, child_list) { mutex_lock_nested(>lock, 1); - s = max(start, pchild->start); - l = min(last, pchild->last); + s = svm_range_align_start(pchild, start); + l = svm_range_align_last(pchild, last); if (l >= s) svm_range_unmap_from_gpus(pchild, s, l, trigger); mutex_unlock(>lock); } - s = max(start, prange->start); - l = min(last, prange->last); + s = svm_range_align_start(prange, start); + l = svm_range_align_last(prange, last); if (l >= s) svm_range_unmap_from_gpus(prange, s, l, trigger);
Re: [PATCH 1/3] amd/amdkfd: Add granularity bitmap mapped to gpu flag
On 2023-10-02 14:35, Felix Kuehling wrote: On 2023-09-29 10:11, Philip Yang wrote: Replace prange->mapped_to_gpu with prange->bitmap_mapped[], which is based on prange granularity, updated when map to GPUS or unmap from GPUs, to optimize multiple GPU map, unmap and retry fault recover. svm_range_is_mapped is false only if no parital range mapping on any GPUs. Split the bitmap_mapped when unmap from cpu to split the prange. Signed-off-by: Philip Yang --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 218 ++- drivers/gpu/drm/amd/amdkfd/kfd_svm.h | 4 +- 2 files changed, 184 insertions(+), 38 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 040dc32ad475..626e0dd4ec79 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -292,12 +292,12 @@ static void svm_range_free(struct svm_range *prange, bool do_unmap) KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, 0); } - /* free dma_addr array for each gpu */ + /* free dma_addr array, bitmap_mapped for each gpu */ for (gpuidx = 0; gpuidx < MAX_GPU_INSTANCE; gpuidx++) { - if (prange->dma_addr[gpuidx]) { + if (prange->dma_addr[gpuidx]) kvfree(prange->dma_addr[gpuidx]); -prange->dma_addr[gpuidx] = NULL; - } + if (prange->bitmap_mapped[gpuidx]) + bitmap_free(prange->bitmap_mapped[gpuidx]); } mutex_destroy(>lock); @@ -323,19 +323,38 @@ svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start, uint64_t size = last - start + 1; struct svm_range *prange; struct kfd_process *p; - - prange = kzalloc(sizeof(*prange), GFP_KERNEL); - if (!prange) - return NULL; + unsigned int nbits; + uint32_t gpuidx; p = container_of(svms, struct kfd_process, svms); if (!p->xnack_enabled && update_mem_usage && amdgpu_amdkfd_reserve_mem_limit(NULL, size << PAGE_SHIFT, KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, 0)) { pr_info("SVM mapping failed, exceeds resident system memory limit\n"); - kfree(prange); return NULL; } + + prange = kzalloc(sizeof(*prange), GFP_KERNEL); + if 
(!prange) + return NULL; + + svm_range_set_default_attributes(>preferred_loc, + >prefetch_loc, + >granularity, >flags); + + nbits = svm_range_mapped_nbits(size, prange->granularity); + pr_debug("prange 0x%p [0x%llx 0x%llx] bitmap_mapped nbits %d\n", prange, + start, last, nbits); + for_each_set_bit(gpuidx, p->svms.bitmap_supported, p->n_pdds) { + prange->bitmap_mapped[gpuidx] = bitmap_zalloc(nbits, GFP_KERNEL); + if (!prange->bitmap_mapped[gpuidx]) { + while (gpuidx--) +bitmap_free(prange->bitmap_mapped[gpuidx]); + kfree(prange); + return NULL; + } + } + prange->npages = size; prange->svms = svms; prange->start = start; @@ -354,10 +373,6 @@ svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start, bitmap_copy(prange->bitmap_access, svms->bitmap_supported, MAX_GPU_INSTANCE); - svm_range_set_default_attributes(>preferred_loc, - >prefetch_loc, - >granularity, >flags); - pr_debug("svms 0x%p [0x%llx 0x%llx]\n", svms, start, last); return prange; @@ -972,6 +987,48 @@ svm_range_split_nodes(struct svm_range *new, struct svm_range *old, return 0; } +static int +svm_range_split_bitmap_mapped(struct svm_range *new, struct svm_range *old, + uint64_t start, uint64_t last) +{ + struct kfd_process *p = container_of(new->svms, struct kfd_process, svms); + unsigned int nbits, old_nbits, old_nbits2; + unsigned long *bits; + uint32_t gpuidx; + + nbits = svm_range_mapped_nbits(new->npages, new->granularity); + old_nbits = svm_range_mapped_nbits(old->npages, old->granularity); + old_nbits2 = svm_range_mapped_nbits(last - start + 1, old->granularity); This may be off by one if start and last are not aligned on granularity boundaries. I think you need to calculate the index for each of start and last and subtract the indices. E.g. granularity = 9, start = 511, last = 512. last - start + 1 is 2 and the division tells you you need one bit. But this range touches two different granules, so you need two bits. right, thanks, will check granularity boundary to calculate nbits. 
+ + pr_debug("old 0x%p [0x%lx 0x%lx] => [0x%llx 0x%llx] nbits %d => %d\n", + old, old->start, old->last, start, last, old_nbits, old_nbits2); + pr_debug("new 0x%p [0x%lx 0x%lx] nbits %d\n", new, new->start, new->last, + nbits); + + for_each_set_bit(gpuidx, p->svms.bitmap_supported, p->n_pdds) { + bits = bitmap_alloc(old_nbits2, GFP_KERNEL); + if (!bits) + return -ENOMEM; + + if (start == old->start) { + bitmap_shift_right(new->bitmap_mapped[gpuidx], + old->bitmap_mapped[gpuidx], + old_nbits2, old_nbits); + bitmap_shift_right(bits, old->bitmap_mapped[gpuidx], 0, + old_nbits2);
Re: [PATCH v2 2/2] Revert "drm/amd/pm: workaround for the wrong ac power detection on smu 13.0.0"
On Thu, Oct 5, 2023 at 3:13 PM Greg Kroah-Hartman wrote: > > On Thu, Oct 05, 2023 at 12:52:30PM -0500, Mario Limonciello wrote: > > This reverts commit 0e5e1a84f0b8c814d502a135824244127fed8f23. > > > > Reviewed-by: Alex Deucher > > Signed-off-by: Mario Limonciello > > No explanation as to why this needs to be reverted? And does this need > to be backported anywhere? This patch ultimately never went upstream, but there was some confusion about whether it did or not. It can be ignored. Alex
Re: [PATCH v2 2/2] Revert "drm/amd/pm: workaround for the wrong ac power detection on smu 13.0.0"
On 10/5/2023 14:12, Greg Kroah-Hartman wrote: On Thu, Oct 05, 2023 at 12:52:30PM -0500, Mario Limonciello wrote: This reverts commit 0e5e1a84f0b8c814d502a135824244127fed8f23. Reviewed-by: Alex Deucher Signed-off-by: Mario Limonciello No explanation as to why this needs to be reverted? And does this need to be backported anywhere? thanks, greg k-h No need to be backported anywhere. The commit is only in amd-staging-drm-next right now. I think it's up to whether Alex includes the workaround commit in the final 6.7 pull request. If he does, then yeah this could use a larger write up to explain why it went in and out. I was sort of thinking we could land both commits in amd-staging-drm-next and then when Alex did the pull request the workaround commit just wouldn't be part of the 6.7 PR since it's a no-op with the revert.
Re: [PATCH v2 1/2] usb: typec: ucsi: Use GET_CAPABILITY attributes data to set power supply scope
On Thu, Oct 05, 2023 at 12:52:29PM -0500, Mario Limonciello wrote: > On some OEM systems, adding a W7900 dGPU triggers RAS errors and hangs > at a black screen on startup. This issue occurs only if `ucsi_acpi` has > loaded before `amdgpu` has loaded. The reason for this failure is that > `amdgpu` uses power_supply_is_system_supplied() to determine if running > on AC or DC power at startup. If this value is reported incorrectly the > dGPU will also be programmed incorrectly and trigger errors. > > power_supply_is_system_supplied() reports the wrong value because UCSI > power supplies provided as part of the system don't properly report the > scope as "DEVICE" scope (not powering the system). > > In order to fix this issue check the capabilities reported from the UCSI > power supply to ensure that it supports charging a battery and that it can > be powered by AC. Mark the scope accordingly. > > Fixes: a7fbfd44c020 ("usb: typec: ucsi: Mark dGPUs as DEVICE scope") > Link: > https://www.intel.com/content/www/us/en/products/docs/io/universal-serial-bus/usb-type-c-ucsi-spec.html > p28 > Signed-off-by: Mario Limonciello > --- > Cc: Kai-Heng Feng > Cc: Alex Deucher > > Cc: Richard Gong > --- > drivers/usb/typec/ucsi/psy.c | 9 + > 1 file changed, 9 insertions(+) > > diff --git a/drivers/usb/typec/ucsi/psy.c b/drivers/usb/typec/ucsi/psy.c > index 384b42267f1f..b35c6e07911e 100644 > --- a/drivers/usb/typec/ucsi/psy.c > +++ b/drivers/usb/typec/ucsi/psy.c > @@ -37,6 +37,15 @@ static int ucsi_psy_get_scope(struct ucsi_connector *con, > struct device *dev = con->ucsi->dev; > > device_property_read_u8(dev, "scope", ); > + if (scope == POWER_SUPPLY_SCOPE_UNKNOWN) { > + u32 mask = UCSI_CAP_ATTR_POWER_AC_SUPPLY | > +UCSI_CAP_ATTR_BATTERY_CHARGING; > + > + if (con->ucsi->cap.attributes & mask) > + scope = POWER_SUPPLY_SCOPE_SYSTEM; > + else > + scope = POWER_SUPPLY_SCOPE_DEVICE; > + } > val->intval = scope; > return 0; > } > -- > 2.34.1 > > Hi, This is the friendly patch-bot of Greg 
Kroah-Hartman. You have sent him a patch that has triggered this response. He used to manually respond to these common problems, but in order to save his sanity (he kept writing the same thing over and over, yet to different people), I was created. Hopefully you will not take offence and will fix the problem in your patch and resubmit it so that it can be accepted into the Linux kernel tree. You are receiving this message because of the following common error(s) as indicated below: - You have marked a patch with a "Fixes:" tag for a commit that is in an older released kernel, yet you do not have a cc: stable line in the signed-off-by area at all, which means that the patch will not be applied to any older kernel releases. To properly fix this, please follow the documented rules in the Documentation/process/stable-kernel-rules.rst file for how to resolve this. If you wish to discuss this problem further, or you have questions about how to resolve this issue, please feel free to respond to this email and Greg will reply once he has dug out from the pending patches received from other developers. thanks, greg k-h's patch email bot
Re: [PATCH v2 2/2] Revert "drm/amd/pm: workaround for the wrong ac power detection on smu 13.0.0"
On Thu, Oct 05, 2023 at 12:52:30PM -0500, Mario Limonciello wrote: > This reverts commit 0e5e1a84f0b8c814d502a135824244127fed8f23. > > Reviewed-by: Alex Deucher > Signed-off-by: Mario Limonciello No explanation as to why this needs to be reverted? And does this need to be backported anywhere? thanks, greg k-h
[PATCH v2 1/2] usb: typec: ucsi: Use GET_CAPABILITY attributes data to set power supply scope
On some OEM systems, adding a W7900 dGPU triggers RAS errors and hangs at a black screen on startup. This issue occurs only if `ucsi_acpi` has loaded before `amdgpu` has loaded. The reason for this failure is that `amdgpu` uses power_supply_is_system_supplied() to determine if running on AC or DC power at startup. If this value is reported incorrectly the dGPU will also be programmed incorrectly and trigger errors. power_supply_is_system_supplied() reports the wrong value because UCSI power supplies provided as part of the system don't properly report the scope as "DEVICE" scope (not powering the system). In order to fix this issue check the capabilities reported from the UCSI power supply to ensure that it supports charging a battery and that it can be powered by AC. Mark the scope accordingly. Fixes: a7fbfd44c020 ("usb: typec: ucsi: Mark dGPUs as DEVICE scope") Link: https://www.intel.com/content/www/us/en/products/docs/io/universal-serial-bus/usb-type-c-ucsi-spec.html p28 Signed-off-by: Mario Limonciello --- Cc: Kai-Heng Feng Cc: Alex Deucher > Cc: Richard Gong --- drivers/usb/typec/ucsi/psy.c | 9 + 1 file changed, 9 insertions(+) diff --git a/drivers/usb/typec/ucsi/psy.c b/drivers/usb/typec/ucsi/psy.c index 384b42267f1f..b35c6e07911e 100644 --- a/drivers/usb/typec/ucsi/psy.c +++ b/drivers/usb/typec/ucsi/psy.c @@ -37,6 +37,15 @@ static int ucsi_psy_get_scope(struct ucsi_connector *con, struct device *dev = con->ucsi->dev; device_property_read_u8(dev, "scope", ); + if (scope == POWER_SUPPLY_SCOPE_UNKNOWN) { + u32 mask = UCSI_CAP_ATTR_POWER_AC_SUPPLY | + UCSI_CAP_ATTR_BATTERY_CHARGING; + + if (con->ucsi->cap.attributes & mask) + scope = POWER_SUPPLY_SCOPE_SYSTEM; + else + scope = POWER_SUPPLY_SCOPE_DEVICE; + } val->intval = scope; return 0; } -- 2.34.1
[PATCH v2 2/2] Revert "drm/amd/pm: workaround for the wrong ac power detection on smu 13.0.0"
This reverts commit 0e5e1a84f0b8c814d502a135824244127fed8f23. Reviewed-by: Alex Deucher Signed-off-by: Mario Limonciello --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c | 3 ++- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c index 08cb9f8ce64e..9b62b45ebb7f 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c @@ -1026,7 +1026,8 @@ static int smu_v13_0_process_pending_interrupt(struct smu_context *smu) { int ret = 0; - if (smu_cmn_feature_is_enabled(smu, SMU_FEATURE_ACDC_BIT)) + if (smu->dc_controlled_by_gpio && + smu_cmn_feature_is_enabled(smu, SMU_FEATURE_ACDC_BIT)) ret = smu_v13_0_allow_ih_interrupt(smu); return ret; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index 07df5be063e2..0fb6be11a0cc 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -2662,6 +2662,7 @@ static const struct pptable_funcs smu_v13_0_0_ppt_funcs = { .enable_mgpu_fan_boost = smu_v13_0_0_enable_mgpu_fan_boost, .get_power_limit = smu_v13_0_0_get_power_limit, .set_power_limit = smu_v13_0_set_power_limit, + .set_power_source = smu_v13_0_set_power_source, .get_power_profile_mode = smu_v13_0_0_get_power_profile_mode, .set_power_profile_mode = smu_v13_0_0_set_power_profile_mode, .run_btc = smu_v13_0_run_btc, -- 2.34.1
[PATCH v2 0/2] Fix Navi3x boot and hotplug problems
On some OEM systems multiple navi3x dGPUS are triggering RAS errors and BACO errors. These errors come from elements of the OEM system that weren't part of original test environment. This series addresses those problems. NOTE: Although this series touches two subsystems, I would prefer to take this all through DRM because there is a workaround in amd-staging-drm-next that I would like to be reverted at the same time as picking up the fix. v1->v2: * Drop _PR3 patch from series, it was cherry picked and is on it's way to 6.6-rcX already. * Rather than changing global policy, fix the problematic power supply driver. v1: https://lore.kernel.org/linux-pm/20230926225955.386553-1-mario.limoncie...@amd.com/ Mario Limonciello (2): usb: typec: ucsi: Use GET_CAPABILITY attributes data to set power supply scope Revert "drm/amd/pm: workaround for the wrong ac power detection on smu 13.0.0" drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c | 3 ++- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 1 + drivers/usb/typec/ucsi/psy.c | 10 ++ 3 files changed, 13 insertions(+), 1 deletion(-) -- 2.34.1
Re: [PATCH 1/3] amd/amdkfd: Add granularity bitmap mapped to gpu flag
On 2023-10-02 13:02, Chen, Xiaogang wrote: On 9/29/2023 9:11 AM, Philip Yang wrote: Caution: This message originated from an External Source. Use proper caution when opening attachments, clicking links, or responding. Replace prange->mapped_to_gpu with prange->bitmap_mapped[], which is based on prange granularity, updated when map to GPUS or unmap from GPUs, to optimize multiple GPU map, unmap and retry fault recover. svm_range_is_mapped is false only if no parital range mapping on any GPUs. Split the bitmap_mapped when unmap from cpu to split the prange. Signed-off-by: Philip Yang --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 218 ++- drivers/gpu/drm/amd/amdkfd/kfd_svm.h | 4 +- 2 files changed, 184 insertions(+), 38 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 040dc32ad475..626e0dd4ec79 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -292,12 +292,12 @@ static void svm_range_free(struct svm_range *prange, bool do_unmap) KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, 0); } - /* free dma_addr array for each gpu */ + /* free dma_addr array, bitmap_mapped for each gpu */ for (gpuidx = 0; gpuidx < MAX_GPU_INSTANCE; gpuidx++) { - if (prange->dma_addr[gpuidx]) { + if (prange->dma_addr[gpuidx]) kvfree(prange->dma_addr[gpuidx]); - prange->dma_addr[gpuidx] = NULL; - } + if (prange->bitmap_mapped[gpuidx]) + bitmap_free(prange->bitmap_mapped[gpuidx]); } mutex_destroy(>lock); @@ -323,19 +323,38 @@ svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start, uint64_t size = last - start + 1; struct svm_range *prange; struct kfd_process *p; - - prange = kzalloc(sizeof(*prange), GFP_KERNEL); - if (!prange) - return NULL; + unsigned int nbits; + uint32_t gpuidx; p = container_of(svms, struct kfd_process, svms); if (!p->xnack_enabled && update_mem_usage && amdgpu_amdkfd_reserve_mem_limit(NULL, size << PAGE_SHIFT, KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, 0)) { pr_info("SVM mapping failed, 
exceeds resident system memory limit\n"); - kfree(prange); return NULL; } + + prange = kzalloc(sizeof(*prange), GFP_KERNEL); + if (!prange) + return NULL; + + svm_range_set_default_attributes(>preferred_loc, + >prefetch_loc, + >granularity, >flags); + + nbits = svm_range_mapped_nbits(size, prange->granularity); + pr_debug("prange 0x%p [0x%llx 0x%llx] bitmap_mapped nbits %d\n", prange, + start, last, nbits); + for_each_set_bit(gpuidx, p->svms.bitmap_supported, p->n_pdds) { + prange->bitmap_mapped[gpuidx] = bitmap_zalloc(nbits, GFP_KERNEL); + if (!prange->bitmap_mapped[gpuidx]) { + while (gpuidx--) + bitmap_free(prange->bitmap_mapped[gpuidx]); + kfree(prange); + return NULL; + } + } +
[PATCH v4 1/1] drm/amdkfd: get doorbell's absolute offset based on the db_size
Here, Adding db_size in byte to find the doorbell's absolute offset for both 32-bit and 64-bit doorbell sizes. So that doorbell offset will be aligned based on the doorbell size. v2: - Addressed the review comment from Felix. v3: - Adding doorbell_size as parameter to get db absolute offset. v4: Squash the two patches into one. Cc: Christian Koenig Cc: Alex Deucher Signed-off-by: Shashank Sharma Signed-off-by: Arvind Yadav --- drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h| 5 +++-- drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell_mgr.c| 13 + .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 3 ++- drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c | 10 -- .../gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 3 ++- 5 files changed, 24 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h index 09f6727e7c73..4a8b33f55f6b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h @@ -357,8 +357,9 @@ int amdgpu_doorbell_init(struct amdgpu_device *adev); void amdgpu_doorbell_fini(struct amdgpu_device *adev); int amdgpu_doorbell_create_kernel_doorbells(struct amdgpu_device *adev); uint32_t amdgpu_doorbell_index_on_bar(struct amdgpu_device *adev, - struct amdgpu_bo *db_bo, - uint32_t doorbell_index); + struct amdgpu_bo *db_bo, + uint32_t doorbell_index, + uint32_t db_size); #define RDOORBELL32(index) amdgpu_mm_rdoorbell(adev, (index)) #define WDOORBELL32(index, v) amdgpu_mm_wdoorbell(adev, (index), (v)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell_mgr.c index da4be0bbb446..6690f5a72f4d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell_mgr.c @@ -114,19 +114,24 @@ void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v) * @adev: amdgpu_device pointer * @db_bo: doorbell object's bo * @db_index: doorbell relative index in this 
doorbell object + * @db_size: doorbell size is in byte * * returns doorbell's absolute index in BAR */ uint32_t amdgpu_doorbell_index_on_bar(struct amdgpu_device *adev, - struct amdgpu_bo *db_bo, - uint32_t doorbell_index) + struct amdgpu_bo *db_bo, + uint32_t doorbell_index, + uint32_t db_size) { int db_bo_offset; db_bo_offset = amdgpu_bo_gpu_offset_no_check(db_bo); - /* doorbell index is 32 bit but doorbell's size is 64-bit, so *2 */ - return db_bo_offset / sizeof(u32) + doorbell_index * 2; + /* doorbell index is 32 bit but doorbell's size can be 32 bit +* or 64 bit, so *db_size(in byte)/4 for alignment. +*/ + return db_bo_offset / sizeof(u32) + doorbell_index * + DIV_ROUND_UP(db_size, 4); } /** diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 0d3d538b64eb..e07652e72496 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -407,7 +407,8 @@ static int allocate_doorbell(struct qcm_process_device *qpd, q->properties.doorbell_off = amdgpu_doorbell_index_on_bar(dev->adev, qpd->proc_doorbells, - q->doorbell_id); + q->doorbell_id, + dev->kfd->device_info.doorbell_size); return 0; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c index 7b38537c7c99..05c74887fd6f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c @@ -161,7 +161,10 @@ void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd, if (inx >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) return NULL; - *doorbell_off = amdgpu_doorbell_index_on_bar(kfd->adev, kfd->doorbells, inx); + *doorbell_off = amdgpu_doorbell_index_on_bar(kfd->adev, +kfd->doorbells, +inx, + kfd->device_info.doorbell_size); inx *= 2; pr_debug("Get kernel queue doorbell\n" @@ -240,7 +243,10 @@ phys_addr_t kfd_get_process_doorbells(struct kfd_process_device *pdd)
[PATCH v4 0/1] drm/amdkfd: Fix unaligned doorbell absolute offset for gfx8
On older chips, the absolute doorbell offset within the doorbell page is based on the queue ID. KFD is using queue ID and doorbell size to get an absolute doorbell offset in userspace. Here, adding db_size in byte to find the doorbell's absolute offset for both 32-bit and 64-bit doorbell sizes. So that doorbell offset will be aligned based on the doorbell size. v2: - Addressed the review comment from Felix. v3: - Adding doorbell_size as parameter to get db absolute offset. v4: Squash the two patches into one. Arvind Yadav (1): drm/amdkfd: get doorbell's absolute offset based on the db_size drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h| 5 +++-- drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell_mgr.c| 13 + .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 3 ++- drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c | 10 -- .../gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 3 ++- 5 files changed, 24 insertions(+), 10 deletions(-) -- 2.34.1
[PATCH v4 32/32] drm/amd/display: Add 3x4 CTM support for plane CTM
From: Joshua Ashton Create drm_color_ctm_3x4 to support 3x4-dimension plane CTM matrix and convert DRM CTM to DC CSC float matrix. v3: - rename ctm2 to ctm_3x4 (Harry) Reviewed-by: Harry Wentland Signed-off-by: Joshua Ashton --- .../amd/display/amdgpu_dm/amdgpu_dm_color.c | 28 +-- .../amd/display/amdgpu_dm/amdgpu_dm_plane.c | 2 +- include/uapi/drm/drm_mode.h | 8 ++ 3 files changed, 34 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index bc9dd75e8881..655c18c9a2d7 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -433,6 +433,28 @@ static void __drm_ctm_to_dc_matrix(const struct drm_color_ctm *ctm, } } +/** + * __drm_ctm_3x4_to_dc_matrix - converts a DRM CTM 3x4 to a DC CSC float matrix + * @ctm: DRM color transformation matrix with 3x4 dimensions + * @matrix: DC CSC float matrix + * + * The matrix needs to be a 3x4 (12 entry) matrix. + */ +static void __drm_ctm_3x4_to_dc_matrix(const struct drm_color_ctm_3x4 *ctm, + struct fixed31_32 *matrix) +{ + int i; + + /* The format provided is S31.32, using signed-magnitude representation. +* Our fixed31_32 is also S31.32, but is using 2's complement. We have +* to convert from signed-magnitude to 2's complement. 
+*/ + for (i = 0; i < 12; i++) { + /* gamut_remap_matrix[i] = ctm[i - floor(i/4)] */ + matrix[i] = dc_fixpt_from_s3132(ctm->matrix[i]); + } +} + /** * __set_legacy_tf - Calculates the legacy transfer function * @func: transfer function @@ -1176,7 +1198,7 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc, { struct amdgpu_device *adev = drm_to_adev(crtc->base.state->dev); struct dm_plane_state *dm_plane_state = to_dm_plane_state(plane_state); - struct drm_color_ctm *ctm = NULL; + struct drm_color_ctm_3x4 *ctm = NULL; struct dc_color_caps *color_caps = NULL; bool has_crtc_cm_degamma; int ret; @@ -1231,7 +1253,7 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc, /* Setup CRTC CTM. */ if (dm_plane_state->ctm) { - ctm = (struct drm_color_ctm *)dm_plane_state->ctm->data; + ctm = (struct drm_color_ctm_3x4 *)dm_plane_state->ctm->data; /* * DCN2 and older don't support both pre-blending and * post-blending gamut remap. For this HW family, if we have @@ -1243,7 +1265,7 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc, * mapping CRTC CTM to MPC and keeping plane CTM setup at DPP, * as it's done by dcn30_program_gamut_remap(). 
*/ - __drm_ctm_to_dc_matrix(ctm, dc_plane_state->gamut_remap_matrix.matrix); + __drm_ctm_3x4_to_dc_matrix(ctm, dc_plane_state->gamut_remap_matrix.matrix); dc_plane_state->gamut_remap_matrix.enable_remap = true; dc_plane_state->input_csc_color_matrix.enable_adjustment = false; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c index d9537d9bf18c..a3935c56189b 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c @@ -1549,7 +1549,7 @@ dm_atomic_plane_set_property(struct drm_plane *plane, ret = drm_property_replace_blob_from_id(plane->dev, _plane_state->ctm, val, - sizeof(struct drm_color_ctm), -1, + sizeof(struct drm_color_ctm_3x4), -1, ); dm_plane_state->base.color_mgmt_changed |= replaced; return ret; diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h index 46becedf5b2f..a811d24e8ed5 100644 --- a/include/uapi/drm/drm_mode.h +++ b/include/uapi/drm/drm_mode.h @@ -838,6 +838,14 @@ struct drm_color_ctm { __u64 matrix[9]; }; +struct drm_color_ctm_3x4 { + /* +* Conversion matrix with 3x4 dimensions in S31.32 sign-magnitude +* (not two's complement!) format. +*/ + __u64 matrix[12]; +}; + struct drm_color_lut { /* * Values are mapped linearly to 0.0 - 1.0 range, with 0x0 == 0.0 and -- 2.40.1
[PATCH v4 31/32] drm/amd/display: add plane CTM support
Map the plane CTM driver-specific property to DC plane, instead of DC stream. The remaining steps to program DPP block are already implemented on DC shared-code. v3: - fix comment about plane and CRTC CTMs priorities (Harry) Reviewed-by: Harry Wentland Signed-off-by: Melissa Wen --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 1 + .../amd/display/amdgpu_dm/amdgpu_dm_color.c | 26 +++ 2 files changed, 27 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 5e64eda6ed11..7de67b5ab6e9 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -9998,6 +9998,7 @@ static bool should_reset_plane(struct drm_atomic_state *state, if (dm_old_other_state->degamma_tf != dm_new_other_state->degamma_tf || dm_old_other_state->degamma_lut != dm_new_other_state->degamma_lut || dm_old_other_state->hdr_mult != dm_new_other_state->hdr_mult || + dm_old_other_state->ctm != dm_new_other_state->ctm || dm_old_other_state->shaper_lut != dm_new_other_state->shaper_lut || dm_old_other_state->shaper_tf != dm_new_other_state->shaper_tf || dm_old_other_state->lut3d != dm_new_other_state->lut3d || diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index 41c5926ca068..bc9dd75e8881 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -1175,6 +1175,8 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc, struct dc_plane_state *dc_plane_state) { struct amdgpu_device *adev = drm_to_adev(crtc->base.state->dev); + struct dm_plane_state *dm_plane_state = to_dm_plane_state(plane_state); + struct drm_color_ctm *ctm = NULL; struct dc_color_caps *color_caps = NULL; bool has_crtc_cm_degamma; int ret; @@ -1227,5 +1229,29 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc, return 
ret; } + /* Setup CRTC CTM. */ + if (dm_plane_state->ctm) { + ctm = (struct drm_color_ctm *)dm_plane_state->ctm->data; + /* +* DCN2 and older don't support both pre-blending and +* post-blending gamut remap. For this HW family, if we have +* the plane and CRTC CTMs simultaneously, CRTC CTM takes +* priority, and we discard plane CTM, as implemented in +* dcn10_program_gamut_remap(). However, DCN3+ has DPP +* (pre-blending) and MPC (post-blending) `gamut remap` blocks; +* therefore, we can program plane and CRTC CTMs together by +* mapping CRTC CTM to MPC and keeping plane CTM setup at DPP, +* as it's done by dcn30_program_gamut_remap(). +*/ + __drm_ctm_to_dc_matrix(ctm, dc_plane_state->gamut_remap_matrix.matrix); + + dc_plane_state->gamut_remap_matrix.enable_remap = true; + dc_plane_state->input_csc_color_matrix.enable_adjustment = false; + } else { + /* Bypass CTM. */ + dc_plane_state->gamut_remap_matrix.enable_remap = false; + dc_plane_state->input_csc_color_matrix.enable_adjustment = false; + } + return amdgpu_dm_plane_set_color_properties(plane_state, dc_plane_state); } -- 2.40.1
[PATCH v4 30/32] drm/amd/display: add plane CTM driver-specific property
Plane CTM for pre-blending color space conversion. Only enable driver-specific plane CTM property on drivers that support both pre- and post-blending gamut remap matrix, i.e., DCN3+ family. Otherwise it conflicts with DRM CRTC CTM property. Reviewed-by: Harry Wentland Signed-off-by: Melissa Wen --- drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h | 2 ++ .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 7 +++ .../amd/display/amdgpu_dm/amdgpu_dm_color.c | 7 +++ .../amd/display/amdgpu_dm/amdgpu_dm_plane.c | 20 +++ 4 files changed, 36 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h index 071cc10bfd90..1347022ce57d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h @@ -363,6 +363,8 @@ struct amdgpu_mode_info { * @plane_hdr_mult_property: */ struct drm_property *plane_hdr_mult_property; + + struct drm_property *plane_ctm_property; /** * @shaper_lut_property: Plane property to set pre-blending shaper LUT * that converts color content before 3D LUT. If diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index 219efa7fe181..c9cd2e5f79ae 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -784,6 +784,13 @@ struct dm_plane_state { * TF is needed for any subsequent linear-to-non-linear transforms. */ __u64 hdr_mult; + /** +* @ctm: +* +* Color transformation matrix. The blob (if not NULL) is a +* drm_color_ctm_3x4. +*/ + struct drm_property_blob *ctm; /** * @shaper_lut: shaper lookup table blob. The blob (if not NULL) is an * array of drm_color_lut. 
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index 251b5f14bd89..41c5926ca068 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -239,6 +239,13 @@ amdgpu_dm_create_color_properties(struct amdgpu_device *adev) return -ENOMEM; adev->mode_info.plane_hdr_mult_property = prop; + prop = drm_property_create(adev_to_drm(adev), + DRM_MODE_PROP_BLOB, + "AMD_PLANE_CTM", 0); + if (!prop) + return -ENOMEM; + adev->mode_info.plane_ctm_property = prop; + prop = drm_property_create(adev_to_drm(adev), DRM_MODE_PROP_BLOB, "AMD_PLANE_SHAPER_LUT", 0); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c index f1070ca7076a..d9537d9bf18c 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c @@ -1361,6 +1361,8 @@ dm_drm_plane_duplicate_state(struct drm_plane *plane) if (dm_plane_state->degamma_lut) drm_property_blob_get(dm_plane_state->degamma_lut); + if (dm_plane_state->ctm) + drm_property_blob_get(dm_plane_state->ctm); if (dm_plane_state->shaper_lut) drm_property_blob_get(dm_plane_state->shaper_lut); if (dm_plane_state->lut3d) @@ -1442,6 +1444,8 @@ static void dm_drm_plane_destroy_state(struct drm_plane *plane, if (dm_plane_state->degamma_lut) drm_property_blob_put(dm_plane_state->degamma_lut); + if (dm_plane_state->ctm) + drm_property_blob_put(dm_plane_state->ctm); if (dm_plane_state->lut3d) drm_property_blob_put(dm_plane_state->lut3d); if (dm_plane_state->shaper_lut) @@ -1479,6 +1483,11 @@ dm_atomic_plane_attach_color_mgmt_properties(struct amdgpu_display_manager *dm, dm->adev->mode_info.plane_hdr_mult_property, AMDGPU_HDR_MULT_DEFAULT); + /* Only enable plane CTM if both DPP and MPC gamut remap is available. 
*/ + if (dm->dc->caps.color.mpc.gamut_remap) + drm_object_attach_property(>base, + dm->adev->mode_info.plane_ctm_property, 0); + if (dpp_color_caps.hw_3d_lut) { drm_object_attach_property(>base, mode_info.plane_shaper_lut_property, 0); @@ -1536,6 +1545,14 @@ dm_atomic_plane_set_property(struct drm_plane *plane, dm_plane_state->hdr_mult = val; dm_plane_state->base.color_mgmt_changed = 1; } + } else if (property == adev->mode_info.plane_ctm_property) { + ret =
[PATCH v4 29/32] drm/amd/display: copy 3D LUT settings from crtc state to stream_update
From: Joshua Ashton When committing planes, we copy color mgmt resources to the stream state. Do the same for shaper and 3D LUTs. Reviewed-by: Harry Wentland Signed-off-by: Joshua Ashton Co-developed-by: Melissa Wen Signed-off-by: Melissa Wen --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 4 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 4b4181447df9..5e64eda6ed11 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -8501,6 +8501,10 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, _state->stream->csc_color_matrix; bundle->stream_update.out_transfer_func = acrtc_state->stream->out_transfer_func; + bundle->stream_update.lut3d_func = + (struct dc_3dlut *) acrtc_state->stream->lut3d_func; + bundle->stream_update.func_shaper = + (struct dc_transfer_func *) acrtc_state->stream->func_shaper; } acrtc_state->stream->abm_level = acrtc_state->abm_level; -- 2.40.1
[PATCH v4 28/32] drm/amd/display: allow newer DC hardware to use degamma ROM for PQ/HLG
From: Joshua Ashton Need to funnel the color caps through to these functions so it can check that the hardware is capable. v2: - remove redundant color caps assignment on plane degamma map (Harry) - pass color caps to degamma params v3: - remove unused color_caps parameter from set_color_properties (Harry) Reviewed-by: Harry Wentland Signed-off-by: Joshua Ashton Signed-off-by: Melissa Wen --- .../amd/display/amdgpu_dm/amdgpu_dm_color.c | 29 --- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index 0909ed5639bf..251b5f14bd89 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -564,6 +564,7 @@ static int amdgpu_dm_set_atomic_regamma(struct dc_stream_state *stream, /** * __set_input_tf - calculates the input transfer function based on expected * input space. + * @caps: dc color capabilities * @func: transfer function * @lut: lookup table that defines the color space * @lut_size: size of respective lut. @@ -571,7 +572,7 @@ static int amdgpu_dm_set_atomic_regamma(struct dc_stream_state *stream, * Returns: * 0 in case of success. -ENOMEM if fails. 
*/ -static int __set_input_tf(struct dc_transfer_func *func, +static int __set_input_tf(struct dc_color_caps *caps, struct dc_transfer_func *func, const struct drm_color_lut *lut, uint32_t lut_size) { struct dc_gamma *gamma = NULL; @@ -588,7 +589,7 @@ static int __set_input_tf(struct dc_transfer_func *func, __drm_lut_to_dc_gamma(lut, gamma, false); } - res = mod_color_calculate_degamma_params(NULL, func, gamma, gamma != NULL); + res = mod_color_calculate_degamma_params(caps, func, gamma, gamma != NULL); if (gamma) dc_gamma_release(); @@ -752,7 +753,7 @@ static int amdgpu_dm_atomic_blend_lut(const struct drm_color_lut *blend_lut, func_blend->tf = tf; func_blend->sdr_ref_white_level = SDR_WHITE_LEVEL_INIT_VALUE; - ret = __set_input_tf(func_blend, blend_lut, blend_size); + ret = __set_input_tf(NULL, func_blend, blend_lut, blend_size); } else { func_blend->type = TF_TYPE_BYPASS; func_blend->tf = TRANSFER_FUNCTION_LINEAR; @@ -968,7 +969,8 @@ int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc) static int map_crtc_degamma_to_dc_plane(struct dm_crtc_state *crtc, -struct dc_plane_state *dc_plane_state) +struct dc_plane_state *dc_plane_state, +struct dc_color_caps *caps) { const struct drm_color_lut *degamma_lut; enum dc_transfer_func_predefined tf = TRANSFER_FUNCTION_SRGB; @@ -1023,7 +1025,7 @@ map_crtc_degamma_to_dc_plane(struct dm_crtc_state *crtc, dc_plane_state->in_transfer_func->tf = TRANSFER_FUNCTION_LINEAR; - r = __set_input_tf(dc_plane_state->in_transfer_func, + r = __set_input_tf(caps, dc_plane_state->in_transfer_func, degamma_lut, degamma_size); if (r) return r; @@ -1036,7 +1038,7 @@ map_crtc_degamma_to_dc_plane(struct dm_crtc_state *crtc, dc_plane_state->in_transfer_func->tf = tf; if (tf != TRANSFER_FUNCTION_SRGB && - !mod_color_calculate_degamma_params(NULL, + !mod_color_calculate_degamma_params(caps, dc_plane_state->in_transfer_func, NULL, false)) return -ENOMEM; @@ -1047,7 +1049,8 @@ map_crtc_degamma_to_dc_plane(struct dm_crtc_state *crtc, static 
int __set_dm_plane_degamma(struct drm_plane_state *plane_state, - struct dc_plane_state *dc_plane_state) + struct dc_plane_state *dc_plane_state, + struct dc_color_caps *color_caps) { struct dm_plane_state *dm_plane_state = to_dm_plane_state(plane_state); const struct drm_color_lut *degamma_lut; @@ -1078,7 +1081,7 @@ __set_dm_plane_degamma(struct drm_plane_state *plane_state, dc_plane_state->in_transfer_func->type = TF_TYPE_DISTRIBUTED_POINTS; - ret = __set_input_tf(dc_plane_state->in_transfer_func, + ret = __set_input_tf(color_caps, dc_plane_state->in_transfer_func, degamma_lut, degamma_size); if (ret) return ret; @@ -1086,7 +1089,7 @@ __set_dm_plane_degamma(struct drm_plane_state *plane_state, dc_plane_state->in_transfer_func->type =
[PATCH v4 27/32] drm/amd/display: add plane blend LUT and TF support
From: Joshua Ashton Map plane blend properties to DPP blend gamma. Plane blend is a post-3D LUT curve that linearizes color space for blending. It may be defined by a user-blob LUT and/or predefined transfer function. As hardcoded curve (ROM) is not supported on blend gamma, we use AMD color module to fill parameters when setting non-linear TF with empty LUT. v2: - rename DRM TFs to AMDGPU TFs Reviewed-by: Harry Wentland Signed-off-by: Joshua Ashton Signed-off-by: Melissa Wen --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 1 + .../amd/display/amdgpu_dm/amdgpu_dm_color.c | 56 +-- 2 files changed, 53 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 26d19159bd79..4b4181447df9 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -8289,6 +8289,7 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, bundle->surface_updates[planes_count].hdr_mult = dc_plane->hdr_mult; bundle->surface_updates[planes_count].func_shaper = dc_plane->in_shaper_func; bundle->surface_updates[planes_count].lut3d_func = dc_plane->lut3d_func; + bundle->surface_updates[planes_count].blend_tf = dc_plane->blend_tf; } amdgpu_dm_plane_fill_dc_scaling_info(dm->adev, new_plane_state, diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index 25e9aa147e00..0909ed5639bf 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -732,6 +732,35 @@ static int amdgpu_dm_atomic_shaper_lut(const struct drm_color_lut *shaper_lut, return ret; } +static int amdgpu_dm_atomic_blend_lut(const struct drm_color_lut *blend_lut, + bool has_rom, + enum dc_transfer_func_predefined tf, + uint32_t blend_size, + struct dc_transfer_func *func_blend) +{ + int ret = 0; + + if (blend_size || tf != 
TRANSFER_FUNCTION_LINEAR) { + /* +* DRM plane gamma LUT or TF means we are linearizing color +* space before blending (similar to degamma programming). As +* we don't have hardcoded curve support, or we use AMD color +* module to fill the parameters that will be translated to HW +* points. +*/ + func_blend->type = TF_TYPE_DISTRIBUTED_POINTS; + func_blend->tf = tf; + func_blend->sdr_ref_white_level = SDR_WHITE_LEVEL_INIT_VALUE; + + ret = __set_input_tf(func_blend, blend_lut, blend_size); + } else { + func_blend->type = TF_TYPE_BYPASS; + func_blend->tf = TRANSFER_FUNCTION_LINEAR; + } + + return ret; +} + /** * amdgpu_dm_verify_lut3d_size - verifies if 3D LUT is supported and if user * shaper and 3D LUTs match the hw supported size @@ -1070,8 +1099,9 @@ amdgpu_dm_plane_set_color_properties(struct drm_plane_state *plane_state, { struct dm_plane_state *dm_plane_state = to_dm_plane_state(plane_state); enum amdgpu_transfer_function shaper_tf = AMDGPU_TRANSFER_FUNCTION_DEFAULT; - const struct drm_color_lut *shaper_lut, *lut3d; - uint32_t shaper_size, lut3d_size; + enum amdgpu_transfer_function blend_tf = AMDGPU_TRANSFER_FUNCTION_DEFAULT; + const struct drm_color_lut *shaper_lut, *lut3d, *blend_lut; + uint32_t shaper_size, lut3d_size, blend_size; int ret; /* We have nothing to do here, return */ @@ -1091,12 +1121,30 @@ amdgpu_dm_plane_set_color_properties(struct drm_plane_state *plane_state, amdgpu_tf_to_dc_tf(shaper_tf), shaper_size, dc_plane_state->in_shaper_func); - if (ret) + if (ret) { drm_dbg_kms(plane_state->plane->dev, "setting plane %d shaper LUT failed.\n", plane_state->plane->index); - return ret; + return ret; + } + + blend_tf = dm_plane_state->blend_tf; + blend_lut = __extract_blob_lut(dm_plane_state->blend_lut, _size); + blend_size = blend_lut != NULL ? blend_size : 0; + + ret = amdgpu_dm_atomic_blend_lut(blend_lut, false, +amdgpu_tf_to_dc_tf(blend_tf), +blend_size, dc_plane_state->blend_tf); + if (ret) { + drm_dbg_kms(plane_state->plane->dev, +
[PATCH v4 25/32] drm/amd/display: add plane 3D LUT support
Wire up DC 3D LUT to DM plane color management (pre-blending). On AMD display HW, 3D LUT comes after a shaper curve and we always have to program a shaper curve to delinearize or normalize the color space before applying a 3D LUT (since we have a reduced number of LUT entries). In this version, the default values of 3D LUT for size and bit_depth are 17x17x17 and 12-bit, but we already provide here a more generic mechanisms to program other supported values (9x9x9 size and 10-bit). v2: - started with plane 3D LUT instead of CRTC 3D LUT support v4: - lut3d_size is the max dimension size instead of # of entries Reviewed-by: Harry Wentland Signed-off-by: Melissa Wen --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 1 + .../amd/display/amdgpu_dm/amdgpu_dm_color.c | 102 +- 2 files changed, 99 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index c79cd98d1228..26d19159bd79 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -8288,6 +8288,7 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, bundle->surface_updates[planes_count].gamut_remap_matrix = _plane->gamut_remap_matrix; bundle->surface_updates[planes_count].hdr_mult = dc_plane->hdr_mult; bundle->surface_updates[planes_count].func_shaper = dc_plane->in_shaper_func; + bundle->surface_updates[planes_count].lut3d_func = dc_plane->lut3d_func; } amdgpu_dm_plane_fill_dc_scaling_info(dm->adev, new_plane_state, diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index 0e65bf0a886e..9c38291c42a2 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -622,6 +622,86 @@ amdgpu_tf_to_dc_tf(enum amdgpu_transfer_function tf) } } +static void __to_dc_lut3d_color(struct dc_rgb *rgb, + const struct 
drm_color_lut lut, + int bit_precision) +{ + rgb->red = drm_color_lut_extract(lut.red, bit_precision); + rgb->green = drm_color_lut_extract(lut.green, bit_precision); + rgb->blue = drm_color_lut_extract(lut.blue, bit_precision); +} + +static void __drm_3dlut_to_dc_3dlut(const struct drm_color_lut *lut, + uint32_t lut3d_size, + struct tetrahedral_params *params, + bool use_tetrahedral_9, + int bit_depth) +{ + struct dc_rgb *lut0; + struct dc_rgb *lut1; + struct dc_rgb *lut2; + struct dc_rgb *lut3; + int lut_i, i; + + + if (use_tetrahedral_9) { + lut0 = params->tetrahedral_9.lut0; + lut1 = params->tetrahedral_9.lut1; + lut2 = params->tetrahedral_9.lut2; + lut3 = params->tetrahedral_9.lut3; + } else { + lut0 = params->tetrahedral_17.lut0; + lut1 = params->tetrahedral_17.lut1; + lut2 = params->tetrahedral_17.lut2; + lut3 = params->tetrahedral_17.lut3; + } + + for (lut_i = 0, i = 0; i < lut3d_size - 4; lut_i++, i += 4) { + /* +* We should consider the 3D LUT RGB values are distributed +* along four arrays lut0-3 where the first sizes 1229 and the +* other 1228. The bit depth supported for 3dlut channel is +* 12-bit, but DC also supports 10-bit. +* +* TODO: improve color pipeline API to enable the userspace set +* bit depth and 3D LUT size/stride, as specified by VA-API. +*/ + __to_dc_lut3d_color([lut_i], lut[i], bit_depth); + __to_dc_lut3d_color([lut_i], lut[i + 1], bit_depth); + __to_dc_lut3d_color([lut_i], lut[i + 2], bit_depth); + __to_dc_lut3d_color([lut_i], lut[i + 3], bit_depth); + } + /* lut0 has 1229 points (lut_size/4 + 1) */ + __to_dc_lut3d_color([lut_i], lut[i], bit_depth); +} + +/* amdgpu_dm_atomic_lut3d - set DRM 3D LUT to DC stream + * @drm_lut3d: user 3D LUT + * @drm_lut3d_size: size of 3D LUT + * @lut3d: DC 3D LUT + * + * Map user 3D LUT data to DC 3D LUT and all necessary bits to program it + * on DCN accordingly. 
+ */ +static void amdgpu_dm_atomic_lut3d(const struct drm_color_lut *drm_lut3d, + uint32_t drm_lut3d_size, + struct dc_3dlut *lut) +{ + if (!drm_lut3d_size) { + lut->state.bits.initialized = 0; + } else { + /* Stride and bit depth are not programmable by API
[PATCH v4 26/32] drm/amd/display: handle empty LUTs in __set_input_tf
From: Joshua Ashton Unlike degamma, blend gamma doesn't support hardcoded curve (predefined/ROM), but we can use AMD color module to fill blend gamma parameters when we have non-linear plane gamma TF without plane gamma LUT. The regular degamma path doesn't hit this. Reviewed-by: Harry Wentland Signed-off-by: Joshua Ashton Signed-off-by: Melissa Wen --- .../amd/display/amdgpu_dm/amdgpu_dm_color.c | 20 +++ 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index 9c38291c42a2..25e9aa147e00 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -577,17 +577,21 @@ static int __set_input_tf(struct dc_transfer_func *func, struct dc_gamma *gamma = NULL; bool res; - gamma = dc_create_gamma(); - if (!gamma) - return -ENOMEM; + if (lut_size) { + gamma = dc_create_gamma(); + if (!gamma) + return -ENOMEM; - gamma->type = GAMMA_CUSTOM; - gamma->num_entries = lut_size; + gamma->type = GAMMA_CUSTOM; + gamma->num_entries = lut_size; - __drm_lut_to_dc_gamma(lut, gamma, false); + __drm_lut_to_dc_gamma(lut, gamma, false); + } - res = mod_color_calculate_degamma_params(NULL, func, gamma, true); - dc_gamma_release(); + res = mod_color_calculate_degamma_params(NULL, func, gamma, gamma != NULL); + + if (gamma) + dc_gamma_release(); return res ? 0 : -ENOMEM; } -- 2.40.1
[PATCH v4 24/32] drm/amd/display: add plane shaper TF support
Enable usage of predefined transfer func in addition to shaper 1D LUT. That means we can save some complexity by just setting a predefined curve, instead of programming a custom curve when preparing color space for applying 3D LUT. Reviewed-by: Harry Wentland Signed-off-by: Melissa Wen --- .../drm/amd/display/amdgpu_dm/amdgpu_dm_color.c | 15 +++ 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index 1d18f447f387..0e65bf0a886e 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -623,20 +623,23 @@ amdgpu_tf_to_dc_tf(enum amdgpu_transfer_function tf) } static int amdgpu_dm_atomic_shaper_lut(const struct drm_color_lut *shaper_lut, + bool has_rom, + enum dc_transfer_func_predefined tf, uint32_t shaper_size, struct dc_transfer_func *func_shaper) { int ret = 0; - if (shaper_size) { + if (shaper_size || tf != TRANSFER_FUNCTION_LINEAR) { /* * If user shaper LUT is set, we assume a linear color space * (linearized by degamma 1D LUT or not). 
*/ func_shaper->type = TF_TYPE_DISTRIBUTED_POINTS; - func_shaper->tf = TRANSFER_FUNCTION_LINEAR; + func_shaper->tf = tf; + func_shaper->sdr_ref_white_level = SDR_WHITE_LEVEL_INIT_VALUE; - ret = __set_output_tf(func_shaper, shaper_lut, shaper_size, false); + ret = __set_output_tf(func_shaper, shaper_lut, shaper_size, has_rom); } else { func_shaper->type = TF_TYPE_BYPASS; func_shaper->tf = TRANSFER_FUNCTION_LINEAR; @@ -971,6 +974,7 @@ amdgpu_dm_plane_set_color_properties(struct drm_plane_state *plane_state, struct dc_plane_state *dc_plane_state) { struct dm_plane_state *dm_plane_state = to_dm_plane_state(plane_state); + enum amdgpu_transfer_function shaper_tf = AMDGPU_TRANSFER_FUNCTION_DEFAULT; const struct drm_color_lut *shaper_lut; uint32_t shaper_size; int ret; @@ -983,8 +987,11 @@ amdgpu_dm_plane_set_color_properties(struct drm_plane_state *plane_state, shaper_lut = __extract_blob_lut(dm_plane_state->shaper_lut, _size); shaper_size = shaper_lut != NULL ? shaper_size : 0; + shaper_tf = dm_plane_state->shaper_tf; - ret = amdgpu_dm_atomic_shaper_lut(shaper_lut, shaper_size, + ret = amdgpu_dm_atomic_shaper_lut(shaper_lut, false, + amdgpu_tf_to_dc_tf(shaper_tf), + shaper_size, dc_plane_state->in_shaper_func); if (ret) drm_dbg_kms(plane_state->plane->dev, -- 2.40.1
[PATCH v4 23/32] drm/amd/display: add plane shaper LUT support
Map DC shaper LUT to DM plane color management. Shaper LUT can be used to delinearize and/or normalize the color space for computational efficiency and achieving specific visual styles. If a plane degamma is applied to linearize the color space, a custom shaper 1D LUT can be used just before applying 3D LUT. v2: - use DPP color caps to verify plane 3D LUT support - add debug message if shaper LUT programming fails v4: - remove helper to check 3D LUT color caps (Harry) - update desc of lut3d-setup helper from MPC to DPP Reviewed-by: Harry Wentland Signed-off-by: Melissa Wen --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 1 + .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 2 + .../amd/display/amdgpu_dm/amdgpu_dm_color.c | 97 ++- 3 files changed, 96 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index ded52dd780c8..c79cd98d1228 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -8287,6 +8287,7 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, bundle->surface_updates[planes_count].in_transfer_func = dc_plane->in_transfer_func; bundle->surface_updates[planes_count].gamut_remap_matrix = _plane->gamut_remap_matrix; bundle->surface_updates[planes_count].hdr_mult = dc_plane->hdr_mult; + bundle->surface_updates[planes_count].func_shaper = dc_plane->in_shaper_func; } amdgpu_dm_plane_fill_dc_scaling_info(dm->adev, new_plane_state, diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index c5ec6e4f15c2..219efa7fe181 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -908,6 +908,8 @@ void amdgpu_dm_trigger_timing_sync(struct drm_device *dev); /* 3D LUT max size is 17x17x17 (4913 entries) */ #define MAX_COLOR_3DLUT_SIZE 17 #define MAX_COLOR_3DLUT_BITDEPTH 12 
+int amdgpu_dm_verify_lut3d_size(struct amdgpu_device *adev, + struct drm_plane_state *plane_state); /* 1D LUT size */ #define MAX_COLOR_LUT_ENTRIES 4096 /* Legacy gamm LUT users such as X doesn't like large LUT sizes */ diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index 849e07dd436a..1d18f447f387 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -622,6 +622,63 @@ amdgpu_tf_to_dc_tf(enum amdgpu_transfer_function tf) } } +static int amdgpu_dm_atomic_shaper_lut(const struct drm_color_lut *shaper_lut, + uint32_t shaper_size, + struct dc_transfer_func *func_shaper) +{ + int ret = 0; + + if (shaper_size) { + /* +* If user shaper LUT is set, we assume a linear color space +* (linearized by degamma 1D LUT or not). +*/ + func_shaper->type = TF_TYPE_DISTRIBUTED_POINTS; + func_shaper->tf = TRANSFER_FUNCTION_LINEAR; + + ret = __set_output_tf(func_shaper, shaper_lut, shaper_size, false); + } else { + func_shaper->type = TF_TYPE_BYPASS; + func_shaper->tf = TRANSFER_FUNCTION_LINEAR; + } + + return ret; +} + +/** + * amdgpu_dm_verify_lut3d_size - verifies if 3D LUT is supported and if user + * shaper and 3D LUTs match the hw supported size + * @adev: amdgpu device + * @crtc_state: the DRM CRTC state + * + * Verifies if pre-blending (DPP) 3D LUT is supported by the HW (DCN 2.0 or + * newer) and if the user shaper and 3D LUTs match the supported size. + * + * Returns: + * 0 on success. -EINVAL if lut size are invalid. + */ +int amdgpu_dm_verify_lut3d_size(struct amdgpu_device *adev, + struct drm_plane_state *plane_state) +{ + struct dm_plane_state *dm_plane_state = to_dm_plane_state(plane_state); + const struct drm_color_lut *shaper = NULL; + uint32_t exp_size, size; + bool has_3dlut = adev->dm.dc->caps.color.dpp.hw_3d_lut; + + /* shaper LUT is only available if 3D LUT color caps */ + exp_size = has_3dlut ? 
MAX_COLOR_LUT_ENTRIES : 0; + shaper = __extract_blob_lut(dm_plane_state->shaper_lut, ); + + if (shaper && size != exp_size) { + drm_dbg(>ddev, + "Invalid Shaper LUT size. Should be %u but got %u.\n", + exp_size, size); + return -EINVAL; + } + + return 0; +} + /** * amdgpu_dm_verify_lut_sizes - verifies if DRM luts match the hw supported sizes * @crtc_state: the DRM CRTC state @@ -909,6
[PATCH v4 20/32] drm/amd/display: reject atomic commit if setting both plane and CRTC degamma
DC only has pre-blending degamma caps (plane/DPP) that is currently in use for CRTC/post-blending degamma, so that we don't have HW caps to perform plane and CRTC degamma at the same time. Reject atomic updates when userspace sets both plane and CRTC degamma properties. Reviewed-by: Harry Wentland Signed-off-by: Melissa Wen --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c | 13 - 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index 6acc9ebc52da..354ab46894d2 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -943,9 +943,20 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc, has_crtc_cm_degamma = (crtc->cm_has_degamma || crtc->cm_is_degamma_srgb); ret = __set_dm_plane_degamma(plane_state, dc_plane_state); - if (ret != -EINVAL) + if (ret == -ENOMEM) return ret; + /* We only have one degamma block available (pre-blending) for the +* whole color correction pipeline, so that we can't actually perform +* plane and CRTC degamma at the same time. Explicitly reject atomic +* updates when userspace sets both plane and CRTC degamma properties. +*/ + if (has_crtc_cm_degamma && ret != -EINVAL){ + drm_dbg_kms(crtc->base.crtc->dev, + "doesn't support plane and CRTC degamma at the same time\n"); + return -EINVAL; + } + /* If we are here, it means we don't have plane degamma settings, check * if we have CRTC degamma waiting for mapping to pre-blending degamma * block -- 2.40.1
[PATCH v4 22/32] drm/amd/display: add HDR multiplier support
From: Joshua Ashton With `dc_fixpt_from_s3132()` translation, we can just use it to set hdr_mult. Reviewed-by: Harry Wentland Signed-off-by: Joshua Ashton Signed-off-by: Melissa Wen --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 1 + drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c | 3 +++ 2 files changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index a8a1690b7322..ded52dd780c8 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -8286,6 +8286,7 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, bundle->surface_updates[planes_count].gamma = dc_plane->gamma_correction; bundle->surface_updates[planes_count].in_transfer_func = dc_plane->in_transfer_func; bundle->surface_updates[planes_count].gamut_remap_matrix = _plane->gamut_remap_matrix; + bundle->surface_updates[planes_count].hdr_mult = dc_plane->hdr_mult; } amdgpu_dm_plane_fill_dc_scaling_info(dm->adev, new_plane_state, diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index 599bba566226..849e07dd436a 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -926,6 +926,7 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc, struct drm_plane_state *plane_state, struct dc_plane_state *dc_plane_state) { + struct dm_plane_state *dm_plane_state = to_dm_plane_state(plane_state); bool has_crtc_cm_degamma; int ret; @@ -936,6 +937,8 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc, /* After, we start to update values according to color props */ has_crtc_cm_degamma = (crtc->cm_has_degamma || crtc->cm_is_degamma_srgb); + dc_plane_state->hdr_mult = dc_fixpt_from_s3132(dm_plane_state->hdr_mult); + ret = __set_dm_plane_degamma(plane_state, 
dc_plane_state); if (ret == -ENOMEM) return ret; -- 2.40.1
[PATCH v4 21/32] drm/amd/display: add dc_fixpt_from_s3132 helper
From: Joshua Ashton Detach value translation from CTM to reuse it for programming HDR multiplier property. Reviewed-by: Harry Wentland Signed-off-by: Joshua Ashton Signed-off-by: Melissa Wen --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c | 8 +--- drivers/gpu/drm/amd/display/include/fixed31_32.h | 12 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index 354ab46894d2..599bba566226 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -404,7 +404,6 @@ static void __drm_lut_to_dc_gamma(const struct drm_color_lut *lut, static void __drm_ctm_to_dc_matrix(const struct drm_color_ctm *ctm, struct fixed31_32 *matrix) { - int64_t val; int i; /* @@ -423,12 +422,7 @@ static void __drm_ctm_to_dc_matrix(const struct drm_color_ctm *ctm, } /* gamut_remap_matrix[i] = ctm[i - floor(i/4)] */ - val = ctm->matrix[i - (i / 4)]; - /* If negative, convert to 2's complement. */ - if (val & (1ULL << 63)) - val = -(val & ~(1ULL << 63)); - - matrix[i].value = val; + matrix[i] = dc_fixpt_from_s3132(ctm->matrix[i - (i / 4)]); } } diff --git a/drivers/gpu/drm/amd/display/include/fixed31_32.h b/drivers/gpu/drm/amd/display/include/fixed31_32.h index d4cf7ead1d87..84da1dd34efd 100644 --- a/drivers/gpu/drm/amd/display/include/fixed31_32.h +++ b/drivers/gpu/drm/amd/display/include/fixed31_32.h @@ -69,6 +69,18 @@ static const struct fixed31_32 dc_fixpt_epsilon = { 1LL }; static const struct fixed31_32 dc_fixpt_half = { 0x8000LL }; static const struct fixed31_32 dc_fixpt_one = { 0x1LL }; +static inline struct fixed31_32 dc_fixpt_from_s3132(__u64 x) +{ + struct fixed31_32 val; + + /* If negative, convert to 2's complement. */ + if (x & (1ULL << 63)) + x = -(x & ~(1ULL << 63)); + + val.value = x; + return val; +} + /* * @brief * Initialization routines -- 2.40.1
[PATCH v4 19/32] drm/amd/display: add plane degamma TF and LUT support
From: Joshua Ashton Set DC plane with user degamma LUT or predefined TF from driver-specific plane color properties. If plane and CRTC degamma are set in the same time, plane degamma has priority. That means, we only set CRTC degamma if we don't have plane degamma LUT or TF to configure. We return -EINVAL if we don't have plane degamma settings, so we can continue and check CRTC degamma. Reviewed-by: Harry Wentland Signed-off-by: Joshua Ashton Signed-off-by: Melissa Wen --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 4 +- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 1 + .../amd/display/amdgpu_dm/amdgpu_dm_color.c | 70 +-- 3 files changed, 69 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index d44fd8cb6edf..a8a1690b7322 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -5191,7 +5191,9 @@ static int fill_dc_plane_attributes(struct amdgpu_device *adev, * Always set input transfer function, since plane state is refreshed * every time. 
*/ - ret = amdgpu_dm_update_plane_color_mgmt(dm_crtc_state, dc_plane_state); + ret = amdgpu_dm_update_plane_color_mgmt(dm_crtc_state, + plane_state, + dc_plane_state); if (ret) return ret; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index c138457ff12e..c5ec6e4f15c2 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -918,6 +918,7 @@ int amdgpu_dm_create_color_properties(struct amdgpu_device *adev); int amdgpu_dm_verify_lut_sizes(const struct drm_crtc_state *crtc_state); int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc); int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc, + struct drm_plane_state *plane_state, struct dc_plane_state *dc_plane_state); void amdgpu_dm_update_connector_after_detect( diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index ffdf493b8ef2..6acc9ebc52da 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -867,9 +867,58 @@ map_crtc_degamma_to_dc_plane(struct dm_crtc_state *crtc, return 0; } +static int +__set_dm_plane_degamma(struct drm_plane_state *plane_state, + struct dc_plane_state *dc_plane_state) +{ + struct dm_plane_state *dm_plane_state = to_dm_plane_state(plane_state); + const struct drm_color_lut *degamma_lut; + enum amdgpu_transfer_function tf = AMDGPU_TRANSFER_FUNCTION_DEFAULT; + uint32_t degamma_size; + bool has_degamma_lut; + int ret; + + degamma_lut = __extract_blob_lut(dm_plane_state->degamma_lut, +_size); + + has_degamma_lut = degamma_lut && + !__is_lut_linear(degamma_lut, degamma_size); + + tf = dm_plane_state->degamma_tf; + + /* If we don't have plane degamma LUT nor TF to set on DC, we have +* nothing to do here, return. 
+*/ + if (!has_degamma_lut && tf == AMDGPU_TRANSFER_FUNCTION_DEFAULT) + return -EINVAL; + + dc_plane_state->in_transfer_func->tf = amdgpu_tf_to_dc_tf(tf); + + if (has_degamma_lut) { + ASSERT(degamma_size == MAX_COLOR_LUT_ENTRIES); + + dc_plane_state->in_transfer_func->type = + TF_TYPE_DISTRIBUTED_POINTS; + + ret = __set_input_tf(dc_plane_state->in_transfer_func, +degamma_lut, degamma_size); + if (ret) + return ret; + } else { + dc_plane_state->in_transfer_func->type = + TF_TYPE_PREDEFINED; + + if (!mod_color_calculate_degamma_params(NULL, + dc_plane_state->in_transfer_func, NULL, false)) + return -ENOMEM; + } + return 0; +} + /** * amdgpu_dm_update_plane_color_mgmt: Maps DRM color management to DC plane. * @crtc: amdgpu_dm crtc state + * @plane_state: DRM plane state * @dc_plane_state: target DC surface * * Update the underlying dc_stream_state's input transfer function (ITF) in @@ -880,13 +929,28 @@ map_crtc_degamma_to_dc_plane(struct dm_crtc_state *crtc, * 0 on success. -ENOMEM if mem allocation fails. */ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc, + struct drm_plane_state *plane_state, struct
[PATCH v4 18/32] drm/amd/display: decouple steps for mapping CRTC degamma to DC plane
The next patch adds pre-blending degamma to AMD color mgmt pipeline, but pre-blending degamma caps (DPP) is currently in use to provide DRM CRTC atomic degamma or implict degamma on legacy gamma. Detach degamma usage regarging CRTC color properties to manage plane and CRTC color correction combinations. Reviewed-by: Harry Wentland Signed-off-by: Melissa Wen --- .../amd/display/amdgpu_dm/amdgpu_dm_color.c | 60 +-- 1 file changed, 42 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index e81263d60b7d..ffdf493b8ef2 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -788,20 +788,9 @@ int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc) return 0; } -/** - * amdgpu_dm_update_plane_color_mgmt: Maps DRM color management to DC plane. - * @crtc: amdgpu_dm crtc state - * @dc_plane_state: target DC surface - * - * Update the underlying dc_stream_state's input transfer function (ITF) in - * preparation for hardware commit. The transfer function used depends on - * the preparation done on the stream for color management. - * - * Returns: - * 0 on success. -ENOMEM if mem allocation fails. 
- */ -int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc, - struct dc_plane_state *dc_plane_state) +static int +map_crtc_degamma_to_dc_plane(struct dm_crtc_state *crtc, +struct dc_plane_state *dc_plane_state) { const struct drm_color_lut *degamma_lut; enum dc_transfer_func_predefined tf = TRANSFER_FUNCTION_SRGB; @@ -824,8 +813,7 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc, _size); ASSERT(degamma_size == MAX_COLOR_LUT_ENTRIES); - dc_plane_state->in_transfer_func->type = - TF_TYPE_DISTRIBUTED_POINTS; + dc_plane_state->in_transfer_func->type = TF_TYPE_DISTRIBUTED_POINTS; /* * This case isn't fully correct, but also fairly @@ -861,7 +849,7 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc, degamma_lut, degamma_size); if (r) return r; - } else if (crtc->cm_is_degamma_srgb) { + } else { /* * For legacy gamma support we need the regamma input * in linear space. Assume that the input is sRGB. @@ -871,8 +859,44 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc, if (tf != TRANSFER_FUNCTION_SRGB && !mod_color_calculate_degamma_params(NULL, - dc_plane_state->in_transfer_func, NULL, false)) + dc_plane_state->in_transfer_func, + NULL, false)) return -ENOMEM; + } + + return 0; +} + +/** + * amdgpu_dm_update_plane_color_mgmt: Maps DRM color management to DC plane. + * @crtc: amdgpu_dm crtc state + * @dc_plane_state: target DC surface + * + * Update the underlying dc_stream_state's input transfer function (ITF) in + * preparation for hardware commit. The transfer function used depends on + * the preparation done on the stream for color management. + * + * Returns: + * 0 on success. -ENOMEM if mem allocation fails. 
+ */ +int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc, + struct dc_plane_state *dc_plane_state) +{ + bool has_crtc_cm_degamma; + int ret; + + has_crtc_cm_degamma = (crtc->cm_has_degamma || crtc->cm_is_degamma_srgb); + if (has_crtc_cm_degamma){ + /* +* AMD HW doesn't have post-blending degamma caps. When DRM +* CRTC atomic degamma is set, we maps it to DPP degamma block +* (pre-blending) or, on legacy gamma, we use DPP degamma to +* linearize (implicit degamma) from sRGB/BT709 according to +* the input space. +*/ + ret = map_crtc_degamma_to_dc_plane(crtc, dc_plane_state); + if (ret) + return ret; } else { /* ...Otherwise we can just bypass the DGM block. */ dc_plane_state->in_transfer_func->type = TF_TYPE_BYPASS; -- 2.40.1
[PATCH v4 16/32] drm/amd/display: set sdr_ref_white_level to 80 for out_transfer_func
From: Joshua Ashton Otherwise this is just initialized to 0. This needs to actually have a value so that compute_curve can work for PQ EOTF. Reviewed-by: Harry Wentland Signed-off-by: Joshua Ashton Co-developed-by: Melissa Wen Signed-off-by: Melissa Wen --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index 782adb8bea43..e81263d60b7d 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -72,6 +72,7 @@ */ #define MAX_DRM_LUT_VALUE 0x +#define SDR_WHITE_LEVEL_INIT_VALUE 80 /** * amdgpu_dm_init_color_mod - Initialize the color module. @@ -551,6 +552,7 @@ static int amdgpu_dm_set_atomic_regamma(struct dc_stream_state *stream, */ out_tf->type = TF_TYPE_DISTRIBUTED_POINTS; out_tf->tf = tf; + out_tf->sdr_ref_white_level = SDR_WHITE_LEVEL_INIT_VALUE; ret = __set_output_tf(out_tf, regamma_lut, regamma_size, has_rom); } else { -- 2.40.1
[PATCH v4 13/32] drm/amd/display: add comments to describe DM crtc color mgmt behavior
Describe some expected behavior of the AMD DM color mgmt programming. Reviewed-by: Harry Wentland Signed-off-by: Melissa Wen --- .../drm/amd/display/amdgpu_dm/amdgpu_dm_color.c | 16 ++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index 2ecfa0e886e8..2b2826a1d855 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -660,13 +660,25 @@ int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc) crtc->cm_is_degamma_srgb = true; stream->out_transfer_func->type = TF_TYPE_DISTRIBUTED_POINTS; stream->out_transfer_func->tf = TRANSFER_FUNCTION_SRGB; - + /* +* Note: although we pass has_rom as parameter here, we never +* actually use ROM because the color module only takes the ROM +* path if transfer_func->type == PREDEFINED. +* +* See more in mod_color_calculate_regamma_params() +*/ r = __set_legacy_tf(stream->out_transfer_func, regamma_lut, regamma_size, has_rom); if (r) return r; } else if (has_regamma) { - /* If atomic regamma, CRTC RGM goes into RGM LUT. */ + /* +* CRTC RGM goes into RGM LUT. +* +* Note: there is no implicit sRGB regamma here. We are using +* degamma calculation from color module to calculate the curve +* from a linear base. +*/ stream->out_transfer_func->type = TF_TYPE_DISTRIBUTED_POINTS; stream->out_transfer_func->tf = TRANSFER_FUNCTION_LINEAR; -- 2.40.1
[PATCH v4 14/32] drm/amd/display: encapsulate atomic regamma operation
We will wire up MPC 3D LUT to DM CRTC color pipeline in the next patch, but so far, only for atomic interface. By checking set_output_transfer_func in DC drivers with MPC 3D LUT support, we can verify that regamma is only programmed when 3D LUT programming fails. As a groundwork to introduce 3D LUT programming and better understand each step, detach atomic regamma programming from the crtc colocr updating code. Reviewed-by: Harry Wentland Signed-off-by: Melissa Wen --- .../amd/display/amdgpu_dm/amdgpu_dm_color.c | 55 --- 1 file changed, 35 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index 2b2826a1d855..0487fb715945 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -524,6 +524,37 @@ static int __set_output_tf(struct dc_transfer_func *func, return res ? 0 : -ENOMEM; } +static int amdgpu_dm_set_atomic_regamma(struct dc_stream_state *stream, + const struct drm_color_lut *regamma_lut, + uint32_t regamma_size, bool has_rom) +{ + struct dc_transfer_func *out_tf = stream->out_transfer_func; + int ret = 0; + + if (regamma_size) { + /* +* CRTC RGM goes into RGM LUT. +* +* Note: there is no implicit sRGB regamma here. We are using +* degamma calculation from color module to calculate the curve +* from a linear base. +*/ + out_tf->type = TF_TYPE_DISTRIBUTED_POINTS; + out_tf->tf = TRANSFER_FUNCTION_LINEAR; + + ret = __set_output_tf(out_tf, regamma_lut, regamma_size, has_rom); + } else { + /* +* No CRTC RGM means we can just put the block into bypass +* since we don't have any plane level adjustments using it. +*/ + out_tf->type = TF_TYPE_BYPASS; + out_tf->tf = TRANSFER_FUNCTION_LINEAR; + } + + return ret; +} + /** * __set_input_tf - calculates the input transfer function based on expected * input space. 
@@ -671,28 +702,12 @@ int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc) regamma_size, has_rom); if (r) return r; - } else if (has_regamma) { - /* -* CRTC RGM goes into RGM LUT. -* -* Note: there is no implicit sRGB regamma here. We are using -* degamma calculation from color module to calculate the curve -* from a linear base. -*/ - stream->out_transfer_func->type = TF_TYPE_DISTRIBUTED_POINTS; - stream->out_transfer_func->tf = TRANSFER_FUNCTION_LINEAR; - - r = __set_output_tf(stream->out_transfer_func, regamma_lut, - regamma_size, has_rom); + } else { + regamma_size = has_regamma ? regamma_size : 0; + r = amdgpu_dm_set_atomic_regamma(stream, regamma_lut, +regamma_size, has_rom); if (r) return r; - } else { - /* -* No CRTC RGM means we can just put the block into bypass -* since we don't have any plane level adjustments using it. -*/ - stream->out_transfer_func->type = TF_TYPE_BYPASS; - stream->out_transfer_func->tf = TRANSFER_FUNCTION_LINEAR; } /* -- 2.40.1
[PATCH v4 17/32] drm/amd/display: mark plane as needing reset if color props change
From: Joshua Ashton We should reset a plane state if at least one of the color management properties differs from old and new state. Reviewed-by: Harry Wentland Signed-off-by: Joshua Ashton Co-developed-by: Melissa Wen Signed-off-by: Melissa Wen --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 15 +++ 1 file changed, 15 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 846dbeddd0fb..d44fd8cb6edf 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -9944,6 +9944,10 @@ static bool should_reset_plane(struct drm_atomic_state *state, */ for_each_oldnew_plane_in_state(state, other, old_other_state, new_other_state, i) { struct amdgpu_framebuffer *old_afb, *new_afb; + struct dm_plane_state *dm_new_other_state, *dm_old_other_state; + + dm_new_other_state = to_dm_plane_state(new_other_state); + dm_old_other_state = to_dm_plane_state(old_other_state); if (other->type == DRM_PLANE_TYPE_CURSOR) continue; @@ -9980,6 +9984,17 @@ static bool should_reset_plane(struct drm_atomic_state *state, old_other_state->color_encoding != new_other_state->color_encoding) return true; + /* HDR/Transfer Function changes. */ + if (dm_old_other_state->degamma_tf != dm_new_other_state->degamma_tf || + dm_old_other_state->degamma_lut != dm_new_other_state->degamma_lut || + dm_old_other_state->hdr_mult != dm_new_other_state->hdr_mult || + dm_old_other_state->shaper_lut != dm_new_other_state->shaper_lut || + dm_old_other_state->shaper_tf != dm_new_other_state->shaper_tf || + dm_old_other_state->lut3d != dm_new_other_state->lut3d || + dm_old_other_state->blend_lut != dm_new_other_state->blend_lut || + dm_old_other_state->blend_tf != dm_new_other_state->blend_tf) + return true; + /* Framebuffer checks fall at the end. */ if (!old_other_state->fb || !new_other_state->fb) continue; -- 2.40.1
[PATCH v4 12/32] drm/amd/display: add CRTC gamma TF driver-specific property
Add AMD pre-defined transfer function property to default DRM CRTC gamma to convert to wire encoding with or without a user gamma LUT. There is no post-blending regamma ROM for pre-defined TF. When setting Gamma TF (!= Identity) and LUT at the same time, the color module will combine the pre-defined TF and the custom LUT values into the LUT that's actually programmed. v2: - enable CRTC prop in the end of driver-specific prop sequence - define inverse EOTFs as supported regamma TFs - reword driver-specific function doc to remove shaper/3D LUT v3: - spell out TF+LUT behavior in the commit and comments (Harry) Co-developed-by: Joshua Ashton Signed-off-by: Joshua Ashton Signed-off-by: Melissa Wen --- drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h | 7 ++ .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 8 +++ .../amd/display/amdgpu_dm/amdgpu_dm_color.c | 7 ++ .../amd/display/amdgpu_dm/amdgpu_dm_crtc.c| 72 +++ 4 files changed, 94 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h index dee35d208493..071cc10bfd90 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h @@ -424,6 +424,13 @@ struct amdgpu_mode_info { * from a combination of pre-defined TF and the custom 1D LUT). */ struct drm_property *plane_blend_tf_property; + /* @regamma_tf_property: Transfer function for CRTC regamma +* (post-blending). Possible values are defined by `enum +* amdgpu_transfer_function`. There is no regamma ROM, but we can use +* AMD color modules to program LUT parameters from predefined TF (or +* from a combination of pre-defined TF and the custom 1D LUT). 
+*/ + struct drm_property *regamma_tf_property; }; #define AMDGPU_MAX_BL_LEVEL 0xFF diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index 1b96c742d747..c138457ff12e 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -836,6 +836,14 @@ struct dm_crtc_state { struct dc_info_packet vrr_infopacket; int abm_level; + +/** +* @regamma_tf: +* +* Pre-defined transfer function for converting internal FB -> wire +* encoding. +*/ + enum amdgpu_transfer_function regamma_tf; }; #define to_dm_crtc_state(x) container_of(x, struct dm_crtc_state, base) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index 82c554662faa..2ecfa0e886e8 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -294,6 +294,13 @@ amdgpu_dm_create_color_properties(struct amdgpu_device *adev) return -ENOMEM; adev->mode_info.plane_blend_tf_property = prop; + prop = amdgpu_create_tf_property(adev_to_drm(adev), +"AMD_CRTC_REGAMMA_TF", +amdgpu_inv_eotf); + if (!prop) + return -ENOMEM; + adev->mode_info.regamma_tf_property = prop; + return 0; } #endif diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c index 440fc0869a34..d746f0aa0f11 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c @@ -253,6 +253,7 @@ static struct drm_crtc_state *dm_crtc_duplicate_state(struct drm_crtc *crtc) state->freesync_config = cur->freesync_config; state->cm_has_degamma = cur->cm_has_degamma; state->cm_is_degamma_srgb = cur->cm_is_degamma_srgb; + state->regamma_tf = cur->regamma_tf; state->crc_skip_count = cur->crc_skip_count; state->mpo_requested = cur->mpo_requested; /* TODO Duplicate dc_stream 
after objects are stream object is flattened */ @@ -289,6 +290,70 @@ static int amdgpu_dm_crtc_late_register(struct drm_crtc *crtc) } #endif +#ifdef AMD_PRIVATE_COLOR +/** + * drm_crtc_additional_color_mgmt - enable additional color properties + * @crtc: DRM CRTC + * + * This function lets the driver enable post-blending CRTC regamma transfer + * function property in addition to DRM CRTC gamma LUT. Default value means + * linear transfer function, which is the default CRTC gamma LUT behaviour + * without this property. + */ +static void +dm_crtc_additional_color_mgmt(struct drm_crtc *crtc) +{ + struct amdgpu_device *adev = drm_to_adev(crtc->dev); + + if(adev->dm.dc->caps.color.mpc.ogam_ram) + drm_object_attach_property(>base, + adev->mode_info.regamma_tf_property, + AMDGPU_TRANSFER_FUNCTION_DEFAULT); +} + +static
[PATCH v4 15/32] drm/amd/display: add CRTC gamma TF support
From: Joshua Ashton Add predefined transfer function programming. There is no post-blending out gamma ROM for hardcoded curves, but we can use AMD color modules to program LUT parameters from pre-defined coefficients and an empty regamma LUT (or bump up LUT parameters with pre-defined TF values). v2: - update crtc color mgmt if regamma TF differs between states (Joshua) - map inverse EOTF to DC transfer function (Melissa) v3: - update AMDGPU TF list v4: - update comment regarding regamma behavior Reviewed-by: Harry Wentland Signed-off-by: Joshua Ashton Co-developed-by: Melissa Wen Signed-off-by: Melissa Wen --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 1 + .../amd/display/amdgpu_dm/amdgpu_dm_color.c | 77 +++ 2 files changed, 61 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 7c55b1c0ac5c..846dbeddd0fb 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -9876,6 +9876,7 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm, * when a modeset is needed, to ensure it gets reprogrammed. 
*/ if (dm_new_crtc_state->base.color_mgmt_changed || + dm_old_crtc_state->regamma_tf != dm_new_crtc_state->regamma_tf || drm_atomic_crtc_needs_modeset(new_crtc_state)) { ret = amdgpu_dm_update_crtc_color_mgmt(dm_new_crtc_state); if (ret) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index 0487fb715945..782adb8bea43 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -489,16 +489,18 @@ static int __set_output_tf(struct dc_transfer_func *func, struct calculate_buffer cal_buffer = {0}; bool res; - ASSERT(lut && lut_size == MAX_COLOR_LUT_ENTRIES); - cal_buffer.buffer_index = -1; - gamma = dc_create_gamma(); - if (!gamma) - return -ENOMEM; + if (lut_size) { + ASSERT(lut && lut_size == MAX_COLOR_LUT_ENTRIES); - gamma->num_entries = lut_size; - __drm_lut_to_dc_gamma(lut, gamma, false); + gamma = dc_create_gamma(); + if (!gamma) + return -ENOMEM; + + gamma->num_entries = lut_size; + __drm_lut_to_dc_gamma(lut, gamma, false); + } if (func->tf == TRANSFER_FUNCTION_LINEAR) { /* @@ -506,41 +508,49 @@ static int __set_output_tf(struct dc_transfer_func *func, * on top of a linear input. But degamma params can be used * instead to simulate this. */ - gamma->type = GAMMA_CUSTOM; + if (gamma) + gamma->type = GAMMA_CUSTOM; res = mod_color_calculate_degamma_params(NULL, func, - gamma, true); +gamma, gamma != NULL); } else { /* * Assume sRGB. The actual mapping will depend on whether the * input was legacy or not. */ - gamma->type = GAMMA_CS_TFM_1D; - res = mod_color_calculate_regamma_params(func, gamma, false, + if (gamma) + gamma->type = GAMMA_CS_TFM_1D; + res = mod_color_calculate_regamma_params(func, gamma, gamma != NULL, has_rom, NULL, _buffer); } - dc_gamma_release(); + if (gamma) + dc_gamma_release(); return res ? 
0 : -ENOMEM; } static int amdgpu_dm_set_atomic_regamma(struct dc_stream_state *stream, const struct drm_color_lut *regamma_lut, - uint32_t regamma_size, bool has_rom) + uint32_t regamma_size, bool has_rom, + enum dc_transfer_func_predefined tf) { struct dc_transfer_func *out_tf = stream->out_transfer_func; int ret = 0; - if (regamma_size) { + if (regamma_size || tf != TRANSFER_FUNCTION_LINEAR) { /* * CRTC RGM goes into RGM LUT. * * Note: there is no implicit sRGB regamma here. We are using * degamma calculation from color module to calculate the curve -* from a linear base. +* from a linear base if gamma TF is not set. However, if gamma +* TF (!= Linear) and LUT are set at the same time, we will use +* regamma calculation, and the color module will combine the +
[PATCH v4 11/32] drm/amd/display: add plane blend LUT and TF driver-specific properties
From: Joshua Ashton Blend 1D LUT or a pre-defined transfer function (TF) can be set to linearize content before blending, so that it's positioned just before blending planes in the AMD color mgmt pipeline, and after 3D LUT (non-linear space). Shaper and Blend LUTs are 1D LUTs that sandwich 3D LUT. Drivers should advertize blend properties according to HW caps. There is no blend ROM for pre-defined TF. When setting blend TF (!= Identity) and LUT at the same time, the color module will combine the pre-defined TF and the custom LUT values into the LUT that's actually programmed. v3: - spell out TF+LUT behavior in the commit and comments (Harry) Signed-off-by: Joshua Ashton Signed-off-by: Melissa Wen --- drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h | 22 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 12 +++ .../amd/display/amdgpu_dm/amdgpu_dm_color.c | 21 +++ .../amd/display/amdgpu_dm/amdgpu_dm_plane.c | 36 +++ 4 files changed, 91 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h index af70db4f6b4b..dee35d208493 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h @@ -402,6 +402,28 @@ struct amdgpu_mode_info { * entries for 3D LUT array is the 3D LUT size cubed; */ struct drm_property *plane_lut3d_size_property; + /** +* @plane_blend_lut_property: Plane property for output gamma before +* blending. Userspace set a blend LUT to convert colors after 3D LUT +* conversion. It works as a post-3DLUT 1D LUT. With shaper LUT, they +* are sandwiching 3D LUT with two 1D LUT. If plane_blend_tf_property +* != Identity TF, AMD color module will combine the user LUT values +* with pre-defined TF into the LUT parameters to be programmed. +*/ + struct drm_property *plane_blend_lut_property; + /** +* @plane_blend_lut_size_property: Plane property to define the max +* size of blend LUT as supported by the driver (read-only). 
+*/ + struct drm_property *plane_blend_lut_size_property; + /** +* @plane_blend_tf_property: Plane property to set a predefined +* transfer function for pre-blending blend/out_gamma (after applying +* 3D LUT) with or without LUT. There is no blend ROM, but we can use +* AMD color modules to program LUT parameters from predefined TF (or +* from a combination of pre-defined TF and the custom 1D LUT). +*/ + struct drm_property *plane_blend_tf_property; }; #define AMDGPU_MAX_BL_LEVEL 0xFF diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index 0e2a04a3caf3..1b96c742d747 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -800,6 +800,18 @@ struct dm_plane_state { * drm_color_lut. */ struct drm_property_blob *lut3d; + /** +* @blend_lut: blend lut lookup table blob. The blob (if not NULL) is an +* array of drm_color_lut. +*/ + struct drm_property_blob *blend_lut; + /** +* @blend_tf: +* +* Pre-defined transfer function for converting plane pixel data before +* applying blend LUT. 
+*/ + enum amdgpu_transfer_function blend_tf; }; struct dm_crtc_state { diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index d3c7f9a13a61..82c554662faa 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -273,6 +273,27 @@ amdgpu_dm_create_color_properties(struct amdgpu_device *adev) return -ENOMEM; adev->mode_info.plane_lut3d_size_property = prop; + prop = drm_property_create(adev_to_drm(adev), + DRM_MODE_PROP_BLOB, + "AMD_PLANE_BLEND_LUT", 0); + if (!prop) + return -ENOMEM; + adev->mode_info.plane_blend_lut_property = prop; + + prop = drm_property_create_range(adev_to_drm(adev), +DRM_MODE_PROP_IMMUTABLE, +"AMD_PLANE_BLEND_LUT_SIZE", 0, UINT_MAX); + if (!prop) + return -ENOMEM; + adev->mode_info.plane_blend_lut_size_property = prop; + + prop = amdgpu_create_tf_property(adev_to_drm(adev), +"AMD_PLANE_BLEND_TF", +amdgpu_eotf); + if (!prop) + return -ENOMEM; + adev->mode_info.plane_blend_tf_property = prop; + return 0; } #endif diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
[PATCH v4 10/32] drm/amd/display: add plane shaper LUT and TF driver-specific properties
On AMD HW, 3D LUT always assumes a preceding shaper 1D LUT used for delinearizing and/or normalizing the color space before applying a 3D LUT. Add pre-defined transfer function to enable delinearizing content with or without shaper LUT, where AMD color module calculates the resulted shaper curve. We apply an inverse EOTF to go from linear values to encoded values. If we are already in a non-linear space and/or don't need to normalize values, we can bypass shaper LUT with a linear transfer function that is also the default TF value. There is no shaper ROM. When setting shaper TF (!= Identity) and LUT at the same time, the color module will combine the pre-defined TF and the custom LUT values into the LUT that's actually programmed. v2: - squash commits for shaper LUT and shaper TF - define inverse EOTF as supported shaper TFs v3: - spell out TF+LUT behavior in the commit and comments (Harry) - replace BT709 EOTF by inv OETF Signed-off-by: Melissa Wen --- drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h | 21 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 11 +++ .../amd/display/amdgpu_dm/amdgpu_dm_color.c | 29 + .../amd/display/amdgpu_dm/amdgpu_dm_plane.c | 32 +++ 4 files changed, 93 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h index f7adaa52c23f..af70db4f6b4b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h @@ -363,6 +363,27 @@ struct amdgpu_mode_info { * @plane_hdr_mult_property: */ struct drm_property *plane_hdr_mult_property; + /** +* @shaper_lut_property: Plane property to set pre-blending shaper LUT +* that converts color content before 3D LUT. If +* plane_shaper_tf_property != Identity TF, AMD color module will +* combine the user LUT values with pre-defined TF into the LUT +* parameters to be programmed. 
+*/ + struct drm_property *plane_shaper_lut_property; + /** +* @plane_shaper_lut_size_property: Plane property for the size of +* pre-blending shaper LUT as supported by the driver (read-only). +*/ + struct drm_property *plane_shaper_lut_size_property; + /** +* @plane_shaper_tf_property: Plane property to set a predefined +* transfer function for pre-blending shaper (before applying 3D LUT) +* with or without LUT. There is no shaper ROM, but we can use AMD +* color modules to program LUT parameters from predefined TF (or +* from a combination of pre-defined TF and the custom 1D LUT). +*/ + struct drm_property *plane_shaper_tf_property; /** * @plane_lut3d_property: Plane property for color transformation using * a 3D LUT (pre-blending), a three-dimensional array where each diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index 7a2350c62cf1..0e2a04a3caf3 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -784,6 +784,17 @@ struct dm_plane_state { * TF is needed for any subsequent linear-to-non-linear transforms. */ __u64 hdr_mult; + /** +* @shaper_lut: shaper lookup table blob. The blob (if not NULL) is an +* array of drm_color_lut. +*/ + struct drm_property_blob *shaper_lut; + /** +* @shaper_tf: +* +* Predefined transfer function to delinearize color space. +*/ + enum amdgpu_transfer_function shaper_tf; /** * @lut3d: 3D lookup table blob. The blob (if not NULL) is an array of * drm_color_lut. 
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index 011f2f9ec890..d3c7f9a13a61 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -173,6 +173,14 @@ static const u32 amdgpu_eotf = BIT(AMDGPU_TRANSFER_FUNCTION_GAMMA24_EOTF) | BIT(AMDGPU_TRANSFER_FUNCTION_GAMMA26_EOTF); +static const u32 amdgpu_inv_eotf = + BIT(AMDGPU_TRANSFER_FUNCTION_SRGB_INV_EOTF) | + BIT(AMDGPU_TRANSFER_FUNCTION_BT709_OETF) | + BIT(AMDGPU_TRANSFER_FUNCTION_PQ_INV_EOTF) | + BIT(AMDGPU_TRANSFER_FUNCTION_GAMMA22_INV_EOTF) | + BIT(AMDGPU_TRANSFER_FUNCTION_GAMMA24_INV_EOTF) | + BIT(AMDGPU_TRANSFER_FUNCTION_GAMMA26_INV_EOTF); + static struct drm_property * amdgpu_create_tf_property(struct drm_device *dev, const char *name, @@ -230,6 +238,27 @@ amdgpu_dm_create_color_properties(struct amdgpu_device *adev) return -ENOMEM; adev->mode_info.plane_hdr_mult_property = prop; + prop = drm_property_create(adev_to_drm(adev), +
[PATCH v4 09/32] drm/amd/display: add plane 3D LUT driver-specific properties
Add 3D LUT property for plane color transformations using a 3D lookup table. 3D LUT allows for highly accurate and complex color transformations and is suitable to adjust the balance between color channels. It's also more complex to manage and requires more computational resources. Since a 3D LUT has a limited number of entries in each dimension we want to use them in an optimal fashion. This means using the 3D LUT in a colorspace that is optimized for human vision, such as sRGB, PQ, or another non-linear space. Therefore, userspace may need one 1D LUT (shaper) before it to delinearize content and another 1D LUT after 3D LUT (blend) to linearize content again for blending. The next patches add these 1D LUTs to the plane color mgmt pipeline. v3: - improve commit message about 3D LUT - describe the 3D LUT entries and size (Harry) v4: - advertise 3D LUT max size as the size of a single-dimension Signed-off-by: Melissa Wen --- drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h | 18 +++ .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 9 .../amd/display/amdgpu_dm/amdgpu_dm_color.c | 14 +++ .../amd/display/amdgpu_dm/amdgpu_dm_plane.c | 23 +++ 4 files changed, 64 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h index 62044d41da75..f7adaa52c23f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h @@ -363,6 +363,24 @@ struct amdgpu_mode_info { * @plane_hdr_mult_property: */ struct drm_property *plane_hdr_mult_property; + /** +* @plane_lut3d_property: Plane property for color transformation using +* a 3D LUT (pre-blending), a three-dimensional array where each +* element is an RGB triplet. Each dimension has a size of the cubed +* root of lut3d_size. The array contains samples from the approximated +* function. On AMD, values between samples are estimated by +* tetrahedral interpolation. 
The array is accessed with three indices, +* one for each input dimension (color channel), blue being the +* outermost dimension, red the innermost. +*/ + struct drm_property *plane_lut3d_property; + /** +* @plane_lut3d_size_property: Plane property to define the max +* size of 3D LUT as supported by the driver (read-only). The max size +* is the max size of one dimension and, therefore, the max number of +* entries for 3D LUT array is the 3D LUT size cubed; +*/ + struct drm_property *plane_lut3d_size_property; }; #define AMDGPU_MAX_BL_LEVEL 0xFF diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index bb2ce843369d..7a2350c62cf1 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -784,6 +784,11 @@ struct dm_plane_state { * TF is needed for any subsequent linear-to-non-linear transforms. */ __u64 hdr_mult; + /** +* @lut3d: 3D lookup table blob. The blob (if not NULL) is an array of +* drm_color_lut. 
+*/ + struct drm_property_blob *lut3d; }; struct dm_crtc_state { @@ -869,6 +874,10 @@ void amdgpu_dm_update_freesync_caps(struct drm_connector *connector, void amdgpu_dm_trigger_timing_sync(struct drm_device *dev); +/* 3D LUT max size is 17x17x17 (4913 entries) */ +#define MAX_COLOR_3DLUT_SIZE 17 +#define MAX_COLOR_3DLUT_BITDEPTH 12 +/* 1D LUT size */ #define MAX_COLOR_LUT_ENTRIES 4096 /* Legacy gamm LUT users such as X doesn't like large LUT sizes */ #define MAX_COLOR_LEGACY_LUT_ENTRIES 256 diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index caf49a044ab4..011f2f9ec890 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -230,6 +230,20 @@ amdgpu_dm_create_color_properties(struct amdgpu_device *adev) return -ENOMEM; adev->mode_info.plane_hdr_mult_property = prop; + prop = drm_property_create(adev_to_drm(adev), + DRM_MODE_PROP_BLOB, + "AMD_PLANE_LUT3D", 0); + if (!prop) + return -ENOMEM; + adev->mode_info.plane_lut3d_property = prop; + + prop = drm_property_create_range(adev_to_drm(adev), +DRM_MODE_PROP_IMMUTABLE, +"AMD_PLANE_LUT3D_SIZE", 0, UINT_MAX); + if (!prop) + return -ENOMEM; + adev->mode_info.plane_lut3d_size_property = prop; + return 0; } #endif diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c index
[PATCH v4 08/32] drm/amd/display: add plane HDR multiplier driver-specific property
From: Joshua Ashton Multiplier to 'gain' the plane. When PQ is decoded using the fixed func transfer function to the internal FP16 fb, 1.0 -> 80 nits (on AMD at least) When sRGB is decoded, 1.0 -> 1.0. Therefore, 1.0 multiplier = 80 nits for SDR content. So if you want, 203 nits for SDR content, pass in (203.0 / 80.0). v4: - comment about the PQ TF need for L-to-NL (from Harry's review) Signed-off-by: Joshua Ashton Co-developed-by: Melissa Wen Signed-off-by: Melissa Wen --- drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h| 4 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 17 + .../drm/amd/display/amdgpu_dm/amdgpu_dm_color.c | 6 ++ .../drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c | 13 + 4 files changed, 40 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h index 9b6fab86c6c3..62044d41da75 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h @@ -359,6 +359,10 @@ struct amdgpu_mode_info { * to go from scanout/encoded values to linear values. */ struct drm_property *plane_degamma_tf_property; + /** +* @plane_hdr_mult_property: +*/ + struct drm_property *plane_hdr_mult_property; }; #define AMDGPU_MAX_BL_LEVEL 0xFF diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index fc4f188d397e..bb2ce843369d 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -55,6 +55,9 @@ #define HDMI_AMD_VENDOR_SPECIFIC_DATA_BLOCK_IEEE_REGISTRATION_ID 0x1A #define AMD_VSDB_VERSION_3_FEATURECAP_REPLAYMODE 0x40 #define HDMI_AMD_VENDOR_SPECIFIC_DATA_BLOCK_VERSION_3 0x3 + +#define AMDGPU_HDR_MULT_DEFAULT (0x1LL) + /* #include "include/amdgpu_dal_power_if.h" #include "amdgpu_dm_irq.h" @@ -767,6 +770,20 @@ struct dm_plane_state { * linearize. */ enum amdgpu_transfer_function degamma_tf; + /** +* @hdr_mult: +* +* Multiplier to 'gain' the plane. 
When PQ is decoded using the fixed +* func transfer function to the internal FP16 fb, 1.0 -> 80 nits (on +* AMD at least). When sRGB is decoded, 1.0 -> 1.0, obviously. +* Therefore, 1.0 multiplier = 80 nits for SDR content. So if you +* want, 203 nits for SDR content, pass in (203.0 / 80.0). Format is +* S31.32 sign-magnitude. +* +* HDR multiplier can wide range beyond [0.0, 1.0]. This means that PQ +* TF is needed for any subsequent linear-to-non-linear transforms. +*/ + __u64 hdr_mult; }; struct dm_crtc_state { diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index d5dbd20a6766..caf49a044ab4 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -224,6 +224,12 @@ amdgpu_dm_create_color_properties(struct amdgpu_device *adev) return -ENOMEM; adev->mode_info.plane_degamma_tf_property = prop; + prop = drm_property_create_range(adev_to_drm(adev), +0, "AMD_PLANE_HDR_MULT", 0, U64_MAX); + if (!prop) + return -ENOMEM; + adev->mode_info.plane_hdr_mult_property = prop; + return 0; } #endif diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c index 04af6db8cffd..ae64d4b73360 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c @@ -1337,6 +1337,7 @@ static void dm_drm_plane_reset(struct drm_plane *plane) __drm_atomic_helper_plane_reset(plane, _state->base); amdgpu_state->degamma_tf = AMDGPU_TRANSFER_FUNCTION_DEFAULT; + amdgpu_state->hdr_mult = AMDGPU_HDR_MULT_DEFAULT; } static struct drm_plane_state * @@ -1360,6 +1361,7 @@ dm_drm_plane_duplicate_state(struct drm_plane *plane) drm_property_blob_get(dm_plane_state->degamma_lut); dm_plane_state->degamma_tf = old_dm_plane_state->degamma_tf; + dm_plane_state->hdr_mult = old_dm_plane_state->hdr_mult; return _plane_state->base; 
} @@ -1456,6 +1458,10 @@ dm_atomic_plane_attach_color_mgmt_properties(struct amdgpu_display_manager *dm, dm->adev->mode_info.plane_degamma_tf_property, AMDGPU_TRANSFER_FUNCTION_DEFAULT); } + /* HDR MULT is always available */ + drm_object_attach_property(>base, + dm->adev->mode_info.plane_hdr_mult_property, + AMDGPU_HDR_MULT_DEFAULT); } static int @@ -1482,6 +1488,11 @@
[PATCH v4 07/32] drm/amd/display: document AMDGPU pre-defined transfer functions
Brief documentation about pre-defined transfer function usage on AMD display driver and standardized EOTFs and inverse EOTFs. v3: - Document BT709 OETF (Pekka) - Fix description of sRGB and pure power funcs (Pekka) v4: - Add description of linear and non-linear forms (Harry) Co-developed-by: Harry Wentland Signed-off-by: Harry Wentland Signed-off-by: Melissa Wen --- .../amd/display/amdgpu_dm/amdgpu_dm_color.c | 62 +++ 1 file changed, 62 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index d03bdb010e8b..d5dbd20a6766 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -85,6 +85,68 @@ void amdgpu_dm_init_color_mod(void) } #ifdef AMD_PRIVATE_COLOR +/* Pre-defined Transfer Functions (TF) + * + * AMD driver supports pre-defined mathematical functions for transferring + * between encoded values and optical/linear space. Depending on HW color caps, + * ROMs and curves built by the AMD color module support these transforms. + * + * The driver-specific color implementation exposes properties for pre-blending + * degamma TF, shaper TF (before 3D LUT), and blend(dpp.ogam) TF and + * post-blending regamma (mpc.ogam) TF. However, only pre-blending degamma + * supports ROM curves. AMD color module uses pre-defined coefficients to build + * curves for the other blocks. What can be done by each color block is + * described by struct dpp_color_caps and struct mpc_color_caps. 
+ * + * AMD driver-specific color API exposes the following pre-defined transfer + * functions: + * + * - Identity: linear/identity relationship between pixel value and + * luminance value; + * - Gamma 2.2, Gamma 2.4, Gamma 2.6: pure power functions; + * - sRGB: 2.4: The piece-wise transfer function from IEC 61966-2-1:1999; + * - BT.709: has a linear segment in the bottom part and then a power function + * with a 0.45 (~1/2.22) gamma for the rest of the range; standardized by + * ITU-R BT.709-6; + * - PQ (Perceptual Quantizer): used for HDR display, allows luminance range + * capability of 0 to 10,000 nits; standardized by SMPTE ST 2084. + * + * The AMD color model is designed with an assumption that SDR (sRGB, BT.709, + * Gamma 2.2, etc.) peak white maps (normalized to 1.0 FP) to 80 nits in the PQ + * system. This has the implication that PQ EOTF (non-linear to linear) maps to + * [0.0..125.0] where 125.0 = 10,000 nits / 80 nits. + * + * Non-linear and linear forms are described in the table below: + * + * ┌───┬─┬──┐ + * │ │ Non-linear │ Linear │ + * ├───┼─┼──┤ + * │ sRGB │ UNORM or [0.0, 1.0] │ [0.0, 1.0] │ + * ├───┼─┼──┤ + * │ BT709 │ UNORM or [0.0, 1.0] │ [0.0, 1.0] │ + * ├───┼─┼──┤ + * │ Gamma 2.x │ UNORM or [0.0, 1.0] │ [0.0, 1.0] │ + * ├───┼─┼──┤ + * │PQ │ UNORM or FP16 CCCS* │ [0.0, 125.0] │ + * ├───┼─┼──┤ + * │ Identity │ UNORM or FP16 CCCS* │ [0.0, 1.0] or CCCS** │ + * └───┴─┴──┘ + * * CCCS: Windows canonical composition color space + * ** Respectively + * + * In the driver-specific API, color block names attached to TF properties + * suggest the intention regarding non-linear encoding pixel's luminance + * values. As some newer encodings don't use gamma curve, we make encoding and + * decoding explicit by defining an enum list of transfer functions supported + * in terms of EOTF and inverse EOTF, where: + * + * - EOTF (electro-optical transfer function): is the transfer function to go + * from the encoded value to an optical (linear) value. 
De-gamma functions + * traditionally do this. + * - Inverse EOTF (simply the inverse of the EOTF): is usually intended to go + * from an optical/linear space (which might have been used for blending) + * back to the encoded values. Gamma functions traditionally do this. + */ static const char * const amdgpu_transfer_function_names[] = { [AMDGPU_TRANSFER_FUNCTION_DEFAULT] = "Default", -- 2.40.1
[PATCH v4 05/32] drm/amd/display: add plane degamma TF driver-specific property
From: Joshua Ashton Allow userspace to tell the kernel driver the input space and, therefore, uses the correct predefined transfer function (TF) to go from encoded values to linear values. v2: - rename TF enum prefix from DRM_ to AMDGPU_ (Harry) - remove HLG TF Reviewed-by: Harry Wentland Signed-off-by: Joshua Ashton Co-developed-by: Melissa Wen Signed-off-by: Melissa Wen --- drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h | 5 + .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 19 + .../amd/display/amdgpu_dm/amdgpu_dm_color.c | 21 +++ .../amd/display/amdgpu_dm/amdgpu_dm_plane.c | 19 +++-- 4 files changed, 62 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h index 2d00802b3265..9b6fab86c6c3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h @@ -354,6 +354,11 @@ struct amdgpu_mode_info { * size of degamma LUT as supported by the driver (read-only). */ struct drm_property *plane_degamma_lut_size_property; + /** +* @plane_degamma_tf_property: Plane pre-defined transfer function +* to go from scanout/encoded values to linear values. 
+*/ + struct drm_property *plane_degamma_tf_property; }; #define AMDGPU_MAX_BL_LEVEL 0xFF diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index 0f565469b4b5..4c4cdf7fc6be 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -724,6 +724,18 @@ struct amdgpu_dm_wb_connector { extern const struct amdgpu_ip_block_version dm_ip_block; +enum amdgpu_transfer_function { + AMDGPU_TRANSFER_FUNCTION_DEFAULT, + AMDGPU_TRANSFER_FUNCTION_SRGB, + AMDGPU_TRANSFER_FUNCTION_BT709, + AMDGPU_TRANSFER_FUNCTION_PQ, + AMDGPU_TRANSFER_FUNCTION_LINEAR, + AMDGPU_TRANSFER_FUNCTION_UNITY, + AMDGPU_TRANSFER_FUNCTION_GAMMA22, + AMDGPU_TRANSFER_FUNCTION_GAMMA24, + AMDGPU_TRANSFER_FUNCTION_GAMMA26, +}; + struct dm_plane_state { struct drm_plane_state base; struct dc_plane_state *dc_state; @@ -737,6 +749,13 @@ struct dm_plane_state { * The blob (if not NULL) is an array of drm_color_lut. */ struct drm_property_blob *degamma_lut; + /** +* @degamma_tf: +* +* Predefined transfer function to tell DC driver the input space to +* linearize. 
+*/ + enum amdgpu_transfer_function degamma_tf; }; struct dm_crtc_state { diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index cf175b86ba80..56ce008b9095 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -85,6 +85,18 @@ void amdgpu_dm_init_color_mod(void) } #ifdef AMD_PRIVATE_COLOR +static const struct drm_prop_enum_list amdgpu_transfer_function_enum_list[] = { + { AMDGPU_TRANSFER_FUNCTION_DEFAULT, "Default" }, + { AMDGPU_TRANSFER_FUNCTION_SRGB, "sRGB" }, + { AMDGPU_TRANSFER_FUNCTION_BT709, "BT.709" }, + { AMDGPU_TRANSFER_FUNCTION_PQ, "PQ (Perceptual Quantizer)" }, + { AMDGPU_TRANSFER_FUNCTION_LINEAR, "Linear" }, + { AMDGPU_TRANSFER_FUNCTION_UNITY, "Unity" }, + { AMDGPU_TRANSFER_FUNCTION_GAMMA22, "Gamma 2.2" }, + { AMDGPU_TRANSFER_FUNCTION_GAMMA24, "Gamma 2.4" }, + { AMDGPU_TRANSFER_FUNCTION_GAMMA26, "Gamma 2.6" }, +}; + int amdgpu_dm_create_color_properties(struct amdgpu_device *adev) { @@ -104,6 +116,15 @@ amdgpu_dm_create_color_properties(struct amdgpu_device *adev) return -ENOMEM; adev->mode_info.plane_degamma_lut_size_property = prop; + prop = drm_property_create_enum(adev_to_drm(adev), + DRM_MODE_PROP_ENUM, + "AMD_PLANE_DEGAMMA_TF", + amdgpu_transfer_function_enum_list, + ARRAY_SIZE(amdgpu_transfer_function_enum_list)); + if (!prop) + return -ENOMEM; + adev->mode_info.plane_degamma_tf_property = prop; + return 0; } #endif diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c index 69357a8ae887..04af6db8cffd 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c @@ -1332,8 +1332,11 @@ static void dm_drm_plane_reset(struct drm_plane *plane) amdgpu_state = kzalloc(sizeof(*amdgpu_state), GFP_KERNEL); WARN_ON(amdgpu_state == NULL); - if (amdgpu_state) - 
__drm_atomic_helper_plane_reset(plane, _state->base); + if
[PATCH v4 06/32] drm/amd/display: explicitly define EOTF and inverse EOTF
Instead of relying on color block names to get the transfer function intention regarding encoding pixel's luminance, define supported Electro-Optical Transfer Functions (EOTFs) and inverse EOTFs, that includes pure gamma or standardized transfer functions. v3: - squash linear and unity TFs to identity (Pekka) - define the right TFs for BT.709 (Pekka and Harry) - add comment about AMD TF coefficients Suggested-by: Harry Wentland Signed-off-by: Melissa Wen --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 27 +--- .../amd/display/amdgpu_dm/amdgpu_dm_color.c | 67 ++- 2 files changed, 71 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index 4c4cdf7fc6be..fc4f188d397e 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -724,16 +724,27 @@ struct amdgpu_dm_wb_connector { extern const struct amdgpu_ip_block_version dm_ip_block; +/* enum amdgpu_transfer_function: pre-defined transfer function supported by AMD. + * + * It includes standardized transfer functions and pure power functions. 
The + * transfer function coefficients are available at modules/color/color_gamma.c + */ enum amdgpu_transfer_function { AMDGPU_TRANSFER_FUNCTION_DEFAULT, - AMDGPU_TRANSFER_FUNCTION_SRGB, - AMDGPU_TRANSFER_FUNCTION_BT709, - AMDGPU_TRANSFER_FUNCTION_PQ, - AMDGPU_TRANSFER_FUNCTION_LINEAR, - AMDGPU_TRANSFER_FUNCTION_UNITY, - AMDGPU_TRANSFER_FUNCTION_GAMMA22, - AMDGPU_TRANSFER_FUNCTION_GAMMA24, - AMDGPU_TRANSFER_FUNCTION_GAMMA26, + AMDGPU_TRANSFER_FUNCTION_SRGB_EOTF, + AMDGPU_TRANSFER_FUNCTION_BT709_INV_OETF, + AMDGPU_TRANSFER_FUNCTION_PQ_EOTF, + AMDGPU_TRANSFER_FUNCTION_IDENTITY, + AMDGPU_TRANSFER_FUNCTION_GAMMA22_EOTF, + AMDGPU_TRANSFER_FUNCTION_GAMMA24_EOTF, + AMDGPU_TRANSFER_FUNCTION_GAMMA26_EOTF, + AMDGPU_TRANSFER_FUNCTION_SRGB_INV_EOTF, + AMDGPU_TRANSFER_FUNCTION_BT709_OETF, + AMDGPU_TRANSFER_FUNCTION_PQ_INV_EOTF, + AMDGPU_TRANSFER_FUNCTION_GAMMA22_INV_EOTF, + AMDGPU_TRANSFER_FUNCTION_GAMMA24_INV_EOTF, + AMDGPU_TRANSFER_FUNCTION_GAMMA26_INV_EOTF, +AMDGPU_TRANSFER_FUNCTION_COUNT }; struct dm_plane_state { diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index 56ce008b9095..d03bdb010e8b 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -85,18 +85,57 @@ void amdgpu_dm_init_color_mod(void) } #ifdef AMD_PRIVATE_COLOR -static const struct drm_prop_enum_list amdgpu_transfer_function_enum_list[] = { - { AMDGPU_TRANSFER_FUNCTION_DEFAULT, "Default" }, - { AMDGPU_TRANSFER_FUNCTION_SRGB, "sRGB" }, - { AMDGPU_TRANSFER_FUNCTION_BT709, "BT.709" }, - { AMDGPU_TRANSFER_FUNCTION_PQ, "PQ (Perceptual Quantizer)" }, - { AMDGPU_TRANSFER_FUNCTION_LINEAR, "Linear" }, - { AMDGPU_TRANSFER_FUNCTION_UNITY, "Unity" }, - { AMDGPU_TRANSFER_FUNCTION_GAMMA22, "Gamma 2.2" }, - { AMDGPU_TRANSFER_FUNCTION_GAMMA24, "Gamma 2.4" }, - { AMDGPU_TRANSFER_FUNCTION_GAMMA26, "Gamma 2.6" }, +static const char * const 
+amdgpu_transfer_function_names[] = { + [AMDGPU_TRANSFER_FUNCTION_DEFAULT] = "Default", + [AMDGPU_TRANSFER_FUNCTION_IDENTITY] = "Identity", + [AMDGPU_TRANSFER_FUNCTION_SRGB_EOTF]= "sRGB EOTF", + [AMDGPU_TRANSFER_FUNCTION_BT709_INV_OETF] = "BT.709 inv_OETF", + [AMDGPU_TRANSFER_FUNCTION_PQ_EOTF] = "PQ EOTF", + [AMDGPU_TRANSFER_FUNCTION_GAMMA22_EOTF] = "Gamma 2.2 EOTF", + [AMDGPU_TRANSFER_FUNCTION_GAMMA24_EOTF] = "Gamma 2.4 EOTF", + [AMDGPU_TRANSFER_FUNCTION_GAMMA26_EOTF] = "Gamma 2.6 EOTF", + [AMDGPU_TRANSFER_FUNCTION_SRGB_INV_EOTF]= "sRGB inv_EOTF", + [AMDGPU_TRANSFER_FUNCTION_BT709_OETF] = "BT.709 OETF", + [AMDGPU_TRANSFER_FUNCTION_PQ_INV_EOTF] = "PQ inv_EOTF", + [AMDGPU_TRANSFER_FUNCTION_GAMMA22_INV_EOTF] = "Gamma 2.2 inv_EOTF", + [AMDGPU_TRANSFER_FUNCTION_GAMMA24_INV_EOTF] = "Gamma 2.4 inv_EOTF", + [AMDGPU_TRANSFER_FUNCTION_GAMMA26_INV_EOTF] = "Gamma 2.6 inv_EOTF", }; +static const u32 amdgpu_eotf = + BIT(AMDGPU_TRANSFER_FUNCTION_SRGB_EOTF) | + BIT(AMDGPU_TRANSFER_FUNCTION_BT709_INV_OETF) | + BIT(AMDGPU_TRANSFER_FUNCTION_PQ_EOTF) | + BIT(AMDGPU_TRANSFER_FUNCTION_GAMMA22_EOTF) | + BIT(AMDGPU_TRANSFER_FUNCTION_GAMMA24_EOTF) | + BIT(AMDGPU_TRANSFER_FUNCTION_GAMMA26_EOTF); + +static struct drm_property * +amdgpu_create_tf_property(struct drm_device *dev, + const char *name, + u32 supported_tf) +{ + u32
[PATCH v4 00/32] drm/amd/display: add AMD driver-specific properties for color mgmt
Hello, Just another iteration for AMD driver-specific color properties. Basically, addressing comments from the previous version. Recap: this series extends the current KMS color management API with AMD driver-specific properties to enhance the color management support on AMD Steam Deck. The key additions to the color pipeline include: - plane degamma LUT and pre-defined TF; - plane HDR multiplier; - plane CTM 3x4; - plane shaper LUT and pre-defined TF; - plane 3D LUT; - plane blend LUT and pre-defined TF; - CRTC gamma pre-defined TF; You can find the AMD HW color capabilities documented here: https://dri.freedesktop.org/docs/drm/gpu/amdgpu/display/display-manager.html#color-management-properties The userspace case is Gamescope[1], the compositor for SteamOS. Gamescope has already adopted AMD driver-specific properties to implement comprehensive color management support, including gamut mapping, HDR rendering, SDR on HDR, HDR on SDR. Using these features in the SteamOS 3.5[2] users can expect a significantly enhanced visual experience. 
You can find a brief overview of the Steam Deck color pipeline here: https://github.com/ValveSoftware/gamescope/blob/master/src/docs/Steam%20Deck%20Display%20Pipeline.png Changes from: [RFC] https://lore.kernel.org/dri-devel/20230423141051.702990-1-m...@igalia.com - Remove KConfig and guard properties with `AMD_PRIVATE_COLOR`; - Remove properties for post-blending/CRTC shaper TF+LUT and 3D LUT; - Use color caps to improve the support of pre-defined curve; [v1] https://lore.kernel.org/dri-devel/20230523221520.3115570-1-m...@igalia.com - Replace DRM_ by AMDGPU_ prefix for transfer function (TF) enum; - Explicitly define EOTFs and inverse EOTFs and set props accordingly; - Document pre-defined transfer functions; - Remove HLG transfer function from supported TFs; - Remove misleading comments; - Remove post-blending shaper TF+LUT and 3D LUT support; - Move driver-specific property operations from amdgpu_display.c to amdgpu_dm_color.c; - Reset planes if any color props change; - Add plane CTM 3x4 support; - Removed two DC fixes already applied upstream; [v2] https://lore.kernel.org/dri-devel/20230810160314.48225-1-m...@igalia.com - Many documentation fixes: BT.709 OETF, description of sRGB and pure power functions, TF+1D LUT behavior; - Rename CTM2 to CTM 3x4 and fix misleading comment about DC gamut remap; - Squash `Linear` and `Unity` TF in `Identity`; - Remove the `MPC gamut remap` patch already applied upstream[3]; - Remove outdated delta segmentation fix; - Nits/small fixes; [v3] https://lore.kernel.org/amd-gfx/20230925194932.1329483-1-m...@igalia.com - Add table to describe value range in linear and non-linear forms - Comment the PQ TF need after HDR multiplier - Advertise the 3D LUT size as the size of a single-dimension (read-only) - remove function to check expected size from 3DLUT caps - cleanup comments It's worth noting that driver-specific properties are guarded by `AMD_PRIVATE_COLOR`. 
So, finally, this is the color management API when driver-specific properties are enabled: +--+ | PLANE | | | | ++ | | | AMD Degamma| | | || | | | EOTF | 1D LUT | | | ++---+ | | | | | +v---+ | | |AMD HDR | | | |Multiply| | | ++---+ | | | | | +v---+ | | | AMD CTM (3x4) | | | ++---+ | | | | | +v---+ | | | AMD Shaper | | | || | | | inv_EOTF | | | | | Custom 1D LUT | | | ++---+ | | | | | +v---+ | | | AMD 3D LUT | | | | 17^3/12-bit | | | ++---+ | | | | | +v---+ | | | AMD Blend | | | || | | | EOTF | 1D LUT | | | ++---+ | | | | ++--v-++ || Blending || ++--+-++ |CRTC | | | | | | +---v---+ | | | DRM Degamma | | | | | | | | Custom 1D LUT | | | +---+---+ | | | | | +---v---+ | | | DRM CTM (3x3) | | | +---+---+ | | | | | +---v---+ | | | DRM Gamma | | | | | | | | Custom 1D LUT | | | +---+ | | | *AMD Gamma| | | | inv_EOTF| | | +---+ | | | +--+ Please, let us know your thoughts. Best Regards, Melissa Wen [1] https://github.com/ValveSoftware/gamescope [2] https://store.steampowered.com/news/app/1675200/view/3686804163591367815 [3] https://lore.kernel.org/dri-devel/20230721132431.692158-1-m...@igalia.com Joshua Ashton (14): drm/amd/display: add plane degamma TF driver-specific property drm/amd/display: add plane
[PATCH v4 03/32] drm/drm_plane: track color mgmt changes per plane
We will add color mgmt properties to DRM planes in the next patches and we want to track when one of these properties changes to define atomic commit behaviors. Using a similar approach from CRTC color props, we set a color_mgmt_changed boolean whenever a plane color prop changes. Reviewed-by: Harry Wentland Signed-off-by: Melissa Wen --- drivers/gpu/drm/drm_atomic.c | 1 + drivers/gpu/drm/drm_atomic_state_helper.c | 1 + include/drm/drm_plane.h | 7 +++ 3 files changed, 9 insertions(+) diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c index 2c454568a607..2925371d230d 100644 --- a/drivers/gpu/drm/drm_atomic.c +++ b/drivers/gpu/drm/drm_atomic.c @@ -724,6 +724,7 @@ static void drm_atomic_plane_print_state(struct drm_printer *p, drm_get_color_encoding_name(state->color_encoding)); drm_printf(p, "\tcolor-range=%s\n", drm_get_color_range_name(state->color_range)); + drm_printf(p, "\tcolor_mgmt_changed=%d\n", state->color_mgmt_changed); if (plane->funcs->atomic_print_state) plane->funcs->atomic_print_state(p, state); diff --git a/drivers/gpu/drm/drm_atomic_state_helper.c b/drivers/gpu/drm/drm_atomic_state_helper.c index 784e63d70a42..25bb0859fda7 100644 --- a/drivers/gpu/drm/drm_atomic_state_helper.c +++ b/drivers/gpu/drm/drm_atomic_state_helper.c @@ -338,6 +338,7 @@ void __drm_atomic_helper_plane_duplicate_state(struct drm_plane *plane, state->fence = NULL; state->commit = NULL; state->fb_damage_clips = NULL; + state->color_mgmt_changed = false; } EXPORT_SYMBOL(__drm_atomic_helper_plane_duplicate_state); diff --git a/include/drm/drm_plane.h b/include/drm/drm_plane.h index 51291983ea44..52c3287da0da 100644 --- a/include/drm/drm_plane.h +++ b/include/drm/drm_plane.h @@ -237,6 +237,13 @@ struct drm_plane_state { /** @state: backpointer to global drm_atomic_state */ struct drm_atomic_state *state; + + /** +* @color_mgmt_changed: Color management properties have changed. Used +* by the atomic helpers and drivers to steer the atomic commit control +* flow. 
+*/ + bool color_mgmt_changed : 1; }; static inline struct drm_rect -- 2.40.1
[PATCH v4 04/32] drm/amd/display: add driver-specific property for plane degamma LUT
Hook up driver-specific atomic operations for managing AMD color properties. Create AMD driver-specific color management properties and attach them according to HW capabilities defined by `struct dc_color_caps`. First add plane degamma LUT properties that means user-blob and its size. We will add more plane color properties in the next patches. In addition, we define AMD_PRIVATE_COLOR to guard these driver-specific plane properties. Plane degamma can be used to linearize input space for arithmetical operations that are more accurate when applied in linear color. v2: - update degamma LUT prop description - move private color operations from amdgpu_display to amdgpu_dm_color Reviewed-by: Harry Wentland Co-developed-by: Joshua Ashton Signed-off-by: Joshua Ashton Signed-off-by: Melissa Wen --- drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h | 11 +++ .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 5 ++ .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 11 +++ .../amd/display/amdgpu_dm/amdgpu_dm_color.c | 24 ++ .../amd/display/amdgpu_dm/amdgpu_dm_plane.c | 81 +++ 5 files changed, 132 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h index d8083972e393..2d00802b3265 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h @@ -343,6 +343,17 @@ struct amdgpu_mode_info { int disp_priority; const struct amdgpu_display_funcs *funcs; const enum drm_plane_type *plane_type; + + /* Driver-private color mgmt props */ + + /* @plane_degamma_lut_property: Plane property to set a degamma LUT to +* convert input space before blending. +*/ + struct drm_property *plane_degamma_lut_property; + /* @plane_degamma_lut_size_property: Plane property to define the max +* size of degamma LUT as supported by the driver (read-only). 
+*/ + struct drm_property *plane_degamma_lut_size_property; }; #define AMDGPU_MAX_BL_LEVEL 0xFF diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index a59a11ae42db..7c55b1c0ac5c 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -4106,6 +4106,11 @@ static int amdgpu_dm_mode_config_init(struct amdgpu_device *adev) return r; } +#ifdef AMD_PRIVATE_COLOR + if (amdgpu_dm_create_color_properties(adev)) + return -ENOMEM; +#endif + r = amdgpu_dm_audio_init(adev); if (r) { dc_release_state(state->context); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index b16613082bc3..0f565469b4b5 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -727,6 +727,16 @@ extern const struct amdgpu_ip_block_version dm_ip_block; struct dm_plane_state { struct drm_plane_state base; struct dc_plane_state *dc_state; + + /* Plane color mgmt */ + /** +* @degamma_lut: +* +* 1D LUT for mapping framebuffer/plane pixel data before sampling or +* blending operations. It's usually applied to linearize input space. +* The blob (if not NULL) is an array of drm_color_lut. 
+*/ + struct drm_property_blob *degamma_lut; }; struct dm_crtc_state { @@ -817,6 +827,7 @@ void amdgpu_dm_trigger_timing_sync(struct drm_device *dev); #define MAX_COLOR_LEGACY_LUT_ENTRIES 256 void amdgpu_dm_init_color_mod(void); +int amdgpu_dm_create_color_properties(struct amdgpu_device *adev); int amdgpu_dm_verify_lut_sizes(const struct drm_crtc_state *crtc_state); int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc); int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc, diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index a4cb23d059bd..cf175b86ba80 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -84,6 +84,30 @@ void amdgpu_dm_init_color_mod(void) setup_x_points_distribution(); } +#ifdef AMD_PRIVATE_COLOR +int +amdgpu_dm_create_color_properties(struct amdgpu_device *adev) +{ + struct drm_property *prop; + + prop = drm_property_create(adev_to_drm(adev), + DRM_MODE_PROP_BLOB, + "AMD_PLANE_DEGAMMA_LUT", 0); + if (!prop) + return -ENOMEM; + adev->mode_info.plane_degamma_lut_property = prop; + + prop = drm_property_create_range(adev_to_drm(adev), +DRM_MODE_PROP_IMMUTABLE, +"AMD_PLANE_DEGAMMA_LUT_SIZE", 0,
[PATCH v4 02/32] drm/drm_property: make replace_property_blob_from_id a DRM helper
Place it in drm_property where drm_property_replace_blob and drm_property_lookup_blob live. Then we can use the DRM helper for driver-specific KMS properties too. Reviewed-by: Harry Wentland Reviewed-by: Liviu Dudau Signed-off-by: Melissa Wen --- drivers/gpu/drm/arm/malidp_crtc.c | 2 +- drivers/gpu/drm/drm_atomic_uapi.c | 43 --- drivers/gpu/drm/drm_property.c| 49 +++ include/drm/drm_property.h| 6 4 files changed, 61 insertions(+), 39 deletions(-) diff --git a/drivers/gpu/drm/arm/malidp_crtc.c b/drivers/gpu/drm/arm/malidp_crtc.c index dc01c43f6193..d72c22dcf685 100644 --- a/drivers/gpu/drm/arm/malidp_crtc.c +++ b/drivers/gpu/drm/arm/malidp_crtc.c @@ -221,7 +221,7 @@ static int malidp_crtc_atomic_check_ctm(struct drm_crtc *crtc, /* * The size of the ctm is checked in -* drm_atomic_replace_property_blob_from_id. +* drm_property_replace_blob_from_id. */ ctm = (struct drm_color_ctm *)state->ctm->data; for (i = 0; i < ARRAY_SIZE(ctm->matrix); ++i) { diff --git a/drivers/gpu/drm/drm_atomic_uapi.c b/drivers/gpu/drm/drm_atomic_uapi.c index d867e7f9f2cd..a6a9ee5086dd 100644 --- a/drivers/gpu/drm/drm_atomic_uapi.c +++ b/drivers/gpu/drm/drm_atomic_uapi.c @@ -362,39 +362,6 @@ static s32 __user *get_out_fence_for_connector(struct drm_atomic_state *state, return fence_ptr; } -static int -drm_atomic_replace_property_blob_from_id(struct drm_device *dev, -struct drm_property_blob **blob, -uint64_t blob_id, -ssize_t expected_size, -ssize_t expected_elem_size, -bool *replaced) -{ - struct drm_property_blob *new_blob = NULL; - - if (blob_id != 0) { - new_blob = drm_property_lookup_blob(dev, blob_id); - if (new_blob == NULL) - return -EINVAL; - - if (expected_size > 0 && - new_blob->length != expected_size) { - drm_property_blob_put(new_blob); - return -EINVAL; - } - if (expected_elem_size > 0 && - new_blob->length % expected_elem_size != 0) { - drm_property_blob_put(new_blob); - return -EINVAL; - } - } - - *replaced |= drm_property_replace_blob(blob, new_blob); - 
drm_property_blob_put(new_blob); - - return 0; -} - static int drm_atomic_crtc_set_property(struct drm_crtc *crtc, struct drm_crtc_state *state, struct drm_property *property, uint64_t val) @@ -415,7 +382,7 @@ static int drm_atomic_crtc_set_property(struct drm_crtc *crtc, } else if (property == config->prop_vrr_enabled) { state->vrr_enabled = val; } else if (property == config->degamma_lut_property) { - ret = drm_atomic_replace_property_blob_from_id(dev, + ret = drm_property_replace_blob_from_id(dev, >degamma_lut, val, -1, sizeof(struct drm_color_lut), @@ -423,7 +390,7 @@ static int drm_atomic_crtc_set_property(struct drm_crtc *crtc, state->color_mgmt_changed |= replaced; return ret; } else if (property == config->ctm_property) { - ret = drm_atomic_replace_property_blob_from_id(dev, + ret = drm_property_replace_blob_from_id(dev, >ctm, val, sizeof(struct drm_color_ctm), -1, @@ -431,7 +398,7 @@ static int drm_atomic_crtc_set_property(struct drm_crtc *crtc, state->color_mgmt_changed |= replaced; return ret; } else if (property == config->gamma_lut_property) { - ret = drm_atomic_replace_property_blob_from_id(dev, + ret = drm_property_replace_blob_from_id(dev, >gamma_lut, val, -1, sizeof(struct drm_color_lut), @@ -563,7 +530,7 @@ static int drm_atomic_plane_set_property(struct drm_plane *plane, } else if (property == plane->color_range_property) { state->color_range = val; } else if (property == config->prop_fb_damage_clips) { - ret = drm_atomic_replace_property_blob_from_id(dev, + ret = drm_property_replace_blob_from_id(dev, >fb_damage_clips, val, -1, @@ -729,7 +696,7 @@ static int
[PATCH v4 01/32] drm/drm_mode_object: increase max objects to accommodate new color props
DRM_OBJECT_MAX_PROPERTY limits the number of properties to be attached and we are increasing that value every time we add a new property (generic or driver-specific). In this series, we are adding 13 new KMS driver-specific properties for AMD color management: - CRTC Gamma enumerated Transfer Function - Plane: Degamma LUT+size+TF, HDR multiplier, shaper LUT+size+TF, 3D LUT+size, blend LUT+size+TF (12) Therefore, just increase DRM_OBJECT_MAX_PROPERTY to a number (64) that accommodates these new properties and gives some room for others, avoiding changing this number every time we add a new KMS property. Reviewed-by: Harry Wentland Reviewed-by: Simon Ser Signed-off-by: Melissa Wen --- include/drm/drm_mode_object.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/drm/drm_mode_object.h b/include/drm/drm_mode_object.h index 912f1e415685..08d7a7f0188f 100644 --- a/include/drm/drm_mode_object.h +++ b/include/drm/drm_mode_object.h @@ -60,7 +60,7 @@ struct drm_mode_object { void (*free_cb)(struct kref *kref); }; -#define DRM_OBJECT_MAX_PROPERTY 24 +#define DRM_OBJECT_MAX_PROPERTY 64 /** * struct drm_object_properties - property tracking for _mode_object */ -- 2.40.1
Re: [PATCH 0/9] drm: Annotate structs with __counted_by
On Thu, Oct 05, 2023 at 11:42:38AM +0200, Christian König wrote: > Am 02.10.23 um 20:22 schrieb Kees Cook: > > On Mon, Oct 02, 2023 at 08:11:41PM +0200, Christian König wrote: > > > Am 02.10.23 um 20:08 schrieb Kees Cook: > > > > On Mon, Oct 02, 2023 at 08:01:57PM +0200, Christian König wrote: > > > > > Am 02.10.23 um 18:53 schrieb Kees Cook: > > > > > > On Mon, Oct 02, 2023 at 11:06:19AM -0400, Alex Deucher wrote: > > > > > > > On Mon, Oct 2, 2023 at 5:20 AM Christian König > > > > > > > wrote: > > > > > > > > Am 29.09.23 um 21:33 schrieb Kees Cook: > > > > > > > > > On Fri, 22 Sep 2023 10:32:05 -0700, Kees Cook wrote: > > > > > > > > > > This is a batch of patches touching drm for preparing for > > > > > > > > > > the coming > > > > > > > > > > implementation by GCC and Clang of the __counted_by > > > > > > > > > > attribute. Flexible > > > > > > > > > > array members annotated with __counted_by can have their > > > > > > > > > > accesses > > > > > > > > > > bounds-checked at run-time checking via CONFIG_UBSAN_BOUNDS > > > > > > > > > > (for array > > > > > > > > > > indexing) and CONFIG_FORTIFY_SOURCE (for > > > > > > > > > > strcpy/memcpy-family functions). > > > > > > > > > > > > > > > > > > > > As found with Coccinelle[1], add __counted_by to structs > > > > > > > > > > that would > > > > > > > > > > benefit from the annotation. > > > > > > > > > > > > > > > > > > > > [...] > > > > > > > > > Since this got Acks, I figure I should carry it in my tree. > > > > > > > > > Let me know > > > > > > > > > if this should go via drm instead. > > > > > > > > > > > > > > > > > > Applied to for-next/hardening, thanks! > > > > > > > > > > > > > > > > > > [1/9] drm/amd/pm: Annotate struct > > > > > > > > > smu10_voltage_dependency_table with __counted_by > > > > > > > > > https://git.kernel.org/kees/c/a6046ac659d6 > > > > > > > > STOP! In a follow up discussion Alex and I figured out that > > > > > > > > this won't work. 
> > > > > > I'm so confused; from the discussion I saw that Alex said both > > > > > > instances > > > > > > were false positives? > > > > > > > > > > > > > > The value in the structure is byte swapped based on some > > > > > > > > firmware > > > > > > > > endianness which not necessary matches the CPU endianness. > > > > > > > SMU10 is APU only so the endianess of the SMU firmware and the CPU > > > > > > > will always match. > > > > > > Which I think is what is being said here? > > > > > > > > > > > > > > Please revert that one from going upstream if it's already on > > > > > > > > it's way. > > > > > > > > > > > > > > > > And because of those reasons I strongly think that patches like > > > > > > > > this > > > > > > > > should go through the DRM tree :) > > > > > > Sure, that's fine -- please let me know. It was others Acked/etc. > > > > > > Who > > > > > > should carry these patches? > > > > > Probably best if the relevant maintainer pick them up individually. > > > > > > > > > > Some of those structures are filled in by firmware/hardware and only > > > > > the > > > > > maintainers can judge if that value actually matches what the compiler > > > > > needs. > > > > > > > > > > We have cases where individual bits are used as flags or when the > > > > > size is > > > > > byte swapped etc... > > > > > > > > > > Even Alex and I didn't immediately say how and where that field is > > > > > actually > > > > > used and had to dig that up. That's where the confusion came from. > > > > Okay, I've dropped them all from my tree. Several had Acks/Reviews, so > > > > hopefully those can get picked up for the DRM tree? > > > I will pick those up to go through drm-misc-next. > > > > > > Going to ping maintainers once more when I'm not sure if stuff is correct > > > or > > > not. > > Sounds great; thanks! > > I wasn't 100% sure for the VC4 patch, but pushed the whole set to > drm-misc-next anyway. 
> > This also means that the patches are now auto merged into the drm-tip > integration branch and should any build or unit test go boom we should > notice immediately and can revert it pretty easily. Thanks very much; I'll keep an eye out for any reports. -- Kees Cook
Re: [PATCH] drm/amd/display: Fix mst hub unplug warning
On Thu, Oct 5, 2023 at 4:04 AM Wayne Lin wrote: > > [Why] > Unplug mst hub will cause warning. That's because > dm_helpers_construct_old_payload() is changed to be called after > payload removement from dc link. > > In dm_helpers_construct_old_payload(), We refer to the vcpi in > payload allocation table of dc link to construct the old payload > and payload is no longer in the table when we call the function > now. > > [How] > Refer to the mst_state to construct the number of time slot for old > payload now. Note that dm_helpers_construct_old_payload() is just > a quick workaround before and we are going to abandon it soon. > > Fixes: 5aa1dfcdf0a4 ("drm/mst: Refactor the flow for payload > allocation/removement") > Reviewed-by: Jerry Zuo > Signed-off-by: Wayne Lin Pushed to drm-misc-next. Thanks! Alex > --- > .../amd/display/amdgpu_dm/amdgpu_dm_helpers.c | 38 +-- > 1 file changed, 18 insertions(+), 20 deletions(-) > > diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c > b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c > index baf7e5254fb3..2f94bcf128c0 100644 > --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c > +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c > @@ -204,15 +204,16 @@ void dm_helpers_dp_update_branch_info( > {} > > static void dm_helpers_construct_old_payload( > - struct dc_link *link, > - int pbn_per_slot, > + struct drm_dp_mst_topology_mgr *mgr, > + struct drm_dp_mst_topology_state *mst_state, > struct drm_dp_mst_atomic_payload *new_payload, > struct drm_dp_mst_atomic_payload *old_payload) > { > - struct link_mst_stream_allocation_table current_link_table = > - > link->mst_stream_alloc_table; > - struct link_mst_stream_allocation *dc_alloc; > - int i; > + struct drm_dp_mst_atomic_payload *pos; > + int pbn_per_slot = mst_state->pbn_div; > + u8 next_payload_vc_start = mgr->next_start_slot; > + u8 payload_vc_start = new_payload->vc_start_slot; > + u8 allocated_time_slots; > > *old_payload = 
*new_payload; > > @@ -221,20 +222,17 @@ static void dm_helpers_construct_old_payload( > * struct drm_dp_mst_atomic_payload are don't care fields > * while calling drm_dp_remove_payload_part2() > */ > - for (i = 0; i < current_link_table.stream_count; i++) { > - dc_alloc = > - _link_table.stream_allocations[i]; > - > - if (dc_alloc->vcp_id == new_payload->vcpi) { > - old_payload->time_slots = dc_alloc->slot_count; > - old_payload->pbn = dc_alloc->slot_count * > pbn_per_slot; > - break; > - } > + list_for_each_entry(pos, _state->payloads, next) { > + if (pos != new_payload && > + pos->vc_start_slot > payload_vc_start && > + pos->vc_start_slot < next_payload_vc_start) > + next_payload_vc_start = pos->vc_start_slot; > } > > - /* make sure there is an old payload*/ > - ASSERT(i != current_link_table.stream_count); > + allocated_time_slots = next_payload_vc_start - payload_vc_start; > > + old_payload->time_slots = allocated_time_slots; > + old_payload->pbn = allocated_time_slots * pbn_per_slot; > } > > /* > @@ -272,8 +270,8 @@ bool dm_helpers_dp_mst_write_payload_allocation_table( > drm_dp_add_payload_part1(mst_mgr, mst_state, new_payload); > } else { > /* construct old payload by VCPI*/ > - dm_helpers_construct_old_payload(stream->link, > mst_state->pbn_div, > - new_payload, _payload); > + dm_helpers_construct_old_payload(mst_mgr, mst_state, > +new_payload, _payload); > target_payload = _payload; > > drm_dp_remove_payload_part1(mst_mgr, mst_state, new_payload); > @@ -366,7 +364,7 @@ bool dm_helpers_dp_mst_send_payload_allocation( > if (enable) { > ret = drm_dp_add_payload_part2(mst_mgr, > mst_state->base.state, new_payload); > } else { > - dm_helpers_construct_old_payload(stream->link, > mst_state->pbn_div, > + dm_helpers_construct_old_payload(mst_mgr, mst_state, > new_payload, _payload); > drm_dp_remove_payload_part2(mst_mgr, mst_state, _payload, > new_payload); > } > -- > 2.37.3 >
Re: [PATCH] drm/amdgpu: Enable SMU 13.0.0 optimizations when ROCm is active (v2)
On Wed, Oct 4, 2023 at 11:47 PM Zhang, Hawking wrote: > > [AMD Official Use Only - General] > > Hmm... thinking about it more, will it override the profile mode/workload for > 0xC8 or 0xCC SKU as well. In another words, does it mean the pmfw fix is > general to all the 13_0_0 SKUs. Yes, my understanding is that this should apply to all skus. Alex > > Other than that, the patch looks good to me. > > Regards, > Hawking > > -Original Message- > From: amd-gfx On Behalf Of Zhang, > Hawking > Sent: Thursday, October 5, 2023 11:32 > To: Deucher, Alexander ; > amd-gfx@lists.freedesktop.org > Cc: Deucher, Alexander ; Liu, Kun > > Subject: RE: [PATCH] drm/amdgpu: Enable SMU 13.0.0 optimizations when ROCm is > active (v2) > > [AMD Official Use Only - General] > > [AMD Official Use Only - General] > > Reviewed-by: Hawking Zhang > > Regards, > Hawking > -Original Message- > From: amd-gfx On Behalf Of Alex > Deucher > Sent: Wednesday, October 4, 2023 23:34 > To: amd-gfx@lists.freedesktop.org > Cc: Deucher, Alexander ; Liu, Kun > > Subject: [PATCH] drm/amdgpu: Enable SMU 13.0.0 optimizations when ROCm is > active (v2) > > From: Kun Liu > > When ROCm is active enable additional SMU 13.0.0 optimizations. > This reuses the unused powersave profile on PMFW. > > v2: move to the swsmu code since we need both bits active in > the workload mask. 
> > Signed-off-by: Alex Deucher > --- > .../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c| 17 - > 1 file changed, 16 insertions(+), 1 deletion(-) > > diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c > b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c > index 684b4e01fac2..83035fb1839a 100644 > --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c > +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c > @@ -2447,6 +2447,7 @@ static int smu_v13_0_0_set_power_profile_mode(struct > smu_context *smu, > DpmActivityMonitorCoeffInt_t *activity_monitor = > &(activity_monitor_external.DpmActivityMonitorCoeffInt); > int workload_type, ret = 0; > + u32 workload_mask; > > smu->power_profile_mode = input[size]; > > @@ -2536,9 +2537,23 @@ static int smu_v13_0_0_set_power_profile_mode(struct > smu_context *smu, > if (workload_type < 0) > return -EINVAL; > > + workload_mask = 1 << workload_type; > + > + /* Add optimizations for SMU13.0.0. Reuse the power saving profile */ > + if (smu->power_profile_mode == PP_SMC_POWER_PROFILE_COMPUTE && > + (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, > 0)) && > + ((smu->adev->pm.fw_version == 0x004e6601) || > +(smu->adev->pm.fw_version >= 0x004e7300))) { > + workload_type = smu_cmn_to_asic_specific_index(smu, > + > CMN2ASIC_MAPPING_WORKLOAD, > + > PP_SMC_POWER_PROFILE_POWERSAVING); > + if (workload_type >= 0) > + workload_mask |= 1 << workload_type; > + } > + > return smu_cmn_send_smc_msg_with_param(smu, >SMU_MSG_SetWorkloadMask, > - 1 << workload_type, > + workload_mask, >NULL); } > > -- > 2.41.0 >
Re: [PATCH v4] drm/amdkfd: Use partial migrations in GPU page faults
On 10/5/2023 8:25 AM, Philip Yang wrote: Sorry for the late reply, just notice 2 other issues: 1. function svm_range_split_by_granularity can be removed now. yes, the code has been sent to gerrit and merged. Will do it next time. 2. svm_range_restore_pages should map partial range to GPUs after partial migration. I think partial mapping is next step after partial migration is done. I have been thinking partial mapping. Will submit a new patch to address it. Regards Xiaogang Regards, Philip On 2023-10-03 19:31, Xiaogang.Chen wrote: From: Xiaogang Chen This patch implements partial migration in gpu page fault according to migration granularity(default 2MB) and not split svm range in cpu page fault handling. A svm range may include pages from both system ram and vram of one gpu now. These chagnes are expected to improve migration performance and reduce mmu callback and TLB flush workloads. Signed-off-by: Xiaogang Chen --- drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 156 +-- drivers/gpu/drm/amd/amdkfd/kfd_migrate.h | 6 +- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 83 +--- drivers/gpu/drm/amd/amdkfd/kfd_svm.h | 6 +- 4 files changed, 162 insertions(+), 89 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index 6c25dab051d5..6a059e4aff86 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -442,10 +442,10 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange, goto out_free; } if (cpages != npages) - pr_debug("partial migration, 0x%lx/0x%llx pages migrated\n", + pr_debug("partial migration, 0x%lx/0x%llx pages collected\n", cpages, npages); else - pr_debug("0x%lx pages migrated\n", cpages); + pr_debug("0x%lx pages collected\n", cpages); r = svm_migrate_copy_to_vram(node, prange, , , scratch, ttm_res_offset); migrate_vma_pages(); @@ -479,6 +479,8 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange, * svm_migrate_ram_to_vram - 
migrate svm range from system to device * @prange: range structure * @best_loc: the device to migrate to + * @start_mgr: start page to migrate + * @last_mgr: last page to migrate * @mm: the process mm structure * @trigger: reason of migration * @@ -489,6 +491,7 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange, */ static int svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, + unsigned long start_mgr, unsigned long last_mgr, struct mm_struct *mm, uint32_t trigger) { unsigned long addr, start, end; @@ -498,23 +501,30 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, unsigned long cpages = 0; long r = 0; - if (prange->actual_loc == best_loc) { - pr_debug("svms 0x%p [0x%lx 0x%lx] already on best_loc 0x%x\n", -prange->svms, prange->start, prange->last, best_loc); + if (!best_loc) { + pr_debug("svms 0x%p [0x%lx 0x%lx] migrate to sys ram\n", + prange->svms, start_mgr, last_mgr); return 0; } + if (start_mgr < prange->start || last_mgr > prange->last) { + pr_debug("range [0x%lx 0x%lx] out prange [0x%lx 0x%lx]\n", +start_mgr, last_mgr, prange->start, prange->last); + return -EFAULT; + } + node = svm_range_get_node_by_id(prange, best_loc); if (!node) { pr_debug("failed to get kfd node by id 0x%x\n", best_loc); return -ENODEV; } - pr_debug("svms 0x%p [0x%lx 0x%lx] to gpu 0x%x\n", prange->svms, -prange->start, prange->last, best_loc); + pr_debug("svms 0x%p [0x%lx 0x%lx] in [0x%lx 0x%lx] to gpu 0x%x\n", + prange->svms, start_mgr, last_mgr, prange->start, prange->last, + best_loc); - start = prange->start << PAGE_SHIFT; - end = (prange->last + 1) << PAGE_SHIFT; + start = start_mgr << PAGE_SHIFT; + end = (last_mgr + 1) << PAGE_SHIFT; r = svm_range_vram_node_new(node, prange, true); if (r) { @@ -544,8 +554,11 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, if (cpages) { prange->actual_loc = best_loc; - svm_range_dma_unmap(prange); - } else { + prange->vram_pages = prange->vram_pages + cpages; 
+ } else if (!prange->actual_loc) { + /* if no page migrated and all pages from prange are at +* sys ram drop svm_bo got from svm_range_vram_node_new +*/ svm_range_vram_node_free(prange); } @@ -663,19 +676,19 @@
Re: [PATCH v4 1/3] drm/amd: Evict resources during PM ops prepare() callback
On 10/5/2023 09:39, Christian König wrote: Am 04.10.23 um 19:18 schrieb Mario Limonciello: Linux PM core has a prepare() callback run before suspend. If the system is under high memory pressure, the resources may need to be evicted into swap instead. If the storage backing for swap is offlined during the suspend() step then such a call may fail. So duplicate this step into prepare() to move evict majority of resources while leaving all existing steps that put the GPU into a low power state in suspend(). Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2362 Signed-off-by: Mario Limonciello --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 26 +- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 7 +++--- 3 files changed, 30 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index d23fb4b5ad95..6643d0ed6b1b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1413,6 +1413,7 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev, void amdgpu_driver_release_kms(struct drm_device *dev); int amdgpu_device_ip_suspend(struct amdgpu_device *adev); +int amdgpu_device_prepare(struct drm_device *dev); int amdgpu_device_suspend(struct drm_device *dev, bool fbcon); int amdgpu_device_resume(struct drm_device *dev, bool fbcon); u32 amdgpu_get_vblank_counter_kms(struct drm_crtc *crtc); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index bad2b5577e96..67acee569c08 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4259,6 +4259,31 @@ static int amdgpu_device_evict_resources(struct amdgpu_device *adev) /* * Suspend & resume. */ +/** + * amdgpu_device_prepare - prepare for device suspend + * + * @dev: drm dev pointer + * + * Prepare to put the hw in the suspend state (all asics). + * Returns 0 for success or an error on failure. 
+ * Called at driver suspend. + */ +int amdgpu_device_prepare(struct drm_device *dev) +{ + struct amdgpu_device *adev = drm_to_adev(dev); + int r; + + if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) + return 0; + + /* Evict the majority of BOs before starting suspend sequence */ + r = amdgpu_device_evict_resources(adev); + if (r) + return r; + + return 0; +} + /** * amdgpu_device_suspend - initiate device suspend * @@ -4279,7 +4304,6 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon) adev->in_suspend = true; - /* Evict the majority of BOs before grabbing the full access */ r = amdgpu_device_evict_resources(adev); if (r) return r; I would just completely drop this extra amdgpu_device_evict_resources() call now. We have a second call which is used to evacuate firmware etc... after the hw has been shut down. That one can't move, but also shouldn't allocate that much memory. The problem is that amdgpu_device_suspend() is also called from amdgpu_switcheroo_set_state() as well as a bunch of pmops sequences that I don't expect call prepare() like poweroff(). I would think we still want to evict resources at the beginning of amdgpu_device_suspend() for all of those. So it's an extra call for the prepare() path but it should be harmless. Regards, Christian. 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index e3471293846f..175167582db0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -2425,8 +2425,9 @@ static int amdgpu_pmops_prepare(struct device *dev) /* Return a positive number here so * DPM_FLAG_SMART_SUSPEND works properly */ - if (amdgpu_device_supports_boco(drm_dev)) - return pm_runtime_suspended(dev); + if (amdgpu_device_supports_boco(drm_dev) && + pm_runtime_suspended(dev)) + return 1; /* if we will not support s3 or s2i for the device * then skip suspend @@ -2435,7 +2436,7 @@ static int amdgpu_pmops_prepare(struct device *dev) !amdgpu_acpi_is_s3_active(adev)) return 1; - return 0; + return amdgpu_device_prepare(drm_dev); } static void amdgpu_pmops_complete(struct device *dev)
Re: [PATCH v4 1/3] drm/amd: Evict resources during PM ops prepare() callback
Am 04.10.23 um 19:18 schrieb Mario Limonciello: Linux PM core has a prepare() callback run before suspend. If the system is under high memory pressure, the resources may need to be evicted into swap instead. If the storage backing for swap is offlined during the suspend() step then such a call may fail. So duplicate this step into prepare() to move evict majority of resources while leaving all existing steps that put the GPU into a low power state in suspend(). Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2362 Signed-off-by: Mario Limonciello --- drivers/gpu/drm/amd/amdgpu/amdgpu.h| 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 26 +- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c| 7 +++--- 3 files changed, 30 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index d23fb4b5ad95..6643d0ed6b1b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1413,6 +1413,7 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev, void amdgpu_driver_release_kms(struct drm_device *dev); int amdgpu_device_ip_suspend(struct amdgpu_device *adev); +int amdgpu_device_prepare(struct drm_device *dev); int amdgpu_device_suspend(struct drm_device *dev, bool fbcon); int amdgpu_device_resume(struct drm_device *dev, bool fbcon); u32 amdgpu_get_vblank_counter_kms(struct drm_crtc *crtc); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index bad2b5577e96..67acee569c08 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4259,6 +4259,31 @@ static int amdgpu_device_evict_resources(struct amdgpu_device *adev) /* * Suspend & resume. */ +/** + * amdgpu_device_prepare - prepare for device suspend + * + * @dev: drm dev pointer + * + * Prepare to put the hw in the suspend state (all asics). + * Returns 0 for success or an error on failure. + * Called at driver suspend. 
+ */ +int amdgpu_device_prepare(struct drm_device *dev) +{ + struct amdgpu_device *adev = drm_to_adev(dev); + int r; + + if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) + return 0; + + /* Evict the majority of BOs before starting suspend sequence */ + r = amdgpu_device_evict_resources(adev); + if (r) + return r; + + return 0; +} + /** * amdgpu_device_suspend - initiate device suspend * @@ -4279,7 +4304,6 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon) adev->in_suspend = true; - /* Evict the majority of BOs before grabbing the full access */ r = amdgpu_device_evict_resources(adev); if (r) return r; I would just completely drop this extra amdgpu_device_evict_resources() call now. We have a second call which is used to evacuate firmware etc... after the hw has been shut down. That one can't move, but also shouldn't allocate that much memory. Regards, Christian. diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index e3471293846f..175167582db0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -2425,8 +2425,9 @@ static int amdgpu_pmops_prepare(struct device *dev) /* Return a positive number here so * DPM_FLAG_SMART_SUSPEND works properly */ - if (amdgpu_device_supports_boco(drm_dev)) - return pm_runtime_suspended(dev); + if (amdgpu_device_supports_boco(drm_dev) && + pm_runtime_suspended(dev)) + return 1; /* if we will not support s3 or s2i for the device * then skip suspend @@ -2435,7 +2436,7 @@ static int amdgpu_pmops_prepare(struct device *dev) !amdgpu_acpi_is_s3_active(adev)) return 1; - return 0; + return amdgpu_device_prepare(drm_dev); } static void amdgpu_pmops_complete(struct device *dev)
Re: [PATCH v4 2/3] drm/amd/display: Destroy DC context while keeping DML
On 10/5/2023 09:27, Alex Deucher wrote: On Wed, Oct 4, 2023 at 1:37 PM Mario Limonciello wrote: If there is memory pressure at suspend time then dynamically allocating a large structure as part of DC suspend code will fail. Instead re-use the same structure and clear all members except those that should be maintained. Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2362 Signed-off-by: Mario Limonciello --- drivers/gpu/drm/amd/display/dc/core/dc.c | 25 --- .../gpu/drm/amd/display/dc/core/dc_resource.c | 12 + 2 files changed, 12 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 39e291a467e2..cb8c7c5a8807 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -4728,9 +4728,6 @@ bool dc_set_power_state( struct dc *dc, enum dc_acpi_cm_power_state power_state) { - struct kref refcount; - struct display_mode_lib *dml; - if (!dc->current_state) return true; @@ -4750,30 +4747,8 @@ bool dc_set_power_state( break; default: ASSERT(dc->current_state->stream_count == 0); - /* Zero out the current context so that on resume we start with -* clean state, and dc hw programming optimizations will not -* cause any trouble. -*/ - dml = kzalloc(sizeof(struct display_mode_lib), - GFP_KERNEL); - - ASSERT(dml); - if (!dml) - return false; - - /* Preserve refcount */ - refcount = dc->current_state->refcount; - /* Preserve display mode lib */ - memcpy(dml, >current_state->bw_ctx.dml, sizeof(struct display_mode_lib)); dc_resource_state_destruct(dc->current_state); - memset(dc->current_state, 0, - sizeof(*dc->current_state)); - - dc->current_state->refcount = refcount; - dc->current_state->bw_ctx.dml = *dml; The dml dance seems a bit weird. I guess it's here because dc_resource_state_destruct() might change it? Can we safely drop this? If we do need it, we could pre-allocate a dml structure and use that. 
The dml structure is huge, so I think it's sub-optimal to have two copies of it. That's why I aimed to just destroy everything else except it instead. The only reason it's "safe" to drop the whole above stuff is because of "threading the needle" of what dc_resource_state_destruct() does. In the earlier version I had a mistake to miss clearing the scratch variable and it caused some IGT failures. This probably needs to be double checked with the DML2 series landing as well to make sure it didn't get caught in the middle. Alex - - kfree(dml); break; } diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index aa7b5db83644..e487c966c118 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -4350,6 +4350,18 @@ void dc_resource_state_destruct(struct dc_state *context) context->streams[i] = NULL; } context->stream_count = 0; + context->stream_mask = 0; + memset(&context->res_ctx, 0, sizeof(context->res_ctx)); + memset(&context->pp_display_cfg, 0, sizeof(context->pp_display_cfg)); + memset(&context->dcn_bw_vars, 0, sizeof(context->dcn_bw_vars)); + context->clk_mgr = NULL; + memset(&context->bw_ctx.bw, 0, sizeof(context->bw_ctx.bw)); + memset(context->block_sequence, 0, sizeof(context->block_sequence)); + context->block_sequence_steps = 0; + memset(context->dc_dmub_cmd, 0, sizeof(context->dc_dmub_cmd)); + context->dmub_cmd_count = 0; + memset(&context->perf_params, 0, sizeof(context->perf_params)); + memset(&context->scratch, 0, sizeof(context->scratch)); } void dc_resource_state_copy_construct( -- 2.34.1
Re: [PATCH] drm/amdgpu: update ib start and size alignment
Am 05.10.23 um 15:49 schrieb boyuan.zh...@amd.com: From: Boyuan Zhang Update IB starting address alignment and size alignment with correct values for decode and encode IPs. Decode IB starting address alignment: 256 bytes Decode IB size alignment: 64 bytes Encode IB starting address alignment: 256 bytes Encode IB size alignment: 4 bytes Also bump amdgpu driver version for this update. Signed-off-by: Boyuan Zhang Reviewed-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 22 +++--- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index e3471293846f..9e345d503a47 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -113,9 +113,10 @@ *gl1c_cache_size, gl2c_cache_size, mall_size, enabled_rb_pipes_mask_hi * 3.53.0 - Support for GFX11 CP GFX shadowing * 3.54.0 - Add AMDGPU_CTX_QUERY2_FLAGS_RESET_IN_PROGRESS support + * 3.55.0 - Update IB start address and size alignment for decode and encode */ #define KMS_DRIVER_MAJOR 3 -#define KMS_DRIVER_MINOR 54 +#define KMS_DRIVER_MINOR 55 #define KMS_DRIVER_PATCHLEVEL 0 /* diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 081bd28e2443..96db51765a6c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -447,7 +447,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, if (adev->uvd.inst[i].ring.sched.ready) ++num_rings; } - ib_start_alignment = 64; + ib_start_alignment = 256; ib_size_alignment = 64; break; case AMDGPU_HW_IP_VCE: @@ -455,8 +455,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, for (i = 0; i < adev->vce.num_rings; i++) if (adev->vce.ring[i].sched.ready) ++num_rings; - ib_start_alignment = 4; - ib_size_alignment = 1; + ib_start_alignment = 256; + ib_size_alignment = 4; break; case 
AMDGPU_HW_IP_UVD_ENC: type = AMD_IP_BLOCK_TYPE_UVD; @@ -468,8 +468,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, if (adev->uvd.inst[i].ring_enc[j].sched.ready) ++num_rings; } - ib_start_alignment = 64; - ib_size_alignment = 64; + ib_start_alignment = 256; + ib_size_alignment = 4; break; case AMDGPU_HW_IP_VCN_DEC: type = AMD_IP_BLOCK_TYPE_VCN; @@ -480,8 +480,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, if (adev->vcn.inst[i].ring_dec.sched.ready) ++num_rings; } - ib_start_alignment = 16; - ib_size_alignment = 16; + ib_start_alignment = 256; + ib_size_alignment = 64; break; case AMDGPU_HW_IP_VCN_ENC: type = AMD_IP_BLOCK_TYPE_VCN; @@ -493,8 +493,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, if (adev->vcn.inst[i].ring_enc[j].sched.ready) ++num_rings; } - ib_start_alignment = 64; - ib_size_alignment = 1; + ib_start_alignment = 256; + ib_size_alignment = 4; break; case AMDGPU_HW_IP_VCN_JPEG: type = (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_JPEG)) ? @@ -508,8 +508,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, if (adev->jpeg.inst[i].ring_dec[j].sched.ready) ++num_rings; } - ib_start_alignment = 16; - ib_size_alignment = 16; + ib_start_alignment = 256; + ib_size_alignment = 64; break; case AMDGPU_HW_IP_VPE: type = AMD_IP_BLOCK_TYPE_VPE;
Re: [PATCH v6 7/9] drm/amdgpu: map wptr BO into GART
Am 04.10.23 um 23:34 schrieb Felix Kuehling: On 2023-09-18 06:32, Christian König wrote: Am 08.09.23 um 18:04 schrieb Shashank Sharma: To support oversubscription, MES FW expects WPTR BOs to be mapped into GART, before they are submitted to usermode queues. This patch adds a function for the same. V4: fix the wptr value before mapping lookup (Bas, Christian). V5: Addressed review comments from Christian: - Either pin object or allocate from GART, but not both. - All the handling must be done with the VM locks held. Cc: Alex Deucher Cc: Christian Koenig Signed-off-by: Shashank Sharma Signed-off-by: Arvind Yadav --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 81 +++ .../gpu/drm/amd/include/amdgpu_userqueue.h | 1 + 2 files changed, 82 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index e266674e0d44..c0eb622dfc37 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -6427,6 +6427,79 @@ const struct amdgpu_ip_block_version gfx_v11_0_ip_block = .funcs = &gfx_v11_0_ip_funcs, }; +static int +gfx_v11_0_map_gtt_bo_to_gart(struct amdgpu_device *adev, struct amdgpu_bo *bo) +{ + int ret; + + ret = amdgpu_bo_reserve(bo, true); + if (ret) { + DRM_ERROR("Failed to reserve bo. ret %d\n", ret); + goto err_reserve_bo_failed; + } + + ret = amdgpu_ttm_alloc_gart(&bo->tbo); + if (ret) { + DRM_ERROR("Failed to bind bo to GART. ret %d\n", ret); + goto err_map_bo_gart_failed; + } + + amdgpu_bo_unreserve(bo); The GART mapping can become invalid as soon as you unlock the BOs. You need to attach an eviction fence for this to work correctly. Don't you need an eviction fence on the WPTR BO regardless of the GTT mapping? Yeah, indeed :) Long story short we need a general eviction fence handling for BOs. Regards, Christian. 
Regards, Felix + bo = amdgpu_bo_ref(bo); + + return 0; + +err_map_bo_gart_failed: + amdgpu_bo_unreserve(bo); +err_reserve_bo_failed: + return ret; +} + +static int +gfx_v11_0_create_wptr_mapping(struct amdgpu_device *adev, + struct amdgpu_usermode_queue *queue, + uint64_t wptr) +{ + struct amdgpu_bo_va_mapping *wptr_mapping; + struct amdgpu_vm *wptr_vm; + struct amdgpu_bo *wptr_bo = NULL; + int ret; + + mutex_lock(&queue->vm->eviction_lock); Never ever touch the eviction lock outside of the VM code! That lock is completely unrelated to what you do here. + wptr_vm = queue->vm; + ret = amdgpu_bo_reserve(wptr_vm->root.bo, false); + if (ret) + goto unlock; + + wptr &= AMDGPU_GMC_HOLE_MASK; + wptr_mapping = amdgpu_vm_bo_lookup_mapping(wptr_vm, wptr >> PAGE_SHIFT); + amdgpu_bo_unreserve(wptr_vm->root.bo); + if (!wptr_mapping) { + DRM_ERROR("Failed to lookup wptr bo\n"); + ret = -EINVAL; + goto unlock; + } + + wptr_bo = wptr_mapping->bo_va->base.bo; + if (wptr_bo->tbo.base.size > PAGE_SIZE) { + DRM_ERROR("Requested GART mapping for wptr bo larger than one page\n"); + ret = -EINVAL; + goto unlock; + } We probably also want to enforce that this BO is a per VM BO. + + ret = gfx_v11_0_map_gtt_bo_to_gart(adev, wptr_bo); + if (ret) { + DRM_ERROR("Failed to map wptr bo to GART\n"); + goto unlock; + } + + queue->wptr_mc_addr = wptr_bo->tbo.resource->start << PAGE_SHIFT; This needs to be amdgpu_bo_gpu_offset() instead. Regards, Christian. 
+ +unlock: + mutex_unlock(&queue->vm->eviction_lock); + return ret; +} + static void gfx_v11_0_userq_unmap(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_usermode_queue *queue) { @@ -6475,6 +6548,7 @@ static int gfx_v11_0_userq_map(struct amdgpu_userq_mgr *uq_mgr, queue_input.queue_size = userq_props->queue_size >> 2; queue_input.doorbell_offset = userq_props->doorbell_index; queue_input.page_table_base_addr = amdgpu_gmc_pd_addr(queue->vm->root.bo); + queue_input.wptr_mc_addr = queue->wptr_mc_addr; amdgpu_mes_lock(&adev->mes); r = adev->mes.funcs->add_hw_queue(&adev->mes, &queue_input); @@ -6601,6 +6675,13 @@ static int gfx_v11_0_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr, goto free_mqd; } + /* FW expects WPTR BOs to be mapped into GART */ + r = gfx_v11_0_create_wptr_mapping(adev, queue, userq_props.wptr_gpu_addr); + if (r) { + DRM_ERROR("Failed to create WPTR mapping\n"); + goto free_ctx; + } + /* Map userqueue into FW using MES */ r = gfx_v11_0_userq_map(uq_mgr, queue, &userq_props); if (r) { diff --git a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h index 34e20daa06c8..ae155de62560 100644 --- a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h +++ b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h @@ -39,6 +39,7 @@ struct
Re: [PATCH] drm/amdgpu: Annotate struct amdgpu_bo_list with __counted_by
Am 04.10.23 um 01:29 schrieb Kees Cook: Prepare for the coming implementation by GCC and Clang of the __counted_by attribute. Flexible array members annotated with __counted_by can have their accesses bounds-checked at run-time via CONFIG_UBSAN_BOUNDS (for array indexing) and CONFIG_FORTIFY_SOURCE (for strcpy/memcpy-family functions). As found with Coccinelle[1], add __counted_by for struct amdgpu_bo_list. Additionally, since the element count member must be set before accessing the annotated flexible array member, move its initialization earlier. Cc: Alex Deucher Cc: "Christian König" Cc: "Pan, Xinhui" Cc: David Airlie Cc: Daniel Vetter Cc: "Gustavo A. R. Silva" Cc: Luben Tuikov Cc: Christophe JAILLET Cc: Felix Kuehling Cc: amd-gfx@lists.freedesktop.org Cc: dri-de...@lists.freedesktop.org Cc: linux-harden...@vger.kernel.org Link: https://github.com/kees/kernel-tools/blob/trunk/coccinelle/examples/counted_by.cocci [1] Signed-off-by: Kees Cook Reviewed-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c index 6f5b641b631e..781e5c5ce04d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c @@ -84,6 +84,7 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp, kref_init(&list->refcount); + list->num_entries = num_entries; array = list->entries; for (i = 0; i < num_entries; ++i) { @@ -129,7 +130,6 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp, } list->first_userptr = first_userptr; - list->num_entries = num_entries; sort(array, last_entry, sizeof(struct amdgpu_bo_list_entry), amdgpu_bo_list_entry_cmp, NULL); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h index 6a703be45d04..555cd6d877c3 
100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h @@ -56,7 +56,7 @@ struct amdgpu_bo_list { */ struct mutex bo_list_mutex; - struct amdgpu_bo_list_entry entries[]; + struct amdgpu_bo_list_entry entries[] __counted_by(num_entries); }; int amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id,
Re: [PATCH] drm/amdgpu: update ib start and size alignment
On Thu, Oct 5, 2023 at 10:17 AM wrote: > > From: Boyuan Zhang > > Update IB starting address alignment and size alignment with correct values > for decode and encode IPs. > > Decode IB starting address alignment: 256 bytes > Decode IB size alignment: 64 bytes > Encode IB starting address alignment: 256 bytes > Encode IB size alignment: 4 bytes > > Also bump amdgpu driver version for this update. > > Signed-off-by: Boyuan Zhang Acked-by: Alex Deucher > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 ++- > drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 22 +++--- > 2 files changed, 13 insertions(+), 12 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > index e3471293846f..9e345d503a47 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > @@ -113,9 +113,10 @@ > *gl1c_cache_size, gl2c_cache_size, mall_size, > enabled_rb_pipes_mask_hi > * 3.53.0 - Support for GFX11 CP GFX shadowing > * 3.54.0 - Add AMDGPU_CTX_QUERY2_FLAGS_RESET_IN_PROGRESS support > + * 3.55.0 - Update IB start address and size alignment for decode and > encode > */ > #define KMS_DRIVER_MAJOR 3 > -#define KMS_DRIVER_MINOR 54 > +#define KMS_DRIVER_MINOR 55 > #define KMS_DRIVER_PATCHLEVEL 0 > > /* > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c > index 081bd28e2443..96db51765a6c 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c > @@ -447,7 +447,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, > if (adev->uvd.inst[i].ring.sched.ready) > ++num_rings; > } > - ib_start_alignment = 64; > + ib_start_alignment = 256; > ib_size_alignment = 64; > break; > case AMDGPU_HW_IP_VCE: > @@ -455,8 +455,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, > for (i = 0; i < adev->vce.num_rings; i++) > if (adev->vce.ring[i].sched.ready) > ++num_rings; > - ib_start_alignment = 4; > - 
ib_size_alignment = 1; > + ib_start_alignment = 256; > + ib_size_alignment = 4; > break; > case AMDGPU_HW_IP_UVD_ENC: > type = AMD_IP_BLOCK_TYPE_UVD; > @@ -468,8 +468,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, > if (adev->uvd.inst[i].ring_enc[j].sched.ready) > ++num_rings; > } > - ib_start_alignment = 64; > - ib_size_alignment = 64; > + ib_start_alignment = 256; > + ib_size_alignment = 4; > break; > case AMDGPU_HW_IP_VCN_DEC: > type = AMD_IP_BLOCK_TYPE_VCN; > @@ -480,8 +480,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, > if (adev->vcn.inst[i].ring_dec.sched.ready) > ++num_rings; > } > - ib_start_alignment = 16; > - ib_size_alignment = 16; > + ib_start_alignment = 256; > + ib_size_alignment = 64; > break; > case AMDGPU_HW_IP_VCN_ENC: > type = AMD_IP_BLOCK_TYPE_VCN; > @@ -493,8 +493,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, > if (adev->vcn.inst[i].ring_enc[j].sched.ready) > ++num_rings; > } > - ib_start_alignment = 64; > - ib_size_alignment = 1; > + ib_start_alignment = 256; > + ib_size_alignment = 4; > break; > case AMDGPU_HW_IP_VCN_JPEG: > type = (amdgpu_device_ip_get_ip_block(adev, > AMD_IP_BLOCK_TYPE_JPEG)) ? > @@ -508,8 +508,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, > if > (adev->jpeg.inst[i].ring_dec[j].sched.ready) > ++num_rings; > } > - ib_start_alignment = 16; > - ib_size_alignment = 16; > + ib_start_alignment = 256; > + ib_size_alignment = 64; > break; > case AMDGPU_HW_IP_VPE: > type = AMD_IP_BLOCK_TYPE_VPE; > -- > 2.34.1 >
Re: [PATCH v4 3/3] drm/amd/display: make dc_set_power_state() return type `void` again
On Wed, Oct 4, 2023 at 1:27 PM Mario Limonciello wrote: > > As dc_set_power_state() no longer allocates memory, it's not necessary > to have return types and check return code as it can't fail anymore. > > Change it back to `void`. > > Signed-off-by: Mario Limonciello Reviewed-by: Alex Deucher > --- > .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 17 + > drivers/gpu/drm/amd/display/dc/core/dc.c| 6 ++ > drivers/gpu/drm/amd/display/dc/dc.h | 2 +- > 3 files changed, 8 insertions(+), 17 deletions(-) > > diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c > b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c > index a59a11ae42db..df9d9437f149 100644 > --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c > +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c > @@ -2685,11 +2685,6 @@ static void hpd_rx_irq_work_suspend(struct > amdgpu_display_manager *dm) > } > } > > -static int dm_set_power_state(struct dc *dc, enum dc_acpi_cm_power_state > power_state) > -{ > - return dc_set_power_state(dc, power_state) ? 
0 : -ENOMEM; > -} > - > static int dm_suspend(void *handle) > { > struct amdgpu_device *adev = handle; > @@ -2723,7 +2718,9 @@ static int dm_suspend(void *handle) > > hpd_rx_irq_work_suspend(dm); > > - return dm_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D3); > + dc_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D3); > + > + return 0; > } > > struct drm_connector * > @@ -2917,9 +2914,7 @@ static int dm_resume(void *handle) > if (r) > DRM_ERROR("DMUB interface failed to initialize: > status=%d\n", r); > > - r = dm_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D0); > - if (r) > - return r; > + dc_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D0); > > dc_resume(dm->dc); > > @@ -2969,9 +2964,7 @@ static int dm_resume(void *handle) > } > > /* power on hardware */ > - r = dm_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D0); > - if (r) > - return r; > +dc_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D0); > > /* program HPD filter */ > dc_resume(dm->dc); > diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c > b/drivers/gpu/drm/amd/display/dc/core/dc.c > index cb8c7c5a8807..2645d59dc58e 100644 > --- a/drivers/gpu/drm/amd/display/dc/core/dc.c > +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c > @@ -4724,12 +4724,12 @@ void dc_power_down_on_boot(struct dc *dc) > dc->hwss.power_down_on_boot(dc); > } > > -bool dc_set_power_state( > +void dc_set_power_state( > struct dc *dc, > enum dc_acpi_cm_power_state power_state) > { > if (!dc->current_state) > - return true; > + return; > > switch (power_state) { > case DC_ACPI_CM_POWER_STATE_D0: > @@ -4752,8 +4752,6 @@ bool dc_set_power_state( > > break; > } > - > - return true; > } > > void dc_resume(struct dc *dc) > diff --git a/drivers/gpu/drm/amd/display/dc/dc.h > b/drivers/gpu/drm/amd/display/dc/dc.h > index b140eb240ad7..b6002b11a745 100644 > --- a/drivers/gpu/drm/amd/display/dc/dc.h > +++ b/drivers/gpu/drm/amd/display/dc/dc.h > @@ -2330,7 +2330,7 @@ void dc_notify_vsync_int_state(struct dc *dc, struct > dc_stream_state 
*stream, bo > > /* Power Interfaces */ > > -bool dc_set_power_state( > +void dc_set_power_state( > struct dc *dc, > enum dc_acpi_cm_power_state power_state); > void dc_resume(struct dc *dc); > -- > 2.34.1 >
Re: [PATCH v4 2/3] drm/amd/display: Destroy DC context while keeping DML
On Wed, Oct 4, 2023 at 1:37 PM Mario Limonciello wrote: > > If there is memory pressure at suspend time then dynamically > allocating a large structure as part of DC suspend code will > fail. > > Instead re-use the same structure and clear all members except > those that should be maintained. > > Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2362 > Signed-off-by: Mario Limonciello > --- > drivers/gpu/drm/amd/display/dc/core/dc.c | 25 --- > .../gpu/drm/amd/display/dc/core/dc_resource.c | 12 + > 2 files changed, 12 insertions(+), 25 deletions(-) > > diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c > b/drivers/gpu/drm/amd/display/dc/core/dc.c > index 39e291a467e2..cb8c7c5a8807 100644 > --- a/drivers/gpu/drm/amd/display/dc/core/dc.c > +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c > @@ -4728,9 +4728,6 @@ bool dc_set_power_state( > struct dc *dc, > enum dc_acpi_cm_power_state power_state) > { > - struct kref refcount; > - struct display_mode_lib *dml; > - > if (!dc->current_state) > return true; > > @@ -4750,30 +4747,8 @@ bool dc_set_power_state( > break; > default: > ASSERT(dc->current_state->stream_count == 0); > - /* Zero out the current context so that on resume we start > with > -* clean state, and dc hw programming optimizations will not > -* cause any trouble. > -*/ > - dml = kzalloc(sizeof(struct display_mode_lib), > - GFP_KERNEL); > - > - ASSERT(dml); > - if (!dml) > - return false; > - > - /* Preserve refcount */ > - refcount = dc->current_state->refcount; > - /* Preserve display mode lib */ > - memcpy(dml, >current_state->bw_ctx.dml, sizeof(struct > display_mode_lib)); > > dc_resource_state_destruct(dc->current_state); > - memset(dc->current_state, 0, > - sizeof(*dc->current_state)); > - > - dc->current_state->refcount = refcount; > - dc->current_state->bw_ctx.dml = *dml; The dml dance seems a bit weird. I guess it's here because dc_resource_state_destruct() might change it? Can we safely drop this? 
If we do need it, we could pre-allocate a dml structure and use that. Alex > - > - kfree(dml); > > break; > } > diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c > b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c > index aa7b5db83644..e487c966c118 100644 > --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c > +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c > @@ -4350,6 +4350,18 @@ void dc_resource_state_destruct(struct dc_state > *context) > context->streams[i] = NULL; > } > context->stream_count = 0; > + context->stream_mask = 0; > + memset(>res_ctx, 0, sizeof(context->res_ctx)); > + memset(>pp_display_cfg, 0, sizeof(context->pp_display_cfg)); > + memset(>dcn_bw_vars, 0, sizeof(context->dcn_bw_vars)); > + context->clk_mgr = NULL; > + memset(>bw_ctx.bw, 0, sizeof(context->bw_ctx.bw)); > + memset(context->block_sequence, 0, sizeof(context->block_sequence)); > + context->block_sequence_steps = 0; > + memset(context->dc_dmub_cmd, 0, sizeof(context->dc_dmub_cmd)); > + context->dmub_cmd_count = 0; > + memset(>perf_params, 0, sizeof(context->perf_params)); > + memset(>scratch, 0, sizeof(context->scratch)); > } > > void dc_resource_state_copy_construct( > -- > 2.34.1 >
Re: [PATCH v4 1/3] drm/amd: Evict resources during PM ops prepare() callback
On Wed, Oct 4, 2023 at 1:37 PM Mario Limonciello wrote: > > Linux PM core has a prepare() callback run before suspend. > > If the system is under high memory pressure, the resources may need > to be evicted into swap instead. If the storage backing for swap > is offlined during the suspend() step then such a call may fail. > > So duplicate this step into prepare() to move evict majority of > resources while leaving all existing steps that put the GPU into a > low power state in suspend(). > > Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2362 > Signed-off-by: Mario Limonciello > --- > drivers/gpu/drm/amd/amdgpu/amdgpu.h| 1 + > drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 26 +- > drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c| 7 +++--- > 3 files changed, 30 insertions(+), 4 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > index d23fb4b5ad95..6643d0ed6b1b 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > @@ -1413,6 +1413,7 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev, > void amdgpu_driver_release_kms(struct drm_device *dev); > > int amdgpu_device_ip_suspend(struct amdgpu_device *adev); > +int amdgpu_device_prepare(struct drm_device *dev); > int amdgpu_device_suspend(struct drm_device *dev, bool fbcon); > int amdgpu_device_resume(struct drm_device *dev, bool fbcon); > u32 amdgpu_get_vblank_counter_kms(struct drm_crtc *crtc); > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > index bad2b5577e96..67acee569c08 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > @@ -4259,6 +4259,31 @@ static int amdgpu_device_evict_resources(struct > amdgpu_device *adev) > /* > * Suspend & resume. 
> */ > +/** > + * amdgpu_device_prepare - prepare for device suspend > + * > + * @dev: drm dev pointer > + * > + * Prepare to put the hw in the suspend state (all asics). > + * Returns 0 for success or an error on failure. > + * Called at driver suspend. > + */ > +int amdgpu_device_prepare(struct drm_device *dev) > +{ > + struct amdgpu_device *adev = drm_to_adev(dev); > + int r; > + > + if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) > + return 0; > + > + /* Evict the majority of BOs before starting suspend sequence */ > + r = amdgpu_device_evict_resources(adev); > + if (r) > + return r; > + > + return 0; > +} > + > /** > * amdgpu_device_suspend - initiate device suspend > * > @@ -4279,7 +4304,6 @@ int amdgpu_device_suspend(struct drm_device *dev, bool > fbcon) > > adev->in_suspend = true; > > - /* Evict the majority of BOs before grabbing the full access */ > r = amdgpu_device_evict_resources(adev); > if (r) > return r; Might want to add a note that this is likely a noop in the normal suspend case and is just here to handle the case where amdgpu_device_suspend() is called outside of the normal pmops framework. 
Other than that, the patch is: Reviewed-by: Alex Deucher > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > index e3471293846f..175167582db0 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > @@ -2425,8 +2425,9 @@ static int amdgpu_pmops_prepare(struct device *dev) > /* Return a positive number here so > * DPM_FLAG_SMART_SUSPEND works properly > */ > - if (amdgpu_device_supports_boco(drm_dev)) > - return pm_runtime_suspended(dev); > + if (amdgpu_device_supports_boco(drm_dev) && > + pm_runtime_suspended(dev)) > + return 1; > > /* if we will not support s3 or s2i for the device > * then skip suspend > @@ -2435,7 +2436,7 @@ static int amdgpu_pmops_prepare(struct device *dev) > !amdgpu_acpi_is_s3_active(adev)) > return 1; > > - return 0; > + return amdgpu_device_prepare(drm_dev); > } > > static void amdgpu_pmops_complete(struct device *dev) > -- > 2.34.1 >
[PATCH] drm/amdgpu: update ib start and size alignment
From: Boyuan Zhang Update IB starting address alignment and size alignment with correct values for decode and encode IPs. Decode IB starting address alignment: 256 bytes Decode IB size alignment: 64 bytes Encode IB starting address alignment: 256 bytes Encode IB size alignment: 4 bytes Also bump amdgpu driver version for this update. Signed-off-by: Boyuan Zhang --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 22 +++--- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index e3471293846f..9e345d503a47 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -113,9 +113,10 @@ *gl1c_cache_size, gl2c_cache_size, mall_size, enabled_rb_pipes_mask_hi * 3.53.0 - Support for GFX11 CP GFX shadowing * 3.54.0 - Add AMDGPU_CTX_QUERY2_FLAGS_RESET_IN_PROGRESS support + * 3.55.0 - Update IB start address and size alignment for decode and encode */ #define KMS_DRIVER_MAJOR 3 -#define KMS_DRIVER_MINOR 54 +#define KMS_DRIVER_MINOR 55 #define KMS_DRIVER_PATCHLEVEL 0 /* diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 081bd28e2443..96db51765a6c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -447,7 +447,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, if (adev->uvd.inst[i].ring.sched.ready) ++num_rings; } - ib_start_alignment = 64; + ib_start_alignment = 256; ib_size_alignment = 64; break; case AMDGPU_HW_IP_VCE: @@ -455,8 +455,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, for (i = 0; i < adev->vce.num_rings; i++) if (adev->vce.ring[i].sched.ready) ++num_rings; - ib_start_alignment = 4; - ib_size_alignment = 1; + ib_start_alignment = 256; + ib_size_alignment = 4; break; case AMDGPU_HW_IP_UVD_ENC: type = AMD_IP_BLOCK_TYPE_UVD; @@ -468,8 +468,8 @@ static int 
amdgpu_hw_ip_info(struct amdgpu_device *adev, if (adev->uvd.inst[i].ring_enc[j].sched.ready) ++num_rings; } - ib_start_alignment = 64; - ib_size_alignment = 64; + ib_start_alignment = 256; + ib_size_alignment = 4; break; case AMDGPU_HW_IP_VCN_DEC: type = AMD_IP_BLOCK_TYPE_VCN; @@ -480,8 +480,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, if (adev->vcn.inst[i].ring_dec.sched.ready) ++num_rings; } - ib_start_alignment = 16; - ib_size_alignment = 16; + ib_start_alignment = 256; + ib_size_alignment = 64; break; case AMDGPU_HW_IP_VCN_ENC: type = AMD_IP_BLOCK_TYPE_VCN; @@ -493,8 +493,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, if (adev->vcn.inst[i].ring_enc[j].sched.ready) ++num_rings; } - ib_start_alignment = 64; - ib_size_alignment = 1; + ib_start_alignment = 256; + ib_size_alignment = 4; break; case AMDGPU_HW_IP_VCN_JPEG: type = (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_JPEG)) ? @@ -508,8 +508,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, if (adev->jpeg.inst[i].ring_dec[j].sched.ready) ++num_rings; } - ib_start_alignment = 16; - ib_size_alignment = 16; + ib_start_alignment = 256; + ib_size_alignment = 64; break; case AMDGPU_HW_IP_VPE: type = AMD_IP_BLOCK_TYPE_VPE; -- 2.34.1
Re: [PATCH 2/2] drm/radeon: Fix UBSAN array-index-out-of-bounds for Radeon HD 5430
On Thu, Oct 5, 2023 at 12:42 AM Mario Limonciello wrote: > > For pptable structs that use flexible array sizes, use flexible arrays. > > Suggested-by: Felix Held > Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2894 > Signed-off-by: Mario Limonciello Series is: Acked-by: Alex Deucher > --- > drivers/gpu/drm/radeon/pptable.h | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/drivers/gpu/drm/radeon/pptable.h > b/drivers/gpu/drm/radeon/pptable.h > index 4c2eec49dadc..94947229888b 100644 > --- a/drivers/gpu/drm/radeon/pptable.h > +++ b/drivers/gpu/drm/radeon/pptable.h > @@ -74,7 +74,7 @@ typedef struct _ATOM_PPLIB_THERMALCONTROLLER > typedef struct _ATOM_PPLIB_STATE > { > UCHAR ucNonClockStateIndex; > -UCHAR ucClockStateIndices[1]; // variable-sized > +UCHAR ucClockStateIndices[]; // variable-sized > } ATOM_PPLIB_STATE; > > > -- > 2.34.1 >
Re: [PATCH] drm/amd: Fix UBSAN array-index-out-of-bounds for Polaris and Tonga
On 10/4/2023 16:50, Alex Deucher wrote: On Wed, Oct 4, 2023 at 5:42 PM Mario Limonciello wrote: For pptable structs that use flexible array sizes, use flexible arrays. Link: https://bugs.launchpad.net/ubuntu/+source/linux/+bug/2036742 Signed-off-by: Mario Limonciello Acked-by: Alex Deucher Thanks, can you also review the similar (but different) series of the other variable arrays? https://lore.kernel.org/amd-gfx/20231004202253.182540-1-mario.limoncie...@amd.com/T/#t --- From this bug report there are more to fix .../gpu/drm/amd/pm/powerplay/hwmgr/pptable_v1_0.h| 12 ++-- 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/pptable_v1_0.h b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/pptable_v1_0.h index 57bca1e81d3a..9fcad69a9f34 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/pptable_v1_0.h +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/pptable_v1_0.h @@ -164,7 +164,7 @@ typedef struct _ATOM_Tonga_State { typedef struct _ATOM_Tonga_State_Array { UCHAR ucRevId; UCHAR ucNumEntries; /* Number of entries. */ - ATOM_Tonga_State entries[1];/* Dynamically allocate entries. */ + ATOM_Tonga_State entries[]; /* Dynamically allocate entries. */ } ATOM_Tonga_State_Array; typedef struct _ATOM_Tonga_MCLK_Dependency_Record { @@ -210,7 +210,7 @@ typedef struct _ATOM_Polaris_SCLK_Dependency_Record { typedef struct _ATOM_Polaris_SCLK_Dependency_Table { UCHAR ucRevId; UCHAR ucNumEntries; /* Number of entries. */ - ATOM_Polaris_SCLK_Dependency_Record entries[1]; /* Dynamically allocate entries. */ + ATOM_Polaris_SCLK_Dependency_Record entries[]; /* Dynamically allocate entries. */ } ATOM_Polaris_SCLK_Dependency_Table; typedef struct _ATOM_Tonga_PCIE_Record { @@ -222,7 +222,7 @@ typedef struct _ATOM_Tonga_PCIE_Record { typedef struct _ATOM_Tonga_PCIE_Table { UCHAR ucRevId; UCHAR ucNumEntries; /* Number of entries. */ - ATOM_Tonga_PCIE_Record entries[1]; /* Dynamically allocate entries. 
*/ + ATOM_Tonga_PCIE_Record entries[]; /* Dynamically allocate entries. */ } ATOM_Tonga_PCIE_Table; typedef struct _ATOM_Polaris10_PCIE_Record { @@ -235,7 +235,7 @@ typedef struct _ATOM_Polaris10_PCIE_Record { typedef struct _ATOM_Polaris10_PCIE_Table { UCHAR ucRevId; UCHAR ucNumEntries; /* Number of entries. */ - ATOM_Polaris10_PCIE_Record entries[1]; /* Dynamically allocate entries. */ + ATOM_Polaris10_PCIE_Record entries[]; /* Dynamically allocate entries. */ } ATOM_Polaris10_PCIE_Table; @@ -252,7 +252,7 @@ typedef struct _ATOM_Tonga_MM_Dependency_Record { typedef struct _ATOM_Tonga_MM_Dependency_Table { UCHAR ucRevId; UCHAR ucNumEntries; /* Number of entries. */ - ATOM_Tonga_MM_Dependency_Record entries[1];/* Dynamically allocate entries. */ + ATOM_Tonga_MM_Dependency_Record entries[]; /* Dynamically allocate entries. */ } ATOM_Tonga_MM_Dependency_Table; typedef struct _ATOM_Tonga_Voltage_Lookup_Record { @@ -265,7 +265,7 @@ typedef struct _ATOM_Tonga_Voltage_Lookup_Record { typedef struct _ATOM_Tonga_Voltage_Lookup_Table { UCHAR ucRevId; UCHAR ucNumEntries; /* Number of entries. */ - ATOM_Tonga_Voltage_Lookup_Record entries[1]; /* Dynamically allocate entries. */ + ATOM_Tonga_Voltage_Lookup_Record entries[]; /* Dynamically allocate entries. */ } ATOM_Tonga_Voltage_Lookup_Table; typedef struct _ATOM_Tonga_Fan_Table { -- 2.34.1
Re: [PATCH v3 1/4] drm/amd: Add support for prepare() and complete() callbacks
On 10/5/2023 07:35, Christian König wrote: Am 04.10.23 um 05:39 schrieb Mario Limonciello: On 10/3/2023 16:22, Deucher, Alexander wrote: [Public] -Original Message- From: Limonciello, Mario Sent: Tuesday, October 3, 2023 5:17 PM To: Deucher, Alexander ; amd- g...@lists.freedesktop.org Cc: Wentland, Harry Subject: Re: [PATCH v3 1/4] drm/amd: Add support for prepare() and complete() callbacks On 10/3/2023 16:11, Deucher, Alexander wrote: [Public] -Original Message- From: amd-gfx On Behalf Of Mario Limonciello Sent: Tuesday, October 3, 2023 4:55 PM To: amd-gfx@lists.freedesktop.org Cc: Wentland, Harry ; Limonciello, Mario Subject: [PATCH v3 1/4] drm/amd: Add support for prepare() and complete() callbacks Linux PM core has a prepare() callback run before suspend and complete() callback ran after resume() for devices to use. Add plumbing to bring prepare() to amdgpu. The idea with the new vfuncs for amdgpu is that all IP blocks that memory allocations during suspend should do the allocation from this call instead of the suspend() callback. By moving the allocations to prepare() the system suspend will be failed before any IP block has done any suspend code. If the suspend fails, then do any cleanups in the complete() callback. 
Signed-off-by: Mario Limonciello --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 39 -- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 11 +++--- 3 files changed, 46 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 73e825d20259..5d651552822c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1415,6 +1415,8 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev, void amdgpu_driver_release_kms(struct drm_device *dev); int amdgpu_device_ip_suspend(struct amdgpu_device *adev); +int amdgpu_device_prepare(struct drm_device *dev); void +amdgpu_device_complete(struct drm_device *dev); int amdgpu_device_suspend(struct drm_device *dev, bool fbcon); int amdgpu_device_resume(struct drm_device *dev, bool fbcon); u32 amdgpu_get_vblank_counter_kms(struct drm_crtc *crtc); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index bad2b5577e96..f53cf675c3ce 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4259,6 +4259,43 @@ static int amdgpu_device_evict_resources(struct amdgpu_device *adev) /* * Suspend & resume. */ +/** + * amdgpu_device_prepare - prepare for device suspend + * + * @dev: drm dev pointer + * + * Prepare to put the hw in the suspend state (all asics). + * Returns 0 for success or an error on failure. + * Called at driver suspend. + */ +int amdgpu_device_prepare(struct drm_device *dev) { + struct amdgpu_device *adev = drm_to_adev(dev); + int r; + + if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) + return 0; + + adev->in_suspend = true; + + return 0; +} + +/** + * amdgpu_device_complete - complete the device after resume + * + * @dev: drm dev pointer + * + * Clean up any actions that the prepare step did. + * Called after driver resume. 
+ */ +void amdgpu_device_complete(struct drm_device *dev) { + struct amdgpu_device *adev = drm_to_adev(dev); + + adev->in_suspend = false; +} + /** * amdgpu_device_suspend - initiate device suspend * @@ -4277,8 +4314,6 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon) if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) return 0; - adev->in_suspend = true; - We also set this to false in amdgpu_device_resume() so that should be fixed up as well. But, I'm not sure we want to move this out of amdgpu_device_suspend(). There are places we use amdgpu_device_suspend/resume() outside of pmops that also rely on these being set. Those places may need to be fixed up if we do. IIRC, the switcheroo code uses this. The big reason that I moved it from suspend() to prepare() was so that amdgpu_device_evict_resources() was called with the context of it being set. My thought process: 0) prepare() sets all the time 1) If prepare() fails complete() clears it. 2) If prepare() succeeds it remains set for suspend() 3) If suspend() succeeds it gets cleared at resume() 4) If resume() failed for some reason, it's cleared by complete(). Does it actually matter that it's set while evicting resources? Shouldn't matter for evicting resources. We even have debugfs nodes you can access to forcibly evict resources at runtime for testing memory pressure. Then in that case I think what I'll do is put an extra call for amdgpu_device_evict_resources() in the prepare callback. It shouldn't do any harm to call three times in the suspend sequence instead of two. Yeah, I think you should move the first call
Re: [PATCH v4] drm/amdkfd: Use partial migrations in GPU page faults
Sorry for the late reply, just notice 2 other issues: 1. function svm_range_split_by_granularity can be removed now. 2. svm_range_restore_pages should map partial range to GPUs after partial migration. Regards, Philip On 2023-10-03 19:31, Xiaogang.Chen wrote: From: Xiaogang Chen This patch implements partial migration in gpu page fault according to migration granularity(default 2MB) and not split svm range in cpu page fault handling. A svm range may include pages from both system ram and vram of one gpu now. These chagnes are expected to improve migration performance and reduce mmu callback and TLB flush workloads. Signed-off-by: Xiaogang Chen --- drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 156 +-- drivers/gpu/drm/amd/amdkfd/kfd_migrate.h | 6 +- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 83 +--- drivers/gpu/drm/amd/amdkfd/kfd_svm.h | 6 +- 4 files changed, 162 insertions(+), 89 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index 6c25dab051d5..6a059e4aff86 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -442,10 +442,10 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange, goto out_free; } if (cpages != npages) - pr_debug("partial migration, 0x%lx/0x%llx pages migrated\n", + pr_debug("partial migration, 0x%lx/0x%llx pages collected\n", cpages, npages); else - pr_debug("0x%lx pages migrated\n", cpages); + pr_debug("0x%lx pages collected\n", cpages); r = svm_migrate_copy_to_vram(node, prange, , , scratch, ttm_res_offset); migrate_vma_pages(); @@ -479,6 +479,8 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange, * svm_migrate_ram_to_vram - migrate svm range from system to device * @prange: range structure * @best_loc: the device to migrate to + * @start_mgr: start page to migrate + * @last_mgr: last page to migrate * @mm: the process mm structure * @trigger: reason of migration * @@ -489,6 +491,7 @@ 
svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange, */ static int svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, + unsigned long start_mgr, unsigned long last_mgr, struct mm_struct *mm, uint32_t trigger) { unsigned long addr, start, end; @@ -498,23 +501,30 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, unsigned long cpages = 0; long r = 0; - if (prange->actual_loc == best_loc) { - pr_debug("svms 0x%p [0x%lx 0x%lx] already on best_loc 0x%x\n", - prange->svms, prange->start, prange->last, best_loc); + if (!best_loc) { + pr_debug("svms 0x%p [0x%lx 0x%lx] migrate to sys ram\n", + prange->svms, start_mgr, last_mgr); return 0; } + if (start_mgr < prange->start || last_mgr > prange->last) { + pr_debug("range [0x%lx 0x%lx] out prange [0x%lx 0x%lx]\n", + start_mgr, last_mgr, prange->start, prange->last); + return -EFAULT; + } + node = svm_range_get_node_by_id(prange, best_loc); if (!node) { pr_debug("failed to get kfd node by id 0x%x\n", best_loc); return -ENODEV; } - pr_debug("svms 0x%p [0x%lx 0x%lx] to gpu 0x%x\n", prange->svms, - prange->start, prange->last, best_loc); + pr_debug("svms 0x%p [0x%lx 0x%lx] in [0x%lx 0x%lx] to gpu 0x%x\n", + prange->svms, start_mgr, last_mgr, prange->start, prange->last, + best_loc); - start = prange->start << PAGE_SHIFT; - end = (prange->last + 1) << PAGE_SHIFT; + start = start_mgr << PAGE_SHIFT; + end = (last_mgr + 1) << PAGE_SHIFT; r = svm_range_vram_node_new(node, prange, true); if (r) { @@ -544,8 +554,11 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, if (cpages) { prange->actual_loc = best_loc; - svm_range_dma_unmap(prange); - } else { + prange->vram_pages = prange->vram_pages + cpages; + } else if (!prange->actual_loc) { + /* if no page migrated and all pages from prange are at + * sys ram drop svm_bo got from svm_range_vram_node_new + */ svm_range_vram_node_free(prange); } @@ -663,19 +676,19 @@ svm_migrate_copy_to_ram(struct amdgpu_device 
*adev, struct svm_range *prange, * Context: Process context, caller hold mmap read lock, prange->migrate_mutex * * Return: - * 0 - success with all pages migrated * negative values - indicate error - * positive values - partial migration, number of pages not migrated + * positive values or zero - number of pages got migrated */ static long svm_migrate_vma_to_ram(struct kfd_node *node, struct svm_range *prange, - struct vm_area_struct *vma, uint64_t start, uint64_t end, - uint32_t trigger, struct page *fault_page) + struct vm_area_struct *vma, uint64_t start, uint64_t end, + uint32_t trigger, struct page *fault_page) { struct kfd_process *p = container_of(prange->svms, struct kfd_process, svms); uint64_t npages = (end -
Re: [PATCH 1/5] drm/amd/display: Remove migrate_en/dis from dc_fpu_begin().
On 2023-10-04 08:10:35 [-0400], Hamza Mahfooz wrote: > I did some digging, and it seems like the intention of that patch was to > fix the following splat: > > WARNING: CPU: 5 PID: 1062 at > drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/dc_fpu.c:71 > dc_assert_fp_enabled+0x1a/0x30 [amdgpu] > [...] > CPU: 5 PID: 1062 Comm: Xorg Tainted: G OE 5.15.0-56-generic So it was hard to look this up with an upstream v5.15 kernel since the dcn32_populate_dml_pipes_from_context() was introduced in v6.0-rc1. Judging from v6.0-rc1 I don't see how that warning could occur other than using dc_assert_fp_enabled() without invoking DC_FP_START first. > Hamza Sebastian
Re: [PATCH 0/5] drm/amd/display: Remove migrate-disable and move memory allocation.
On 2023-10-04 08:44:58 [-0400], Harry Wentland wrote: > CI passed. > > Series is > Acked-by: Harry Wentland Thank you. > Harry Sebastian
Re: [PATCH v2 1/5] drm/amdgpu: Move package type enum to amdgpu_smuio
Am 04.10.23 um 09:39 schrieb Lijo Lazar: Move definition of package type to amdgpu_smuio header and add new package types for CEM and OAM. Signed-off-by: Lijo Lazar Reviewed-by: Christian König --- v2: Move definition to amdgpu_smuio.h instead of amdgpu.h (Christian/Hawking) drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 5 - drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h | 7 +++ 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 42ac6d1bf9ca..7088c5015675 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -69,11 +69,6 @@ enum amdgpu_gfx_partition { #define NUM_XCC(x) hweight16(x) -enum amdgpu_pkg_type { - AMDGPU_PKG_TYPE_APU = 2, - AMDGPU_PKG_TYPE_UNKNOWN, -}; - enum amdgpu_gfx_ras_mem_id_type { AMDGPU_GFX_CP_MEM = 0, AMDGPU_GFX_GCEA_MEM, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h index 89c38d864471..5910d50ac74d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h @@ -23,6 +23,13 @@ #ifndef __AMDGPU_SMUIO_H__ #define __AMDGPU_SMUIO_H__ +enum amdgpu_pkg_type { + AMDGPU_PKG_TYPE_APU = 2, + AMDGPU_PKG_TYPE_CEM = 3, + AMDGPU_PKG_TYPE_OAM = 4, + AMDGPU_PKG_TYPE_UNKNOWN, +}; + struct amdgpu_smuio_funcs { u32 (*get_rom_index_offset)(struct amdgpu_device *adev); u32 (*get_rom_data_offset)(struct amdgpu_device *adev);
Re: [PATCH v3 1/4] drm/amd: Add support for prepare() and complete() callbacks
Am 04.10.23 um 05:39 schrieb Mario Limonciello: On 10/3/2023 16:22, Deucher, Alexander wrote: [Public] -Original Message- From: Limonciello, Mario Sent: Tuesday, October 3, 2023 5:17 PM To: Deucher, Alexander ; amd- g...@lists.freedesktop.org Cc: Wentland, Harry Subject: Re: [PATCH v3 1/4] drm/amd: Add support for prepare() and complete() callbacks On 10/3/2023 16:11, Deucher, Alexander wrote: [Public] -Original Message- From: amd-gfx On Behalf Of Mario Limonciello Sent: Tuesday, October 3, 2023 4:55 PM To: amd-gfx@lists.freedesktop.org Cc: Wentland, Harry ; Limonciello, Mario Subject: [PATCH v3 1/4] drm/amd: Add support for prepare() and complete() callbacks Linux PM core has a prepare() callback run before suspend and complete() callback ran after resume() for devices to use. Add plumbing to bring prepare() to amdgpu. The idea with the new vfuncs for amdgpu is that all IP blocks that memory allocations during suspend should do the allocation from this call instead of the suspend() callback. By moving the allocations to prepare() the system suspend will be failed before any IP block has done any suspend code. If the suspend fails, then do any cleanups in the complete() callback. 
Signed-off-by: Mario Limonciello --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 39 -- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 11 +++--- 3 files changed, 46 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 73e825d20259..5d651552822c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1415,6 +1415,8 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev, void amdgpu_driver_release_kms(struct drm_device *dev); int amdgpu_device_ip_suspend(struct amdgpu_device *adev); +int amdgpu_device_prepare(struct drm_device *dev); void +amdgpu_device_complete(struct drm_device *dev); int amdgpu_device_suspend(struct drm_device *dev, bool fbcon); int amdgpu_device_resume(struct drm_device *dev, bool fbcon); u32 amdgpu_get_vblank_counter_kms(struct drm_crtc *crtc); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index bad2b5577e96..f53cf675c3ce 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4259,6 +4259,43 @@ static int amdgpu_device_evict_resources(struct amdgpu_device *adev) /* * Suspend & resume. */ +/** + * amdgpu_device_prepare - prepare for device suspend + * + * @dev: drm dev pointer + * + * Prepare to put the hw in the suspend state (all asics). + * Returns 0 for success or an error on failure. + * Called at driver suspend. + */ +int amdgpu_device_prepare(struct drm_device *dev) { + struct amdgpu_device *adev = drm_to_adev(dev); + int r; + + if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) + return 0; + + adev->in_suspend = true; + + return 0; +} + +/** + * amdgpu_device_complete - complete the device after resume + * + * @dev: drm dev pointer + * + * Clean up any actions that the prepare step did. + * Called after driver resume. 
+ */ +void amdgpu_device_complete(struct drm_device *dev) { + struct amdgpu_device *adev = drm_to_adev(dev); + + adev->in_suspend = false; +} + /** * amdgpu_device_suspend - initiate device suspend * @@ -4277,8 +4314,6 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon) if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) return 0; - adev->in_suspend = true; - We also set this to false in amdgpu_device_resume() so that should be fixed up as well. But, I'm not sure we want to move this out of amdgpu_device_suspend(). There are places we use amdgpu_device_suspend/resume() outside of pmops that also rely on these being set. Those places may need to be fixed up if we do. IIRC, the switcheroo code uses this. The big reason that I moved it from suspend() to prepare() was so that amdgpu_device_evict_resources() was called with the context of it being set. My thought process: 0) prepare() sets all the time 1) If prepare() fails complete() clears it. 2) If prepare() succeeds it remains set for suspend() 3) If suspend() succeeds it gets cleared at resume() 4) If resume() failed for some reason, it's cleared by complete(). Does it actually matter that it's set while evicting resources? Shouldn't matter for evicting resources. We even have debugfs nodes you can access to forcibly evict resources at runtime for testing memory pressure. Then in that case I think what I'll do is put an extra call for amdgpu_device_evict_resources() in the prepare callback. It shouldn't do any harm to call three times in the suspend sequence instead of two. Yeah, I think you should move the first call from suspend to prepare. Evacuating VRAM
Re: [PATCH v6 1/9] drm/amdgpu: UAPI for user queue management
On 04/10/2023 23:23, Felix Kuehling wrote: On 2023-09-08 12:04, Shashank Sharma wrote: From: Alex Deucher This patch intorduces new UAPI/IOCTL for usermode graphics queue. The userspace app will fill this structure and request the graphics driver to add a graphics work queue for it. The output of this UAPI is a queue id. This UAPI maps the queue into GPU, so the graphics app can start submitting work to the queue as soon as the call returns. V2: Addressed review comments from Alex and Christian - Make the doorbell offset's comment clearer - Change the output parameter name to queue_id V3: Integration with doorbell manager V4: - Updated the UAPI doc (Pierre-Eric) - Created a Union for engine specific MQDs (Alex) - Added Christian's R-B V5: - Add variables for GDS and CSA in MQD structure (Alex) - Make MQD data a ptr-size pair instead of union (Alex) Cc: Alex Deucher Cc: Christian Koenig Reviewed-by: Christian König Signed-off-by: Alex Deucher Signed-off-by: Shashank Sharma --- include/uapi/drm/amdgpu_drm.h | 110 ++ 1 file changed, 110 insertions(+) diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 79b14828d542..627b4a38c855 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -54,6 +54,7 @@ extern "C" { #define DRM_AMDGPU_VM 0x13 #define DRM_AMDGPU_FENCE_TO_HANDLE 0x14 #define DRM_AMDGPU_SCHED 0x15 +#define DRM_AMDGPU_USERQ 0x16 #define DRM_IOCTL_AMDGPU_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create) #define DRM_IOCTL_AMDGPU_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap) @@ -71,6 +72,7 @@ extern "C" { #define DRM_IOCTL_AMDGPU_VM DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_VM, union drm_amdgpu_vm) #define DRM_IOCTL_AMDGPU_FENCE_TO_HANDLE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_FENCE_TO_HANDLE, union drm_amdgpu_fence_to_handle) #define DRM_IOCTL_AMDGPU_SCHED DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_SCHED, union drm_amdgpu_sched) +#define 
DRM_IOCTL_AMDGPU_USERQ DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ, union drm_amdgpu_userq) /** * DOC: memory domains @@ -304,6 +306,114 @@ union drm_amdgpu_ctx { union drm_amdgpu_ctx_out out; }; +/* user queue IOCTL */ +#define AMDGPU_USERQ_OP_CREATE 1 +#define AMDGPU_USERQ_OP_FREE 2 + +/* Flag to indicate secure buffer related workload, unused for now */ +#define AMDGPU_USERQ_MQD_FLAGS_SECURE (1 << 0) +/* Flag to indicate AQL workload, unused for now */ +#define AMDGPU_USERQ_MQD_FLAGS_AQL (1 << 1) + +/* + * MQD (memory queue descriptor) is a set of parameters which allow I find the term MQD misleading. For the firmware the MQD is a very different data structure from what you are defining here. It's a persistent data structure in kernel address space (VMID0) that is shared between the driver and the firmware that gets loaded or updated when queues are mapped or unmapped. I'd want to avoid confusing the firmware MQD with this structure. I agree, I can change the name to something else like userq_properties_gfx_v11 or something similar - Shashank Regards, Felix + * the GPU to uniquely define and identify a usermode queue. This + * structure defines the MQD for GFX-V11 IP ver 0. + */ +struct drm_amdgpu_userq_mqd_gfx_v11_0 { + /** + * @queue_va: Virtual address of the GPU memory which holds the queue + * object. The queue holds the workload packets. + */ + __u64 queue_va; + /** + * @queue_size: Size of the queue in bytes, this needs to be 256-byte + * aligned. + */ + __u64 queue_size; + /** + * @rptr_va : Virtual address of the GPU memory which holds the ring RPTR. + * This object must be at least 8 byte in size and aligned to 8-byte offset. + */ + __u64 rptr_va; + /** + * @wptr_va : Virtual address of the GPU memory which holds the ring WPTR. + * This object must be at least 8 byte in size and aligned to 8-byte offset. + * + * Queue, RPTR and WPTR can come from the same object, as long as the size + * and alignment related requirements are met. 
+ */ + __u64 wptr_va; + /** + * @shadow_va: Virtual address of the GPU memory to hold the shadow buffer. + * This must be a from a separate GPU object, and must be at least 4-page + * sized. + */ + __u64 shadow_va; + /** + * @gds_va: Virtual address of the GPU memory to hold the GDS buffer. + * This must be a from a separate GPU object, and must be at least 1-page + * sized. + */ + __u64 gds_va; + /** + * @csa_va: Virtual address of the GPU memory to hold the CSA buffer. + * This must be a from a separate GPU object, and must be at least 1-page + * sized. + */ + __u64 csa_va; +}; + +struct drm_amdgpu_userq_in {
Re: [PATCH v6 7/9] drm/amdgpu: map wptr BO into GART
Hey Felix, On 04/10/2023 23:34, Felix Kuehling wrote: On 2023-09-18 06:32, Christian König wrote: Am 08.09.23 um 18:04 schrieb Shashank Sharma: To support oversubscription, MES FW expects WPTR BOs to be mapped into GART, before they are submitted to usermode queues. This patch adds a function for the same. V4: fix the wptr value before mapping lookup (Bas, Christian). V5: Addressed review comments from Christian: - Either pin object or allocate from GART, but not both. - All the handling must be done with the VM locks held. Cc: Alex Deucher Cc: Christian Koenig Signed-off-by: Shashank Sharma Signed-off-by: Arvind Yadav --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 81 +++ .../gpu/drm/amd/include/amdgpu_userqueue.h | 1 + 2 files changed, 82 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index e266674e0d44..c0eb622dfc37 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -6427,6 +6427,79 @@ const struct amdgpu_ip_block_version gfx_v11_0_ip_block = .funcs = _v11_0_ip_funcs, }; +static int +gfx_v11_0_map_gtt_bo_to_gart(struct amdgpu_device *adev, struct amdgpu_bo *bo) +{ + int ret; + + ret = amdgpu_bo_reserve(bo, true); + if (ret) { + DRM_ERROR("Failed to reserve bo. ret %d\n", ret); + goto err_reserve_bo_failed; + } + + ret = amdgpu_ttm_alloc_gart(>tbo); + if (ret) { + DRM_ERROR("Failed to bind bo to GART. ret %d\n", ret); + goto err_map_bo_gart_failed; + } + + amdgpu_bo_unreserve(bo); The GART mapping can become invalid as soon as you unlock the BOs. You need to attach an eviction fence for this to work correctly. Don't you need an eviction fence on the WPTR BO regardless of the GTT mapping? Yes, Christian also mentioned this in this iteration, I have implemented the basic eviction fence for [V7], I will publish it soon. 
- Shashank Regards, Felix + bo = amdgpu_bo_ref(bo); + + return 0; + +err_map_bo_gart_failed: + amdgpu_bo_unreserve(bo); +err_reserve_bo_failed: + return ret; +} + +static int +gfx_v11_0_create_wptr_mapping(struct amdgpu_device *adev, + struct amdgpu_usermode_queue *queue, + uint64_t wptr) +{ + struct amdgpu_bo_va_mapping *wptr_mapping; + struct amdgpu_vm *wptr_vm; + struct amdgpu_bo *wptr_bo = NULL; + int ret; + + mutex_lock(>vm->eviction_lock); Never ever touch the eviction lock outside of the VM code! That lock is completely unrelated to what you do here. + wptr_vm = queue->vm; + ret = amdgpu_bo_reserve(wptr_vm->root.bo, false); + if (ret) + goto unlock; + + wptr &= AMDGPU_GMC_HOLE_MASK; + wptr_mapping = amdgpu_vm_bo_lookup_mapping(wptr_vm, wptr >> PAGE_SHIFT); + amdgpu_bo_unreserve(wptr_vm->root.bo); + if (!wptr_mapping) { + DRM_ERROR("Failed to lookup wptr bo\n"); + ret = -EINVAL; + goto unlock; + } + + wptr_bo = wptr_mapping->bo_va->base.bo; + if (wptr_bo->tbo.base.size > PAGE_SIZE) { + DRM_ERROR("Requested GART mapping for wptr bo larger than one page\n"); + ret = -EINVAL; + goto unlock; + } We probably also want to enforce that this BO is a per VM BO. + + ret = gfx_v11_0_map_gtt_bo_to_gart(adev, wptr_bo); + if (ret) { + DRM_ERROR("Failed to map wptr bo to GART\n"); + goto unlock; + } + + queue->wptr_mc_addr = wptr_bo->tbo.resource->start << PAGE_SHIFT; This needs to be amdgpu_bo_gpu_offset() instead. Regards, Christian. 
+ +unlock: + mutex_unlock(>vm->eviction_lock); + return ret; +} + static void gfx_v11_0_userq_unmap(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_usermode_queue *queue) { @@ -6475,6 +6548,7 @@ static int gfx_v11_0_userq_map(struct amdgpu_userq_mgr *uq_mgr, queue_input.queue_size = userq_props->queue_size >> 2; queue_input.doorbell_offset = userq_props->doorbell_index; queue_input.page_table_base_addr = amdgpu_gmc_pd_addr(queue->vm->root.bo); + queue_input.wptr_mc_addr = queue->wptr_mc_addr; amdgpu_mes_lock(>mes); r = adev->mes.funcs->add_hw_queue(>mes, _input); @@ -6601,6 +6675,13 @@ static int gfx_v11_0_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr, goto free_mqd; } + /* FW expects WPTR BOs to be mapped into GART */ + r = gfx_v11_0_create_wptr_mapping(adev, queue, userq_props.wptr_gpu_addr); + if (r) { + DRM_ERROR("Failed to create WPTR mapping\n"); + goto free_ctx; + } + /* Map userqueue into FW using MES */ r = gfx_v11_0_userq_map(uq_mgr, queue, _props); if (r) { diff --git a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h index 34e20daa06c8..ae155de62560 100644 --- a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h +++
Re: [PATCH 0/9] drm: Annotate structs with __counted_by
Am 02.10.23 um 20:22 schrieb Kees Cook: On Mon, Oct 02, 2023 at 08:11:41PM +0200, Christian König wrote: Am 02.10.23 um 20:08 schrieb Kees Cook: On Mon, Oct 02, 2023 at 08:01:57PM +0200, Christian König wrote: Am 02.10.23 um 18:53 schrieb Kees Cook: On Mon, Oct 02, 2023 at 11:06:19AM -0400, Alex Deucher wrote: On Mon, Oct 2, 2023 at 5:20 AM Christian König wrote: Am 29.09.23 um 21:33 schrieb Kees Cook: On Fri, 22 Sep 2023 10:32:05 -0700, Kees Cook wrote: This is a batch of patches touching drm for preparing for the coming implementation by GCC and Clang of the __counted_by attribute. Flexible array members annotated with __counted_by can have their accesses bounds-checked at run-time checking via CONFIG_UBSAN_BOUNDS (for array indexing) and CONFIG_FORTIFY_SOURCE (for strcpy/memcpy-family functions). As found with Coccinelle[1], add __counted_by to structs that would benefit from the annotation. [...] Since this got Acks, I figure I should carry it in my tree. Let me know if this should go via drm instead. Applied to for-next/hardening, thanks! [1/9] drm/amd/pm: Annotate struct smu10_voltage_dependency_table with __counted_by https://git.kernel.org/kees/c/a6046ac659d6 STOP! In a follow up discussion Alex and I figured out that this won't work. I'm so confused; from the discussion I saw that Alex said both instances were false positives? The value in the structure is byte swapped based on some firmware endianness which not necessary matches the CPU endianness. SMU10 is APU only so the endianess of the SMU firmware and the CPU will always match. Which I think is what is being said here? Please revert that one from going upstream if it's already on it's way. And because of those reasons I strongly think that patches like this should go through the DRM tree :) Sure, that's fine -- please let me know. It was others Acked/etc. Who should carry these patches? Probably best if the relevant maintainer pick them up individually. 
Some of those structures are filled in by firmware/hardware and only the maintainers can judge if that value actually matches what the compiler needs. We have cases where individual bits are used as flags or when the size is byte swapped etc... Even Alex and I didn't immediately say how and where that field is actually used and had to dig that up. That's where the confusion came from. Okay, I've dropped them all from my tree. Several had Acks/Reviews, so hopefully those can get picked up for the DRM tree? I will pick those up to go through drm-misc-next. Going to ping maintainers once more when I'm not sure if stuff is correct or not. Sounds great; thanks! I wasn't 100% sure for the VC4 patch, but pushed the whole set to drm-misc-next anyway. This also means that the patches are now auto merged into the drm-tip integration branch and should any build or unit test go boom we should notice immediately and can revert it pretty easily. Thanks, Christian. -Kees
[PATCH] drm/amd/display: Fix mst hub unplug warning
[Why] Unplug mst hub will cause warning. That's because dm_helpers_construct_old_payload() is changed to be called after payload removement from dc link. In dm_helpers_construct_old_payload(), We refer to the vcpi in payload allocation table of dc link to construct the old payload and payload is no longer in the table when we call the function now. [How] Refer to the mst_state to construct the number of time slot for old payload now. Note that dm_helpers_construct_old_payload() is just a quick workaround before and we are going to abandon it soon. Fixes: 5aa1dfcdf0a4 ("drm/mst: Refactor the flow for payload allocation/removement") Reviewed-by: Jerry Zuo Signed-off-by: Wayne Lin --- .../amd/display/amdgpu_dm/amdgpu_dm_helpers.c | 38 +-- 1 file changed, 18 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c index baf7e5254fb3..2f94bcf128c0 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c @@ -204,15 +204,16 @@ void dm_helpers_dp_update_branch_info( {} static void dm_helpers_construct_old_payload( - struct dc_link *link, - int pbn_per_slot, + struct drm_dp_mst_topology_mgr *mgr, + struct drm_dp_mst_topology_state *mst_state, struct drm_dp_mst_atomic_payload *new_payload, struct drm_dp_mst_atomic_payload *old_payload) { - struct link_mst_stream_allocation_table current_link_table = - link->mst_stream_alloc_table; - struct link_mst_stream_allocation *dc_alloc; - int i; + struct drm_dp_mst_atomic_payload *pos; + int pbn_per_slot = mst_state->pbn_div; + u8 next_payload_vc_start = mgr->next_start_slot; + u8 payload_vc_start = new_payload->vc_start_slot; + u8 allocated_time_slots; *old_payload = *new_payload; @@ -221,20 +222,17 @@ static void dm_helpers_construct_old_payload( * struct drm_dp_mst_atomic_payload are don't care fields * while calling drm_dp_remove_payload_part2() */ - 
for (i = 0; i < current_link_table.stream_count; i++) { - dc_alloc = - _link_table.stream_allocations[i]; - - if (dc_alloc->vcp_id == new_payload->vcpi) { - old_payload->time_slots = dc_alloc->slot_count; - old_payload->pbn = dc_alloc->slot_count * pbn_per_slot; - break; - } + list_for_each_entry(pos, _state->payloads, next) { + if (pos != new_payload && + pos->vc_start_slot > payload_vc_start && + pos->vc_start_slot < next_payload_vc_start) + next_payload_vc_start = pos->vc_start_slot; } - /* make sure there is an old payload*/ - ASSERT(i != current_link_table.stream_count); + allocated_time_slots = next_payload_vc_start - payload_vc_start; + old_payload->time_slots = allocated_time_slots; + old_payload->pbn = allocated_time_slots * pbn_per_slot; } /* @@ -272,8 +270,8 @@ bool dm_helpers_dp_mst_write_payload_allocation_table( drm_dp_add_payload_part1(mst_mgr, mst_state, new_payload); } else { /* construct old payload by VCPI*/ - dm_helpers_construct_old_payload(stream->link, mst_state->pbn_div, - new_payload, _payload); + dm_helpers_construct_old_payload(mst_mgr, mst_state, +new_payload, _payload); target_payload = _payload; drm_dp_remove_payload_part1(mst_mgr, mst_state, new_payload); @@ -366,7 +364,7 @@ bool dm_helpers_dp_mst_send_payload_allocation( if (enable) { ret = drm_dp_add_payload_part2(mst_mgr, mst_state->base.state, new_payload); } else { - dm_helpers_construct_old_payload(stream->link, mst_state->pbn_div, + dm_helpers_construct_old_payload(mst_mgr, mst_state, new_payload, _payload); drm_dp_remove_payload_part2(mst_mgr, mst_state, _payload, new_payload); } -- 2.37.3