[V3 3/7] drm/amd/pm: add fan temperature/pwm curve OD setting support for SMU13

2023-08-29 Thread Evan Quan
Add SMU13 fan temperature/pwm curve OD setting support.

Signed-off-by: Evan Quan 
--
v1->v2:
  - add missing kerneldoc for the new interface(Alex)
v2->v3:
  - rich the document for the new interface(Alex)
---
 Documentation/gpu/amdgpu/thermal.rst  |   6 +
 .../gpu/drm/amd/include/kgd_pp_interface.h|   4 +-
 drivers/gpu/drm/amd/pm/amdgpu_pm.c| 210 +-
 drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h   |   4 +
 drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c |   2 +
 drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h  |   1 +
 .../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c  | 102 -
 .../drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c  | 102 -
 8 files changed, 423 insertions(+), 8 deletions(-)

diff --git a/Documentation/gpu/amdgpu/thermal.rst 
b/Documentation/gpu/amdgpu/thermal.rst
index 5e27e4eb3959..08ee33b2acb6 100644
--- a/Documentation/gpu/amdgpu/thermal.rst
+++ b/Documentation/gpu/amdgpu/thermal.rst
@@ -64,6 +64,12 @@ gpu_metrics
 .. kernel-doc:: drivers/gpu/drm/amd/pm/amdgpu_pm.c
:doc: gpu_metrics
 
+fan_curve
+-
+
+.. kernel-doc:: drivers/gpu/drm/amd/pm/amdgpu_pm.c
+   :doc: fan_curve
+
 GFXOFF
 ==
 
diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h 
b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
index 84c5224d994c..7bdd2bdab970 100644
--- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
@@ -113,6 +113,7 @@ enum pp_clock_type {
OD_RANGE,
OD_VDDGFX_OFFSET,
OD_CCLK,
+   OD_FAN_CURVE,
 };
 
 enum amd_pp_sensors {
@@ -186,7 +187,8 @@ enum PP_OD_DPM_TABLE_COMMAND {
PP_OD_EDIT_VDDC_CURVE,
PP_OD_RESTORE_DEFAULT_TABLE,
PP_OD_COMMIT_DPM_TABLE,
-   PP_OD_EDIT_VDDGFX_OFFSET
+   PP_OD_EDIT_VDDGFX_OFFSET,
+   PP_OD_EDIT_FAN_CURVE,
 };
 
 struct pp_states_info {
diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c 
b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
index edb52697c489..1008d43e9c3f 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
@@ -3383,7 +3383,215 @@ static const struct attribute_group *hwmon_groups[] = {
NULL
 };
 
-static struct od_feature_set amdgpu_od_set;
+static int amdgpu_retrieve_od_settings(struct amdgpu_device *adev,
+  enum pp_clock_type od_type,
+  char *buf)
+{
+   int size = 0;
+   int ret;
+
+   if (amdgpu_in_reset(adev))
+   return -EPERM;
+   if (adev->in_suspend && !adev->in_runpm)
+   return -EPERM;
+
+   ret = pm_runtime_get_sync(adev->dev);
+   if (ret < 0) {
+   pm_runtime_put_autosuspend(adev->dev);
+   return ret;
+   }
+
+   size = amdgpu_dpm_print_clock_levels(adev, od_type, buf);
+   if (size == 0)
+   size = sysfs_emit(buf, "\n");
+
+   pm_runtime_mark_last_busy(adev->dev);
+   pm_runtime_put_autosuspend(adev->dev);
+
+   return size;
+}
+
+static int parse_input_od_command_lines(const char *buf,
+   size_t count,
+   u32 *type,
+   long *params,
+   uint32_t *num_of_params)
+{
+   const char delimiter[3] = {' ', '\n', '\0'};
+   uint32_t parameter_size = 0;
+   char buf_cpy[128] = {0};
+   char *tmp_str, *sub_str;
+   int ret;
+
+   if (count > sizeof(buf_cpy) - 1)
+   return -EINVAL;
+
+   memcpy(buf_cpy, buf, count);
+   tmp_str = buf_cpy;
+
+   /* skip heading spaces */
+   while (isspace(*tmp_str))
+   tmp_str++;
+
+   switch (*tmp_str) {
+   case 'c':
+   *type = PP_OD_COMMIT_DPM_TABLE;
+   return 0;
+   default:
+   break;
+   }
+
+   while ((sub_str = strsep(_str, delimiter)) != NULL) {
+   if (strlen(sub_str) == 0)
+   continue;
+
+   ret = kstrtol(sub_str, 0, [parameter_size]);
+   if (ret)
+   return -EINVAL;
+   parameter_size++;
+
+   while (isspace(*tmp_str))
+   tmp_str++;
+   }
+
+   *num_of_params = parameter_size;
+
+   return 0;
+}
+
+static int
+amdgpu_distribute_custom_od_settings(struct amdgpu_device *adev,
+enum PP_OD_DPM_TABLE_COMMAND cmd_type,
+const char *in_buf,
+size_t count)
+{
+   uint32_t parameter_size = 0;
+   long parameter[64];
+   int ret;
+
+   if (amdgpu_in_reset(adev))
+   return -EPERM;
+   if (adev->in_suspend && !adev->in_runpm)
+   return -EPERM;
+
+   ret = parse_input_od_command_lines(in_buf,
+  count,
+  _type,
+

[V3 7/7] drm/amd/pm: add fan minimum pwm OD setting support for SMU13

2023-08-29 Thread Evan Quan
Add SMU13 fan minimum pwm OD setting support.

Signed-off-by: Evan Quan 
--
v1->v2:
  - add missing kerneldoc for the new interface(Alex)
v2->v3:
  - rich the document for the new interface(Alex)
---
 Documentation/gpu/amdgpu/thermal.rst  |  6 ++
 .../gpu/drm/amd/include/kgd_pp_interface.h|  2 +
 drivers/gpu/drm/amd/pm/amdgpu_pm.c| 64 +++
 drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h   |  2 +
 drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c |  2 +
 drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h  |  1 +
 .../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c  | 51 ++-
 .../drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c  | 51 ++-
 8 files changed, 177 insertions(+), 2 deletions(-)

diff --git a/Documentation/gpu/amdgpu/thermal.rst 
b/Documentation/gpu/amdgpu/thermal.rst
index aa2d15706cda..2f6166f81e6a 100644
--- a/Documentation/gpu/amdgpu/thermal.rst
+++ b/Documentation/gpu/amdgpu/thermal.rst
@@ -88,6 +88,12 @@ fan_target_temperature
 .. kernel-doc:: drivers/gpu/drm/amd/pm/amdgpu_pm.c
:doc: fan_target_temperature
 
+fan_minimum_pwm
+---
+
+.. kernel-doc:: drivers/gpu/drm/amd/pm/amdgpu_pm.c
+   :doc: fan_minimum_pwm
+
 GFXOFF
 ==
 
diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h 
b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
index 0501d174dbd8..5a889f733462 100644
--- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
@@ -117,6 +117,7 @@ enum pp_clock_type {
OD_ACOUSTIC_LIMIT,
OD_ACOUSTIC_TARGET,
OD_FAN_TARGET_TEMPERATURE,
+   OD_FAN_MINIMUM_PWM,
 };
 
 enum amd_pp_sensors {
@@ -195,6 +196,7 @@ enum PP_OD_DPM_TABLE_COMMAND {
PP_OD_EDIT_ACOUSTIC_LIMIT,
PP_OD_EDIT_ACOUSTIC_TARGET,
PP_OD_EDIT_FAN_TARGET_TEMPERATURE,
+   PP_OD_EDIT_FAN_MINIMUM_PWM,
 };
 
 struct pp_states_info {
diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c 
b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
index 060a38e30878..84e1af6a6ce7 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
@@ -3745,6 +3745,62 @@ static umode_t fan_target_temperature_visible(struct 
amdgpu_device *adev)
return umode;
 }
 
+/**
+ * DOC: fan_minimum_pwm
+ *
+ * The amdgpu driver provides a sysfs API for checking and adjusting the
+ * minimum fan speed in PWM.
+ *
+ * Reading back the file shows you the current setting and the permitted
+ * ranges if changable.
+ *
+ * Writing an integer to the file, change the setting accordingly.
+ *
+ * When you have finished the editing, write "c" (commit) to the file to commit
+ * your changes.
+ *
+ * This setting works under auto fan control mode only. It can co-exist with
+ * other settings which can work also under auto mode. It adjusts the PMFW's
+ * behavior about the minimum fan speed in PWM the fan should spin. Setting
+ * via this interface will switch the fan control to auto mode implicitly.
+ */
+static ssize_t fan_minimum_pwm_show(struct kobject *kobj,
+   struct kobj_attribute *attr,
+   char *buf)
+{
+   struct od_kobj *container = container_of(kobj, struct od_kobj, kobj);
+   struct amdgpu_device *adev = (struct amdgpu_device *)container->priv;
+
+   return (ssize_t)amdgpu_retrieve_od_settings(adev, OD_FAN_MINIMUM_PWM, 
buf);
+}
+
+static ssize_t fan_minimum_pwm_store(struct kobject *kobj,
+struct kobj_attribute *attr,
+const char *buf,
+size_t count)
+{
+   struct od_kobj *container = container_of(kobj, struct od_kobj, kobj);
+   struct amdgpu_device *adev = (struct amdgpu_device *)container->priv;
+
+   return (ssize_t)amdgpu_distribute_custom_od_settings(adev,
+
PP_OD_EDIT_FAN_MINIMUM_PWM,
+buf,
+count);
+}
+
+static umode_t fan_minimum_pwm_visible(struct amdgpu_device *adev)
+{
+   umode_t umode = ;
+
+   if (adev->pm.od_feature_mask & OD_OPS_SUPPORT_FAN_MINIMUM_PWM_RETRIEVE)
+   umode |= S_IRUSR | S_IRGRP | S_IROTH;
+
+   if (adev->pm.od_feature_mask & OD_OPS_SUPPORT_FAN_MINIMUM_PWM_SET)
+   umode |= S_IWUSR;
+
+   return umode;
+}
+
 static struct od_feature_set amdgpu_od_set = {
.containers = {
[0] = {
@@ -3782,6 +3838,14 @@ static struct od_feature_set amdgpu_od_set = {
.store = 
fan_target_temperature_store,
},
},
+   [4] = {
+   .name = "fan_minimum_pwm",
+   .ops = {
+   .is_visible = 

[V3 6/7] drm/amd/pm: add fan target temperature OD setting support for SMU13

2023-08-29 Thread Evan Quan
Add SMU13 fan target temperature OD setting support.

Signed-off-by: Evan Quan 
--
v1->v2:
  - add missing kerneldoc for the new interface(Alex)
v2->v3:
  - rich the document for the new interface(Alex)
---
 Documentation/gpu/amdgpu/thermal.rst  |  6 ++
 .../gpu/drm/amd/include/kgd_pp_interface.h|  2 +
 drivers/gpu/drm/amd/pm/amdgpu_pm.c| 66 +++
 drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h   |  2 +
 drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c |  2 +
 drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h  |  1 +
 .../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c  | 51 +-
 .../drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c  | 51 +-
 8 files changed, 179 insertions(+), 2 deletions(-)

diff --git a/Documentation/gpu/amdgpu/thermal.rst 
b/Documentation/gpu/amdgpu/thermal.rst
index 26ca4812b9f6..aa2d15706cda 100644
--- a/Documentation/gpu/amdgpu/thermal.rst
+++ b/Documentation/gpu/amdgpu/thermal.rst
@@ -82,6 +82,12 @@ acoustic_target_rpm_threshold
 .. kernel-doc:: drivers/gpu/drm/amd/pm/amdgpu_pm.c
:doc: acoustic_target_rpm_threshold
 
+fan_target_temperature
+--
+
+.. kernel-doc:: drivers/gpu/drm/amd/pm/amdgpu_pm.c
+   :doc: fan_target_temperature
+
 GFXOFF
 ==
 
diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h 
b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
index a7bc81a0a36e..0501d174dbd8 100644
--- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
@@ -116,6 +116,7 @@ enum pp_clock_type {
OD_FAN_CURVE,
OD_ACOUSTIC_LIMIT,
OD_ACOUSTIC_TARGET,
+   OD_FAN_TARGET_TEMPERATURE,
 };
 
 enum amd_pp_sensors {
@@ -193,6 +194,7 @@ enum PP_OD_DPM_TABLE_COMMAND {
PP_OD_EDIT_FAN_CURVE,
PP_OD_EDIT_ACOUSTIC_LIMIT,
PP_OD_EDIT_ACOUSTIC_TARGET,
+   PP_OD_EDIT_FAN_TARGET_TEMPERATURE,
 };
 
 struct pp_states_info {
diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c 
b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
index 0d63d31b05d3..060a38e30878 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
@@ -3687,6 +3687,64 @@ static umode_t acoustic_target_threshold_visible(struct 
amdgpu_device *adev)
return umode;
 }
 
+/**
+ * DOC: fan_target_temperature
+ *
+ * The amdgpu driver provides a sysfs API for checking and adjusting the
+ * target tempeature in Celsius degree for fan control.
+ *
+ * Reading back the file shows you the current setting and the permitted
+ * ranges if changable.
+ *
+ * Writing an integer to the file, change the setting accordingly.
+ *
+ * When you have finished the editing, write "c" (commit) to the file to commit
+ * your changes.
+ *
+ * This setting works under auto fan control mode only. It can co-exist with
+ * other settings which can work also under auto mode. Paring with the
+ * acoustic_target_rpm_threshold setting, they define the maximum speed in
+ * RPM the fan can spin when ASIC temperature is not greater than target
+ * temperature. Setting via this interface will switch the fan control to
+ * auto mode implicitly.
+ */
+static ssize_t fan_target_temperature_show(struct kobject *kobj,
+  struct kobj_attribute *attr,
+  char *buf)
+{
+   struct od_kobj *container = container_of(kobj, struct od_kobj, kobj);
+   struct amdgpu_device *adev = (struct amdgpu_device *)container->priv;
+
+   return (ssize_t)amdgpu_retrieve_od_settings(adev, 
OD_FAN_TARGET_TEMPERATURE, buf);
+}
+
+static ssize_t fan_target_temperature_store(struct kobject *kobj,
+   struct kobj_attribute *attr,
+   const char *buf,
+   size_t count)
+{
+   struct od_kobj *container = container_of(kobj, struct od_kobj, kobj);
+   struct amdgpu_device *adev = (struct amdgpu_device *)container->priv;
+
+   return (ssize_t)amdgpu_distribute_custom_od_settings(adev,
+
PP_OD_EDIT_FAN_TARGET_TEMPERATURE,
+buf,
+count);
+}
+
+static umode_t fan_target_temperature_visible(struct amdgpu_device *adev)
+{
+   umode_t umode = ;
+
+   if (adev->pm.od_feature_mask & 
OD_OPS_SUPPORT_FAN_TARGET_TEMPERATURE_RETRIEVE)
+   umode |= S_IRUSR | S_IRGRP | S_IROTH;
+
+   if (adev->pm.od_feature_mask & 
OD_OPS_SUPPORT_FAN_TARGET_TEMPERATURE_SET)
+   umode |= S_IWUSR;
+
+   return umode;
+}
+
 static struct od_feature_set amdgpu_od_set = {
.containers = {
[0] = {
@@ -3716,6 +3774,14 @@ static struct od_feature_set amdgpu_od_set = {
.store = 
acoustic_target_threshold_store,
},
  

[V3 4/7] drm/amd/pm: add fan acoustic limit OD setting support for SMU13

2023-08-29 Thread Evan Quan
Add SMU13 fan acoustic limit OD setting support.

Signed-off-by: Evan Quan 
--
v1->v2:
  - add missing kerneldoc for the new interface(Alex)
v2->v3:
  - rich the document for the new interface(Alex)
---
 Documentation/gpu/amdgpu/thermal.rst  |  6 ++
 .../gpu/drm/amd/include/kgd_pp_interface.h|  2 +
 drivers/gpu/drm/amd/pm/amdgpu_pm.c| 63 +++
 drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h   |  2 +
 drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c |  2 +
 drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h  |  1 +
 .../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c  | 51 ++-
 .../drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c  | 51 ++-
 8 files changed, 176 insertions(+), 2 deletions(-)

diff --git a/Documentation/gpu/amdgpu/thermal.rst 
b/Documentation/gpu/amdgpu/thermal.rst
index 08ee33b2acb6..8ed888f81d2c 100644
--- a/Documentation/gpu/amdgpu/thermal.rst
+++ b/Documentation/gpu/amdgpu/thermal.rst
@@ -70,6 +70,12 @@ fan_curve
 .. kernel-doc:: drivers/gpu/drm/amd/pm/amdgpu_pm.c
:doc: fan_curve
 
+acoustic_limit_rpm_threshold
+
+
+.. kernel-doc:: drivers/gpu/drm/amd/pm/amdgpu_pm.c
+   :doc: acoustic_limit_rpm_threshold
+
 GFXOFF
 ==
 
diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h 
b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
index 7bdd2bdab970..e3dcef8c4ab7 100644
--- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
@@ -114,6 +114,7 @@ enum pp_clock_type {
OD_VDDGFX_OFFSET,
OD_CCLK,
OD_FAN_CURVE,
+   OD_ACOUSTIC_LIMIT,
 };
 
 enum amd_pp_sensors {
@@ -189,6 +190,7 @@ enum PP_OD_DPM_TABLE_COMMAND {
PP_OD_COMMIT_DPM_TABLE,
PP_OD_EDIT_VDDGFX_OFFSET,
PP_OD_EDIT_FAN_CURVE,
+   PP_OD_EDIT_ACOUSTIC_LIMIT,
 };
 
 struct pp_states_info {
diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c 
b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
index 1008d43e9c3f..1ec8a8c4016a 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
@@ -3575,6 +3575,61 @@ static umode_t fan_curve_visible(struct amdgpu_device 
*adev)
return umode;
 }
 
+/**
+ * DOC: acoustic_limit_rpm_threshold
+ *
+ * The amdgpu driver provides a sysfs API for checking and adjusting the
+ * acoustic limit in RPM for fan control.
+ *
+ * Reading back the file shows you the current setting and the permitted
+ * ranges if changable.
+ *
+ * Writing an integer to the file, change the setting accordingly.
+ *
+ * When you have finished the editing, write "c" (commit) to the file to commit
+ * your changes.
+ *
+ * This setting works under auto fan control mode only. It adjusts the PMFW's
+ * behavior about the maximum speed in RPM the fan can spin. Setting via this
+ * interface will switch the fan control to auto mode implicitly.
+ */
+static ssize_t acoustic_limit_threshold_show(struct kobject *kobj,
+struct kobj_attribute *attr,
+char *buf)
+{
+   struct od_kobj *container = container_of(kobj, struct od_kobj, kobj);
+   struct amdgpu_device *adev = (struct amdgpu_device *)container->priv;
+
+   return (ssize_t)amdgpu_retrieve_od_settings(adev, OD_ACOUSTIC_LIMIT, 
buf);
+}
+
+static ssize_t acoustic_limit_threshold_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+   struct od_kobj *container = container_of(kobj, struct od_kobj, kobj);
+   struct amdgpu_device *adev = (struct amdgpu_device *)container->priv;
+
+   return (ssize_t)amdgpu_distribute_custom_od_settings(adev,
+
PP_OD_EDIT_ACOUSTIC_LIMIT,
+buf,
+count);
+}
+
+static umode_t acoustic_limit_threshold_visible(struct amdgpu_device *adev)
+{
+   umode_t umode = ;
+
+   if (adev->pm.od_feature_mask & 
OD_OPS_SUPPORT_ACOUSTIC_LIMIT_THRESHOLD_RETRIEVE)
+   umode |= S_IRUSR | S_IRGRP | S_IROTH;
+
+   if (adev->pm.od_feature_mask & 
OD_OPS_SUPPORT_ACOUSTIC_LIMIT_THRESHOLD_SET)
+   umode |= S_IWUSR;
+
+   return umode;
+}
+
 static struct od_feature_set amdgpu_od_set = {
.containers = {
[0] = {
@@ -3588,6 +3643,14 @@ static struct od_feature_set amdgpu_od_set = {
.store = fan_curve_store,
},
},
+   [1] = {
+   .name = "acoustic_limit_rpm_threshold",
+   .ops = {
+   .is_visible = 

[V3 2/7] drm/amdgpu: revise the device initialization sequences

2023-08-29 Thread Evan Quan
By placing the sysfs interfaces creation after `.late_int`. Since some
operations performed during `.late_init` may affect how the sysfs
interfaces should be created.

Signed-off-by: Evan Quan 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 37 --
 1 file changed, 21 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index e588cf7a14f6..f324ef5da792 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3862,22 +3862,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
/* Get a log2 for easy divisions. */
adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
 
-   r = amdgpu_atombios_sysfs_init(adev);
-   if (r)
-   drm_err(>ddev,
-   "registering atombios sysfs failed (%d).\n", r);
-
-   r = amdgpu_pm_sysfs_init(adev);
-   if (r)
-   DRM_ERROR("registering pm sysfs failed (%d).\n", r);
-
-   r = amdgpu_ucode_sysfs_init(adev);
-   if (r) {
-   adev->ucode_sysfs_en = false;
-   DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
-   } else
-   adev->ucode_sysfs_en = true;
-
/*
 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
 * Otherwise the mgpu fan boost feature will be skipped due to the
@@ -3906,6 +3890,27 @@ int amdgpu_device_init(struct amdgpu_device *adev,
flush_delayed_work(>delayed_init_work);
}
 
+   /*
+* Place those sysfs registering after `late_init`. As some of those
+* operations performed in `late_init` might affect the sysfs
+* interfaces creating.
+*/
+   r = amdgpu_atombios_sysfs_init(adev);
+   if (r)
+   drm_err(>ddev,
+   "registering atombios sysfs failed (%d).\n", r);
+
+   r = amdgpu_pm_sysfs_init(adev);
+   if (r)
+   DRM_ERROR("registering pm sysfs failed (%d).\n", r);
+
+   r = amdgpu_ucode_sysfs_init(adev);
+   if (r) {
+   adev->ucode_sysfs_en = false;
+   DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
+   } else
+   adev->ucode_sysfs_en = true;
+
r = sysfs_create_files(>dev->kobj, amdgpu_dev_attributes);
if (r)
dev_err(adev->dev, "Could not create amdgpu device attr\n");
-- 
2.34.1



[V3 0/7] A new set of Linux OD interfaces

2023-08-29 Thread Evan Quan
The existing OD interface `pp_od_clk_voltage` is unable to meet the growing
demands for more OD functionalities. Since the buf used within it comes with
size limit as one page. With more OD functionalities added, we will hit that
limit soon.

To better meet the growing demainds, a new set of OD interfaces are designed.
With this new design, there will be multiple interfaces exposed with each
representing a single OD functionality. And all those interfaces will be
arranged in a tree liked hierarchy as below. Meanwhile all functionalities
for the same component will be arranged under the same directory.

gpu_od/
├── fan_ctrl
│   ├── acoustic_limit_rpm_threshold
│   ├── acoustic_target_rpm_threshold
│   ├── fan_curve
│   ├── fan_minimum_pwm
│   ├── fan_target_temperature

Evan Quan (7):
  drm/amd/pm: introduce a new set of OD interfaces
  drm/amdgpu: revise the device initialization sequences
  drm/amd/pm: add fan temperature/pwm curve OD setting support for SMU13
  drm/amd/pm: add fan acoustic limit OD setting support for SMU13
  drm/amd/pm: add fan acoustic target OD setting support for SMU13
  drm/amd/pm: add fan target temperature OD setting support for SMU13
  drm/amd/pm: add fan minimum pwm OD setting support for SMU13

 Documentation/gpu/amdgpu/thermal.rst  |  30 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c|  39 +-
 .../gpu/drm/amd/include/kgd_pp_interface.h|  12 +-
 drivers/gpu/drm/amd/pm/amdgpu_pm.c| 730 +-
 drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h   |  14 +
 drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c |  10 +
 drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h  |   5 +
 .../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c  | 298 ++-
 .../drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c  | 298 ++-
 9 files changed, 1411 insertions(+), 25 deletions(-)

-- 
2.34.1



[PATCH] drm/amdgpu: Use correct KIQ MEC engine for gfx9.4.3

2023-08-29 Thread Victor Lu
amdgpu_kiq_wreg/rreg is hardcoded to use MEC engine 0.
Add an "xcc_id" parameter to them so its uses the correct XCD's engine

Signed-off-by: Victor Lu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h| 24 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 10 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c|  8 
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h|  4 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c   |  4 ++--
 5 files changed, 28 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 4de074243c4d..798248677849 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1136,12 +1136,14 @@ uint32_t amdgpu_device_wait_on_rreg(struct 
amdgpu_device *adev,
uint32_t inst, uint32_t reg_addr, char reg_name[],
uint32_t expected_value, uint32_t mask);
 uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
-   uint32_t reg, uint32_t acc_flags);
+   uint32_t reg, uint32_t acc_flags,
+   uint32_t xcc_id);
 u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev,
u64 reg_addr);
 void amdgpu_device_wreg(struct amdgpu_device *adev,
uint32_t reg, uint32_t v,
-   uint32_t acc_flags);
+   uint32_t acc_flags,
+   uint32_t xcc_id);
 void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev,
 u64 reg_addr, u32 reg_data);
 void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
@@ -1177,18 +1179,20 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
 #define AMDGPU_REGS_NO_KIQ(1<<1)
 #define AMDGPU_REGS_RLC(1<<2)

-#define RREG32_NO_KIQ(reg) amdgpu_device_rreg(adev, (reg), AMDGPU_REGS_NO_KIQ)
-#define WREG32_NO_KIQ(reg, v) amdgpu_device_wreg(adev, (reg), (v), 
AMDGPU_REGS_NO_KIQ)
+#define RREG32_NO_KIQ(reg) amdgpu_device_rreg(adev, (reg), AMDGPU_REGS_NO_KIQ, 
0)
+#define WREG32_NO_KIQ(reg, v) amdgpu_device_wreg(adev, (reg), (v), 
AMDGPU_REGS_NO_KIQ, 0)

-#define RREG32_KIQ(reg) amdgpu_kiq_rreg(adev, (reg))
-#define WREG32_KIQ(reg, v) amdgpu_kiq_wreg(adev, (reg), (v))
+#define RREG32_KIQ(reg) amdgpu_kiq_rreg(adev, (reg), 0)
+#define WREG32_KIQ(reg, v) amdgpu_kiq_wreg(adev, (reg), (v), 0)

 #define RREG8(reg) amdgpu_mm_rreg8(adev, (reg))
 #define WREG8(reg, v) amdgpu_mm_wreg8(adev, (reg), (v))

-#define RREG32(reg) amdgpu_device_rreg(adev, (reg), 0)
-#define DREG32(reg) printk(KERN_INFO "REGISTER: " #reg " : 0x%08X\n", 
amdgpu_device_rreg(adev, (reg), 0))
-#define WREG32(reg, v) amdgpu_device_wreg(adev, (reg), (v), 0)
+#define RREG32(reg) amdgpu_device_rreg(adev, (reg), 0, 0)
+#define RREG32_XCC(reg, xcc) amdgpu_device_rreg(adev, (reg), 0, xcc)
+#define DREG32(reg) printk(KERN_INFO "REGISTER: " #reg " : 0x%08X\n", 
amdgpu_device_rreg(adev, (reg), 0, 0))
+#define WREG32(reg, v) amdgpu_device_wreg(adev, (reg), (v), 0, 0)
+#define WREG32_XCC(reg, v, xcc) amdgpu_device_wreg(adev, (reg), (v), 0, xcc)
 #define REG_SET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK)
 #define REG_GET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK)
 #define RREG32_PCIE(reg) adev->pcie_rreg(adev, (reg))
@@ -1236,7 +1240,7 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
WREG32_SMC(_Reg, tmp);  \
} while (0)

-#define DREG32_SYS(sqf, adev, reg) seq_printf((sqf), #reg " : 0x%08X\n", 
amdgpu_device_rreg((adev), (reg), false))
+#define DREG32_SYS(sqf, adev, reg) seq_printf((sqf), #reg " : 0x%08X\n", 
amdgpu_device_rreg((adev), (reg), false, 0))

 #define REG_FIELD_SHIFT(reg, field) reg##__##field##__SHIFT
 #define REG_FIELD_MASK(reg, field) reg##__##field##_MASK
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 533daba2accb..7290e79f7584 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -394,7 +394,8 @@ bool amdgpu_device_skip_hw_access(struct amdgpu_device 
*adev)
  * Returns the 32 bit value from the offset specified.
  */
 uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
-   uint32_t reg, uint32_t acc_flags)
+   uint32_t reg, uint32_t acc_flags,
+   uint32_t xcc_id)
 {
uint32_t ret;

@@ -405,7 +406,7 @@ uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
amdgpu_sriov_runtime(adev) &&
down_read_trylock(>reset_domain->sem)) {
-   ret = amdgpu_kiq_rreg(adev, reg);
+   ret = amdgpu_kiq_rreg(adev, reg, xcc_id);
up_read(>reset_domain->sem);
} else {
ret 

[PATCH] Revert "drm/amd/display: Remove v_startup workaround for dcn3+"

2023-08-29 Thread Hamza Mahfooz
This reverts commit 3a31e8b89b7240d9a17ace8a1ed050bdcb560f9e.

We still need to call dcn20_adjust_freesync_v_startup() for older DCN3+
ASICs otherwise it can cause DP to HDMI 2.1 PCONs to fail to light up.
So, reintroduce the reverted code and limit it to ASICs older than
DCN31.

Cc: sta...@vger.kernel.org
Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2809
Signed-off-by: Hamza Mahfooz 
---
 .../drm/amd/display/dc/dml/dcn20/dcn20_fpu.c  | 24 ---
 1 file changed, 4 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c 
b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c
index 0989a0152ae8..0841176e8d6c 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c
@@ -1099,6 +1099,10 @@ void dcn20_calculate_dlg_params(struct dc *dc,
context->res_ctx.pipe_ctx[i].plane_res.bw.dppclk_khz =

pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000;
context->res_ctx.pipe_ctx[i].pipe_dlg_param = 
pipes[pipe_idx].pipe.dest;
+   if (dc->ctx->dce_version < DCN_VERSION_3_1 &&
+   
context->res_ctx.pipe_ctx[i].stream->adaptive_sync_infopacket.valid)
+   
dcn20_adjust_freesync_v_startup(>res_ctx.pipe_ctx[i].stream->timing,
+   
>res_ctx.pipe_ctx[i].pipe_dlg_param.vstartup_start);
 
pipe_idx++;
}
@@ -1927,7 +1931,6 @@ static bool dcn20_validate_bandwidth_internal(struct dc 
*dc, struct dc_state *co
int vlevel = 0;
int pipe_split_from[MAX_PIPES];
int pipe_cnt = 0;
-   int i = 0;
display_e2e_pipe_params_st *pipes = kzalloc(dc->res_pool->pipe_count * 
sizeof(display_e2e_pipe_params_st), GFP_ATOMIC);
DC_LOGGER_INIT(dc->ctx->logger);
 
@@ -1951,15 +1954,6 @@ static bool dcn20_validate_bandwidth_internal(struct dc 
*dc, struct dc_state *co
dcn20_calculate_wm(dc, context, pipes, _cnt, pipe_split_from, 
vlevel, fast_validate);
dcn20_calculate_dlg_params(dc, context, pipes, pipe_cnt, vlevel);
 
-   for (i = 0; i < dc->res_pool->pipe_count; i++) {
-   if (!context->res_ctx.pipe_ctx[i].stream)
-   continue;
-   if 
(context->res_ctx.pipe_ctx[i].stream->adaptive_sync_infopacket.valid)
-   dcn20_adjust_freesync_v_startup(
-   >res_ctx.pipe_ctx[i].stream->timing,
-   
>res_ctx.pipe_ctx[i].pipe_dlg_param.vstartup_start);
-   }
-
BW_VAL_TRACE_END_WATERMARKS();
 
goto validate_out;
@@ -2232,7 +2226,6 @@ bool dcn21_validate_bandwidth_fp(struct dc *dc,
int vlevel = 0;
int pipe_split_from[MAX_PIPES];
int pipe_cnt = 0;
-   int i = 0;
display_e2e_pipe_params_st *pipes = kzalloc(dc->res_pool->pipe_count * 
sizeof(display_e2e_pipe_params_st), GFP_ATOMIC);
DC_LOGGER_INIT(dc->ctx->logger);
 
@@ -2261,15 +2254,6 @@ bool dcn21_validate_bandwidth_fp(struct dc *dc,
dcn21_calculate_wm(dc, context, pipes, _cnt, pipe_split_from, 
vlevel, fast_validate);
dcn20_calculate_dlg_params(dc, context, pipes, pipe_cnt, vlevel);
 
-   for (i = 0; i < dc->res_pool->pipe_count; i++) {
-   if (!context->res_ctx.pipe_ctx[i].stream)
-   continue;
-   if 
(context->res_ctx.pipe_ctx[i].stream->adaptive_sync_infopacket.valid)
-   dcn20_adjust_freesync_v_startup(
-   >res_ctx.pipe_ctx[i].stream->timing,
-   
>res_ctx.pipe_ctx[i].pipe_dlg_param.vstartup_start);
-   }
-
BW_VAL_TRACE_END_WATERMARKS();
 
goto validate_out;
-- 
2.41.0



Re: [Intel-gfx] [PATCH 0/4] drm/amd/display: stop using drm_edid_override_connector_update()

2023-08-29 Thread Alex Hung




On 2023-08-29 11:03, Jani Nikula wrote:

On Tue, 29 Aug 2023, Jani Nikula  wrote:

On Tue, 29 Aug 2023, Alex Deucher  wrote:

On Tue, Aug 29, 2023 at 6:48 AM Jani Nikula  wrote:


On Wed, 23 Aug 2023, Jani Nikula  wrote:

On Tue, 22 Aug 2023, Alex Hung  wrote:

On 2023-08-22 06:01, Jani Nikula wrote:

Over the past years I've been trying to unify the override and firmware
EDID handling as well as EDID property updates. It won't work if drivers
do their own random things.

Let's check how to replace these references by appropriate ones or fork
the function as reverting these patches causes regressions.


I think the fundamental problem you have is conflating connector forcing
with EDID override. They're orthogonal. The .force callback has no
business basing the decisions on connector->edid_override. Force is
force, override is override.

The driver isn't even supposed to know or care if the EDID originates
from the firmware loader or override EDID debugfs. drm_get_edid() will
handle that for you transparently. It'll return the EDID, and you
shouldn't look at connector->edid_blob_ptr either. Using that will make
future work in drm_edid.c harder.

You can't fix that with minor tweaks. I think you'll be better off
starting from scratch.

Also, connector->edid_override is debugfs. You actually can change the
behaviour. If your userspace, whatever it is, has been written to assume
connector forcing if EDID override is set, you *do* have to fix that,
and set both.


Any updates on fixing this, or shall we proceed with the reverts?


There is a patch under internal reviews. It removes calls edid_override 
and drm_edid_override_connector_update as intended in this patchset but 
does not remove the functionality.


With the patch. both following git grep commands return nothing in 
amd-staging-drm-next.


$ git grep drm_edid_override_connector_update -- drivers/gpu/drm/amd
$ git grep edid_override -- drivers/gpu/drm/amd

Best regards,
Alex Hung



What is the goal of the reverts?  I don't disagree that we may be
using the interfaces wrong, but reverting them will regess
functionality in the driver.


The commits are in v6.5-rc1, but not yet in a release. No user depends
on them yet. I'd strongly prefer them not reaching v6.5 final and users.


Sorry for confusion here, that's obviously come and gone already. :(


The firmware EDID, override EDID, connector forcing, the EDID property,
etc. have been and somewhat still are a hairy mess that we must keep
untangling, and this isn't helping.

I've put in crazy amounts of work on this, and I've added kernel-doc
comments about stuff that should and should not be done, but they go
unread and ignored.

I really don't want to end up having to clean this up myself before I
can embark on further cleanups and refactoring.

And again, if the functionality in the driver depends on conflating two
things that should be separate, it's probably not such a hot idea to let
it reach users either. Even if it's just debugfs.


BR,
Jani.




Re: [Intel-gfx] [PATCH 0/4] drm/amd/display: stop using drm_edid_override_connector_update()

2023-08-29 Thread Jani Nikula
On Tue, 29 Aug 2023, Jani Nikula  wrote:
> On Tue, 29 Aug 2023, Alex Deucher  wrote:
>> On Tue, Aug 29, 2023 at 6:48 AM Jani Nikula  wrote:
>>>
>>> On Wed, 23 Aug 2023, Jani Nikula  wrote:
>>> > On Tue, 22 Aug 2023, Alex Hung  wrote:
>>> >> On 2023-08-22 06:01, Jani Nikula wrote:
>>> >>> Over the past years I've been trying to unify the override and firmware
>>> >>> EDID handling as well as EDID property updates. It won't work if drivers
>>> >>> do their own random things.
>>> >> Let's check how to replace these references by appropriate ones or fork
>>> >> the function as reverting these patches causes regressions.
>>> >
>>> > I think the fundamental problem you have is conflating connector forcing
>>> > with EDID override. They're orthogonal. The .force callback has no
>>> > business basing the decisions on connector->edid_override. Force is
>>> > force, override is override.
>>> >
>>> > The driver isn't even supposed to know or care if the EDID originates
>>> > from the firmware loader or override EDID debugfs. drm_get_edid() will
>>> > handle that for you transparently. It'll return the EDID, and you
>>> > shouldn't look at connector->edid_blob_ptr either. Using that will make
>>> > future work in drm_edid.c harder.
>>> >
>>> > You can't fix that with minor tweaks. I think you'll be better off
>>> > starting from scratch.
>>> >
>>> > Also, connector->edid_override is debugfs. You actually can change the
>>> > behaviour. If your userspace, whatever it is, has been written to assume
>>> > connector forcing if EDID override is set, you *do* have to fix that,
>>> > and set both.
>>>
>>> Any updates on fixing this, or shall we proceed with the reverts?
>>
>> What is the goal of the reverts?  I don't disagree that we may be
>> using the interfaces wrong, but reverting them will regess
>> functionality in the driver.
>
> The commits are in v6.5-rc1, but not yet in a release. No user depends
> on them yet. I'd strongly prefer them not reaching v6.5 final and users.

Sorry for confusion here, that's obviously come and gone already. :(

> The firmware EDID, override EDID, connector forcing, the EDID property,
> etc. have been and somewhat still are a hairy mess that we must keep
> untangling, and this isn't helping.
>
> I've put in crazy amounts of work on this, and I've added kernel-doc
> comments about stuff that should and should not be done, but they go
> unread and ignored.
>
> I really don't want to end up having to clean this up myself before I
> can embark on further cleanups and refactoring.
>
> And again, if the functionality in the driver depends on conflating two
> things that should be separate, it's probably not such a hot idea to let
> it reach users either. Even if it's just debugfs.
>
>
> BR,
> Jani.

-- 
Jani Nikula, Intel Open Source Graphics Center


Re: [Intel-gfx] [PATCH 0/4] drm/amd/display: stop using drm_edid_override_connector_update()

2023-08-29 Thread Jani Nikula
On Tue, 29 Aug 2023, Alex Deucher  wrote:
> On Tue, Aug 29, 2023 at 6:48 AM Jani Nikula  wrote:
>>
>> On Wed, 23 Aug 2023, Jani Nikula  wrote:
>> > On Tue, 22 Aug 2023, Alex Hung  wrote:
>> >> On 2023-08-22 06:01, Jani Nikula wrote:
>> >>> Over the past years I've been trying to unify the override and firmware
>> >>> EDID handling as well as EDID property updates. It won't work if drivers
>> >>> do their own random things.
>> >> Let's check how to replace these references by appropriate ones or fork
>> >> the function as reverting these patches causes regressions.
>> >
>> > I think the fundamental problem you have is conflating connector forcing
>> > with EDID override. They're orthogonal. The .force callback has no
>> > business basing the decisions on connector->edid_override. Force is
>> > force, override is override.
>> >
>> > The driver isn't even supposed to know or care if the EDID originates
>> > from the firmware loader or override EDID debugfs. drm_get_edid() will
>> > handle that for you transparently. It'll return the EDID, and you
>> > shouldn't look at connector->edid_blob_ptr either. Using that will make
>> > future work in drm_edid.c harder.
>> >
>> > You can't fix that with minor tweaks. I think you'll be better off
>> > starting from scratch.
>> >
>> > Also, connector->edid_override is debugfs. You actually can change the
>> > behaviour. If your userspace, whatever it is, has been written to assume
>> > connector forcing if EDID override is set, you *do* have to fix that,
>> > and set both.
>>
>> Any updates on fixing this, or shall we proceed with the reverts?
>
> What is the goal of the reverts?  I don't disagree that we may be
> using the interfaces wrong, but reverting them will regess
> functionality in the driver.

The commits are in v6.5-rc1, but not yet in a release. No user depends
on them yet. I'd strongly prefer them not reaching v6.5 final and users.

The firmware EDID, override EDID, connector forcing, the EDID property,
etc. have been and somewhat still are a hairy mess that we must keep
untangling, and this isn't helping.

I've put in crazy amounts of work on this, and I've added kernel-doc
comments about stuff that should and should not be done, but they go
unread and ignored.

I really don't want to end up having to clean this up myself before I
can embark on further cleanups and refactoring.

And again, if the functionality in the driver depends on conflating two
things that should be separate, it's probably not such a hot idea to let
it reach users either. Even if it's just debugfs.


BR,
Jani.


-- 
Jani Nikula, Intel Open Source Graphics Center


RE: [PATCH] drm/amdkfd: Fix reg offset for setting CWSR grace period

2023-08-29 Thread Kim, Jonathan
[Public]

> -Original Message-
> From: Joshi, Mukul 
> Sent: Tuesday, August 29, 2023 10:55 AM
> To: amd-gfx@lists.freedesktop.org
> Cc: Kuehling, Felix ; Kim, Jonathan
> ; Joshi, Mukul 
> Subject: [PATCH] drm/amdkfd: Fix reg offset for setting CWSR grace period
>
> This patch fixes the case where the code currently passes
> absolute register address and not the reg offset, which HWS
> expects, when sending the PM4 packet to set/update CWSR grace
> period. Additionally, cleanup the signature of
> build_grace_period_packet_info function as it no longer needs
> the inst parameter.
>
> Signed-off-by: Mukul Joshi 

Reviewed-by: Jonathan Kim 

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c| 3 +--
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h| 3 +--
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 6 ++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h | 3 +--
>  drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 3 +--
>  drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c| 3 +--
>  drivers/gpu/drm/amd/include/kgd_kfd_interface.h   | 3 +--
>  7 files changed, 8 insertions(+), 16 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> index f1f2c24de081..69810b3f1c63 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> @@ -980,8 +980,7 @@ void
> kgd_gfx_v10_build_grace_period_packet_info(struct amdgpu_device *adev,
>   uint32_t wait_times,
>   uint32_t grace_period,
>   uint32_t *reg_offset,
> - uint32_t *reg_data,
> - uint32_t inst)
> + uint32_t *reg_data)
>  {
>   *reg_data = wait_times;
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h
> index ecaead24e8c9..67bcaa3d4226 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h
> @@ -55,5 +55,4 @@ void
> kgd_gfx_v10_build_grace_period_packet_info(struct amdgpu_device *adev,
>  uint32_t wait_times,
>  uint32_t grace_period,
>  uint32_t *reg_offset,
> -uint32_t *reg_data,
> -uint32_t inst);
> +uint32_t *reg_data);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> index fa5ee96f8845..3c45a188b701 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> @@ -1103,8 +1103,7 @@ void
> kgd_gfx_v9_build_grace_period_packet_info(struct amdgpu_device *adev,
>   uint32_t wait_times,
>   uint32_t grace_period,
>   uint32_t *reg_offset,
> - uint32_t *reg_data,
> - uint32_t inst)
> + uint32_t *reg_data)
>  {
>   *reg_data = wait_times;
>
> @@ -1120,8 +1119,7 @@ void
> kgd_gfx_v9_build_grace_period_packet_info(struct amdgpu_device *adev,
>   SCH_WAVE,
>   grace_period);
>
> - *reg_offset = SOC15_REG_OFFSET(GC, GET_INST(GC, inst),
> - mmCP_IQ_WAIT_TIME2);
> + *reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2);
>  }
>
>  void kgd_gfx_v9_program_trap_handler_settings(struct amdgpu_device
> *adev,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
> index 936e501908ce..ce424615f59b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
> @@ -100,5 +100,4 @@ void
> kgd_gfx_v9_build_grace_period_packet_info(struct amdgpu_device *adev,
>  uint32_t wait_times,
>  uint32_t grace_period,
>  uint32_t *reg_offset,
> -uint32_t *reg_data,
> -uint32_t inst);
> +uint32_t *reg_data);
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> index b166f30f083e..8a6cb41444a4 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> @@ -1677,8 +1677,7 @@ static int start_cpsch(struct 

[PATCH] drm/amdkfd: Fix unaligned 64-bit doorbell warning

2023-08-29 Thread Mukul Joshi
This patch fixes the following unaligned 64-bit doorbell
warning seen when submitting packets on HIQ on GFX v9.4.3
by making the HIQ doorbell 64-bit aligned.
The warning is seen when GPU is loaded in any mode other
than SPX mode.

[  +0.000301] [ cut here ]
[  +0.03] Unaligned 64-bit doorbell
[  +0.30] WARNING: /amdkfd/kfd_doorbell.c:339 
write_kernel_doorbell64+0x72/0x80 [amdgpu]
[  +0.03] RIP: 0010:write_kernel_doorbell64+0x72/0x80 [amdgpu]
[  +0.04] RSP: 0018:c90004287730 EFLAGS: 00010246
[  +0.05] RAX:  RBX:  RCX: 
[  +0.03] RDX: 0001 RSI: 82837c71 RDI: 
[  +0.03] RBP: c90004287748 R08: 0003 R09: 0001
[  +0.02] R10: 001a R11: 88a034008198 R12: c900013bd004
[  +0.03] R13: 0008 R14: c900042877b0 R15: 007f
[  +0.03] FS:  7fa8c7b62000() GS:889f8840() 
knlGS:
[  +0.04] CS:  0010 DS:  ES:  CR0: 80050033
[  +0.03] CR2: 56111c45aaf0 CR3: 0001414f2002 CR4: 00770ee0
[  +0.03] PKRU: 5554
[  +0.02] Call Trace:
[  +0.04]  
[  +0.06]  kq_submit_packet+0x45/0x50 [amdgpu]
[  +0.000524]  pm_send_set_resources+0x7f/0xc0 [amdgpu]
[  +0.000500]  set_sched_resources+0xe4/0x160 [amdgpu]
[  +0.000503]  start_cpsch+0x1c5/0x2a0 [amdgpu]
[  +0.000497]  kgd2kfd_device_init.cold+0x816/0xb42 [amdgpu]
[  +0.000743]  amdgpu_amdkfd_device_init+0x15f/0x1f0 [amdgpu]
[  +0.000602]  amdgpu_device_init.cold+0x1813/0x2176 [amdgpu]
[  +0.000684]  ? pci_bus_read_config_word+0x4a/0x80
[  +0.12]  ? do_pci_enable_device+0xdc/0x110
[  +0.08]  amdgpu_driver_load_kms+0x1a/0x110 [amdgpu]
[  +0.000545]  amdgpu_pci_probe+0x197/0x400 [amdgpu]

Signed-off-by: Mukul Joshi 
---
 drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
index c2e0b79dcc6d..b1c2772c3a8d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
@@ -168,7 +168,7 @@ void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
" doorbell index== 0x%x\n",
*doorbell_off, inx);
 
-   return kfd->doorbell_kernel_ptr + inx;
+   return kfd->doorbell_kernel_ptr + inx * 2;
 }
 
 void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr)
@@ -176,6 +176,7 @@ void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 
__iomem *db_addr)
unsigned int inx;
 
inx = (unsigned int)(db_addr - kfd->doorbell_kernel_ptr);
+   inx /= 2;
 
mutex_lock(>doorbell_mutex);
__clear_bit(inx, kfd->doorbell_bitmap);
-- 
2.35.1



RE: [Intel-gfx] [PATCH 0/4] drm/amd/display: stop using drm_edid_override_connector_update()

2023-08-29 Thread Wu, Hersen
[AMD Official Use Only - General]

+ Charlie Wang

-Original Message-
From: Alex Deucher 
Sent: Tuesday, August 29, 2023 11:44 AM
To: Jani Nikula 
Cc: Hung, Alex ; dri-de...@lists.freedesktop.org; 
amd-gfx@lists.freedesktop.org; Li, Sun peng (Leo) ; 
intel-...@lists.freedesktop.org; Siqueira, Rodrigo ; 
Wheeler, Daniel ; Wu, Hersen ; 
Chien, WenChieh (Jay) ; Deucher, Alexander 

Subject: Re: [Intel-gfx] [PATCH 0/4] drm/amd/display: stop using 
drm_edid_override_connector_update()

On Tue, Aug 29, 2023 at 6:48 AM Jani Nikula  wrote:
>
> On Wed, 23 Aug 2023, Jani Nikula  wrote:
> > On Tue, 22 Aug 2023, Alex Hung  wrote:
> >> On 2023-08-22 06:01, Jani Nikula wrote:
> >>> Over the past years I've been trying to unify the override and
> >>> firmware EDID handling as well as EDID property updates. It won't
> >>> work if drivers do their own random things.
> >> Let's check how to replace these references by appropriate ones or
> >> fork the function as reverting these patches causes regressions.
> >
> > I think the fundamental problem you have is conflating connector
> > forcing with EDID override. They're orthogonal. The .force callback
> > has no business basing the decisions on connector->edid_override.
> > Force is force, override is override.
> >
> > The driver isn't even supposed to know or care if the EDID
> > originates from the firmware loader or override EDID debugfs.
> > drm_get_edid() will handle that for you transparently. It'll return
> > the EDID, and you shouldn't look at connector->edid_blob_ptr either.
> > Using that will make future work in drm_edid.c harder.
> >
> > You can't fix that with minor tweaks. I think you'll be better off
> > starting from scratch.
> >
> > Also, connector->edid_override is debugfs. You actually can change
> > the behaviour. If your userspace, whatever it is, has been written
> > to assume connector forcing if EDID override is set, you *do* have
> > to fix that, and set both.
>
> Any updates on fixing this, or shall we proceed with the reverts?

What is the goal of the reverts?  I don't disagree that we may be using the 
interfaces wrong, but reverting them will regess functionality in the driver.

Alex


Re: [Intel-gfx] [PATCH 0/4] drm/amd/display: stop using drm_edid_override_connector_update()

2023-08-29 Thread Alex Deucher
On Tue, Aug 29, 2023 at 6:48 AM Jani Nikula  wrote:
>
> On Wed, 23 Aug 2023, Jani Nikula  wrote:
> > On Tue, 22 Aug 2023, Alex Hung  wrote:
> >> On 2023-08-22 06:01, Jani Nikula wrote:
> >>> Over the past years I've been trying to unify the override and firmware
> >>> EDID handling as well as EDID property updates. It won't work if drivers
> >>> do their own random things.
> >> Let's check how to replace these references by appropriate ones or fork
> >> the function as reverting these patches causes regressions.
> >
> > I think the fundamental problem you have is conflating connector forcing
> > with EDID override. They're orthogonal. The .force callback has no
> > business basing the decisions on connector->edid_override. Force is
> > force, override is override.
> >
> > The driver isn't even supposed to know or care if the EDID originates
> > from the firmware loader or override EDID debugfs. drm_get_edid() will
> > handle that for you transparently. It'll return the EDID, and you
> > shouldn't look at connector->edid_blob_ptr either. Using that will make
> > future work in drm_edid.c harder.
> >
> > You can't fix that with minor tweaks. I think you'll be better off
> > starting from scratch.
> >
> > Also, connector->edid_override is debugfs. You actually can change the
> > behaviour. If your userspace, whatever it is, has been written to assume
> > connector forcing if EDID override is set, you *do* have to fix that,
> > and set both.
>
> Any updates on fixing this, or shall we proceed with the reverts?

What is the goal of the reverts?  I don't disagree that we may be
using the interfaces wrong, but reverting them will regess
functionality in the driver.

Alex


[PATCH] drm/amdgpu: Free ras cmd input buffer properly

2023-08-29 Thread Hawking Zhang
Do not access the pointer for ras input cmd buffer
if it is even not allocated.

Signed-off-by: Hawking Zhang 
Reviewed-by: Stanley Yang 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 14 +++---
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index e47600a8e88e..8eb6f6943778 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -764,7 +764,7 @@ int amdgpu_ras_feature_enable(struct amdgpu_device *adev,
 {
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
union ta_ras_cmd_input *info;
-   int ret = 0;
+   int ret;
 
if (!con)
return -EINVAL;
@@ -774,7 +774,7 @@ int amdgpu_ras_feature_enable(struct amdgpu_device *adev,
/* Force issue enable or disable ras feature commands */
if (head->block != AMDGPU_RAS_BLOCK__GFX &&
!amdgpu_ras_is_feature_allowed(adev, head))
-   goto out;
+   return 0;
 
/* Only enable gfx ras feature from host side */
if (head->block == AMDGPU_RAS_BLOCK__GFX &&
@@ -802,16 +802,16 @@ int amdgpu_ras_feature_enable(struct amdgpu_device *adev,
enable ? "enable":"disable",
get_ras_block_str(head),
amdgpu_ras_is_poison_mode_supported(adev), ret);
-   goto out;
+   return ret;
}
+
+   kfree(info);
}
 
/* setup the obj */
__amdgpu_ras_feature_enable(adev, head, enable);
-out:
-   if (head->block == AMDGPU_RAS_BLOCK__GFX)
-   kfree(info);
-   return ret;
+
+   return 0;
 }
 
 /* Only used in device probe stage and called only once. */
-- 
2.17.1



RE: [Intel-gfx] [PATCH 0/4] drm/amd/display: stop using drm_edid_override_connector_update()

2023-08-29 Thread Wu, Hersen
[AMD Official Use Only - General]

+ Charlie

-Original Message-
From: Jani Nikula 
Sent: Tuesday, August 29, 2023 6:49 AM
To: Hung, Alex ; dri-de...@lists.freedesktop.org; 
amd-gfx@lists.freedesktop.org
Cc: Li, Sun peng (Leo) ; David Airlie ; 
intel-...@lists.freedesktop.org; Siqueira, Rodrigo ; 
Wheeler, Daniel ; Wu, Hersen ; 
Daniel Vetter ; Chien, WenChieh (Jay) 
; Deucher, Alexander ; 
Wentland, Harry 
Subject: Re: [Intel-gfx] [PATCH 0/4] drm/amd/display: stop using 
drm_edid_override_connector_update()

On Wed, 23 Aug 2023, Jani Nikula  wrote:
> On Tue, 22 Aug 2023, Alex Hung  wrote:
>> On 2023-08-22 06:01, Jani Nikula wrote:
>>> Over the past years I've been trying to unify the override and
>>> firmware EDID handling as well as EDID property updates. It won't
>>> work if drivers do their own random things.
>> Let's check how to replace these references by appropriate ones or
>> fork the function as reverting these patches causes regressions.
>
> I think the fundamental problem you have is conflating connector
> forcing with EDID override. They're orthogonal. The .force callback
> has no business basing the decisions on connector->edid_override.
> Force is force, override is override.
>
> The driver isn't even supposed to know or care if the EDID originates
> from the firmware loader or override EDID debugfs. drm_get_edid() will
> handle that for you transparently. It'll return the EDID, and you
> shouldn't look at connector->edid_blob_ptr either. Using that will
> make future work in drm_edid.c harder.
>
> You can't fix that with minor tweaks. I think you'll be better off
> starting from scratch.
>
> Also, connector->edid_override is debugfs. You actually can change the
> behaviour. If your userspace, whatever it is, has been written to
> assume connector forcing if EDID override is set, you *do* have to fix
> that, and set both.

Any updates on fixing this, or shall we proceed with the reverts?

BR,
Jani.



>
> BR,
> Jani.
>
>
>>
>> Cheers,
>> Alex
>>
>>>
>>> BR,
>>> Jani.
>>>
>>>
>>> Cc: Alex Deucher 
>>> Cc: Alex Hung 
>>> Cc: Chao-kai Wang 
>>> Cc: Daniel Wheeler 
>>> Cc: Harry Wentland 
>>> Cc: Hersen Wu 
>>> Cc: Leo Li 
>>> Cc: Rodrigo Siqueira 
>>> Cc: Wenchieh Chien 
>>> Cc: David Airlie 
>>> Cc: Daniel Vetter 
>>>
>>> Jani Nikula (4):
>>>Revert "drm/amd/display: drop unused count variable in
>>>  create_eml_sink()"
>>>Revert "drm/amd/display: assign edid_blob_ptr with edid from debugfs"
>>>Revert "drm/amd/display: mark amdgpu_dm_connector_funcs_force static"
>>>Revert "drm/amd/display: implement force function in
>>>  amdgpu_dm_connector_funcs"
>>>
>>>   .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 44 +++
>>>   1 file changed, 5 insertions(+), 39 deletions(-)
>>>

--
Jani Nikula, Intel Open Source Graphics Center


RE: [PATCH] drm/amdgpu: Free ras cmd input buffer properly

2023-08-29 Thread Zhang, Hawking
[AMD Official Use Only - General]

Sorry about that, will send it out a clean up version.

Regards,
Hawking

-Original Message-
From: Yang, Stanley 
Sent: Tuesday, August 29, 2023 23:00
To: Zhang, Hawking ; amd-gfx@lists.freedesktop.org; 
Zhou1, Tao 
Subject: RE: [PATCH] drm/amdgpu: Free ras cmd input buffer properly

[AMD Official Use Only - General]

Reviewed-by: Stanley.Yang 

Regards,
Stanley
> -Original Message-
> From: Zhang, Hawking 
> Sent: Tuesday, August 29, 2023 10:55 PM
> To: amd-gfx@lists.freedesktop.org; Zhou1, Tao ;
> Yang, Stanley 
> Cc: Zhang, Hawking 
> Subject: [PATCH] drm/amdgpu: Free ras cmd input buffer properly
>
> Do not access the pointer for ras input cmd buffer if it is even not 
> allocated.
>
> Signed-off-by: Hawking Zhang 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> index e47600a8e88e..16c5fe487ea0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> @@ -804,13 +804,13 @@ int amdgpu_ras_feature_enable(struct
> amdgpu_device *adev,
>
>   amdgpu_ras_is_poison_mode_supported(adev), ret);
>   goto out;
>   }
> +
> + kfree(info);
>   }
>
>   /* setup the obj */
>   __amdgpu_ras_feature_enable(adev, head, enable);
> -out:
> - if (head->block == AMDGPU_RAS_BLOCK__GFX)
> - kfree(info);
> +
>   return ret;
>  }
>
> --
> 2.17.1




[PATCH] drm/amdgpu: Free ras cmd input buffer properly

2023-08-29 Thread Hawking Zhang
Do not access the pointer for ras input cmd buffer
if it is even not allocated.

Signed-off-by: Hawking Zhang 
Reviewed-by: Stanley Yang 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 14 +++---
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index e47600a8e88e..8eb6f6943778 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -764,7 +764,7 @@ int amdgpu_ras_feature_enable(struct amdgpu_device *adev,
 {
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
union ta_ras_cmd_input *info;
-   int ret = 0;
+   int ret;
 
if (!con)
return -EINVAL;
@@ -774,7 +774,7 @@ int amdgpu_ras_feature_enable(struct amdgpu_device *adev,
/* Force issue enable or disable ras feature commands */
if (head->block != AMDGPU_RAS_BLOCK__GFX &&
!amdgpu_ras_is_feature_allowed(adev, head))
-   goto out;
+   return 0;
 
/* Only enable gfx ras feature from host side */
if (head->block == AMDGPU_RAS_BLOCK__GFX &&
@@ -802,16 +802,16 @@ int amdgpu_ras_feature_enable(struct amdgpu_device *adev,
enable ? "enable":"disable",
get_ras_block_str(head),
amdgpu_ras_is_poison_mode_supported(adev), ret);
-   goto out;
+   return ret;
}
+
+   kfree(info);
}
 
/* setup the obj */
__amdgpu_ras_feature_enable(adev, head, enable);
-out:
-   if (head->block == AMDGPU_RAS_BLOCK__GFX)
-   kfree(info);
-   return ret;
+
+   return 0;
 }
 
 /* Only used in device probe stage and called only once. */
-- 
2.17.1



RE: [PATCH] drm/amdgpu: Free ras cmd input buffer properly

2023-08-29 Thread Yang, Stanley
[AMD Official Use Only - General]

Reviewed-by: Stanley.Yang 

Regards,
Stanley
> -Original Message-
> From: Zhang, Hawking 
> Sent: Tuesday, August 29, 2023 10:55 PM
> To: amd-gfx@lists.freedesktop.org; Zhou1, Tao ;
> Yang, Stanley 
> Cc: Zhang, Hawking 
> Subject: [PATCH] drm/amdgpu: Free ras cmd input buffer properly
>
> Do not access the pointer for ras input cmd buffer if it is even not 
> allocated.
>
> Signed-off-by: Hawking Zhang 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> index e47600a8e88e..16c5fe487ea0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> @@ -804,13 +804,13 @@ int amdgpu_ras_feature_enable(struct
> amdgpu_device *adev,
>
>   amdgpu_ras_is_poison_mode_supported(adev), ret);
>   goto out;
>   }
> +
> + kfree(info);
>   }
>
>   /* setup the obj */
>   __amdgpu_ras_feature_enable(adev, head, enable);
> -out:
> - if (head->block == AMDGPU_RAS_BLOCK__GFX)
> - kfree(info);
> +
>   return ret;
>  }
>
> --
> 2.17.1



[PATCH] drm/amdgpu: Free ras cmd input buffer properly

2023-08-29 Thread Hawking Zhang
Do not access the pointer for ras input cmd buffer
if it is even not allocated.

Signed-off-by: Hawking Zhang 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index e47600a8e88e..16c5fe487ea0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -804,13 +804,13 @@ int amdgpu_ras_feature_enable(struct amdgpu_device *adev,
amdgpu_ras_is_poison_mode_supported(adev), ret);
goto out;
}
+
+   kfree(info);
}
 
/* setup the obj */
__amdgpu_ras_feature_enable(adev, head, enable);
-out:
-   if (head->block == AMDGPU_RAS_BLOCK__GFX)
-   kfree(info);
+
return ret;
 }
 
-- 
2.17.1



[PATCH] drm/amdkfd: Fix reg offset for setting CWSR grace period

2023-08-29 Thread Mukul Joshi
This patch fixes the case where the code currently passes
absolute register address and not the reg offset, which HWS
expects, when sending the PM4 packet to set/update CWSR grace
period. Additionally, cleanup the signature of
build_grace_period_packet_info function as it no longer needs
the inst parameter.

Signed-off-by: Mukul Joshi 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c| 3 +--
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h| 3 +--
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 6 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h | 3 +--
 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 3 +--
 drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c| 3 +--
 drivers/gpu/drm/amd/include/kgd_kfd_interface.h   | 3 +--
 7 files changed, 8 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
index f1f2c24de081..69810b3f1c63 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
@@ -980,8 +980,7 @@ void kgd_gfx_v10_build_grace_period_packet_info(struct 
amdgpu_device *adev,
uint32_t wait_times,
uint32_t grace_period,
uint32_t *reg_offset,
-   uint32_t *reg_data,
-   uint32_t inst)
+   uint32_t *reg_data)
 {
*reg_data = wait_times;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h
index ecaead24e8c9..67bcaa3d4226 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h
@@ -55,5 +55,4 @@ void kgd_gfx_v10_build_grace_period_packet_info(struct 
amdgpu_device *adev,
   uint32_t wait_times,
   uint32_t grace_period,
   uint32_t *reg_offset,
-  uint32_t *reg_data,
-  uint32_t inst);
+  uint32_t *reg_data);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index fa5ee96f8845..3c45a188b701 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -1103,8 +1103,7 @@ void kgd_gfx_v9_build_grace_period_packet_info(struct 
amdgpu_device *adev,
uint32_t wait_times,
uint32_t grace_period,
uint32_t *reg_offset,
-   uint32_t *reg_data,
-   uint32_t inst)
+   uint32_t *reg_data)
 {
*reg_data = wait_times;
 
@@ -1120,8 +1119,7 @@ void kgd_gfx_v9_build_grace_period_packet_info(struct 
amdgpu_device *adev,
SCH_WAVE,
grace_period);
 
-   *reg_offset = SOC15_REG_OFFSET(GC, GET_INST(GC, inst),
-   mmCP_IQ_WAIT_TIME2);
+   *reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2);
 }
 
 void kgd_gfx_v9_program_trap_handler_settings(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
index 936e501908ce..ce424615f59b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
@@ -100,5 +100,4 @@ void kgd_gfx_v9_build_grace_period_packet_info(struct 
amdgpu_device *adev,
   uint32_t wait_times,
   uint32_t grace_period,
   uint32_t *reg_offset,
-  uint32_t *reg_data,
-  uint32_t inst);
+  uint32_t *reg_data);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index b166f30f083e..8a6cb41444a4 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -1677,8 +1677,7 @@ static int start_cpsch(struct device_queue_manager *dqm)
dqm->dev->kfd2kgd->build_grace_period_packet_info(
dqm->dev->adev, dqm->wait_times,
grace_period, _offset,
-   >wait_times,
-   ffs(dqm->dev->xcc_mask) - 1);
+   >wait_times);
  

Re: [PATCH v2] drm/amd/display: Adjust kdoc for 'optc35_set_odm_combine'

2023-08-29 Thread Rodrigo Siqueira Jordao




On 8/29/23 08:38, Srinivasan Shanmugam wrote:

Fixes the following W=1 kernel build warning:

drivers/gpu/drm/amd/amdgpu/../display/dc/dcn35/dcn35_optc.c:46: warning: This 
comment starts with '/**', but isn't a kernel-doc comment. Refer 
Documentation/doc-guide/kernel-doc.rst
* Enable CRTC

Cc: Qingqing Zhuo 
Cc: Rodrigo Siqueira 
Cc: Harry Wentland 
Cc: Aurabindo Pillai 
Cc: Alex Deucher 
Signed-off-by: Srinivasan Shanmugam 
---

v2:

- Addressed the following comments (Rodrigo)
   - Updated Commit title to 'drm/amd/display: Adjust kdoc for 
'optc35_set_odm_combine'
   - Updated the description for paramaters @optc, opp_id, opp_cnt.

  drivers/gpu/drm/amd/display/dc/dcn35/dcn35_optc.c | 11 ---
  1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_optc.c 
b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_optc.c
index 5f7adc83258b..d64be1a5071c 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_optc.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_optc.c
@@ -43,10 +43,15 @@
optc1->tg_shift->field_name, optc1->tg_mask->field_name
  
  /**

- * Enable CRTC
- * Enable CRTC - call ASIC Control Object to enable Timing generator.
+ * optc35_set_odm_combine() - Enable CRTC - call ASIC Control Object to enable 
Timing generator.
+ *
+ * @optc: Output Pipe Timing Combine instance reference.
+ * @opp_id: Output Plane Processor instance ID.
+ * @opp_cnt: Output Plane Processor count.
+ * @timing: Timing parameters used to configure DCN blocks.
+ *
+ * Return: void.
   */
-
  static void optc35_set_odm_combine(struct timing_generator *optc, int 
*opp_id, int opp_cnt,
struct dc_crtc_timing *timing)
  {


When sending a new patch version, send it as a new patch instead of 
replying the previous one.


Anyway,

Reviewed-by: Rodrigo Siqueira 



[PATCH v2] drm/amd/display: Adjust kdoc for 'optc35_set_odm_combine'

2023-08-29 Thread Srinivasan Shanmugam
Fixes the following W=1 kernel build warning:

drivers/gpu/drm/amd/amdgpu/../display/dc/dcn35/dcn35_optc.c:46: warning: This 
comment starts with '/**', but isn't a kernel-doc comment. Refer 
Documentation/doc-guide/kernel-doc.rst
* Enable CRTC

Cc: Qingqing Zhuo 
Cc: Rodrigo Siqueira 
Cc: Harry Wentland 
Cc: Aurabindo Pillai 
Cc: Alex Deucher 
Signed-off-by: Srinivasan Shanmugam 
---

v2:

- Addressed the following comments (Rodrigo)
  - Updated Commit title to 'drm/amd/display: Adjust kdoc for 
'optc35_set_odm_combine'
  - Updated the description for paramaters @optc, opp_id, opp_cnt.

 drivers/gpu/drm/amd/display/dc/dcn35/dcn35_optc.c | 11 ---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_optc.c 
b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_optc.c
index 5f7adc83258b..d64be1a5071c 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_optc.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_optc.c
@@ -43,10 +43,15 @@
optc1->tg_shift->field_name, optc1->tg_mask->field_name
 
 /**
- * Enable CRTC
- * Enable CRTC - call ASIC Control Object to enable Timing generator.
+ * optc35_set_odm_combine() - Enable CRTC - call ASIC Control Object to enable 
Timing generator.
+ *
+ * @optc: Output Pipe Timing Combine instance reference.
+ * @opp_id: Output Plane Processor instance ID.
+ * @opp_cnt: Output Plane Processor count.
+ * @timing: Timing parameters used to configure DCN blocks.
+ *
+ * Return: void.
  */
-
 static void optc35_set_odm_combine(struct timing_generator *optc, int *opp_id, 
int opp_cnt,
struct dc_crtc_timing *timing)
 {
-- 
2.25.1



Keyword Review - Re: [PATCH v3 2/2] drm/amdgpu: Put page tables to GTT memory for gfx10 onwards APUs

2023-08-29 Thread Christian König
*sigh* yeah that's exactly what we have seen before as well. Let me know 
when you need help with that.


Regards,
Christian.

Am 29.08.23 um 08:17 schrieb Zhang, Yifan:


[AMD Official Use Only - General]


Validated on Phoenix and Raphael w/ IOMMU remapping mode, and found 
random page fault when launching Xorg. I will debug this issue and 
update the patch.


Best Regards,

Yifan

*From:* Deucher, Alexander 
*Sent:* Tuesday, August 29, 2023 2:06 AM
*To:* Koenig, Christian ; Zhang, Yifan 
; Christian König 
; amd-gfx@lists.freedesktop.org
*Subject:* Re: [PATCH v3 2/2] drm/amdgpu: Put page tables to GTT 
memory for gfx10 onwards APUs


[AMD Official Use Only - General]

Technically the AMD IOMMU uses direct mapping mode for any device 
which claims to support ATS in order to support the IOMMUv2 
functionality, but that was also the case with Raven systems which 
were problematic when remapping mode was enabled.  That said, now that 
IOMMUv2 support has been removed, there's no reason to use direct 
mapping in the IOMMU driver so that may change.


Alex



*From:*Koenig, Christian 
*Sent:* Monday, August 28, 2023 7:30 AM
*To:* Zhang, Yifan ; Christian König 
; amd-gfx@lists.freedesktop.org 


*Cc:* Deucher, Alexander 
*Subject:* Re: [PATCH v3 2/2] drm/amdgpu: Put page tables to GTT 
memory for gfx10 onwards APUs


Well, there seems to be a very basic misunderstood here: The IOMMU
isolation level is *not* ASIC dependent!

Try to set amd_iommu=force_isolation on the kernel command line.

This is a configuration option customers can use to harden their systems
and when this isn't properly tested we can't allow page tables in system
memory.

Regards,
Christian.

Am 28.08.23 um 13:23 schrieb Zhang, Yifan:
> [Public]
>
> Not yet. It will be only enabled for gfx10.3.3 and later APU 
initially, IOMMU is pass through in these ASIC.

>
> -Original Message-
> From: Christian König 
> Sent: Monday, August 28, 2023 5:41 PM
> To: Zhang, Yifan ; amd-gfx@lists.freedesktop.org
> Cc: Deucher, Alexander ; Koenig, 
Christian 
> Subject: Re: [PATCH v3 2/2] drm/amdgpu: Put page tables to GTT 
memory for gfx10 onwards APUs

>
> Is that now validated with IOMMU in non pass through mode?
>
> Christian.
>
> Am 28.08.23 um 10:58 schrieb Zhang, Yifan:
>> [AMD Official Use Only - General]
>>
>> Ping
>>
>> -Original Message-
>> From: Zhang, Yifan 
>> Sent: Friday, August 25, 2023 8:34 AM
>> To: amd-gfx@lists.freedesktop.org
>> Cc: Deucher, Alexander ; Koenig, 
Christian ; Zhang, Yifan 
>> Subject: [PATCH v3 2/2] drm/amdgpu: Put page tables to GTT memory 
for gfx10 onwards APUs

>>
>> To decrease VRAM pressure for APUs, put page tables to GTT domain 
for gfx10 and newer APUs.

>>
>> v2: only enable it for gfx10 and newer APUs (Alex, Christian)
>>
>> Signed-off-by: Yifan Zhang 
>> ---
>>    drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c | 9 ++---
>>    1 file changed, 6 insertions(+), 3 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c

>> index 96d601e209b8..4603d87c61a0 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
>> @@ -515,10 +515,13 @@ int amdgpu_vm_pt_create(struct amdgpu_device 
*adev, struct amdgpu_vm *vm,

>>   bp.size = amdgpu_vm_pt_size(adev, level);
>>   bp.byte_align = AMDGPU_GPU_PAGE_SIZE;
>>
>> -   if (!adev->gmc.is_app_apu)
>> -   bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
>> -   else
>> +   if (adev->gmc.is_app_apu ||
>> +   ((adev->flags & AMD_IS_APU) &&
>> + (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 3, 3
>>   bp.domain = AMDGPU_GEM_DOMAIN_GTT;
>> +   else
>> +   bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
>> +
>>
>>   bp.domain = amdgpu_bo_get_preferred_domain(adev, bp.domain);
>>   bp.flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
>> --
>> 2.37.3
>>



Re: [PATCH] drm/amd/display: Fix up kdoc for 'optc35_set_odm_combine'

2023-08-29 Thread Rodrigo Siqueira Jordao

Hi,

Consider renaming the commit title to something like "Adjust kernel-doc 
for ...". Anyway, avoid use the keyword "Fix/Fixes" for kernel-doc.


On 8/28/23 22:50, Srinivasan Shanmugam wrote:

Fixes the following W=1 kernel build warning:

drivers/gpu/drm/amd/amdgpu/../display/dc/dcn35/dcn35_optc.c:46: warning: This 
comment starts with '/**', but isn't a kernel-doc comment. Refer 
Documentation/doc-guide/kernel-doc.rst
* Enable CRTC

Cc: Qingqing Zhuo 
Cc: Rodrigo Siqueira 
Cc: Harry Wentland 
Cc: Aurabindo Pillai 
Cc: Alex Deucher 
Signed-off-by: Srinivasan Shanmugam 
---
  drivers/gpu/drm/amd/display/dc/dcn35/dcn35_optc.c | 11 ---
  1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_optc.c 
b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_optc.c
index 5f7adc83258b..294799d8c34e 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_optc.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_optc.c
@@ -43,10 +43,15 @@
optc1->tg_shift->field_name, optc1->tg_mask->field_name
  
  /**

- * Enable CRTC
- * Enable CRTC - call ASIC Control Object to enable Timing generator.
+ * optc35_set_odm_combine() - Enable CRTC - call ASIC Control Object to enable 
Timing generator.
+ *
+ * @optc: timing_generator instance.


How about:

@optc: Output Pipe Timing Combine instance reference.


+ * @opp_id: OPP instance.


How about:

@opp_id: Output Plane Processor instance ID.

Thanks
Siqueira


+ * @opp_cnt: OPP count.
+ * @timing: Timing parameters used to configure DCN blocks.
+ *
+ * Return: void.
   */
-
  static void optc35_set_odm_combine(struct timing_generator *optc, int 
*opp_id, int opp_cnt,
struct dc_crtc_timing *timing)
  {




Re: [PATCH] Revert "Revert "drm/amd/display: Implement zpos property""

2023-08-29 Thread Melissa Wen
On 08/29, Hamza Mahfooz wrote:
> This reverts commit 984612bd4649c91f12e9c7c7f9e914fdc8ba7d3f.
> 
> The problematic IGT test case (i.e. kms_atomic@plane-immutable-zpos) has
> been fixed as of commit cb77add45011 ("tests/kms_atomic: remove zpos <
> N-planes assert") to the IGT repo. So, reintroduce the reverted code.
> 
> Link: 
> https://gitlab.freedesktop.org/drm/igt-gpu-tools/-/commit/cb77add45011b129e21f3cb2a4089a73dde56179
> Signed-off-by: Hamza Mahfooz 

Reviewed-by: Melissa Wen 

Thanks!

> ---
>  drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c | 9 +
>  1 file changed, 9 insertions(+)
> 
> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c 
> b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
> index 894bc7e4fdaa..df568a7cd005 100644
> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
> @@ -1469,6 +1469,15 @@ int amdgpu_dm_plane_init(struct amdgpu_display_manager 
> *dm,
>   drm_plane_create_blend_mode_property(plane, blend_caps);
>   }
>  
> + if (plane->type == DRM_PLANE_TYPE_PRIMARY) {
> + drm_plane_create_zpos_immutable_property(plane, 0);
> + } else if (plane->type == DRM_PLANE_TYPE_OVERLAY) {
> + unsigned int zpos = 1 + drm_plane_index(plane);
> + drm_plane_create_zpos_property(plane, zpos, 1, 254);
> + } else if (plane->type == DRM_PLANE_TYPE_CURSOR) {
> + drm_plane_create_zpos_immutable_property(plane, 255);
> + }
> +
>   if (plane->type == DRM_PLANE_TYPE_PRIMARY &&
>   plane_cap &&
>   (plane_cap->pixel_format_support.nv12 ||
> -- 
> 2.41.0
> 


signature.asc
Description: PGP signature


Re: [PATCH] Revert "Revert "drm/amd/display: Implement zpos property""

2023-08-29 Thread Alex Deucher
On Tue, Aug 29, 2023 at 7:40 AM Hamza Mahfooz  wrote:
>
> This reverts commit 984612bd4649c91f12e9c7c7f9e914fdc8ba7d3f.
>
> The problematic IGT test case (i.e. kms_atomic@plane-immutable-zpos) has
> been fixed as of commit cb77add45011 ("tests/kms_atomic: remove zpos <
> N-planes assert") to the IGT repo. So, reintroduce the reverted code.
>
> Link: 
> https://gitlab.freedesktop.org/drm/igt-gpu-tools/-/commit/cb77add45011b129e21f3cb2a4089a73dde56179
> Signed-off-by: Hamza Mahfooz 

Acked-by: Alex Deucher 

> ---
>  drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c | 9 +
>  1 file changed, 9 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c 
> b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
> index 894bc7e4fdaa..df568a7cd005 100644
> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
> @@ -1469,6 +1469,15 @@ int amdgpu_dm_plane_init(struct amdgpu_display_manager 
> *dm,
> drm_plane_create_blend_mode_property(plane, blend_caps);
> }
>
> +   if (plane->type == DRM_PLANE_TYPE_PRIMARY) {
> +   drm_plane_create_zpos_immutable_property(plane, 0);
> +   } else if (plane->type == DRM_PLANE_TYPE_OVERLAY) {
> +   unsigned int zpos = 1 + drm_plane_index(plane);
> +   drm_plane_create_zpos_property(plane, zpos, 1, 254);
> +   } else if (plane->type == DRM_PLANE_TYPE_CURSOR) {
> +   drm_plane_create_zpos_immutable_property(plane, 255);
> +   }
> +
> if (plane->type == DRM_PLANE_TYPE_PRIMARY &&
> plane_cap &&
> (plane_cap->pixel_format_support.nv12 ||
> --
> 2.41.0
>


Re: [PATCH] drm/amdgpu: fix amdgpu_cs_p1_user_fence

2023-08-29 Thread Alex Deucher
On Tue, Aug 29, 2023 at 8:00 AM Christian König
 wrote:
>
> The offset is just 32bits here so this can potentially overflow if
> somebody specifies a large value. Instead reduce the size to calculate
> the last possible offset.
>
> The error handling path incorrectly drops the reference to the user
> fence BO resulting in potential reference count underflow.
>
> Signed-off-by: Christian König 

Reviewed-by: Alex Deucher 

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 17 -
>  1 file changed, 4 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index f4b5572c54f2..5c8729491105 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -139,23 +139,14 @@ static int amdgpu_cs_p1_user_fence(struct 
> amdgpu_cs_parser *p,
> drm_gem_object_put(gobj);
>
> size = amdgpu_bo_size(bo);
> -   if (size != PAGE_SIZE || (data->offset + 8) > size) {
> -   r = -EINVAL;
> -   goto error_unref;
> -   }
> +   if (size != PAGE_SIZE || data->offset > (size - 8))
> +   return -EINVAL;
>
> -   if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
> -   r = -EINVAL;
> -   goto error_unref;
> -   }
> +   if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm))
> +   return -EINVAL;
>
> *offset = data->offset;
> -
> return 0;
> -
> -error_unref:
> -   amdgpu_bo_unref();
> -   return r;
>  }
>
>  static int amdgpu_cs_p1_bo_handles(struct amdgpu_cs_parser *p,
> --
> 2.34.1
>


[PATCH AUTOSEL 6.4 17/17] drm/amd/pm: Fix temperature unit of SMU v13.0.6

2023-08-29 Thread Sasha Levin
From: Lijo Lazar 

[ Upstream commit 8d036427f0042a91136e6f19a39542eedec4e96c ]

Temperature needs to be reported in millidegree Celsius.

Signed-off-by: Lijo Lazar 
Reviewed-by: Yang Wang 
Signed-off-by: Alex Deucher 
Signed-off-by: Sasha Levin 
---
 drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 9 ++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
index c9093517b1bda..bfa020fe0d4fe 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
@@ -697,16 +697,19 @@ static int smu_v13_0_6_get_smu_metrics_data(struct 
smu_context *smu,
*value = SMUQ10_TO_UINT(metrics->SocketPower) << 8;
break;
case METRICS_TEMPERATURE_HOTSPOT:
-   *value = SMUQ10_TO_UINT(metrics->MaxSocketTemperature);
+   *value = SMUQ10_TO_UINT(metrics->MaxSocketTemperature) *
+SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;
break;
case METRICS_TEMPERATURE_MEM:
-   *value = SMUQ10_TO_UINT(metrics->MaxHbmTemperature);
+   *value = SMUQ10_TO_UINT(metrics->MaxHbmTemperature) *
+SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;
break;
/* This is the max of all VRs and not just SOC VR.
 * No need to define another data type for the same.
 */
case METRICS_TEMPERATURE_VRSOC:
-   *value = SMUQ10_TO_UINT(metrics->MaxVrTemperature);
+   *value = SMUQ10_TO_UINT(metrics->MaxVrTemperature) *
+SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;
break;
case METRICS_THROTTLER_STATUS:
*value = smu_v13_0_6_get_throttler_status(smu, metrics);
-- 
2.40.1



Re: radeon.ko/i586: BUG: kernel NULL pointer dereference,address:00000004

2023-08-29 Thread Linux regression tracking (Thorsten Leemhuis)
Hi, Thorsten here, the Linux kernel's regression tracker. Top-posting
for once, to make this easily accessible to everyone.

I still have this issue on my list of tracked regressions.

Was this fixed in between? Doesn't look like it from here, but I might
be missing something.

Ciao, Thorsten (wearing his 'the Linux kernel's regression tracker' hat)
--
Everything you wanna know about Linux kernel regression tracking:
https://linux-regtracking.leemhuis.info/about/#tldr
If I did something stupid, please tell me, as explained on that page.

#regzbot poke

On 23.07.23 16:32, Steven Rostedt wrote:
> On Sun, 23 Jul 2023 20:55:06 +0900
>  wrote:
> 
>> So I tried to trap NULL and return:
>>
>>  patch-drm_vblank_cancel_pending_works-printk-NULL-ret.patch
>> diff -up ./drivers/gpu/drm/drm_vblank_work.c.pk2 
>> ./drivers/gpu/drm/drm_vblank_work.c
>> --- ./drivers/gpu/drm/drm_vblank_work.c.pk2  2023-06-06 20:50:40.0 
>> +0900
>> +++ ./drivers/gpu/drm/drm_vblank_work.c  2023-07-23 14:29:56.383093673 
>> +0900
>> @@ -71,6 +71,10 @@ void drm_vblank_cancel_pending_works(str
>>  {
>>  struct drm_vblank_work *work, *next;
>>  
>> +if (!vblank->dev) {
>> +printk(KERN_WARNING "%s: vblank->dev == NULL? returning\n", 
>> __func__);
>> +return;
>> +}
>>  assert_spin_locked(>dev->event_lock);
>>  
>>  list_for_each_entry_safe(work, next, >pending_work, node) {
>> 
>>
>> This time, the printk trap does not happen!! and radeon.ko works.
>> (NULL check for vblank->worker is still fireing though)
>>
>> Now this is puzzling.
>> Is this a timing issue?
> 
> It could very well be. And the ftrace patch could possibly not be the
> cause at all. But the thread that is created to do the work is causing
> the race window to be opened up, which is why you see it with the patch
> and don't without it. It may not be the problem, it may just tickle the
> timings enough to trigger the bug, and is causing you to go on a wild
> goose chase in the wrong direction.
> 
> -- Steve
> 
> 
>> Is systemd-udevd doing something not favaorble to kernel?
>> Is drm vblank code running without enough initialization?
>>
>> Puzzling is, that purely useland activity
>> (logging in on tty1 before radeon.ko load)
>> is affecting kernel panic/no-panic.
> 
> 
> 


Re: [PATCH v2 19/34] drm/amd/display: decouple steps for mapping CRTC degamma to DC plane

2023-08-29 Thread Pekka Paalanen
On Mon, 28 Aug 2023 12:56:04 -0100
Melissa Wen  wrote:

> On 08/28, Pekka Paalanen wrote:
> > On Mon, 28 Aug 2023 09:45:44 +0100
> > Joshua Ashton  wrote:
> >   
> > > Degamma has always been on the plane on AMD. CRTC DEGAMMA_LUT has actually
> > > just been applying it to every plane pre-blend.  
> > 
> > I've never seen that documented anywhere.
> > 
> > It has seemed obvious, that since we have KMS objects for planes and
> > CRTCs, stuff on the CRTC does not do plane stuff before blending. That
> > also has not been documented in the past, but it seemed the most
> > logical choice.
> > 
> > Even today
> > https://dri.freedesktop.org/docs/drm/gpu/drm-kms.html#color-management-properties
> > make no mention of whether they apply before or after blending.  
> 
> It's mentioned in the next section:
> https://dri.freedesktop.org/docs/drm/gpu/amdgpu/display/display-manager.html#dc-color-capabilities-between-dcn-generations
> In hindsight, maybe it isn't the best place...

That is driver-specific documentation. As a userspace dev, I'd never
look at driver-specific documentation, because I'm interested in the
KMS UAPI which is supposed to be generic, and therefore documented with
the DRM "core".

Maybe kernel reviewers also never look at driver-specific docs to find
attempts at redefining common KMS properties?

(I still don't know which definition is prevalent.)

> >   
> > > Degamma makes no sense after blending anyway.  
> > 
> > If the goal is to allow blending in optical or other space, you are
> > correct. However, APIs do not need to make sense to exist, like most of
> > the options of "Colorspace" connector property.
> > 
> > I have always thought the CRTC DEGAMMA only exists to allow the CRTC
> > CTM to work in linear or other space.
> > 
> > I have at times been puzzled by what the DEGAMMA and CTM are actually
> > good for.
> >   
> > > The entire point is for it to happen before blending to blend in linear
> > > space. Otherwise DEGAMMA_LUT and REGAMMA_LUT are the exact same thing...  
> > 
> > The CRTC CTM is between CRTC DEGAMMA and CRTC GAMMA, meaning they are
> > not interchangeable.
> > 
> > I have literally believed that DRM KMS UAPI simply does not support
> > blending in optical space, unless your framebuffers are in optical
> > which no-one does, until the color management properties are added to
> > KMS planes. This never even seemed weird, because non-linear blending
> > is so common.
> > 
> > So I have been misunderstanding the CRTC DEGAMMA property forever. Am I
> > the only one? Do all drivers agree today at what point does CRTC
> > DEGAMMA apply, before blending on all planes or after blending?
> >   
> 
> I'd like to know current userspace cases on Linux of this CRTC DEGAMMA
> LUT.

I don't know of any, but that doesn't mean anything.

> > Does anyone know of any doc about that?  
> 
> From what I retrieved about the introduction of CRTC color props[1], it
> seems the main concern at that point was getting a linear space for
> CTM[2] and CRTC degamma property seems to have followed intel
> requirements, but didn't find anything about the blending space.

Right. I've always thought CRTC props apply after blending.

> AFAIU, we have just interpreted that all CRTC color properties for DRM
> interface are after blending[3]. Can this be seen in another way?

Joshua did, and he has a logical point.

I guess if we really want to know, someone would need review all
drivers exposing these props, and even check if they changed in the
past.

FWIW, the usefulness of (RE)GAMMA (not DEGAMMA) LUT is limited by the
fact that attempting to represent 1/2.2 power function as a uniformly
distributed LUT is infeasible due to the approximation errors near zero.


Thanks,
pq

> [1] https://patchwork.freedesktop.org/series/2720/
> [2] https://codereview.chromium.org/1182063002
> [3] https://dri.freedesktop.org/docs/drm/_images/dcn3_cm_drm_current.svg
> 
> > 
> > If drivers do not agree on the behaviour of a KMS property, then that
> > property is useless for generic userspace.
> > 
> > 
> > Thanks,
> > pq
> > 
> >   
> > > On Tuesday, 22 August 2023, Pekka Paalanen 
> > > wrote:  
> > > > On Thu, 10 Aug 2023 15:02:59 -0100
> > > > Melissa Wen  wrote:
> > > >
> > > >> The next patch adds pre-blending degamma to AMD color mgmt pipeline, 
> > > >> but
> > > >> pre-blending degamma caps (DPP) is currently in use to provide DRM CRTC
> > > >> atomic degamma or implict degamma on legacy gamma. Detach degamma usage
> > > >> regarging CRTC color properties to manage plane and CRTC color
> > > >> correction combinations.
> > > >>
> > > >> Reviewed-by: Harry Wentland 
> > > >> Signed-off-by: Melissa Wen 
> > > >> ---
> > > >>  .../amd/display/amdgpu_dm/amdgpu_dm_color.c   | 59 +--
> > > >>  1 file changed, 41 insertions(+), 18 deletions(-)
> > > >>
> > > >> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c   
> > > >>  
> > > 

Re: [Intel-gfx] [PATCH 0/4] drm/amd/display: stop using drm_edid_override_connector_update()

2023-08-29 Thread Jani Nikula
On Wed, 23 Aug 2023, Jani Nikula  wrote:
> On Tue, 22 Aug 2023, Alex Hung  wrote:
>> On 2023-08-22 06:01, Jani Nikula wrote:
>>> Over the past years I've been trying to unify the override and firmware
>>> EDID handling as well as EDID property updates. It won't work if drivers
>>> do their own random things.
>> Let's check how to replace these references by appropriate ones or fork 
>> the function as reverting these patches causes regressions.
>
> I think the fundamental problem you have is conflating connector forcing
> with EDID override. They're orthogonal. The .force callback has no
> business basing the decisions on connector->edid_override. Force is
> force, override is override.
>
> The driver isn't even supposed to know or care if the EDID originates
> from the firmware loader or override EDID debugfs. drm_get_edid() will
> handle that for you transparently. It'll return the EDID, and you
> shouldn't look at connector->edid_blob_ptr either. Using that will make
> future work in drm_edid.c harder.
>
> You can't fix that with minor tweaks. I think you'll be better off
> starting from scratch.
>
> Also, connector->edid_override is debugfs. You actually can change the
> behaviour. If your userspace, whatever it is, has been written to assume
> connector forcing if EDID override is set, you *do* have to fix that,
> and set both.

Any updates on fixing this, or shall we proceed with the reverts?

BR,
Jani.



>
> BR,
> Jani.
>
>
>>
>> Cheers,
>> Alex
>>
>>> 
>>> BR,
>>> Jani.
>>> 
>>> 
>>> Cc: Alex Deucher 
>>> Cc: Alex Hung 
>>> Cc: Chao-kai Wang 
>>> Cc: Daniel Wheeler 
>>> Cc: Harry Wentland 
>>> Cc: Hersen Wu 
>>> Cc: Leo Li 
>>> Cc: Rodrigo Siqueira 
>>> Cc: Wenchieh Chien 
>>> Cc: David Airlie 
>>> Cc: Daniel Vetter 
>>> 
>>> Jani Nikula (4):
>>>Revert "drm/amd/display: drop unused count variable in
>>>  create_eml_sink()"
>>>Revert "drm/amd/display: assign edid_blob_ptr with edid from debugfs"
>>>Revert "drm/amd/display: mark amdgpu_dm_connector_funcs_force static"
>>>Revert "drm/amd/display: implement force function in
>>>  amdgpu_dm_connector_funcs"
>>> 
>>>   .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 44 +++
>>>   1 file changed, 5 insertions(+), 39 deletions(-)
>>> 

-- 
Jani Nikula, Intel Open Source Graphics Center


[PATCH] Revert "Revert "drm/amd/display: Implement zpos property""

2023-08-29 Thread Hamza Mahfooz
This reverts commit 984612bd4649c91f12e9c7c7f9e914fdc8ba7d3f.

The problematic IGT test case (i.e. kms_atomic@plane-immutable-zpos) has
been fixed as of commit cb77add45011 ("tests/kms_atomic: remove zpos <
N-planes assert") to the IGT repo. So, reintroduce the reverted code.

Link: 
https://gitlab.freedesktop.org/drm/igt-gpu-tools/-/commit/cb77add45011b129e21f3cb2a4089a73dde56179
Signed-off-by: Hamza Mahfooz 
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c | 9 +
 1 file changed, 9 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
index 894bc7e4fdaa..df568a7cd005 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
@@ -1469,6 +1469,15 @@ int amdgpu_dm_plane_init(struct amdgpu_display_manager 
*dm,
drm_plane_create_blend_mode_property(plane, blend_caps);
}
 
+   if (plane->type == DRM_PLANE_TYPE_PRIMARY) {
+   drm_plane_create_zpos_immutable_property(plane, 0);
+   } else if (plane->type == DRM_PLANE_TYPE_OVERLAY) {
+   unsigned int zpos = 1 + drm_plane_index(plane);
+   drm_plane_create_zpos_property(plane, zpos, 1, 254);
+   } else if (plane->type == DRM_PLANE_TYPE_CURSOR) {
+   drm_plane_create_zpos_immutable_property(plane, 255);
+   }
+
if (plane->type == DRM_PLANE_TYPE_PRIMARY &&
plane_cap &&
(plane_cap->pixel_format_support.nv12 ||
-- 
2.41.0



Re: [PATCH 6/7] Revert "drm/amd/display: Implement zpos property"

2023-08-29 Thread Melissa Wen
On 08/18, Leo Li wrote:
> 
> 
> On 2023-08-18 04:25, Melissa Wen wrote:
> > On 07/26, Aurabindo Pillai wrote:
> > > This reverts commit 6c8ff1683d30024c8cff137d30b740a405cc084e.
> > > 
> > > This patch causes IGT test case 'kms_atomic@plane-immutable-zpos' to
> > > fail on AMDGPU hardware.
> > > 
> > > Cc: Joshua Ashton 
> > > Signed-off-by: Nicholas Choi 
> > > ---
> > >   drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c | 9 -
> > >   1 file changed, 9 deletions(-)
> > > 
> > > diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c 
> > > b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
> > > index 2198df96ed6f..8eeca160d434 100644
> > > --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
> > > +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
> > > @@ -1468,15 +1468,6 @@ int amdgpu_dm_plane_init(struct 
> > > amdgpu_display_manager *dm,
> > >   drm_plane_create_blend_mode_property(plane, blend_caps);
> > >   }
> > > - if (plane->type == DRM_PLANE_TYPE_PRIMARY) {
> > > - drm_plane_create_zpos_immutable_property(plane, 0);
> > > - } else if (plane->type == DRM_PLANE_TYPE_OVERLAY) {
> > > - unsigned int zpos = 1 + drm_plane_index(plane);
> > > - drm_plane_create_zpos_property(plane, zpos, 1, 254);
> > > - } else if (plane->type == DRM_PLANE_TYPE_CURSOR) {
> > > - drm_plane_create_zpos_immutable_property(plane, 255);
> > > - }
> > 
> > Hi Jay and Nicholas,
> > 
> > I'm examining this regression and, looking at the test code, I consider
> > this failure is caused by an incorrect assumption in the IGT test in
> > which any zpos values must be in the normalized range of 0 and N planes
> > per CRTC.
> > 
> > for (int k = 0; k < n_planes; k++) {
> > int zpos;
> > igt_plane_t *temp;
> > 
> > temp = >pipes[pipe->pipe].planes[k];
> > 
> > if (!igt_plane_has_prop(temp, IGT_PLANE_ZPOS))
> > continue;
> > 
> > zpos = igt_plane_get_prop(temp, IGT_PLANE_ZPOS);
> > 
> > igt_assert_lt(zpos, n_planes);  // test case fails here
> > 
> > plane_ptr[zpos] = temp;
> > }
> > 
> > 
> > I didn't find anything in the DRM documentation that imposes this
> > behavior. Also, the plane composition in the test is working correctly
> > with this patch and without this likely-misleading assert. In addition,
> > enabling zpos property increases the coverage of
> > `kms_atomic@plane-immutable-zpos` test (previously this part of the test
> > was just bypassed), so it's not a regression per se. Therefore, I'm
> > inclined to send a fix to IGT, instead of implementing a behavior that
> > fit the test but may not fit well AMD display machinery.
> > 
> > But first I wonder if you guys find any other test or visual check that
> > fail with this feature?
> > 
> > I checked other IGT KMS plane tests and AMD MPO tests (in `amd_plane`)
> > and results are preserved. If there are no other issues besides IGT
> > plane-immutable-zpos, I'll proceed with sending the change to IGT.
> > 
> > Thanks,
> > 
> > Melissa
> 
> Hi Melissa,
> 
> Thanks for taking a look at the IGT test. Looks like the IGT failures
> are the only concerns, and I agree that it doesn't make sense to require
> zpos to be normalized between 0 and number of planes.

Hi Leo,

Thanks for the feedback.

I've also checked that msm driver creates zpos properties in a similar
way of this commit here, so I sent and applied the IGT test change:
https://gitlab.freedesktop.org/drm/igt-gpu-tools/-/commit/cb77add45011b129e21f3cb2a4089a73dde56179

With that, could you guys revert this commit reversion?

Thanks,

Melissa

> 
> Thanks,
> Leo
> 
> > 
> > > -
> > >   if (plane->type == DRM_PLANE_TYPE_PRIMARY &&
> > >   plane_cap &&
> > >   (plane_cap->pixel_format_support.nv12 ||
> > > -- 
> > > 2.41.0
> > > 


signature.asc
Description: PGP signature


[PATCH] drm/amdgpu: fix amdgpu_cs_p1_user_fence

2023-08-29 Thread Christian König
The offset is just 32bits here so this can potentially overflow if
somebody specifies a large value. Instead reduce the size to calculate
the last possible offset.

The error handling path incorrectly drops the reference to the user
fence BO resulting in potential reference count underflow.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 17 -
 1 file changed, 4 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index f4b5572c54f2..5c8729491105 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -139,23 +139,14 @@ static int amdgpu_cs_p1_user_fence(struct 
amdgpu_cs_parser *p,
drm_gem_object_put(gobj);
 
size = amdgpu_bo_size(bo);
-   if (size != PAGE_SIZE || (data->offset + 8) > size) {
-   r = -EINVAL;
-   goto error_unref;
-   }
+   if (size != PAGE_SIZE || data->offset > (size - 8))
+   return -EINVAL;
 
-   if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
-   r = -EINVAL;
-   goto error_unref;
-   }
+   if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm))
+   return -EINVAL;
 
*offset = data->offset;
-
return 0;
-
-error_unref:
-   amdgpu_bo_unref();
-   return r;
 }
 
 static int amdgpu_cs_p1_bo_handles(struct amdgpu_cs_parser *p,
-- 
2.34.1



Re: [PATCH v3 5/7] drm/amdgpu: Set/Reset GPU workload profile

2023-08-29 Thread Christian König

Am 28.08.23 um 14:26 schrieb Arvind Yadav:

This patch is to switch the GPU workload profile based
on the submitted job. The workload profile is reset to
default when the job is done.

v3:
- Addressed the review comment about changing the function
   name from *_set() to *_get().


That looks like a really bad idea in general. This are the high level 
functions, but what you want to use are the low level functions for each 
ring.


Take a look at amdgpu_ring_begin_use()/_end_use() instead.

Christian.



Cc: Christian Koenig 
Cc: Alex Deucher 
Reviewed-by: Shashank Sharma 
Signed-off-by: Arvind Yadav 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 5 +
  1 file changed, 5 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index c3d9d75143f4..c5032762d497 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -176,6 +176,9 @@ void amdgpu_job_free_resources(struct amdgpu_job *job)
  static void amdgpu_job_free_cb(struct drm_sched_job *s_job)
  {
struct amdgpu_job *job = to_amdgpu_job(s_job);
+   struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched);
+
+   amdgpu_workload_profile_put(ring->adev, ring->funcs->type);
  
  	drm_sched_job_cleanup(s_job);
  
@@ -295,6 +298,8 @@ static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job)

DRM_ERROR("Error scheduling IBs (%d)\n", r);
}
  
+	amdgpu_workload_profile_get(adev, ring->funcs->type);

+
job->job_run_counter++;
amdgpu_job_free_resources(job);
  




RE: [PATCH v3 2/2] drm/amdgpu: Put page tables to GTT memory for gfx10 onwards APUs

2023-08-29 Thread Zhang, Yifan
[AMD Official Use Only - General]

Validated on Phoenix and Raphael w/ IOMMU remapping mode, and found random page 
fault when launching Xorg. I will debug this issue and update the patch.

Best Regards,
Yifan

From: Deucher, Alexander 
Sent: Tuesday, August 29, 2023 2:06 AM
To: Koenig, Christian ; Zhang, Yifan 
; Christian König ; 
amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH v3 2/2] drm/amdgpu: Put page tables to GTT memory for gfx10 
onwards APUs


[AMD Official Use Only - General]

Technically the AMD IOMMU uses direct mapping mode for any device which claims 
to support ATS in order to support the IOMMUv2 functionality, but that was also 
the case with Raven systems which were problematic when remapping mode was 
enabled.  That said, now that IOMMUv2 support has been removed, there's no 
reason to use direct mapping in the IOMMU driver so that may change.

Alex


From: Koenig, Christian 
mailto:christian.koe...@amd.com>>
Sent: Monday, August 28, 2023 7:30 AM
To: Zhang, Yifan mailto:yifan1.zh...@amd.com>>; Christian 
König 
mailto:ckoenig.leichtzumer...@gmail.com>>; 
amd-gfx@lists.freedesktop.org 
mailto:amd-gfx@lists.freedesktop.org>>
Cc: Deucher, Alexander 
mailto:alexander.deuc...@amd.com>>
Subject: Re: [PATCH v3 2/2] drm/amdgpu: Put page tables to GTT memory for gfx10 
onwards APUs

Well, there seems to be a very basic misunderstood here: The IOMMU
isolation level is *not* ASIC dependent!

Try to set amd_iommu=force_isolation on the kernel command line.

This is a configuration option customers can use to harden their systems
and when this isn't properly tested we can't allow page tables in system
memory.

Regards,
Christian.

Am 28.08.23 um 13:23 schrieb Zhang, Yifan:
> [Public]
>
> Not yet. It will be only enabled for gfx10.3.3 and later APU initially, IOMMU 
> is pass through in these ASIC.
>
> -Original Message-
> From: Christian König 
> mailto:ckoenig.leichtzumer...@gmail.com>>
> Sent: Monday, August 28, 2023 5:41 PM
> To: Zhang, Yifan mailto:yifan1.zh...@amd.com>>; 
> amd-gfx@lists.freedesktop.org
> Cc: Deucher, Alexander 
> mailto:alexander.deuc...@amd.com>>; Koenig, 
> Christian mailto:christian.koe...@amd.com>>
> Subject: Re: [PATCH v3 2/2] drm/amdgpu: Put page tables to GTT memory for 
> gfx10 onwards APUs
>
> Is that now validated with IOMMU in non pass through mode?
>
> Christian.
>
> Am 28.08.23 um 10:58 schrieb Zhang, Yifan:
>> [AMD Official Use Only - General]
>>
>> Ping
>>
>> -Original Message-
>> From: Zhang, Yifan mailto:yifan1.zh...@amd.com>>
>> Sent: Friday, August 25, 2023 8:34 AM
>> To: amd-gfx@lists.freedesktop.org
>> Cc: Deucher, Alexander 
>> mailto:alexander.deuc...@amd.com>>; Koenig, 
>> Christian mailto:christian.koe...@amd.com>>; 
>> Zhang, Yifan mailto:yifan1.zh...@amd.com>>
>> Subject: [PATCH v3 2/2] drm/amdgpu: Put page tables to GTT memory for gfx10 
>> onwards APUs
>>
>> To decrease VRAM pressure for APUs, put page tables to GTT domain for gfx10 
>> and newer APUs.
>>
>> v2: only enable it for gfx10 and newer APUs (Alex, Christian)
>>
>> Signed-off-by: Yifan Zhang 
>> mailto:yifan1.zh...@amd.com>>
>> ---
>>drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c | 9 ++---
>>1 file changed, 6 insertions(+), 3 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
>> index 96d601e209b8..4603d87c61a0 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
>> @@ -515,10 +515,13 @@ int amdgpu_vm_pt_create(struct amdgpu_device *adev, 
>> struct amdgpu_vm *vm,
>>   bp.size = amdgpu_vm_pt_size(adev, level);
>>   bp.byte_align = AMDGPU_GPU_PAGE_SIZE;
>>
>> -   if (!adev->gmc.is_app_apu)
>> -   bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
>> -   else
>> +   if (adev->gmc.is_app_apu ||
>> +   ((adev->flags & AMD_IS_APU) &&
>> +   (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 3, 3
>>   bp.domain = AMDGPU_GEM_DOMAIN_GTT;
>> +   else
>> +   bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
>> +
>>
>>   bp.domain = amdgpu_bo_get_preferred_domain(adev, bp.domain);
>>   bp.flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
>> --
>> 2.37.3
>>