[PATCH 1/2] drm/amd/pm: unify the interface for loading SMU microcode

2021-03-24 Thread Evan Quan
No need to have special handling for swSMU supported ASICs.

Change-Id: I49395bfc31b43b09bac78527cd8f08e8600749b3
Signed-off-by: Evan Quan 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c| 10 ++
 drivers/gpu/drm/amd/pm/amdgpu_dpm.c   |  5 ++-
 drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h   |  4 ---
 drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 41 +++
 4 files changed, 26 insertions(+), 34 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 9a65ff871a58..b4fd0394cd08 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -7174,16 +7174,10 @@ static int gfx_v10_0_hw_init(void *handle)
 * loaded firstly, so in direct type, it has to load smc ucode
 * here before rlc.
 */
-   if (adev->smu.ppt_funcs != NULL && !(adev->flags & AMD_IS_APU)) 
{
-   r = smu_load_microcode(&adev->smu);
+   if (!(adev->flags & AMD_IS_APU)) {
+   r = amdgpu_pm_load_smu_firmware(adev, NULL);
if (r)
return r;
-
-   r = smu_check_fw_status(&adev->smu);
-   if (r) {
-   pr_err("SMC firmware status is not correct\n");
-   return r;
-   }
}
gfx_v10_0_disable_gpa_mode(adev);
}
diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c 
b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
index 0a6bb3311f0f..464fc04fb334 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
@@ -1606,7 +1606,10 @@ int amdgpu_pm_load_smu_firmware(struct amdgpu_device 
*adev, uint32_t *smu_versio
pr_err("smu firmware loading failed\n");
return r;
}
-   *smu_version = adev->pm.fw_version;
+
+   if (smu_version)
+   *smu_version = adev->pm.fw_version;
}
+
return 0;
 }
diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h 
b/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h
index 9507ae34c4ca..4684eca7b37b 100644
--- a/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h
+++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h
@@ -1287,10 +1287,6 @@ enum smu_cmn2asic_mapping_type {
[profile] = {1, (workload)}
 
 #if !defined(SWSMU_CODE_LAYER_L2) && !defined(SWSMU_CODE_LAYER_L3) && 
!defined(SWSMU_CODE_LAYER_L4)
-int smu_load_microcode(struct smu_context *smu);
-
-int smu_check_fw_status(struct smu_context *smu);
-
 int smu_get_power_limit(struct smu_context *smu,
uint32_t *limit,
enum smu_ppt_limit_level limit_level);
diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c 
b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
index 10a8c4a65619..3bbe0278e50e 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
@@ -2115,36 +2115,34 @@ const struct amdgpu_ip_block_version smu_v13_0_ip_block 
=
.funcs = &smu_ip_funcs,
 };
 
-int smu_load_microcode(struct smu_context *smu)
+static int smu_load_microcode(void *handle)
 {
+   struct smu_context *smu = handle;
+   struct amdgpu_device *adev = smu->adev;
int ret = 0;
 
-   if (!smu->pm_enabled || !smu->adev->pm.dpm_enabled)
+   if (!smu->pm_enabled)
return -EOPNOTSUPP;
 
-   mutex_lock(&smu->mutex);
+   /* This should be used for non PSP loading */
+   if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)
+   return 0;
 
-   if (smu->ppt_funcs->load_microcode)
+   if (smu->ppt_funcs->load_microcode) {
ret = smu->ppt_funcs->load_microcode(smu);
+   if (ret) {
+   dev_err(adev->dev, "Load microcode failed\n");
+   return ret;
+   }
+   }
 
-   mutex_unlock(&smu->mutex);
-
-   return ret;
-}
-
-int smu_check_fw_status(struct smu_context *smu)
-{
-   int ret = 0;
-
-   if (!smu->pm_enabled || !smu->adev->pm.dpm_enabled)
-   return -EOPNOTSUPP;
-
-   mutex_lock(&smu->mutex);
-
-   if (smu->ppt_funcs->check_fw_status)
+   if (smu->ppt_funcs->check_fw_status) {
ret = smu->ppt_funcs->check_fw_status(smu);
-
-   mutex_unlock(&smu->mutex);
+   if (ret) {
+   dev_err(adev->dev, "SMC is not ready\n");
+   return ret;
+   }
+   }
 
return ret;
 }
@@ -2995,6 +2993,7 @@ static const struct amd_pm_funcs swsmu_pm_funcs = {
.set_watermarks_for_clock_ranges = 
smu_set_watermarks_for_clock_ranges,
.display_disable_memory_clock_switch = 
smu_display_disable_memory_clock_switch,
.get_max_sustainable_clocks_by_dc= 
smu_get_max_sustainable_clocks_by_dc,
+   .load_firmware 

[PATCH 2/2] drm/amd/pm: fix missing static declarations

2021-03-24 Thread Evan Quan
Add "static" declarations for those APIs used internally.

Change-Id: I38bfa8a117b3056202935163935a867f03ebaefe
Signed-off-by: Evan Quan 
---
 drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 14 +++---
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c 
b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
index 3bbe0278e50e..e136f071b4f2 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
@@ -1467,7 +1467,7 @@ static int smu_hw_fini(void *handle)
return smu_smc_hw_cleanup(smu);
 }
 
-int smu_reset(struct smu_context *smu)
+static int smu_reset(struct smu_context *smu)
 {
struct amdgpu_device *adev = smu->adev;
int ret;
@@ -1715,10 +1715,10 @@ static int smu_adjust_power_state_dynamic(struct 
smu_context *smu,
return ret;
 }
 
-int smu_handle_task(struct smu_context *smu,
-   enum amd_dpm_forced_level level,
-   enum amd_pp_task task_id,
-   bool lock_needed)
+static int smu_handle_task(struct smu_context *smu,
+  enum amd_dpm_forced_level level,
+  enum amd_pp_task task_id,
+  bool lock_needed)
 {
int ret = 0;
 
@@ -2147,7 +2147,7 @@ static int smu_load_microcode(void *handle)
return ret;
 }
 
-int smu_set_gfx_cgpg(struct smu_context *smu, bool enabled)
+static int smu_set_gfx_cgpg(struct smu_context *smu, bool enabled)
 {
int ret = 0;
 
@@ -2466,7 +2466,7 @@ static u32 smu_get_fan_control_mode(void *handle)
return ret;
 }
 
-int smu_set_fan_control_mode(struct smu_context *smu, int value)
+static int smu_set_fan_control_mode(struct smu_context *smu, int value)
 {
int ret = 0;
 
-- 
2.29.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amd/pm: no need to force MCLK to highest when no display connected

2021-03-24 Thread Evan Quan
Correct the check for vblank short.

Change-Id: I0eb3a6bd95b7f6d97029772914154324c8ccd2c0
Signed-off-by: Evan Quan 
---
 drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c 
b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c
index 7edafef095a3..301b6769f007 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c
+++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c
@@ -3330,7 +3330,8 @@ static int smu7_apply_state_adjust_rules(struct pp_hwmgr 
*hwmgr,
 
disable_mclk_switching_for_display = ((1 < 
hwmgr->display_config->num_display) &&

!hwmgr->display_config->multi_monitor_in_sync) ||
-   smu7_vblank_too_short(hwmgr, 
hwmgr->display_config->min_vblank_time);
+   
(hwmgr->display_config->num_display &&
+   smu7_vblank_too_short(hwmgr, 
hwmgr->display_config->min_vblank_time));
 
disable_mclk_switching = disable_mclk_switching_for_frame_lock ||
 disable_mclk_switching_for_display;
-- 
2.29.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH] drm/amdgpu/pm: bail on sysfs/debugfs queries during platform suspend

2021-03-24 Thread Quan, Evan
[AMD Public Use]

Maybe we can have an API like is_hw_access_blocked(). So that we can put all 
those checks below within it.
if (amdgpu_in_reset(adev))
return -EPERM;
+   if (adev->in_suspend && !adev->in_runpm)
+   return -EPERM;
Anyway, patch is reviewed-by: Evan Quan 

-Original Message-
From: amd-gfx  On Behalf Of Alex Deucher
Sent: Thursday, March 25, 2021 5:18 AM
To: amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander 
Subject: [PATCH] drm/amdgpu/pm: bail on sysfs/debugfs queries during platform 
suspend

The GPU is in the process of being shutdown.  Spurious queries during
suspend and resume can put the SMU into a bad state.  Runtime PM is
handled dynamically so we check if we are in non-runtime suspend.

Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/pm/amdgpu_pm.c | 98 ++
 1 file changed, 98 insertions(+)

diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c 
b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
index 2627870a786e..3c1b5483688b 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
@@ -129,6 +129,8 @@ static ssize_t amdgpu_get_power_dpm_state(struct device 
*dev,
 
if (amdgpu_in_reset(adev))
return -EPERM;
+   if (adev->in_suspend && !adev->in_runpm)
+   return -EPERM;
 
ret = pm_runtime_get_sync(ddev->dev);
if (ret < 0) {
@@ -162,6 +164,8 @@ static ssize_t amdgpu_set_power_dpm_state(struct device 
*dev,
 
if (amdgpu_in_reset(adev))
return -EPERM;
+   if (adev->in_suspend && !adev->in_runpm)
+   return -EPERM;
 
if (strncmp("battery", buf, strlen("battery")) == 0)
state = POWER_STATE_TYPE_BATTERY;
@@ -268,6 +272,8 @@ static ssize_t 
amdgpu_get_power_dpm_force_performance_level(struct device *dev,
 
if (amdgpu_in_reset(adev))
return -EPERM;
+   if (adev->in_suspend && !adev->in_runpm)
+   return -EPERM;
 
ret = pm_runtime_get_sync(ddev->dev);
if (ret < 0) {
@@ -310,6 +316,8 @@ static ssize_t 
amdgpu_set_power_dpm_force_performance_level(struct device *dev,
 
if (amdgpu_in_reset(adev))
return -EPERM;
+   if (adev->in_suspend && !adev->in_runpm)
+   return -EPERM;
 
if (strncmp("low", buf, strlen("low")) == 0) {
level = AMD_DPM_FORCED_LEVEL_LOW;
@@ -408,6 +416,8 @@ static ssize_t amdgpu_get_pp_num_states(struct device *dev,
 
if (amdgpu_in_reset(adev))
return -EPERM;
+   if (adev->in_suspend && !adev->in_runpm)
+   return -EPERM;
 
ret = pm_runtime_get_sync(ddev->dev);
if (ret < 0) {
@@ -448,6 +458,8 @@ static ssize_t amdgpu_get_pp_cur_state(struct device *dev,
 
if (amdgpu_in_reset(adev))
return -EPERM;
+   if (adev->in_suspend && !adev->in_runpm)
+   return -EPERM;
 
ret = pm_runtime_get_sync(ddev->dev);
if (ret < 0) {
@@ -484,6 +496,8 @@ static ssize_t amdgpu_get_pp_force_state(struct device *dev,
 
if (amdgpu_in_reset(adev))
return -EPERM;
+   if (adev->in_suspend && !adev->in_runpm)
+   return -EPERM;
 
if (adev->pp_force_state_enabled)
return amdgpu_get_pp_cur_state(dev, attr, buf);
@@ -504,6 +518,8 @@ static ssize_t amdgpu_set_pp_force_state(struct device *dev,
 
if (amdgpu_in_reset(adev))
return -EPERM;
+   if (adev->in_suspend && !adev->in_runpm)
+   return -EPERM;
 
if (strlen(buf) == 1)
adev->pp_force_state_enabled = false;
@@ -564,6 +580,8 @@ static ssize_t amdgpu_get_pp_table(struct device *dev,
 
if (amdgpu_in_reset(adev))
return -EPERM;
+   if (adev->in_suspend && !adev->in_runpm)
+   return -EPERM;
 
ret = pm_runtime_get_sync(ddev->dev);
if (ret < 0) {
@@ -602,6 +620,8 @@ static ssize_t amdgpu_set_pp_table(struct device *dev,
 
if (amdgpu_in_reset(adev))
return -EPERM;
+   if (adev->in_suspend && !adev->in_runpm)
+   return -EPERM;
 
ret = pm_runtime_get_sync(ddev->dev);
if (ret < 0) {
@@ -764,6 +784,8 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device 
*dev,
 
if (amdgpu_in_reset(adev))
return -EPERM;
+   if (adev->in_suspend && !adev->in_runpm)
+   return -EPERM;
 
if (count > 127)
return -EINVAL;
@@ -865,6 +887,8 @@ static ssize_t amdgpu_get_pp_od_clk_voltage(struct device 
*dev,
 
if (amdgpu_in_reset(adev))
return -EPERM;
+   if (adev->in_suspend && !adev->in_runpm)
+   return -EPERM;
 
ret = pm_runtime_get_sync(ddev->dev);
if (ret < 0) {
@@ -916,6 +940,8 @@ static ssize_t amdgpu_set_pp_features(struct device *dev,
 
if (amdgpu_in_reset(adev))
 

Re: [PATCH] drm/amdgpu/display: fix dmub invalid register read

2021-03-24 Thread Thomas Lambertz


On 24/03/2021 22.26, Harry Wentland wrote:
> 
> 
> On 2021-03-24 5:13 p.m., Harry Wentland wrote:
>> +Nick, Bhawan
>>
>> On 2021-03-24 4:39 p.m., Alex Deucher wrote:
>>> On Tue, Mar 23, 2021 at 4:18 AM Thomas Lambertz  
>>> wrote:

 DMCUB_SCRATCH_0 sometimes contains 0xdeadbeef during initialization.
 If this is detected, return 0 instead. This prevents wrong bit-flags
 from being read.

 The main impact of this bug is in the status check loop in
 dmub_srv_wait_for_auto_load. As it is waiting for the device to become
 ready, returning too early leads to a race condition. It is usually won
 on first boot, but lost when laptop resumes from sleep, breaking screen
 brightness control.

 This issue was always present, but previously mitigated by the fact that
 the full register was compared to the wanted value. Currently, only the
 bottom two bits are tested, which are also set in 0xdeadbeef, thus
 returning readiness to early.

 Fixes: 5fe6b98ae00d ("drm/amd/display: Update dmub code")
>>>
>>> Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1518 Harry, Leo, 
>>> Rodrigo, any ideas?
>>>
>>
>> When I checked with our DMUB experts yesterday they said they'd never expect 
>> to see 0xdeadbeef in SCRATCH0.
>>
>> That said based on Thomas's test it does look like we're getting deadbeef at 
>> resume so I'm almost inclined to ACK this patch. It doesn't really do any 
>> harm.
>>
> 
> Apparently an older version of the renoir_dmcub (and likely other versions) 
> had a 0xdeadbeef write to SCRATCH0. In order to support these older FW this 
> patch is
> Reviewed-by: Harry Wentland 
> 
> I recommend updating to the latest FW from linux-firmware if any other issues 
> are observed.

Thank you for the quick triage!

I just tried the newest firmware from git, and have some good and some bad news.
The 0xdeadbeef issue is fixed, and initialization is way faster on the newer 
firmware.

But the screen brightness issue has been reintroduced. The cause is a different 
one, and I am not sure how to debug it further.

To test, I booted the same kernel (v5.12-RC4 with my patch applied) with two 
versions of linux-firmware.
The old one worked, the new one shows the same issue as the old without the 
patch.

Relevant dmesg parts:

a) NEW, 24/03/2021 (3f026a2)
[6.350233] [drm] Display Core initialized with v3.2.122!
[6.350256] DMCUB_SCRATCH0: 0x0
[6.350359] DMCUB_SCRATCH0: 0x0
[6.350461] DMCUB_SCRATCH0: 0x0
[6.350563] DMCUB_SCRATCH0: 0x0
[6.350666] DMCUB_SCRATCH0: 0x3
[6.350883] [drm] DMUB hardware initialized: version=0x01020003
 suspend, resume 
[   59.709695] DMCUB_SCRATCH0: 0x0
 repeats 6 more times
[   59.710432] DMCUB_SCRATCH0: 0x3
[   59.710438] [drm] DMUB hardware initialized: version=0x01020003



b) OLD, 15/03/2021 (3568f96)
[7.805468] [drm] Display Core initialized with v3.2.122!
[7.805496] DMCUB_SCRATCH0: 0x0
[7.805598] DMCUB_SCRATCH0: 0x0
[7.805700] DMCUB_SCRATCH0: 0x0
[7.805801] DMCUB_SCRATCH0: 0x0
[7.805903] DMCUB_SCRATCH0: 0x1
 repeats 115 more times
[7.817841] DMCUB_SCRATCH0: 0x1
[7.817944] DMCUB_SCRATCH0: 0xdeadbeef
[7.818047] DMCUB_SCRATCH0: 0xdeadbeef
[7.818149] DMCUB_SCRATCH0: 0xdeadbeef
[7.818251] DMCUB_SCRATCH0: 0xdeadbeef
[7.818353] DMCUB_SCRATCH0: 0xdeadbeef
[7.818454] DMCUB_SCRATCH0: 0x3
[7.818676] [drm] DMUB hardware initialized: version=0x0100
 suspend, resume 
[   89.818383] DMCUB_SCRATCH0: 0x0
[   89.818493] DMCUB_SCRATCH0: 0x0
[   89.818601] DMCUB_SCRATCH0: 0x0
[   89.818708] DMCUB_SCRATCH0: 0x0
[   89.818811] DMCUB_SCRATCH0: 0x0
[   89.818918] DMCUB_SCRATCH0: 0x0
[   89.819025] DMCUB_SCRATCH0: 0x1
 repeats 112 more times
[   89.831007] DMCUB_SCRATCH0: 0x1
[   89.831110] DMCUB_SCRATCH0: 0xdeadbeef
[   89.831218] DMCUB_SCRATCH0: 0xdeadbeef
[   89.831325] DMCUB_SCRATCH0: 0xdeadbeef
[   89.831428] DMCUB_SCRATCH0: 0xdeadbeef
[   89.831535] DMCUB_SCRATCH0: 0xdeadbeef
[   89.831643] DMCUB_SCRATCH0: 0xdeadbeef
[   89.831745] DMCUB_SCRATCH0: 0xdeadbeef
[   89.831853] DMCUB_SCRATCH0: 0xdeadbeef
[   89.831956] DMCUB_SCRATCH0: 0x3
[   89.832299] [drm] DMUB hardware initialized: version=0x0100

Thomas


> 
> Harry
> 
>> Harry
>>
>>> Alex
>>>
 Signed-off-by: Thomas Lambertz 
 ---
   drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.c | 8 +++-
   drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.h | 2 ++
   2 files changed, 9 insertions(+), 1 deletion(-)

 diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.c 
 b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.c
 index 8e8e65fa83c0..d6fcae182f68 100644
 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.c
 +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.c
 @@ -323,8 +323,14 @@ uint32_t dmub_dcn20_get_gpint_response(struct 
 dmub_srv *dmub)
   union dmub_fw_boot_status dmub_dcn20_get_fw_boot_status(struct 

Re: [PATCH] drm/radeon/r600_cs: Couple of typo fixes

2021-03-24 Thread Randy Dunlap
On 3/24/21 6:50 AM, Bhaskar Chowdhury wrote:
> 
> s/miror/mirror/
> s/needind/needing/
> 
> Signed-off-by: Bhaskar Chowdhury 
> ---
>  drivers/gpu/drm/radeon/r600_cs.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/radeon/r600_cs.c 
> b/drivers/gpu/drm/radeon/r600_cs.c
> index 34b7c6f16479..aded1f2264e0 100644
> --- a/drivers/gpu/drm/radeon/r600_cs.c
> +++ b/drivers/gpu/drm/radeon/r600_cs.c
> @@ -38,7 +38,7 @@ extern void r600_cs_legacy_get_tiling_conf(struct 
> drm_device *dev, u32 *npipes,
> 
> 
>  struct r600_cs_track {
> - /* configuration we miror so that we use same code btw kms/ums */
> + /* configuration we mirror so that we use same code btw kms/ums */
>   u32 group_size;
>   u32 nbanks;
>   u32 npipes;
> @@ -963,7 +963,7 @@ static int r600_cs_parse_packet0(struct radeon_cs_parser 
> *p,
>   *
>   * This function will test against r600_reg_safe_bm and return 0
>   * if register is safe. If register is not flag as safe this function
> - * will test it against a list of register needind special handling.
> + * will test it against a list of register needing special handling.
>   */
>  static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
>  {
> --

Those 2 LGTM, but please fix this while you are touching this file:

drivers/gpu/drm/radeon/r600_cs.c:2339: informations  ==> information


thanks.
-- 
~Randy

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/radeon/r600_cs: Couple of typo fixes

2021-03-24 Thread Bhaskar Chowdhury

On 14:48 Wed 24 Mar 2021, Randy Dunlap wrote:

On 3/24/21 6:50 AM, Bhaskar Chowdhury wrote:


s/miror/mirror/
s/needind/needing/

Signed-off-by: Bhaskar Chowdhury 
---
 drivers/gpu/drm/radeon/r600_cs.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/radeon/r600_cs.c b/drivers/gpu/drm/radeon/r600_cs.c
index 34b7c6f16479..aded1f2264e0 100644
--- a/drivers/gpu/drm/radeon/r600_cs.c
+++ b/drivers/gpu/drm/radeon/r600_cs.c
@@ -38,7 +38,7 @@ extern void r600_cs_legacy_get_tiling_conf(struct drm_device 
*dev, u32 *npipes,


 struct r600_cs_track {
-   /* configuration we miror so that we use same code btw kms/ums */
+   /* configuration we mirror so that we use same code btw kms/ums */
u32 group_size;
u32 nbanks;
u32 npipes;
@@ -963,7 +963,7 @@ static int r600_cs_parse_packet0(struct radeon_cs_parser *p,
  *
  * This function will test against r600_reg_safe_bm and return 0
  * if register is safe. If register is not flag as safe this function
- * will test it against a list of register needind special handling.
+ * will test it against a list of register needing special handling.
  */
 static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
 {
--


Those 2 LGTM, but please fix this while you are touching this file:

drivers/gpu/drm/radeon/r600_cs.c:2339: informations  ==> information


Thanks, I have sent a V2 with the change...


thanks.
--
~Randy



signature.asc
Description: PGP signature
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH V2] drm/radeon/r600_cs: Few typo fixes

2021-03-24 Thread Randy Dunlap
On 3/24/21 4:29 PM, Bhaskar Chowdhury wrote:
> s/miror/mirror/
> s/needind/needing/
> s/informations/information/
> 
> Signed-off-by: Bhaskar Chowdhury 

Acked-by: Randy Dunlap 

Thanks.

> ---
>  Changes from V1:
>  Randy's finding incorporated ,i.e in one place,informations->information
>   Adjusted the subject line accordingly
> 
>  drivers/gpu/drm/radeon/r600_cs.c | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/gpu/drm/radeon/r600_cs.c 
> b/drivers/gpu/drm/radeon/r600_cs.c
> index 34b7c6f16479..8be4799a98ef 100644
> --- a/drivers/gpu/drm/radeon/r600_cs.c
> +++ b/drivers/gpu/drm/radeon/r600_cs.c
> @@ -38,7 +38,7 @@ extern void r600_cs_legacy_get_tiling_conf(struct 
> drm_device *dev, u32 *npipes,
> 
> 
>  struct r600_cs_track {
> - /* configuration we miror so that we use same code btw kms/ums */
> + /* configuration we mirror so that we use same code btw kms/ums */
>   u32 group_size;
>   u32 nbanks;
>   u32 npipes;
> @@ -963,7 +963,7 @@ static int r600_cs_parse_packet0(struct radeon_cs_parser 
> *p,
>   *
>   * This function will test against r600_reg_safe_bm and return 0
>   * if register is safe. If register is not flag as safe this function
> - * will test it against a list of register needind special handling.
> + * will test it against a list of register needing special handling.
>   */
>  static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
>  {
> @@ -2336,7 +2336,7 @@ int r600_cs_parse(struct radeon_cs_parser *p)
>  /**
>   * r600_dma_cs_next_reloc() - parse next reloc
>   * @p:   parser structure holding parsing context.
> - * @cs_reloc:reloc informations
> + * @cs_reloc:reloc information
>   *
>   * Return the next reloc, do bo validation and compute
>   * GPU offset using the provided start.
> --


-- 
~Randy

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH V2] drm/radeon/r600_cs: Few typo fixes

2021-03-24 Thread Bhaskar Chowdhury
s/miror/mirror/
s/needind/needing/
s/informations/information/

Signed-off-by: Bhaskar Chowdhury 
---
 Changes from V1:
 Randy's finding incorporated ,i.e in one place,informations->information
  Adjusted the subject line accordingly

 drivers/gpu/drm/radeon/r600_cs.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/radeon/r600_cs.c b/drivers/gpu/drm/radeon/r600_cs.c
index 34b7c6f16479..8be4799a98ef 100644
--- a/drivers/gpu/drm/radeon/r600_cs.c
+++ b/drivers/gpu/drm/radeon/r600_cs.c
@@ -38,7 +38,7 @@ extern void r600_cs_legacy_get_tiling_conf(struct drm_device 
*dev, u32 *npipes,


 struct r600_cs_track {
-   /* configuration we miror so that we use same code btw kms/ums */
+   /* configuration we mirror so that we use same code btw kms/ums */
u32 group_size;
u32 nbanks;
u32 npipes;
@@ -963,7 +963,7 @@ static int r600_cs_parse_packet0(struct radeon_cs_parser *p,
  *
  * This function will test against r600_reg_safe_bm and return 0
  * if register is safe. If register is not flag as safe this function
- * will test it against a list of register needind special handling.
+ * will test it against a list of register needing special handling.
  */
 static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
 {
@@ -2336,7 +2336,7 @@ int r600_cs_parse(struct radeon_cs_parser *p)
 /**
  * r600_dma_cs_next_reloc() - parse next reloc
  * @p: parser structure holding parsing context.
- * @cs_reloc:  reloc informations
+ * @cs_reloc:  reloc information
  *
  * Return the next reloc, do bo validation and compute
  * GPU offset using the provided start.
--
2.30.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amdgpu/display: fix dmub invalid register read

2021-03-24 Thread Harry Wentland



On 2021-03-24 5:13 p.m., Harry Wentland wrote:

+Nick, Bhawan

On 2021-03-24 4:39 p.m., Alex Deucher wrote:
On Tue, Mar 23, 2021 at 4:18 AM Thomas Lambertz 
 wrote:


DMCUB_SCRATCH_0 sometimes contains 0xdeadbeef during initialization.
If this is detected, return 0 instead. This prevents wrong bit-flags
from being read.

The main impact of this bug is in the status check loop in
dmub_srv_wait_for_auto_load. As it is waiting for the device to become
ready, returning too early leads to a race condition. It is usually won
on first boot, but lost when laptop resumes from sleep, breaking screen
brightness control.

This issue was always present, but previously mitigated by the fact that
the full register was compared to the wanted value. Currently, only the
bottom two bits are tested, which are also set in 0xdeadbeef, thus
returning readiness to early.

Fixes: 5fe6b98ae00d ("drm/amd/display: Update dmub code")


Bug: 
https://gitlab.freedesktop.org/drm/amd/-/issues/1518 
Harry, Leo, Rodrigo, any ideas?




When I checked with our DMUB experts yesterday they said they'd never 
expect to see 0xdeadbeef in SCRATCH0.


That said based on Thomas's test it does look like we're getting 
deadbeef at resume so I'm almost inclined to ACK this patch. It doesn't 
really do any harm.




Apparently an older version of the renoir_dmcub (and likely other 
versions) had a 0xdeadbeef write to SCRATCH0. In order to support these 
older FW this patch is

Reviewed-by: Harry Wentland 

I recommend updating to the latest FW from linux-firmware if any other 
issues are observed.


Harry


Harry


Alex


Signed-off-by: Thomas Lambertz 
---
  drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.c | 8 +++-
  drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.h | 2 ++
  2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.c 
b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.c

index 8e8e65fa83c0..d6fcae182f68 100644
--- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.c
+++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.c
@@ -323,8 +323,14 @@ uint32_t dmub_dcn20_get_gpint_response(struct 
dmub_srv *dmub)
  union dmub_fw_boot_status dmub_dcn20_get_fw_boot_status(struct 
dmub_srv *dmub)

  {
 union dmub_fw_boot_status status;
+   uint32_t value;
+
+   value = REG_READ(DMCUB_SCRATCH0);
+   if (value == DMCUB_SCRATCH0_INVALID)
+   status.all = 0;
+   else
+   status.all = value;

-   status.all = REG_READ(DMCUB_SCRATCH0);
 return status;
  }

diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.h 
b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.h

index a62be9c0652e..9557e76cf5d4 100644
--- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.h
+++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.h
@@ -154,6 +154,8 @@ struct dmub_srv_common_regs {

  extern const struct dmub_srv_common_regs dmub_srv_dcn20_regs;

+#define DMCUB_SCRATCH0_INVALID 0xdeadbeef
+
  /* Hardware functions. */

  void dmub_dcn20_init(struct dmub_srv *dmub);
--
2.31.0
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx 


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx 



___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amdgpu/pm: bail on sysfs/debugfs queries during platform suspend

2021-03-24 Thread Alex Deucher
The GPU is in the process of being shutdown.  Spurious queries during
suspend and resume can put the SMU into a bad state.  Runtime PM is
handled dynamically so we check if we are in non-runtime suspend.

Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/pm/amdgpu_pm.c | 98 ++
 1 file changed, 98 insertions(+)

diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c 
b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
index 2627870a786e..3c1b5483688b 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
@@ -129,6 +129,8 @@ static ssize_t amdgpu_get_power_dpm_state(struct device 
*dev,
 
if (amdgpu_in_reset(adev))
return -EPERM;
+   if (adev->in_suspend && !adev->in_runpm)
+   return -EPERM;
 
ret = pm_runtime_get_sync(ddev->dev);
if (ret < 0) {
@@ -162,6 +164,8 @@ static ssize_t amdgpu_set_power_dpm_state(struct device 
*dev,
 
if (amdgpu_in_reset(adev))
return -EPERM;
+   if (adev->in_suspend && !adev->in_runpm)
+   return -EPERM;
 
if (strncmp("battery", buf, strlen("battery")) == 0)
state = POWER_STATE_TYPE_BATTERY;
@@ -268,6 +272,8 @@ static ssize_t 
amdgpu_get_power_dpm_force_performance_level(struct device *dev,
 
if (amdgpu_in_reset(adev))
return -EPERM;
+   if (adev->in_suspend && !adev->in_runpm)
+   return -EPERM;
 
ret = pm_runtime_get_sync(ddev->dev);
if (ret < 0) {
@@ -310,6 +316,8 @@ static ssize_t 
amdgpu_set_power_dpm_force_performance_level(struct device *dev,
 
if (amdgpu_in_reset(adev))
return -EPERM;
+   if (adev->in_suspend && !adev->in_runpm)
+   return -EPERM;
 
if (strncmp("low", buf, strlen("low")) == 0) {
level = AMD_DPM_FORCED_LEVEL_LOW;
@@ -408,6 +416,8 @@ static ssize_t amdgpu_get_pp_num_states(struct device *dev,
 
if (amdgpu_in_reset(adev))
return -EPERM;
+   if (adev->in_suspend && !adev->in_runpm)
+   return -EPERM;
 
ret = pm_runtime_get_sync(ddev->dev);
if (ret < 0) {
@@ -448,6 +458,8 @@ static ssize_t amdgpu_get_pp_cur_state(struct device *dev,
 
if (amdgpu_in_reset(adev))
return -EPERM;
+   if (adev->in_suspend && !adev->in_runpm)
+   return -EPERM;
 
ret = pm_runtime_get_sync(ddev->dev);
if (ret < 0) {
@@ -484,6 +496,8 @@ static ssize_t amdgpu_get_pp_force_state(struct device *dev,
 
if (amdgpu_in_reset(adev))
return -EPERM;
+   if (adev->in_suspend && !adev->in_runpm)
+   return -EPERM;
 
if (adev->pp_force_state_enabled)
return amdgpu_get_pp_cur_state(dev, attr, buf);
@@ -504,6 +518,8 @@ static ssize_t amdgpu_set_pp_force_state(struct device *dev,
 
if (amdgpu_in_reset(adev))
return -EPERM;
+   if (adev->in_suspend && !adev->in_runpm)
+   return -EPERM;
 
if (strlen(buf) == 1)
adev->pp_force_state_enabled = false;
@@ -564,6 +580,8 @@ static ssize_t amdgpu_get_pp_table(struct device *dev,
 
if (amdgpu_in_reset(adev))
return -EPERM;
+   if (adev->in_suspend && !adev->in_runpm)
+   return -EPERM;
 
ret = pm_runtime_get_sync(ddev->dev);
if (ret < 0) {
@@ -602,6 +620,8 @@ static ssize_t amdgpu_set_pp_table(struct device *dev,
 
if (amdgpu_in_reset(adev))
return -EPERM;
+   if (adev->in_suspend && !adev->in_runpm)
+   return -EPERM;
 
ret = pm_runtime_get_sync(ddev->dev);
if (ret < 0) {
@@ -764,6 +784,8 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device 
*dev,
 
if (amdgpu_in_reset(adev))
return -EPERM;
+   if (adev->in_suspend && !adev->in_runpm)
+   return -EPERM;
 
if (count > 127)
return -EINVAL;
@@ -865,6 +887,8 @@ static ssize_t amdgpu_get_pp_od_clk_voltage(struct device 
*dev,
 
if (amdgpu_in_reset(adev))
return -EPERM;
+   if (adev->in_suspend && !adev->in_runpm)
+   return -EPERM;
 
ret = pm_runtime_get_sync(ddev->dev);
if (ret < 0) {
@@ -916,6 +940,8 @@ static ssize_t amdgpu_set_pp_features(struct device *dev,
 
if (amdgpu_in_reset(adev))
return -EPERM;
+   if (adev->in_suspend && !adev->in_runpm)
+   return -EPERM;
 
ret = kstrtou64(buf, 0, &featuremask);
if (ret)
@@ -959,6 +985,8 @@ static ssize_t amdgpu_get_pp_features(struct device *dev,
 
if (amdgpu_in_reset(adev))
return -EPERM;
+   if (adev->in_suspend && !adev->in_runpm)
+   return -EPERM;
 
ret = pm_runtime_get_sync(ddev->dev);
if (ret < 0) {
@@ -1018,6 +1046,8 @@ static ssize_t amdgpu_get_pp_dpm_clock(struct device *dev,
 
if (amdgpu_

Re: [PATCH] drm/amdgpu/display: fix dmub invalid register read

2021-03-24 Thread Harry Wentland

+Nick, Bhawan

On 2021-03-24 4:39 p.m., Alex Deucher wrote:

On Tue, Mar 23, 2021 at 4:18 AM Thomas Lambertz  wrote:


DMCUB_SCRATCH_0 sometimes contains 0xdeadbeef during initialization.
If this is detected, return 0 instead. This prevents wrong bit-flags
from being read.

The main impact of this bug is in the status check loop in
dmub_srv_wait_for_auto_load. As it is waiting for the device to become
ready, returning too early leads to a race condition. It is usually won
on first boot, but lost when laptop resumes from sleep, breaking screen
brightness control.

This issue was always present, but previously mitigated by the fact that
the full register was compared to the wanted value. Currently, only the
bottom two bits are tested, which are also set in 0xdeadbeef, thus
returning readiness to early.

Fixes: 5fe6b98ae00d ("drm/amd/display: Update dmub code")


Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1518>> 
Harry, Leo, Rodrigo, any ideas?




When I checked with our DMUB experts yesterday they said they'd never 
expect to see 0xdeadbeef in SCRATCH0.


That said based on Thomas's test it does look like we're getting 
deadbeef at resume so I'm almost inclined to ACK this patch. It doesn't 
really do any harm.


Harry


Alex


Signed-off-by: Thomas Lambertz 
---
  drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.c | 8 +++-
  drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.h | 2 ++
  2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.c 
b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.c
index 8e8e65fa83c0..d6fcae182f68 100644
--- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.c
+++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.c
@@ -323,8 +323,14 @@ uint32_t dmub_dcn20_get_gpint_response(struct dmub_srv 
*dmub)
  union dmub_fw_boot_status dmub_dcn20_get_fw_boot_status(struct dmub_srv *dmub)
  {
 union dmub_fw_boot_status status;
+   uint32_t value;
+
+   value = REG_READ(DMCUB_SCRATCH0);
+   if (value == DMCUB_SCRATCH0_INVALID)
+   status.all = 0;
+   else
+   status.all = value;

-   status.all = REG_READ(DMCUB_SCRATCH0);
 return status;
  }

diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.h 
b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.h
index a62be9c0652e..9557e76cf5d4 100644
--- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.h
+++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.h
@@ -154,6 +154,8 @@ struct dmub_srv_common_regs {

  extern const struct dmub_srv_common_regs dmub_srv_dcn20_regs;

+#define DMCUB_SCRATCH0_INVALID 0xdeadbeef
+
  /* Hardware functions. */

  void dmub_dcn20_init(struct dmub_srv *dmub);
--
2.31.0
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx>

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[pull] amdgpu drm-fixes-5.12

2021-03-24 Thread Alex Deucher
Hi Dave, Daniel,

Fixes for 5.12.

The following changes since commit d27ce83fa4baa5cb908a42e9878564cad6ea0eb3:

  Merge tag 'du-fixes-20210316' of git://linuxtv.org/pinchartl/media into 
drm-fixes (2021-03-22 13:49:55 +1000)

are available in the Git repository at:

  https://gitlab.freedesktop.org/agd5f/linux.git 
tags/amd-drm-fixes-5.12-2021-03-24

for you to fetch changes up to 5c458585c0141754cdcbf25feebb547dd671b559:

  drm/amdgpu/display: restore AUX_DPHY_TX_CONTROL for DCN2.x (2021-03-24 
00:30:57 -0400)


amd-drm-fixes-5.12-2021-03-24:

amdgpu:
- S0ix fixes
- Add PCI ID
- Polaris PCIe DPM fix
- Display fix for high refresh rate monitors


Alex Deucher (11):
  drm/amdgpu: rework S3/S4/S0ix state handling
  drm/amdgpu: don't evict vram on APUs for suspend to ram (v4)
  drm/amdgpu: clean up non-DC suspend/resume handling
  drm/amdgpu: move s0ix check into amdgpu_device_ip_suspend_phase2 (v3)
  drm/amdgpu: re-enable suspend phase 2 for S0ix
  drm/amdgpu/swsmu: skip gfx cgpg on s0ix suspend
  drm/amdgpu: update comments about s0ix suspend/resume
  drm/amdgpu: drop S0ix checks around CG/PG in suspend
  drm/amdgpu: skip kfd suspend/resume for S0ix
  drm/amdgpu: Add additional Sienna Cichlid PCI ID
  drm/amdgpu/display: restore AUX_DPHY_TX_CONTROL for DCN2.x

Kenneth Feng (1):
  drm/amd/pm: workaround for audio noise issue

Pratik Vishwakarma (1):
  drm/amdgpu: skip CG/PG for gfx during S0ix

Prike Liang (1):
  drm/amdgpu: fix the hibernation suspend with s0ix

 drivers/gpu/drm/amd/amdgpu/amdgpu.h|  10 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 132 ++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_display.c|  89 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_display.h|   3 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c|  31 +++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c |   7 +-
 drivers/gpu/drm/amd/amdgpu/dce_v10_0.c |   9 +-
 drivers/gpu/drm/amd/amdgpu/dce_v11_0.c |   9 +-
 drivers/gpu/drm/amd/amdgpu/dce_v6_0.c  |   8 +-
 drivers/gpu/drm/amd/amdgpu/dce_v8_0.c  |   9 +-
 drivers/gpu/drm/amd/amdgpu/dce_virtual.c   |  15 ++-
 .../drm/amd/display/dc/dcn20/dcn20_link_encoder.c  |   3 +-
 .../gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c|  54 +
 .../gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c  |  74 ++--
 .../gpu/drm/amd/pm/powerplay/hwmgr/vega12_hwmgr.c  |  24 
 .../gpu/drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c  |  25 
 drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c  |   5 +-
 17 files changed, 365 insertions(+), 142 deletions(-)
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amdgpu/display: fix dmub invalid register read

2021-03-24 Thread Alex Deucher
On Tue, Mar 23, 2021 at 4:18 AM Thomas Lambertz  wrote:
>
> DMCUB_SCRATCH_0 sometimes contains 0xdeadbeef during initialization.
> If this is detected, return 0 instead. This prevents wrong bit-flags
> from being read.
>
> The main impact of this bug is in the status check loop in
> dmub_srv_wait_for_auto_load. As it is waiting for the device to become
> ready, returning too early leads to a race condition. It is usually won
> on first boot, but lost when laptop resumes from sleep, breaking screen
> brightness control.
>
> This issue was always present, but previously mitigated by the fact that
> the full register was compared to the wanted value. Currently, only the
> bottom two bits are tested, which are also set in 0xdeadbeef, thus
> returning readiness to early.
>
> Fixes: 5fe6b98ae00d ("drm/amd/display: Update dmub code")

Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1518

Harry, Leo, Rodrigo, any ideas?

Alex

> Signed-off-by: Thomas Lambertz 
> ---
>  drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.c | 8 +++-
>  drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.h | 2 ++
>  2 files changed, 9 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.c 
> b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.c
> index 8e8e65fa83c0..d6fcae182f68 100644
> --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.c
> +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.c
> @@ -323,8 +323,14 @@ uint32_t dmub_dcn20_get_gpint_response(struct dmub_srv 
> *dmub)
>  union dmub_fw_boot_status dmub_dcn20_get_fw_boot_status(struct dmub_srv 
> *dmub)
>  {
> union dmub_fw_boot_status status;
> +   uint32_t value;
> +
> +   value = REG_READ(DMCUB_SCRATCH0);
> +   if (value == DMCUB_SCRATCH0_INVALID)
> +   status.all = 0;
> +   else
> +   status.all = value;
>
> -   status.all = REG_READ(DMCUB_SCRATCH0);
> return status;
>  }
>
> diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.h 
> b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.h
> index a62be9c0652e..9557e76cf5d4 100644
> --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.h
> +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.h
> @@ -154,6 +154,8 @@ struct dmub_srv_common_regs {
>
>  extern const struct dmub_srv_common_regs dmub_srv_dcn20_regs;
>
> +#define DMCUB_SCRATCH0_INVALID 0xdeadbeef
> +
>  /* Hardware functions. */
>
>  void dmub_dcn20_init(struct dmub_srv *dmub);
> --
> 2.31.0
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amd/display: Try YCbCr420 color when YCbCr444 fails

2021-03-24 Thread Alex Deucher
On Wed, Mar 17, 2021 at 11:25 AM Werner Sembach  
wrote:
>
> When encoder validation of a display mode fails, retry with less bandwidth
> heavy YCbCr420 color mode, if available. This enables some HDMI 1.4 setups
> to support 4k60Hz output, which previously failed silently.
>
> On some setups, while the monitor and the gpu support display modes with
> pixel clocks of up to 600MHz, the link encoder might not. This prevents
> YCbCr444 and RGB encoding for 4k60Hz, but YCbCr420 encoding might still be
> possible. However, which color mode is used is decided before the link
> encoder capabilities are checked. This patch fixes the problem by retrying
> to find a display mode with YCbCr420 enforced and using it, if it is
> valid.
>
> Signed-off-by: Werner Sembach 
> Cc: 


This seems reasonable to me.  Harry, Leo, Any objections?

Alex

> ---
>
> From c9398160caf4ff20e63b8ba3a4366d6ef95c4ac3 Mon Sep 17 00:00:00 2001
> From: Werner Sembach 
> Date: Wed, 17 Mar 2021 12:52:22 +0100
> Subject: [PATCH] Retry forcing YCbCr420 color on failed encoder validation
>
> ---
>  drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 9 +
>  1 file changed, 9 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
> b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> index 961abf1cf040..2d16389b5f1e 100644
> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> @@ -5727,6 +5727,15 @@ create_validate_stream_for_sink(struct 
> amdgpu_dm_connector *aconnector,
>
> } while (stream == NULL && requested_bpc >= 6);
>
> +   if (dc_result == DC_FAIL_ENC_VALIDATE && 
> !aconnector->force_yuv420_output) {
> +   DRM_DEBUG_KMS("Retry forcing YCbCr420 encoding\n");
> +
> +   aconnector->force_yuv420_output = true;
> +   stream = create_validate_stream_for_sink(aconnector, drm_mode,
> +   dm_state, old_stream);
> +   aconnector->force_yuv420_output = false;
> +   }
> +
> return stream;
>  }
>
> --
> 2.25.1
>
> ___
> dri-devel mailing list
> dri-de...@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH v2] drm/amdgpu: Ensure that the modifier requested is supported by plane.

2021-03-24 Thread Mark Yacoub
From: Mark Yacoub 

On initializing the framebuffer, call drm_any_plane_has_format to do a
check if the modifier is supported. drm_any_plane_has_format calls
dm_plane_format_mod_supported which is extended to validate that the
modifier is on the list of the plane's supported modifiers.

The bug was caught using igt-gpu-tools test: 
kms_addfb_basic.addfb25-bad-modifier

Tested on ChromeOS Zork by turning on the display, running an overlay
test, and running a YT video.

=== Changes from v1 ===
Explicitly handle DRM_FORMAT_MOD_INVALID modifier.

Cc: Alex Deucher 
Cc: Bas Nieuwenhuizen 
Signed-off-by: default avatarMark Yacoub 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_display.c| 13 +
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c  | 18 +++---
 2 files changed, 28 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index afa5f8ad0f563..a947b5aa420d2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -908,6 +908,19 @@ int amdgpu_display_gem_fb_verify_and_init(
 &amdgpu_fb_funcs);
if (ret)
goto err;
+   /* Verify that the modifier is supported. */
+   if (!drm_any_plane_has_format(dev, mode_cmd->pixel_format,
+ mode_cmd->modifier[0])) {
+   struct drm_format_name_buf format_name;
+   drm_dbg_kms(dev,
+   "unsupported pixel format %s / modifier 0x%llx\n",
+   drm_get_format_name(mode_cmd->pixel_format,
+   &format_name),
+   mode_cmd->modifier[0]);
+
+   ret = -EINVAL;
+   goto err;
+   }
 
ret = amdgpu_display_framebuffer_init(dev, rfb, mode_cmd, obj);
if (ret)
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 961abf1cf040c..6fc746996c24f 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -3939,6 +3939,7 @@ static bool dm_plane_format_mod_supported(struct 
drm_plane *plane,
 {
struct amdgpu_device *adev = drm_to_adev(plane->dev);
const struct drm_format_info *info = drm_format_info(format);
+   int i;
 
enum dm_micro_swizzle microtile = modifier_gfx9_swizzle_mode(modifier) 
& 3;
 
@@ -3946,11 +3947,22 @@ static bool dm_plane_format_mod_supported(struct 
drm_plane *plane,
return false;
 
/*
-* We always have to allow this modifier, because core DRM still
-* checks LINEAR support if userspace does not provide modifers.
+* We always have to allow these modifiers: 
+* 1. Core DRM checks for LINEAR support if userspace does not provide 
modifiers. 
+* 2. Not passing any modifiers is the same as explicitly passing 
INVALID.
 */
-   if (modifier == DRM_FORMAT_MOD_LINEAR)
+   if (modifier == DRM_FORMAT_MOD_LINEAR ||
+   modifier == DRM_FORMAT_MOD_INVALID) {
return true;
+   }
+
+   /* Check that the modifier is on the list of the plane's supported 
modifiers. */
+   for (i = 0; i < plane->modifier_count; i++) {
+   if (modifier == plane->modifiers[i])
+   break;
+   }
+   if (i == plane->modifier_count)
+   return false;
 
/*
 * The arbitrary tiling support for multiplane formats has not been 
hooked
-- 
2.31.0.291.g576ba9dcdaf-goog

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amdgpu: Ensure that the modifier requested is supported by plane.

2021-03-24 Thread Mark Yacoub
On Wed, Mar 24, 2021 at 11:25 AM Daniel Stone  wrote:
>
> Hi Mark,
>
> On Wed, 24 Mar 2021 at 14:58, Mark Yacoub  wrote:
>>
>> So you mean it would make more sense to be more explicit in handling
>> DRM_FORMAT_MOD_INVALID as an incoming modifier (which will, just like
>> DRM_FORMAT_MOD_LINEAR, will return true in
>> dm_plane_format_mod_supported)?
>
>
> That's correct. Not passing any modifiers is the same as explicitly passing 
> INVALID, both of which mean 'the driver will figure it out somehow'; that 
> driver-specific determination is not the same as explicit LINEAR.
>
> (I cannot regret enough that INVALID is not 0.)
I feel you. When I tested it on a board that doesn't support
modifiers, the modifier value was Zero. when I checked it, it was
basically LINEAR.
I'll amend my changes to explicitly handle INVALID.
>
> Cheers,
> Daniel
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/ttm: stop warning on TT shrinker failure

2021-03-24 Thread Daniel Vetter
On Wed, Mar 24, 2021 at 01:07:44PM +0100, Christian König wrote:
> 
> 
> Am 24.03.21 um 13:01 schrieb Daniel Vetter:
> > On Wed, Mar 24, 2021 at 01:00:28PM +0100, Christian König wrote:
> > > Am 24.03.21 um 12:55 schrieb Daniel Vetter:
> > > > On Wed, Mar 24, 2021 at 11:19:13AM +0100, Thomas Hellström (Intel) 
> > > > wrote:
> > > > > On 3/23/21 4:45 PM, Christian König wrote:
> > > > > > Am 23.03.21 um 16:13 schrieb Michal Hocko:
> > > > > > > On Tue 23-03-21 14:56:54, Christian König wrote:
> > > > > > > > Am 23.03.21 um 14:41 schrieb Michal Hocko:
> > > > > > > [...]
> > > > > > > > > Anyway, I am wondering whether the overall approach is
> > > > > > > > > sound. Why don't
> > > > > > > > > you simply use shmem as your backing storage from the
> > > > > > > > > beginning and pin
> > > > > > > > > those pages if they are used by the device?
> > > > > > > > Yeah, that is exactly what the Intel guys are doing for their
> > > > > > > > integrated
> > > > > > > > GPUs :)
> > > > > > > > 
> > > > > > > > Problem is for TTM I need to be able to handle dGPUs and those 
> > > > > > > > have all
> > > > > > > > kinds of funny allocation restrictions. In other words I need to
> > > > > > > > guarantee
> > > > > > > > that the allocated memory is coherent accessible to the GPU
> > > > > > > > without using
> > > > > > > > SWIOTLB.
> > > > > > > > 
> > > > > > > > The simple case is that the device can only do DMA32, but you 
> > > > > > > > also got
> > > > > > > > device which can only do 40bits or 48bits.
> > > > > > > > 
> > > > > > > > On top of that you also got AGP, CMA and stuff like CPU cache 
> > > > > > > > behavior
> > > > > > > > changes (write back vs. write through, vs. uncached).
> > > > > > > OK, so the underlying problem seems to be that gfp mask (thus
> > > > > > > mapping_gfp_mask) cannot really reflect your requirements, right? 
> > > > > > >  Would
> > > > > > > it help if shmem would allow to provide an allocation callback to
> > > > > > > override alloc_page_vma which is used currently? I am pretty sure 
> > > > > > > there
> > > > > > > will be more to handle but going through shmem for the whole life 
> > > > > > > time
> > > > > > > is just so much easier to reason about than some tricks to abuse 
> > > > > > > shmem
> > > > > > > just for the swapout path.
> > > > > > Well it's a start, but the pages can have special CPU cache 
> > > > > > settings. So
> > > > > > direct IO from/to them usually doesn't work as expected.
> > > > > > 
> > > > > > Additional to that for AGP and CMA I need to make sure that I give 
> > > > > > those
> > > > > > pages back to the relevant subsystems instead of just dropping the 
> > > > > > page
> > > > > > reference.
> > > > > > 
> > > > > > So I would need to block for the swapio to be completed.
> > > > > > 
> > > > > > Anyway I probably need to revert those patches for now since this 
> > > > > > isn't
> > > > > > working as we hoped it would.
> > > > > > 
> > > > > > Thanks for the explanation how stuff works here.
> > > > > Another alternative here that I've tried before without being 
> > > > > successful
> > > > > would perhaps be to drop shmem completely and, if it's a normal page 
> > > > > (no dma
> > > > > or funny caching attributes) just use add_to_swap_cache()? If it's 
> > > > > something
> > > > > else, try alloc a page with relevant gfp attributes, copy and
> > > > > add_to_swap_cache()? Or perhaps that doesn't work well from a shrinker
> > > > > either?
> > > > So before we toss everything and go an a great rewrite-the-world tour,
> > > > what if we just try to split up big objects. So for objects which are
> > > > bigger than e.g. 10mb
> > > > 
> > > > - move them to a special "under eviction" list
> > > > - keep a note how far we evicted thus far
> > > > - interleave allocating shmem pages, copying data and releasing the ttm
> > > > backing store on a chunk basis (maybe 10mb or whatever, tuning tbh)
> > > > 
> > > > If that's not enough, occasionally break out of the shrinker entirely so
> > > > other parts of reclaim can reclaim the shmem stuff. But just releasing 
> > > > our
> > > > own pages as we go should help a lot I think.
> > > Yeah, the later is exactly what I was currently prototyping.
> > > 
> > > I just didn't used a limit but rather a only partially evicted BOs list
> > > which is used when we fail to allocate a page.
> > > 
> > > For the 5.12 cycle I think we should just go back to a hard 50% limit for
> > > now and then resurrect this when we have solved the issues.
> > Can we do the 50% limit without tossing out all the code we've done thus
> > far? Just so this doesn't get too disruptive.
> 
> Yeah, I just need to get back to v1 of this patch. Before you convinced me
> that the shrinker is the better approach .)

I don't think there's anything else than the shrinker if you want
dynamically sized memory usage. Or pinning it all. Implementing our own
kswapd and not tying into direct reclaim does not sound like a good id

Re: [PATCH] drm/amdgpu: Fix check for RAS support

2021-03-24 Thread Deucher, Alexander
[AMD Official Use Only - Internal Distribution Only]

Reviewed-by: Alex Deucher 

From: Tuikov, Luben 
Sent: Wednesday, March 24, 2021 1:11 AM
To: amd-gfx@lists.freedesktop.org 
Cc: Tuikov, Luben ; Yang, Stanley ; 
Deucher, Alexander 
Subject: [PATCH] drm/amdgpu: Fix check for RAS support

Use positive logic to check for RAS
support. Rename the function to actually indicate
what it is testing for. Essentially, make the
function a predicate with the correct name.

Cc: Stanley Yang 
Cc: Alexander Deucher 
Signed-off-by: Luben Tuikov 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 15 ++-
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 0e16683876aa..17652972fd49 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -1933,15 +1933,12 @@ int amdgpu_ras_request_reset_on_boot(struct 
amdgpu_device *adev,
 return 0;
 }

-static int amdgpu_ras_check_asic_type(struct amdgpu_device *adev)
+static bool amdgpu_ras_asic_supported(struct amdgpu_device *adev)
 {
-   if (adev->asic_type != CHIP_VEGA10 &&
-   adev->asic_type != CHIP_VEGA20 &&
-   adev->asic_type != CHIP_ARCTURUS &&
-   adev->asic_type != CHIP_SIENNA_CICHLID)
-   return 1;
-   else
-   return 0;
+   return adev->asic_type == CHIP_VEGA10 ||
+   adev->asic_type == CHIP_VEGA20 ||
+   adev->asic_type == CHIP_ARCTURUS ||
+   adev->asic_type == CHIP_SIENNA_CICHLID;
 }

 /*
@@ -1960,7 +1957,7 @@ static void amdgpu_ras_check_supported(struct 
amdgpu_device *adev,
 *supported = 0;

 if (amdgpu_sriov_vf(adev) || !adev->is_atom_fw ||
-   amdgpu_ras_check_asic_type(adev))
+   !amdgpu_ras_asic_supported(adev))
 return;

 if (amdgpu_atomfirmware_mem_ecc_supported(adev)) {
--
2.31.0.97.g1424303384

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH] drm/amdgpu: Set amdgpu.noretry=1 for Arcturus

2021-03-24 Thread Russell, Kent
[AMD Public Use]



Reviewed-by: Kent Russell 



> -Original Message-
> From: amd-gfx  On Behalf Of Philip Cox
> Sent: Wednesday, March 24, 2021 9:24 AM
> To: amd-gfx@lists.freedesktop.org
> Cc: Cox, Philip 
> Subject: [PATCH] drm/amdgpu: Set amdgpu.noretry=1 for Arcturus
> 
> Setting amdgpu.noretry=1 as default for Arcturus.
> 
> Signed-off-by: Philip Cox 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> index 1e07c66676c2..b9d68fd2610c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> @@ -516,6 +516,7 @@ void amdgpu_gmc_noretry_set(struct amdgpu_device *adev)
>   switch (adev->asic_type) {
>   case CHIP_VEGA10:
>   case CHIP_VEGA20:
> + case CHIP_ARCTURUS:
>   case CHIP_ALDEBARAN:
>   /*
>* noretry = 0 will cause kfd page fault tests fail
> --
> 2.17.1
> 
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.or
> g%2Fmailman%2Flistinfo%2Famd-
> gfx&data=04%7C01%7Ckent.russell%40amd.com%7C2c07a0acac274b89901908d8eec8
> 26c0%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637521890728881804%7CUn
> known%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJX
> VCI6Mn0%3D%7C1000&sdata=OBKwCLSerFdepJPSPF1njp8QJV0hlTeR2xdpWmApSnM
> %3D&reserved=0
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amdgpu: Ensure that the modifier requested is supported by plane.

2021-03-24 Thread Daniel Stone
Hi Mark,

On Wed, 24 Mar 2021 at 14:58, Mark Yacoub  wrote:

> So you mean it would make more sense to be more explicit in handling
> DRM_FORMAT_MOD_INVALID as an incoming modifier (which will, just like
> DRM_FORMAT_MOD_LINEAR, will return true in
> dm_plane_format_mod_supported)?
>

That's correct. Not passing any modifiers is the same as explicitly passing
INVALID, both of which mean 'the driver will figure it out somehow'; that
driver-specific determination is not the same as explicit LINEAR.

(I cannot regret enough that INVALID is not 0.)

Cheers,
Daniel
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amdgpu: Ensure that the modifier requested is supported by plane.

2021-03-24 Thread Mark Yacoub
On Wed, Mar 24, 2021 at 8:10 AM Daniel Stone  wrote:
>
> On Wed, 24 Mar 2021 at 10:53, Bas Nieuwenhuizen  
> wrote:
>>
>> On Wed, Mar 24, 2021 at 11:13 AM Michel Dänzer  wrote:
>>>
>>> No modifier support does not imply linear. It's generally signalled via 
>>> DRM_FORMAT_MOD_INVALID, which roughly means "tiling is determined by driver 
>>> specific mechanisms".
So you mean it would make more sense to be more explicit in handling
DRM_FORMAT_MOD_INVALID as an incoming modifier (which will, just like
DRM_FORMAT_MOD_LINEAR, will return true in
dm_plane_format_mod_supported)?
>>
>>
>> Doesn't quite work that way in the kernel sadly. If you don't set 
>> DRM_MODE_FB_MODIFIERS then the modifier fields have to be 0 (which happens 
>> to alias DRM_FORMAT_MOD_LINEAR and then now deprecated DRM_FORMAT_MOD_NONE). 
>> This is verified in shared drm code.
>>
>> (and all userspace code I've seen simply doesn't set DRM_MODE_FB_MODIFIERS 
>> if the incoming modifier is DRM_FORMAT_MOD_INVALID)
>
>
> Yes, but even though the field is zero, the lack of the flag means it must be 
> treated as INVALID. If the kernel is not doing this, the kernel is 
> objectively wrong. (And I know it doesn't do this in most cases, because 
> otherwise I wouldn't be able to use this GNOME session on an Intel laptop, 
> where modifiers are blacklisted.)
>
> Cheers,
> Daniel
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] amdgpu: fix gcc -Wrestrict warning

2021-03-24 Thread Joe Perches
On Tue, 2021-03-23 at 14:04 +0100, Arnd Bergmann wrote:
> From: Arnd Bergmann 
> 
> gcc warns about an sprintf() that uses the same buffer as source
> and destination, which is undefined behavior in C99:
> 
> drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c: In function 
> 'amdgpu_securedisplay_debugfs_write':
> drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c:141:6: error: 'sprintf' 
> argument 3 overlaps destination object 'i2c_output' [-Werror=restrict]
>   141 |  sprintf(i2c_output, "%s 0x%X", i2c_output,
>   |  ^~
>   142 |   
> securedisplay_cmd->securedisplay_out_message.send_roi_crc.i2c_buf[i]);
>   |   
> ~
> drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c:97:7: note: destination 
> object referenced by 'restrict'-qualified argument 1 was declared here
>    97 |  char i2c_output[256];
>   |   ^~
> 
> Rewrite it to remember the current offset into the buffer instead.
> 
> Signed-off-by: Arnd Bergmann 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c
> index 834440ab9ff7..69d7f6bff5d4 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c
> @@ -136,9 +136,10 @@ static ssize_t amdgpu_securedisplay_debugfs_write(struct 
> file *f, const char __u
>   ret = psp_securedisplay_invoke(psp, 
> TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC);
>   if (!ret) {
>   if (securedisplay_cmd->status == 
> TA_SECUREDISPLAY_STATUS__SUCCESS) {
> + int pos = 0;
>   memset(i2c_output,  0, sizeof(i2c_output));
>   for (i = 0; i < 
> TA_SECUREDISPLAY_I2C_BUFFER_SIZE; i++)
> - sprintf(i2c_output, "%s 0x%X", 
> i2c_output,
> + pos += sprintf(i2c_output + pos, " 
> 0x%X",
>   
> securedisplay_cmd->securedisplay_out_message.send_roi_crc.i2c_buf[i]);
>   dev_info(adev->dev, "SECUREDISPLAY: I2C buffer 
> out put is :%s\n", i2c_output);

Perhaps use a hex output like:

---
 drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c | 11 ---
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c
index 9cf856c94f94..25bb34c72d20 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c
@@ -97,13 +97,12 @@ static ssize_t amdgpu_securedisplay_debugfs_write(struct 
file *f, const char __u
uint32_t op;
int i;
char str[64];
-   char i2c_output[256];
int ret;
 
if (*pos || size > sizeof(str) - 1)
return -EINVAL;
 
-   memset(str,  0, sizeof(str));
+   memset(str, 0, sizeof(str));
ret = copy_from_user(str, buf, size);
if (ret)
return -EFAULT;
@@ -139,11 +138,9 @@ static ssize_t amdgpu_securedisplay_debugfs_write(struct 
file *f, const char __u
ret = psp_securedisplay_invoke(psp, 
TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC);
if (!ret) {
if (securedisplay_cmd->status == 
TA_SECUREDISPLAY_STATUS__SUCCESS) {
-   memset(i2c_output,  0, sizeof(i2c_output));
-   for (i = 0; i < 
TA_SECUREDISPLAY_I2C_BUFFER_SIZE; i++)
-   sprintf(i2c_output, "%s 0x%X", 
i2c_output,
-   
securedisplay_cmd->securedisplay_out_message.send_roi_crc.i2c_buf[i]);
-   dev_info(adev->dev, "SECUREDISPLAY: I2C buffer 
out put is :%s\n", i2c_output);
+   dev_info(adev->dev, "SECUREDISPLAY: I2C buffer 
output is: %*ph\n",
+(int)TA_SECUREDISPLAY_I2C_BUFFER_SIZE,
+
securedisplay_cmd->securedisplay_out_message.send_roi_crc.i2c_buf);
} else {
psp_securedisplay_parse_resp_status(psp, 
securedisplay_cmd->status);
}


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] amdgpu: fix gcc -Wrestrict warning

2021-03-24 Thread Rasmus Villemoes
On 24/03/2021 14.33, Arnd Bergmann wrote:
> On Tue, Mar 23, 2021 at 4:57 PM Rasmus Villemoes
>  wrote:
>> On 23/03/2021 14.04, Arnd Bergmann wrote:
>>>   if (securedisplay_cmd->status == 
>>> TA_SECUREDISPLAY_STATUS__SUCCESS) {
>>> + int pos = 0;
>>>   memset(i2c_output,  0, sizeof(i2c_output));
>>>   for (i = 0; i < 
>>> TA_SECUREDISPLAY_I2C_BUFFER_SIZE; i++)
>>> - sprintf(i2c_output, "%s 0x%X", 
>>> i2c_output,
>>> + pos += sprintf(i2c_output + pos, " 
>>> 0x%X",
>>>   
>>> securedisplay_cmd->securedisplay_out_message.send_roi_crc.i2c_buf[i]);
>>>   dev_info(adev->dev, "SECUREDISPLAY: I2C 
>>> buffer out put is :%s\n", i2c_output);
>>
>> Eh, why not get rid of the 256 byte stack allocation and just replace
>> all of this by
>>
>>   dev_info(adev->dev, ""SECUREDISPLAY: I2C buffer out put is: %*ph\n",
>> TA_SECUREDISPLAY_I2C_BUFFER_SIZE,
>> securedisplay_cmd->securedisplay_out_message.send_roi_crc.i2c_buf);
>>
>> That's much less code (both in #LOC and .text), and avoids adding yet
>> another place that will be audited over and over for "hm, yeah, that
>> sprintf() is actually not gonna overflow".
>>
>> Yeah, it'll lose the 0x prefixes for each byte and use lowercase hex chars.
> 
> Ah, I didn't know the kernel's sprintf could do that, that's really nice.

If you're bored, you can "git grep -E -C4 '%[0.]2[xX]'" and find places
that are inside a small loop, many can trivially be converted to %ph,
though often with some small change in formatting. If you're lucky, you
even get to fix real bugs when people pass a "char" to %02x and "know"
that that will produce precisely two bytes of output, so they've sized
their stack buffer accordingly - boom when "char" happens to be signed
and one of the bytes have a value beyond ascii and %02x produces 0xffXX.

%ph has a hard-coded upper bound of 64 bytes, I think that's silly
because people instead do these inefficient and very verbose loops
instead, wasting stack, .text and runtime.

Rasmus
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/radeon/r600_cs: Couple of typo fixes

2021-03-24 Thread Bhaskar Chowdhury


s/miror/mirror/
s/needind/needing/

Signed-off-by: Bhaskar Chowdhury 
---
 drivers/gpu/drm/radeon/r600_cs.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/radeon/r600_cs.c b/drivers/gpu/drm/radeon/r600_cs.c
index 34b7c6f16479..aded1f2264e0 100644
--- a/drivers/gpu/drm/radeon/r600_cs.c
+++ b/drivers/gpu/drm/radeon/r600_cs.c
@@ -38,7 +38,7 @@ extern void r600_cs_legacy_get_tiling_conf(struct drm_device 
*dev, u32 *npipes,


 struct r600_cs_track {
-   /* configuration we miror so that we use same code btw kms/ums */
+   /* configuration we mirror so that we use same code btw kms/ums */
u32 group_size;
u32 nbanks;
u32 npipes;
@@ -963,7 +963,7 @@ static int r600_cs_parse_packet0(struct radeon_cs_parser *p,
  *
  * This function will test against r600_reg_safe_bm and return 0
  * if register is safe. If register is not flag as safe this function
- * will test it against a list of register needind special handling.
+ * will test it against a list of register needing special handling.
  */
 static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
 {
--
2.30.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] amdgpu: securedisplay: simplify i2c hexdump output

2021-03-24 Thread Arnd Bergmann
From: Arnd Bergmann 

A previous fix I did left a rather complicated loop in
amdgpu_securedisplay_debugfs_write() for what could be expressed in a
simple sprintf, as Rasmus pointed out.

This drops the leading 0x for each byte, but is otherwise
much nicer.

Suggested-by: Rasmus Villemoes 
Signed-off-by: Arnd Bergmann 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c | 11 +++
 1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c
index 69d7f6bff5d4..fc3ddd7aa6f0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c
@@ -92,9 +92,7 @@ static ssize_t amdgpu_securedisplay_debugfs_write(struct file 
*f, const char __u
struct drm_device *dev = adev_to_drm(adev);
uint32_t phy_id;
uint32_t op;
-   int i;
char str[64];
-   char i2c_output[256];
int ret;
 
if (*pos || size > sizeof(str) - 1)
@@ -136,12 +134,9 @@ static ssize_t amdgpu_securedisplay_debugfs_write(struct 
file *f, const char __u
ret = psp_securedisplay_invoke(psp, 
TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC);
if (!ret) {
if (securedisplay_cmd->status == 
TA_SECUREDISPLAY_STATUS__SUCCESS) {
-   int pos = 0;
-   memset(i2c_output,  0, sizeof(i2c_output));
-   for (i = 0; i < 
TA_SECUREDISPLAY_I2C_BUFFER_SIZE; i++)
-   pos += sprintf(i2c_output + pos, " 
0x%X",
-   
securedisplay_cmd->securedisplay_out_message.send_roi_crc.i2c_buf[i]);
-   dev_info(adev->dev, "SECUREDISPLAY: I2C buffer 
out put is :%s\n", i2c_output);
+   dev_info(adev->dev, "SECUREDISPLAY: I2C buffer 
out put is: %*ph\n",
+TA_SECUREDISPLAY_I2C_BUFFER_SIZE,
+
securedisplay_cmd->securedisplay_out_message.send_roi_crc.i2c_buf);
} else {
psp_securedisplay_parse_resp_status(psp, 
securedisplay_cmd->status);
}
-- 
2.29.2

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] amdgpu: fix gcc -Wrestrict warning

2021-03-24 Thread Arnd Bergmann
On Tue, Mar 23, 2021 at 4:57 PM Rasmus Villemoes
 wrote:
> On 23/03/2021 14.04, Arnd Bergmann wrote:
> >   if (securedisplay_cmd->status == 
> > TA_SECUREDISPLAY_STATUS__SUCCESS) {
> > + int pos = 0;
> >   memset(i2c_output,  0, sizeof(i2c_output));
> >   for (i = 0; i < 
> > TA_SECUREDISPLAY_I2C_BUFFER_SIZE; i++)
> > - sprintf(i2c_output, "%s 0x%X", 
> > i2c_output,
> > + pos += sprintf(i2c_output + pos, " 
> > 0x%X",
> >   
> > securedisplay_cmd->securedisplay_out_message.send_roi_crc.i2c_buf[i]);
> >   dev_info(adev->dev, "SECUREDISPLAY: I2C 
> > buffer out put is :%s\n", i2c_output);
>
> Eh, why not get rid of the 256 byte stack allocation and just replace
> all of this by
>
>   dev_info(adev->dev, ""SECUREDISPLAY: I2C buffer out put is: %*ph\n",
> TA_SECUREDISPLAY_I2C_BUFFER_SIZE,
> securedisplay_cmd->securedisplay_out_message.send_roi_crc.i2c_buf);
>
> That's much less code (both in #LOC and .text), and avoids adding yet
> another place that will be audited over and over for "hm, yeah, that
> sprintf() is actually not gonna overflow".
>
> Yeah, it'll lose the 0x prefixes for each byte and use lowercase hex chars.

Ah, I didn't know the kernel's sprintf could do that, that's really nice.

I'll send a follow-up patch, as Alex has already applied the first one.

Alex, feel free to merge the two into one, or just keep as they are.

  Arnd
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amdgpu: Set amdgpu.noretry=1 for Arcturus

2021-03-24 Thread Philip Cox
Setting amdgpu.noretry=1 as default for Arcturus.

Signed-off-by: Philip Cox 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index 1e07c66676c2..b9d68fd2610c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -516,6 +516,7 @@ void amdgpu_gmc_noretry_set(struct amdgpu_device *adev)
switch (adev->asic_type) {
case CHIP_VEGA10:
case CHIP_VEGA20:
+   case CHIP_ARCTURUS:
case CHIP_ALDEBARAN:
/*
 * noretry = 0 will cause kfd page fault tests fail
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH drm/amdgpu 0/2] Convert sysfs sprintf/snprintf family to sysfs_emit

2021-03-24 Thread Tian Tao
Use the generic sysfs_emit() function to take place of
snprintf/scnprintf, to avoid buffer overrun.

Tian Tao (2):
  drm/amdgpu: Convert sysfs sprintf/snprintf family to sysfs_emit
  drm/amd/pm: Convert sysfs sprintf/snprintf family to sysfs_emit

 drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c   |  8 +--
 drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c  |  6 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c  |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c  |  8 +--
 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 32 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c |  4 +-
 drivers/gpu/drm/amd/amdgpu/df_v3_6.c |  2 +-
 drivers/gpu/drm/amd/pm/amdgpu_pm.c   | 88 ++--
 9 files changed, 73 insertions(+), 79 deletions(-)

-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH drm/amdgpu 1/2] drm/amdgpu: Convert sysfs sprintf/snprintf family to sysfs_emit

2021-03-24 Thread Tian Tao
Fix the following coccicheck warning:
drivers/gpu//drm/amd/amdgpu/amdgpu_ras.c:434:9-17: WARNING:
use scnprintf or sprintf
drivers/gpu//drm/amd/amdgpu/amdgpu_xgmi.c:220:8-16: WARNING:
use scnprintf or sprintf
drivers/gpu//drm/amd/amdgpu/amdgpu_xgmi.c:249:8-16: WARNING:
use scnprintf or sprintf
drivers/gpu//drm/amd/amdgpu/df_v3_6.c:208:8-16: WARNING:
use scnprintf or sprintf
drivers/gpu//drm/amd/amdgpu/amdgpu_psp.c:2973:8-16: WARNING:
use scnprintf or sprintf
drivers/gpu//drm/amd/amdgpu/amdgpu_vram_mgr.c:75:8-16: WARNING:
use scnprintf or sprintf
drivers/gpu//drm/amd/amdgpu/amdgpu_vram_mgr.c:112:8-16: WARNING:
use scnprintf or sprintf
drivers/gpu//drm/amd/amdgpu/amdgpu_vram_mgr.c:58:8-16: WARNING:
use scnprintf or sprintf
drivers/gpu//drm/amd/amdgpu/amdgpu_vram_mgr.c:93:8-16: WARNING:
use scnprintf or sprintf
drivers/gpu//drm/amd/amdgpu/amdgpu_vram_mgr.c:125:9-17: WARNING:
use scnprintf or sprintf
drivers/gpu//drm/amd/amdgpu/amdgpu_gtt_mgr.c:52:8-16: WARNING:
use scnprintf or sprintf
drivers/gpu//drm/amd/amdgpu/amdgpu_gtt_mgr.c:71:8-16: WARNING:
use scnprintf or sprintf
drivers/gpu//drm/amd/amdgpu/amdgpu_device.c:140:8-16: WARNING:
use scnprintf or sprintf
drivers/gpu//drm/amd/amdgpu/amdgpu_device.c:164:8-16: WARNING:
use scnprintf or sprintf
drivers/gpu//drm/amd/amdgpu/amdgpu_device.c:186:8-16: WARNING:
use scnprintf or sprintf
drivers/gpu//drm/amd/amdgpu/amdgpu_device.c:208:8-16: WARNING:
use scnprintf or sprintf
drivers/gpu//drm/amd/amdgpu/amdgpu_atombios.c:1916:8-16: WARNING:
use scnprintf or sprintf

Signed-off-by: Tian Tao 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c   |  8 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c  |  6 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c  |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c  |  8 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 32 +---
 drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c |  4 ++--
 drivers/gpu/drm/amd/amdgpu/df_v3_6.c |  2 +-
 8 files changed, 29 insertions(+), 35 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
index 86add0f..5b04bfcb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
@@ -1947,7 +1947,7 @@ static ssize_t amdgpu_atombios_get_vbios_version(struct 
device *dev,
struct amdgpu_device *adev = drm_to_adev(ddev);
struct atom_context *ctx = adev->mode_info.atom_context;
 
-   return snprintf(buf, PAGE_SIZE, "%s\n", ctx->vbios_version);
+   return sysfs_emit(buf, "%s\n", ctx->vbios_version);
 }
 
 static DEVICE_ATTR(vbios_version, 0444, amdgpu_atombios_get_vbios_version,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 6447cd6..33b6e46 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -136,7 +136,7 @@ static ssize_t amdgpu_device_get_pcie_replay_count(struct 
device *dev,
struct amdgpu_device *adev = drm_to_adev(ddev);
uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
 
-   return snprintf(buf, PAGE_SIZE, "%llu\n", cnt);
+   return sysfs_emit(buf, "%llu\n", cnt);
 }
 
 static DEVICE_ATTR(pcie_replay_count, S_IRUGO,
@@ -160,7 +160,7 @@ static ssize_t amdgpu_device_get_product_name(struct device 
*dev,
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = drm_to_adev(ddev);
 
-   return snprintf(buf, PAGE_SIZE, "%s\n", adev->product_name);
+   return sysfs_emit(buf, "%s\n", adev->product_name);
 }
 
 static DEVICE_ATTR(product_name, S_IRUGO,
@@ -182,7 +182,7 @@ static ssize_t amdgpu_device_get_product_number(struct 
device *dev,
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = drm_to_adev(ddev);
 
-   return snprintf(buf, PAGE_SIZE, "%s\n", adev->product_number);
+   return sysfs_emit(buf, "%s\n", adev->product_number);
 }
 
 static DEVICE_ATTR(product_number, S_IRUGO,
@@ -204,7 +204,7 @@ static ssize_t amdgpu_device_get_serial_number(struct 
device *dev,
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = drm_to_adev(ddev);
 
-   return snprintf(buf, PAGE_SIZE, "%s\n", adev->serial);
+   return sysfs_emit(buf, "%s\n", adev->serial);
 }
 
 static DEVICE_ATTR(serial_number, S_IRUGO,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
index 8980329..540c010 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
@@ -49,8 +49,7 @@ static ssize_t amdgpu_mem_info_gtt_total_show(struct device 
*dev,
struct amdgpu_device *adev = drm_to_adev(ddev);
struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev, 
TTM_PL_TT);
 
-   return snprintf(buf, PAGE_SIZE, "%llu\n",

Re: [RESEND 00/19] Rid GPU from W=1 warnings

2021-03-24 Thread Lee Jones
Daniel,

> MIME-Version: 1.0
> Content-Type: text/plain; charset=UTF-8
> Content-Transfer-Encoding: 8bit
> 
> This is a resend of the remaining patches.
> 
> All of these patches have been sent before.

Are you still keen to 'hoover these up'?

Just leave the one that requires work and take the rest perhaps?

> Lee Jones (19):
>   drm/nouveau/nvkm/subdev/bios/init: Demote obvious abuse of kernel-doc
>   drm/nouveau/dispnv50/disp: Remove unused variable 'ret'
>   drm/msm/dp/dp_display: Remove unused variable 'hpd'
>   include: drm: drm_atomic: Make use of 'new_plane_state'
>   drm/nouveau/nvkm/subdev/volt/gk20a: Demote non-conformant kernel-doc
> headers
>   drm/amd/display/dc/calcs/dce_calcs: Move some large variables from the
> stack to the heap
>   drm/amd/display/dc/calcs/dce_calcs: Remove some large variables from
> the stack
>   drm/amd/display/dc/dce80/dce80_resource: Make local functions static
>   drm/nouveau/nvkm/engine/gr/gf100: Demote non-conformant kernel-doc
> header
>   drm/nouveau/nouveau_bo: Remove unused variables 'dev'
>   drm/nouveau/nouveau_display: Remove set but unused variable 'width'
>   drm/nouveau/dispnv04/crtc: Demote non-conforming kernel-doc headers
>   drm/nouveau/dispnv50/disp: Remove unused variable 'ret' from function
> returning void
>   drm/nouveau/dispnv50/headc57d: Make local function 'headc57d_olut'
> static
>   drm/nouveau/nv50_display: Remove superfluous prototype for local
> static functions
>   drm/nouveau/dispnv50/disp: Include header containing our prototypes
>   drm/nouveau/nouveau_ioc32: File headers are not good candidates for
> kernel-doc
>   drm/nouveau/nouveau_svm: Remove unused variable 'ret' from void
> function
>   drm/nouveau/nouveau_ioc32: Demote kernel-doc abuse to standard comment
> block
> 
>  .../gpu/drm/amd/display/dc/calcs/dce_calcs.c  | 1154 +
>  .../drm/amd/display/dc/dce80/dce80_resource.c |   16 +-
>  drivers/gpu/drm/msm/dp/dp_display.c   |3 -
>  drivers/gpu/drm/nouveau/dispnv04/crtc.c   |4 +-
>  drivers/gpu/drm/nouveau/dispnv50/disp.c   |   10 +-
>  drivers/gpu/drm/nouveau/dispnv50/headc57d.c   |2 +-
>  drivers/gpu/drm/nouveau/nouveau_bo.c  |4 -
>  drivers/gpu/drm/nouveau/nouveau_display.c |8 +-
>  drivers/gpu/drm/nouveau/nouveau_ioc32.c   |4 +-
>  drivers/gpu/drm/nouveau/nouveau_svm.c |5 +-
>  drivers/gpu/drm/nouveau/nv50_display.h|3 -
>  .../gpu/drm/nouveau/nvkm/engine/gr/gf100.c|2 +-
>  .../gpu/drm/nouveau/nvkm/subdev/bios/init.c   |  204 +--
>  .../gpu/drm/nouveau/nvkm/subdev/volt/gk20a.c  |4 +-
>  include/drm/drm_atomic.h  |3 +-
>  15 files changed, 692 insertions(+), 734 deletions(-)
> 
> Cc: Alex Deucher 
> Cc: amd-gfx@lists.freedesktop.org
> Cc: Anthony Koo 
> Cc: Ben Skeggs 
> Cc: "Christian König" 
> Cc: Colin Ian King 
> Cc: Daniel Vetter 
> Cc: David Airlie 
> Cc: dri-de...@lists.freedesktop.org
> Cc: freedr...@lists.freedesktop.org
> Cc: Harry Wentland 
> Cc: Jeremy Kolb 
> Cc: Kuogee Hsieh 
> Cc: Leo Li 
> Cc: linaro-mm-...@lists.linaro.org
> Cc: linux-arm-...@vger.kernel.org
> Cc: linux-me...@vger.kernel.org
> Cc: Lyude Paul 
> Cc: Maarten Lankhorst 
> Cc: Maxime Ripard 
> Cc: nouv...@lists.freedesktop.org
> Cc: Rob Clark 
> Cc: Sean Paul 
> Cc: Sumit Semwal 
> Cc: Thomas Zimmermann 

-- 
Lee Jones [李琼斯]
Senior Technical Lead - Developer Services
Linaro.org │ Open source software for Arm SoCs
Follow Linaro: Facebook | Twitter | Blog
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH drm/amdgpu 2/2] drm/amd/pm: Convert sysfs sprintf/snprintf family to sysfs_emit

2021-03-24 Thread Tian Tao
Fix the following coccicheck warning:
drivers/gpu/drm/amd/pm/amdgpu_pm.c:1940:8-16: WARNING:
use scnprintf or sprintf
drivers/gpu/drm/amd/pm/amdgpu_pm.c:1978:8-16: WARNING:
use scnprintf or sprintf
drivers/gpu/drm/amd/pm/amdgpu_pm.c:2022:8-16: WARNING:
use scnprintf or sprintf
drivers/gpu/drm/amd/pm/amdgpu_pm.c:294:8-16: WARNING:
use scnprintf or sprintf
drivers/gpu/drm/amd/pm/amdgpu_pm.c:154:8-16: WARNING:
use scnprintf or sprintf
drivers/gpu/drm/amd/pm/amdgpu_pm.c:496:8-16: WARNING:
use scnprintf or sprintf
drivers/gpu/drm/amd/pm/amdgpu_pm.c:512:9-17: WARNING:
use scnprintf or sprintf
drivers/gpu/drm/amd/pm/amdgpu_pm.c:1740:8-16: WARNING:
use scnprintf or sprintf
drivers/gpu/drm/amd/pm/amdgpu_pm.c:1667:8-16: WARNING:
use scnprintf or sprintf
drivers/gpu/drm/amd/pm/amdgpu_pm.c:2074:8-16: WARNING:
use scnprintf or sprintf
drivers/gpu/drm/amd/pm/amdgpu_pm.c:2047:9-17: WARNING:
use scnprintf or sprintf
drivers/gpu/drm/amd/pm/amdgpu_pm.c:2768:8-16: WARNING:
use scnprintf or sprintf
drivers/gpu/drm/amd/pm/amdgpu_pm.c:2738:8-16: WARNING:
use scnprintf or sprintf
drivers/gpu/drm/amd/pm/amdgpu_pm.c:2442:8-16: WARNING:
use scnprintf or sprintf
drivers/gpu/drm/amd/pm/amdgpu_pm.c:3246:8-16: WARNING:
use scnprintf or sprintf
drivers/gpu/drm/amd/pm/amdgpu_pm.c:3253:8-16: WARNING:
use scnprintf or sprintf
drivers/gpu/drm/amd/pm/amdgpu_pm.c:2458:8-16: WARNING:
use scnprintf or sprintf
drivers/gpu/drm/amd/pm/amdgpu_pm.c:3047:8-16: WARNING:
use scnprintf or sprintf
drivers/gpu/drm/amd/pm/amdgpu_pm.c:3133:8-16: WARNING:
use scnprintf or sprintf
drivers/gpu/drm/amd/pm/amdgpu_pm.c:3209:8-16: WARNING:
use scnprintf or sprintf
drivers/gpu/drm/amd/pm/amdgpu_pm.c:3216:8-16: WARNING:
use scnprintf or sprintf
drivers/gpu/drm/amd/pm/amdgpu_pm.c:2410:8-16: WARNING:
use scnprintf or sprintf
drivers/gpu/drm/amd/pm/amdgpu_pm.c:2496:8-16: WARNING:
use scnprintf or sprintf
drivers/gpu/drm/amd/pm/amdgpu_pm.c:2470:8-16: WARNING:
use scnprintf or sprintf
drivers/gpu/drm/amd/pm/amdgpu_pm.c:2426:8-16: WARNING:
use scnprintf or sprintf
drivers/gpu/drm/amd/pm/amdgpu_pm.c:2965:8-16: WARNING:
use scnprintf or sprintf
drivers/gpu/drm/amd/pm/amdgpu_pm.c:2972:8-16: WARNING:
use scnprintf or sprintf
drivers/gpu/drm/amd/pm/amdgpu_pm.c:3006:8-16: WARNING:
use scnprintf or sprintf
drivers/gpu/drm/amd/pm/amdgpu_pm.c:3013:8-16: WARNING:
use scnprintf or sprintf

Signed-off-by: Tian Tao 
---
 drivers/gpu/drm/amd/pm/amdgpu_pm.c | 88 +++---
 1 file changed, 44 insertions(+), 44 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c 
b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
index 5fa65f1..0ee3e55 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
@@ -151,9 +151,9 @@ static ssize_t amdgpu_get_power_dpm_state(struct device 
*dev,
pm_runtime_mark_last_busy(ddev->dev);
pm_runtime_put_autosuspend(ddev->dev);
 
-   return snprintf(buf, PAGE_SIZE, "%s\n",
-   (pm == POWER_STATE_TYPE_BATTERY) ? "battery" :
-   (pm == POWER_STATE_TYPE_BALANCED) ? "balanced" : 
"performance");
+   return sysfs_emit(buf, "%s\n",
+ (pm == POWER_STATE_TYPE_BATTERY) ? "battery" :
+ (pm == POWER_STATE_TYPE_BALANCED) ? "balanced" : 
"performance");
 }
 
 static ssize_t amdgpu_set_power_dpm_state(struct device *dev,
@@ -291,16 +291,16 @@ static ssize_t 
amdgpu_get_power_dpm_force_performance_level(struct device *dev,
pm_runtime_mark_last_busy(ddev->dev);
pm_runtime_put_autosuspend(ddev->dev);
 
-   return snprintf(buf, PAGE_SIZE, "%s\n",
-   (level == AMD_DPM_FORCED_LEVEL_AUTO) ? "auto" :
-   (level == AMD_DPM_FORCED_LEVEL_LOW) ? "low" :
-   (level == AMD_DPM_FORCED_LEVEL_HIGH) ? "high" :
-   (level == AMD_DPM_FORCED_LEVEL_MANUAL) ? "manual" :
-   (level == AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD) ? 
"profile_standard" :
-   (level == AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK) ? 
"profile_min_sclk" :
-   (level == AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK) ? 
"profile_min_mclk" :
-   (level == AMD_DPM_FORCED_LEVEL_PROFILE_PEAK) ? 
"profile_peak" :
-   "unknown");
+   return sysfs_emit(buf, "%s\n",
+ (level == AMD_DPM_FORCED_LEVEL_AUTO) ? "auto" :
+ (level == AMD_DPM_FORCED_LEVEL_LOW) ? "low" :
+ (level == AMD_DPM_FORCED_LEVEL_HIGH) ? "high" :
+ (level == AMD_DPM_FORCED_LEVEL_MANUAL) ? "manual" :
+ (level == AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD) ? 
"profile_standard" :
+ (level == AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK) ? 
"profile_min_sclk" :
+ (level == AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK) ? 
"profile_min_mclk" :
+ (level == AMD_

Re: [PATCH] drm/amdgpu: Ensure that the modifier requested is supported by plane.

2021-03-24 Thread Daniel Stone
On Wed, 24 Mar 2021 at 10:53, Bas Nieuwenhuizen 
wrote:

> On Wed, Mar 24, 2021 at 11:13 AM Michel Dänzer  wrote:
>
>> No modifier support does not imply linear. It's generally signalled via
>> DRM_FORMAT_MOD_INVALID, which roughly means "tiling is determined by driver
>> specific mechanisms".
>>
>
> Doesn't quite work that way in the kernel sadly. If you don't set
> DRM_MODE_FB_MODIFIERS then the modifier fields have to be 0 (which happens
> to alias DRM_FORMAT_MOD_LINEAR and then now deprecated
> DRM_FORMAT_MOD_NONE). This is verified in shared drm code.
>
> (and all userspace code I've seen simply doesn't set DRM_MODE_FB_MODIFIERS
> if the incoming modifier is DRM_FORMAT_MOD_INVALID)
>

Yes, but even though the field is zero, the lack of the flag means it must
be treated as INVALID. If the kernel is not doing this, the kernel is
objectively wrong. (And I know it doesn't do this in most cases, because
otherwise I wouldn't be able to use this GNOME session on an Intel laptop,
where modifiers are blacklisted.)

Cheers,
Daniel
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/ttm: stop warning on TT shrinker failure

2021-03-24 Thread Christian König



Am 24.03.21 um 13:01 schrieb Daniel Vetter:

On Wed, Mar 24, 2021 at 01:00:28PM +0100, Christian König wrote:

Am 24.03.21 um 12:55 schrieb Daniel Vetter:

On Wed, Mar 24, 2021 at 11:19:13AM +0100, Thomas Hellström (Intel) wrote:

On 3/23/21 4:45 PM, Christian König wrote:

Am 23.03.21 um 16:13 schrieb Michal Hocko:

On Tue 23-03-21 14:56:54, Christian König wrote:

Am 23.03.21 um 14:41 schrieb Michal Hocko:

[...]

Anyway, I am wondering whether the overall approach is
sound. Why don't
you simply use shmem as your backing storage from the
beginning and pin
those pages if they are used by the device?

Yeah, that is exactly what the Intel guys are doing for their
integrated
GPUs :)

Problem is for TTM I need to be able to handle dGPUs and those have all
kinds of funny allocation restrictions. In other words I need to
guarantee
that the allocated memory is coherent accessible to the GPU
without using
SWIOTLB.

The simple case is that the device can only do DMA32, but you also got
device which can only do 40bits or 48bits.

On top of that you also got AGP, CMA and stuff like CPU cache behavior
changes (write back vs. write through, vs. uncached).

OK, so the underlying problem seems to be that gfp mask (thus
mapping_gfp_mask) cannot really reflect your requirements, right?  Would
it help if shmem would allow to provide an allocation callback to
override alloc_page_vma which is used currently? I am pretty sure there
will be more to handle but going through shmem for the whole life time
is just so much easier to reason about than some tricks to abuse shmem
just for the swapout path.

Well it's a start, but the pages can have special CPU cache settings. So
direct IO from/to them usually doesn't work as expected.

Additional to that for AGP and CMA I need to make sure that I give those
pages back to the relevant subsystems instead of just dropping the page
reference.

So I would need to block for the swapio to be completed.

Anyway I probably need to revert those patches for now since this isn't
working as we hoped it would.

Thanks for the explanation how stuff works here.

Another alternative here that I've tried before without being successful
would perhaps be to drop shmem completely and, if it's a normal page (no dma
or funny caching attributes) just use add_to_swap_cache()? If it's something
else, try alloc a page with relevant gfp attributes, copy and
add_to_swap_cache()? Or perhaps that doesn't work well from a shrinker
either?

So before we toss everything and go an a great rewrite-the-world tour,
what if we just try to split up big objects. So for objects which are
bigger than e.g. 10mb

- move them to a special "under eviction" list
- keep a note how far we evicted thus far
- interleave allocating shmem pages, copying data and releasing the ttm
backing store on a chunk basis (maybe 10mb or whatever, tuning tbh)

If that's not enough, occasionally break out of the shrinker entirely so
other parts of reclaim can reclaim the shmem stuff. But just releasing our
own pages as we go should help a lot I think.

Yeah, the later is exactly what I was currently prototyping.

I just didn't used a limit but rather a only partially evicted BOs list
which is used when we fail to allocate a page.

For the 5.12 cycle I think we should just go back to a hard 50% limit for
now and then resurrect this when we have solved the issues.

Can we do the 50% limit without tossing out all the code we've done thus
far? Just so this doesn't get too disruptive.


Yeah, I just need to get back to v1 of this patch. Before you convinced 
me that the shrinker is the better approach .)


Cheers,
Christian.


-Daniel


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/ttm: stop warning on TT shrinker failure

2021-03-24 Thread Daniel Vetter
On Wed, Mar 24, 2021 at 01:00:28PM +0100, Christian König wrote:
> Am 24.03.21 um 12:55 schrieb Daniel Vetter:
> > On Wed, Mar 24, 2021 at 11:19:13AM +0100, Thomas Hellström (Intel) wrote:
> > > On 3/23/21 4:45 PM, Christian König wrote:
> > > > Am 23.03.21 um 16:13 schrieb Michal Hocko:
> > > > > On Tue 23-03-21 14:56:54, Christian König wrote:
> > > > > > Am 23.03.21 um 14:41 schrieb Michal Hocko:
> > > > > [...]
> > > > > > > Anyway, I am wondering whether the overall approach is
> > > > > > > sound. Why don't
> > > > > > > you simply use shmem as your backing storage from the
> > > > > > > beginning and pin
> > > > > > > those pages if they are used by the device?
> > > > > > Yeah, that is exactly what the Intel guys are doing for their
> > > > > > integrated
> > > > > > GPUs :)
> > > > > > 
> > > > > > Problem is for TTM I need to be able to handle dGPUs and those have 
> > > > > > all
> > > > > > kinds of funny allocation restrictions. In other words I need to
> > > > > > guarantee
> > > > > > that the allocated memory is coherent accessible to the GPU
> > > > > > without using
> > > > > > SWIOTLB.
> > > > > > 
> > > > > > The simple case is that the device can only do DMA32, but you also 
> > > > > > got
> > > > > > device which can only do 40bits or 48bits.
> > > > > > 
> > > > > > On top of that you also got AGP, CMA and stuff like CPU cache 
> > > > > > behavior
> > > > > > changes (write back vs. write through, vs. uncached).
> > > > > OK, so the underlying problem seems to be that gfp mask (thus
> > > > > mapping_gfp_mask) cannot really reflect your requirements, right?  
> > > > > Would
> > > > > it help if shmem would allow to provide an allocation callback to
> > > > > override alloc_page_vma which is used currently? I am pretty sure 
> > > > > there
> > > > > will be more to handle but going through shmem for the whole life time
> > > > > is just so much easier to reason about than some tricks to abuse shmem
> > > > > just for the swapout path.
> > > > Well it's a start, but the pages can have special CPU cache settings. So
> > > > direct IO from/to them usually doesn't work as expected.
> > > > 
> > > > Additional to that for AGP and CMA I need to make sure that I give those
> > > > pages back to the relevant subsystems instead of just dropping the page
> > > > reference.
> > > > 
> > > > So I would need to block for the swapio to be completed.
> > > > 
> > > > Anyway I probably need to revert those patches for now since this isn't
> > > > working as we hoped it would.
> > > > 
> > > > Thanks for the explanation how stuff works here.
> > > Another alternative here that I've tried before without being successful
> > > would perhaps be to drop shmem completely and, if it's a normal page (no 
> > > dma
> > > or funny caching attributes) just use add_to_swap_cache()? If it's 
> > > something
> > > else, try alloc a page with relevant gfp attributes, copy and
> > > add_to_swap_cache()? Or perhaps that doesn't work well from a shrinker
> > > either?
> > So before we toss everything and go an a great rewrite-the-world tour,
> > what if we just try to split up big objects. So for objects which are
> > bigger than e.g. 10mb
> > 
> > - move them to a special "under eviction" list
> > - keep a note how far we evicted thus far
> > - interleave allocating shmem pages, copying data and releasing the ttm
> >backing store on a chunk basis (maybe 10mb or whatever, tuning tbh)
> > 
> > If that's not enough, occasionally break out of the shrinker entirely so
> > other parts of reclaim can reclaim the shmem stuff. But just releasing our
> > own pages as we go should help a lot I think.
> 
> Yeah, the later is exactly what I was currently prototyping.
> 
> I just didn't used a limit but rather a only partially evicted BOs list
> which is used when we fail to allocate a page.
> 
> For the 5.12 cycle I think we should just go back to a hard 50% limit for
> now and then resurrect this when we have solved the issues.

Can we do the 50% limit without tossing out all the code we've done thus
far? Just so this doesn't get too disruptive.
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/ttm: stop warning on TT shrinker failure

2021-03-24 Thread Christian König

Am 24.03.21 um 12:55 schrieb Daniel Vetter:

On Wed, Mar 24, 2021 at 11:19:13AM +0100, Thomas Hellström (Intel) wrote:

On 3/23/21 4:45 PM, Christian König wrote:

Am 23.03.21 um 16:13 schrieb Michal Hocko:

On Tue 23-03-21 14:56:54, Christian König wrote:

Am 23.03.21 um 14:41 schrieb Michal Hocko:

[...]

Anyway, I am wondering whether the overall approach is
sound. Why don't
you simply use shmem as your backing storage from the
beginning and pin
those pages if they are used by the device?

Yeah, that is exactly what the Intel guys are doing for their
integrated
GPUs :)

Problem is for TTM I need to be able to handle dGPUs and those have all
kinds of funny allocation restrictions. In other words I need to
guarantee
that the allocated memory is coherent accessible to the GPU
without using
SWIOTLB.

The simple case is that the device can only do DMA32, but you also got
device which can only do 40bits or 48bits.

On top of that you also got AGP, CMA and stuff like CPU cache behavior
changes (write back vs. write through, vs. uncached).

OK, so the underlying problem seems to be that gfp mask (thus
mapping_gfp_mask) cannot really reflect your requirements, right?  Would
it help if shmem would allow to provide an allocation callback to
override alloc_page_vma which is used currently? I am pretty sure there
will be more to handle but going through shmem for the whole life time
is just so much easier to reason about than some tricks to abuse shmem
just for the swapout path.

Well it's a start, but the pages can have special CPU cache settings. So
direct IO from/to them usually doesn't work as expected.

Additional to that for AGP and CMA I need to make sure that I give those
pages back to the relevant subsystems instead of just dropping the page
reference.

So I would need to block for the swapio to be completed.

Anyway I probably need to revert those patches for now since this isn't
working as we hoped it would.

Thanks for the explanation how stuff works here.

Another alternative here that I've tried before without being successful
would perhaps be to drop shmem completely and, if it's a normal page (no dma
or funny caching attributes) just use add_to_swap_cache()? If it's something
else, try alloc a page with relevant gfp attributes, copy and
add_to_swap_cache()? Or perhaps that doesn't work well from a shrinker
either?

So before we toss everything and go an a great rewrite-the-world tour,
what if we just try to split up big objects. So for objects which are
bigger than e.g. 10mb

- move them to a special "under eviction" list
- keep a note how far we evicted thus far
- interleave allocating shmem pages, copying data and releasing the ttm
   backing store on a chunk basis (maybe 10mb or whatever, tuning tbh)

If that's not enough, occasionally break out of the shrinker entirely so
other parts of reclaim can reclaim the shmem stuff. But just releasing our
own pages as we go should help a lot I think.


Yeah, the later is exactly what I was currently prototyping.

I just didn't used a limit but rather a only partially evicted BOs list 
which is used when we fail to allocate a page.


For the 5.12 cycle I think we should just go back to a hard 50% limit 
for now and then resurrect this when we have solved the issues.


Christian.


-Daniel


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/ttm: stop warning on TT shrinker failure

2021-03-24 Thread Daniel Vetter
On Wed, Mar 24, 2021 at 11:19:13AM +0100, Thomas Hellström (Intel) wrote:
> 
> On 3/23/21 4:45 PM, Christian König wrote:
> > Am 23.03.21 um 16:13 schrieb Michal Hocko:
> > > On Tue 23-03-21 14:56:54, Christian König wrote:
> > > > Am 23.03.21 um 14:41 schrieb Michal Hocko:
> > > [...]
> > > > > Anyway, I am wondering whether the overall approach is
> > > > > sound. Why don't
> > > > > you simply use shmem as your backing storage from the
> > > > > beginning and pin
> > > > > those pages if they are used by the device?
> > > > Yeah, that is exactly what the Intel guys are doing for their
> > > > integrated
> > > > GPUs :)
> > > > 
> > > > Problem is for TTM I need to be able to handle dGPUs and those have all
> > > > kinds of funny allocation restrictions. In other words I need to
> > > > guarantee
> > > > that the allocated memory is coherent accessible to the GPU
> > > > without using
> > > > SWIOTLB.
> > > > 
> > > > The simple case is that the device can only do DMA32, but you also got
> > > > device which can only do 40bits or 48bits.
> > > > 
> > > > On top of that you also got AGP, CMA and stuff like CPU cache behavior
> > > > changes (write back vs. write through, vs. uncached).
> > > OK, so the underlying problem seems to be that gfp mask (thus
> > > mapping_gfp_mask) cannot really reflect your requirements, right?  Would
> > > it help if shmem would allow to provide an allocation callback to
> > > override alloc_page_vma which is used currently? I am pretty sure there
> > > will be more to handle but going through shmem for the whole life time
> > > is just so much easier to reason about than some tricks to abuse shmem
> > > just for the swapout path.
> > 
> > Well it's a start, but the pages can have special CPU cache settings. So
> > direct IO from/to them usually doesn't work as expected.
> > 
> > Additional to that for AGP and CMA I need to make sure that I give those
> > pages back to the relevant subsystems instead of just dropping the page
> > reference.
> > 
> > So I would need to block for the swapio to be completed.
> > 
> > Anyway I probably need to revert those patches for now since this isn't
> > working as we hoped it would.
> > 
> > Thanks for the explanation how stuff works here.
> 
> Another alternative here that I've tried before without being successful
> would perhaps be to drop shmem completely and, if it's a normal page (no dma
> or funny caching attributes) just use add_to_swap_cache()? If it's something
> else, try alloc a page with relevant gfp attributes, copy and
> add_to_swap_cache()? Or perhaps that doesn't work well from a shrinker
> either?

So before we toss everything and go an a great rewrite-the-world tour,
what if we just try to split up big objects. So for objects which are
bigger than e.g. 10mb

- move them to a special "under eviction" list
- keep a note how far we evicted thus far
- interleave allocating shmem pages, copying data and releasing the ttm
  backing store on a chunk basis (maybe 10mb or whatever, tuning tbh)

If that's not enough, occasionally break out of the shrinker entirely so
other parts of reclaim can reclaim the shmem stuff. But just releasing our
own pages as we go should help a lot I think.
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amdgpu: Ensure that the modifier requested is supported by plane.

2021-03-24 Thread Bas Nieuwenhuizen
On Wed, Mar 24, 2021 at 11:13 AM Michel Dänzer  wrote:

> On 2021-03-23 4:32 p.m., Mark Yacoub wrote:
> > On Tue, Mar 23, 2021 at 11:02 AM Alex Deucher 
> wrote:
> >>
> >> On Wed, Mar 10, 2021 at 11:15 AM Mark Yacoub 
> wrote:
> >>>
> >>> From: Mark Yacoub 
> >>>
> >>> On initializing the framebuffer, call drm_any_plane_has_format to do a
> >>> check if the modifier is supported. drm_any_plane_has_format calls
> >>> dm_plane_format_mod_supported which is extended to validate that the
> >>> modifier is on the list of the plane's supported modifiers.
> >>>
> >>> The bug was caught using igt-gpu-tools test:
> kms_addfb_basic.addfb25-bad-modifier
> >>>
> >>> Tested on ChromeOS Zork by turning on the display, running an overlay
> >>> test, and running a YT video.
> >>>
> >>> Cc: Alex Deucher 
> >>> Cc: Bas Nieuwenhuizen 
> >>> Signed-off-by: default avatarMark Yacoub 
> >>
> >> I'm not an expert with modifiers yet.  Will this break chips which
> >> don't currently support modifiers?
> > No it shouldn't. When you don't support modifiers yet, your will
> > default to Linear Modifier (DRM_FORMAT_MOD_LINEAR),
> > [...]
> No modifier support does not imply linear. It's generally signalled via
> DRM_FORMAT_MOD_INVALID, which roughly means "tiling is determined by driver
> specific mechanisms".
>

Doesn't quite work that way in the kernel sadly. If you don't set
DRM_MODE_FB_MODIFIERS then the modifier fields have to be 0 (which happens
to alias DRM_FORMAT_MOD_LINEAR and then now deprecated
DRM_FORMAT_MOD_NONE). This is verified in shared drm code.

(and all userspace code I've seen simply doesn't set DRM_MODE_FB_MODIFIERS
if the incoming modifier is DRM_FORMAT_MOD_INVALID)

>
>
> --
> Earthling Michel Dänzer   |   https://redhat.com
> Libre software enthusiast | Mesa and X developer
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/ttm: stop warning on TT shrinker failure

2021-03-24 Thread Intel


On 3/23/21 4:45 PM, Christian König wrote:

Am 23.03.21 um 16:13 schrieb Michal Hocko:

On Tue 23-03-21 14:56:54, Christian König wrote:

Am 23.03.21 um 14:41 schrieb Michal Hocko:

[...]
Anyway, I am wondering whether the overall approach is sound. Why 
don't
you simply use shmem as your backing storage from the beginning and 
pin

those pages if they are used by the device?
Yeah, that is exactly what the Intel guys are doing for their 
integrated

GPUs :)

Problem is for TTM I need to be able to handle dGPUs and those have all
kinds of funny allocation restrictions. In other words I need to 
guarantee
that the allocated memory is coherent accessible to the GPU without 
using

SWIOTLB.

The simple case is that the device can only do DMA32, but you also got
device which can only do 40bits or 48bits.

On top of that you also got AGP, CMA and stuff like CPU cache behavior
changes (write back vs. write through, vs. uncached).

OK, so the underlying problem seems to be that gfp mask (thus
mapping_gfp_mask) cannot really reflect your requirements, right?  Would
it help if shmem would allow to provide an allocation callback to
override alloc_page_vma which is used currently? I am pretty sure there
will be more to handle but going through shmem for the whole life time
is just so much easier to reason about than some tricks to abuse shmem
just for the swapout path.


Well it's a start, but the pages can have special CPU cache settings. 
So direct IO from/to them usually doesn't work as expected.


Additional to that for AGP and CMA I need to make sure that I give 
those pages back to the relevant subsystems instead of just dropping 
the page reference.


So I would need to block for the swapio to be completed.

Anyway I probably need to revert those patches for now since this 
isn't working as we hoped it would.


Thanks for the explanation how stuff works here.


Another alternative here that I've tried before without being successful 
would perhaps be to drop shmem completely and, if it's a normal page (no 
dma or funny caching attributes) just use add_to_swap_cache()? If it's 
something else, try alloc a page with relevant gfp attributes, copy and 
add_to_swap_cache()? Or perhaps that doesn't work well from a shrinker 
either?


/Thomas





Christian.
___
dri-devel mailing list
dri-de...@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amdgpu: Ensure that the modifier requested is supported by plane.

2021-03-24 Thread Michel Dänzer
On 2021-03-23 4:32 p.m., Mark Yacoub wrote:
> On Tue, Mar 23, 2021 at 11:02 AM Alex Deucher  wrote:
>>
>> On Wed, Mar 10, 2021 at 11:15 AM Mark Yacoub  wrote:
>>>
>>> From: Mark Yacoub 
>>>
>>> On initializing the framebuffer, call drm_any_plane_has_format to do a
>>> check if the modifier is supported. drm_any_plane_has_format calls
>>> dm_plane_format_mod_supported which is extended to validate that the
>>> modifier is on the list of the plane's supported modifiers.
>>>
>>> The bug was caught using igt-gpu-tools test: 
>>> kms_addfb_basic.addfb25-bad-modifier
>>>
>>> Tested on ChromeOS Zork by turning on the display, running an overlay
>>> test, and running a YT video.
>>>
>>> Cc: Alex Deucher 
>>> Cc: Bas Nieuwenhuizen 
>>> Signed-off-by: default avatarMark Yacoub 
>>
>> I'm not an expert with modifiers yet.  Will this break chips which
>> don't currently support modifiers?
> No it shouldn't. When you don't support modifiers yet, your will
> default to Linear Modifier (DRM_FORMAT_MOD_LINEAR),
> [...]
No modifier support does not imply linear. It's generally signalled via 
DRM_FORMAT_MOD_INVALID, which roughly means "tiling is determined by driver 
specific mechanisms".


-- 
Earthling Michel Dänzer   |   https://redhat.com
Libre software enthusiast | Mesa and X developer
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Color mode exposed to user space?

2021-03-24 Thread Werner Sembach
Hello,

is the information which color mode is currently in used for a display (RGB, 
YCbCr444, or YCbCr420) exposed to user space somewhere?

If no: Where would be the best place to put code to expose it to sysfs?

Thanks in advance,

Werner Sembach


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/radeon/radeon_pm: Convert sysfs sprintf/snprintf family to sysfs_emit

2021-03-24 Thread Tian Tao
Fix the following coccicheck warning:
drivers/gpu//drm/radeon/radeon_pm.c:521:9-17: WARNING: use scnprintf or
sprintf
drivers/gpu//drm/radeon/radeon_pm.c:475:8-16: WARNING: use scnprintf or
sprintf
drivers/gpu//drm/radeon/radeon_pm.c:418:8-16: WARNING: use scnprintf or
sprintf
drivers/gpu//drm/radeon/radeon_pm.c:363:8-16: WARNING: use scnprintf or
sprintf
drivers/gpu//drm/radeon/radeon_pm.c:734:8-16: WARNING: use scnprintf or
sprintf
drivers/gpu//drm/radeon/radeon_pm.c:688:8-16: WARNING: use scnprintf or
sprintf
drivers/gpu//drm/radeon/radeon_pm.c:704:8-16: WARNING: use scnprintf or
sprintf
drivers/gpu//drm/radeon/radeon_pm.c:755:8-16: WARNING: use scnprintf or
sprintf

Signed-off-by: Tian Tao 
---
 drivers/gpu/drm/radeon/radeon_pm.c | 36 +---
 1 file changed, 17 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_pm.c 
b/drivers/gpu/drm/radeon/radeon_pm.c
index 1995dad..dd56fcd 100644
--- a/drivers/gpu/drm/radeon/radeon_pm.c
+++ b/drivers/gpu/drm/radeon/radeon_pm.c
@@ -361,11 +361,10 @@ static ssize_t radeon_get_pm_profile(struct device *dev,
struct radeon_device *rdev = ddev->dev_private;
int cp = rdev->pm.profile;
 
-   return snprintf(buf, PAGE_SIZE, "%s\n",
-   (cp == PM_PROFILE_AUTO) ? "auto" :
-   (cp == PM_PROFILE_LOW) ? "low" :
-   (cp == PM_PROFILE_MID) ? "mid" :
-   (cp == PM_PROFILE_HIGH) ? "high" : "default");
+   return sysfs_emit(buf, "%s\n", (cp == PM_PROFILE_AUTO) ? "auto" :
+ (cp == PM_PROFILE_LOW) ? "low" :
+ (cp == PM_PROFILE_MID) ? "mid" :
+ (cp == PM_PROFILE_HIGH) ? "high" : "default");
 }
 
 static ssize_t radeon_set_pm_profile(struct device *dev,
@@ -416,9 +415,8 @@ static ssize_t radeon_get_pm_method(struct device *dev,
struct radeon_device *rdev = ddev->dev_private;
int pm = rdev->pm.pm_method;
 
-   return snprintf(buf, PAGE_SIZE, "%s\n",
-   (pm == PM_METHOD_DYNPM) ? "dynpm" :
-   (pm == PM_METHOD_PROFILE) ? "profile" : "dpm");
+   return sysfs_emit(buf, "%s\n", (pm == PM_METHOD_DYNPM) ? "dynpm" :
+ (pm == PM_METHOD_PROFILE) ? "profile" : "dpm");
 }
 
 static ssize_t radeon_set_pm_method(struct device *dev,
@@ -473,9 +471,9 @@ static ssize_t radeon_get_dpm_state(struct device *dev,
struct radeon_device *rdev = ddev->dev_private;
enum radeon_pm_state_type pm = rdev->pm.dpm.user_state;
 
-   return snprintf(buf, PAGE_SIZE, "%s\n",
-   (pm == POWER_STATE_TYPE_BATTERY) ? "battery" :
-   (pm == POWER_STATE_TYPE_BALANCED) ? "balanced" : 
"performance");
+   return sysfs_emit(buf, "%s\n",
+ (pm == POWER_STATE_TYPE_BATTERY) ? "battery" :
+ (pm == POWER_STATE_TYPE_BALANCED) ? "balanced" : 
"performance");
 }
 
 static ssize_t radeon_set_dpm_state(struct device *dev,
@@ -519,11 +517,11 @@ static ssize_t 
radeon_get_dpm_forced_performance_level(struct device *dev,
 
if  ((rdev->flags & RADEON_IS_PX) &&
 (ddev->switch_power_state != DRM_SWITCH_POWER_ON))
-   return snprintf(buf, PAGE_SIZE, "off\n");
+   return sysfs_emit(buf, "off\n");
 
-   return snprintf(buf, PAGE_SIZE, "%s\n",
-   (level == RADEON_DPM_FORCED_LEVEL_AUTO) ? "auto" :
-   (level == RADEON_DPM_FORCED_LEVEL_LOW) ? "low" : 
"high");
+   return sysfs_emit(buf, "%s\n",
+ (level == RADEON_DPM_FORCED_LEVEL_AUTO) ? "auto" :
+ (level == RADEON_DPM_FORCED_LEVEL_LOW) ? "low" : 
"high");
 }
 
 static ssize_t radeon_set_dpm_forced_performance_level(struct device *dev,
@@ -686,7 +684,7 @@ static ssize_t radeon_hwmon_show_temp(struct device *dev,
else
temp = 0;
 
-   return snprintf(buf, PAGE_SIZE, "%d\n", temp);
+   return sysfs_emit(buf, "%d\n", temp);
 }
 
 static ssize_t radeon_hwmon_show_temp_thresh(struct device *dev,
@@ -702,7 +700,7 @@ static ssize_t radeon_hwmon_show_temp_thresh(struct device 
*dev,
else
temp = rdev->pm.dpm.thermal.max_temp;
 
-   return snprintf(buf, PAGE_SIZE, "%d\n", temp);
+   return sysfs_emit(buf, "%d\n", temp);
 }
 
 static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, radeon_hwmon_show_temp, NULL, 
0);
@@ -732,7 +730,7 @@ static ssize_t radeon_hwmon_show_sclk(struct device *dev,
   for hwmon */
sclk *= 1;
 
-   return snprintf(buf, PAGE_SIZE, "%u\n", sclk);
+   return sysfs_emit(buf, "%u\n", sclk);
 }
 
 static SENSOR_DEVICE_ATTR(freq1_input, S_IRUGO, radeon_hwmon_show_sclk, NULL,
@@ -753,7 +751,7 @@ static ssize_t radeon_hwmon_show_vddc(struct device *dev,
if (rdev->asic->dpm.get_current_vddc)
vddc = rdev->asic->dpm.get_current_vddc(rd

Re: [PATCH 1/2] drm/amdgpu: use zero as start for dummy resource walks

2021-03-24 Thread Christian König

Am 24.03.21 um 04:26 schrieb Pan, Xinhui:

[AMD Official Use Only - Internal Distribution Only]

I don’t think so. Start is offset here. We get the valid physical address from 
pages_addr[offset] when we update mapping.


Good point, mhm need to take an even closer look at this then.


Btw, what issue we are seeing?


We see a BUG_ON() backtrace when this is used together with PRT mapping 
in OpenGL and Vulkan because the offset is just nonsense in this case.


Thanks for pointing our the issue,
Christian.



-Original Message-
From: amd-gfx  On Behalf Of Christian 
K?nig
Sent: 2021年3月23日 22:55
To: amd-gfx@lists.freedesktop.org
Cc: Das, Nirmoy ; Chen, Guchun 
Subject: [PATCH 1/2] drm/amdgpu: use zero as start for dummy resource walks

When we don't have a physically backing store we should use zero instead of the 
virtual start address since that isn't necessary a valid physical one.

Signed-off-by: Christian König 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
index 40f2adf305bc..e94362ccf9d5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
@@ -54,7 +54,7 @@ static inline void amdgpu_res_first(struct ttm_resource *res,
  struct drm_mm_node *node;

  if (!res || !res->mm_node) {
-cur->start = start;
+cur->start = 0;
  cur->size = size;
  cur->remaining = size;
  cur->node = NULL;
--
2.25.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfx&data=04%7C01%7Cxinhui.pan%40amd.com%7C031c743bd7c448e8d91508d8ee0ba402%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637521081053105295%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&sdata=lrJ6k3QBXqM9G6GRK25frFlqANkbfR4kAv6A3%2F8myBc%3D&reserved=0


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx