Re: [PATCH 0/5] 0 MHz is not a valid current frequency

2021-10-12 Thread Lazar, Lijo




On 10/13/2021 8:40 AM, Luben Tuikov wrote:

Some ASICs support low-power functionality for the whole ASIC or just
an IP block. When in such low-power mode, some sysfs interfaces would
report a frequency of 0, e.g.,

$cat /sys/class/drm/card0/device/pp_dpm_sclk
0: 500Mhz
1: 0Mhz *
2: 2200Mhz
$_

An operating frequency of 0 MHz doesn't make sense, and this interface
is designed to report only operating clock frequencies, i.e. non-zero,
and possibly the current one.

When in this low-power state, round to the smallest
operating frequency, for this interface, as follows,



Would rather avoid this -

1) It is manipulating FW reported value. If at all there is an uncaught 
issue in FW reporting of frequency values, that is masked here.
2) Otherwise, if 0MHz is described as GFX power gated case, this 
provides a convenient interface to check if GFX is power gated.


If seeing a '0' is not pleasing, consider changing to something like
"NA" - not available (frequency cannot be fetched at the moment).

Thanks,
Lijo


$cat /sys/class/drm/card0/device/pp_dpm_sclk
0: 500Mhz *
1: 2200Mhz
$_

Luben Tuikov (5):
   drm/amd/pm: Slight function rename
   drm/amd/pm: Rename cur_value to curr_value
   drm/amd/pm: Rename freq_values --> freq_value
   dpm/amd/pm: Sienna: 0 MHz is not a current clock frequency
   dpm/amd/pm: Navi10: 0 MHz is not a current clock frequency

  .../gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c   | 60 +--
  .../amd/pm/swsmu/smu11/sienna_cichlid_ppt.c   | 73 ---
  2 files changed, 86 insertions(+), 47 deletions(-)



[PATCH 1/5] drm/amd/pm: Slight function rename

2021-10-12 Thread Luben Tuikov
Rename
sienna_cichlid_is_support_fine_grained_dpm() to
sienna_cichlid_supports_fine_grained_dpm().

Rename
navi10_is_support_fine_grained_dpm() to
navi10_supports_fine_grained_dpm().

Signed-off-by: Luben Tuikov 
---
 drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c | 7 ---
 drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c | 7 ---
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
index 71161f6b78fea9..0fe9790f67f5af 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
@@ -1231,7 +1231,8 @@ static int navi10_get_current_clk_freq_by_table(struct 
smu_context *smu,
   value);
 }
 
-static bool navi10_is_support_fine_grained_dpm(struct smu_context *smu, enum 
smu_clk_type clk_type)
+static bool navi10_supports_fine_grained_dpm(struct smu_context *smu,
+enum smu_clk_type clk_type)
 {
PPTable_t *pptable = smu->smu_table.driver_pptable;
DpmDescriptor_t *dpm_desc = NULL;
@@ -1299,7 +1300,7 @@ static int navi10_print_clk_levels(struct smu_context 
*smu,
if (ret)
return size;
 
-   if (!navi10_is_support_fine_grained_dpm(smu, clk_type)) {
+   if (!navi10_supports_fine_grained_dpm(smu, clk_type)) {
for (i = 0; i < count; i++) {
ret = smu_v11_0_get_dpm_freq_by_index(smu, 
clk_type, i, );
if (ret)
@@ -1465,7 +1466,7 @@ static int navi10_force_clk_levels(struct smu_context 
*smu,
case SMU_UCLK:
case SMU_FCLK:
/* There is only 2 levels for fine grained DPM */
-   if (navi10_is_support_fine_grained_dpm(smu, clk_type)) {
+   if (navi10_supports_fine_grained_dpm(smu, clk_type)) {
soft_max_level = (soft_max_level >= 1 ? 1 : 0);
soft_min_level = (soft_min_level >= 1 ? 1 : 0);
}
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
index 15e66e1912de33..3f5721baa5ff50 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
@@ -1006,7 +1006,8 @@ static int 
sienna_cichlid_get_current_clk_freq_by_table(struct smu_context *smu,
 
 }
 
-static bool sienna_cichlid_is_support_fine_grained_dpm(struct smu_context 
*smu, enum smu_clk_type clk_type)
+static bool sienna_cichlid_supports_fine_grained_dpm(struct smu_context *smu,
+enum smu_clk_type clk_type)
 {
DpmDescriptor_t *dpm_desc = NULL;
DpmDescriptor_t *table_member;
@@ -1084,7 +1085,7 @@ static int sienna_cichlid_print_clk_levels(struct 
smu_context *smu,
if (ret)
goto print_clk_out;
 
-   if (!sienna_cichlid_is_support_fine_grained_dpm(smu, clk_type)) 
{
+   if (!sienna_cichlid_supports_fine_grained_dpm(smu, clk_type)) {
for (i = 0; i < count; i++) {
ret = smu_v11_0_get_dpm_freq_by_index(smu, 
clk_type, i, );
if (ret)
@@ -1235,7 +1236,7 @@ static int sienna_cichlid_force_clk_levels(struct 
smu_context *smu,
case SMU_UCLK:
case SMU_FCLK:
/* There is only 2 levels for fine grained DPM */
-   if (sienna_cichlid_is_support_fine_grained_dpm(smu, clk_type)) {
+   if (sienna_cichlid_supports_fine_grained_dpm(smu, clk_type)) {
soft_max_level = (soft_max_level >= 1 ? 1 : 0);
soft_min_level = (soft_min_level >= 1 ? 1 : 0);
}
-- 
2.33.1.558.g2bd2f258f4



[PATCH 3/5] drm/amd/pm: Rename freq_values --> freq_value

2021-10-12 Thread Luben Tuikov
By usage: read freq_values[x] to freq_value[x].

Signed-off-by: Luben Tuikov 
---
 .../gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c| 16 
 .../amd/pm/swsmu/smu11/sienna_cichlid_ppt.c| 18 +-
 2 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
index f810549df493d5..646e9bbf8af42a 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
@@ -1268,7 +1268,7 @@ static int navi10_print_clk_levels(struct smu_context 
*smu,
uint16_t *curve_settings;
int i, size = 0, ret = 0;
uint32_t curr_value = 0, value = 0, count = 0;
-   uint32_t freq_values[3] = {0};
+   uint32_t freq_value[3] = {0, 0, 0};
uint32_t mark_index = 0;
struct smu_table_context *table_context = >smu_table;
uint32_t gen_speed, lane_width;
@@ -1310,21 +1310,21 @@ static int navi10_print_clk_levels(struct smu_context 
*smu,
curr_value == value ? "*" : "");
}
} else {
-   ret = smu_v11_0_get_dpm_freq_by_index(smu, clk_type, 0, 
_values[0]);
+   ret = smu_v11_0_get_dpm_freq_by_index(smu, clk_type, 0, 
_value[0]);
if (ret)
return size;
-   ret = smu_v11_0_get_dpm_freq_by_index(smu, clk_type, 
count - 1, _values[2]);
+   ret = smu_v11_0_get_dpm_freq_by_index(smu, clk_type, 
count - 1, _value[2]);
if (ret)
return size;
 
-   freq_values[1] = curr_value;
-   mark_index = curr_value == freq_values[0] ? 0 :
-curr_value == freq_values[2] ? 2 : 1;
+   freq_value[1] = curr_value;
+   mark_index = curr_value == freq_value[0] ? 0 :
+curr_value == freq_value[2] ? 2 : 1;
if (mark_index != 1)
-   freq_values[1] = (freq_values[0] + 
freq_values[2]) / 2;
+   freq_value[1] = (freq_value[0] + freq_value[2]) 
/ 2;
 
for (i = 0; i < 3; i++) {
-   size += sysfs_emit_at(buf, size, "%d: %uMhz 
%s\n", i, freq_values[i],
+   size += sysfs_emit_at(buf, size, "%d: %uMhz 
%s\n", i, freq_value[i],
i == mark_index ? "*" : "");
}
 
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
index 3ebded3a99b5f2..f630d5e928ccfe 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
@@ -1053,7 +1053,7 @@ static int sienna_cichlid_print_clk_levels(struct 
smu_context *smu,
(OverDriveTable_t *)table_context->overdrive_table;
int i, size = 0, ret = 0;
uint32_t curr_value = 0, value = 0, count = 0;
-   uint32_t freq_values[3] = {0};
+   uint32_t freq_value[3] = {0, 0, 0};
uint32_t mark_index = 0;
uint32_t gen_speed, lane_width;
uint32_t min_value, max_value;
@@ -1096,26 +1096,26 @@ static int sienna_cichlid_print_clk_levels(struct 
smu_context *smu,
curr_value == value ? "*" : "");
}
} else {
-   ret = smu_v11_0_get_dpm_freq_by_index(smu, clk_type, 0, 
_values[0]);
+   ret = smu_v11_0_get_dpm_freq_by_index(smu, clk_type, 0, 
_value[0]);
if (ret)
goto print_clk_out;
-   ret = smu_v11_0_get_dpm_freq_by_index(smu, clk_type, 
count - 1, _values[2]);
+   ret = smu_v11_0_get_dpm_freq_by_index(smu, clk_type, 
count - 1, _value[2]);
if (ret)
goto print_clk_out;
 
-   freq_values[1] = curr_value;
-   mark_index = curr_value == freq_values[0] ? 0 :
-curr_value == freq_values[2] ? 2 : 1;
+   freq_value[1] = curr_value;
+   mark_index = curr_value == freq_value[0] ? 0 :
+curr_value == freq_value[2] ? 2 : 1;
 
count = 3;
if (mark_index != 1) {
count = 2;
-   freq_values[1] = freq_values[2];
+   freq_value[1] = freq_value[2];
}
 
for (i = 0; i < count; i++) {
- 

[PATCH 4/5] dpm/amd/pm: Sienna: 0 MHz is not a current clock frequency

2021-10-12 Thread Luben Tuikov
A current value of a clock frequency of 0, means
that the IP block is in some kind of low power
state. Ignore it and don't report it here. Here we
only report the possible operating (non-zero)
frequencies of the block requested. So, if the
current clock value is 0, then report as the
current clock the minimum operating one, which is
non-zero.

Signed-off-by: Luben Tuikov 
---
 .../amd/pm/swsmu/smu11/sienna_cichlid_ppt.c   | 57 +--
 1 file changed, 39 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
index f630d5e928ccfe..00be2b851baf93 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
@@ -1054,10 +1054,10 @@ static int sienna_cichlid_print_clk_levels(struct 
smu_context *smu,
int i, size = 0, ret = 0;
uint32_t curr_value = 0, value = 0, count = 0;
uint32_t freq_value[3] = {0, 0, 0};
-   uint32_t mark_index = 0;
uint32_t gen_speed, lane_width;
uint32_t min_value, max_value;
uint32_t smu_version;
+   bool fine_grained;
 
smu_cmn_get_sysfs_buf(, );
 
@@ -1077,6 +1077,22 @@ static int sienna_cichlid_print_clk_levels(struct 
smu_context *smu,
if (ret)
goto print_clk_out;
 
+   ret = smu_v11_0_get_dpm_freq_by_index(smu, clk_type, 0,
+ _value[0]);
+   if (ret)
+   goto print_clk_out;
+
+   /* A current value of a clock frequency of 0, means
+* that the IP block is in some kind of low power
+* state. Ignore it and don't report it here. Here we
+* only report the possible operating (non-zero)
+* frequencies of the block requested. So, if the
+* current clock value is 0, then report as the
+* current clock the minimum operating one, which is
+* non-zero.
+*/
+   if (curr_value == 0)
+   curr_value = freq_value[0];
 
/* no need to disable gfxoff when retrieving the current gfxclk 
*/
if ((clk_type == SMU_GFXCLK) || (clk_type == SMU_SCLK))
@@ -1086,38 +1102,43 @@ static int sienna_cichlid_print_clk_levels(struct 
smu_context *smu,
if (ret)
goto print_clk_out;
 
-   if (!sienna_cichlid_supports_fine_grained_dpm(smu, clk_type)) {
-   for (i = 0; i < count; i++) {
+   fine_grained = sienna_cichlid_supports_fine_grained_dpm(smu, 
clk_type);
+   if (!fine_grained) {
+   /* We already got the 0-th index--print it
+* here and continue thereafter.
+*/
+   size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n", 0, 
freq_value[0],
+ curr_value == freq_value[0] ? "*" 
: "");
+   for (i = 1; i < count; i++) {
ret = smu_v11_0_get_dpm_freq_by_index(smu, 
clk_type, i, );
if (ret)
goto print_clk_out;
-
size += sysfs_emit_at(buf, size, "%d: %uMhz 
%s\n", i, value,
curr_value == value ? "*" : "");
}
} else {
-   ret = smu_v11_0_get_dpm_freq_by_index(smu, clk_type, 0, 
_value[0]);
-   if (ret)
-   goto print_clk_out;
+   freq_value[1] = curr_value;
ret = smu_v11_0_get_dpm_freq_by_index(smu, clk_type, 
count - 1, _value[2]);
if (ret)
goto print_clk_out;
 
-   freq_value[1] = curr_value;
-   mark_index = curr_value == freq_value[0] ? 0 :
-curr_value == freq_value[2] ? 2 : 1;
-
-   count = 3;
-   if (mark_index != 1) {
+   if (freq_value[1] == freq_value[0]) {
+   i = 1;
+   count = 3;
+   } else if (freq_value[1] == freq_value[2]) {
+   i = 0;
count = 2;
-   freq_value[1] = freq_value[2];
+   } else {
+   i = 0;
+   count = 3;
}
 
-   for (i = 0; i < count; i++) {
-   size += sysfs_emit_at(buf, size, "%d: %uMhz 
%s\n", i, freq_value[i],
-

[PATCH 5/5] dpm/amd/pm: Navi10: 0 MHz is not a current clock frequency

2021-10-12 Thread Luben Tuikov
A current value of a clock frequency of 0, means
that the IP block is in some kind of low power
state. Ignore it and don't report it here. Here we
only report the possible operating (non-zero)
frequencies of the block requested. So, if the
current clock value is 0, then report as the
current clock the minimum operating one, which is
non-zero.

Signed-off-by: Luben Tuikov 
---
 .../gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c   | 45 ---
 1 file changed, 30 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
index 646e9bbf8af42a..de1a558dc81047 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
@@ -1269,7 +1269,6 @@ static int navi10_print_clk_levels(struct smu_context 
*smu,
int i, size = 0, ret = 0;
uint32_t curr_value = 0, value = 0, count = 0;
uint32_t freq_value[3] = {0, 0, 0};
-   uint32_t mark_index = 0;
struct smu_table_context *table_context = >smu_table;
uint32_t gen_speed, lane_width;
struct smu_dpm_context *smu_dpm = >smu_dpm;
@@ -1279,6 +1278,7 @@ static int navi10_print_clk_levels(struct smu_context 
*smu,
(OverDriveTable_t *)table_context->overdrive_table;
struct smu_11_0_overdrive_table *od_settings = smu->od_settings;
uint32_t min_value, max_value;
+   bool fine_grained;
 
smu_cmn_get_sysfs_buf(, );
 
@@ -1296,12 +1296,23 @@ static int navi10_print_clk_levels(struct smu_context 
*smu,
if (ret)
return size;
 
+   ret = smu_v11_0_get_dpm_freq_by_index(smu, clk_type, 0,
+ _value[0]);
+   if (ret)
+   return size;
+
+   if (curr_value == 0)
+   curr_value = freq_value[0];
+
ret = smu_v11_0_get_dpm_level_count(smu, clk_type, );
if (ret)
return size;
 
-   if (!navi10_supports_fine_grained_dpm(smu, clk_type)) {
-   for (i = 0; i < count; i++) {
+   fine_grained = navi10_supports_fine_grained_dpm(smu, clk_type);
+   if (!fine_grained) {
+   size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n", 0, 
freq_value[0],
+ curr_value == freq_value[0] ? "*" 
: "");
+   for (i = 1; i < count; i++) {
ret = smu_v11_0_get_dpm_freq_by_index(smu, 
clk_type, i, );
if (ret)
return size;
@@ -1310,24 +1321,28 @@ static int navi10_print_clk_levels(struct smu_context 
*smu,
curr_value == value ? "*" : "");
}
} else {
-   ret = smu_v11_0_get_dpm_freq_by_index(smu, clk_type, 0, 
_value[0]);
-   if (ret)
-   return size;
+   freq_value[1] = curr_value;
ret = smu_v11_0_get_dpm_freq_by_index(smu, clk_type, 
count - 1, _value[2]);
if (ret)
return size;
 
-   freq_value[1] = curr_value;
-   mark_index = curr_value == freq_value[0] ? 0 :
-curr_value == freq_value[2] ? 2 : 1;
-   if (mark_index != 1)
-   freq_value[1] = (freq_value[0] + freq_value[2]) 
/ 2;
-
-   for (i = 0; i < 3; i++) {
-   size += sysfs_emit_at(buf, size, "%d: %uMhz 
%s\n", i, freq_value[i],
-   i == mark_index ? "*" : "");
+   if (freq_value[1] == freq_value[0]) {
+   i = 1;
+   count = 3;
+   } else if (freq_value[1] == freq_value[2]) {
+   i = 0;
+   count = 2;
+   } else {
+   i = 0;
+   count = 3;
}
 
+   for ( ; i < count; i++) {
+   size += sysfs_emit_at(buf, size,
+ "%d: %uMhz %s\n",
+ i, freq_value[i],
+ curr_value == 
freq_value[i] ? "*" : "");
+   }
}
break;
case SMU_PCIE:
-- 
2.33.1.558.g2bd2f258f4



[PATCH 2/5] drm/amd/pm: Rename cur_value to curr_value

2021-10-12 Thread Luben Tuikov
Rename "cur_value", which stands for "cursor
value" to "curr_value", which stands for "current
value".

Signed-off-by: Luben Tuikov 
---
 drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c   | 12 ++--
 .../drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c   | 15 ---
 2 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
index 0fe9790f67f5af..f810549df493d5 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
@@ -1267,7 +1267,7 @@ static int navi10_print_clk_levels(struct smu_context 
*smu,
 {
uint16_t *curve_settings;
int i, size = 0, ret = 0;
-   uint32_t cur_value = 0, value = 0, count = 0;
+   uint32_t curr_value = 0, value = 0, count = 0;
uint32_t freq_values[3] = {0};
uint32_t mark_index = 0;
struct smu_table_context *table_context = >smu_table;
@@ -1292,7 +1292,7 @@ static int navi10_print_clk_levels(struct smu_context 
*smu,
case SMU_VCLK:
case SMU_DCLK:
case SMU_DCEFCLK:
-   ret = navi10_get_current_clk_freq_by_table(smu, clk_type, 
_value);
+   ret = navi10_get_current_clk_freq_by_table(smu, clk_type, 
_value);
if (ret)
return size;
 
@@ -1307,7 +1307,7 @@ static int navi10_print_clk_levels(struct smu_context 
*smu,
return size;
 
size += sysfs_emit_at(buf, size, "%d: %uMhz 
%s\n", i, value,
-   cur_value == value ? "*" : "");
+   curr_value == value ? "*" : "");
}
} else {
ret = smu_v11_0_get_dpm_freq_by_index(smu, clk_type, 0, 
_values[0]);
@@ -1317,9 +1317,9 @@ static int navi10_print_clk_levels(struct smu_context 
*smu,
if (ret)
return size;
 
-   freq_values[1] = cur_value;
-   mark_index = cur_value == freq_values[0] ? 0 :
-cur_value == freq_values[2] ? 2 : 1;
+   freq_values[1] = curr_value;
+   mark_index = curr_value == freq_values[0] ? 0 :
+curr_value == freq_values[2] ? 2 : 1;
if (mark_index != 1)
freq_values[1] = (freq_values[0] + 
freq_values[2]) / 2;
 
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
index 3f5721baa5ff50..3ebded3a99b5f2 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
@@ -1052,7 +1052,7 @@ static int sienna_cichlid_print_clk_levels(struct 
smu_context *smu,
OverDriveTable_t *od_table =
(OverDriveTable_t *)table_context->overdrive_table;
int i, size = 0, ret = 0;
-   uint32_t cur_value = 0, value = 0, count = 0;
+   uint32_t curr_value = 0, value = 0, count = 0;
uint32_t freq_values[3] = {0};
uint32_t mark_index = 0;
uint32_t gen_speed, lane_width;
@@ -1073,10 +1073,11 @@ static int sienna_cichlid_print_clk_levels(struct 
smu_context *smu,
case SMU_DCLK:
case SMU_DCLK1:
case SMU_DCEFCLK:
-   ret = sienna_cichlid_get_current_clk_freq_by_table(smu, 
clk_type, _value);
+   ret = sienna_cichlid_get_current_clk_freq_by_table(smu, 
clk_type, _value);
if (ret)
goto print_clk_out;
 
+
/* no need to disable gfxoff when retrieving the current gfxclk 
*/
if ((clk_type == SMU_GFXCLK) || (clk_type == SMU_SCLK))
amdgpu_gfx_off_ctrl(adev, false);
@@ -1092,7 +1093,7 @@ static int sienna_cichlid_print_clk_levels(struct 
smu_context *smu,
goto print_clk_out;
 
size += sysfs_emit_at(buf, size, "%d: %uMhz 
%s\n", i, value,
-   cur_value == value ? "*" : "");
+   curr_value == value ? "*" : "");
}
} else {
ret = smu_v11_0_get_dpm_freq_by_index(smu, clk_type, 0, 
_values[0]);
@@ -1102,9 +1103,9 @@ static int sienna_cichlid_print_clk_levels(struct 
smu_context *smu,
if (ret)
goto print_clk_out;
 
-   freq_values[1] = cur_value;
-   mark_index = cur_value == freq_values[0] ? 0 :
-cur_value == freq_values[2] ? 2 : 1;
+   freq_values[1] = curr_value;
+

[PATCH 0/5] 0 MHz is not a valid current frequency

2021-10-12 Thread Luben Tuikov
Some ASICs support low-power functionality for the whole ASIC or just
an IP block. When in such low-power mode, some sysfs interfaces would
report a frequency of 0, e.g.,

$cat /sys/class/drm/card0/device/pp_dpm_sclk
0: 500Mhz 
1: 0Mhz *
2: 2200Mhz 
$_

An operating frequency of 0 MHz doesn't make sense, and this interface
is designed to report only operating clock frequencies, i.e. non-zero,
and possibly the current one.

When in this low-power state, round to the smallest
operating frequency, for this interface, as follows,

$cat /sys/class/drm/card0/device/pp_dpm_sclk
0: 500Mhz *
1: 2200Mhz 
$_

Luben Tuikov (5):
  drm/amd/pm: Slight function rename
  drm/amd/pm: Rename cur_value to curr_value
  drm/amd/pm: Rename freq_values --> freq_value
  dpm/amd/pm: Sienna: 0 MHz is not a current clock frequency
  dpm/amd/pm: Navi10: 0 MHz is not a current clock frequency

 .../gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c   | 60 +--
 .../amd/pm/swsmu/smu11/sienna_cichlid_ppt.c   | 73 ---
 2 files changed, 86 insertions(+), 47 deletions(-)

-- 
2.33.1.558.g2bd2f258f4



[PATCH AUTOSEL 5.10 06/11] drm/amdgpu/display: fix dependencies for DRM_AMD_DC_SI

2021-10-12 Thread Sasha Levin
From: Alex Deucher 

[ Upstream commit 4702b34d1de9582df9dfa0e583ea28fff7de29df ]

Depends on DRM_AMDGPU_SI and DRM_AMD_DC

Reviewed-by: Christian König 
Signed-off-by: Alex Deucher 
Signed-off-by: Sasha Levin 
---
 drivers/gpu/drm/amd/display/Kconfig | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/Kconfig 
b/drivers/gpu/drm/amd/display/Kconfig
index 3c410d236c49..f3274eb6b341 100644
--- a/drivers/gpu/drm/amd/display/Kconfig
+++ b/drivers/gpu/drm/amd/display/Kconfig
@@ -33,6 +33,8 @@ config DRM_AMD_DC_HDCP
 
 config DRM_AMD_DC_SI
bool "AMD DC support for Southern Islands ASICs"
+   depends on DRM_AMDGPU_SI
+   depends on DRM_AMD_DC
default n
help
  Choose this option to enable new AMD DC support for SI asics
-- 
2.33.0



[PATCH AUTOSEL 5.14 09/17] drm/amdgpu: init iommu after amdkfd device init

2021-10-12 Thread Sasha Levin
From: Yifan Zhang 

[ Upstream commit 714d9e4574d54596973ee3b0624ee4a16264d700 ]

This patch is to fix clinfo failure in Raven/Picasso:

Number of platforms: 1
  Platform Profile: FULL_PROFILE
  Platform Version: OpenCL 2.2 AMD-APP (3364.0)
  Platform Name: AMD Accelerated Parallel Processing
  Platform Vendor: Advanced Micro Devices, Inc.
  Platform Extensions: cl_khr_icd cl_amd_event_callback

  Platform Name: AMD Accelerated Parallel Processing Number of devices: 0

Signed-off-by: Yifan Zhang 
Reviewed-by: James Zhu 
Tested-by: James Zhu 
Acked-by: Felix Kuehling 
Signed-off-by: Alex Deucher 
Signed-off-by: Sasha Levin 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index d3247a5cceb4..580db14fd722 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2342,10 +2342,6 @@ static int amdgpu_device_ip_init(struct amdgpu_device 
*adev)
if (r)
goto init_failed;
 
-   r = amdgpu_amdkfd_resume_iommu(adev);
-   if (r)
-   goto init_failed;
-
r = amdgpu_device_ip_hw_init_phase1(adev);
if (r)
goto init_failed;
@@ -2384,6 +2380,10 @@ static int amdgpu_device_ip_init(struct amdgpu_device 
*adev)
if (!adev->gmc.xgmi.pending_reset)
amdgpu_amdkfd_device_init(adev);
 
+   r = amdgpu_amdkfd_resume_iommu(adev);
+   if (r)
+   goto init_failed;
+
amdgpu_fru_get_product_info(adev);
 
 init_failed:
-- 
2.33.0



[PATCH AUTOSEL 5.14 08/17] drm/amdgpu/display: fix dependencies for DRM_AMD_DC_SI

2021-10-12 Thread Sasha Levin
From: Alex Deucher 

[ Upstream commit 4702b34d1de9582df9dfa0e583ea28fff7de29df ]

Depends on DRM_AMDGPU_SI and DRM_AMD_DC

Reviewed-by: Christian König 
Signed-off-by: Alex Deucher 
Signed-off-by: Sasha Levin 
---
 drivers/gpu/drm/amd/display/Kconfig | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/Kconfig 
b/drivers/gpu/drm/amd/display/Kconfig
index 7dffc04a557e..127667e549c1 100644
--- a/drivers/gpu/drm/amd/display/Kconfig
+++ b/drivers/gpu/drm/amd/display/Kconfig
@@ -25,6 +25,8 @@ config DRM_AMD_DC_HDCP
 
 config DRM_AMD_DC_SI
bool "AMD DC support for Southern Islands ASICs"
+   depends on DRM_AMDGPU_SI
+   depends on DRM_AMD_DC
default n
help
  Choose this option to enable new AMD DC support for SI asics
-- 
2.33.0



Re: [PATCH v1 00/12] MEMORY_DEVICE_COHERENT for CPU-accessible coherent device memory

2021-10-12 Thread Felix Kuehling
Am 2021-10-12 um 3:03 p.m. schrieb Andrew Morton:
> On Tue, 12 Oct 2021 15:56:29 -0300 Jason Gunthorpe  wrote:
>
>>> To what other uses will this infrastructure be put?
>>>
>>> Because I must ask: if this feature is for one single computer which
>>> presumably has a custom kernel, why add it to mainline Linux?
>> Well, it certainly isn't just "one single computer". Overall I know of
>> about, hmm, ~10 *datacenters* worth of installations that are using
>> similar technology underpinnings.
>>
>> "Frontier" is the code name for a specific installation but as the
>> technology is proven out there will be many copies made of that same
>> approach.
>>
>> The previous program "Summit" was done with NVIDIA GPUs and PowerPC
>> CPUs and also included a very similar capability. I think this is a
>> good sign that this coherently attached accelerator will continue to
>> be a theme in computing going forward. IIRC this was done using out of
>> tree kernel patches and NUMA localities.
>>
>> Specifically with CXL now being standardized and on a path to ubiquity
>> I think we will see an explosion in deployments of coherently attached
>> accelerator memory. This is the high end trickling down to wider
>> usage.
>>
>> I strongly think many CXL accelerators are going to want to manage
>> their on-accelerator memory in this way as it makes universal sense to
>> want to carefully manage memory access locality to optimize for
>> performance.
> Thanks.  Can we please get something like the above into the [0/n]
> changelog?  Along with any other high-level info which is relevant?
>
> It's rather important.  "why should I review this", "why should we
> merge this", etc.

Using Jason's input, I suggest adding this text for the next revision of
the cover letter:

DEVICE_PRIVATE memory emulates coherence between CPU and the device by
migrating data back and forth. An application that accesses the same
page (or huge page) from CPU and device concurrently can cause many
migrations, each involving device cache flushes, page table updates and
page faults on the CPU or device.

In contrast, DEVICE_COHERENT enables truly concurrent CPU and device
access to ZONE_DEVICE pages by taking advantage of HW coherence
protocols.

As a historical reference point, the Summit supercomputer implemented
such a coherent memory architecture with NVidia GPUs and PowerPC CPUs.

The initial user for the DEVICE_COHERENT memory type will be the AMD GPU
driver on the Frontier supercomputer. CXL standardizes a coherent
peripheral interconnect, leading to more mainstream systems and devices
with that capability.

Best regards,
  Felix




Re: [PATCH v2 1/3] drm/amdkfd: ratelimited svm debug messages

2021-10-12 Thread Felix Kuehling
Am 2021-10-12 um 9:55 a.m. schrieb Philip Yang:
> No function change, use pr_debug_ratelimited to avoid per page debug
> message overflowing dmesg buf and console log.
>
> use dev_err to show error message from unexpected situation, to provide
> clue to help debug without enabling dynamic debug log. Define dev_fmt to
> output function name in error message.
>
> Signed-off-by: Philip Yang 

Reviewed-by: Felix Kuehling 


> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 34 +---
>  drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 17 +++-
>  2 files changed, 30 insertions(+), 21 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
> index f53e17a94ad8..b05c0579d0b9 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
> @@ -20,7 +20,6 @@
>   * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
>   * OTHER DEALINGS IN THE SOFTWARE.
>   */
> -
>  #include 
>  #include 
>  #include 
> @@ -34,6 +33,11 @@
>  #include "kfd_svm.h"
>  #include "kfd_migrate.h"
>  
> +#ifdef dev_fmt
> +#undef dev_fmt
> +#endif
> +#define dev_fmt(fmt) "kfd_migrate: %s: " fmt, __func__
> +
>  static uint64_t
>  svm_migrate_direct_mapping_addr(struct amdgpu_device *adev, uint64_t addr)
>  {
> @@ -151,14 +155,14 @@ svm_migrate_copy_memory_gart(struct amdgpu_device 
> *adev, dma_addr_t *sys,
>   gart_d = svm_migrate_direct_mapping_addr(adev, *vram);
>   }
>   if (r) {
> - pr_debug("failed %d to create gart mapping\n", r);
> + dev_err(adev->dev, "fail %d create gart mapping\n", r);
>   goto out_unlock;
>   }
>  
>   r = amdgpu_copy_buffer(ring, gart_s, gart_d, size * PAGE_SIZE,
>  NULL, , false, true, false);
>   if (r) {
> - pr_debug("failed %d to copy memory\n", r);
> + dev_err(adev->dev, "fail %d to copy memory\n", r);
>   goto out_unlock;
>   }
>  
> @@ -285,7 +289,7 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, 
> struct svm_range *prange,
>  
>   r = svm_range_vram_node_new(adev, prange, true);
>   if (r) {
> - pr_debug("failed %d get 0x%llx pages from vram\n", r, npages);
> + dev_err(adev->dev, "fail %d to alloc vram\n", r);
>   goto out;
>   }
>  
> @@ -305,7 +309,7 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, 
> struct svm_range *prange,
> DMA_TO_DEVICE);
>   r = dma_mapping_error(dev, src[i]);
>   if (r) {
> - pr_debug("failed %d dma_map_page\n", r);
> + dev_err(adev->dev, "fail %d dma_map_page\n", r);
>   goto out_free_vram_pages;
>   }
>   } else {
> @@ -325,8 +329,8 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, 
> struct svm_range *prange,
>   continue;
>   }
>  
> - pr_debug("dma mapping src to 0x%llx, page_to_pfn 0x%lx\n",
> -  src[i] >> PAGE_SHIFT, page_to_pfn(spage));
> + pr_debug_ratelimited("dma mapping src to 0x%llx, pfn 0x%lx\n",
> +  src[i] >> PAGE_SHIFT, page_to_pfn(spage));
>  
>   if (j >= (cursor.size >> PAGE_SHIFT) - 1 && i < npages - 1) {
>   r = svm_migrate_copy_memory_gart(adev, src + i - j,
> @@ -405,8 +409,8 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, 
> struct svm_range *prange,
>  
>   r = migrate_vma_setup();
>   if (r) {
> - pr_debug("failed %d prepare migrate svms 0x%p [0x%lx 0x%lx]\n",
> -  r, prange->svms, prange->start, prange->last);
> + dev_err(adev->dev, "vma setup fail %d range [0x%lx 0x%lx]\n", r,
> + prange->start, prange->last);
>   goto out_free;
>   }
>   if (migrate.cpages != npages) {
> @@ -506,7 +510,7 @@ static void svm_migrate_page_free(struct page *page)
>   struct svm_range_bo *svm_bo = page->zone_device_data;
>  
>   if (svm_bo) {
> - pr_debug("svm_bo ref left: %d\n", kref_read(_bo->kref));
> + pr_debug_ratelimited("ref: %d\n", kref_read(_bo->kref));
>   svm_range_bo_unref(svm_bo);
>   }
>  }
> @@ -572,12 +576,12 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, 
> struct svm_range *prange,
>   dst[i] = dma_map_page(dev, dpage, 0, PAGE_SIZE, 
> DMA_FROM_DEVICE);
>   r = dma_mapping_error(dev, dst[i]);
>   if (r) {
> - pr_debug("failed %d dma_map_page\n", r);
> + dev_err(adev->dev, "fail %d dma_map_page\n", r);
>   goto out_oom;

Re: [PATCH v4 2/3] drm/amdkfd: handle svm partial migration cpages 0

2021-10-12 Thread Felix Kuehling


Am 2021-10-12 um 6:39 p.m. schrieb Philip Yang:
> migrate_vma_setup may return cpages 0, means 0 page can be migrated,
> treat this as error case to skip the rest of vma migration steps.
>
> Change svm_migrate_vma_to_vram and svm_migrate_vma_to_ram to return the
> number of pages migrated successfully or error code. The caller add up
> all the successful migration pages and update prange->actual_loc only if
> the total migrated pages is not 0.
>
> This also removes the warning message "VRAM BO missing during
> validation" if migration cpages is 0.
>
> Signed-off-by: Philip Yang 
> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 86 ++--
>  1 file changed, 52 insertions(+), 34 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
> index b05c0579d0b9..537e32f77eb5 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
> @@ -376,7 +376,7 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, 
> struct svm_range *prange,
>   return r;
>  }
>  
> -static int
> +static long
>  svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
>   struct vm_area_struct *vma, uint64_t start,
>   uint64_t end)
> @@ -413,32 +413,37 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, 
> struct svm_range *prange,
>   prange->start, prange->last);
>   goto out_free;
>   }
> - if (migrate.cpages != npages) {
> - pr_debug("Partial migration. 0x%lx/0x%llx pages can be 
> migrated\n",
> -  migrate.cpages,
> -  npages);
> - }
>  
> - if (migrate.cpages) {
> - r = svm_migrate_copy_to_vram(adev, prange, &migrate, &mfence,
> -  scratch);
> - migrate_vma_pages(&migrate);
> - svm_migrate_copy_done(adev, mfence);
> - migrate_vma_finalize(&migrate);
> + if (migrate.cpages != npages)
> + pr_debug("partial migration, 0x%lx/0x%llx pages migrated\n",
> +  migrate.cpages, npages);
> + else
> + pr_debug("0x%lx pages migrated\n", migrate.cpages);
> +
> + if (!migrate.cpages) {
> + pr_debug("failed collect migrate sys pages [0x%lx 0x%lx]\n",
> +  prange->start, prange->last);
> + goto out_free;
>   }
>  
> + r = svm_migrate_copy_to_vram(adev, prange, &migrate, &mfence, scratch);
> + migrate_vma_pages(&migrate);
> + svm_migrate_copy_done(adev, mfence);
> + migrate_vma_finalize(&migrate);
> +
>   svm_range_dma_unmap(adev->dev, scratch, 0, npages);
>   svm_range_free_dma_mappings(prange);
>  
>  out_free:
>   kvfree(buf);
>  out:
> - if (!r) {
> + if (!r && migrate.cpages) {
>   pdd = svm_range_get_pdd_by_adev(prange, adev);
>   if (pdd)
>   WRITE_ONCE(pdd->page_in, pdd->page_in + migrate.cpages);
> - }
>  
> + return migrate.cpages;
> + }
>   return r;
>  }
>  
> @@ -460,7 +465,8 @@ svm_migrate_ram_to_vram(struct svm_range *prange, 
> uint32_t best_loc,
>   unsigned long addr, start, end;
>   struct vm_area_struct *vma;
>   struct amdgpu_device *adev;
> - int r = 0;
> + unsigned long cpages = 0;
> + long r = 0;
>  
>   if (prange->actual_loc == best_loc) {
>   pr_debug("svms 0x%p [0x%lx 0x%lx] already on best_loc 0x%x\n",
> @@ -492,17 +498,19 @@ svm_migrate_ram_to_vram(struct svm_range *prange, 
> uint32_t best_loc,
>  
>   next = min(vma->vm_end, end);
>   r = svm_migrate_vma_to_vram(adev, prange, vma, addr, next);
> - if (r) {
> - pr_debug("failed to migrate\n");
> + if (r < 0) {
> + pr_debug("failed %ld to migrate\n", r);
>   break;
>   }
> + if (r > 0)

This should be "else if" or even just "else".


> + cpages += r;
>   addr = next;
>   }
>  
> - if (!r)
> + if (cpages)
>   prange->actual_loc = best_loc;
>  
> - return r;
> + return r < 0 ? r : 0;
>  }
>  
>  static void svm_migrate_page_free(struct page *page)
> @@ -603,7 +611,7 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, 
> struct svm_range *prange,
>   return r;
>  }
>  
> -static int
> +static long
>  svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
>  struct vm_area_struct *vma, uint64_t start, uint64_t end)
>  {
> @@ -640,29 +648,35 @@ svm_migrate_vma_to_ram(struct amdgpu_device *adev, 
> struct svm_range *prange,
>   goto out_free;
>   }
>  
> - pr_debug("cpages %ld\n", migrate.cpages);
> + if (migrate.cpages != npages)
> + pr_debug("partial migration, 0x%lx/0x%llx pages migrated\n",
> +  migrate.cpages, 

[PATCH v4 2/3] drm/amdkfd: handle svm partial migration cpages 0

2021-10-12 Thread Philip Yang
migrate_vma_setup may return cpages 0, means 0 page can be migrated,
treat this as error case to skip the rest of vma migration steps.

Change svm_migrate_vma_to_vram and svm_migrate_vma_to_ram to return the
number of pages migrated successfully or error code. The caller add up
all the successful migration pages and update prange->actual_loc only if
the total migrated pages is not 0.

This also removes the warning message "VRAM BO missing during
validation" if migration cpages is 0.

Signed-off-by: Philip Yang 
---
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 86 ++--
 1 file changed, 52 insertions(+), 34 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index b05c0579d0b9..537e32f77eb5 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -376,7 +376,7 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct 
svm_range *prange,
return r;
 }
 
-static int
+static long
 svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
struct vm_area_struct *vma, uint64_t start,
uint64_t end)
@@ -413,32 +413,37 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, 
struct svm_range *prange,
prange->start, prange->last);
goto out_free;
}
-   if (migrate.cpages != npages) {
-   pr_debug("Partial migration. 0x%lx/0x%llx pages can be 
migrated\n",
-migrate.cpages,
-npages);
-   }
 
-   if (migrate.cpages) {
-   r = svm_migrate_copy_to_vram(adev, prange, &migrate, &mfence,
-scratch);
-   migrate_vma_pages(&migrate);
-   svm_migrate_copy_done(adev, mfence);
-   migrate_vma_finalize(&migrate);
+   if (migrate.cpages != npages)
+   pr_debug("partial migration, 0x%lx/0x%llx pages migrated\n",
+migrate.cpages, npages);
+   else
+   pr_debug("0x%lx pages migrated\n", migrate.cpages);
+
+   if (!migrate.cpages) {
+   pr_debug("failed collect migrate sys pages [0x%lx 0x%lx]\n",
+prange->start, prange->last);
+   goto out_free;
}
 
+   r = svm_migrate_copy_to_vram(adev, prange, &migrate, &mfence, scratch);
+   migrate_vma_pages(&migrate);
+   svm_migrate_copy_done(adev, mfence);
+   migrate_vma_finalize(&migrate);
+
svm_range_dma_unmap(adev->dev, scratch, 0, npages);
svm_range_free_dma_mappings(prange);
 
 out_free:
kvfree(buf);
 out:
-   if (!r) {
+   if (!r && migrate.cpages) {
pdd = svm_range_get_pdd_by_adev(prange, adev);
if (pdd)
WRITE_ONCE(pdd->page_in, pdd->page_in + migrate.cpages);
-   }
 
+   return migrate.cpages;
+   }
return r;
 }
 
@@ -460,7 +465,8 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t 
best_loc,
unsigned long addr, start, end;
struct vm_area_struct *vma;
struct amdgpu_device *adev;
-   int r = 0;
+   unsigned long cpages = 0;
+   long r = 0;
 
if (prange->actual_loc == best_loc) {
pr_debug("svms 0x%p [0x%lx 0x%lx] already on best_loc 0x%x\n",
@@ -492,17 +498,19 @@ svm_migrate_ram_to_vram(struct svm_range *prange, 
uint32_t best_loc,
 
next = min(vma->vm_end, end);
r = svm_migrate_vma_to_vram(adev, prange, vma, addr, next);
-   if (r) {
-   pr_debug("failed to migrate\n");
+   if (r < 0) {
+   pr_debug("failed %ld to migrate\n", r);
break;
}
+   if (r > 0)
+   cpages += r;
addr = next;
}
 
-   if (!r)
+   if (cpages)
prange->actual_loc = best_loc;
 
-   return r;
+   return r < 0 ? r : 0;
 }
 
 static void svm_migrate_page_free(struct page *page)
@@ -603,7 +611,7 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct 
svm_range *prange,
return r;
 }
 
-static int
+static long
 svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
   struct vm_area_struct *vma, uint64_t start, uint64_t end)
 {
@@ -640,29 +648,35 @@ svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct 
svm_range *prange,
goto out_free;
}
 
-   pr_debug("cpages %ld\n", migrate.cpages);
+   if (migrate.cpages != npages)
+   pr_debug("partial migration, 0x%lx/0x%llx pages migrated\n",
+migrate.cpages, npages);
+   else
+   pr_debug("0x%lx pages migrated\n", migrate.cpages);
 
-   if (migrate.cpages) {
-   r = svm_migrate_copy_to_ram(adev, prange, , ,
-  

Re: [PATCH v3 2/3] drm/amdkfd: handle svm partial migration cpages 0

2021-10-12 Thread Felix Kuehling


Am 2021-10-12 um 5:00 p.m. schrieb Philip Yang:
> migrate_vma_setup may return cpages 0, means 0 page can be migrated,
> treat this as error case to skip the rest of vma migration steps.
>
> Change svm_migrate_vma_to_vram and svm_migrate_vma_to_ram to return the
> number of pages migrated successfully or error code. The caller add up
> all the successful migration pages and update prange->actual_loc only if
> the total migrated pages is not 0.
>
> This also removes the warning message "VRAM BO missing during
> validation" if migration cpages is 0.
>
> Signed-off-by: Philip Yang 
> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 90 ++--
>  1 file changed, 51 insertions(+), 39 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
> index b05c0579d0b9..d37f20b17586 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
> @@ -376,7 +376,7 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, 
> struct svm_range *prange,
>   return r;
>  }
>  
> -static int
> +static long
>  svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
>   struct vm_area_struct *vma, uint64_t start,
>   uint64_t end)
> @@ -413,32 +413,37 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, 
> struct svm_range *prange,
>   prange->start, prange->last);
>   goto out_free;
>   }
> - if (migrate.cpages != npages) {
> - pr_debug("Partial migration. 0x%lx/0x%llx pages can be 
> migrated\n",
> -  migrate.cpages,
> -  npages);
> - }
>  
> - if (migrate.cpages) {
> - r = svm_migrate_copy_to_vram(adev, prange, &migrate, &mfence,
> -  scratch);
> - migrate_vma_pages(&migrate);
> - svm_migrate_copy_done(adev, mfence);
> - migrate_vma_finalize(&migrate);
> + if (migrate.cpages != npages)
> + pr_debug("partial migration, 0x%lx/0x%llx pages migrated\n",
> +  migrate.cpages, npages);
> + else
> + pr_debug("0x%lx pages migrated\n", migrate.cpages);
> +
> + if (!migrate.cpages) {
> + pr_debug("failed collect migrate sys pages [0x%lx 0x%lx]\n",
> +  prange->start, prange->last);
> + goto out_free;
>   }
>  
> + r = svm_migrate_copy_to_vram(adev, prange, &migrate, &mfence, scratch);
> + migrate_vma_pages(&migrate);
> + svm_migrate_copy_done(adev, mfence);
> + migrate_vma_finalize(&migrate);
> +
>   svm_range_dma_unmap(adev->dev, scratch, 0, npages);
>   svm_range_free_dma_mappings(prange);
>  
>  out_free:
>   kvfree(buf);
>  out:
> - if (!r) {
> + if (!r && migrate.cpages) {
>   pdd = svm_range_get_pdd_by_adev(prange, adev);
>   if (pdd)
>   WRITE_ONCE(pdd->page_in, pdd->page_in + migrate.cpages);
> - }
>  
> + return migrate.cpages;
> + }
>   return r;
>  }
>  
> @@ -460,7 +465,8 @@ svm_migrate_ram_to_vram(struct svm_range *prange, 
> uint32_t best_loc,
>   unsigned long addr, start, end;
>   struct vm_area_struct *vma;
>   struct amdgpu_device *adev;
> - int r = 0;
> + unsigned long cpages = 0;
> + long r;
>  
>   if (prange->actual_loc == best_loc) {
>   pr_debug("svms 0x%p [0x%lx 0x%lx] already on best_loc 0x%x\n",
> @@ -492,17 +498,16 @@ svm_migrate_ram_to_vram(struct svm_range *prange, 
> uint32_t best_loc,
>  
>   next = min(vma->vm_end, end);
>   r = svm_migrate_vma_to_vram(adev, prange, vma, addr, next);
> - if (r) {
> - pr_debug("failed to migrate\n");
> - break;
> - }
> + if (r > 0)
> + cpages += r;

I think you still want to break out of the loop here if r < 0,
potentially with a debug message.


>   addr = next;
>   }
>  
> - if (!r)
> + if (cpages) {
>   prange->actual_loc = best_loc;
> -
> - return r;
> + return 0;
> + }
> + return -ENOMEM;
>  }
>  
>  static void svm_migrate_page_free(struct page *page)
> @@ -603,7 +608,7 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, 
> struct svm_range *prange,
>   return r;
>  }
>  
> -static int
> +static long
>  svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
>  struct vm_area_struct *vma, uint64_t start, uint64_t end)
>  {
> @@ -640,29 +645,35 @@ svm_migrate_vma_to_ram(struct amdgpu_device *adev, 
> struct svm_range *prange,
>   goto out_free;
>   }
>  
> - pr_debug("cpages %ld\n", migrate.cpages);
> + if (migrate.cpages != npages)
> + pr_debug("partial migration, 0x%lx/0x%llx pages migrated\n",
> +  migrate.cpages, npages);
> + 

Re: [PATCH v5] amd/display: only require overlay plane to cover whole CRTC on ChromeOS

2021-10-12 Thread Alex Deucher
On Tue, Oct 12, 2021 at 4:57 PM Harry Wentland  wrote:
>
>
>
> On 10/12/21 3:57 PM, Alex Deucher wrote:
> > On Tue, Oct 12, 2021 at 10:39 AM Harry Wentland  
> > wrote:
> >>
> >> On 2021-10-11 11:16, Simon Ser wrote:
> >>> Commit ddab8bd788f5 ("drm/amd/display: Fix two cursor duplication when
> >>> using overlay") changed the atomic validation code to forbid the
> >>> overlay plane from being used if it doesn't cover the whole CRTC. The
> >>> motivation is that ChromeOS uses the atomic API for everything except
> >>> the cursor plane (which uses the legacy API). Thus amdgpu must always
> >>> be prepared to enable/disable/move the cursor plane at any time without
> >>> failing (or else ChromeOS will trip over).
> >>>
> >>> As discussed in [1], there's no reason why the ChromeOS limitation
> >>> should prevent other fully atomic users from taking advantage of the
> >>> overlay plane. Let's limit the check to ChromeOS.
> >>>
> >>> v4: fix ChromeOS detection (Harry)
> >>>
> >>> v5: fix conflict with linux-next
> >>>
> >>> [1]: 
> >>> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flore.kernel.org%2Famd-gfx%2FJIQ_93_cHcshiIDsrMU1huBzx9P9LVQxucx8hQArpQu7Wk5DrCl_vTXj_Q20m_L-8C8A5dSpNcSJ8ehfcCrsQpfB5QG_Spn14EYkH9chtg0%3D%40emersion.fr%2Fdata=04%7C01%7Charry.wentland%40amd.com%7Cf5038651be2d44b2d11208d98dba8a8e%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637696654602344329%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000sdata=83wfZCmSw3IpY%2BRxgnVB4YqABUf8W%2BgYCynDzLvFU7g%3Dreserved=0>>
> >>> Signed-off-by: Simon Ser 
> >>> Cc: Alex Deucher 
> >>> Cc: Harry Wentland 
> >>> Cc: Nicholas Kazlauskas 
> >>> Cc: Bas Nieuwenhuizen 
> >>> Cc: Rodrigo Siqueira 
> >>> Cc: Sean Paul 
> >>> Fixes: ddab8bd788f5 ("drm/amd/display: Fix two cursor duplication when 
> >>> using overlay")
> >>
> >> Reviewed-by: Harry Wentland 
> >
> > @Harry Wentland, @Simon Ser Do you have a preference on whether we
> > apply this patch or revert ddab8bd788f5?  I'm fine with either.
> >
>
> Is get_mm_exe_file missing on linux-next? I'm okay either
> way but haven't looked closely at linux-next.

Yes, it was removed in 5.15.

Alex

>
> Another option, as discussed by Simon on IRC, might be
> to take this patch only on the Chrome kernels, though
> it would be nice to avoid custom patches on Chrome kernels.
>
> Harry
>
> > Alex
> >
> >>
> >> Harry
> >>
> >>> ---
> >>>   .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 29 +++
> >>>   1 file changed, 29 insertions(+)
> >>>
> >>> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
> >>> b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> >>> index f35561b5a465..2eeda1fec506 100644
> >>> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> >>> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> >>> @@ -10594,6 +10594,31 @@ static int add_affected_mst_dsc_crtcs(struct 
> >>> drm_atomic_state *state, struct drm
> >>>   }
> >>>   #endif
> >>>
> >>> +static bool is_chromeos(void)
> >>> +{
> >>> + struct mm_struct *mm = current->mm;
> >>> + struct file *exe_file;
> >>> + bool ret;
> >>> +
> >>> + /* ChromeOS renames its thread to DrmThread. Also check the 
> >>> executable
> >>> +  * name. */
> >>> + if (strcmp(current->comm, "DrmThread") != 0 || !mm)
> >>> + return false;
> >>> +
> >>> + rcu_read_lock();
> >>> + exe_file = rcu_dereference(mm->exe_file);
> >>> + if (exe_file && !get_file_rcu(exe_file))
> >>> + exe_file = NULL;
> >>> + rcu_read_unlock();
> >>> +
> >>> + if (!exe_file)
> >>> + return false;
> >>> + ret = strcmp(exe_file->f_path.dentry->d_name.name, "chrome") == 0;
> >>> + fput(exe_file);
> >>> +
> >>> + return ret;
> >>> +}
> >>> +
> >>>   static int validate_overlay(struct drm_atomic_state *state)
> >>>   {
> >>>int i;
> >>> @@ -10601,6 +10626,10 @@ static int validate_overlay(struct 
> >>> drm_atomic_state *state)
> >>>struct drm_plane_state *new_plane_state;
> >>>struct drm_plane_state *primary_state, *overlay_state = NULL;
> >>>
> >>> + /* This is a workaround for ChromeOS only */
> >>> + if (!is_chromeos())
> >>> + return 0;
> >>> +
> >>>/* Check if primary plane is contained inside overlay */
> >>>for_each_new_plane_in_state_reverse(state, plane, new_plane_state, 
> >>> i) {
> >>>if (plane->type == DRM_PLANE_TYPE_OVERLAY) {
> >>>
> >>


[PATCH v3 2/3] drm/amdkfd: handle svm partial migration cpages 0

2021-10-12 Thread Philip Yang
migrate_vma_setup may return cpages 0, means 0 page can be migrated,
treat this as error case to skip the rest of vma migration steps.

Change svm_migrate_vma_to_vram and svm_migrate_vma_to_ram to return the
number of pages migrated successfully or error code. The caller add up
all the successful migration pages and update prange->actual_loc only if
the total migrated pages is not 0.

This also removes the warning message "VRAM BO missing during
validation" if migration cpages is 0.

Signed-off-by: Philip Yang 
---
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 90 ++--
 1 file changed, 51 insertions(+), 39 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index b05c0579d0b9..d37f20b17586 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -376,7 +376,7 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct 
svm_range *prange,
return r;
 }
 
-static int
+static long
 svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
struct vm_area_struct *vma, uint64_t start,
uint64_t end)
@@ -413,32 +413,37 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, 
struct svm_range *prange,
prange->start, prange->last);
goto out_free;
}
-   if (migrate.cpages != npages) {
-   pr_debug("Partial migration. 0x%lx/0x%llx pages can be 
migrated\n",
-migrate.cpages,
-npages);
-   }
 
-   if (migrate.cpages) {
-   r = svm_migrate_copy_to_vram(adev, prange, &migrate, &mfence,
-scratch);
-   migrate_vma_pages(&migrate);
-   svm_migrate_copy_done(adev, mfence);
-   migrate_vma_finalize(&migrate);
+   if (migrate.cpages != npages)
+   pr_debug("partial migration, 0x%lx/0x%llx pages migrated\n",
+migrate.cpages, npages);
+   else
+   pr_debug("0x%lx pages migrated\n", migrate.cpages);
+
+   if (!migrate.cpages) {
+   pr_debug("failed collect migrate sys pages [0x%lx 0x%lx]\n",
+prange->start, prange->last);
+   goto out_free;
}
 
+   r = svm_migrate_copy_to_vram(adev, prange, &migrate, &mfence, scratch);
+   migrate_vma_pages(&migrate);
+   svm_migrate_copy_done(adev, mfence);
+   migrate_vma_finalize(&migrate);
+
svm_range_dma_unmap(adev->dev, scratch, 0, npages);
svm_range_free_dma_mappings(prange);
 
 out_free:
kvfree(buf);
 out:
-   if (!r) {
+   if (!r && migrate.cpages) {
pdd = svm_range_get_pdd_by_adev(prange, adev);
if (pdd)
WRITE_ONCE(pdd->page_in, pdd->page_in + migrate.cpages);
-   }
 
+   return migrate.cpages;
+   }
return r;
 }
 
@@ -460,7 +465,8 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t 
best_loc,
unsigned long addr, start, end;
struct vm_area_struct *vma;
struct amdgpu_device *adev;
-   int r = 0;
+   unsigned long cpages = 0;
+   long r;
 
if (prange->actual_loc == best_loc) {
pr_debug("svms 0x%p [0x%lx 0x%lx] already on best_loc 0x%x\n",
@@ -492,17 +498,16 @@ svm_migrate_ram_to_vram(struct svm_range *prange, 
uint32_t best_loc,
 
next = min(vma->vm_end, end);
r = svm_migrate_vma_to_vram(adev, prange, vma, addr, next);
-   if (r) {
-   pr_debug("failed to migrate\n");
-   break;
-   }
+   if (r > 0)
+   cpages += r;
addr = next;
}
 
-   if (!r)
+   if (cpages) {
prange->actual_loc = best_loc;
-
-   return r;
+   return 0;
+   }
+   return -ENOMEM;
 }
 
 static void svm_migrate_page_free(struct page *page)
@@ -603,7 +608,7 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct 
svm_range *prange,
return r;
 }
 
-static int
+static long
 svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
   struct vm_area_struct *vma, uint64_t start, uint64_t end)
 {
@@ -640,29 +645,35 @@ svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct 
svm_range *prange,
goto out_free;
}
 
-   pr_debug("cpages %ld\n", migrate.cpages);
+   if (migrate.cpages != npages)
+   pr_debug("partial migration, 0x%lx/0x%llx pages migrated\n",
+migrate.cpages, npages);
+   else
+   pr_debug("0x%lx pages migrated\n", migrate.cpages);
 
-   if (migrate.cpages) {
-   r = svm_migrate_copy_to_ram(adev, prange, &migrate, &mfence,
-   scratch, npages);
-   migrate_vma_pages(&migrate);
-

Re: [PATCH v5] amd/display: only require overlay plane to cover whole CRTC on ChromeOS

2021-10-12 Thread Harry Wentland




On 10/12/21 3:57 PM, Alex Deucher wrote:

On Tue, Oct 12, 2021 at 10:39 AM Harry Wentland  wrote:


On 2021-10-11 11:16, Simon Ser wrote:

Commit ddab8bd788f5 ("drm/amd/display: Fix two cursor duplication when
using overlay") changed the atomic validation code to forbid the
overlay plane from being used if it doesn't cover the whole CRTC. The
motivation is that ChromeOS uses the atomic API for everything except
the cursor plane (which uses the legacy API). Thus amdgpu must always
be prepared to enable/disable/move the cursor plane at any time without
failing (or else ChromeOS will trip over).

As discussed in [1], there's no reason why the ChromeOS limitation
should prevent other fully atomic users from taking advantage of the
overlay plane. Let's limit the check to ChromeOS.

v4: fix ChromeOS detection (Harry)

v5: fix conflict with linux-next

[1]: 
https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flore.kernel.org%2Famd-gfx%2FJIQ_93_cHcshiIDsrMU1huBzx9P9LVQxucx8hQArpQu7Wk5DrCl_vTXj_Q20m_L-8C8A5dSpNcSJ8ehfcCrsQpfB5QG_Spn14EYkH9chtg0%3D%40emersion.fr%2Fdata=04%7C01%7Charry.wentland%40amd.com%7Cf5038651be2d44b2d11208d98dba8a8e%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637696654602344329%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000sdata=83wfZCmSw3IpY%2BRxgnVB4YqABUf8W%2BgYCynDzLvFU7g%3Dreserved=0>>
Signed-off-by: Simon Ser 
Cc: Alex Deucher 
Cc: Harry Wentland 
Cc: Nicholas Kazlauskas 
Cc: Bas Nieuwenhuizen 
Cc: Rodrigo Siqueira 
Cc: Sean Paul 
Fixes: ddab8bd788f5 ("drm/amd/display: Fix two cursor duplication when using 
overlay")


Reviewed-by: Harry Wentland 


@Harry Wentland, @Simon Ser Do you have a preference on whether we
apply this patch or revert ddab8bd788f5?  I'm fine with either.



Is get_mm_exe_file missing on linux-next? I'm okay either
way but haven't looked closely at linux-next.

Another option, as discussed by Simon on IRC, might be
to take this patch only on the Chrome kernels, though
it would be nice to avoid custom patches on Chrome kernels.

Harry


Alex



Harry


---
  .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 29 +++
  1 file changed, 29 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index f35561b5a465..2eeda1fec506 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -10594,6 +10594,31 @@ static int add_affected_mst_dsc_crtcs(struct 
drm_atomic_state *state, struct drm
  }
  #endif

+static bool is_chromeos(void)
+{
+ struct mm_struct *mm = current->mm;
+ struct file *exe_file;
+ bool ret;
+
+ /* ChromeOS renames its thread to DrmThread. Also check the executable
+  * name. */
+ if (strcmp(current->comm, "DrmThread") != 0 || !mm)
+ return false;
+
+ rcu_read_lock();
+ exe_file = rcu_dereference(mm->exe_file);
+ if (exe_file && !get_file_rcu(exe_file))
+ exe_file = NULL;
+ rcu_read_unlock();
+
+ if (!exe_file)
+ return false;
+ ret = strcmp(exe_file->f_path.dentry->d_name.name, "chrome") == 0;
+ fput(exe_file);
+
+ return ret;
+}
+
  static int validate_overlay(struct drm_atomic_state *state)
  {
   int i;
@@ -10601,6 +10626,10 @@ static int validate_overlay(struct drm_atomic_state 
*state)
   struct drm_plane_state *new_plane_state;
   struct drm_plane_state *primary_state, *overlay_state = NULL;

+ /* This is a workaround for ChromeOS only */
+ if (!is_chromeos())
+ return 0;
+
   /* Check if primary plane is contained inside overlay */
   for_each_new_plane_in_state_reverse(state, plane, new_plane_state, i) {
   if (plane->type == DRM_PLANE_TYPE_OVERLAY) {





[PATCH] drm/amdgpu: Warn when bad pages approaches threshold

2021-10-12 Thread Kent Russell
Currently dmesg doesn't warn when the number of bad pages approaches the
threshold for page retirement. WARN when the number of bad pages
is at 90% or greater for easier checks and planning, instead of waiting
until the GPU is full of bad pages

Signed-off-by: Kent Russell 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
index 98732518543e..eda823294dc8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
@@ -1077,6 +1077,8 @@ int amdgpu_ras_eeprom_init(struct 
amdgpu_ras_eeprom_control *control,
if (res)
DRM_ERROR("RAS table incorrect checksum or error:%d\n",
  res);
+   if (control->ras_num_recs >= (amdgpu_bad_page_threshold * 9 / 
10))
+   DRM_WARN("RAS records:%d approaching threshold:%d");
} else if (hdr->header == RAS_TABLE_HDR_BAD &&
   amdgpu_bad_page_threshold != 0) {
res = __verify_ras_table_checksum(control);
-- 
2.25.1



Re: Fwd: [PATCH] Size can be any value and is user controlled resulting in overwriting the 40 byte array wr_buf with an arbitrary length of data from buf.

2021-10-12 Thread Alex Deucher
On Tue, Oct 12, 2021 at 4:45 PM T. Williams  wrote:
>
> Should I resubmit the patch email with correct formatting? MITRE assigned 
> this bug as CVE-2021-42327. Does AMD/kernel do public vulnerability reports? 
> Do I need to email someone else or something(sorry for dumb questions this is 
> my first time doing this and I don't know what to do)?
> I am trying to do step 11 from here: 
> https://cve.mitre.org/cve/researcher_reservation_guidelines.

Just resend the fixed up patch using git-send-email and we'll apply it.

Alex

>
> On Tue, Oct 12, 2021 at 3:18 AM Christian König 
>  wrote:
>>
>> Am 11.10.21 um 22:24 schrieb T. Williams:
>>
>>
>>
>> -- Forwarded message -
>> From: docfate111 
>> Date: Mon, Oct 11, 2021 at 4:22 PM
>> Subject: [PATCH] Size can be any value and is user controlled resulting in 
>> overwriting the 40 byte array wr_buf with an arbitrary length of data from 
>> buf.
>> To: 
>> Cc: , 
>>
>>
>> Signed-off-by: docfate111 
>>
>>
>> While the find might be correct there are a couple of style problems with 
>> the patch.
>>
>> First of all the subject line must be shorter and should be something like 
>> "drm/amdgpu: fix out of bounds write".
>>
>> The detailed description of the bug then comes into the commit message.
>>
>> And finally please use your real name for the Signed-off-by line.
>>
>> Apart from that good catch,
>> Christian.
>>
>> ---
>>  drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c | 2 +-
>>  1 file changed, 1 insertion(+), 1 deletion(-)
>>
>> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c 
>> b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
>> index 87daa78a32b8..17f2756a64dc 100644
>> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
>> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
>> @@ -263,7 +263,7 @@ static ssize_t dp_link_settings_write(struct file *f, 
>> const char __user *buf,
>> if (!wr_buf)
>> return -ENOSPC;
>>
>> -   if (parse_write_buffer_into_params(wr_buf, size,
>> +   if (parse_write_buffer_into_params(wr_buf, wr_buf_size,
>>(long *)param, buf,
>>max_param_num,
>>_nums)) {
>> --
>> 2.25.1
>>
>>
>>
>> --
>> Thank you for your time,
>> Thelford Williams
>>
>>
>
>
> --
> Thank you for your time,
> Thelford Williams


Re: Fwd: [PATCH] Size can be any value and is user controlled resulting in overwriting the 40 byte array wr_buf with an arbitrary length of data from buf.

2021-10-12 Thread T. Williams
Should I resubmit the patch email with correct formatting? MITRE assigned
this bug as CVE-2021-42327. Does AMD/kernel do public vulnerability
reports? Do I need to email someone else or something(sorry for dumb
questions this is my first time doing this and I don't know what to do)?
I am trying to do step 11 from here:
https://cve.mitre.org/cve/researcher_reservation_guidelines.

On Tue, Oct 12, 2021 at 3:18 AM Christian König <
ckoenig.leichtzumer...@gmail.com> wrote:

> Am 11.10.21 um 22:24 schrieb T. Williams:
>
>
>
> -- Forwarded message -
> From: docfate111 
> Date: Mon, Oct 11, 2021 at 4:22 PM
> Subject: [PATCH] Size can be any value and is user controlled resulting in
> overwriting the 40 byte array wr_buf with an arbitrary length of data from
> buf.
> To: 
> Cc: , 
>
>
> Signed-off-by: docfate111 
>
>
> While the find might be correct there are a couple of style problems with
> the patch.
>
> First of all the subject line must be shorter and should be something like
> "drm/amdgpu: fix out of bounds write".
>
> The detailed description of the bug then comes into the commit message.
>
> And finally please use your real name for the Signed-off-by line.
>
> Apart from that good catch,
> Christian.
>
> ---
>  drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
> b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
> index 87daa78a32b8..17f2756a64dc 100644
> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
> @@ -263,7 +263,7 @@ static ssize_t dp_link_settings_write(struct file *f,
> const char __user *buf,
> if (!wr_buf)
> return -ENOSPC;
>
> -   if (parse_write_buffer_into_params(wr_buf, size,
> +   if (parse_write_buffer_into_params(wr_buf, wr_buf_size,
>(long *)param, buf,
>max_param_num,
>&param_nums)) {
> --
> 2.25.1
>
>
>
> --
> Thank you for your time,
> Thelford Williams
>
>
>

-- 
Thank you for your time,
Thelford Williams


Re: [PATCH v1 00/12] MEMORY_DEVICE_COHERENT for CPU-accessible coherent device memory

2021-10-12 Thread Darrick J. Wong
On Tue, Oct 12, 2021 at 04:24:25PM -0400, Felix Kuehling wrote:
> 
> Am 2021-10-12 um 3:11 p.m. schrieb Matthew Wilcox:
> > On Tue, Oct 12, 2021 at 11:39:57AM -0700, Andrew Morton wrote:
> >> Because I must ask: if this feature is for one single computer which
> >> presumably has a custom kernel, why add it to mainline Linux?
> > I think in particular patch 2 deserves to be merged because it removes
> > a ton of cruft from every call to put_page() (at least if you're using
> > a distro config).  It makes me nervous, but I think it's the right
> > thing to do.  It may well need more fixups after it has been merged,
> > but that's life.
> 
> Maybe we should split the first two patches into a separate series, and
> get it merged first, while the more controversial stuff is still under
> review?

Yes, please.  I've seen that first patch several times already. :)

--D

> Thanks,
>   Felix
> 
> 


Re: [PATCH v1 00/12] MEMORY_DEVICE_COHERENT for CPU-accessible coherent device memory

2021-10-12 Thread Felix Kuehling


Am 2021-10-12 um 3:11 p.m. schrieb Matthew Wilcox:
> On Tue, Oct 12, 2021 at 11:39:57AM -0700, Andrew Morton wrote:
>> Because I must ask: if this feature is for one single computer which
>> presumably has a custom kernel, why add it to mainline Linux?
> I think in particular patch 2 deserves to be merged because it removes
> a ton of cruft from every call to put_page() (at least if you're using
> a distro config).  It makes me nervous, but I think it's the right
> thing to do.  It may well need more fixups after it has been merged,
> but that's life.

Maybe we should split the first two patches into a separate series, and
get it merged first, while the more controversial stuff is still under
review?

Thanks,
  Felix




Re: [PATCH v5] amd/display: only require overlay plane to cover whole CRTC on ChromeOS

2021-10-12 Thread Alex Deucher
On Tue, Oct 12, 2021 at 10:39 AM Harry Wentland  wrote:
>
> On 2021-10-11 11:16, Simon Ser wrote:
> > Commit ddab8bd788f5 ("drm/amd/display: Fix two cursor duplication when
> > using overlay") changed the atomic validation code to forbid the
> > overlay plane from being used if it doesn't cover the whole CRTC. The
> > motivation is that ChromeOS uses the atomic API for everything except
> > the cursor plane (which uses the legacy API). Thus amdgpu must always
> > be prepared to enable/disable/move the cursor plane at any time without
> > failing (or else ChromeOS will trip over).
> >
> > As discussed in [1], there's no reason why the ChromeOS limitation
> > should prevent other fully atomic users from taking advantage of the
> > overlay plane. Let's limit the check to ChromeOS.
> >
> > v4: fix ChromeOS detection (Harry)
> >
> > v5: fix conflict with linux-next
> >
> > [1]: 
> > https://lore.kernel.org/amd-gfx/JIQ_93_cHcshiIDsrMU1huBzx9P9LVQxucx8hQArpQu7Wk5DrCl_vTXj_Q20m_L-8C8A5dSpNcSJ8ehfcCrsQpfB5QG_Spn14EYkH9chtg0=@emersion.fr/>>
> > Signed-off-by: Simon Ser 
> > Cc: Alex Deucher 
> > Cc: Harry Wentland 
> > Cc: Nicholas Kazlauskas 
> > Cc: Bas Nieuwenhuizen 
> > Cc: Rodrigo Siqueira 
> > Cc: Sean Paul 
> > Fixes: ddab8bd788f5 ("drm/amd/display: Fix two cursor duplication when 
> > using overlay")
>
> Reviewed-by: Harry Wentland 

@Harry Wentland, @Simon Ser Do you have a preference on whether we
apply this patch or revert ddab8bd788f5?  I'm fine with either.

Alex

>
> Harry
>
> > ---
> >  .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 29 +++
> >  1 file changed, 29 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
> > b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> > index f35561b5a465..2eeda1fec506 100644
> > --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> > +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> > @@ -10594,6 +10594,31 @@ static int add_affected_mst_dsc_crtcs(struct 
> > drm_atomic_state *state, struct drm
> >  }
> >  #endif
> >
> > +static bool is_chromeos(void)
> > +{
> > + struct mm_struct *mm = current->mm;
> > + struct file *exe_file;
> > + bool ret;
> > +
> > + /* ChromeOS renames its thread to DrmThread. Also check the executable
> > +  * name. */
> > + if (strcmp(current->comm, "DrmThread") != 0 || !mm)
> > + return false;
> > +
> > + rcu_read_lock();
> > + exe_file = rcu_dereference(mm->exe_file);
> > + if (exe_file && !get_file_rcu(exe_file))
> > + exe_file = NULL;
> > + rcu_read_unlock();
> > +
> > + if (!exe_file)
> > + return false;
> > + ret = strcmp(exe_file->f_path.dentry->d_name.name, "chrome") == 0;
> > + fput(exe_file);
> > +
> > + return ret;
> > +}
> > +
> >  static int validate_overlay(struct drm_atomic_state *state)
> >  {
> >   int i;
> > @@ -10601,6 +10626,10 @@ static int validate_overlay(struct 
> > drm_atomic_state *state)
> >   struct drm_plane_state *new_plane_state;
> >   struct drm_plane_state *primary_state, *overlay_state = NULL;
> >
> > + /* This is a workaround for ChromeOS only */
> > + if (!is_chromeos())
> > + return 0;
> > +
> >   /* Check if primary plane is contained inside overlay */
> >   for_each_new_plane_in_state_reverse(state, plane, new_plane_state, i) 
> > {
> >   if (plane->type == DRM_PLANE_TYPE_OVERLAY) {
> >
>


Re: [PATCH] drm/amdgpu: enable display for cyan skillfish

2021-10-12 Thread Alex Deucher
On Tue, Oct 12, 2021 at 2:16 AM Lang Yu  wrote:
>
> Display support for cyan skillfish is ready now. Enable it!
>
> Signed-off-by: Lang Yu 

Whoops. this is my mistake.  I lost the display enablement when the IP
discovery patches and the cyan skillfish display patches crossed.

Reviewed-by: Alex Deucher 

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 3 +--
>  1 file changed, 1 insertion(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
> index 2bebd2ce6474..4228c7964175 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
> @@ -736,6 +736,7 @@ static int amdgpu_discovery_set_display_ip_blocks(struct 
> amdgpu_device *adev)
> case IP_VERSION(1, 0, 1):
> case IP_VERSION(2, 0, 2):
> case IP_VERSION(2, 0, 0):
> +   case IP_VERSION(2, 0, 3):
> case IP_VERSION(2, 1, 0):
> case IP_VERSION(3, 0, 0):
> case IP_VERSION(3, 0, 2):
> @@ -745,8 +746,6 @@ static int amdgpu_discovery_set_display_ip_blocks(struct 
> amdgpu_device *adev)
> case IP_VERSION(3, 1, 3):
> amdgpu_device_ip_block_add(adev, _ip_block);
> break;
> -   case IP_VERSION(2, 0, 3):
> -   break;
> default:
> return -EINVAL;
> }
> --
> 2.25.1
>


Re: [PATCH v1 00/12] MEMORY_DEVICE_COHERENT for CPU-accessible coherent device memory

2021-10-12 Thread Matthew Wilcox
On Tue, Oct 12, 2021 at 11:39:57AM -0700, Andrew Morton wrote:
> Because I must ask: if this feature is for one single computer which
> presumably has a custom kernel, why add it to mainline Linux?

I think in particular patch 2 deserves to be merged because it removes
a ton of cruft from every call to put_page() (at least if you're using
a distro config).  It makes me nervous, but I think it's the right
thing to do.  It may well need more fixups after it has been merged,
but that's life.


Re: [PATCH v1 00/12] MEMORY_DEVICE_COHERENT for CPU-accessible coherent device memory

2021-10-12 Thread Andrew Morton
On Tue, 12 Oct 2021 15:56:29 -0300 Jason Gunthorpe  wrote:

> > To what other uses will this infrastructure be put?
> > 
> > Because I must ask: if this feature is for one single computer which
> > presumably has a custom kernel, why add it to mainline Linux?
> 
> Well, it certainly isn't just "one single computer". Overall I know of
> about, hmm, ~10 *datacenters* worth of installations that are using
> similar technology underpinnings.
> 
> "Frontier" is the code name for a specific installation but as the
> technology is proven out there will be many copies made of that same
> approach.
> 
> The previous program "Summit" was done with NVIDIA GPUs and PowerPC
> CPUs and also included a very similar capability. I think this is a
> good sign that this coherently attached accelerator will continue to
> be a theme in computing going forward. IIRC this was done using out of
> tree kernel patches and NUMA localities.
> 
> Specifically with CXL now being standardized and on a path to ubiquity
> I think we will see an explosion in deployments of coherently attached
> accelerator memory. This is the high end trickling down to wider
> usage.
> 
> I strongly think many CXL accelerators are going to want to manage
> their on-accelerator memory in this way as it makes universal sense to
> want to carefully manage memory access locality to optimize for
> performance.

Thanks.  Can we please get something like the above into the [0/n]
changelog?  Along with any other high-level info which is relevant?

It's rather important.  "why should I review this", "why should we
merge this", etc.



Re: [PATCH v1 00/12] MEMORY_DEVICE_COHERENT for CPU-accessible coherent device memory

2021-10-12 Thread Felix Kuehling
Am 2021-10-12 um 2:39 p.m. schrieb Andrew Morton:
> On Tue, 12 Oct 2021 12:12:35 -0500 Alex Sierra  wrote:
>
>> This patch series introduces MEMORY_DEVICE_COHERENT, a type of memory
>> owned by a device that can be mapped into CPU page tables like
>> MEMORY_DEVICE_GENERIC and can also be migrated like MEMORY_DEVICE_PRIVATE.
>> With MEMORY_DEVICE_COHERENT, we isolate the new memory type from other
>> subsystems as far as possible, though there are some small changes to
>> other subsystems such as filesystem DAX, to handle the new memory type
>> appropriately.
>>
>> We use ZONE_DEVICE for this instead of NUMA so that the amdgpu
>> allocator can manage it without conflicting with core mm for non-unified
>> memory use cases.
>>
>> How it works: The system BIOS advertises the GPU device memory (aka VRAM)
>> as SPM (special purpose memory) in the UEFI system address map.
>> The amdgpu driver registers the memory with devmap as
>> MEMORY_DEVICE_COHERENT using devm_memremap_pages.
>>
>> The initial user for this hardware page migration capability will be
>> the Frontier supercomputer project.
> To what other uses will this infrastructure be put?
>
> Because I must ask: if this feature is for one single computer which
> presumably has a custom kernel, why add it to mainline Linux?

I'm not sure this will be the only system with this architecture. This
is only the first one I know of. I hope it's not a one-off, after all
the work we did on it. ;)

The Linux kernel on this system is based on SLES. We are working with
SUSE on backporting patches needed for this system. However, those
patches need to be upstream first.

DEVICE_PUBLIC was removed because it had no users. We're trying to add
it (or something like it) back because we now have a use case for it.

Regards,
  Felix


>
>> Our nodes in the lab have .5 TB of
>> system memory plus 256 GB of device memory split across 4 GPUs, all in
>> the same coherent address space. Page migration is expected to improve
>> application efficiency significantly. We will report empirical results
>> as they become available.
>>
>> This includes patches originally by Ralph Campbell to change ZONE_DEVICE
>> reference counting as requested in previous reviews of this patch series
>> (see https://patchwork.freedesktop.org/series/90706/ We extended
>> hmm_test to cover migration of MEMORY_DEVICE_COHERENT. This patch set
>> builds on HMM and our SVM memory manager already merged in 5.14.
>> We would like to complete review and merge this migration patchset for
>> 5.16.


Re: [PATCH v1 00/12] MEMORY_DEVICE_COHERENT for CPU-accessible coherent device memory

2021-10-12 Thread Jason Gunthorpe
On Tue, Oct 12, 2021 at 11:39:57AM -0700, Andrew Morton wrote:
> On Tue, 12 Oct 2021 12:12:35 -0500 Alex Sierra  wrote:
> 
> > This patch series introduces MEMORY_DEVICE_COHERENT, a type of memory
> > owned by a device that can be mapped into CPU page tables like
> > MEMORY_DEVICE_GENERIC and can also be migrated like MEMORY_DEVICE_PRIVATE.
> > With MEMORY_DEVICE_COHERENT, we isolate the new memory type from other
> > subsystems as far as possible, though there are some small changes to
> > other subsystems such as filesystem DAX, to handle the new memory type
> > appropriately.
> > 
> > We use ZONE_DEVICE for this instead of NUMA so that the amdgpu
> > allocator can manage it without conflicting with core mm for non-unified
> > memory use cases.
> > 
> > How it works: The system BIOS advertises the GPU device memory (aka VRAM)
> > as SPM (special purpose memory) in the UEFI system address map.
> > The amdgpu driver registers the memory with devmap as
> > MEMORY_DEVICE_COHERENT using devm_memremap_pages.
> > 
> > The initial user for this hardware page migration capability will be
> > the Frontier supercomputer project.
> 
> To what other uses will this infrastructure be put?
> 
> Because I must ask: if this feature is for one single computer which
> presumably has a custom kernel, why add it to mainline Linux?

Well, it certainly isn't just "one single computer". Overall I know of
about, hmm, ~10 *datacenters* worth of installations that are using
similar technology underpinnings.

"Frontier" is the code name for a specific installation but as the
technology is proven out there will be many copies made of that same
approach.

The previous program "Summit" was done with NVIDIA GPUs and PowerPC
CPUs and also included a very similar capability. I think this is a
good sign that this coherently attached accelerator will continue to
be a theme in computing going forward. IIRC this was done using out of
tree kernel patches and NUMA localities.

Specifically with CXL now being standardized and on a path to ubiquity
I think we will see an explosion in deployments of coherently attached
accelerator memory. This is the high end trickling down to wider
usage.

I strongly think many CXL accelerators are going to want to manage
their on-accelerator memory in this way as it makes universal sense to
want to carefully manage memory access locality to optimize for
performance.

Jason


Re: [PATCH v1 00/12] MEMORY_DEVICE_COHERENT for CPU-accessible coherent device memory

2021-10-12 Thread Andrew Morton
On Tue, 12 Oct 2021 12:12:35 -0500 Alex Sierra  wrote:

> This patch series introduces MEMORY_DEVICE_COHERENT, a type of memory
> owned by a device that can be mapped into CPU page tables like
> MEMORY_DEVICE_GENERIC and can also be migrated like MEMORY_DEVICE_PRIVATE.
> With MEMORY_DEVICE_COHERENT, we isolate the new memory type from other
> subsystems as far as possible, though there are some small changes to
> other subsystems such as filesystem DAX, to handle the new memory type
> appropriately.
> 
> We use ZONE_DEVICE for this instead of NUMA so that the amdgpu
> allocator can manage it without conflicting with core mm for non-unified
> memory use cases.
> 
> How it works: The system BIOS advertises the GPU device memory (aka VRAM)
> as SPM (special purpose memory) in the UEFI system address map.
> The amdgpu driver registers the memory with devmap as
> MEMORY_DEVICE_COHERENT using devm_memremap_pages.
> 
> The initial user for this hardware page migration capability will be
> the Frontier supercomputer project.

To what other uses will this infrastructure be put?

Because I must ask: if this feature is for one single computer which
presumably has a custom kernel, why add it to mainline Linux?

> Our nodes in the lab have .5 TB of
> system memory plus 256 GB of device memory split across 4 GPUs, all in
> the same coherent address space. Page migration is expected to improve
> application efficiency significantly. We will report empirical results
> as they become available.
> 
> This includes patches originally by Ralph Campbell to change ZONE_DEVICE
> reference counting as requested in previous reviews of this patch series
> (see https://patchwork.freedesktop.org/series/90706/). We extended
> hmm_test to cover migration of MEMORY_DEVICE_COHERENT. This patch set
> builds on HMM and our SVM memory manager already merged in 5.14.
> We would like to complete review and merge this migration patchset for
> 5.16.



[PATCH v1 10/12] lib: add support for device coherent type in test_hmm

2021-10-12 Thread Alex Sierra
Device Coherent type uses device memory that is coherently accessible by
the CPU. This could be shown as SP (special purpose) memory range
at the BIOS-e820 memory enumeration. If no SP memory is supported in
system, this could be faked by setting CONFIG_EFI_FAKE_MEMMAP.

Currently, test_hmm only supports two different SP ranges of at least
256MB size. This could be specified in the kernel parameter variable
efi_fake_mem. Ex. Two SP ranges of 1GB starting at 0x1 &
0x14000 physical address. Ex.
efi_fake_mem=1G@0x1:0x4,1G@0x14000:0x4

Signed-off-by: Alex Sierra 
---
 lib/test_hmm.c  | 195 
 lib/test_hmm_uapi.h |  16 +++-
 2 files changed, 157 insertions(+), 54 deletions(-)

diff --git a/lib/test_hmm.c b/lib/test_hmm.c
index 70a9be0efa00..b349dd920f04 100644
--- a/lib/test_hmm.c
+++ b/lib/test_hmm.c
@@ -469,6 +469,7 @@ static int dmirror_allocate_chunk(struct dmirror_device 
*mdevice,
unsigned long pfn_first;
unsigned long pfn_last;
void *ptr;
+   int ret = -ENOMEM;
 
devmem = kzalloc(sizeof(*devmem), GFP_KERNEL);
if (!devmem)
@@ -551,7 +552,7 @@ static int dmirror_allocate_chunk(struct dmirror_device 
*mdevice,
}
spin_unlock(>lock);
 
-   return true;
+   return 0;
 
 err_release:
mutex_unlock(>devmem_lock);
@@ -560,7 +561,7 @@ static int dmirror_allocate_chunk(struct dmirror_device 
*mdevice,
 err_devmem:
kfree(devmem);
 
-   return false;
+   return ret;
 }
 
 static struct page *dmirror_devmem_alloc_page(struct dmirror_device *mdevice)
@@ -569,13 +570,14 @@ static struct page *dmirror_devmem_alloc_page(struct 
dmirror_device *mdevice)
struct page *rpage;
 
/*
-* This is a fake device so we alloc real system memory to store
-* our device memory.
+* For ZONE_DEVICE private type, this is a fake device so we alloc real
+* system memory to store our device memory.
+* For ZONE_DEVICE coherent type we use the actual dpage to store the 
data
+* and ignore rpage.
 */
rpage = alloc_page(GFP_HIGHUSER);
if (!rpage)
return NULL;
-
spin_lock(>lock);
 
if (mdevice->free_pages) {
@@ -603,7 +605,7 @@ static void dmirror_migrate_alloc_and_copy(struct 
migrate_vma *args,
   struct dmirror *dmirror)
 {
struct dmirror_device *mdevice = dmirror->mdevice;
-   const unsigned long *src = args->src;
+   unsigned long *src = args->src;
unsigned long *dst = args->dst;
unsigned long addr;
 
@@ -621,12 +623,17 @@ static void dmirror_migrate_alloc_and_copy(struct 
migrate_vma *args,
 * unallocated pte_none() or read-only zero page.
 */
spage = migrate_pfn_to_page(*src);
+   if (spage && is_zone_device_page(spage))
+   pr_err("page already in device spage pfn: 0x%lx\n",
+ page_to_pfn(spage));
+   BUG_ON(spage && is_zone_device_page(spage));
 
dpage = dmirror_devmem_alloc_page(mdevice);
if (!dpage)
continue;
 
-   rpage = dpage->zone_device_data;
+   rpage = is_device_private_page(dpage) ? dpage->zone_device_data 
:
+   dpage;
if (spage)
copy_highpage(rpage, spage);
else
@@ -638,8 +645,10 @@ static void dmirror_migrate_alloc_and_copy(struct 
migrate_vma *args,
 * the simulated device memory and that page holds the pointer
 * to the mirror.
 */
+   rpage = dpage->zone_device_data;
rpage->zone_device_data = dmirror;
-
+   pr_debug("migrating from sys to dev pfn src: 0x%lx pfn dst: 
0x%lx\n",
+page_to_pfn(spage), page_to_pfn(dpage));
*dst = migrate_pfn(page_to_pfn(dpage)) |
MIGRATE_PFN_LOCKED;
if ((*src & MIGRATE_PFN_WRITE) ||
@@ -673,10 +682,13 @@ static int dmirror_migrate_finalize_and_map(struct 
migrate_vma *args,
continue;
 
/*
-* Store the page that holds the data so the page table
-* doesn't have to deal with ZONE_DEVICE private pages.
+* For ZONE_DEVICE private pages we store the page that
+* holds the data so the page table doesn't have to deal it.
+* For ZONE_DEVICE coherent pages we store the actual page, 
since
+* the CPU has coherent access to the page.
 */
-   entry = dpage->zone_device_data;
+   entry = is_device_private_page(dpage) ? dpage->zone_device_data 
:
+   dpage;

[PATCH v1 12/12] tools: update test_hmm script to support SP config

2021-10-12 Thread Alex Sierra
Add two more parameters to set spm_addr_dev0 & spm_addr_dev1
addresses. These two parameters configure the start SP
addresses for each device in test_hmm driver.
Consequently, this configures zone device type as coherent.

Signed-off-by: Alex Sierra 
---
 tools/testing/selftests/vm/test_hmm.sh | 20 +---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/tools/testing/selftests/vm/test_hmm.sh 
b/tools/testing/selftests/vm/test_hmm.sh
index 0647b525a625..3eeabe94399f 100755
--- a/tools/testing/selftests/vm/test_hmm.sh
+++ b/tools/testing/selftests/vm/test_hmm.sh
@@ -40,7 +40,18 @@ check_test_requirements()
 
 load_driver()
 {
-   modprobe $DRIVER > /dev/null 2>&1
+   if [ $# -eq 0 ]; then
+   modprobe $DRIVER > /dev/null 2>&1
+   else
+   if [ $# -eq 2 ]; then
+   modprobe $DRIVER spm_addr_dev0=$1 spm_addr_dev1=$2
+   > /dev/null 2>&1
+   else
+   echo "Missing module parameters. Make sure pass"\
+   "spm_addr_dev0 and spm_addr_dev1"
+   usage
+   fi
+   fi
if [ $? == 0 ]; then
major=$(awk "\$2==\"HMM_DMIRROR\" {print \$1}" /proc/devices)
mknod /dev/hmm_dmirror0 c $major 0
@@ -58,7 +69,7 @@ run_smoke()
 {
echo "Running smoke test. Note, this test provides basic coverage."
 
-   load_driver
+   load_driver $1 $2
$(dirname "${BASH_SOURCE[0]}")/hmm-tests
unload_driver
 }
@@ -75,6 +86,9 @@ usage()
echo "# Smoke testing"
echo "./${TEST_NAME}.sh smoke"
echo
+   echo "# Smoke testing with SPM enabled"
+   echo "./${TEST_NAME}.sh smoke  "
+   echo
exit 0
 }
 
@@ -84,7 +98,7 @@ function run_test()
usage
else
if [ "$1" = "smoke" ]; then
-   run_smoke
+   run_smoke $2 $3
else
usage
fi
-- 
2.32.0



[PATCH v1 11/12] tools: update hmm-test to support device coherent type

2021-10-12 Thread Alex Sierra
Test cases such as migrate_fault and migrate_multiple,
were modified to explicit migrate from device to sys memory
without the need of page faults, when using device coherent
type.

Snapshot test case updated to read memory device type
first and, based on that, get the proper returned results.
migrate_ping_pong test case added to test explicit migration
from device to sys memory for both private and coherent zone
types.

Helpers to migrate from device to sys memory and vice versa
were also added.

Signed-off-by: Alex Sierra 
---
 tools/testing/selftests/vm/hmm-tests.c | 137 +
 1 file changed, 119 insertions(+), 18 deletions(-)

diff --git a/tools/testing/selftests/vm/hmm-tests.c 
b/tools/testing/selftests/vm/hmm-tests.c
index 5d1ac691b9f4..e7fa87618dd5 100644
--- a/tools/testing/selftests/vm/hmm-tests.c
+++ b/tools/testing/selftests/vm/hmm-tests.c
@@ -44,6 +44,7 @@ struct hmm_buffer {
int fd;
uint64_tcpages;
uint64_tfaults;
+   int zone_device_type;
 };
 
 #define TWOMEG (1 << 21)
@@ -144,6 +145,7 @@ static int hmm_dmirror_cmd(int fd,
}
buffer->cpages = cmd.cpages;
buffer->faults = cmd.faults;
+   buffer->zone_device_type = cmd.zone_device_type;
 
return 0;
 }
@@ -211,6 +213,32 @@ static void hmm_nanosleep(unsigned int n)
nanosleep(, NULL);
 }
 
+static int hmm_migrate_sys_to_dev(int fd,
+  struct hmm_buffer *buffer,
+  unsigned long npages)
+{
+   return hmm_dmirror_cmd(fd, HMM_DMIRROR_MIGRATE_TO_DEV, buffer, npages);
+}
+
+static int hmm_migrate_dev_to_sys(int fd,
+  struct hmm_buffer *buffer,
+  unsigned long npages)
+{
+   return hmm_dmirror_cmd(fd, HMM_DMIRROR_MIGRATE_TO_SYS, buffer, npages);
+}
+
+static int hmm_is_private_device(int fd, bool *res)
+{
+   struct hmm_buffer buffer;
+   int ret;
+
+   buffer.ptr = 0;
+   ret = hmm_dmirror_cmd(fd, HMM_DMIRROR_GET_MEM_DEV_TYPE, , 1);
+   *res = (buffer.zone_device_type == HMM_DMIRROR_MEMORY_DEVICE_PRIVATE);
+
+   return ret;
+}
+
 /*
  * Simple NULL test of device open/close.
  */
@@ -875,7 +903,7 @@ TEST_F(hmm, migrate)
ptr[i] = i;
 
/* Migrate memory to device. */
-   ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_MIGRATE, buffer, npages);
+   ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
ASSERT_EQ(ret, 0);
ASSERT_EQ(buffer->cpages, npages);
 
@@ -923,7 +951,7 @@ TEST_F(hmm, migrate_fault)
ptr[i] = i;
 
/* Migrate memory to device. */
-   ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_MIGRATE, buffer, npages);
+   ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
ASSERT_EQ(ret, 0);
ASSERT_EQ(buffer->cpages, npages);
 
@@ -936,7 +964,7 @@ TEST_F(hmm, migrate_fault)
ASSERT_EQ(ptr[i], i);
 
/* Migrate memory to the device again. */
-   ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_MIGRATE, buffer, npages);
+   ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
ASSERT_EQ(ret, 0);
ASSERT_EQ(buffer->cpages, npages);
 
@@ -976,7 +1004,7 @@ TEST_F(hmm, migrate_shared)
ASSERT_NE(buffer->ptr, MAP_FAILED);
 
/* Migrate memory to device. */
-   ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_MIGRATE, buffer, npages);
+   ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
ASSERT_EQ(ret, -ENOENT);
 
hmm_buffer_free(buffer);
@@ -1015,7 +1043,7 @@ TEST_F(hmm2, migrate_mixed)
p = buffer->ptr;
 
/* Migrating a protected area should be an error. */
-   ret = hmm_dmirror_cmd(self->fd1, HMM_DMIRROR_MIGRATE, buffer, npages);
+   ret = hmm_migrate_sys_to_dev(self->fd1, buffer, npages);
ASSERT_EQ(ret, -EINVAL);
 
/* Punch a hole after the first page address. */
@@ -1023,7 +1051,7 @@ TEST_F(hmm2, migrate_mixed)
ASSERT_EQ(ret, 0);
 
/* We expect an error if the vma doesn't cover the range. */
-   ret = hmm_dmirror_cmd(self->fd1, HMM_DMIRROR_MIGRATE, buffer, 3);
+   ret = hmm_migrate_sys_to_dev(self->fd1, buffer, 3);
ASSERT_EQ(ret, -EINVAL);
 
/* Page 2 will be a read-only zero page. */
@@ -1055,13 +1083,13 @@ TEST_F(hmm2, migrate_mixed)
 
/* Now try to migrate pages 2-5 to device 1. */
buffer->ptr = p + 2 * self->page_size;
-   ret = hmm_dmirror_cmd(self->fd1, HMM_DMIRROR_MIGRATE, buffer, 4);
+   ret = hmm_migrate_sys_to_dev(self->fd1, buffer, 4);
ASSERT_EQ(ret, 0);
ASSERT_EQ(buffer->cpages, 4);
 
/* Page 5 won't be migrated to device 0 because it's on device 1. */
buffer->ptr = p + 5 * self->page_size;
-   ret = hmm_dmirror_cmd(self->fd0, HMM_DMIRROR_MIGRATE, buffer, 1);
+   ret = hmm_migrate_sys_to_dev(self->fd0, buffer, 1);

[PATCH v1 09/12] lib: test_hmm add module param for zone device type

2021-10-12 Thread Alex Sierra
In order to configure device coherent in test_hmm, two module parameters
should be passed, which correspond to the SP start address of each
device (2) spm_addr_dev0 & spm_addr_dev1. If no parameters are passed,
private device type is configured.

Signed-off-by: Alex Sierra 
---
 lib/test_hmm.c  | 66 +++--
 lib/test_hmm_uapi.h |  1 +
 2 files changed, 47 insertions(+), 20 deletions(-)

diff --git a/lib/test_hmm.c b/lib/test_hmm.c
index 3cd91ca31dd7..70a9be0efa00 100644
--- a/lib/test_hmm.c
+++ b/lib/test_hmm.c
@@ -33,6 +33,16 @@
 #define DEVMEM_CHUNK_SIZE  (256 * 1024 * 1024U)
 #define DEVMEM_CHUNKS_RESERVE  16
 
+static unsigned long spm_addr_dev0;
+module_param(spm_addr_dev0, long, 0644);
+MODULE_PARM_DESC(spm_addr_dev0,
+   "Specify start address for SPM (special purpose memory) used 
for device 0. By setting this Coherent device type will be used. Make sure 
spm_addr_dev1 is set too");
+
+static unsigned long spm_addr_dev1;
+module_param(spm_addr_dev1, long, 0644);
+MODULE_PARM_DESC(spm_addr_dev1,
+   "Specify start address for SPM (special purpose memory) used 
for device 1. By setting this Coherent device type will be used. Make sure 
spm_addr_dev0 is set too");
+
 static const struct dev_pagemap_ops dmirror_devmem_ops;
 static const struct mmu_interval_notifier_ops dmirror_min_ops;
 static dev_t dmirror_dev;
@@ -450,11 +460,11 @@ static int dmirror_write(struct dmirror *dmirror, struct 
hmm_dmirror_cmd *cmd)
return ret;
 }
 
-static bool dmirror_allocate_chunk(struct dmirror_device *mdevice,
+static int dmirror_allocate_chunk(struct dmirror_device *mdevice,
   struct page **ppage)
 {
struct dmirror_chunk *devmem;
-   struct resource *res;
+   struct resource *res = NULL;
unsigned long pfn;
unsigned long pfn_first;
unsigned long pfn_last;
@@ -462,17 +472,29 @@ static bool dmirror_allocate_chunk(struct dmirror_device 
*mdevice,
 
devmem = kzalloc(sizeof(*devmem), GFP_KERNEL);
if (!devmem)
-   return false;
+   return -ENOMEM;
 
-   res = request_free_mem_region(_resource, DEVMEM_CHUNK_SIZE,
- "hmm_dmirror");
-   if (IS_ERR(res))
-   goto err_devmem;
+   if (!spm_addr_dev0 && !spm_addr_dev1) {
+   res = request_free_mem_region(_resource, 
DEVMEM_CHUNK_SIZE,
+ "hmm_dmirror");
+   if (IS_ERR_OR_NULL(res))
+   goto err_devmem;
+   devmem->pagemap.range.start = res->start;
+   devmem->pagemap.range.end = res->end;
+   devmem->pagemap.type = MEMORY_DEVICE_PRIVATE;
+   mdevice->zone_device_type = HMM_DMIRROR_MEMORY_DEVICE_PRIVATE;
+   } else if (spm_addr_dev0 && spm_addr_dev1) {
+   devmem->pagemap.range.start = MINOR(mdevice->cdevice.dev) ?
+   spm_addr_dev0 :
+   spm_addr_dev1;
+   devmem->pagemap.range.end = devmem->pagemap.range.start +
+   DEVMEM_CHUNK_SIZE - 1;
+   devmem->pagemap.type = MEMORY_DEVICE_COHERENT;
+   mdevice->zone_device_type = HMM_DMIRROR_MEMORY_DEVICE_COHERENT;
+   } else {
+   pr_err("Both spm_addr_dev parameters should be set\n");
+   }
 
-   mdevice->zone_device_type = HMM_DMIRROR_MEMORY_DEVICE_PRIVATE;
-   devmem->pagemap.type = MEMORY_DEVICE_PRIVATE;
-   devmem->pagemap.range.start = res->start;
-   devmem->pagemap.range.end = res->end;
devmem->pagemap.nr_range = 1;
devmem->pagemap.ops = _devmem_ops;
devmem->pagemap.owner = mdevice;
@@ -493,10 +515,14 @@ static bool dmirror_allocate_chunk(struct dmirror_device 
*mdevice,
mdevice->devmem_capacity = new_capacity;
mdevice->devmem_chunks = new_chunks;
}
-
ptr = memremap_pages(>pagemap, numa_node_id());
-   if (IS_ERR(ptr))
+   if (IS_ERR_OR_NULL(ptr)) {
+   if (ptr)
+   ret = PTR_ERR(ptr);
+   else
+   ret = -EFAULT;
goto err_release;
+   }
 
devmem->mdevice = mdevice;
pfn_first = devmem->pagemap.range.start >> PAGE_SHIFT;
@@ -529,7 +555,8 @@ static bool dmirror_allocate_chunk(struct dmirror_device 
*mdevice,
 
 err_release:
mutex_unlock(>devmem_lock);
-   release_mem_region(devmem->pagemap.range.start, 
range_len(>pagemap.range));
+   if (res)
+   release_mem_region(devmem->pagemap.range.start, 
range_len(>pagemap.range));
 err_devmem:
kfree(devmem);
 
@@ -1097,10 +1124,8 @@ static int dmirror_device_init(struct dmirror_device 
*mdevice, int id)
if (ret)
return ret;
 
-  

[PATCH v1 07/12] drm/amdkfd: coherent type as sys mem on migration to ram

2021-10-12 Thread Alex Sierra
Coherent device type memory on VRAM to RAM migration, has similar access
as System RAM from the CPU. This flag sets the source from the sender.
Which in Coherent type case, should be set as
MIGRATE_VMA_SELECT_DEVICE_COHERENT.

Signed-off-by: Alex Sierra 
Reviewed-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index 9efc97d55077..4ec7ac13f2b7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -617,9 +617,12 @@ svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct 
svm_range *prange,
migrate.vma = vma;
migrate.start = start;
migrate.end = end;
-   migrate.flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE;
migrate.pgmap_owner = SVM_ADEV_PGMAP_OWNER(adev);
 
+   if (adev->gmc.xgmi.connected_to_cpu)
+   migrate.flags = MIGRATE_VMA_SELECT_DEVICE_COHERENT;
+   else
+   migrate.flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE;
size = 2 * sizeof(*migrate.src) + sizeof(uint64_t) + sizeof(dma_addr_t);
size *= npages;
buf = kvmalloc(size, GFP_KERNEL | __GFP_ZERO);
-- 
2.32.0



[PATCH v1 08/12] lib: test_hmm add ioctl to get zone device type

2021-10-12 Thread Alex Sierra
new ioctl cmd added to query zone device type. This will be
used once the test_hmm adds zone device coherent type.

Signed-off-by: Alex Sierra 
---
 lib/test_hmm.c  | 15 ++-
 lib/test_hmm_uapi.h |  7 +++
 2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/lib/test_hmm.c b/lib/test_hmm.c
index 6998f10350ea..3cd91ca31dd7 100644
--- a/lib/test_hmm.c
+++ b/lib/test_hmm.c
@@ -82,6 +82,7 @@ struct dmirror_chunk {
 struct dmirror_device {
struct cdev cdevice;
struct hmm_devmem   *devmem;
+   unsigned intzone_device_type;
 
unsigned intdevmem_capacity;
unsigned intdevmem_count;
@@ -468,6 +469,7 @@ static bool dmirror_allocate_chunk(struct dmirror_device 
*mdevice,
if (IS_ERR(res))
goto err_devmem;
 
+   mdevice->zone_device_type = HMM_DMIRROR_MEMORY_DEVICE_PRIVATE;
devmem->pagemap.type = MEMORY_DEVICE_PRIVATE;
devmem->pagemap.range.start = res->start;
devmem->pagemap.range.end = res->end;
@@ -912,6 +914,15 @@ static int dmirror_snapshot(struct dmirror *dmirror,
return ret;
 }
 
+static int dmirror_get_device_type(struct dmirror *dmirror,
+   struct hmm_dmirror_cmd *cmd)
+{
+   mutex_lock(>mutex);
+   cmd->zone_device_type = dmirror->mdevice->zone_device_type;
+   mutex_unlock(>mutex);
+
+   return 0;
+}
 static long dmirror_fops_unlocked_ioctl(struct file *filp,
unsigned int command,
unsigned long arg)
@@ -952,7 +963,9 @@ static long dmirror_fops_unlocked_ioctl(struct file *filp,
case HMM_DMIRROR_SNAPSHOT:
ret = dmirror_snapshot(dmirror, );
break;
-
+   case HMM_DMIRROR_GET_MEM_DEV_TYPE:
+   ret = dmirror_get_device_type(dmirror, );
+   break;
default:
return -EINVAL;
}
diff --git a/lib/test_hmm_uapi.h b/lib/test_hmm_uapi.h
index 670b4ef2a5b6..ee88701793d5 100644
--- a/lib/test_hmm_uapi.h
+++ b/lib/test_hmm_uapi.h
@@ -26,6 +26,7 @@ struct hmm_dmirror_cmd {
__u64   npages;
__u64   cpages;
__u64   faults;
+   __u64   zone_device_type;
 };
 
 /* Expose the address space of the calling process through hmm device file */
@@ -33,6 +34,7 @@ struct hmm_dmirror_cmd {
 #define HMM_DMIRROR_WRITE  _IOWR('H', 0x01, struct hmm_dmirror_cmd)
 #define HMM_DMIRROR_MIGRATE_IOWR('H', 0x02, struct hmm_dmirror_cmd)
 #define HMM_DMIRROR_SNAPSHOT   _IOWR('H', 0x03, struct hmm_dmirror_cmd)
+#define HMM_DMIRROR_GET_MEM_DEV_TYPE   _IOWR('H', 0x04, struct hmm_dmirror_cmd)
 
 /*
  * Values returned in hmm_dmirror_cmd.ptr for HMM_DMIRROR_SNAPSHOT.
@@ -60,4 +62,9 @@ enum {
HMM_DMIRROR_PROT_DEV_PRIVATE_REMOTE = 0x30,
 };
 
+enum {
+   /* 0 is reserved to catch uninitialized type fields */
+   HMM_DMIRROR_MEMORY_DEVICE_PRIVATE = 1,
+};
+
 #endif /* _LIB_TEST_HMM_UAPI_H */
-- 
2.32.0



[PATCH v1 06/12] drm/amdkfd: add SPM support for SVM

2021-10-12 Thread Alex Sierra
When the CPU is connected through XGMI, it has coherent
access to VRAM resource. In this case that resource
is taken from a table in the device gmc aperture base.
This resource is used along with the device type, which could
be DEVICE_PRIVATE or DEVICE_COHERENT to create the device
page map region.

Signed-off-by: Alex Sierra 
Reviewed-by: Felix Kuehling 
---
v7:
Remove lookup_resource call, so export symbol for this function
is not longer required. Patch dropped "kernel: resource:
lookup_resource as exported symbol"
---
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 32 +++-
 1 file changed, 20 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index ffad39ffa8c6..9efc97d55077 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -866,7 +866,7 @@ int svm_migrate_init(struct amdgpu_device *adev)
 {
struct kfd_dev *kfddev = adev->kfd.dev;
struct dev_pagemap *pgmap;
-   struct resource *res;
+   struct resource *res = NULL;
unsigned long size;
void *r;
 
@@ -881,22 +881,29 @@ int svm_migrate_init(struct amdgpu_device *adev)
 * should remove reserved size
 */
size = ALIGN(adev->gmc.real_vram_size, 2ULL << 20);
-   res = devm_request_free_mem_region(adev->dev, _resource, size);
-   if (IS_ERR(res))
-   return -ENOMEM;
+   if (adev->gmc.xgmi.connected_to_cpu) {
+   pgmap->range.start = adev->gmc.aper_base;
+   pgmap->range.end = adev->gmc.aper_base + adev->gmc.aper_size - 
1;
+   pgmap->type = MEMORY_DEVICE_COHERENT;
+   } else {
+   res = devm_request_free_mem_region(adev->dev, _resource, 
size);
+   if (IS_ERR(res))
+   return -ENOMEM;
+   pgmap->range.start = res->start;
+   pgmap->range.end = res->end;
+   pgmap->type = MEMORY_DEVICE_PRIVATE;
+   }
 
-   pgmap->type = MEMORY_DEVICE_PRIVATE;
pgmap->nr_range = 1;
-   pgmap->range.start = res->start;
-   pgmap->range.end = res->end;
pgmap->ops = _migrate_pgmap_ops;
pgmap->owner = SVM_ADEV_PGMAP_OWNER(adev);
-   pgmap->flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE;
+   pgmap->flags = 0;
r = devm_memremap_pages(adev->dev, pgmap);
if (IS_ERR(r)) {
pr_err("failed to register HMM device memory\n");
-   devm_release_mem_region(adev->dev, res->start,
-   res->end - res->start + 1);
+   if (pgmap->type == MEMORY_DEVICE_PRIVATE)
+   devm_release_mem_region(adev->dev, res->start,
+   res->end - res->start + 1);
return PTR_ERR(r);
}
 
@@ -915,6 +922,7 @@ void svm_migrate_fini(struct amdgpu_device *adev)
struct dev_pagemap *pgmap = >kfd.dev->pgmap;
 
devm_memunmap_pages(adev->dev, pgmap);
-   devm_release_mem_region(adev->dev, pgmap->range.start,
-   pgmap->range.end - pgmap->range.start + 1);
+   if (pgmap->type == MEMORY_DEVICE_PRIVATE)
+   devm_release_mem_region(adev->dev, pgmap->range.start,
+   pgmap->range.end - pgmap->range.start + 
1);
 }
-- 
2.32.0



[PATCH v1 05/12] drm/amdkfd: ref count init for device pages

2021-10-12 Thread Alex Sierra
The ref counter of device pages is initialized to zero during memmap
zone init. The first time a new device page is allocated to migrate
data into it, its ref counter needs to be initialized to one.

Signed-off-by: Alex Sierra 
---
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index dab290a4d19d..ffad39ffa8c6 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -220,7 +220,8 @@ svm_migrate_get_vram_page(struct svm_range *prange, 
unsigned long pfn)
page = pfn_to_page(pfn);
svm_range_bo_ref(prange->svm_bo);
page->zone_device_data = prange->svm_bo;
-   get_page(page);
+   VM_BUG_ON_PAGE(page_ref_count(page), page);
+   init_page_count(page);
lock_page(page);
 }
 
-- 
2.32.0



[PATCH v1 04/12] mm: add device coherent vma selection for memory migration

2021-10-12 Thread Alex Sierra
This case is used to migrate pages from device memory back to system
memory. Device coherent type memory is cache coherent from the device and CPU
point of view.

Signed-off-by: Alex Sierra 
---
v2:
condition added for migrations from device coherent pages.
---
 include/linux/migrate.h | 1 +
 mm/migrate.c| 9 +++--
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index 4bb4e519e3f5..b1cae5073d69 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -156,6 +156,7 @@ static inline unsigned long migrate_pfn(unsigned long pfn)
 enum migrate_vma_direction {
MIGRATE_VMA_SELECT_SYSTEM = 1 << 0,
MIGRATE_VMA_SELECT_DEVICE_PRIVATE = 1 << 1,
+   MIGRATE_VMA_SELECT_DEVICE_COHERENT = 1 << 2,
 };
 
 struct migrate_vma {
diff --git a/mm/migrate.c b/mm/migrate.c
index 2bda612f3650..b40cd5a69f65 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -2406,8 +2406,6 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp,
if (is_write_device_private_entry(entry))
mpfn |= MIGRATE_PFN_WRITE;
} else {
-   if (!(migrate->flags & MIGRATE_VMA_SELECT_SYSTEM))
-   goto next;
pfn = pte_pfn(pte);
if (is_zero_pfn(pfn)) {
mpfn = MIGRATE_PFN_MIGRATE;
@@ -2415,6 +2413,13 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp,
goto next;
}
page = vm_normal_page(migrate->vma, addr, pte);
+   if (!is_zone_device_page(page) &&
+   !(migrate->flags & MIGRATE_VMA_SELECT_SYSTEM))
+   goto next;
+   if (is_zone_device_page(page) &&
+   (!(migrate->flags & 
MIGRATE_VMA_SELECT_DEVICE_COHERENT) ||
+page->pgmap->owner != migrate->pgmap_owner))
+   goto next;
mpfn = migrate_pfn(pfn) | MIGRATE_PFN_MIGRATE;
mpfn |= pte_write(pte) ? MIGRATE_PFN_WRITE : 0;
}
-- 
2.32.0



[PATCH v1 02/12] mm: remove extra ZONE_DEVICE struct page refcount

2021-10-12 Thread Alex Sierra
From: Ralph Campbell 

ZONE_DEVICE struct pages have an extra reference count that complicates the
code for put_page() and several places in the kernel that need to check the
reference count to see that a page is not being used (gup, compaction,
migration, etc.). Clean up the code so the reference count doesn't need to
be treated specially for ZONE_DEVICE.

Signed-off-by: Ralph Campbell 
Signed-off-by: Alex Sierra 
Reviewed-by: Christoph Hellwig 
---
v2:
AS: merged this patch in linux 5.11 version

v5:
AS: add a condition at try_grab_page to check for the zone device type when
the page ref counter is checked to be less than or equal to zero. In the
device zone case, page ref counters are initialized to zero.

v7:
AS: the condition added to try_grab_page in v5 is invalid. It was supposed
to fix the xfstests/generic/413 test; however, there's a known issue in
this test where DIO from a DAX-mapped area to non-DAX is expected to fail.
https://patchwork.kernel.org/project/fstests/patch/1489463960-3579-1-git-send-email-xz...@redhat.com
This condition was removed after rebase over patch series
https://lore.kernel.org/r/20210813044133.1536842-4-jhubb...@nvidia.com
---
 arch/powerpc/kvm/book3s_hv_uvmem.c |  2 +-
 drivers/gpu/drm/nouveau/nouveau_dmem.c |  2 +-
 fs/dax.c   |  4 +-
 include/linux/dax.h|  2 +-
 include/linux/memremap.h   |  7 +--
 include/linux/mm.h | 11 
 lib/test_hmm.c |  2 +-
 mm/internal.h  |  8 +++
 mm/memcontrol.c|  6 +--
 mm/memremap.c  | 69 +++---
 mm/migrate.c   |  5 --
 mm/page_alloc.c|  3 ++
 mm/swap.c  | 45 ++---
 13 files changed, 46 insertions(+), 120 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c 
b/arch/powerpc/kvm/book3s_hv_uvmem.c
index 84e5a2dc8be5..acee67710620 100644
--- a/arch/powerpc/kvm/book3s_hv_uvmem.c
+++ b/arch/powerpc/kvm/book3s_hv_uvmem.c
@@ -711,7 +711,7 @@ static struct page *kvmppc_uvmem_get_page(unsigned long 
gpa, struct kvm *kvm)
 
dpage = pfn_to_page(uvmem_pfn);
dpage->zone_device_data = pvt;
-   get_page(dpage);
+   init_page_count(dpage);
lock_page(dpage);
return dpage;
 out_clear:
diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c 
b/drivers/gpu/drm/nouveau/nouveau_dmem.c
index 92987daa5e17..8bc7120e1216 100644
--- a/drivers/gpu/drm/nouveau/nouveau_dmem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c
@@ -324,7 +324,7 @@ nouveau_dmem_page_alloc_locked(struct nouveau_drm *drm)
return NULL;
}
 
-   get_page(page);
+   init_page_count(page);
lock_page(page);
return page;
 }
diff --git a/fs/dax.c b/fs/dax.c
index c387d09e3e5a..1166630b7190 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -571,14 +571,14 @@ static void *grab_mapping_entry(struct xa_state *xas,
 
 /**
  * dax_layout_busy_page_range - find first pinned page in @mapping
- * @mapping: address space to scan for a page with ref count > 1
+ * @mapping: address space to scan for a page with ref count > 0
  * @start: Starting offset. Page containing 'start' is included.
  * @end: End offset. Page containing 'end' is included. If 'end' is LLONG_MAX,
  *   pages from 'start' till the end of file are included.
  *
  * DAX requires ZONE_DEVICE mapped pages. These pages are never
  * 'onlined' to the page allocator so they are considered idle when
- * page->count == 1. A filesystem uses this interface to determine if
+ * page->count == 0. A filesystem uses this interface to determine if
  * any page in the mapping is busy, i.e. for DMA, or other
  * get_user_pages() usages.
  *
diff --git a/include/linux/dax.h b/include/linux/dax.h
index 8b5da1d60dbc..05fc982ce153 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -245,7 +245,7 @@ static inline bool dax_mapping(struct address_space 
*mapping)
 
 static inline bool dax_page_unused(struct page *page)
 {
-   return page_ref_count(page) == 1;
+   return page_ref_count(page) == 0;
 }
 
 #define dax_wait_page(_inode, _page, _wait_cb) \
diff --git a/include/linux/memremap.h b/include/linux/memremap.h
index 45a79da89c5f..77ff5fd0685f 100644
--- a/include/linux/memremap.h
+++ b/include/linux/memremap.h
@@ -66,9 +66,10 @@ enum memory_type {
 
 struct dev_pagemap_ops {
/*
-* Called once the page refcount reaches 1.  (ZONE_DEVICE pages never
-* reach 0 refcount unless there is a refcount bug. This allows the
-* device driver to implement its own memory management.)
+* Called once the page refcount reaches 0. The reference count
+* should be reset to one with init_page_count(page) before reusing
+* the page. This allows the device driver to implement its own
+* memory management.
 */
void 

[PATCH v1 03/12] mm: add zone device coherent type memory support

2021-10-12 Thread Alex Sierra
Device memory that is cache coherent from both the device and CPU points
of view. This is used on platforms that have an advanced system bus (like
CAPI or CCIX). Any page of a process can be migrated to such memory.
However, no one should be allowed to pin such memory so that it can
always be evicted.

Signed-off-by: Alex Sierra 
---
 include/linux/memremap.h |  8 
 include/linux/mm.h   |  8 
 mm/memcontrol.c  |  6 +++---
 mm/memory-failure.c  |  6 +-
 mm/memremap.c|  2 ++
 mm/migrate.c | 19 ---
 6 files changed, 38 insertions(+), 11 deletions(-)

diff --git a/include/linux/memremap.h b/include/linux/memremap.h
index 77ff5fd0685f..d64cd2e8147a 100644
--- a/include/linux/memremap.h
+++ b/include/linux/memremap.h
@@ -39,6 +39,13 @@ struct vmem_altmap {
  * A more complete discussion of unaddressable memory may be found in
  * include/linux/hmm.h and Documentation/vm/hmm.rst.
  *
+ * MEMORY_DEVICE_COHERENT:
+ * Device memory that is cache coherent from device and CPU point of view. This
+ * is use on platform that have an advance system bus (like CAPI or CCIX). A
+ * driver can hotplug the device memory using ZONE_DEVICE and with that memory
+ * type. Any page of a process can be migrated to such memory. However no one
+ * should be allow to pin such memory so that it can always be evicted.
+ *
  * MEMORY_DEVICE_FS_DAX:
  * Host memory that has similar access semantics as System RAM i.e. DMA
  * coherent and supports page pinning. In support of coordinating page
@@ -59,6 +66,7 @@ struct vmem_altmap {
 enum memory_type {
/* 0 is reserved to catch uninitialized type fields */
MEMORY_DEVICE_PRIVATE = 1,
+   MEMORY_DEVICE_COHERENT,
MEMORY_DEVICE_FS_DAX,
MEMORY_DEVICE_GENERIC,
MEMORY_DEVICE_PCI_P2PDMA,
diff --git a/include/linux/mm.h b/include/linux/mm.h
index e24c904deeec..8bc697006a5c 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1187,6 +1187,14 @@ static inline bool is_device_private_page(const struct 
page *page)
page->pgmap->type == MEMORY_DEVICE_PRIVATE;
 }
 
+static inline bool is_device_page(const struct page *page)
+{
+   return IS_ENABLED(CONFIG_DEV_PAGEMAP_OPS) &&
+   is_zone_device_page(page) &&
+   (page->pgmap->type == MEMORY_DEVICE_PRIVATE ||
+   page->pgmap->type == MEMORY_DEVICE_COHERENT);
+}
+
 static inline bool is_pci_p2pdma_page(const struct page *page)
 {
return IS_ENABLED(CONFIG_DEV_PAGEMAP_OPS) &&
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 9a6bfb4fd36c..fe5a96428dce 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5526,8 +5526,8 @@ static int mem_cgroup_move_account(struct page *page,
  *   2(MC_TARGET_SWAP): if the swap entry corresponding to this pte is a
  * target for charge migration. if @target is not NULL, the entry is stored
  * in target->ent.
- *   3(MC_TARGET_DEVICE): like MC_TARGET_PAGE  but page is 
MEMORY_DEVICE_PRIVATE
- * (so ZONE_DEVICE page and thus not on the lru).
+ *   3(MC_TARGET_DEVICE): like MC_TARGET_PAGE  but page is 
MEMORY_DEVICE_COHERENT
+ * or MEMORY_DEVICE_PRIVATE (so ZONE_DEVICE page and thus not on the lru).
  * For now we such page is charge like a regular page would be as for all
  * intent and purposes it is just special memory taking the place of a
  * regular page.
@@ -5561,7 +5561,7 @@ static enum mc_target_type get_mctgt_type(struct 
vm_area_struct *vma,
 */
if (page_memcg(page) == mc.from) {
ret = MC_TARGET_PAGE;
-   if (is_device_private_page(page))
+   if (is_device_page(page))
ret = MC_TARGET_DEVICE;
if (target)
target->page = page;
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 6f5f78885ab4..1076f5a07370 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1373,12 +1373,16 @@ static int memory_failure_dev_pagemap(unsigned long 
pfn, int flags,
goto unlock;
}
 
-   if (pgmap->type == MEMORY_DEVICE_PRIVATE) {
+   switch (pgmap->type) {
+   case MEMORY_DEVICE_PRIVATE:
+   case MEMORY_DEVICE_COHERENT:
/*
 * TODO: Handle HMM pages which may need coordination
 * with device-side memory.
 */
goto unlock;
+   default:
+   break;
}
 
/*
diff --git a/mm/memremap.c b/mm/memremap.c
index ab949a571e78..56033955d1f4 100644
--- a/mm/memremap.c
+++ b/mm/memremap.c
@@ -294,6 +294,7 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid)
 
switch (pgmap->type) {
case MEMORY_DEVICE_PRIVATE:
+   case MEMORY_DEVICE_COHERENT:
if (!IS_ENABLED(CONFIG_DEVICE_PRIVATE)) {
WARN(1, "Device private memory not supported\n");

[PATCH v1 01/12] ext4/xfs: add page refcount helper

2021-10-12 Thread Alex Sierra
From: Ralph Campbell 

There are several places where ZONE_DEVICE struct pages assume a reference
count == 1 means the page is idle and free. Instead of open coding this,
add a helper function to hide this detail.

Signed-off-by: Ralph Campbell 
Signed-off-by: Alex Sierra 
Reviewed-by: Christoph Hellwig 
Acked-by: Theodore Ts'o 
Acked-by: Darrick J. Wong 
---
v3:
[AS]: rename dax_layout_is_idle_page func to dax_page_unused

v4:
[AS]: This ref count functionality was missing on fuse/dax.c.
---
 fs/dax.c|  4 ++--
 fs/ext4/inode.c |  5 +
 fs/fuse/dax.c   |  4 +---
 fs/xfs/xfs_file.c   |  4 +---
 include/linux/dax.h | 10 ++
 5 files changed, 15 insertions(+), 12 deletions(-)

diff --git a/fs/dax.c b/fs/dax.c
index 62352cbcf0f4..c387d09e3e5a 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -369,7 +369,7 @@ static void dax_disassociate_entry(void *entry, struct 
address_space *mapping,
for_each_mapped_pfn(entry, pfn) {
struct page *page = pfn_to_page(pfn);
 
-   WARN_ON_ONCE(trunc && page_ref_count(page) > 1);
+   WARN_ON_ONCE(trunc && !dax_page_unused(page));
WARN_ON_ONCE(page->mapping && page->mapping != mapping);
page->mapping = NULL;
page->index = 0;
@@ -383,7 +383,7 @@ static struct page *dax_busy_page(void *entry)
for_each_mapped_pfn(entry, pfn) {
struct page *page = pfn_to_page(pfn);
 
-   if (page_ref_count(page) > 1)
+   if (!dax_page_unused(page))
return page;
}
return NULL;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index fe6045a46599..05ffe6875cb1 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3971,10 +3971,7 @@ int ext4_break_layouts(struct inode *inode)
if (!page)
return 0;
 
-   error = ___wait_var_event(>_refcount,
-   atomic_read(>_refcount) == 1,
-   TASK_INTERRUPTIBLE, 0, 0,
-   ext4_wait_dax_page(ei));
+   error = dax_wait_page(ei, page, ext4_wait_dax_page);
} while (error == 0);
 
return error;
diff --git a/fs/fuse/dax.c b/fs/fuse/dax.c
index ff99ab2a3c43..2b1f190ba78a 100644
--- a/fs/fuse/dax.c
+++ b/fs/fuse/dax.c
@@ -677,9 +677,7 @@ static int __fuse_dax_break_layouts(struct inode *inode, 
bool *retry,
return 0;
 
*retry = true;
-   return ___wait_var_event(>_refcount,
-   atomic_read(>_refcount) == 1, TASK_INTERRUPTIBLE,
-   0, 0, fuse_wait_dax_page(inode));
+   return dax_wait_page(inode, page, fuse_wait_dax_page);
 }
 
 /* dmap_end == 0 leads to unmapping of whole file */
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 396ef36dcd0a..182057281086 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -840,9 +840,7 @@ xfs_break_dax_layouts(
return 0;
 
*retry = true;
-   return ___wait_var_event(>_refcount,
-   atomic_read(>_refcount) == 1, TASK_INTERRUPTIBLE,
-   0, 0, xfs_wait_dax_page(inode));
+   return dax_wait_page(inode, page, xfs_wait_dax_page);
 }
 
 int
diff --git a/include/linux/dax.h b/include/linux/dax.h
index b52f084aa643..8b5da1d60dbc 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -243,6 +243,16 @@ static inline bool dax_mapping(struct address_space 
*mapping)
return mapping->host && IS_DAX(mapping->host);
 }
 
+static inline bool dax_page_unused(struct page *page)
+{
+   return page_ref_count(page) == 1;
+}
+
+#define dax_wait_page(_inode, _page, _wait_cb) \
+   ___wait_var_event(&(_page)->_refcount,  \
+   dax_page_unused(_page), \
+   TASK_INTERRUPTIBLE, 0, 0, _wait_cb(_inode))
+
 #ifdef CONFIG_DEV_DAX_HMEM_DEVICES
 void hmem_register_device(int target_nid, struct resource *r);
 #else
-- 
2.32.0



[PATCH v1 00/12] MEMORY_DEVICE_COHERENT for CPU-accessible coherent device memory

2021-10-12 Thread Alex Sierra
This patch series introduces MEMORY_DEVICE_COHERENT, a type of memory
owned by a device that can be mapped into CPU page tables like
MEMORY_DEVICE_GENERIC and can also be migrated like MEMORY_DEVICE_PRIVATE.
With MEMORY_DEVICE_COHERENT, we isolate the new memory type from other
subsystems as far as possible, though there are some small changes to
other subsystems such as filesystem DAX, to handle the new memory type
appropriately.

We use ZONE_DEVICE for this instead of NUMA so that the amdgpu
allocator can manage it without conflicting with core mm for non-unified
memory use cases.

How it works: The system BIOS advertises the GPU device memory (aka VRAM)
as SPM (special purpose memory) in the UEFI system address map.
The amdgpu driver registers the memory with devmap as
MEMORY_DEVICE_COHERENT using devm_memremap_pages.

The initial user for this hardware page migration capability will be
the Frontier supercomputer project. Our nodes in the lab have .5 TB of
system memory plus 256 GB of device memory split across 4 GPUs, all in
the same coherent address space. Page migration is expected to improve
application efficiency significantly. We will report empirical results
as they become available.

This includes patches originally by Ralph Campbell to change ZONE_DEVICE
reference counting as requested in previous reviews of this patch series
(see https://patchwork.freedesktop.org/series/90706/). We extended
hmm_test to cover migration of MEMORY_DEVICE_COHERENT. This patch set
builds on HMM and our SVM memory manager already merged in 5.14.
We would like to complete review and merge this migration patchset for
5.16.

Alex Sierra (10):
  mm: add zone device coherent type memory support
  mm: add device coherent vma selection for memory migration
  drm/amdkfd: ref count init for device pages
  drm/amdkfd: add SPM support for SVM
  drm/amdkfd: coherent type as sys mem on migration to ram
  lib: test_hmm add ioctl to get zone device type
  lib: test_hmm add module param for zone device type
  lib: add support for device coherent type in test_hmm
  tools: update hmm-test to support device coherent type
  tools: update test_hmm script to support SP config

Ralph Campbell (2):
  ext4/xfs: add page refcount helper
  mm: remove extra ZONE_DEVICE struct page refcount

 arch/powerpc/kvm/book3s_hv_uvmem.c   |   2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c |  40 ++--
 drivers/gpu/drm/nouveau/nouveau_dmem.c   |   2 +-
 fs/dax.c |   8 +-
 fs/ext4/inode.c  |   5 +-
 fs/fuse/dax.c|   4 +-
 fs/xfs/xfs_file.c|   4 +-
 include/linux/dax.h  |  10 +
 include/linux/memremap.h |  15 +-
 include/linux/migrate.h  |   1 +
 include/linux/mm.h   |  19 +-
 lib/test_hmm.c   | 276 +--
 lib/test_hmm_uapi.h  |  20 +-
 mm/internal.h|   8 +
 mm/memcontrol.c  |  12 +-
 mm/memory-failure.c  |   6 +-
 mm/memremap.c|  71 ++
 mm/migrate.c |  33 +--
 mm/page_alloc.c  |   3 +
 mm/swap.c|  45 +---
 tools/testing/selftests/vm/hmm-tests.c   | 137 +--
 tools/testing/selftests/vm/test_hmm.sh   |  20 +-
 22 files changed, 490 insertions(+), 251 deletions(-)

-- 
2.32.0



[PATCH] drm/amdgpu/gfx10: fix typo in gfx_v10_0_update_gfx_clock_gating()

2021-10-12 Thread Alex Deucher
Check was incorrectly converted to IP version checking.

Fixes: 4b0ad8425498ba ("drm/amdgpu/gfx10: convert to IP version checking")
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 71bb3c0dc1da..8cec03949835 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -8238,8 +8238,9 @@ static int gfx_v10_0_update_gfx_clock_gating(struct 
amdgpu_device *adev,
/* ===  CGCG + CGLS === */
gfx_v10_0_update_coarse_grain_clock_gating(adev, enable);
 
-   if ((adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 1, 10)) &&
-(adev->ip_versions[GC_HWIP][0] <= IP_VERSION(10, 1, 2)))
+   if ((adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 10)) ||
+   (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 1)) ||
+   (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 2)))

gfx_v10_0_apply_medium_grain_clock_gating_workaround(adev);
} else {
/* CGCG/CGLS should be disabled before MGCG/MGLS
-- 
2.31.1



[PATCH 3/3] drm/amdgpu/psp: add some missing cases to psp_check_pmfw_centralized_cstate_management

2021-10-12 Thread Alex Deucher
Missed a few asics.

Fixes: 82d05736c47b19 ("drm/amdgpu/amdgpu_psp: convert to IP version checking")
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 6b39e6c02dd8..51620f2fc43a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -77,7 +77,9 @@ static void 
psp_check_pmfw_centralized_cstate_management(struct psp_context *psp
}
 
switch (adev->ip_versions[MP0_HWIP][0]) {
+   case IP_VERSION(11, 0, 0):
case IP_VERSION(11, 0, 4):
+   case IP_VERSION(11, 0, 5):
case IP_VERSION(11, 0, 7):
case IP_VERSION(11, 0, 9):
case IP_VERSION(11, 0, 11):
-- 
2.31.1



[PATCH 2/3] drm/amdgpu/swsmu: fix is_support_sw_smu() for VEGA20

2021-10-12 Thread Alex Deucher
VEGA20 is 11.0.2, but it's handled by powerplay, not
swsmu.

Fixes: a8967967f6a554 ("drm/amdgpu/amdgpu_smu: convert to IP version checking")
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c 
b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
index 4ea7e90ef60d..f5bf3ab0ebad 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
@@ -455,6 +455,10 @@ static int smu_get_power_num_states(void *handle,
 
 bool is_support_sw_smu(struct amdgpu_device *adev)
 {
+   /* vega20 is 11.0.2, but it's supported via the powerplay code */
+   if (adev->asic_type == CHIP_VEGA20)
+   return false;
+
if (adev->ip_versions[MP1_HWIP][0] >= IP_VERSION(11, 0, 0))
return true;
 
-- 
2.31.1



[PATCH 1/3] drm/amdgpu/smu11: fix firmware version check for vangogh

2021-10-12 Thread Alex Deucher
Was missed in the conversion to IP version checking.

Fixes: af3b89d3a639d5 ("drm/amdgpu/smu11.0: convert to IP version checking")
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
index 3470c33ee09d..6d008e9c2f65 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
@@ -255,7 +255,7 @@ int smu_v11_0_check_fw_version(struct smu_context *smu)
case IP_VERSION(11, 0, 11):
smu->smc_driver_if_version = 
SMU11_DRIVER_IF_VERSION_Navy_Flounder;
break;
-   case CHIP_VANGOGH:
+   case IP_VERSION(11, 5, 0):
smu->smc_driver_if_version = SMU11_DRIVER_IF_VERSION_VANGOGH;
break;
case IP_VERSION(11, 0, 12):
-- 
2.31.1



RE: [PATCH 2/2] drm/amdgpu: Fix RAS page retirement with mode2 reset on Aldebaran

2021-10-12 Thread Joshi, Mukul
[AMD Official Use Only]

Thanks Tao.
I will add a comment as you suggested before committing the change.

Regards,
Mukul

From: Zhou1, Tao 
Sent: Monday, October 11, 2021 11:55 PM
To: Joshi, Mukul ; amd-gfx@lists.freedesktop.org
Cc: Clements, John 
Subject: Re: [PATCH 2/2] drm/amdgpu: Fix RAS page retirement with mode2 reset 
on Aldebaran


[AMD Official Use Only]

The patch looks good for me, but it's better to add comment in 
amdgpu_register_bad_pages_mca_notifier to explain why we need to reserve GPU 
info instead of using mgpu_info list, with this addressed, the patch is:

Reviewed-by: Tao Zhou mailto:tao.zh...@amd.com>>


From: Joshi, Mukul mailto:mukul.jo...@amd.com>>
Sent: Tuesday, October 12, 2021 10:33 AM
To: amd-gfx@lists.freedesktop.org 
mailto:amd-gfx@lists.freedesktop.org>>
Cc: Zhou1, Tao mailto:tao.zh...@amd.com>>; Clements, John 
mailto:john.cleme...@amd.com>>; Joshi, Mukul 
mailto:mukul.jo...@amd.com>>
Subject: [PATCH 2/2] drm/amdgpu: Fix RAS page retirement with mode2 reset on 
Aldebaran

During mode2 reset, the GPU is temporarily removed from the
mgpu_info list. As a result, page retirement fails because it
cannot find the GPU in the GPU list.
To fix this, create our own list of GPUs that support MCE notifier
based page retirement and use that list to check if the UMC error
occurred on a GPU that supports MCE notifier based page retirement.

Signed-off-by: Mukul Joshi mailto:mukul.jo...@amd.com>>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 24 
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index e8875351967e..e8d88c77eb46 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -112,7 +112,12 @@ static bool amdgpu_ras_check_bad_page_unlock(struct 
amdgpu_ras *con,
 static bool amdgpu_ras_check_bad_page(struct amdgpu_device *adev,
 uint64_t addr);
 #ifdef CONFIG_X86_MCE_AMD
-static void amdgpu_register_bad_pages_mca_notifier(void);
+static void amdgpu_register_bad_pages_mca_notifier(struct amdgpu_device *adev);
+struct mce_notifier_adev_list {
+   struct amdgpu_device *devs[MAX_GPU_INSTANCE];
+   int num_gpu;
+};
+static struct mce_notifier_adev_list mce_adev_list;
 #endif

 void amdgpu_ras_set_error_query_ready(struct amdgpu_device *adev, bool ready)
@@ -2108,7 +2113,7 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev)
 #ifdef CONFIG_X86_MCE_AMD
 if ((adev->asic_type == CHIP_ALDEBARAN) &&
 (adev->gmc.xgmi.connected_to_cpu))
-   amdgpu_register_bad_pages_mca_notifier();
+   amdgpu_register_bad_pages_mca_notifier(adev);
 #endif
 return 0;

@@ -2605,24 +2610,18 @@ void amdgpu_release_ras_context(struct amdgpu_device 
*adev)
 #ifdef CONFIG_X86_MCE_AMD
 static struct amdgpu_device *find_adev(uint32_t node_id)
 {
-   struct amdgpu_gpu_instance *gpu_instance;
 int i;
 struct amdgpu_device *adev = NULL;

-   mutex_lock(_info.mutex);
-
-   for (i = 0; i < mgpu_info.num_gpu; i++) {
-   gpu_instance = &(mgpu_info.gpu_ins[i]);
-   adev = gpu_instance->adev;
+   for (i = 0; i < mce_adev_list.num_gpu; i++) {
+   adev = mce_adev_list.devs[i];

-   if (adev->gmc.xgmi.connected_to_cpu &&
+   if (adev && adev->gmc.xgmi.connected_to_cpu &&
 adev->gmc.xgmi.physical_node_id == node_id)
 break;
 adev = NULL;
 }

-   mutex_unlock(_info.mutex);
-
 return adev;
 }

@@ -2718,8 +2717,9 @@ static struct notifier_block amdgpu_bad_page_nb = {
 .priority   = MCE_PRIO_UC,
 };

-static void amdgpu_register_bad_pages_mca_notifier(void)
+static void amdgpu_register_bad_pages_mca_notifier(struct amdgpu_device *adev)
 {
+   mce_adev_list.devs[mce_adev_list.num_gpu++] = adev;
 /*
  * Register the x86 notifier only once
  * with MCE subsystem.
--
2.33.0


Re: [PATCH v5] amd/display: only require overlay plane to cover whole CRTC on ChromeOS

2021-10-12 Thread Harry Wentland
On 2021-10-11 11:16, Simon Ser wrote:
> Commit ddab8bd788f5 ("drm/amd/display: Fix two cursor duplication when
> using overlay") changed the atomic validation code to forbid the
> overlay plane from being used if it doesn't cover the whole CRTC. The
> motivation is that ChromeOS uses the atomic API for everything except
> the cursor plane (which uses the legacy API). Thus amdgpu must always
> be prepared to enable/disable/move the cursor plane at any time without
> failing (or else ChromeOS will trip over).
> 
> As discussed in [1], there's no reason why the ChromeOS limitation
> should prevent other fully atomic users from taking advantage of the
> overlay plane. Let's limit the check to ChromeOS.
> 
> v4: fix ChromeOS detection (Harry)
> 
> v5: fix conflict with linux-next
> 
> [1]: 
> https://lore.kernel.org/amd-gfx/JIQ_93_cHcshiIDsrMU1huBzx9P9LVQxucx8hQArpQu7Wk5DrCl_vTXj_Q20m_L-8C8A5dSpNcSJ8ehfcCrsQpfB5QG_Spn14EYkH9chtg0=@emersion.fr/>>
>  
> Signed-off-by: Simon Ser 
> Cc: Alex Deucher 
> Cc: Harry Wentland 
> Cc: Nicholas Kazlauskas 
> Cc: Bas Nieuwenhuizen 
> Cc: Rodrigo Siqueira 
> Cc: Sean Paul 
> Fixes: ddab8bd788f5 ("drm/amd/display: Fix two cursor duplication when using 
> overlay")

Reviewed-by: Harry Wentland 

Harry

> ---
>  .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 29 +++
>  1 file changed, 29 insertions(+)
> 
> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
> b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> index f35561b5a465..2eeda1fec506 100644
> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> @@ -10594,6 +10594,31 @@ static int add_affected_mst_dsc_crtcs(struct 
> drm_atomic_state *state, struct drm
>  }
>  #endif
>  
> +static bool is_chromeos(void)
> +{
> + struct mm_struct *mm = current->mm;
> + struct file *exe_file;
> + bool ret;
> +
> + /* ChromeOS renames its thread to DrmThread. Also check the executable
> +  * name. */
> + if (strcmp(current->comm, "DrmThread") != 0 || !mm)
> + return false;
> +
> + rcu_read_lock();
> + exe_file = rcu_dereference(mm->exe_file);
> + if (exe_file && !get_file_rcu(exe_file))
> + exe_file = NULL;
> + rcu_read_unlock();
> +
> + if (!exe_file)
> + return false;
> + ret = strcmp(exe_file->f_path.dentry->d_name.name, "chrome") == 0;
> + fput(exe_file);
> +
> + return ret;
> +}
> +
>  static int validate_overlay(struct drm_atomic_state *state)
>  {
>   int i;
> @@ -10601,6 +10626,10 @@ static int validate_overlay(struct drm_atomic_state 
> *state)
>   struct drm_plane_state *new_plane_state;
>   struct drm_plane_state *primary_state, *overlay_state = NULL;
>  
> + /* This is a workaround for ChromeOS only */
> + if (!is_chromeos())
> + return 0;
> +
>   /* Check if primary plane is contained inside overlay */
>   for_each_new_plane_in_state_reverse(state, plane, new_plane_state, i) {
>   if (plane->type == DRM_PLANE_TYPE_OVERLAY) {
> 



Re: [PATCH 1/2] drm/amdkfd: fix boot failure when iommu is disabled in Picasso.

2021-10-12 Thread Zhu, James
[AMD Official Use Only]




From: Zhang, Yifan 
Sent: Monday, October 11, 2021 8:57 AM
To: amd-gfx@lists.freedesktop.org 
Cc: Kuehling, Felix ; Zhu, James ; 
youling...@gmail.com ; Zhang, Yifan 
Subject: [PATCH 1/2] drm/amdkfd: fix boot failure when iommu is disabled in 
Picasso.

When IOMMU is disabled in the sbios and kfd is in the iommuv2 path,
iommuv2 init will fail. But this failure should not block amdgpu driver init.

Reported-by: youling 
Tested-by: youling 
Signed-off-by: Yifan Zhang 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 
 drivers/gpu/drm/amd/amdkfd/kfd_device.c| 3 +++
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index af9bdf16eefd..9dfcef2015c8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2432,10 +2432,6 @@ static int amdgpu_device_ip_init(struct amdgpu_device 
*adev)
 if (!adev->gmc.xgmi.pending_reset)
 amdgpu_amdkfd_device_init(adev);

-   r = amdgpu_amdkfd_resume_iommu(adev);
-   if (r)
-   goto init_failed;
-
 amdgpu_fru_get_product_info(adev);

 init_failed:
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 4a416231b24c..bb652ee35c25 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -920,6 +920,9 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
 goto device_iommu_error;
 }

+   if(kgd2kfd_resume_iommu(kfd))
+   goto device_iommu_error;
+
 kfd_cwsr_init(kfd);

 svm_migrate_init((struct amdgpu_device *)kfd->kgd);
[JZ] Move the above change to here, the device init sequence will be closer to 
initial design.
--
2.25.1



Re: [PATCH] drm/amd/display: fix null pointer deref when plugging in display

2021-10-12 Thread Harry Wentland
On 2021-10-12 09:26, Aurabindo Pillai wrote:
> [Why]
> When system boots in headless mode, connecting a 4k display creates a
> null pointer dereference due to hubp for a certain plane being null.
> Add a condition to check for null hubp before dereferencing it.
> 
> Signed-off-by: Aurabindo Pillai 

Reviewed-by: Harry Wentland 

Harry

> ---
>  drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c 
> b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c
> index 01a90badd173..2936a334cd64 100644
> --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c
> +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c
> @@ -969,7 +969,8 @@ void dcn30_set_disp_pattern_generator(const struct dc *dc,
>   /* turning off DPG */
>   
> pipe_ctx->plane_res.hubp->funcs->set_blank(pipe_ctx->plane_res.hubp, false);
>   for (mpcc_pipe = pipe_ctx->bottom_pipe; mpcc_pipe; mpcc_pipe = 
> mpcc_pipe->bottom_pipe)
> - 
> mpcc_pipe->plane_res.hubp->funcs->set_blank(mpcc_pipe->plane_res.hubp, false);
> + if (mpcc_pipe->plane_res.hubp)
> + 
> mpcc_pipe->plane_res.hubp->funcs->set_blank(mpcc_pipe->plane_res.hubp, false);
>  
>   
> stream_res->opp->funcs->opp_set_disp_pattern_generator(stream_res->opp, 
> test_pattern, color_space,
>   color_depth, solid_color, width, height, 
> offset);
> 



Re: [PATCH] drm/amd/display: Fix surface optimization regression on Carrizo

2021-10-12 Thread Harry Wentland



On 2021-10-12 10:11, Nicholas Kazlauskas wrote:
> [Why]
> DCE legacy optimization path isn't well tested under new DC optimization
> flow which can result in underflow occuring when initializing X11 on
> Carrizo.
> 
> [How]
> Retain the legacy optimization flow for DCE and keep the new one for DCN
> to satisfy optimizations being correctly applied for ASIC that can
> support it.
> 
> Fixes: ab37c6527bb1 ("drm/amd/display: Optimize bandwidth on following fast 
> update")
> Cc: Bhawanpreet Lakha 
> Cc: Mikita Lipski 
> Reported-by: Tom St Denis 
> Signed-off-by: Nicholas Kazlauskas 

Reviewed-by: Harry Wentland 

Harry

> ---
>  drivers/gpu/drm/amd/display/dc/core/dc.c | 15 +--
>  1 file changed, 13 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c 
> b/drivers/gpu/drm/amd/display/dc/core/dc.c
> index da942e9f5142..f9876e429f26 100644
> --- a/drivers/gpu/drm/amd/display/dc/core/dc.c
> +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
> @@ -3118,8 +3118,13 @@ void dc_commit_updates_for_stream(struct dc *dc,
>   if (new_pipe->plane_state && new_pipe->plane_state != 
> old_pipe->plane_state)
>   new_pipe->plane_state->force_full_update = true;
>   }
> - } else if (update_type == UPDATE_TYPE_FAST) {
> - /* Previous frame finished and HW is ready for optimization. */
> + } else if (update_type == UPDATE_TYPE_FAST && dc_ctx->dce_version >= 
> DCE_VERSION_MAX) {
> + /*
> +  * Previous frame finished and HW is ready for optimization.
> +  *
> +  * Only relevant for DCN behavior where we can guarantee the 
> optimization
> +  * is safe to apply - retain the legacy behavior for DCE.
> +  */
>   dc_post_update_surfaces_to_stream(dc);
>   }
>  
> @@ -3178,6 +3183,12 @@ void dc_commit_updates_for_stream(struct dc *dc,
>   }
>   }
>  
> + /* Legacy optimization path for DCE. */
> + if (update_type >= UPDATE_TYPE_FULL && dc_ctx->dce_version < 
> DCE_VERSION_MAX) {
> + dc_post_update_surfaces_to_stream(dc);
> + TRACE_DCE_CLOCK_STATE(&context->bw_ctx.bw.dce);
> + }
> +
>   return;
>  
>  }
> 



Re: [PATCH] drm/amd/display: Fix surface optimization regression on Carrizo

2021-10-12 Thread StDenis, Tom
[AMD Official Use Only]

Tested-by: Tom St Denis 

Thanks.


From: Alex Deucher 
Sent: Tuesday, October 12, 2021 10:15
To: Kazlauskas, Nicholas
Cc: amd-gfx list; Lakha, Bhawanpreet; Lipski, Mikita; StDenis, Tom
Subject: Re: [PATCH] drm/amd/display: Fix surface optimization regression on 
Carrizo

On Tue, Oct 12, 2021 at 10:11 AM Nicholas Kazlauskas
 wrote:
>
> [Why]
> DCE legacy optimization path isn't well tested under new DC optimization
> flow which can result in underflow occuring when initializing X11 on
> Carrizo.
>
> [How]
> Retain the legacy optimization flow for DCE and keep the new one for DCN
> to satisfy optimizations being correctly applied for ASIC that can
> support it.
>
> Fixes: ab37c6527bb1 ("drm/amd/display: Optimize bandwidth on following fast 
> update")
> Cc: Bhawanpreet Lakha 
> Cc: Mikita Lipski 
> Reported-by: Tom St Denis 
> Signed-off-by: Nicholas Kazlauskas 

Acked-by: Alex Deucher 

> ---
>  drivers/gpu/drm/amd/display/dc/core/dc.c | 15 +--
>  1 file changed, 13 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c 
> b/drivers/gpu/drm/amd/display/dc/core/dc.c
> index da942e9f5142..f9876e429f26 100644
> --- a/drivers/gpu/drm/amd/display/dc/core/dc.c
> +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
> @@ -3118,8 +3118,13 @@ void dc_commit_updates_for_stream(struct dc *dc,
> if (new_pipe->plane_state && new_pipe->plane_state != 
> old_pipe->plane_state)
> new_pipe->plane_state->force_full_update = 
> true;
> }
> -   } else if (update_type == UPDATE_TYPE_FAST) {
> -   /* Previous frame finished and HW is ready for optimization. 
> */
> +   } else if (update_type == UPDATE_TYPE_FAST && dc_ctx->dce_version >= 
> DCE_VERSION_MAX) {
> +   /*
> +* Previous frame finished and HW is ready for optimization.
> +*
> +* Only relevant for DCN behavior where we can guarantee the 
> optimization
> +* is safe to apply - retain the legacy behavior for DCE.
> +*/
> dc_post_update_surfaces_to_stream(dc);
> }
>
> @@ -3178,6 +3183,12 @@ void dc_commit_updates_for_stream(struct dc *dc,
> }
> }
>
> +   /* Legacy optimization path for DCE. */
> +   if (update_type >= UPDATE_TYPE_FULL && dc_ctx->dce_version < 
> DCE_VERSION_MAX) {
> +   dc_post_update_surfaces_to_stream(dc);
> +   TRACE_DCE_CLOCK_STATE(&context->bw_ctx.bw.dce);
> +   }
> +
> return;
>
>  }
> --
> 2.25.1
>


Re: [PATCH] drm/amd/display: Enable PSR by default on DCN3.1

2021-10-12 Thread Harry Wentland
On 2021-10-08 12:14, Nicholas Kazlauskas wrote:
> [Why]
> New idle optimizations for DCN3.1 require PSR for optimal power savings
> on panels that support it.
> 
> This was previously left disabled by default because of issues with
> compositors that do not pageflip and scan out directly to the
> frontbuffer.
> 
> For these compositors we now have detection methods that wait for x
> number of pageflips after a full update - triggered by a buffer or
> format change typically.
> 
> This may introduce bugs or new cases not tested by users so this is
> only currently targeting DCN31.
> 
> [How]
> Add code in DM to set PSR state by default for DCN3.1 while falling
> back to the feature mask for older DCN.
> 
> Add a global debug flag that can be set to disable it for either.
> 
> Cc: Harry Wentland 
> Cc: Roman Li 
> Signed-off-by: Nicholas Kazlauskas 

Reviewed-by: Harry Wentland 

Harry

> ---
>  .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c   | 17 -
>  drivers/gpu/drm/amd/include/amd_shared.h|  5 +++--
>  2 files changed, 19 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
> b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> index dc595ecec595..ff545503a6ed 100644
> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> @@ -4031,6 +4031,7 @@ static int amdgpu_dm_initialize_drm_device(struct 
> amdgpu_device *adev)
>   int32_t primary_planes;
>   enum dc_connection_type new_connection_type = dc_connection_none;
>   const struct dc_plane_cap *plane;
> + bool psr_feature_enabled = false;
>  
>   dm->display_indexes_num = dm->dc->caps.max_streams;
>   /* Update the actual used number of crtc */
> @@ -4113,6 +4114,19 @@ static int amdgpu_dm_initialize_drm_device(struct 
> amdgpu_device *adev)
>   DRM_DEBUG_KMS("Unsupported DCN IP version for outbox: 0x%X\n",
> adev->ip_versions[DCE_HWIP][0]);
>   }
> +
> + /* Determine whether to enable PSR support by default. */
> + if (!(amdgpu_dc_debug_mask & DC_DISABLE_PSR)) {
> + switch (adev->ip_versions[DCE_HWIP][0]) {
> + case IP_VERSION(3, 1, 2):
> + case IP_VERSION(3, 1, 3):
> + psr_feature_enabled = true;
> + break;
> + default:
> + psr_feature_enabled = amdgpu_dc_feature_mask & 
> DC_PSR_MASK;
> + break;
> + }
> + }
>  #endif
>  
>   /* loops over all connectors on the board */
> @@ -4156,7 +4170,8 @@ static int amdgpu_dm_initialize_drm_device(struct 
> amdgpu_device *adev)
>   } else if (dc_link_detect(link, DETECT_REASON_BOOT)) {
>   amdgpu_dm_update_connector_after_detect(aconnector);
>   register_backlight_device(dm, link);
> - if (amdgpu_dc_feature_mask & DC_PSR_MASK)
> +
> + if (psr_feature_enabled)
>   amdgpu_dm_set_psr_caps(link);
>   }
>  
> diff --git a/drivers/gpu/drm/amd/include/amd_shared.h 
> b/drivers/gpu/drm/amd/include/amd_shared.h
> index 257f280d3d53..f1a46d16f7ea 100644
> --- a/drivers/gpu/drm/amd/include/amd_shared.h
> +++ b/drivers/gpu/drm/amd/include/amd_shared.h
> @@ -228,7 +228,7 @@ enum DC_FEATURE_MASK {
>   DC_FBC_MASK = (1 << 0), //0x1, disabled by default
>   DC_MULTI_MON_PP_MCLK_SWITCH_MASK = (1 << 1), //0x2, enabled by default
>   DC_DISABLE_FRACTIONAL_PWM_MASK = (1 << 2), //0x4, disabled by default
> - DC_PSR_MASK = (1 << 3), //0x8, disabled by default
> + DC_PSR_MASK = (1 << 3), //0x8, disabled by default for dcn < 3.1
>   DC_EDP_NO_POWER_SEQUENCING = (1 << 4), //0x10, disabled by default
>  };
>  
> @@ -236,7 +236,8 @@ enum DC_DEBUG_MASK {
>   DC_DISABLE_PIPE_SPLIT = 0x1,
>   DC_DISABLE_STUTTER = 0x2,
>   DC_DISABLE_DSC = 0x4,
> - DC_DISABLE_CLOCK_GATING = 0x8
> + DC_DISABLE_CLOCK_GATING = 0x8,
> + DC_DISABLE_PSR = 0x10,
>  };
>  
>  enum amd_dpm_forced_level;
> 



Re: [PATCH] drm/amd/display: Fix surface optimization regression on Carrizo

2021-10-12 Thread Alex Deucher
On Tue, Oct 12, 2021 at 10:11 AM Nicholas Kazlauskas
 wrote:
>
> [Why]
> DCE legacy optimization path isn't well tested under new DC optimization
> flow which can result in underflow occuring when initializing X11 on
> Carrizo.
>
> [How]
> Retain the legacy optimization flow for DCE and keep the new one for DCN
> to satisfy optimizations being correctly applied for ASIC that can
> support it.
>
> Fixes: ab37c6527bb1 ("drm/amd/display: Optimize bandwidth on following fast 
> update")
> Cc: Bhawanpreet Lakha 
> Cc: Mikita Lipski 
> Reported-by: Tom St Denis 
> Signed-off-by: Nicholas Kazlauskas 

Acked-by: Alex Deucher 

> ---
>  drivers/gpu/drm/amd/display/dc/core/dc.c | 15 +--
>  1 file changed, 13 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c 
> b/drivers/gpu/drm/amd/display/dc/core/dc.c
> index da942e9f5142..f9876e429f26 100644
> --- a/drivers/gpu/drm/amd/display/dc/core/dc.c
> +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
> @@ -3118,8 +3118,13 @@ void dc_commit_updates_for_stream(struct dc *dc,
> if (new_pipe->plane_state && new_pipe->plane_state != 
> old_pipe->plane_state)
> new_pipe->plane_state->force_full_update = 
> true;
> }
> -   } else if (update_type == UPDATE_TYPE_FAST) {
> -   /* Previous frame finished and HW is ready for optimization. 
> */
> +   } else if (update_type == UPDATE_TYPE_FAST && dc_ctx->dce_version >= 
> DCE_VERSION_MAX) {
> +   /*
> +* Previous frame finished and HW is ready for optimization.
> +*
> +* Only relevant for DCN behavior where we can guarantee the 
> optimization
> +* is safe to apply - retain the legacy behavior for DCE.
> +*/
> dc_post_update_surfaces_to_stream(dc);
> }
>
> @@ -3178,6 +3183,12 @@ void dc_commit_updates_for_stream(struct dc *dc,
> }
> }
>
> +   /* Legacy optimization path for DCE. */
> +   if (update_type >= UPDATE_TYPE_FULL && dc_ctx->dce_version < 
> DCE_VERSION_MAX) {
> +   dc_post_update_surfaces_to_stream(dc);
> +   TRACE_DCE_CLOCK_STATE(&context->bw_ctx.bw.dce);
> +   }
> +
> return;
>
>  }
> --
> 2.25.1
>


[PATCH] drm/amd/display: Fix surface optimization regression on Carrizo

2021-10-12 Thread Nicholas Kazlauskas
[Why]
DCE legacy optimization path isn't well tested under new DC optimization
flow which can result in underflow occurring when initializing X11 on
Carrizo.

[How]
Retain the legacy optimization flow for DCE and keep the new one for DCN
to satisfy optimizations being correctly applied for ASIC that can
support it.

Fixes: ab37c6527bb1 ("drm/amd/display: Optimize bandwidth on following fast 
update")
Cc: Bhawanpreet Lakha 
Cc: Mikita Lipski 
Reported-by: Tom St Denis 
Signed-off-by: Nicholas Kazlauskas 
---
 drivers/gpu/drm/amd/display/dc/core/dc.c | 15 +--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c 
b/drivers/gpu/drm/amd/display/dc/core/dc.c
index da942e9f5142..f9876e429f26 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -3118,8 +3118,13 @@ void dc_commit_updates_for_stream(struct dc *dc,
if (new_pipe->plane_state && new_pipe->plane_state != 
old_pipe->plane_state)
new_pipe->plane_state->force_full_update = true;
}
-   } else if (update_type == UPDATE_TYPE_FAST) {
-   /* Previous frame finished and HW is ready for optimization. */
+   } else if (update_type == UPDATE_TYPE_FAST && dc_ctx->dce_version >= 
DCE_VERSION_MAX) {
+   /*
+* Previous frame finished and HW is ready for optimization.
+*
+* Only relevant for DCN behavior where we can guarantee the 
optimization
+* is safe to apply - retain the legacy behavior for DCE.
+*/
dc_post_update_surfaces_to_stream(dc);
}
 
@@ -3178,6 +3183,12 @@ void dc_commit_updates_for_stream(struct dc *dc,
}
}
 
+   /* Legacy optimization path for DCE. */
+   if (update_type >= UPDATE_TYPE_FULL && dc_ctx->dce_version < 
DCE_VERSION_MAX) {
+   dc_post_update_surfaces_to_stream(dc);
+   TRACE_DCE_CLOCK_STATE(&context->bw_ctx.bw.dce);
+   }
+
return;
 
 }
-- 
2.25.1



[PATCH v2 3/3] drm/amdkfd: create unregister svm range not overlap with TTM range

2021-10-12 Thread Philip Yang
When creating new svm range to recover retry fault, avoid svm range
to overlap with ranges or userptr ranges managed by TTM, otherwise
svm migration will trigger TTM or userptr eviction, to evict user queues
unexpectedly.

Change helper amdgpu_ttm_tt_affect_userptr to return userptr which is
inside the range. Add helper svm_range_check_vm_userptr to scan all
userptr of the vm, and return overlap userptr bo start, last.

Signed-off-by: Philip Yang 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c |  4 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h |  2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c| 92 +++--
 3 files changed, 91 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index bd5dda8066fa..d784f8d3a834 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1220,7 +1220,7 @@ struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt 
*ttm)
  *
  */
 bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
- unsigned long end)
+ unsigned long end, unsigned long *userptr)
 {
struct amdgpu_ttm_tt *gtt = (void *)ttm;
unsigned long size;
@@ -1235,6 +1235,8 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, 
unsigned long start,
if (gtt->userptr > end || gtt->userptr + size <= start)
return false;
 
+   if (userptr)
+   *userptr = gtt->userptr;
return true;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index ba5c864b8de1..91a087f9dc7c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -182,7 +182,7 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_buffer_object *bo,
 bool amdgpu_ttm_tt_has_userptr(struct ttm_tt *ttm);
 struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm);
 bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
- unsigned long end);
+ unsigned long end, unsigned long *userptr);
 bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
   int *last_invalidated);
 bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 49c92713c2ad..f987c73b535e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -50,7 +50,9 @@ static bool
 svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni,
const struct mmu_notifier_range *range,
unsigned long cur_seq);
-
+static int
+svm_range_check_vm(struct kfd_process *p, uint64_t start, uint64_t last,
+  uint64_t *bo_s, uint64_t *bo_l);
 static const struct mmu_interval_notifier_ops svm_range_mn_ops = {
.invalidate = svm_range_cpu_invalidate_pagetables,
 };
@@ -2308,6 +2310,7 @@ svm_range_best_restore_location(struct svm_range *prange,
 
return -1;
 }
+
 static int
 svm_range_get_range_boundaries(struct kfd_process *p, int64_t addr,
unsigned long *start, unsigned long *last)
@@ -2355,8 +2358,59 @@ svm_range_get_range_boundaries(struct kfd_process *p, 
int64_t addr,
  vma->vm_end >> PAGE_SHIFT, *last);
 
return 0;
+}
+
+static int
+svm_range_check_vm_userptr(struct kfd_process *p, uint64_t start, uint64_t 
last,
+  uint64_t *bo_s, uint64_t *bo_l)
+{
+   struct amdgpu_bo_va_mapping *mapping;
+   struct interval_tree_node *node;
+   struct amdgpu_bo *bo = NULL;
+   unsigned long userptr;
+   uint32_t i;
+   int r;
+
+   for (i = 0; i < p->n_pdds; i++) {
+   struct amdgpu_vm *vm;
+
+   if (!p->pdds[i]->drm_priv)
+   continue;
+
+   vm = drm_priv_to_vm(p->pdds[i]->drm_priv);
+   r = amdgpu_bo_reserve(vm->root.bo, false);
+   if (r)
+   return r;
 
+   /* Check userptr by searching entire vm->va interval tree */
+   node = interval_tree_iter_first(&vm->va, 0, ~0ULL);
+   while (node) {
+   mapping = container_of((struct rb_node *)node,
+  struct amdgpu_bo_va_mapping, rb);
+   bo = mapping->bo_va->base.bo;
+
+   if (!amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm,
+start << PAGE_SHIFT,
+last << PAGE_SHIFT,
+  &userptr)) {
+   node = interval_tree_iter_next(node, 0, ~0ULL);
+

[PATCH v2 2/3] drm/amdkfd: handle svm partial migration cpages 0

2021-10-12 Thread Philip Yang
migrate_vma_setup may return cpages 0, means 0 page can be migrated,
treat this as error case to skip the rest of vma migration steps.

Change svm_migrate_vma_to_vram and svm_migrate_vma_to_ram to return the
number of pages migrated successfully. The caller add up all the
successful migration pages and update prange->actual_loc only if the
total migrated pages is not 0.

This also remove the warning message "VRAM BO missing during
validation" if migration cpages is 0.

Signed-off-by: Philip Yang 
---
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 92 +---
 1 file changed, 49 insertions(+), 43 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index b05c0579d0b9..dd0fd52d0158 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -376,7 +376,7 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct 
svm_range *prange,
return r;
 }
 
-static int
+static unsigned long
 svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
struct vm_area_struct *vma, uint64_t start,
uint64_t end)
@@ -413,33 +413,38 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, 
struct svm_range *prange,
prange->start, prange->last);
goto out_free;
}
-   if (migrate.cpages != npages) {
-   pr_debug("Partial migration. 0x%lx/0x%llx pages can be 
migrated\n",
-migrate.cpages,
-npages);
-   }
 
-   if (migrate.cpages) {
-   r = svm_migrate_copy_to_vram(adev, prange, &migrate, &mfence,
-scratch);
-   migrate_vma_pages(&migrate);
-   svm_migrate_copy_done(adev, mfence);
-   migrate_vma_finalize(&migrate);
+   if (migrate.cpages != npages)
+   pr_debug("partial migration, 0x%lx/0x%llx pages migrated\n",
+migrate.cpages, npages);
+   else
+   pr_debug("0x%lx pages migrated\n", migrate.cpages);
+
+   if (!migrate.cpages) {
+   pr_debug("failed collect migrate sys pages [0x%lx 0x%lx]\n",
+prange->start, prange->last);
+   goto out_free;
}
 
+   r = svm_migrate_copy_to_vram(adev, prange, &migrate, &mfence, scratch);
+   migrate_vma_pages(&migrate);
+   svm_migrate_copy_done(adev, mfence);
+   migrate_vma_finalize(&migrate);
+
svm_range_dma_unmap(adev->dev, scratch, 0, npages);
svm_range_free_dma_mappings(prange);
 
 out_free:
kvfree(buf);
 out:
-   if (!r) {
+   if (!r && migrate.cpages) {
pdd = svm_range_get_pdd_by_adev(prange, adev);
if (pdd)
WRITE_ONCE(pdd->page_in, pdd->page_in + migrate.cpages);
-   }
 
-   return r;
+   return migrate.cpages;
+   }
+   return 0;
 }
 
 /**
@@ -460,7 +465,7 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t 
best_loc,
unsigned long addr, start, end;
struct vm_area_struct *vma;
struct amdgpu_device *adev;
-   int r = 0;
+   unsigned long cpages = 0;
 
if (prange->actual_loc == best_loc) {
pr_debug("svms 0x%p [0x%lx 0x%lx] already on best_loc 0x%x\n",
@@ -491,18 +496,15 @@ svm_migrate_ram_to_vram(struct svm_range *prange, 
uint32_t best_loc,
break;
 
next = min(vma->vm_end, end);
-   r = svm_migrate_vma_to_vram(adev, prange, vma, addr, next);
-   if (r) {
-   pr_debug("failed to migrate\n");
-   break;
-   }
+   cpages += svm_migrate_vma_to_vram(adev, prange, vma, addr, 
next);
addr = next;
}
 
-   if (!r)
+   if (cpages) {
prange->actual_loc = best_loc;
-
-   return r;
+   return 0;
+   }
+   return -ENOMEM;
 }
 
 static void svm_migrate_page_free(struct page *page)
@@ -603,7 +605,7 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct 
svm_range *prange,
return r;
 }
 
-static int
+static unsigned long
 svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
   struct vm_area_struct *vma, uint64_t start, uint64_t end)
 {
@@ -640,31 +642,37 @@ svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct 
svm_range *prange,
goto out_free;
}
 
-   pr_debug("cpages %ld\n", migrate.cpages);
+   if (migrate.cpages != npages)
+   pr_debug("partial migration, 0x%lx/0x%llx pages migrated\n",
+migrate.cpages, npages);
+   else
+   pr_debug("0x%lx pages migrated\n", migrate.cpages);
 
-   if (migrate.cpages) {
-   r = svm_migrate_copy_to_ram(adev, prange, , ,
-   

[PATCH v2 1/3] drm/amdkfd: ratelimited svm debug messages

2021-10-12 Thread Philip Yang
No function change, use pr_debug_ratelimited to avoid per page debug
message overflowing dmesg buf and console log.

use dev_err to show error message from unexpected situation, to provide
clue to help debug without enabling dynamic debug log. Define dev_fmt to
output function name in error message.

Signed-off-by: Philip Yang 
---
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 34 +---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 17 +++-
 2 files changed, 30 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index f53e17a94ad8..b05c0579d0b9 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -20,7 +20,6 @@
  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  * OTHER DEALINGS IN THE SOFTWARE.
  */
-
 #include 
 #include 
 #include 
@@ -34,6 +33,11 @@
 #include "kfd_svm.h"
 #include "kfd_migrate.h"
 
+#ifdef dev_fmt
+#undef dev_fmt
+#endif
+#define dev_fmt(fmt) "kfd_migrate: %s: " fmt, __func__
+
 static uint64_t
 svm_migrate_direct_mapping_addr(struct amdgpu_device *adev, uint64_t addr)
 {
@@ -151,14 +155,14 @@ svm_migrate_copy_memory_gart(struct amdgpu_device *adev, 
dma_addr_t *sys,
gart_d = svm_migrate_direct_mapping_addr(adev, *vram);
}
if (r) {
-   pr_debug("failed %d to create gart mapping\n", r);
+   dev_err(adev->dev, "fail %d create gart mapping\n", r);
goto out_unlock;
}
 
r = amdgpu_copy_buffer(ring, gart_s, gart_d, size * PAGE_SIZE,
   NULL, , false, true, false);
if (r) {
-   pr_debug("failed %d to copy memory\n", r);
+   dev_err(adev->dev, "fail %d to copy memory\n", r);
goto out_unlock;
}
 
@@ -285,7 +289,7 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct 
svm_range *prange,
 
r = svm_range_vram_node_new(adev, prange, true);
if (r) {
-   pr_debug("failed %d get 0x%llx pages from vram\n", r, npages);
+   dev_err(adev->dev, "fail %d to alloc vram\n", r);
goto out;
}
 
@@ -305,7 +309,7 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct 
svm_range *prange,
  DMA_TO_DEVICE);
r = dma_mapping_error(dev, src[i]);
if (r) {
-   pr_debug("failed %d dma_map_page\n", r);
+   dev_err(adev->dev, "fail %d dma_map_page\n", r);
goto out_free_vram_pages;
}
} else {
@@ -325,8 +329,8 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct 
svm_range *prange,
continue;
}
 
-   pr_debug("dma mapping src to 0x%llx, page_to_pfn 0x%lx\n",
-src[i] >> PAGE_SHIFT, page_to_pfn(spage));
+   pr_debug_ratelimited("dma mapping src to 0x%llx, pfn 0x%lx\n",
+src[i] >> PAGE_SHIFT, page_to_pfn(spage));
 
if (j >= (cursor.size >> PAGE_SHIFT) - 1 && i < npages - 1) {
r = svm_migrate_copy_memory_gart(adev, src + i - j,
@@ -405,8 +409,8 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct 
svm_range *prange,
 
r = migrate_vma_setup(&migrate);
if (r) {
-   pr_debug("failed %d prepare migrate svms 0x%p [0x%lx 0x%lx]\n",
-r, prange->svms, prange->start, prange->last);
+   dev_err(adev->dev, "vma setup fail %d range [0x%lx 0x%lx]\n", r,
+   prange->start, prange->last);
goto out_free;
}
if (migrate.cpages != npages) {
@@ -506,7 +510,7 @@ static void svm_migrate_page_free(struct page *page)
struct svm_range_bo *svm_bo = page->zone_device_data;
 
if (svm_bo) {
-   pr_debug("svm_bo ref left: %d\n", kref_read(&svm_bo->kref));
+   pr_debug_ratelimited("ref: %d\n", kref_read(&svm_bo->kref));
svm_range_bo_unref(svm_bo);
}
 }
@@ -572,12 +576,12 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, 
struct svm_range *prange,
dst[i] = dma_map_page(dev, dpage, 0, PAGE_SIZE, 
DMA_FROM_DEVICE);
r = dma_mapping_error(dev, dst[i]);
if (r) {
-   pr_debug("failed %d dma_map_page\n", r);
+   dev_err(adev->dev, "fail %d dma_map_page\n", r);
goto out_oom;
}
 
-   pr_debug("dma mapping dst to 0x%llx, page_to_pfn 0x%lx\n",
- dst[i] >> PAGE_SHIFT, page_to_pfn(dpage));
+   

Re: [PATCH] drm/amd/display: Enable PSR by default on DCN3.1

2021-10-12 Thread Kazlauskas, Nicholas

On 2021-10-11 1:04 a.m., Vishwakarma, Pratik wrote:


On 10/8/2021 9:44 PM, Nicholas Kazlauskas wrote:

[Why]
New idle optimizations for DCN3.1 require PSR for optimal power savings
on panels that support it.

This was previously left disabled by default because of issues with
compositors that do not pageflip and scan out directly to the
frontbuffer.

For these compositors we now have detection methods that wait for x
number of pageflips after a full update - triggered by a buffer or
format change typically.

This may introduce bugs or new cases not tested by users so this is
only currently targeting DCN31.

[How]
Add code in DM to set PSR state by default for DCN3.1 while falling
back to the feature mask for older DCN.

Add a global debug flag that can be set to disable it for either.

Cc: Harry Wentland
Cc: Roman Li
Signed-off-by: Nicholas Kazlauskas
---
  .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c   | 17 -
  drivers/gpu/drm/amd/include/amd_shared.h|  5 +++--
  2 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index dc595ecec595..ff545503a6ed 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -4031,6 +4031,7 @@ static int amdgpu_dm_initialize_drm_device(struct 
amdgpu_device *adev)
int32_t primary_planes;
enum dc_connection_type new_connection_type = dc_connection_none;
const struct dc_plane_cap *plane;
+   bool psr_feature_enabled = false;
  
  	dm->display_indexes_num = dm->dc->caps.max_streams;

/* Update the actual used number of crtc */
@@ -4113,6 +4114,19 @@ static int amdgpu_dm_initialize_drm_device(struct 
amdgpu_device *adev)
DRM_DEBUG_KMS("Unsupported DCN IP version for outbox: 0x%X\n",
  adev->ip_versions[DCE_HWIP][0]);
}
+
+   /* Determine whether to enable PSR support by default. */
+   if (!(amdgpu_dc_debug_mask & DC_DISABLE_PSR)) {
+   switch (adev->ip_versions[DCE_HWIP][0]) {
+   case IP_VERSION(3, 1, 2):
+   case IP_VERSION(3, 1, 3):
+   psr_feature_enabled = true;
+   break;
+   default:
+   psr_feature_enabled = amdgpu_dc_feature_mask & 
DC_PSR_MASK;
+   break;
+   }
+   }
  #endif
  
  	/* loops over all connectors on the board */

@@ -4156,7 +4170,8 @@ static int amdgpu_dm_initialize_drm_device(struct 
amdgpu_device *adev)
} else if (dc_link_detect(link, DETECT_REASON_BOOT)) {
amdgpu_dm_update_connector_after_detect(aconnector);
register_backlight_device(dm, link);
-   if (amdgpu_dc_feature_mask & DC_PSR_MASK)
+
+   if (psr_feature_enabled)
amdgpu_dm_set_psr_caps(link);
}
  
diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h

index 257f280d3d53..f1a46d16f7ea 100644
--- a/drivers/gpu/drm/amd/include/amd_shared.h
+++ b/drivers/gpu/drm/amd/include/amd_shared.h
@@ -228,7 +228,7 @@ enum DC_FEATURE_MASK {
DC_FBC_MASK = (1 << 0), //0x1, disabled by default
DC_MULTI_MON_PP_MCLK_SWITCH_MASK = (1 << 1), //0x2, enabled by default
DC_DISABLE_FRACTIONAL_PWM_MASK = (1 << 2), //0x4, disabled by default
-   DC_PSR_MASK = (1 << 3), //0x8, disabled by default
+   DC_PSR_MASK = (1 << 3), //0x8, disabled by default for dcn < 3.1
DC_EDP_NO_POWER_SEQUENCING = (1 << 4), //0x10, disabled by default
  };
  
@@ -236,7 +236,8 @@ enum DC_DEBUG_MASK {

DC_DISABLE_PIPE_SPLIT = 0x1,
DC_DISABLE_STUTTER = 0x2,
DC_DISABLE_DSC = 0x4,
-   DC_DISABLE_CLOCK_GATING = 0x8
+   DC_DISABLE_CLOCK_GATING = 0x8,
+   DC_DISABLE_PSR = 0x10,


Don't we need a corresponding check in amdgpu_dm_init() to disable PSR 
in runtime?


The check is `if (psr_feature_enabled)` above.


Also, how does it handle conflicting declarations from feature mask and 
debug mask?


Feature enable mask is used for older ASIC to allow PSR to be enabled.

For both old and new ASIC the DISABLE mask takes priority as a debug 
option for disabling PSR support.


Regards,
Nicholas Kazlauskas



/BR
/

/Pratik
/


  };
  
  enum amd_dpm_forced_level;




[PATCH] drm/amd/display: fix null pointer deref when plugging in display

2021-10-12 Thread Aurabindo Pillai
[Why]
When system boots in headless mode, connecting a 4k display creates a
null pointer dereference due to hubp for a certain plane being null.
Add a condition to check for null hubp before dereferencing it.

Signed-off-by: Aurabindo Pillai 
---
 drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c 
b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c
index 01a90badd173..2936a334cd64 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c
@@ -969,7 +969,8 @@ void dcn30_set_disp_pattern_generator(const struct dc *dc,
/* turning off DPG */

pipe_ctx->plane_res.hubp->funcs->set_blank(pipe_ctx->plane_res.hubp, false);
for (mpcc_pipe = pipe_ctx->bottom_pipe; mpcc_pipe; mpcc_pipe = 
mpcc_pipe->bottom_pipe)
-   
mpcc_pipe->plane_res.hubp->funcs->set_blank(mpcc_pipe->plane_res.hubp, false);
+   if (mpcc_pipe->plane_res.hubp)
+   
mpcc_pipe->plane_res.hubp->funcs->set_blank(mpcc_pipe->plane_res.hubp, false);
 

stream_res->opp->funcs->opp_set_disp_pattern_generator(stream_res->opp, 
test_pattern, color_space,
color_depth, solid_color, width, height, 
offset);
-- 
2.30.2



Re: [PATCH Review 1/1] drm/ttm: fix debugfs node create failed

2021-10-12 Thread Das, Nirmoy



On 10/12/2021 1:58 PM, Stanley.Yang wrote:

Test scenario:
 modprobe amdgpu -> rmmod amdgpu -> modprobe amdgpu
Error log:
 [   54.396807] debugfs: File 'page_pool' in directory 'amdttm' already 
present!
 [   54.396833] debugfs: File 'page_pool_shrink' in directory 'amdttm' 
already present!
 [   54.396848] debugfs: File 'buffer_objects' in directory 'amdttm' 
already present!



We should instead add a check if those debugfs files already 
exist/created in ttm debugfs dir using debugfs_lookup() before creating.



Regards,

Nirmoy




Reason:
 page_pool, page_pool_shrink and buffer_objects can be removed when
 rmmod amdttm, in the above test scenario only rmmod amdgpu, so those
 debugfs node will not be removed, this caused file create failed.
Solution:
 create ttm_page directory under ttm_root directory when insmod amdgpu,
 page_pool, page_pool_shrink and buffer_objects are stored in the ttm_page 
directory,
 remove the ttm_page directory when doing rmmod amdgpu; this fixes the above issue.

Signed-off-by: Stanley.Yang 
---
  drivers/gpu/drm/ttm/ttm_device.c | 12 +++-
  drivers/gpu/drm/ttm/ttm_module.c |  1 +
  drivers/gpu/drm/ttm/ttm_module.h |  1 +
  drivers/gpu/drm/ttm/ttm_pool.c   |  4 ++--
  4 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c
index 1de23edbc182..ad170328f0c8 100644
--- a/drivers/gpu/drm/ttm/ttm_device.c
+++ b/drivers/gpu/drm/ttm/ttm_device.c
@@ -55,6 +55,10 @@ static void ttm_global_release(void)
  
  	ttm_pool_mgr_fini();
  
+#ifdef CONFIG_DEBUG_FS

+   debugfs_remove(ttm_debugfs_page);
+#endif
+
__free_page(glob->dummy_read_page);
memset(glob, 0, sizeof(*glob));
  out:
@@ -85,6 +89,10 @@ static int ttm_global_init(void)
>> PAGE_SHIFT;
num_dma32 = min(num_dma32, 2UL << (30 - PAGE_SHIFT));
  
+#ifdef CONFIG_DEBUG_FS

+   ttm_debugfs_page = debugfs_create_dir("ttm_page", ttm_debugfs_root);
+#endif
+
ttm_pool_mgr_init(num_pages);
ttm_tt_mgr_init(num_pages, num_dma32);
  
@@ -98,8 +106,10 @@ static int ttm_global_init(void)

INIT_LIST_HEAD(>device_list);
atomic_set(>bo_count, 0);
  
-	debugfs_create_atomic_t("buffer_objects", 0444, ttm_debugfs_root,

+#ifdef CONFIG_DEBUG_FS
+   debugfs_create_atomic_t("buffer_objects", 0444, ttm_debugfs_page,
>bo_count);
+#endif
  out:
mutex_unlock(_global_mutex);
return ret;
diff --git a/drivers/gpu/drm/ttm/ttm_module.c b/drivers/gpu/drm/ttm/ttm_module.c
index 88970a6b8e32..66595e6e7087 100644
--- a/drivers/gpu/drm/ttm/ttm_module.c
+++ b/drivers/gpu/drm/ttm/ttm_module.c
@@ -38,6 +38,7 @@
  #include "ttm_module.h"
  
  struct dentry *ttm_debugfs_root;

+struct dentry *ttm_debugfs_page;
  
  static int __init ttm_init(void)

  {
diff --git a/drivers/gpu/drm/ttm/ttm_module.h b/drivers/gpu/drm/ttm/ttm_module.h
index d7cac5d4b835..6007dc66f44e 100644
--- a/drivers/gpu/drm/ttm/ttm_module.h
+++ b/drivers/gpu/drm/ttm/ttm_module.h
@@ -36,5 +36,6 @@
  struct dentry;
  
  extern struct dentry *ttm_debugfs_root;

+extern struct dentry *ttm_debugfs_page;
  
  #endif /* _TTM_MODULE_H_ */

diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
index 8be7fd7161fd..ecb33daad7b5 100644
--- a/drivers/gpu/drm/ttm/ttm_pool.c
+++ b/drivers/gpu/drm/ttm/ttm_pool.c
@@ -709,9 +709,9 @@ int ttm_pool_mgr_init(unsigned long num_pages)
}
  
  #ifdef CONFIG_DEBUG_FS

-   debugfs_create_file("page_pool", 0444, ttm_debugfs_root, NULL,
+   debugfs_create_file("page_pool", 0444, ttm_debugfs_page, NULL,
_pool_debugfs_globals_fops);
-   debugfs_create_file("page_pool_shrink", 0400, ttm_debugfs_root, NULL,
+   debugfs_create_file("page_pool_shrink", 0400, ttm_debugfs_page, NULL,
_pool_debugfs_shrink_fops);
  #endif
  


[PATCH 1/1] drm/amdgpu: release gtt bo after each move test

2021-10-12 Thread Nirmoy Das
When gart size is < gtt size this test will fail with
-ENOMEM as we are not freeing gtt bo after each move test.
This is generally not an issue when gart size >= gtt size.

Reported-by: zhang 
Signed-off-by: Nirmoy Das 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_test.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c
index 909d830b513e..0cf2a560d673 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c
@@ -212,7 +212,6 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
 
DRM_INFO("Tested GTT->VRAM and VRAM->GTT copy for GTT offset 
0x%llx\n",
 gart_addr - adev->gmc.gart_start);
-   continue;
 
 out_lclean_unpin:
amdgpu_bo_unpin(gtt_obj[i]);
@@ -220,6 +219,7 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
amdgpu_bo_unreserve(gtt_obj[i]);
 out_lclean_unref:
amdgpu_bo_unref(_obj[i]);
+   continue;
 out_lclean:
for (--i; i >= 0; --i) {
amdgpu_bo_unpin(gtt_obj[i]);
-- 
2.32.0



[PATCH Review 1/1] drm/ttm: fix debugfs node create failed

2021-10-12 Thread Stanley . Yang
Test scenario:
modprobe amdgpu -> rmmod amdgpu -> modprobe amdgpu
Error log:
[   54.396807] debugfs: File 'page_pool' in directory 'amdttm' already 
present!
[   54.396833] debugfs: File 'page_pool_shrink' in directory 'amdttm' 
already present!
[   54.396848] debugfs: File 'buffer_objects' in directory 'amdttm' already 
present!
Reason:
page_pool, page_pool_shrink and buffer_objects can be removed when
rmmod amdttm, in the above test scenario only rmmod amdgpu, so those
debugfs node will not be removed, this caused file create failed.
Solution:
create ttm_page directory under ttm_root directory when insmod amdgpu,
page_pool, page_pool_shrink and buffer_objects are stored in the ttm_page 
directory,
remove the ttm_page directory when doing rmmod amdgpu; this fixes the above issue.

Signed-off-by: Stanley.Yang 
---
 drivers/gpu/drm/ttm/ttm_device.c | 12 +++-
 drivers/gpu/drm/ttm/ttm_module.c |  1 +
 drivers/gpu/drm/ttm/ttm_module.h |  1 +
 drivers/gpu/drm/ttm/ttm_pool.c   |  4 ++--
 4 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c
index 1de23edbc182..ad170328f0c8 100644
--- a/drivers/gpu/drm/ttm/ttm_device.c
+++ b/drivers/gpu/drm/ttm/ttm_device.c
@@ -55,6 +55,10 @@ static void ttm_global_release(void)
 
ttm_pool_mgr_fini();
 
+#ifdef CONFIG_DEBUG_FS
+   debugfs_remove(ttm_debugfs_page);
+#endif
+
__free_page(glob->dummy_read_page);
memset(glob, 0, sizeof(*glob));
 out:
@@ -85,6 +89,10 @@ static int ttm_global_init(void)
>> PAGE_SHIFT;
num_dma32 = min(num_dma32, 2UL << (30 - PAGE_SHIFT));
 
+#ifdef CONFIG_DEBUG_FS
+   ttm_debugfs_page = debugfs_create_dir("ttm_page", ttm_debugfs_root);
+#endif
+
ttm_pool_mgr_init(num_pages);
ttm_tt_mgr_init(num_pages, num_dma32);
 
@@ -98,8 +106,10 @@ static int ttm_global_init(void)
INIT_LIST_HEAD(>device_list);
atomic_set(>bo_count, 0);
 
-   debugfs_create_atomic_t("buffer_objects", 0444, ttm_debugfs_root,
+#ifdef CONFIG_DEBUG_FS
+   debugfs_create_atomic_t("buffer_objects", 0444, ttm_debugfs_page,
>bo_count);
+#endif
 out:
mutex_unlock(_global_mutex);
return ret;
diff --git a/drivers/gpu/drm/ttm/ttm_module.c b/drivers/gpu/drm/ttm/ttm_module.c
index 88970a6b8e32..66595e6e7087 100644
--- a/drivers/gpu/drm/ttm/ttm_module.c
+++ b/drivers/gpu/drm/ttm/ttm_module.c
@@ -38,6 +38,7 @@
 #include "ttm_module.h"
 
 struct dentry *ttm_debugfs_root;
+struct dentry *ttm_debugfs_page;
 
 static int __init ttm_init(void)
 {
diff --git a/drivers/gpu/drm/ttm/ttm_module.h b/drivers/gpu/drm/ttm/ttm_module.h
index d7cac5d4b835..6007dc66f44e 100644
--- a/drivers/gpu/drm/ttm/ttm_module.h
+++ b/drivers/gpu/drm/ttm/ttm_module.h
@@ -36,5 +36,6 @@
 struct dentry;
 
 extern struct dentry *ttm_debugfs_root;
+extern struct dentry *ttm_debugfs_page;
 
 #endif /* _TTM_MODULE_H_ */
diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
index 8be7fd7161fd..ecb33daad7b5 100644
--- a/drivers/gpu/drm/ttm/ttm_pool.c
+++ b/drivers/gpu/drm/ttm/ttm_pool.c
@@ -709,9 +709,9 @@ int ttm_pool_mgr_init(unsigned long num_pages)
}
 
 #ifdef CONFIG_DEBUG_FS
-   debugfs_create_file("page_pool", 0444, ttm_debugfs_root, NULL,
+   debugfs_create_file("page_pool", 0444, ttm_debugfs_page, NULL,
_pool_debugfs_globals_fops);
-   debugfs_create_file("page_pool_shrink", 0400, ttm_debugfs_root, NULL,
+   debugfs_create_file("page_pool_shrink", 0400, ttm_debugfs_page, NULL,
_pool_debugfs_shrink_fops);
 #endif
 
-- 
2.17.1



Re: [PATCH v5] amd/display: only require overlay plane to cover whole CRTC on ChromeOS

2021-10-12 Thread Simon Ser
On Tuesday, October 12th, 2021 at 11:24, Paul Menzel  
wrote:

> Thank you for the explanation. Then I misunderstood commit ddab8bd7
> (drm/amd/display: Fix two cursor duplication when using overlay) from
> the Fixes tag, as commit ddab8bd7 does not mention Chrome OS, and also
> does not carry a fixes tag.

Yeah, that commit message isn't 100% explicit, but "some userspace" means
ChromeOS. See also e7d9560aeae5 ("Revert "drm/amd/display: Fix overlay
validation by considering cursors"") which reverts a patch relaxing the
checks introduced in ddab8bd7, and mentions ChromeOS explicitly.


Re: [PATCH v5] amd/display: only require overlay plane to cover whole CRTC on ChromeOS

2021-10-12 Thread Paul Menzel

Dear Simon,


Am 12.10.21 um 11:15 schrieb Simon Ser:

On Tuesday, October 12th, 2021 at 11:08, Paul Menzel  
wrote:


the cursor plane (which uses the legacy API). Thus amdgpu must always
be prepared to enable/disable/move the cursor plane at any time without
failing (or else ChromeOS will trip over).


What ChromeOS version did you test with? Are there plans to improve
ChromeOS?


No idea, I haven't received feedback from the ChromeOS folks.


As discussed in [1], there's no reason why the ChromeOS limitation
should prevent other fully atomic users from taking advantage of the
overlay plane. Let's limit the check to ChromeOS.


How do we know, no other userspace programs are affected, breaking
Linux’ no-regression in userspace policy?


Actually this is the other way around: the ChromeOS fix which landed
has broken my user-space. This patch tries to fix the situation for
both ChromeOS and gamescope.


Thank you.


That said, it seems like amdgpu maintainers are open to just revert the
ChromeOS fix, thus fixing gamescope. ChromeOS can carry the fix in their
kernel tree. More on that soon.


v4: fix ChromeOS detection (Harry)

v5: fix conflict with linux-next

[1]: 
https://lore.kernel.org/amd-gfx/JIQ_93_cHcshiIDsrMU1huBzx9P9LVQxucx8hQArpQu7Wk5DrCl_vTXj_Q20m_L-8C8A5dSpNcSJ8ehfcCrsQpfB5QG_Spn14EYkH9chtg0=@emersion.fr/

Signed-off-by: Simon Ser 
Cc: Alex Deucher 
Cc: Harry Wentland 
Cc: Nicholas Kazlauskas 
Cc: Bas Nieuwenhuizen 
Cc: Rodrigo Siqueira 
Cc: Sean Paul 
Fixes: ddab8bd788f5 ("drm/amd/display: Fix two cursor duplication when using 
overlay")
---
   .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 29 +++
   1 file changed, 29 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index f35561b5a465..2eeda1fec506 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -10594,6 +10594,31 @@ static int add_affected_mst_dsc_crtcs(struct 
drm_atomic_state *state, struct drm
   }
   #endif

+static bool is_chromeos(void)
+{
+   struct mm_struct *mm = current->mm;
+   struct file *exe_file;
+   bool ret;
+
+   /* ChromeOS renames its thread to DrmThread. Also check the executable
+* name. */
+   if (strcmp(current->comm, "DrmThread") != 0 || !mm)
+   return false;
+
+   rcu_read_lock();
+   exe_file = rcu_dereference(mm->exe_file);
+   if (exe_file && !get_file_rcu(exe_file))
+   exe_file = NULL;
+   rcu_read_unlock();
+
+   if (!exe_file)
+   return false;
+   ret = strcmp(exe_file->f_path.dentry->d_name.name, "chrome") == 0;
+   fput(exe_file);
+
+   return ret;
+}
+
   static int validate_overlay(struct drm_atomic_state *state)
   {
int i;
@@ -10601,6 +10626,10 @@ static int validate_overlay(struct drm_atomic_state 
*state)
struct drm_plane_state *new_plane_state;
struct drm_plane_state *primary_state, *overlay_state = NULL;

+   /* This is a workaround for ChromeOS only */
+   if (!is_chromeos())
+   return 0;


I would have expected the check to be the other way around, as no the
behavior on non-Chrome OS is changed?


This function performs a check which is only necessary on ChromeOS. On
non-ChromeOS, this function prevents user-space from using some hardware
features. The early return ensures non-ChromeOS user-space can use these
features.


Thank you for the explanation. Then I misunderstood commit ddab8bd7 
(drm/amd/display: Fix two cursor duplication when using overlay) from 
the Fixes tag, as commit ddab8bd7 does not mention Chrome OS, and also 
does not carry a fixes tag.


With that background, I guess the workaround it fine.


Kind regards,

Paul


RE: [PATCH] drm/amdgpu: enable display for cyan skillfish

2021-10-12 Thread Yu, Lang
[Public]



>-Original Message-
>From: Paul Menzel 
>Sent: Tuesday, October 12, 2021 4:51 PM
>To: Yu, Lang 
>Cc: Deucher, Alexander ; Huang, Ray
>; amd-gfx@lists.freedesktop.org
>Subject: Re: [PATCH] drm/amdgpu: enable display for cyan skillfish
>
>Dear Lang,
>
>
>Am 12.10.21 um 08:16 schrieb Lang Yu:
>> Display support for cyan skillfish is ready now.
>
>What is the last commit making it “ready”?

This one, 
commit 4ac93fa0ec12a887b40b81d9b8b7fcd1033f48d5
drm/amd/display: add cyan_skillfish display support

Actually, it is fine before switching to amdgpu_discovery_set_ip_blocks.
During developing amdgpu_discovery_set_ip_blocks, it is not ready.
So skip to enable it.
 
>> Enable it!
>
>How did you test the patch?

I compiled amdgpu driver with this patch and loaded it on ubuntu 20.04. 
The display worked well. Otherwise the display won't work.

Regards,
Lang

>> Signed-off-by: Lang Yu 
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 3 +--
>>   1 file changed, 1 insertion(+), 2 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
>> index 2bebd2ce6474..4228c7964175 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
>> @@ -736,6 +736,7 @@ static int
>amdgpu_discovery_set_display_ip_blocks(struct amdgpu_device *adev)
>>  case IP_VERSION(1, 0, 1):
>>  case IP_VERSION(2, 0, 2):
>>  case IP_VERSION(2, 0, 0):
>> +case IP_VERSION(2, 0, 3):
>
>The numbers are not ordered. Could you please put a patch before this one, 
>fixing
>the ordering?
>
>>  case IP_VERSION(2, 1, 0):
>>  case IP_VERSION(3, 0, 0):
>>  case IP_VERSION(3, 0, 2):
>> @@ -745,8 +746,6 @@ static int
>amdgpu_discovery_set_display_ip_blocks(struct amdgpu_device *adev)
>>  case IP_VERSION(3, 1, 3):
>>  amdgpu_device_ip_block_add(adev, _ip_block);
>>  break;
>> -case IP_VERSION(2, 0, 3):
>> -break;
>>  default:
>>  return -EINVAL;
>>  }
>>
>
>
>Kind regards,
>
>Paul


Re: [PATCH v5] amd/display: only require overlay plane to cover whole CRTC on ChromeOS

2021-10-12 Thread Simon Ser
On Tuesday, October 12th, 2021 at 11:08, Paul Menzel  
wrote:

> > the cursor plane (which uses the legacy API). Thus amdgpu must always
> > be prepared to enable/disable/move the cursor plane at any time without
> > failing (or else ChromeOS will trip over).
>
> What ChromeOS version did you test with? Are there plans to improve
> ChromeOS?

No idea, I haven't received feedback from the ChromeOS folks.

> > As discussed in [1], there's no reason why the ChromeOS limitation
> > should prevent other fully atomic users from taking advantage of the
> > overlay plane. Let's limit the check to ChromeOS.
>
> How do we know, no other userspace programs are affected, breaking
> Linux’ no-regression in userspace policy?

Actually this is the other way around: the ChromeOS fix which landed
has broken my user-space. This patch tries to fix the situation for
both ChromeOS and gamescope.

That said, it seems like amdgpu maintainers are open to just revert the
ChromeOS fix, thus fixing gamescope. ChromeOS can carry the fix in their
kernel tree. More on that soon.

> > v4: fix ChromeOS detection (Harry)
> >
> > v5: fix conflict with linux-next
> >
> > [1]: 
> > https://lore.kernel.org/amd-gfx/JIQ_93_cHcshiIDsrMU1huBzx9P9LVQxucx8hQArpQu7Wk5DrCl_vTXj_Q20m_L-8C8A5dSpNcSJ8ehfcCrsQpfB5QG_Spn14EYkH9chtg0=@emersion.fr/
> >
> > Signed-off-by: Simon Ser 
> > Cc: Alex Deucher 
> > Cc: Harry Wentland 
> > Cc: Nicholas Kazlauskas 
> > Cc: Bas Nieuwenhuizen 
> > Cc: Rodrigo Siqueira 
> > Cc: Sean Paul 
> > Fixes: ddab8bd788f5 ("drm/amd/display: Fix two cursor duplication when 
> > using overlay")
> > ---
> >   .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 29 +++
> >   1 file changed, 29 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
> > b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> > index f35561b5a465..2eeda1fec506 100644
> > --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> > +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> > @@ -10594,6 +10594,31 @@ static int add_affected_mst_dsc_crtcs(struct 
> > drm_atomic_state *state, struct drm
> >   }
> >   #endif
> >
> > +static bool is_chromeos(void)
> > +{
> > +   struct mm_struct *mm = current->mm;
> > +   struct file *exe_file;
> > +   bool ret;
> > +
> > +   /* ChromeOS renames its thread to DrmThread. Also check the executable
> > +* name. */
> > +   if (strcmp(current->comm, "DrmThread") != 0 || !mm)
> > +   return false;
> > +
> > +   rcu_read_lock();
> > +   exe_file = rcu_dereference(mm->exe_file);
> > +   if (exe_file && !get_file_rcu(exe_file))
> > +   exe_file = NULL;
> > +   rcu_read_unlock();
> > +
> > +   if (!exe_file)
> > +   return false;
> > +   ret = strcmp(exe_file->f_path.dentry->d_name.name, "chrome") == 0;
> > +   fput(exe_file);
> > +
> > +   return ret;
> > +}
> > +
> >   static int validate_overlay(struct drm_atomic_state *state)
> >   {
> > int i;
> > @@ -10601,6 +10626,10 @@ static int validate_overlay(struct 
> > drm_atomic_state *state)
> > struct drm_plane_state *new_plane_state;
> > struct drm_plane_state *primary_state, *overlay_state = NULL;
> >
> > +   /* This is a workaround for ChromeOS only */
> > +   if (!is_chromeos())
> > +   return 0;
>
> I would have expected the check to be the other way around, as no the
> behavior on non-Chrome OS is changed?

This function performs a check which is only necessary on ChromeOS. On
non-ChromeOS, this function prevents user-space from using some hardware
features. The early return ensures non-ChromeOS user-space can use these
features.


Re: [PATCH v5] amd/display: only require overlay plane to cover whole CRTC on ChromeOS

2021-10-12 Thread Paul Menzel

Dear Simon,


Am 11.10.21 um 17:16 schrieb Simon Ser:

Commit ddab8bd788f5 ("drm/amd/display: Fix two cursor duplication when
using overlay") changed the atomic validation code to forbid the
overlay plane from being used if it doesn't cover the whole CRTC. The
motivation is that ChromeOS uses the atomic API for everything except


s/motivation/problem/


the cursor plane (which uses the legacy API). Thus amdgpu must always
be prepared to enable/disable/move the cursor plane at any time without
failing (or else ChromeOS will trip over).


What ChromeOS version did you test with? Are there plans to improve 
ChromeOS?



As discussed in [1], there's no reason why the ChromeOS limitation
should prevent other fully atomic users from taking advantage of the
overlay plane. Let's limit the check to ChromeOS.


How do we know, no other userspace programs are affected, breaking 
Linux’ no-regression in userspace policy?



v4: fix ChromeOS detection (Harry)

v5: fix conflict with linux-next

[1]: 
https://lore.kernel.org/amd-gfx/JIQ_93_cHcshiIDsrMU1huBzx9P9LVQxucx8hQArpQu7Wk5DrCl_vTXj_Q20m_L-8C8A5dSpNcSJ8ehfcCrsQpfB5QG_Spn14EYkH9chtg0=@emersion.fr/

Signed-off-by: Simon Ser 
Cc: Alex Deucher 
Cc: Harry Wentland 
Cc: Nicholas Kazlauskas 
Cc: Bas Nieuwenhuizen 
Cc: Rodrigo Siqueira 
Cc: Sean Paul 
Fixes: ddab8bd788f5 ("drm/amd/display: Fix two cursor duplication when using 
overlay")
---
  .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 29 +++
  1 file changed, 29 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index f35561b5a465..2eeda1fec506 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -10594,6 +10594,31 @@ static int add_affected_mst_dsc_crtcs(struct 
drm_atomic_state *state, struct drm
  }
  #endif
  
+static bool is_chromeos(void)

+{
+   struct mm_struct *mm = current->mm;
+   struct file *exe_file;
+   bool ret;
+
+   /* ChromeOS renames its thread to DrmThread. Also check the executable
+* name. */
+   if (strcmp(current->comm, "DrmThread") != 0 || !mm)
+   return false;
+
+   rcu_read_lock();
+   exe_file = rcu_dereference(mm->exe_file);
+   if (exe_file && !get_file_rcu(exe_file))
+   exe_file = NULL;
+   rcu_read_unlock();
+
+   if (!exe_file)
+   return false;
+   ret = strcmp(exe_file->f_path.dentry->d_name.name, "chrome") == 0;
+   fput(exe_file);
+
+   return ret;
+}
+
  static int validate_overlay(struct drm_atomic_state *state)
  {
int i;
@@ -10601,6 +10626,10 @@ static int validate_overlay(struct drm_atomic_state 
*state)
struct drm_plane_state *new_plane_state;
struct drm_plane_state *primary_state, *overlay_state = NULL;
  
+	/* This is a workaround for ChromeOS only */

+   if (!is_chromeos())
+   return 0;


I would have expected the check to be the other way around, as no the 
behavior on non-Chrome OS is changed?



+


Could some log be added, if ChromeOS is detected?


/* Check if primary plane is contained inside overlay */
for_each_new_plane_in_state_reverse(state, plane, new_plane_state, i) {
if (plane->type == DRM_PLANE_TYPE_OVERLAY) {




Kind regards,

Paul


Re: [PATCH] drm/amdgpu: enable display for cyan skillfish

2021-10-12 Thread Paul Menzel

Dear Lang,


Am 12.10.21 um 08:16 schrieb Lang Yu:

Display support for cyan skillfish is ready now.


What is the last commit making it “ready”?


Enable it!


How did you test the patch?


Signed-off-by: Lang Yu 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 3 +--
  1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
index 2bebd2ce6474..4228c7964175 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -736,6 +736,7 @@ static int amdgpu_discovery_set_display_ip_blocks(struct 
amdgpu_device *adev)
case IP_VERSION(1, 0, 1):
case IP_VERSION(2, 0, 2):
case IP_VERSION(2, 0, 0):
+   case IP_VERSION(2, 0, 3):


The numbers are not ordered. Could you please put a patch before this 
one, fixing the ordering?



case IP_VERSION(2, 1, 0):
case IP_VERSION(3, 0, 0):
case IP_VERSION(3, 0, 2):
@@ -745,8 +746,6 @@ static int amdgpu_discovery_set_display_ip_blocks(struct 
amdgpu_device *adev)
case IP_VERSION(3, 1, 3):
amdgpu_device_ip_block_add(adev, _ip_block);
break;
-   case IP_VERSION(2, 0, 3):
-   break;
default:
return -EINVAL;
}




Kind regards,

Paul


RE: [PATCH] drm/amdgpu: enable display for cyan skillfish

2021-10-12 Thread Huang, Ray
[AMD Official Use Only]

+ Charlene.

Reviewed-by: Huang Rui 

-Original Message-
From: Yu, Lang  
Sent: Tuesday, October 12, 2021 2:16 PM
To: amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander ; Huang, Ray 
; Yu, Lang 
Subject: [PATCH] drm/amdgpu: enable display for cyan skillfish

Display support for cyan skillfish is ready now. Enable it!

Signed-off-by: Lang Yu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
index 2bebd2ce6474..4228c7964175 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -736,6 +736,7 @@ static int amdgpu_discovery_set_display_ip_blocks(struct 
amdgpu_device *adev)
case IP_VERSION(1, 0, 1):
case IP_VERSION(2, 0, 2):
case IP_VERSION(2, 0, 0):
+   case IP_VERSION(2, 0, 3):
case IP_VERSION(2, 1, 0):
case IP_VERSION(3, 0, 0):
case IP_VERSION(3, 0, 2):
@@ -745,8 +746,6 @@ static int amdgpu_discovery_set_display_ip_blocks(struct 
amdgpu_device *adev)
case IP_VERSION(3, 1, 3):
amdgpu_device_ip_block_add(adev, _ip_block);
break;
-   case IP_VERSION(2, 0, 3):
-   break;
default:
return -EINVAL;
}
-- 
2.25.1


Re: Fwd: [PATCH] Size can be any value and is user controlled resulting in overwriting the 40 byte array wr_buf with an arbitrary length of data from buf.

2021-10-12 Thread Christian König

Am 11.10.21 um 22:24 schrieb T. Williams:



-- Forwarded message -
From: *docfate111* >

Date: Mon, Oct 11, 2021 at 4:22 PM
Subject: [PATCH] Size can be any value and is user controlled 
resulting in overwriting the 40 byte array wr_buf with an arbitrary 
length of data from buf.
To: >
Cc: mailto:harry.wentl...@amd.com>>, 
mailto:sunpeng...@amd.com>>



Signed-off-by: docfate111 >


While the find might be correct there are a couple of style problems 
with the patch.


First of all the subject line must be shorter and should be something 
like "drm/amdgpu: fix out of bounds write".


The detailed description of the bug then comes into the commit message.

And finally please use your real name for the Signed-off-by line.

Apart from that good catch,
Christian.


---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c

index 87daa78a32b8..17f2756a64dc 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
@@ -263,7 +263,7 @@ static ssize_t dp_link_settings_write(struct file 
*f, const char __user *buf,

        if (!wr_buf)
                return -ENOSPC;

-       if (parse_write_buffer_into_params(wr_buf, size,
+       if (parse_write_buffer_into_params(wr_buf, wr_buf_size,
                                           (long *)param, buf,
                                           max_param_num,
                                           _nums)) {
--
2.25.1



--
Thank you for your time,
Thelford Williams




[PATCH] drm/amdgpu: enable display for cyan skillfish

2021-10-12 Thread Lang Yu
Display support for cyan skillfish is ready now. Enable it!

Signed-off-by: Lang Yu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
index 2bebd2ce6474..4228c7964175 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -736,6 +736,7 @@ static int amdgpu_discovery_set_display_ip_blocks(struct 
amdgpu_device *adev)
case IP_VERSION(1, 0, 1):
case IP_VERSION(2, 0, 2):
case IP_VERSION(2, 0, 0):
+   case IP_VERSION(2, 0, 3):
case IP_VERSION(2, 1, 0):
case IP_VERSION(3, 0, 0):
case IP_VERSION(3, 0, 2):
@@ -745,8 +746,6 @@ static int amdgpu_discovery_set_display_ip_blocks(struct 
amdgpu_device *adev)
case IP_VERSION(3, 1, 3):
amdgpu_device_ip_block_add(adev, _ip_block);
break;
-   case IP_VERSION(2, 0, 3):
-   break;
default:
return -EINVAL;
}
-- 
2.25.1