Re: [PATCH 8/8] drm/amdgpu: use the new TTM bytes moved counter v2

2017-11-19 Thread Chunming Zhou

 Reviewed-by: Chunming Zhou  for the series.


btw: any new patch based on this for enabling eviction and swapout for 
per-vm-bo?


Regards,
David Zhou
On 2017年11月17日 18:49, Christian König wrote:

Instead of the global statistics use the per context bytes moved counter.

v2: rebased

Signed-off-by: Christian König 
Reviewed-by: Michel Dänzer 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c |  9 +++--
  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 10 +++---
  2 files changed, 6 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 41994b87c76e..bea5bc64bf7b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -344,7 +344,6 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
  {
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
struct ttm_operation_ctx ctx = { true, false };
-   u64 initial_bytes_moved, bytes_moved;
uint32_t domain;
int r;
  
@@ -374,15 +373,13 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
  
  retry:

amdgpu_ttm_placement_from_domain(bo, domain);
-   initial_bytes_moved = atomic64_read(>num_bytes_moved);
r = ttm_bo_validate(>tbo, >placement, );
-   bytes_moved = atomic64_read(>num_bytes_moved) -
- initial_bytes_moved;
-   p->bytes_moved += bytes_moved;
+
+   p->bytes_moved += ctx.bytes_moved;
if (adev->mc.visible_vram_size < adev->mc.real_vram_size &&
bo->tbo.mem.mem_type == TTM_PL_VRAM &&
bo->tbo.mem.start < adev->mc.visible_vram_size >> PAGE_SHIFT)
-   p->bytes_moved_vis += bytes_moved;
+   p->bytes_moved_vis += ctx.bytes_moved;
  
  	if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) {

domain = bo->allowed_domains;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 15027f751e07..dc0a8be98043 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -331,7 +331,6 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev,
struct amdgpu_bo *bo;
enum ttm_bo_type type;
unsigned long page_align;
-   u64 initial_bytes_moved, bytes_moved;
size_t acc_size;
int r;
  
@@ -406,22 +405,19 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev,

bo->tbo.bdev = >mman.bdev;
amdgpu_ttm_placement_from_domain(bo, domain);
  
-	initial_bytes_moved = atomic64_read(>num_bytes_moved);

-   /* Kernel allocation are uninterruptible */
r = ttm_bo_init_reserved(>mman.bdev, >tbo, size, type,
 >placement, page_align, , NULL,
 acc_size, sg, resv, _ttm_bo_destroy);
if (unlikely(r != 0))
return r;
  
-	bytes_moved = atomic64_read(>num_bytes_moved) -

- initial_bytes_moved;
if (adev->mc.visible_vram_size < adev->mc.real_vram_size &&
bo->tbo.mem.mem_type == TTM_PL_VRAM &&
bo->tbo.mem.start < adev->mc.visible_vram_size >> PAGE_SHIFT)
-   amdgpu_cs_report_moved_bytes(adev, bytes_moved, bytes_moved);
+   amdgpu_cs_report_moved_bytes(adev, ctx.bytes_moved,
+ctx.bytes_moved);
else
-   amdgpu_cs_report_moved_bytes(adev, bytes_moved, 0);
+   amdgpu_cs_report_moved_bytes(adev, ctx.bytes_moved, 0);
  
  	if (kernel)

bo->tbo.priority = 1;


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Kernel crash/Null pointer dereference on vblank

2017-11-19 Thread Martin Babutzka
Dear AMD Developers,

At first congratulations for the DC code submission to the 4.15 kernel.
Unfortunately the major regression which I reported on 29.09., 06.10.,
02.11. and 05.11. still exists. But this time I got additional
debugging information maybe this helps to fix it.

Summary: I am running Xubuntu 17.10 with the amd-staging-drm-next
kernel patched to 4.14.0. The latest build which I tested is from
includes all commits up to now (including 2017-11-17 19:51:57 (GMT)
commit  85d09ce5e5039644487e9508d6359f9f4cf64427).

Some vblank operations make the kernel crash and hang up the whole
system. The error is reproducible by enabling the screen lock or the
suspend mode. The system can not return to proper state from either of
these (after all I am not 100% sure it is the same error). Debugging is
 easier with screen lock. Attached you can find the kernel crash and
the dce110_vblank_set function modified by some kernel prints. It looks
like the function is called twice and does not work the second time.
The whole code around dce110_vblank_set also looks interrupt-ish -
could this be a race condition or timing problem? Objects being cleared
from memory and then accessed by dce110_vblank_set?

Bug reports on this issue:
https://github.com/M-Bab/linux-kernel-amdgpu-binaries/issues/37
https://github.com/M-Bab/linux-kernel-amdgpu-binaries/issues/29

Many regards,
Martin (M-bab)bool dce110_vblank_set(
struct irq_service *irq_service,
const struct irq_source_info *info,
bool enable)
{
printk(KERN_ALERT "DEBUG: Passed %s %d \n",__FUNCTION__,__LINE__);
struct dc_context *dc_ctx = irq_service->ctx;
printk(KERN_ALERT "DEBUG: Passed %s %d \n",__FUNCTION__,__LINE__);
struct dc *core_dc = irq_service->ctx->dc;
printk(KERN_ALERT "DEBUG: Passed %s %d \n",__FUNCTION__,__LINE__);
enum dc_irq_source dal_irq_src = dc_interrupt_to_irq_source(

irq_service->ctx->dc,

info->src_id,

info->ext_id);
uint8_t pipe_offset = dal_irq_src - IRQ_TYPE_VBLANK;
printk(KERN_ALERT "DEBUG: Passed %s %d \n",__FUNCTION__,__LINE__);

struct timing_generator *tg =

core_dc->current_state->res_ctx.pipe_ctx[pipe_offset].stream_res.tg;
printk(KERN_ALERT "DEBUG: Passed %s %d \n",__FUNCTION__,__LINE__);

if (enable) {
if (!tg->funcs->arm_vert_intr(tg, 2)) {
DC_ERROR("Failed to get VBLANK!\n");
return false;
}
}
printk(KERN_ALERT "DEBUG: Passed %s %d \n",__FUNCTION__,__LINE__);

dal_irq_service_set_generic(irq_service, info, enable);
printk(KERN_ALERT "DEBUG: Passed %s %d \n",__FUNCTION__,__LINE__);
return true;

}


"normal" vblank during boot:
Nov 19 22:33:10 Main-PC kernel: [   17.605100] DEBUG: Passed dce110_vblank_set 
208 
Nov 19 22:33:10 Main-PC kernel: [   17.605102] DEBUG: Passed dce110_vblank_set 
210 
Nov 19 22:33:10 Main-PC kernel: [   17.605103] DEBUG: Passed dce110_vblank_set 
212 
Nov 19 22:33:10 Main-PC kernel: [   17.605104] DEBUG: Passed dce110_vblank_set 
218 
Nov 19 22:33:10 Main-PC kernel: [   17.605104] DEBUG: Passed dce110_vblank_set 
222 
Nov 19 22:33:10 Main-PC kernel: [   17.605108] DEBUG: Passed dce110_vblank_set 
230 
Nov 19 22:33:10 Main-PC kernel: [   17.605110] DEBUG: Passed dce110_vblank_set 
233 

vblank on screen lock in kernel.log/syslog:
Nov 19 22:34:10 Main-PC kernel: [   78.664890] DEBUG: Passed dce110_vblank_set 
208 
Nov 19 22:34:10 Main-PC kernel: [   78.664892] DEBUG: Passed dce110_vblank_set 
210 
Nov 19 22:34:10 Main-PC kernel: [   78.664893] DEBUG: Passed dce110_vblank_set 
212 
Nov 19 22:34:10 Main-PC kernel: [   78.664894] DEBUG: Passed dce110_vblank_set 
218 
Nov 19 22:34:10 Main-PC kernel: [   78.664894] DEBUG: Passed dce110_vblank_set 
222 
Nov 19 22:34:10 Main-PC kernel: [   78.664895] DEBUG: Passed dce110_vblank_set 
230 
Nov 19 22:34:10 Main-PC kernel: [   78.664896] DEBUG: Passed dce110_vblank_set 
233 
Nov 19 22:34:27 Main-PC kernel: [   96.113426] DEBUG: Passed dce110_vblank_set 
208 
Nov 19 22:34:27 Main-PC kernel: [   96.113433] DEBUG: Passed dce110_vblank_set 
210 
Nov 19 22:34:27 Main-PC kernel: [   96.113435] DEBUG: Passed dce110_vblank_set 
212 
Nov 19 22:34:27 Main-PC kernel: [   96.113438] DEBUG: Passed dce110_vblank_set 
218 
Nov 19 22:34:27 Main-PC kernel: [   96.113440] DEBUG: Passed dce110_vblank_set 
222 
Nov 19 22:34:27 Main-PC kernel: [   96.113448] BUG: unable to handle kernel 
NULL pointer dereference at   (null)
Nov 19 22:34:27 Main-PC kernel: [   96.113521] IP: dce110_vblank_set+0xe2/0x160 
[amdgpu]
Nov 19 22:34:27 Main-PC kernel: [   96.113524] PGD 0 P4D 0 
Nov 19 22:34:27 Main-PC kernel: [   96.113531] Oops:  

[PATCH 2/5] drm/amd/powerplay: Fix missing newlines at end of file

2017-11-19 Thread Ernst Sjöstrand
Signed-off-by: Ernst Sjöstrand 
---
 drivers/gpu/drm/amd/include/kgd_pp_interface.h  | 2 +-
 drivers/gpu/drm/amd/powerplay/hwmgr/pp_overdriver.h | 2 +-
 drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.c  | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h 
b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
index eab504ecca25..ed27626dff14 100644
--- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
@@ -291,4 +291,4 @@ struct amd_pm_funcs {
struct amd_pp_simple_clock_info *clocks);
 };
 
-#endif
\ No newline at end of file
+#endif
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/pp_overdriver.h 
b/drivers/gpu/drm/amd/powerplay/hwmgr/pp_overdriver.h
index c6ba0d64cfb7..4112a9398163 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/pp_overdriver.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/pp_overdriver.h
@@ -43,4 +43,4 @@ struct phm_fuses_default {
 extern int pp_override_get_default_fuse_value(uint64_t key,
struct phm_fuses_default *result);
 
-#endif
\ No newline at end of file
+#endif
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.c 
b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.c
index c062844b15f3..560c1c159fcc 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.c
@@ -542,4 +542,4 @@ int pp_atomfwctrl_get_vbios_bootup_values(struct pp_hwmgr 
*hwmgr,
boot_values->ulDCEFClk   = frequency;
 
return 0;
-}
\ No newline at end of file
+}
-- 
2.14.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 5/5] drm/amd/powerplay: Followup fixes to mc_reg_address

2017-11-19 Thread Ernst Sjöstrand
This is a followup to:
drm/amd/powerplay: Fix buffer overflows with mc_reg_address

Rework *_set_mc_special_registers for the other architectures to
use the same logic as the first patch. This allows the last entry
of the array to be filled without an error message for example.
This doesn't fix any known problems, perhaps avoided by luck.

Signed-off-by: Ernst Sjöstrand 
---
 drivers/gpu/drm/amd/amdgpu/si_dpm.c   | 10 +++---
 drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c  | 12 
 drivers/gpu/drm/amd/powerplay/smumgr/iceland_smumgr.c | 12 
 3 files changed, 11 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/si_dpm.c 
b/drivers/gpu/drm/amd/amdgpu/si_dpm.c
index 51fd0c9a20a5..299cb3161b2c 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/si_dpm.c
@@ -5845,9 +5845,9 @@ static int si_set_mc_special_registers(struct 
amdgpu_device *adev,
((temp_reg & 0x)) |

((table->mc_reg_table_entry[k].mc_data[i] & 0x) >> 16);
j++;
+
if (j >= SMC_SISLANDS_MC_REGISTER_ARRAY_SIZE)
return -EINVAL;
-
temp_reg = RREG32(MC_PMG_CMD_MRS);
table->mc_reg_address[j].s1 = MC_PMG_CMD_MRS;
table->mc_reg_address[j].s0 = MC_SEQ_PMG_CMD_MRS_LP;
@@ -5859,18 +5859,16 @@ static int si_set_mc_special_registers(struct 
amdgpu_device *adev,
table->mc_reg_table_entry[k].mc_data[j] 
|= 0x100;
}
j++;
-   if (j >= SMC_SISLANDS_MC_REGISTER_ARRAY_SIZE)
-   return -EINVAL;
 
if (adev->mc.vram_type != AMDGPU_VRAM_TYPE_GDDR5) {
+   if (j >= SMC_SISLANDS_MC_REGISTER_ARRAY_SIZE)
+   return -EINVAL;
table->mc_reg_address[j].s1 = MC_PMG_AUTO_CMD;
table->mc_reg_address[j].s0 = MC_PMG_AUTO_CMD;
for (k = 0; k < table->num_entries; k++)
table->mc_reg_table_entry[k].mc_data[j] 
=

(table->mc_reg_table_entry[k].mc_data[i] & 0x) >> 16;
j++;
-   if (j >= SMC_SISLANDS_MC_REGISTER_ARRAY_SIZE)
-   return -EINVAL;
}
break;
case MC_SEQ_RESERVE_M:
@@ -5882,8 +5880,6 @@ static int si_set_mc_special_registers(struct 
amdgpu_device *adev,
(temp_reg & 0x) |

(table->mc_reg_table_entry[k].mc_data[i] & 0x);
j++;
-   if (j >= SMC_SISLANDS_MC_REGISTER_ARRAY_SIZE)
-   return -EINVAL;
break;
default:
break;
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c 
b/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c
index ed4b37e566a3..c36f00ef46f3 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c
@@ -2600,9 +2600,9 @@ static int ci_set_mc_special_registers(struct pp_hwmgr 
*hwmgr,

((table->mc_reg_table_entry[k].mc_data[i] & 0x) >> 16);
}
j++;
+
PP_ASSERT_WITH_CODE((j < 
SMU7_DISCRETE_MC_REGISTER_ARRAY_SIZE),
"Invalid VramInfo table.", return -EINVAL);
-
temp_reg = cgs_read_register(hwmgr->device, 
mmMC_PMG_CMD_MRS);
table->mc_reg_address[j].s1 = mmMC_PMG_CMD_MRS;
table->mc_reg_address[j].s0 = mmMC_SEQ_PMG_CMD_MRS_LP;
@@ -2615,10 +2615,10 @@ static int ci_set_mc_special_registers(struct pp_hwmgr 
*hwmgr,
table->mc_reg_table_entry[k].mc_data[j] 
|= 0x100;
}
j++;
-   PP_ASSERT_WITH_CODE((j <= 
SMU7_DISCRETE_MC_REGISTER_ARRAY_SIZE),
-   "Invalid VramInfo table.", return -EINVAL);
 
-   if (!data->is_memory_gddr5 && j < 
SMU7_DISCRETE_MC_REGISTER_ARRAY_SIZE) {
+   if (!data->is_memory_gddr5) {
+   PP_ASSERT_WITH_CODE((j < 
SMU7_DISCRETE_MC_REGISTER_ARRAY_SIZE),
+   "Invalid VramInfo table.", return 
-EINVAL);
table->mc_reg_address[j].s1 = mmMC_PMG_AUTO_CMD;
  

[PATCH 4/5] drm/amd/powerplay: Fix buffer overflows with mc_reg_address

2017-11-19 Thread Ernst Sjöstrand
Smatch warned about the following lines:
ci_set_mc_special_registers() error: buffer overflow 'table->mc_reg_address' 16 
<= 16
tonga_set_mc_special_registers() error: buffer overflow 'table->mc_reg_address' 
16 <= 16

Change the logic to check before access instead of after incrementing.
It's fine if j reaches max after we're done. This allows the last entry
of the array to be filled without an error message for example.
Changed some whitespace to clarify grouping.

Signed-off-by: Ernst Sjöstrand 
---
 drivers/gpu/drm/amd/amdgpu/ci_dpm.c | 10 +++---
 drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c | 10 +++---
 2 files changed, 6 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c 
b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
index 5a60c161b0fc..f11c0aacf19f 100644
--- a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
@@ -4540,9 +4540,9 @@ static int ci_set_mc_special_registers(struct 
amdgpu_device *adev,
((temp_reg & 0x)) | 
((table->mc_reg_table_entry[k].mc_data[i] & 0x) >> 16);
}
j++;
+
if (j >= SMU7_DISCRETE_MC_REGISTER_ARRAY_SIZE)
return -EINVAL;
-
temp_reg = RREG32(mmMC_PMG_CMD_MRS);
table->mc_reg_address[j].s1 = mmMC_PMG_CMD_MRS;
table->mc_reg_address[j].s0 = mmMC_SEQ_PMG_CMD_MRS_LP;
@@ -4553,10 +4553,10 @@ static int ci_set_mc_special_registers(struct 
amdgpu_device *adev,
table->mc_reg_table_entry[k].mc_data[j] 
|= 0x100;
}
j++;
-   if (j > SMU7_DISCRETE_MC_REGISTER_ARRAY_SIZE)
-   return -EINVAL;
 
if (adev->mc.vram_type != AMDGPU_VRAM_TYPE_GDDR5) {
+   if (j >= SMU7_DISCRETE_MC_REGISTER_ARRAY_SIZE)
+   return -EINVAL;
table->mc_reg_address[j].s1 = mmMC_PMG_AUTO_CMD;
table->mc_reg_address[j].s0 = mmMC_PMG_AUTO_CMD;
for (k = 0; k < table->num_entries; k++) {
@@ -4564,8 +4564,6 @@ static int ci_set_mc_special_registers(struct 
amdgpu_device *adev,

(table->mc_reg_table_entry[k].mc_data[i] & 0x) >> 16;
}
j++;
-   if (j > SMU7_DISCRETE_MC_REGISTER_ARRAY_SIZE)
-   return -EINVAL;
}
break;
case mmMC_SEQ_RESERVE_M:
@@ -4577,8 +4575,6 @@ static int ci_set_mc_special_registers(struct 
amdgpu_device *adev,
(temp_reg & 0x) | 
(table->mc_reg_table_entry[k].mc_data[i] & 0x);
}
j++;
-   if (j > SMU7_DISCRETE_MC_REGISTER_ARRAY_SIZE)
-   return -EINVAL;
break;
default:
break;
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c 
b/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c
index 0a8e48bff219..81b8790c0d22 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c
@@ -3106,9 +3106,9 @@ static int tonga_set_mc_special_registers(struct pp_hwmgr 
*hwmgr,

((table->mc_reg_table_entry[k].mc_data[i] & 0x) >> 16);
}
j++;
+
PP_ASSERT_WITH_CODE((j < 
SMU72_DISCRETE_MC_REGISTER_ARRAY_SIZE),
"Invalid VramInfo table.", return -EINVAL);
-
temp_reg = cgs_read_register(hwmgr->device, 
mmMC_PMG_CMD_MRS);
table->mc_reg_address[j].s1 = mmMC_PMG_CMD_MRS;
table->mc_reg_address[j].s0 = mmMC_SEQ_PMG_CMD_MRS_LP;
@@ -3121,18 +3121,16 @@ static int tonga_set_mc_special_registers(struct 
pp_hwmgr *hwmgr,
table->mc_reg_table_entry[k].mc_data[j] 
|= 0x100;
}
j++;
-   PP_ASSERT_WITH_CODE((j <= 
SMU72_DISCRETE_MC_REGISTER_ARRAY_SIZE),
-   "Invalid VramInfo table.", return -EINVAL);
 
if (!data->is_memory_gddr5) {
+   PP_ASSERT_WITH_CODE((j < 
SMU72_DISCRETE_MC_REGISTER_ARRAY_SIZE),
+   "Invalid VramInfo table.", return 
-EINVAL);
table->mc_reg_address[j].s1 = mmMC_PMG_AUTO_CMD;
 

[PATCH 1/5] drm/amd/powerplay: Minor fixes in processpptables.c

2017-11-19 Thread Ernst Sjöstrand
Reported by smatch:
init_overdrive_limits() error: uninitialized symbol 'result'.
get_clock_voltage_dependency_table() warn: inconsistent indenting

Signed-off-by: Ernst Sjöstrand 
---
 drivers/gpu/drm/amd/powerplay/hwmgr/processpptables.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/processpptables.c 
b/drivers/gpu/drm/amd/powerplay/hwmgr/processpptables.c
index afae32ee2b0d..7c5b426320f1 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/processpptables.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/processpptables.c
@@ -394,8 +394,8 @@ static int get_clock_voltage_dependency_table(struct 
pp_hwmgr *hwmgr,
dep_table->entries[i].clk =
((unsigned long)table->entries[i].ucClockHigh << 16) |
le16_to_cpu(table->entries[i].usClockLow);
-   dep_table->entries[i].v =
-   (unsigned 
long)le16_to_cpu(table->entries[i].usVoltage);
+   dep_table->entries[i].v =
+   (unsigned long)le16_to_cpu(table->entries[i].usVoltage);
}
 
*ptable = dep_table;
@@ -1042,7 +1042,7 @@ static int init_overdrive_limits_V2_1(struct pp_hwmgr 
*hwmgr,
 static int init_overdrive_limits(struct pp_hwmgr *hwmgr,
const ATOM_PPLIB_POWERPLAYTABLE *powerplay_table)
 {
-   int result;
+   int result = 1;
uint8_t frev, crev;
uint16_t size;
 
-- 
2.14.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 3/5] drm/amd/amdgpu: Fix missing null check in atombios_i2c.c

2017-11-19 Thread Ernst Sjöstrand
Reported by smatch:
amdgpu_atombios_i2c_process_i2c_ch() error: we previously assumed 'buf' could 
be null

Signed-off-by: Ernst Sjöstrand 
---
 drivers/gpu/drm/amd/amdgpu/atombios_i2c.c | 11 +--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c 
b/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c
index b374653bd6cf..f9b2ce9a98f3 100644
--- a/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c
+++ b/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c
@@ -65,8 +65,15 @@ static int amdgpu_atombios_i2c_process_i2c_ch(struct 
amdgpu_i2c_chan *chan,
args.ucRegIndex = buf[0];
if (num)
num--;
-   if (num)
-   memcpy(, [1], num);
+   if (num) {
+   if (buf) {
+   memcpy(, [1], num);
+   } else {
+   DRM_ERROR("hw i2c: missing buf with num > 1\n");
+   r = -EINVAL;
+   goto done;
+   }
+   }
args.lpI2CDataOut = cpu_to_le16(out);
} else {
if (num > ATOM_MAX_HW_I2C_READ) {
-- 
2.14.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 3/4] drm/amdkfd: Add CWSR support

2017-11-19 Thread Oded Gabbay
On Tue, Nov 14, 2017 at 11:41 PM, Felix Kuehling  wrote:
> This hardware feature allows the GPU to preempt shader execution in
> the middle of a compute wave, save the state and restore it later
> to resume execution.
>
> Memory for saving the state is allocated per queue in user mode and
> the address and size passed to the create_queue ioctl. The size
Is this a correct description?
It seems to me the memory is allocated at kfd_process_init_cwsr() and
the address is saved internally and not passed in the create_ioctl.
Which begs the question, why indeed it is not allocated by the user
and then passed through the create_ioctl function ?


> depends on the number of waves that can be in flight simultaneously
> on a given ASIC.
>
> Signed-off-by: Shaoyun.liu 
> Signed-off-by: Yong Zhao 
> Signed-off-by: Felix Kuehling 
> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c   |  7 +-
>  drivers/gpu/drm/amd/amdkfd/kfd_device.c| 20 -
>  .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c  |  6 ++
>  drivers/gpu/drm/amd/amdkfd/kfd_module.c|  4 +
>  drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c| 27 +++
>  drivers/gpu/drm/amd/amdkfd/kfd_priv.h  | 31 +++-
>  drivers/gpu/drm/amd/amdkfd/kfd_process.c   | 87 
> +-
>  include/uapi/linux/kfd_ioctl.h |  3 +-
>  8 files changed, 179 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> index 505d391..2a4612d 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> @@ -117,7 +117,7 @@ static int kfd_open(struct inode *inode, struct file 
> *filep)
> return -EPERM;
> }
>
> -   process = kfd_create_process(current);
> +   process = kfd_create_process(filep);
> if (IS_ERR(process))
> return PTR_ERR(process);
>
> @@ -206,6 +206,7 @@ static int set_queue_properties_from_user(struct 
> queue_properties *q_properties,
> q_properties->ctx_save_restore_area_address =
> args->ctx_save_restore_address;
> q_properties->ctx_save_restore_area_size = 
> args->ctx_save_restore_size;
> +   q_properties->ctl_stack_size = args->ctl_stack_size;
> if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE ||
> args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL)
> q_properties->type = KFD_QUEUE_TYPE_COMPUTE;
> @@ -1088,6 +1089,10 @@ static int kfd_mmap(struct file *filp, struct 
> vm_area_struct *vma)
> KFD_MMAP_EVENTS_MASK) {
> vma->vm_pgoff = vma->vm_pgoff ^ KFD_MMAP_EVENTS_MASK;
> return kfd_event_mmap(process, vma);
> +   } else if ((vma->vm_pgoff & KFD_MMAP_RESERVED_MEM_MASK) ==
> +   KFD_MMAP_RESERVED_MEM_MASK) {
> +   vma->vm_pgoff = vma->vm_pgoff ^ KFD_MMAP_RESERVED_MEM_MASK;
> +   return kfd_reserved_mem_mmap(process, vma);
> }
>
> return -EFAULT;
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> index 621a3b5..4f05eac 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> @@ -27,6 +27,7 @@
>  #include "kfd_priv.h"
>  #include "kfd_device_queue_manager.h"
>  #include "kfd_pm4_headers_vi.h"
> +#include "cwsr_trap_handler_gfx8.asm"
>
>  #define MQD_SIZE_ALIGNED 768
>
> @@ -38,7 +39,8 @@ static const struct kfd_device_info kaveri_device_info = {
> .ih_ring_entry_size = 4 * sizeof(uint32_t),
> .event_interrupt_class = _interrupt_class_cik,
> .num_of_watch_points = 4,
> -   .mqd_size_aligned = MQD_SIZE_ALIGNED
> +   .mqd_size_aligned = MQD_SIZE_ALIGNED,
> +   .supports_cwsr = false,
>  };
>
>  static const struct kfd_device_info carrizo_device_info = {
> @@ -49,7 +51,8 @@ static const struct kfd_device_info carrizo_device_info = {
> .ih_ring_entry_size = 4 * sizeof(uint32_t),
> .event_interrupt_class = _interrupt_class_cik,
> .num_of_watch_points = 4,
> -   .mqd_size_aligned = MQD_SIZE_ALIGNED
> +   .mqd_size_aligned = MQD_SIZE_ALIGNED,
> +   .supports_cwsr = true,
>  };
>
>  struct kfd_deviceid {
> @@ -212,6 +215,17 @@ static int iommu_invalid_ppr_cb(struct pci_dev *pdev, 
> int pasid,
> return AMD_IOMMU_INV_PRI_RSP_INVALID;
>  }
>
> +static void kfd_cwsr_init(struct kfd_dev *kfd)
> +{
> +   if (cwsr_enable && kfd->device_info->supports_cwsr) {
> +   BUILD_BUG_ON(sizeof(cwsr_trap_gfx8_hex) > PAGE_SIZE);
> +
> +   kfd->cwsr_isa = cwsr_trap_gfx8_hex;
> +   kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx8_hex);
> +   kfd->cwsr_enabled = true;
> +   }
> +}
> +
>  bool kgd2kfd_device_init(struct 

Re: [PATCH 2/4] drm/amdkfd: Add trap handler for CWSR

2017-11-19 Thread Oded Gabbay
On Tue, Nov 14, 2017 at 11:41 PM, Felix Kuehling  wrote:
> The trap handler is like an interrupt handler running on the GPU
> compute unit. It is needed for supporting CWSR (compute wave
> save/restore).
>
> This file defines an array with the pre-compiled GFXv8 shader ISA.
> The assembly code is included for reference in #if 0 ... #endif.
>
> Signed-off-by: Shaoyun.liu 
> Signed-off-by: Jay Cornwall 
> Signed-off-by: Felix Kuehling 
> ---
>  .../gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm  | 1384 
> 
>  1 file changed, 1384 insertions(+)
>  create mode 100644 drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm 
> b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm
> new file mode 100644
> index 000..751cc2e
> --- /dev/null
> +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm
> @@ -0,0 +1,1384 @@
> +/*
> + * Copyright 2015-2017 Advanced Micro Devices, Inc.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> + * OTHER DEALINGS IN THE SOFTWARE.
> + */
> +
> +#if 0
> +HW (VI) source code for CWSR trap handler
> +#Version 18 + multiple trap handler
> +
> +// this performance-optimal version was originally from Seven Xu at SRDC
> +
> +// Revison #18   --...
> +/* Rev History
> +** #1. Branch from gc dv.   
> //gfxip/gfx8/main/src/test/suites/block/cs/sr/cs_trap_handler.sp3#1,#50, #51, 
> #52-53(Skip, Already Fixed by PV), #54-56(merged),#57-58(mergerd, 
> skiped-already fixed by PV)
> +** #4. SR Memory Layout:
> +** 1. VGPR-SGPR-HWREG-{LDS}
> +** 2. tba_hi.bits.26 - reconfigured as the first wave in tg 
> bits, for defer Save LDS for a threadgroup.. performance concern..
> +** #5. Update: 1. Accurate g8sr_ts_save_d timestamp
> +** #6. Update: 1. Fix s_barrier usage; 2. VGPR s/r using swizzle 
> buffer?(NoNeed, already matched the swizzle pattern, more investigation)
> +** #7. Update: 1. don't barrier if noLDS
> +** #8. Branch: 1. Branch to ver#0, which is very similar to gc dv version
> +** 2. Fix SQ issue by s_sleep 2
> +** #9. Update: 1. Fix scc restore failed issue, restore wave_status at last
> +** 2. optimize s_buffer save by burst 16sgprs...
> +** #10. Update 1. Optimize restore sgpr by busrt 16 sgprs.
> +** #11. Update 1. Add 2 more timestamp for debug version
> +** #12. Update 1. Add VGPR SR using DWx4, some case improve and some case 
> drop performance
> +** #13. Integ  1. Always use MUBUF for PV trap shader...
> +** #14. Update 1. s_buffer_store soft clause...
> +** #15. Update 1. PERF - sclar write with glc:0/mtype0 to allow L2 combine. 
> perf improvement a lot.
> +** #16. Update 1. PRRF - UNROLL LDS_DMA got 2500cycle save in IP tree
> +** #17. Update 1. FUNC - LDS_DMA has issues while ATC, replace with 
> ds_read/buffer_store for save part[TODO restore part]
> +** 2. PERF - Save LDS before save VGPR to cover LDS save long 
> latency...
> +** #18. Update 1. FUNC - Implicitly estore STATUS.VCCZ, which is not 
> writable by s_setreg_b32
> +** 2. FUNC - Handle non-CWSR traps
> +*/
> +
> +var G8SR_WDMEM_HWREG_OFFSET = 0
> +var G8SR_WDMEM_SGPR_OFFSET  = 128  // in bytes
> +
> +// Keep definition same as the app shader, These 2 time stamps are part of 
> the app shader... Should before any Save and after restore.
> +
> +var G8SR_DEBUG_TIMESTAMP = 0
> +var G8SR_DEBUG_TS_SAVE_D_OFFSET = 40*4  // ts_save_d timestamp offset 
> relative to SGPR_SR_memory_offset
> +var s_g8sr_ts_save_s= s[34:35]   // save start
> +var s_g8sr_ts_sq_save_msg  = s[36:37]   // The save shader send SAVEWAVE msg 
> to spi
> +var s_g8sr_ts_spi_wrexec   = s[38:39]   // the SPI write the sr address to SQ
> +var s_g8sr_ts_save_d= s[40:41]   // save end
> +var s_g8sr_ts_restore_s = s[42:43]   // restore start
> +var 

Re: [PATCH 1/4] drm/amdkfd: Cleanup qpd.pqm initialization

2017-11-19 Thread Oded Gabbay
On Tue, Nov 14, 2017 at 11:41 PM, Felix Kuehling  wrote:
> The PQM doesn't change after process creation. So initialize it in
> kfd_create_process_device_data.
>
> Signed-off-by: Felix Kuehling 
> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_process.c   | 1 +
>  drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 4 +---
>  2 files changed, 2 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> index 1f5ccd28..1bb9b26 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> @@ -348,6 +348,7 @@ struct kfd_process_device 
> *kfd_create_process_device_data(struct kfd_dev *dev,
> INIT_LIST_HEAD(>qpd.queues_list);
> INIT_LIST_HEAD(>qpd.priv_queue_list);
> pdd->qpd.dqm = dev->dqm;
> +   pdd->qpd.pqm = >pqm;
> pdd->process = p;
> pdd->bound = PDD_UNBOUND;
> pdd->already_dequeued = false;
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
> index a3f1e62..eeb7726 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
> @@ -178,10 +178,8 @@ int pqm_create_queue(struct process_queue_manager *pqm,
> return retval;
>
> if (list_empty(>qpd.queues_list) &&
> -   list_empty(>qpd.priv_queue_list)) {
> -   pdd->qpd.pqm = pqm;
> +   list_empty(>qpd.priv_queue_list))
> dev->dqm->ops.register_process(dev->dqm, >qpd);
> -   }
>
> pqn = kzalloc(sizeof(*pqn), GFP_KERNEL);
> if (!pqn) {
> --
> 2.7.4
>
This patch is:
Reviewed-by: Oded Gabbay 
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 1/1] drm/amdkfd: Do not ignore requested queue size during allocation

2017-11-19 Thread Oded Gabbay
On Thu, Nov 16, 2017 at 11:36 PM, Jan Vesely  wrote:
> Signed-off-by: Jan Vesely 
> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c | 5 +++--
>  1 file changed, 3 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c
> index f1d48281e322..b3bee39661ab 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c
> @@ -37,15 +37,16 @@ static bool initialize_vi(struct kernel_queue *kq, struct 
> kfd_dev *dev,
> enum kfd_queue_type type, unsigned int queue_size)
>  {
> int retval;
> +   unsigned int size = ALIGN(queue_size, PAGE_SIZE);
>
> -   retval = kfd_gtt_sa_allocate(dev, PAGE_SIZE, >eop_mem);
> +   retval = kfd_gtt_sa_allocate(dev, size, >eop_mem);
> if (retval != 0)
> return false;
>
> kq->eop_gpu_addr = kq->eop_mem->gpu_addr;
> kq->eop_kernel_addr = kq->eop_mem->cpu_ptr;
>
> -   memset(kq->eop_kernel_addr, 0, PAGE_SIZE);
> +   memset(kq->eop_kernel_addr, 0, size);
>
> return true;
>  }
> --
> 2.13.6
>
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Thanks!
Applied to -next tree
Oded
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx