Re: [PATCH 2/5] drm/amdgpu: Add vm context module param

2017-05-24 Thread Christian König

On 15.05.2017 at 23:32, Harish Kasiviswanathan wrote:

Add a VM update mode module param (amdgpu.vm_update_mode) that can be used to
control how VM pde/pte entries are updated for Graphics and Compute.

BIT0 controls Graphics and BIT1 Compute.
  BIT0 [= 0] Graphics updated by SDMA [= 1] by CPU
  BIT1 [= 0] Compute updated by SDMA [= 1] by CPU

By default, vm_update_mode = 2 only on large BAR systems, indicating that
Graphics VMs will be updated via SDMA and Compute VMs will be updated via the
CPU. For all other systems, vm_update_mode defaults to 0.
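
For illustration, the default described above could be resolved roughly like
this (a sketch based on the commit message; where exactly this happens is an
assumption, but the flag names match the diff below):

	/* Sketch: resolve the module default of -1 as described above. */
	if (amdgpu_vm_update_mode == -1) {
		if (amdgpu_vm_is_large_bar(adev))
			/* large BAR: Compute via CPU (BIT1), Graphics via SDMA */
			adev->vm_manager.vm_update_mode =
				AMDGPU_VM_USE_CPU_FOR_COMPUTE;
		else
			adev->vm_manager.vm_update_mode = 0; /* SDMA for both */
	} else {
		adev->vm_manager.vm_update_mode = amdgpu_vm_update_mode;
	}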

Signed-off-by: Harish Kasiviswanathan 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu.h |  1 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c |  4 
  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c |  3 ++-
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 35 -
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  | 20 ++-
  5 files changed, 60 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index fadeb55..fd84410 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -94,6 +94,7 @@
  extern int amdgpu_vm_block_size;
  extern int amdgpu_vm_fault_stop;
  extern int amdgpu_vm_debug;
+extern int amdgpu_vm_update_mode;
  extern int amdgpu_dc;
  extern int amdgpu_sched_jobs;
  extern int amdgpu_sched_hw_submission;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 130c45d..8d28a35 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -94,6 +94,7 @@
  int amdgpu_vm_fault_stop = 0;
  int amdgpu_vm_debug = 0;
  int amdgpu_vram_page_split = 512;
+int amdgpu_vm_update_mode = -1;
  int amdgpu_exp_hw_support = 0;
  int amdgpu_dc = -1;
  int amdgpu_sched_jobs = 32;
@@ -180,6 +181,9 @@
  MODULE_PARM_DESC(vm_debug, "Debug VM handling (0 = disabled (default), 1 = 
enabled)");
  module_param_named(vm_debug, amdgpu_vm_debug, int, 0644);
  
+MODULE_PARM_DESC(vm_update_mode, "VM update using CPU (0 = never (default except for large BAR(LB)), 1 = Graphics only, 2 = Compute only (default for LB), 3 = Both");

+module_param_named(vm_update_mode, amdgpu_vm_update_mode, int, 0444);
+
  MODULE_PARM_DESC(vram_page_split, "Number of pages after we split VRAM allocations 
(default 1024, -1 = disable)");
  module_param_named(vram_page_split, amdgpu_vram_page_split, int, 0444);
  
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c

index d167949..8f6c20f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -774,7 +774,8 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct 
drm_file *file_priv)
goto out_suspend;
}
  
-	r = amdgpu_vm_init(adev, &fpriv->vm);
+	r = amdgpu_vm_init(adev, &fpriv->vm,
+			   AMDGPU_VM_CONTEXT_GFX);
if (r) {
kfree(fpriv);
goto out_suspend;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index c644e54..9c89cb2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -721,6 +721,11 @@ bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
return true;
  }
  
+static bool amdgpu_vm_is_large_bar(struct amdgpu_device *adev)

+{
+   return (adev->mc.real_vram_size == adev->mc.visible_vram_size);
+}
+
  /**
   * amdgpu_vm_flush - hardware flush the vm
   *
@@ -2291,10 +2296,12 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, 
uint64_t vm_size)
   *
   * @adev: amdgpu_device pointer
   * @vm: requested vm
+ * @vm_context: Indicates whether it is a GFX or Compute context
   *
   * Init @vm fields.
   */
-int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
+int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+  int vm_context)
  {
const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE,
AMDGPU_VM_PTE_COUNT(adev) * 8);
@@ -2323,6 +2330,16 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct 
amdgpu_vm *vm)
if (r)
return r;
  
+	if (vm_context == AMDGPU_VM_CONTEXT_COMPUTE)

+   vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
+   AMDGPU_VM_USE_CPU_FOR_COMPUTE);
+   else
+   vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
+   AMDGPU_VM_USE_CPU_FOR_GFX);
+   DRM_DEBUG_DRIVER("VM update mode is %s\n",
+vm->use_cpu_for_update ? "CPU" : "SDMA");
+   WARN_ONCE((vm->use_cpu_for_update & !amdgpu_vm_is_large_bar(adev)),
+ "CPU update of VM recommended only for large BAR system\n");
vm->last_dir_update = NULL;
  
  	r = amdgpu_bo_create(adev, amdgpu_vm_bo_size(adev, 

RE: [PATCH 0/5] GFX9 KIQ

2017-05-24 Thread Deucher, Alexander
> -Original Message-
> From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf
> Of Liu, Shaoyun
> Sent: Tuesday, May 23, 2017 7:38 PM
> To: Alex Deucher; StDenis, Tom
> Cc: amd-gfx list
> Subject: RE: [PATCH 0/5] GFX9 KIQ
> 
> I don't have any test for KCQ, but KFD uses the KIQ to invalidate_tlbs. I
> tried to add some print messages in the code to prove it goes through the
> new code path, but I don't see any of the messages I added. I tried pr_info,
> pr_err, printk, and DRM_ERROR, and nothing works. Has anything changed on
> the amdgpu side that would prevent the messages from printing?

Do you see the KIQ and compute ring and IB tests passing?

Alex

> 
> Regards
> shaoyun.liu
> 
> -Original Message-
> From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf
> Of Alex Deucher
> Sent: Tuesday, May 23, 2017 5:31 PM
> To: StDenis, Tom
> Cc: amd-gfx list
> Subject: Re: [PATCH 0/5] GFX9 KIQ
> 
> On Fri, May 12, 2017 at 7:11 AM, Tom St Denis 
> wrote:
> > On 11/05/17 07:33 PM, Tom St Denis wrote:
> >>
> >> On 11/05/17 02:35 PM, Alex Deucher wrote:
> >>>
> >>> These are the last of the gfx9 KIQ patches that haven't landed yet.
> >>> Can someone with gfx9 capable hw test this (vega10 or raven)?  This
> >>> is needed to enable powergating on gfx9.
> >>>
> >>> Thanks,
> >>
> >>
> >> If nobody gets to it by morning I'll try it out first thing on my
> >> vega10 though my VBIOS might need updating...
> >
> >
> >
> > They don't apply on top of either 4.9 nor 4.11...
> 
> Odd.  They applied for me (at least to the 4.11 tree I had checked out when I
> left town).  Attached again just in case.
> 
> Alex
> 
> >
> >
> > Tom
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 3/6] drm/amd/powerplay: update magic number for rv hw backend

2017-05-24 Thread Hawking Zhang
Change-Id: If51c5a5e4c534769b95105a1dd5ed76b4ec4bbf3
Signed-off-by: Hawking Zhang 
---
 drivers/gpu/drm/amd/powerplay/hwmgr/rv_hwmgr.c | 2 +-
 drivers/gpu/drm/amd/powerplay/inc/hwmgr.h  | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/rv_hwmgr.c 
b/drivers/gpu/drm/amd/powerplay/hwmgr/rv_hwmgr.c
index 7fa3980..8a1f0d9 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/rv_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/rv_hwmgr.c
@@ -43,7 +43,7 @@
 #define SMC_RAM_END 0x4
 
 
-static const unsigned long PhwRaven_Magic = (unsigned long) PHM_Cz_Magic;
+static const unsigned long PhwRaven_Magic = (unsigned long) PHM_Rv_Magic;
 
 struct phm_vq_budgeting_settings RV_VQTable[] =
 {
diff --git a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h 
b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
index 7779600..01755a5 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
@@ -99,7 +99,8 @@ enum PHM_BackEnd_Magic {
PHM_CIslands_Magic= 0x38AC78B0,
PHM_Kv_Magic  = 0xDCBBABC0,
PHM_VIslands_Magic= 0x20130307,
-   PHM_Cz_Magic  = 0x67DCBA25
+   PHM_Cz_Magic  = 0x67DCBA25,
+   PHM_Rv_Magic  = 0x20161121
 };
 
 
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 1/6] drm/amd/powerplay: initialize rv vq table

2017-05-24 Thread Hawking Zhang
Change-Id: I64696e3b5418bcdb24a2c359677506d87769afaf
Signed-off-by: Hawking Zhang 
---
 drivers/gpu/drm/amd/powerplay/hwmgr/rv_hwmgr.c | 47 +-
 drivers/gpu/drm/amd/powerplay/inc/hwmgr.h  | 18 +-
 2 files changed, 48 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/rv_hwmgr.c 
b/drivers/gpu/drm/amd/powerplay/hwmgr/rv_hwmgr.c
index 631ef82..26906c3 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/rv_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/rv_hwmgr.c
@@ -45,10 +45,28 @@
 
 static const unsigned long PhwRaven_Magic = (unsigned long) PHM_Cz_Magic;
 
-struct phm_vq_budgeting_record rv_vqtable[] = {
-   /* _TBD
-* CUs, SSP low, SSP High, Min Sclk Low, Min Sclk, High, AWD/non-AWD, 
DCLK, ECLK, Sustainable Sclk, Sustainable CUs */
-   { 8, 0, 45, 0, 0, VQ_DisplayConfig_NoneAWD, 8, 12, 4, 0 },
+struct phm_vq_budgeting_settings RV_VQTable[] =
+{
+   /* CUs, SSP low, SSP High, Display Configuration, AWD/non-AWD,
+* Sustainable GFXCLK, Sustainable FCLK, Sustainable CUs,
+ * unused, unused, unused */
+   { 11, 30, 60, VQ_DisplayConfig_NoneAWD,  8, 16, 11, 0, 0, 0 },
+   { 11, 30, 60, VQ_DisplayConfig_AWD,  8, 16, 11, 0, 0, 0 },
+
+   {  8, 30, 60, VQ_DisplayConfig_NoneAWD, 10, 16,  8, 0, 0, 0 },
+   {  8, 30, 60, VQ_DisplayConfig_AWD, 10, 16,  8, 0, 0, 0 },
+
+   { 10, 12, 30, VQ_DisplayConfig_NoneAWD,  4, 12, 10, 0, 0, 0 },
+   { 10, 12, 30, VQ_DisplayConfig_AWD,  4, 12, 10, 0, 0, 0 },
+
+   {  8, 12, 30, VQ_DisplayConfig_NoneAWD,  45000, 12,  8, 0, 0, 0 },
+   {  8, 12, 30, VQ_DisplayConfig_AWD,  45000, 12,  8, 0, 0, 0 },
+
+   {  6, 12, 30, VQ_DisplayConfig_NoneAWD,  45000, 12,  6, 0, 0, 0 },
+   {  6, 12, 30, VQ_DisplayConfig_AWD,  45000, 12,  6, 0, 0, 0 },
+
+   {  3, 12, 30, VQ_DisplayConfig_NoneAWD,  45000, 12,  3, 0, 0, 0 },
+   {  3, 12, 30, VQ_DisplayConfig_AWD,  45000, 12,  3, 0, 0, 0 },
 };
 
 static struct rv_power_state *cast_rv_ps(struct pp_hw_power_state *hw_ps)
@@ -72,13 +90,13 @@ static int rv_init_vq_budget_table(struct pp_hwmgr *hwmgr)
 {
uint32_t table_size, i;
struct phm_vq_budgeting_table *ptable;
-   uint32_t num_entries = ARRAY_SIZE(rv_vqtable);
+   uint32_t num_entries = sizeof(RV_VQTable)/sizeof(*RV_VQTable);
 
if (hwmgr->dyn_state.vq_budgeting_table != NULL)
return 0;
 
table_size = sizeof(struct phm_vq_budgeting_table) +
-   sizeof(struct phm_vq_budgeting_record) * (num_entries - 
1);
+   sizeof(struct phm_vq_budgeting_record) * (num_entries - 1);
 
ptable = kzalloc(table_size, GFP_KERNEL);
if (NULL == ptable)
@@ -87,16 +105,13 @@ static int rv_init_vq_budget_table(struct pp_hwmgr *hwmgr)
ptable->numEntries = (uint8_t) num_entries;
 
for (i = 0; i < ptable->numEntries; i++) {
-   ptable->entries[i].ulCUs = rv_vqtable[i].ulCUs;
-   ptable->entries[i].ulSustainableSOCPowerLimitLow = 
rv_vqtable[i].ulSustainableSOCPowerLimitLow;
-   ptable->entries[i].ulSustainableSOCPowerLimitHigh = 
rv_vqtable[i].ulSustainableSOCPowerLimitHigh;
-   ptable->entries[i].ulMinSclkLow = rv_vqtable[i].ulMinSclkLow;
-   ptable->entries[i].ulMinSclkHigh = rv_vqtable[i].ulMinSclkHigh;
-   ptable->entries[i].ucDispConfig = rv_vqtable[i].ucDispConfig;
-   ptable->entries[i].ulDClk = rv_vqtable[i].ulDClk;
-   ptable->entries[i].ulEClk = rv_vqtable[i].ulEClk;
-   ptable->entries[i].ulSustainableSclk = 
rv_vqtable[i].ulSustainableSclk;
-   ptable->entries[i].ulSustainableCUs = 
rv_vqtable[i].ulSustainableCUs;
+   ptable->settings[i].ulSetting1 = RV_VQTable[i].ulSetting1;
+   ptable->settings[i].ulSetting2 = RV_VQTable[i].ulSetting2;
+   ptable->settings[i].ulSetting3 = RV_VQTable[i].ulSetting3;
+   ptable->settings[i].ulSetting4 = RV_VQTable[i].ulSetting4;
+   ptable->settings[i].ulSetting5 = RV_VQTable[i].ulSetting5;
+   ptable->settings[i].ulSetting6 = RV_VQTable[i].ulSetting6;
+   ptable->settings[i].ulSetting7 = RV_VQTable[i].ulSetting7;
}
 
hwmgr->dyn_state.vq_budgeting_table = ptable;
diff --git a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h 
b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
index 35cb26f..7779600 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
@@ -519,9 +519,25 @@ struct phm_vq_budgeting_record {
uint32_t ulSustainableCUs;
 };
 
+struct phm_vq_budgeting_settings {
+   uint32_t ulSetting1;/* RV: CUs */
+   uint32_t ulSetting2;/* RV: SustainableSOCPowerLimitLow in W */
+   uint32_t ulSetting3;/* RV: 

[PATCH 5/6] drm/amd/powerplay: bypass pptable process on raven

2017-05-24 Thread Hawking Zhang
Change-Id: I550c70144213f9a7490be4df04f7efd286c4d05b
Signed-off-by: Hawking Zhang 
---
 drivers/gpu/drm/amd/powerplay/hwmgr/processpptables.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/processpptables.c 
b/drivers/gpu/drm/amd/powerplay/hwmgr/processpptables.c
index 7138cf9..2716721 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/processpptables.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/processpptables.c
@@ -1563,6 +1563,9 @@ static int pp_tables_initialize(struct pp_hwmgr *hwmgr)
int result;
const ATOM_PPLIB_POWERPLAYTABLE *powerplay_table;
 
+   if (hwmgr->chip_id == CHIP_RAVEN)
+   return 0;
+
hwmgr->need_pp_table_upload = true;
 
powerplay_table = get_powerplay_table(hwmgr);
@@ -1609,6 +1612,9 @@ static int pp_tables_initialize(struct pp_hwmgr *hwmgr)
 
 static int pp_tables_uninitialize(struct pp_hwmgr *hwmgr)
 {
+   if (hwmgr->chip_id == CHIP_RAVEN)
+   return 0;
+
if (NULL != hwmgr->dyn_state.vddc_dependency_on_sclk) {
kfree(hwmgr->dyn_state.vddc_dependency_on_sclk);
hwmgr->dyn_state.vddc_dependency_on_sclk = NULL;
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 6/6] drm/amd/powerplay: set powerplay support cap on raven

2017-05-24 Thread Hawking Zhang
Change-Id: I9aa5e78c1cf19b9069d37215bfd2517980bdf2a6
Signed-off-by: Hawking Zhang 
---
 drivers/gpu/drm/amd/powerplay/hwmgr/rv_hwmgr.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/rv_hwmgr.c 
b/drivers/gpu/drm/amd/powerplay/hwmgr/rv_hwmgr.c
index 01413ca1..5b130ef 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/rv_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/rv_hwmgr.c
@@ -515,6 +515,9 @@ static int rv_hwmgr_backend_init(struct pp_hwmgr *hwmgr)
return result;
}
 
+   phm_cap_set(hwmgr->platform_descriptor.platformCaps,
+PHM_PlatformCaps_PowerPlaySupport);
+
rv_populate_clock_table(hwmgr);
 
result = rv_get_system_info_data(hwmgr);
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH 4/4] drm/amd/powerplay/smu7: disable mclk switching for high refresh rates

2017-05-24 Thread Zhu, Rex
We hit a similar issue on CI (SWDEV-103046). At high refresh rates, DAL needs
to notify powerplay of the min_mclk.


Best Regards
Rex
-Original Message-
From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf Of Alex 
Deucher
Sent: Wednesday, May 24, 2017 5:27 AM
To: amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander
Subject: [PATCH 4/4] drm/amd/powerplay/smu7: disable mclk switching for high 
refresh rates

Even if the vblank period would allow it, it still seems to be problematic on 
some cards.

bug: https://bugs.freedesktop.org/show_bug.cgi?id=96868

Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c 
b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
index 1445c51..102eb6d 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
@@ -2793,7 +2793,8 @@ static int smu7_apply_state_adjust_rules(struct pp_hwmgr 
*hwmgr,
 
disable_mclk_switching = ((1 < info.display_count) ||
  disable_mclk_switching_for_frame_lock ||
- smu7_vblank_too_short(hwmgr, 
mode_info.vblank_time_us));
+ smu7_vblank_too_short(hwmgr, 
mode_info.vblank_time_us) ||
+ (mode_info.refresh_rate > 120));
 
sclk = smu7_ps->performance_levels[0].engine_clock;
mclk = smu7_ps->performance_levels[0].memory_clock;
--
2.5.5

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH 3/4] drm/amd/powerplay/smu7: add vblank check for mclk switching (v2)

2017-05-24 Thread Zhu, Rex
+   if (vblank_time_us < switch_limit_us)
+   return true;
+   else
+   return false;

How about changing the code to:
	return vblank_time_us < switch_limit_us ? true : false;


Anyway, the patch is Reviewed-by: Rex Zhu

Best Regards
Rex

-Original Message-
From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf Of Alex 
Deucher
Sent: Wednesday, May 24, 2017 5:27 AM
To: amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander
Subject: [PATCH 3/4] drm/amd/powerplay/smu7: add vblank check for mclk 
switching (v2)

Check to make sure the vblank period is long enough to support mclk switching.

v2: drop needless initial assignment (Nils)

bug: https://bugs.freedesktop.org/show_bug.cgi?id=96868

Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c | 31 +---
 1 file changed, 27 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c 
b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
index a74a3db..1445c51 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
@@ -2655,6 +2655,28 @@ static int smu7_get_power_state_size(struct pp_hwmgr 
*hwmgr)
return sizeof(struct smu7_power_state);  }
 
+static int smu7_vblank_too_short(struct pp_hwmgr *hwmgr,
+uint32_t vblank_time_us)
+{
+   struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
+   uint32_t switch_limit_us;
+
+   switch (hwmgr->chip_id) {
+   case CHIP_POLARIS10:
+   case CHIP_POLARIS11:
+   case CHIP_POLARIS12:
+   switch_limit_us = data->is_memory_gddr5 ? 190 : 150;
+   break;
+   default:
+   switch_limit_us = data->is_memory_gddr5 ? 450 : 150;
+   break;
+   }
+
+   if (vblank_time_us < switch_limit_us)
+   return true;
+   else
+   return false;
+}
 
 static int smu7_apply_state_adjust_rules(struct pp_hwmgr *hwmgr,
struct pp_power_state *request_ps,
@@ -2669,6 +2691,7 @@ static int smu7_apply_state_adjust_rules(struct pp_hwmgr 
*hwmgr,
bool disable_mclk_switching;
bool disable_mclk_switching_for_frame_lock;
struct cgs_display_info info = {0};
+   struct cgs_mode_info mode_info = {0};
const struct phm_clock_and_voltage_limits *max_limits;
uint32_t i;
struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); @@ 
-2677,6 +2700,7 @@ static int smu7_apply_state_adjust_rules(struct pp_hwmgr 
*hwmgr,
int32_t count;
int32_t stable_pstate_sclk = 0, stable_pstate_mclk = 0;
 
+   info.mode_info = &mode_info;
data->battery_state = (PP_StateUILabel_Battery ==
request_ps->classification.ui_label);
 
@@ -2703,8 +2727,6 @@ static int smu7_apply_state_adjust_rules(struct pp_hwmgr 
*hwmgr,
 
	cgs_get_active_displays_info(hwmgr->device, &info);
 
-   /*TO DO result = PHM_CheckVBlankTime(hwmgr, );*/
-
minimum_clocks.engineClock = hwmgr->display_config.min_core_set_clock;
minimum_clocks.memoryClock = hwmgr->display_config.min_mem_set_clock;
 
@@ -2769,8 +2791,9 @@ static int smu7_apply_state_adjust_rules(struct pp_hwmgr 
*hwmgr,

PHM_PlatformCaps_DisableMclkSwitchingForFrameLock);
 
 
-   disable_mclk_switching = (1 < info.display_count) ||
-   disable_mclk_switching_for_frame_lock;
+   disable_mclk_switching = ((1 < info.display_count) ||
+ disable_mclk_switching_for_frame_lock ||
+ smu7_vblank_too_short(hwmgr, 
mode_info.vblank_time_us));
 
sclk = smu7_ps->performance_levels[0].engine_clock;
mclk = smu7_ps->performance_levels[0].memory_clock;
--
2.5.5

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH xf86-video-amdgpu 1/1] Update URLs

2017-05-24 Thread Alex Deucher
On Tue, May 23, 2017 at 9:18 PM, Michel Dänzer  wrote:
> From: Michel Dänzer 
>
> * Point to the amd-gfx mailing list
> * Specify the component in all bugzilla URLs
> * Use https:// for all HTML URLs
>
> (Ported from radeon commit d80d01a73c2eaba2e3649b7bc0a3541b3ff782f6)
>
> Signed-off-by: Michel Dänzer 

Reviewed-by: Alex Deucher 

> ---
>  README | 14 +++---
>  configure.ac   |  2 +-
>  man/amdgpu.man |  6 +++---
>  3 files changed, 11 insertions(+), 11 deletions(-)
>
> diff --git a/README b/README
> index 98deab952..1a74a1281 100644
> --- a/README
> +++ b/README
> @@ -1,25 +1,25 @@
>  xf86-video-amdgpu - AMD Radeon video driver for the Xorg X server
>
> -All questions regarding this software should be directed at the
> -Xorg mailing list:
> +Patches and questions regarding this software should be directed at the
> +amd-gfx mailing list:
>
> -http://lists.freedesktop.org/mailman/listinfo/xorg
> +https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>
>  Please submit bug reports to the Xorg bugzilla:
>
> -https://bugs.freedesktop.org/enter_bug.cgi?product=xorg
> +https://bugs.freedesktop.org/enter_bug.cgi?product=xorg&component=Driver/AMDgpu
>
>  The master development code repository can be found at:
>
>  git://anongit.freedesktop.org/git/xorg/driver/xf86-video-amdgpu
>
> -http://cgit.freedesktop.org/xorg/driver/xf86-video-amdgpu
> +https://cgit.freedesktop.org/xorg/driver/xf86-video-amdgpu
>
>  For patch submission instructions, see:
>
> -   http://www.x.org/wiki/Development/Documentation/SubmittingPatches
> +https://www.x.org/wiki/Development/Documentation/SubmittingPatches
>
>  For more information on the git code manager, see:
>
> -http://wiki.x.org/wiki/GitPage
> +https://wiki.x.org/wiki/GitPage
>
> diff --git a/configure.ac b/configure.ac
> index 9110f3545..26eb52ddc 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -24,7 +24,7 @@
>  AC_PREREQ([2.60])
>  AC_INIT([xf86-video-amdgpu],
>  [1.3.99],
> -[https://bugs.freedesktop.org/enter_bug.cgi?product=xorg],
> +[https://bugs.freedesktop.org/enter_bug.cgi?product=xorg&component=Driver/AMDgpu],
>  [xf86-video-amdgpu])
>
>  AC_CONFIG_SRCDIR([Makefile.am])
> diff --git a/man/amdgpu.man b/man/amdgpu.man
> index 13ffb7a27..d6904b818 100644
> --- a/man/amdgpu.man
> +++ b/man/amdgpu.man
> @@ -115,17 +115,17 @@ __xservername__(__appmansuffix__), 
> __xconfigfile__(__filemansuffix__), Xserver(_
>  .IP " 1." 4
>  Wiki page:
>  .RS 4
> -http://www.x.org/wiki/radeon
> +https://www.x.org/wiki/radeon
>  .RE
>  .IP " 2." 4
>  Overview about amdgpu development code:
>  .RS 4
> -http://cgit.freedesktop.org/xorg/driver/xf86-video-amdgpu/
> +https://cgit.freedesktop.org/xorg/driver/xf86-video-amdgpu/
>  .RE
>  .IP " 3." 4
>  Mailing list:
>  .RS 4
> -http://lists.freedesktop.org/mailman/listinfo/amd-gfx
> +https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>  .RE
>  .IP " 4." 4
>  IRC channel:
> --
> 2.11.0
>
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 2/6] drm/amd/powerplay: using MinFClock/MaxFclock to report Min/Max memory clock limits

2017-05-24 Thread Hawking Zhang
Change-Id: I713031dced6e1d5a449cb07a53b644d1014c120c
Signed-off-by: Hawking Zhang 
---
 drivers/gpu/drm/amd/powerplay/hwmgr/rv_hwmgr.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/rv_hwmgr.c 
b/drivers/gpu/drm/amd/powerplay/hwmgr/rv_hwmgr.c
index 26906c3..7fa3980 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/rv_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/rv_hwmgr.c
@@ -743,6 +743,7 @@ static int rv_get_performance_level(struct pp_hwmgr *hwmgr, 
const struct pp_hw_p
struct rv_hwmgr *data;
uint32_t level_index;
uint32_t i;
+   uint32_t vol_dep_record_index = 0;
 
if (level == NULL || hwmgr == NULL || state == NULL)
return -EINVAL;
@@ -762,6 +763,13 @@ static int rv_get_performance_level(struct pp_hwmgr 
*hwmgr, const struct pp_hw_p
}
}
 
+   if (level_index == 0) {
+   vol_dep_record_index = 
data->clock_vol_info.vdd_dep_on_fclk->count - 1;
+   level->memory_clock =
+   
data->clock_vol_info.vdd_dep_on_fclk->entries[vol_dep_record_index].clk;
+   } else
+   level->memory_clock = 
data->clock_vol_info.vdd_dep_on_fclk->entries[0].clk;
+
level->nonLocalMemoryFreq = 0;
level->nonLocalMemoryWidth = 0;
 
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 0/3] drm/amdgpu: Tweaks for high pressure on CPU visible VRAM

2017-05-24 Thread Christian König

On 24.05.2017 at 13:03, Marek Olšák wrote:

On Wed, May 24, 2017 at 9:56 AM, Michel Dänzer  wrote:

On 23/05/17 07:38 PM, Marek Olšák wrote:

On Tue, May 23, 2017 at 2:45 AM, Michel Dänzer  wrote:

On 22/05/17 07:09 PM, Marek Olšák wrote:

On Mon, May 22, 2017 at 12:00 PM, Michel Dänzer  wrote:

On 20/05/17 06:26 PM, Marek Olšák wrote:

On May 20, 2017 3:26 AM, "Michel Dänzer" > wrote:

 On 20/05/17 01:14 AM, Marek Olšák wrote:
 > Hi Michel,
 >
 > I've applied your series

 Thanks for testing it.

 > and it doesn't help with low Dirt Rally performance on Fiji. I see TTM
 > buffer moves at 800MB/s and many VRAM page faults.

 Did you see this:

 >> Note that there's only little if any improvement of the average
 framerate
 >> reported, but the minimum framerate as seen on the HUD goes from
 ~10 fps
 >> to ~17.

 I.e. it mostly affects the minimum framerate and smoothness for me
 as well.


Without the series, I get 70 average fps. With the series, I get 30
average fps. That might just be random bad luck. I don't know.

Hmm, yeah, maybe that was just one of the random slowdowns you've been
talking about in other threads and on IRC?

I can't reproduce any slowdown with these patches, even leaving visible
VRAM size at 256 MB.

The random slowdowns with Dirt Rally are only caused by the pressure
on visible VRAM. This whole thread is about those random slowdowns.

No, this thread is about the scenario described in the cover letter of
this patch series.



If you're saying "maybe it was just one of the random slowdowns", you're
saying "maybe it was just the visible VRAM pressure". It's only
random with Dirt Rally, which makes it difficult to believe statements
such as "I can't reproduce any slowdown".

I could say the same thing about you seeing random slowdowns... I've
never seen that, I had to artificially limit the size of visible VRAM to
64 MB to make it significantly affect the benchmark result.

How many times do you need to run the benchmark on average to hit a
random slowdown? Which desktop environment and other X clients are
running during the benchmark? Which tab is active in the Steam window
while the benchmark runs?

In my case, it's only xfwm4, xterm and steam on the Dirt Rally page in
the library.

Ubuntu Unity, Steam small mode (there are no tabs), Ultra settings in
Dirt Rally.

Every single time I run the game with this series, I get 700-1000MB/s
of TTM BO moves. There doesn't seem to be any randomness.

It was better without this series. (meaning it was sometimes OK, sometimes bad)

Thanks for the additional details. I presume that in the bad case there
are some BOs lying around in visible VRAM (e.g. from Unity), which
causes some of Dirt Rally's BOs to go back and forth between GTT on CPU
page faults and VRAM on GPU usage.

This means at least patch 2 goes out the window. I'll see if I can
salvage something out of patch 3.

I think the final solution (done in fault_reserve_notify) should be:
if (bo->num_cpu_page_faults++ > 20)
bo->preferred_domain = GTT_WC;


I more or less agree on that, but setting preferred_domain permanently 
to GTT_WC is what worries me a bit.


E.g. imagine you alt+tab from a game to your browser and back and the 
game runs way slower now because BOs are never moved back to VRAM.


What we need is a global limit on the number of bytes transferred per second
for swap operations, or something like that.


Or maybe a timeout: when a BO was moved (either by swapping it out or by a CPU
page fault), only move it back after +n jiffies or something like that.
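
A minimal sketch of that cooldown idea (the field, helper and threshold are
assumptions, not existing amdgpu code):

	/* Sketch only: rate-limit moving a BO back to VRAM after it was
	 * swapped out or hit a CPU page fault.  last_move_jiffies would be
	 * a new field updated whenever the BO is moved.
	 */
	#define AMDGPU_BO_MOVE_COOLDOWN	HZ	/* e.g. one second */

	static bool amdgpu_bo_move_back_allowed(struct amdgpu_bo *bo)
	{
		return time_after(jiffies,
				  bo->last_move_jiffies + AMDGPU_BO_MOVE_COOLDOWN);
	}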


Christian.



Otherwise I think we'll be just going in circles and not get anywhere.

Marek
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amd/display: call amdgpu_dm_fini whewn hw_fini.

2017-05-24 Thread Rex Zhu
to free up drm mode_config info.

fix issue: unload amdgpu, can't load amdgpu again.

Change-Id: I493bc923b039eae69717cbe8a85c8f3f3ea97465
[drm:drm_debugfs_init [drm]] *ERROR* Cannot create /sys/kernel/debug/dri/0
[drm:drm_minor_register [drm]] *ERROR* DRM: Failed to initialize 
/sys/kernel/debug/dri.

Signed-off-by: Rex Zhu 
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 5 ++---
 drivers/gpu/drm/amd/display/dc/core/dc.c  | 7 ++-
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index e39aef6..2774f86 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -393,9 +393,8 @@ void amdgpu_dm_fini(struct amdgpu_device *adev)
adev->dm.freesync_module = NULL;
}
/* DC Destroy TODO: Replace destroy DAL */
-   {
+   if (adev->dm.dc)
		dc_destroy(&adev->dm.dc);
-   }
return;
 }
 
@@ -488,7 +487,7 @@ static int dm_hw_fini(void *handle)
amdgpu_dm_hpd_fini(adev);
 
amdgpu_dm_irq_fini(adev);
-
+   amdgpu_dm_fini(adev);
return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c 
b/drivers/gpu/drm/amd/display/dc/core/dc.c
index 8106e01..8e1b573 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -1583,7 +1583,12 @@ enum dc_irq_source dc_interrupt_to_irq_source(
 
 void dc_interrupt_set(const struct dc *dc, enum dc_irq_source src, bool enable)
 {
-   struct core_dc *core_dc = DC_TO_CORE(dc);
+   struct core_dc *core_dc;
+
+   if (dc == NULL)
+   return;
+   core_dc = DC_TO_CORE(dc);
+
dal_irq_service_set(core_dc->res_pool->irqs, src, enable);
 }
 
-- 
1.9.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 0/3] drm/amdgpu: Tweaks for high pressure on CPU visible VRAM

2017-05-24 Thread Marek Olšák
On Wed, May 24, 2017 at 9:56 AM, Michel Dänzer  wrote:
> On 23/05/17 07:38 PM, Marek Olšák wrote:
>> On Tue, May 23, 2017 at 2:45 AM, Michel Dänzer  wrote:
>>> On 22/05/17 07:09 PM, Marek Olšák wrote:
 On Mon, May 22, 2017 at 12:00 PM, Michel Dänzer  wrote:
> On 20/05/17 06:26 PM, Marek Olšák wrote:
>> On May 20, 2017 3:26 AM, "Michel Dänzer" > > wrote:
>>
>> On 20/05/17 01:14 AM, Marek Olšák wrote:
>> > Hi Michel,
>> >
>> > I've applied your series
>>
>> Thanks for testing it.
>>
>> > and it doesn't help with low Dirt Rally performance on Fiji. I see 
>> TTM
>> > buffer moves at 800MB/s and many VRAM page faults.
>>
>> Did you see this:
>>
>> >> Note that there's only little if any improvement of the average
>> framerate
>> >> reported, but the minimum framerate as seen on the HUD goes from
>> ~10 fps
>> >> to ~17.
>>
>> I.e. it mostly affects the minimum framerate and smoothness for me
>> as well.
>>
>>
>> Without the series, I get 70 average fps. With the series, I get 30
>> average fps. That might just be random bad luck. I don't know.
>
> Hmm, yeah, maybe that was just one of the random slowdowns you've been
> talking about in other threads and on IRC?
>
> I can't reproduce any slowdown with these patches, even leaving visible
> VRAM size at 256 MB.

 The random slowdowns with Dirt Rally are only caused by the pressure
 on visible VRAM. This whole thread is about those random slowdowns.
>>>
>>> No, this thread is about the scenario described in the cover letter of
>>> this patch series.
>>>
>>>
 If you're saying "maybe it was just one of the random slowdowns", you're
 saying "maybe it was just the visible VRAM pressure". It's only
 random with Dirt Rally, which makes it difficult to believe statements
 such as "I can't reproduce any slowdown".
>>>
>>> I could say the same thing about you seeing random slowdowns... I've
>>> never seen that, I had to artificially limit the size of visible VRAM to
>>> 64 MB to make it significantly affect the benchmark result.
>>>
>>> How many times do you need to run the benchmark on average to hit a
>>> random slowdown? Which desktop environment and other X clients are
>>> running during the benchmark? Which tab is active in the Steam window
>>> while the benchmark runs?
>>>
>>> In my case, it's only xfwm4, xterm and steam on the Dirt Rally page in
>>> the library.
>>
>> Ubuntu Unity, Steam small mode (there are no tabs), Ultra settings in
>> Dirt Rally.
>>
>> Every single time I run the game with this series, I get 700-1000MB/s
>> of TTM BO moves. There doesn't seem to be any randomness.
>>
>> It was better without this series. (meaning it was sometimes OK, sometimes 
>> bad)
>
> Thanks for the additional details. I presume that in the bad case there
> are some BOs lying around in visible VRAM (e.g. from Unity), which
> causes some of Dirt Rally's BOs to go back and forth between GTT on CPU
> page faults and VRAM on GPU usage.
>
> This means at least patch 2 goes out the window. I'll see if I can
> salvage something out of patch 3.

I think the final solution (done in fault_reserve_notify) should be:
if (bo->num_cpu_page_faults++ > 20)
   bo->preferred_domain = GTT_WC;
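
Spelled out inside the fault handler, that might look roughly like this (a
sketch only; the counter field and the exact domain/flag names are
assumptions):

	/* Sketch: after repeated CPU page faults, permanently prefer
	 * write-combined GTT for this BO instead of VRAM.
	 */
	if (abo->num_cpu_page_faults++ > 20) {
		abo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT;
		abo->flags |= AMDGPU_GEM_CREATE_CPU_GTT_USWC;
	}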

Otherwise I think we'll be just going in circles and not get anywhere.

Marek
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 1/7] drm/amdgpu: cleanup adjust_mc_addr handling v3

2017-05-24 Thread Christian König

On 23.05.2017 at 21:03, Felix Kuehling wrote:

On 17-05-23 12:52 PM, Christian König wrote:

--- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
@@ -395,7 +395,13 @@
  static uint64_t gmc_v6_0_get_vm_pte_flags(struct amdgpu_device *adev,
return pte_flag;
  }
+static uint64_t gmc_v6_0_get_vm_pde(struct amdgpu_device *adev,
uint64_t addr)
+{
+ BUG_ON(addr & 0xF000FFFULL);
+ return addr;
+}
+

The mask in the BUG_ON leaves out the highest 4 bits (63-60). Is that
intentional? Same for gmc_v7_0.c.


diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index 3b5ea0f..f05c034 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -656,7 +656,13 @@
  static uint64_t gmc_v8_0_get_vm_pte_flags(struct amdgpu_device *adev,
return pte_flag;
  }
   
+static uint64_t gmc_v8_0_get_vm_pde(struct amdgpu_device *adev,

uint64_t addr)
+{
+ BUG_ON(addr & 0xFFF00FFFULL);
+ return addr;
+}
+

This looks like you're allowing 52bit physical addresses for GFX 8.
AFAIK the HW only supports 40 bits. Is that intentional?


No, both issues are just typos caused by too many things to do in parallel.

Thanks for pointing that out,
Christian.
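
For reference, with a 40-bit MC address space and 4K-aligned page directory
entries, the invalid bits are 63-40 and 11-0, so the intended check would
presumably look something like this (illustration only, not the final patch):

	/* Sketch: reject PDE addresses outside the 40-bit, 4K-aligned range. */
	BUG_ON(addr & 0xFFFFFF0000000FFFULL);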



Other than that, the patch is Reviewed-by: Felix Kuehling


Regards,
   Felix
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 0/5] GFX9 KIQ

2017-05-24 Thread Tom St Denis

On 23/05/17 05:32 PM, Alex Deucher wrote:

On Tue, May 23, 2017 at 5:31 PM, Alex Deucher  wrote:

On Fri, May 12, 2017 at 7:11 AM, Tom St Denis  wrote:

On 11/05/17 07:33 PM, Tom St Denis wrote:


On 11/05/17 02:35 PM, Alex Deucher wrote:


These are the last of the gfx9 KIQ patches that haven't landed yet.  Can
someone with gfx9 capable hw test this (vega10 or raven)?  This is needed
to enable powergating on gfx9.

Thanks,



If nobody gets to it by morning I'll try it out first thing on my vega10
though my VBIOS might need updating...




They don't apply on top of either 4.9 nor 4.11...


Odd.  They applied for me (at least to the 4.11 tree I had checked out
when I left town).  Attached again just in case.


This series applies fine and seems to work in basic tests (gnome + GL 
apps + uvd/vce apps) on my vega10.


Haven't tried on raven yet.

Tom
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 0/5] GFX9 KIQ

2017-05-24 Thread Tom St Denis

On 24/05/17 09:19 AM, Deucher, Alexander wrote:

-Original Message-
From: StDenis, Tom
Sent: Wednesday, May 24, 2017 9:10 AM
To: Deucher, Alexander; Alex Deucher
Cc: amd-gfx list
Subject: Re: [PATCH 0/5] GFX9 KIQ

On 24/05/17 09:10 AM, Deucher, Alexander wrote:
>> -Original Message-
>> From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On
Behalf
>> Of Tom St Denis
>> Sent: Wednesday, May 24, 2017 7:50 AM
>> To: Alex Deucher
>> Cc: amd-gfx list
>> Subject: Re: [PATCH 0/5] GFX9 KIQ
>>
>> On 23/05/17 05:32 PM, Alex Deucher wrote:
>> > On Tue, May 23, 2017 at 5:31 PM, Alex Deucher

>> wrote:
>> >> On Fri, May 12, 2017 at 7:11 AM, Tom St Denis

>> wrote:
>> >>> On 11/05/17 07:33 PM, Tom St Denis wrote:
>> 
>>  On 11/05/17 02:35 PM, Alex Deucher wrote:
>> >
>> > These are the laste of the gfx9 KIQ patches that haven't landed
yet.
>> Can
>> > someone with gfx9 capable hw test this (vega10 or raven)?  This is
>> needed
>> > to enable powergating on gfx9.
>> >
>> > Thanks,
>> 
>> 
>>  If nobody gets to it by morning I'll try it out first thing on my vega10
>>  though my VBIOS might need updating...
>> >>>
>> >>>
>> >>>
>> >>> They don't apply on top of either 4.9 nor 4.11...
>> >>
>> >> Odd.  They applied for me (at least to the 4.11 tree I had checked out
>> >> when I left town).  Attached again just in case.
>>
>> This series applies fine and seems to work in basic tests (gnome + GL
>> apps + uvd/vce apps) on my vega10.
>>
>> Haven't tried on raven yet.
>
> All the ring and IB tests pass?

Ya it boots up and I can run gnome + gl apps just fine.


Awesome.  Anyone want to give me an r-b or a-b on the remaining patches?


Definitely so far as vega10 is concerned.

Ack-by: Tom St Denis 
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH] drm/amd/display: call amdgpu_dm_fini whewn hw_fini.

2017-05-24 Thread Grodzovsky, Andrey
> -Original Message-
> From: Deucher, Alexander
> Sent: Wednesday, May 24, 2017 9:10 AM
> To: Zhu, Rex; amd-gfx@lists.freedesktop.org; Wentland, Harry; Grodzovsky,
> Andrey
> Cc: Zhu, Rex
> Subject: RE: [PATCH] drm/amd/display: call amdgpu_dm_fini whewn
> hw_fini.
> 
> > -Original Message-
> > From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf
> > Of Rex Zhu
> > Sent: Wednesday, May 24, 2017 7:20 AM
> > To: amd-gfx@lists.freedesktop.org
> > Cc: Zhu, Rex
> > Subject: [PATCH] drm/amd/display: call amdgpu_dm_fini whewn hw_fini.
> >
> > to free up drm mode_config info.
> >
> > fix issue: unload amdgpu, can't load amdgpu again.
> 
> +Harry, Andrey
> 
> Acked-by: Alex Deucher 
> 
> >
> > Change-Id: I493bc923b039eae69717cbe8a85c8f3f3ea97465
> > [drm:drm_debugfs_init [drm]] *ERROR* Cannot create
> > /sys/kernel/debug/dri/0
> > [drm:drm_minor_register [drm]] *ERROR* DRM: Failed to initialize
> > /sys/kernel/debug/dri.
> >
> > Signed-off-by: Rex Zhu 
> > ---
> >  drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 5 ++---
> >  drivers/gpu/drm/amd/display/dc/core/dc.c  | 7 ++-
> >  2 files changed, 8 insertions(+), 4 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> > b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> > index e39aef6..2774f86 100644
> > --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> > +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> > @@ -393,9 +393,8 @@ void amdgpu_dm_fini(struct amdgpu_device
> *adev)
> > adev->dm.freesync_module = NULL;
> > }
> > /* DC Destroy TODO: Replace destroy DAL */
> > -   {
> > +   if (adev->dm.dc)
> > 		dc_destroy(&adev->dm.dc);
> > -   }
> > return;
> >  }
> >
> > @@ -488,7 +487,7 @@ static int dm_hw_fini(void *handle)
> > amdgpu_dm_hpd_fini(adev);
> >
> > amdgpu_dm_irq_fini(adev);
> > -
> > +   amdgpu_dm_fini(adev);
> > return 0;
> >  }
> >
> > diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c
> > b/drivers/gpu/drm/amd/display/dc/core/dc.c
> > index 8106e01..8e1b573 100644
> > --- a/drivers/gpu/drm/amd/display/dc/core/dc.c
> > +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
> > @@ -1583,7 +1583,12 @@ enum dc_irq_source
> dc_interrupt_to_irq_source(
> >
> >  void dc_interrupt_set(const struct dc *dc, enum dc_irq_source src,
> > bool
> > enable)
> >  {
> > -   struct core_dc *core_dc = DC_TO_CORE(dc);
> > +   struct core_dc *core_dc;
> > +
> > +   if (dc == NULL)
> > +   return;
> > +   core_dc = DC_TO_CORE(dc);
> > +
> > dal_irq_service_set(core_dc->res_pool->irqs, src, enable);  }
> >
> > --
> > 1.9.1
> >
> > ___
> > amd-gfx mailing list
> > amd-gfx@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Reviewed-by: Andrey Grodzovsky
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amd/display: call amdgpu_dm_fini whewn hw_fini.

2017-05-24 Thread Harry Wentland

Please fix the typo (whewn -> when) in the title.

With that this patch is
Reviewed-by: Harry Wentland 

Harry

On 2017-05-24 07:20 AM, Rex Zhu wrote:

to free up drm mode_config info.

fix issue: unload amdgpu, can't load amdgpu again.

Change-Id: I493bc923b039eae69717cbe8a85c8f3f3ea97465
[drm:drm_debugfs_init [drm]] *ERROR* Cannot create /sys/kernel/debug/dri/0
[drm:drm_minor_register [drm]] *ERROR* DRM: Failed to initialize 
/sys/kernel/debug/dri.

Signed-off-by: Rex Zhu 
---
  drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 5 ++---
  drivers/gpu/drm/amd/display/dc/core/dc.c  | 7 ++-
  2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index e39aef6..2774f86 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -393,9 +393,8 @@ void amdgpu_dm_fini(struct amdgpu_device *adev)
adev->dm.freesync_module = NULL;
}
/* DC Destroy TODO: Replace destroy DAL */
-   {
+   if (adev->dm.dc)
		dc_destroy(&adev->dm.dc);
-   }
return;
  }
  
@@ -488,7 +487,7 @@ static int dm_hw_fini(void *handle)

amdgpu_dm_hpd_fini(adev);
  
  	amdgpu_dm_irq_fini(adev);

-
+   amdgpu_dm_fini(adev);
return 0;
  }
  
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c

index 8106e01..8e1b573 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -1583,7 +1583,12 @@ enum dc_irq_source dc_interrupt_to_irq_source(
  
  void dc_interrupt_set(const struct dc *dc, enum dc_irq_source src, bool enable)

  {
-   struct core_dc *core_dc = DC_TO_CORE(dc);
+   struct core_dc *core_dc;
+
+   if (dc == NULL)
+   return;
+   core_dc = DC_TO_CORE(dc);
+
dal_irq_service_set(core_dc->res_pool->irqs, src, enable);
  }
  


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH 0/5] GFX9 KIQ

2017-05-24 Thread Deucher, Alexander
> -Original Message-
> From: StDenis, Tom
> Sent: Wednesday, May 24, 2017 9:10 AM
> To: Deucher, Alexander; Alex Deucher
> Cc: amd-gfx list
> Subject: Re: [PATCH 0/5] GFX9 KIQ
> 
> On 24/05/17 09:10 AM, Deucher, Alexander wrote:
> >> -Original Message-
> >> From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On
> Behalf
> >> Of Tom St Denis
> >> Sent: Wednesday, May 24, 2017 7:50 AM
> >> To: Alex Deucher
> >> Cc: amd-gfx list
> >> Subject: Re: [PATCH 0/5] GFX9 KIQ
> >>
> >> On 23/05/17 05:32 PM, Alex Deucher wrote:
> >> > On Tue, May 23, 2017 at 5:31 PM, Alex Deucher
> 
> >> wrote:
> >> >> On Fri, May 12, 2017 at 7:11 AM, Tom St Denis
> 
> >> wrote:
> >> >>> On 11/05/17 07:33 PM, Tom St Denis wrote:
> >> 
> >>  On 11/05/17 02:35 PM, Alex Deucher wrote:
> >> >
> >> > These are the laste of the gfx9 KIQ patches that haven't landed
> yet.
> >> Can
> >> > someone with gfx9 capable hw test this (vega10 or raven)?  This is
> >> needed
> >> > to enable powergating on gfx9.
> >> >
> >> > Thanks,
> >> 
> >> 
> >>  If nobody gets to it by morning I'll try it out first thing on my 
> >>  vega10
> >>  though my VBIOS might need updating...
> >> >>>
> >> >>>
> >> >>>
> >> >>> They don't apply on top of either 4.9 nor 4.11...
> >> >>
> >> >> Odd.  They applied for me (at least to the 4.11 tree I had checked out
> >> >> when I left town).  Attached again just in case.
> >>
> >> This series applies fine and seems to work in basic tests (gnome + GL
> >> apps + uvd/vce apps) on my vega10.
> >>
> >> Haven't tried on raven yet.
> >
> > All the ring and IB tests pass?
> 
> Ya it boots up and I can run gnome + gl apps just fine.

Awesome.  Anyone want to give me an r-b or a-b on the remaining patches?

Thanks,

Alex

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH 6/6] drm/amd/powerplay: set powerplay support cap on raven

2017-05-24 Thread Deucher, Alexander
> -Original Message-
> From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf
> Of Hawking Zhang
> Sent: Wednesday, May 24, 2017 7:14 AM
> To: amd-gfx@lists.freedesktop.org
> Cc: Zhang, Hawking
> Subject: [PATCH 6/6] drm/amd/powerplay: set powerplay support cap on
> raven
> 
> Change-Id: I9aa5e78c1cf19b9069d37215bfd2517980bdf2a6
> Signed-off-by: Hawking Zhang 

Series is:
Reviewed-by: Alex Deucher 

> ---
>  drivers/gpu/drm/amd/powerplay/hwmgr/rv_hwmgr.c | 3 +++
>  1 file changed, 3 insertions(+)
> 
> diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/rv_hwmgr.c
> b/drivers/gpu/drm/amd/powerplay/hwmgr/rv_hwmgr.c
> index 01413ca1..5b130ef 100644
> --- a/drivers/gpu/drm/amd/powerplay/hwmgr/rv_hwmgr.c
> +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/rv_hwmgr.c
> @@ -515,6 +515,9 @@ static int rv_hwmgr_backend_init(struct pp_hwmgr
> *hwmgr)
>   return result;
>   }
> 
> + phm_cap_set(hwmgr->platform_descriptor.platformCaps,
> +PHM_PlatformCaps_PowerPlaySupport);
> +
>   rv_populate_clock_table(hwmgr);
> 
>   result = rv_get_system_info_data(hwmgr);
> --
> 2.7.4
> 
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH] drm/amd/display: call amdgpu_dm_fini whewn hw_fini.

2017-05-24 Thread Deucher, Alexander
> -Original Message-
> From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf
> Of Rex Zhu
> Sent: Wednesday, May 24, 2017 7:20 AM
> To: amd-gfx@lists.freedesktop.org
> Cc: Zhu, Rex
> Subject: [PATCH] drm/amd/display: call amdgpu_dm_fini whewn hw_fini.
> 
> to free up drm mode_config info.
> 
> fix issue: unload amdgpu, can't load amdgpu again.

+Harry, Andrey

Acked-by: Alex Deucher 

> 
> Change-Id: I493bc923b039eae69717cbe8a85c8f3f3ea97465
> [drm:drm_debugfs_init [drm]] *ERROR* Cannot create
> /sys/kernel/debug/dri/0
> [drm:drm_minor_register [drm]] *ERROR* DRM: Failed to initialize
> /sys/kernel/debug/dri.
> 
> Signed-off-by: Rex Zhu 
> ---
>  drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 5 ++---
>  drivers/gpu/drm/amd/display/dc/core/dc.c  | 7 ++-
>  2 files changed, 8 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> index e39aef6..2774f86 100644
> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> @@ -393,9 +393,8 @@ void amdgpu_dm_fini(struct amdgpu_device *adev)
>   adev->dm.freesync_module = NULL;
>   }
>   /* DC Destroy TODO: Replace destroy DAL */
> - {
> + if (adev->dm.dc)
> 		dc_destroy(&adev->dm.dc);
> - }
>   return;
>  }
> 
> @@ -488,7 +487,7 @@ static int dm_hw_fini(void *handle)
>   amdgpu_dm_hpd_fini(adev);
> 
>   amdgpu_dm_irq_fini(adev);
> -
> + amdgpu_dm_fini(adev);
>   return 0;
>  }
> 
> diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c
> b/drivers/gpu/drm/amd/display/dc/core/dc.c
> index 8106e01..8e1b573 100644
> --- a/drivers/gpu/drm/amd/display/dc/core/dc.c
> +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
> @@ -1583,7 +1583,12 @@ enum dc_irq_source dc_interrupt_to_irq_source(
> 
>  void dc_interrupt_set(const struct dc *dc, enum dc_irq_source src, bool
> enable)
>  {
> - struct core_dc *core_dc = DC_TO_CORE(dc);
> + struct core_dc *core_dc;
> +
> + if (dc == NULL)
> + return;
> + core_dc = DC_TO_CORE(dc);
> +
>   dal_irq_service_set(core_dc->res_pool->irqs, src, enable);
>  }
> 
> --
> 1.9.1
> 
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH 1/1] amdgpu: move asic id table to a separate file

2017-05-24 Thread Li, Samuel
>There's no need for a separate repo upstream.  It's purely to aid internal 
>packaging.
As a first step, we can put a snapshot in libdrm for now.
External packaging will face the same issue though ... the packagers need to
put the ids into various versions of various distros. We also likely need to
automate the upstream update to save some time for ourselves.

Sam
-Original Message-
From: Deucher, Alexander 
Sent: Tuesday, May 23, 2017 8:12 AM
To: 'Michel Dänzer' ; Li, Samuel 
Cc: amd-gfx@lists.freedesktop.org; Yuan, Xiaojie 
Subject: RE: [PATCH 1/1] amdgpu: move asic id table to a separate file

> -Original Message-
> From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf 
> Of Michel Dänzer
> Sent: Thursday, May 11, 2017 8:50 PM
> To: Li, Samuel
> Cc: amd-gfx@lists.freedesktop.org; Yuan, Xiaojie
> Subject: Re: [PATCH 1/1] amdgpu: move asic id table to a separate file
> 
> On 12/05/17 06:13 AM, Li, Samuel wrote:
> > Submitted a request to create a new repo on freedesktop.
> 
> What's the point of having a separate repository upstream? Can't we 
> just keep it in the libdrm repository?

There's no need for a separate repo upstream.  It's purely to aid internal 
packaging.

Alex

> 
> 
> --
> Earthling Michel Dänzer   |   http://www.amd.com
> Libre software enthusiast | Mesa and X developer
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH 0/5] GFX9 KIQ

2017-05-24 Thread Deucher, Alexander
> -Original Message-
> From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf
> Of Tom St Denis
> Sent: Wednesday, May 24, 2017 7:50 AM
> To: Alex Deucher
> Cc: amd-gfx list
> Subject: Re: [PATCH 0/5] GFX9 KIQ
> 
> On 23/05/17 05:32 PM, Alex Deucher wrote:
> > On Tue, May 23, 2017 at 5:31 PM, Alex Deucher 
> wrote:
> >> On Fri, May 12, 2017 at 7:11 AM, Tom St Denis 
> wrote:
> >>> On 11/05/17 07:33 PM, Tom St Denis wrote:
> 
>  On 11/05/17 02:35 PM, Alex Deucher wrote:
> >
> > These are the laste of the gfx9 KIQ patches that haven't landed yet.
> Can
> > someone with gfx9 capable hw test this (vega10 or raven)?  This is
> needed
> > to enable powergating on gfx9.
> >
> > Thanks,
> 
> 
>  If nobody gets to it by morning I'll try it out first thing on my vega10
>  though my VBIOS might need updating...
> >>>
> >>>
> >>>
> >>> They don't apply on top of either 4.9 nor 4.11...
> >>
> >> Odd.  They applied for me (at least to the 4.11 tree I had checked out
> >> when I left town).  Attached again just in case.
> 
> This series applies fine and seems to work in basic tests (gnome + GL
> apps + uvd/vce apps) on my vega10.
> 
> Haven't tried on raven yet.

All the ring and IB tests pass?

> 
> Tom
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 0/5] GFX9 KIQ

2017-05-24 Thread Tom St Denis

On 24/05/17 09:10 AM, Deucher, Alexander wrote:

-Original Message-
From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf
Of Tom St Denis
Sent: Wednesday, May 24, 2017 7:50 AM
To: Alex Deucher
Cc: amd-gfx list
Subject: Re: [PATCH 0/5] GFX9 KIQ

On 23/05/17 05:32 PM, Alex Deucher wrote:
> On Tue, May 23, 2017 at 5:31 PM, Alex Deucher 
wrote:
>> On Fri, May 12, 2017 at 7:11 AM, Tom St Denis 
wrote:
>>> On 11/05/17 07:33 PM, Tom St Denis wrote:

 On 11/05/17 02:35 PM, Alex Deucher wrote:
>
> These are the laste of the gfx9 KIQ patches that haven't landed yet.
Can
> someone with gfx9 capable hw test this (vega10 or raven)?  This is
needed
> to enable powergating on gfx9.
>
> Thanks,


 If nobody gets to it by morning I'll try it out first thing on my vega10
 though my VBIOS might need updating...
>>>
>>>
>>>
>>> They don't apply on top of either 4.9 nor 4.11...
>>
>> Odd.  They applied for me (at least to the 4.11 tree I had checked out
>> when I left town).  Attached again just in case.

This series applies fine and seems to work in basic tests (gnome + GL
apps + uvd/vce apps) on my vega10.

Haven't tried on raven yet.


All the ring and IB tests pass?


Ya it boots up and I can run gnome + gl apps just fine.

Tom
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 2/5] drm/syncobj: add sync obj wait interface. (v3)

2017-05-24 Thread Jason Ekstrand
On Wed, May 24, 2017 at 10:25 AM, Jason Ekstrand 
wrote:

> On Wed, May 24, 2017 at 12:06 AM, Dave Airlie  wrote:
>
>> From: Dave Airlie 
>>
>> This interface will allow sync object to be used to back
>> Vulkan fences. This API is pretty much the vulkan fence waiting
>> API, and I've ported the code from amdgpu.
>>
>> v2: accept relative timeout, pass remaining time back
>> to userspace.
>> v3: return to absolute timeouts.
>>
>> Signed-off-by: Dave Airlie 
>> ---
>>  drivers/gpu/drm/drm_internal.h |   2 +
>>  drivers/gpu/drm/drm_ioctl.c|   2 +
>>  drivers/gpu/drm/drm_syncobj.c  | 164 ++
>> +++
>>  include/uapi/drm/drm.h |  14 
>>  4 files changed, 182 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/drm_internal.h
>> b/drivers/gpu/drm/drm_internal.h
>> index 3fdef2c..53e3f6b 100644
>> --- a/drivers/gpu/drm/drm_internal.h
>> +++ b/drivers/gpu/drm/drm_internal.h
>> @@ -156,3 +156,5 @@ int drm_syncobj_handle_to_fd_ioctl(struct drm_device
>> *dev, void *data,
>>struct drm_file *file_private);
>>  int drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, void *data,
>>struct drm_file *file_private);
>> +int drm_syncobj_wait_ioctl(struct drm_device *dev, void *data,
>> +  struct drm_file *file_private);
>> diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
>> index f1e5681..385ce74 100644
>> --- a/drivers/gpu/drm/drm_ioctl.c
>> +++ b/drivers/gpu/drm/drm_ioctl.c
>> @@ -657,6 +657,8 @@ static const struct drm_ioctl_desc drm_ioctls[] = {
>>   DRM_UNLOCKED|DRM_RENDER_ALLOW),
>> DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE,
>> drm_syncobj_fd_to_handle_ioctl,
>>   DRM_UNLOCKED|DRM_RENDER_ALLOW),
>> +   DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_WAIT, drm_syncobj_wait_ioctl,
>> + DRM_UNLOCKED|DRM_RENDER_ALLOW),
>>  };
>>
>>  #define DRM_CORE_IOCTL_COUNT   ARRAY_SIZE( drm_ioctls )
>> diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.
>> c
>> index b611480..8b87594 100644
>> --- a/drivers/gpu/drm/drm_syncobj.c
>> +++ b/drivers/gpu/drm/drm_syncobj.c
>> @@ -1,5 +1,7 @@
>>  /*
>>   * Copyright 2017 Red Hat
>> + * Parts ported from amdgpu (fence wait code).
>> + * Copyright 2016 Advanced Micro Devices, Inc.
>>   *
>>   * Permission is hereby granted, free of charge, to any person obtaining
>> a
>>   * copy of this software and associated documentation files (the
>> "Software"),
>> @@ -31,6 +33,9 @@
>>   * that contain an optional fence. The fence can be updated with a new
>>   * fence, or be NULL.
>>   *
>> + * syncobj's can be waited upon, where it will wait for the underlying
>> + * fence.
>> + *
>>   * syncobj's can be export to fd's and back, these fd's are opaque and
>>   * have no other use case, except passing the syncobj between processes.
>>   *
>> @@ -375,3 +380,162 @@ drm_syncobj_fd_to_handle_ioctl(struct drm_device
>> *dev, void *data,
>> return drm_syncobj_fd_to_handle(file_private, args->fd,
>> &args->handle);
>>  }
>> +
>> +
>> +/**
>> + * drm_timeout_abs_to_jiffies - calculate jiffies timeout from absolute
>> value
>> + *
>> + * @timeout_ns: timeout in ns
>> + *
>> + * Calculate the timeout in jiffies from an absolute timeout in ns.
>> + */
>> +unsigned long drm_timeout_abs_to_jiffies(uint64_t timeout_ns)
>> +{
>> +   unsigned long timeout_jiffies;
>> +   ktime_t timeout;
>> +
>> +   /* clamp timeout if it's to large */
>> +   if (((int64_t)timeout_ns) < 0)
>> +   return MAX_SCHEDULE_TIMEOUT;
>> +
>> +   timeout = ktime_sub(ns_to_ktime(timeout_ns), ktime_get());
>> +   if (ktime_to_ns(timeout) < 0)
>> +   return 0;
>> +
>> +   timeout_jiffies = nsecs_to_jiffies(ktime_to_ns(timeout));
>> +   /*  clamp timeout to avoid unsigned-> signed overflow */
>> +   if (timeout_jiffies > MAX_SCHEDULE_TIMEOUT )
>> +   return MAX_SCHEDULE_TIMEOUT - 1;
>> +
>> +   return timeout_jiffies;
>> +}
>> +
>> +static int drm_syncobj_wait_all_fences(struct drm_device *dev,
>> +  struct drm_file *file_private,
>> +  struct drm_syncobj_wait *wait,
>> +  uint32_t *handles)
>> +{
>> +   uint32_t i;
>> +   int ret;
>> +   unsigned long timeout = drm_timeout_abs_to_jiffies(wait->timeout_ns);
>> +
>> +   for (i = 0; i < wait->count_handles; i++) {
>> +   struct dma_fence *fence;
>> +
>> +   ret = drm_syncobj_fence_get(file_private, handles[i],
>> +   &fence);
>> +   if (ret)
>> +   return ret;
>> +
>> +   if (!fence)
>> +   continue;
>> +
>> + 

Re: [PATCH 2/5] drm/syncobj: add sync obj wait interface. (v3)

2017-05-24 Thread Jason Ekstrand
On Wed, May 24, 2017 at 12:06 AM, Dave Airlie  wrote:

> From: Dave Airlie 
>
> This interface will allow sync object to be used to back
> Vulkan fences. This API is pretty much the vulkan fence waiting
> API, and I've ported the code from amdgpu.
>
> v2: accept relative timeout, pass remaining time back
> to userspace.
> v3: return to absolute timeouts.
>
> Signed-off-by: Dave Airlie 
> ---
>  drivers/gpu/drm/drm_internal.h |   2 +
>  drivers/gpu/drm/drm_ioctl.c|   2 +
>  drivers/gpu/drm/drm_syncobj.c  | 164 ++
> +++
>  include/uapi/drm/drm.h |  14 
>  4 files changed, 182 insertions(+)
>
> diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_
> internal.h
> index 3fdef2c..53e3f6b 100644
> --- a/drivers/gpu/drm/drm_internal.h
> +++ b/drivers/gpu/drm/drm_internal.h
> @@ -156,3 +156,5 @@ int drm_syncobj_handle_to_fd_ioctl(struct drm_device
> *dev, void *data,
>struct drm_file *file_private);
>  int drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, void *data,
>struct drm_file *file_private);
> +int drm_syncobj_wait_ioctl(struct drm_device *dev, void *data,
> +  struct drm_file *file_private);
> diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
> index f1e5681..385ce74 100644
> --- a/drivers/gpu/drm/drm_ioctl.c
> +++ b/drivers/gpu/drm/drm_ioctl.c
> @@ -657,6 +657,8 @@ static const struct drm_ioctl_desc drm_ioctls[] = {
>   DRM_UNLOCKED|DRM_RENDER_ALLOW),
> DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE,
> drm_syncobj_fd_to_handle_ioctl,
>   DRM_UNLOCKED|DRM_RENDER_ALLOW),
> +   DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_WAIT, drm_syncobj_wait_ioctl,
> + DRM_UNLOCKED|DRM_RENDER_ALLOW),
>  };
>
>  #define DRM_CORE_IOCTL_COUNT   ARRAY_SIZE( drm_ioctls )
> diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
> index b611480..8b87594 100644
> --- a/drivers/gpu/drm/drm_syncobj.c
> +++ b/drivers/gpu/drm/drm_syncobj.c
> @@ -1,5 +1,7 @@
>  /*
>   * Copyright 2017 Red Hat
> + * Parts ported from amdgpu (fence wait code).
> + * Copyright 2016 Advanced Micro Devices, Inc.
>   *
>   * Permission is hereby granted, free of charge, to any person obtaining a
>   * copy of this software and associated documentation files (the
> "Software"),
> @@ -31,6 +33,9 @@
>   * that contain an optional fence. The fence can be updated with a new
>   * fence, or be NULL.
>   *
> + * syncobj's can be waited upon, where it will wait for the underlying
> + * fence.
> + *
>   * syncobj's can be export to fd's and back, these fd's are opaque and
>   * have no other use case, except passing the syncobj between processes.
>   *
> @@ -375,3 +380,162 @@ drm_syncobj_fd_to_handle_ioctl(struct drm_device
> *dev, void *data,
> return drm_syncobj_fd_to_handle(file_private, args->fd,
> &args->handle);
>  }
> +
> +
> +/**
> + * drm_timeout_abs_to_jiffies - calculate jiffies timeout from absolute
> value
> + *
> + * @timeout_ns: timeout in ns
> + *
> + * Calculate the timeout in jiffies from an absolute timeout in ns.
> + */
> +unsigned long drm_timeout_abs_to_jiffies(uint64_t timeout_ns)
> +{
> +   unsigned long timeout_jiffies;
> +   ktime_t timeout;
> +
> +   /* clamp timeout if it's to large */
> +   if (((int64_t)timeout_ns) < 0)
> +   return MAX_SCHEDULE_TIMEOUT;
> +
> +   timeout = ktime_sub(ns_to_ktime(timeout_ns), ktime_get());
> +   if (ktime_to_ns(timeout) < 0)
> +   return 0;
> +
> +   timeout_jiffies = nsecs_to_jiffies(ktime_to_ns(timeout));
> +   /*  clamp timeout to avoid unsigned-> signed overflow */
> +   if (timeout_jiffies > MAX_SCHEDULE_TIMEOUT )
> +   return MAX_SCHEDULE_TIMEOUT - 1;
> +
> +   return timeout_jiffies;
> +}
> +
> +static int drm_syncobj_wait_all_fences(struct drm_device *dev,
> +  struct drm_file *file_private,
> +  struct drm_syncobj_wait *wait,
> +  uint32_t *handles)
> +{
> +   uint32_t i;
> +   int ret;
> +   unsigned long timeout = drm_timeout_abs_to_jiffies(
> wait->timeout_ns);
> +
> +   for (i = 0; i < wait->count_handles; i++) {
> +   struct dma_fence *fence;
> +
> +   ret = drm_syncobj_fence_get(file_private, handles[i],
> +   &fence);
> +   if (ret)
> +   return ret;
> +
> +   if (!fence)
> +   continue;
> +
> +   ret = dma_fence_wait_timeout(fence, true, timeout);
> +
> +   dma_fence_put(fence);
> +   if (ret < 0)
> +   return ret;
> +   if (ret == 0)
> + 
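
A practical note on the v3 semantics above: because the wait takes an absolute
timeout_ns that the kernel compares against ktime_get(), userspace computes the
deadline itself on CLOCK_MONOTONIC. A minimal sketch of that computation (plain
C; only the timeout_ns field name is taken from the series above, the helper is
purely illustrative):

#include <stdint.h>
#include <time.h>

/* Turn a relative wait into the absolute CLOCK_MONOTONIC deadline (in ns)
 * that drm_timeout_abs_to_jiffies() above expects in timeout_ns. */
static uint64_t abs_timeout_ns(uint64_t relative_ns)
{
        struct timespec ts;

        clock_gettime(CLOCK_MONOTONIC, &ts);    /* same clock as ktime_get() */
        return (uint64_t)ts.tv_sec * 1000000000ull + ts.tv_nsec + relative_ns;
}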

Re: [PATCH 1/5] drm: introduce sync objects (v3)

2017-05-24 Thread Jason Ekstrand
I can't really review all of the kernel details (though they seem OK to
me), so this mostly applies to the API:

Reviewed-by: Jason Ekstrand 

On Wed, May 24, 2017 at 12:06 AM, Dave Airlie  wrote:

> From: Dave Airlie 
>
> Sync objects are new toplevel drm object, that contain a
> pointer to a fence. This fence can be updated via command
> submission ioctls via drivers.
>
> There is also a generic wait obj API modelled on the vulkan
> wait API (with code modelled on some amdgpu code).
>
> These objects can be converted to an opaque fd that can be
> passes between processes.
>
> v2: rename reference/unreference to put/get (Chris)
> fix leaked reference (David Zhou)
> drop mutex in favour of cmpxchg (Chris)
> v3: cleanups from danvet, rebase on drm_fops rename
> check fd_flags is 0 in ioctls.
>
> Reviewed-by: Sean Paul 
> Signed-off-by: Dave Airlie 
> ---
>  Documentation/gpu/drm-internals.rst |   3 +
>  Documentation/gpu/drm-mm.rst|  12 ++
>  drivers/gpu/drm/Makefile|   2 +-
>  drivers/gpu/drm/drm_file.c  |   8 +
>  drivers/gpu/drm/drm_internal.h  |  13 ++
>  drivers/gpu/drm/drm_ioctl.c |  12 ++
>  drivers/gpu/drm/drm_syncobj.c   | 377 ++
> ++
>  include/drm/drmP.h  |   1 -
>  include/drm/drm_drv.h   |   1 +
>  include/drm/drm_file.h  |   5 +
>  include/drm/drm_syncobj.h   |  87 +
>  include/uapi/drm/drm.h  |  24 +++
>  12 files changed, 543 insertions(+), 2 deletions(-)
>  create mode 100644 drivers/gpu/drm/drm_syncobj.c
>  create mode 100644 include/drm/drm_syncobj.h
>
> diff --git a/Documentation/gpu/drm-internals.rst b/Documentation/gpu/drm-
> internals.rst
> index babfb61..2b23d78 100644
> --- a/Documentation/gpu/drm-internals.rst
> +++ b/Documentation/gpu/drm-internals.rst
> @@ -98,6 +98,9 @@ DRIVER_ATOMIC
>  implement appropriate obj->atomic_get_property() vfuncs for any
>  modeset objects with driver specific properties.
>
> +DRIVER_SYNCOBJ
> +Driver support drm sync objects.
> +
>  Major, Minor and Patchlevel
>  ~~~
>
> diff --git a/Documentation/gpu/drm-mm.rst b/Documentation/gpu/drm-mm.rst
> index 96b9c34..9412798 100644
> --- a/Documentation/gpu/drm-mm.rst
> +++ b/Documentation/gpu/drm-mm.rst
> @@ -484,3 +484,15 @@ DRM Cache Handling
>
>  .. kernel-doc:: drivers/gpu/drm/drm_cache.c
> :export:
> +
> +DRM Sync Objects
> +===
> +
> +.. kernel-doc:: drivers/gpu/drm/drm_syncobj.c
> +   :doc: Overview
> +
> +.. kernel-doc:: include/drm/drm_syncobj.h
> +   :export:
> +
> +.. kernel-doc:: drivers/gpu/drm/drm_syncobj.c
> +   :export:
> diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
> index 59f0f9b..6f42188 100644
> --- a/drivers/gpu/drm/Makefile
> +++ b/drivers/gpu/drm/Makefile
> @@ -16,7 +16,7 @@ drm-y   :=drm_auth.o drm_bufs.o drm_cache.o \
> drm_framebuffer.o drm_connector.o drm_blend.o \
> drm_encoder.o drm_mode_object.o drm_property.o \
> drm_plane.o drm_color_mgmt.o drm_print.o \
> -   drm_dumb_buffers.o drm_mode_config.o
> +   drm_dumb_buffers.o drm_mode_config.o drm_syncobj.o
>
>  drm-$(CONFIG_DRM_LIB_RANDOM) += lib/drm_random.o
>  drm-$(CONFIG_DRM_VM) += drm_vm.o
> diff --git a/drivers/gpu/drm/drm_file.c b/drivers/gpu/drm/drm_file.c
> index 3783b65..a20d6a9 100644
> --- a/drivers/gpu/drm/drm_file.c
> +++ b/drivers/gpu/drm/drm_file.c
> @@ -229,6 +229,9 @@ static int drm_open_helper(struct file *filp, struct
> drm_minor *minor)
> if (drm_core_check_feature(dev, DRIVER_GEM))
> drm_gem_open(dev, priv);
>
> +   if (drm_core_check_feature(dev, DRIVER_SYNCOBJ))
> +   drm_syncobj_open(priv);
> +
> if (drm_core_check_feature(dev, DRIVER_PRIME))
> drm_prime_init_file_private(&priv->prime);
>
> @@ -276,6 +279,8 @@ static int drm_open_helper(struct file *filp, struct
> drm_minor *minor)
>  out_prime_destroy:
> if (drm_core_check_feature(dev, DRIVER_PRIME))
> drm_prime_destroy_file_private(&priv->prime);
> +   if (drm_core_check_feature(dev, DRIVER_SYNCOBJ))
> +   drm_syncobj_release(priv);
> if (drm_core_check_feature(dev, DRIVER_GEM))
> drm_gem_release(dev, priv);
> put_pid(priv->pid);
> @@ -398,6 +403,9 @@ int drm_release(struct inode *inode, struct file *filp)
> drm_property_destroy_user_blobs(dev, file_priv);
> }
>
> +   if (drm_core_check_feature(dev, DRIVER_SYNCOBJ))
> +   drm_syncobj_release(file_priv);
> +
> if (drm_core_check_feature(dev, DRIVER_GEM))
> drm_gem_release(dev, file_priv);
>
> diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_
> internal.h
> index 3d8e8f8..3fdef2c 100644
> --- 

Re: [PATCH 3/5] drm/syncobj: add sync_file interaction.

2017-05-24 Thread Jason Ekstrand
Yes, I believe this is the proper way for sync_file and syncobj to
interact.  Again, I can't really review all of the kernel details
(though they seem OK to me), so this mostly applies to the API:

Reviewed-by: Jason Ekstrand 

On Wed, May 24, 2017 at 12:06 AM, Dave Airlie  wrote:

> From: Dave Airlie 
>
> This interface allows importing the fence from a sync_file into
> an existing drm sync object, or exporting the fence attached to
> an existing drm sync object into a new sync file object.
>
> This should only be used to interact with sync files where necessary.
>
> Reviewed-by: Sean Paul 
> Signed-off-by: Dave Airlie 
> ---
>  drivers/gpu/drm/drm_syncobj.c | 64 ++
> +++--
>  include/uapi/drm/drm.h|  2 ++
>  2 files changed, 64 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
> index 8b87594..54d751e 100644
> --- a/drivers/gpu/drm/drm_syncobj.c
> +++ b/drivers/gpu/drm/drm_syncobj.c
> @@ -50,6 +50,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>
>  #include "drm_internal.h"
>  #include 
> @@ -276,6 +277,48 @@ static int drm_syncobj_fd_to_handle(struct drm_file
> *file_private,
> return 0;
>  }
>
> +int drm_syncobj_import_sync_file_fence(struct drm_file *file_private,
> +  int fd, int handle)
> +{
> +   struct dma_fence *fence = sync_file_get_fence(fd);
> +   if (!fence)
> +   return -EINVAL;
> +
> +   return drm_syncobj_replace_fence(file_private, handle, fence);
> +}
> +
> +int drm_syncobj_export_sync_file(struct drm_file *file_private,
> +int handle, int *p_fd)
> +{
> +   int ret;
> +   struct dma_fence *fence;
> +   struct sync_file *sync_file;
> +   int fd = get_unused_fd_flags(O_CLOEXEC);
> +
> +   if (fd < 0)
> +   return fd;
> +
> +   ret = drm_syncobj_fence_get(file_private, handle, &fence);
> +   if (ret)
> +   goto err_put_fd;
> +
> +   sync_file = sync_file_create(fence);
> +   if (!sync_file) {
> +   ret = -EINVAL;
> +   goto err_fence_put;
> +   }
> +
> +   fd_install(fd, sync_file->file);
> +
> +   dma_fence_put(fence);
> +   *p_fd = fd;
> +   return 0;
> +err_fence_put:
> +   dma_fence_put(fence);
> +err_put_fd:
> +   put_unused_fd(fd);
> +   return ret;
> +}
>  /**
>   * drm_syncobj_open - initalizes syncobj file-private structures at
> devnode open time
>   * @dev: drm_device which is being opened by userspace
> @@ -358,9 +401,17 @@ drm_syncobj_handle_to_fd_ioctl(struct drm_device
> *dev, void *data,
> if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
> return -ENODEV;
>
> -   if (args->pad || args->flags)
> +   if (args->pad)
> +   return -EINVAL;
> +
> +   if (args->flags != 0 &&
> +   args->flags != DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_EXPORT_FENCE_SYNC_FILE)
> return -EINVAL;
>
> +   if (args->flags & DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_EXPORT_FENCE_SYNC_FILE)
> +   return drm_syncobj_export_sync_file(file_private,
> args->handle,
> +   &args->fd);
> +
> return drm_syncobj_handle_to_fd(file_private, args->handle,
> &args->fd);
>  }
> @@ -374,9 +425,18 @@ drm_syncobj_fd_to_handle_ioctl(struct drm_device
> *dev, void *data,
> if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
> return -ENODEV;
>
> -   if (args->pad || args->flags)
> +   if (args->pad)
> return -EINVAL;
>
> +   if (args->flags != 0 &&
> +   args->flags != DRM_SYNCOBJ_FD_TO_HANDLE_FLAGS_IMPORT_SYNC_FILE_FENCE)
> +   return -EINVAL;
> +
> +   if (args->flags & DRM_SYNCOBJ_FD_TO_HANDLE_FLAGS_IMPORT_SYNC_FILE_FENCE)
> +   return drm_syncobj_import_sync_file_fence(file_private,
> + args->fd,
> + args->handle);
> +
> return drm_syncobj_fd_to_handle(file_private, args->fd,
> &args->handle);
>  }
> diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
> index d6e2f62..94c75be 100644
> --- a/include/uapi/drm/drm.h
> +++ b/include/uapi/drm/drm.h
> @@ -708,6 +708,8 @@ struct drm_syncobj_destroy {
> __u32 pad;
>  };
>
> +#define DRM_SYNCOBJ_FD_TO_HANDLE_FLAGS_IMPORT_SYNC_FILE_FENCE (1 << 0)
> +#define DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_EXPORT_FENCE_SYNC_FILE (1 << 0)
>  struct drm_syncobj_handle {
> __u32 handle;
> __u32 flags;
> --
> 2.9.4
>
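
In practice the new flags above select which object crosses the fd boundary:
with flags == 0 the ioctl transports the syncobj itself, with the sync_file
flag it snapshots the syncobj's current fence. A hedged userspace sketch of the
sync_file export (ioctl, flag and struct names are taken from this series'
uapi; drmIoctl() from libdrm is assumed and error handling is omitted):

#include <stdint.h>
#include <xf86drm.h>
#include <drm.h>

static int syncobj_fence_to_sync_file(int drm_fd, uint32_t handle)
{
        struct drm_syncobj_handle args = {
                .handle = handle,
                .flags  = DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_EXPORT_FENCE_SYNC_FILE,
        };

        if (drmIoctl(drm_fd, DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD, &args))
                return -1;
        return args.fd; /* a sync_file fd wrapping the current fence */
}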

RE: [PATCH 0/5] GFX9 KIQ

2017-05-24 Thread Liu, Shaoyun
Hi Alex,
It turned out I had used the wrong card for the test.
With a correct Vega10 card, the ring and IB tests for the KIQ and the other
compute rings passed. I also verified that KFD can use the KIQ to submit
invalidate_tlbs successfully.

Regards
Shaoyun.liu

-Original Message-
From: Deucher, Alexander 
Sent: Wednesday, May 24, 2017 7:14 AM
To: Liu, Shaoyun; Alex Deucher; StDenis, Tom
Cc: amd-gfx list
Subject: RE: [PATCH 0/5] GFX9 KIQ

> -Original Message-
> From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf 
> Of Liu, Shaoyun
> Sent: Tuesday, May 23, 2017 7:38 PM
> To: Alex Deucher; StDenis, Tom
> Cc: amd-gfx list
> Subject: RE: [PATCH 0/5] GFX9 KIQ
> 
> I don't have any test for KCQ, but KFD uses the KIQ for invalidate_tlbs.
> I tried to add some print messages in the code to prove it goes through the
> new code path, but I don't seem to see any of the messages I added. I tried
> pr_info, pr_err, printk and DRM_ERROR and nothing works. Has anything changed
> on the amdgpu side that would prevent the messages from printing?

Do you see the KIQ and compute ring and IB tests passing?

Alex

> 
> Regards
> shaoyun.liu
> 
> -Original Message-
> From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf 
> Of Alex Deucher
> Sent: Tuesday, May 23, 2017 5:31 PM
> To: StDenis, Tom
> Cc: amd-gfx list
> Subject: Re: [PATCH 0/5] GFX9 KIQ
> 
> On Fri, May 12, 2017 at 7:11 AM, Tom St Denis 
> wrote:
> > On 11/05/17 07:33 PM, Tom St Denis wrote:
> >>
> >> On 11/05/17 02:35 PM, Alex Deucher wrote:
> >>>
> >>> These are the last of the gfx9 KIQ patches that haven't landed yet.
> >>> Can someone with gfx9 capable hw test this (vega10 or raven)?  
> >>> This is needed to enable powergating on gfx9.
> >>>
> >>> Thanks,
> >>
> >>
> >> If nobody gets to it by morning I'll try it out first thing on my
> >> vega10 though my VBIOS might need updating...
> >
> >
> >
> > They don't apply on top of either 4.9 nor 4.11...
> 
> Odd.  They applied for me (at least to the 4.11 tree I had checked out 
> when I left town).  Attached again just in case.
> 
> Alex
> 
> >
> >
> > Tom


RE: [PATCH 2/5] drm/amdgpu: Add vm context module param

2017-05-24 Thread Kasiviswanathan, Harish


-Original Message-
From: Christian König [mailto:deathsim...@vodafone.de] 
Sent: Wednesday, May 24, 2017 5:41 AM
To: Kasiviswanathan, Harish ; 
amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH 2/5] drm/amdgpu: Add vm context module param

Am 15.05.2017 um 23:32 schrieb Harish Kasiviswanathan:
> Add VM update mode module param (amdgpu.vm_update_mode) that can used 
> to control how VM pde/pte are updated for Graphics and Compute.
>
> BIT0 controls Graphics and BIT1 Compute.
>   BIT0 [= 0] Graphics updated by SDMA [= 1] by CPU
>   BIT1 [= 0] Compute updated by SDMA [= 1] by CPU
>
> By default, only for large BAR system vm_update_mode = 2, indicating 
> that Graphics VMs will be updated via SDMA and Compute VMs will be 
> updated via CPU. And for all all other systems (by default) 
> vm_update_mode = 0
>
> Signed-off-by: Harish Kasiviswanathan 
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu.h |  1 +
>   drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c |  4 
>   drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c |  3 ++-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 35 
> -
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  | 20 ++-
>   5 files changed, 60 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index fadeb55..fd84410 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -94,6 +94,7 @@
>   extern int amdgpu_vm_block_size;
>   extern int amdgpu_vm_fault_stop;
>   extern int amdgpu_vm_debug;
> +extern int amdgpu_vm_update_mode;
>   extern int amdgpu_dc;
>   extern int amdgpu_sched_jobs;
>   extern int amdgpu_sched_hw_submission; diff --git 
> a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> index 130c45d..8d28a35 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> @@ -94,6 +94,7 @@
>   int amdgpu_vm_fault_stop = 0;
>   int amdgpu_vm_debug = 0;
>   int amdgpu_vram_page_split = 512;
> +int amdgpu_vm_update_mode = -1;
>   int amdgpu_exp_hw_support = 0;
>   int amdgpu_dc = -1;
>   int amdgpu_sched_jobs = 32;
> @@ -180,6 +181,9 @@
>   MODULE_PARM_DESC(vm_debug, "Debug VM handling (0 = disabled (default), 1 = 
> enabled)");
>   module_param_named(vm_debug, amdgpu_vm_debug, int, 0644);
>   
> +MODULE_PARM_DESC(vm_update_mode, "VM update using CPU (0 = never 
> +(default except for large BAR(LB)), 1 = Graphics only, 2 = Compute 
> +only (default for LB), 3 = Both"); module_param_named(vm_update_mode, 
> +amdgpu_vm_update_mode, int, 0444);
> +
>   MODULE_PARM_DESC(vram_page_split, "Number of pages after we split VRAM 
> allocations (default 1024, -1 = disable)");
>   module_param_named(vram_page_split, amdgpu_vram_page_split, int, 
> 0444);
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> index d167949..8f6c20f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> @@ -774,7 +774,8 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct 
> drm_file *file_priv)
>   goto out_suspend;
>   }
>   
> - r = amdgpu_vm_init(adev, &fpriv->vm);
> + r = amdgpu_vm_init(adev, &fpriv->vm,
> +AMDGPU_VM_CONTEXT_GFX);
>   if (r) {
>   kfree(fpriv);
>   goto out_suspend;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index c644e54..9c89cb2 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -721,6 +721,11 @@ bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring 
> *ring,
>   return true;
>   }
>   
> +static bool amdgpu_vm_is_large_bar(struct amdgpu_device *adev) {
> + return (adev->mc.real_vram_size == adev->mc.visible_vram_size); }
> +
>   /**
>* amdgpu_vm_flush - hardware flush the vm
>*
> @@ -2291,10 +2296,12 @@ void amdgpu_vm_adjust_size(struct amdgpu_device 
> *adev, uint64_t vm_size)
>*
>* @adev: amdgpu_device pointer
>* @vm: requested vm
> + * @vm_context: Indicates if it GFX or Compute context
>*
>* Init @vm fields.
>*/
> -int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
> +int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
> +int vm_context)
>   {
>   const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE,
>   AMDGPU_VM_PTE_COUNT(adev) * 8);
> @@ -2323,6 +2330,16 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct 
> amdgpu_vm *vm)
>   if (r)
>   return r;
>   
> + if (vm_context == AMDGPU_VM_CONTEXT_COMPUTE)
> + vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
> + AMDGPU_VM_USE_CPU_FOR_COMPUTE);
> + else
> + 
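
For reference, the new parameter is set like any other amdgpu module parameter,
and with the 0444 permission above it can only be given at load time. Values as
described in the MODULE_PARM_DESC above, e.g.:

# /etc/modprobe.d/amdgpu.conf: CPU page table updates for both GFX and Compute
options amdgpu vm_update_mode=3

# or equivalently on the kernel command line
amdgpu.vm_update_mode=3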

Re: [PATCH] drm/radeon: Unbreak HPD handling for r600+

2017-05-24 Thread Alex Deucher
On Fri, May 12, 2017 at 2:56 AM, Christian König
 wrote:
> Am 12.05.2017 um 01:31 schrieb Lyude:
>>
>> We end up reading the interrupt register for HPD5, and then writing it
>> to HPD6 which on systems without anything using HPD5 results in
>> permanently disabling hotplug on one of the display outputs after the
>> first time we acknowledge a hotplug interrupt from the GPU.
>>
>> This code is really bad. But for now, let's just fix this. I will
>> hopefully have a large patch series to refactor all of this soon.
>>
>> Signed-off-by: Lyude 
>> Cc: sta...@vger.kernel.org
>
>
> Really nice catch! And yes I agree the copy code in HPD handling
> always scared me as well.
>
> Patch is Reviewed-by: Christian König .

Applied.  thanks!

Alex

>
> Christian.
>
>
>> ---
>>   drivers/gpu/drm/radeon/cik.c   | 4 ++--
>>   drivers/gpu/drm/radeon/evergreen.c | 4 ++--
>>   drivers/gpu/drm/radeon/r600.c  | 2 +-
>>   drivers/gpu/drm/radeon/si.c| 4 ++--
>>   4 files changed, 7 insertions(+), 7 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c
>> index 53710dd..cfc917c 100644
>> --- a/drivers/gpu/drm/radeon/cik.c
>> +++ b/drivers/gpu/drm/radeon/cik.c
>> @@ -7401,7 +7401,7 @@ static inline void cik_irq_ack(struct radeon_device
>> *rdev)
>> WREG32(DC_HPD5_INT_CONTROL, tmp);
>> }
>> if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
>> -   tmp = RREG32(DC_HPD5_INT_CONTROL);
>> +   tmp = RREG32(DC_HPD6_INT_CONTROL);
>> tmp |= DC_HPDx_INT_ACK;
>> WREG32(DC_HPD6_INT_CONTROL, tmp);
>> }
>> @@ -7431,7 +7431,7 @@ static inline void cik_irq_ack(struct radeon_device
>> *rdev)
>> WREG32(DC_HPD5_INT_CONTROL, tmp);
>> }
>> if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT)
>> {
>> -   tmp = RREG32(DC_HPD5_INT_CONTROL);
>> +   tmp = RREG32(DC_HPD6_INT_CONTROL);
>> tmp |= DC_HPDx_RX_INT_ACK;
>> WREG32(DC_HPD6_INT_CONTROL, tmp);
>> }
>> diff --git a/drivers/gpu/drm/radeon/evergreen.c
>> b/drivers/gpu/drm/radeon/evergreen.c
>> index d1b1e0c..c48d19e 100644
>> --- a/drivers/gpu/drm/radeon/evergreen.c
>> +++ b/drivers/gpu/drm/radeon/evergreen.c
>> @@ -4933,7 +4933,7 @@ static void evergreen_irq_ack(struct radeon_device
>> *rdev)
>> WREG32(DC_HPD5_INT_CONTROL, tmp);
>> }
>> if (rdev->irq.stat_regs.evergreen.disp_int_cont5 &
>> DC_HPD6_INTERRUPT) {
>> -   tmp = RREG32(DC_HPD5_INT_CONTROL);
>> +   tmp = RREG32(DC_HPD6_INT_CONTROL);
>> tmp |= DC_HPDx_INT_ACK;
>> WREG32(DC_HPD6_INT_CONTROL, tmp);
>> }
>> @@ -4964,7 +4964,7 @@ static void evergreen_irq_ack(struct radeon_device
>> *rdev)
>> WREG32(DC_HPD5_INT_CONTROL, tmp);
>> }
>> if (rdev->irq.stat_regs.evergreen.disp_int_cont5 &
>> DC_HPD6_RX_INTERRUPT) {
>> -   tmp = RREG32(DC_HPD5_INT_CONTROL);
>> +   tmp = RREG32(DC_HPD6_INT_CONTROL);
>> tmp |= DC_HPDx_RX_INT_ACK;
>> WREG32(DC_HPD6_INT_CONTROL, tmp);
>> }
>> diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
>> index 0a08517..e06e2d8 100644
>> --- a/drivers/gpu/drm/radeon/r600.c
>> +++ b/drivers/gpu/drm/radeon/r600.c
>> @@ -3988,7 +3988,7 @@ static void r600_irq_ack(struct radeon_device *rdev)
>> WREG32(DC_HPD5_INT_CONTROL, tmp);
>> }
>> if (rdev->irq.stat_regs.r600.disp_int_cont2 &
>> DC_HPD6_INTERRUPT) {
>> -   tmp = RREG32(DC_HPD5_INT_CONTROL);
>> +   tmp = RREG32(DC_HPD6_INT_CONTROL);
>> tmp |= DC_HPDx_INT_ACK;
>> WREG32(DC_HPD6_INT_CONTROL, tmp);
>> }
>> diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c
>> index 528e5a4..bfeb774 100644
>> --- a/drivers/gpu/drm/radeon/si.c
>> +++ b/drivers/gpu/drm/radeon/si.c
>> @@ -6330,7 +6330,7 @@ static inline void si_irq_ack(struct radeon_device
>> *rdev)
>> WREG32(DC_HPD5_INT_CONTROL, tmp);
>> }
>> if (rdev->irq.stat_regs.evergreen.disp_int_cont5 &
>> DC_HPD6_INTERRUPT) {
>> -   tmp = RREG32(DC_HPD5_INT_CONTROL);
>> +   tmp = RREG32(DC_HPD6_INT_CONTROL);
>> tmp |= DC_HPDx_INT_ACK;
>> WREG32(DC_HPD6_INT_CONTROL, tmp);
>> }
>> @@ -6361,7 +6361,7 @@ static inline void si_irq_ack(struct radeon_device
>> *rdev)
>> WREG32(DC_HPD5_INT_CONTROL, tmp);
>> }
>> if (rdev->irq.stat_regs.evergreen.disp_int_cont5 &
>> DC_HPD6_RX_INTERRUPT) {
>> -   tmp = RREG32(DC_HPD5_INT_CONTROL);
>> +   tmp = RREG32(DC_HPD6_INT_CONTROL);
>> 

Re: [PATCH v2 0/3] Cleanup evergreen/si IRQ handling code

2017-05-24 Thread Alex Deucher
On Sat, May 20, 2017 at 7:39 AM, Christian König
 wrote:
> Am 20.05.2017 um 01:48 schrieb Lyude:
>>
>> This is the first part of me going through and cleaning up the IRQ
>> handling
>> code for radeon, since after taking a look at it the other day while
>> trying to
>> debug something I realized basically all of the code was copy pasted
>> everywhere, and quite difficult to actually read through.
>>
>> Will come up with something for r600 and cik once I've got the chipsets on
>> hand
>> to test with.
>>
>> Lyude (3):
>>drm/radeon: Cleanup display interrupt handling for evergreen, si
>>drm/radeon: Cleanup HDMI audio interrupt handling for evergreen
>>drm/radeon: Cleanup pageflipping IRQ handling for evergreen, si
>
>
> I don't have time to do a line by line review, but what I saw looked very
> good to me.
>
> So the whole seres is Acked-by: Christian König .

Applied.  thanks!

Alex


>
> BTW: You don't want to take a look at the other hw generations as well?
>
> Regards,
> Christian.
>
>>
>>   drivers/gpu/drm/radeon/evergreen.c  | 943
>> ++--
>>   drivers/gpu/drm/radeon/radeon.h |  27 +-
>>   drivers/gpu/drm/radeon/radeon_irq_kms.c |  35 ++
>>   drivers/gpu/drm/radeon/si.c | 655 +-
>>   4 files changed, 344 insertions(+), 1316 deletions(-)
>>
>


Re: Topics for beginners

2017-05-24 Thread Subrat Meher
Hi Christian,

I have built a stack from git sources. Is drm-next a good branch for basing
development on?

As you suspected, piglit reported a few crashes and failures with gpu and
cl profiles. So I will start looking at those. Thanks for the pointers!

Interest-wise, I am most interested in kernel topics and improving
driver stability. There is a system lockup issue on my 270 when running
Euro Truck Simulator 2 at a high render resolution. I plan to get some debug
data on this after reading up on kernel debugging.

Thanks,
Subrat

On 22-May-2017 16:01, "Christian König"  wrote:

> Hi Subrat,
>
> well finding topics for beginners on the graphics stack are usually not an
> easy task because it is a rather complicated piece of software.
>
> I suggest to just start with setting up a build and test environment and
> start to run piglit (our testing suite) on a fully self compiled stack
> (kernel, libdrm, ddx, mesa).
>
> If you find any bugs while doing this (and there still are a couple I
> guess) you can start to narrow those down.
>
> Additional to that is there any particular area you are interested in?
> E.g. kernel development, video codecs or frontend stuff (OpenGL, OpenCL)?
>
> Regards,
> Christian.
>
> Am 19.05.2017 um 05:24 schrieb Subrat Meher:
>
> Hi,
>
> I am interested in working on radeon's Linux graphics stack. Are there any
> areas that are more approachable to someone  unfamiliar with the stack?
>
> As for existing knowledge, I have worked with OpenGL, OpenCL and embedded
> applications.
>
> Apologies if this is not the right place for this discussion.
>
> Thanks,
> Subrat
>
>


Re: [PATCH 1/7] drm/amdgpu: cleanup adjust_mc_addr handling v2

2017-05-24 Thread Zhang, Jerry (Junwei)

On 05/24/2017 05:18 PM, Christian König wrote:

Am 18.05.2017 um 07:33 schrieb Zhang, Jerry (Junwei):

On 05/17/2017 05:22 PM, Christian König wrote:

[SNIP]
+static uint64_t gmc_v6_0_get_vm_pde(struct amdgpu_device *adev, uint64_t addr)
+{
+BUG_ON(addr & 0xF000FFFULL);
+return addr;
+}


Just wonder why we didn't return the address as below?
adev->vm_manager.vram_base_offset + addr


That is a really good question and I couldn't figure out all the details either.

I've tested it on an Kaveri and it still seems to work fine, so the logic is
indeed correct.


Thanks for your verification.

Then it's a makeshift change for now, although it looks a little suspicious.
Let's dig into it more with the HW team in the future.
Please feel free to add my RB for the v3 one.

Reviewed-by: Junwei Zhang 

Jerry




Does that cause gmc6~gmc8 has no APU support officially?


No, only the pro driver doesn't support APUs. The open source driver works
perfectly fine on them.


Or I miss any info about them?


The documentation for gfx6-8 is very unclear on this.

It says that the registers and PDE should use physical addresses, but from my
testing that isn't the case.

Going to ping the hardware dev on this once more when I have time.

Regards,
Christian.



Jerry


+
  static void gmc_v6_0_set_fault_enable_default(struct amdgpu_device *adev,
bool value)
  {
@@ -1127,6 +1133,7 @@ static const struct amdgpu_gart_funcs
gmc_v6_0_gart_funcs = {
  .flush_gpu_tlb = gmc_v6_0_gart_flush_gpu_tlb,
  .set_pte_pde = gmc_v6_0_gart_set_pte_pde,
  .set_prt = gmc_v6_0_set_prt,
+.get_vm_pde = gmc_v6_0_get_vm_pde,
  .get_vm_pte_flags = gmc_v6_0_get_vm_pte_flags
  };

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
index 967505b..4f140e8 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -472,6 +472,12 @@ static uint64_t gmc_v7_0_get_vm_pte_flags(struct
amdgpu_device *adev,
  return pte_flag;
  }

+static uint64_t gmc_v7_0_get_vm_pde(struct amdgpu_device *adev, uint64_t addr)
+{
+BUG_ON(addr & 0xF000FFFULL);
+return addr;
+}
+
  /**
   * gmc_v8_0_set_fault_enable_default - update VM fault handling
   *
@@ -1293,7 +1299,8 @@ static const struct amdgpu_gart_funcs
gmc_v7_0_gart_funcs = {
  .flush_gpu_tlb = gmc_v7_0_gart_flush_gpu_tlb,
  .set_pte_pde = gmc_v7_0_gart_set_pte_pde,
  .set_prt = gmc_v7_0_set_prt,
-.get_vm_pte_flags = gmc_v7_0_get_vm_pte_flags
+.get_vm_pte_flags = gmc_v7_0_get_vm_pte_flags,
+.get_vm_pde = gmc_v7_0_get_vm_pde
  };

  static const struct amdgpu_irq_src_funcs gmc_v7_0_irq_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index 3b5ea0f..f05c034 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -656,6 +656,12 @@ static uint64_t gmc_v8_0_get_vm_pte_flags(struct
amdgpu_device *adev,
  return pte_flag;
  }

+static uint64_t gmc_v8_0_get_vm_pde(struct amdgpu_device *adev, uint64_t addr)
+{
+BUG_ON(addr & 0xFFF00FFFULL);
+return addr;
+}
+
  /**
   * gmc_v8_0_set_fault_enable_default - update VM fault handling
   *
@@ -1612,7 +1618,8 @@ static const struct amdgpu_gart_funcs
gmc_v8_0_gart_funcs = {
  .flush_gpu_tlb = gmc_v8_0_gart_flush_gpu_tlb,
  .set_pte_pde = gmc_v8_0_gart_set_pte_pde,
  .set_prt = gmc_v8_0_set_prt,
-.get_vm_pte_flags = gmc_v8_0_get_vm_pte_flags
+.get_vm_pte_flags = gmc_v8_0_get_vm_pte_flags,
+.get_vm_pde = gmc_v8_0_get_vm_pde
  };

  static const struct amdgpu_irq_src_funcs gmc_v8_0_irq_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 19e1027..047b1a7 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -358,17 +358,19 @@ static uint64_t gmc_v9_0_get_vm_pte_flags(struct
amdgpu_device *adev,
  return pte_flag;
  }

-static u64 gmc_v9_0_adjust_mc_addr(struct amdgpu_device *adev, u64 mc_addr)
+static u64 gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, u64 addr)
  {
-return adev->vm_manager.vram_base_offset + mc_addr - adev->mc.vram_start;
+addr = adev->vm_manager.vram_base_offset + addr - adev->mc.vram_start;
+BUG_ON(addr & 0x003FULL);
+return addr;
  }

  static const struct amdgpu_gart_funcs gmc_v9_0_gart_funcs = {
  .flush_gpu_tlb = gmc_v9_0_gart_flush_gpu_tlb,
  .set_pte_pde = gmc_v9_0_gart_set_pte_pde,
-.get_vm_pte_flags = gmc_v9_0_get_vm_pte_flags,
-.adjust_mc_addr = gmc_v9_0_adjust_mc_addr,
  .get_invalidate_req = gmc_v9_0_get_invalidate_req,
+.get_vm_pte_flags = gmc_v9_0_get_vm_pte_flags,
+.get_vm_pde = gmc_v9_0_get_vm_pde
  };

  static void gmc_v9_0_set_gart_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 

Re: [PATCH 0/3] drm/amdgpu: Tweaks for high pressure on CPU visible VRAM

2017-05-24 Thread Michel Dänzer
On 24/05/17 08:27 PM, Christian König wrote:
> Am 24.05.2017 um 13:03 schrieb Marek Olšák:
>>>
>> I think the final solution (done in fault_reserve_notify) should be:
>> if (bo->num_cpu_page_faults++ > 20)
>> bo->preferred_domain = GTT_WC;

I agree something like that will probably be part of the solution, but I
doubt it's quite that simple or that it's the only thing that can be
improved.


> I more or less agree on that, but setting preferred_domain permanently
> to GTT_WC is what worries me a bit.
> 
> E.g. imagine you alt+tab from a game to your browser and back and the
> game runs way slower now because BOs are never moved back to VRAM.

Right, permanently moving a BO to GTT might itself cause performance to
drop down a cliff in some cases. It's possible that this is irrelevant
compared to excessive buffer migration for CPU access though.


> What we need is a global limit of number of bytes transfered per second
> for swap operations or something like that.
> 
> Or maybe a timeout which says when a BO was moved (either by swapping it
> out or by a CPU page fault) only move it back after +n jiffies or
> something like that.

I also feel like something like this will be more useful than the number
of CPU page faults per se. But I'm curious what Marek comes up with. :)


-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast | Mesa and X developer


[PATCH 3/3] drm/amdgpu: add a mechanism to acquire gpu exclusivity

2017-05-24 Thread Andres Rodriguez
A DRM_MASTER may wish to restrict gpu job submission to only a limited
set of clients. To enable this use case we provide the following new
IOCTL APIs:
  * A mechanism to change a process's ctx priorities
  * A mechanism to limit the minimum priority required for the gpu
scheduler to queue a job to the HW

This functionality is useful in VR use cases, where two compositors are
operating simultaneously, e.g. X + SteamVRComposer.

In this case SteamVRComposer can limit gpu access to itself + the
relevant clients. Once critical work is complete, and if enough time is
available until the next HMD vblank, general access to the gpu can be
restored.

The operation is limited to DRM_MASTER since it may lead to starvation.
The implementation of drm leases is required to extend DRM_MASTER
status to the SteamVRComposer.

Signed-off-by: Andres Rodriguez 
---
 drivers/gpu/drm/amd/amdgpu/Makefile   |   2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu.h   |   3 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c   |  39 
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c   |   2 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c | 131 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_sched.h |  34 +++
 drivers/gpu/drm/amd/scheduler/gpu_scheduler.c |  81 ++--
 drivers/gpu/drm/amd/scheduler/gpu_scheduler.h |  14 ++-
 include/uapi/drm/amdgpu_drm.h |  26 +
 9 files changed, 306 insertions(+), 26 deletions(-)
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_sched.h

diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile 
b/drivers/gpu/drm/amd/amdgpu/Makefile
index b62d9e9..e4d3b07 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -25,7 +25,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
amdgpu_prime.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \
amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \
amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o \
-   amdgpu_queue_mgr.o
+   amdgpu_queue_mgr.o amdgpu_sched.o
 
 # add asic specific block
 amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 3722352..9681de7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -833,6 +833,9 @@ uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, 
struct amdgpu_ring *ring,
  struct dma_fence *fence);
 struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
   struct amdgpu_ring *ring, uint64_t seq);
+void amdgpu_ctx_set_priority(struct amdgpu_device *adev,
+struct amdgpu_ctx *ctx,
+enum amd_sched_priority priority);
 
 int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
 struct drm_file *filp);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 43fe5ae..996434f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -24,6 +24,7 @@
 
 #include 
 #include "amdgpu.h"
+#include "amdgpu_sched.h"
 
 static int amdgpu_ctx_init(struct amdgpu_device *adev,
   enum amd_sched_priority priority,
@@ -198,23 +199,6 @@ static int amdgpu_ctx_query(struct amdgpu_device *adev,
return 0;
 }
 
-static enum amd_sched_priority amdgpu_to_sched_priority(int amdgpu_priority)
-{
-   switch (amdgpu_priority) {
-   case AMDGPU_CTX_PRIORITY_HIGH_HW:
-   return AMD_SCHED_PRIORITY_HIGH_HW;
-   case AMDGPU_CTX_PRIORITY_HIGH_SW:
-   return AMD_SCHED_PRIORITY_HIGH_SW;
-   case AMDGPU_CTX_PRIORITY_NORMAL:
-   return AMD_SCHED_PRIORITY_NORMAL;
-   case AMDGPU_CTX_PRIORITY_LOW:
-   return AMD_SCHED_PRIORITY_LOW;
-   default:
-   WARN(1, "Invalid context priority %d\n", amdgpu_priority);
-   return AMD_SCHED_PRIORITY_INVALID;
-   }
-}
-
 int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
 struct drm_file *filp)
 {
@@ -337,6 +321,27 @@ struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx 
*ctx,
return fence;
 }
 
+void amdgpu_ctx_set_priority(struct amdgpu_device *adev,
+struct amdgpu_ctx *ctx,
+enum amd_sched_priority priority)
+{
+   int i;
+   struct amd_sched_rq *rq;
+   struct amd_sched_entity *entity;
+   struct amdgpu_ring *ring;
+
+   spin_lock(&ctx->ring_lock);
+   for (i = 0; i < adev->num_rings; i++) {
+   ring = adev->rings[i];
+   entity = &ctx->rings[i].entity;
+   rq = &ring->sched.sched_rq[priority];
+
+   if (ring->funcs->type != AMDGPU_RING_TYPE_KIQ)
+   

[RFC] Exclusive gpu access for SteamVR usecases

2017-05-24 Thread Andres Rodriguez
When multiple environments are running simultaneously on a system, e.g.
an X desktop + a SteamVR game session, it may be useful to sacrifice
performance in one environment in order to boost it on the other.

This series provides a mechanism for a DRM_MASTER to provide exclusive
gpu access to a group of processes.

Note: This series is built on the assumption that the drm lease patch series
will extend DRM_MASTER status to lessees.

The libdrm API we intend to provide is as follows:

/**
 * Set the priority of all contexts in a process
 *
 * This function will change the priority of all contexts owned by
 * the process identified by fd.
 *
 * \param dev - \c [in] device handle
 * \param fd  - \c [in] fd from target process
 * \param priority- \c [in] target priority AMDGPU_CTX_PRIORITY_*
 *
 * \return  0 on success\n
 * <0 - Negative POSIX error code
 *
 * \notes @fd can be *any* file descriptor from the target process.
 * \notes this function requires DRM_MASTER
 */
int amdgpu_sched_process_priority_set(amdgpu_device_handle dev,
  int fd, int32_t priority);

/**
 * Request to raise the minimum required priority to schedule a gpu job
 *
 * Submit a request to increase the minimum required priority to schedule
 * a gpu job. Once this function returns, the gpu scheduler will no longer
 * consider jobs from contexts with priority lower than @priority.
 *
 * The minimum priority considered by the scheduler will be the highest from
 * all currently active requests.
 *
 * Requests are refcounted, and must be balanced using
 * amdgpu_sched_min_priority_put()
 *
 * \param dev - \c [in] device handle
 * \param priority- \c [in] target priority AMDGPU_CTX_PRIORITY_*
 *
 * \return  0 on success\n
 * <0 - Negative POSIX error code
 *
 * \notes this function requires DRM_MASTER
 */
int amdgpu_sched_min_priority_get(amdgpu_device_handle dev,
  int32_t priority);

/**
 * Drop a request to raise the minimum required scheduler priority
 *
 * This call balances amdgpu_sched_min_priority_get()
 *
 * If no other active requests exists for @priority, the minimum required
 * priority will decay to a lower level until one is reached with an active
 * request or the lowest priority is reached.
 *
 * \param dev - \c [in] device handle
 * \param priority- \c [in] target priority AMDGPU_CTX_PRIORITY_*
 *
 * \return  0 on success\n
 * <0 - Negative POSIX error code
 *
 * \notes this function requires DRM_MASTER
 */
int amdgpu_sched_min_priority_put(amdgpu_device_handle dev,
  int32_t priority);

Using this API, the VRComposer can raise the priority of the VRapp and itself.
Then it can restrict the minimum scheduler priority so that they become
exclusive gpu clients.
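
A rough sketch of that flow, using the wrappers declared above (how the
compositor obtains the fds and the error paths are illustrative only; the
wrappers themselves are of course still a proposal):

#include <amdgpu.h>
#include <amdgpu_drm.h>

/* composer_fd: any fd from the compositor's own process,
 * vrapp_fd: any fd received from the VR app. Requires DRM_MASTER. */
static int enter_exclusive_mode(amdgpu_device_handle dev,
                                int composer_fd, int vrapp_fd)
{
        int r;

        r = amdgpu_sched_process_priority_set(dev, composer_fd,
                                              AMDGPU_CTX_PRIORITY_HIGH_SW);
        if (r)
                return r;
        r = amdgpu_sched_process_priority_set(dev, vrapp_fd,
                                              AMDGPU_CTX_PRIORITY_HIGH_SW);
        if (r)
                return r;

        /* from here on only HIGH_SW (or higher) jobs reach the hardware */
        return amdgpu_sched_min_priority_get(dev, AMDGPU_CTX_PRIORITY_HIGH_SW);
}

static void leave_exclusive_mode(amdgpu_device_handle dev)
{
        /* balances the _get(); lower-priority clients are scheduled again */
        amdgpu_sched_min_priority_put(dev, AMDGPU_CTX_PRIORITY_HIGH_SW);
}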

One of the areas I'd like feedback is the following scenario. If a VRapp opens
a new fd and creates a new context after a call to set_priority, this specific
context will be lower priority than the rest. If the minimum required priority
is then raised, it is possible that this new context will be starved and
deadlock the VRapp.

One solution I had in mind to address this situation, is to make set_priority
also raise the priority of future contexts created by the VRapp. However, that
would require keeping track of the requested priority on a per-process data
structure. The current design appears to steer clear of keeping any process-
specific data; everything is instead stored on a per-file basis. Which is
why I did not pursue this approach. But if this is something you'd like me to
implement let me know.

One could also argue that preventing an application deadlock should be handled
between the VRComposer and the VRApp. It is not the kernel's responsibility to
babysit userspace applications and prevent them from shooting themselves
in the foot. The same could be achieved by improper usage of shared fences
between processes.

Thoughts/feedback/comments on this issue, or others, are appreciated.

Regards,
Andres
 



[PATCH 1/3] drm/amdgpu: add a new scheduler priority AMD_SCHED_PRIORITY_HIGH_SW

2017-05-24 Thread Andres Rodriguez
Add a new priority level to the gpu scheduler *_HIGH_SW. This level
intends to provide elevated entity priority at the sw scheduler level
without the negative side effects of an elevated HW priority.

Some of the negative effects of HW priorities can include stealing
resources from other queues.
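
For context, this is what requesting one of these levels looks like from
userspace through the existing context-allocation ioctl (a sketch only: the
union drm_amdgpu_ctx layout and DRM_AMDGPU_CTX are assumed from amdgpu_drm.h
together with the priority field this series relies on; error handling is
trimmed):

#include <string.h>
#include <stdint.h>
#include <xf86drm.h>
#include <amdgpu_drm.h>

/* Allocate a context at the new HIGH_SW level; needs CAP_SYS_NICE per
 * the amdgpu_ctx_init() check below. */
static int alloc_high_sw_ctx(int fd, uint32_t *ctx_id)
{
        union drm_amdgpu_ctx args;
        int r;

        memset(&args, 0, sizeof(args));
        args.in.op = AMDGPU_CTX_OP_ALLOC_CTX;
        args.in.priority = AMDGPU_CTX_PRIORITY_HIGH_SW;

        r = drmCommandWriteRead(fd, DRM_AMDGPU_CTX, &args, sizeof(args));
        if (r)
                return r;

        *ctx_id = args.out.alloc.ctx_id;
        return 0;
}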

Signed-off-by: Andres Rodriguez 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c   |  8 +---
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 18 ++
 drivers/gpu/drm/amd/scheduler/gpu_scheduler.h |  3 ++-
 include/uapi/drm/amdgpu_drm.h |  3 ++-
 4 files changed, 15 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index cc00110..48d0d1e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -35,7 +35,7 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
if (priority < 0 || priority >= AMD_SCHED_PRIORITY_MAX)
return -EINVAL;
 
-   if (priority >= AMD_SCHED_PRIORITY_HIGH && !capable(CAP_SYS_NICE))
+   if (priority > AMD_SCHED_PRIORITY_NORMAL && !capable(CAP_SYS_NICE))
return -EACCES;
 
memset(ctx, 0, sizeof(*ctx));
@@ -201,8 +201,10 @@ static int amdgpu_ctx_query(struct amdgpu_device *adev,
 static enum amd_sched_priority amdgpu_to_sched_priority(int amdgpu_priority)
 {
switch (amdgpu_priority) {
-   case AMDGPU_CTX_PRIORITY_HIGH:
-   return AMD_SCHED_PRIORITY_HIGH;
+   case AMDGPU_CTX_PRIORITY_HIGH_HW:
+   return AMD_SCHED_PRIORITY_HIGH_HW;
+   case AMDGPU_CTX_PRIORITY_HIGH_SW:
+   return AMD_SCHED_PRIORITY_HIGH_SW;
case AMDGPU_CTX_PRIORITY_NORMAL:
return AMD_SCHED_PRIORITY_NORMAL;
case AMDGPU_CTX_PRIORITY_LOW:
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 6147c94..396d3e4 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -6749,19 +6749,12 @@ static void gfx_v8_0_hqd_set_priority(struct 
amdgpu_device *adev,
mutex_lock(&adev->srbm_mutex);
vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
 
-   switch (priority) {
-   case AMD_SCHED_PRIORITY_NORMAL:
-   WREG32(mmCP_HQD_PIPE_PRIORITY, 0x0);
-   WREG32(mmCP_HQD_QUEUE_PRIORITY, 0x0);
-   break;
-   case AMD_SCHED_PRIORITY_HIGH:
+   if (priority >= AMD_SCHED_PRIORITY_HIGH_HW) {
WREG32(mmCP_HQD_PIPE_PRIORITY, 0x2);
WREG32(mmCP_HQD_QUEUE_PRIORITY, 0xf);
-   break;
-   default:
-   WARN(1, "Attempt to set invalid SPI priority:%d for ring:%d\n",
-   priority, ring->idx);
-   break;
+   } else {
+   WREG32(mmCP_HQD_PIPE_PRIORITY, 0x0);
+   WREG32(mmCP_HQD_QUEUE_PRIORITY, 0x0);
}
 
vi_srbm_select(adev, 0, 0, 0, 0);
@@ -6776,7 +6769,8 @@ static void gfx_v8_0_ring_set_priority_compute(struct 
amdgpu_ring *ring,
return;
 
gfx_v8_0_hqd_set_priority(adev, ring, priority);
-   gfx_v8_0_pipe_reserve_resources(adev, ring, priority >= 
AMD_SCHED_PRIORITY_HIGH);
+   gfx_v8_0_pipe_reserve_resources(adev, ring,
+   priority >= AMD_SCHED_PRIORITY_HIGH_HW);
 }
 
 static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h 
b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
index 46c18424..dbcaa2e 100644
--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
+++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
@@ -117,7 +117,8 @@ enum amd_sched_priority {
AMD_SCHED_PRIORITY_MIN,
AMD_SCHED_PRIORITY_LOW = AMD_SCHED_PRIORITY_MIN,
AMD_SCHED_PRIORITY_NORMAL,
-   AMD_SCHED_PRIORITY_HIGH,
+   AMD_SCHED_PRIORITY_HIGH_SW,
+   AMD_SCHED_PRIORITY_HIGH_HW,
AMD_SCHED_PRIORITY_KERNEL,
AMD_SCHED_PRIORITY_MAX
 };
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 88b2a52..27d0a822 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -166,7 +166,8 @@ union drm_amdgpu_bo_list {
 #define AMDGPU_CTX_PRIORITY_LOW -1023
 #define AMDGPU_CTX_PRIORITY_NORMAL  0
 /* Selecting a priority above NORMAL requires CAP_SYS_ADMIN */
-#define AMDGPU_CTX_PRIORITY_HIGH1023
+#define AMDGPU_CTX_PRIORITY_HIGH_SW 512
+#define AMDGPU_CTX_PRIORITY_HIGH_HW 1023
 
 struct drm_amdgpu_ctx_in {
/** AMDGPU_CTX_OP_* */
-- 
2.9.3



[PATCH 2/3] drm/amdgpu: make amdgpu_to_sched_priority detect invalid parameters

2017-05-24 Thread Andres Rodriguez
Returning invalid priorities as _NORMAL is a backwards compatibility
quirk of amdgpu_ctx_ioctl(). Move this detail one layer up where it
belongs.

Signed-off-by: Andres Rodriguez 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c   | 8 +---
 drivers/gpu/drm/amd/scheduler/gpu_scheduler.h | 3 ++-
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 48d0d1e..43fe5ae 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -211,7 +211,7 @@ static enum amd_sched_priority amdgpu_to_sched_priority(int 
amdgpu_priority)
return AMD_SCHED_PRIORITY_LOW;
default:
WARN(1, "Invalid context priority %d\n", amdgpu_priority);
-   return AMD_SCHED_PRIORITY_NORMAL;
+   return AMD_SCHED_PRIORITY_INVALID;
}
 }
 
@@ -230,8 +230,10 @@ int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
id = args->in.ctx_id;
priority = amdgpu_to_sched_priority(args->in.priority);
 
-   if (priority >= AMD_SCHED_PRIORITY_MAX)
-   return -EINVAL;
+   /* For backwards compatibility reasons, we need to accept
+* ioctls with garbage in the priority field */
+   if (priority == AMD_SCHED_PRIORITY_INVALID)
+   priority = AMD_SCHED_PRIORITY_NORMAL;
 
switch (args->in.op) {
case AMDGPU_CTX_OP_ALLOC_CTX:
diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h 
b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
index dbcaa2e..da040bc 100644
--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
+++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
@@ -120,7 +120,8 @@ enum amd_sched_priority {
AMD_SCHED_PRIORITY_HIGH_SW,
AMD_SCHED_PRIORITY_HIGH_HW,
AMD_SCHED_PRIORITY_KERNEL,
-   AMD_SCHED_PRIORITY_MAX
+   AMD_SCHED_PRIORITY_MAX,
+   AMD_SCHED_PRIORITY_INVALID = -1
 };
 
 /**
-- 
2.9.3



Re: [PATCH 1/1] amdgpu: move asic id table to a separate file

2017-05-24 Thread Michel Dänzer
On 25/05/17 12:10 AM, Li, Samuel wrote:
>> There's no need for a separate repo upstream.  It's purely to aid
>> internal packaging.
> As a first step, we can put a snapshot in libdrm for now. External
> packaging  will face the same issue though ... the packagers need to
> put the ids in various versions of various distros.

They can take a snapshot from the libdrm repository the same way they
could from a separate repository. I don't see any issue there.


> We also likely need to automate the upstream update to save sometime
> for ourselves.

I'm skeptical that it can be automated, we'll see. :)


-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast | Mesa and X developer


Re: [PATCH 0/3] drm/amdgpu: Tweaks for high pressure on CPU visible VRAM

2017-05-24 Thread Michel Dänzer
On 23/05/17 07:38 PM, Marek Olšák wrote:
> On Tue, May 23, 2017 at 2:45 AM, Michel Dänzer  wrote:
>> On 22/05/17 07:09 PM, Marek Olšák wrote:
>>> On Mon, May 22, 2017 at 12:00 PM, Michel Dänzer  wrote:
 On 20/05/17 06:26 PM, Marek Olšák wrote:
> On May 20, 2017 3:26 AM, "Michel Dänzer"  > wrote:
>
> On 20/05/17 01:14 AM, Marek Olšák wrote:
> > Hi Michel,
> >
> > I've applied your series
>
> Thanks for testing it.
>
> > and it doesn't help with low Dirt Rally performance on Fiji. I see 
> TTM
> > buffer moves at 800MB/s and many VRAM page faults.
>
> Did you see this:
>
> >> Note that there's only little if any improvement of the average
> framerate
> >> reported, but the minimum framerate as seen on the HUD goes from
> ~10 fps
> >> to ~17.
>
> I.e. it mostly affects the minimum framerate and smoothness for me
> as well.
>
>
> Without the series, I get 70 average fps. With the series, I get 30
> average fps. That might just be random bad luck. I don't know.

 Hmm, yeah, maybe that was just one of the random slowdowns you've been
 talking about in other threads and on IRC?

 I can't reproduce any slowdown with these patches, even leaving visible
 VRAM size at 256 MB.
>>>
>>> The random slowdowns with Dirt Rally are only caused by the pressure
>>> on visible VRAM. This whole thread is about those random slowdowns.
>>
>> No, this thread is about the scenario described in the cover letter of
>> this patch series.
>>
>>
>>> If you're saying "maybe it was just one of the random slowdowns", you're
>>> saying "maybe it was just the visible VRAM pressure". It's only
>>> random with Dirt Rally, which makes it difficult to believe statements
>>> such as "I can't reproduce any slowdown".
>>
>> I could say the same thing about you seeing random slowdowns... I've
>> never seen that, I had to artificially limit the size of visible VRAM to
>> 64 MB to make it significantly affect the benchmark result.
>>
>> How many times do you need to run the benchmark on average to hit a
>> random slowdown? Which desktop environment and other X clients are
>> running during the benchmark? Which tab is active in the Steam window
>> while the benchmark runs?
>>
>> In my case, it's only xfwm4, xterm and steam on the Dirt Rally page in
>> the library.
> 
> Ubuntu Unity, Steam small mode (there are no tabs), Ultra settings in
> Dirt Rally.
> 
> Every single time I run the game with this series, I get 700-1000MB/s
> of TTM BO moves. There doesn't seem to be any randomness.
> 
> It was better without this series. (meaning it was sometimes OK, sometimes 
> bad)

Thanks for the additional details. I presume that in the bad case there
are some BOs lying around in visible VRAM (e.g. from Unity), which
causes some of Dirt Rally's BOs to go back and forth between GTT on CPU
page faults and VRAM on GPU usage.

This means at least patch 2 goes out the window. I'll see if I can
salvage something out of patch 3.


-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast | Mesa and X developer


Re: [PATCH 1/4] drm/amdgpu/ci: disable mclk switching for high refresh rates (v2)

2017-05-24 Thread Christian König

Am 23.05.2017 um 23:26 schrieb Alex Deucher:

Even if the vblank period would allow it, it still seems to
be problematic on some cards.

v2: fix logic inversion (Nils)

bug: https://bugs.freedesktop.org/show_bug.cgi?id=96868

Signed-off-by: Alex Deucher 


Acked-by: Christian König  for the whole series.


---
  drivers/gpu/drm/amd/amdgpu/ci_dpm.c | 6 ++
  1 file changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c 
b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
index 6dc1410..ec93714 100644
--- a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
@@ -906,6 +906,12 @@ static bool ci_dpm_vblank_too_short(struct amdgpu_device 
*adev)
u32 vblank_time = amdgpu_dpm_get_vblank_time(adev);
u32 switch_limit = adev->mc.vram_type == AMDGPU_VRAM_TYPE_GDDR5 ? 450 : 
300;
  
+	/* disable mclk switching if the refresh is >120Hz, even if the

+* blanking period would allow it
+*/
+   if (amdgpu_dpm_get_vrefresh(adev) > 120)
+   return true;
+
if (vblank_time < switch_limit)
return true;
else





[PATCH 1/5] drm: introduce sync objects (v3)

2017-05-24 Thread Dave Airlie
From: Dave Airlie 

Sync objects are a new toplevel DRM object that contains a
pointer to a fence. This fence can be updated by drivers via
their command submission ioctls.

There is also a generic wait obj API modelled on the vulkan
wait API (with code modelled on some amdgpu code).

These objects can be converted to an opaque fd that can be
passed between processes.
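As a side note for userspace readers, a minimal sketch of the intended flow,
assuming the uapi names this patch adds (DRM_IOCTL_SYNCOBJ_CREATE,
DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD and their structs) and an already opened DRM
fd; error paths are trimmed:

/* Hypothetical userspace sketch, not part of this patch. */
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <drm/drm.h>

static int syncobj_create_and_export(int drm_fd, uint32_t *handle, int *obj_fd)
{
	struct drm_syncobj_create create;
	struct drm_syncobj_handle args;

	memset(&create, 0, sizeof(create));
	if (ioctl(drm_fd, DRM_IOCTL_SYNCOBJ_CREATE, &create))
		return -1;
	*handle = create.handle;

	memset(&args, 0, sizeof(args));
	args.handle = create.handle;	/* flags == 0: export the syncobj itself */
	if (ioctl(drm_fd, DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD, &args))
		return -1;
	*obj_fd = args.fd;
	return 0;
}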

v2: rename reference/unreference to put/get (Chris)
fix leaked reference (David Zhou)
drop mutex in favour of cmpxchg (Chris)
v3: cleanups from danvet, rebase on drm_fops rename
check fd_flags is 0 in ioctls.

Reviewed-by: Sean Paul 
Signed-off-by: Dave Airlie 
---
 Documentation/gpu/drm-internals.rst |   3 +
 Documentation/gpu/drm-mm.rst|  12 ++
 drivers/gpu/drm/Makefile|   2 +-
 drivers/gpu/drm/drm_file.c  |   8 +
 drivers/gpu/drm/drm_internal.h  |  13 ++
 drivers/gpu/drm/drm_ioctl.c |  12 ++
 drivers/gpu/drm/drm_syncobj.c   | 377 
 include/drm/drmP.h  |   1 -
 include/drm/drm_drv.h   |   1 +
 include/drm/drm_file.h  |   5 +
 include/drm/drm_syncobj.h   |  87 +
 include/uapi/drm/drm.h  |  24 +++
 12 files changed, 543 insertions(+), 2 deletions(-)
 create mode 100644 drivers/gpu/drm/drm_syncobj.c
 create mode 100644 include/drm/drm_syncobj.h

diff --git a/Documentation/gpu/drm-internals.rst 
b/Documentation/gpu/drm-internals.rst
index babfb61..2b23d78 100644
--- a/Documentation/gpu/drm-internals.rst
+++ b/Documentation/gpu/drm-internals.rst
@@ -98,6 +98,9 @@ DRIVER_ATOMIC
 implement appropriate obj->atomic_get_property() vfuncs for any
 modeset objects with driver specific properties.
 
+DRIVER_SYNCOBJ
+Driver support drm sync objects.
+
 Major, Minor and Patchlevel
 ~~~
 
diff --git a/Documentation/gpu/drm-mm.rst b/Documentation/gpu/drm-mm.rst
index 96b9c34..9412798 100644
--- a/Documentation/gpu/drm-mm.rst
+++ b/Documentation/gpu/drm-mm.rst
@@ -484,3 +484,15 @@ DRM Cache Handling
 
 .. kernel-doc:: drivers/gpu/drm/drm_cache.c
:export:
+
+DRM Sync Objects
+================
+
+.. kernel-doc:: drivers/gpu/drm/drm_syncobj.c
+   :doc: Overview
+
+.. kernel-doc:: include/drm/drm_syncobj.h
+   :export:
+
+.. kernel-doc:: drivers/gpu/drm/drm_syncobj.c
+   :export:
diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index 59f0f9b..6f42188 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -16,7 +16,7 @@ drm-y   :=drm_auth.o drm_bufs.o drm_cache.o \
drm_framebuffer.o drm_connector.o drm_blend.o \
drm_encoder.o drm_mode_object.o drm_property.o \
drm_plane.o drm_color_mgmt.o drm_print.o \
-   drm_dumb_buffers.o drm_mode_config.o
+   drm_dumb_buffers.o drm_mode_config.o drm_syncobj.o
 
 drm-$(CONFIG_DRM_LIB_RANDOM) += lib/drm_random.o
 drm-$(CONFIG_DRM_VM) += drm_vm.o
diff --git a/drivers/gpu/drm/drm_file.c b/drivers/gpu/drm/drm_file.c
index 3783b65..a20d6a9 100644
--- a/drivers/gpu/drm/drm_file.c
+++ b/drivers/gpu/drm/drm_file.c
@@ -229,6 +229,9 @@ static int drm_open_helper(struct file *filp, struct 
drm_minor *minor)
if (drm_core_check_feature(dev, DRIVER_GEM))
drm_gem_open(dev, priv);
 
+   if (drm_core_check_feature(dev, DRIVER_SYNCOBJ))
+   drm_syncobj_open(priv);
+
if (drm_core_check_feature(dev, DRIVER_PRIME))
		drm_prime_init_file_private(&priv->prime);
 
@@ -276,6 +279,8 @@ static int drm_open_helper(struct file *filp, struct 
drm_minor *minor)
 out_prime_destroy:
if (drm_core_check_feature(dev, DRIVER_PRIME))
		drm_prime_destroy_file_private(&priv->prime);
+   if (drm_core_check_feature(dev, DRIVER_SYNCOBJ))
+   drm_syncobj_release(priv);
if (drm_core_check_feature(dev, DRIVER_GEM))
drm_gem_release(dev, priv);
put_pid(priv->pid);
@@ -398,6 +403,9 @@ int drm_release(struct inode *inode, struct file *filp)
drm_property_destroy_user_blobs(dev, file_priv);
}
 
+   if (drm_core_check_feature(dev, DRIVER_SYNCOBJ))
+   drm_syncobj_release(file_priv);
+
if (drm_core_check_feature(dev, DRIVER_GEM))
drm_gem_release(dev, file_priv);
 
diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h
index 3d8e8f8..3fdef2c 100644
--- a/drivers/gpu/drm/drm_internal.h
+++ b/drivers/gpu/drm/drm_internal.h
@@ -142,4 +142,17 @@ static inline int drm_debugfs_crtc_crc_add(struct drm_crtc 
*crtc)
 {
return 0;
 }
+
 #endif
+
+/* drm_syncobj.c */
+void drm_syncobj_open(struct drm_file *file_private);
+void drm_syncobj_release(struct drm_file *file_private);
+int drm_syncobj_create_ioctl(struct drm_device *dev, void *data,
+struct drm_file *file_private);
+int 

drm syncobj - final posting I hope

2017-05-24 Thread Dave Airlie
I've fixed up a few things in here from Daniel's comments.
Daniel, I didn't change things exactly as suggested, but I removed
fd_flags from the API completely.

Christian, I think I got the post deps hook in the right place
now in the amdgpu patch.

Would be nice if someone could validate that the timeout stuff
in the second patch makes sense; I've decided to stick to
an absolute timeout.
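For reference, a small sketch of what an absolute timeout means for a caller.
The clock domain and nanosecond unit are assumptions on my side, since patch 2
isn't quoted here:

/* Sketch only: turn a relative timeout into an absolute one before
 * passing it to the wait ioctl. Assumes nanoseconds on CLOCK_MONOTONIC,
 * which is an assumption about patch 2, not something quoted here. */
#include <stdint.h>
#include <time.h>

static uint64_t abs_timeout_ns(uint64_t relative_ns)
{
	struct timespec now;

	clock_gettime(CLOCK_MONOTONIC, &now);
	return (uint64_t)now.tv_sec * 1000000000ull + now.tv_nsec + relative_ns;
}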

I will probably at least merge the first 3 into drm-next, and
we can work out what to do with the two amdgpu ones.

Dave.



[PATCH 3/5] drm/syncobj: add sync_file interaction.

2017-05-24 Thread Dave Airlie
From: Dave Airlie 

This interface allows importing the fence from a sync_file into
an existing drm sync object, or exporting the fence attached to
an existing drm sync object into a new sync file object.

This should only be used to interact with sync files where necessary.
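A hypothetical userspace sketch of the import direction, assuming the flag and
ioctl names added by this patch; error handling is trimmed:

/* Move the fence of a sync_file fd into an existing syncobj. */
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <drm/drm.h>

static int syncobj_import_sync_file(int drm_fd, uint32_t syncobj, int sync_file_fd)
{
	struct drm_syncobj_handle args;

	memset(&args, 0, sizeof(args));
	args.handle = syncobj;		/* existing syncobj, not a new one */
	args.fd = sync_file_fd;		/* fence comes from this sync_file */
	args.flags = DRM_SYNCOBJ_FD_TO_HANDLE_FLAGS_IMPORT_SYNC_FILE_FENCE;

	return ioctl(drm_fd, DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE, &args);
}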

Reviewed-by: Sean Paul 
Signed-off-by: Dave Airlie 
---
 drivers/gpu/drm/drm_syncobj.c | 64 +--
 include/uapi/drm/drm.h|  2 ++
 2 files changed, 64 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
index 8b87594..54d751e 100644
--- a/drivers/gpu/drm/drm_syncobj.c
+++ b/drivers/gpu/drm/drm_syncobj.c
@@ -50,6 +50,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "drm_internal.h"
 #include 
@@ -276,6 +277,48 @@ static int drm_syncobj_fd_to_handle(struct drm_file 
*file_private,
return 0;
 }
 
+int drm_syncobj_import_sync_file_fence(struct drm_file *file_private,
+  int fd, int handle)
+{
+   struct dma_fence *fence = sync_file_get_fence(fd);
+   if (!fence)
+   return -EINVAL;
+
+   return drm_syncobj_replace_fence(file_private, handle, fence);
+}
+
+int drm_syncobj_export_sync_file(struct drm_file *file_private,
+int handle, int *p_fd)
+{
+   int ret;
+   struct dma_fence *fence;
+   struct sync_file *sync_file;
+   int fd = get_unused_fd_flags(O_CLOEXEC);
+
+   if (fd < 0)
+   return fd;
+
+   ret = drm_syncobj_fence_get(file_private, handle, &fence);
+   if (ret)
+   goto err_put_fd;
+
+   sync_file = sync_file_create(fence);
+   if (!sync_file) {
+   ret = -EINVAL;
+   goto err_fence_put;
+   }
+
+   fd_install(fd, sync_file->file);
+
+   dma_fence_put(fence);
+   *p_fd = fd;
+   return 0;
+err_fence_put:
+   dma_fence_put(fence);
+err_put_fd:
+   put_unused_fd(fd);
+   return ret;
+}
 /**
  * drm_syncobj_open - initalizes syncobj file-private structures at devnode 
open time
  * @dev: drm_device which is being opened by userspace
@@ -358,9 +401,17 @@ drm_syncobj_handle_to_fd_ioctl(struct drm_device *dev, 
void *data,
if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
return -ENODEV;
 
-   if (args->pad || args->flags)
+   if (args->pad)
+   return -EINVAL;
+
+   if (args->flags != 0 &&
+   args->flags != 
DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_EXPORT_FENCE_SYNC_FILE)
return -EINVAL;
 
+   if (args->flags & DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_EXPORT_FENCE_SYNC_FILE)
+   return drm_syncobj_export_sync_file(file_private, args->handle,
+   &args->fd);
+
return drm_syncobj_handle_to_fd(file_private, args->handle,
&args->fd);
 }
@@ -374,9 +425,18 @@ drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, 
void *data,
if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
return -ENODEV;
 
-   if (args->pad || args->flags)
+   if (args->pad)
return -EINVAL;
 
+   if (args->flags != 0 &&
+   args->flags != 
DRM_SYNCOBJ_FD_TO_HANDLE_FLAGS_IMPORT_SYNC_FILE_FENCE)
+   return -EINVAL;
+
+   if (args->flags & DRM_SYNCOBJ_FD_TO_HANDLE_FLAGS_IMPORT_SYNC_FILE_FENCE)
+   return drm_syncobj_import_sync_file_fence(file_private,
+ args->fd,
+ args->handle);
+
return drm_syncobj_fd_to_handle(file_private, args->fd,
&args->handle);
 }
diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
index d6e2f62..94c75be 100644
--- a/include/uapi/drm/drm.h
+++ b/include/uapi/drm/drm.h
@@ -708,6 +708,8 @@ struct drm_syncobj_destroy {
__u32 pad;
 };
 
+#define DRM_SYNCOBJ_FD_TO_HANDLE_FLAGS_IMPORT_SYNC_FILE_FENCE (1 << 0)
+#define DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_EXPORT_FENCE_SYNC_FILE (1 << 0)
 struct drm_syncobj_handle {
__u32 handle;
__u32 flags;
-- 
2.9.4



[PATCH 5/5] amdgpu: use drm sync objects for shared semaphores (v5)

2017-05-24 Thread Dave Airlie
From: Dave Airlie 

This creates a new command submission chunk for amdgpu
to add in and out sync objects around the submission.

Sync objects are managed via the drm syncobj ioctls.

The command submission interface is enhanced with two new
chunks, one for syncobj pre submission dependencies,
and one for post submission sync obj signalling,
and just takes a list of handles for each.
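A rough userspace sketch of the two new chunks (not part of the patch). It
assumes the chunk ids and struct drm_amdgpu_cs_chunk_sem introduced by this
patch, with the latter only carrying a __u32 handle; the struct itself is in
the amdgpu_drm.h hunk, which is not quoted in full here:

#include <stdint.h>
#include <drm/amdgpu_drm.h>

static void fill_syncobj_chunks(struct drm_amdgpu_cs_chunk chunks[2],
				struct drm_amdgpu_cs_chunk_sem *wait_sem,
				struct drm_amdgpu_cs_chunk_sem *signal_sem,
				uint32_t in_handle, uint32_t out_handle)
{
	wait_sem->handle = in_handle;		/* waited on before the job runs */
	chunks[0].chunk_id = AMDGPU_CHUNK_ID_SYNCOBJ_IN;
	chunks[0].length_dw = sizeof(*wait_sem) / 4;
	chunks[0].chunk_data = (uint64_t)(uintptr_t)wait_sem;

	signal_sem->handle = out_handle;	/* replaced with the job's fence */
	chunks[1].chunk_id = AMDGPU_CHUNK_ID_SYNCOBJ_OUT;
	chunks[1].length_dw = sizeof(*signal_sem) / 4;
	chunks[1].chunk_data = (uint64_t)(uintptr_t)signal_sem;
}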

This is based on work originally done by David Zhou at AMD,
with input from Christian Konig on what things should look like.

In theory VkFences could be backed with sync objects and
just get passed into the cs as syncobj handles as well.

NOTE: this interface addition needs a version bump to expose
it to userspace.

v1.1: keep file reference on import.
v2: move to using syncobjs
v2.1: change some APIs to just use p pointer.
v3: make more robust against CS failures, we now add the
wait sems but only remove them once the CS job has been
submitted.
v4: rewrite names of API and base on new syncobj code.
v5: move post deps earlier, rename some apis

Signed-off-by: Dave Airlie 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  | 87 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c |  2 +-
 include/uapi/drm/amdgpu_drm.h   |  6 +++
 3 files changed, 93 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 30225d7..3cbd547 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -27,6 +27,7 @@
 #include 
 #include 
 #include 
+#include 
 #include "amdgpu.h"
 #include "amdgpu_trace.h"
 
@@ -226,6 +227,8 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void 
*data)
break;
 
case AMDGPU_CHUNK_ID_DEPENDENCIES:
+   case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
+   case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
break;
 
default:
@@ -1040,6 +1043,40 @@ static int amdgpu_cs_process_fence_dep(struct 
amdgpu_cs_parser *p,
return 0;
 }
 
+static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p,
+uint32_t handle)
+{
+   int r;
+   struct dma_fence *fence;
+   r = drm_syncobj_fence_get(p->filp, handle, &fence);
+   if (r)
+   return r;
+
+   r = amdgpu_sync_fence(p->adev, &p->job->sync, fence);
+   dma_fence_put(fence);
+
+   return r;
+}
+
+static int amdgpu_cs_process_syncobj_in_dep(struct amdgpu_cs_parser *p,
+   struct amdgpu_cs_chunk *chunk)
+{
+   unsigned num_deps;
+   int i, r;
+   struct drm_amdgpu_cs_chunk_sem *deps;
+
+   deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
+   num_deps = chunk->length_dw * 4 /
+   sizeof(struct drm_amdgpu_cs_chunk_sem);
+
+   for (i = 0; i < num_deps; ++i) {
+   r = amdgpu_syncobj_lookup_and_add_to_sync(p, deps[i].handle);
+   if (r)
+   return r;
+   }
+   return 0;
+}
+
 static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
  struct amdgpu_cs_parser *p)
 {
@@ -1054,12 +1091,54 @@ static int amdgpu_cs_dependencies(struct amdgpu_device 
*adev,
r = amdgpu_cs_process_fence_dep(p, chunk);
if (r)
return r;
+   } else if (chunk->chunk_id == AMDGPU_CHUNK_ID_SYNCOBJ_IN) {
+   r = amdgpu_cs_process_syncobj_in_dep(p, chunk);
+   if (r)
+   return r;
}
}
 
return 0;
 }
 
+static int amdgpu_cs_process_syncobj_out_dep(struct amdgpu_cs_parser *p,
+struct amdgpu_cs_chunk *chunk)
+{
+   unsigned num_deps;
+   int i, r;
+   struct drm_amdgpu_cs_chunk_sem *deps;
+
+   deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
+   num_deps = chunk->length_dw * 4 /
+   sizeof(struct drm_amdgpu_cs_chunk_sem);
+
+   for (i = 0; i < num_deps; ++i) {
+   r = drm_syncobj_replace_fence(p->filp, deps[i].handle,
+ p->fence);
+   if (r)
+   return r;
+   }
+   return 0;
+}
+
+static int amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p)
+{
+   int i, r;
+
+   for (i = 0; i < p->nchunks; ++i) {
+   struct amdgpu_cs_chunk *chunk;
+
+   chunk = &p->chunks[i];
+
+   if (chunk->chunk_id == AMDGPU_CHUNK_ID_SYNCOBJ_OUT) {
+   r = amdgpu_cs_process_syncobj_out_dep(p, chunk);
+   if (r)
+   return r;
+   }
+   }
+   return 0;
+}
+
 static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
  

[PATCH 4/5] amdgpu/cs: split out fence dependency checking

2017-05-24 Thread Dave Airlie
From: Dave Airlie 

This just splits out the fence dependency checking into its
own function to make it easier to add semaphore dependencies.

Reviewed-by: Christian König 
Signed-off-by: Dave Airlie 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 85 +++---
 1 file changed, 47 insertions(+), 38 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 4e6b950..30225d7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -995,56 +995,65 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
return 0;
 }
 
-static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
- struct amdgpu_cs_parser *p)
+static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
+  struct amdgpu_cs_chunk *chunk)
 {
struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
-   int i, j, r;
-
-   for (i = 0; i < p->nchunks; ++i) {
-   struct drm_amdgpu_cs_chunk_dep *deps;
-   struct amdgpu_cs_chunk *chunk;
-   unsigned num_deps;
+   unsigned num_deps;
+   int i, r;
+   struct drm_amdgpu_cs_chunk_dep *deps;
 
-   chunk = &p->chunks[i];
+   deps = (struct drm_amdgpu_cs_chunk_dep *)chunk->kdata;
+   num_deps = chunk->length_dw * 4 /
+   sizeof(struct drm_amdgpu_cs_chunk_dep);
 
-   if (chunk->chunk_id != AMDGPU_CHUNK_ID_DEPENDENCIES)
-   continue;
+   for (i = 0; i < num_deps; ++i) {
+   struct amdgpu_ring *ring;
+   struct amdgpu_ctx *ctx;
+   struct dma_fence *fence;
 
-   deps = (struct drm_amdgpu_cs_chunk_dep *)chunk->kdata;
-   num_deps = chunk->length_dw * 4 /
-   sizeof(struct drm_amdgpu_cs_chunk_dep);
+   r = amdgpu_cs_get_ring(p->adev, deps[i].ip_type,
+  deps[i].ip_instance,
+  deps[i].ring, &ring);
+   if (r)
+   return r;
 
-   for (j = 0; j < num_deps; ++j) {
-   struct amdgpu_ring *ring;
-   struct amdgpu_ctx *ctx;
-   struct dma_fence *fence;
+   ctx = amdgpu_ctx_get(fpriv, deps[i].ctx_id);
+   if (ctx == NULL)
+   return -EINVAL;
 
-   r = amdgpu_cs_get_ring(adev, deps[j].ip_type,
-  deps[j].ip_instance,
-  deps[j].ring, &ring);
+   fence = amdgpu_ctx_get_fence(ctx, ring,
+deps[i].handle);
+   if (IS_ERR(fence)) {
+   r = PTR_ERR(fence);
+   amdgpu_ctx_put(ctx);
+   return r;
+   } else if (fence) {
+   r = amdgpu_sync_fence(p->adev, &p->job->sync,
+ fence);
+   dma_fence_put(fence);
+   amdgpu_ctx_put(ctx);
if (r)
return r;
+   }
+   }
+   return 0;
+}
 
-   ctx = amdgpu_ctx_get(fpriv, deps[j].ctx_id);
-   if (ctx == NULL)
-   return -EINVAL;
+static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
+ struct amdgpu_cs_parser *p)
+{
+   int i, r;
 
-   fence = amdgpu_ctx_get_fence(ctx, ring,
-deps[j].handle);
-   if (IS_ERR(fence)) {
-   r = PTR_ERR(fence);
-   amdgpu_ctx_put(ctx);
-   return r;
+   for (i = 0; i < p->nchunks; ++i) {
+   struct amdgpu_cs_chunk *chunk;
 
-   } else if (fence) {
-   r = amdgpu_sync_fence(adev, &p->job->sync,
- fence);
-   dma_fence_put(fence);
-   amdgpu_ctx_put(ctx);
-   if (r)
-   return r;
-   }
+   chunk = &p->chunks[i];
+
+   if (chunk->chunk_id == AMDGPU_CHUNK_ID_DEPENDENCIES) {
+   r = amdgpu_cs_process_fence_dep(p, chunk);
+   if (r)
+   return r;
}
}
 
-- 
2.9.4



Re: [PATCH 5/5] amdgpu: use drm sync objects for shared semaphores (v5)

2017-05-24 Thread zhoucm1



On 2017-05-24 15:06, Dave Airlie wrote:

From: Dave Airlie 

This creates a new command submission chunk for amdgpu
to add in and out sync objects around the submission.

Sync objects are managed via the drm syncobj ioctls.

The command submission interface is enhanced with two new
chunks, one for syncobj pre submission dependencies,
and one for post submission sync obj signalling,
and just takes a list of handles for each.

This is based on work originally done by David Zhou at AMD,
with input from Christian Konig on what things should look like.

In theory VkFences could be backed with sync objects and
just get passed into the cs as syncobj handles as well.

NOTE: this interface addition needs a version bump to expose
it to userspace.

v1.1: keep file reference on import.
v2: move to using syncobjs
v2.1: change some APIs to just use p pointer.
v3: make more robust against CS failures, we now add the
wait sems but only remove them once the CS job has been
submitted.
v4: rewrite names of API and base on new syncobj code.
v5: move post deps earlier, rename some apis

Signed-off-by: Dave Airlie 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  | 87 -
  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c |  2 +-
  include/uapi/drm/amdgpu_drm.h   |  6 +++
  3 files changed, 93 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 30225d7..3cbd547 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -27,6 +27,7 @@
  #include 
  #include 
  #include 
+#include 
  #include "amdgpu.h"
  #include "amdgpu_trace.h"
  
@@ -226,6 +227,8 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)

break;
  
  		case AMDGPU_CHUNK_ID_DEPENDENCIES:

+   case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
+   case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
break;
  
  		default:

@@ -1040,6 +1043,40 @@ static int amdgpu_cs_process_fence_dep(struct 
amdgpu_cs_parser *p,
return 0;
  }
  
+static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p,

+uint32_t handle)
+{
+   int r;
+   struct dma_fence *fence;
+   r = drm_syncobj_fence_get(p->filp, handle, &fence);
+   if (r)
+   return r;
+
+   r = amdgpu_sync_fence(p->adev, &p->job->sync, fence);
We just fixed a sched fence issue by adding a new job->dep_sync member, so
here we should use &p->job->dep_sync instead of &p->job->sync.


Regards,
David Zhou

+   dma_fence_put(fence);
+
+   return r;
+}
+
+static int amdgpu_cs_process_syncobj_in_dep(struct amdgpu_cs_parser *p,
+   struct amdgpu_cs_chunk *chunk)
+{
+   unsigned num_deps;
+   int i, r;
+   struct drm_amdgpu_cs_chunk_sem *deps;
+
+   deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
+   num_deps = chunk->length_dw * 4 /
+   sizeof(struct drm_amdgpu_cs_chunk_sem);
+
+   for (i = 0; i < num_deps; ++i) {
+   r = amdgpu_syncobj_lookup_and_add_to_sync(p, deps[i].handle);
+   if (r)
+   return r;
+   }
+   return 0;
+}
+
  static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
  struct amdgpu_cs_parser *p)
  {
@@ -1054,12 +1091,54 @@ static int amdgpu_cs_dependencies(struct amdgpu_device 
*adev,
r = amdgpu_cs_process_fence_dep(p, chunk);
if (r)
return r;
+   } else if (chunk->chunk_id == AMDGPU_CHUNK_ID_SYNCOBJ_IN) {
+   r = amdgpu_cs_process_syncobj_in_dep(p, chunk);
+   if (r)
+   return r;
}
}
  
  	return 0;

  }
  
+static int amdgpu_cs_process_syncobj_out_dep(struct amdgpu_cs_parser *p,

+struct amdgpu_cs_chunk *chunk)
+{
+   unsigned num_deps;
+   int i, r;
+   struct drm_amdgpu_cs_chunk_sem *deps;
+
+   deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
+   num_deps = chunk->length_dw * 4 /
+   sizeof(struct drm_amdgpu_cs_chunk_sem);
+
+   for (i = 0; i < num_deps; ++i) {
+   r = drm_syncobj_replace_fence(p->filp, deps[i].handle,
+ p->fence);
+   if (r)
+   return r;
+   }
+   return 0;
+}
+
+static int amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p)
+{
+   int i, r;
+
+   for (i = 0; i < p->nchunks; ++i) {
+   struct amdgpu_cs_chunk *chunk;
+
+   chunk = >chunks[i];
+
+   if (chunk->chunk_id == AMDGPU_CHUNK_ID_SYNCOBJ_OUT) {
+   r = amdgpu_cs_process_syncobj_out_dep(p, chunk);
+   

Re: [PATCH 5/5] amdgpu: use drm sync objects for shared semaphores (v5)

2017-05-24 Thread Christian König

On 24.05.2017 at 09:25, zhoucm1 wrote:



On 2017-05-24 15:06, Dave Airlie wrote:

From: Dave Airlie 

This creates a new command submission chunk for amdgpu
to add in and out sync objects around the submission.

Sync objects are managed via the drm syncobj ioctls.

The command submission interface is enhanced with two new
chunks, one for syncobj pre submission dependencies,
and one for post submission sync obj signalling,
and just takes a list of handles for each.

This is based on work originally done by David Zhou at AMD,
with input from Christian Konig on what things should look like.

In theory VkFences could be backed with sync objects and
just get passed into the cs as syncobj handles as well.

NOTE: this interface addition needs a version bump to expose
it to userspace.

v1.1: keep file reference on import.
v2: move to using syncobjs
v2.1: change some APIs to just use p pointer.
v3: make more robust against CS failures, we now add the
wait sems but only remove them once the CS job has been
submitted.
v4: rewrite names of API and base on new syncobj code.
v5: move post deps earlier, rename some apis

Signed-off-by: Dave Airlie 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  | 87 
-

  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c |  2 +-
  include/uapi/drm/amdgpu_drm.h   |  6 +++
  3 files changed, 93 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c

index 30225d7..3cbd547 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -27,6 +27,7 @@
  #include 
  #include 
  #include 
+#include 
  #include "amdgpu.h"
  #include "amdgpu_trace.h"
  @@ -226,6 +227,8 @@ int amdgpu_cs_parser_init(struct 
amdgpu_cs_parser *p, void *data)

  break;
case AMDGPU_CHUNK_ID_DEPENDENCIES:
+case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
+case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
  break;
default:
@@ -1040,6 +1043,40 @@ static int amdgpu_cs_process_fence_dep(struct 
amdgpu_cs_parser *p,

  return 0;
  }
  +static int amdgpu_syncobj_lookup_and_add_to_sync(struct 
amdgpu_cs_parser *p,

+ uint32_t handle)
+{
+int r;
+struct dma_fence *fence;
+r = drm_syncobj_fence_get(p->filp, handle, );
+if (r)
+return r;
+
+r = amdgpu_sync_fence(p->adev, &p->job->sync, fence);
We just fixed a sched fence issue by adding a new job->dep_sync member,
so here we should use &p->job->dep_sync instead of &p->job->sync.


To be precise, job->sync is now for the implicit fences from the BOs
(e.g. buffer moves and such) and job->dep_sync is for all explicit
fences from dependencies/semaphores.


The handling is now slightly different to fix a bug with two consecutive 
jobs scheduled to the same ring.
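If I read David's comment right, the requested change boils down to something
like this (sketch only, assuming the new job->dep_sync member mentioned above):

-	r = amdgpu_sync_fence(p->adev, &p->job->sync, fence);
+	r = amdgpu_sync_fence(p->adev, &p->job->dep_sync, fence);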


But apart from that the patch now looks good to me, so with that fixed 
it is Reviewed-by: Christian König .


Regards,
Christian.



Regards,
David Zhou

+dma_fence_put(fence);
+
+return r;
+}
+
+static int amdgpu_cs_process_syncobj_in_dep(struct amdgpu_cs_parser *p,
+struct amdgpu_cs_chunk *chunk)
+{
+unsigned num_deps;
+int i, r;
+struct drm_amdgpu_cs_chunk_sem *deps;
+
+deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
+num_deps = chunk->length_dw * 4 /
+sizeof(struct drm_amdgpu_cs_chunk_sem);
+
+for (i = 0; i < num_deps; ++i) {
+r = amdgpu_syncobj_lookup_and_add_to_sync(p, deps[i].handle);
+if (r)
+return r;
+}
+return 0;
+}
+
  static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
struct amdgpu_cs_parser *p)
  {
@@ -1054,12 +1091,54 @@ static int amdgpu_cs_dependencies(struct 
amdgpu_device *adev,

  r = amdgpu_cs_process_fence_dep(p, chunk);
  if (r)
  return r;
+} else if (chunk->chunk_id == AMDGPU_CHUNK_ID_SYNCOBJ_IN) {
+r = amdgpu_cs_process_syncobj_in_dep(p, chunk);
+if (r)
+return r;
  }
  }
return 0;
  }
  +static int amdgpu_cs_process_syncobj_out_dep(struct 
amdgpu_cs_parser *p,

+ struct amdgpu_cs_chunk *chunk)
+{
+unsigned num_deps;
+int i, r;
+struct drm_amdgpu_cs_chunk_sem *deps;
+
+deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
+num_deps = chunk->length_dw * 4 /
+sizeof(struct drm_amdgpu_cs_chunk_sem);
+
+for (i = 0; i < num_deps; ++i) {
+r = drm_syncobj_replace_fence(p->filp, deps[i].handle,
+  p->fence);
+if (r)
+return r;
+}
+return 0;
+}
+
+static int amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p)
+{
+int i, r;
+
+for (i = 0; i < p->nchunks; ++i) {
+struct amdgpu_cs_chunk *chunk;
+
+chunk = >chunks[i];
+
+if (chunk->chunk_id 

Re: [PATCH 7/7] drm/amdgpu: enable huge page handling in the VM

2017-05-24 Thread Christian König

On 18.05.2017 at 07:30, zhoucm1 wrote:



On 2017-05-17 17:22, Christian König wrote:

[SNIP]
+/* In the case of a mixed PT the PDE must point to it*/
+if (p->adev->asic_type < CHIP_VEGA10 ||
+nptes != AMDGPU_VM_PTE_COUNT(p->adev) ||
+p->func != amdgpu_vm_do_set_ptes ||
+!(flags & AMDGPU_PTE_VALID)) {
+
+pt = (*bo)->tbo.mem.start << PAGE_SHIFT;
+pt = amdgpu_gart_get_vm_pde(p->adev, pt);
+flags = AMDGPU_PTE_VALID;

This case should be handled when updating levels, so return directly?


The problem is that when we switch from huge page back to a normal 
mapping because the BO was evicted we also need to reset the PDE.



+} else {
+pt = amdgpu_gart_get_vm_pde(p->adev, dst);
+flags |= AMDGPU_PDE_PTE;
+}
  -return entry->bo;
+if (entry->addr == pt &&
+entry->huge_page == !!(flags & AMDGPU_PDE_PTE))
+return 0;
+
+entry->addr = pt;
+entry->huge_page = !!(flags & AMDGPU_PDE_PTE);
+
+if (parent->bo->shadow) {
+pd_addr = amdgpu_bo_gpu_offset(parent->bo->shadow);
+pde = pd_addr + pt_idx * 8;
+amdgpu_vm_do_set_ptes(p, pde, pt, 1, 0, flags);
From the spec "any pde in the chain can itself take on the format of a 
PTE and point directly to an aligned block of logical address space by 
setting the P bit.",

So here we should pass addr into the PDE instead of pt.


The pt variable was overwritten with the address a few lines above. The 
code was indeed hard to understand, so I've fixed it.
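For readers following along, a rough restatement of the decision in the hunk
above (sketch only; the variable names pde_value, pde_flags and pt_bo_addr are
mine, not the fixed-up patch code):

/* A PDE may only be turned into a huge-page PTE on VEGA10 when the whole
 * range covered by the page table is written as one contiguous, valid
 * mapping through the set_ptes path. Otherwise it must keep pointing at
 * the page table BO. */
if (p->adev->asic_type < CHIP_VEGA10 ||
    nptes != AMDGPU_VM_PTE_COUNT(p->adev) ||
    p->func != amdgpu_vm_do_set_ptes ||
    !(flags & AMDGPU_PTE_VALID)) {
	pde_value = amdgpu_gart_get_vm_pde(p->adev, pt_bo_addr);	/* point at the PT */
	pde_flags = AMDGPU_PTE_VALID;
} else {
	pde_value = amdgpu_gart_get_vm_pde(p->adev, dst);		/* point at the mapping */
	pde_flags = flags | AMDGPU_PDE_PTE;				/* P bit: PDE acts as PTE */
}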



+}
+
+pd_addr = amdgpu_bo_gpu_offset(parent->bo);
+pde = pd_addr + pt_idx * 8;
+amdgpu_vm_do_set_ptes(p, pde, pt, 1, 0, flags);

We should pass addr into the PDE instead of pt here as well.



+return 0;
  }
/**
@@ -1194,14 +1237,20 @@ static int amdgpu_vm_update_ptes(struct 
amdgpu_pte_update_params *params,

  uint64_t addr, pe_start;
  struct amdgpu_bo *pt;
  unsigned nptes;
+int r;
/* walk over the address space and update the page tables */
  for (addr = start; addr < end; addr += nptes) {
-pt = amdgpu_vm_get_pt(params, addr);
-if (!pt) {
-pr_err("PT not found, aborting update_ptes\n");
-return -EINVAL;
-}
+
+if ((addr & ~mask) == (end & ~mask))
+nptes = end - addr;
+else
+nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask);
+
+r = amdgpu_vm_handle_huge_pages(params, addr, nptes,
+dst, flags, );
If a huge page is used, its sub-PTEs don't need to be updated anymore; they
cannot be indexed through the page table when that PDE is a PTE, right?


Right, but I wasn't sure whether we might still need them for something, so I've
kept the update for now.


Going to try dropping this today.


Btw: Is this the "BigK" that people often mention?


Yes and no. I can explain more internally if you like.

Regards,
Christian.



Regards,
David Zhou

+if (r)
+return r;
if (params->shadow) {
  if (!pt->shadow)
@@ -1209,11 +1258,6 @@ static int amdgpu_vm_update_ptes(struct 
amdgpu_pte_update_params *params,

  pt = pt->shadow;
  }
  -if ((addr & ~mask) == (end & ~mask))
-nptes = end - addr;
-else
-nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask);
-
  pe_start = amdgpu_bo_gpu_offset(pt);
  pe_start += (addr & mask) * 8;
  @@ -1353,6 +1397,9 @@ static int amdgpu_vm_bo_update_mapping(struct 
amdgpu_device *adev,

  /* padding, etc. */
  ndw = 64;
  +/* one PDE write for each huge page */
+ndw += ((nptes >> adev->vm_manager.block_size) + 1) * 7;
+
  if (src) {
  /* only copy commands needed */
  ndw += ncmds * 7;
@@ -1437,6 +1484,7 @@ static int amdgpu_vm_bo_update_mapping(struct 
amdgpu_device *adev,

error_free:
  amdgpu_job_free(job);
+amdgpu_vm_invalidate_level(>root);
  return r;
  }
  diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h

index afe9073..1c5e0ce 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -68,6 +68,9 @@ struct amdgpu_bo_list_entry;
  /* TILED for VEGA10, reserved for older ASICs  */
  #define AMDGPU_PTE_PRT(1ULL << 51)
  +/* PDE is handled as PTE for VEGA10 */
+#define AMDGPU_PDE_PTE(1ULL << 54)
+
  /* VEGA10 only */
  #define AMDGPU_PTE_MTYPE(a)((uint64_t)a << 57)
  #define AMDGPU_PTE_MTYPE_MASKAMDGPU_PTE_MTYPE(3ULL)
@@ -90,6 +93,7 @@ struct amdgpu_bo_list_entry;
  struct amdgpu_vm_pt {
  struct amdgpu_bo*bo;
  uint64_taddr;
+boolhuge_page;
/* array of page tables, one for each directory entry */
  struct amdgpu_vm_pt*entries;






Re: [PATCH 1/7] drm/amdgpu: cleanup adjust_mc_addr handling v2

2017-05-24 Thread Christian König

On 18.05.2017 at 07:33, Zhang, Jerry (Junwei) wrote:

On 05/17/2017 05:22 PM, Christian König wrote:

[SNIP]
+static uint64_t gmc_v6_0_get_vm_pde(struct amdgpu_device *adev, 
uint64_t addr)

+{
+BUG_ON(addr & 0xF000FFFULL);
+return addr;
+}


Just wondering why we didn't return the address as below?
adev->vm_manager.vram_base_offset + addr


That is a really good question and I couldn't figure out all the details 
either.


I've tested it on a Kaveri and it still seems to work fine, so the
logic is indeed correct.



Does that mean gmc6~gmc8 have no official APU support?


No, only the pro driver doesn't support APUs. The open source driver 
works perfectly fine on them.



Or am I missing any info about them?


The documentation for gfx6-8 is very unclear on this.

It says that the registers and PDE should use physical addresses, but 
from my testing that isn't the case.


Going to ping the hardware dev on this once more when I have time.

Regards,
Christian.



Jerry


+
  static void gmc_v6_0_set_fault_enable_default(struct amdgpu_device 
*adev,

bool value)
  {
@@ -1127,6 +1133,7 @@ static const struct amdgpu_gart_funcs 
gmc_v6_0_gart_funcs = {

  .flush_gpu_tlb = gmc_v6_0_gart_flush_gpu_tlb,
  .set_pte_pde = gmc_v6_0_gart_set_pte_pde,
  .set_prt = gmc_v6_0_set_prt,
+.get_vm_pde = gmc_v6_0_get_vm_pde,
  .get_vm_pte_flags = gmc_v6_0_get_vm_pte_flags
  };

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c

index 967505b..4f140e8 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -472,6 +472,12 @@ static uint64_t gmc_v7_0_get_vm_pte_flags(struct 
amdgpu_device *adev,

  return pte_flag;
  }

+static uint64_t gmc_v7_0_get_vm_pde(struct amdgpu_device *adev, 
uint64_t addr)

+{
+BUG_ON(addr & 0xF000FFFULL);
+return addr;
+}
+
  /**
   * gmc_v8_0_set_fault_enable_default - update VM fault handling
   *
@@ -1293,7 +1299,8 @@ static const struct amdgpu_gart_funcs 
gmc_v7_0_gart_funcs = {

  .flush_gpu_tlb = gmc_v7_0_gart_flush_gpu_tlb,
  .set_pte_pde = gmc_v7_0_gart_set_pte_pde,
  .set_prt = gmc_v7_0_set_prt,
-.get_vm_pte_flags = gmc_v7_0_get_vm_pte_flags
+.get_vm_pte_flags = gmc_v7_0_get_vm_pte_flags,
+.get_vm_pde = gmc_v7_0_get_vm_pde
  };

  static const struct amdgpu_irq_src_funcs gmc_v7_0_irq_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c

index 3b5ea0f..f05c034 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -656,6 +656,12 @@ static uint64_t gmc_v8_0_get_vm_pte_flags(struct 
amdgpu_device *adev,

  return pte_flag;
  }

+static uint64_t gmc_v8_0_get_vm_pde(struct amdgpu_device *adev, 
uint64_t addr)

+{
+BUG_ON(addr & 0xFFF00FFFULL);
+return addr;
+}
+
  /**
   * gmc_v8_0_set_fault_enable_default - update VM fault handling
   *
@@ -1612,7 +1618,8 @@ static const struct amdgpu_gart_funcs 
gmc_v8_0_gart_funcs = {

  .flush_gpu_tlb = gmc_v8_0_gart_flush_gpu_tlb,
  .set_pte_pde = gmc_v8_0_gart_set_pte_pde,
  .set_prt = gmc_v8_0_set_prt,
-.get_vm_pte_flags = gmc_v8_0_get_vm_pte_flags
+.get_vm_pte_flags = gmc_v8_0_get_vm_pte_flags,
+.get_vm_pde = gmc_v8_0_get_vm_pde
  };

  static const struct amdgpu_irq_src_funcs gmc_v8_0_irq_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c

index 19e1027..047b1a7 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -358,17 +358,19 @@ static uint64_t 
gmc_v9_0_get_vm_pte_flags(struct amdgpu_device *adev,

  return pte_flag;
  }

-static u64 gmc_v9_0_adjust_mc_addr(struct amdgpu_device *adev, u64 
mc_addr)

+static u64 gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, u64 addr)
  {
-return adev->vm_manager.vram_base_offset + mc_addr - 
adev->mc.vram_start;
+addr = adev->vm_manager.vram_base_offset + addr - 
adev->mc.vram_start;

+BUG_ON(addr & 0x003FULL);
+return addr;
  }

  static const struct amdgpu_gart_funcs gmc_v9_0_gart_funcs = {
  .flush_gpu_tlb = gmc_v9_0_gart_flush_gpu_tlb,
  .set_pte_pde = gmc_v9_0_gart_set_pte_pde,
-.get_vm_pte_flags = gmc_v9_0_get_vm_pte_flags,
-.adjust_mc_addr = gmc_v9_0_adjust_mc_addr,
  .get_invalidate_req = gmc_v9_0_get_invalidate_req,
+.get_vm_pte_flags = gmc_v9_0_get_vm_pte_flags,
+.get_vm_pde = gmc_v9_0_get_vm_pde
  };

  static void gmc_v9_0_set_gart_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c 
b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c

index 91cf7e6..fb6f4b3 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -1143,10 +1143,8 @@ static void 
sdma_v4_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
  uint32_t req =