Re: [PATCH] drm/amdgpu: defer test IBs on the rings at boot (V3)

2018-04-16 Thread Zhang, Jerry (Junwei)

On 04/16/2018 05:19 PM, Christian König wrote:

Am 16.04.2018 um 11:04 schrieb Zhang, Jerry (Junwei):

On 04/16/2018 03:17 PM, Shirish S wrote:

amdgpu_ib_ring_tests() runs test IB's on rings at boot
contributes to ~500 ms of amdgpu driver's boot time.

This patch defers it and ensures that its executed
in amdgpu_info_ioctl() if it wasn't scheduled.

V2: Use queue_delayed_work() & flush_delayed_work().
V3: removed usage of separate wq, ensure ib tests is
 run before enabling clockgating.

Signed-off-by: Shirish S 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 17 ++---
  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c|  3 +++
  2 files changed, 9 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 1762eb4..f225840 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1656,6 +1656,10 @@ static int amdgpu_device_ip_late_set_cg_state(struct
amdgpu_device *adev)
  if (amdgpu_emu_mode == 1)
  return 0;

+r = amdgpu_ib_ring_tests(adev);
+if (r)
+DRM_ERROR("ib ring test failed (%d).\n", r);
+


Just confirm:
IIRC, ib may not run immediately via scheduler thread.
Is there any possible that the actual ib ring test interferes cg enablement?


No, the IB tests are run directly on the hardware ring and not through the
scheduler.

But even when we run them through the scheduler we wait for the to complete, so
that shouldn't be an issue.


Thanks for explanation.
That's fine.

Jerry



Christian.



Jerry


  for (i = 0; i < adev->num_ip_blocks; i++) {
  if (!adev->ip_blocks[i].status.valid)
  continue;
@@ -1706,8 +1710,8 @@ static int amdgpu_device_ip_late_init(struct
amdgpu_device *adev)
  }
  }

-mod_delayed_work(system_wq, >late_init_work,
-msecs_to_jiffies(AMDGPU_RESUME_MS));
+queue_delayed_work(system_wq, >late_init_work,
+   msecs_to_jiffies(AMDGPU_RESUME_MS));

  amdgpu_device_fill_reset_magic(adev);

@@ -2374,10 +2378,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
  goto failed;
  }

-r = amdgpu_ib_ring_tests(adev);
-if (r)
-DRM_ERROR("ib ring test failed (%d).\n", r);
-
  if (amdgpu_sriov_vf(adev))
  amdgpu_virt_init_data_exchange(adev);

@@ -2640,11 +2640,6 @@ int amdgpu_device_resume(struct drm_device *dev, bool
resume, bool fbcon)

  amdgpu_fence_driver_resume(adev);

-if (resume) {
-r = amdgpu_ib_ring_tests(adev);
-if (r)
-DRM_ERROR("ib ring test failed (%d).\n", r);
-}

  r = amdgpu_device_ip_late_init(adev);
  if (r)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 487d39e..83d7160 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -279,6 +279,9 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void
*data, struct drm_file
  if (!info->return_size || !info->return_pointer)
  return -EINVAL;

+/* Ensure IB tests are run on ring */
+flush_delayed_work(>late_init_work);
+
  switch (info->query) {
  case AMDGPU_INFO_ACCEL_WORKING:
  ui32 = adev->accel_working;




___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amdgpu: defer test IBs on the rings at boot (V3)

2018-04-16 Thread Christian König

Am 16.04.2018 um 11:04 schrieb Zhang, Jerry (Junwei):

On 04/16/2018 03:17 PM, Shirish S wrote:

amdgpu_ib_ring_tests() runs test IB's on rings at boot
contributes to ~500 ms of amdgpu driver's boot time.

This patch defers it and ensures that its executed
in amdgpu_info_ioctl() if it wasn't scheduled.

V2: Use queue_delayed_work() & flush_delayed_work().
V3: removed usage of separate wq, ensure ib tests is
 run before enabling clockgating.

Signed-off-by: Shirish S 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 17 ++---
  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c    |  3 +++
  2 files changed, 9 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c

index 1762eb4..f225840 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1656,6 +1656,10 @@ static int 
amdgpu_device_ip_late_set_cg_state(struct amdgpu_device *adev)

  if (amdgpu_emu_mode == 1)
  return 0;

+    r = amdgpu_ib_ring_tests(adev);
+    if (r)
+    DRM_ERROR("ib ring test failed (%d).\n", r);
+


Just confirm:
IIRC, ib may not run immediately via scheduler thread.
Is there any possible that the actual ib ring test interferes cg 
enablement?


No, the IB tests are run directly on the hardware ring and not through 
the scheduler.


But even when we run them through the scheduler we wait for the to 
complete, so that shouldn't be an issue.


Christian.



Jerry


  for (i = 0; i < adev->num_ip_blocks; i++) {
  if (!adev->ip_blocks[i].status.valid)
  continue;
@@ -1706,8 +1710,8 @@ static int amdgpu_device_ip_late_init(struct 
amdgpu_device *adev)

  }
  }

-    mod_delayed_work(system_wq, >late_init_work,
-    msecs_to_jiffies(AMDGPU_RESUME_MS));
+    queue_delayed_work(system_wq, >late_init_work,
+   msecs_to_jiffies(AMDGPU_RESUME_MS));

  amdgpu_device_fill_reset_magic(adev);

@@ -2374,10 +2378,6 @@ int amdgpu_device_init(struct amdgpu_device 
*adev,

  goto failed;
  }

-    r = amdgpu_ib_ring_tests(adev);
-    if (r)
-    DRM_ERROR("ib ring test failed (%d).\n", r);
-
  if (amdgpu_sriov_vf(adev))
  amdgpu_virt_init_data_exchange(adev);

@@ -2640,11 +2640,6 @@ int amdgpu_device_resume(struct drm_device 
*dev, bool resume, bool fbcon)


  amdgpu_fence_driver_resume(adev);

-    if (resume) {
-    r = amdgpu_ib_ring_tests(adev);
-    if (r)
-    DRM_ERROR("ib ring test failed (%d).\n", r);
-    }

  r = amdgpu_device_ip_late_init(adev);
  if (r)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c

index 487d39e..83d7160 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -279,6 +279,9 @@ static int amdgpu_info_ioctl(struct drm_device 
*dev, void *data, struct drm_file

  if (!info->return_size || !info->return_pointer)
  return -EINVAL;

+    /* Ensure IB tests are run on ring */
+    flush_delayed_work(>late_init_work);
+
  switch (info->query) {
  case AMDGPU_INFO_ACCEL_WORKING:
  ui32 = adev->accel_working;



___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amdgpu: defer test IBs on the rings at boot (V3)

2018-04-16 Thread Zhang, Jerry (Junwei)

On 04/16/2018 03:17 PM, Shirish S wrote:

amdgpu_ib_ring_tests() runs test IB's on rings at boot
contributes to ~500 ms of amdgpu driver's boot time.

This patch defers it and ensures that its executed
in amdgpu_info_ioctl() if it wasn't scheduled.

V2: Use queue_delayed_work() & flush_delayed_work().
V3: removed usage of separate wq, ensure ib tests is
 run before enabling clockgating.

Signed-off-by: Shirish S 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 17 ++---
  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c|  3 +++
  2 files changed, 9 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 1762eb4..f225840 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1656,6 +1656,10 @@ static int amdgpu_device_ip_late_set_cg_state(struct 
amdgpu_device *adev)
if (amdgpu_emu_mode == 1)
return 0;

+   r = amdgpu_ib_ring_tests(adev);
+   if (r)
+   DRM_ERROR("ib ring test failed (%d).\n", r);
+


Just confirm:
IIRC, ib may not run immediately via scheduler thread.
Is there any possible that the actual ib ring test interferes cg enablement?

Jerry


for (i = 0; i < adev->num_ip_blocks; i++) {
if (!adev->ip_blocks[i].status.valid)
continue;
@@ -1706,8 +1710,8 @@ static int amdgpu_device_ip_late_init(struct 
amdgpu_device *adev)
}
}

-   mod_delayed_work(system_wq, >late_init_work,
-   msecs_to_jiffies(AMDGPU_RESUME_MS));
+   queue_delayed_work(system_wq, >late_init_work,
+  msecs_to_jiffies(AMDGPU_RESUME_MS));

amdgpu_device_fill_reset_magic(adev);

@@ -2374,10 +2378,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
goto failed;
}

-   r = amdgpu_ib_ring_tests(adev);
-   if (r)
-   DRM_ERROR("ib ring test failed (%d).\n", r);
-
if (amdgpu_sriov_vf(adev))
amdgpu_virt_init_data_exchange(adev);

@@ -2640,11 +2640,6 @@ int amdgpu_device_resume(struct drm_device *dev, bool 
resume, bool fbcon)

amdgpu_fence_driver_resume(adev);

-   if (resume) {
-   r = amdgpu_ib_ring_tests(adev);
-   if (r)
-   DRM_ERROR("ib ring test failed (%d).\n", r);
-   }

r = amdgpu_device_ip_late_init(adev);
if (r)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 487d39e..83d7160 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -279,6 +279,9 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void 
*data, struct drm_file
if (!info->return_size || !info->return_pointer)
return -EINVAL;

+   /* Ensure IB tests are run on ring */
+   flush_delayed_work(>late_init_work);
+
switch (info->query) {
case AMDGPU_INFO_ACCEL_WORKING:
ui32 = adev->accel_working;


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amdgpu: defer test IBs on the rings at boot (V3)

2018-04-16 Thread Christian König

Am 16.04.2018 um 09:17 schrieb Shirish S:

amdgpu_ib_ring_tests() runs test IB's on rings at boot
contributes to ~500 ms of amdgpu driver's boot time.

This patch defers it and ensures that its executed
in amdgpu_info_ioctl() if it wasn't scheduled.

V2: Use queue_delayed_work() & flush_delayed_work().
V3: removed usage of separate wq, ensure ib tests is
 run before enabling clockgating.

Signed-off-by: Shirish S 


Reviewed-by: Christian König 


---
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 17 ++---
  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c|  3 +++
  2 files changed, 9 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 1762eb4..f225840 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1656,6 +1656,10 @@ static int amdgpu_device_ip_late_set_cg_state(struct 
amdgpu_device *adev)
if (amdgpu_emu_mode == 1)
return 0;
  
+	r = amdgpu_ib_ring_tests(adev);

+   if (r)
+   DRM_ERROR("ib ring test failed (%d).\n", r);
+
for (i = 0; i < adev->num_ip_blocks; i++) {
if (!adev->ip_blocks[i].status.valid)
continue;
@@ -1706,8 +1710,8 @@ static int amdgpu_device_ip_late_init(struct 
amdgpu_device *adev)
}
}
  
-	mod_delayed_work(system_wq, >late_init_work,

-   msecs_to_jiffies(AMDGPU_RESUME_MS));
+   queue_delayed_work(system_wq, >late_init_work,
+  msecs_to_jiffies(AMDGPU_RESUME_MS));
  
  	amdgpu_device_fill_reset_magic(adev);
  
@@ -2374,10 +2378,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,

goto failed;
}
  
-	r = amdgpu_ib_ring_tests(adev);

-   if (r)
-   DRM_ERROR("ib ring test failed (%d).\n", r);
-
if (amdgpu_sriov_vf(adev))
amdgpu_virt_init_data_exchange(adev);
  
@@ -2640,11 +2640,6 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
  
  	amdgpu_fence_driver_resume(adev);
  
-	if (resume) {

-   r = amdgpu_ib_ring_tests(adev);
-   if (r)
-   DRM_ERROR("ib ring test failed (%d).\n", r);
-   }
  
  	r = amdgpu_device_ip_late_init(adev);

if (r)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 487d39e..83d7160 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -279,6 +279,9 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void 
*data, struct drm_file
if (!info->return_size || !info->return_pointer)
return -EINVAL;
  
+	/* Ensure IB tests are run on ring */

+   flush_delayed_work(>late_init_work);
+
switch (info->query) {
case AMDGPU_INFO_ACCEL_WORKING:
ui32 = adev->accel_working;


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amdgpu: defer test IBs on the rings at boot (V3)

2018-04-16 Thread Shirish S
amdgpu_ib_ring_tests() runs test IB's on rings at boot
contributes to ~500 ms of amdgpu driver's boot time.

This patch defers it and ensures that its executed
in amdgpu_info_ioctl() if it wasn't scheduled.

V2: Use queue_delayed_work() & flush_delayed_work().
V3: removed usage of separate wq, ensure ib tests is
run before enabling clockgating.

Signed-off-by: Shirish S 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 17 ++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c|  3 +++
 2 files changed, 9 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 1762eb4..f225840 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1656,6 +1656,10 @@ static int amdgpu_device_ip_late_set_cg_state(struct 
amdgpu_device *adev)
if (amdgpu_emu_mode == 1)
return 0;
 
+   r = amdgpu_ib_ring_tests(adev);
+   if (r)
+   DRM_ERROR("ib ring test failed (%d).\n", r);
+
for (i = 0; i < adev->num_ip_blocks; i++) {
if (!adev->ip_blocks[i].status.valid)
continue;
@@ -1706,8 +1710,8 @@ static int amdgpu_device_ip_late_init(struct 
amdgpu_device *adev)
}
}
 
-   mod_delayed_work(system_wq, >late_init_work,
-   msecs_to_jiffies(AMDGPU_RESUME_MS));
+   queue_delayed_work(system_wq, >late_init_work,
+  msecs_to_jiffies(AMDGPU_RESUME_MS));
 
amdgpu_device_fill_reset_magic(adev);
 
@@ -2374,10 +2378,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
goto failed;
}
 
-   r = amdgpu_ib_ring_tests(adev);
-   if (r)
-   DRM_ERROR("ib ring test failed (%d).\n", r);
-
if (amdgpu_sriov_vf(adev))
amdgpu_virt_init_data_exchange(adev);
 
@@ -2640,11 +2640,6 @@ int amdgpu_device_resume(struct drm_device *dev, bool 
resume, bool fbcon)
 
amdgpu_fence_driver_resume(adev);
 
-   if (resume) {
-   r = amdgpu_ib_ring_tests(adev);
-   if (r)
-   DRM_ERROR("ib ring test failed (%d).\n", r);
-   }
 
r = amdgpu_device_ip_late_init(adev);
if (r)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 487d39e..83d7160 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -279,6 +279,9 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void 
*data, struct drm_file
if (!info->return_size || !info->return_pointer)
return -EINVAL;
 
+   /* Ensure IB tests are run on ring */
+   flush_delayed_work(>late_init_work);
+
switch (info->query) {
case AMDGPU_INFO_ACCEL_WORKING:
ui32 = adev->accel_working;
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amdgpu: defer test IBs on the rings at boot (V2)

2018-04-16 Thread S, Shirish



On 4/13/2018 10:20 PM, Alex Deucher wrote:

On Fri, Apr 13, 2018 at 9:25 AM, Christian König
 wrote:

Am 13.04.2018 um 10:31 schrieb Shirish S:

amdgpu_ib_ring_tests() runs test IB's on rings at boot
contributes to ~500 ms of amdgpu driver's boot time.

This patch defers it and ensures that its executed
in amdgpu_info_ioctl() if it wasn't scheduled.

V2: Use queue_delayed_work() & flush_delayed_work().

Signed-off-by: Shirish S 
---
   drivers/gpu/drm/amd/amdgpu/amdgpu.h|  2 ++
   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 20 +---
   drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c|  4 
   3 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 5734871..ae8f722 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1611,6 +1611,8 @@ struct amdgpu_device {
 /* delayed work_func for deferring clockgating during resume */
 struct delayed_work late_init_work;
+   /* delayed work_func to defer testing IB's on rings during boot */
+   struct delayed_work late_init_test_ib_work;


That still has the chance of running the late init in parallel with the IB
tests and that really doesn't looks like a good idea to me.

Yeah, at least on older chips we run into problems if we power or
clock gate some engines while they are in use.  Even on engines that
support dynamic gating, you usually have to set it up while the engine
is idle.  Make sure the IB tests run before we enable gating.

Ok Alex.
I have re-spun V3, with only one delayed work and ensuring ib tests are 
run before enabling clock gating.

Regards,
Shirish S

Alex


Is there any issue with putting the IB test into the late init work handler
as well?



 struct amdgpu_virt  virt;
 /* firmware VRAM reservation */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 1762eb4..ee84058 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -63,6 +63,7 @@ MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
   MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
 #define AMDGPU_RESUME_MS2000
+#define AMDGPU_IB_TEST_SCHED_MS2000
 static const char *amdgpu_asic_name[] = {
 "TAHITI",
@@ -2105,6 +2106,16 @@ bool amdgpu_device_asic_has_dc_support(enum
amd_asic_type asic_type)
 }
   }
   +static void amdgpu_device_late_init_test_ib_func_handler(struct
work_struct *work)
+{
+   struct amdgpu_device *adev =
+   container_of(work, struct amdgpu_device,
late_init_test_ib_work.work);
+   int r = amdgpu_ib_ring_tests(adev);
+
+   if (r)
+   DRM_ERROR("ib ring test failed (%d).\n", r);
+}
+
   /**
* amdgpu_device_has_dc_support - check if dc is supported
*
@@ -2212,6 +2223,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 INIT_LIST_HEAD(>ring_lru_list);
 spin_lock_init(>ring_lru_list_lock);
   + INIT_DELAYED_WORK(>late_init_test_ib_work,
+ amdgpu_device_late_init_test_ib_func_handler);
 INIT_DELAYED_WORK(>late_init_work,
   amdgpu_device_ip_late_init_func_handler);
   @@ -2374,9 +2387,9 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 goto failed;
 }
   - r = amdgpu_ib_ring_tests(adev);
-   if (r)
-   DRM_ERROR("ib ring test failed (%d).\n", r);
+   /* Schedule amdgpu_ib_ring_tests() */
+   queue_delayed_work(system_wq, >late_init_test_ib_work,
+   msecs_to_jiffies(AMDGPU_IB_TEST_SCHED_MS));
 if (amdgpu_sriov_vf(adev))
 amdgpu_virt_init_data_exchange(adev);
@@ -2469,6 +2482,7 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
 }
 adev->accel_working = false;
 cancel_delayed_work_sync(>late_init_work);
+   cancel_delayed_work_sync(>late_init_test_ib_work);
 /* free i2c buses */
 if (!amdgpu_device_has_dc_support(adev))
 amdgpu_i2c_fini(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 487d39e..6fa326b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -279,6 +279,10 @@ static int amdgpu_info_ioctl(struct drm_device *dev,
void *data, struct drm_file
 if (!info->return_size || !info->return_pointer)
 return -EINVAL;
   + /* Ensure IB tests on ring are executed */
+   if (delayed_work_pending(>late_init_test_ib_work))
+   flush_delayed_work(>late_init_test_ib_work);
+


You just need to call flush_delayed_work() here without the if.

Regards,
Christian.


 switch (info->query) {
 case AMDGPU_INFO_ACCEL_WORKING:
 ui32 = 

Re: [PATCH] drm/amdgpu: defer test IBs on the rings at boot (V2)

2018-04-13 Thread Alex Deucher
On Fri, Apr 13, 2018 at 9:25 AM, Christian König
 wrote:
> Am 13.04.2018 um 10:31 schrieb Shirish S:
>>
>> amdgpu_ib_ring_tests() runs test IB's on rings at boot
>> contributes to ~500 ms of amdgpu driver's boot time.
>>
>> This patch defers it and ensures that its executed
>> in amdgpu_info_ioctl() if it wasn't scheduled.
>>
>> V2: Use queue_delayed_work() & flush_delayed_work().
>>
>> Signed-off-by: Shirish S 
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu.h|  2 ++
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 20 +---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c|  4 
>>   3 files changed, 23 insertions(+), 3 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> index 5734871..ae8f722 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> @@ -1611,6 +1611,8 @@ struct amdgpu_device {
>> /* delayed work_func for deferring clockgating during resume */
>> struct delayed_work late_init_work;
>> +   /* delayed work_func to defer testing IB's on rings during boot */
>> +   struct delayed_work late_init_test_ib_work;
>
>
> That still has the chance of running the late init in parallel with the IB
> tests and that really doesn't looks like a good idea to me.

Yeah, at least on older chips we run into problems if we power or
clock gate some engines while they are in use.  Even on engines that
support dynamic gating, you usually have to set it up while the engine
is idle.  Make sure the IB tests run before we enable gating.

Alex

>
> Is there any issue with putting the IB test into the late init work handler
> as well?
>
>
>> struct amdgpu_virt  virt;
>> /* firmware VRAM reservation */
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> index 1762eb4..ee84058 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> @@ -63,6 +63,7 @@ MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
>>   MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
>> #define AMDGPU_RESUME_MS2000
>> +#define AMDGPU_IB_TEST_SCHED_MS2000
>> static const char *amdgpu_asic_name[] = {
>> "TAHITI",
>> @@ -2105,6 +2106,16 @@ bool amdgpu_device_asic_has_dc_support(enum
>> amd_asic_type asic_type)
>> }
>>   }
>>   +static void amdgpu_device_late_init_test_ib_func_handler(struct
>> work_struct *work)
>> +{
>> +   struct amdgpu_device *adev =
>> +   container_of(work, struct amdgpu_device,
>> late_init_test_ib_work.work);
>> +   int r = amdgpu_ib_ring_tests(adev);
>> +
>> +   if (r)
>> +   DRM_ERROR("ib ring test failed (%d).\n", r);
>> +}
>> +
>>   /**
>>* amdgpu_device_has_dc_support - check if dc is supported
>>*
>> @@ -2212,6 +2223,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
>> INIT_LIST_HEAD(>ring_lru_list);
>> spin_lock_init(>ring_lru_list_lock);
>>   + INIT_DELAYED_WORK(>late_init_test_ib_work,
>> + amdgpu_device_late_init_test_ib_func_handler);
>> INIT_DELAYED_WORK(>late_init_work,
>>   amdgpu_device_ip_late_init_func_handler);
>>   @@ -2374,9 +2387,9 @@ int amdgpu_device_init(struct amdgpu_device *adev,
>> goto failed;
>> }
>>   - r = amdgpu_ib_ring_tests(adev);
>> -   if (r)
>> -   DRM_ERROR("ib ring test failed (%d).\n", r);
>> +   /* Schedule amdgpu_ib_ring_tests() */
>> +   queue_delayed_work(system_wq, >late_init_test_ib_work,
>> +   msecs_to_jiffies(AMDGPU_IB_TEST_SCHED_MS));
>> if (amdgpu_sriov_vf(adev))
>> amdgpu_virt_init_data_exchange(adev);
>> @@ -2469,6 +2482,7 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
>> }
>> adev->accel_working = false;
>> cancel_delayed_work_sync(>late_init_work);
>> +   cancel_delayed_work_sync(>late_init_test_ib_work);
>> /* free i2c buses */
>> if (!amdgpu_device_has_dc_support(adev))
>> amdgpu_i2c_fini(adev);
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>> index 487d39e..6fa326b 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>> @@ -279,6 +279,10 @@ static int amdgpu_info_ioctl(struct drm_device *dev,
>> void *data, struct drm_file
>> if (!info->return_size || !info->return_pointer)
>> return -EINVAL;
>>   + /* Ensure IB tests on ring are executed */
>> +   if (delayed_work_pending(>late_init_test_ib_work))
>> +   flush_delayed_work(>late_init_test_ib_work);
>> +
>
>
> You just need to call flush_delayed_work() here without the if.
>
> Regards,
> Christian.
>
>> 

Re: [PATCH] drm/amdgpu: defer test IBs on the rings at boot (V2)

2018-04-13 Thread Christian König

Am 13.04.2018 um 10:31 schrieb Shirish S:

amdgpu_ib_ring_tests() runs test IB's on rings at boot
contributes to ~500 ms of amdgpu driver's boot time.

This patch defers it and ensures that its executed
in amdgpu_info_ioctl() if it wasn't scheduled.

V2: Use queue_delayed_work() & flush_delayed_work().

Signed-off-by: Shirish S 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu.h|  2 ++
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 20 +---
  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c|  4 
  3 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 5734871..ae8f722 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1611,6 +1611,8 @@ struct amdgpu_device {
  
  	/* delayed work_func for deferring clockgating during resume */

struct delayed_work late_init_work;
+   /* delayed work_func to defer testing IB's on rings during boot */
+   struct delayed_work late_init_test_ib_work;


That still has the chance of running the late init in parallel with the 
IB tests and that really doesn't looks like a good idea to me.


Is there any issue with putting the IB test into the late init work 
handler as well?


  
  	struct amdgpu_virt	virt;

/* firmware VRAM reservation */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 1762eb4..ee84058 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -63,6 +63,7 @@ MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
  MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
  
  #define AMDGPU_RESUME_MS		2000

+#define AMDGPU_IB_TEST_SCHED_MS2000
  
  static const char *amdgpu_asic_name[] = {

"TAHITI",
@@ -2105,6 +2106,16 @@ bool amdgpu_device_asic_has_dc_support(enum 
amd_asic_type asic_type)
}
  }
  
+static void amdgpu_device_late_init_test_ib_func_handler(struct work_struct *work)

+{
+   struct amdgpu_device *adev =
+   container_of(work, struct amdgpu_device, 
late_init_test_ib_work.work);
+   int r = amdgpu_ib_ring_tests(adev);
+
+   if (r)
+   DRM_ERROR("ib ring test failed (%d).\n", r);
+}
+
  /**
   * amdgpu_device_has_dc_support - check if dc is supported
   *
@@ -2212,6 +2223,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
INIT_LIST_HEAD(>ring_lru_list);
spin_lock_init(>ring_lru_list_lock);
  
+	INIT_DELAYED_WORK(>late_init_test_ib_work,

+ amdgpu_device_late_init_test_ib_func_handler);
INIT_DELAYED_WORK(>late_init_work,
  amdgpu_device_ip_late_init_func_handler);
  
@@ -2374,9 +2387,9 @@ int amdgpu_device_init(struct amdgpu_device *adev,

goto failed;
}
  
-	r = amdgpu_ib_ring_tests(adev);

-   if (r)
-   DRM_ERROR("ib ring test failed (%d).\n", r);
+   /* Schedule amdgpu_ib_ring_tests() */
+   queue_delayed_work(system_wq, >late_init_test_ib_work,
+   msecs_to_jiffies(AMDGPU_IB_TEST_SCHED_MS));
  
  	if (amdgpu_sriov_vf(adev))

amdgpu_virt_init_data_exchange(adev);
@@ -2469,6 +2482,7 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
}
adev->accel_working = false;
cancel_delayed_work_sync(>late_init_work);
+   cancel_delayed_work_sync(>late_init_test_ib_work);
/* free i2c buses */
if (!amdgpu_device_has_dc_support(adev))
amdgpu_i2c_fini(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 487d39e..6fa326b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -279,6 +279,10 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void 
*data, struct drm_file
if (!info->return_size || !info->return_pointer)
return -EINVAL;
  
+	/* Ensure IB tests on ring are executed */

+   if (delayed_work_pending(>late_init_test_ib_work))
+   flush_delayed_work(>late_init_test_ib_work);
+


You just need to call flush_delayed_work() here without the if.

Regards,
Christian.


switch (info->query) {
case AMDGPU_INFO_ACCEL_WORKING:
ui32 = adev->accel_working;


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amdgpu: defer test IBs on the rings at boot (V2)

2018-04-13 Thread Shirish S
amdgpu_ib_ring_tests() runs test IB's on rings at boot
contributes to ~500 ms of amdgpu driver's boot time.

This patch defers it and ensures that its executed
in amdgpu_info_ioctl() if it wasn't scheduled.

V2: Use queue_delayed_work() & flush_delayed_work().

Signed-off-by: Shirish S 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h|  2 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 20 +---
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c|  4 
 3 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 5734871..ae8f722 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1611,6 +1611,8 @@ struct amdgpu_device {
 
/* delayed work_func for deferring clockgating during resume */
struct delayed_work late_init_work;
+   /* delayed work_func to defer testing IB's on rings during boot */
+   struct delayed_work late_init_test_ib_work;
 
struct amdgpu_virt  virt;
/* firmware VRAM reservation */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 1762eb4..ee84058 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -63,6 +63,7 @@ MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
 MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
 
 #define AMDGPU_RESUME_MS   2000
+#define AMDGPU_IB_TEST_SCHED_MS2000
 
 static const char *amdgpu_asic_name[] = {
"TAHITI",
@@ -2105,6 +2106,16 @@ bool amdgpu_device_asic_has_dc_support(enum 
amd_asic_type asic_type)
}
 }
 
+static void amdgpu_device_late_init_test_ib_func_handler(struct work_struct 
*work)
+{
+   struct amdgpu_device *adev =
+   container_of(work, struct amdgpu_device, 
late_init_test_ib_work.work);
+   int r = amdgpu_ib_ring_tests(adev);
+
+   if (r)
+   DRM_ERROR("ib ring test failed (%d).\n", r);
+}
+
 /**
  * amdgpu_device_has_dc_support - check if dc is supported
  *
@@ -2212,6 +2223,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
INIT_LIST_HEAD(>ring_lru_list);
spin_lock_init(>ring_lru_list_lock);
 
+   INIT_DELAYED_WORK(>late_init_test_ib_work,
+ amdgpu_device_late_init_test_ib_func_handler);
INIT_DELAYED_WORK(>late_init_work,
  amdgpu_device_ip_late_init_func_handler);
 
@@ -2374,9 +2387,9 @@ int amdgpu_device_init(struct amdgpu_device *adev,
goto failed;
}
 
-   r = amdgpu_ib_ring_tests(adev);
-   if (r)
-   DRM_ERROR("ib ring test failed (%d).\n", r);
+   /* Schedule amdgpu_ib_ring_tests() */
+   queue_delayed_work(system_wq, >late_init_test_ib_work,
+   msecs_to_jiffies(AMDGPU_IB_TEST_SCHED_MS));
 
if (amdgpu_sriov_vf(adev))
amdgpu_virt_init_data_exchange(adev);
@@ -2469,6 +2482,7 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
}
adev->accel_working = false;
cancel_delayed_work_sync(>late_init_work);
+   cancel_delayed_work_sync(>late_init_test_ib_work);
/* free i2c buses */
if (!amdgpu_device_has_dc_support(adev))
amdgpu_i2c_fini(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 487d39e..6fa326b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -279,6 +279,10 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void 
*data, struct drm_file
if (!info->return_size || !info->return_pointer)
return -EINVAL;
 
+   /* Ensure IB tests on ring are executed */
+   if (delayed_work_pending(>late_init_test_ib_work))
+   flush_delayed_work(>late_init_test_ib_work);
+
switch (info->query) {
case AMDGPU_INFO_ACCEL_WORKING:
ui32 = adev->accel_working;
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amdgpu: defer test IBs on the rings at boot

2018-04-13 Thread S, Shirish



On 4/13/2018 12:38 PM, Christian König wrote:

Am 13.04.2018 um 09:01 schrieb S, Shirish:



On 4/13/2018 11:53 AM, Christian König wrote:

Am 13.04.2018 um 06:07 schrieb Shirish S:

amdgpu_ib_ring_tests() runs test IB's on rings at boot
contributes to ~500 ms of amdgpu driver's boot time.

This patch defers it and adds a check to report
in amdgpu_info_ioctl() if it was scheduled or not.


That is rather suboptimal, but see below.

Which part is sub-optimal, deferring or checking if the work is 
scheduled?


That was about the check. We should wait for the test to finish 
instead of printing an error and continuing.



Done. Have made this change in V2.


Signed-off-by: Shirish S 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu.h    |  2 ++
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 20 +---
  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c    |  3 +++
  3 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h

index 5734871..ae8f722 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1611,6 +1611,8 @@ struct amdgpu_device {
    /* delayed work_func for deferring clockgating during 
resume */

  struct delayed_work late_init_work;
+    /* delayed work_func to defer testing IB's on rings during 
boot */

+    struct delayed_work late_init_test_ib_work;


You must put the IB test into the late_init_work as well, otherwise 
the two delayed workers can race with each other.


I thought  from the comment above the declaration, its clear why i am 
creating 2 work structures.
late_init_work is to optimize resume time and late_init_test_ib_work 
is to optimize the boot time.
There cant be race as the context in which they are called are 
totally different.


Late init enables power and clock gating. If I'm not completely 
mistaken we don't do the power/clock gating earlier because we had to 
wait for the IB test to finish.


Could be that modern ASICs have additional logic to prevent that, but 
the last time I worked on this power gating a block while you run 
something on it could even crash the whole system.



    struct amdgpu_virt    virt;
  /* firmware VRAM reservation */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c

index 1762eb4..e65a5e6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -63,6 +63,7 @@ MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
  MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
    #define AMDGPU_RESUME_MS    2000
+#define AMDGPU_IB_TEST_SCHED_MS    2000
    static const char *amdgpu_asic_name[] = {
  "TAHITI",
@@ -2105,6 +2106,16 @@ bool amdgpu_device_asic_has_dc_support(enum 
amd_asic_type asic_type)

  }
  }
  +static void amdgpu_device_late_init_test_ib_func_handler(struct 
work_struct *work)

+{
+    struct amdgpu_device *adev =
+    container_of(work, struct amdgpu_device, 
late_init_test_ib_work.work);

+    int r = amdgpu_ib_ring_tests(adev);
+
+    if (r)
+    DRM_ERROR("ib ring test failed (%d).\n", r);
+}
+
  /**
   * amdgpu_device_has_dc_support - check if dc is supported
   *
@@ -2212,6 +2223,8 @@ int amdgpu_device_init(struct amdgpu_device 
*adev,

  INIT_LIST_HEAD(>ring_lru_list);
  spin_lock_init(>ring_lru_list_lock);
  + INIT_DELAYED_WORK(>late_init_test_ib_work,
+ amdgpu_device_late_init_test_ib_func_handler);
  INIT_DELAYED_WORK(>late_init_work,
    amdgpu_device_ip_late_init_func_handler);
  @@ -2374,9 +2387,9 @@ int amdgpu_device_init(struct amdgpu_device 
*adev,

  goto failed;
  }
  -    r = amdgpu_ib_ring_tests(adev);
-    if (r)
-    DRM_ERROR("ib ring test failed (%d).\n", r);
+    /* Schedule amdgpu_ib_ring_tests() */
+    mod_delayed_work(system_wq, >late_init_test_ib_work,
+    msecs_to_jiffies(AMDGPU_IB_TEST_SCHED_MS));


That doesn't work like you intended. mod_delayed_work() overrides 
the existing handler.


What you wanted to use is queue_delayed_work(), but as I said we 
should only have one delayed worker.
mod_delayed_work() is safer and optimal method that replaces 
cancel_delayed_work() followed by queue_delayed_work().

(https://lkml.org/lkml/2011/2/3/175)
But if you strongly insist i don't mind changing it.


Well, mod_delayed_work() does NOT replace queue_delayed_work(). Those 
two functions are for different use cases.


The link you posted actually explains it quite well:

So, cancel_delayed_work() followed by queue_delayed_work() schedules
the work to be executed at the specified time regardless of the
current pending state while queue_delayed_work() takes effect iff
currently the work item is not pending.


queue_delayed_work() takes only effect if the work item is not already 
pending/executing.


In other words with queue_delayed_work() you don't risk executing 
things twice when the 

Re: [PATCH] drm/amdgpu: defer test IBs on the rings at boot

2018-04-13 Thread Christian König

Am 13.04.2018 um 09:01 schrieb S, Shirish:



On 4/13/2018 11:53 AM, Christian König wrote:

Am 13.04.2018 um 06:07 schrieb Shirish S:

amdgpu_ib_ring_tests() runs test IB's on rings at boot
contributes to ~500 ms of amdgpu driver's boot time.

This patch defers it and adds a check to report
in amdgpu_info_ioctl() if it was scheduled or not.


That is rather suboptimal, but see below.

Which part is sub-optimal, deferring or checking if the work is 
scheduled?


That was about the check. We should wait for the test to finish instead 
of printing an error and continuing.




Signed-off-by: Shirish S 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu.h    |  2 ++
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 20 +---
  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c    |  3 +++
  3 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h

index 5734871..ae8f722 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1611,6 +1611,8 @@ struct amdgpu_device {
    /* delayed work_func for deferring clockgating during resume */
  struct delayed_work late_init_work;
+    /* delayed work_func to defer testing IB's on rings during boot */
+    struct delayed_work late_init_test_ib_work;


You must put the IB test into the late_init_work as well, otherwise 
the two delayed workers can race with each other.


I thought  from the comment above the declaration, its clear why i am 
creating 2 work structures.
late_init_work is to optimize resume time and late_init_test_ib_work 
is to optimize the boot time.
There cant be race as the context in which they are called are totally 
different.


Late init enables power and clock gating. If I'm not completely mistaken 
we don't do the power/clock gating earlier because we had to wait for 
the IB test to finish.


Could be that modern ASICs have additional logic to prevent that, but 
the last time I worked on this power gating a block while you run 
something on it could even crash the whole system.



    struct amdgpu_virt    virt;
  /* firmware VRAM reservation */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c

index 1762eb4..e65a5e6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -63,6 +63,7 @@ MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
  MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
    #define AMDGPU_RESUME_MS    2000
+#define AMDGPU_IB_TEST_SCHED_MS    2000
    static const char *amdgpu_asic_name[] = {
  "TAHITI",
@@ -2105,6 +2106,16 @@ bool amdgpu_device_asic_has_dc_support(enum 
amd_asic_type asic_type)

  }
  }
  +static void amdgpu_device_late_init_test_ib_func_handler(struct 
work_struct *work)

+{
+    struct amdgpu_device *adev =
+    container_of(work, struct amdgpu_device, 
late_init_test_ib_work.work);

+    int r = amdgpu_ib_ring_tests(adev);
+
+    if (r)
+    DRM_ERROR("ib ring test failed (%d).\n", r);
+}
+
  /**
   * amdgpu_device_has_dc_support - check if dc is supported
   *
@@ -2212,6 +2223,8 @@ int amdgpu_device_init(struct amdgpu_device 
*adev,

  INIT_LIST_HEAD(>ring_lru_list);
  spin_lock_init(>ring_lru_list_lock);
  +    INIT_DELAYED_WORK(>late_init_test_ib_work,
+  amdgpu_device_late_init_test_ib_func_handler);
  INIT_DELAYED_WORK(>late_init_work,
    amdgpu_device_ip_late_init_func_handler);
  @@ -2374,9 +2387,9 @@ int amdgpu_device_init(struct amdgpu_device 
*adev,

  goto failed;
  }
  -    r = amdgpu_ib_ring_tests(adev);
-    if (r)
-    DRM_ERROR("ib ring test failed (%d).\n", r);
+    /* Schedule amdgpu_ib_ring_tests() */
+    mod_delayed_work(system_wq, >late_init_test_ib_work,
+    msecs_to_jiffies(AMDGPU_IB_TEST_SCHED_MS));


That doesn't work like you intended. mod_delayed_work() overrides the 
existing handler.


What you wanted to use is queue_delayed_work(), but as I said we 
should only have one delayed worker.
mod_delayed_work() is safer and optimal method that replaces 
cancel_delayed_work() followed by queue_delayed_work().

(https://lkml.org/lkml/2011/2/3/175)
But if you strongly insist i don't mind changing it.


Well, mod_delayed_work() does NOT replace queue_delayed_work(). Those 
two functions are for different use cases.


The link you posted actually explains it quite well:

So, cancel_delayed_work() followed by queue_delayed_work() schedules
the work to be executed at the specified time regardless of the
current pending state while queue_delayed_work() takes effect iff
currently the work item is not pending.


queue_delayed_work() takes only effect if the work item is not already 
pending/executing.


In other words with queue_delayed_work() you don't risk executing things 
twice when the work is already executing.


While with mod_delayed_work() you absolutely 

Re: [PATCH] drm/amdgpu: defer test IBs on the rings at boot

2018-04-13 Thread S, Shirish



On 4/13/2018 11:53 AM, Christian König wrote:

Am 13.04.2018 um 06:07 schrieb Shirish S:

amdgpu_ib_ring_tests() runs test IB's on rings at boot
contributes to ~500 ms of amdgpu driver's boot time.

This patch defers it and adds a check to report
in amdgpu_info_ioctl() if it was scheduled or not.


That is rather suboptimal, but see below.


Which part is sub-optimal, deferring or checking if the work is scheduled?


Signed-off-by: Shirish S 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu.h    |  2 ++
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 20 +---
  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c    |  3 +++
  3 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h

index 5734871..ae8f722 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1611,6 +1611,8 @@ struct amdgpu_device {
    /* delayed work_func for deferring clockgating during resume */
  struct delayed_work late_init_work;
+    /* delayed work_func to defer testing IB's on rings during boot */
+    struct delayed_work late_init_test_ib_work;


You must put the IB test into the late_init_work as well, otherwise 
the two delayed workers can race with each other.


I thought  from the comment above the declaration, its clear why i am 
creating 2 work structures.
late_init_work is to optimize resume time and late_init_test_ib_work is 
to optimize the boot time.
There cant be race as the context in which they are called are totally 
different.

    struct amdgpu_virt    virt;
  /* firmware VRAM reservation */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c

index 1762eb4..e65a5e6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -63,6 +63,7 @@ MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
  MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
    #define AMDGPU_RESUME_MS    2000
+#define AMDGPU_IB_TEST_SCHED_MS    2000
    static const char *amdgpu_asic_name[] = {
  "TAHITI",
@@ -2105,6 +2106,16 @@ bool amdgpu_device_asic_has_dc_support(enum 
amd_asic_type asic_type)

  }
  }
  +static void amdgpu_device_late_init_test_ib_func_handler(struct 
work_struct *work)

+{
+    struct amdgpu_device *adev =
+    container_of(work, struct amdgpu_device, 
late_init_test_ib_work.work);

+    int r = amdgpu_ib_ring_tests(adev);
+
+    if (r)
+    DRM_ERROR("ib ring test failed (%d).\n", r);
+}
+
  /**
   * amdgpu_device_has_dc_support - check if dc is supported
   *
@@ -2212,6 +2223,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
  INIT_LIST_HEAD(>ring_lru_list);
  spin_lock_init(>ring_lru_list_lock);
  +    INIT_DELAYED_WORK(>late_init_test_ib_work,
+  amdgpu_device_late_init_test_ib_func_handler);
  INIT_DELAYED_WORK(>late_init_work,
    amdgpu_device_ip_late_init_func_handler);
  @@ -2374,9 +2387,9 @@ int amdgpu_device_init(struct amdgpu_device 
*adev,

  goto failed;
  }
  -    r = amdgpu_ib_ring_tests(adev);
-    if (r)
-    DRM_ERROR("ib ring test failed (%d).\n", r);
+    /* Schedule amdgpu_ib_ring_tests() */
+    mod_delayed_work(system_wq, >late_init_test_ib_work,
+    msecs_to_jiffies(AMDGPU_IB_TEST_SCHED_MS));


That doesn't work like you intended. mod_delayed_work() overrides the 
existing handler.


What you wanted to use is queue_delayed_work(), but as I said we 
should only have one delayed worker.
mod_delayed_work() is safer and optimal method that replaces 
cancel_delayed_work() followed by queue_delayed_work().

(https://lkml.org/lkml/2011/2/3/175)
But if you strongly insist i don't mind changing it.



    if (amdgpu_sriov_vf(adev))
  amdgpu_virt_init_data_exchange(adev);
@@ -2469,6 +2482,7 @@ void amdgpu_device_fini(struct amdgpu_device 
*adev)

  }
  adev->accel_working = false;
  cancel_delayed_work_sync(>late_init_work);
+ cancel_delayed_work_sync(>late_init_test_ib_work);
  /* free i2c buses */
  if (!amdgpu_device_has_dc_support(adev))
  amdgpu_i2c_fini(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c

index 487d39e..057bd9a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -279,6 +279,9 @@ static int amdgpu_info_ioctl(struct drm_device 
*dev, void *data, struct drm_file

  if (!info->return_size || !info->return_pointer)
  return -EINVAL;
  +    if (delayed_work_pending(>late_init_test_ib_work))
+    DRM_ERROR("IB test on ring not executed\n");
+


Please use flush_delayed_work() instead of issuing and error here.


Agree, wasn't sure of what to do here :).
So i will re-spin with the flush part added. Hope this reply clarifies 
your comments.

Thanks.
Regards,
Shirish S

Regards,
Christian.


  

Re: [PATCH] drm/amdgpu: defer test IBs on the rings at boot

2018-04-13 Thread Christian König

Am 13.04.2018 um 06:07 schrieb Shirish S:

amdgpu_ib_ring_tests() runs test IB's on rings at boot
contributes to ~500 ms of amdgpu driver's boot time.

This patch defers it and adds a check to report
in amdgpu_info_ioctl() if it was scheduled or not.


That is rather suboptimal, but see below.



Signed-off-by: Shirish S 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu.h|  2 ++
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 20 +---
  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c|  3 +++
  3 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 5734871..ae8f722 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1611,6 +1611,8 @@ struct amdgpu_device {
  
  	/* delayed work_func for deferring clockgating during resume */

struct delayed_work late_init_work;
+   /* delayed work_func to defer testing IB's on rings during boot */
+   struct delayed_work late_init_test_ib_work;


You must put the IB test into the late_init_work as well, otherwise the 
two delayed workers can race with each other.


  
  	struct amdgpu_virt	virt;

/* firmware VRAM reservation */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 1762eb4..e65a5e6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -63,6 +63,7 @@ MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
  MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
  
  #define AMDGPU_RESUME_MS		2000

+#define AMDGPU_IB_TEST_SCHED_MS2000
  
  static const char *amdgpu_asic_name[] = {

"TAHITI",
@@ -2105,6 +2106,16 @@ bool amdgpu_device_asic_has_dc_support(enum 
amd_asic_type asic_type)
}
  }
  
+static void amdgpu_device_late_init_test_ib_func_handler(struct work_struct *work)

+{
+   struct amdgpu_device *adev =
+   container_of(work, struct amdgpu_device, 
late_init_test_ib_work.work);
+   int r = amdgpu_ib_ring_tests(adev);
+
+   if (r)
+   DRM_ERROR("ib ring test failed (%d).\n", r);
+}
+
  /**
   * amdgpu_device_has_dc_support - check if dc is supported
   *
@@ -2212,6 +2223,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
INIT_LIST_HEAD(>ring_lru_list);
spin_lock_init(>ring_lru_list_lock);
  
+	INIT_DELAYED_WORK(>late_init_test_ib_work,

+ amdgpu_device_late_init_test_ib_func_handler);
INIT_DELAYED_WORK(>late_init_work,
  amdgpu_device_ip_late_init_func_handler);
  
@@ -2374,9 +2387,9 @@ int amdgpu_device_init(struct amdgpu_device *adev,

goto failed;
}
  
-	r = amdgpu_ib_ring_tests(adev);

-   if (r)
-   DRM_ERROR("ib ring test failed (%d).\n", r);
+   /* Schedule amdgpu_ib_ring_tests() */
+   mod_delayed_work(system_wq, >late_init_test_ib_work,
+   msecs_to_jiffies(AMDGPU_IB_TEST_SCHED_MS));


That doesn't work like you intended. mod_delayed_work() overrides the 
existing handler.


What you wanted to use is queue_delayed_work(), but as I said we should 
only have one delayed worker.


  
  	if (amdgpu_sriov_vf(adev))

amdgpu_virt_init_data_exchange(adev);
@@ -2469,6 +2482,7 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
}
adev->accel_working = false;
cancel_delayed_work_sync(>late_init_work);
+   cancel_delayed_work_sync(>late_init_test_ib_work);
/* free i2c buses */
if (!amdgpu_device_has_dc_support(adev))
amdgpu_i2c_fini(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 487d39e..057bd9a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -279,6 +279,9 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void 
*data, struct drm_file
if (!info->return_size || !info->return_pointer)
return -EINVAL;
  
+	if (delayed_work_pending(>late_init_test_ib_work))

+   DRM_ERROR("IB test on ring not executed\n");
+


Please use flush_delayed_work() instead of issuing and error here.

Regards,
Christian.


switch (info->query) {
case AMDGPU_INFO_ACCEL_WORKING:
ui32 = adev->accel_working;


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amdgpu: defer test IBs on the rings at boot

2018-04-12 Thread Shirish S
amdgpu_ib_ring_tests() runs test IB's on rings at boot
contributes to ~500 ms of amdgpu driver's boot time.

This patch defers it and adds a check to report
in amdgpu_info_ioctl() if it was scheduled or not.

Signed-off-by: Shirish S 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h|  2 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 20 +---
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c|  3 +++
 3 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 5734871..ae8f722 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1611,6 +1611,8 @@ struct amdgpu_device {
 
/* delayed work_func for deferring clockgating during resume */
struct delayed_work late_init_work;
+   /* delayed work_func to defer testing IB's on rings during boot */
+   struct delayed_work late_init_test_ib_work;
 
struct amdgpu_virt  virt;
/* firmware VRAM reservation */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 1762eb4..e65a5e6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -63,6 +63,7 @@ MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
 MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
 
 #define AMDGPU_RESUME_MS   2000
+#define AMDGPU_IB_TEST_SCHED_MS2000
 
 static const char *amdgpu_asic_name[] = {
"TAHITI",
@@ -2105,6 +2106,16 @@ bool amdgpu_device_asic_has_dc_support(enum 
amd_asic_type asic_type)
}
 }
 
+static void amdgpu_device_late_init_test_ib_func_handler(struct work_struct 
*work)
+{
+   struct amdgpu_device *adev =
+   container_of(work, struct amdgpu_device, 
late_init_test_ib_work.work);
+   int r = amdgpu_ib_ring_tests(adev);
+
+   if (r)
+   DRM_ERROR("ib ring test failed (%d).\n", r);
+}
+
 /**
  * amdgpu_device_has_dc_support - check if dc is supported
  *
@@ -2212,6 +2223,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
INIT_LIST_HEAD(>ring_lru_list);
spin_lock_init(>ring_lru_list_lock);
 
+   INIT_DELAYED_WORK(>late_init_test_ib_work,
+ amdgpu_device_late_init_test_ib_func_handler);
INIT_DELAYED_WORK(>late_init_work,
  amdgpu_device_ip_late_init_func_handler);
 
@@ -2374,9 +2387,9 @@ int amdgpu_device_init(struct amdgpu_device *adev,
goto failed;
}
 
-   r = amdgpu_ib_ring_tests(adev);
-   if (r)
-   DRM_ERROR("ib ring test failed (%d).\n", r);
+   /* Schedule amdgpu_ib_ring_tests() */
+   mod_delayed_work(system_wq, >late_init_test_ib_work,
+   msecs_to_jiffies(AMDGPU_IB_TEST_SCHED_MS));
 
if (amdgpu_sriov_vf(adev))
amdgpu_virt_init_data_exchange(adev);
@@ -2469,6 +2482,7 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
}
adev->accel_working = false;
cancel_delayed_work_sync(>late_init_work);
+   cancel_delayed_work_sync(>late_init_test_ib_work);
/* free i2c buses */
if (!amdgpu_device_has_dc_support(adev))
amdgpu_i2c_fini(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 487d39e..057bd9a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -279,6 +279,9 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void 
*data, struct drm_file
if (!info->return_size || !info->return_pointer)
return -EINVAL;
 
+   if (delayed_work_pending(>late_init_test_ib_work))
+   DRM_ERROR("IB test on ring not executed\n");
+
switch (info->query) {
case AMDGPU_INFO_ACCEL_WORKING:
ui32 = adev->accel_working;
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx