Re: [PATCH] drm/amdgpu: defer test IBs on the rings at boot (V2)

2018-04-16 Thread S, Shirish



On 4/13/2018 10:20 PM, Alex Deucher wrote:

On Fri, Apr 13, 2018 at 9:25 AM, Christian König
 wrote:

Am 13.04.2018 um 10:31 schrieb Shirish S:

amdgpu_ib_ring_tests() runs test IB's on rings at boot
contributes to ~500 ms of amdgpu driver's boot time.

This patch defers it and ensures that its executed
in amdgpu_info_ioctl() if it wasn't scheduled.

V2: Use queue_delayed_work() & flush_delayed_work().

Signed-off-by: Shirish S 
---
   drivers/gpu/drm/amd/amdgpu/amdgpu.h|  2 ++
   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 20 +---
   drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c|  4 
   3 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 5734871..ae8f722 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1611,6 +1611,8 @@ struct amdgpu_device {
 /* delayed work_func for deferring clockgating during resume */
 struct delayed_work late_init_work;
+   /* delayed work_func to defer testing IB's on rings during boot */
+   struct delayed_work late_init_test_ib_work;


That still has the chance of running the late init in parallel with the IB
tests and that really doesn't looks like a good idea to me.

Yeah, at least on older chips we run into problems if we power or
clock gate some engines while they are in use.  Even on engines that
support dynamic gating, you usually have to set it up while the engine
is idle.  Make sure the IB tests run before we enable gating.

Ok Alex.
I have re-spun V3, with only one delayed work and ensuring ib tests are 
run before enabling clock gating.

Regards,
Shirish S

Alex


Is there any issue with putting the IB test into the late init work handler
as well?



 struct amdgpu_virt  virt;
 /* firmware VRAM reservation */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 1762eb4..ee84058 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -63,6 +63,7 @@ MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
   MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
 #define AMDGPU_RESUME_MS2000
+#define AMDGPU_IB_TEST_SCHED_MS2000
 static const char *amdgpu_asic_name[] = {
 "TAHITI",
@@ -2105,6 +2106,16 @@ bool amdgpu_device_asic_has_dc_support(enum
amd_asic_type asic_type)
 }
   }
   +static void amdgpu_device_late_init_test_ib_func_handler(struct
work_struct *work)
+{
+   struct amdgpu_device *adev =
+   container_of(work, struct amdgpu_device,
late_init_test_ib_work.work);
+   int r = amdgpu_ib_ring_tests(adev);
+
+   if (r)
+   DRM_ERROR("ib ring test failed (%d).\n", r);
+}
+
   /**
* amdgpu_device_has_dc_support - check if dc is supported
*
@@ -2212,6 +2223,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 INIT_LIST_HEAD(>ring_lru_list);
 spin_lock_init(>ring_lru_list_lock);
   + INIT_DELAYED_WORK(>late_init_test_ib_work,
+ amdgpu_device_late_init_test_ib_func_handler);
 INIT_DELAYED_WORK(>late_init_work,
   amdgpu_device_ip_late_init_func_handler);
   @@ -2374,9 +2387,9 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 goto failed;
 }
   - r = amdgpu_ib_ring_tests(adev);
-   if (r)
-   DRM_ERROR("ib ring test failed (%d).\n", r);
+   /* Schedule amdgpu_ib_ring_tests() */
+   queue_delayed_work(system_wq, >late_init_test_ib_work,
+   msecs_to_jiffies(AMDGPU_IB_TEST_SCHED_MS));
 if (amdgpu_sriov_vf(adev))
 amdgpu_virt_init_data_exchange(adev);
@@ -2469,6 +2482,7 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
 }
 adev->accel_working = false;
 cancel_delayed_work_sync(>late_init_work);
+   cancel_delayed_work_sync(>late_init_test_ib_work);
 /* free i2c buses */
 if (!amdgpu_device_has_dc_support(adev))
 amdgpu_i2c_fini(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 487d39e..6fa326b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -279,6 +279,10 @@ static int amdgpu_info_ioctl(struct drm_device *dev,
void *data, struct drm_file
 if (!info->return_size || !info->return_pointer)
 return -EINVAL;
   + /* Ensure IB tests on ring are executed */
+   if (delayed_work_pending(>late_init_test_ib_work))
+   flush_delayed_work(>late_init_test_ib_work);
+


You just need to call flush_delayed_work() here without the if.

Regards,
Christian.


 switch (info->query) {
 case AMDGPU_INFO_ACCEL_WORKING:
 ui32 = 

Re: [PATCH] drm/amdgpu: defer test IBs on the rings at boot (V2)

2018-04-13 Thread Alex Deucher
On Fri, Apr 13, 2018 at 9:25 AM, Christian König
 wrote:
> Am 13.04.2018 um 10:31 schrieb Shirish S:
>>
>> amdgpu_ib_ring_tests() runs test IB's on rings at boot
>> contributes to ~500 ms of amdgpu driver's boot time.
>>
>> This patch defers it and ensures that its executed
>> in amdgpu_info_ioctl() if it wasn't scheduled.
>>
>> V2: Use queue_delayed_work() & flush_delayed_work().
>>
>> Signed-off-by: Shirish S 
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu.h|  2 ++
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 20 +---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c|  4 
>>   3 files changed, 23 insertions(+), 3 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> index 5734871..ae8f722 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> @@ -1611,6 +1611,8 @@ struct amdgpu_device {
>> /* delayed work_func for deferring clockgating during resume */
>> struct delayed_work late_init_work;
>> +   /* delayed work_func to defer testing IB's on rings during boot */
>> +   struct delayed_work late_init_test_ib_work;
>
>
> That still has the chance of running the late init in parallel with the IB
> tests and that really doesn't looks like a good idea to me.

Yeah, at least on older chips we run into problems if we power or
clock gate some engines while they are in use.  Even on engines that
support dynamic gating, you usually have to set it up while the engine
is idle.  Make sure the IB tests run before we enable gating.

Alex

>
> Is there any issue with putting the IB test into the late init work handler
> as well?
>
>
>> struct amdgpu_virt  virt;
>> /* firmware VRAM reservation */
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> index 1762eb4..ee84058 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> @@ -63,6 +63,7 @@ MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
>>   MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
>> #define AMDGPU_RESUME_MS2000
>> +#define AMDGPU_IB_TEST_SCHED_MS2000
>> static const char *amdgpu_asic_name[] = {
>> "TAHITI",
>> @@ -2105,6 +2106,16 @@ bool amdgpu_device_asic_has_dc_support(enum
>> amd_asic_type asic_type)
>> }
>>   }
>>   +static void amdgpu_device_late_init_test_ib_func_handler(struct
>> work_struct *work)
>> +{
>> +   struct amdgpu_device *adev =
>> +   container_of(work, struct amdgpu_device,
>> late_init_test_ib_work.work);
>> +   int r = amdgpu_ib_ring_tests(adev);
>> +
>> +   if (r)
>> +   DRM_ERROR("ib ring test failed (%d).\n", r);
>> +}
>> +
>>   /**
>>* amdgpu_device_has_dc_support - check if dc is supported
>>*
>> @@ -2212,6 +2223,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
>> INIT_LIST_HEAD(>ring_lru_list);
>> spin_lock_init(>ring_lru_list_lock);
>>   + INIT_DELAYED_WORK(>late_init_test_ib_work,
>> + amdgpu_device_late_init_test_ib_func_handler);
>> INIT_DELAYED_WORK(>late_init_work,
>>   amdgpu_device_ip_late_init_func_handler);
>>   @@ -2374,9 +2387,9 @@ int amdgpu_device_init(struct amdgpu_device *adev,
>> goto failed;
>> }
>>   - r = amdgpu_ib_ring_tests(adev);
>> -   if (r)
>> -   DRM_ERROR("ib ring test failed (%d).\n", r);
>> +   /* Schedule amdgpu_ib_ring_tests() */
>> +   queue_delayed_work(system_wq, >late_init_test_ib_work,
>> +   msecs_to_jiffies(AMDGPU_IB_TEST_SCHED_MS));
>> if (amdgpu_sriov_vf(adev))
>> amdgpu_virt_init_data_exchange(adev);
>> @@ -2469,6 +2482,7 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
>> }
>> adev->accel_working = false;
>> cancel_delayed_work_sync(>late_init_work);
>> +   cancel_delayed_work_sync(>late_init_test_ib_work);
>> /* free i2c buses */
>> if (!amdgpu_device_has_dc_support(adev))
>> amdgpu_i2c_fini(adev);
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>> index 487d39e..6fa326b 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>> @@ -279,6 +279,10 @@ static int amdgpu_info_ioctl(struct drm_device *dev,
>> void *data, struct drm_file
>> if (!info->return_size || !info->return_pointer)
>> return -EINVAL;
>>   + /* Ensure IB tests on ring are executed */
>> +   if (delayed_work_pending(>late_init_test_ib_work))
>> +   flush_delayed_work(>late_init_test_ib_work);
>> +
>
>
> You just need to call flush_delayed_work() here without the if.
>
> Regards,
> Christian.
>
>> 

Re: [PATCH] drm/amdgpu: defer test IBs on the rings at boot (V2)

2018-04-13 Thread Christian König

Am 13.04.2018 um 10:31 schrieb Shirish S:

amdgpu_ib_ring_tests() runs test IB's on rings at boot
contributes to ~500 ms of amdgpu driver's boot time.

This patch defers it and ensures that its executed
in amdgpu_info_ioctl() if it wasn't scheduled.

V2: Use queue_delayed_work() & flush_delayed_work().

Signed-off-by: Shirish S 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu.h|  2 ++
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 20 +---
  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c|  4 
  3 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 5734871..ae8f722 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1611,6 +1611,8 @@ struct amdgpu_device {
  
  	/* delayed work_func for deferring clockgating during resume */

struct delayed_work late_init_work;
+   /* delayed work_func to defer testing IB's on rings during boot */
+   struct delayed_work late_init_test_ib_work;


That still has the chance of running the late init in parallel with the 
IB tests and that really doesn't looks like a good idea to me.


Is there any issue with putting the IB test into the late init work 
handler as well?


  
  	struct amdgpu_virt	virt;

/* firmware VRAM reservation */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 1762eb4..ee84058 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -63,6 +63,7 @@ MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
  MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
  
  #define AMDGPU_RESUME_MS		2000

+#define AMDGPU_IB_TEST_SCHED_MS2000
  
  static const char *amdgpu_asic_name[] = {

"TAHITI",
@@ -2105,6 +2106,16 @@ bool amdgpu_device_asic_has_dc_support(enum 
amd_asic_type asic_type)
}
  }
  
+static void amdgpu_device_late_init_test_ib_func_handler(struct work_struct *work)

+{
+   struct amdgpu_device *adev =
+   container_of(work, struct amdgpu_device, 
late_init_test_ib_work.work);
+   int r = amdgpu_ib_ring_tests(adev);
+
+   if (r)
+   DRM_ERROR("ib ring test failed (%d).\n", r);
+}
+
  /**
   * amdgpu_device_has_dc_support - check if dc is supported
   *
@@ -2212,6 +2223,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
INIT_LIST_HEAD(>ring_lru_list);
spin_lock_init(>ring_lru_list_lock);
  
+	INIT_DELAYED_WORK(>late_init_test_ib_work,

+ amdgpu_device_late_init_test_ib_func_handler);
INIT_DELAYED_WORK(>late_init_work,
  amdgpu_device_ip_late_init_func_handler);
  
@@ -2374,9 +2387,9 @@ int amdgpu_device_init(struct amdgpu_device *adev,

goto failed;
}
  
-	r = amdgpu_ib_ring_tests(adev);

-   if (r)
-   DRM_ERROR("ib ring test failed (%d).\n", r);
+   /* Schedule amdgpu_ib_ring_tests() */
+   queue_delayed_work(system_wq, >late_init_test_ib_work,
+   msecs_to_jiffies(AMDGPU_IB_TEST_SCHED_MS));
  
  	if (amdgpu_sriov_vf(adev))

amdgpu_virt_init_data_exchange(adev);
@@ -2469,6 +2482,7 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
}
adev->accel_working = false;
cancel_delayed_work_sync(>late_init_work);
+   cancel_delayed_work_sync(>late_init_test_ib_work);
/* free i2c buses */
if (!amdgpu_device_has_dc_support(adev))
amdgpu_i2c_fini(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 487d39e..6fa326b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -279,6 +279,10 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void 
*data, struct drm_file
if (!info->return_size || !info->return_pointer)
return -EINVAL;
  
+	/* Ensure IB tests on ring are executed */

+   if (delayed_work_pending(>late_init_test_ib_work))
+   flush_delayed_work(>late_init_test_ib_work);
+


You just need to call flush_delayed_work() here without the if.

Regards,
Christian.


switch (info->query) {
case AMDGPU_INFO_ACCEL_WORKING:
ui32 = adev->accel_working;


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx