RE: [PATCH] drm/amdgpu: introduce a new parameter to configure how many KCQ we want(v2)

2020-07-28 Thread Liu, Monk
[AMD Official Use Only - Internal Distribution Only]

Thanks Felix

I reworked my patch with your suggestion and I can get queues evenly cross 
pipes, e.g.: modprobe amdgpu num_kcq=6

[  409.878557] amdgpu :00:07.0: amdgpu: ring comp_1.0.0 uses VM inv eng 1 
on hub 0
[  409.878559] amdgpu :00:07.0: amdgpu: ring comp_1.1.0 uses VM inv eng 4 
on hub 0
[  409.878561] amdgpu :00:07.0: amdgpu: ring comp_1.2.0 uses VM inv eng 5 
on hub 0
[  409.878563] amdgpu :00:07.0: amdgpu: ring comp_1.3.0 uses VM inv eng 6 
on hub 0
[  409.878565] amdgpu :00:07.0: amdgpu: ring comp_1.0.1 uses VM inv eng 7 
on hub 0
[  409.878567] amdgpu :00:07.0: amdgpu: ring comp_1.1.1 uses VM inv eng 8 
on hub 0
[  409.878568] amdgpu :00:07.0: amdgpu: ring kiq_2.1.0 uses VM inv eng 9 on 
hub 0

Please review my patch upcoming

_
Monk Liu|GPU Virtualization Team |AMD


-Original Message-
From: Kuehling, Felix 
Sent: Tuesday, July 28, 2020 7:33 AM
To: amd-gfx@lists.freedesktop.org; Liu, Monk 
Subject: Re: [PATCH] drm/amdgpu: introduce a new parameter to configure how 
many KCQ we want(v2)

Am 2020-07-27 um 6:47 a.m. schrieb Monk Liu:
> what:
> the MQD's save and restore of kernel compute queues cost lots of
> clocks during world switch which impacts a lot to multi-VF performance
>
> how:
> introduce a paramter to control the number of kernel compute queues to
> avoid performance drop if there is no kernel compute queue needed
>
> notes:
> this paramter only affects gfx 8/9/10
>
> TODO:
> in the future we will let hypervisor driver to set this paramter
> automatically thus no need for user to configure it through modprobe
> in virtual machine
>
> Signed-off-by: Monk Liu 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu.h|  1 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  5 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c|  4 
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c| 27 +-
>  drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 30 +++--
>  drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c  | 29 ++--
>  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c  | 31 
> +++---
>  7 files changed, 71 insertions(+), 56 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index e97c088..71a3d6a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -201,6 +201,7 @@ extern int amdgpu_si_support;  #ifdef
> CONFIG_DRM_AMDGPU_CIK  extern int amdgpu_cik_support;  #endif
> +extern int amdgpu_num_kcq_user_set;
>
>  #define AMDGPU_VM_MAX_NUM_CTX4096
>  #define AMDGPU_SG_THRESHOLD(256*1024*1024)
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 62ecac9..18b93ef 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -1199,6 +1199,11 @@ static int amdgpu_device_check_arguments(struct
> amdgpu_device *adev)
>
>  amdgpu_gmc_tmz_set(adev);
>
> +if (amdgpu_num_kcq_user_set > 8 || amdgpu_num_kcq_user_set < 0) {
> +amdgpu_num_kcq_user_set = 8;
> +dev_warn(adev-dev, "set KCQ number to 8 due to invalid paramter provided by 
> user\n");
> +}
> +
>  return 0;
>  }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> index 6291f5f..03a94e9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> @@ -150,6 +150,7 @@ int amdgpu_noretry;  int amdgpu_force_asic_type =
> -1;  int amdgpu_tmz = 0;  int amdgpu_reset_method = -1; /* auto */
> +int amdgpu_num_kcq_user_set = 8;
>
>  struct amdgpu_mgpu_info mgpu_info = {
>  .mutex = __MUTEX_INITIALIZER(mgpu_info.mutex),
> @@ -765,6 +766,9 @@ module_param_named(tmz, amdgpu_tmz, int, 0444);
> MODULE_PARM_DESC(reset_method, "GPU reset method (-1 = auto (default),
> 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco)");
> module_param_named(reset_method, amdgpu_reset_method, int, 0444);
>
> +MODULE_PARM_DESC(num_kcq, "number of KCQ user want to setup (8 if set
> +to greater than 8 or less than 0, only affect gfx 8+)");
> +module_param_named(num_kcq, amdgpu_num_kcq_user_set, int, 0444);
> +
>  static const struct pci_device_id pciidlist[] = {  #ifdef
> CONFIG_DRM_AMDGPU_SI
>  {0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI}, diff
> --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> index 8eff017..0b59049 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> 

Re: [PATCH] drm/amdgpu: introduce a new parameter to configure how many KCQ we want(v2)

2020-07-27 Thread Felix Kuehling
Am 2020-07-27 um 6:47 a.m. schrieb Monk Liu:
> what:
> the MQD's save and restore of kernel compute queues cost lots of clocks
> during world switch which impacts a lot to multi-VF performance
>
> how:
> introduce a paramter to control the number of kernel compute queues to
> avoid performance drop if there is no kernel compute queue needed
>
> notes:
> this paramter only affects gfx 8/9/10
>
> TODO:
> in the future we will let hypervisor driver to set this paramter
> automatically thus no need for user to configure it through
> modprobe in virtual machine
>
> Signed-off-by: Monk Liu 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu.h|  1 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  5 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c|  4 
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c| 27 +-
>  drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 30 +++--
>  drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c  | 29 ++--
>  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c  | 31 
> +++---
>  7 files changed, 71 insertions(+), 56 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index e97c088..71a3d6a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -201,6 +201,7 @@ extern int amdgpu_si_support;
>  #ifdef CONFIG_DRM_AMDGPU_CIK
>  extern int amdgpu_cik_support;
>  #endif
> +extern int amdgpu_num_kcq_user_set;
>  
>  #define AMDGPU_VM_MAX_NUM_CTX4096
>  #define AMDGPU_SG_THRESHOLD  (256*1024*1024)
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 62ecac9..18b93ef 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -1199,6 +1199,11 @@ static int amdgpu_device_check_arguments(struct 
> amdgpu_device *adev)
>  
>   amdgpu_gmc_tmz_set(adev);
>  
> + if (amdgpu_num_kcq_user_set > 8 || amdgpu_num_kcq_user_set < 0) {
> + amdgpu_num_kcq_user_set = 8;
> + dev_warn(adev-dev, "set KCQ number to 8 due to invalid paramter 
> provided by user\n");
> + }
> +
>   return 0;
>  }
>  
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> index 6291f5f..03a94e9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> @@ -150,6 +150,7 @@ int amdgpu_noretry;
>  int amdgpu_force_asic_type = -1;
>  int amdgpu_tmz = 0;
>  int amdgpu_reset_method = -1; /* auto */
> +int amdgpu_num_kcq_user_set = 8;
>  
>  struct amdgpu_mgpu_info mgpu_info = {
>   .mutex = __MUTEX_INITIALIZER(mgpu_info.mutex),
> @@ -765,6 +766,9 @@ module_param_named(tmz, amdgpu_tmz, int, 0444);
>  MODULE_PARM_DESC(reset_method, "GPU reset method (-1 = auto (default), 0 = 
> legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco)");
>  module_param_named(reset_method, amdgpu_reset_method, int, 0444);
>  
> +MODULE_PARM_DESC(num_kcq, "number of KCQ user want to setup (8 if set to 
> greater than 8 or less than 0, only affect gfx 8+)");
> +module_param_named(num_kcq, amdgpu_num_kcq_user_set, int, 0444);
> +
>  static const struct pci_device_id pciidlist[] = {
>  #ifdef  CONFIG_DRM_AMDGPU_SI
>   {0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> index 8eff017..0b59049 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> @@ -202,7 +202,7 @@ bool amdgpu_gfx_is_high_priority_compute_queue(struct 
> amdgpu_device *adev,
>  
>  void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
>  {
> - int i, queue, pipe, mec;
> + int i, queue, pipe, mec, j = 0;
>   bool multipipe_policy = amdgpu_gfx_is_multipipe_capable(adev);
>  
>   /* policy for amdgpu compute queue ownership */
> @@ -219,23 +219,24 @@ void amdgpu_gfx_compute_queue_acquire(struct 
> amdgpu_device *adev)
>  
>   if (multipipe_policy) {
>   /* policy: amdgpu owns the first two queues of the 
> first MEC */
> - if (mec == 0 && queue < 2)
> - set_bit(i, adev->gfx.mec.queue_bitmap);
> + if (mec == 0 && queue < 2) {
> + if (j++ < adev->gfx.num_compute_rings)

This is not ideal, because it wouldn't distribute the queues evenly
across pipes if there are fewer than 7. I would change how queue and
pipe are calculated from i for the multipipe_policy case:

if (multipipe_policy) {
pipe = i % adev->gfx.mec.num_pipe_per_mec;
queue = (i / adev->gfx.mec.num_pipe_per_mec)
% adev->gfx.mec.num_queue_per_pipe;
} else {
/* previous way */
}


[PATCH] drm/amdgpu: introduce a new parameter to configure how many KCQ we want(v2)

2020-07-27 Thread Monk Liu
what:
the MQD's save and restore of kernel compute queues cost lots of clocks
during world switch which impacts a lot to multi-VF performance

how:
introduce a paramter to control the number of kernel compute queues to
avoid performance drop if there is no kernel compute queue needed

notes:
this paramter only affects gfx 8/9/10

TODO:
in the future we will let hypervisor driver to set this paramter
automatically thus no need for user to configure it through
modprobe in virtual machine

Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h|  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  5 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c|  4 
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c| 27 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 30 +++--
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c  | 29 ++--
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c  | 31 +++---
 7 files changed, 71 insertions(+), 56 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index e97c088..71a3d6a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -201,6 +201,7 @@ extern int amdgpu_si_support;
 #ifdef CONFIG_DRM_AMDGPU_CIK
 extern int amdgpu_cik_support;
 #endif
+extern int amdgpu_num_kcq_user_set;
 
 #define AMDGPU_VM_MAX_NUM_CTX  4096
 #define AMDGPU_SG_THRESHOLD(256*1024*1024)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 62ecac9..18b93ef 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1199,6 +1199,11 @@ static int amdgpu_device_check_arguments(struct 
amdgpu_device *adev)
 
amdgpu_gmc_tmz_set(adev);
 
+   if (amdgpu_num_kcq_user_set > 8 || amdgpu_num_kcq_user_set < 0) {
+   amdgpu_num_kcq_user_set = 8;
+   dev_warn(adev-dev, "set KCQ number to 8 due to invalid paramter 
provided by user\n");
+   }
+
return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 6291f5f..03a94e9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -150,6 +150,7 @@ int amdgpu_noretry;
 int amdgpu_force_asic_type = -1;
 int amdgpu_tmz = 0;
 int amdgpu_reset_method = -1; /* auto */
+int amdgpu_num_kcq_user_set = 8;
 
 struct amdgpu_mgpu_info mgpu_info = {
.mutex = __MUTEX_INITIALIZER(mgpu_info.mutex),
@@ -765,6 +766,9 @@ module_param_named(tmz, amdgpu_tmz, int, 0444);
 MODULE_PARM_DESC(reset_method, "GPU reset method (-1 = auto (default), 0 = 
legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco)");
 module_param_named(reset_method, amdgpu_reset_method, int, 0444);
 
+MODULE_PARM_DESC(num_kcq, "number of KCQ user want to setup (8 if set to 
greater than 8 or less than 0, only affect gfx 8+)");
+module_param_named(num_kcq, amdgpu_num_kcq_user_set, int, 0444);
+
 static const struct pci_device_id pciidlist[] = {
 #ifdef  CONFIG_DRM_AMDGPU_SI
{0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index 8eff017..0b59049 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -202,7 +202,7 @@ bool amdgpu_gfx_is_high_priority_compute_queue(struct 
amdgpu_device *adev,
 
 void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
 {
-   int i, queue, pipe, mec;
+   int i, queue, pipe, mec, j = 0;
bool multipipe_policy = amdgpu_gfx_is_multipipe_capable(adev);
 
/* policy for amdgpu compute queue ownership */
@@ -219,23 +219,24 @@ void amdgpu_gfx_compute_queue_acquire(struct 
amdgpu_device *adev)
 
if (multipipe_policy) {
/* policy: amdgpu owns the first two queues of the 
first MEC */
-   if (mec == 0 && queue < 2)
-   set_bit(i, adev->gfx.mec.queue_bitmap);
+   if (mec == 0 && queue < 2) {
+   if (j++ < adev->gfx.num_compute_rings)
+   set_bit(i, adev->gfx.mec.queue_bitmap);
+   else
+   break;
+   }
} else {
/* policy: amdgpu owns all queues in the first pipe */
-   if (mec == 0 && pipe == 0)
-   set_bit(i, adev->gfx.mec.queue_bitmap);
+   if (mec == 0 && pipe == 0) {
+   if (j++ < adev->gfx.num_compute_rings)
+   set_bit(i, adev->gfx.mec.queue_bitmap);
+   else
+