Re: [PATCH 6/6] drm/amdkfd: Delete unnecessary unmap queue package submissions

2020-02-25 Thread Zhao, Yong
[AMD Official Use Only - Internal Distribution Only]

Thanks! I will update the commit message before pushing. It should be the way 
how SDMA queue count were used to unmap SDMA engines according to the previous 
understanding was wrong.

Regards,
Yong

From: Kuehling, Felix 
Sent: Tuesday, February 25, 2020 12:06 PM
To: Zhao, Yong ; amd-gfx@lists.freedesktop.org 

Subject: Re: [PATCH 6/6] drm/amdkfd: Delete unnecessary unmap queue package 
submissions

As I understand it, the SDMA queue counting wasn't incorrect. The main
change here is that you no longer send separate unmap packets for SDMA
queues, and that makes SDMA queue counting unnecessary.

That said, this patch series is a nice cleanup and improvement. The
series is

Reviewed-by: Felix Kuehling 

On 2020-02-24 17:18, Yong Zhao wrote:
> The previous SDMA queue counting was wrong. In addition, after confirming
> with MEC firmware team, we understands that only one unmap queue package,
> instead of one unmap queue package for CP and each SDMA engine, is needed,
> which results in much simpler driver code.
>
> Change-Id: I84fd2f7e63d6b7f664580b425a78d3e995ce9abc
> Signed-off-by: Yong Zhao 
> ---
>   .../drm/amd/amdkfd/kfd_device_queue_manager.c | 79 ++-
>   .../drm/amd/amdkfd/kfd_device_queue_manager.h |  2 -
>   .../amd/amdkfd/kfd_process_queue_manager.c| 16 ++--
>   3 files changed, 29 insertions(+), 68 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> index 958275db3f55..692abfd2088a 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> @@ -109,6 +109,11 @@ static unsigned int get_num_xgmi_sdma_engines(struct 
> device_queue_manager *dqm)
>return dqm->dev->device_info->num_xgmi_sdma_engines;
>   }
>
> +static unsigned int get_num_all_sdma_engines(struct device_queue_manager 
> *dqm)
> +{
> + return get_num_sdma_engines(dqm) + get_num_xgmi_sdma_engines(dqm);
> +}
> +
>   unsigned int get_num_sdma_queues(struct device_queue_manager *dqm)
>   {
>return dqm->dev->device_info->num_sdma_engines
> @@ -375,11 +380,6 @@ static int create_queue_nocpsch(struct 
> device_queue_manager *dqm,
>if (q->properties.is_active)
>increment_queue_count(dqm, q->properties.type);
>
> - if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
> - dqm->sdma_queue_count++;
> - else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
> - dqm->xgmi_sdma_queue_count++;
> -
>/*
> * Unconditionally increment this counter, regardless of the queue's
> * type or whether the queue is active.
> @@ -460,15 +460,13 @@ static int destroy_queue_nocpsch_locked(struct 
> device_queue_manager *dqm,
>mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
>q->properties.type)];
>
> - if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) {
> + if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
>deallocate_hqd(dqm, q);
> - } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
> - dqm->sdma_queue_count--;
> + else if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
>deallocate_sdma_queue(dqm, q);
> - } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
> - dqm->xgmi_sdma_queue_count--;
> + else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
>deallocate_sdma_queue(dqm, q);
> - } else {
> + else {
>pr_debug("q->properties.type %d is invalid\n",
>q->properties.type);
>return -EINVAL;
> @@ -915,8 +913,6 @@ static int initialize_nocpsch(struct device_queue_manager 
> *dqm)
>INIT_LIST_HEAD(>queues);
>dqm->active_queue_count = dqm->next_pipe_to_allocate = 0;
>dqm->active_cp_queue_count = 0;
> - dqm->sdma_queue_count = 0;
> - dqm->xgmi_sdma_queue_count = 0;
>
>for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
>int pipe_offset = pipe * get_queues_per_pipe(dqm);
> @@ -981,8 +977,11 @@ static int allocate_sdma_queue(struct 
> device_queue_manager *dqm,
>int bit;
>
>if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
> - if (dqm->sdma_bitmap == 0)
> + if (dqm->sdma_bitmap == 0) {
> + pr_err("No more SDMA queue to allocate\n");
>return -ENOMEM;
> + }
> +
>

Re: [PATCH 6/6] drm/amdkfd: Delete unnecessary unmap queue package submissions

2020-02-25 Thread Felix Kuehling
As I understand it, the SDMA queue counting wasn't incorrect. The main 
change here is that you no longer send separate unmap packets for SDMA 
queues, and that makes SDMA queue counting unnecessary.


That said, this patch series is a nice cleanup and improvement. The 
series is


Reviewed-by: Felix Kuehling 

On 2020-02-24 17:18, Yong Zhao wrote:

The previous SDMA queue counting was wrong. In addition, after confirming
with MEC firmware team, we understands that only one unmap queue package,
instead of one unmap queue package for CP and each SDMA engine, is needed,
which results in much simpler driver code.

Change-Id: I84fd2f7e63d6b7f664580b425a78d3e995ce9abc
Signed-off-by: Yong Zhao 
---
  .../drm/amd/amdkfd/kfd_device_queue_manager.c | 79 ++-
  .../drm/amd/amdkfd/kfd_device_queue_manager.h |  2 -
  .../amd/amdkfd/kfd_process_queue_manager.c| 16 ++--
  3 files changed, 29 insertions(+), 68 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 958275db3f55..692abfd2088a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -109,6 +109,11 @@ static unsigned int get_num_xgmi_sdma_engines(struct 
device_queue_manager *dqm)
return dqm->dev->device_info->num_xgmi_sdma_engines;
  }
  
+static unsigned int get_num_all_sdma_engines(struct device_queue_manager *dqm)

+{
+   return get_num_sdma_engines(dqm) + get_num_xgmi_sdma_engines(dqm);
+}
+
  unsigned int get_num_sdma_queues(struct device_queue_manager *dqm)
  {
return dqm->dev->device_info->num_sdma_engines
@@ -375,11 +380,6 @@ static int create_queue_nocpsch(struct 
device_queue_manager *dqm,
if (q->properties.is_active)
increment_queue_count(dqm, q->properties.type);
  
-	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)

-   dqm->sdma_queue_count++;
-   else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
-   dqm->xgmi_sdma_queue_count++;
-
/*
 * Unconditionally increment this counter, regardless of the queue's
 * type or whether the queue is active.
@@ -460,15 +460,13 @@ static int destroy_queue_nocpsch_locked(struct 
device_queue_manager *dqm,
mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
q->properties.type)];
  
-	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) {

+   if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
deallocate_hqd(dqm, q);
-   } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
-   dqm->sdma_queue_count--;
+   else if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
deallocate_sdma_queue(dqm, q);
-   } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
-   dqm->xgmi_sdma_queue_count--;
+   else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
deallocate_sdma_queue(dqm, q);
-   } else {
+   else {
pr_debug("q->properties.type %d is invalid\n",
q->properties.type);
return -EINVAL;
@@ -915,8 +913,6 @@ static int initialize_nocpsch(struct device_queue_manager 
*dqm)
INIT_LIST_HEAD(>queues);
dqm->active_queue_count = dqm->next_pipe_to_allocate = 0;
dqm->active_cp_queue_count = 0;
-   dqm->sdma_queue_count = 0;
-   dqm->xgmi_sdma_queue_count = 0;
  
  	for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {

int pipe_offset = pipe * get_queues_per_pipe(dqm);
@@ -981,8 +977,11 @@ static int allocate_sdma_queue(struct device_queue_manager 
*dqm,
int bit;
  
  	if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {

-   if (dqm->sdma_bitmap == 0)
+   if (dqm->sdma_bitmap == 0) {
+   pr_err("No more SDMA queue to allocate\n");
return -ENOMEM;
+   }
+
bit = __ffs64(dqm->sdma_bitmap);
dqm->sdma_bitmap &= ~(1ULL << bit);
q->sdma_id = bit;
@@ -991,8 +990,10 @@ static int allocate_sdma_queue(struct device_queue_manager 
*dqm,
q->properties.sdma_queue_id = q->sdma_id /
get_num_sdma_engines(dqm);
} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
-   if (dqm->xgmi_sdma_bitmap == 0)
+   if (dqm->xgmi_sdma_bitmap == 0) {
+   pr_err("No more XGMI SDMA queue to allocate\n");
return -ENOMEM;
+   }
bit = __ffs64(dqm->xgmi_sdma_bitmap);
dqm->xgmi_sdma_bitmap &= ~(1ULL << bit);
q->sdma_id = bit;
@@ -1081,8 +1082,7 @@ static int initialize_cpsch(struct device_queue_manager 
*dqm)
INIT_LIST_HEAD(>queues);
dqm->active_queue_count = dqm->processes_count = 0;

Re: [PATCH 6/6] drm/amdkfd: Delete unnecessary unmap queue package submissions

2020-02-25 Thread Deucher, Alexander
[AMD Public Use]

Series is:
Acked-by: Alex Deucher 

From: amd-gfx  on behalf of Yong Zhao 

Sent: Monday, February 24, 2020 5:18 PM
To: amd-gfx@lists.freedesktop.org 
Cc: Zhao, Yong 
Subject: [PATCH 6/6] drm/amdkfd: Delete unnecessary unmap queue package 
submissions

The previous SDMA queue counting was wrong. In addition, after confirming
with MEC firmware team, we understands that only one unmap queue package,
instead of one unmap queue package for CP and each SDMA engine, is needed,
which results in much simpler driver code.

Change-Id: I84fd2f7e63d6b7f664580b425a78d3e995ce9abc
Signed-off-by: Yong Zhao 
---
 .../drm/amd/amdkfd/kfd_device_queue_manager.c | 79 ++-
 .../drm/amd/amdkfd/kfd_device_queue_manager.h |  2 -
 .../amd/amdkfd/kfd_process_queue_manager.c| 16 ++--
 3 files changed, 29 insertions(+), 68 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 958275db3f55..692abfd2088a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -109,6 +109,11 @@ static unsigned int get_num_xgmi_sdma_engines(struct 
device_queue_manager *dqm)
 return dqm->dev->device_info->num_xgmi_sdma_engines;
 }

+static unsigned int get_num_all_sdma_engines(struct device_queue_manager *dqm)
+{
+   return get_num_sdma_engines(dqm) + get_num_xgmi_sdma_engines(dqm);
+}
+
 unsigned int get_num_sdma_queues(struct device_queue_manager *dqm)
 {
 return dqm->dev->device_info->num_sdma_engines
@@ -375,11 +380,6 @@ static int create_queue_nocpsch(struct 
device_queue_manager *dqm,
 if (q->properties.is_active)
 increment_queue_count(dqm, q->properties.type);

-   if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
-   dqm->sdma_queue_count++;
-   else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
-   dqm->xgmi_sdma_queue_count++;
-
 /*
  * Unconditionally increment this counter, regardless of the queue's
  * type or whether the queue is active.
@@ -460,15 +460,13 @@ static int destroy_queue_nocpsch_locked(struct 
device_queue_manager *dqm,
 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
 q->properties.type)];

-   if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) {
+   if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
 deallocate_hqd(dqm, q);
-   } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
-   dqm->sdma_queue_count--;
+   else if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
 deallocate_sdma_queue(dqm, q);
-   } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
-   dqm->xgmi_sdma_queue_count--;
+   else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
 deallocate_sdma_queue(dqm, q);
-   } else {
+   else {
 pr_debug("q->properties.type %d is invalid\n",
 q->properties.type);
 return -EINVAL;
@@ -915,8 +913,6 @@ static int initialize_nocpsch(struct device_queue_manager 
*dqm)
 INIT_LIST_HEAD(>queues);
 dqm->active_queue_count = dqm->next_pipe_to_allocate = 0;
 dqm->active_cp_queue_count = 0;
-   dqm->sdma_queue_count = 0;
-   dqm->xgmi_sdma_queue_count = 0;

 for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
 int pipe_offset = pipe * get_queues_per_pipe(dqm);
@@ -981,8 +977,11 @@ static int allocate_sdma_queue(struct device_queue_manager 
*dqm,
 int bit;

 if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
-   if (dqm->sdma_bitmap == 0)
+   if (dqm->sdma_bitmap == 0) {
+   pr_err("No more SDMA queue to allocate\n");
 return -ENOMEM;
+   }
+
 bit = __ffs64(dqm->sdma_bitmap);
 dqm->sdma_bitmap &= ~(1ULL << bit);
 q->sdma_id = bit;
@@ -991,8 +990,10 @@ static int allocate_sdma_queue(struct device_queue_manager 
*dqm,
 q->properties.sdma_queue_id = q->sdma_id /
 get_num_sdma_engines(dqm);
 } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
-   if (dqm->xgmi_sdma_bitmap == 0)
+   if (dqm->xgmi_sdma_bitmap == 0) {
+   pr_err("No more XGMI SDMA queue to allocate\n");
 return -ENOMEM;
+   }
 bit = __ffs64(dqm->xgmi_sdma_bitmap);
 dqm->xgmi_sdma_bitmap &= ~(1ULL << bit);
 q->sdma_id = bit;
@@ -1081,8 +1082,7 @@ static int i

[PATCH 6/6] drm/amdkfd: Delete unnecessary unmap queue package submissions

2020-02-24 Thread Yong Zhao
The previous SDMA queue counting was wrong. In addition, after confirming
with MEC firmware team, we understands that only one unmap queue package,
instead of one unmap queue package for CP and each SDMA engine, is needed,
which results in much simpler driver code.

Change-Id: I84fd2f7e63d6b7f664580b425a78d3e995ce9abc
Signed-off-by: Yong Zhao 
---
 .../drm/amd/amdkfd/kfd_device_queue_manager.c | 79 ++-
 .../drm/amd/amdkfd/kfd_device_queue_manager.h |  2 -
 .../amd/amdkfd/kfd_process_queue_manager.c| 16 ++--
 3 files changed, 29 insertions(+), 68 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 958275db3f55..692abfd2088a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -109,6 +109,11 @@ static unsigned int get_num_xgmi_sdma_engines(struct 
device_queue_manager *dqm)
return dqm->dev->device_info->num_xgmi_sdma_engines;
 }
 
+static unsigned int get_num_all_sdma_engines(struct device_queue_manager *dqm)
+{
+   return get_num_sdma_engines(dqm) + get_num_xgmi_sdma_engines(dqm);
+}
+
 unsigned int get_num_sdma_queues(struct device_queue_manager *dqm)
 {
return dqm->dev->device_info->num_sdma_engines
@@ -375,11 +380,6 @@ static int create_queue_nocpsch(struct 
device_queue_manager *dqm,
if (q->properties.is_active)
increment_queue_count(dqm, q->properties.type);
 
-   if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
-   dqm->sdma_queue_count++;
-   else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
-   dqm->xgmi_sdma_queue_count++;
-
/*
 * Unconditionally increment this counter, regardless of the queue's
 * type or whether the queue is active.
@@ -460,15 +460,13 @@ static int destroy_queue_nocpsch_locked(struct 
device_queue_manager *dqm,
mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
q->properties.type)];
 
-   if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) {
+   if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
deallocate_hqd(dqm, q);
-   } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
-   dqm->sdma_queue_count--;
+   else if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
deallocate_sdma_queue(dqm, q);
-   } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
-   dqm->xgmi_sdma_queue_count--;
+   else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
deallocate_sdma_queue(dqm, q);
-   } else {
+   else {
pr_debug("q->properties.type %d is invalid\n",
q->properties.type);
return -EINVAL;
@@ -915,8 +913,6 @@ static int initialize_nocpsch(struct device_queue_manager 
*dqm)
INIT_LIST_HEAD(>queues);
dqm->active_queue_count = dqm->next_pipe_to_allocate = 0;
dqm->active_cp_queue_count = 0;
-   dqm->sdma_queue_count = 0;
-   dqm->xgmi_sdma_queue_count = 0;
 
for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
int pipe_offset = pipe * get_queues_per_pipe(dqm);
@@ -981,8 +977,11 @@ static int allocate_sdma_queue(struct device_queue_manager 
*dqm,
int bit;
 
if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
-   if (dqm->sdma_bitmap == 0)
+   if (dqm->sdma_bitmap == 0) {
+   pr_err("No more SDMA queue to allocate\n");
return -ENOMEM;
+   }
+
bit = __ffs64(dqm->sdma_bitmap);
dqm->sdma_bitmap &= ~(1ULL << bit);
q->sdma_id = bit;
@@ -991,8 +990,10 @@ static int allocate_sdma_queue(struct device_queue_manager 
*dqm,
q->properties.sdma_queue_id = q->sdma_id /
get_num_sdma_engines(dqm);
} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
-   if (dqm->xgmi_sdma_bitmap == 0)
+   if (dqm->xgmi_sdma_bitmap == 0) {
+   pr_err("No more XGMI SDMA queue to allocate\n");
return -ENOMEM;
+   }
bit = __ffs64(dqm->xgmi_sdma_bitmap);
dqm->xgmi_sdma_bitmap &= ~(1ULL << bit);
q->sdma_id = bit;
@@ -1081,8 +1082,7 @@ static int initialize_cpsch(struct device_queue_manager 
*dqm)
INIT_LIST_HEAD(>queues);
dqm->active_queue_count = dqm->processes_count = 0;
dqm->active_cp_queue_count = 0;
-   dqm->sdma_queue_count = 0;
-   dqm->xgmi_sdma_queue_count = 0;
+
dqm->active_runlist = false;
dqm->sdma_bitmap = ~0ULL >> (64 - get_num_sdma_queues(dqm));
dqm->xgmi_sdma_bitmap = ~0ULL >> (64 - get_num_xgmi_sdma_queues(dqm));
@@ -1254,11 +1254,6 @@ static int create_queue_cpsch(struct