Re: [PATCH 3/4] amd/amdgpu: add sched list to IPs with multiple run-queues

2019-12-10 Thread Christian König
Yeah, but you are too fast for me. I was still looking into comments for 
patch #4 :)


Christian.

Am 10.12.19 um 13:55 schrieb Nirmoy:

Thanks Christian. That makes sense; resent modified patches.

On 12/10/19 12:28 PM, Christian König wrote:

Am 09.12.19 um 22:53 schrieb Nirmoy Das:

This sched list can be passed on to entity creation routine
instead of manually creating such sched list on every context creation.


Please drop the "_list" from the names here. A list usually means a 
linked list and those are actually arrays.


Additional to that amdgpu_device_init_sched_list() should probably go 
into amdgpu_ctx.c instead. That is actually not really device 
related, but more UAPI/ctx stuff.


Apart from that looks good to me,
Christian.



Signed-off-by: Nirmoy Das 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c    | 69 
--

  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 44 ++
  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h    |  4 ++
  drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h   |  2 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h   |  2 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h    |  9 ++-
  6 files changed, 85 insertions(+), 45 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c

index 1d6850af9908..c1fc75299b7d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -74,7 +74,7 @@ static int amdgpu_ctx_init(struct amdgpu_device 
*adev,

 struct amdgpu_ctx *ctx)
  {
  unsigned num_entities = amdgpu_ctx_total_num_entities();
-    unsigned i, j, k;
+    unsigned i, j;
  int r;
    if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
@@ -121,73 +121,56 @@ static int amdgpu_ctx_init(struct 
amdgpu_device *adev,

  ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
    for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
-    struct amdgpu_ring *rings[AMDGPU_MAX_RINGS];
-    struct drm_gpu_scheduler *sched_list[AMDGPU_MAX_RINGS];
-    unsigned num_rings = 0;
-    unsigned num_rqs = 0;
+    struct drm_gpu_scheduler **sched_list;
+    struct drm_gpu_scheduler *sched;
+    unsigned num_scheds = 0;
    switch (i) {
  case AMDGPU_HW_IP_GFX:
-    rings[0] = &adev->gfx.gfx_ring[0];
-    num_rings = 1;
+    sched_list = adev->gfx.gfx_sched_list;
+    num_scheds = 1;
  break;
  case AMDGPU_HW_IP_COMPUTE:
-    for (j = 0; j < adev->gfx.num_compute_rings; ++j)
-    rings[j] = &adev->gfx.compute_ring[j];
-    num_rings = adev->gfx.num_compute_rings;
+    sched_list = adev->gfx.compute_sched_list;
+    num_scheds = adev->gfx.num_compute_rings;
  break;
  case AMDGPU_HW_IP_DMA:
-    for (j = 0; j < adev->sdma.num_instances; ++j)
-    rings[j] = &adev->sdma.instance[j].ring;
-    num_rings = adev->sdma.num_instances;
+    sched_list = adev->sdma.sdma_sched_list;
+    num_scheds = adev->sdma.num_instances;
  break;
  case AMDGPU_HW_IP_UVD:
-    rings[0] = &adev->uvd.inst[0].ring;
-    num_rings = 1;
+    sched = &adev->uvd.inst[0].ring.sched;
+    sched_list = &sched;
+    num_scheds = 1;
  break;
  case AMDGPU_HW_IP_VCE:
-    rings[0] = &adev->vce.ring[0];
-    num_rings = 1;
+    sched = &adev->vce.ring[0].sched;
+    sched_list = &sched;
+    num_scheds = 1;
  break;
  case AMDGPU_HW_IP_UVD_ENC:
-    rings[0] = &adev->uvd.inst[0].ring_enc[0];
-    num_rings = 1;
+    sched = &adev->uvd.inst[0].ring_enc[0].sched;
+    sched_list = &sched;
+    num_scheds = 1;
  break;
  case AMDGPU_HW_IP_VCN_DEC:
-    for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
-    if (adev->vcn.harvest_config & (1 << j))
-    continue;
-    rings[num_rings++] = &adev->vcn.inst[j].ring_dec;
-    }
+    sched_list = adev->vcn.vcn_dec_sched_list;
+    num_scheds = adev->vcn.num_vcn_dec_sched_list;
  break;
  case AMDGPU_HW_IP_VCN_ENC:
-    for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
-    if (adev->vcn.harvest_config & (1 << j))
-    continue;
-    for (k = 0; k < adev->vcn.num_enc_rings; ++k)
-    rings[num_rings++] = &adev->vcn.inst[j].ring_enc[k];

-    }
+    sched_list = adev->vcn.vcn_enc_sched_list;
+    num_scheds = adev->vcn.num_vcn_enc_sched_list;
  break;
  case AMDGPU_HW_IP_VCN_JPEG:
-    for (j = 0; j < adev->jpeg.num_jpeg_inst; ++j) {
-    if (adev->vcn.harvest_config & (1 << j))
-    continue;
-    rings[num_rings++] = &adev->jpeg.inst[j].ring_dec;
-    }
+    sched_list = adev->jpeg.jpeg_sched_list;
+    num_scheds = adev->jpeg.num_jpeg_sched_list;

Re: [PATCH 3/4] amd/amdgpu: add sched list to IPs with multiple run-queues

2019-12-10 Thread Nirmoy

Thanks Christian. That makes sense; resent modified patches.

On 12/10/19 12:28 PM, Christian König wrote:

Am 09.12.19 um 22:53 schrieb Nirmoy Das:

This sched list can be passed on to entity creation routine
instead of manually creating such sched list on every context creation.


Please drop the "_list" from the names here. A list usually means a 
linked list and those are actually arrays.


Additional to that amdgpu_device_init_sched_list() should probably go 
into amdgpu_ctx.c instead. That is actually not really device related, 
but more UAPI/ctx stuff.


Apart from that looks good to me,
Christian.



Signed-off-by: Nirmoy Das 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c    | 69 --
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 44 ++
  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h    |  4 ++
  drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h   |  2 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h   |  2 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h    |  9 ++-
  6 files changed, 85 insertions(+), 45 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c

index 1d6850af9908..c1fc75299b7d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -74,7 +74,7 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
 struct amdgpu_ctx *ctx)
  {
  unsigned num_entities = amdgpu_ctx_total_num_entities();
-    unsigned i, j, k;
+    unsigned i, j;
  int r;
    if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
@@ -121,73 +121,56 @@ static int amdgpu_ctx_init(struct amdgpu_device 
*adev,

  ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
    for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
-    struct amdgpu_ring *rings[AMDGPU_MAX_RINGS];
-    struct drm_gpu_scheduler *sched_list[AMDGPU_MAX_RINGS];
-    unsigned num_rings = 0;
-    unsigned num_rqs = 0;
+    struct drm_gpu_scheduler **sched_list;
+    struct drm_gpu_scheduler *sched;
+    unsigned num_scheds = 0;
    switch (i) {
  case AMDGPU_HW_IP_GFX:
-    rings[0] = &adev->gfx.gfx_ring[0];
-    num_rings = 1;
+    sched_list = adev->gfx.gfx_sched_list;
+    num_scheds = 1;
  break;
  case AMDGPU_HW_IP_COMPUTE:
-    for (j = 0; j < adev->gfx.num_compute_rings; ++j)
-    rings[j] = &adev->gfx.compute_ring[j];
-    num_rings = adev->gfx.num_compute_rings;
+    sched_list = adev->gfx.compute_sched_list;
+    num_scheds = adev->gfx.num_compute_rings;
  break;
  case AMDGPU_HW_IP_DMA:
-    for (j = 0; j < adev->sdma.num_instances; ++j)
-    rings[j] = &adev->sdma.instance[j].ring;
-    num_rings = adev->sdma.num_instances;
+    sched_list = adev->sdma.sdma_sched_list;
+    num_scheds = adev->sdma.num_instances;
  break;
  case AMDGPU_HW_IP_UVD:
-    rings[0] = &adev->uvd.inst[0].ring;
-    num_rings = 1;
+    sched = &adev->uvd.inst[0].ring.sched;
+    sched_list = &sched;
+    num_scheds = 1;
  break;
  case AMDGPU_HW_IP_VCE:
-    rings[0] = &adev->vce.ring[0];
-    num_rings = 1;
+    sched = &adev->vce.ring[0].sched;
+    sched_list = &sched;
+    num_scheds = 1;
  break;
  case AMDGPU_HW_IP_UVD_ENC:
-    rings[0] = &adev->uvd.inst[0].ring_enc[0];
-    num_rings = 1;
+    sched = &adev->uvd.inst[0].ring_enc[0].sched;
+    sched_list = &sched;
+    num_scheds = 1;
  break;
  case AMDGPU_HW_IP_VCN_DEC:
-    for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
-    if (adev->vcn.harvest_config & (1 << j))
-    continue;
-    rings[num_rings++] = &adev->vcn.inst[j].ring_dec;
-    }
+    sched_list = adev->vcn.vcn_dec_sched_list;
+    num_scheds =  adev->vcn.num_vcn_dec_sched_list;
  break;
  case AMDGPU_HW_IP_VCN_ENC:
-    for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
-    if (adev->vcn.harvest_config & (1 << j))
-    continue;
-    for (k = 0; k < adev->vcn.num_enc_rings; ++k)
-    rings[num_rings++] = &adev->vcn.inst[j].ring_enc[k];

-    }
+    sched_list = adev->vcn.vcn_enc_sched_list;
+    num_scheds =  adev->vcn.num_vcn_enc_sched_list;
  break;
  case AMDGPU_HW_IP_VCN_JPEG:
-    for (j = 0; j < adev->jpeg.num_jpeg_inst; ++j) {
-    if (adev->vcn.harvest_config & (1 << j))
-    continue;
-    rings[num_rings++] = &adev->jpeg.inst[j].ring_dec;
-    }
+    sched_list = adev->jpeg.jpeg_sched_list;
+    num_scheds =  adev->jpeg.num_jpeg_sched_list;
  break;
  }
  -    for (j = 0; j < num_rings; ++j) {
-    if 

Re: [PATCH 3/4] amd/amdgpu: add sched list to IPs with multiple run-queues

2019-12-10 Thread Christian König

Am 09.12.19 um 22:53 schrieb Nirmoy Das:

This sched list can be passed on to entity creation routine
instead of manually creating such sched list on every context creation.


Please drop the "_list" from the names here. A list usually means a 
linked list and those are actually arrays.


Additional to that amdgpu_device_init_sched_list() should probably go 
into amdgpu_ctx.c instead. That is actually not really device related, 
but more UAPI/ctx stuff.


Apart from that looks good to me,
Christian.



Signed-off-by: Nirmoy Das 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c| 69 --
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 44 ++
  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h|  4 ++
  drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h   |  2 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h   |  2 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h|  9 ++-
  6 files changed, 85 insertions(+), 45 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 1d6850af9908..c1fc75299b7d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -74,7 +74,7 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
   struct amdgpu_ctx *ctx)
  {
unsigned num_entities = amdgpu_ctx_total_num_entities();
-   unsigned i, j, k;
+   unsigned i, j;
int r;
  
  	if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)

@@ -121,73 +121,56 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
  
  	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {

-   struct amdgpu_ring *rings[AMDGPU_MAX_RINGS];
-   struct drm_gpu_scheduler *sched_list[AMDGPU_MAX_RINGS];
-   unsigned num_rings = 0;
-   unsigned num_rqs = 0;
+   struct drm_gpu_scheduler **sched_list;
+   struct drm_gpu_scheduler *sched;
+   unsigned num_scheds = 0;
  
  		switch (i) {

case AMDGPU_HW_IP_GFX:
-   rings[0] = &adev->gfx.gfx_ring[0];
-   num_rings = 1;
+   sched_list = adev->gfx.gfx_sched_list;
+   num_scheds = 1;
break;
case AMDGPU_HW_IP_COMPUTE:
-   for (j = 0; j < adev->gfx.num_compute_rings; ++j)
-   rings[j] = &adev->gfx.compute_ring[j];
-   num_rings = adev->gfx.num_compute_rings;
+   sched_list = adev->gfx.compute_sched_list;
+   num_scheds = adev->gfx.num_compute_rings;
break;
case AMDGPU_HW_IP_DMA:
-   for (j = 0; j < adev->sdma.num_instances; ++j)
-   rings[j] = &adev->sdma.instance[j].ring;
-   num_rings = adev->sdma.num_instances;
+   sched_list = adev->sdma.sdma_sched_list;
+   num_scheds = adev->sdma.num_instances;
break;
case AMDGPU_HW_IP_UVD:
-   rings[0] = &adev->uvd.inst[0].ring;
-   num_rings = 1;
+   sched = &adev->uvd.inst[0].ring.sched;
+   sched_list = &sched;
+   num_scheds = 1;
break;
case AMDGPU_HW_IP_VCE:
-   rings[0] = &adev->vce.ring[0];
-   num_rings = 1;
+   sched = &adev->vce.ring[0].sched;
+   sched_list = &sched;
+   num_scheds = 1;
break;
case AMDGPU_HW_IP_UVD_ENC:
-   rings[0] = &adev->uvd.inst[0].ring_enc[0];
-   num_rings = 1;
+   sched = &adev->uvd.inst[0].ring_enc[0].sched;
+   sched_list = &sched;
+   num_scheds = 1;
break;
case AMDGPU_HW_IP_VCN_DEC:
-   for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
-   if (adev->vcn.harvest_config & (1 << j))
-   continue;
-   rings[num_rings++] = &adev->vcn.inst[j].ring_dec;
-   }
+   sched_list = adev->vcn.vcn_dec_sched_list;
+   num_scheds =  adev->vcn.num_vcn_dec_sched_list;
break;
case AMDGPU_HW_IP_VCN_ENC:
-   for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
-   if (adev->vcn.harvest_config & (1 << j))
-   continue;
-   for (k = 0; k < adev->vcn.num_enc_rings; ++k)
-   rings[num_rings++] = &adev->vcn.inst[j].ring_enc[k];
-   }
+   

[PATCH 3/4] amd/amdgpu: add sched list to IPs with multiple run-queues

2019-12-09 Thread Nirmoy Das
This sched list can be passed on to entity creation routine
instead of manually creating such sched list on every context creation.

Signed-off-by: Nirmoy Das 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c| 69 --
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 44 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h|  4 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h   |  2 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h   |  2 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h|  9 ++-
 6 files changed, 85 insertions(+), 45 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 1d6850af9908..c1fc75299b7d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -74,7 +74,7 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
   struct amdgpu_ctx *ctx)
 {
unsigned num_entities = amdgpu_ctx_total_num_entities();
-   unsigned i, j, k;
+   unsigned i, j;
int r;
 
if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
@@ -121,73 +121,56 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
 
for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
-   struct amdgpu_ring *rings[AMDGPU_MAX_RINGS];
-   struct drm_gpu_scheduler *sched_list[AMDGPU_MAX_RINGS];
-   unsigned num_rings = 0;
-   unsigned num_rqs = 0;
+   struct drm_gpu_scheduler **sched_list;
+   struct drm_gpu_scheduler *sched;
+   unsigned num_scheds = 0;
 
switch (i) {
case AMDGPU_HW_IP_GFX:
-   rings[0] = &adev->gfx.gfx_ring[0];
-   num_rings = 1;
+   sched_list = adev->gfx.gfx_sched_list;
+   num_scheds = 1;
break;
case AMDGPU_HW_IP_COMPUTE:
-   for (j = 0; j < adev->gfx.num_compute_rings; ++j)
-   rings[j] = &adev->gfx.compute_ring[j];
-   num_rings = adev->gfx.num_compute_rings;
+   sched_list = adev->gfx.compute_sched_list;
+   num_scheds = adev->gfx.num_compute_rings;
break;
case AMDGPU_HW_IP_DMA:
-   for (j = 0; j < adev->sdma.num_instances; ++j)
-   rings[j] = &adev->sdma.instance[j].ring;
-   num_rings = adev->sdma.num_instances;
+   sched_list = adev->sdma.sdma_sched_list;
+   num_scheds = adev->sdma.num_instances;
break;
case AMDGPU_HW_IP_UVD:
-   rings[0] = &adev->uvd.inst[0].ring;
-   num_rings = 1;
+   sched = &adev->uvd.inst[0].ring.sched;
+   sched_list = &sched;
+   num_scheds = 1;
break;
case AMDGPU_HW_IP_VCE:
-   rings[0] = &adev->vce.ring[0];
-   num_rings = 1;
+   sched = &adev->vce.ring[0].sched;
+   sched_list = &sched;
+   num_scheds = 1;
break;
case AMDGPU_HW_IP_UVD_ENC:
-   rings[0] = &adev->uvd.inst[0].ring_enc[0];
-   num_rings = 1;
+   sched = &adev->uvd.inst[0].ring_enc[0].sched;
+   sched_list = &sched;
+   num_scheds = 1;
break;
case AMDGPU_HW_IP_VCN_DEC:
-   for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
-   if (adev->vcn.harvest_config & (1 << j))
-   continue;
-   rings[num_rings++] = &adev->vcn.inst[j].ring_dec;
-   }
+   sched_list = adev->vcn.vcn_dec_sched_list;
+   num_scheds =  adev->vcn.num_vcn_dec_sched_list;
break;
case AMDGPU_HW_IP_VCN_ENC:
-   for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
-   if (adev->vcn.harvest_config & (1 << j))
-   continue;
-   for (k = 0; k < adev->vcn.num_enc_rings; ++k)
-   rings[num_rings++] = &adev->vcn.inst[j].ring_enc[k];
-   }
+   sched_list = adev->vcn.vcn_enc_sched_list;
+   num_scheds =  adev->vcn.num_vcn_enc_sched_list;
break;
case AMDGPU_HW_IP_VCN_JPEG:
-   for (j = 0; j < adev->jpeg.num_jpeg_inst; ++j) {
-   if (adev->vcn.harvest_config & (1 << j))
-