Re: [PATCH] drm/amd/amdgpu: Enable gfx pipe1 and fix related issues

2022-11-04 Thread Christian König




Am 04.11.22 um 12:19 schrieb Emily Deng:

Starting from SIENNA CICHLID, the ASIC supports two gfx pipes; enable
two graphics queues for performance reasons.

Signed-off-by: Emily Deng 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c |  2 +-
  drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c  | 42 -
  2 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 331aa191910c..0072f36b44d1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -33,7 +33,7 @@
container_of((e), struct amdgpu_ctx_entity, entity)
  
  const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM] = {

-   [AMDGPU_HW_IP_GFX]  =   1,
+   [AMDGPU_HW_IP_GFX]  =   2,


Again, please completely drop this. That change absolutely makes no 
sense at all.


Regards,
Christian.


[AMDGPU_HW_IP_COMPUTE]  =   4,
[AMDGPU_HW_IP_DMA]  =   2,
[AMDGPU_HW_IP_UVD]  =   1,
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 49d34c7bbf20..bbf18060611e 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -4606,7 +4606,7 @@ static int gfx_v10_0_sw_init(void *handle)
case IP_VERSION(10, 3, 3):
case IP_VERSION(10, 3, 7):
adev->gfx.me.num_me = 1;
-   adev->gfx.me.num_pipe_per_me = 1;
+   adev->gfx.me.num_pipe_per_me = 2;
adev->gfx.me.num_queue_per_pipe = 1;
adev->gfx.mec.num_mec = 2;
adev->gfx.mec.num_pipe_per_mec = 4;
@@ -6008,6 +6008,24 @@ static int gfx_v10_0_cp_gfx_load_microcode(struct 
amdgpu_device *adev)
return 0;
  }
  
+static int gfx_v10_0_wait_for_idle(void *handle)

+{
+   unsigned i;
+   u32 tmp;
+   struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+   for (i = 0; i < adev->usec_timeout; i++) {
+   /* read MC_STATUS */
+   tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS) &
+   GRBM_STATUS__GUI_ACTIVE_MASK;
+
+   if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE))
+   return 0;
+   udelay(1);
+   }
+   return -ETIMEDOUT;
+}
+
  static int gfx_v10_0_cp_gfx_start(struct amdgpu_device *adev)
  {
struct amdgpu_ring *ring;
@@ -6069,7 +6087,7 @@ static int gfx_v10_0_cp_gfx_start(struct amdgpu_device 
*adev)
amdgpu_ring_write(ring, 0x8000);
  
  	amdgpu_ring_commit(ring);

-
+   gfx_v10_0_wait_for_idle(adev);
/* submit cs packet to copy state 0 to next available state */
if (adev->gfx.num_gfx_rings > 1) {
/* maximum supported gfx ring is 2 */
@@ -7404,24 +7422,6 @@ static bool gfx_v10_0_is_idle(void *handle)
return true;
  }
  
-static int gfx_v10_0_wait_for_idle(void *handle)

-{
-   unsigned i;
-   u32 tmp;
-   struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
-   for (i = 0; i < adev->usec_timeout; i++) {
-   /* read MC_STATUS */
-   tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS) &
-   GRBM_STATUS__GUI_ACTIVE_MASK;
-
-   if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE))
-   return 0;
-   udelay(1);
-   }
-   return -ETIMEDOUT;
-}
-
  static int gfx_v10_0_soft_reset(void *handle)
  {
u32 grbm_soft_reset = 0;
@@ -8466,7 +8466,7 @@ static void gfx_v10_0_ring_emit_hdp_flush(struct 
amdgpu_ring *ring)
}
reg_mem_engine = 0;
} else {
-   ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
+   ref_and_mask = nbio_hf_reg->ref_and_mask_cp0 << ring->pipe;
reg_mem_engine = 1; /* pfp */
}
  




RE: [PATCH] drm/amd/amdgpu: Enable gfx pipe1 and fix related issues

2022-11-04 Thread Deng, Emily
[AMD Official Use Only - General]

>-----Original Message-----
>From: Christian König 
>Sent: Friday, November 4, 2022 5:26 PM
>To: Deng, Emily ; amd-gfx@lists.freedesktop.org;
>Michel Dänzer 
>Subject: Re: [PATCH] drm/amd/amdgpu: Enable gfx pipe1 and fix related
>issues
>
>Am 04.11.22 um 01:32 schrieb Emily Deng:
>> Starting from SIENNA CICHLID, the ASIC supports two gfx pipes; enable
>> two graphics queues for performance reasons.
>
>With the printk still in the patch I assume that this is just a debugging 
>patch?
>
Sorry, will delete it.
>>
>> Signed-off-by: Emily Deng 
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c |  2 +-
>>   drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c  | 43 +
>>   2 files changed, 23 insertions(+), 22 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
>> index 331aa191910c..0072f36b44d1 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
>> @@ -33,7 +33,7 @@
>>  container_of((e), struct amdgpu_ctx_entity, entity)
>>
>>   const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM] = {
>> -[AMDGPU_HW_IP_GFX]  =   1,
>> +[AMDGPU_HW_IP_GFX]  =   2,
>
>That's an absolutely clear NAK and as far as I can see also unnecessary.
>
>We don't want to expose the GFX queues as separate queues to userspace.
>
>Instead the queues have separate priorities which userspace can select.
>
>Regards,
>Christian.
>
>>  [AMDGPU_HW_IP_COMPUTE]  =   4,
>>  [AMDGPU_HW_IP_DMA]  =   2,
>>  [AMDGPU_HW_IP_UVD]  =   1,
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>> b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>> index 49d34c7bbf20..9219cd29acd3 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>> @@ -4606,7 +4606,7 @@ static int gfx_v10_0_sw_init(void *handle)
>>  case IP_VERSION(10, 3, 3):
>>  case IP_VERSION(10, 3, 7):
>>  adev->gfx.me.num_me = 1;
>> -adev->gfx.me.num_pipe_per_me = 1;
>> +adev->gfx.me.num_pipe_per_me = 2;
>>  adev->gfx.me.num_queue_per_pipe = 1;
>>  adev->gfx.mec.num_mec = 2;
>>  adev->gfx.mec.num_pipe_per_mec = 4; @@ -6008,6 +6008,25
>@@ static
>> int gfx_v10_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
>>  return 0;
>>   }
>>
>> +static int gfx_v10_0_wait_for_idle(void *handle) {
>> +unsigned i;
>> +u32 tmp;
>> +struct amdgpu_device *adev = (struct amdgpu_device *)handle;
>> +
>> +for (i = 0; i < adev->usec_timeout; i++) {
>> +/* read MC_STATUS */
>> +tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS) &
>> +GRBM_STATUS__GUI_ACTIVE_MASK;
>> +
>> +if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE))
>> +return 0;
>> +udelay(1);
>> +}
>> +printk("Emily:gfx_v10_0_wait_for_idle\n");
>> +return -ETIMEDOUT;
>> +}
>> +
>>   static int gfx_v10_0_cp_gfx_start(struct amdgpu_device *adev)
>>   {
>>  struct amdgpu_ring *ring;
>> @@ -6069,7 +6088,7 @@ static int gfx_v10_0_cp_gfx_start(struct
>amdgpu_device *adev)
>>  amdgpu_ring_write(ring, 0x8000);
>>
>>  amdgpu_ring_commit(ring);
>> -
>> +gfx_v10_0_wait_for_idle(adev);
>>  /* submit cs packet to copy state 0 to next available state */
>>  if (adev->gfx.num_gfx_rings > 1) {
>>  /* maximum supported gfx ring is 2 */ @@ -7404,24 +7423,6
>@@
>> static bool gfx_v10_0_is_idle(void *handle)
>>  return true;
>>   }
>>
>> -static int gfx_v10_0_wait_for_idle(void *handle) -{
>> -unsigned i;
>> -u32 tmp;
>> -struct amdgpu_device *adev = (struct amdgpu_device *)handle;
>> -
>> -for (i = 0; i < adev->usec_timeout; i++) {
>> -/* read MC_STATUS */
>> -tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS) &
>> -GRBM_STATUS__GUI_ACTIVE_MASK;
>> -
>> -if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE))
>> -return 0;
>> -udelay(1);
>> -}
>> -return -ETIMEDOUT;
>> -}
>> -
>>   static int gfx_v10_0_soft_reset(void *handle)
>>   {
>>  u32 grbm_soft_reset = 0;
>> @@ -8466,7 +8467,7 @@ static void gfx_v10_0_ring_emit_hdp_flush(struct
>amdgpu_ring *ring)
>>  }
>>  reg_mem_engine = 0;
>>  } else {
>> -ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
>> +ref_and_mask = nbio_hf_reg->ref_and_mask_cp0 << ring-
>>pipe;
>>  reg_mem_engine = 1; /* pfp */
>>  }
>>



Re: [PATCH] drm/amd/amdgpu: Enable gfx pipe1 and fix related issues

2022-11-04 Thread Christian König

Am 04.11.22 um 01:32 schrieb Emily Deng:

Starting from SIENNA CICHLID, the ASIC supports two gfx pipes; enable
two graphics queues for performance reasons.


With the printk still in the patch I assume that this is just a 
debugging patch?




Signed-off-by: Emily Deng 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c |  2 +-
  drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c  | 43 +
  2 files changed, 23 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 331aa191910c..0072f36b44d1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -33,7 +33,7 @@
container_of((e), struct amdgpu_ctx_entity, entity)
  
  const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM] = {

-   [AMDGPU_HW_IP_GFX]  =   1,
+   [AMDGPU_HW_IP_GFX]  =   2,


That's an absolutely clear NAK and as far as I can see also unnecessary.

We don't want to expose the GFX queues as separate queues to userspace.

Instead the queues have separate priorities which userspace can select.

Regards,
Christian.


[AMDGPU_HW_IP_COMPUTE]  =   4,
[AMDGPU_HW_IP_DMA]  =   2,
[AMDGPU_HW_IP_UVD]  =   1,
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 49d34c7bbf20..9219cd29acd3 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -4606,7 +4606,7 @@ static int gfx_v10_0_sw_init(void *handle)
case IP_VERSION(10, 3, 3):
case IP_VERSION(10, 3, 7):
adev->gfx.me.num_me = 1;
-   adev->gfx.me.num_pipe_per_me = 1;
+   adev->gfx.me.num_pipe_per_me = 2;
adev->gfx.me.num_queue_per_pipe = 1;
adev->gfx.mec.num_mec = 2;
adev->gfx.mec.num_pipe_per_mec = 4;
@@ -6008,6 +6008,25 @@ static int gfx_v10_0_cp_gfx_load_microcode(struct 
amdgpu_device *adev)
return 0;
  }
  
+static int gfx_v10_0_wait_for_idle(void *handle)

+{
+   unsigned i;
+   u32 tmp;
+   struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+   for (i = 0; i < adev->usec_timeout; i++) {
+   /* read MC_STATUS */
+   tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS) &
+   GRBM_STATUS__GUI_ACTIVE_MASK;
+
+   if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE))
+   return 0;
+   udelay(1);
+   }
+   printk("Emily:gfx_v10_0_wait_for_idle\n");
+   return -ETIMEDOUT;
+}
+
  static int gfx_v10_0_cp_gfx_start(struct amdgpu_device *adev)
  {
struct amdgpu_ring *ring;
@@ -6069,7 +6088,7 @@ static int gfx_v10_0_cp_gfx_start(struct amdgpu_device 
*adev)
amdgpu_ring_write(ring, 0x8000);
  
  	amdgpu_ring_commit(ring);

-
+   gfx_v10_0_wait_for_idle(adev);
/* submit cs packet to copy state 0 to next available state */
if (adev->gfx.num_gfx_rings > 1) {
/* maximum supported gfx ring is 2 */
@@ -7404,24 +7423,6 @@ static bool gfx_v10_0_is_idle(void *handle)
return true;
  }
  
-static int gfx_v10_0_wait_for_idle(void *handle)

-{
-   unsigned i;
-   u32 tmp;
-   struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
-   for (i = 0; i < adev->usec_timeout; i++) {
-   /* read MC_STATUS */
-   tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS) &
-   GRBM_STATUS__GUI_ACTIVE_MASK;
-
-   if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE))
-   return 0;
-   udelay(1);
-   }
-   return -ETIMEDOUT;
-}
-
  static int gfx_v10_0_soft_reset(void *handle)
  {
u32 grbm_soft_reset = 0;
@@ -8466,7 +8467,7 @@ static void gfx_v10_0_ring_emit_hdp_flush(struct 
amdgpu_ring *ring)
}
reg_mem_engine = 0;
} else {
-   ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
+   ref_and_mask = nbio_hf_reg->ref_and_mask_cp0 << ring->pipe;
reg_mem_engine = 1; /* pfp */
}