Re: [PATCH 3/3] drm/amdgpu/gfx9: adjust mqd allocation size

2017-08-21 Thread Alex Deucher
On Sat, Aug 19, 2017 at 12:18 PM, Kuehling, Felix
<felix.kuehl...@amd.com> wrote:
> I'm not sure how dynamic CU masking works. But on a GPU with 64 CUs, a 32-bit 
> CU mask (in struct v9_mqd_allocation) seems too small.

Good question even on gfx8.  I've got a request out to the hw team.

Alex

>
> Regards,
>   Felix
> 
> From: amd-gfx <amd-gfx-boun...@lists.freedesktop.org> on behalf of Alex 
> Deucher <alexdeuc...@gmail.com>
> Sent: Friday, August 18, 2017 11:57:51 PM
> To: amd-gfx@lists.freedesktop.org
> Cc: Deucher, Alexander
> Subject: [PATCH 3/3] drm/amdgpu/gfx9: adjust mqd allocation size
>
> to allocate additional space for the dynamic cu masks.
>
> Signed-off-by: Alex Deucher <alexander.deuc...@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c    | 25 ++++++++++++++++++-------
>  drivers/gpu/drm/amd/include/v9_structs.h |  8 ++++++++
>  2 files changed, 26 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
> b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index 69182ee..7c06d1b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -1346,7 +1346,7 @@ static int gfx_v9_0_sw_init(void *handle)
> return r;
>
> /* create MQD for all compute queues as well as KIQ for SRIOV case */
> -   r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct v9_mqd));
> +   r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct 
> v9_mqd_allocation));
> if (r)
> return r;
>
> @@ -2463,6 +2463,13 @@ static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
> mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
> mqd->compute_misc_reserved = 0x00000003;
>
> +   mqd->dynamic_cu_mask_addr_lo =
> +   lower_32_bits(ring->mqd_gpu_addr
> + + offsetof(struct v9_mqd_allocation, 
> dynamic_cu_mask));
> +   mqd->dynamic_cu_mask_addr_hi =
> +   upper_32_bits(ring->mqd_gpu_addr
> + + offsetof(struct v9_mqd_allocation, 
> dynamic_cu_mask));
> +
> eop_base_addr = ring->eop_gpu_addr >> 8;
> mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
> mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
> @@ -2695,7 +2702,7 @@ static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring 
> *ring)
> if (adev->gfx.in_reset) { /* for GPU_RESET case */
> /* reset MQD to a clean status */
> if (adev->gfx.mec.mqd_backup[mqd_idx])
> -   memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], 
> sizeof(*mqd));
> +   memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], 
> sizeof(struct v9_mqd_allocation));
>
> /* reset ring buffer */
> ring->wptr = 0;
> @@ -2707,7 +2714,9 @@ static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring 
> *ring)
> soc15_grbm_select(adev, 0, 0, 0, 0);
> mutex_unlock(>srbm_mutex);
> } else {
> -   memset((void *)mqd, 0, sizeof(*mqd));
> +   memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
> +   ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
> +   ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
> mutex_lock(&adev->srbm_mutex);
> soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
> gfx_v9_0_mqd_init(ring);
> @@ -2716,7 +2725,7 @@ static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring 
> *ring)
> mutex_unlock(&adev->srbm_mutex);
>
> if (adev->gfx.mec.mqd_backup[mqd_idx])
> -   memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, 
> sizeof(*mqd));
> +   memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, 
> sizeof(struct v9_mqd_allocation));
> }
>
> return 0;
> @@ -2729,7 +2738,9 @@ static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring 
> *ring)
> int mqd_idx = ring - &adev->gfx.compute_ring[0];
>
> if (!adev->gfx.in_reset && !adev->gfx.in_suspend) {
> -   memset((void *)mqd, 0, sizeof(*mqd));
> +   memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
> +   ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 
> 0x;
> +   ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 
> 0x;
> mutex_lock(&adev->srbm_mutex);
> soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);

[PATCH 3/3] drm/amdgpu/gfx9: adjust mqd allocation size

2017-08-18 Thread Alex Deucher
to allocate additional space for the dynamic cu masks.

Signed-off-by: Alex Deucher <alexander.deuc...@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c    | 25 ++++++++++++++++++-------
 drivers/gpu/drm/amd/include/v9_structs.h |  8 ++++++++
 2 files changed, 26 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 69182ee..7c06d1b 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -1346,7 +1346,7 @@ static int gfx_v9_0_sw_init(void *handle)
return r;
 
/* create MQD for all compute queues as well as KIQ for SRIOV case */
-   r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct v9_mqd));
+   r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct 
v9_mqd_allocation));
if (r)
return r;
 
@@ -2463,6 +2463,13 @@ static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
mqd->compute_misc_reserved = 0x00000003;
 
+   mqd->dynamic_cu_mask_addr_lo =
+   lower_32_bits(ring->mqd_gpu_addr
+ + offsetof(struct v9_mqd_allocation, 
dynamic_cu_mask));
+   mqd->dynamic_cu_mask_addr_hi =
+   upper_32_bits(ring->mqd_gpu_addr
+ + offsetof(struct v9_mqd_allocation, 
dynamic_cu_mask));
+
eop_base_addr = ring->eop_gpu_addr >> 8;
mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
@@ -2695,7 +2702,7 @@ static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring 
*ring)
if (adev->gfx.in_reset) { /* for GPU_RESET case */
/* reset MQD to a clean status */
if (adev->gfx.mec.mqd_backup[mqd_idx])
-   memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], 
sizeof(*mqd));
+   memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], 
sizeof(struct v9_mqd_allocation));
 
/* reset ring buffer */
ring->wptr = 0;
@@ -2707,7 +2714,9 @@ static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring 
*ring)
soc15_grbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
} else {
-   memset((void *)mqd, 0, sizeof(*mqd));
+   memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
+   ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
+   ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
mutex_lock(&adev->srbm_mutex);
soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
gfx_v9_0_mqd_init(ring);
@@ -2716,7 +2725,7 @@ static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring 
*ring)
mutex_unlock(&adev->srbm_mutex);
 
if (adev->gfx.mec.mqd_backup[mqd_idx])
-   memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, 
sizeof(*mqd));
+   memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, 
sizeof(struct v9_mqd_allocation));
}
 
return 0;
@@ -2729,7 +2738,9 @@ static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring 
*ring)
int mqd_idx = ring - &adev->gfx.compute_ring[0];
 
if (!adev->gfx.in_reset && !adev->gfx.in_suspend) {
-   memset((void *)mqd, 0, sizeof(*mqd));
+   memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
+   ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
+   ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
mutex_lock(&adev->srbm_mutex);
soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
gfx_v9_0_mqd_init(ring);
@@ -2737,11 +2748,11 @@ static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring 
*ring)
mutex_unlock(&adev->srbm_mutex);
 
if (adev->gfx.mec.mqd_backup[mqd_idx])
-   memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, 
sizeof(*mqd));
+   memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, 
sizeof(struct v9_mqd_allocation));
} else if (adev->gfx.in_reset) { /* for GPU_RESET case */
/* reset MQD to a clean status */
if (adev->gfx.mec.mqd_backup[mqd_idx])
-   memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], 
sizeof(*mqd));
+   memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], 
sizeof(struct v9_mqd_allocation));
 
/* reset ring buffer */
ring->wptr = 0;
diff --git a/drivers/gpu/drm/amd/include/v9_structs.h 
b/drivers/gpu/drm/amd/include/v9_structs.h
index 56d79db..2fb25ab 100644
--- a/drivers/gpu/drm/amd/include/v9_structs.h
+++ b/drivers/gpu/drm/amd/include/v9_structs.h
@@ -672,6 +672,14 @@ struct v9_mqd {
uint32_t reserved_511;
 };
 
+struct v9_mqd_allocation {
+   struct v9_mqd mqd;
+   uint32_t