Re: [PATCH v2] drm/amdgpu: limiting AV1 to first instance on VCN4 decode

2022-07-13 Thread Zhu, James
[AMD Official Use Only - General]

This patch is Reviewed-by: James Zhu 



[PATCH v2] drm/amdgpu: limiting AV1 to first instance on VCN4 decode

2022-07-13 Thread Sonny Jiang
AV1 is only supported on the first instance.

Signed-off-by: Sonny Jiang 
---
 drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c | 131 ++
 1 file changed, 131 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
index 84ac2401895a..a91ffbf902d4 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
@@ -25,6 +25,7 @@
 #include "amdgpu.h"
 #include "amdgpu_vcn.h"
 #include "amdgpu_pm.h"
+#include "amdgpu_cs.h"
 #include "soc15.h"
 #include "soc15d.h"
 #include "soc15_hw_ip.h"
@@ -44,6 +45,9 @@
 #define VCN_VID_SOC_ADDRESS_2_0			0x1fb00
 #define VCN1_VID_SOC_ADDRESS_3_0			0x48300

+#define RDECODE_MSG_CREATE				0x00000000
+#define RDECODE_MESSAGE_CREATE				0x00000001
+
 static int amdgpu_ih_clientid_vcns[] = {
SOC15_IH_CLIENTID_VCN,
SOC15_IH_CLIENTID_VCN1
@@ -1323,6 +1327,132 @@ static void vcn_v4_0_unified_ring_set_wptr(struct amdgpu_ring *ring)
}
 }
 
+static int vcn_v4_0_limit_sched(struct amdgpu_cs_parser *p)
+{
+   struct drm_gpu_scheduler **scheds;
+
+   /* The create msg must be in the first IB submitted */
+   if (atomic_read(&p->entity->fence_seq))
+   return -EINVAL;
+
+   scheds = p->adev->gpu_sched[AMDGPU_HW_IP_VCN_ENC]
+   [AMDGPU_RING_PRIO_0].sched;
+   drm_sched_entity_modify_sched(p->entity, scheds, 1);
+   return 0;
+}
+
+static int vcn_v4_0_dec_msg(struct amdgpu_cs_parser *p, uint64_t addr)
+{
+   struct ttm_operation_ctx ctx = { false, false };
+   struct amdgpu_bo_va_mapping *map;
+   uint32_t *msg, num_buffers;
+   struct amdgpu_bo *bo;
+   uint64_t start, end;
+   unsigned int i;
+   void *ptr;
+   int r;
+
+   addr &= AMDGPU_GMC_HOLE_MASK;
+   r = amdgpu_cs_find_mapping(p, addr, &bo, &map);
+   if (r) {
+   DRM_ERROR("Can't find BO for addr 0x%08llx\n", addr);
+   return r;
+   }
+
+   start = map->start * AMDGPU_GPU_PAGE_SIZE;
+   end = (map->last + 1) * AMDGPU_GPU_PAGE_SIZE;
+   if (addr & 0x7) {
+   DRM_ERROR("VCN messages must be 8 byte aligned!\n");
+   return -EINVAL;
+   }
+
+   bo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
+   amdgpu_bo_placement_from_domain(bo, bo->allowed_domains);
+   r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+   if (r) {
+   DRM_ERROR("Failed validating the VCN message BO (%d)!\n", r);
+   return r;
+   }
+
+   r = amdgpu_bo_kmap(bo, &ptr);
+   if (r) {
+   DRM_ERROR("Failed mapping the VCN message (%d)!\n", r);
+   return r;
+   }
+
+   msg = ptr + addr - start;
+
+   /* Check length */
+   if (msg[1] > end - addr) {
+   r = -EINVAL;
+   goto out;
+   }
+
+   if (msg[3] != RDECODE_MSG_CREATE)
+   goto out;
+
+   num_buffers = msg[2];
+   for (i = 0, msg = &msg[6]; i < num_buffers; ++i, msg += 4) {
+   uint32_t offset, size, *create;
+
+   if (msg[0] != RDECODE_MESSAGE_CREATE)
+   continue;
+
+   offset = msg[1];
+   size = msg[2];
+
+   if (offset + size > end) {
+   r = -EINVAL;
+   goto out;
+   }
+
+   create = ptr + addr + offset - start;
+
+   /* H264, HEVC and VP9 can run on any instance */
+   if (create[0] == 0x7 || create[0] == 0x10 || create[0] == 0x11)
+   continue;
+
+   r = vcn_v4_0_limit_sched(p);
+   if (r)
+   goto out;
+   }
+
+out:
+   amdgpu_bo_kunmap(bo);
+   return r;
+}
+
+#define RADEON_VCN_ENGINE_TYPE_DECODE				(0x00000003)
+
+static int vcn_v4_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p,
+   struct amdgpu_job *job,
+   struct amdgpu_ib *ib)
+{
+   struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched);
+   struct amdgpu_vcn_decode_buffer *decode_buffer = NULL;
+   uint32_t val;
+   int r = 0;
+
+   /* The first instance can decode anything */
+   if (!ring->me)
+   return r;
+
+   /* unified queue ib header has 8 double words. */
+   if (ib->length_dw < 8)
+   return r;
+
+   val = amdgpu_ib_get_value(ib, 6); //RADEON_VCN_ENGINE_TYPE
+
+   if (val == RADEON_VCN_ENGINE_TYPE_DECODE) {
+   decode_buffer = (struct amdgpu_vcn_decode_buffer *)&ib->ptr[10];
+
+   if (decode_buffer->valid_buf_flag & 0x1)
+   r = vcn_v4_0_dec_msg(p, ((u64)decode_buffer->msg_buffer_address_hi) << 32 |
+   decode_buffer->msg_buffer_address_lo);
+   }
+
+   return r;
+}