[PATCH] drm/amdgpu: Fix a race of IB test

2021-09-10 Thread xinhui pan
Direct IB submission should be exclusive, so take the reset_sem for write.

Signed-off-by: xinhui pan 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 9 +++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index 19323b4cce7b..acbe02928791 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -1358,10 +1358,15 @@ static int amdgpu_debugfs_test_ib_show(struct seq_file *m, void *unused)
}
 
/* Avoid accidently unparking the sched thread during GPU reset */
-	r = down_read_killable(&adev->reset_sem);
+	r = down_write_killable(&adev->reset_sem);
if (r)
return r;
 
+	/* Avoid concurrent IB tests, but don't cancel the delayed init work,
+	 * since more code may be added to it later.
+	 */
+	flush_delayed_work(&adev->delayed_init_work);
+
/* hold on the scheduler */
for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
struct amdgpu_ring *ring = adev->rings[i];
@@ -1387,7 +1392,7 @@ static int amdgpu_debugfs_test_ib_show(struct seq_file *m, void *unused)
kthread_unpark(ring->sched.thread);
}
 
-	up_read(&adev->reset_sem);
+	up_write(&adev->reset_sem);
 
pm_runtime_mark_last_busy(dev->dev);
pm_runtime_put_autosuspend(dev->dev);
-- 
2.25.1
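For context: reset_sem is a plain rw_semaphore, so two debugfs-triggered IB tests could previously both take it as readers and run concurrently. A minimal sketch of the semantics the fix relies on (illustrative only, not part of the patch):

	#include <linux/rwsem.h>

	static DECLARE_RWSEM(example_sem);

	/* Readers may hold the lock concurrently; this is why two
	 * direct IB tests could previously race with each other. */
	static void reader_path(void)
	{
		down_read(&example_sem);
		/* ... runs in parallel with other readers ... */
		up_read(&example_sem);
	}

	/* A writer excludes readers and other writers alike, giving a
	 * direct IB submission the exclusivity it needs. */
	static void writer_path(void)
	{
		down_write(&example_sem);
		/* ... exclusive section ... */
		up_write(&example_sem);
	}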



[PATCH v3 2/3] drm/amdgpu: VCE avoid memory allocation during IB test

2021-09-10 Thread xinhui pan
Allocate the extra msg from the direct IB pool.

Signed-off-by: xinhui pan 
---
change from v1:
align addr up to the GPU page boundary.
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c | 27 -
 1 file changed, 13 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
index e9fdf49d69e8..caa4d3420e00 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
@@ -82,7 +82,6 @@ MODULE_FIRMWARE(FIRMWARE_VEGA20);
 
 static void amdgpu_vce_idle_work_handler(struct work_struct *work);
 static int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
-struct amdgpu_bo *bo,
 struct dma_fence **fence);
 static int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
  bool direct, struct dma_fence **fence);
@@ -441,12 +440,12 @@ void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp)
  * Open up a stream for HW test
  */
 static int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
-struct amdgpu_bo *bo,
 struct dma_fence **fence)
 {
const unsigned ib_size_dw = 1024;
struct amdgpu_job *job;
struct amdgpu_ib *ib;
+   struct amdgpu_ib ib_msg;
struct dma_fence *f = NULL;
uint64_t addr;
int i, r;
@@ -456,9 +455,17 @@ static int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
if (r)
return r;
 
-	ib = &job->ibs[0];
+	memset(&ib_msg, 0, sizeof(ib_msg));
+	/* only one gpu page is needed, alloc +1 page to make addr aligned. */
+	r = amdgpu_ib_get(ring->adev, NULL, AMDGPU_GPU_PAGE_SIZE * 2,
+			  AMDGPU_IB_POOL_DIRECT,
+			  &ib_msg);
+   if (r)
+   goto err;
 
-   addr = amdgpu_bo_gpu_offset(bo);
+	ib = &job->ibs[0];
+   /* let addr point to page boundary */
+   addr = AMDGPU_GPU_PAGE_ALIGN(ib_msg.gpu_addr);
 
/* stitch together an VCE create msg */
ib->length_dw = 0;
@@ -498,6 +505,7 @@ static int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
ib->ptr[i] = 0x0;
 
	r = amdgpu_job_submit_direct(job, ring, &f);
+	amdgpu_ib_free(ring->adev, &ib_msg, f);
if (r)
goto err;
 
@@ -1134,20 +1142,13 @@ int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring)
 int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 {
struct dma_fence *fence = NULL;
-   struct amdgpu_bo *bo = NULL;
long r;
 
/* skip vce ring1/2 ib test for now, since it's not reliable */
	if (ring != &ring->adev->vce.ring[0])
return 0;
 
-	r = amdgpu_bo_create_reserved(ring->adev, 512, PAGE_SIZE,
-				      AMDGPU_GEM_DOMAIN_VRAM,
-				      &bo, NULL, NULL);
-   if (r)
-   return r;
-
-   r = amdgpu_vce_get_create_msg(ring, 1, bo, NULL);
+   r = amdgpu_vce_get_create_msg(ring, 1, NULL);
if (r)
goto error;
 
@@ -1163,8 +1164,6 @@ int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 
 error:
dma_fence_put(fence);
-   amdgpu_bo_unreserve(bo);
-	amdgpu_bo_free_kernel(&bo, NULL, NULL);
return r;
 }
 
-- 
2.25.1
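Why the +1 page works: an IB allocation is not guaranteed to start on a GPU page boundary, but a two-page allocation always contains one whole aligned page. A sketch of the arithmetic, reproducing the real AMDGPU_GPU_PAGE_* macros for illustration (the helper name is made up):

	#define AMDGPU_GPU_PAGE_SIZE	4096
	#define AMDGPU_GPU_PAGE_MASK	(AMDGPU_GPU_PAGE_SIZE - 1)
	#define AMDGPU_GPU_PAGE_ALIGN(a) (((a) + AMDGPU_GPU_PAGE_MASK) & ~AMDGPU_GPU_PAGE_MASK)

	/* Worst case: gpu_addr is one byte past a page boundary; the
	 * aligned address plus a full page still fits, since
	 * aligned + PAGE_SIZE <= gpu_addr + 2 * PAGE_SIZE. */
	static uint64_t msg_addr(uint64_t gpu_addr)
	{
		return AMDGPU_GPU_PAGE_ALIGN(gpu_addr);
	}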



[PATCH v3 3/3] drm/amdgpu: VCN avoid memory allocation during IB test

2021-09-10 Thread xinhui pan
Allocate the extra msg from the direct IB pool.

Reviewed-by: Christian König 
Signed-off-by: xinhui pan 
---
change from v1:
align addr up to the GPU page boundary.
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 97 +++--
 1 file changed, 44 insertions(+), 53 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
index 561296a85b43..b60b8fe5bf67 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -541,15 +541,14 @@ int amdgpu_vcn_dec_sw_ring_test_ring(struct amdgpu_ring *ring)
 }
 
 static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring,
-  struct amdgpu_bo *bo,
+  struct amdgpu_ib *ib_msg,
   struct dma_fence **fence)
 {
struct amdgpu_device *adev = ring->adev;
struct dma_fence *f = NULL;
struct amdgpu_job *job;
struct amdgpu_ib *ib;
-   uint64_t addr;
-   void *msg = NULL;
+   uint64_t addr = AMDGPU_GPU_PAGE_ALIGN(ib_msg->gpu_addr);
int i, r;
 
	r = amdgpu_job_alloc_with_ib(adev, 64,
				     AMDGPU_IB_POOL_DIRECT, &job);
@@ -558,8 +557,6 @@ static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring,
	if (r)
		goto err;
 
	ib = &job->ibs[0];
-   addr = amdgpu_bo_gpu_offset(bo);
-   msg = amdgpu_bo_kptr(bo);
ib->ptr[0] = PACKET0(adev->vcn.internal.data0, 0);
ib->ptr[1] = addr;
ib->ptr[2] = PACKET0(adev->vcn.internal.data1, 0);
@@ -576,9 +573,7 @@ static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring,
if (r)
goto err_free;
 
-   amdgpu_bo_fence(bo, f, false);
-   amdgpu_bo_unreserve(bo);
-	amdgpu_bo_free_kernel(&bo, NULL, (void **)&msg);
+   amdgpu_ib_free(adev, ib_msg, f);
 
if (fence)
*fence = dma_fence_get(f);
@@ -588,27 +583,26 @@ static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring,
 
 err_free:
amdgpu_job_free(job);
-
 err:
-   amdgpu_bo_unreserve(bo);
-	amdgpu_bo_free_kernel(&bo, NULL, (void **)&msg);
+   amdgpu_ib_free(adev, ib_msg, f);
return r;
 }
 
 static int amdgpu_vcn_dec_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
-struct amdgpu_bo **bo)
+   struct amdgpu_ib *ib)
 {
struct amdgpu_device *adev = ring->adev;
uint32_t *msg;
int r, i;
 
-   *bo = NULL;
-   r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM,
-				      bo, NULL, (void **)&msg);
+   memset(ib, 0, sizeof(*ib));
+   r = amdgpu_ib_get(adev, NULL, AMDGPU_GPU_PAGE_SIZE * 2,
+   AMDGPU_IB_POOL_DIRECT,
+   ib);
if (r)
return r;
 
+   msg = (uint32_t *)AMDGPU_GPU_PAGE_ALIGN((unsigned long)ib->ptr);
msg[0] = cpu_to_le32(0x0028);
msg[1] = cpu_to_le32(0x0038);
msg[2] = cpu_to_le32(0x0001);
@@ -630,19 +624,20 @@ static int amdgpu_vcn_dec_get_create_msg(struct amdgpu_ring *ring, uint32_t hand
 }
 
 static int amdgpu_vcn_dec_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
- struct amdgpu_bo **bo)
+ struct amdgpu_ib *ib)
 {
struct amdgpu_device *adev = ring->adev;
uint32_t *msg;
int r, i;
 
-   *bo = NULL;
-   r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM,
-				      bo, NULL, (void **)&msg);
+   memset(ib, 0, sizeof(*ib));
+   r = amdgpu_ib_get(adev, NULL, AMDGPU_GPU_PAGE_SIZE * 2,
+   AMDGPU_IB_POOL_DIRECT,
+   ib);
if (r)
return r;
 
+   msg = (uint32_t *)AMDGPU_GPU_PAGE_ALIGN((unsigned long)ib->ptr);
msg[0] = cpu_to_le32(0x0028);
msg[1] = cpu_to_le32(0x0018);
msg[2] = cpu_to_le32(0x);
@@ -658,21 +653,21 @@ static int amdgpu_vcn_dec_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han
 int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 {
struct dma_fence *fence = NULL;
-   struct amdgpu_bo *bo;
+   struct amdgpu_ib ib;
long r;
 
-	r = amdgpu_vcn_dec_get_create_msg(ring, 1, &bo);
+	r = amdgpu_vcn_dec_get_create_msg(ring, 1, &ib);
if (r)
goto error;
 
-	r = amdgpu_vcn_dec_send_msg(ring, bo, NULL);
+	r = amdgpu_vcn_dec_send_msg(ring, &ib, NULL);
if (r)
goto error;
-	r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, &bo);
+	r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, &ib);
if (r)
goto error;
 
-	r = amdgpu_vcn_dec_send_msg(ring, bo, &fence);
+	r = amdgpu_vcn_dec_send_msg(ring, &ib, &fence);
if (r)
goto error;

[PATCH v3 1/3] drm/amdgpu: UVD avoid memory allocation during IB test

2021-09-10 Thread xinhui pan
Move the BO allocation into sw_init.

Signed-off-by: xinhui pan 
---
change from v2:
use a reservation trylock for the direct IB test.
change from v1:
only use the pre-allocated BO for direct IB submission,
and take its reservation lock to avoid any potential race.
Better safe than sorry.
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 104 +---
 drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h |   1 +
 drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c   |   8 +-
 drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c   |   8 +-
 4 files changed, 79 insertions(+), 42 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index d451c359606a..a4b3dd6b38c6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -134,6 +134,51 @@ MODULE_FIRMWARE(FIRMWARE_VEGA12);
 MODULE_FIRMWARE(FIRMWARE_VEGA20);
 
 static void amdgpu_uvd_idle_work_handler(struct work_struct *work);
+static void amdgpu_uvd_force_into_uvd_segment(struct amdgpu_bo *abo);
+
+static int amdgpu_uvd_create_msg_bo_helper(struct amdgpu_device *adev,
+  uint32_t size,
+  struct amdgpu_bo **bo_ptr)
+{
+   struct ttm_operation_ctx ctx = { true, false };
+   struct amdgpu_bo *bo = NULL;
+   void *addr;
+   int r;
+
+   r = amdgpu_bo_create_reserved(adev, size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_GTT,
+				      &bo, NULL, &addr);
+   if (r)
+   return r;
+
+   if (adev->uvd.address_64_bit) {
+   *bo_ptr = bo;
+   return 0;
+   }
+
+   amdgpu_bo_kunmap(bo);
+   amdgpu_bo_unpin(bo);
+   amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_VRAM);
+   amdgpu_uvd_force_into_uvd_segment(bo);
+	r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+   if (r)
+   goto err;
+   r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_VRAM);
+   if (r)
+   goto err_pin;
+	r = amdgpu_bo_kmap(bo, &addr);
+   if (r)
+   goto err_kmap;
+   *bo_ptr = bo;
+   return 0;
+err_kmap:
+   amdgpu_bo_unpin(bo);
+err_pin:
+err:
+   amdgpu_bo_unreserve(bo);
+	amdgpu_bo_unref(&bo);
+   return r;
+}
 
 int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
 {
@@ -302,6 +347,11 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
	if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_UVD, 5, 0))
adev->uvd.address_64_bit = true;
 
+	r = amdgpu_uvd_create_msg_bo_helper(adev, 128 << 10, &adev->uvd.ib_bo);
+   if (r)
+   return r;
+   amdgpu_bo_unreserve(adev->uvd.ib_bo);
+
switch (adev->asic_type) {
case CHIP_TONGA:
adev->uvd.use_ctx_buf = adev->uvd.fw_version >= FW_1_65_10;
@@ -324,6 +374,7 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
 
 int amdgpu_uvd_sw_fini(struct amdgpu_device *adev)
 {
+   void *addr = amdgpu_bo_kptr(adev->uvd.ib_bo);
int i, j;
 
	drm_sched_entity_destroy(&adev->uvd.entity);
@@ -342,6 +393,7 @@ int amdgpu_uvd_sw_fini(struct amdgpu_device *adev)
for (i = 0; i < AMDGPU_MAX_UVD_ENC_RINGS; ++i)
		amdgpu_ring_fini(&adev->uvd.inst[j].ring_enc[i]);
}
+	amdgpu_bo_free_kernel(&adev->uvd.ib_bo, NULL, &addr);
release_firmware(adev->uvd.fw);
 
return 0;
@@ -1080,23 +1132,10 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
unsigned offset_idx = 0;
unsigned offset[3] = { UVD_BASE_SI, 0, 0 };
 
-   amdgpu_bo_kunmap(bo);
-   amdgpu_bo_unpin(bo);
-
-   if (!ring->adev->uvd.address_64_bit) {
-   struct ttm_operation_ctx ctx = { true, false };
-
-   amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_VRAM);
-   amdgpu_uvd_force_into_uvd_segment(bo);
-		r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
-   if (r)
-   goto err;
-   }
-
r = amdgpu_job_alloc_with_ib(adev, 64, direct ? AMDGPU_IB_POOL_DIRECT :
				     AMDGPU_IB_POOL_DELAYED, &job);
if (r)
-   goto err;
+   return r;
 
if (adev->asic_type >= CHIP_VEGA10) {
offset_idx = 1 + ring->me;
@@ -1148,8 +1187,6 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
}
 
amdgpu_bo_fence(bo, f, false);
-   amdgpu_bo_unreserve(bo);
-	amdgpu_bo_unref(&bo);
 
if (fence)
*fence = dma_fence_get(f);
@@ -1159,10 +1196,6 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
 
 err_free:
amdgpu_job_free(job);
-
-err:
-   amdgpu_bo_unreserve(bo);
-	amdgpu_bo_unref(&bo);
return r;
 }
 
@@ -1173,16 +1206,16 @@ int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
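The v2→v3 change ("use reservation trylock for direct IB test") lives in the truncated uvd_v6_0.c/uvd_v7_0.c hunks and is not visible above. A hedged sketch of what such a trylock looks like for the pre-allocated BO (illustrative only; the actual patch may differ in detail):

	struct amdgpu_bo *bo = adev->uvd.ib_bo;

	/* A direct IB test must not sleep waiting for the reservation;
	 * if someone else holds it, back off instead of deadlocking. */
	if (!dma_resv_trylock(bo->tbo.base.resv))
		return -EBUSY;

	/* ... build and submit the direct IB using bo ... */

	dma_resv_unlock(bo->tbo.base.resv);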
  

[PATCH 1/1] drm/amdkfd: Add sysfs bitfields and enums to uAPI

2021-09-10 Thread Felix Kuehling
These bits are de-facto part of the uAPI, so declare them in a uAPI header.

Signed-off-by: Felix Kuehling 
---
 MAINTAINERS   |   1 +
 drivers/gpu/drm/amd/amdkfd/kfd_topology.h |  46 +
 include/uapi/linux/kfd_sysfs.h| 108 ++
 3 files changed, 110 insertions(+), 45 deletions(-)
 create mode 100644 include/uapi/linux/kfd_sysfs.h

diff --git a/MAINTAINERS b/MAINTAINERS
index 84cd16694640..7554ec928ee2 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -930,6 +930,7 @@ F:  drivers/gpu/drm/amd/include/kgd_kfd_interface.h
 F: drivers/gpu/drm/amd/include/v9_structs.h
 F: drivers/gpu/drm/amd/include/vi_structs.h
 F: include/uapi/linux/kfd_ioctl.h
+F: include/uapi/linux/kfd_sysfs.h
 
 AMD SPI DRIVER
 M: Sanjay R Mehta 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
index a8db017c9b8e..f0cc59d2fd5d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
@@ -25,38 +25,11 @@
 
 #include <linux/types.h>
 #include <linux/list.h>
+#include <linux/kfd_sysfs.h>
 #include "kfd_crat.h"
 
 #define KFD_TOPOLOGY_PUBLIC_NAME_SIZE 32
 
-#define HSA_CAP_HOT_PLUGGABLE			0x00000001
-#define HSA_CAP_ATS_PRESENT			0x00000002
-#define HSA_CAP_SHARED_WITH_GRAPHICS		0x00000004
-#define HSA_CAP_QUEUE_SIZE_POW2			0x00000008
-#define HSA_CAP_QUEUE_SIZE_32BIT		0x00000010
-#define HSA_CAP_QUEUE_IDLE_EVENT		0x00000020
-#define HSA_CAP_VA_LIMIT			0x00000040
-#define HSA_CAP_WATCH_POINTS_SUPPORTED		0x00000080
-#define HSA_CAP_WATCH_POINTS_TOTALBITS_MASK	0x00000f00
-#define HSA_CAP_WATCH_POINTS_TOTALBITS_SHIFT	8
-#define HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK	0x00003000
-#define HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT	12
-
-#define HSA_CAP_DOORBELL_TYPE_PRE_1_0		0x0
-#define HSA_CAP_DOORBELL_TYPE_1_0		0x1
-#define HSA_CAP_DOORBELL_TYPE_2_0		0x2
-#define HSA_CAP_AQL_QUEUE_DOUBLE_MAP		0x00004000
-
-#define HSA_CAP_RESERVED_WAS_SRAM_EDCSUPPORTED	0x00080000 /* Old buggy user mode depends on this being 0 */
-#define HSA_CAP_MEM_EDCSUPPORTED		0x00100000
-#define HSA_CAP_RASEVENTNOTIFY			0x00200000
-#define HSA_CAP_ASIC_REVISION_MASK		0x03c00000
-#define HSA_CAP_ASIC_REVISION_SHIFT		22
-#define HSA_CAP_SRAM_EDCSUPPORTED		0x04000000
-#define HSA_CAP_SVMAPI_SUPPORTED		0x08000000
-#define HSA_CAP_FLAGS_COHERENTHOSTACCESS	0x10000000
-#define HSA_CAP_RESERVED			0xe00f8000
-
 struct kfd_node_properties {
uint64_t hive_id;
uint32_t cpu_cores_count;
@@ -93,17 +66,6 @@ struct kfd_node_properties {
char name[KFD_TOPOLOGY_PUBLIC_NAME_SIZE];
 };
 
-#define HSA_MEM_HEAP_TYPE_SYSTEM	0
-#define HSA_MEM_HEAP_TYPE_FB_PUBLIC	1
-#define HSA_MEM_HEAP_TYPE_FB_PRIVATE	2
-#define HSA_MEM_HEAP_TYPE_GPU_GDS	3
-#define HSA_MEM_HEAP_TYPE_GPU_LDS	4
-#define HSA_MEM_HEAP_TYPE_GPU_SCRATCH	5
-
-#define HSA_MEM_FLAGS_HOT_PLUGGABLE	0x00000001
-#define HSA_MEM_FLAGS_NON_VOLATILE	0x00000002
-#define HSA_MEM_FLAGS_RESERVED		0xfffffffc
-
 struct kfd_mem_properties {
struct list_headlist;
uint32_theap_type;
@@ -116,12 +78,6 @@ struct kfd_mem_properties {
struct attributeattr;
 };
 
-#define HSA_CACHE_TYPE_DATA		0x00000001
-#define HSA_CACHE_TYPE_INSTRUCTION	0x00000002
-#define HSA_CACHE_TYPE_CPU		0x00000004
-#define HSA_CACHE_TYPE_HSACU		0x00000008
-#define HSA_CACHE_TYPE_RESERVED		0xfffffff0
-
 struct kfd_cache_properties {
struct list_headlist;
uint32_tprocessor_id_low;
diff --git a/include/uapi/linux/kfd_sysfs.h b/include/uapi/linux/kfd_sysfs.h
new file mode 100644
index ..e1fb78b4bf09
--- /dev/null
+++ b/include/uapi/linux/kfd_sysfs.h
@@ -0,0 +1,108 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT WITH Linux-syscall-note */
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND 
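Since the point of the patch is that user mode already depends on these bits, here is a hedged sketch of how a userspace consumer typically reads them. The sysfs path is the standard KFD topology location, but the "capability" key name and the parsing code are illustrative, not taken from the patch:

	#include <stdio.h>
	#include <string.h>
	#include <inttypes.h>

	/* Mirrors HSA_CAP_WATCH_POINTS_TOTALBITS_* from the new uAPI header. */
	#define HSA_CAP_WATCH_POINTS_TOTALBITS_MASK	0x00000f00
	#define HSA_CAP_WATCH_POINTS_TOTALBITS_SHIFT	8

	int main(void)
	{
		FILE *f = fopen("/sys/class/kfd/kfd/topology/nodes/0/properties", "r");
		char name[64];
		uint64_t val, caps = 0;

		if (!f)
			return 1;
		while (fscanf(f, "%63s %" SCNu64, name, &val) == 2)
			if (!strcmp(name, "capability"))
				caps = val;
		fclose(f);

		printf("watch points totalbits: %" PRIu64 "\n",
		       (caps & HSA_CAP_WATCH_POINTS_TOTALBITS_MASK) >>
		       HSA_CAP_WATCH_POINTS_TOTALBITS_SHIFT);
		return 0;
	}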

Re: [PATCH] drm/amdgpu: Get atomicOps info from Host for sriov setup

2021-09-10 Thread Felix Kuehling

On 2021-09-10 11:15 a.m., shaoyunl wrote:

The AtomicOp Requester Enable bit is reserved in VFs and the PF value applies to all
associated VFs, so the guest driver can not directly enable atomicOps for a VF; it
depends on the PF to enable them. In the current design, the amdgpu driver will get
the enabled atomicOps bits through private pf2vf data.

Signed-off-by: shaoyunl 


Reviewed-by: Felix Kuehling 



---
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c  | 24 +++--
  drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h |  4 +++-
  2 files changed, 16 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 653bd8fdaa33..3ae1721ca859 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3529,17 +3529,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);
  
-	/* enable PCIE atomic ops */

-   r = pci_enable_atomic_ops_to_root(adev->pdev,
- PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
- PCI_EXP_DEVCAP2_ATOMIC_COMP64);
-   if (r) {
-   adev->have_atomics_support = false;
-   DRM_INFO("PCIE atomic ops is not supported\n");
-   } else {
-   adev->have_atomics_support = true;
-   }
-
amdgpu_device_get_pcie_info(adev);
  
  	if (amdgpu_mcbp)

@@ -3562,6 +3551,19 @@ int amdgpu_device_init(struct amdgpu_device *adev,
if (r)
return r;
  
+	/* enable PCIE atomic ops */
+	if (amdgpu_sriov_vf(adev))
+		adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *)
+			adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_enabled_flags ==
+			(PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64);
+	else
+		adev->have_atomics_support =
+			!pci_enable_atomic_ops_to_root(adev->pdev,
+						       PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
+						       PCI_EXP_DEVCAP2_ATOMIC_COMP64);
+	if (!adev->have_atomics_support)
+		dev_info(adev->dev, "PCIE atomic ops is not supported\n");
+
/* doorbell bar mapping and doorbell index init*/
amdgpu_device_doorbell_init(adev);
  
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
index a434c71fde8e..995899191288 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
@@ -204,8 +204,10 @@ struct amd_sriov_msg_pf2vf_info {
} mm_bw_management[AMD_SRIOV_MSG_RESERVE_VCN_INST];
/* UUID info */
struct amd_sriov_msg_uuid_info uuid_info;
+   /* pcie atomic Ops info */
+   uint32_t pcie_atomic_ops_enabled_flags;
/* reserved */
-   uint32_t reserved[256 - 47];
+   uint32_t reserved[256 - 48];
  };
  
  struct amd_sriov_msg_vf2pf_info_header {


Re: Re: Re: Re: Re: [PATCH 2/4] drm/amdgpu: UVD avoid memory allocation during IB test

2021-09-10 Thread Christian König
Yeah, that indeed sounds buggy. Probably easiest to just down-write the reset_sem.


Christian.

On 10.09.21 at 13:48, Pan, Xinhui wrote:

[AMD Official Use Only]

Looks like amdgpu_debugfs_test_ib_show uses direct IB submission too.
It parks the ring scheduler thread and down-reads the reset_sem to avoid racing with gpu reset.
BUT amdgpu_debugfs_test_ib_show itself could be called in parallel. Need to fix it.


From: Koenig, Christian
Sent: September 10, 2021 19:10
To: Pan, Xinhui; amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander
Subject: Re: Re: Re: Re: [PATCH 2/4] drm/amdgpu: UVD avoid memory allocation during IB test

Yeah, but that IB test should use the indirect submission through the
scheduler as well.

Otherwise you have IB test and scheduler writing both to the ring buffer
at the same time and potentially corrupting it.

Christian.

On 10.09.21 at 12:10, Pan, Xinhui wrote:

[AMD Official Use Only]

We need to take this lock.
The IB test can be triggered through debugfs. These days I usually test it by cat'ing the gpu recovery and amdgpu_test_ib files in debugfs.



From: Koenig, Christian
Sent: September 10, 2021 18:02
To: Pan, Xinhui; amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander
Subject: Re: Re: Re: [PATCH 2/4] drm/amdgpu: UVD avoid memory allocation during IB test

*sigh* yeah, you are probably right. Wouldn't be too simple if this would be easy, would it?

In this case you can also skip taking the reservation lock for the
pre-allocated BO, since there should absolutely be only one user at a time.

Christian.

On 10.09.21 at 11:42, Pan, Xinhui wrote:

[AMD Official Use Only]

Oh god, the UVD free handler submits a delayed msg which depends on the scheduler while the reservation lock is held.
This is not allowed, as commit c8e42d57859d "drm/amdgpu: implement more ib pools (v2)" says:
any jobs which schedule IBs without dependence on the gpu scheduler should use the DIRECT pool.

Looks like we should only use the reserved BO for direct IB submission.
As for delayed IB submission, we could alloc a new one dynamically.

From: Koenig, Christian
Sent: September 10, 2021 16:53
To: Pan, Xinhui; amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander
Subject: Re: Re: [PATCH 2/4] drm/amdgpu: UVD avoid memory allocation during IB test

It should not, unless we are OOM, which should not happen during driver initialization.

But there is another problem I'm thinking about: Do we still use UVD IB
submissions to forcefully tear down UVD sessions when a process crashes?

If yes that stuff could easily deadlock with an IB test executed during
GPU reset.

Christian.

On 10.09.21 at 10:18, Pan, Xinhui wrote:

[AMD Official Use Only]

I am wondering if amdgpu_bo_pin would change the BO's placement in the future.
For now, the new placement is calculated by new = old ∩ new.


From: Koenig, Christian
Sent: September 10, 2021 14:24
To: Pan, Xinhui; amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander
Subject: Re: [PATCH 2/4] drm/amdgpu: UVD avoid memory allocation during IB test

On 10.09.21 at 02:38, xinhui pan wrote:

move BO allocation in sw_init.

Signed-off-by: xinhui pan 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 75 +++--
  drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h |  1 +
  drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c   |  8 +--
  drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c   |  8 +--
  4 files changed, 49 insertions(+), 43 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index d451c359606a..e2eaac941d37 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -141,6 +141,8 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
  const char *fw_name;
  const struct common_firmware_header *hdr;
  unsigned family_id;
+ struct amdgpu_bo *bo = NULL;
+ void *addr;
  int i, j, r;

	INIT_DELAYED_WORK(&adev->uvd.idle_work, amdgpu_uvd_idle_work_handler);
@@ -298,9 +300,34 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
  adev->uvd.filp[i] = NULL;
  }

+ r = amdgpu_bo_create_reserved(adev, 128 << 10, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_GTT,
+ &bo, NULL, &addr);
+ if (r)
+ return r;
+
  /* from uvd v5.0 HW addressing capacity increased to 64 bits */
- if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_UVD, 5, 0))
+ if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_UVD, 5, 0)) {
  adev->uvd.address_64_bit = true;
+ amdgpu_bo_kunmap(bo);
+ amdgpu_bo_unpin(bo);
+ r = amdgpu_bo_pin_restricted(bo, AMDGPU_GEM_DOMAIN_VRAM,
+ 0, 256 << 20);

Please keep using amdgpu_uvd_force_into_uvd_segment() and validate here,
cause I want to remove amdgpu_bo_pin_restricted() sooner or later.


+ 

[PATCH] amd/display: enable panel orientation quirks

2021-09-10 Thread Simon Ser
This patch allows panel orientation quirks from DRM core to be
used. They attach a DRM connector property "panel orientation"
which indicates in which direction the panel has been mounted.
Some machines have the internal screen mounted with a rotation.

Since the panel orientation quirks need the native mode from the
EDID, check for it in amdgpu_dm_connector_ddc_get_modes.

Signed-off-by: Simon Ser 
Cc: Alex Deucher 
Cc: Harry Wentland 
Cc: Nicholas Kazlauskas 
---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 28 +++
 1 file changed, 28 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 53363728dbbd..a420602f1794 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -7680,6 +7680,32 @@ static void amdgpu_dm_connector_add_common_modes(struct drm_encoder *encoder,
}
 }
 
+static void amdgpu_set_panel_orientation(struct drm_connector *connector)
+{
+   struct drm_encoder *encoder;
+   struct amdgpu_encoder *amdgpu_encoder;
+   const struct drm_display_mode *native_mode;
+
+   if (connector->connector_type != DRM_MODE_CONNECTOR_eDP &&
+   connector->connector_type != DRM_MODE_CONNECTOR_LVDS)
+   return;
+
+   encoder = amdgpu_dm_connector_to_encoder(connector);
+   if (!encoder)
+   return;
+
+   amdgpu_encoder = to_amdgpu_encoder(encoder);
+
+	native_mode = &amdgpu_encoder->native_mode;
+   if (native_mode->hdisplay == 0 || native_mode->vdisplay == 0)
+   return;
+
+	drm_connector_set_panel_orientation_with_quirk(connector,
+						       DRM_MODE_PANEL_ORIENTATION_UNKNOWN,
+						       native_mode->hdisplay,
+						       native_mode->vdisplay);
+}
+
 static void amdgpu_dm_connector_ddc_get_modes(struct drm_connector *connector,
  struct edid *edid)
 {
@@ -7708,6 +7734,8 @@ static void amdgpu_dm_connector_ddc_get_modes(struct drm_connector *connector,
 * restored here.
 */
amdgpu_dm_update_freesync_caps(connector, edid);
+
+   amdgpu_set_panel_orientation(connector);
} else {
amdgpu_dm_connector->num_modes = 0;
}
-- 
2.33.0
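Compositors pick the property up through the standard KMS property interface. A hedged sketch of reading it with libdrm (the API calls are real libdrm; the wrapper function itself is illustrative):

	#include <stdio.h>
	#include <string.h>
	#include <xf86drmMode.h>

	/* Print the "panel orientation" property of one connector;
	 * fd is an open DRM device, conn_id a connector object id. */
	static void print_orientation(int fd, uint32_t conn_id)
	{
		drmModeObjectPropertiesPtr props =
			drmModeObjectGetProperties(fd, conn_id, DRM_MODE_OBJECT_CONNECTOR);
		uint32_t i;
		int j;

		if (!props)
			return;
		for (i = 0; i < props->count_props; i++) {
			drmModePropertyPtr prop = drmModeGetProperty(fd, props->props[i]);
			if (prop && !strcmp(prop->name, "panel orientation"))
				for (j = 0; j < prop->count_enums; j++)
					if (prop->enums[j].value == props->prop_values[i])
						printf("panel orientation: %s\n",
						       prop->enums[j].name);
			drmModeFreeProperty(prop);
		}
		drmModeFreeObjectProperties(props);
	}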




[PATCH] drm/amdgpu: Get atomicOps info from Host for sriov setup

2021-09-10 Thread shaoyunl
The AtomicOp Requester Enable bit is reserved in VFs and the PF value applies to all
associated VFs, so the guest driver can not directly enable atomicOps for a VF; it
depends on the PF to enable them. In the current design, the amdgpu driver will get
the enabled atomicOps bits through private pf2vf data.

Signed-off-by: shaoyunl 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c  | 24 +++--
 drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h |  4 +++-
 2 files changed, 16 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 653bd8fdaa33..3ae1721ca859 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3529,17 +3529,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);
 
-   /* enable PCIE atomic ops */
-   r = pci_enable_atomic_ops_to_root(adev->pdev,
- PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
- PCI_EXP_DEVCAP2_ATOMIC_COMP64);
-   if (r) {
-   adev->have_atomics_support = false;
-   DRM_INFO("PCIE atomic ops is not supported\n");
-   } else {
-   adev->have_atomics_support = true;
-   }
-
amdgpu_device_get_pcie_info(adev);
 
if (amdgpu_mcbp)
@@ -3562,6 +3551,19 @@ int amdgpu_device_init(struct amdgpu_device *adev,
if (r)
return r;
 
+	/* enable PCIE atomic ops */
+	if (amdgpu_sriov_vf(adev))
+		adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *)
+			adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_enabled_flags ==
+			(PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64);
+	else
+		adev->have_atomics_support =
+			!pci_enable_atomic_ops_to_root(adev->pdev,
+						       PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
+						       PCI_EXP_DEVCAP2_ATOMIC_COMP64);
+	if (!adev->have_atomics_support)
+		dev_info(adev->dev, "PCIE atomic ops is not supported\n");
+
/* doorbell bar mapping and doorbell index init*/
amdgpu_device_doorbell_init(adev);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
index a434c71fde8e..995899191288 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
@@ -204,8 +204,10 @@ struct amd_sriov_msg_pf2vf_info {
} mm_bw_management[AMD_SRIOV_MSG_RESERVE_VCN_INST];
/* UUID info */
struct amd_sriov_msg_uuid_info uuid_info;
+   /* pcie atomic Ops info */
+   uint32_t pcie_atomic_ops_enabled_flags;
/* reserved */
-   uint32_t reserved[256 - 47];
+   uint32_t reserved[256 - 48];
 };
 
 struct amd_sriov_msg_vf2pf_info_header {
-- 
2.17.1
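The reserved[256 - 47] → reserved[256 - 48] change is the bookkeeping that keeps the pf2vf block at a fixed size: every new u32 field is paid for out of the reserved tail. Schematically (the field layout below is illustrative, not the real struct):

	struct example_pf2vf_info {
		uint32_t fields_before_this_patch[47];  /* 47 u32s already defined */
		uint32_t pcie_atomic_ops_enabled_flags; /* the new, 48th u32 */
		uint32_t reserved[256 - 48];            /* shrinks by one u32 */
	};
	/* sizeof(struct example_pf2vf_info) stays 256 * sizeof(uint32_t). */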



Re: Re: Re: Re: [PATCH 2/4] drm/amdgpu: UVD avoid memory allocation during IB test

2021-09-10 Thread Christian König
Yeah, but that IB test should use the indirect submission through the 
scheduler as well.


Otherwise you have IB test and scheduler writing both to the ring buffer 
at the same time and potentially corrupting it.


Christian.

On 10.09.21 at 12:10, Pan, Xinhui wrote:

[AMD Official Use Only]

We need to take this lock.
The IB test can be triggered through debugfs. These days I usually test it by cat'ing the gpu recovery and amdgpu_test_ib files in debugfs.



From: Koenig, Christian
Sent: September 10, 2021 18:02
To: Pan, Xinhui; amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander
Subject: Re: Re: Re: [PATCH 2/4] drm/amdgpu: UVD avoid memory allocation during IB test

*sigh* yeah, you are probably right. Wouldn't be too simple if this would be easy, would it?

In this case you can also skip taking the reservation lock for the
pre-allocated BO, since there should absolutely be only one user at a time.

Christian.

On 10.09.21 at 11:42, Pan, Xinhui wrote:

[AMD Official Use Only]

Oh god, the UVD free handler submits a delayed msg which depends on the scheduler while the reservation lock is held.
This is not allowed, as commit c8e42d57859d "drm/amdgpu: implement more ib pools (v2)" says:
any jobs which schedule IBs without dependence on the gpu scheduler should use the DIRECT pool.

Looks like we should only use the reserved BO for direct IB submission.
As for delayed IB submission, we could alloc a new one dynamically.

From: Koenig, Christian
Sent: September 10, 2021 16:53
To: Pan, Xinhui; amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander
Subject: Re: Re: [PATCH 2/4] drm/amdgpu: UVD avoid memory allocation during IB test

It should not, unless we are OOM, which should not happen during driver initialization.

But there is another problem I'm thinking about: Do we still use UVD IB
submissions to forcefully tear down UVD sessions when a process crashes?

If yes that stuff could easily deadlock with an IB test executed during
GPU reset.

Christian.

On 10.09.21 at 10:18, Pan, Xinhui wrote:

[AMD Official Use Only]

I am wondering if amdgpu_bo_pin would change the BO's placement in the future.
For now, the new placement is calculated by new = old ∩ new.


From: Koenig, Christian
Sent: September 10, 2021 14:24
To: Pan, Xinhui; amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander
Subject: Re: [PATCH 2/4] drm/amdgpu: UVD avoid memory allocation during IB test

On 10.09.21 at 02:38, xinhui pan wrote:

move BO allocation in sw_init.

Signed-off-by: xinhui pan 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 75 +++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h |  1 +
 drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c   |  8 +--
 drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c   |  8 +--
 4 files changed, 49 insertions(+), 43 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index d451c359606a..e2eaac941d37 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -141,6 +141,8 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
 const char *fw_name;
 const struct common_firmware_header *hdr;
 unsigned family_id;
+ struct amdgpu_bo *bo = NULL;
+ void *addr;
 int i, j, r;

	INIT_DELAYED_WORK(&adev->uvd.idle_work, amdgpu_uvd_idle_work_handler);
@@ -298,9 +300,34 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
 adev->uvd.filp[i] = NULL;
 }

+ r = amdgpu_bo_create_reserved(adev, 128 << 10, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_GTT,
+ &bo, NULL, &addr);
+ if (r)
+ return r;
+
 /* from uvd v5.0 HW addressing capacity increased to 64 bits */
- if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_UVD, 5, 0))
+ if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_UVD, 5, 0)) {
 adev->uvd.address_64_bit = true;
+ amdgpu_bo_kunmap(bo);
+ amdgpu_bo_unpin(bo);
+ r = amdgpu_bo_pin_restricted(bo, AMDGPU_GEM_DOMAIN_VRAM,
+ 0, 256 << 20);

Please keep using amdgpu_uvd_force_into_uvd_segment() and validate here,
cause I want to remove amdgpu_bo_pin_restricted() sooner or later.


+ if (r) {
+ amdgpu_bo_unreserve(bo);
+ amdgpu_bo_unref(&bo);
+ return r;
+ }
+ r = amdgpu_bo_kmap(bo, &addr);
+ if (r) {
+ amdgpu_bo_unpin(bo);
+ amdgpu_bo_unreserve(bo);
+ amdgpu_bo_unref(&bo);
+ return r;
+ }
+ }
+ adev->uvd.ib_bo = bo;
+ amdgpu_bo_unreserve(bo);

 switch (adev->asic_type) {
 case CHIP_TONGA:
@@ -342,6 +369,7 @@ int amdgpu_uvd_sw_fini(struct amdgpu_device *adev)
 for (i = 0; i < AMDGPU_MAX_UVD_ENC_RINGS; ++i)
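
For reference, the pattern Christian asks for here is exactly what lands in v3 of this patch (see amdgpu_uvd_create_msg_bo_helper earlier in this digest), roughly:

	amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_VRAM);
	amdgpu_uvd_force_into_uvd_segment(bo);
	r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
	if (r)
		goto err;
	r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_VRAM);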
 

RE: [PATCH 00/33] DC Patches September 08, 2021

2021-09-10 Thread Wheeler, Daniel
[Public]

Hi all,
 
This week this patchset was tested on the following systems:
 
HP Envy 360, with Ryzen 5 4500U, with the following display types: eDP 1080p 
60hz, 4k 60hz  (via USB-C to DP/HDMI), 1440p 144hz (via USB-C to DP/HDMI), 
1680*1050 60hz (via USB-C to DP and then DP to DVI/VGA)
 
AMD Ryzen 9 5900H, with the following display types: eDP 1080p 60hz, 4k 60hz  
(via USB-C to DP/HDMI), 1440p 144hz (via USB-C to DP/HDMI), 1680*1050 60hz (via 
USB-C to DP and then DP to DVI/VGA)
 
Sapphire Pulse RX5700XT with the following display types:
4k 60hz  (via DP/HDMI), 1440p 144hz (via DP/HDMI), 1680*1050 60hz (via DP to 
DVI/VGA)
 
Reference AMD RX6800 with the following display types:
4k 60hz  (via DP/HDMI and USB-C to DP/HDMI), 1440p 144hz (via USB-C to DP/HDMI 
and USB-C to DP/HDMI), 1680*1050 60hz (via DP to DVI/VGA)
 
Included testing using a Startech DP 1.4 MST hub at 2x 4k 60hz, and 3x 1080p 
60hz on all systems.
 
 
Tested-by: Daniel Wheeler 
 
 
Thank you,
 
Dan Wheeler
Technologist  |  AMD
SW Display
--
1 Commerce Valley Dr E, Thornhill, ON L3T 7X6
Facebook |  Twitter |  amd.com  

-Original Message-
From: amd-gfx  On Behalf Of Mikita Lipski
Sent: September 8, 2021 10:54 AM
To: amd-gfx@lists.freedesktop.org
Cc: Wentland, Harry ; Li, Sun peng (Leo) 
; Lakha, Bhawanpreet ; Siqueira, 
Rodrigo ; Pillai, Aurabindo 
; Zhuo, Qingqing ; Lipski, 
Mikita ; Li, Roman ; Jacob, Anson 
; Lin, Wayne ; Wang, Chao-kai (Stylon) 
; Chiu, Solomon 
Subject: [PATCH 00/33] DC Patches September 08, 2021

This DC patchset brings improvements in multiple areas. In summary, we 
highlight:

* bandwidth optimizations on following fast updates
* fixes and code improvements of DP connector blanking
* add thread to offload work of MST HPD IRQ function
* fix gamma coefficients
* provide backlight support for APUs without DMUB support
* coverity memory leak and warning fixes
* DSC MST bandwidth calculation fixes
* DMUB enhancements

Anson Jacob (3):
  drm/amd/display: Fix false BAD_FREE warning from Coverity
  drm/amd/display: Fix multiple memory leaks reported by coverity
  drm/amd/display: Revert "Directly retrain link from debugfs"

Anthony Koo (2):
  drm/amd/display: [FW Promotion] Release 0.0.81
  drm/amd/display: [FW Promotion] Release 0.0.82

Aric Cyr (2):
  drm/amd/display: 3.2.151
  drm/amd/display: 3.2.152

Aurabindo Pillai (1):
  drm/amd/display: Add flag to detect dpms force off during HPD

Dale Zhao (1):
  drm/amd/display: Refine condition of cursor visibility for pipe-split

Eric Yang (1):
  drm/amd/display: Add periodic detection when zstate is enabled

Harry Wentland (1):
  drm/amd/display: Get backlight from PWM if DMCU is not initialized

Hersen Wu (1):
  drm/amd/display: dsc mst 2 4K displays go dark with 2 lane HBR3

Ian Chen (1):
  drm/amd/display: remove force_enable_edp_fec param.

Jaehyun Chung (3):
  drm/amd/display: Add regamma/degamma coefficients and set sRGB when TF
is BT709
  drm/amd/display: Correct degamma coefficients
  drm/amd/display: Revert adding degamma coefficients

Jimmy Kizito (1):
  drm/amd/display: Fix dynamic link encoder access.

Josip Pavic (1):
  drm/amd/display: unblock abm when odm is enabled only on configs that
support it

Leo (Hanghong) Ma (3):
  drm/amd/display: Add DPCD writes at key points
  drm/amd/display: Fix system hang at boot
  drm/amd/display: Add helper for blanking all dp displays

Meenakshikumar Somasundaram (2):
  drm/amd/display: Fix for null pointer access for ddc pin and aux
engine.
  drm/amd/display: Link training retry fix for abort case

Michael Strauss (2):
  drm/amd/display: Add VPG and AFMT low power support for DCN3.1
  drm/amd/display: Enable mem low power control for DCN3.1 sub-IP blocks

Nicholas Kazlauskas (1):
  drm/amd/display: Optimize bandwidth on following fast update

Qingqing Zhuo (3):
  drm/amd/display: Revert "dc: w/a for hard hang on HPD on native DP"
  drm/amd/display: Apply w/a for hard hang on HPD
  drm/amd/display: Fix unstable HPCP compliance on Chrome Barcelo

Wayne Lin (2):
  drm/amd/display: Add option to defer works of hpd_rx_irq
  drm/amd/display: Fork thread to offload work of hpd_rx_irq

Wenjing Liu (2):
  drm/amd/display: move bpp range decision in decide dsc bw range
function
  drm/amd/display: update conditions to do dfp cap ext validation

 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 266 ++  
.../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h |  51 +++-
 .../amd/display/amdgpu_dm/amdgpu_dm_debugfs.c |   3 +-
 .../amd/display/amdgpu_dm/amdgpu_dm_hdcp.c|  16 +-
 .../amd/display/amdgpu_dm/amdgpu_dm_helpers.c |   6 +
 .../display/amdgpu_dm/amdgpu_dm_mst_types.c   |  18 +-
 .../display/amdgpu_dm/amdgpu_dm_mst_types.h   |  11 +-
 .../amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c |  16 +-
 .../display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c  |   4 +-
 drivers/gpu/drm/amd/display/dc/core/dc.c

RE: [PATCH] drm/amdgpu: Get atomicOps info from Host for sriov setup

2021-09-10 Thread Chen, Guchun
[Public]

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
old mode 100644
new mode 100755

Please don't modify the file mode.

Regards,
Guchun

-Original Message-
From: amd-gfx  On Behalf Of shaoyunl
Sent: Friday, September 10, 2021 10:26 PM
To: amd-gfx@lists.freedesktop.org
Cc: Liu, Shaoyun 
Subject: [PATCH] drm/amdgpu: Get atomicOps info from Host for sriov setup

The AtomicOp Requester Enable bit is reserved in VFs and the PF value applies to all
associated VFs, so the guest driver can not directly enable atomicOps for a VF; it
depends on the PF to enable them. In the current design, the amdgpu driver will get
the enabled atomicOps bits through private pf2vf data.

Signed-off-by: shaoyunl 
Change-Id: Ifdbcb4396d64e3f3cbf6bcbf7ab9c7b2cb061052
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c  | 24 +++--  
drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h |  4 +++-
 2 files changed, 16 insertions(+), 12 deletions(-)
 mode change 100644 => 100755 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
 mode change 100644 => 100755 drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
old mode 100644
new mode 100755
index 653bd8fdaa33..3ae1721ca859
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3529,17 +3529,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);
 
-   /* enable PCIE atomic ops */
-   r = pci_enable_atomic_ops_to_root(adev->pdev,
- PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
- PCI_EXP_DEVCAP2_ATOMIC_COMP64);
-   if (r) {
-   adev->have_atomics_support = false;
-   DRM_INFO("PCIE atomic ops is not supported\n");
-   } else {
-   adev->have_atomics_support = true;
-   }
-
amdgpu_device_get_pcie_info(adev);
 
if (amdgpu_mcbp)
@@ -3562,6 +3551,19 @@ int amdgpu_device_init(struct amdgpu_device *adev,
if (r)
return r;
 
+	/* enable PCIE atomic ops */
+	if (amdgpu_sriov_vf(adev))
+		adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *)
+			adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_enabled_flags ==
+			(PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64);
+	else
+		adev->have_atomics_support =
+			!pci_enable_atomic_ops_to_root(adev->pdev,
+						       PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
+						       PCI_EXP_DEVCAP2_ATOMIC_COMP64);
+	if (!adev->have_atomics_support)
+		dev_info(adev->dev, "PCIE atomic ops is not supported\n");
+
/* doorbell bar mapping and doorbell index init*/
amdgpu_device_doorbell_init(adev);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
old mode 100644
new mode 100755
index a434c71fde8e..995899191288
--- a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
@@ -204,8 +204,10 @@ struct amd_sriov_msg_pf2vf_info {
} mm_bw_management[AMD_SRIOV_MSG_RESERVE_VCN_INST];
/* UUID info */
struct amd_sriov_msg_uuid_info uuid_info;
+   /* pcie atomic Ops info */
+   uint32_t pcie_atomic_ops_enabled_flags;
/* reserved */
-   uint32_t reserved[256 - 47];
+   uint32_t reserved[256 - 48];
 };
 
 struct amd_sriov_msg_vf2pf_info_header {
--
2.17.1


Re: [PATCH] drm/amdgpu: Get atomicOps info from Host for sriov setup

2021-09-10 Thread Felix Kuehling
Am 2021-09-10 um 10:26 a.m. schrieb shaoyunl:
> The AtomicOp Requester Enable bit is reserved in VFs and the PF value
> applies to all associated VFs, so the guest driver can not directly
> enable atomicOps for a VF; it depends on the PF to enable them. In the
> current design, the amdgpu driver will get the enabled atomicOps bits
> through private pf2vf data.
>
> Signed-off-by: shaoyunl 
> Change-Id: Ifdbcb4396d64e3f3cbf6bcbf7ab9c7b2cb061052

You can disable the generation of ChangeIds for a repository with

    git config gerrit.createChangeId false

Other than that, the patch is

Reviewed-by: Felix Kuehling 


> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c  | 24 +++--
>  drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h |  4 +++-
>  2 files changed, 16 insertions(+), 12 deletions(-)
>  mode change 100644 => 100755 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>  mode change 100644 => 100755 drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> old mode 100644
> new mode 100755
> index 653bd8fdaa33..3ae1721ca859
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -3529,17 +3529,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
>   DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
>   DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);
>  
> - /* enable PCIE atomic ops */
> - r = pci_enable_atomic_ops_to_root(adev->pdev,
> -   PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
> -   PCI_EXP_DEVCAP2_ATOMIC_COMP64);
> - if (r) {
> - adev->have_atomics_support = false;
> - DRM_INFO("PCIE atomic ops is not supported\n");
> - } else {
> - adev->have_atomics_support = true;
> - }
> -
>   amdgpu_device_get_pcie_info(adev);
>  
>   if (amdgpu_mcbp)
> @@ -3562,6 +3551,19 @@ int amdgpu_device_init(struct amdgpu_device *adev,
>   if (r)
>   return r;
>  
> + /* enable PCIE atomic ops */
> + if (amdgpu_sriov_vf(adev))
> + adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *)
> + 	adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_enabled_flags ==
> + 	(PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64);
> + else
> + adev->have_atomics_support =
> + !pci_enable_atomic_ops_to_root(adev->pdev,
> +   PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
> +   PCI_EXP_DEVCAP2_ATOMIC_COMP64);
> + if (!adev->have_atomics_support)
> + dev_info(adev->dev, "PCIE atomic ops is not supported\n");
> +
>   /* doorbell bar mapping and doorbell index init*/
>   amdgpu_device_doorbell_init(adev);
>  
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
> old mode 100644
> new mode 100755
> index a434c71fde8e..995899191288
> --- a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
> @@ -204,8 +204,10 @@ struct amd_sriov_msg_pf2vf_info {
>   } mm_bw_management[AMD_SRIOV_MSG_RESERVE_VCN_INST];
>   /* UUID info */
>   struct amd_sriov_msg_uuid_info uuid_info;
> + /* pcie atomic Ops info */
> + uint32_t pcie_atomic_ops_enabled_flags;
>   /* reserved */
> - uint32_t reserved[256 - 47];
> + uint32_t reserved[256 - 48];
>  };
>  
>  struct amd_sriov_msg_vf2pf_info_header {


[PATCH] drm/amdgpu: Get atomicOps info from Host for sriov setup

2021-09-10 Thread shaoyunl
The AtomicOp Requester Enable bit is reserved in VFs and the PF value applies to all
associated VFs, so the guest driver can not directly enable atomicOps for a VF; it
depends on the PF to enable them. In the current design, the amdgpu driver will get
the enabled atomicOps bits through private pf2vf data.

Signed-off-by: shaoyunl 
Change-Id: Ifdbcb4396d64e3f3cbf6bcbf7ab9c7b2cb061052
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c  | 24 +++--
 drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h |  4 +++-
 2 files changed, 16 insertions(+), 12 deletions(-)
 mode change 100644 => 100755 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
 mode change 100644 => 100755 drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
old mode 100644
new mode 100755
index 653bd8fdaa33..3ae1721ca859
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3529,17 +3529,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);
 
-   /* enable PCIE atomic ops */
-   r = pci_enable_atomic_ops_to_root(adev->pdev,
- PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
- PCI_EXP_DEVCAP2_ATOMIC_COMP64);
-   if (r) {
-   adev->have_atomics_support = false;
-   DRM_INFO("PCIE atomic ops is not supported\n");
-   } else {
-   adev->have_atomics_support = true;
-   }
-
amdgpu_device_get_pcie_info(adev);
 
if (amdgpu_mcbp)
@@ -3562,6 +3551,19 @@ int amdgpu_device_init(struct amdgpu_device *adev,
if (r)
return r;
 
+	/* enable PCIE atomic ops */
+	if (amdgpu_sriov_vf(adev))
+		adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *)
+			adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_enabled_flags ==
+			(PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64);
+	else
+		adev->have_atomics_support =
+			!pci_enable_atomic_ops_to_root(adev->pdev,
+						       PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
+						       PCI_EXP_DEVCAP2_ATOMIC_COMP64);
+	if (!adev->have_atomics_support)
+		dev_info(adev->dev, "PCIE atomic ops is not supported\n");
+
/* doorbell bar mapping and doorbell index init*/
amdgpu_device_doorbell_init(adev);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
old mode 100644
new mode 100755
index a434c71fde8e..995899191288
--- a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
@@ -204,8 +204,10 @@ struct amd_sriov_msg_pf2vf_info {
} mm_bw_management[AMD_SRIOV_MSG_RESERVE_VCN_INST];
/* UUID info */
struct amd_sriov_msg_uuid_info uuid_info;
+   /* pcie atomic Ops info */
+   uint32_t pcie_atomic_ops_enabled_flags;
/* reserved */
-   uint32_t reserved[256 - 47];
+   uint32_t reserved[256 - 48];
 };
 
 struct amd_sriov_msg_vf2pf_info_header {
-- 
2.17.1



RE: [PATCH] drm/amdgpu: Get atomicOps info from Host for sriov setup

2021-09-10 Thread Liu, Shaoyun
[AMD Official Use Only]

Good catch. My editor seems to have an auto-complete feature and I just selected the first one. ☹

Thanks 
Shaoyun.liu

-Original Message-
From: Kuehling, Felix  
Sent: Friday, September 10, 2021 10:19 AM
To: amd-gfx@lists.freedesktop.org; Liu, Shaoyun 
Subject: Re: [PATCH] drm/amdgpu: Get atomicOps info from Host for sriov setup

Am 2021-09-10 um 10:04 a.m. schrieb shaoyunl:
> The AtomicOp Requester Enable bit is reserved in VFs and the PF value
> applies to all associated VFs, so the guest driver can not directly
> enable atomicOps for a VF; it depends on the PF to enable them. In the
> current design, the amdgpu driver will get the enabled atomicOps bits
> through private pf2vf data.
>
> Signed-off-by: shaoyunl 
> Change-Id: Ifdbcb4396d64e3f3cbf6bcbf7ab9c7b2cb061052

Please remove the Change-Id.

In general, the change looks good to me. One question and one more nit-pick inline ...


> ---
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c  | 25 -
>  drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h |  4 +++-
>  2 files changed, 17 insertions(+), 12 deletions(-)
>  mode change 100644 => 100755 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>  mode change 100644 => 100755 drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> old mode 100644
> new mode 100755
> index 653bd8fdaa33..fc6a6491c1b6
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -3529,17 +3529,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
>   DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
>   DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);
>  
> - /* enable PCIE atomic ops */
> - r = pci_enable_atomic_ops_to_root(adev->pdev,
> -   PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
> -   PCI_EXP_DEVCAP2_ATOMIC_COMP64);
> - if (r) {
> - adev->have_atomics_support = false;
> - DRM_INFO("PCIE atomic ops is not supported\n");
> - } else {
> - adev->have_atomics_support = true;
> - }
> -
>   amdgpu_device_get_pcie_info(adev);
>  
>   if (amdgpu_mcbp)
> @@ -3562,6 +3551,20 @@ int amdgpu_device_init(struct amdgpu_device *adev,
>   if (r)
>   return r;
>  
> + /* enable PCIE atomic ops */
> + if (amdgpu_sriov_bios(adev))

Is this the correct condition? I think this would be true for the PF as well. 
But on the PF we still need to call pci_enable_atomic_ops_to_root.
I would expect a condition that only applies to VFs.


> + adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *)
> + 	adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_enabled_flags ==
> + 	(PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64);
> + else
> + adev->have_atomics_support =
> + !pci_enable_atomic_ops_to_root(adev->pdev,
> +   PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
> +   PCI_EXP_DEVCAP2_ATOMIC_COMP64);
> + if (!adev->have_atomics_support)
> + dev_info(adev->dev, "PCIE atomic ops is not supported\n");
> +
> +

Double blank lines. One is enough.

Regards,
  Felix


>   /* doorbell bar mapping and doorbell index init*/
>   amdgpu_device_doorbell_init(adev);
>  
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
> old mode 100644
> new mode 100755
> index a434c71fde8e..995899191288
> --- a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
> @@ -204,8 +204,10 @@ struct amd_sriov_msg_pf2vf_info {
>   } mm_bw_management[AMD_SRIOV_MSG_RESERVE_VCN_INST];
>   /* UUID info */
>   struct amd_sriov_msg_uuid_info uuid_info;
> + /* pcie atomic Ops info */
> + uint32_t pcie_atomic_ops_enabled_flags;
>   /* reserved */
> - uint32_t reserved[256 - 47];
> + uint32_t reserved[256 - 48];
>  };
>  
>  struct amd_sriov_msg_vf2pf_info_header {


Re: [PATCH] drm/amdgpu: Get atomicOps info from Host for sriov setup

2021-09-10 Thread Felix Kuehling
Am 2021-09-10 um 10:04 a.m. schrieb shaoyunl:
> The AtomicOp Requester Enable bit is reserved in VFs and the PF value
> applies to all associated VFs, so the guest driver can not directly
> enable atomicOps for a VF; it depends on the PF to enable them. In the
> current design, the amdgpu driver will get the enabled atomicOps bits
> through private pf2vf data.
>
> Signed-off-by: shaoyunl 
> Change-Id: Ifdbcb4396d64e3f3cbf6bcbf7ab9c7b2cb061052

Please remove the Change-Id.

In general, the change looks good to me. One question and one more
nit-pick inline ...


> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c  | 25 -
>  drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h |  4 +++-
>  2 files changed, 17 insertions(+), 12 deletions(-)
>  mode change 100644 => 100755 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>  mode change 100644 => 100755 drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> old mode 100644
> new mode 100755
> index 653bd8fdaa33..fc6a6491c1b6
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -3529,17 +3529,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
>   DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
>   DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);
>  
> - /* enable PCIE atomic ops */
> - r = pci_enable_atomic_ops_to_root(adev->pdev,
> -   PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
> -   PCI_EXP_DEVCAP2_ATOMIC_COMP64);
> - if (r) {
> - adev->have_atomics_support = false;
> - DRM_INFO("PCIE atomic ops is not supported\n");
> - } else {
> - adev->have_atomics_support = true;
> - }
> -
>   amdgpu_device_get_pcie_info(adev);
>  
>   if (amdgpu_mcbp)
> @@ -3562,6 +3551,20 @@ int amdgpu_device_init(struct amdgpu_device *adev,
>   if (r)
>   return r;
>  
> + /* enable PCIE atomic ops */
> + if (amdgpu_sriov_bios(adev))

Is this the correct condition? I think this would be true for the PF as
well. But on the PF we still need to call pci_enable_atomic_ops_to_root.
I would expect a condition that only applies to VFs.


> + adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *)
> + 	adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_enabled_flags ==
> + 	(PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64);
> + else
> + adev->have_atomics_support =
> + !pci_enable_atomic_ops_to_root(adev->pdev,
> +   PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
> +   PCI_EXP_DEVCAP2_ATOMIC_COMP64);
> + if (!adev->have_atomics_support)
> + dev_info(adev->dev, "PCIE atomic ops is not supported\n");
> +
> +

Double blank lines. One is enough.

Regards,
  Felix


>   /* doorbell bar mapping and doorbell index init*/
>   amdgpu_device_doorbell_init(adev);
>  
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
> old mode 100644
> new mode 100755
> index a434c71fde8e..995899191288
> --- a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
> @@ -204,8 +204,10 @@ struct amd_sriov_msg_pf2vf_info {
>   } mm_bw_management[AMD_SRIOV_MSG_RESERVE_VCN_INST];
>   /* UUID info */
>   struct amd_sriov_msg_uuid_info uuid_info;
> + /* pcie atomic Ops info */
> + uint32_t pcie_atomic_ops_enabled_flags;
>   /* reserved */
> - uint32_t reserved[256 - 47];
> + uint32_t reserved[256 - 48];
>  };
>  
>  struct amd_sriov_msg_vf2pf_info_header {


[PATCH] drm/amdgpu: Get atomicOps info from Host for sriov setup

2021-09-10 Thread shaoyunl
The AtomicOp Requester Enable bit is reserved in VFs and the PF value applies to all
associated VFs, so the guest driver can not directly enable atomicOps for a VF; it
depends on the PF to enable them. In the current design, the amdgpu driver will get
the enabled atomicOps bits through private pf2vf data.

Signed-off-by: shaoyunl 
Change-Id: Ifdbcb4396d64e3f3cbf6bcbf7ab9c7b2cb061052
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c  | 25 -
 drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h |  4 +++-
 2 files changed, 17 insertions(+), 12 deletions(-)
 mode change 100644 => 100755 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
 mode change 100644 => 100755 drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
old mode 100644
new mode 100755
index 653bd8fdaa33..fc6a6491c1b6
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3529,17 +3529,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);
 
-   /* enable PCIE atomic ops */
-   r = pci_enable_atomic_ops_to_root(adev->pdev,
- PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
- PCI_EXP_DEVCAP2_ATOMIC_COMP64);
-   if (r) {
-   adev->have_atomics_support = false;
-   DRM_INFO("PCIE atomic ops is not supported\n");
-   } else {
-   adev->have_atomics_support = true;
-   }
-
amdgpu_device_get_pcie_info(adev);
 
if (amdgpu_mcbp)
@@ -3562,6 +3551,20 @@ int amdgpu_device_init(struct amdgpu_device *adev,
if (r)
return r;
 
+   /* enable PCIE atomic ops */
+   if (amdgpu_sriov_bios(adev))
+   adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info 
*)
+   
adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_enabled_flags ==
+   (PCI_EXP_DEVCAP2_ATOMIC_COMP32 | 
PCI_EXP_DEVCAP2_ATOMIC_COMP64);
+   else
+   adev->have_atomics_support =
+   !pci_enable_atomic_ops_to_root(adev->pdev,
+ PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
+ PCI_EXP_DEVCAP2_ATOMIC_COMP64);
+   if (!adev->have_atomics_support)
+   dev_info(adev->dev, "PCIE atomic ops is not supported\n");
+
+
/* doorbell bar mapping and doorbell index init*/
amdgpu_device_doorbell_init(adev);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h 
b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
old mode 100644
new mode 100755
index a434c71fde8e..995899191288
--- a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
@@ -204,8 +204,10 @@ struct amd_sriov_msg_pf2vf_info {
} mm_bw_management[AMD_SRIOV_MSG_RESERVE_VCN_INST];
/* UUID info */
struct amd_sriov_msg_uuid_info uuid_info;
+   /* pcie atomic Ops info */
+   uint32_t pcie_atomic_ops_enabled_flags;
/* reserved */
-   uint32_t reserved[256 - 47];
+   uint32_t reserved[256 - 48];
 };
 
 struct amd_sriov_msg_vf2pf_info_header {
-- 
2.17.1



Re: Re: Re: [PATCH 2/4] drm/amdgpu: UVD avoid memory allocation during IB test

2021-09-10 Thread Christian König
*sigh* yeah, you are probably right. Wouldn't be too simple if this were easy, would it?


In this case you can also skip taking the reservation lock for the 
pre-allocated BO, since there should absolutely be only one user at a time.


Christian.

On 10.09.21 at 11:42, Pan, Xinhui wrote:

[AMD Official Use Only]

Oh, god. The UVD free handler submits a delayed msg which depends on the
scheduler, with the reservation lock held.
This is not allowed, as commit c8e42d57859d "drm/amdgpu: implement more ib pools
(v2)" says:
Any jobs which schedule IBs without dependence on gpu scheduler should use
DIRECT pool.

Looks like we should only use the reserved BO for direct IB submission.
As for delayed IB submission, we could alloc a new one dynamically.
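
Concretely, that split could look roughly like the sketch below, reusing the ib_bo field and the helper from the v2 series (names taken from the quoted patches, size illustrative):

	/* Sketch: direct submissions reuse the BO pre-allocated in sw_init;
	 * delayed submissions allocate a fresh BO so the shared one is never
	 * held while waiting on the scheduler. */
	if (direct) {
		bo = adev->uvd.ib_bo;
	} else {
		r = amdgpu_uvd_create_msg_bo_helper(adev, 4096, &bo);
		if (r)
			return r;
	}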

From: Koenig, Christian 
Sent: September 10, 2021 16:53
To: Pan, Xinhui; amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander
Subject: Re: Re: [PATCH 2/4] drm/amdgpu: UVD avoid memory allocation during IB test

It should not unless we are OOM which should not happen during driver
initialization.

But there is another problem I'm thinking about: Do we still use UVD IB
submissions to forcefully tear down UVD sessions when a process crashes?

If yes that stuff could easily deadlock with an IB test executed during
GPU reset.

Christian.

On 10.09.21 at 10:18, Pan, Xinhui wrote:

[AMD Official Use Only]

I am wondering if amdgpu_bo_pin would change the BO's placement in the future.
For now, the new placement is calculated by new = old ∩ new.
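
For readers following along, the intersection behaviour being described works out roughly like this (an illustration of the idea only, not the actual TTM/amdgpu code):

	/* Illustration: pinning keeps only the domains common to the BO's
	 * currently allowed placement and the requested domain, i.e.
	 * new = old & requested. A GTT|VRAM BO pinned to VRAM stays in
	 * VRAM; a GTT-only BO pinned to VRAM would intersect to nothing. */
	uint32_t old_domains = AMDGPU_GEM_DOMAIN_GTT | AMDGPU_GEM_DOMAIN_VRAM;
	uint32_t pin_domain  = AMDGPU_GEM_DOMAIN_VRAM;
	uint32_t new_domains = old_domains & pin_domain;	/* == VRAM */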


From: Koenig, Christian 
Sent: September 10, 2021 14:24
To: Pan, Xinhui; amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander
Subject: Re: [PATCH 2/4] drm/amdgpu: UVD avoid memory allocation during IB test

On 10.09.21 at 02:38, xinhui pan wrote:

move BO allocation in sw_init.

Signed-off-by: xinhui pan 
---
drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 75 +++--
drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h |  1 +
drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c   |  8 +--
drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c   |  8 +--
4 files changed, 49 insertions(+), 43 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index d451c359606a..e2eaac941d37 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -141,6 +141,8 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
const char *fw_name;
const struct common_firmware_header *hdr;
unsigned family_id;
+ struct amdgpu_bo *bo = NULL;
+ void *addr;
int i, j, r;

INIT_DELAYED_WORK(>uvd.idle_work, amdgpu_uvd_idle_work_handler);
@@ -298,9 +300,34 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
adev->uvd.filp[i] = NULL;
}

+ r = amdgpu_bo_create_reserved(adev, 128 << 10, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_GTT,
+ , NULL, );
+ if (r)
+ return r;
+
/* from uvd v5.0 HW addressing capacity increased to 64 bits */
- if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_UVD, 5, 
0))
+ if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_UVD, 5, 
0)) {
adev->uvd.address_64_bit = true;
+ amdgpu_bo_kunmap(bo);
+ amdgpu_bo_unpin(bo);
+ r = amdgpu_bo_pin_restricted(bo, AMDGPU_GEM_DOMAIN_VRAM,
+ 0, 256 << 20);

Please keep using amdgpu_uvd_force_into_uvd_segment() and validate here,
cause I want to remove amdgpu_bo_pin_restricted() sooner or later.


+ if (r) {
+ amdgpu_bo_unreserve(bo);
+ amdgpu_bo_unref();
+ return r;
+ }
+ r = amdgpu_bo_kmap(bo, );
+ if (r) {
+ amdgpu_bo_unpin(bo);
+ amdgpu_bo_unreserve(bo);
+ amdgpu_bo_unref();
+ return r;
+ }
+ }
+ adev->uvd.ib_bo = bo;
+ amdgpu_bo_unreserve(bo);

switch (adev->asic_type) {
case CHIP_TONGA:
@@ -342,6 +369,7 @@ int amdgpu_uvd_sw_fini(struct amdgpu_device *adev)
for (i = 0; i < AMDGPU_MAX_UVD_ENC_RINGS; ++i)
amdgpu_ring_fini(>uvd.inst[j].ring_enc[i]);
}
+ amdgpu_bo_free_kernel(>uvd.ib_bo, NULL, NULL);
release_firmware(adev->uvd.fw);

return 0;
@@ -1080,23 +1108,10 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring 
*ring, struct amdgpu_bo *bo,
unsigned offset_idx = 0;
unsigned offset[3] = { UVD_BASE_SI, 0, 0 };

- amdgpu_bo_kunmap(bo);
- amdgpu_bo_unpin(bo);
-
- if (!ring->adev->uvd.address_64_bit) {
- struct ttm_operation_ctx ctx = { true, false };
-
- amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_VRAM);
- amdgpu_uvd_force_into_uvd_segment(bo);
- r = ttm_bo_validate(>tbo, 

RE: [PATCH v3 1/1] drm/amdkfd: make needs_pcie_atomics FW-version dependent

2021-09-10 Thread Liu, Shaoyun
[AMD Official Use Only]

Looks good to me.
Reviewed-by: Shaoyun.liu <shaoyun@amd.com>

-Original Message-
From: Kuehling, Felix  
Sent: Friday, September 10, 2021 1:10 AM
To: amd-gfx@lists.freedesktop.org
Cc: Liu, Shaoyun 
Subject: Re: [PATCH v3 1/1] drm/amdkfd: make needs_pcie_atomics FW-version 
dependent

On 2021-09-08 at 6:48 p.m., Felix Kuehling wrote:
> On some GPUs the PCIe atomic requirement for KFD depends on the MEC 
> firmware version. Add a firmware version check for this. The minimum 
> firmware version that works without atomics can be updated in the 
> device_info structure for each GPU type.
>
> Move PCIe atomic detection from kgd2kfd_probe into kgd2kfd_device_init 
> because the MEC firmware is not loaded yet at the probe stage.
>
> Signed-off-by: Felix Kuehling 
I tested this change on a Sienna Cichlid on a system without PCIe atomics, both 
with the old and the new firmware. This version of the change should be good to 
go if I can get an R-b.

Thanks,
  Felix
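
For readers skimming the thread, the gate being added presumably reduces to something like the sketch below (the helper name is hypothetical; the fields are from the quoted patch):

	/* Sketch: PCIe atomics are required only when the device asks for
	 * them and the loaded MEC firmware is older than the first version
	 * known to work without them (0 = no such version exists). */
	static bool kfd_needs_pcie_atomics(const struct kfd_device_info *info,
					   uint32_t mec_fw_version)
	{
		return info->needs_pci_atomics &&
		       (!info->no_atomic_fw_version ||
			mec_fw_version < info->no_atomic_fw_version);
	}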


> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_device.c | 44 -
>  drivers/gpu/drm/amd/amdkfd/kfd_priv.h   |  1 +
>  2 files changed, 29 insertions(+), 16 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> index 16a57b70cc1a..30fde852af19 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> @@ -468,6 +468,7 @@ static const struct kfd_device_info navi10_device_info = {
>   .needs_iommu_device = false,
>   .supports_cwsr = true,
>   .needs_pci_atomics = true,
> + .no_atomic_fw_version = 145,
>   .num_sdma_engines = 2,
>   .num_xgmi_sdma_engines = 0,
>   .num_sdma_queues_per_engine = 8,
> @@ -487,6 +488,7 @@ static const struct kfd_device_info navi12_device_info = {
>   .needs_iommu_device = false,
>   .supports_cwsr = true,
>   .needs_pci_atomics = true,
> + .no_atomic_fw_version = 145,
>   .num_sdma_engines = 2,
>   .num_xgmi_sdma_engines = 0,
>   .num_sdma_queues_per_engine = 8,
> @@ -506,6 +508,7 @@ static const struct kfd_device_info navi14_device_info = {
>   .needs_iommu_device = false,
>   .supports_cwsr = true,
>   .needs_pci_atomics = true,
> + .no_atomic_fw_version = 145,
>   .num_sdma_engines = 2,
>   .num_xgmi_sdma_engines = 0,
>   .num_sdma_queues_per_engine = 8,
> @@ -525,6 +528,7 @@ static const struct kfd_device_info 
> sienna_cichlid_device_info = {
>   .needs_iommu_device = false,
>   .supports_cwsr = true,
>   .needs_pci_atomics = true,
> + .no_atomic_fw_version = 92,
>   .num_sdma_engines = 4,
>   .num_xgmi_sdma_engines = 0,
>   .num_sdma_queues_per_engine = 8,
> @@ -544,6 +548,7 @@ static const struct kfd_device_info 
> navy_flounder_device_info = {
>   .needs_iommu_device = false,
>   .supports_cwsr = true,
>   .needs_pci_atomics = true,
> + .no_atomic_fw_version = 92,
>   .num_sdma_engines = 2,
>   .num_xgmi_sdma_engines = 0,
>   .num_sdma_queues_per_engine = 8,
> @@ -562,7 +567,8 @@ static const struct kfd_device_info vangogh_device_info = 
> {
>   .mqd_size_aligned = MQD_SIZE_ALIGNED,
>   .needs_iommu_device = false,
>   .supports_cwsr = true,
> - .needs_pci_atomics = false,
> + .needs_pci_atomics = true,
> + .no_atomic_fw_version = 92,
>   .num_sdma_engines = 1,
>   .num_xgmi_sdma_engines = 0,
>   .num_sdma_queues_per_engine = 2,
> @@ -582,6 +588,7 @@ static const struct kfd_device_info 
> dimgrey_cavefish_device_info = {
>   .needs_iommu_device = false,
>   .supports_cwsr = true,
>   .needs_pci_atomics = true,
> + .no_atomic_fw_version = 92,
>   .num_sdma_engines = 2,
>   .num_xgmi_sdma_engines = 0,
>   .num_sdma_queues_per_engine = 8,
> @@ -601,6 +608,7 @@ static const struct kfd_device_info 
> beige_goby_device_info = {
>   .needs_iommu_device = false,
>   .supports_cwsr = true,
>   .needs_pci_atomics = true,
> + .no_atomic_fw_version = 92,
>   .num_sdma_engines = 1,
>   .num_xgmi_sdma_engines = 0,
>   .num_sdma_queues_per_engine = 8,
> @@ -619,7 +627,8 @@ static const struct kfd_device_info 
> yellow_carp_device_info = {
>   .mqd_size_aligned = MQD_SIZE_ALIGNED,
>   .needs_iommu_device = false,
>   .supports_cwsr = true,
> - .needs_pci_atomics = false,
> + .needs_pci_atomics = true,
> + .no_atomic_fw_version = 92,
>   .num_sdma_engines = 1,
>   .num_xgmi_sdma_engines = 0,
>   .num_sdma_queues_per_engine = 2,
> @@ -708,20 +717,6 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd,
>   if (!kfd)
>   return NULL;
>  
> - /* Allow BIF to recode atomics to PCIe 3.0 AtomicOps.
> -  * 32 and 64-bit requests are possible and must be
> -  * supported.
> -  */
> - kfd->pci_atomic_requested = amdgpu_amdkfd_have_atomics_support(kgd);
> - if 

Re: Re: [PATCH 2/4] drm/amdgpu: UVD avoid memory allocation during IB test

2021-09-10 Thread Christian König
It should not unless we are OOM which should not happen during driver 
initialization.


But there is another problem I'm thinking about: Do we still use UVD IB 
submissions to forcefully tear down UVD sessions when a process crashes?


If yes that stuff could easily deadlock with an IB test executed during 
GPU reset.


Christian.

On 10.09.21 at 10:18, Pan, Xinhui wrote:

[AMD Official Use Only]

I am wondering if amdgpu_bo_pin would change the BO's placement in the future.
For now, the new placement is calculated by new = old ∩ new.


From: Koenig, Christian 
Sent: September 10, 2021 14:24
To: Pan, Xinhui; amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander
Subject: Re: [PATCH 2/4] drm/amdgpu: UVD avoid memory allocation during IB test

On 10.09.21 at 02:38, xinhui pan wrote:

move BO allocation in sw_init.

Signed-off-by: xinhui pan 
---
   drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 75 +++--
   drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h |  1 +
   drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c   |  8 +--
   drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c   |  8 +--
   4 files changed, 49 insertions(+), 43 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index d451c359606a..e2eaac941d37 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -141,6 +141,8 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
   const char *fw_name;
   const struct common_firmware_header *hdr;
   unsigned family_id;
+ struct amdgpu_bo *bo = NULL;
+ void *addr;
   int i, j, r;

   INIT_DELAYED_WORK(>uvd.idle_work, amdgpu_uvd_idle_work_handler);
@@ -298,9 +300,34 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
   adev->uvd.filp[i] = NULL;
   }

+ r = amdgpu_bo_create_reserved(adev, 128 << 10, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_GTT,
+ , NULL, );
+ if (r)
+ return r;
+
   /* from uvd v5.0 HW addressing capacity increased to 64 bits */
- if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_UVD, 5, 
0))
+ if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_UVD, 5, 
0)) {
   adev->uvd.address_64_bit = true;
+ amdgpu_bo_kunmap(bo);
+ amdgpu_bo_unpin(bo);
+ r = amdgpu_bo_pin_restricted(bo, AMDGPU_GEM_DOMAIN_VRAM,
+ 0, 256 << 20);

Please keep using amdgpu_uvd_force_into_uvd_segment() and validate here,
cause I want to remove amdgpu_bo_pin_restricted() sooner or later.


+ if (r) {
+ amdgpu_bo_unreserve(bo);
+ amdgpu_bo_unref();
+ return r;
+ }
+ r = amdgpu_bo_kmap(bo, );
+ if (r) {
+ amdgpu_bo_unpin(bo);
+ amdgpu_bo_unreserve(bo);
+ amdgpu_bo_unref();
+ return r;
+ }
+ }
+ adev->uvd.ib_bo = bo;
+ amdgpu_bo_unreserve(bo);

   switch (adev->asic_type) {
   case CHIP_TONGA:
@@ -342,6 +369,7 @@ int amdgpu_uvd_sw_fini(struct amdgpu_device *adev)
   for (i = 0; i < AMDGPU_MAX_UVD_ENC_RINGS; ++i)
   amdgpu_ring_fini(>uvd.inst[j].ring_enc[i]);
   }
+ amdgpu_bo_free_kernel(>uvd.ib_bo, NULL, NULL);
   release_firmware(adev->uvd.fw);

   return 0;
@@ -1080,23 +1108,10 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring 
*ring, struct amdgpu_bo *bo,
   unsigned offset_idx = 0;
   unsigned offset[3] = { UVD_BASE_SI, 0, 0 };

- amdgpu_bo_kunmap(bo);
- amdgpu_bo_unpin(bo);
-
- if (!ring->adev->uvd.address_64_bit) {
- struct ttm_operation_ctx ctx = { true, false };
-
- amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_VRAM);
- amdgpu_uvd_force_into_uvd_segment(bo);
- r = ttm_bo_validate(>tbo, >placement, );
- if (r)
- goto err;
- }
-
   r = amdgpu_job_alloc_with_ib(adev, 64, direct ? AMDGPU_IB_POOL_DIRECT :
AMDGPU_IB_POOL_DELAYED, );
   if (r)
- goto err;
+ return r;

   if (adev->asic_type >= CHIP_VEGA10) {
   offset_idx = 1 + ring->me;
@@ -1148,8 +1163,6 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, 
struct amdgpu_bo *bo,
   }

   amdgpu_bo_fence(bo, f, false);
- amdgpu_bo_unreserve(bo);
- amdgpu_bo_unref();

   if (fence)
   *fence = dma_fence_get(f);
@@ -1159,10 +1172,6 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, 
struct amdgpu_bo *bo,

   err_free:
   amdgpu_job_free(job);
-
-err:
- amdgpu_bo_unreserve(bo);
- amdgpu_bo_unref();
   return r;
   }

@@ -1173,16 +1182,15 @@ int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, 
uint32_t handle,
 

[PATCH v2 1/3] drm/amdgpu: UVD avoid memory allocation during IB test

2021-09-10 Thread xinhui pan
move BO allocation in sw_init.

Signed-off-by: xinhui pan 
---
change from v1:
only use the pre-allocated BO for direct IB submission,
and take its reservation lock to avoid any potential race.
Better safe than sorry.
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 103 +---
 drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h |   1 +
 drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c   |   8 +-
 drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c   |   8 +-
 4 files changed, 78 insertions(+), 42 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index d451c359606a..04a6f7a18d0a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -134,6 +134,51 @@ MODULE_FIRMWARE(FIRMWARE_VEGA12);
 MODULE_FIRMWARE(FIRMWARE_VEGA20);
 
 static void amdgpu_uvd_idle_work_handler(struct work_struct *work);
+static void amdgpu_uvd_force_into_uvd_segment(struct amdgpu_bo *abo);
+
+static int amdgpu_uvd_create_msg_bo_helper(struct amdgpu_device *adev,
+  uint32_t size,
+  struct amdgpu_bo **bo_ptr)
+{
+   struct ttm_operation_ctx ctx = { true, false };
+   struct amdgpu_bo *bo = NULL;
+   void *addr;
+   int r;
+
+   r = amdgpu_bo_create_reserved(adev, size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_GTT,
+ , NULL, );
+   if (r)
+   return r;
+
+   if (adev->uvd.address_64_bit) {
+   *bo_ptr = bo;
+   return 0;
+   }
+
+   amdgpu_bo_kunmap(bo);
+   amdgpu_bo_unpin(bo);
+   amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_VRAM);
+   amdgpu_uvd_force_into_uvd_segment(bo);
+   r = ttm_bo_validate(>tbo, >placement, );
+   if (r)
+   goto err;
+   r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_VRAM);
+   if (r)
+   goto err_pin;
+   r = amdgpu_bo_kmap(bo, );
+   if (r)
+   goto err_kmap;
+   *bo_ptr = bo;
+   return 0;
+err_kmap:
+   amdgpu_bo_unpin(bo);
+err_pin:
+err:
+   amdgpu_bo_unreserve(bo);
+   amdgpu_bo_unref();
+   return r;
+}
 
 int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
 {
@@ -302,6 +347,11 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_UVD, 5, 
0))
adev->uvd.address_64_bit = true;
 
+   r = amdgpu_uvd_create_msg_bo_helper(adev, 128 << 10, >uvd.ib_bo);
+   if (r)
+   return r;
+   amdgpu_bo_unreserve(adev->uvd.ib_bo);
+
switch (adev->asic_type) {
case CHIP_TONGA:
adev->uvd.use_ctx_buf = adev->uvd.fw_version >= FW_1_65_10;
@@ -324,6 +374,7 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
 
 int amdgpu_uvd_sw_fini(struct amdgpu_device *adev)
 {
+   void *addr = amdgpu_bo_kptr(adev->uvd.ib_bo);
int i, j;
 
drm_sched_entity_destroy(>uvd.entity);
@@ -342,6 +393,7 @@ int amdgpu_uvd_sw_fini(struct amdgpu_device *adev)
for (i = 0; i < AMDGPU_MAX_UVD_ENC_RINGS; ++i)
amdgpu_ring_fini(>uvd.inst[j].ring_enc[i]);
}
+   amdgpu_bo_free_kernel(>uvd.ib_bo, NULL, );
release_firmware(adev->uvd.fw);
 
return 0;
@@ -1080,23 +1132,10 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring 
*ring, struct amdgpu_bo *bo,
unsigned offset_idx = 0;
unsigned offset[3] = { UVD_BASE_SI, 0, 0 };
 
-   amdgpu_bo_kunmap(bo);
-   amdgpu_bo_unpin(bo);
-
-   if (!ring->adev->uvd.address_64_bit) {
-   struct ttm_operation_ctx ctx = { true, false };
-
-   amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_VRAM);
-   amdgpu_uvd_force_into_uvd_segment(bo);
-   r = ttm_bo_validate(>tbo, >placement, );
-   if (r)
-   goto err;
-   }
-
r = amdgpu_job_alloc_with_ib(adev, 64, direct ? AMDGPU_IB_POOL_DIRECT :
 AMDGPU_IB_POOL_DELAYED, );
if (r)
-   goto err;
+   return r;
 
if (adev->asic_type >= CHIP_VEGA10) {
offset_idx = 1 + ring->me;
@@ -1148,8 +1187,6 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, 
struct amdgpu_bo *bo,
}
 
amdgpu_bo_fence(bo, f, false);
-   amdgpu_bo_unreserve(bo);
-   amdgpu_bo_unref();
 
if (fence)
*fence = dma_fence_get(f);
@@ -1159,10 +1196,6 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, 
struct amdgpu_bo *bo,
 
 err_free:
amdgpu_job_free(job);
-
-err:
-   amdgpu_bo_unreserve(bo);
-   amdgpu_bo_unref();
return r;
 }
 
@@ -1173,16 +1206,15 @@ int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, 
uint32_t handle,
  struct dma_fence **fence)
 {
struct amdgpu_device *adev 

[PATCH v2 2/3] drm/amdgpu: VCE avoid memory allocation during IB test

2021-09-10 Thread xinhui pan
alloc extra msg from direct IB pool.

Signed-off-by: xinhui pan 
---
change from v1:
let addr align up to gpu page boundary.
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c | 27 -
 1 file changed, 13 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
index e9fdf49d69e8..caa4d3420e00 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
@@ -82,7 +82,6 @@ MODULE_FIRMWARE(FIRMWARE_VEGA20);
 
 static void amdgpu_vce_idle_work_handler(struct work_struct *work);
 static int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
-struct amdgpu_bo *bo,
 struct dma_fence **fence);
 static int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t 
handle,
  bool direct, struct dma_fence **fence);
@@ -441,12 +440,12 @@ void amdgpu_vce_free_handles(struct amdgpu_device *adev, 
struct drm_file *filp)
  * Open up a stream for HW test
  */
 static int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
-struct amdgpu_bo *bo,
 struct dma_fence **fence)
 {
const unsigned ib_size_dw = 1024;
struct amdgpu_job *job;
struct amdgpu_ib *ib;
+   struct amdgpu_ib ib_msg;
struct dma_fence *f = NULL;
uint64_t addr;
int i, r;
@@ -456,9 +455,17 @@ static int amdgpu_vce_get_create_msg(struct amdgpu_ring 
*ring, uint32_t handle,
if (r)
return r;
 
-   ib = >ibs[0];
+   memset(_msg, 0, sizeof(ib_msg));
+   /* only one gpu page is needed, alloc +1 page to make addr aligned. */
+   r = amdgpu_ib_get(ring->adev, NULL, AMDGPU_GPU_PAGE_SIZE * 2,
+ AMDGPU_IB_POOL_DIRECT,
+ _msg);
+   if (r)
+   goto err;
 
-   addr = amdgpu_bo_gpu_offset(bo);
+   ib = >ibs[0];
+   /* let addr point to page boundary */
+   addr = AMDGPU_GPU_PAGE_ALIGN(ib_msg.gpu_addr);
 
/* stitch together an VCE create msg */
ib->length_dw = 0;
@@ -498,6 +505,7 @@ static int amdgpu_vce_get_create_msg(struct amdgpu_ring 
*ring, uint32_t handle,
ib->ptr[i] = 0x0;
 
r = amdgpu_job_submit_direct(job, ring, );
+   amdgpu_ib_free(ring->adev, _msg, f);
if (r)
goto err;
 
@@ -1134,20 +1142,13 @@ int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring)
 int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 {
struct dma_fence *fence = NULL;
-   struct amdgpu_bo *bo = NULL;
long r;
 
/* skip vce ring1/2 ib test for now, since it's not reliable */
if (ring != >adev->vce.ring[0])
return 0;
 
-   r = amdgpu_bo_create_reserved(ring->adev, 512, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM,
- , NULL, NULL);
-   if (r)
-   return r;
-
-   r = amdgpu_vce_get_create_msg(ring, 1, bo, NULL);
+   r = amdgpu_vce_get_create_msg(ring, 1, NULL);
if (r)
goto error;
 
@@ -1163,8 +1164,6 @@ int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring, 
long timeout)
 
 error:
dma_fence_put(fence);
-   amdgpu_bo_unreserve(bo);
-   amdgpu_bo_free_kernel(, NULL, NULL);
return r;
 }
 
-- 
2.25.1



[PATCH v2 3/3] drm/amdgpu: VCN avoid memory allocation during IB test

2021-09-10 Thread xinhui pan
alloc extra msg from direct IB pool.

Reviewed-by: Christian König 
Signed-off-by: xinhui pan 
---
change from v1:
let addr align up to gpu page boundary.
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 97 +++--
 1 file changed, 44 insertions(+), 53 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
index 561296a85b43..b60b8fe5bf67 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -541,15 +541,14 @@ int amdgpu_vcn_dec_sw_ring_test_ring(struct amdgpu_ring 
*ring)
 }
 
 static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring,
-  struct amdgpu_bo *bo,
+  struct amdgpu_ib *ib_msg,
   struct dma_fence **fence)
 {
struct amdgpu_device *adev = ring->adev;
struct dma_fence *f = NULL;
struct amdgpu_job *job;
struct amdgpu_ib *ib;
-   uint64_t addr;
-   void *msg = NULL;
+   uint64_t addr = AMDGPU_GPU_PAGE_ALIGN(ib_msg->gpu_addr);
int i, r;
 
r = amdgpu_job_alloc_with_ib(adev, 64,
@@ -558,8 +557,6 @@ static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring,
goto err;
 
ib = >ibs[0];
-   addr = amdgpu_bo_gpu_offset(bo);
-   msg = amdgpu_bo_kptr(bo);
ib->ptr[0] = PACKET0(adev->vcn.internal.data0, 0);
ib->ptr[1] = addr;
ib->ptr[2] = PACKET0(adev->vcn.internal.data1, 0);
@@ -576,9 +573,7 @@ static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring,
if (r)
goto err_free;
 
-   amdgpu_bo_fence(bo, f, false);
-   amdgpu_bo_unreserve(bo);
-   amdgpu_bo_free_kernel(, NULL, (void **));
+   amdgpu_ib_free(adev, ib_msg, f);
 
if (fence)
*fence = dma_fence_get(f);
@@ -588,27 +583,26 @@ static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring 
*ring,
 
 err_free:
amdgpu_job_free(job);
-
 err:
-   amdgpu_bo_unreserve(bo);
-   amdgpu_bo_free_kernel(, NULL, (void **));
+   amdgpu_ib_free(adev, ib_msg, f);
return r;
 }
 
 static int amdgpu_vcn_dec_get_create_msg(struct amdgpu_ring *ring, uint32_t 
handle,
-struct amdgpu_bo **bo)
+   struct amdgpu_ib *ib)
 {
struct amdgpu_device *adev = ring->adev;
uint32_t *msg;
int r, i;
 
-   *bo = NULL;
-   r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM,
- bo, NULL, (void **));
+   memset(ib, 0, sizeof(*ib));
+   r = amdgpu_ib_get(adev, NULL, AMDGPU_GPU_PAGE_SIZE * 2,
+   AMDGPU_IB_POOL_DIRECT,
+   ib);
if (r)
return r;
 
+   msg = (uint32_t *)AMDGPU_GPU_PAGE_ALIGN((unsigned long)ib->ptr);
msg[0] = cpu_to_le32(0x0028);
msg[1] = cpu_to_le32(0x0038);
msg[2] = cpu_to_le32(0x0001);
@@ -630,19 +624,20 @@ static int amdgpu_vcn_dec_get_create_msg(struct 
amdgpu_ring *ring, uint32_t hand
 }
 
 static int amdgpu_vcn_dec_get_destroy_msg(struct amdgpu_ring *ring, uint32_t 
handle,
- struct amdgpu_bo **bo)
+ struct amdgpu_ib *ib)
 {
struct amdgpu_device *adev = ring->adev;
uint32_t *msg;
int r, i;
 
-   *bo = NULL;
-   r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM,
- bo, NULL, (void **));
+   memset(ib, 0, sizeof(*ib));
+   r = amdgpu_ib_get(adev, NULL, AMDGPU_GPU_PAGE_SIZE * 2,
+   AMDGPU_IB_POOL_DIRECT,
+   ib);
if (r)
return r;
 
+   msg = (uint32_t *)AMDGPU_GPU_PAGE_ALIGN((unsigned long)ib->ptr);
msg[0] = cpu_to_le32(0x0028);
msg[1] = cpu_to_le32(0x0018);
msg[2] = cpu_to_le32(0x);
@@ -658,21 +653,21 @@ static int amdgpu_vcn_dec_get_destroy_msg(struct 
amdgpu_ring *ring, uint32_t han
 int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 {
struct dma_fence *fence = NULL;
-   struct amdgpu_bo *bo;
+   struct amdgpu_ib ib;
long r;
 
-   r = amdgpu_vcn_dec_get_create_msg(ring, 1, );
+   r = amdgpu_vcn_dec_get_create_msg(ring, 1, );
if (r)
goto error;
 
-   r = amdgpu_vcn_dec_send_msg(ring, bo, NULL);
+   r = amdgpu_vcn_dec_send_msg(ring, , NULL);
if (r)
goto error;
-   r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, );
+   r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, );
if (r)
goto error;
 
-   r = amdgpu_vcn_dec_send_msg(ring, bo, );
+   r = amdgpu_vcn_dec_send_msg(ring, , );
if (r)
goto error;

Re: Re: Re: Re: [PATCH 2/4] drm/amdgpu: UVD avoid memory allocation during IB test

2021-09-10 Thread Pan, Xinhui
[AMD Official Use Only]

Looks like amdgpu_debugfs_test_ib_show uses direct IB submission too.
It parks the ring scheduler threads and down-reads the reset_sem to avoid racing
with gpu reset.
BUT, amdgpu_debugfs_test_ib_show itself could be called in parallel. Need to fix it.
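
One way to serialize the concurrent callers would be a dedicated lock around the whole park/test/unpark sequence, roughly as sketched below (the lock is hypothetical, shown only to make the shape of a fix concrete):

	/* Sketch: make the debugfs IB test exclusive against itself. */
	static DEFINE_MUTEX(ib_test_lock);	/* hypothetical */

	mutex_lock(&ib_test_lock);
	/* park the ring schedulers, run the direct IB tests, unpark */
	mutex_unlock(&ib_test_lock);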


From: Koenig, Christian 
Sent: September 10, 2021 19:10
To: Pan, Xinhui; amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander
Subject: Re: Re: Re: Re: [PATCH 2/4] drm/amdgpu: UVD avoid memory allocation during IB test

Yeah, but that IB test should use the indirect submission through the
scheduler as well.

Otherwise you have the IB test and the scheduler both writing to the ring buffer
at the same time, potentially corrupting it.

Christian.

On 10.09.21 at 12:10, Pan, Xinhui wrote:
> [AMD Official Use Only]
>
> We need to take this lock.
> IB test can be triggered through debugfs. Recently I usually test it by
> cat'ing gpu recovery and amdgpu_test_ib in debugfs.
>
>
> 
> From: Koenig, Christian 
> Sent: September 10, 2021 18:02
> To: Pan, Xinhui; amd-gfx@lists.freedesktop.org
> Cc: Deucher, Alexander
> Subject: Re: Re: Re: [PATCH 2/4] drm/amdgpu: UVD avoid memory allocation during IB test
>
> *sigh* yeah, you are probably right. Wouldn't be too simple if this were
> easy, would it?
>
> In this case you can also skip taking the reservation lock for the
> pre-allocated BO, since there should absolutely be only one user at a time.
>
> Christian.
>
On 10.09.21 at 11:42, Pan, Xinhui wrote:
>> [AMD Official Use Only]
>>
>> Oh, god. The UVD free handler submits a delayed msg which depends on the
>> scheduler, with the reservation lock held.
>> This is not allowed, as commit c8e42d57859d "drm/amdgpu: implement more ib
>> pools (v2)" says:
>> Any jobs which schedule IBs without dependence on gpu scheduler should use
>> DIRECT pool.
>>
>> Looks like we should only use the reserved BO for direct IB submission.
>> As for delayed IB submission, we could alloc a new one dynamically.
>> 
>> From: Koenig, Christian 
>> Sent: September 10, 2021 16:53
>> To: Pan, Xinhui; amd-gfx@lists.freedesktop.org
>> Cc: Deucher, Alexander
>> Subject: Re: Re: [PATCH 2/4] drm/amdgpu: UVD avoid memory allocation during IB test
>>
>> It should not unless we are OOM which should not happen during driver
>> initialization.
>>
>> But there is another problem I'm thinking about: Do we still use UVD IB
>> submissions to forcefully tear down UVD sessions when a process crashes?
>>
>> If yes that stuff could easily deadlock with an IB test executed during
>> GPU reset.
>>
>> Christian.
>>
On 10.09.21 at 10:18, Pan, Xinhui wrote:
>>> [AMD Official Use Only]
>>>
>>> I am wondering if amdgpu_bo_pin would change the BO's placement in the future.
>>> For now, the new placement is calculated by new = old ∩ new.
>>>
>>> 
>>> From: Koenig, Christian 
>>> Sent: September 10, 2021 14:24
>>> To: Pan, Xinhui; amd-gfx@lists.freedesktop.org
>>> Cc: Deucher, Alexander
>>> Subject: Re: [PATCH 2/4] drm/amdgpu: UVD avoid memory allocation during IB test
>>>
>>> On 10.09.21 at 02:38, xinhui pan wrote:
 move BO allocation in sw_init.

 Signed-off-by: xinhui pan 
 ---
  drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 75 +++--
  drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h |  1 +
  drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c   |  8 +--
  drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c   |  8 +--
  4 files changed, 49 insertions(+), 43 deletions(-)

 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c 
 b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
 index d451c359606a..e2eaac941d37 100644
 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
 +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
 @@ -141,6 +141,8 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
  const char *fw_name;
  const struct common_firmware_header *hdr;
  unsigned family_id;
 + struct amdgpu_bo *bo = NULL;
 + void *addr;
  int i, j, r;

  INIT_DELAYED_WORK(>uvd.idle_work, 
 amdgpu_uvd_idle_work_handler);
 @@ -298,9 +300,34 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
  adev->uvd.filp[i] = NULL;
  }

 + r = amdgpu_bo_create_reserved(adev, 128 << 10, PAGE_SIZE,
 + AMDGPU_GEM_DOMAIN_GTT,
 + , NULL, );
 + if (r)
 + return r;
 +
  /* from uvd v5.0 HW addressing capacity increased to 64 bits */
 - if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_UVD, 
 5, 0))
 + if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_UVD, 
 5, 0)) {
  adev->uvd.address_64_bit = true;
 + amdgpu_bo_kunmap(bo);
 + amdgpu_bo_unpin(bo);
 + r = 

Re: Re: Re: [PATCH 2/4] drm/amdgpu: UVD avoid memory allocation during IB test

2021-09-10 Thread Pan, Xinhui
[AMD Official Use Only]

We need to take this lock.
IB test can be triggered through debugfs. Recently I usually test it by cat'ing
gpu recovery and amdgpu_test_ib in debugfs.



From: Koenig, Christian 
Sent: September 10, 2021 18:02
To: Pan, Xinhui; amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander
Subject: Re: Re: Re: [PATCH 2/4] drm/amdgpu: UVD avoid memory allocation during IB test

*sigh* yeah, you are probably right. Wouldn't be too simple if this were easy, would it?

In this case you can also skip taking the reservation lock for the
pre-allocated BO, since there should absolutely be only one user at a time.

Christian.

On 10.09.21 at 11:42, Pan, Xinhui wrote:
> [AMD Official Use Only]
>
> Oh, god. The UVD free handler submits a delayed msg which depends on the
> scheduler, with the reservation lock held.
> This is not allowed, as commit c8e42d57859d "drm/amdgpu: implement more ib
> pools (v2)" says:
> Any jobs which schedule IBs without dependence on gpu scheduler should use
> DIRECT pool.
>
> Looks like we should only use the reserved BO for direct IB submission.
> As for delayed IB submission, we could alloc a new one dynamically.
> 
> From: Koenig, Christian 
> Sent: September 10, 2021 16:53
> To: Pan, Xinhui; amd-gfx@lists.freedesktop.org
> Cc: Deucher, Alexander
> Subject: Re: Re: [PATCH 2/4] drm/amdgpu: UVD avoid memory allocation during IB test
>
> It should not unless we are OOM which should not happen during driver
> initialization.
>
> But there is another problem I'm thinking about: Do we still use UVD IB
> submissions to forcefully tear down UVD sessions when a process crashes?
>
> If yes that stuff could easily deadlock with an IB test executed during
> GPU reset.
>
> Christian.
>
> On 10.09.21 at 10:18, Pan, Xinhui wrote:
>> [AMD Official Use Only]
>>
>> I am wondering if amdgpu_bo_pin would change the BO's placement in the future.
>> For now, the new placement is calculated by new = old ∩ new.
>>
>> 
>> From: Koenig, Christian 
>> Sent: September 10, 2021 14:24
>> To: Pan, Xinhui; amd-gfx@lists.freedesktop.org
>> Cc: Deucher, Alexander
>> Subject: Re: [PATCH 2/4] drm/amdgpu: UVD avoid memory allocation during IB test
>>
>> On 10.09.21 at 02:38, xinhui pan wrote:
>>> move BO allocation in sw_init.
>>>
>>> Signed-off-by: xinhui pan 
>>> ---
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 75 +++--
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h |  1 +
>>> drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c   |  8 +--
>>> drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c   |  8 +--
>>> 4 files changed, 49 insertions(+), 43 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
>>> index d451c359606a..e2eaac941d37 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
>>> @@ -141,6 +141,8 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
>>> const char *fw_name;
>>> const struct common_firmware_header *hdr;
>>> unsigned family_id;
>>> + struct amdgpu_bo *bo = NULL;
>>> + void *addr;
>>> int i, j, r;
>>>
>>> INIT_DELAYED_WORK(>uvd.idle_work, 
>>> amdgpu_uvd_idle_work_handler);
>>> @@ -298,9 +300,34 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
>>> adev->uvd.filp[i] = NULL;
>>> }
>>>
>>> + r = amdgpu_bo_create_reserved(adev, 128 << 10, PAGE_SIZE,
>>> + AMDGPU_GEM_DOMAIN_GTT,
>>> + , NULL, );
>>> + if (r)
>>> + return r;
>>> +
>>> /* from uvd v5.0 HW addressing capacity increased to 64 bits */
>>> - if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_UVD, 
>>> 5, 0))
>>> + if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_UVD, 
>>> 5, 0)) {
>>> adev->uvd.address_64_bit = true;
>>> + amdgpu_bo_kunmap(bo);
>>> + amdgpu_bo_unpin(bo);
>>> + r = amdgpu_bo_pin_restricted(bo, AMDGPU_GEM_DOMAIN_VRAM,
>>> + 0, 256 << 20);
>> Please keep using amdgpu_uvd_force_into_uvd_segment() and validate here,
>> cause I want to remove amdgpu_bo_pin_restricted() sooner or later.
>>
>>> + if (r) {
>>> + amdgpu_bo_unreserve(bo);
>>> + amdgpu_bo_unref();
>>> + return r;
>>> + }
>>> + r = amdgpu_bo_kmap(bo, );
>>> + if (r) {
>>> + amdgpu_bo_unpin(bo);
>>> + amdgpu_bo_unreserve(bo);
>>> + amdgpu_bo_unref();
>>> + return r;
>>> + }
>>> + }
>>> + adev->uvd.ib_bo = bo;
>>> + amdgpu_bo_unreserve(bo);
>>>
>>> switch (adev->asic_type) {
>>> case CHIP_TONGA:
>>> @@ -342,6 +369,7 @@ int amdgpu_uvd_sw_fini(struct amdgpu_device *adev)
>>> 

[PATCH] drm/ttm: add a WARN_ON in ttm_set_driver_manager when array bounds (v2)

2021-09-10 Thread Guchun Chen
Vendors may define their own memory types on top of TTM_PL_PRIV, but call
ttm_set_driver_manager directly without checking the mem_type value when
setting up a memory manager. So add such a check to catch the case where
the type runs past the array bounds.

v2: lower check level to WARN_ON
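
For context, this is the pattern the commit message refers to; amdgpu, for example, places its private memory types directly above TTM_PL_PRIV (shown for illustration):

	/* A typo such as TTM_PL_PRIV + 5 would silently index past
	 * bdev->man_drv[TTM_NUM_MEM_TYPES] without the new WARN_ON. */
	#define AMDGPU_PL_GDS	(TTM_PL_PRIV + 0)
	#define AMDGPU_PL_GWS	(TTM_PL_PRIV + 1)
	#define AMDGPU_PL_OA	(TTM_PL_PRIV + 2)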

Signed-off-by: Leslie Shi 
Signed-off-by: Guchun Chen 
---
 include/drm/ttm/ttm_device.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h
index 07d722950d5b..aa79953c807c 100644
--- a/include/drm/ttm/ttm_device.h
+++ b/include/drm/ttm/ttm_device.h
@@ -291,6 +291,7 @@ ttm_manager_type(struct ttm_device *bdev, int mem_type)
 static inline void ttm_set_driver_manager(struct ttm_device *bdev, int type,
  struct ttm_resource_manager *manager)
 {
+   WARN_ON(type >= TTM_NUM_MEM_TYPES);
bdev->man_drv[type] = manager;
 }
 
-- 
2.17.1



[PATCH] drm/amdgpu: Unify PSP TA context

2021-09-10 Thread Candice Li
Remove all TA binary structures and add the specific binary
structure in struct ta_context.

Signed-off-by: Candice Li 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c   |  23 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c   | 122 +++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h   |  23 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c |   6 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c  |   9 +-
 drivers/gpu/drm/amd/amdgpu/psp_v10_0.c|  22 ++--
 drivers/gpu/drm/amd/amdgpu/psp_v11_0.c|  40 ---
 drivers/gpu/drm/amd/amdgpu/psp_v12_0.c|  14 +--
 8 files changed, 141 insertions(+), 118 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 7e45640fbee026..d2955ea4a62bf4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -341,27 +341,34 @@ static int amdgpu_firmware_info(struct 
drm_amdgpu_info_firmware *fw_info,
switch (query_fw->index) {
case TA_FW_TYPE_PSP_XGMI:
fw_info->ver = adev->psp.ta_fw_version;
-   fw_info->feature = adev->psp.xgmi.feature_version;
+   fw_info->feature = adev->psp.xgmi_context.context
+  .bin_desc.feature_version;
break;
case TA_FW_TYPE_PSP_RAS:
fw_info->ver = adev->psp.ta_fw_version;
-   fw_info->feature = adev->psp.ras.feature_version;
+   fw_info->feature = adev->psp.ras_context.context
+  .bin_desc.feature_version;
break;
case TA_FW_TYPE_PSP_HDCP:
fw_info->ver = adev->psp.ta_fw_version;
-   fw_info->feature = adev->psp.hdcp.feature_version;
+   fw_info->feature = adev->psp.hdcp_context.context
+  .bin_desc.feature_version;
break;
case TA_FW_TYPE_PSP_DTM:
fw_info->ver = adev->psp.ta_fw_version;
-   fw_info->feature = adev->psp.dtm.feature_version;
+   fw_info->feature = adev->psp.dtm_context.context
+  .bin_desc.feature_version;
break;
case TA_FW_TYPE_PSP_RAP:
fw_info->ver = adev->psp.ta_fw_version;
-   fw_info->feature = adev->psp.rap.feature_version;
+   fw_info->feature = adev->psp.rap_context.context
+  .bin_desc.feature_version;
break;
case TA_FW_TYPE_PSP_SECUREDISPLAY:
fw_info->ver = adev->psp.ta_fw_version;
-   fw_info->feature = 
adev->psp.securedisplay.feature_version;
+   fw_info->feature =
+   adev->psp.securedisplay_context.context.bin_desc
+   .feature_version;
break;
default:
return -EINVAL;
@@ -378,8 +385,8 @@ static int amdgpu_firmware_info(struct 
drm_amdgpu_info_firmware *fw_info,
fw_info->feature = adev->psp.sos.feature_version;
break;
case AMDGPU_INFO_FW_ASD:
-   fw_info->ver = adev->psp.asd.fw_version;
-   fw_info->feature = adev->psp.asd.feature_version;
+   fw_info->ver = adev->psp.asd_context.bin_desc.fw_version;
+   fw_info->feature = 
adev->psp.asd_context.bin_desc.feature_version;
break;
case AMDGPU_INFO_FW_DMCU:
fw_info->ver = adev->dm.dmcu_fw_version;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 885876e2ce73b6..071dadf3a4509f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -803,15 +803,16 @@ static int psp_asd_load(struct psp_context *psp)
 * add workaround to bypass it for sriov now.
 * TODO: add version check to make it common
 */
-   if (amdgpu_sriov_vf(psp->adev) || !psp->asd.size_bytes)
+   if (amdgpu_sriov_vf(psp->adev) || !psp->asd_context.bin_desc.size_bytes)
return 0;
 
cmd = acquire_psp_cmd_buf(psp);
 
-   psp_copy_fw(psp, psp->asd.start_addr, psp->asd.size_bytes);
+   psp_copy_fw(psp, psp->asd_context.bin_desc.start_addr,
+   psp->asd_context.bin_desc.size_bytes);
 
psp_prep_asd_load_cmd_buf(cmd, psp->fw_pri_mc_addr,
- psp->asd.size_bytes);
+ psp->asd_context.bin_desc.size_bytes);
 
ret = psp_cmd_submit_buf(psp, NULL, cmd,
  

Re: Re: [PATCH 2/4] drm/amdgpu: UVD avoid memory allocation during IB test

2021-09-10 Thread Pan, Xinhui
[AMD Official Use Only]

Oh, god. The UVD free handler submits a delayed msg which depends on the
scheduler, with the reservation lock held.
This is not allowed, as commit c8e42d57859d "drm/amdgpu: implement more ib pools
(v2)" says:
Any jobs which schedule IBs without dependence on gpu scheduler should use
DIRECT pool.

Looks like we should only use the reserved BO for direct IB submission.
As for delayed IB submission, we could alloc a new one dynamically.

From: Koenig, Christian 
Sent: September 10, 2021 16:53
To: Pan, Xinhui; amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander
Subject: Re: Re: [PATCH 2/4] drm/amdgpu: UVD avoid memory allocation during IB test

It should not unless we are OOM which should not happen during driver
initialization.

But there is another problem I'm thinking about: Do we still use UVD IB
submissions to forcefully tear down UVD sessions when a process crashes?

If yes that stuff could easily deadlock with an IB test executed during
GPU reset.

Christian.

On 10.09.21 at 10:18, Pan, Xinhui wrote:
> [AMD Official Use Only]
>
> I am wondering if amdgpu_bo_pin would change the BO's placement in the future.
> For now, the new placement is calculated by new = old ∩ new.
>
> 
> From: Koenig, Christian 
> Sent: September 10, 2021 14:24
> To: Pan, Xinhui; amd-gfx@lists.freedesktop.org
> Cc: Deucher, Alexander
> Subject: Re: [PATCH 2/4] drm/amdgpu: UVD avoid memory allocation during IB test
>
> On 10.09.21 at 02:38, xinhui pan wrote:
>> move BO allocation in sw_init.
>>
>> Signed-off-by: xinhui pan 
>> ---
>>drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 75 +++--
>>drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h |  1 +
>>drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c   |  8 +--
>>drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c   |  8 +--
>>4 files changed, 49 insertions(+), 43 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
>> index d451c359606a..e2eaac941d37 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
>> @@ -141,6 +141,8 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
>>const char *fw_name;
>>const struct common_firmware_header *hdr;
>>unsigned family_id;
>> + struct amdgpu_bo *bo = NULL;
>> + void *addr;
>>int i, j, r;
>>
>>INIT_DELAYED_WORK(>uvd.idle_work, amdgpu_uvd_idle_work_handler);
>> @@ -298,9 +300,34 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
>>adev->uvd.filp[i] = NULL;
>>}
>>
>> + r = amdgpu_bo_create_reserved(adev, 128 << 10, PAGE_SIZE,
>> + AMDGPU_GEM_DOMAIN_GTT,
>> + , NULL, );
>> + if (r)
>> + return r;
>> +
>>/* from uvd v5.0 HW addressing capacity increased to 64 bits */
>> - if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_UVD, 
>> 5, 0))
>> + if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_UVD, 
>> 5, 0)) {
>>adev->uvd.address_64_bit = true;
>> + amdgpu_bo_kunmap(bo);
>> + amdgpu_bo_unpin(bo);
>> + r = amdgpu_bo_pin_restricted(bo, AMDGPU_GEM_DOMAIN_VRAM,
>> + 0, 256 << 20);
> Please keep using amdgpu_uvd_force_into_uvd_segment() and validate here,
> cause I want to remove amdgpu_bo_pin_restricted() sooner or later.
>
>> + if (r) {
>> + amdgpu_bo_unreserve(bo);
>> + amdgpu_bo_unref();
>> + return r;
>> + }
>> + r = amdgpu_bo_kmap(bo, );
>> + if (r) {
>> + amdgpu_bo_unpin(bo);
>> + amdgpu_bo_unreserve(bo);
>> + amdgpu_bo_unref();
>> + return r;
>> + }
>> + }
>> + adev->uvd.ib_bo = bo;
>> + amdgpu_bo_unreserve(bo);
>>
>>switch (adev->asic_type) {
>>case CHIP_TONGA:
>> @@ -342,6 +369,7 @@ int amdgpu_uvd_sw_fini(struct amdgpu_device *adev)
>>for (i = 0; i < AMDGPU_MAX_UVD_ENC_RINGS; ++i)
>>amdgpu_ring_fini(>uvd.inst[j].ring_enc[i]);
>>}
>> + amdgpu_bo_free_kernel(>uvd.ib_bo, NULL, NULL);
>>release_firmware(adev->uvd.fw);
>>
>>return 0;
>> @@ -1080,23 +1108,10 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring 
>> *ring, struct amdgpu_bo *bo,
>>unsigned offset_idx = 0;
>>unsigned offset[3] = { UVD_BASE_SI, 0, 0 };
>>
>> - amdgpu_bo_kunmap(bo);
>> - amdgpu_bo_unpin(bo);
>> -
>> - if (!ring->adev->uvd.address_64_bit) {
>> - struct ttm_operation_ctx ctx = { true, false };
>> -
>> - amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_VRAM);
>> - amdgpu_uvd_force_into_uvd_segment(bo);
>> - r = ttm_bo_validate(>tbo, >placement, );
>> - if (r)
>> -   

[PATCH v3] drm/amdgpu: add manual sclk/vddc setting support for cyan skilfish(v3)

2021-09-10 Thread Lang Yu
Add manual sclk/vddc setting support via pp_od_clk_voltage sysfs
to maintain consistency with other asics. As cyan skillfish doesn't
support DPM, there is only a single frequency and voltage to adjust.

v2: maintain consistency and add command guide.
v3: adjust user settings storage and coding style.

Command guide:
echo vc point sclk vddc > pp_od_clk_voltage
"vc"- sclk voltage curve
"point" - must be 0
"sclk"  - target value of sclk(MHz), should be in safe range
"vddc"  - target value of vddc(mV), a 6.25(mV) stepping is
  recommended and should be in safe range (the real
  vddc is an approximation of target value)
echo c > pp_od_clk_voltage
"c" - commit the changes of sclk and vddc, only after
  the commit command, the target values set by "vc"
  command will take effect
echo r > pp_od_clk_voltage
"r" - reset sclk and vddc to default value, a subsequent
  commit command is needed to take effect

Example:
1) Check default sclk and vddc
$ cat pp_od_clk_voltage
OD_SCLK:
0: 1800Mhz *
OD_VDDC:
0: 862mV *
OD_RANGE:
SCLK:1000Mhz   2000Mhz
VDDC: 700mV1129mV
2) Set sclk to 1500MHz and vddc to 700mV
$ echo vc 0 1500 700 > pp_od_clk_voltage
$ echo c > pp_od_clk_voltage
$ cat pp_od_clk_voltage
OD_SCLK:
0: 1500Mhz *
OD_VDDC:
0: 693mV *
OD_RANGE:
SCLK:1000Mhz   2000Mhz
VDDC: 700mV1129mV
3) Reset sclk and vddc to default
$ echo r > pp_od_clk_voltage
$ echo c > pp_od_clk_voltage
$ cat pp_od_clk_voltage
OD_SCLK:
0: 1800Mhz *
OD_VDDC:
0: 874mV *
OD_RANGE:
SCLK:1000Mhz   2000Mhz
VDDC: 700mV1129mV
NOTE:
We don't specify an explicit safe range; you can set any values
between min and max at your own risk. Enjoy!

Signed-off-by: Lang Yu 
Reviewed-by: Lijo Lazar 
Reviewed-by: Huang Rui 
---
 drivers/gpu/drm/amd/pm/inc/smu_types.h|   5 +-
 .../amd/pm/swsmu/smu11/cyan_skillfish_ppt.c   | 134 ++
 2 files changed, 138 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/pm/inc/smu_types.h 
b/drivers/gpu/drm/amd/pm/inc/smu_types.h
index 6f1b1b50d527..18b862a90fbe 100644
--- a/drivers/gpu/drm/amd/pm/inc/smu_types.h
+++ b/drivers/gpu/drm/amd/pm/inc/smu_types.h
@@ -226,7 +226,10 @@
__SMU_DUMMY_MAP(SetUclkDpmMode),\
__SMU_DUMMY_MAP(LightSBR),  \
__SMU_DUMMY_MAP(GfxDriverResetRecovery),\
-   __SMU_DUMMY_MAP(BoardPowerCalibration),
+   __SMU_DUMMY_MAP(BoardPowerCalibration),   \
+   __SMU_DUMMY_MAP(RequestGfxclk),   \
+   __SMU_DUMMY_MAP(ForceGfxVid), \
+   __SMU_DUMMY_MAP(UnforceGfxVid),
 
 #undef __SMU_DUMMY_MAP
 #define __SMU_DUMMY_MAP(type)  SMU_MSG_##type
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/cyan_skillfish_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu11/cyan_skillfish_ppt.c
index e1fab030cfc5..3d4c65bc29dc 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/cyan_skillfish_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/cyan_skillfish_ppt.c
@@ -44,6 +44,21 @@
 #undef pr_info
 #undef pr_debug
 
+/* unit: MHz */
+#define CYAN_SKILLFISH_SCLK_MIN1000
+#define CYAN_SKILLFISH_SCLK_MAX2000
+#define CYAN_SKILLFISH_SCLK_DEFAULT1800
+
+/* unit: mV */
+#define CYAN_SKILLFISH_VDDC_MIN700
+#define CYAN_SKILLFISH_VDDC_MAX1129
+#define CYAN_SKILLFISH_VDDC_MAGIC  5118 // 0x13fe
+
+static struct gfx_user_settings {
+   uint32_t sclk;
+   uint32_t vddc;
+} cyan_skillfish_user_settings;
+
 #define FEATURE_MASK(feature) (1ULL << feature)
 #define SMC_DPM_FEATURE ( \
FEATURE_MASK(FEATURE_FCLK_DPM_BIT)  |   \
@@ -297,6 +312,27 @@ static int cyan_skillfish_print_clk_levels(struct 
smu_context *smu,
smu_cmn_get_sysfs_buf(, );
 
switch (clk_type) {
+   case SMU_OD_SCLK:
+   ret  = cyan_skillfish_get_smu_metrics_data(smu, 
METRICS_CURR_GFXCLK, _value);
+   if (ret)
+   return ret;
+   size += sysfs_emit_at(buf, size,"%s:\n", "OD_SCLK");
+   size += sysfs_emit_at(buf, size, "0: %uMhz *\n", cur_value);
+   break;
+   case SMU_OD_VDDC_CURVE:
+   ret  = cyan_skillfish_get_smu_metrics_data(smu, 
METRICS_VOLTAGE_VDDGFX, _value);
+   if (ret)
+   return ret;
+   size += sysfs_emit_at(buf, size,"%s:\n", "OD_VDDC");
+   size += sysfs_emit_at(buf, size, "0: %umV *\n", cur_value);
+   break;
+   case SMU_OD_RANGE:
+   size += sysfs_emit_at(buf, size, "%s:\n", 

[PATCH v3 3/3] drm/amdgpu: add some pptable funcs for cyan skilfish(v3)

2021-09-10 Thread Lang Yu
Add print_clk_levels and read_sensor pptable funcs for
cyan skilfish.

v2: keep consistency and add get_gpu_metrics callback.
v3: use sysfs_emit_at() in sysfs show function.

Signed-off-by: Lang Yu 
Reviewed-by: Huang Rui 
---
 .../amd/pm/swsmu/smu11/cyan_skillfish_ppt.c   | 347 ++
 1 file changed, 347 insertions(+)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/cyan_skillfish_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu11/cyan_skillfish_ppt.c
index b05f9541accc..e1fab030cfc5 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/cyan_skillfish_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/cyan_skillfish_ppt.c
@@ -44,6 +44,12 @@
 #undef pr_info
 #undef pr_debug
 
+#define FEATURE_MASK(feature) (1ULL << feature)
+#define SMC_DPM_FEATURE ( \
+   FEATURE_MASK(FEATURE_FCLK_DPM_BIT)  |   \
+   FEATURE_MASK(FEATURE_SOC_DPM_BIT)   |   \
+   FEATURE_MASK(FEATURE_GFX_DPM_BIT))
+
 static struct cmn2asic_msg_mapping 
cyan_skillfish_message_map[SMU_MSG_MAX_COUNT] = {
MSG_MAP(TestMessage,PPSMC_MSG_TestMessage,  
0),
MSG_MAP(GetSmuVersion,  PPSMC_MSG_GetSmuVersion,
0),
@@ -52,14 +58,354 @@ static struct cmn2asic_msg_mapping 
cyan_skillfish_message_map[SMU_MSG_MAX_COUNT]
MSG_MAP(SetDriverDramAddrLow,   
PPSMC_MSG_SetDriverTableDramAddrLow,0),
MSG_MAP(TransferTableSmu2Dram,  
PPSMC_MSG_TransferTableSmu2Dram,0),
MSG_MAP(TransferTableDram2Smu,  
PPSMC_MSG_TransferTableDram2Smu,0),
+   MSG_MAP(GetEnabledSmuFeatures,  
PPSMC_MSG_GetEnabledSmuFeatures,0),
+   MSG_MAP(RequestGfxclk,  PPSMC_MSG_RequestGfxclk,
0),
+   MSG_MAP(ForceGfxVid,PPSMC_MSG_ForceGfxVid,  
0),
+   MSG_MAP(UnforceGfxVid,  PPSMC_MSG_UnforceGfxVid,
0),
+};
+
+static struct cmn2asic_mapping cyan_skillfish_table_map[SMU_TABLE_COUNT] = {
+   TAB_MAP_VALID(SMU_METRICS),
 };
 
+static int cyan_skillfish_tables_init(struct smu_context *smu)
+{
+   struct smu_table_context *smu_table = >smu_table;
+   struct smu_table *tables = smu_table->tables;
+
+   SMU_TABLE_INIT(tables, SMU_TABLE_SMU_METRICS,
+   sizeof(SmuMetrics_t),
+   PAGE_SIZE,
+   AMDGPU_GEM_DOMAIN_VRAM);
+
+   smu_table->metrics_table = kzalloc(sizeof(SmuMetrics_t), GFP_KERNEL);
+   if (!smu_table->metrics_table)
+   goto err0_out;
+
+   smu_table->gpu_metrics_table_size = sizeof(struct gpu_metrics_v2_2);
+   smu_table->gpu_metrics_table = 
kzalloc(smu_table->gpu_metrics_table_size, GFP_KERNEL);
+   if (!smu_table->gpu_metrics_table)
+   goto err1_out;
+
+   smu_table->metrics_time = 0;
+
+   return 0;
+
+err1_out:
+   smu_table->gpu_metrics_table_size = 0;
+   kfree(smu_table->metrics_table);
+err0_out:
+   return -ENOMEM;
+}
+
+static int cyan_skillfish_init_smc_tables(struct smu_context *smu)
+{
+   int ret = 0;
+
+   ret = cyan_skillfish_tables_init(smu);
+   if (ret)
+   return ret;
+
+   return smu_v11_0_init_smc_tables(smu);
+}
+
+static int cyan_skillfish_finit_smc_tables(struct smu_context *smu)
+{
+   struct smu_table_context *smu_table = >smu_table;
+
+   kfree(smu_table->metrics_table);
+   smu_table->metrics_table = NULL;
+
+   kfree(smu_table->gpu_metrics_table);
+   smu_table->gpu_metrics_table = NULL;
+   smu_table->gpu_metrics_table_size = 0;
+
+   smu_table->metrics_time = 0;
+
+   return 0;
+}
+
+static int
+cyan_skillfish_get_smu_metrics_data(struct smu_context *smu,
+   MetricsMember_t member,
+   uint32_t *value)
+{
+   struct smu_table_context *smu_table = >smu_table;
+   SmuMetrics_t *metrics = (SmuMetrics_t *)smu_table->metrics_table;
+   int ret = 0;
+
+   mutex_lock(>metrics_lock);
+
+   ret = smu_cmn_get_metrics_table_locked(smu, NULL, false);
+   if (ret) {
+   mutex_unlock(>metrics_lock);
+   return ret;
+   }
+
+   switch (member) {
+   case METRICS_CURR_GFXCLK:
+   *value = metrics->Current.GfxclkFrequency;
+   break;
+   case METRICS_CURR_SOCCLK:
+   *value = metrics->Current.SocclkFrequency;
+   break;
+   case METRICS_CURR_VCLK:
+   *value = metrics->Current.VclkFrequency;
+   break;
+   case METRICS_CURR_DCLK:
+   *value = metrics->Current.DclkFrequency;
+   break;
+   case METRICS_CURR_UCLK:
+   *value = metrics->Current.MemclkFrequency;
+   break;
+   case METRICS_AVERAGE_SOCKETPOWER:
+   *value = (metrics->Current.CurrentSocketPower << 8) /
+ 

[PATCH v3 2/3] drm/amdgpu: update SMU driver interface for cyan skilfish(v3)

2021-09-10 Thread Lang Yu
Add SmuMetrics_t definition for cyan skilfish.

v2: update SmuMetrics_t definition.
v3: cleanup and rearrange the order of fields.

Signed-off-by: Lang Yu 
Reviewed-by: Huang Rui 
---
 .../pm/inc/smu11_driver_if_cyan_skillfish.h   | 86 ---
 1 file changed, 35 insertions(+), 51 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/inc/smu11_driver_if_cyan_skillfish.h 
b/drivers/gpu/drm/amd/pm/inc/smu11_driver_if_cyan_skillfish.h
index 8a08ecc34c69..4884a4e1f261 100644
--- a/drivers/gpu/drm/amd/pm/inc/smu11_driver_if_cyan_skillfish.h
+++ b/drivers/gpu/drm/amd/pm/inc/smu11_driver_if_cyan_skillfish.h
@@ -33,63 +33,47 @@
 #define TABLE_PMSTATUSLOG3 // Called by Tools for Agm logging
 #define TABLE_DPMCLOCKS  4 // Called by Driver; defined here, but not 
used, for backward compatible
 #define TABLE_MOMENTARY_PM   5 // Called by Tools; defined here, but not 
used, for backward compatible
-#define TABLE_COUNT  6
+#define TABLE_SMU_METRICS6 // Called by Driver
+#define TABLE_COUNT  7
 
-#define NUM_DSPCLK_LEVELS  8
-#define NUM_SOCCLK_DPM_LEVELS  8
-#define NUM_DCEFCLK_DPM_LEVELS 4
-#define NUM_FCLK_DPM_LEVELS4
-#define NUM_MEMCLK_DPM_LEVELS  4
+typedef struct SmuMetricsTable_t {
+   //CPU status
+   uint16_t CoreFrequency[6];  //[MHz]
+   uint32_t CorePower[6];  //[mW]
+   uint16_t CoreTemperature[6];//[centi-Celsius]
+   uint16_t L3Frequency[2];//[MHz]
+   uint16_t L3Temperature[2];  //[centi-Celsius]
+   uint16_t C0Residency[6];//Percentage
 
-#define NUMBER_OF_PSTATES  8
-#define NUMBER_OF_CORES8
+   // GFX status
+   uint16_t GfxclkFrequency;   //[MHz]
+   uint16_t GfxTemperature;//[centi-Celsius]
 
-typedef enum {
-   S3_TYPE_ENTRY,
-   S5_TYPE_ENTRY,
-} Sleep_Type_e;
+   // SOC IP info
+   uint16_t SocclkFrequency;   //[MHz]
+   uint16_t VclkFrequency; //[MHz]
+   uint16_t DclkFrequency; //[MHz]
+   uint16_t MemclkFrequency;   //[MHz]
 
-typedef enum {
-   GFX_OFF = 0,
-   GFX_ON  = 1,
-} GFX_Mode_e;
+   // power, VF info for CPU/GFX telemetry rails, and then socket power 
total
+   uint32_t Voltage[2];//[mV] indices: VDDCR_VDD, 
VDDCR_GFX
+   uint32_t Current[2];//[mA] indices: VDDCR_VDD, 
VDDCR_GFX
+   uint32_t Power[2];  //[mW] indices: VDDCR_VDD, 
VDDCR_GFX
+   uint32_t CurrentSocketPower;//[mW]
 
-typedef enum {
-   CPU_P0 = 0,
-   CPU_P1,
-   CPU_P2,
-   CPU_P3,
-   CPU_P4,
-   CPU_P5,
-   CPU_P6,
-   CPU_P7
-} CPU_PState_e;
+   uint16_t SocTemperature;//[centi-Celsius]
+   uint16_t EdgeTemperature;
+   uint16_t ThrottlerStatus;
+   uint16_t Spare;
 
-typedef enum {
-   CPU_CORE0 = 0,
-   CPU_CORE1,
-   CPU_CORE2,
-   CPU_CORE3,
-   CPU_CORE4,
-   CPU_CORE5,
-   CPU_CORE6,
-   CPU_CORE7
-} CORE_ID_e;
+} SmuMetricsTable_t;
 
-typedef enum {
-   DF_DPM0 = 0,
-   DF_DPM1,
-   DF_DPM2,
-   DF_DPM3,
-   DF_PState_Count
-} DF_PState_e;
-
-typedef enum {
-   GFX_DPM0 = 0,
-   GFX_DPM1,
-   GFX_DPM2,
-   GFX_DPM3,
-   GFX_PState_Count
-} GFX_PState_e;
+typedef struct SmuMetrics_t {
+   SmuMetricsTable_t Current;
+   SmuMetricsTable_t Average;
+   uint32_t SampleStartTime;
+   uint32_t SampleStopTime;
+   uint32_t Accnt;
+} SmuMetrics_t;
 
 #endif
-- 
2.25.1



[PATCH 1/3] drm/amdgpu: update SMU PPSMC for cyan skilfish

2021-09-10 Thread Lang Yu
Add some PPSMC MSGs for cyan skillfish.

Signed-off-by: Lang Yu 
Reviewed-by: Huang Rui 
---
 drivers/gpu/drm/amd/pm/inc/smu_v11_8_ppsmc.h | 9 -
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/pm/inc/smu_v11_8_ppsmc.h b/drivers/gpu/drm/amd/pm/inc/smu_v11_8_ppsmc.h
index 6e6088760b18..909a86aa60f3 100644
--- a/drivers/gpu/drm/amd/pm/inc/smu_v11_8_ppsmc.h
+++ b/drivers/gpu/drm/amd/pm/inc/smu_v11_8_ppsmc.h
@@ -65,6 +65,13 @@
 #define PPSMC_MSG_SetDriverTableVMID    0x34
 #define PPSMC_MSG_SetSoftMinCclk        0x35
 #define PPSMC_MSG_SetSoftMaxCclk        0x36
-#define PPSMC_Message_Count 0x37
+#define PPSMC_MSG_GetGfxFrequency   0x37
+#define PPSMC_MSG_GetGfxVid 0x38
+#define PPSMC_MSG_ForceGfxFreq  0x39
+#define PPSMC_MSG_UnForceGfxFreq0x3A
+#define PPSMC_MSG_ForceGfxVid   0x3B
+#define PPSMC_MSG_UnforceGfxVid 0x3C
+#define PPSMC_MSG_GetEnabledSmuFeatures 0x3D
+#define PPSMC_Message_Count 0x3E
 
 #endif
-- 
2.25.1
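
For reference, a rough sketch of how new PPSMC IDs like these are typically
wired into the swsmu message table (assumptions: the MSG_MAP() macro and
cmn2asic_msg_mapping type from the swsmu common code, plus hypothetical
SMU_MSG_* names on the left; only the PPSMC_MSG_* values come from this
patch):

	static struct cmn2asic_msg_mapping cyan_skillfish_message_map[SMU_MSG_MAX_COUNT] = {
		/* generic SMU message         ASIC-specific PPSMC ID           valid-in-VF */
		MSG_MAP(GetGfxclkFrequency,    PPSMC_MSG_GetGfxFrequency,       0),
		MSG_MAP(GetEnabledSmuFeatures, PPSMC_MSG_GetEnabledSmuFeatures, 0),
		/* remaining entries omitted */
	};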



Re: [PATCH] drm/amd/pm: fix runpm hang when amdgpu loaded prior to sound driver

2021-09-10 Thread Pierre-Eric Pelloux-Prayer

Tested-by: Pierre-Eric Pelloux-Prayer 

Thanks!

On 10/09/2021 05:17, Evan Quan wrote:

The current RUNPM mechanism relies on PMFW to master the timing for BACO
in/exit, and that needs cooperation from the sound driver in the form of
a dstate change notification for function 1 (audio). Otherwise (when the
sound driver is missing), BACO cannot be kicked in correctly and a hang
is observed on RUNPM exit.

By switching back to the legacy message way when the sound driver is
missing, we are able to fix the runpm hang observed for the scenario below:
amdgpu driver loaded -> runpm suspend kicked -> sound driver loaded

Change-Id: I0e44fef11349b5e45e6102913eb46c8c7d279c65
Signed-off-by: Evan Quan 
Reported-by: Pierre-Eric Pelloux-Prayer 
---
  .../gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c   | 24 +--
  .../amd/pm/swsmu/smu11/sienna_cichlid_ppt.c   |  4 ++--
  drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c| 21 
  drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h|  2 ++
  4 files changed, 47 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
index 7bc90f841a11..bcafccf7f07a 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
@@ -2272,7 +2272,27 @@ static int navi10_baco_enter(struct smu_context *smu)
  {
struct amdgpu_device *adev = smu->adev;
  
-	if (adev->in_runpm)

+   /*
+* This aims at the case below:
+*   amdgpu driver loaded -> runpm suspend kicked -> sound driver loaded
+*
+* For NAVI10 and later ASICs, we rely on PMFW to handle the runpm. To
+* make that possible, PMFW needs to acknowledge the dstate transition
+* process for both the gfx (function 0) and audio (function 1) functions
+* of the ASIC.
+*
+* The PCI device's initial runpm status is RUNPM_SUSPENDED, and so is
+* that of the device representing the audio function of the ASIC. That
+* means even if the sound driver (snd_hda_intel) was not loaded yet, a
+* runpm suspend may already have been kicked on the ASIC. However,
+* without the dstate transition notification from the audio function,
+* PMFW cannot handle the BACO in/exit correctly, and that will cause a
+* driver hang on runpm resume.
+*
+* To address this, we revert to the legacy message way (driver masters
+* the timing for BACO in/exit) when the sound driver is missing.
+*/
+   if (adev->in_runpm && smu_cmn_is_audio_func_enabled(adev))
return smu_v11_0_baco_set_armd3_sequence(smu, BACO_SEQ_BACO);
else
return smu_v11_0_baco_enter(smu);
@@ -2282,7 +2302,7 @@ static int navi10_baco_exit(struct smu_context *smu)
  {
struct amdgpu_device *adev = smu->adev;
  
-	if (adev->in_runpm) {

+   if (adev->in_runpm && smu_cmn_is_audio_func_enabled(adev)) {
/* Wait for PMFW handling for the Dstate change */
msleep(10);
return smu_v11_0_baco_set_armd3_sequence(smu, BACO_SEQ_ULPS);
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
index 43c7580a4ea6..f9b730c5ba9e 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
@@ -2361,7 +2361,7 @@ static int sienna_cichlid_baco_enter(struct smu_context 
*smu)
  {
struct amdgpu_device *adev = smu->adev;
  
-	if (adev->in_runpm)

+   if (adev->in_runpm && smu_cmn_is_audio_func_enabled(adev))
return smu_v11_0_baco_set_armd3_sequence(smu, BACO_SEQ_BACO);
else
return smu_v11_0_baco_enter(smu);
@@ -2371,7 +2371,7 @@ static int sienna_cichlid_baco_exit(struct smu_context 
*smu)
  {
struct amdgpu_device *adev = smu->adev;
  
-	if (adev->in_runpm) {

+   if (adev->in_runpm && smu_cmn_is_audio_func_enabled(adev)) {
/* Wait for PMFW handling for the Dstate change */
msleep(10);
return smu_v11_0_baco_set_armd3_sequence(smu, BACO_SEQ_ULPS);
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
index 69da9a7b665f..d61403e917df 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
@@ -1055,3 +1055,24 @@ int smu_cmn_set_mp1_state(struct smu_context *smu,
  
  	return ret;

  }
+
+bool smu_cmn_is_audio_func_enabled(struct amdgpu_device *adev)
+{
+   struct pci_dev *p = NULL;
+   bool snd_driver_loaded;
+
+   /*
+* If the ASIC comes with no audio function, we always assume
+* it is "enabled".
+*/
+   p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
+   adev->pdev->bus->number, 1);
+   if (!p)
+   return true;
+
+   snd_driver_loaded = pci_is_enabled(p) ? true : false;
+
+   pci_dev_put(p);
+
+   return snd_driver_loaded;
+}

RE: [PATCH] drm/ttm: add a BUG_ON in ttm_set_driver_manager when array index is out of bounds

2021-09-10 Thread Chen, Guchun
[Public]

Hi Christian and Xinhui,

Thanks for your suggestion. The cause is that I saw data corruption in
several proprietary use cases. Would BUILD_BUG_ON vary in behavior across
gcc versions?

Anyway, WARN_ON is fine with me, and I will send a new patch set soon to
address this.

Regards,
Guchun

From: Koenig, Christian 
Sent: Friday, September 10, 2021 2:37 PM
To: Pan, Xinhui ; amd-gfx@lists.freedesktop.org; 
dri-de...@lists.freedesktop.org; Deucher, Alexander 
; Chen, Guchun 
Cc: Shi, Leslie 
Subject: Re: [PATCH] drm/ttm: add a BUG_ON in ttm_set_driver_manager when array index is out of bounds

Yeah, that's a good point.

If build_bug_on() doesn't work for some reason then we at least need to
lower this to a WARN_ON.

A BUG_ON() is only justified if we prevent strong data corruption with it
or catch a NULL pointer earlier on or similar.

Regards,
Christian.
Am 10.09.21 um 06:36 schrieb Pan, Xinhui:

[AMD Official Use Only]

looks good to me.
But maybe build_bug_on() works too and is more reasonable for detecting such wrong usage.

From: Chen, Guchun 
Sent: Friday, September 10, 2021 12:30:14 PM
To: amd-gfx@lists.freedesktop.org 
; 
dri-de...@lists.freedesktop.org 
; 
Koenig, Christian ; 
Pan, Xinhui ; Deucher, Alexander 

Cc: Chen, Guchun ; Shi, Leslie 

Subject: [PATCH] drm/ttm: add a BUG_ON in ttm_set_driver_manager when array index is out of bounds

Vendors will define their own memory types on top of TTM_PL_PRIV,
but call ttm_set_driver_manager directly without checking the mem_type
value when setting up the memory manager. So add such a check to catch
the case when the array index is out of bounds.

Signed-off-by: Leslie Shi 
Signed-off-by: Guchun Chen 
---
 include/drm/ttm/ttm_device.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h
index 7a0f561c57ee..24ad76ca8022 100644
--- a/include/drm/ttm/ttm_device.h
+++ b/include/drm/ttm/ttm_device.h
@@ -308,6 +308,7 @@ ttm_manager_type(struct ttm_device *bdev, int mem_type)
 static inline void ttm_set_driver_manager(struct ttm_device *bdev, int type,
   struct ttm_resource_manager *manager)
 {
+   BUG_ON(type >= TTM_NUM_MEM_TYPES);
 bdev->man_drv[type] = manager;
 }

--
2.17.1
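
For reference, a minimal sketch of the WARN_ON() variant discussed above
(assumption: BUILD_BUG_ON() only works when 'type' is a compile-time constant
at every call site, which is not guaranteed here):

	static inline void ttm_set_driver_manager(struct ttm_device *bdev, int type,
						  struct ttm_resource_manager *manager)
	{
		/* Warn on out-of-bounds types and ignore them instead of
		 * crashing or silently corrupting adjacent memory. */
		if (WARN_ON(type >= TTM_NUM_MEM_TYPES))
			return;
		bdev->man_drv[type] = manager;
	}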



RE: [PATCH 2/4] drm/amdgpu: UVD avoid memory allocation during IB test

2021-09-10 Thread Pan, Xinhui
[AMD Official Use Only]

I am wondering if amdgpu_bo_pin would change the BO's placement in the future.
For now, the new placement is calculated by new = old ∩ new.


From: Koenig, Christian 
Sent: September 10, 2021 14:24
To: Pan, Xinhui; amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander
Subject: Re: [PATCH 2/4] drm/amdgpu: UVD avoid memory allocation during IB test

Am 10.09.21 um 02:38 schrieb xinhui pan:
> move BO allocation in sw_init.
>
> Signed-off-by: xinhui pan 
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 75 +++--
>   drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h |  1 +
>   drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c   |  8 +--
>   drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c   |  8 +--
>   4 files changed, 49 insertions(+), 43 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
> index d451c359606a..e2eaac941d37 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
> @@ -141,6 +141,8 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
>   const char *fw_name;
>   const struct common_firmware_header *hdr;
>   unsigned family_id;
> + struct amdgpu_bo *bo = NULL;
> + void *addr;
>   int i, j, r;
>
>   INIT_DELAYED_WORK(>uvd.idle_work, amdgpu_uvd_idle_work_handler);
> @@ -298,9 +300,34 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
>   adev->uvd.filp[i] = NULL;
>   }
>
> + r = amdgpu_bo_create_reserved(adev, 128 << 10, PAGE_SIZE,
> + AMDGPU_GEM_DOMAIN_GTT,
> + &bo, NULL, &addr);
> + if (r)
> + return r;
> +
>   /* from uvd v5.0 HW addressing capacity increased to 64 bits */
> - if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_UVD, 5, 
> 0))
> + if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_UVD, 5, 
> 0)) {
>   adev->uvd.address_64_bit = true;
> + amdgpu_bo_kunmap(bo);
> + amdgpu_bo_unpin(bo);
> + r = amdgpu_bo_pin_restricted(bo, AMDGPU_GEM_DOMAIN_VRAM,
> + 0, 256 << 20);

Please keep using amdgpu_uvd_force_into_uvd_segment() and validate here,
because I want to remove amdgpu_bo_pin_restricted() sooner or later.

> + if (r) {
> + amdgpu_bo_unreserve(bo);
> + amdgpu_bo_unref(&bo);
> + return r;
> + }
> + r = amdgpu_bo_kmap(bo, &addr);
> + if (r) {
> + amdgpu_bo_unpin(bo);
> + amdgpu_bo_unreserve(bo);
> + amdgpu_bo_unref(&bo);
> + return r;
> + }
> + }
> + adev->uvd.ib_bo = bo;
> + amdgpu_bo_unreserve(bo);
>
>   switch (adev->asic_type) {
>   case CHIP_TONGA:
> @@ -342,6 +369,7 @@ int amdgpu_uvd_sw_fini(struct amdgpu_device *adev)
>   for (i = 0; i < AMDGPU_MAX_UVD_ENC_RINGS; ++i)
>   amdgpu_ring_fini(>uvd.inst[j].ring_enc[i]);
>   }
> + amdgpu_bo_free_kernel(&adev->uvd.ib_bo, NULL, NULL);
>   release_firmware(adev->uvd.fw);
>
>   return 0;
> @@ -1080,23 +1108,10 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring 
> *ring, struct amdgpu_bo *bo,
>   unsigned offset_idx = 0;
>   unsigned offset[3] = { UVD_BASE_SI, 0, 0 };
>
> - amdgpu_bo_kunmap(bo);
> - amdgpu_bo_unpin(bo);
> -
> - if (!ring->adev->uvd.address_64_bit) {
> - struct ttm_operation_ctx ctx = { true, false };
> -
> - amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_VRAM);
> - amdgpu_uvd_force_into_uvd_segment(bo);
> - r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
> - if (r)
> - goto err;
> - }
> -
>   r = amdgpu_job_alloc_with_ib(adev, 64, direct ? AMDGPU_IB_POOL_DIRECT :
>AMDGPU_IB_POOL_DELAYED, &job);
>   if (r)
> - goto err;
> + return r;
>
>   if (adev->asic_type >= CHIP_VEGA10) {
>   offset_idx = 1 + ring->me;
> @@ -1148,8 +1163,6 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring 
> *ring, struct amdgpu_bo *bo,
>   }
>
>   amdgpu_bo_fence(bo, f, false);
> - amdgpu_bo_unreserve(bo);
> - amdgpu_bo_unref(&bo);
>
>   if (fence)
>   *fence = dma_fence_get(f);
> @@ -1159,10 +1172,6 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring 
> *ring, struct amdgpu_bo *bo,
>
>   err_free:
>   amdgpu_job_free(job);
> -
> -err:
> - amdgpu_bo_unreserve(bo);
> - amdgpu_bo_unref(&bo);
>   return r;
>   }
>
> @@ -1173,16 +1182,15 @@ int amdgpu_uvd_get_create_msg(struct amdgpu_ring 
> *ring, uint32_t handle,
> struct dma_fence **fence)
>   {
>   struct amdgpu_device *adev = ring->adev;
> - struct amdgpu_bo *bo = NULL;
> + struct amdgpu_bo *bo = adev->uvd.ib_bo;
>   uint32_t 

Re: RE: [PATCH 4/4] drm/amdgpu: VCN avoid memory allocation during IB test

2021-09-10 Thread Christian König

Try that plugin here https://github.com/vivien/vim-linux-coding-style

I've been using it for years and it's really helpful.

Christian.

Am 10.09.21 um 09:53 schrieb Pan, Xinhui:

[AMD Official Use Only]

I am using vim with
set tabstop=8
set shiftwidth=8
set softtabstop=8


From: Koenig, Christian 
Sent: September 10, 2021 14:33
To: Pan, Xinhui; amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander
Subject: Re: [PATCH 4/4] drm/amdgpu: VCN avoid memory allocation during IB test



Am 10.09.21 um 02:38 schrieb xinhui pan:

alloc extra msg from direct IB pool.

Signed-off-by: xinhui pan 
---
   drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 99 +++--
   1 file changed, 45 insertions(+), 54 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
index 561296a85b43..b60d5f01fdae 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -541,15 +541,14 @@ int amdgpu_vcn_dec_sw_ring_test_ring(struct amdgpu_ring 
*ring)
   }

   static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring,
-struct amdgpu_bo *bo,
-struct dma_fence **fence)
+ struct amdgpu_ib *ib_msg,
+ struct dma_fence **fence)

The parameter indentation here and at a few other places doesn't look
correct to me; what editor are you using, BTW?

Apart from that the patch is Reviewed-by: Christian König
.

Regards,
Christian.


   {
   struct amdgpu_device *adev = ring->adev;
   struct dma_fence *f = NULL;
   struct amdgpu_job *job;
   struct amdgpu_ib *ib;
- uint64_t addr;
- void *msg = NULL;
+ uint64_t addr = ib_msg->gpu_addr;
   int i, r;

   r = amdgpu_job_alloc_with_ib(adev, 64,
@@ -558,8 +557,6 @@ static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring,
   goto err;

ib = &job->ibs[0];
- addr = amdgpu_bo_gpu_offset(bo);
- msg = amdgpu_bo_kptr(bo);
   ib->ptr[0] = PACKET0(adev->vcn.internal.data0, 0);
   ib->ptr[1] = addr;
   ib->ptr[2] = PACKET0(adev->vcn.internal.data1, 0);
@@ -576,9 +573,7 @@ static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring,
   if (r)
   goto err_free;

- amdgpu_bo_fence(bo, f, false);
- amdgpu_bo_unreserve(bo);
- amdgpu_bo_free_kernel(&bo, NULL, (void **)&msg);
+ amdgpu_ib_free(adev, ib_msg, f);

   if (fence)
   *fence = dma_fence_get(f);
@@ -588,27 +583,26 @@ static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring 
*ring,

   err_free:
   amdgpu_job_free(job);
-
   err:
- amdgpu_bo_unreserve(bo);
- amdgpu_bo_free_kernel(&bo, NULL, (void **)&msg);
+ amdgpu_ib_free(adev, ib_msg, f);
   return r;
   }

   static int amdgpu_vcn_dec_get_create_msg(struct amdgpu_ring *ring, uint32_t 
handle,
-  struct amdgpu_bo **bo)
+ struct amdgpu_ib *ib)
   {
   struct amdgpu_device *adev = ring->adev;
   uint32_t *msg;
   int r, i;

- *bo = NULL;
- r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
-   AMDGPU_GEM_DOMAIN_VRAM,
-   bo, NULL, (void **)&msg);
+ memset(ib, 0, sizeof(*ib));
+ r = amdgpu_ib_get(adev, NULL, PAGE_SIZE,
+ AMDGPU_IB_POOL_DIRECT,
+ ib);
   if (r)
   return r;

+ msg = ib->ptr;
   msg[0] = cpu_to_le32(0x0028);
   msg[1] = cpu_to_le32(0x0038);
   msg[2] = cpu_to_le32(0x0001);
@@ -630,19 +624,20 @@ static int amdgpu_vcn_dec_get_create_msg(struct 
amdgpu_ring *ring, uint32_t hand
   }

   static int amdgpu_vcn_dec_get_destroy_msg(struct amdgpu_ring *ring, uint32_t 
handle,
-   struct amdgpu_bo **bo)
+   struct amdgpu_ib *ib)
   {
   struct amdgpu_device *adev = ring->adev;
   uint32_t *msg;
   int r, i;

- *bo = NULL;
- r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
-   AMDGPU_GEM_DOMAIN_VRAM,
-   bo, NULL, (void **)&msg);
+ memset(ib, 0, sizeof(*ib));
+ r = amdgpu_ib_get(adev, NULL, PAGE_SIZE,
+ AMDGPU_IB_POOL_DIRECT,
+ ib);
   if (r)
   return r;

+ msg = ib->ptr;
   msg[0] = cpu_to_le32(0x0028);
   msg[1] = cpu_to_le32(0x0018);
   msg[2] = cpu_to_le32(0x);
@@ -658,21 +653,21 @@ static int amdgpu_vcn_dec_get_destroy_msg(struct 
amdgpu_ring *ring, uint32_t han
   int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout)
   {
   struct dma_fence *fence = NULL;
- struct amdgpu_bo *bo;
+ struct amdgpu_ib ib;
   long r;

- r = amdgpu_vcn_dec_get_create_msg(ring, 1, &bo);
+ r = amdgpu_vcn_dec_get_create_msg(ring, 1, &ib);
   if (r)
   goto error;

- r = 

RE: [PATCH 4/4] drm/amdgpu: VCN avoid memory allocation during IB test

2021-09-10 Thread Pan, Xinhui
[AMD Official Use Only]

I am using vim with
set tabstop=8
set shiftwidth=8
set softtabstop=8


From: Koenig, Christian 
Sent: September 10, 2021 14:33
To: Pan, Xinhui; amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander
Subject: Re: [PATCH 4/4] drm/amdgpu: VCN avoid memory allocation during IB test



Am 10.09.21 um 02:38 schrieb xinhui pan:
> alloc extra msg from direct IB pool.
>
> Signed-off-by: xinhui pan 
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 99 +++--
>   1 file changed, 45 insertions(+), 54 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
> index 561296a85b43..b60d5f01fdae 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
> @@ -541,15 +541,14 @@ int amdgpu_vcn_dec_sw_ring_test_ring(struct amdgpu_ring 
> *ring)
>   }
>
>   static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring,
> -struct amdgpu_bo *bo,
> -struct dma_fence **fence)
> + struct amdgpu_ib *ib_msg,
> + struct dma_fence **fence)

The parameter indentation here and at a few other places doesn't look
correct to me; what editor are you using, BTW?

Apart from that the patch is Reviewed-by: Christian König
.

Regards,
Christian.

>   {
>   struct amdgpu_device *adev = ring->adev;
>   struct dma_fence *f = NULL;
>   struct amdgpu_job *job;
>   struct amdgpu_ib *ib;
> - uint64_t addr;
> - void *msg = NULL;
> + uint64_t addr = ib_msg->gpu_addr;
>   int i, r;
>
>   r = amdgpu_job_alloc_with_ib(adev, 64,
> @@ -558,8 +557,6 @@ static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring 
> *ring,
>   goto err;
>
> ib = &job->ibs[0];
> - addr = amdgpu_bo_gpu_offset(bo);
> - msg = amdgpu_bo_kptr(bo);
>   ib->ptr[0] = PACKET0(adev->vcn.internal.data0, 0);
>   ib->ptr[1] = addr;
>   ib->ptr[2] = PACKET0(adev->vcn.internal.data1, 0);
> @@ -576,9 +573,7 @@ static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring 
> *ring,
>   if (r)
>   goto err_free;
>
> - amdgpu_bo_fence(bo, f, false);
> - amdgpu_bo_unreserve(bo);
> - amdgpu_bo_free_kernel(&bo, NULL, (void **)&msg);
> + amdgpu_ib_free(adev, ib_msg, f);
>
>   if (fence)
>   *fence = dma_fence_get(f);
> @@ -588,27 +583,26 @@ static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring 
> *ring,
>
>   err_free:
>   amdgpu_job_free(job);
> -
>   err:
> - amdgpu_bo_unreserve(bo);
> - amdgpu_bo_free_kernel(&bo, NULL, (void **)&msg);
> + amdgpu_ib_free(adev, ib_msg, f);
>   return r;
>   }
>
>   static int amdgpu_vcn_dec_get_create_msg(struct amdgpu_ring *ring, uint32_t 
> handle,
> -  struct amdgpu_bo **bo)
> + struct amdgpu_ib *ib)
>   {
>   struct amdgpu_device *adev = ring->adev;
>   uint32_t *msg;
>   int r, i;
>
> - *bo = NULL;
> - r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
> -   AMDGPU_GEM_DOMAIN_VRAM,
> -   bo, NULL, (void **)&msg);
> + memset(ib, 0, sizeof(*ib));
> + r = amdgpu_ib_get(adev, NULL, PAGE_SIZE,
> + AMDGPU_IB_POOL_DIRECT,
> + ib);
>   if (r)
>   return r;
>
> + msg = ib->ptr;
>   msg[0] = cpu_to_le32(0x0028);
>   msg[1] = cpu_to_le32(0x0038);
>   msg[2] = cpu_to_le32(0x0001);
> @@ -630,19 +624,20 @@ static int amdgpu_vcn_dec_get_create_msg(struct 
> amdgpu_ring *ring, uint32_t hand
>   }
>
>   static int amdgpu_vcn_dec_get_destroy_msg(struct amdgpu_ring *ring, 
> uint32_t handle,
> -   struct amdgpu_bo **bo)
> +   struct amdgpu_ib *ib)
>   {
>   struct amdgpu_device *adev = ring->adev;
>   uint32_t *msg;
>   int r, i;
>
> - *bo = NULL;
> - r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
> -   AMDGPU_GEM_DOMAIN_VRAM,
> -   bo, NULL, (void **)&msg);
> + memset(ib, 0, sizeof(*ib));
> + r = amdgpu_ib_get(adev, NULL, PAGE_SIZE,
> + AMDGPU_IB_POOL_DIRECT,
> + ib);
>   if (r)
>   return r;
>
> + msg = ib->ptr;
>   msg[0] = cpu_to_le32(0x0028);
>   msg[1] = cpu_to_le32(0x0018);
>   msg[2] = cpu_to_le32(0x);
> @@ -658,21 +653,21 @@ static int amdgpu_vcn_dec_get_destroy_msg(struct 
> amdgpu_ring *ring, uint32_t han
>   int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout)
>   {
>   struct dma_fence *fence = NULL;
> - struct amdgpu_bo *bo;
> + struct amdgpu_ib ib;
>   long r;
>
> - r = amdgpu_vcn_dec_get_create_msg(ring, 1, &bo);
> + r = amdgpu_vcn_dec_get_create_msg(ring, 1, &ib);
>   if (r)
>   goto 

Re: [PATCH] Enable '-Werror' by default for all kernel builds

2021-09-10 Thread Linus Torvalds
On Wed, Sep 8, 2021 at 10:59 PM Christoph Hellwig  wrote:
>
> While we're at it, with -Werror something like this is really futile:

Yeah, I'm thinking we could do

 -Wno-error=cpp

to at least allow the cpp warnings to come through without being fatal.

Because while they can be annoying too, they are most definitely under
our direct control, so..

I didn't actually test that, but I think it should work.

That said, maybe they should just be removed. They might be better off
just as Kconfig rules, rather than as a "hey, you screwed up your
Kconfig" warning after the fact.

 Linus
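
To make the "cpp" class concrete, a tiny sketch (assumptions: GCC's
warning-group naming, which is exactly what is described as untested above;
CONFIG_FOO is a made-up symbol): the diagnostics that -Wno-error=cpp demotes
are preprocessor ones like the line below, which would then stay a non-fatal
warning even under -Werror:

	#ifndef CONFIG_FOO		/* hypothetical Kconfig symbol */
	#warning "CONFIG_FOO is not set, falling back to the slow path"
	#endif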


Re: [PATCH] Enable '-Werror' by default for all kernel builds

2021-09-10 Thread Linus Torvalds
On Thu, Sep 9, 2021 at 4:43 AM Marco Elver  wrote:
>
> Sure, but the reality is that the real stack size is already doubled
> for KASAN. And that should be reflected in Wframe-larger-than.

I don't think that's true.

Quite the reverse, in fact.

Yes, the *dynamic* stack size is doubled due to KASAN, because it will
cause much deeper callchains.

But the individual frames don't grow that much apart from compilers
doing stupid things (i.e. apparently clang and KASAN_STACK), and if
anything, the deeper dynamic call chains means that the individual
frame size being small is even *more* important, but we do compensate
for the deeper stacks by making THREAD_SIZE_ORDER bigger at least on
x86.

Honestly, I am not even happy with the current "2048 bytes for
64-bit". The excuse has been that 64-bit needs more stack, but all it
ever did was clearly just allow people to do bad things.

Because a 1kB stack frame is horrendous even in 64-bit. That's not
"spill some registers" kind of stack frame. That's "put a big
structure on the stack" kind of stack frame regardless of any other
issues.

And no, "but we have 16kB of stack and we'll switch stacks on
interrupts" is not an excuse for one single level to use up 1kB, much
less 2kB.  Does anybody seriously believe that we don't quite normally
have stacks that are easily tens of frames deep?

Without having some true "this is the full callchain" information, the
best we can do is just limit individual stack frames. And 2kB is
*excessive*.

 Linus
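
To make the limit concrete, a small sketch (assumptions: the 64-bit default
of -Wframe-larger-than=2048 discussed above; the structure is a made-up
example): a single large on-stack object is enough to blow the per-frame
budget:

	/* Built with -Wframe-larger-than=2048 this emits something like
	 * "warning: the frame size of N bytes is larger than 2048 bytes". */
	struct big_ctx {		/* hypothetical example structure */
		char buf[4096];
	};

	int bad_frame(void)
	{
		struct big_ctx ctx = { 0 };	/* ~4 KiB lands on the stack */

		return ctx.buf[0];
	}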


Re: [PATCH] drm/ttm: add a BUG_ON in ttm_set_driver_manager when array index is out of bounds

2021-09-10 Thread Christian König

Yeah, that's a good point.

If build_bug_on() doesn't work for some reason then we at least need to
lower this to a WARN_ON.


A BUG_ON() is only justified if we prevent strong data corruption with
it or catch a NULL pointer earlier on or similar.


Regards,
Christian.

Am 10.09.21 um 06:36 schrieb Pan, Xinhui:


[AMD Official Use Only]


looks good to me.
But maybe build_bug_on() works too and is more reasonable for detecting
such wrong usage.


*From:* Chen, Guchun 
*Sent:* Friday, September 10, 2021 12:30:14 PM
*To:* amd-gfx@lists.freedesktop.org ; 
dri-de...@lists.freedesktop.org ; 
Koenig, Christian ; Pan, Xinhui 
; Deucher, Alexander 
*Cc:* Chen, Guchun ; Shi, Leslie 

*Subject:* [PATCH] drm/ttm: add a BUG_ON in ttm_set_driver_manager when array index is out of bounds

Vendors will define their own memory types on top of TTM_PL_PRIV,
but call ttm_set_driver_manager directly without checking the mem_type
value when setting up the memory manager. So add such a check to catch
the case when the array index is out of bounds.

Signed-off-by: Leslie Shi 
Signed-off-by: Guchun Chen 
---
 include/drm/ttm/ttm_device.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h
index 7a0f561c57ee..24ad76ca8022 100644
--- a/include/drm/ttm/ttm_device.h
+++ b/include/drm/ttm/ttm_device.h
@@ -308,6 +308,7 @@ ttm_manager_type(struct ttm_device *bdev, int mem_type)
 static inline void ttm_set_driver_manager(struct ttm_device *bdev, int type,
   struct ttm_resource_manager *manager)
 {
+   BUG_ON(type >= TTM_NUM_MEM_TYPES);
 bdev->man_drv[type] = manager;
 }

--
2.17.1





Re: [PATCH 4/4] drm/amdgpu: VCN avoid memory allocation during IB test

2021-09-10 Thread Christian König




Am 10.09.21 um 02:38 schrieb xinhui pan:

alloc extra msg from direct IB pool.

Signed-off-by: xinhui pan 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 99 +++--
  1 file changed, 45 insertions(+), 54 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
index 561296a85b43..b60d5f01fdae 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -541,15 +541,14 @@ int amdgpu_vcn_dec_sw_ring_test_ring(struct amdgpu_ring 
*ring)
  }
  
  static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring,

-  struct amdgpu_bo *bo,
-  struct dma_fence **fence)
+   struct amdgpu_ib *ib_msg,
+   struct dma_fence **fence)


The parameter indentation here and at a few other places doesn't look
correct to me; what editor are you using, BTW?


Apart from that the patch is Reviewed-by: Christian König 
.


Regards,
Christian.


  {
struct amdgpu_device *adev = ring->adev;
struct dma_fence *f = NULL;
struct amdgpu_job *job;
struct amdgpu_ib *ib;
-   uint64_t addr;
-   void *msg = NULL;
+   uint64_t addr = ib_msg->gpu_addr;
int i, r;
  
  	r = amdgpu_job_alloc_with_ib(adev, 64,

@@ -558,8 +557,6 @@ static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring,
goto err;
  
	ib = &job->ibs[0];

-   addr = amdgpu_bo_gpu_offset(bo);
-   msg = amdgpu_bo_kptr(bo);
ib->ptr[0] = PACKET0(adev->vcn.internal.data0, 0);
ib->ptr[1] = addr;
ib->ptr[2] = PACKET0(adev->vcn.internal.data1, 0);
@@ -576,9 +573,7 @@ static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring,
if (r)
goto err_free;
  
-	amdgpu_bo_fence(bo, f, false);

-   amdgpu_bo_unreserve(bo);
-   amdgpu_bo_free_kernel(&bo, NULL, (void **)&msg);
+   amdgpu_ib_free(adev, ib_msg, f);
  
  	if (fence)

*fence = dma_fence_get(f);
@@ -588,27 +583,26 @@ static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring 
*ring,
  
  err_free:

amdgpu_job_free(job);
-
  err:
-   amdgpu_bo_unreserve(bo);
-   amdgpu_bo_free_kernel(&bo, NULL, (void **)&msg);
+   amdgpu_ib_free(adev, ib_msg, f);
return r;
  }
  
  static int amdgpu_vcn_dec_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,

-struct amdgpu_bo **bo)
+   struct amdgpu_ib *ib)
  {
struct amdgpu_device *adev = ring->adev;
uint32_t *msg;
int r, i;
  
-	*bo = NULL;

-   r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM,
- bo, NULL, (void **)&msg);
+   memset(ib, 0, sizeof(*ib));
+   r = amdgpu_ib_get(adev, NULL, PAGE_SIZE,
+   AMDGPU_IB_POOL_DIRECT,
+   ib);
if (r)
return r;
  
+	msg = ib->ptr;

msg[0] = cpu_to_le32(0x0028);
msg[1] = cpu_to_le32(0x0038);
msg[2] = cpu_to_le32(0x0001);
@@ -630,19 +624,20 @@ static int amdgpu_vcn_dec_get_create_msg(struct 
amdgpu_ring *ring, uint32_t hand
  }
  
  static int amdgpu_vcn_dec_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,

- struct amdgpu_bo **bo)
+ struct amdgpu_ib *ib)
  {
struct amdgpu_device *adev = ring->adev;
uint32_t *msg;
int r, i;
  
-	*bo = NULL;

-   r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM,
- bo, NULL, (void **)&msg);
+   memset(ib, 0, sizeof(*ib));
+   r = amdgpu_ib_get(adev, NULL, PAGE_SIZE,
+   AMDGPU_IB_POOL_DIRECT,
+   ib);
if (r)
return r;
  
+	msg = ib->ptr;

msg[0] = cpu_to_le32(0x0028);
msg[1] = cpu_to_le32(0x0018);
msg[2] = cpu_to_le32(0x);
@@ -658,21 +653,21 @@ static int amdgpu_vcn_dec_get_destroy_msg(struct 
amdgpu_ring *ring, uint32_t han
  int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout)
  {
struct dma_fence *fence = NULL;
-   struct amdgpu_bo *bo;
+   struct amdgpu_ib ib;
long r;
  
-	r = amdgpu_vcn_dec_get_create_msg(ring, 1, &bo);

+   r = amdgpu_vcn_dec_get_create_msg(ring, 1, &ib);
if (r)
goto error;
  
-	r = amdgpu_vcn_dec_send_msg(ring, bo, NULL);

+   r = amdgpu_vcn_dec_send_msg(ring, &ib, NULL);
if (r)
goto error;
-   r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, &bo);
+   r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, &ib);
if (r)
goto error;
  
-	r = amdgpu_vcn_dec_send_msg(ring, bo, &fence);

+   r = amdgpu_vcn_dec_send_msg(ring, &ib, &fence);
if (r)

Re: [PATCH 3/4] drm/amdgpu: VCE avoid memory allocation during IB test

2021-09-10 Thread Christian König

Am 10.09.21 um 02:38 schrieb xinhui pan:

alloc extra msg from direct IB pool.

Signed-off-by: xinhui pan 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c | 18 +++---
  1 file changed, 3 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
index e9fdf49d69e8..45d98694db18 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
@@ -82,7 +82,6 @@ MODULE_FIRMWARE(FIRMWARE_VEGA20);
  
  static void amdgpu_vce_idle_work_handler(struct work_struct *work);

  static int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t 
handle,
-struct amdgpu_bo *bo,
 struct dma_fence **fence);
  static int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t 
handle,
  bool direct, struct dma_fence **fence);
@@ -441,7 +440,6 @@ void amdgpu_vce_free_handles(struct amdgpu_device *adev, 
struct drm_file *filp)
   * Open up a stream for HW test
   */
  static int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t 
handle,
-struct amdgpu_bo *bo,
 struct dma_fence **fence)
  {
const unsigned ib_size_dw = 1024;
@@ -451,14 +449,13 @@ static int amdgpu_vce_get_create_msg(struct amdgpu_ring 
*ring, uint32_t handle,
uint64_t addr;
int i, r;
  
-	r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4,

+   r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4 + PAGE_SIZE,


Please use AMDGPU_PAGE_SIZE since this is not really related to the
CPU page size.



 AMDGPU_IB_POOL_DIRECT, &job);
if (r)
return r;
  
	ib = &job->ibs[0];

-
-   addr = amdgpu_bo_gpu_offset(bo);
+   addr = ib->gpu_addr + ib_size_dw * 4;


That address needs to be more aligned, I think.

For UVD that used to be 256 bytes, but I have no idea what VCE requires.
Leo, do you know offhand?


Thanks,
Christian.
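
A minimal sketch of one way to satisfy that (assumptions: the
AMDGPU_GPU_PAGE_SIZE/AMDGPU_GPU_PAGE_ALIGN helpers used elsewhere in amdgpu;
the exact alignment VCE needs is precisely the open question above):
over-allocate and round the message address up, so it is at least GPU-page
aligned:

	/* Reserve one spare GPU page behind the IB so the message can be
	 * placed on a GPU page boundary, which is stricter than UVD's
	 * historical 256-byte requirement. */
	r = amdgpu_job_alloc_with_ib(ring->adev,
				     ib_size_dw * 4 + AMDGPU_GPU_PAGE_SIZE * 2,
				     AMDGPU_IB_POOL_DIRECT, &job);
	if (r)
		return r;

	ib = &job->ibs[0];
	addr = AMDGPU_GPU_PAGE_ALIGN(ib->gpu_addr + ib_size_dw * 4);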

  
  	/* stitch together an VCE create msg */

ib->length_dw = 0;
@@ -1134,20 +1131,13 @@ int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring)
  int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring, long timeout)
  {
struct dma_fence *fence = NULL;
-   struct amdgpu_bo *bo = NULL;
long r;
  
  	/* skip vce ring1/2 ib test for now, since it's not reliable */

if (ring != >adev->vce.ring[0])
return 0;
  
-	r = amdgpu_bo_create_reserved(ring->adev, 512, PAGE_SIZE,

- AMDGPU_GEM_DOMAIN_VRAM,
- &bo, NULL, NULL);
-   if (r)
-   return r;
-
-   r = amdgpu_vce_get_create_msg(ring, 1, bo, NULL);
+   r = amdgpu_vce_get_create_msg(ring, 1, NULL);
if (r)
goto error;
  
@@ -1163,8 +1153,6 @@ int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring, long timeout)
  
  error:

dma_fence_put(fence);
-   amdgpu_bo_unreserve(bo);
-   amdgpu_bo_free_kernel(&bo, NULL, NULL);
return r;
  }
  




Re: [PATCH 2/4] drm/amdgpu: UVD avoid memory allocation during IB test

2021-09-10 Thread Christian König

Am 10.09.21 um 02:38 schrieb xinhui pan:

move BO allocation in sw_init.

Signed-off-by: xinhui pan 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 75 +++--
  drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h |  1 +
  drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c   |  8 +--
  drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c   |  8 +--
  4 files changed, 49 insertions(+), 43 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index d451c359606a..e2eaac941d37 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -141,6 +141,8 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
const char *fw_name;
const struct common_firmware_header *hdr;
unsigned family_id;
+   struct amdgpu_bo *bo = NULL;
+   void *addr;
int i, j, r;
  
  	INIT_DELAYED_WORK(>uvd.idle_work, amdgpu_uvd_idle_work_handler);

@@ -298,9 +300,34 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
adev->uvd.filp[i] = NULL;
}
  
+	r = amdgpu_bo_create_reserved(adev, 128 << 10, PAGE_SIZE,

+   AMDGPU_GEM_DOMAIN_GTT,
+   &bo, NULL, &addr);
+   if (r)
+   return r;
+
/* from uvd v5.0 HW addressing capacity increased to 64 bits */
-   if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_UVD, 5, 
0))
+   if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_UVD, 5, 
0)) {
adev->uvd.address_64_bit = true;
+   amdgpu_bo_kunmap(bo);
+   amdgpu_bo_unpin(bo);
+   r = amdgpu_bo_pin_restricted(bo, AMDGPU_GEM_DOMAIN_VRAM,
+   0, 256 << 20);


Please keep using amdgpu_uvd_force_into_uvd_segment() and validate here,
because I want to remove amdgpu_bo_pin_restricted() sooner or later.
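
A rough sketch of what that suggestion could look like in sw_init
(assumptions: it reuses the existing amdgpu_uvd_force_into_uvd_segment()
helper and the ttm_bo_validate() pattern from the hunk this patch removes
further down; this is not the reviewer's literal proposal):

	struct ttm_operation_ctx ctx = { true, false };

	/* Pin into the UVD-reachable segment by validating with the
	 * restricted placement instead of amdgpu_bo_pin_restricted(). */
	amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_VRAM);
	amdgpu_uvd_force_into_uvd_segment(bo);
	r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
	if (r) {
		amdgpu_bo_unreserve(bo);
		amdgpu_bo_unref(&bo);
		return r;
	}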



+   if (r) {
+   amdgpu_bo_unreserve(bo);
+   amdgpu_bo_unref(&bo);
+   return r;
+   }
+   r = amdgpu_bo_kmap(bo, &addr);
+   if (r) {
+   amdgpu_bo_unpin(bo);
+   amdgpu_bo_unreserve(bo);
+   amdgpu_bo_unref(&bo);
+   return r;
+   }
+   }
+   adev->uvd.ib_bo = bo;
+   amdgpu_bo_unreserve(bo);
  
  	switch (adev->asic_type) {

case CHIP_TONGA:
@@ -342,6 +369,7 @@ int amdgpu_uvd_sw_fini(struct amdgpu_device *adev)
for (i = 0; i < AMDGPU_MAX_UVD_ENC_RINGS; ++i)
amdgpu_ring_fini(>uvd.inst[j].ring_enc[i]);
}
+   amdgpu_bo_free_kernel(&adev->uvd.ib_bo, NULL, NULL);
release_firmware(adev->uvd.fw);
  
  	return 0;

@@ -1080,23 +1108,10 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring 
*ring, struct amdgpu_bo *bo,
unsigned offset_idx = 0;
unsigned offset[3] = { UVD_BASE_SI, 0, 0 };
  
-	amdgpu_bo_kunmap(bo);

-   amdgpu_bo_unpin(bo);
-
-   if (!ring->adev->uvd.address_64_bit) {
-   struct ttm_operation_ctx ctx = { true, false };
-
-   amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_VRAM);
-   amdgpu_uvd_force_into_uvd_segment(bo);
-   r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
-   if (r)
-   goto err;
-   }
-
r = amdgpu_job_alloc_with_ib(adev, 64, direct ? AMDGPU_IB_POOL_DIRECT :
 AMDGPU_IB_POOL_DELAYED, &job);
if (r)
-   goto err;
+   return r;
  
  	if (adev->asic_type >= CHIP_VEGA10) {

offset_idx = 1 + ring->me;
@@ -1148,8 +1163,6 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, 
struct amdgpu_bo *bo,
}
  
  	amdgpu_bo_fence(bo, f, false);

-   amdgpu_bo_unreserve(bo);
-   amdgpu_bo_unref(&bo);
  
  	if (fence)

*fence = dma_fence_get(f);
@@ -1159,10 +1172,6 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, 
struct amdgpu_bo *bo,
  
  err_free:

amdgpu_job_free(job);
-
-err:
-   amdgpu_bo_unreserve(bo);
-   amdgpu_bo_unref(&bo);
return r;
  }
  
@@ -1173,16 +1182,15 @@ int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,

  struct dma_fence **fence)
  {
struct amdgpu_device *adev = ring->adev;
-   struct amdgpu_bo *bo = NULL;
+   struct amdgpu_bo *bo = adev->uvd.ib_bo;
uint32_t *msg;
int r, i;
  
-	r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,

- AMDGPU_GEM_DOMAIN_GTT,
- &bo, NULL, (void **)&msg);
+   r = ttm_bo_reserve(&bo->tbo, true, true, NULL);
if (r)
return r;
  
+	msg = amdgpu_bo_kptr(bo);

/* stitch together an UVD create msg */
msg[0] = cpu_to_le32(0x0de4);
msg[1] = cpu_to_le32(0x);
@@ -1198,23 +1206,25 @@ int 

Re: [PATCH 1/4] drm/amdgpu: Increase direct IB pool size

2021-09-10 Thread Christian König

Am 10.09.21 um 02:38 schrieb xinhui pan:

Direct IB pool is used for vce/vcn IB extra msg too. Increase its size
to AMDGPU_IB_POOL_SIZE.

Signed-off-by: xinhui pan 


Reviewed-by: Christian König 


---
  drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 8 ++--
  1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index c076a6b9a5a2..9274f32c3661 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -307,13 +307,9 @@ int amdgpu_ib_pool_init(struct amdgpu_device *adev)
return 0;
  
  	for (i = 0; i < AMDGPU_IB_POOL_MAX; i++) {

-   if (i == AMDGPU_IB_POOL_DIRECT)
-   size = PAGE_SIZE * 6;
-   else
-   size = AMDGPU_IB_POOL_SIZE;
-
	r = amdgpu_sa_bo_manager_init(adev, &adev->ib_pools[i],
- size, AMDGPU_GPU_PAGE_SIZE,
+ AMDGPU_IB_POOL_SIZE,
+ AMDGPU_GPU_PAGE_SIZE,
  AMDGPU_GEM_DOMAIN_GTT);
if (r)
goto error;