Mesa (main): winsys/amdgpu: cosmetic touchups around IB sizes

2024-01-15 Thread GitLab Mirror
Module: Mesa
Branch: main
Commit: 7ed27bff0beacf5d6b965afc0792ead588c11045
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=7ed27bff0beacf5d6b965afc0792ead588c11045

Author: Marek Olšák 
Date:   Sun Jan  7 17:11:45 2024 -0500

winsys/amdgpu: cosmetic touchups around IB sizes

Reviewed-by: Pierre-Eric Pelloux-Prayer 
Part-of: 

---

 src/gallium/winsys/amdgpu/drm/amdgpu_cs.c | 26 ++++++++++++++------------
 1 file changed, 14 insertions(+), 12 deletions(-)

diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
index 6cac797e763..e09f5b38cf3 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
@@ -718,19 +718,18 @@ static bool amdgpu_ib_new_buffer(struct amdgpu_winsys *ws,
uint8_t *mapped;
unsigned buffer_size;
 
-   /* Always create a buffer that is at least as large as the maximum seen IB
-* size, aligned to a power of two (and multiplied by 4 to reduce internal
-* fragmentation if chaining is not available). Limit to 512k dwords, which
-* is the largest power of two that fits into the size field of the
-* INDIRECT_BUFFER packet.
+   /* Always create a buffer that is at least as large as the maximum seen IB size,
+* aligned to a power of two.
 */
-   if (cs->has_chaining)
-  buffer_size = util_next_power_of_two(main_ib->max_ib_bytes);
-   else
-  buffer_size = util_next_power_of_two(4 * main_ib->max_ib_bytes);
+   buffer_size = util_next_power_of_two(main_ib->max_ib_bytes);
 
-   const unsigned min_size = MAX2(main_ib->max_check_space_size, 8 * 1024 * 4);
-   const unsigned max_size = 512 * 1024 * 4;
+   /* Multiply by 4 to reduce internal fragmentation if chaining is not available.*/
+   if (!cs->has_chaining)
+  buffer_size *= 4;
+
+   const unsigned min_size = MAX2(main_ib->max_check_space_size, 32 * 1024);
+   /* This is the maximum size that fits into the INDIRECT_BUFFER packet. */
+   const unsigned max_size = 2 * 1024 * 1024;
 
buffer_size = MIN2(buffer_size, max_size);
buffer_size = MAX2(buffer_size, min_size); /* min_size is more important */
@@ -784,7 +783,7 @@ static bool amdgpu_get_new_ib(struct amdgpu_winsys *ws,
 {
struct drm_amdgpu_cs_chunk_ib *chunk_ib = &cs->csc->chunk_ib[IB_MAIN];
/* This is the minimum size of a contiguous IB. */
-   unsigned ib_size = 4 * 1024 * 4;
+   unsigned ib_size = 16 * 1024;
 
/* Always allocate at least the size of the biggest cs_check_space call,
 * because precisely the last call might have requested this size.
@@ -796,6 +795,9 @@ static bool amdgpu_get_new_ib(struct amdgpu_winsys *ws,
IB_MAX_SUBMIT_BYTES));
}
 
+   /* Decay the IB buffer size over time, so that memory usage decreases after
+* a temporary peak.
+*/
main_ib->max_ib_bytes = main_ib->max_ib_bytes - main_ib->max_ib_bytes / 32;
 
rcs->prev_dw = 0;



Mesa (main): winsys/amdgpu: cosmetic touchups

2024-01-15 Thread GitLab Mirror
Module: Mesa
Branch: main
Commit: 2da16e963593a91381766a84573ae76e1849920e
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=2da16e963593a91381766a84573ae76e1849920e

Author: Marek Olšák 
Date:   Sun Jan  7 14:58:40 2024 -0500

winsys/amdgpu: cosmetic touchups

Reviewed-by: Pierre-Eric Pelloux-Prayer 
Part-of: 

---

 src/gallium/winsys/amdgpu/drm/amdgpu_bo.h | 11 +++++------
 src/gallium/winsys/amdgpu/drm/amdgpu_cs.c | 13 ++++++++-----
 2 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h
index 4da7b20fa72..9d2bb04f66e 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h
@@ -251,16 +251,15 @@ struct pb_slab *amdgpu_bo_slab_alloc(void *priv, unsigned heap, unsigned entry_s
 void amdgpu_bo_slab_free(struct amdgpu_winsys *ws, struct pb_slab *slab);
 uint64_t amdgpu_bo_get_va(struct pb_buffer_lean *buf);
 
-static inline
-struct amdgpu_winsys_bo *amdgpu_winsys_bo(struct pb_buffer_lean *bo)
+static inline struct amdgpu_winsys_bo *
+amdgpu_winsys_bo(struct pb_buffer_lean *bo)
 {
return (struct amdgpu_winsys_bo *)bo;
 }
 
-static inline
-void amdgpu_winsys_bo_reference(struct amdgpu_winsys *ws,
-struct amdgpu_winsys_bo **dst,
-struct amdgpu_winsys_bo *src)
+static inline void
+amdgpu_winsys_bo_reference(struct amdgpu_winsys *ws, struct amdgpu_winsys_bo **dst,
+   struct amdgpu_winsys_bo *src)
 {
radeon_bo_reference(&ws->dummy_ws.base,
(struct pb_buffer_lean**)dst, (struct 
pb_buffer_lean*)src);
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
index 8c6e4b91e4c..3bef8a1ed28 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
@@ -1205,10 +1205,11 @@ static void amdgpu_add_bo_fences_to_dependencies(struct amdgpu_cs *acs,
 {
struct amdgpu_winsys *ws = acs->ws;
unsigned queue_index = acs->queue_index;
+   struct amdgpu_cs_buffer *buffers = list->buffers;
unsigned num_buffers = list->num_buffers;
 
for (unsigned i = 0; i < num_buffers; i++) {
-  struct amdgpu_cs_buffer *buffer = &list->buffers[i];
+  struct amdgpu_cs_buffer *buffer = &buffers[i];
   struct amdgpu_winsys_bo *bo = buffer->bo;
 
   /* Add BO fences from queues other than 'queue_index' to dependencies. */
@@ -1243,10 +1244,11 @@ static void amdgpu_cs_add_syncobj_signal(struct radeon_cmdbuf *rws,
  */
 static bool amdgpu_add_sparse_backing_buffers(struct amdgpu_cs_context *cs)
 {
+   struct amdgpu_cs_buffer *buffers = cs->buffer_lists[AMDGPU_BO_SPARSE].buffers;
unsigned num_sparse_buffers = 
cs->buffer_lists[AMDGPU_BO_SPARSE].num_buffers;
 
for (unsigned i = 0; i < num_sparse_buffers; ++i) {
-  struct amdgpu_cs_buffer *buffer = &cs->buffer_lists[AMDGPU_BO_SPARSE].buffers[i];
+  struct amdgpu_cs_buffer *buffer = &buffers[i];
   struct amdgpu_bo_sparse *bo = get_sparse_bo(buffer->bo);
 
   simple_mtx_lock(&bo->commit_lock);
@@ -1277,7 +1279,7 @@ static void amdgpu_cs_submit_ib(void *job, void *gdata, int thread_index)
struct amdgpu_cs *acs = (struct amdgpu_cs*)job;
struct amdgpu_winsys *ws = acs->ws;
struct amdgpu_cs_context *cs = acs->cst;
-   int i, r;
+   int r;
uint64_t seq_no = 0;
bool has_user_fence = amdgpu_cs_has_user_fence(acs);
 
@@ -1423,11 +1425,12 @@ static void amdgpu_cs_submit_ib(void *job, void *gdata, int thread_index)
  goto cleanup;
   }
 
+  struct amdgpu_cs_buffer *real_buffers = cs->buffer_lists[AMDGPU_BO_REAL].buffers;
   unsigned num_real_buffers = cs->buffer_lists[AMDGPU_BO_REAL].num_buffers;
   bo_list = alloca((num_real_buffers + 2) * sizeof(struct 
drm_amdgpu_bo_list_entry));
 
-  for (i = 0; i < num_real_buffers; ++i) {
- struct amdgpu_cs_buffer *buffer = &cs->buffer_lists[AMDGPU_BO_REAL].buffers[i];
+  for (unsigned i = 0; i < num_real_buffers; ++i) {
+ struct amdgpu_cs_buffer *buffer = &real_buffers[i];
 
  bo_list[num_bo_handles].bo_handle = 
get_real_bo(buffer->bo)->kms_handle;
  bo_list[num_bo_handles].bo_priority =