winsys/amdgpu: optimize slab allocation for 2 MB amdgpu page tables

Marek Olšák Fri, 23 Nov 2018 15:41:11 -0800

From: Marek Olšák <[email protected]>

- the slab buffer size increased from 128 KB to 2 MB (PTE fragment size)
- the max suballocated buffer size increased from 64 KB to 256 KB,
  which wastes some memory and therefore increases memory usage
- the number of suballocators increased from 1 to 3 and they are layered
  on top of each other to minimize unused space in slabs


The final increase in memory usage is:
  DeusEx:MD:  1.8%
  DOTA 2:     1.75%
  DiRT Rally: 0.2%

The kernel driver will also receive fewer buffers.
---
 src/gallium/winsys/amdgpu/drm/amdgpu_bo.c     | 8 ++++++++
 src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c | 2 +-
 src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h | 2 +-
 3 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
index 9c5e5e1ebc1..a9271c33ee9 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
@@ -573,20 +573,28 @@ struct pb_slab *amdgpu_bo_slab_alloc(void *priv, unsigned heap,
       return NULL;
 
    /* Determine the slab buffer size. */
    for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) {
       struct pb_slabs *slabs = &ws->bo_slabs[i];
       unsigned max_entry_size = 1 << (slabs->min_order + slabs->num_orders - 1);
 
       if (entry_size <= max_entry_size) {
          /* The slab size is twice the size of the largest possible entry. */
          slab_size = max_entry_size * 2;
+
+         /* The largest slab should have the same size as the PTE fragment
+          * size to get faster address translation.
+          */
+         if (i == NUM_SLAB_ALLOCATORS - 1 &&
+             slab_size < ws->info.pte_fragment_size)
+            slab_size = ws->info.pte_fragment_size;
+         break;
       }
    }
    assert(slab_size != 0);
 
    slab->buffer = amdgpu_winsys_bo(amdgpu_bo_create(&ws->base,
                                                     slab_size, slab_size,
                                                     domains, flags));
    if (!slab->buffer)
       goto fail;
 
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
index 91120e3c474..6b7f484f239 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
@@ -304,21 +304,21 @@ amdgpu_winsys_create(int fd, const struct pipe_screen_config *config,
    if (!do_winsys_init(ws, config, fd))
       goto fail_alloc;
 
    /* Create managers. */
    pb_cache_init(&ws->bo_cache, RADEON_MAX_CACHED_HEAPS,
                  500000, ws->check_vm ? 1.0f : 2.0f, 0,
                  (ws->info.vram_size + ws->info.gart_size) / 8,
                  amdgpu_bo_destroy, amdgpu_bo_can_reclaim);
 
    unsigned min_slab_order = 9;  /* 512 bytes */
-   unsigned max_slab_order = 16; /* 64 KB - higher numbers increase memory usage */
+   unsigned max_slab_order = 18; /* 256 KB - higher numbers increase memory usage */
    unsigned num_slab_orders_per_allocator = (max_slab_order - min_slab_order) /
                                             NUM_SLAB_ALLOCATORS;
 
    /* Divide the size order range among slab managers. */
    for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) {
       unsigned min_order = min_slab_order;
       unsigned max_order = MIN2(min_order + num_slab_orders_per_allocator,
                                 max_slab_order);
 
       if (!pb_slabs_init(&ws->bo_slabs[i],
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h
index fc8f04544a9..5ae1d3e55a3 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h
@@ -31,21 +31,21 @@
 #include "pipebuffer/pb_cache.h"
 #include "pipebuffer/pb_slab.h"
 #include "gallium/drivers/radeon/radeon_winsys.h"
 #include "addrlib/addrinterface.h"
 #include "util/simple_mtx.h"
 #include "util/u_queue.h"
 #include <amdgpu.h>
 
 struct amdgpu_cs;
 
-#define NUM_SLAB_ALLOCATORS 1
+#define NUM_SLAB_ALLOCATORS 3
 
 struct amdgpu_winsys {
    struct radeon_winsys base;
    struct pipe_reference reference;
    struct pb_cache bo_cache;
 
    /* Each slab buffer can only contain suballocations of equal sizes, so we
     * need to layer the allocators, so that we don't waste too much memory.
     */
    struct pb_slabs bo_slabs[NUM_SLAB_ALLOCATORS];
-- 
2.17.1

_______________________________________________
mesa-dev mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 3/7] winsys/amdgpu: optimize slab allocation for 2 MB amdgpu page tables

Reply via email to