From: Marek Olšák <marek.ol...@amd.com>

- The slab buffer size is increased from 128 KB to 2 MB (the PTE fragment size).
- The maximum suballocated buffer size is increased from 64 KB to 256 KB; this wastes some memory and therefore increases memory usage.
- The number of suballocators is increased from 1 to 3, and they are layered on top of each other to minimize unused space in slabs.

The final increase in memory usage is: DeusEx:MD: 1.8% DOTA 2: 1.75% DiRT Rally: 0.2% The kernel driver will also receive fewer buffers. --- src/gallium/winsys/amdgpu/drm/amdgpu_bo.c | 8 ++++++++ src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c | 2 +- src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h | 2 +- 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c index 9c5e5e1ebc1..a9271c33ee9 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c @@ -573,20 +573,28 @@ struct pb_slab *amdgpu_bo_slab_alloc(void *priv, unsigned heap, return NULL; /* Determine the slab buffer size. */ for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) { struct pb_slabs *slabs = &ws->bo_slabs[i]; unsigned max_entry_size = 1 << (slabs->min_order + slabs->num_orders - 1); if (entry_size <= max_entry_size) { /* The slab size is twice the size of the largest possible entry. */ slab_size = max_entry_size * 2; + + /* The largest slab should have the same size as the PTE fragment + * size to get faster address translation. + */ + if (i == NUM_SLAB_ALLOCATORS - 1 && + slab_size < ws->info.pte_fragment_size) + slab_size = ws->info.pte_fragment_size; + break; } } assert(slab_size != 0); slab->buffer = amdgpu_winsys_bo(amdgpu_bo_create(&ws->base, slab_size, slab_size, domains, flags)); if (!slab->buffer) goto fail; diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c index 91120e3c474..6b7f484f239 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c @@ -304,21 +304,21 @@ amdgpu_winsys_create(int fd, const struct pipe_screen_config *config, if (!do_winsys_init(ws, config, fd)) goto fail_alloc; /* Create managers. */ pb_cache_init(&ws->bo_cache, RADEON_MAX_CACHED_HEAPS, 500000, ws->check_vm ? 
1.0f : 2.0f, 0, (ws->info.vram_size + ws->info.gart_size) / 8, amdgpu_bo_destroy, amdgpu_bo_can_reclaim); unsigned min_slab_order = 9; /* 512 bytes */ - unsigned max_slab_order = 16; /* 64 KB - higher numbers increase memory usage */ + unsigned max_slab_order = 18; /* 256 KB - higher numbers increase memory usage */ unsigned num_slab_orders_per_allocator = (max_slab_order - min_slab_order) / NUM_SLAB_ALLOCATORS; /* Divide the size order range among slab managers. */ for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) { unsigned min_order = min_slab_order; unsigned max_order = MIN2(min_order + num_slab_orders_per_allocator, max_slab_order); if (!pb_slabs_init(&ws->bo_slabs[i], diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h index fc8f04544a9..5ae1d3e55a3 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h @@ -31,21 +31,21 @@ #include "pipebuffer/pb_cache.h" #include "pipebuffer/pb_slab.h" #include "gallium/drivers/radeon/radeon_winsys.h" #include "addrlib/addrinterface.h" #include "util/simple_mtx.h" #include "util/u_queue.h" #include <amdgpu.h> struct amdgpu_cs; -#define NUM_SLAB_ALLOCATORS 1 +#define NUM_SLAB_ALLOCATORS 3 struct amdgpu_winsys { struct radeon_winsys base; struct pipe_reference reference; struct pb_cache bo_cache; /* Each slab buffer can only contain suballocations of equal sizes, so we * need to layer the allocators, so that we don't waste too much memory. */ struct pb_slabs bo_slabs[NUM_SLAB_ALLOCATORS]; -- 2.17.1 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev