GPUs typically benefit from contiguous memory via reduced TLB pressure and
improved caching performance, with the maximum contiguous block size that
yields a performance benefit determined by the hardware design.

The TTM pool allocator by default tries (hard) to allocate blocks of up to
the system MAX_PAGE_ORDER. That order varies by CPU platform and can also
be configured via Kconfig.
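
For reference, this is roughly how the limit is defined in
include/linux/mmzone.h (paraphrased; the exact form varies between kernel
versions):

	#ifndef CONFIG_ARCH_FORCE_MAX_ORDER
	#define MAX_PAGE_ORDER 10
	#else
	#define MAX_PAGE_ORDER CONFIG_ARCH_FORCE_MAX_ORDER
	#endif

With the default order 10 and 4 KiB pages this means the pool allocator
will try blocks of up to 4 MiB.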

If that limit is set higher than what the GPU can make use of, let
individual drivers tell TTM above which allocation order the pool
allocator can afford to make a little less effort.
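
For example (a hypothetical sketch; the ttm_device pointer name and the
order 9 cut-off are illustrative only), a driver whose hardware gains
nothing from blocks larger than 2 MiB (order 9 with 4 KiB pages) could do,
after ttm_device_init():

	/* Stop trying hard for allocations above 2 MiB (order 9). */
	ttm_pool_set_max_beneficial_order(&bdev->pool, 9);

The helper clamps the passed in value to MAX_PAGE_ORDER and returns the
order which was actually set.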

We implement this by disabling direct reclaim for those allocations, which
reduces allocation latency and lowers the demands on the page allocator in
cases where expending this effort is not critical for the GPU in question.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursu...@igalia.com>
Cc: Christian König <christian.koe...@amd.com>
Cc: Thadeu Lima de Souza Cascardo <casca...@igalia.com>
---
 drivers/gpu/drm/ttm/ttm_pool.c | 15 +++++++++++++--
 include/drm/ttm/ttm_pool.h     | 10 ++++++++++
 2 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
index c5eb2e28ca9d..3bf7b6bd96a3 100644
--- a/drivers/gpu/drm/ttm/ttm_pool.c
+++ b/drivers/gpu/drm/ttm/ttm_pool.c
@@ -726,8 +726,16 @@ static int __ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt,
 
        page_caching = tt->caching;
        allow_pools = true;
-       for (order = ttm_pool_alloc_find_order(MAX_PAGE_ORDER, alloc);
-            alloc->remaining_pages;
+
+       order = ttm_pool_alloc_find_order(MAX_PAGE_ORDER, alloc);
+       /*
+        * Do not add latency to the allocation path for allocation orders
+        * the device told us bring no additional performance gains.
+        */
+       if (order > pool->max_beneficial_order)
+               gfp_flags &= ~__GFP_DIRECT_RECLAIM;
+
+       for (; alloc->remaining_pages;
             order = ttm_pool_alloc_find_order(order, alloc)) {
                struct ttm_pool_type *pt;
 
@@ -745,6 +753,8 @@ static int __ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt,
                if (!p) {
                        page_caching = ttm_cached;
                        allow_pools = false;
+                       if (order <= pool->max_beneficial_order)
+                               gfp_flags |= __GFP_DIRECT_RECLAIM;
                        p = ttm_pool_alloc_page(pool, gfp_flags, order);
                }
                /* If that fails, lower the order if possible and retry. */
@@ -1076,6 +1086,7 @@ void ttm_pool_init(struct ttm_pool *pool, struct device *dev,
        pool->nid = nid;
        pool->use_dma_alloc = use_dma_alloc;
        pool->use_dma32 = use_dma32;
+       pool->max_beneficial_order = MAX_PAGE_ORDER;
 
        for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) {
                for (j = 0; j < NR_PAGE_ORDERS; ++j) {
diff --git a/include/drm/ttm/ttm_pool.h b/include/drm/ttm/ttm_pool.h
index 54cd34a6e4c0..24d3285c9aad 100644
--- a/include/drm/ttm/ttm_pool.h
+++ b/include/drm/ttm/ttm_pool.h
@@ -66,6 +66,7 @@ struct ttm_pool_type {
  * @nid: which numa node to use
  * @use_dma_alloc: if coherent DMA allocations should be used
  * @use_dma32: if GFP_DMA32 should be used
+ * @max_beneficial_order: allocations above this order do not bring performance gains
  * @caching: pools for each caching/order
  */
 struct ttm_pool {
@@ -74,6 +75,7 @@ struct ttm_pool {
 
        bool use_dma_alloc;
        bool use_dma32;
+       unsigned int max_beneficial_order;
 
        struct {
                struct ttm_pool_type orders[NR_PAGE_ORDERS];
@@ -88,6 +90,14 @@ void ttm_pool_init(struct ttm_pool *pool, struct device *dev,
                   int nid, bool use_dma_alloc, bool use_dma32);
 void ttm_pool_fini(struct ttm_pool *pool);
 
+static inline unsigned int
+ttm_pool_set_max_beneficial_order(struct ttm_pool *pool, unsigned int order)
+{
+       pool->max_beneficial_order = min(MAX_PAGE_ORDER, order);
+
+       return pool->max_beneficial_order;
+}
+
 int ttm_pool_debugfs(struct ttm_pool *pool, struct seq_file *m);
 
 void ttm_pool_drop_backed_up(struct ttm_tt *tt);
-- 
2.48.0
