On some old x86 systems we had the problem that changing the caching
flags of system memory required changing the global MTRR/PAT tables.

But on any modern x86 system (CPUs introduced roughly after 2004) we
don't need that any more and can update the caching flags directly in
the PTEs of the CPU mapping. The old approach was just never disabled,
out of fear of regressions.

We have already been using the PTE flags for encryption on x86 64-bit
for quite a while, and all other supported platforms (Sparc, PowerPC,
ARM, MIPS, LoongArch) have never done anything different either.

So disable the page pool completely for 64-bit systems and just insert
a clflush to be on the safe side, so that we never return memory with
dirty cache lines.

Testing on a Ryzen 5 and a Ryzen 7 shows that this has absolutely no
performance impact, and off hand the AMD CI can't find a problem
either. Let's see what the i915 and Xe CI systems have to say about
it.

Signed-off-by: Christian König <christian.koe...@amd.com>
---
 drivers/gpu/drm/ttm/ttm_pool.c | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
index baf27c70a419..7487eac29398 100644
--- a/drivers/gpu/drm/ttm/ttm_pool.c
+++ b/drivers/gpu/drm/ttm/ttm_pool.c
@@ -38,7 +38,7 @@
 #include <linux/highmem.h>
 #include <linux/sched/mm.h>
 
-#ifdef CONFIG_X86
+#ifdef CONFIG_X86_32
 #include <asm/set_memory.h>
 #endif
 
@@ -46,6 +46,7 @@
 #include <drm/ttm/ttm_pool.h>
 #include <drm/ttm/ttm_tt.h>
 #include <drm/ttm/ttm_bo.h>
+#include <drm/drm_cache.h>
 
 #include "ttm_module.h"
 
@@ -192,7 +193,7 @@ static void ttm_pool_free_page(struct ttm_pool *pool, enum ttm_caching caching,
 	struct ttm_pool_dma *dma;
 	void *vaddr;
 
-#ifdef CONFIG_X86
+#ifdef CONFIG_X86_32
 	/* We don't care that set_pages_wb is inefficient here. This is only
 	 * used when we have to shrink and CPU overhead is irrelevant then.
 	 */
@@ -218,7 +219,7 @@ static void ttm_pool_free_page(struct ttm_pool *pool, enum ttm_caching caching,
 /* Apply any cpu-caching deferred during page allocation */
 static int ttm_pool_apply_caching(struct ttm_pool_alloc_state *alloc)
 {
-#ifdef CONFIG_X86
+#ifdef CONFIG_X86_32
 	unsigned int num_pages = alloc->pages - alloc->caching_divide;
 
 	if (!num_pages)
@@ -232,6 +233,11 @@ static int ttm_pool_apply_caching(struct ttm_pool_alloc_state *alloc)
 	case ttm_uncached:
 		return set_pages_array_uc(alloc->caching_divide, num_pages);
 	}
+
+#elif defined(CONFIG_X86_64)
+	unsigned int num_pages = alloc->pages - alloc->caching_divide;
+
+	drm_clflush_pages(alloc->caching_divide, num_pages);
 #endif
 	alloc->caching_divide = alloc->pages;
 	return 0;
@@ -342,7 +348,7 @@ static struct ttm_pool_type *ttm_pool_select_type(struct ttm_pool *pool,
 	if (pool->use_dma_alloc)
 		return &pool->caching[caching].orders[order];
 
-#ifdef CONFIG_X86
+#ifdef CONFIG_X86_32
 	switch (caching) {
 	case ttm_write_combined:
 		if (pool->nid != NUMA_NO_NODE)
@@ -980,7 +986,7 @@ long ttm_pool_backup(struct ttm_pool *pool, struct ttm_tt *tt,
 	    pool->use_dma_alloc || ttm_tt_is_backed_up(tt))
 		return -EBUSY;
 
-#ifdef CONFIG_X86
+#ifdef CONFIG_X86_32
 	/* Anything returned to the system needs to be cached. */
 	if (tt->caching != ttm_cached)
 		set_pages_array_wb(tt->pages, tt->num_pages);
-- 
2.43.0
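
As a side note for reviewers unfamiliar with the two mechanisms the
commit message relies on (per-PTE caching attributes via PAT, and an
explicit cache flush before handing out pages), here is a minimal
sketch. The example_* helpers are hypothetical and not part of the
patch; only pgprot_writecombine() and drm_clflush_pages() are real
kernel APIs.

#include <linux/mm.h>
#include <drm/drm_cache.h>

/* Hypothetical helper, not part of the patch: with PAT, the caching
 * mode of a CPU mapping is encoded in the PTE protection bits, so no
 * global MTRR/PAT table update is needed to get write-combining.
 */
static pgprot_t example_wc_prot(pgprot_t prot)
{
	return pgprot_writecombine(prot);
}

/* Hypothetical helper, not part of the patch: flush the CPU caches
 * for a page array so that no dirty cache lines survive before the
 * pages are handed out, mirroring what ttm_pool_apply_caching() now
 * does under CONFIG_X86_64.
 */
static void example_flush_pages(struct page **pages,
				unsigned long num_pages)
{
	drm_clflush_pages(pages, num_pages);
}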