Mesa (main): radeonsi: remove Smart Access Memory because CPU access has large overhead

GitLab Mirror Thu, 02 Mar 2023 17:28:01 -0800

Module: Mesa
Branch: main
Commit: 0669d7c29b599439a7f93fdf8c640c61a846243f
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=0669d7c29b599439a7f93fdf8c640c61a846243f


Author: Marek Olšák <[email protected]>
Date:   Sat Feb 18 04:50:18 2023 -0500

radeonsi: remove Smart Access Memory because CPU access has large overhead

Related: https://gitlab.freedesktop.org/mesa/mesa/-/issues/8176

Acked-by: Pierre-Eric Pelloux-Prayer <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21641>

---

 src/amd/common/ac_gpu_info.c                      |  6 ------
 src/amd/common/ac_gpu_info.h                      |  1 -
 src/gallium/drivers/r300/r300_screen.c            |  2 +-
 src/gallium/drivers/r600/r600_pipe_common.c       |  2 +-
 src/gallium/drivers/r600/radeon_uvd.c             |  2 +-
 src/gallium/drivers/r600/radeon_video.c           |  2 +-
 src/gallium/drivers/radeonsi/si_buffer.c          |  8 ++------
 src/gallium/drivers/radeonsi/si_pipe.c            | 18 +++++-------------
 src/gallium/drivers/radeonsi/si_texture.c         |  3 +--
 src/gallium/include/winsys/radeon_winsys.h        |  4 +---
 src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c     | 10 +---------
 src/gallium/winsys/radeon/drm/radeon_drm_winsys.c |  5 +----
 12 files changed, 15 insertions(+), 48 deletions(-)

diff --git a/src/amd/common/ac_gpu_info.c b/src/amd/common/ac_gpu_info.c
index 985647ad4c5..408748821b8 100644
--- a/src/amd/common/ac_gpu_info.c
+++ b/src/amd/common/ac_gpu_info.c
@@ -892,11 +892,6 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info)
       return false;
    }
 
-   info->smart_access_memory = info->all_vram_visible &&
-                               info->gfx_level >= GFX10_3 &&
-                               util_get_cpu_caps()->family >= CPU_AMD_ZEN3 &&
-                               util_get_cpu_caps()->family < CPU_AMD_LAST;
-
    info->family_id = device_info.family;
    info->chip_external_rev = device_info.external_rev;
    info->chip_rev = device_info.chip_rev;
@@ -1638,7 +1633,6 @@ void ac_print_gpu_info(struct radeon_info *info, FILE *f)
    fprintf(f, "    address32_hi = 0x%x\n", info->address32_hi);
    fprintf(f, "    has_dedicated_vram = %u\n", info->has_dedicated_vram);
    fprintf(f, "    all_vram_visible = %u\n", info->all_vram_visible);
-   fprintf(f, "    smart_access_memory = %u\n", info->smart_access_memory);
    fprintf(f, "    max_tcc_blocks = %i\n", info->max_tcc_blocks);
    fprintf(f, "    tcc_cache_line_size = %u\n", info->tcc_cache_line_size);
    fprintf(f, "    tcc_rb_non_coherent = %u\n", info->tcc_rb_non_coherent);
diff --git a/src/amd/common/ac_gpu_info.h b/src/amd/common/ac_gpu_info.h
index d0e4d22c274..b21a3a3baff 100644
--- a/src/amd/common/ac_gpu_info.h
+++ b/src/amd/common/ac_gpu_info.h
@@ -165,7 +165,6 @@ struct radeon_info {
    uint32_t address32_hi;
    bool has_dedicated_vram;
    bool all_vram_visible;
-   bool smart_access_memory;
    bool has_l2_uncached;
    bool r600_has_virtual_memory;
    uint32_t max_tcc_blocks;
diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c
index bd53c99443b..c033ccb4469 100644
--- a/src/gallium/drivers/r300/r300_screen.c
+++ b/src/gallium/drivers/r300/r300_screen.c
@@ -822,7 +822,7 @@ struct pipe_screen* r300_screen_create(struct radeon_winsys *rws,
         return NULL;
     }
 
-    rws->query_info(rws, &r300screen->info, false, false);
+    rws->query_info(rws, &r300screen->info);
 
     r300_init_debug(r300screen);
     r300_parse_chipset(r300screen->info.pci_id, &r300screen->caps);
diff --git a/src/gallium/drivers/r600/r600_pipe_common.c b/src/gallium/drivers/r600/r600_pipe_common.c
index 02a2a782898..38fbb663250 100644
--- a/src/gallium/drivers/r600/r600_pipe_common.c
+++ b/src/gallium/drivers/r600/r600_pipe_common.c
@@ -1212,7 +1212,7 @@ bool r600_common_screen_init(struct r600_common_screen *rscreen,
        struct utsname uname_data;
        const char *chip_name;
 
-       ws->query_info(ws, &rscreen->info, false, false);
+       ws->query_info(ws, &rscreen->info);
        rscreen->ws = ws;
 
        chip_name = r600_get_family_name(rscreen);
diff --git a/src/gallium/drivers/r600/radeon_uvd.c b/src/gallium/drivers/r600/radeon_uvd.c
index 35471e51d6a..7b29b2dae18 100644
--- a/src/gallium/drivers/r600/radeon_uvd.c
+++ b/src/gallium/drivers/r600/radeon_uvd.c
@@ -1052,7 +1052,7 @@ struct pipe_video_codec *ruvd_create_decoder(struct pipe_context *context,
        struct ruvd_decoder *dec;
        int r, i;
 
-       ws->query_info(ws, &info, false, false);
+       ws->query_info(ws, &info);
 
        switch(u_reduce_video_profile(templ->profile)) {
        case PIPE_VIDEO_FORMAT_MPEG12:
diff --git a/src/gallium/drivers/r600/radeon_video.c b/src/gallium/drivers/r600/radeon_video.c
index 6ada9ba1862..16a522f7a1d 100644
--- a/src/gallium/drivers/r600/radeon_video.c
+++ b/src/gallium/drivers/r600/radeon_video.c
@@ -224,7 +224,7 @@ int rvid_get_video_param(struct pipe_screen *screen,
        enum pipe_video_format codec = u_reduce_video_profile(profile);
        struct radeon_info info;
 
-       rscreen->ws->query_info(rscreen->ws, &info, false, false);
+       rscreen->ws->query_info(rscreen->ws, &info);
 
        if (entrypoint == PIPE_VIDEO_ENTRYPOINT_ENCODE) {
                switch (param) {
diff --git a/src/gallium/drivers/radeonsi/si_buffer.c b/src/gallium/drivers/radeonsi/si_buffer.c
index b6cbbfa2856..f25b7d4da67 100644
--- a/src/gallium/drivers/radeonsi/si_buffer.c
+++ b/src/gallium/drivers/radeonsi/si_buffer.c
@@ -56,10 +56,7 @@ void si_init_resource_fields(struct si_screen *sscreen, struct si_resource *res,
    switch (res->b.b.usage) {
    case PIPE_USAGE_STREAM:
       res->flags |= RADEON_FLAG_GTT_WC;
-      if (sscreen->info.smart_access_memory)
-         res->domains = RADEON_DOMAIN_VRAM;
-      else
-         res->domains = RADEON_DOMAIN_GTT;
+      res->domains = RADEON_DOMAIN_GTT;
       break;
    case PIPE_USAGE_STAGING:
       /* Transfers are likely to occur more often with these
@@ -163,8 +160,7 @@ void si_init_resource_fields(struct si_screen *sscreen, struct si_resource *res,
        * because they might never be moved back again. If a buffer is large enough,
        * upload data by copying from a temporary GTT buffer.
        */
-      if (!sscreen->info.smart_access_memory &&
-          sscreen->info.has_dedicated_vram &&
+      if (sscreen->info.has_dedicated_vram &&
           !res->b.cpu_storage && /* TODO: The CPU storage breaks this. */
           size >= sscreen->options.max_vram_map_size)
          res->b.b.flags |= PIPE_RESOURCE_FLAG_DONT_MAP_DIRECTLY;
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 8a92a922bad..a358b620f96 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -555,24 +555,20 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, unsign
       goto fail;
    }
 
-   /* Initialize public allocators. */
-   /* Unify uploaders as follows:
-    * - dGPUs with Smart Access Memory: there is only one uploader instance writing to VRAM.
+   /* Initialize public allocators. Unify uploaders as follows:
+    * - dGPUs: The const uploader writes to VRAM and the stream uploader writes to RAM.
     * - APUs: There is only one uploader instance writing to RAM. VRAM has the same perf on APUs.
-    * - Other chips: The const uploader writes to VRAM and the stream uploader writes to RAM.
     */
-   bool smart_access_memory = sscreen->info.smart_access_memory;
    bool is_apu = !sscreen->info.has_dedicated_vram;
    sctx->b.stream_uploader =
-      u_upload_create(&sctx->b, 1024 * 1024, 0,
-                      smart_access_memory && !is_apu ? PIPE_USAGE_DEFAULT : PIPE_USAGE_STREAM,
+      u_upload_create(&sctx->b, 1024 * 1024, 0, PIPE_USAGE_STREAM,
                       SI_RESOURCE_FLAG_32BIT); /* same flags as const_uploader */
    if (!sctx->b.stream_uploader) {
       fprintf(stderr, "radeonsi: can't create stream_uploader\n");
       goto fail;
    }
 
-   if (smart_access_memory || is_apu) {
+   if (is_apu) {
       sctx->b.const_uploader = sctx->b.stream_uploader;
    } else {
       sctx->b.const_uploader =
@@ -1136,11 +1132,7 @@ static struct pipe_screen *radeonsi_screen_create_impl(struct radeon_winsys *ws,
    }
 
    sscreen->ws = ws;
-   ws->query_info(ws, &sscreen->info,
-                  sscreen->options.enable_sam,
-                  sscreen->options.disable_sam);
-
-   sscreen->info.smart_access_memory = false; /* VRAM has slower CPU access */
+   ws->query_info(ws, &sscreen->info);
 
    if (sscreen->info.gfx_level >= GFX9) {
       sscreen->se_tile_repeat = 32 * sscreen->info.max_se;
diff --git a/src/gallium/drivers/radeonsi/si_texture.c b/src/gallium/drivers/radeonsi/si_texture.c
index d5905189f97..43ac610e747 100644
--- a/src/gallium/drivers/radeonsi/si_texture.c
+++ b/src/gallium/drivers/radeonsi/si_texture.c
@@ -1878,8 +1878,7 @@ static void *si_texture_transfer_map(struct pipe_context *ctx, struct pipe_resou
        * is busy.
        */
       if (!tex->surface.is_linear || (tex->buffer.flags & RADEON_FLAG_ENCRYPTED) ||
-          (tex->buffer.domains & RADEON_DOMAIN_VRAM && sctx->screen->info.has_dedicated_vram &&
-           !sctx->screen->info.smart_access_memory))
+          (tex->buffer.domains & RADEON_DOMAIN_VRAM && sctx->screen->info.has_dedicated_vram))
          use_staging_texture = true;
       else if (usage & PIPE_MAP_READ)
          use_staging_texture =
diff --git a/src/gallium/include/winsys/radeon_winsys.h b/src/gallium/include/winsys/radeon_winsys.h
index 1bc9c1239c1..c1d7f6dfd43 100644
--- a/src/gallium/include/winsys/radeon_winsys.h
+++ b/src/gallium/include/winsys/radeon_winsys.h
@@ -321,9 +321,7 @@ struct radeon_winsys {
     * \param ws        The winsys this function is called from.
     * \param info      Return structure
     */
-   void (*query_info)(struct radeon_winsys *ws, struct radeon_info *info,
-                      bool enable_smart_access_memory,
-                      bool disable_smart_access_memory);
+   void (*query_info)(struct radeon_winsys *ws, struct radeon_info *info);
 
    /**
     * A hint for the winsys that it should pin its execution threads to
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
index 8b075f9103c..23e1903b69e 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
@@ -197,18 +197,10 @@ static void amdgpu_winsys_destroy(struct radeon_winsys *rws)
    amdgpu_winsys_destroy_locked(rws, false);
 }
 
-static void amdgpu_winsys_query_info(struct radeon_winsys *rws,
-                                     struct radeon_info *info,
-                                     bool enable_smart_access_memory,
-                                     bool disable_smart_access_memory)
+static void amdgpu_winsys_query_info(struct radeon_winsys *rws, struct radeon_info *info)
 {
    struct amdgpu_winsys *ws = amdgpu_winsys(rws);
 
-   if (disable_smart_access_memory)
-      ws->info.smart_access_memory = false;
-   else if (enable_smart_access_memory && ws->info.all_vram_visible)
-      ws->info.smart_access_memory = true;
-
    *info = ws->info;
 }
 
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
index 21c815f369c..b0e94422fe2 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
@@ -624,10 +624,7 @@ static void radeon_winsys_destroy(struct radeon_winsys *rws)
    FREE(rws);
 }
 
-static void radeon_query_info(struct radeon_winsys *rws,
-                              struct radeon_info *info,
-                              bool enable_smart_access_memory,
-                              bool disable_smart_access_memory)
+static void radeon_query_info(struct radeon_winsys *rws, struct radeon_info *info)
 {
    *info = ((struct radeon_drm_winsys *)rws)->info;
 }

Mesa (main): radeonsi: remove Smart Access Memory because CPU access has large overhead

Reply via email to