Module: Mesa
Branch: main
Commit: fae88d8791b2903771c85958903ee81080b66aec
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=fae88d8791b2903771c85958903ee81080b66aec

Author: Lionel Landwerlin <[email protected]>
Date:   Mon May  2 12:38:16 2022 +0300

anv: make use of the new smallbar uAPI

Instead of having 2 VkMemoryType pointing to the same VkMemoryHeap, we
have each VkMemoryType with VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT (one
host visible, the other not) point to its own VkMemoryHeap. For the
local heap that is host visible, we'll use the
I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS flag at GEM BO creation.

When the smallbar uAPI is not available we fall back to a single heap
and do not use I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS.

v2: Handle probed_cpu_visible_size == probed_size (Matthew)

v3:
 * Jordan: Use region info from devinfo

v4: Also make the vram host visible heap as local (Ken)

Signed-off-by: Lionel Landwerlin <[email protected]>
Signed-off-by: Jordan Justen <[email protected]>
Reviewed-by: Kenneth Graunke <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16739>

---

 src/intel/vulkan/anv_allocator.c | 18 ++++++-----
 src/intel/vulkan/anv_device.c    | 64 ++++++++++++++++++++++++++++++++--------
 src/intel/vulkan/anv_gem.c       |  6 +++-
 src/intel/vulkan/anv_gem_stubs.c |  2 +-
 src/intel/vulkan/anv_private.h   | 17 +++++++++--
 5 files changed, 84 insertions(+), 23 deletions(-)

diff --git a/src/intel/vulkan/anv_allocator.c b/src/intel/vulkan/anv_allocator.c
index 652c20d8c79..f6aeb5cb5cc 100644
--- a/src/intel/vulkan/anv_allocator.c
+++ b/src/intel/vulkan/anv_allocator.c
@@ -1696,23 +1696,27 @@ anv_device_alloc_bo(struct anv_device *device,
    /* If we have vram size, we have multiple memory regions and should choose
     * one of them.
     */
-   if (device->physical->vram.size > 0) {
+   if (anv_physical_device_has_vram(device->physical)) {
       struct drm_i915_gem_memory_class_instance regions[2];
       uint32_t nregions = 0;
 
       if (alloc_flags & ANV_BO_ALLOC_LOCAL_MEM) {
-         regions[nregions++] = device->physical->vram.region;
+         /* vram_non_mappable & vram_mappable actually are the same region. */
+         regions[nregions++] = device->physical->vram_non_mappable.region;
       } else {
          regions[nregions++] = device->physical->sys.region;
       }
 
-      /* TODO: Add I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS to flags for
-       * after small BAR uapi is stabilized.
-       */
-      assert(intel_vram_all_mappable(&device->info));
+      uint32_t flags = 0;
+      if (alloc_flags & ANV_BO_ALLOC_LOCAL_MEM_CPU_VISIBLE) {
+         assert(alloc_flags & ANV_BO_ALLOC_LOCAL_MEM);
+         /* We're required to add smem as a region when using mappable vram. */
+         regions[nregions++] = device->physical->sys.region;
+         flags |= I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS;
+      }
 
       gem_handle = anv_gem_create_regions(device, size + ccs_size,
-                                          nregions, regions);
+                                          flags, nregions, regions);
    } else {
       gem_handle = anv_gem_create(device, size + ccs_size);
    }
diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index 03cbea51f02..63309be61dd 100644
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -378,11 +378,18 @@ anv_init_meminfo(struct anv_physical_device *device, int fd)
       anv_compute_sys_heap_size(device, devinfo->mem.sram.mappable.size);
    device->sys.available = devinfo->mem.sram.mappable.free;
 
-   device->vram.region.memory_class = devinfo->mem.vram.mem_class;
-   device->vram.region.memory_instance =
+   device->vram_mappable.region.memory_class = devinfo->mem.vram.mem_class;
+   device->vram_mappable.region.memory_instance =
       devinfo->mem.vram.mem_instance;
-   device->vram.size = devinfo->mem.vram.mappable.size;
-   device->vram.available = devinfo->mem.vram.mappable.free;
+   device->vram_mappable.size = devinfo->mem.vram.mappable.size;
+   device->vram_mappable.available = devinfo->mem.vram.mappable.free;
+
+   device->vram_non_mappable.region.memory_class =
+      devinfo->mem.vram.mem_class;
+   device->vram_non_mappable.region.memory_instance =
+      devinfo->mem.vram.mem_instance;
+   device->vram_non_mappable.size = devinfo->mem.vram.unmappable.size;
+   device->vram_non_mappable.available = devinfo->mem.vram.unmappable.free;
 
    return VK_SUCCESS;
 }
@@ -395,7 +402,8 @@ anv_update_meminfo(struct anv_physical_device *device, int fd)
 
    const struct intel_device_info *devinfo = &device->info;
    device->sys.available = devinfo->mem.sram.mappable.free;
-   device->vram.available = devinfo->mem.vram.mappable.free;
+   device->vram_mappable.available = devinfo->mem.vram.mappable.free;
+   device->vram_non_mappable.available = devinfo->mem.vram.unmappable.free;
 }
 
 
@@ -408,13 +416,19 @@ anv_physical_device_init_heaps(struct anv_physical_device *device, int fd)
 
    assert(device->sys.size != 0);
 
-   if (device->vram.size > 0) {
-      /* We can create 2 different heaps when we have local memory support,
-       * first heap with local memory size and second with system memory size.
+   if (anv_physical_device_has_vram(device)) {
+      /* We can create 2 or 3 different heaps when we have local memory
+       * support: the first heap with local memory size, the second with
+       * system memory size, and a third that is added only if part of the
+       * vram is not mappable to the host.
        */
       device->memory.heap_count = 2;
       device->memory.heaps[0] = (struct anv_memory_heap) {
-         .size = device->vram.size,
+         /* If there is a vram_non_mappable, use that for the device only
+          * heap. Otherwise use the vram_mappable.
+          */
+         .size = device->vram_non_mappable.size != 0 ?
+                 device->vram_non_mappable.size : device->vram_mappable.size,
          .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
          .is_local_mem = true,
       };
@@ -423,6 +437,17 @@ anv_physical_device_init_heaps(struct anv_physical_device *device, int fd)
          .flags = 0,
          .is_local_mem = false,
       };
+      /* Add an additional smaller vram mappable heap if we can't map all the
+       * vram to the host.
+       */
+      if (device->vram_non_mappable.size > 0) {
+         device->memory.heap_count++;
+         device->memory.heaps[2] = (struct anv_memory_heap) {
+            .size = device->vram_mappable.size,
+            .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
+            .is_local_mem = true,
+         };
+      }
 
       device->memory.type_count = 3;
       device->memory.types[0] = (struct anv_memory_type) {
@@ -439,7 +464,11 @@ anv_physical_device_init_heaps(struct anv_physical_device *device, int fd)
          .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
                           VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                           VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
-         .heapIndex = 0,
+         /* This memory type either comes from heaps[0] if there is only
+          * mappable vram region, or from heaps[2] if there is both mappable &
+          * non-mappable vram regions.
+          */
+         .heapIndex = device->vram_non_mappable.size > 0 ? 2 : 0,
       };
    } else if (device->info.has_llc) {
       device->memory.heap_count = 1;
@@ -843,7 +872,8 @@ anv_physical_device_try_create(struct anv_instance *instance,
                                       device->gtt_size > (4ULL << 30 /* GiB */);
 
    /* Initialize memory regions struct to 0. */
-   memset(&device->vram, 0, sizeof(device->vram));
+   memset(&device->vram_non_mappable, 0, sizeof(device->vram_non_mappable));
+   memset(&device->vram_mappable, 0, sizeof(device->vram_mappable));
    memset(&device->sys, 0, sizeof(device->sys));
 
    result = anv_physical_device_init_heaps(device, fd);
@@ -2741,7 +2771,7 @@ anv_get_memory_budget(VkPhysicalDevice physicalDevice,
 
       if (device->memory.heaps[i].is_local_mem) {
          total_heaps_size = total_vram_heaps_size;
-         mem_available = device->vram.available;
+         mem_available = device->vram_non_mappable.available;
       } else {
          total_heaps_size = total_sys_heaps_size;
          mem_available = device->sys.available;
@@ -3832,6 +3862,16 @@ VkResult anv_AllocateMemory(
    if (device->physical->has_implicit_ccs && device->info.has_aux_map)
       alloc_flags |= ANV_BO_ALLOC_IMPLICIT_CCS;
 
+   /* If i915 reported both mappable and non-mappable vram regions and the
+    * application wants mappable lmem, then we need to use the
+    * I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS flag to create our BO.
+    */
+   if (pdevice->vram_mappable.size > 0 &&
+       pdevice->vram_non_mappable.size > 0 &&
+       (mem_type->propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) &&
+       (mem_type->propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT))
+      alloc_flags |= ANV_BO_ALLOC_LOCAL_MEM_CPU_VISIBLE;
+
    if (vk_flags & VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT)
       alloc_flags |= ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS;
 
diff --git a/src/intel/vulkan/anv_gem.c b/src/intel/vulkan/anv_gem.c
index 852b94cb957..61896b402a2 100644
--- a/src/intel/vulkan/anv_gem.c
+++ b/src/intel/vulkan/anv_gem.c
@@ -66,9 +66,12 @@ anv_gem_close(struct anv_device *device, uint32_t gem_handle)
 
 uint32_t
 anv_gem_create_regions(struct anv_device *device, uint64_t anv_bo_size,
-                       uint32_t num_regions,
+                       uint32_t flags, uint32_t num_regions,
                        struct drm_i915_gem_memory_class_instance *regions)
 {
+   /* Check for invalid flags */
+   assert((flags & ~I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS) == 0);
+
    struct drm_i915_gem_create_ext_memory_regions ext_regions = {
       .base = { .name = I915_GEM_CREATE_EXT_MEMORY_REGIONS },
       .num_regions = num_regions,
@@ -78,6 +81,7 @@ anv_gem_create_regions(struct anv_device *device, uint64_t anv_bo_size,
    struct drm_i915_gem_create_ext gem_create = {
       .size = anv_bo_size,
       .extensions = (uintptr_t) &ext_regions,
+      .flags = flags,
    };
 
    int ret = intel_ioctl(device->fd, DRM_IOCTL_I915_GEM_CREATE_EXT,
diff --git a/src/intel/vulkan/anv_gem_stubs.c b/src/intel/vulkan/anv_gem_stubs.c
index b42d8bc8bf1..0a0f4c6e479 100644
--- a/src/intel/vulkan/anv_gem_stubs.c
+++ b/src/intel/vulkan/anv_gem_stubs.c
@@ -47,7 +47,7 @@ anv_gem_close(struct anv_device *device, uint32_t gem_handle)
 
 uint32_t
 anv_gem_create_regions(struct anv_device *device, uint64_t anv_bo_size,
-                       uint32_t num_regions,
+                       uint32_t flags, uint32_t num_regions,
                        struct drm_i915_gem_memory_class_instance *regions)
 {
    return 0;
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index a00ac622942..5cfce532622 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -1040,7 +1040,11 @@ struct anv_physical_device {
       bool                                      need_clflush;
     } memory;
 
-    struct anv_memregion                        vram;
+    /* Either we have a single vram region and it's all mappable, or we have
+     * both mappable & non-mappable parts. System memory is always available.
+     */
+    struct anv_memregion                        vram_mappable;
+    struct anv_memregion                        vram_non_mappable;
     struct anv_memregion                        sys;
     uint8_t                                     driver_build_sha1[20];
     uint8_t                                     pipeline_cache_uuid[VK_UUID_SIZE];
@@ -1066,6 +1070,12 @@ struct anv_physical_device {
     struct intel_measure_device                 measure_device;
 };
 
+static inline bool
+anv_physical_device_has_vram(const struct anv_physical_device *device)
+{
+   return device->vram_mappable.size > 0;
+}
+
 struct anv_app_info {
    const char*        app_name;
    uint32_t           app_version;
@@ -1368,6 +1378,9 @@ enum anv_bo_alloc_flags {
 
    /** This buffer is allocated from local memory */
    ANV_BO_ALLOC_LOCAL_MEM = (1 << 10),
+
+   /** This buffer is allocated from local memory and should be cpu visible */
+   ANV_BO_ALLOC_LOCAL_MEM_CPU_VISIBLE = (1 << 11),
 };
 
 VkResult anv_device_alloc_bo(struct anv_device *device,
@@ -1431,7 +1444,7 @@ void anv_gem_munmap(struct anv_device *device, void *p, uint64_t size);
 uint32_t anv_gem_create(struct anv_device *device, uint64_t size);
 void anv_gem_close(struct anv_device *device, uint32_t gem_handle);
 uint32_t anv_gem_create_regions(struct anv_device *device, uint64_t anv_bo_size,
-                                uint32_t num_regions,
+                                uint32_t flags, uint32_t num_regions,
                                 struct drm_i915_gem_memory_class_instance *regions);
 uint32_t anv_gem_userptr(struct anv_device *device, void *mem, size_t size);
 int anv_gem_busy(struct anv_device *device, uint32_t gem_handle);

Reply via email to