Module: Mesa
Branch: main
Commit: b51ff22fbe8c9786f968e102ef8d223f415fcaba
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=b51ff22fbe8c9786f968e102ef8d223f415fcaba

Author: Juston Li <justo...@google.com>
Date:   Tue Nov  7 16:44:52 2023 -0800

venus: support caching image memory requirements

Similar idea to buffer memory requirements cache but CreateImage has
many more params that may affect the memory requirements.

Instead of a sparse array, generate a SHA1 hash of all the relevant
VkImageCreateInfo params including relevant pNext structures and use
part of the hash as a key to a hash table that stores the cache entries.

Signed-off-by: Juston Li <justo...@google.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26118>

---

 src/virtio/vulkan/vn_common.c |   1 +
 src/virtio/vulkan/vn_common.h |   1 +
 src/virtio/vulkan/vn_device.c |   2 +
 src/virtio/vulkan/vn_device.h |   2 +
 src/virtio/vulkan/vn_image.c  | 224 +++++++++++++++++++++++++++++++++++++++++-
 src/virtio/vulkan/vn_image.h  |  23 +++++
 6 files changed, 250 insertions(+), 3 deletions(-)

diff --git a/src/virtio/vulkan/vn_common.c b/src/virtio/vulkan/vn_common.c
index 2b3a858202a..95da1b3d2cf 100644
--- a/src/virtio/vulkan/vn_common.c
+++ b/src/virtio/vulkan/vn_common.c
@@ -52,6 +52,7 @@ static const struct debug_control vn_perf_options[] = {
    { "no_async_mem_alloc", VN_PERF_NO_ASYNC_MEM_ALLOC },
    { "no_tiled_wsi_image", VN_PERF_NO_TILED_WSI_IMAGE },
    { "no_multi_ring", VN_PERF_NO_MULTI_RING },
+   { "no_async_image_create", VN_PERF_NO_ASYNC_IMAGE_CREATE },
    { NULL, 0 },
    /* clang-format on */
 };
diff --git a/src/virtio/vulkan/vn_common.h b/src/virtio/vulkan/vn_common.h
index f2e860e18de..3359f211703 100644
--- a/src/virtio/vulkan/vn_common.h
+++ b/src/virtio/vulkan/vn_common.h
@@ -125,6 +125,7 @@ enum vn_perf {
    VN_PERF_NO_ASYNC_MEM_ALLOC = 1ull << 9,
    VN_PERF_NO_TILED_WSI_IMAGE = 1ull << 10,
    VN_PERF_NO_MULTI_RING = 1ull << 11,
+   VN_PERF_NO_ASYNC_IMAGE_CREATE = 1ull << 12,
 };
 
 typedef uint64_t vn_object_id;
diff --git a/src/virtio/vulkan/vn_device.c b/src/virtio/vulkan/vn_device.c
index d61a7d318b1..ea6410c2d7e 100644
--- a/src/virtio/vulkan/vn_device.c
+++ b/src/virtio/vulkan/vn_device.c
@@ -536,6 +536,7 @@ vn_device_init(struct vn_device *dev,
       goto out_cmd_pools_fini;
 
    vn_buffer_reqs_cache_init(dev);
+   vn_image_reqs_cache_init(dev);
 
    /* This is a WA to allow fossilize replay to detect if the host side shader
     * cache is no longer up to date.
@@ -626,6 +627,7 @@ vn_DestroyDevice(VkDevice device, const VkAllocationCallbacks *pAllocator)
    if (!dev)
       return;
 
+   vn_image_reqs_cache_fini(dev);
    vn_buffer_reqs_cache_fini(dev);
 
    for (uint32_t i = 0; i < dev->queue_count; i++)
diff --git a/src/virtio/vulkan/vn_device.h b/src/virtio/vulkan/vn_device.h
index fa91859d2c9..9fde8ce161a 100644
--- a/src/virtio/vulkan/vn_device.h
+++ b/src/virtio/vulkan/vn_device.h
@@ -16,6 +16,7 @@
 #include "vn_buffer.h"
 #include "vn_device_memory.h"
 #include "vn_feedback.h"
+#include "vn_image.h"
 
 struct vn_device_memory_report {
    PFN_vkDeviceMemoryReportCallbackEXT callback;
@@ -55,6 +56,7 @@ struct vn_device {
    uint32_t queue_count;
 
    struct vn_buffer_reqs_cache buffer_reqs_cache;
+   struct vn_image_reqs_cache image_reqs_cache;
 };
 VK_DEFINE_HANDLE_CASTS(vn_device,
                        base.base.base,
diff --git a/src/virtio/vulkan/vn_image.c b/src/virtio/vulkan/vn_image.c
index 9f358871674..a22f5ce249a 100644
--- a/src/virtio/vulkan/vn_image.c
+++ b/src/virtio/vulkan/vn_image.c
@@ -35,6 +35,213 @@ vn_image_get_plane_count(const VkImageCreateInfo *create_info)
    return vk_format_get_plane_count(create_info->format);
 }
 
+static void
+vn_image_cache_debug_dump(struct vn_image_reqs_cache *cache)
+{
+   vn_log(NULL, "dumping image reqs cache statistics");
+   vn_log(NULL, "  hit %u\n", cache->debug.cache_hit_count);
+   vn_log(NULL, "  miss %u\n", cache->debug.cache_miss_count);
+   vn_log(NULL, "  skip %u\n", cache->debug.cache_skip_count);
+}
+
+static uint32_t
+vn_image_cache_key_hash_function(const void *key)
+{
+   return _mesa_hash_data(key, SHA1_DIGEST_LENGTH);
+}
+
+static bool
+vn_image_cache_key_equal_function(const void *void_a, const void *void_b)
+{
+   const struct vn_image_reqs_cache_entry *a = void_a, *b = void_b;
+   return memcmp(a, b, SHA1_DIGEST_LENGTH) == 0;
+}
+
+static bool
+vn_image_get_image_reqs_key(struct vn_device *dev,
+                            const VkImageCreateInfo *create_info,
+                            uint8_t *key)
+{
+   struct mesa_sha1 sha1_ctx;
+
+   if (!dev->image_reqs_cache.ht)
+      return false;
+
+   _mesa_sha1_init(&sha1_ctx);
+
+   /* Hash relevant fields in the pNext chain */
+   vk_foreach_struct_const(src, create_info->pNext) {
+      switch (src->sType) {
+      case VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO: {
+         struct VkExternalMemoryImageCreateInfo *ext_mem =
+            (struct VkExternalMemoryImageCreateInfo *)src;
+         _mesa_sha1_update(&sha1_ctx, &ext_mem->handleTypes,
+                           sizeof(VkExternalMemoryHandleTypeFlags));
+         break;
+      }
+      case VK_STRUCTURE_TYPE_IMAGE_FORMAT_LIST_CREATE_INFO: {
+         struct VkImageFormatListCreateInfo *format_list =
+            (struct VkImageFormatListCreateInfo *)src;
+         _mesa_sha1_update(&sha1_ctx, format_list->pViewFormats,
+                           sizeof(VkFormat) * format_list->viewFormatCount);
+         break;
+      }
+      case VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT: {
+         struct VkImageDrmFormatModifierListCreateInfoEXT *format_mod_list =
+            (struct VkImageDrmFormatModifierListCreateInfoEXT *)src;
+         _mesa_sha1_update(
+            &sha1_ctx, format_mod_list->pDrmFormatModifiers,
+            sizeof(uint64_t) * format_mod_list->drmFormatModifierCount);
+         break;
+      }
+      case VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_EXPLICIT_CREATE_INFO_EXT: {
+         struct VkImageDrmFormatModifierExplicitCreateInfoEXT
+            *format_mod_explicit =
+               (struct VkImageDrmFormatModifierExplicitCreateInfoEXT *)src;
+         _mesa_sha1_update(&sha1_ctx, &format_mod_explicit->drmFormatModifier,
+                           sizeof(uint64_t));
+         _mesa_sha1_update(
+            &sha1_ctx, format_mod_explicit->pPlaneLayouts,
+            sizeof(VkSubresourceLayout) *
+               format_mod_explicit->drmFormatModifierPlaneCount);
+         break;
+      }
+      case VK_STRUCTURE_TYPE_IMAGE_STENCIL_USAGE_CREATE_INFO: {
+         struct VkImageStencilUsageCreateInfo *stencil_usage =
+            (struct VkImageStencilUsageCreateInfo *)src;
+         _mesa_sha1_update(&sha1_ctx, &stencil_usage->stencilUsage,
+                           sizeof(VkImageUsageFlags));
+         break;
+      }
+      default:
+         /* Skip cache for unsupported pNext */
+         dev->image_reqs_cache.debug.cache_skip_count++;
+         return false;
+      }
+   }
+
+   /* Hash contiguous block of VkImageCreateInfo starting with
+    * VkImageCreateInfo->flags and ending with VkImageCreateInfo->sharingMode
+    *
+    * There's no padding involved in this hash block so no concern for C
+    * enum sizes or alignment.
+    */
+   static const size_t create_image_hash_block_size =
+      offsetof(VkImageCreateInfo, queueFamilyIndexCount) -
+      offsetof(VkImageCreateInfo, flags);
+
+   _mesa_sha1_update(&sha1_ctx, &create_info->flags,
+                     create_image_hash_block_size);
+
+   /* Follow pointer and hash pQueueFamilyIndices separately.
+    * pQueueFamilyIndices is ignored if sharingMode is not
+    * VK_SHARING_MODE_CONCURRENT
+    */
+   if (create_info->sharingMode == VK_SHARING_MODE_CONCURRENT) {
+      _mesa_sha1_update(
+         &sha1_ctx, create_info->pQueueFamilyIndices,
+         sizeof(uint32_t) * create_info->queueFamilyIndexCount);
+   }
+
+   _mesa_sha1_update(&sha1_ctx, &create_info->initialLayout,
+                     sizeof(create_info->initialLayout));
+   _mesa_sha1_final(&sha1_ctx, key);
+
+   return true;
+}
+
+void
+vn_image_reqs_cache_init(struct vn_device *dev)
+{
+   struct vn_image_reqs_cache *cache = &dev->image_reqs_cache;
+
+   if (VN_PERF(NO_ASYNC_IMAGE_CREATE))
+      return;
+
+   cache->ht = _mesa_hash_table_create(NULL, vn_image_cache_key_hash_function,
+                                       vn_image_cache_key_equal_function);
+   if (!cache->ht)
+      return;
+
+   simple_mtx_init(&cache->mutex, mtx_plain);
+}
+
+void
+vn_image_reqs_cache_fini(struct vn_device *dev)
+{
+   const VkAllocationCallbacks *alloc = &dev->base.base.alloc;
+   struct vn_image_reqs_cache *cache = &dev->image_reqs_cache;
+
+   if (!cache->ht)
+      return;
+
+   hash_table_foreach(cache->ht, hash_entry) {
+      struct vn_image_reqs_cache_entry *cache_entry = hash_entry->data;
+      vk_free(alloc, cache_entry);
+   }
+   _mesa_hash_table_destroy(cache->ht, NULL);
+
+   simple_mtx_destroy(&cache->mutex);
+
+   if (VN_DEBUG(CACHE))
+      vn_image_cache_debug_dump(cache);
+}
+
+static bool
+vn_image_init_reqs_from_cache(struct vn_device *dev,
+                              struct vn_image *img,
+                              uint8_t *key)
+{
+   struct vn_image_reqs_cache *cache = &dev->image_reqs_cache;
+
+   assert(cache->ht);
+
+   simple_mtx_lock(&cache->mutex);
+   struct hash_entry *hash_entry = _mesa_hash_table_search(cache->ht, key);
+   if (hash_entry) {
+      struct vn_image_reqs_cache_entry *cache_entry = hash_entry->data;
+      for (uint32_t i = 0; i < cache_entry->plane_count; i++)
+         img->requirements[i] = cache_entry->requirements[i];
+      p_atomic_inc(&cache->debug.cache_hit_count);
+   } else {
+      p_atomic_inc(&cache->debug.cache_miss_count);
+   }
+   simple_mtx_unlock(&cache->mutex);
+
+   return !!hash_entry;
+}
+
+static void
+vn_image_store_reqs_in_cache(struct vn_device *dev,
+                             uint8_t *key,
+                             uint32_t plane_count,
+                             struct vn_image_memory_requirements *requirements)
+{
+   const VkAllocationCallbacks *alloc = &dev->base.base.alloc;
+   struct vn_image_reqs_cache *cache = &dev->image_reqs_cache;
+   struct vn_image_reqs_cache_entry *cache_entry;
+
+   assert(cache->ht);
+
+   cache_entry = vk_zalloc(alloc, sizeof(*cache_entry), VN_DEFAULT_ALIGN,
+                           VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+   if (!cache_entry)
+      return;
+
+   for (uint32_t i = 0; i < plane_count; i++)
+      cache_entry->requirements[i] = requirements[i];
+
+   memcpy(cache_entry->key, key, SHA1_DIGEST_LENGTH);
+   cache_entry->plane_count = plane_count;
+
+   simple_mtx_lock(&cache->mutex);
+   if (!_mesa_hash_table_search(cache->ht, cache_entry->key)) {
+      _mesa_hash_table_insert(dev->image_reqs_cache.ht, cache_entry->key,
+                              cache_entry);
+   }
+   simple_mtx_unlock(&cache->mutex);
+}
+
 static void
 vn_image_init_memory_requirements(struct vn_image *img,
                                   struct vn_device *dev,
@@ -42,7 +249,6 @@ vn_image_init_memory_requirements(struct vn_image *img,
 {
    assert(plane_count <= ARRAY_SIZE(img->requirements));
 
-   /* TODO add a per-device cache for the requirements */
    for (uint32_t i = 0; i < plane_count; i++) {
       img->requirements[i].memory.sType =
          VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2;
@@ -186,7 +392,16 @@ vn_image_init(struct vn_device *dev,
 
    img->sharing_mode = create_info->sharingMode;
 
-   /* TODO async */
+   /* Check if mem reqs in cache. If found, make async call */
+   uint8_t key[SHA1_DIGEST_LENGTH] = { 0 };
+   const bool cacheable = vn_image_get_image_reqs_key(dev, create_info, key);
+
+   if (cacheable && vn_image_init_reqs_from_cache(dev, img, key)) {
+      vn_async_vkCreateImage(dev->primary_ring, device, create_info, NULL,
+                             &image);
+      return VK_SUCCESS;
+   }
+
    result = vn_call_vkCreateImage(dev->primary_ring, device, create_info,
                                   NULL, &image);
    if (result != VK_SUCCESS)
@@ -195,6 +410,9 @@ vn_image_init(struct vn_device *dev,
    const uint32_t plane_count = vn_image_get_plane_count(create_info);
    vn_image_init_memory_requirements(img, dev, plane_count);
 
+   if (cacheable)
+      vn_image_store_reqs_in_cache(dev, key, plane_count, img->requirements);
+
    return VK_SUCCESS;
 }
 
@@ -828,7 +1046,7 @@ vn_GetDeviceImageMemoryRequirements(
 {
    struct vn_device *dev = vn_device_from_handle(device);
 
-   /* TODO per-device cache */
+   /* TODO integrate image memory requirements cache */
    vn_call_vkGetDeviceImageMemoryRequirements(dev->primary_ring, device,
                                               pInfo, pMemoryRequirements);
 }
diff --git a/src/virtio/vulkan/vn_image.h b/src/virtio/vulkan/vn_image.h
index e7059aacd83..4cf3e465128 100644
--- a/src/virtio/vulkan/vn_image.h
+++ b/src/virtio/vulkan/vn_image.h
@@ -23,6 +23,23 @@ struct vn_image_memory_requirements {
    VkMemoryDedicatedRequirements dedicated;
 };
 
+struct vn_image_reqs_cache_entry {
+   struct vn_image_memory_requirements requirements[4];
+   uint8_t plane_count;
+   uint8_t key[SHA1_DIGEST_LENGTH];
+};
+
+struct vn_image_reqs_cache {
+   struct hash_table *ht;
+   simple_mtx_t mutex;
+
+   struct {
+      uint32_t cache_hit_count;
+      uint32_t cache_miss_count;
+      uint32_t cache_skip_count;
+   } debug;
+};
+
 struct vn_image_create_deferred_info {
    VkImageCreateInfo create;
    VkImageFormatListCreateInfo list;
@@ -108,4 +125,10 @@ vn_image_init_deferred(struct vn_device *dev,
                        const VkImageCreateInfo *create_info,
                        struct vn_image *img);
 
+void
+vn_image_reqs_cache_init(struct vn_device *dev);
+
+void
+vn_image_reqs_cache_fini(struct vn_device *dev);
+
 #endif /* VN_IMAGE_H */

Reply via email to