Module: Mesa
Branch: main
Commit: d2abb4f97543c8e8bbfa7a8a85e78eb04a028d4f
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=d2abb4f97543c8e8bbfa7a8a85e78eb04a028d4f

Author: Mike Blumenkrantz <michael.blumenkra...@gmail.com>
Date:   Wed Oct 25 11:41:02 2023 -0400

zink: make (some) vk allocation commands more robust against vram depletion

as has recently been exposed by ci, there are some cases where running
lots of tests simultaneously can temporarily result in depleted vram,
which torpedoes everything

as this scenario is transient (vram will very soon become available again),
it makes more sense to add some retries at fixed intervals to try soldiering
onward instead of exploding and probably blocking a merge

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25938>

---

 src/gallium/drivers/zink/zink_batch.c    | 133 ++++++++++++++++++-------------
 src/gallium/drivers/zink/zink_pipeline.c |  84 +++++++++++--------
 src/gallium/drivers/zink/zink_screen.h   |  12 +++
 3 files changed, 141 insertions(+), 88 deletions(-)

diff --git a/src/gallium/drivers/zink/zink_batch.c 
b/src/gallium/drivers/zink/zink_batch.c
index ff41e305edd..870fb1825ae 100644
--- a/src/gallium/drivers/zink/zink_batch.c
+++ b/src/gallium/drivers/zink/zink_batch.c
@@ -334,16 +334,22 @@ create_batch_state(struct zink_context *ctx)
    VkCommandPoolCreateInfo cpci = {0};
    cpci.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
    cpci.queueFamilyIndex = screen->gfx_queue;
-   VkResult result = VKSCR(CreateCommandPool)(screen->dev, &cpci, NULL, 
&bs->cmdpool);
-   if (result != VK_SUCCESS) {
-      mesa_loge("ZINK: vkCreateCommandPool failed (%s)", 
vk_Result_to_str(result));
-      goto fail;
-   }
-   result = VKSCR(CreateCommandPool)(screen->dev, &cpci, NULL, 
&bs->unsynchronized_cmdpool);
-   if (result != VK_SUCCESS) {
-      mesa_loge("ZINK: vkCreateCommandPool failed (%s)", 
vk_Result_to_str(result));
-      goto fail;
-   }
+   VkResult result;
+
+   VRAM_ALLOC_LOOP(result,
+      VKSCR(CreateCommandPool)(screen->dev, &cpci, NULL, &bs->cmdpool),
+      if (result != VK_SUCCESS) {
+         mesa_loge("ZINK: vkCreateCommandPool failed (%s)", 
vk_Result_to_str(result));
+         goto fail;
+      }
+   );
+   VRAM_ALLOC_LOOP(result,
+      VKSCR(CreateCommandPool)(screen->dev, &cpci, NULL, 
&bs->unsynchronized_cmdpool),
+      if (result != VK_SUCCESS) {
+         mesa_loge("ZINK: vkCreateCommandPool failed (%s)", 
vk_Result_to_str(result));
+         goto fail;
+      }
+   );
 
    VkCommandBuffer cmdbufs[2];
    VkCommandBufferAllocateInfo cbai = {0};
@@ -352,21 +358,26 @@ create_batch_state(struct zink_context *ctx)
    cbai.commandPool = bs->cmdpool;
    cbai.commandBufferCount = 2;
 
-   result = VKSCR(AllocateCommandBuffers)(screen->dev, &cbai, cmdbufs);
-   if (result != VK_SUCCESS) {
-      mesa_loge("ZINK: vkAllocateCommandBuffers failed (%s)", 
vk_Result_to_str(result));
-      goto fail;
-   }
+   VRAM_ALLOC_LOOP(result,
+      VKSCR(AllocateCommandBuffers)(screen->dev, &cbai, cmdbufs),
+      if (result != VK_SUCCESS) {
+         mesa_loge("ZINK: vkAllocateCommandBuffers failed (%s)", 
vk_Result_to_str(result));
+         goto fail;
+      }
+   );
+
    bs->cmdbuf = cmdbufs[0];
    bs->reordered_cmdbuf = cmdbufs[1];
 
    cbai.commandPool = bs->unsynchronized_cmdpool;
    cbai.commandBufferCount = 1;
-   result = VKSCR(AllocateCommandBuffers)(screen->dev, &cbai, 
&bs->unsynchronized_cmdbuf);
-   if (result != VK_SUCCESS) {
-      mesa_loge("ZINK: vkAllocateCommandBuffers failed (%s)", 
vk_Result_to_str(result));
-      goto fail;
-   }
+   VRAM_ALLOC_LOOP(result,
+      VKSCR(AllocateCommandBuffers)(screen->dev, &cbai, 
&bs->unsynchronized_cmdbuf);,
+      if (result != VK_SUCCESS) {
+         mesa_loge("ZINK: vkAllocateCommandBuffers failed (%s)", 
vk_Result_to_str(result));
+         goto fail;
+      }
+   );
 
 #define SET_CREATE_OR_FAIL(ptr) \
    if (!_mesa_set_init(ptr, bs, _mesa_hash_pointer, _mesa_key_pointer_equal)) \
@@ -512,17 +523,22 @@ zink_start_batch(struct zink_context *ctx, struct 
zink_batch *batch)
    cbbi.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
    cbbi.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
 
-   VkResult result = VKCTX(BeginCommandBuffer)(batch->state->cmdbuf, &cbbi);
-   if (result != VK_SUCCESS)
-      mesa_loge("ZINK: vkBeginCommandBuffer failed (%s)", 
vk_Result_to_str(result));
-   
-   result = VKCTX(BeginCommandBuffer)(batch->state->reordered_cmdbuf, &cbbi);
-   if (result != VK_SUCCESS)
-      mesa_loge("ZINK: vkBeginCommandBuffer failed (%s)", 
vk_Result_to_str(result));
-
-   result = VKCTX(BeginCommandBuffer)(batch->state->unsynchronized_cmdbuf, 
&cbbi);
-   if (result != VK_SUCCESS)
-      mesa_loge("ZINK: vkBeginCommandBuffer failed (%s)", 
vk_Result_to_str(result));
+   VkResult result;
+   VRAM_ALLOC_LOOP(result,
+      VKCTX(BeginCommandBuffer)(batch->state->cmdbuf, &cbbi),
+      if (result != VK_SUCCESS)
+         mesa_loge("ZINK: vkBeginCommandBuffer failed (%s)", 
vk_Result_to_str(result));
+   );
+   VRAM_ALLOC_LOOP(result,
+      VKCTX(BeginCommandBuffer)(batch->state->reordered_cmdbuf, &cbbi),
+      if (result != VK_SUCCESS)
+         mesa_loge("ZINK: vkBeginCommandBuffer failed (%s)", 
vk_Result_to_str(result));
+   );
+   VRAM_ALLOC_LOOP(result,
+      VKCTX(BeginCommandBuffer)(batch->state->unsynchronized_cmdbuf, &cbbi),
+      if (result != VK_SUCCESS)
+         mesa_loge("ZINK: vkBeginCommandBuffer failed (%s)", 
vk_Result_to_str(result));
+   );
 
    batch->state->fence.completed = false;
    if (ctx->last_fence) {
@@ -673,12 +689,15 @@ submit_queue(void *data, void *gdata, int thread_index)
    tsi.signalSemaphoreValueCount = si[ZINK_SUBMIT_SIGNAL].signalSemaphoreCount;
 
 
-   VkResult result = VKSCR(EndCommandBuffer)(bs->cmdbuf);
-   if (result != VK_SUCCESS) {
-      mesa_loge("ZINK: vkEndCommandBuffer failed (%s)", 
vk_Result_to_str(result));
-      bs->is_device_lost = true;
-      goto end;
-   }
+   VkResult result;
+   VRAM_ALLOC_LOOP(result,
+      VKSCR(EndCommandBuffer)(bs->cmdbuf),
+      if (result != VK_SUCCESS) {
+         mesa_loge("ZINK: vkEndCommandBuffer failed (%s)", 
vk_Result_to_str(result));
+         bs->is_device_lost = true;
+         goto end;
+      }
+   );
    if (bs->has_barriers) {
       if (bs->unordered_write_access) {
          VkMemoryBarrier mb;
@@ -690,31 +709,37 @@ submit_queue(void *data, void *gdata, int thread_index)
                                    bs->unordered_write_stages, 0,
                                    0, 1, &mb, 0, NULL, 0, NULL);
       }
-      result = VKSCR(EndCommandBuffer)(bs->reordered_cmdbuf);
-      if (result != VK_SUCCESS) {
-         mesa_loge("ZINK: vkEndCommandBuffer failed (%s)", 
vk_Result_to_str(result));
-         bs->is_device_lost = true;
-         goto end;
-      }
+      VRAM_ALLOC_LOOP(result,
+         VKSCR(EndCommandBuffer)(bs->reordered_cmdbuf),
+         if (result != VK_SUCCESS) {
+            mesa_loge("ZINK: vkEndCommandBuffer failed (%s)", 
vk_Result_to_str(result));
+            bs->is_device_lost = true;
+            goto end;
+         }
+      );
    }
    if (bs->has_unsync) {
-      result = VKSCR(EndCommandBuffer)(bs->unsynchronized_cmdbuf);
-      if (result != VK_SUCCESS) {
-         mesa_loge("ZINK: vkEndCommandBuffer failed (%s)", 
vk_Result_to_str(result));
-         bs->is_device_lost = true;
-         goto end;
-      }
+      VRAM_ALLOC_LOOP(result,
+         VKSCR(EndCommandBuffer)(bs->unsynchronized_cmdbuf),
+         if (result != VK_SUCCESS) {
+            mesa_loge("ZINK: vkEndCommandBuffer failed (%s)", 
vk_Result_to_str(result));
+            bs->is_device_lost = true;
+            goto end;
+         }
+      );
    }
 
    if (!si[ZINK_SUBMIT_SIGNAL].signalSemaphoreCount)
       num_si--;
 
    simple_mtx_lock(&screen->queue_lock);
-   result = VKSCR(QueueSubmit)(screen->queue, num_si, submit, VK_NULL_HANDLE);
-   if (result != VK_SUCCESS) {
-      mesa_loge("ZINK: vkQueueSubmit failed (%s)", vk_Result_to_str(result));
-      bs->is_device_lost = true;
-   }
+   VRAM_ALLOC_LOOP(result,
+      VKSCR(QueueSubmit)(screen->queue, num_si, submit, VK_NULL_HANDLE),
+      if (result != VK_SUCCESS) {
+         mesa_loge("ZINK: vkQueueSubmit failed (%s)", 
vk_Result_to_str(result));
+         bs->is_device_lost = true;
+      }
+   );
    simple_mtx_unlock(&screen->queue_lock);
 
    unsigned i = 0;
diff --git a/src/gallium/drivers/zink/zink_pipeline.c 
b/src/gallium/drivers/zink/zink_pipeline.c
index 063fdd9d73f..9c2971bf1e3 100644
--- a/src/gallium/drivers/zink/zink_pipeline.c
+++ b/src/gallium/drivers/zink/zink_pipeline.c
@@ -430,13 +430,15 @@ zink_create_gfx_pipeline(struct zink_screen *screen,
 
    VkPipeline pipeline;
    u_rwlock_wrlock(&prog->base.pipeline_cache_lock);
-   VkResult result = VKSCR(CreateGraphicsPipelines)(screen->dev, 
prog->base.pipeline_cache,
-                                                    1, &pci, NULL, &pipeline);
-   u_rwlock_wrunlock(&prog->base.pipeline_cache_lock);
-   if (result != VK_SUCCESS) {
-      mesa_loge("ZINK: vkCreateGraphicsPipelines failed (%s)", 
vk_Result_to_str(result));
-      return VK_NULL_HANDLE;
-   }
+   VkResult result;
+   VRAM_ALLOC_LOOP(result,
+      VKSCR(CreateGraphicsPipelines)(screen->dev, prog->base.pipeline_cache, 
1, &pci, NULL, &pipeline),
+      u_rwlock_wrunlock(&prog->base.pipeline_cache_lock);
+      if (result != VK_SUCCESS) {
+         mesa_loge("ZINK: vkCreateGraphicsPipelines failed (%s)", 
vk_Result_to_str(result));
+         return VK_NULL_HANDLE;
+      }
+   );
 
    return pipeline;
 }
@@ -498,14 +500,16 @@ zink_create_compute_pipeline(struct zink_screen *screen, 
struct zink_compute_pro
    pci.stage = stage;
 
    VkPipeline pipeline;
+   VkResult result;
    u_rwlock_wrlock(&comp->base.pipeline_cache_lock);
-   VkResult result = VKSCR(CreateComputePipelines)(screen->dev, 
comp->base.pipeline_cache,
-                                                   1, &pci, NULL, &pipeline);
-   u_rwlock_wrunlock(&comp->base.pipeline_cache_lock);
-   if (result != VK_SUCCESS) {
-      mesa_loge("ZINK: vkCreateComputePipelines failed (%s)", 
vk_Result_to_str(result));
-      return VK_NULL_HANDLE;
-   }
+   VRAM_ALLOC_LOOP(result,
+      VKSCR(CreateComputePipelines)(screen->dev, comp->base.pipeline_cache, 1, 
&pci, NULL, &pipeline),
+      u_rwlock_wrunlock(&comp->base.pipeline_cache_lock);
+      if (result != VK_SUCCESS) {
+         mesa_loge("ZINK: vkCreateComputePipelines failed (%s)", 
vk_Result_to_str(result));
+         return VK_NULL_HANDLE;
+      }
+   );
 
    return pipeline;
 }
@@ -618,11 +622,14 @@ zink_create_gfx_pipeline_output(struct zink_screen 
*screen, struct zink_gfx_pipe
    pci.pDynamicState = &pipelineDynamicStateCreateInfo;
 
    VkPipeline pipeline;
-   if (VKSCR(CreateGraphicsPipelines)(screen->dev, VK_NULL_HANDLE, 1, &pci,
-                                      NULL, &pipeline) != VK_SUCCESS) {
-      mesa_loge("ZINK: vkCreateGraphicsPipelines failed");
-      return VK_NULL_HANDLE;
-   }
+   VkResult result;
+   VRAM_ALLOC_LOOP(result,
+      VKSCR(CreateGraphicsPipelines)(screen->dev, VK_NULL_HANDLE, 1, &pci, 
NULL, &pipeline),
+      if (result != VK_SUCCESS) {
+         mesa_loge("ZINK: vkCreateGraphicsPipelines failed (%s)", 
vk_Result_to_str(result));
+         return VK_NULL_HANDLE;
+      }
+   );
 
    return pipeline;
 }
@@ -696,11 +703,14 @@ zink_create_gfx_pipeline_input(struct zink_screen *screen,
    pci.pDynamicState = &pipelineDynamicStateCreateInfo;
 
    VkPipeline pipeline;
-   if (VKSCR(CreateGraphicsPipelines)(screen->dev, VK_NULL_HANDLE, 1, &pci,
-                                      NULL, &pipeline) != VK_SUCCESS) {
-      mesa_loge("ZINK: vkCreateGraphicsPipelines failed");
-      return VK_NULL_HANDLE;
-   }
+   VkResult result;
+   VRAM_ALLOC_LOOP(result,
+      VKSCR(CreateGraphicsPipelines)(screen->dev, VK_NULL_HANDLE, 1, &pci, 
NULL, &pipeline),
+      if (result != VK_SUCCESS) {
+         mesa_loge("ZINK: vkCreateGraphicsPipelines failed (%s)", 
vk_Result_to_str(result));
+         return VK_NULL_HANDLE;
+      }
+   );
 
    return pipeline;
 }
@@ -831,10 +841,14 @@ create_gfx_pipeline_library(struct zink_screen *screen, 
struct zink_shader_objec
       pci.flags |= 
VK_PIPELINE_CREATE_RETAIN_LINK_TIME_OPTIMIZATION_INFO_BIT_EXT;
 
    VkPipeline pipeline;
-   if (VKSCR(CreateGraphicsPipelines)(screen->dev, pipeline_cache, 1, &pci, 
NULL, &pipeline) != VK_SUCCESS) {
-      mesa_loge("ZINK: vkCreateGraphicsPipelines failed");
-      return VK_NULL_HANDLE;
-   }
+   VkResult result;
+   VRAM_ALLOC_LOOP(result,
+      VKSCR(CreateGraphicsPipelines)(screen->dev, pipeline_cache, 1, &pci, 
NULL, &pipeline),
+      if (result != VK_SUCCESS) {
+         mesa_loge("ZINK: vkCreateGraphicsPipelines failed");
+         return VK_NULL_HANDLE;
+      }
+   );
 
    return pipeline;
 }
@@ -886,13 +900,15 @@ zink_create_gfx_pipeline_combined(struct zink_screen 
*screen, struct zink_gfx_pr
 
    VkPipeline pipeline;
    u_rwlock_wrlock(&prog->base.pipeline_cache_lock);
-   VkResult result = VKSCR(CreateGraphicsPipelines)(screen->dev, 
prog->base.pipeline_cache, 1, &pci, NULL, &pipeline);
-   if (result != VK_SUCCESS && result != VK_PIPELINE_COMPILE_REQUIRED_EXT) {
-      mesa_loge("ZINK: vkCreateGraphicsPipelines failed");
+   VkResult result;
+   VRAM_ALLOC_LOOP(result,
+      VKSCR(CreateGraphicsPipelines)(screen->dev, prog->base.pipeline_cache, 
1, &pci, NULL, &pipeline),
       u_rwlock_wrunlock(&prog->base.pipeline_cache_lock);
-      return VK_NULL_HANDLE;
-   }
-   u_rwlock_wrunlock(&prog->base.pipeline_cache_lock);
+      if (result != VK_SUCCESS && result != VK_PIPELINE_COMPILE_REQUIRED_EXT) {
+         mesa_loge("ZINK: vkCreateGraphicsPipelines failed");
+         return VK_NULL_HANDLE;
+      }
+   );
 
    return pipeline;
 }
diff --git a/src/gallium/drivers/zink/zink_screen.h 
b/src/gallium/drivers/zink/zink_screen.h
index 09a003317fa..886700b65ad 100644
--- a/src/gallium/drivers/zink/zink_screen.h
+++ b/src/gallium/drivers/zink/zink_screen.h
@@ -114,6 +114,18 @@ zink_string_vkflags_unroll(char *buf, size_t bufsize, 
uint64_t flags, zink_vkfla
    return idx;
 }
 
+#define VRAM_ALLOC_LOOP(RET, DOIT, ...) /* assigns DOIT to RET, retrying while it yields VK_ERROR_OUT_OF_DEVICE_MEMORY, then runs __VA_ARGS__ */ \
+   do { \
+      unsigned _us[] = {0, 1000, 10000, 500000, 1000000}; /* os_time_sleep() argument per attempt (presumably microseconds, going by the name); one attempt per entry */ \
+      for (unsigned _i = 0; _i < ARRAY_SIZE(_us); _i++) { \
+         RET = DOIT; /* DOIT is re-evaluated on every attempt */ \
+         if (RET == VK_SUCCESS || RET != VK_ERROR_OUT_OF_DEVICE_MEMORY) /* stop on success or on any error other than device OOM */ \
+            break; \
+         os_time_sleep(_us[_i]); /* NOTE(review): this sleep also runs after the last failed attempt, before __VA_ARGS__ */ \
+      } \
+      __VA_ARGS__ /* caller-supplied statements; run once after the loop with RET holding the last result */ \
+   } while (0)
+
 VkSemaphore
 zink_create_semaphore(struct zink_screen *screen);
 

Reply via email to