Module: Mesa Branch: main Commit: d2abb4f97543c8e8bbfa7a8a85e78eb04a028d4f URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=d2abb4f97543c8e8bbfa7a8a85e78eb04a028d4f
Author: Mike Blumenkrantz <michael.blumenkra...@gmail.com> Date: Wed Oct 25 11:41:02 2023 -0400 zink: make (some) vk allocation commands more robust against vram depletion. As has recently been exposed by CI, there are some cases where running lots of tests simultaneously can temporarily result in depleted vram, which torpedoes everything. As this scenario is transient (vram will very soon become available again), it makes more sense to add some retries at fixed intervals to try soldiering onward instead of exploding and probably blocking a merge. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25938> --- src/gallium/drivers/zink/zink_batch.c | 133 ++++++++++++++++++------------- src/gallium/drivers/zink/zink_pipeline.c | 84 +++++++++++-------- src/gallium/drivers/zink/zink_screen.h | 12 +++ 3 files changed, 141 insertions(+), 88 deletions(-) diff --git a/src/gallium/drivers/zink/zink_batch.c b/src/gallium/drivers/zink/zink_batch.c index ff41e305edd..870fb1825ae 100644 --- a/src/gallium/drivers/zink/zink_batch.c +++ b/src/gallium/drivers/zink/zink_batch.c @@ -334,16 +334,22 @@ create_batch_state(struct zink_context *ctx) VkCommandPoolCreateInfo cpci = {0}; cpci.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; cpci.queueFamilyIndex = screen->gfx_queue; - VkResult result = VKSCR(CreateCommandPool)(screen->dev, &cpci, NULL, &bs->cmdpool); - if (result != VK_SUCCESS) { - mesa_loge("ZINK: vkCreateCommandPool failed (%s)", vk_Result_to_str(result)); - goto fail; - } - result = VKSCR(CreateCommandPool)(screen->dev, &cpci, NULL, &bs->unsynchronized_cmdpool); - if (result != VK_SUCCESS) { - mesa_loge("ZINK: vkCreateCommandPool failed (%s)", vk_Result_to_str(result)); - goto fail; - } + VkResult result; + + VRAM_ALLOC_LOOP(result, + VKSCR(CreateCommandPool)(screen->dev, &cpci, NULL, &bs->cmdpool), + if (result != VK_SUCCESS) { + mesa_loge("ZINK: vkCreateCommandPool failed (%s)", vk_Result_to_str(result)); + goto fail; + } + ); + VRAM_ALLOC_LOOP(result, + 
VKSCR(CreateCommandPool)(screen->dev, &cpci, NULL, &bs->unsynchronized_cmdpool), + if (result != VK_SUCCESS) { + mesa_loge("ZINK: vkCreateCommandPool failed (%s)", vk_Result_to_str(result)); + goto fail; + } + ); VkCommandBuffer cmdbufs[2]; VkCommandBufferAllocateInfo cbai = {0}; @@ -352,21 +358,26 @@ create_batch_state(struct zink_context *ctx) cbai.commandPool = bs->cmdpool; cbai.commandBufferCount = 2; - result = VKSCR(AllocateCommandBuffers)(screen->dev, &cbai, cmdbufs); - if (result != VK_SUCCESS) { - mesa_loge("ZINK: vkAllocateCommandBuffers failed (%s)", vk_Result_to_str(result)); - goto fail; - } + VRAM_ALLOC_LOOP(result, + VKSCR(AllocateCommandBuffers)(screen->dev, &cbai, cmdbufs), + if (result != VK_SUCCESS) { + mesa_loge("ZINK: vkAllocateCommandBuffers failed (%s)", vk_Result_to_str(result)); + goto fail; + } + ); + bs->cmdbuf = cmdbufs[0]; bs->reordered_cmdbuf = cmdbufs[1]; cbai.commandPool = bs->unsynchronized_cmdpool; cbai.commandBufferCount = 1; - result = VKSCR(AllocateCommandBuffers)(screen->dev, &cbai, &bs->unsynchronized_cmdbuf); - if (result != VK_SUCCESS) { - mesa_loge("ZINK: vkAllocateCommandBuffers failed (%s)", vk_Result_to_str(result)); - goto fail; - } + VRAM_ALLOC_LOOP(result, + VKSCR(AllocateCommandBuffers)(screen->dev, &cbai, &bs->unsynchronized_cmdbuf);, + if (result != VK_SUCCESS) { + mesa_loge("ZINK: vkAllocateCommandBuffers failed (%s)", vk_Result_to_str(result)); + goto fail; + } + ); #define SET_CREATE_OR_FAIL(ptr) \ if (!_mesa_set_init(ptr, bs, _mesa_hash_pointer, _mesa_key_pointer_equal)) \ @@ -512,17 +523,22 @@ zink_start_batch(struct zink_context *ctx, struct zink_batch *batch) cbbi.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; cbbi.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; - VkResult result = VKCTX(BeginCommandBuffer)(batch->state->cmdbuf, &cbbi); - if (result != VK_SUCCESS) - mesa_loge("ZINK: vkBeginCommandBuffer failed (%s)", vk_Result_to_str(result)); - - result = 
VKCTX(BeginCommandBuffer)(batch->state->reordered_cmdbuf, &cbbi); - if (result != VK_SUCCESS) - mesa_loge("ZINK: vkBeginCommandBuffer failed (%s)", vk_Result_to_str(result)); - - result = VKCTX(BeginCommandBuffer)(batch->state->unsynchronized_cmdbuf, &cbbi); - if (result != VK_SUCCESS) - mesa_loge("ZINK: vkBeginCommandBuffer failed (%s)", vk_Result_to_str(result)); + VkResult result; + VRAM_ALLOC_LOOP(result, + VKCTX(BeginCommandBuffer)(batch->state->cmdbuf, &cbbi), + if (result != VK_SUCCESS) + mesa_loge("ZINK: vkBeginCommandBuffer failed (%s)", vk_Result_to_str(result)); + ); + VRAM_ALLOC_LOOP(result, + VKCTX(BeginCommandBuffer)(batch->state->reordered_cmdbuf, &cbbi), + if (result != VK_SUCCESS) + mesa_loge("ZINK: vkBeginCommandBuffer failed (%s)", vk_Result_to_str(result)); + ); + VRAM_ALLOC_LOOP(result, + VKCTX(BeginCommandBuffer)(batch->state->unsynchronized_cmdbuf, &cbbi), + if (result != VK_SUCCESS) + mesa_loge("ZINK: vkBeginCommandBuffer failed (%s)", vk_Result_to_str(result)); + ); batch->state->fence.completed = false; if (ctx->last_fence) { @@ -673,12 +689,15 @@ submit_queue(void *data, void *gdata, int thread_index) tsi.signalSemaphoreValueCount = si[ZINK_SUBMIT_SIGNAL].signalSemaphoreCount; - VkResult result = VKSCR(EndCommandBuffer)(bs->cmdbuf); - if (result != VK_SUCCESS) { - mesa_loge("ZINK: vkEndCommandBuffer failed (%s)", vk_Result_to_str(result)); - bs->is_device_lost = true; - goto end; - } + VkResult result; + VRAM_ALLOC_LOOP(result, + VKSCR(EndCommandBuffer)(bs->cmdbuf), + if (result != VK_SUCCESS) { + mesa_loge("ZINK: vkEndCommandBuffer failed (%s)", vk_Result_to_str(result)); + bs->is_device_lost = true; + goto end; + } + ); if (bs->has_barriers) { if (bs->unordered_write_access) { VkMemoryBarrier mb; @@ -690,31 +709,37 @@ submit_queue(void *data, void *gdata, int thread_index) bs->unordered_write_stages, 0, 0, 1, &mb, 0, NULL, 0, NULL); } - result = VKSCR(EndCommandBuffer)(bs->reordered_cmdbuf); - if (result != VK_SUCCESS) { - 
mesa_loge("ZINK: vkEndCommandBuffer failed (%s)", vk_Result_to_str(result)); - bs->is_device_lost = true; - goto end; - } + VRAM_ALLOC_LOOP(result, + VKSCR(EndCommandBuffer)(bs->reordered_cmdbuf), + if (result != VK_SUCCESS) { + mesa_loge("ZINK: vkEndCommandBuffer failed (%s)", vk_Result_to_str(result)); + bs->is_device_lost = true; + goto end; + } + ); } if (bs->has_unsync) { - result = VKSCR(EndCommandBuffer)(bs->unsynchronized_cmdbuf); - if (result != VK_SUCCESS) { - mesa_loge("ZINK: vkEndCommandBuffer failed (%s)", vk_Result_to_str(result)); - bs->is_device_lost = true; - goto end; - } + VRAM_ALLOC_LOOP(result, + VKSCR(EndCommandBuffer)(bs->unsynchronized_cmdbuf), + if (result != VK_SUCCESS) { + mesa_loge("ZINK: vkEndCommandBuffer failed (%s)", vk_Result_to_str(result)); + bs->is_device_lost = true; + goto end; + } + ); } if (!si[ZINK_SUBMIT_SIGNAL].signalSemaphoreCount) num_si--; simple_mtx_lock(&screen->queue_lock); - result = VKSCR(QueueSubmit)(screen->queue, num_si, submit, VK_NULL_HANDLE); - if (result != VK_SUCCESS) { - mesa_loge("ZINK: vkQueueSubmit failed (%s)", vk_Result_to_str(result)); - bs->is_device_lost = true; - } + VRAM_ALLOC_LOOP(result, + VKSCR(QueueSubmit)(screen->queue, num_si, submit, VK_NULL_HANDLE), + if (result != VK_SUCCESS) { + mesa_loge("ZINK: vkQueueSubmit failed (%s)", vk_Result_to_str(result)); + bs->is_device_lost = true; + } + ); simple_mtx_unlock(&screen->queue_lock); unsigned i = 0; diff --git a/src/gallium/drivers/zink/zink_pipeline.c b/src/gallium/drivers/zink/zink_pipeline.c index 063fdd9d73f..9c2971bf1e3 100644 --- a/src/gallium/drivers/zink/zink_pipeline.c +++ b/src/gallium/drivers/zink/zink_pipeline.c @@ -430,13 +430,15 @@ zink_create_gfx_pipeline(struct zink_screen *screen, VkPipeline pipeline; u_rwlock_wrlock(&prog->base.pipeline_cache_lock); - VkResult result = VKSCR(CreateGraphicsPipelines)(screen->dev, prog->base.pipeline_cache, - 1, &pci, NULL, &pipeline); - u_rwlock_wrunlock(&prog->base.pipeline_cache_lock); - if 
(result != VK_SUCCESS) { - mesa_loge("ZINK: vkCreateGraphicsPipelines failed (%s)", vk_Result_to_str(result)); - return VK_NULL_HANDLE; - } + VkResult result; + VRAM_ALLOC_LOOP(result, + VKSCR(CreateGraphicsPipelines)(screen->dev, prog->base.pipeline_cache, 1, &pci, NULL, &pipeline), + u_rwlock_wrunlock(&prog->base.pipeline_cache_lock); + if (result != VK_SUCCESS) { + mesa_loge("ZINK: vkCreateGraphicsPipelines failed (%s)", vk_Result_to_str(result)); + return VK_NULL_HANDLE; + } + ); return pipeline; } @@ -498,14 +500,16 @@ zink_create_compute_pipeline(struct zink_screen *screen, struct zink_compute_pro pci.stage = stage; VkPipeline pipeline; + VkResult result; u_rwlock_wrlock(&comp->base.pipeline_cache_lock); - VkResult result = VKSCR(CreateComputePipelines)(screen->dev, comp->base.pipeline_cache, - 1, &pci, NULL, &pipeline); - u_rwlock_wrunlock(&comp->base.pipeline_cache_lock); - if (result != VK_SUCCESS) { - mesa_loge("ZINK: vkCreateComputePipelines failed (%s)", vk_Result_to_str(result)); - return VK_NULL_HANDLE; - } + VRAM_ALLOC_LOOP(result, + VKSCR(CreateComputePipelines)(screen->dev, comp->base.pipeline_cache, 1, &pci, NULL, &pipeline), + u_rwlock_wrunlock(&comp->base.pipeline_cache_lock); + if (result != VK_SUCCESS) { + mesa_loge("ZINK: vkCreateComputePipelines failed (%s)", vk_Result_to_str(result)); + return VK_NULL_HANDLE; + } + ); return pipeline; } @@ -618,11 +622,14 @@ zink_create_gfx_pipeline_output(struct zink_screen *screen, struct zink_gfx_pipe pci.pDynamicState = &pipelineDynamicStateCreateInfo; VkPipeline pipeline; - if (VKSCR(CreateGraphicsPipelines)(screen->dev, VK_NULL_HANDLE, 1, &pci, - NULL, &pipeline) != VK_SUCCESS) { - mesa_loge("ZINK: vkCreateGraphicsPipelines failed"); - return VK_NULL_HANDLE; - } + VkResult result; + VRAM_ALLOC_LOOP(result, + VKSCR(CreateGraphicsPipelines)(screen->dev, VK_NULL_HANDLE, 1, &pci, NULL, &pipeline), + if (result != VK_SUCCESS) { + mesa_loge("ZINK: vkCreateGraphicsPipelines failed (%s)", 
vk_Result_to_str(result)); + return VK_NULL_HANDLE; + } + ); return pipeline; } @@ -696,11 +703,14 @@ zink_create_gfx_pipeline_input(struct zink_screen *screen, pci.pDynamicState = &pipelineDynamicStateCreateInfo; VkPipeline pipeline; - if (VKSCR(CreateGraphicsPipelines)(screen->dev, VK_NULL_HANDLE, 1, &pci, - NULL, &pipeline) != VK_SUCCESS) { - mesa_loge("ZINK: vkCreateGraphicsPipelines failed"); - return VK_NULL_HANDLE; - } + VkResult result; + VRAM_ALLOC_LOOP(result, + VKSCR(CreateGraphicsPipelines)(screen->dev, VK_NULL_HANDLE, 1, &pci, NULL, &pipeline), + if (result != VK_SUCCESS) { + mesa_loge("ZINK: vkCreateGraphicsPipelines failed (%s)", vk_Result_to_str(result)); + return VK_NULL_HANDLE; + } + ); return pipeline; } @@ -831,10 +841,14 @@ create_gfx_pipeline_library(struct zink_screen *screen, struct zink_shader_objec pci.flags |= VK_PIPELINE_CREATE_RETAIN_LINK_TIME_OPTIMIZATION_INFO_BIT_EXT; VkPipeline pipeline; - if (VKSCR(CreateGraphicsPipelines)(screen->dev, pipeline_cache, 1, &pci, NULL, &pipeline) != VK_SUCCESS) { - mesa_loge("ZINK: vkCreateGraphicsPipelines failed"); - return VK_NULL_HANDLE; - } + VkResult result; + VRAM_ALLOC_LOOP(result, + VKSCR(CreateGraphicsPipelines)(screen->dev, pipeline_cache, 1, &pci, NULL, &pipeline), + if (result != VK_SUCCESS) { + mesa_loge("ZINK: vkCreateGraphicsPipelines failed"); + return VK_NULL_HANDLE; + } + ); return pipeline; } @@ -886,13 +900,15 @@ zink_create_gfx_pipeline_combined(struct zink_screen *screen, struct zink_gfx_pr VkPipeline pipeline; u_rwlock_wrlock(&prog->base.pipeline_cache_lock); - VkResult result = VKSCR(CreateGraphicsPipelines)(screen->dev, prog->base.pipeline_cache, 1, &pci, NULL, &pipeline); - if (result != VK_SUCCESS && result != VK_PIPELINE_COMPILE_REQUIRED_EXT) { - mesa_loge("ZINK: vkCreateGraphicsPipelines failed"); + VkResult result; + VRAM_ALLOC_LOOP(result, + VKSCR(CreateGraphicsPipelines)(screen->dev, prog->base.pipeline_cache, 1, &pci, NULL, &pipeline), 
u_rwlock_wrunlock(&prog->base.pipeline_cache_lock); - return VK_NULL_HANDLE; - } - u_rwlock_wrunlock(&prog->base.pipeline_cache_lock); + if (result != VK_SUCCESS && result != VK_PIPELINE_COMPILE_REQUIRED_EXT) { + mesa_loge("ZINK: vkCreateGraphicsPipelines failed"); + return VK_NULL_HANDLE; + } + ); return pipeline; } diff --git a/src/gallium/drivers/zink/zink_screen.h b/src/gallium/drivers/zink/zink_screen.h index 09a003317fa..886700b65ad 100644 --- a/src/gallium/drivers/zink/zink_screen.h +++ b/src/gallium/drivers/zink/zink_screen.h @@ -114,6 +114,18 @@ zink_string_vkflags_unroll(char *buf, size_t bufsize, uint64_t flags, zink_vkfla return idx; } +#define VRAM_ALLOC_LOOP(RET, DOIT, ...) \ + do { \ + unsigned _us[] = {0, 1000, 10000, 500000, 1000000}; \ + for (unsigned _i = 0; _i < ARRAY_SIZE(_us); _i++) { \ + RET = DOIT; \ + if (RET == VK_SUCCESS || RET != VK_ERROR_OUT_OF_DEVICE_MEMORY) \ + break; \ + os_time_sleep(_us[_i]); \ + } \ + __VA_ARGS__ \ + } while (0) + VkSemaphore zink_create_semaphore(struct zink_screen *screen);