Commit: 4527dd1ce4784292cd3b8dd3764b9cd843020f9a Author: Jason Fielder Date: Fri Jul 1 10:30:16 2022 +0200 Branches: master https://developer.blender.org/rB4527dd1ce4784292cd3b8dd3764b9cd843020f9a
Metal: MTLMemoryManager implementation includes functions which manage allocation of MTLBuffer resources. The memory manager includes both a GPUContext-local manager which allocates per-context resources such as Circular Scratch Buffers for temporary data such as uniform updates and resource staging, and a GPUContext-global memory manager which features a pooled memory allocator for efficient re-use of resources, to reduce CPU-overhead of frequent memory allocations. These Memory Managers act as a simple interface for use by other Metal backend modules and to coordinate the lifetime of buffers, to ensure that GPU-resident resources are correctly tracked and freed when no longer in use. Note: This also contains dependent DIFF changes from D15027, though these will be removed once D15027 lands. Authored by Apple: Michael Parkin-White Ref T96261 Reviewed By: fclem Maniphest Tasks: T96261 Differential Revision: https://developer.blender.org/D15277 =================================================================== M source/blender/blenlib/BLI_math_base.h M source/blender/blenlib/intern/math_base_inline.c M source/blender/draw/engines/eevee/eevee_render.c M source/blender/draw/engines/workbench/workbench_render.c M source/blender/gpu/CMakeLists.txt M source/blender/gpu/intern/gpu_immediate_util.c M source/blender/gpu/metal/mtl_backend.mm M source/blender/gpu/metal/mtl_command_buffer.mm M source/blender/gpu/metal/mtl_common.hh M source/blender/gpu/metal/mtl_context.hh M source/blender/gpu/metal/mtl_context.mm M source/blender/gpu/metal/mtl_framebuffer.mm A source/blender/gpu/metal/mtl_memory.hh A source/blender/gpu/metal/mtl_memory.mm M source/blender/gpu/metal/mtl_state.hh M source/blender/gpu/metal/mtl_state.mm M source/blender/gpu/metal/mtl_texture.hh M source/blender/gpu/metal/mtl_texture.mm =================================================================== diff --git a/source/blender/blenlib/BLI_math_base.h b/source/blender/blenlib/BLI_math_base.h index 
f072a17f384..c0c4594ddc0 100644 --- a/source/blender/blenlib/BLI_math_base.h +++ b/source/blender/blenlib/BLI_math_base.h @@ -221,6 +221,19 @@ MINLINE unsigned int power_of_2_min_u(unsigned int x); * with integers, to avoid gradual darkening when rounding down. */ MINLINE int divide_round_i(int a, int b); + +/** + * Integer division that returns the ceiling, instead of flooring like normal C division. + */ +MINLINE uint divide_ceil_u(uint a, uint b); +MINLINE uint64_t divide_ceil_ul(uint64_t a, uint64_t b); + +/** + * Returns \a a if it is a multiple of \a b or the next multiple or \a b after \b a . + */ +MINLINE uint ceil_to_multiple_u(uint a, uint b); +MINLINE uint64_t ceil_to_multiple_ul(uint64_t a, uint64_t b); + /** * modulo that handles negative numbers, works the same as Python's. */ diff --git a/source/blender/blenlib/intern/math_base_inline.c b/source/blender/blenlib/intern/math_base_inline.c index cb7659a7059..fb71e84c23e 100644 --- a/source/blender/blenlib/intern/math_base_inline.c +++ b/source/blender/blenlib/intern/math_base_inline.c @@ -370,6 +370,11 @@ MINLINE uint divide_ceil_u(uint a, uint b) return (a + b - 1) / b; } +MINLINE uint64_t divide_ceil_ul(uint64_t a, uint64_t b) +{ + return (a + b - 1) / b; +} + /** * Returns \a a if it is a multiple of \a b or the next multiple or \a b after \b a . 
*/ @@ -378,6 +383,11 @@ MINLINE uint ceil_to_multiple_u(uint a, uint b) return divide_ceil_u(a, b) * b; } +MINLINE uint64_t ceil_to_multiple_ul(uint64_t a, uint64_t b) +{ + return divide_ceil_ul(a, b) * b; +} + MINLINE int mod_i(int i, int n) { return (i % n + n) % n; diff --git a/source/blender/draw/engines/eevee/eevee_render.c b/source/blender/draw/engines/eevee/eevee_render.c index bef19c589c2..82944f237ea 100644 --- a/source/blender/draw/engines/eevee/eevee_render.c +++ b/source/blender/draw/engines/eevee/eevee_render.c @@ -24,6 +24,7 @@ #include "DEG_depsgraph_query.h" #include "GPU_capabilities.h" +#include "GPU_context.h" #include "GPU_framebuffer.h" #include "GPU_state.h" @@ -646,6 +647,10 @@ void EEVEE_render_draw(EEVEE_Data *vedata, RenderEngine *engine, RenderLayer *rl /* XXX Seems to fix TDR issue with NVidia drivers on linux. */ GPU_finish(); + /* Perform render step between samples to allow + * flushing of freed GPUBackend resources. */ + GPU_render_step(); + RE_engine_update_progress(engine, (float)(render_samples++) / (float)tot_sample); } } diff --git a/source/blender/draw/engines/workbench/workbench_render.c b/source/blender/draw/engines/workbench/workbench_render.c index e5dcf6c5624..931f6a2dc92 100644 --- a/source/blender/draw/engines/workbench/workbench_render.c +++ b/source/blender/draw/engines/workbench/workbench_render.c @@ -17,6 +17,7 @@ #include "ED_view3d.h" +#include "GPU_context.h" #include "GPU_shader.h" #include "DEG_depsgraph.h" @@ -188,6 +189,10 @@ void workbench_render(void *ved, RenderEngine *engine, RenderLayer *render_layer workbench_draw_finish(data); + /* Perform render step between samples to allow + * flushing of freed GPUBackend resources. */ + GPU_render_step(); + /* Write render output. 
*/ const char *viewname = RE_GetActiveRenderView(engine->re); RenderPass *rp = RE_pass_find_by_name(render_layer, RE_PASSNAME_COMBINED, viewname); diff --git a/source/blender/gpu/CMakeLists.txt b/source/blender/gpu/CMakeLists.txt index 62d5537772a..9b5ce6e147e 100644 --- a/source/blender/gpu/CMakeLists.txt +++ b/source/blender/gpu/CMakeLists.txt @@ -194,6 +194,7 @@ set(METAL_SRC metal/mtl_command_buffer.mm metal/mtl_debug.mm metal/mtl_framebuffer.mm + metal/mtl_memory.mm metal/mtl_state.mm metal/mtl_texture.mm metal/mtl_texture_util.mm @@ -204,6 +205,7 @@ set(METAL_SRC metal/mtl_context.hh metal/mtl_debug.hh metal/mtl_framebuffer.hh + metal/mtl_memory.hh metal/mtl_state.hh metal/mtl_texture.hh ) diff --git a/source/blender/gpu/intern/gpu_immediate_util.c b/source/blender/gpu/intern/gpu_immediate_util.c index a275fd8fc6c..5233ff2dbf6 100644 --- a/source/blender/gpu/intern/gpu_immediate_util.c +++ b/source/blender/gpu/intern/gpu_immediate_util.c @@ -142,7 +142,7 @@ static void imm_draw_circle(GPUPrimType prim_type, int nsegments) { if (prim_type == GPU_PRIM_LINE_LOOP) { - /* Note(Metal/AMD): For small primitives, line list more efficient than line strip.. */ + /* NOTE(Metal/AMD): For small primitives, line list more efficient than line strip.. */ immBegin(GPU_PRIM_LINES, nsegments * 2); immVertex2f(shdr_pos, x + (radius_x * cosf(0.0f)), y + (radius_y * sinf(0.0f))); @@ -333,7 +333,7 @@ static void imm_draw_circle_3D( GPUPrimType prim_type, uint pos, float x, float y, float radius, int nsegments) { if (prim_type == GPU_PRIM_LINE_LOOP) { - /* Note(Metal/AMD): For small primitives, line list more efficient than line strip. */ + /* NOTE(Metal/AMD): For small primitives, line list more efficient than line strip. 
*/ immBegin(GPU_PRIM_LINES, nsegments * 2); const float angle = (float)(2 * M_PI) / (float)nsegments; @@ -386,7 +386,7 @@ void imm_draw_circle_fill_3d(uint pos, float x, float y, float radius, int nsegm void imm_draw_box_wire_2d(uint pos, float x1, float y1, float x2, float y2) { - /* Note(Metal/AMD): For small primitives, line list more efficient than line-strip. */ + /* NOTE(Metal/AMD): For small primitives, line list more efficient than line-strip. */ immBegin(GPU_PRIM_LINES, 8); immVertex2f(pos, x1, y1); immVertex2f(pos, x1, y2); @@ -405,7 +405,7 @@ void imm_draw_box_wire_2d(uint pos, float x1, float y1, float x2, float y2) void imm_draw_box_wire_3d(uint pos, float x1, float y1, float x2, float y2) { /* use this version when GPUVertFormat has a vec3 position */ - /* Note(Metal/AMD): For small primitives, line list more efficient than line-strip. */ + /* NOTE(Metal/AMD): For small primitives, line list more efficient than line-strip. */ immBegin(GPU_PRIM_LINES, 8); immVertex3f(pos, x1, y1, 0.0f); immVertex3f(pos, x1, y2, 0.0f); diff --git a/source/blender/gpu/metal/mtl_backend.mm b/source/blender/gpu/metal/mtl_backend.mm index 81f8f279759..117b8352a0a 100644 --- a/source/blender/gpu/metal/mtl_backend.mm +++ b/source/blender/gpu/metal/mtl_backend.mm @@ -127,7 +127,21 @@ void MTLBackend::render_end() void MTLBackend::render_step() { - /* Placeholder */ + /* NOTE(Metal): Primarily called from main thread, but below datastructures + * and operations are thread-safe, and GPUContext rendering coordination + * is also thread-safe. */ + + /* Flush any MTLSafeFreeLists which have previously been released by any MTLContext. */ + MTLContext::get_global_memory_manager().update_memory_pools(); + + /* End existing MTLSafeFreeList and begin new list -- + * Buffers wont `free` until all associated in-flight command buffers have completed. + * Decrement final reference count for ensuring the previous list is certainly + * released. 
*/ + MTLSafeFreeList *cmd_free_buffer_list = + MTLContext::get_global_memory_manager().get_current_safe_list(); + MTLContext::get_global_memory_manager().begin_new_safe_list(); + cmd_free_buffer_list->decrement_reference(); } bool MTLBackend::is_inside_render_boundary() diff --git a/source/blender/gpu/metal/mtl_command_buffer.mm b/source/blender/gpu/metal/mtl_command_buffer.mm index 4f6077e8159..f9edd87a73c 100644 --- a/source/blender/gpu/metal/mtl_command_buffer.mm +++ b/source/blender/gpu/metal/mtl_command_buffer.mm @@ -19,7 +19,7 @@ namespace blender::gpu { * dependencies not being honored for work submitted between * different GPUContext's. */ id<MTLEvent> MTLCommandBufferManager::sync_event = nil; -unsigned long long MTLCommandBufferManager::event_signal_val = 0; +uint64_t MTLCommandBufferManager::event_signal_val = 0; /* Counter for active command buffers. */ int MTLCommandBufferManager::num_active_cmd_bufs = 0; @@ -28,10 +28,9 @@ int MTLCommandBufferManager::num_active_cmd_bufs = 0; /** \name MTLCommandBuffer initialization and render coordination. 
* \{ */ -void MTLCommandBufferManager::prepare(MTLContext *ctx, bool supports_render) +void MTLCommandBufferManager::prepare(bool supports_render) { - context_ = ctx; - render_pass_state_.prepare(this, ctx); + render_pass_state_.reset_state(); } void MTLCommandBufferManager::register_encoder_counters() @@ -54,10 +53,10 @@ id<MTLCommandBuffer> MTLCommandBufferManager::ensure_begin() MTLCommandBufferDescriptor *desc = [[MTLCommandBufferDescriptor alloc] init]; desc.errorOptions = MTLCommandBufferErrorOptionEncoderExecutionStatus; desc.retainedReferences = YES; - active_command_buffer_ = [context_->queue commandBufferWithDescriptor:desc]; + active_command_buffer_ = [context_.queue commandBufferWithDescriptor:desc]; } else { - active_command_buffer_ = [context_->queue commandBuffer]; + active_command_buffer_ = [context_.queue commandBuffer]; } [active_command_buffer_ retain]; MTLCommandBufferManager::num_active_cmd_bufs++; @@ -67,6 +66,10 @@ id<MTLCommandBuffer> MTLCommandBufferManager::ensure_begin() [active_command_buffer_ encodeWaitForEvent:this->sync_event value:this->event_signal_val]; } + /* Ensure we begin new Scratch Buffer if we are on a new frame. */ + MTLScratchBufferManager &mem = context_.memory_manager; + mem.ensure_increment_scratch_buffer(); + /* Reset Command buffer heuristics. */ this->reset_counters(); } @@ -86,12 +89,15 @@ bool MTLCommandBufferManager::submit(bool wait) this->end_active_command_encoder(); BLI_assert(active_command_encoder_type_ == MTL_NO_COMMAND_ENCODER); + /* Flush active ScratchBuffer associated with parent MTLContext. */ + context_.memory_manager.flush_active_scratch_buffer(); + /*** Submit Command Buffer. ***/ /* Strict ordering ensures command buffers are guaranteed to execute after a previous * one has completed. Resolves flickering when command buffers are submitted from * different MTLContex @@ Diff output truncated at 10240 characters. 
@@ _______________________________________________ Bf-blender-cvs mailing list Bf-blender-cvs@blender.org List details, subscription details or unsubscribe: https://lists.blender.org/mailman/listinfo/bf-blender-cvs
