Commit: df1fe18ed75812265cf2af186f6b082d8d27d9fe Author: Jason Fielder Date: Tue Dec 20 14:08:37 2022 +0100 Branches: master https://developer.blender.org/rBdf1fe18ed75812265cf2af186f6b082d8d27d9fe
Metal: Fix GPencil texture buffer attribute packing issue and cutting tool rendering. Line Loop topology support for cutting tool and add support for packing several vertex attributes across individual pixels within a texture buffer. Authored by Apple: Michael Parkin-White Ref T96261 Reviewed By: fclem Maniphest Tasks: T96261 Differential Revision: https://developer.blender.org/D16783 =================================================================== M source/blender/gpu/metal/mtl_immediate.mm M source/blender/gpu/metal/mtl_primitive.hh M source/blender/gpu/metal/mtl_texture.mm =================================================================== diff --git a/source/blender/gpu/metal/mtl_immediate.mm b/source/blender/gpu/metal/mtl_immediate.mm index 7af5ca30578..f0809e6e9d3 100644 --- a/source/blender/gpu/metal/mtl_immediate.mm +++ b/source/blender/gpu/metal/mtl_immediate.mm @@ -39,8 +39,16 @@ uchar *MTLImmediate::begin() metal_primitive_mode_ = mtl_prim_type_to_topology_class(metal_primitive_type_); has_begun_ = true; + /* If prim type is line loop, add an extra vertex at the end for placing the closing line, + * as metal does not support this primitive type. We treat this as a Line strip with one + * extra value. */ + int vertex_alloc_length = vertex_len; + if (prim_type == GPU_PRIM_LINE_LOOP) { + vertex_alloc_length++; + } + /* Allocate a range of data and return host-accessible pointer. */ - const size_t bytes_needed = vertex_buffer_size(&vertex_format, vertex_len); + const size_t bytes_needed = vertex_buffer_size(&vertex_format, vertex_alloc_length); current_allocation_ = context_->get_scratchbuffer_manager() .scratch_buffer_allocate_range_aligned(bytes_needed, 256); [current_allocation_.metal_buffer retain]; @@ -266,71 +274,88 @@ void MTLImmediate::end() * For immediate mode, generating these is currently very cheap, as we use * fast scratch buffer allocations. Though we may benefit from caching of * frequently used buffer sizes. 
*/ + bool rendered = false; if (mtl_needs_topology_emulation(this->prim_type)) { - /* Debug safety check for SSBO FETCH MODE. */ - if (active_mtl_shader->get_uses_ssbo_vertex_fetch()) { - BLI_assert(false && "Topology emulation not supported with SSBO Vertex Fetch mode"); - } - /* Emulate Tri-fan. */ - if (this->prim_type == GPU_PRIM_TRI_FAN) { - /* Prepare Triangle-Fan emulation index buffer on CPU based on number of input - * vertices. */ - uint32_t base_vert_count = this->vertex_idx; - uint32_t num_triangles = max_ii(base_vert_count - 2, 0); - uint32_t fan_index_count = num_triangles * 3; - BLI_assert(num_triangles > 0); - - uint32_t alloc_size = sizeof(uint32_t) * fan_index_count; - uint32_t *index_buffer = nullptr; - - MTLTemporaryBuffer allocation = - context_->get_scratchbuffer_manager().scratch_buffer_allocate_range_aligned( - alloc_size, 128); - index_buffer = (uint32_t *)allocation.data; - - int a = 0; - for (int i = 0; i < num_triangles; i++) { - index_buffer[a++] = 0; - index_buffer[a++] = i + 1; - index_buffer[a++] = i + 2; - } + switch (this->prim_type) { + case GPU_PRIM_TRI_FAN: { + /* Debug safety check for SSBO FETCH MODE. */ + if (active_mtl_shader->get_uses_ssbo_vertex_fetch()) { + BLI_assert( + false && + "Topology emulation for TriangleFan not supported with SSBO Vertex Fetch mode"); + } - @autoreleasepool { + /* Prepare Triangle-Fan emulation index buffer on CPU based on number of input + * vertices. 
*/ + uint32_t base_vert_count = this->vertex_idx; + uint32_t num_triangles = max_ii(base_vert_count - 2, 0); + uint32_t fan_index_count = num_triangles * 3; + BLI_assert(num_triangles > 0); + + uint32_t alloc_size = sizeof(uint32_t) * fan_index_count; + uint32_t *index_buffer = nullptr; + + MTLTemporaryBuffer allocation = + context_->get_scratchbuffer_manager().scratch_buffer_allocate_range_aligned( + alloc_size, 128); + index_buffer = (uint32_t *)allocation.data; + + int a = 0; + for (int i = 0; i < num_triangles; i++) { + index_buffer[a++] = 0; + index_buffer[a++] = i + 1; + index_buffer[a++] = i + 2; + } - id<MTLBuffer> index_buffer_mtl = nil; - uint32_t index_buffer_offset = 0; + @autoreleasepool { - /* Region of scratch buffer used for topology emulation element data. - * NOTE(Metal): We do not need to manually flush as the entire scratch - * buffer for current command buffer is flushed upon submission. */ - index_buffer_mtl = allocation.metal_buffer; - index_buffer_offset = allocation.buffer_offset; + id<MTLBuffer> index_buffer_mtl = nil; + uint32_t index_buffer_offset = 0; - /* Set depth stencil state (requires knowledge of primitive type). */ - context_->ensure_depth_stencil_state(MTLPrimitiveTypeTriangle); + /* Region of scratch buffer used for topology emulation element data. + * NOTE(Metal): We do not need to manually flush as the entire scratch + * buffer for current command buffer is flushed upon submission. */ + index_buffer_mtl = allocation.metal_buffer; + index_buffer_offset = allocation.buffer_offset; - /* Bind Vertex Buffer. */ - rps.bind_vertex_buffer( - current_allocation_.metal_buffer, current_allocation_.buffer_offset, 0); + /* Set depth stencil state (requires knowledge of primitive type). */ + context_->ensure_depth_stencil_state(MTLPrimitiveTypeTriangle); - /* Draw. 
*/ - [rec drawIndexedPrimitives:MTLPrimitiveTypeTriangle - indexCount:fan_index_count - indexType:MTLIndexTypeUInt32 - indexBuffer:index_buffer_mtl - indexBufferOffset:index_buffer_offset]; - } - } - else { - /* TODO(Metal): Topology emulation for line loop. - * NOTE(Metal): This is currently not used anywhere and modified at the high - * level for efficiency in such cases. */ - BLI_assert_msg(false, "LineLoop requires emulation support in immediate mode."); + /* Bind Vertex Buffer. */ + rps.bind_vertex_buffer( + current_allocation_.metal_buffer, current_allocation_.buffer_offset, 0); + + /* Draw. */ + [rec drawIndexedPrimitives:MTLPrimitiveTypeTriangle + indexCount:fan_index_count + indexType:MTLIndexTypeUInt32 + indexBuffer:index_buffer_mtl + indexBufferOffset:index_buffer_offset]; + context_->main_command_buffer.register_draw_counters(fan_index_count); + } + rendered = true; + } break; + case GPU_PRIM_LINE_LOOP: { + /* Patch final vertex of line loop to close. Rendered using LineStrip. + * Note: vertex_len represents original length, however, allocated Metal + * buffer contains space for one extra vertex when LineLoop is used. */ + uchar *buffer_data = reinterpret_cast<uchar *>(current_allocation_.data); + memcpy(buffer_data + (vertex_len)*vertex_format.stride, + buffer_data, + vertex_format.stride); + this->vertex_idx++; + } break; + default: { + BLI_assert_unreachable(); + } break; } } - else { + + /* If not yet rendered, run through main render path. LineLoop primitive topology emulation + * will simply amend original data passed into default rendering path. 
*/ + if (!rendered) { MTLPrimitiveType primitive_type = metal_primitive_type_; int vertex_count = this->vertex_idx; diff --git a/source/blender/gpu/metal/mtl_primitive.hh b/source/blender/gpu/metal/mtl_primitive.hh index b32854a04bf..0b66a51d630 100644 --- a/source/blender/gpu/metal/mtl_primitive.hh +++ b/source/blender/gpu/metal/mtl_primitive.hh @@ -39,10 +39,10 @@ static inline MTLPrimitiveType gpu_prim_type_to_metal(GPUPrimType prim_type) return MTLPrimitiveTypePoint; case GPU_PRIM_LINES: case GPU_PRIM_LINES_ADJ: - case GPU_PRIM_LINE_LOOP: return MTLPrimitiveTypeLine; case GPU_PRIM_LINE_STRIP: case GPU_PRIM_LINE_STRIP_ADJ: + case GPU_PRIM_LINE_LOOP: return MTLPrimitiveTypeLineStrip; case GPU_PRIM_TRIS: case GPU_PRIM_TRI_FAN: diff --git a/source/blender/gpu/metal/mtl_texture.mm b/source/blender/gpu/metal/mtl_texture.mm index 411d1187610..d2d466bffe1 100644 --- a/source/blender/gpu/metal/mtl_texture.mm +++ b/source/blender/gpu/metal/mtl_texture.mm @@ -1621,6 +1621,7 @@ bool gpu::MTLTexture::init_internal(GPUVertBuf *vbo) } /* Verify Texture and vertex buffer alignment. */ + const GPUVertFormat *format = GPU_vertbuf_get_format(vbo); int bytes_per_pixel = get_mtl_format_bytesize(mtl_format); int bytes_per_row = bytes_per_pixel * w_; @@ -1628,12 +1629,40 @@ bool gpu::MTLTexture::init_internal(GPUVertBuf *vbo) uint32_t align_requirement = static_cast<uint32_t>( [mtl_ctx->device minimumLinearTextureAlignmentForPixelFormat:mtl_format]); - /* Verify per-vertex size aligns with texture size. */ - const GPUVertFormat *format = GPU_vertbuf_get_format(vbo); - BLI_assert(bytes_per_pixel == format->stride && - "Pixel format stride MUST match the texture format stride -- These being different " - "is likely caused by Metal's VBO padding to a minimum of 4-bytes per-vertex"); - UNUSED_VARS_NDEBUG(format); + /* If stride is larger than bytes per pixel, but format has multiple attributes, + * split attributes across several pixels. 
*/ + if (format->stride > bytes_per_pixel && format->attr_len > 1) { + + /* We need to increase the number of pixels available to store additional attributes. + * First ensure that the total stride of the vertex format fits uniformly into + * multiple pixels. If these sizes are different, then attributes are of differing + * sizes and this operation is unsupported. */ + if (bytes_per_pixel * format->attr_len != format->stride) { + B @@ Diff output truncated at 10240 characters. @@ _______________________________________________ Bf-blender-cvs mailing list Bf-blender-cvs@blender.org List details, subscription details or unsubscribe: https://lists.blender.org/mailman/listinfo/bf-blender-cvs
