Module: Mesa
Branch: main
Commit: 2e4951d3fb517d77b4bb6f3494f9e4a48db1a2ab
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=2e4951d3fb517d77b4bb6f3494f9e4a48db1a2ab

Author: Konstantin Seurer <konstantin.seu...@gmail.com>
Date:   Sun Dec 10 08:46:31 2023 +0100

radv: Remove the BVH depth heuristics

It only helps Quake II RTX and hurts everything else.

Reviewed-by: Friedrich Vock <friedrich.v...@gmx.de>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26481>

---

 src/amd/vulkan/bvh/bvh.h                     |  2 --
 src/amd/vulkan/bvh/leaf.comp                 |  1 -
 src/amd/vulkan/bvh/meson.build               |  7 +----
 src/amd/vulkan/bvh/ploc_internal.comp        | 42 +---------------------------
 src/amd/vulkan/radv_acceleration_structure.c | 28 ++-----------------
 src/amd/vulkan/radv_private.h                |  1 -
 6 files changed, 5 insertions(+), 76 deletions(-)

diff --git a/src/amd/vulkan/bvh/bvh.h b/src/amd/vulkan/bvh/bvh.h
index 52e06ec5db3..b77c6b8d47a 100644
--- a/src/amd/vulkan/bvh/bvh.h
+++ b/src/amd/vulkan/bvh/bvh.h
@@ -107,8 +107,6 @@ struct radv_accel_struct_header {
 
 struct radv_ir_node {
    radv_aabb aabb;
-   /* Generic normalized cost of not merging this node. */
-   float cost;
 };
 
 #define RADV_UNKNOWN_BVH_OFFSET 0xFFFFFFFF
diff --git a/src/amd/vulkan/bvh/leaf.comp b/src/amd/vulkan/bvh/leaf.comp
index 58ab8eeceec..6ef35cf156d 100644
--- a/src/amd/vulkan/bvh/leaf.comp
+++ b/src/amd/vulkan/bvh/leaf.comp
@@ -366,7 +366,6 @@ main(void)
    if (is_active) {
       REF(radv_ir_node) ir_node = INDEX(radv_ir_node, args.ir, primitive_id);
       DEREF(ir_node).aabb = bounds;
-      DEREF(ir_node).cost = 0.0;
    }
 
    uint32_t ir_offset = primitive_id * SIZEOF(radv_ir_node);
diff --git a/src/amd/vulkan/bvh/meson.build b/src/amd/vulkan/bvh/meson.build
index 244bce3195e..17af9fba496 100644
--- a/src/amd/vulkan/bvh/meson.build
+++ b/src/amd/vulkan/bvh/meson.build
@@ -63,12 +63,7 @@ bvh_shaders = [
   [
     'ploc_internal.comp',
     'ploc_internal',
-    ['EXTENDED_SAH=0'],
-  ],
-  [
-    'ploc_internal.comp',
-    'ploc_internal_extended',
-    ['EXTENDED_SAH=1'],
+    [],
   ],
 ]
diff --git a/src/amd/vulkan/bvh/ploc_internal.comp b/src/amd/vulkan/bvh/ploc_internal.comp
index c7c8b5d394e..298fb0a1ef6 100644
--- a/src/amd/vulkan/bvh/ploc_internal.comp
+++ b/src/amd/vulkan/bvh/ploc_internal.comp
@@ -116,8 +116,6 @@ push_node(uint32_t children[2], radv_aabb bounds[2])
    total_bounds.min = vec3(INFINITY);
    total_bounds.max = vec3(-INFINITY);
 
-   float cost = 0.0;
-
    for (uint i = 0; i < 2; ++i) {
       VOID_REF node = OFFSET(args.bvh, ir_id_to_offset(children[i]));
       REF(radv_ir_node) child = REF(radv_ir_node)(node);
@@ -125,15 +123,10 @@ push_node(uint32_t children[2], radv_aabb bounds[2])
       total_bounds.min = min(total_bounds.min, bounds[i].min);
       total_bounds.max = max(total_bounds.max, bounds[i].max);
-      cost += DEREF(child).cost;
-
       DEREF(dst_node).children[i] = children[i];
    }
 
    DEREF(dst_node).base.aabb = total_bounds;
-#if EXTENDED_SAH
-   DEREF(dst_node).base.cost = cost * 0.5 + BVH_LEVEL_COST;
-#endif
    DEREF(dst_node).bvh_offset = RADV_UNKNOWN_BVH_OFFSET;
    return dst_id;
 }
@@ -159,9 +152,6 @@ decode_neighbour_offset(uint32_t encoded_offset)
 #define NUM_PLOC_LDS_ITEMS PLOC_WORKGROUP_SIZE + 4 * PLOC_NEIGHBOURHOOD
 
 shared radv_aabb shared_bounds[NUM_PLOC_LDS_ITEMS];
-#if EXTENDED_SAH
-shared float shared_costs[NUM_PLOC_LDS_ITEMS];
-#endif
 shared uint32_t nearest_neighbour_indices[NUM_PLOC_LDS_ITEMS];
 
 uint32_t
@@ -187,9 +177,6 @@ load_bounds(VOID_REF ids, uint32_t iter, uint32_t task_index, uint32_t lds_base,
       REF(radv_ir_node) node = REF(radv_ir_node)(addr);
       shared_bounds[i - lds_base] = DEREF(node).aabb;
-#if EXTENDED_SAH
-      shared_costs[i - lds_base] = DEREF(node).cost;
-#endif
    }
 }
@@ -199,34 +186,7 @@ combined_node_cost(uint32_t lds_base, uint32_t i, uint32_t j)
    radv_aabb combined_bounds;
    combined_bounds.min = min(shared_bounds[i - lds_base].min, shared_bounds[j - lds_base].min);
    combined_bounds.max = max(shared_bounds[i - lds_base].max, shared_bounds[j - lds_base].max);
-   float area = aabb_surface_area(combined_bounds);
-
-#if EXTENDED_SAH
-   if (area == 0.0)
-      return 0.0;
-
-   /* p_a and p_b are the probabilities that i or j are hit by a ray:
-    * Assuming that the current node is hit (p = 1) and the probability of hitting a node
-    * is proportional to its surface area, p = area * c with p = 1 for the current node.
-    * -> c = 1 / area
-    *
-    * We can use those probabilities to limit the impact of child cost to be proportional to
-    * its hit probability. (Child cost is the cost of not merging a node which increases with
-    * tree depth for internal nodes)
-    *
-    * Dividing area by both relative costs will make it more likely that we merge nodes with
-    * a high child cost.
-    */
-   float p_i = aabb_surface_area(shared_bounds[i - lds_base]) / area;
-   float p_j = aabb_surface_area(shared_bounds[j - lds_base]) / area;
-
-   float combined_cost =
-      (1.0 + shared_costs[i - lds_base] * p_i) * (1.0 + shared_costs[j - lds_base] * p_j);
-
-   return area / combined_cost;
-#else
-   return area;
-#endif
+   return aabb_surface_area(combined_bounds);
 }
 
 shared uint32_t shared_aggregate_sum;
diff --git a/src/amd/vulkan/radv_acceleration_structure.c b/src/amd/vulkan/radv_acceleration_structure.c
index 06612cdf52f..a2981306d58 100644
--- a/src/amd/vulkan/radv_acceleration_structure.c
+++ b/src/amd/vulkan/radv_acceleration_structure.c
@@ -57,10 +57,6 @@ static const uint32_t ploc_spv[] = {
 #include "bvh/ploc_internal.spv.h"
 };
 
-static const uint32_t ploc_extended_spv[] = {
-#include "bvh/ploc_internal_extended.spv.h"
-};
-
 static const uint32_t copy_spv[] = {
 #include "bvh/copy.spv.h"
 };
@@ -87,7 +83,6 @@ enum internal_build_type {
 
 struct build_config {
    enum internal_build_type internal_type;
-   bool extended_sah;
    bool compact;
 };
 
@@ -129,11 +124,6 @@ build_config(uint32_t leaf_count, const VkAccelerationStructureBuildGeometryInfo
    else
       config.internal_type = INTERNAL_BUILD_TYPE_LBVH;
 
-   /* 4^(lds stack entry count) assuming we push 1 node on average. */
-   uint32_t lds_spill_threshold = 1 << (8 * 2);
-   if (leaf_count < lds_spill_threshold)
-      config.extended_sah = true;
-
    if (build_info->flags & VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_COMPACTION_BIT_KHR)
       config.compact = true;
 
@@ -306,7 +296,6 @@ radv_device_finish_accel_struct_build_state(struct radv_device *device)
    struct radv_meta_state *state = &device->meta_state;
    radv_DestroyPipeline(radv_device_to_handle(device), state->accel_struct_build.copy_pipeline, &state->alloc);
    radv_DestroyPipeline(radv_device_to_handle(device), state->accel_struct_build.ploc_pipeline, &state->alloc);
-   radv_DestroyPipeline(radv_device_to_handle(device), state->accel_struct_build.ploc_extended_pipeline, &state->alloc);
    radv_DestroyPipeline(radv_device_to_handle(device), state->accel_struct_build.lbvh_generate_ir_pipeline, &state->alloc);
    radv_DestroyPipeline(radv_device_to_handle(device), state->accel_struct_build.lbvh_main_pipeline, &state->alloc);
@@ -544,12 +533,6 @@ radv_device_init_accel_struct_build_state(struct radv_device *device)
    if (result != VK_SUCCESS)
       goto exit;
 
-   result = create_build_pipeline_spv(device, ploc_extended_spv, sizeof(ploc_extended_spv), sizeof(struct ploc_args),
-                                      &device->meta_state.accel_struct_build.ploc_extended_pipeline,
-                                      &device->meta_state.accel_struct_build.ploc_p_layout);
-   if (result != VK_SUCCESS)
-      goto exit;
-
    result = create_build_pipeline_spv(device, encode_spv, sizeof(encode_spv), sizeof(struct encode_args),
                                       &device->meta_state.accel_struct_build.encode_pipeline,
                                       &device->meta_state.accel_struct_build.encode_p_layout);
    if (result != VK_SUCCESS)
       goto exit;
@@ -1004,19 +987,15 @@ lbvh_build_internal(VkCommandBuffer commandBuffer, uint32_t infoCount,
 static void
 ploc_build_internal(VkCommandBuffer commandBuffer, uint32_t infoCount,
-                    const VkAccelerationStructureBuildGeometryInfoKHR *pInfos, struct bvh_state *bvh_states,
-                    bool extended_sah)
+                    const VkAccelerationStructureBuildGeometryInfoKHR *pInfos, struct bvh_state *bvh_states)
 {
    RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
 
    radv_CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE,
-                        extended_sah ? cmd_buffer->device->meta_state.accel_struct_build.ploc_extended_pipeline
-                                     : cmd_buffer->device->meta_state.accel_struct_build.ploc_pipeline);
+                        cmd_buffer->device->meta_state.accel_struct_build.ploc_pipeline);
 
    for (uint32_t i = 0; i < infoCount; ++i) {
       if (bvh_states[i].config.internal_type != INTERNAL_BUILD_TYPE_PLOC)
         continue;
-      if (bvh_states[i].config.extended_sah != extended_sah)
-         continue;
 
       uint32_t src_scratch_offset = bvh_states[i].scratch_offset;
       uint32_t dst_scratch_offset = (src_scratch_offset == bvh_states[i].scratch.sort_buffer_offset[0])
@@ -1242,8 +1221,7 @@ radv_CmdBuildAccelerationStructuresKHR(VkCommandBuffer commandBuffer, uint32_t i
 
    lbvh_build_internal(commandBuffer, infoCount, pInfos, bvh_states, flush_bits);
 
-   ploc_build_internal(commandBuffer, infoCount, pInfos, bvh_states, false);
-   ploc_build_internal(commandBuffer, infoCount, pInfos, bvh_states, true);
+   ploc_build_internal(commandBuffer, infoCount, pInfos, bvh_states);
 
    cmd_buffer->state.flush_bits |= flush_bits;
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index 426593f8ac8..367eba65632 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -657,7 +657,6 @@ struct radv_meta_state {
       VkPipeline lbvh_generate_ir_pipeline;
       VkPipelineLayout ploc_p_layout;
       VkPipeline ploc_pipeline;
-      VkPipeline ploc_extended_pipeline;
       VkPipelineLayout encode_p_layout;
       VkPipeline encode_pipeline;
       VkPipeline encode_compact_pipeline
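
For readers who want the removed heuristic spelled out: combined_node_cost() used to divide the merged surface area by a factor built from each child's accumulated cost, weighted by its relative hit probability, while push_node() propagated cost = (cost_left + cost_right) * 0.5 + BVH_LEVEL_COST per internal node. The standalone C sketch below restates that math for two boxes so the two variants can be compared; it is not RADV code. The aabb type, the surface_area()/merge() helpers and the BVH_LEVEL_COST value of 1.0 are illustrative stand-ins, not the shader-side definitions.

/* Sketch of the PLOC merge-cost math before and after this commit.
 * Types and the BVH_LEVEL_COST value are placeholders, not the RADV ones. */
#include <stdio.h>

typedef struct { float min[3], max[3]; } aabb;

#define BVH_LEVEL_COST 1.0f /* placeholder value */

static float surface_area(aabb b)
{
   float dx = b.max[0] - b.min[0];
   float dy = b.max[1] - b.min[1];
   float dz = b.max[2] - b.min[2];
   return 2.0f * (dx * dy + dy * dz + dz * dx);
}

static aabb merge(aabb a, aabb b)
{
   aabb r;
   for (int i = 0; i < 3; ++i) {
      r.min[i] = a.min[i] < b.min[i] ? a.min[i] : b.min[i];
      r.max[i] = a.max[i] > b.max[i] ? a.max[i] : b.max[i];
   }
   return r;
}

/* What remains after this commit: the plain surface area of the merged box. */
static float combined_cost_plain(aabb a, aabb b)
{
   return surface_area(merge(a, b));
}

/* The removed "extended SAH": divide the merged area by a factor that grows
 * with the children's accumulated costs, each weighted by its relative hit
 * probability, so expensive (deep) subtrees get merged earlier. */
static float combined_cost_extended(aabb a, float cost_a, aabb b, float cost_b)
{
   float area = surface_area(merge(a, b));
   if (area == 0.0f)
      return 0.0f;
   float p_a = surface_area(a) / area;
   float p_b = surface_area(b) / area;
   return area / ((1.0f + cost_a * p_a) * (1.0f + cost_b * p_b));
}

int main(void)
{
   aabb a = {{0, 0, 0}, {1, 1, 1}};
   aabb b = {{2, 0, 0}, {3, 1, 1}};
   /* Costs would have been propagated as (cost_left + cost_right) * 0.5 + BVH_LEVEL_COST
    * per internal node, starting from 0.0 at the leaves; two example values here. */
   float cost_a = BVH_LEVEL_COST;
   float cost_b = 3.0f * BVH_LEVEL_COST;
   printf("plain:    %f\n", combined_cost_plain(a, b));
   printf("extended: %f\n", combined_cost_extended(a, cost_a, b, cost_b));
   return 0;
}

With the heuristic gone, the builder always uses the combined_cost_plain() form, which per the commit message was the better trade-off everywhere except Quake II RTX.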