Module: Mesa
Branch: main
Commit: 2e4951d3fb517d77b4bb6f3494f9e4a48db1a2ab
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=2e4951d3fb517d77b4bb6f3494f9e4a48db1a2ab

Author: Konstantin Seurer <konstantin.seu...@gmail.com>
Date:   Sun Dec 10 08:46:31 2023 +0100

radv: Remove the BVH depth heuristics

It only helps Quake II RTX and hurts everything else.

Reviewed-by: Friedrich Vock <friedrich.v...@gmx.de>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26481>

---

 src/amd/vulkan/bvh/bvh.h                     |  2 --
 src/amd/vulkan/bvh/leaf.comp                 |  1 -
 src/amd/vulkan/bvh/meson.build               |  7 +----
 src/amd/vulkan/bvh/ploc_internal.comp        | 42 +---------------------------
 src/amd/vulkan/radv_acceleration_structure.c | 28 ++-----------------
 src/amd/vulkan/radv_private.h                |  1 -
 6 files changed, 5 insertions(+), 76 deletions(-)

diff --git a/src/amd/vulkan/bvh/bvh.h b/src/amd/vulkan/bvh/bvh.h
index 52e06ec5db3..b77c6b8d47a 100644
--- a/src/amd/vulkan/bvh/bvh.h
+++ b/src/amd/vulkan/bvh/bvh.h
@@ -107,8 +107,6 @@ struct radv_accel_struct_header {
 
 struct radv_ir_node {
    radv_aabb aabb;
-   /* Generic normalized cost of not merging this node. */
-   float cost;
 };
 
 #define RADV_UNKNOWN_BVH_OFFSET 0xFFFFFFFF
diff --git a/src/amd/vulkan/bvh/leaf.comp b/src/amd/vulkan/bvh/leaf.comp
index 58ab8eeceec..6ef35cf156d 100644
--- a/src/amd/vulkan/bvh/leaf.comp
+++ b/src/amd/vulkan/bvh/leaf.comp
@@ -366,7 +366,6 @@ main(void)
    if (is_active) {
       REF(radv_ir_node) ir_node = INDEX(radv_ir_node, args.ir, primitive_id);
       DEREF(ir_node).aabb = bounds;
-      DEREF(ir_node).cost = 0.0;
    }
 
    uint32_t ir_offset = primitive_id * SIZEOF(radv_ir_node);
diff --git a/src/amd/vulkan/bvh/meson.build b/src/amd/vulkan/bvh/meson.build
index 244bce3195e..17af9fba496 100644
--- a/src/amd/vulkan/bvh/meson.build
+++ b/src/amd/vulkan/bvh/meson.build
@@ -63,12 +63,7 @@ bvh_shaders = [
   [
     'ploc_internal.comp',
     'ploc_internal',
-    ['EXTENDED_SAH=0'],
-  ],
-  [
-    'ploc_internal.comp',
-    'ploc_internal_extended',
-    ['EXTENDED_SAH=1'],
+    [],
   ],
 ]
diff --git a/src/amd/vulkan/bvh/ploc_internal.comp b/src/amd/vulkan/bvh/ploc_internal.comp
index c7c8b5d394e..298fb0a1ef6 100644
--- a/src/amd/vulkan/bvh/ploc_internal.comp
+++ b/src/amd/vulkan/bvh/ploc_internal.comp
@@ -116,8 +116,6 @@ push_node(uint32_t children[2], radv_aabb bounds[2])
    total_bounds.min = vec3(INFINITY);
    total_bounds.max = vec3(-INFINITY);
 
-   float cost = 0.0;
-
    for (uint i = 0; i < 2; ++i) {
       VOID_REF node = OFFSET(args.bvh, ir_id_to_offset(children[i]));
       REF(radv_ir_node) child = REF(radv_ir_node)(node);
@@ -125,15 +123,10 @@ push_node(uint32_t children[2], radv_aabb bounds[2])
       total_bounds.min = min(total_bounds.min, bounds[i].min);
       total_bounds.max = max(total_bounds.max, bounds[i].max);
-      cost += DEREF(child).cost;
-
       DEREF(dst_node).children[i] = children[i];
    }
 
    DEREF(dst_node).base.aabb = total_bounds;
-#if EXTENDED_SAH
-   DEREF(dst_node).base.cost = cost * 0.5 + BVH_LEVEL_COST;
-#endif
    DEREF(dst_node).bvh_offset = RADV_UNKNOWN_BVH_OFFSET;
    return dst_id;
 }
@@ -159,9 +152,6 @@ decode_neighbour_offset(uint32_t encoded_offset)
 #define NUM_PLOC_LDS_ITEMS PLOC_WORKGROUP_SIZE + 4 * PLOC_NEIGHBOURHOOD
 
 shared radv_aabb shared_bounds[NUM_PLOC_LDS_ITEMS];
-#if EXTENDED_SAH
-shared float shared_costs[NUM_PLOC_LDS_ITEMS];
-#endif
 shared uint32_t nearest_neighbour_indices[NUM_PLOC_LDS_ITEMS];
 
 uint32_t
@@ -187,9 +177,6 @@ load_bounds(VOID_REF ids, uint32_t iter, uint32_t task_index, uint32_t lds_base,
       REF(radv_ir_node) node = REF(radv_ir_node)(addr);
       shared_bounds[i - lds_base] = DEREF(node).aabb;
-#if EXTENDED_SAH
-      shared_costs[i - lds_base] = DEREF(node).cost;
-#endif
    }
 }
@@ -199,34 +186,7 @@ combined_node_cost(uint32_t lds_base, uint32_t i, uint32_t j)
    radv_aabb combined_bounds;
    combined_bounds.min = min(shared_bounds[i - lds_base].min, shared_bounds[j - lds_base].min);
    combined_bounds.max = max(shared_bounds[i - lds_base].max, shared_bounds[j - lds_base].max);
-   float area = aabb_surface_area(combined_bounds);
-
-#if EXTENDED_SAH
-   if (area == 0.0)
-      return 0.0;
-
-   /* p_a and p_b are the probabilities that i or j are hit by a ray:
-    * Assuming that the current node is hit (p = 1) and the probability of hitting a node
-    * is proportional to its surface area, p = area * c with p = 1 for the current node.
-    * -> c = 1 / area
-    *
-    * We can use those probabilities to limit the impact of child cost to be proportional to
-    * its hit probability. (Child cost is the cost of not merging a node which increases with
-    * tree depth for internal nodes)
-    *
-    * Dividing area by both relative costs will make it more likely that we merge nodes with
-    * a high child cost.
-    */
-   float p_i = aabb_surface_area(shared_bounds[i - lds_base]) / area;
-   float p_j = aabb_surface_area(shared_bounds[j - lds_base]) / area;
-
-   float combined_cost =
-      (1.0 + shared_costs[i - lds_base] * p_i) * (1.0 + shared_costs[j - lds_base] * p_j);
-
-   return area / combined_cost;
-#else
-   return area;
-#endif
+   return aabb_surface_area(combined_bounds);
 }
 
 shared uint32_t shared_aggregate_sum;
diff --git a/src/amd/vulkan/radv_acceleration_structure.c b/src/amd/vulkan/radv_acceleration_structure.c
index 06612cdf52f..a2981306d58 100644
--- a/src/amd/vulkan/radv_acceleration_structure.c
+++ b/src/amd/vulkan/radv_acceleration_structure.c
@@ -57,10 +57,6 @@ static const uint32_t ploc_spv[] = {
 #include "bvh/ploc_internal.spv.h"
 };
 
-static const uint32_t ploc_extended_spv[] = {
-#include "bvh/ploc_internal_extended.spv.h"
-};
-
 static const uint32_t copy_spv[] = {
 #include "bvh/copy.spv.h"
 };
@@ -87,7 +83,6 @@ enum internal_build_type {
 
 struct build_config {
    enum internal_build_type internal_type;
-   bool extended_sah;
    bool compact;
 };
 
@@ -129,11 +124,6 @@ build_config(uint32_t leaf_count, const VkAccelerationStructureBuildGeometryInfo
    else
       config.internal_type = INTERNAL_BUILD_TYPE_LBVH;
 
-   /* 4^(lds stack entry count) assuming we push 1 node on average. */
-   uint32_t lds_spill_threshold = 1 << (8 * 2);
-   if (leaf_count < lds_spill_threshold)
-      config.extended_sah = true;
-
    if (build_info->flags & VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_COMPACTION_BIT_KHR)
       config.compact = true;
 
@@ -306,7 +296,6 @@ radv_device_finish_accel_struct_build_state(struct radv_device *device)
    struct radv_meta_state *state = &device->meta_state;
    radv_DestroyPipeline(radv_device_to_handle(device), state->accel_struct_build.copy_pipeline, &state->alloc);
    radv_DestroyPipeline(radv_device_to_handle(device), state->accel_struct_build.ploc_pipeline, &state->alloc);
-   radv_DestroyPipeline(radv_device_to_handle(device), state->accel_struct_build.ploc_extended_pipeline, &state->alloc);
    radv_DestroyPipeline(radv_device_to_handle(device), state->accel_struct_build.lbvh_generate_ir_pipeline, &state->alloc);
    radv_DestroyPipeline(radv_device_to_handle(device), state->accel_struct_build.lbvh_main_pipeline, &state->alloc);
@@ -544,12 +533,6 @@ radv_device_init_accel_struct_build_state(struct radv_device *device)
    if (result != VK_SUCCESS)
       goto exit;
 
-   result = create_build_pipeline_spv(device, ploc_extended_spv, sizeof(ploc_extended_spv), sizeof(struct ploc_args),
-                                      &device->meta_state.accel_struct_build.ploc_extended_pipeline,
-                                      &device->meta_state.accel_struct_build.ploc_p_layout);
-   if (result != VK_SUCCESS)
-      goto exit;
-
    result = create_build_pipeline_spv(device, encode_spv, sizeof(encode_spv), sizeof(struct encode_args),
                                       &device->meta_state.accel_struct_build.encode_pipeline,
                                       &device->meta_state.accel_struct_build.encode_p_layout);
    if (result != VK_SUCCESS)
       goto exit;
@@ -1004,19 +987,15 @@ lbvh_build_internal(VkCommandBuffer commandBuffer, uint32_t infoCount,
 static void
 ploc_build_internal(VkCommandBuffer commandBuffer, uint32_t infoCount,
-                    const VkAccelerationStructureBuildGeometryInfoKHR *pInfos, struct bvh_state *bvh_states,
-                    bool extended_sah)
+                    const VkAccelerationStructureBuildGeometryInfoKHR *pInfos, struct bvh_state *bvh_states)
 {
    RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
 
    radv_CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE,
-                        extended_sah ? cmd_buffer->device->meta_state.accel_struct_build.ploc_extended_pipeline
-                                     : cmd_buffer->device->meta_state.accel_struct_build.ploc_pipeline);
+                        cmd_buffer->device->meta_state.accel_struct_build.ploc_pipeline);
 
    for (uint32_t i = 0; i < infoCount; ++i) {
       if (bvh_states[i].config.internal_type != INTERNAL_BUILD_TYPE_PLOC)
         continue;
-      if (bvh_states[i].config.extended_sah != extended_sah)
-         continue;
 
       uint32_t src_scratch_offset = bvh_states[i].scratch_offset;
       uint32_t dst_scratch_offset = (src_scratch_offset == bvh_states[i].scratch.sort_buffer_offset[0])
@@ -1242,8 +1221,7 @@ radv_CmdBuildAccelerationStructuresKHR(VkCommandBuffer commandBuffer, uint32_t i
 
    lbvh_build_internal(commandBuffer, infoCount, pInfos, bvh_states, flush_bits);
 
-   ploc_build_internal(commandBuffer, infoCount, pInfos, bvh_states, false);
-   ploc_build_internal(commandBuffer, infoCount, pInfos, bvh_states, true);
+   ploc_build_internal(commandBuffer, infoCount, pInfos, bvh_states);
 
    cmd_buffer->state.flush_bits |= flush_bits;
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index 426593f8ac8..367eba65632 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -657,7 +657,6 @@ struct radv_meta_state {
       VkPipeline lbvh_generate_ir_pipeline;
       VkPipelineLayout ploc_p_layout;
       VkPipeline ploc_pipeline;
-      VkPipeline ploc_extended_pipeline;
       VkPipelineLayout encode_p_layout;
       VkPipeline encode_pipeline;
       VkPipeline encode_compact_pipeline
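
For readers who want the removed heuristic spelled out: combined_node_cost() used to divide the merged surface area by a factor built from each child's accumulated cost, weighted by its relative hit probability, while push_node() propagated cost = (cost_left + cost_right) * 0.5 + BVH_LEVEL_COST per internal node. The standalone C sketch below restates that math for two boxes so the two variants can be compared; it is not RADV code. The aabb type, the surface_area()/merge() helpers and the BVH_LEVEL_COST value of 1.0 are illustrative stand-ins, not the shader-side definitions.

/* Sketch of the PLOC merge-cost math before and after this commit.
 * Types and the BVH_LEVEL_COST value are placeholders, not the RADV ones. */
#include <stdio.h>

typedef struct { float min[3], max[3]; } aabb;

#define BVH_LEVEL_COST 1.0f /* placeholder value */

static float surface_area(aabb b)
{
   float dx = b.max[0] - b.min[0];
   float dy = b.max[1] - b.min[1];
   float dz = b.max[2] - b.min[2];
   return 2.0f * (dx * dy + dy * dz + dz * dx);
}

static aabb merge(aabb a, aabb b)
{
   aabb r;
   for (int i = 0; i < 3; ++i) {
      r.min[i] = a.min[i] < b.min[i] ? a.min[i] : b.min[i];
      r.max[i] = a.max[i] > b.max[i] ? a.max[i] : b.max[i];
   }
   return r;
}

/* What remains after this commit: the plain surface area of the merged box. */
static float combined_cost_plain(aabb a, aabb b)
{
   return surface_area(merge(a, b));
}

/* The removed "extended SAH": divide the merged area by a factor that grows
 * with the children's accumulated costs, each weighted by its relative hit
 * probability, so expensive (deep) subtrees get merged earlier. */
static float combined_cost_extended(aabb a, float cost_a, aabb b, float cost_b)
{
   float area = surface_area(merge(a, b));
   if (area == 0.0f)
      return 0.0f;
   float p_a = surface_area(a) / area;
   float p_b = surface_area(b) / area;
   return area / ((1.0f + cost_a * p_a) * (1.0f + cost_b * p_b));
}

int main(void)
{
   aabb a = {{0, 0, 0}, {1, 1, 1}};
   aabb b = {{2, 0, 0}, {3, 1, 1}};
   /* Costs would have been propagated as (cost_left + cost_right) * 0.5 + BVH_LEVEL_COST
    * per internal node, starting from 0.0 at the leaves; two example values here. */
   float cost_a = BVH_LEVEL_COST;
   float cost_b = 3.0f * BVH_LEVEL_COST;
   printf("plain:    %f\n", combined_cost_plain(a, b));
   printf("extended: %f\n", combined_cost_extended(a, cost_a, b, cost_b));
   return 0;
}

With the heuristic gone, the builder always uses the combined_cost_plain() form, which per the commit message was the better trade-off everywhere except Quake II RTX.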