Module: Mesa Branch: main Commit: b3c43d6865c8c121c5e4459666410ea1edf3f5b8 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=b3c43d6865c8c121c5e4459666410ea1edf3f5b8
Author: Konstantin Seurer <[email protected]> Date: Wed Aug 23 12:13:27 2023 +0200 radv/bvh/ploc: Load child bounds from LDS The bounds are already in LDS so there is no need to load them from VRAM. Reviewed-by: Friedrich Vock <[email protected]> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24846> --- src/amd/vulkan/bvh/ploc_internal.comp | 47 ++++++++++++++++++++++++----------- 1 file changed, 33 insertions(+), 14 deletions(-) diff --git a/src/amd/vulkan/bvh/ploc_internal.comp b/src/amd/vulkan/bvh/ploc_internal.comp index 67c987e44e9..267a914d89b 100644 --- a/src/amd/vulkan/bvh/ploc_internal.comp +++ b/src/amd/vulkan/bvh/ploc_internal.comp @@ -105,7 +105,7 @@ prefix_scan(uvec4 ballot, REF(ploc_prefix_scan_partition) partitions, uint32_t t #define BVH_LEVEL_COST 0.2 uint32_t -push_node(uint32_t children[2]) +push_node(uint32_t children[2], radv_aabb bounds[2]) { uint32_t internal_node_index = atomicAdd(DEREF(args.header).ir_internal_node_count, 1); uint32_t dst_offset = args.internal_node_offset + internal_node_index * SIZEOF(radv_ir_box_node); @@ -119,16 +119,13 @@ push_node(uint32_t children[2]) float cost = 0.0; for (uint i = 0; i < 2; ++i) { - if (children[i] != RADV_BVH_INVALID_NODE) { - VOID_REF node = OFFSET(args.bvh, ir_id_to_offset(children[i])); - REF(radv_ir_node) child = REF(radv_ir_node)(node); - radv_aabb bounds = DEREF(child).aabb; + VOID_REF node = OFFSET(args.bvh, ir_id_to_offset(children[i])); + REF(radv_ir_node) child = REF(radv_ir_node)(node); - total_bounds.min = min(total_bounds.min, bounds.min); - total_bounds.max = max(total_bounds.max, bounds.max); + total_bounds.min = min(total_bounds.min, bounds[i].min); + total_bounds.max = max(total_bounds.max, bounds[i].max); - cost += DEREF(child).cost; - } + cost += DEREF(child).cost; DEREF(dst_node).children[i] = children[i]; } @@ -244,10 +241,31 @@ main(void) * but sometimes all leaves might be inactive */ if (DEREF(args.header).active_leaf_count <= 2) { if (gl_GlobalInvocationID.x == 0) { - uint32_t children[2] = {RADV_BVH_INVALID_NODE, RADV_BVH_INVALID_NODE}; - for (uint32_t i = 0; i < DEREF(args.header).active_leaf_count; ++i) - children[i] = DEREF(REF(key_id_pair)(INDEX(key_id_pair, src_ids, i))).id; - push_node(children); + uint32_t internal_node_index = atomicAdd(DEREF(args.header).ir_internal_node_count, 1); + uint32_t dst_offset = args.internal_node_offset + internal_node_index * SIZEOF(radv_ir_box_node); + REF(radv_ir_box_node) dst_node = REF(radv_ir_box_node)(OFFSET(args.bvh, dst_offset)); + + radv_aabb total_bounds; + total_bounds.min = vec3(INFINITY); + total_bounds.max = vec3(-INFINITY); + + for (uint32_t i = 0; i < DEREF(args.header).active_leaf_count; i++) { + uint32_t child_id = DEREF(INDEX(key_id_pair, src_ids, i)).id; + + if (child_id != RADV_BVH_INVALID_NODE) { + VOID_REF node = OFFSET(args.bvh, ir_id_to_offset(child_id)); + REF(radv_ir_node) child = REF(radv_ir_node)(node); + radv_aabb bounds = DEREF(child).aabb; + + total_bounds.min = min(total_bounds.min, bounds.min); + total_bounds.max = max(total_bounds.max, bounds.max); + } + + DEREF(dst_node).children[i] = child_id; + } + + DEREF(dst_node).base.aabb = total_bounds; + DEREF(dst_node).bvh_offset = RADV_UNKNOWN_BVH_OFFSET; } return; } @@ -357,8 +375,9 @@ main(void) if (task_index < neighbour_index) { uint32_t neighbour_id = load_id(src_ids, iter, neighbour_index); uint32_t children[2] = {id, neighbour_id}; + radv_aabb bounds[2] = {shared_bounds[task_index - lds_base], shared_bounds[neighbour_index - lds_base]}; - DEREF(REF(uint32_t)(INDEX(uint32_t, dst_ids, task_index))) = push_node(children); + DEREF(REF(uint32_t)(INDEX(uint32_t, dst_ids, task_index))) = push_node(children, bounds); DEREF(REF(uint32_t)(INDEX(uint32_t, dst_ids, neighbour_index))) = RADV_BVH_INVALID_NODE; } else {
