Module: Mesa
Branch: main
Commit: 0dad88b4694cf82664f633187442ec65c14f3812
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=0dad88b4694cf82664f633187442ec65c14f3812

Author: Bas Nieuwenhuizen <[email protected]>
Date:   Tue May 18 13:25:00 2021 +0200

radv: Implement device-side BVH building.

Same naive algorithm as the host build.

Reviewed-by: Samuel Pitoiset <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11078>

---
 src/amd/vulkan/radv_acceleration_structure.c | 830 +++++++++++++++++++++++++++
 src/amd/vulkan/radv_meta.c                   |   7 +
 src/amd/vulkan/radv_meta.h                   |   3 +
 src/amd/vulkan/radv_private.h                |   7 +
 4 files changed, 847 insertions(+)

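At a high level the device build mirrors the host build: a first compute pass writes one leaf node per primitive and records its node id in scratch, then an internal-node pass repeatedly folds groups of four node ids into 128-byte box nodes until a single root remains. A pseudocode-level sketch of that flow; the dispatch_* and flush helpers are placeholders for the dispatches issued below, not RADV functions:

    /* Sketch of the two-pass structure; helper names are illustrative. */
    static void
    build_bvh_sketch(uint32_t leaf_count)
    {
       dispatch_leaf_pass(leaf_count);           /* one thread per primitive */

       uint32_t node_count = leaf_count;
       bool first = true;
       while (first || node_count > 1) {         /* always run once so the header is written */
          uint32_t dst_count = MAX2(1, DIV_ROUND_UP(node_count, 4));
          flush_shader_writes();                 /* CS partial flush between levels */
          dispatch_internal_pass(node_count, dst_count);
          node_count = dst_count;
          first = false;
       }
    }
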
diff --git a/src/amd/vulkan/radv_acceleration_structure.c b/src/amd/vulkan/radv_acceleration_structure.c
index 98ef979378d..15d1a62f085 100644
--- a/src/amd/vulkan/radv_acceleration_structure.c
+++ b/src/amd/vulkan/radv_acceleration_structure.c
@@ -23,6 +23,9 @@
 #include "radv_private.h"
 #include "util/half_float.h"
 
+#include "nir_builder.h"
+#include "radv_cs.h"
+#include "radv_meta.h"
 
 struct radv_accel_struct_header {
    uint32_t root_node_offset;
@@ -589,3 +592,830 @@ radv_BuildAccelerationStructuresKHR(
    }
    return result;
 }
+
+static nir_ssa_def *
+get_indices(nir_builder *b, nir_ssa_def *addr, nir_ssa_def *type, nir_ssa_def *id)
+{
+   const struct glsl_type *uvec3_type = glsl_vector_type(GLSL_TYPE_UINT, 3);
+   nir_variable *result =
+      nir_variable_create(b->shader, nir_var_shader_temp, uvec3_type, "indices");
+
+   nir_push_if(b, nir_ult(b, type, nir_imm_int(b, 2)));
+   nir_push_if(b, nir_ieq(b, type, nir_imm_int(b, VK_INDEX_TYPE_UINT16)));
+   {
+      nir_ssa_def *index_id = nir_umul24(b, id, nir_imm_int(b, 6));
+      nir_ssa_def *indices[3];
+      for (unsigned i = 0; i < 3; ++i) {
+         indices[i] = nir_build_load_global(
+            b, 1, 16, nir_iadd(b, addr, nir_u2u64(b, nir_iadd(b, index_id, nir_imm_int(b, 2 * i)))),
+            .align_mul = 2, .align_offset = 0);
+      }
+      nir_store_var(b, result, nir_u2u32(b, nir_vec(b, indices, 3)), 7);
+   }
+   nir_push_else(b, NULL);
+   {
+      nir_ssa_def *index_id = nir_umul24(b, id, nir_imm_int(b, 12));
+      nir_ssa_def *indices = nir_build_load_global(
+         b, 3, 32, nir_iadd(b, addr, nir_u2u64(b, index_id)), .align_mul = 4, .align_offset = 0);
+      nir_store_var(b, result, indices, 7);
+   }
+   nir_pop_if(b, NULL);
+   nir_push_else(b, NULL);
+   {
+      nir_ssa_def *index_id = nir_umul24(b, id, nir_imm_int(b, 3));
+      nir_ssa_def *indices[] = {
+         index_id,
+         nir_iadd(b, index_id, nir_imm_int(b, 1)),
+         nir_iadd(b, index_id, nir_imm_int(b, 2)),
+      };
+
+      nir_push_if(b, nir_ieq(b, type, nir_imm_int(b, VK_INDEX_TYPE_NONE_KHR)));
+      {
+         nir_store_var(b, result, nir_vec(b, indices, 3), 7);
+      }
+      nir_push_else(b, NULL);
+      {
+         for (unsigned i = 0; i < 3; ++i) {
+            indices[i] = nir_build_load_global(b, 1, 8, nir_iadd(b, addr, nir_u2u64(b, indices[i])),
+                                               .align_mul = 1, .align_offset = 0);
+         }
+         nir_store_var(b, result, nir_u2u32(b, nir_vec(b, indices, 3)), 7);
+      }
+      nir_pop_if(b, NULL);
+   }
+   nir_pop_if(b, NULL);
+   return nir_load_var(b, result);
+}
+
+static void
+get_vertices(nir_builder *b, nir_ssa_def *addresses, nir_ssa_def *format, nir_ssa_def *positions[3])
+{
+   const struct glsl_type *vec3_type = glsl_vector_type(GLSL_TYPE_FLOAT, 3);
+   nir_variable *results[3] = {
+      nir_variable_create(b->shader, nir_var_shader_temp, vec3_type, "vertex0"),
+      nir_variable_create(b->shader, nir_var_shader_temp, vec3_type, "vertex1"),
+      nir_variable_create(b->shader, nir_var_shader_temp, vec3_type, "vertex2")};
+
+   VkFormat formats[] = {
+      VK_FORMAT_R32G32B32_SFLOAT,
+      VK_FORMAT_R32G32B32A32_SFLOAT,
+      VK_FORMAT_R16G16B16_SFLOAT,
+      VK_FORMAT_R16G16B16A16_SFLOAT,
+   };
+
+   for (unsigned f = 0; f < ARRAY_SIZE(formats); ++f) {
+      if (f + 1 < ARRAY_SIZE(formats))
+         nir_push_if(b, nir_ieq(b, format, nir_imm_int(b, formats[f])));
+
+      for (unsigned i = 0; i < 3; ++i) {
+         switch (formats[f]) {
+         case VK_FORMAT_R32G32B32_SFLOAT:
+         case VK_FORMAT_R32G32B32A32_SFLOAT:
+            nir_store_var(b, results[i],
+                          nir_build_load_global(b, 3, 32, nir_channel(b, addresses, i),
+                                                .align_mul = 4, .align_offset = 0),
+                          7);
+            break;
+         case VK_FORMAT_R16G16B16_SFLOAT:
+         case VK_FORMAT_R16G16B16A16_SFLOAT: {
+            nir_ssa_def *values[3];
+            nir_ssa_def *addr = nir_channel(b, addresses, i);
+            for (unsigned j = 0; j < 3; ++j)
+               values[j] =
+                  nir_build_load_global(b, 1, 16, nir_iadd(b, addr, nir_imm_int64(b, j * 2)),
+                                        .align_mul = 2, .align_offset = 0);
+            nir_store_var(b, results[i], nir_f2f32(b, nir_vec(b, values, 3)), 7);
+            break;
+         }
+         default:
+            unreachable("Unhandled format");
+         }
+      }
+      if (f + 1 < ARRAY_SIZE(formats))
+         nir_push_else(b, NULL);
+   }
+   for (unsigned f = 1; f < ARRAY_SIZE(formats); ++f) {
+      nir_pop_if(b, NULL);
+   }
+
+   for (unsigned i = 0; i < 3; ++i)
+      positions[i] = nir_load_var(b, results[i]);
+}
+
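get_indices() above covers three cases: 16-bit and 32-bit index buffers, and builds without an index buffer, where the primitive id itself addresses the vertex data (the remaining branch reads 8-bit indices). A scalar C sketch of what the emitted NIR does per triangle, for reference only:

    /* Host-side sketch of the per-thread index fetch in get_indices(). */
    static void
    fetch_triangle_indices(const void *data, VkIndexType type, uint32_t prim,
                           uint32_t out[3])
    {
       for (unsigned i = 0; i < 3; ++i) {
          uint32_t v = prim * 3 + i;
          switch (type) {
          case VK_INDEX_TYPE_UINT16:   out[i] = ((const uint16_t *)data)[v]; break;
          case VK_INDEX_TYPE_UINT32:   out[i] = ((const uint32_t *)data)[v]; break;
          case VK_INDEX_TYPE_NONE_KHR: out[i] = v;                           break;
          default:                     out[i] = ((const uint8_t *)data)[v];  break;
          }
       }
    }
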
+struct build_primitive_constants {
+   uint64_t node_dst_addr;
+   uint64_t scratch_addr;
+   uint32_t dst_offset;
+   uint32_t dst_scratch_offset;
+   uint32_t geometry_type;
+   uint32_t geometry_id;
+
+   union {
+      struct {
+         uint64_t vertex_addr;
+         uint64_t index_addr;
+         uint64_t transform_addr;
+         uint32_t vertex_stride;
+         uint32_t vertex_format;
+         uint32_t index_format;
+      };
+      struct {
+         uint64_t instance_data;
+      };
+      struct {
+         uint64_t aabb_addr;
+         uint32_t aabb_stride;
+      };
+   };
+};
+
+struct build_internal_constants {
+   uint64_t node_dst_addr;
+   uint64_t scratch_addr;
+   uint32_t dst_offset;
+   uint32_t dst_scratch_offset;
+   uint32_t src_scratch_offset;
+   uint32_t fill_header;
+};
+
+/* This inverts a 3x3 matrix using cofactors, as in e.g.
+ * https://www.mathsisfun.com/algebra/matrix-inverse-minors-cofactors-adjugate.html */
+static void
+nir_invert_3x3(nir_builder *b, nir_ssa_def *in[3][3], nir_ssa_def *out[3][3])
+{
+   nir_ssa_def *cofactors[3][3];
+   for (unsigned i = 0; i < 3; ++i) {
+      for (unsigned j = 0; j < 3; ++j) {
+         cofactors[i][j] =
+            nir_fsub(b, nir_fmul(b, in[(i + 1) % 3][(j + 1) % 3], in[(i + 2) % 3][(j + 2) % 3]),
+                     nir_fmul(b, in[(i + 1) % 3][(j + 2) % 3], in[(i + 2) % 3][(j + 1) % 3]));
+      }
+   }
+
+   nir_ssa_def *det = NULL;
+   for (unsigned i = 0; i < 3; ++i) {
+      nir_ssa_def *det_part = nir_fmul(b, in[0][i], cofactors[0][i]);
+      det = det ? nir_fadd(b, det, det_part) : det_part;
+   }
+
+   nir_ssa_def *det_inv = nir_frcp(b, det);
+   for (unsigned i = 0; i < 3; ++i) {
+      for (unsigned j = 0; j < 3; ++j) {
+         out[i][j] = nir_fmul(b, cofactors[j][i], det_inv);
+      }
+   }
+}
+
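For reference, the same computation in scalar C: the cyclic (i+1, i+2) indexing folds the cofactor signs in, the determinant is expanded along row 0, and the transposed cofactor matrix (the adjugate) divided by the determinant gives the inverse:

    /* Scalar reference for the cofactor inversion above. */
    static void
    invert_3x3(const float in[3][3], float out[3][3])
    {
       float cof[3][3];
       for (unsigned i = 0; i < 3; ++i)
          for (unsigned j = 0; j < 3; ++j)
             cof[i][j] = in[(i + 1) % 3][(j + 1) % 3] * in[(i + 2) % 3][(j + 2) % 3] -
                         in[(i + 1) % 3][(j + 2) % 3] * in[(i + 2) % 3][(j + 1) % 3];

       /* Determinant expanded along row 0. */
       float det = in[0][0] * cof[0][0] + in[0][1] * cof[0][1] + in[0][2] * cof[0][2];

       for (unsigned i = 0; i < 3; ++i)
          for (unsigned j = 0; j < 3; ++j)
             out[i][j] = cof[j][i] / det; /* adjugate = transposed cofactors */
    }
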
+static nir_shader *
+build_leaf_shader(struct radv_device *dev)
+{
+   const struct glsl_type *vec3_type = glsl_vector_type(GLSL_TYPE_FLOAT, 3);
+   nir_builder b =
+      nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "accel_build_leaf_shader");
+
+   b.shader->info.workgroup_size[0] = 64;
+   b.shader->info.workgroup_size[1] = 1;
+   b.shader->info.workgroup_size[2] = 1;
+
+   nir_ssa_def *pconst0 =
+      nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .base = 0, .range = 16);
+   nir_ssa_def *pconst1 =
+      nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .base = 16, .range = 16);
+   nir_ssa_def *pconst2 =
+      nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .base = 32, .range = 16);
+   nir_ssa_def *pconst3 =
+      nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .base = 48, .range = 16);
+   nir_ssa_def *pconst4 =
+      nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 64, .range = 4);
+
+   nir_ssa_def *geom_type = nir_channel(&b, pconst1, 2);
+   nir_ssa_def *node_dst_addr = nir_pack_64_2x32(&b, nir_channels(&b, pconst0, 3));
+   nir_ssa_def *scratch_addr = nir_pack_64_2x32(&b, nir_channels(&b, pconst0, 12));
+   nir_ssa_def *node_dst_offset = nir_channel(&b, pconst1, 0);
+   nir_ssa_def *scratch_offset = nir_channel(&b, pconst1, 1);
+   nir_ssa_def *geometry_id = nir_channel(&b, pconst1, 3);
+
+   nir_ssa_def *global_id =
+      nir_iadd(&b,
+               nir_umul24(&b, nir_channels(&b, nir_load_workgroup_id(&b, 32), 1),
+                          nir_imm_int(&b, b.shader->info.workgroup_size[0])),
+               nir_channels(&b, nir_load_local_invocation_id(&b), 1));
+   scratch_addr = nir_iadd(
+      &b, scratch_addr,
+      nir_u2u64(&b, nir_iadd(&b, scratch_offset, nir_umul24(&b, global_id, nir_imm_int(&b, 4)))));
+
+   nir_push_if(&b, nir_ieq(&b, geom_type, nir_imm_int(&b, VK_GEOMETRY_TYPE_TRIANGLES_KHR)));
+   { /* Triangles */
+      nir_ssa_def *vertex_addr = nir_pack_64_2x32(&b, nir_channels(&b, pconst2, 3));
+      nir_ssa_def *index_addr = nir_pack_64_2x32(&b, nir_channels(&b, pconst2, 12));
+      nir_ssa_def *transform_addr = nir_pack_64_2x32(&b, nir_channels(&b, pconst3, 3));
+      nir_ssa_def *vertex_stride = nir_channel(&b, pconst3, 2);
+      nir_ssa_def *vertex_format = nir_channel(&b, pconst3, 3);
+      nir_ssa_def *index_format = nir_channel(&b, pconst4, 0);
+      unsigned repl_swizzle[4] = {0, 0, 0, 0};
+
+      nir_ssa_def *node_offset =
+         nir_iadd(&b, node_dst_offset, nir_umul24(&b, global_id, nir_imm_int(&b, 64)));
+      nir_ssa_def *triangle_node_dst_addr = nir_iadd(&b, node_dst_addr, nir_u2u64(&b, node_offset));
+
+      nir_ssa_def *indices = get_indices(&b, index_addr, index_format, global_id);
+      nir_ssa_def *vertex_addresses = nir_iadd(
+         &b, nir_u2u64(&b, nir_imul(&b, indices, nir_swizzle(&b, vertex_stride, repl_swizzle, 3))),
+         nir_swizzle(&b, vertex_addr, repl_swizzle, 3));
+      nir_ssa_def *positions[3];
+      get_vertices(&b, vertex_addresses, vertex_format, positions);
+
+      nir_ssa_def *node_data[16];
+      memset(node_data, 0, sizeof(node_data));
+
+      nir_variable *transform[] = {
+         nir_variable_create(b.shader, nir_var_shader_temp, glsl_vec4_type(), "transform0"),
+         nir_variable_create(b.shader, nir_var_shader_temp, glsl_vec4_type(), "transform1"),
+         nir_variable_create(b.shader, nir_var_shader_temp, glsl_vec4_type(), "transform2"),
+      };
+      nir_store_var(&b, transform[0], nir_imm_vec4(&b, 1.0, 0.0, 0.0, 0.0), 0xf);
+      nir_store_var(&b, transform[1], nir_imm_vec4(&b, 0.0, 1.0, 0.0, 0.0), 0xf);
+      nir_store_var(&b, transform[2], nir_imm_vec4(&b, 0.0, 0.0, 1.0, 0.0), 0xf);
+
+      nir_push_if(&b, nir_ine(&b, transform_addr, nir_imm_int64(&b, 0)));
+      nir_store_var(
+         &b, transform[0],
+         nir_build_load_global(&b, 4, 32, nir_iadd(&b, transform_addr, nir_imm_int64(&b, 0)),
+                               .align_mul = 4, .align_offset = 0),
+         0xf);
+      nir_store_var(
+         &b, transform[1],
+         nir_build_load_global(&b, 4, 32, nir_iadd(&b, transform_addr, nir_imm_int64(&b, 16)),
+                               .align_mul = 4, .align_offset = 0),
+         0xf);
+      nir_store_var(
+         &b, transform[2],
+         nir_build_load_global(&b, 4, 32, nir_iadd(&b, transform_addr, nir_imm_int64(&b, 32)),
+                               .align_mul = 4, .align_offset = 0),
+         0xf);
+      nir_pop_if(&b, NULL);
+
+      for (unsigned i = 0; i < 3; ++i)
+         for (unsigned j = 0; j < 3; ++j)
+            node_data[i * 3 + j] = nir_fdph(&b, positions[i], nir_load_var(&b, transform[j]));
+
+      node_data[12] = global_id;
+      node_data[13] = geometry_id;
+      node_data[15] = nir_imm_int(&b, 9);
+      for (unsigned i = 0; i < ARRAY_SIZE(node_data); ++i)
+         if (!node_data[i])
+            node_data[i] = nir_imm_int(&b, 0);
+
+      for (unsigned i = 0; i < 4; ++i) {
+         nir_build_store_global(&b, nir_vec(&b, node_data + i * 4, 4),
+                                nir_iadd(&b, triangle_node_dst_addr, nir_imm_int64(&b, i * 16)),
+                                .write_mask = 15, .align_mul = 16, .align_offset = 0);
+      }
+
+      nir_ssa_def *node_id = nir_ushr(&b, node_offset, nir_imm_int(&b, 3));
+      nir_build_store_global(&b, node_id, scratch_addr, .write_mask = 1, .align_mul = 4,
+                             .align_offset = 0);
+   }
+   nir_push_else(&b, NULL);
+   nir_push_if(&b, nir_ieq(&b, geom_type, nir_imm_int(&b, VK_GEOMETRY_TYPE_AABBS_KHR)));
+   { /* AABBs */
+      nir_ssa_def *aabb_addr = nir_pack_64_2x32(&b, nir_channels(&b, pconst2, 3));
+      nir_ssa_def *aabb_stride = nir_channel(&b, pconst2, 2);
+
+      nir_ssa_def *node_offset =
+         nir_iadd(&b, node_dst_offset, nir_umul24(&b, global_id, nir_imm_int(&b, 64)));
+      nir_ssa_def *aabb_node_dst_addr = nir_iadd(&b, node_dst_addr, nir_u2u64(&b, node_offset));
+      nir_ssa_def *node_id =
+         nir_iadd(&b, nir_ushr(&b, node_offset, nir_imm_int(&b, 3)), nir_imm_int(&b, 7));
+      nir_build_store_global(&b, node_id, scratch_addr, .write_mask = 1, .align_mul = 4,
+                             .align_offset = 0);
+
+      aabb_addr = nir_iadd(&b, aabb_addr, nir_u2u64(&b, nir_imul(&b, aabb_stride, global_id)));
+
+      nir_ssa_def *min_bound =
+         nir_build_load_global(&b, 3, 32, nir_iadd(&b, aabb_addr, nir_imm_int64(&b, 0)),
+                               .align_mul = 4, .align_offset = 0);
+      nir_ssa_def *max_bound =
+         nir_build_load_global(&b, 3, 32, nir_iadd(&b, aabb_addr, nir_imm_int64(&b, 12)),
+                               .align_mul = 4, .align_offset = 0);
+
+      nir_ssa_def *values[] = {nir_channel(&b, min_bound, 0),
+                               nir_channel(&b, min_bound, 1),
+                               nir_channel(&b, min_bound, 2),
+                               nir_channel(&b, max_bound, 0),
+                               nir_channel(&b, max_bound, 1),
+                               nir_channel(&b, max_bound, 2),
+                               global_id,
+                               geometry_id};
+
+      nir_build_store_global(&b, nir_vec(&b, values + 0, 4),
+                             nir_iadd(&b, aabb_node_dst_addr, nir_imm_int64(&b, 0)),
+                             .write_mask = 15, .align_mul = 16, .align_offset = 0);
+      nir_build_store_global(&b, nir_vec(&b, values + 4, 4),
+                             nir_iadd(&b, aabb_node_dst_addr, nir_imm_int64(&b, 16)),
+                             .write_mask = 15, .align_mul = 16, .align_offset = 0);
+   }
+   nir_push_else(&b, NULL);
+   { /* Instances */
+
+      nir_ssa_def *instance_addr =
+         nir_iadd(&b, nir_pack_64_2x32(&b, nir_channels(&b, pconst2, 3)),
+                  nir_u2u64(&b, nir_imul(&b, global_id, nir_imm_int(&b, 64))));
+      nir_ssa_def *inst_transform[] = {
+         nir_build_load_global(&b, 4, 32, nir_iadd(&b, instance_addr, nir_imm_int64(&b, 0)),
+                               .align_mul = 4, .align_offset = 0),
+         nir_build_load_global(&b, 4, 32, nir_iadd(&b, instance_addr, nir_imm_int64(&b, 16)),
+                               .align_mul = 4, .align_offset = 0),
+         nir_build_load_global(&b, 4, 32, nir_iadd(&b, instance_addr, nir_imm_int64(&b, 32)),
+                               .align_mul = 4, .align_offset = 0)};
+      nir_ssa_def *inst3 =
+         nir_build_load_global(&b, 4, 32, nir_iadd(&b, instance_addr, nir_imm_int64(&b, 48)),
+                               .align_mul = 4, .align_offset = 0);
+
+      nir_ssa_def *node_offset =
+         nir_iadd(&b, node_dst_offset, nir_umul24(&b, global_id, nir_imm_int(&b, 128)));
+      node_dst_addr = nir_iadd(&b, node_dst_addr, nir_u2u64(&b, node_offset));
+      nir_ssa_def *node_id =
+         nir_iadd(&b, nir_ushr(&b, node_offset, nir_imm_int(&b, 3)), nir_imm_int(&b, 6));
+      nir_build_store_global(&b, node_id, scratch_addr, .write_mask = 1, .align_mul = 4,
+                             .align_offset = 0);
+
+      nir_variable *bounds[2] = {
+         nir_variable_create(b.shader, nir_var_shader_temp, vec3_type, "min_bound"),
+         nir_variable_create(b.shader, nir_var_shader_temp, vec3_type, "max_bound"),
+      };
+
+      nir_store_var(&b, bounds[0], nir_channels(&b, nir_imm_vec4(&b, NAN, NAN, NAN, NAN), 7), 7);
+      nir_store_var(&b, bounds[1], nir_channels(&b, nir_imm_vec4(&b, NAN, NAN, NAN, NAN), 7), 7);
+
+      nir_ssa_def *header_addr = nir_pack_64_2x32(&b, nir_channels(&b, inst3, 12));
+      nir_push_if(&b, nir_ine(&b, header_addr, nir_imm_int64(&b, 0)));
+      nir_ssa_def *header_root_offset =
+         nir_build_load_global(&b, 1, 32, nir_iadd(&b, header_addr, nir_imm_int64(&b, 0)),
+                               .align_mul = 4, .align_offset = 0);
+      nir_ssa_def *header_min =
+         nir_build_load_global(&b, 3, 32, nir_iadd(&b, header_addr, nir_imm_int64(&b, 8)),
+                               .align_mul = 4, .align_offset = 0);
+      nir_ssa_def *header_max =
+         nir_build_load_global(&b, 3, 32, nir_iadd(&b, header_addr, nir_imm_int64(&b, 20)),
+                               .align_mul = 4, .align_offset = 0);
+
+      nir_ssa_def *bound_defs[2][3];
+      for (unsigned i = 0; i < 3; ++i) {
+         bound_defs[0][i] = bound_defs[1][i] = nir_channel(&b, inst_transform[i], 3);
+
+         nir_ssa_def *mul_a = nir_fmul(&b, nir_channels(&b, inst_transform[i], 7), header_min);
+         nir_ssa_def *mul_b = nir_fmul(&b, nir_channels(&b, inst_transform[i], 7), header_max);
+         nir_ssa_def *mi = nir_fmin(&b, mul_a, mul_b);
+         nir_ssa_def *ma = nir_fmax(&b, mul_a, mul_b);
+         for (unsigned j = 0; j < 3; ++j) {
+            bound_defs[0][i] = nir_fadd(&b, bound_defs[0][i], nir_channel(&b, mi, j));
+            bound_defs[1][i] = nir_fadd(&b, bound_defs[1][i], nir_channel(&b, ma, j));
+         }
+      }
+
+      nir_store_var(&b, bounds[0], nir_vec(&b, bound_defs[0], 3), 7);
+      nir_store_var(&b, bounds[1], nir_vec(&b, bound_defs[1], 3), 7);
+
+      nir_ssa_def *m_in[3][3], *m_out[3][3], *m_vec[3][4];
+      for (unsigned i = 0; i < 3; ++i)
+         for (unsigned j = 0; j < 3; ++j)
+            m_in[i][j] = nir_channel(&b, inst_transform[i], j);
+      nir_invert_3x3(&b, m_in, m_out);
+      for (unsigned i = 0; i < 3; ++i) {
+         for (unsigned j = 0; j < 3; ++j)
+            m_vec[i][j] = m_out[i][j];
+         m_vec[i][3] = nir_channel(&b, inst_transform[i], 3);
+      }
+
+      for (unsigned i = 0; i < 3; ++i) {
+         nir_build_store_global(&b, nir_vec(&b, m_vec[i], 4),
+                                nir_iadd(&b, node_dst_addr, nir_imm_int64(&b, 16 + 16 * i)),
+                                .write_mask = 0xf, .align_mul = 4, .align_offset = 0);
+      }
+
+      nir_ssa_def *out0[4] = {
+         nir_ior(&b, nir_channel(&b, nir_unpack_64_2x32(&b, header_addr), 0), header_root_offset),
+         nir_channel(&b, nir_unpack_64_2x32(&b, header_addr), 1), nir_channel(&b, inst3, 0),
+         nir_channel(&b, inst3, 1)};
+      nir_build_store_global(&b, nir_vec(&b, out0, 4),
+                             nir_iadd(&b, node_dst_addr, nir_imm_int64(&b, 0)), .write_mask = 0xf,
+                             .align_mul = 4, .align_offset = 0);
+      nir_build_store_global(&b, global_id, nir_iadd(&b, node_dst_addr, nir_imm_int64(&b, 88)),
+                             .write_mask = 0x1, .align_mul = 4, .align_offset = 0);
+      nir_pop_if(&b, NULL);
+      nir_build_store_global(&b, nir_load_var(&b, bounds[0]),
+                             nir_iadd(&b, node_dst_addr, nir_imm_int64(&b, 64)), .write_mask = 0x7,
+                             .align_mul = 4, .align_offset = 0);
+      nir_build_store_global(&b, nir_load_var(&b, bounds[1]),
+                             nir_iadd(&b, node_dst_addr, nir_imm_int64(&b, 76)), .write_mask = 0x7,
+                             .align_mul = 4, .align_offset = 0);
+   }
+   nir_pop_if(&b, NULL);
+   nir_pop_if(&b, NULL);
+
+   return b.shader;
+}
+
+static void
+determine_bounds(nir_builder *b, nir_ssa_def *node_addr, nir_ssa_def *node_id,
+                 nir_variable *bounds_vars[2])
+{
+   nir_ssa_def *node_type = nir_iand(b, node_id, nir_imm_int(b, 7));
+   node_addr = nir_iadd(
+      b, node_addr,
+      nir_u2u64(b, nir_ishl(b, nir_iand(b, node_id, nir_imm_int(b, ~7u)), nir_imm_int(b, 3))));
+
+   nir_push_if(b, nir_ieq(b, node_type, nir_imm_int(b, 0)));
+   {
+      nir_ssa_def *positions[3];
+      for (unsigned i = 0; i < 3; ++i)
+         positions[i] =
+            nir_build_load_global(b, 3, 32, nir_iadd(b, node_addr, nir_imm_int64(b, i * 12)),
+                                  .align_mul = 4, .align_offset = 0);
+      nir_ssa_def *bounds[] = {positions[0], positions[0]};
+      for (unsigned i = 1; i < 3; ++i) {
+         bounds[0] = nir_fmin(b, bounds[0], positions[i]);
+         bounds[1] = nir_fmax(b, bounds[1], positions[i]);
+      }
+      nir_store_var(b, bounds_vars[0], bounds[0], 7);
+      nir_store_var(b, bounds_vars[1], bounds[1], 7);
+   }
+   nir_push_else(b, NULL);
+   nir_push_if(b, nir_ieq(b, node_type, nir_imm_int(b, 5)));
+   {
+      nir_ssa_def *input_bounds[4][2];
+      for (unsigned i = 0; i < 4; ++i)
+         for (unsigned j = 0; j < 2; ++j)
+            input_bounds[i][j] = nir_build_load_global(
+               b, 3, 32, nir_iadd(b, node_addr, nir_imm_int64(b, 16 + i * 24 + j * 12)),
+               .align_mul = 4, .align_offset = 0);
+      nir_ssa_def *bounds[] = {input_bounds[0][0], input_bounds[0][1]};
+      for (unsigned i = 1; i < 4; ++i) {
+         bounds[0] = nir_fmin(b, bounds[0], input_bounds[i][0]);
+         bounds[1] = nir_fmax(b, bounds[1], input_bounds[i][1]);
+      }
+
+      nir_store_var(b, bounds_vars[0], bounds[0], 7);
+      nir_store_var(b, bounds_vars[1], bounds[1], 7);
+   }
+   nir_push_else(b, NULL);
+   nir_push_if(b, nir_ieq(b, node_type, nir_imm_int(b, 6)));
+   { /* Instances */
+      nir_ssa_def *bounds[2];
+      for (unsigned i = 0; i < 2; ++i)
+         bounds[i] =
+            nir_build_load_global(b, 3, 32, nir_iadd(b, node_addr, nir_imm_int64(b, 64 + i * 12)),
+                                  .align_mul = 4, .align_offset = 0);
+      nir_store_var(b, bounds_vars[0], bounds[0], 7);
+      nir_store_var(b, bounds_vars[1], bounds[1], 7);
+   }
+   nir_push_else(b, NULL);
+   { /* AABBs */
+      nir_ssa_def *bounds[2];
+      for (unsigned i = 0; i < 2; ++i)
+         bounds[i] =
+            nir_build_load_global(b, 3, 32, nir_iadd(b, node_addr, nir_imm_int64(b, i * 12)),
+                                  .align_mul = 4, .align_offset = 0);
+      nir_store_var(b, bounds_vars[0], bounds[0], 7);
+      nir_store_var(b, bounds_vars[1], bounds[1], 7);
+   }
+   nir_pop_if(b, NULL);
+   nir_pop_if(b, NULL);
+   nir_pop_if(b, NULL);
+}
+
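determine_bounds() decodes the node id scheme used throughout this patch: the low three bits carry the node type (0 = triangle, 5 = internal box, 6 = instance, 7 = AABB, matching the +5/+6/+7 additions in the shaders above), and the remaining bits give the node's offset in 64-byte units. A decoding sketch, derived from the shader code rather than any published layout:

    static inline uint32_t node_type(uint32_t node_id)
    {
       return node_id & 7u;
    }

    static inline uint64_t node_byte_offset(uint32_t node_id)
    {
       /* (node_id & ~7) << 3 == (node_id >> 3) * 64 */
       return (uint64_t)(node_id & ~7u) << 3;
    }
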
+static nir_shader *
+build_internal_shader(struct radv_device *dev)
+{
+   const struct glsl_type *vec3_type = glsl_vector_type(GLSL_TYPE_FLOAT, 3);
+   nir_builder b =
+      nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "accel_build_internal_shader");
+
+   b.shader->info.workgroup_size[0] = 64;
+   b.shader->info.workgroup_size[1] = 1;
+   b.shader->info.workgroup_size[2] = 1;
+
+   /*
+    * push constants:
+    *   i32 x 2: node dst address
+    *   i32 x 2: scratch address
+    *   i32: dst offset
+    *   i32: dst scratch offset
+    *   i32: src scratch offset
+    *   i32: src_node_count | (fill_header << 31)
+    */
+   nir_ssa_def *pconst0 =
+      nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .base = 0, .range = 16);
+   nir_ssa_def *pconst1 =
+      nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .base = 16, .range = 16);
+
+   nir_ssa_def *node_addr = nir_pack_64_2x32(&b, nir_channels(&b, pconst0, 3));
+   nir_ssa_def *scratch_addr = nir_pack_64_2x32(&b, nir_channels(&b, pconst0, 12));
+   nir_ssa_def *node_dst_offset = nir_channel(&b, pconst1, 0);
+   nir_ssa_def *dst_scratch_offset = nir_channel(&b, pconst1, 1);
+   nir_ssa_def *src_scratch_offset = nir_channel(&b, pconst1, 2);
+   nir_ssa_def *src_node_count =
+      nir_iand(&b, nir_channel(&b, pconst1, 3), nir_imm_int(&b, 0x7FFFFFFFU));
+   nir_ssa_def *fill_header =
+      nir_ine(&b, nir_iand(&b, nir_channel(&b, pconst1, 3), nir_imm_int(&b, 0x80000000U)),
+              nir_imm_int(&b, 0));
+
+   nir_ssa_def *global_id =
+      nir_iadd(&b,
+               nir_umul24(&b, nir_channels(&b, nir_load_workgroup_id(&b, 32), 1),
+                          nir_imm_int(&b, b.shader->info.workgroup_size[0])),
+               nir_channels(&b, nir_load_local_invocation_id(&b), 1));
+   nir_ssa_def *src_idx = nir_imul(&b, global_id, nir_imm_int(&b, 4));
+   nir_ssa_def *src_count = nir_umin(&b, nir_imm_int(&b, 4), nir_isub(&b, src_node_count, src_idx));
+
+   nir_ssa_def *node_offset =
+      nir_iadd(&b, node_dst_offset, nir_ishl(&b, global_id, nir_imm_int(&b, 7)));
+   nir_ssa_def *node_dst_addr = nir_iadd(&b, node_addr, nir_u2u64(&b, node_offset));
+   nir_ssa_def *src_nodes = nir_build_load_global(
+      &b, 4, 32,
+      nir_iadd(&b, scratch_addr,
+               nir_u2u64(&b, nir_iadd(&b, src_scratch_offset,
+                                      nir_ishl(&b, global_id, nir_imm_int(&b, 4))))),
+      .align_mul = 4, .align_offset = 0);
+
+   nir_build_store_global(&b, src_nodes, nir_iadd(&b, node_dst_addr, nir_imm_int64(&b, 0)),
+                          .write_mask = 0xf, .align_mul = 4, .align_offset = 0);
+
+   nir_ssa_def *total_bounds[2] = {
+      nir_channels(&b, nir_imm_vec4(&b, NAN, NAN, NAN, NAN), 7),
+      nir_channels(&b, nir_imm_vec4(&b, NAN, NAN, NAN, NAN), 7),
+   };
+
+   for (unsigned i = 0; i < 4; ++i) {
+      nir_variable *bounds[2] = {
+         nir_variable_create(b.shader, nir_var_shader_temp, vec3_type, "min_bound"),
+         nir_variable_create(b.shader, nir_var_shader_temp, vec3_type, "max_bound"),
+      };
+      nir_store_var(&b, bounds[0], nir_channels(&b, nir_imm_vec4(&b, NAN, NAN, NAN, NAN), 7), 7);
+      nir_store_var(&b, bounds[1], nir_channels(&b, nir_imm_vec4(&b, NAN, NAN, NAN, NAN), 7), 7);
+
+      nir_push_if(&b, nir_ilt(&b, nir_imm_int(&b, i), src_count));
+      determine_bounds(&b, node_addr, nir_channel(&b, src_nodes, i), bounds);
+      nir_pop_if(&b, NULL);
+      nir_build_store_global(&b, nir_load_var(&b, bounds[0]),
+                             nir_iadd(&b, node_dst_addr, nir_imm_int64(&b, 16 + 24 * i)),
+                             .write_mask = 0x7, .align_mul = 4, .align_offset = 0);
+      nir_build_store_global(&b, nir_load_var(&b, bounds[1]),
+                             nir_iadd(&b, node_dst_addr, nir_imm_int64(&b, 28 + 24 * i)),
+                             .write_mask = 0x7, .align_mul = 4, .align_offset = 0);
+      total_bounds[0] = nir_fmin(&b, total_bounds[0], nir_load_var(&b, bounds[0]));
+      total_bounds[1] = nir_fmax(&b, total_bounds[1], nir_load_var(&b, bounds[1]));
+   }
+
+   nir_ssa_def *node_id =
+      nir_iadd(&b, nir_ushr(&b, node_offset, nir_imm_int(&b, 3)), nir_imm_int(&b, 5));
+   nir_ssa_def *dst_scratch_addr = nir_iadd(
+      &b, scratch_addr,
+      nir_u2u64(&b, nir_iadd(&b, dst_scratch_offset, nir_ishl(&b, global_id, nir_imm_int(&b, 2)))));
+   nir_build_store_global(&b, node_id, dst_scratch_addr, .write_mask = 1, .align_mul = 4,
+                          .align_offset = 0);
+
+   nir_push_if(&b, fill_header);
+   nir_build_store_global(&b, node_id, node_addr, .write_mask = 1, .align_mul = 4,
+                          .align_offset = 0);
+   nir_build_store_global(&b, total_bounds[0], nir_iadd(&b, node_addr, nir_imm_int64(&b, 8)),
+                          .write_mask = 7, .align_mul = 4, .align_offset = 0);
+   nir_build_store_global(&b, total_bounds[1], nir_iadd(&b, node_addr, nir_imm_int64(&b, 20)),
+                          .write_mask = 7, .align_mul = 4, .align_offset = 0);
+   nir_pop_if(&b, NULL);
+   return b.shader;
+}
+
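Each thread of this shader consumes up to four source node ids from scratch (src_idx = 4 * global_id) and emits one 128-byte box node; the last push-constant dword packs the source node count with the fill_header flag, exactly as unpacked at the top of the shader. A host-side helper matching that packing might look like this (the helper name is illustrative, not RADV code):

    /* Assumed packing for build_internal_constants::fill_header. */
    static inline uint32_t
    pack_src_count_and_header(uint32_t src_node_count, bool fill_header)
    {
       assert(src_node_count < 0x80000000u);
       return src_node_count | (fill_header ? 0x80000000u : 0u);
    }
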
+void
+radv_device_finish_accel_struct_build_state(struct radv_device *device)
+{
+   struct radv_meta_state *state = &device->meta_state;
+   radv_DestroyPipeline(radv_device_to_handle(device), state->accel_struct_build.internal_pipeline,
+                        &state->alloc);
+   radv_DestroyPipeline(radv_device_to_handle(device), state->accel_struct_build.leaf_pipeline,
+                        &state->alloc);
+   radv_DestroyPipelineLayout(radv_device_to_handle(device),
+                              state->accel_struct_build.internal_p_layout, &state->alloc);
+   radv_DestroyPipelineLayout(radv_device_to_handle(device),
+                              state->accel_struct_build.leaf_p_layout, &state->alloc);
+}
+
+VkResult
+radv_device_init_accel_struct_build_state(struct radv_device *device)
+{
+   VkResult result;
+   nir_shader *leaf_cs = build_leaf_shader(device);
+   nir_shader *internal_cs = build_internal_shader(device);
+
+   const VkPipelineLayoutCreateInfo leaf_pl_create_info = {
+      .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+      .setLayoutCount = 0,
+      .pushConstantRangeCount = 1,
+      .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0,
+                                                    sizeof(struct build_primitive_constants)},
+   };
+
+   result = radv_CreatePipelineLayout(radv_device_to_handle(device), &leaf_pl_create_info,
+                                      &device->meta_state.alloc,
+                                      &device->meta_state.accel_struct_build.leaf_p_layout);
+   if (result != VK_SUCCESS)
+      goto fail;
+
+   VkPipelineShaderStageCreateInfo leaf_shader_stage = {
+      .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+      .stage = VK_SHADER_STAGE_COMPUTE_BIT,
+      .module = vk_shader_module_handle_from_nir(leaf_cs),
+      .pName = "main",
+      .pSpecializationInfo = NULL,
+   };
+
+   VkComputePipelineCreateInfo leaf_pipeline_info = {
+      .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+      .stage = leaf_shader_stage,
+      .flags = 0,
+      .layout = device->meta_state.accel_struct_build.leaf_p_layout,
+   };
+
+   result = radv_CreateComputePipelines(
+      radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
+      &leaf_pipeline_info, NULL, &device->meta_state.accel_struct_build.leaf_pipeline);
+   if (result != VK_SUCCESS)
+      goto fail;
+
+   const VkPipelineLayoutCreateInfo internal_pl_create_info = {
+      .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+      .setLayoutCount = 0,
+      .pushConstantRangeCount = 1,
+      .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0,
+                                                    sizeof(struct build_internal_constants)},
+   };
+
+   result = radv_CreatePipelineLayout(radv_device_to_handle(device), &internal_pl_create_info,
+                                      &device->meta_state.alloc,
+                                      &device->meta_state.accel_struct_build.internal_p_layout);
+   if (result != VK_SUCCESS)
+      goto fail;
+
+   VkPipelineShaderStageCreateInfo internal_shader_stage = {
+      .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+      .stage = VK_SHADER_STAGE_COMPUTE_BIT,
+      .module = vk_shader_module_handle_from_nir(internal_cs),
+      .pName = "main",
+      .pSpecializationInfo = NULL,
+   };
+
+   VkComputePipelineCreateInfo internal_pipeline_info = {
+      .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+      .stage = internal_shader_stage,
+      .flags = 0,
+      .layout = device->meta_state.accel_struct_build.internal_p_layout,
+   };
+
+   result = radv_CreateComputePipelines(
+      radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
+      &internal_pipeline_info, NULL, &device->meta_state.accel_struct_build.internal_pipeline);
+   if (result != VK_SUCCESS)
+      goto fail;
+
+   return VK_SUCCESS;
+
+fail:
+   radv_device_finish_accel_struct_build_state(device);
+   ralloc_free(internal_cs);
+   ralloc_free(leaf_cs);
+   return result;
+}
+
+struct bvh_state {
+   uint32_t node_offset;
+   uint32_t node_count;
+   uint32_t scratch_offset;
+};
+
+void
+radv_CmdBuildAccelerationStructuresKHR(
+   VkCommandBuffer commandBuffer, uint32_t infoCount,
+   const VkAccelerationStructureBuildGeometryInfoKHR *pInfos,
+   const VkAccelerationStructureBuildRangeInfoKHR *const *ppBuildRangeInfos)
+{
+   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+   struct radv_meta_saved_state saved_state;
+
+   radv_meta_save(
+      &saved_state, cmd_buffer,
+      RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_CONSTANTS);
+   struct bvh_state *bvh_states = calloc(infoCount, sizeof(struct bvh_state));
+
+   radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
+                        cmd_buffer->device->meta_state.accel_struct_build.leaf_pipeline);
+
+   for (uint32_t i = 0; i < infoCount; ++i) {
+      RADV_FROM_HANDLE(radv_acceleration_structure, accel_struct,
+                       pInfos[i].dstAccelerationStructure);
+
+      struct build_primitive_constants prim_consts = {
+         .node_dst_addr = radv_accel_struct_get_va(accel_struct),
+         .scratch_addr = pInfos[i].scratchData.deviceAddress,
+         .dst_offset = ALIGN(sizeof(struct radv_accel_struct_header), 64) + 128,
+         .dst_scratch_offset = 0,
+      };
+
+      for (unsigned j = 0; j < pInfos[i].geometryCount; ++j) {
+         const VkAccelerationStructureGeometryKHR *geom =
+            pInfos[i].pGeometries ? &pInfos[i].pGeometries[j] : pInfos[i].ppGeometries[j];
+
+         prim_consts.geometry_type = geom->geometryType;
+         prim_consts.geometry_id = j | (geom->flags << 28);
+         unsigned prim_size;
+         switch (geom->geometryType) {
+         case VK_GEOMETRY_TYPE_TRIANGLES_KHR:
+            prim_consts.vertex_addr =
+               geom->geometry.triangles.vertexData.deviceAddress +
+               ppBuildRangeInfos[i][j].firstVertex * geom->geometry.triangles.vertexStride +
+               (geom->geometry.triangles.indexType != VK_INDEX_TYPE_NONE_KHR
+                   ? ppBuildRangeInfos[i][j].primitiveOffset
+                   : 0);
+            prim_consts.index_addr = geom->geometry.triangles.indexData.deviceAddress +
+                                     ppBuildRangeInfos[i][j].primitiveOffset;
+            prim_consts.transform_addr = geom->geometry.triangles.transformData.deviceAddress +
+                                         ppBuildRangeInfos[i][j].transformOffset;
+            prim_consts.vertex_stride = geom->geometry.triangles.vertexStride;
+            prim_consts.vertex_format = geom->geometry.triangles.vertexFormat;
+            prim_consts.index_format = geom->geometry.triangles.indexType;
+            prim_size = 64;
+            break;
+         case VK_GEOMETRY_TYPE_AABBS_KHR:
+            prim_consts.aabb_addr =
+               geom->geometry.aabbs.data.deviceAddress + ppBuildRangeInfos[i][j].primitiveOffset;
+            prim_consts.aabb_stride = geom->geometry.aabbs.stride;
+            prim_size = 64;
+            break;
+         case VK_GEOMETRY_TYPE_INSTANCES_KHR:
+            prim_consts.instance_data = geom->geometry.instances.data.deviceAddress;
+            prim_size = 128;
+            break;
+         default:
+            unreachable("Unknown geometryType");
+         }
+
+         radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
+                               cmd_buffer->device->meta_state.accel_struct_build.leaf_p_layout,
+                               VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(prim_consts), &prim_consts);
+         radv_unaligned_dispatch(cmd_buffer, ppBuildRangeInfos[i][j].primitiveCount, 1, 1);
+         prim_consts.dst_offset += prim_size * ppBuildRangeInfos[i][j].primitiveCount;
+         prim_consts.dst_scratch_offset += 4 * ppBuildRangeInfos[i][j].primitiveCount;
+      }
+      bvh_states[i].node_offset = prim_consts.dst_offset;
+      bvh_states[i].node_count = prim_consts.dst_scratch_offset / 4;
+   }
+
+   radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
+                        cmd_buffer->device->meta_state.accel_struct_build.internal_pipeline);
+   bool progress = true;
+   for (unsigned iter = 0; progress; ++iter) {
+      progress = false;
+      for (uint32_t i = 0; i < infoCount; ++i) {
+         RADV_FROM_HANDLE(radv_acceleration_structure, accel_struct,
+                          pInfos[i].dstAccelerationStructure);
+
+         if (iter && bvh_states[i].node_count == 1)
+            continue;
+
+         if (!progress) {
+            cmd_buffer->state.flush_bits |=
+               RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
+               radv_src_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, NULL) |
+               radv_dst_access_flush(cmd_buffer,
+                                     VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, NULL);
+         }
+         progress = true;
+         uint32_t dst_node_count = MAX2(1, DIV_ROUND_UP(bvh_states[i].node_count, 4));
+         bool final_iter = dst_node_count == 1;
+         uint32_t src_scratch_offset = bvh_states[i].scratch_offset;
+         uint32_t dst_scratch_offset = src_scratch_offset ? 0 : bvh_states[i].node_count * 4;
+         uint32_t dst_node_offset = bvh_states[i].node_offset;
+         if (final_iter)
+            dst_node_offset = ALIGN(sizeof(struct radv_accel_struct_header), 64);
+
+         const struct build_internal_constants consts = {
+            .node_dst_addr = radv_accel_struct_get_va(accel_struct),
+            .scratch_addr = pInfos[i].scratchData.deviceAddress,
+            .dst_offset = dst_node_offset,
+            .dst_scratch_offset = dst_scratch_offset,
+            .src_scratch_offset = src_scratch_offset,
+            .fill_header = bvh_states[i].node_count | (final_iter ? 0x80000000U : 0),
+         };
+
+         radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
+                               cmd_buffer->device->meta_state.accel_struct_build.internal_p_layout,
+                               VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(consts), &consts);
+         radv_unaligned_dispatch(cmd_buffer, dst_node_count, 1, 1);
+         bvh_states[i].node_offset += dst_node_count * 128;
+         bvh_states[i].node_count = dst_node_count;
+         bvh_states[i].scratch_offset = dst_scratch_offset;
+      }
+   }
+   free(bvh_states);
+   radv_meta_restore(&saved_state, cmd_buffer);
+}
\ No newline at end of file
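The internal-node loop above ping-pongs node ids between two scratch regions (src_scratch_offset and dst_scratch_offset swap roles each level) and runs until a single node remains, so a build over n leaves takes ceil(log4(n)) internal dispatches, with a minimum of one so the header always gets written. A small model of that pass count, for reference only:

    /* Number of internal-node dispatches for a given leaf count. */
    static unsigned
    internal_pass_count(uint32_t leaf_count)
    {
       unsigned passes = 0;
       uint32_t n = leaf_count ? leaf_count : 1;
       do {
          n = (n + 3) / 4; /* each box node takes up to four children */
          ++passes;
       } while (n > 1);
       return passes;
    }
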
diff --git a/src/amd/vulkan/radv_meta.c b/src/amd/vulkan/radv_meta.c
index 8416e9adfda..b54e4bd183c 100644
--- a/src/amd/vulkan/radv_meta.c
+++ b/src/amd/vulkan/radv_meta.c
@@ -474,8 +474,14 @@ radv_device_init_meta(struct radv_device *device)
    if (result != VK_SUCCESS)
       goto fail_fmask_expand;
 
+   result = radv_device_init_accel_struct_build_state(device);
+   if (result != VK_SUCCESS)
+      goto fail_accel_struct_build;
+
    return VK_SUCCESS;
 
+fail_accel_struct_build:
+   radv_device_finish_meta_fmask_expand_state(device);
 fail_fmask_expand:
    radv_device_finish_meta_resolve_fragment_state(device);
 fail_resolve_fragment:
@@ -507,6 +513,7 @@ fail_clear:
 void
 radv_device_finish_meta(struct radv_device *device)
 {
+   radv_device_finish_accel_struct_build_state(device);
    radv_device_finish_meta_clear_state(device);
    radv_device_finish_meta_resolve_state(device);
    radv_device_finish_meta_blit_state(device);
diff --git a/src/amd/vulkan/radv_meta.h b/src/amd/vulkan/radv_meta.h
index c827baf969c..d04da9d3a2b 100644
--- a/src/amd/vulkan/radv_meta.h
+++ b/src/amd/vulkan/radv_meta.h
@@ -133,6 +133,9 @@ void radv_device_finish_meta_dcc_retile_state(struct radv_device *device);
 
 void radv_device_finish_meta_copy_vrs_htile_state(struct radv_device *device);
 
+VkResult radv_device_init_accel_struct_build_state(struct radv_device *device);
+void radv_device_finish_accel_struct_build_state(struct radv_device *device);
+
 void radv_meta_save(struct radv_meta_saved_state *saved_state, struct radv_cmd_buffer *cmd_buffer,
                     uint32_t flags);
 
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index b5abef0827d..bf99e928b9f 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -661,6 +661,13 @@ struct radv_meta_state {
       VkPipelineLayout p_layout;
      VkPipeline pipeline;
    } dcc_retile;
+
+   struct {
+      VkPipelineLayout leaf_p_layout;
+      VkPipeline leaf_pipeline;
+      VkPipelineLayout internal_p_layout;
+      VkPipeline internal_pipeline;
+   } accel_struct_build;
 };
 
 /* queue types */
_______________________________________________
mesa-commit mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/mesa-commit
