Module: Mesa Branch: main Commit: d51a4b4c4b0f1a6c4f73cab026d2f1b42ead547d URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=d51a4b4c4b0f1a6c4f73cab026d2f1b42ead547d
Author: Bas Nieuwenhuizen <[email protected]> Date: Mon Jan 18 12:11:19 2021 +0100 radv: Add initial CPU BVH building. The algorithm used for the BVH: 1) first create 1 leaf per primitive (triangle/aabb/instance) 2) Then create internal layers from the bottom up until we are left with 1 node in the top layer. Node i in the layer will have children (i*4+0) ... (i*4+3) in the previous layer. This results in a very naive algorithm but it is also very simple to implement. Reviewed-by: Samuel Pitoiset <[email protected]> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11078> --- src/amd/vulkan/Makefile.sources | 1 + src/amd/vulkan/meson.build | 1 + src/amd/vulkan/radv_acceleration_structure.c | 591 +++++++++++++++++++++++++++ src/amd/vulkan/radv_private.h | 15 + 4 files changed, 608 insertions(+) diff --git a/src/amd/vulkan/Makefile.sources b/src/amd/vulkan/Makefile.sources index 126b67b06bf..2912ce46397 100644 --- a/src/amd/vulkan/Makefile.sources +++ b/src/amd/vulkan/Makefile.sources @@ -43,6 +43,7 @@ RADV_LAYER_SQTT_FILES := \ layers/radv_sqtt_layer.c VULKAN_FILES := \ + radv_acceleration_structure.c \ radv_cmd_buffer.c \ radv_cs.h \ radv_debug.c \ diff --git a/src/amd/vulkan/meson.build b/src/amd/vulkan/meson.build index 629dd04c431..60e28724e5d 100644 --- a/src/amd/vulkan/meson.build +++ b/src/amd/vulkan/meson.build @@ -38,6 +38,7 @@ libradv_files = files( 'winsys/null/radv_null_cs.h', 'winsys/null/radv_null_winsys.c', 'winsys/null/radv_null_winsys_public.h', + 'radv_acceleration_structure.c', 'radv_android.c', 'radv_cmd_buffer.c', 'radv_cs.h', diff --git a/src/amd/vulkan/radv_acceleration_structure.c b/src/amd/vulkan/radv_acceleration_structure.c new file mode 100644 index 00000000000..98ef979378d --- /dev/null +++ b/src/amd/vulkan/radv_acceleration_structure.c @@ -0,0 +1,591 @@ +/* + * Copyright © 2021 Bas Nieuwenhuizen + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ +#include "radv_private.h" + +#include "util/half_float.h" + +struct radv_accel_struct_header { + uint32_t root_node_offset; + uint32_t reserved; + float aabb[2][3]; + uint64_t compacted_size; + uint64_t serialization_size; +}; + +struct radv_bvh_triangle_node { + float coords[3][3]; + uint32_t reserved[3]; + uint32_t triangle_id; + /* flags in upper 4 bits */ + uint32_t geometry_id_and_flags; + uint32_t reserved2; + uint32_t id; +}; + +struct radv_bvh_aabb_node { + float aabb[2][3]; + uint32_t primitive_id; + /* flags in upper 4 bits */ + uint32_t geometry_id_and_flags; + uint32_t reserved[8]; +}; + +struct radv_bvh_instance_node { + uint64_t base_ptr; + /* lower 24 bits are the custom instance index, upper 8 bits are the visibility mask */ + uint32_t custom_instance_and_mask; + /* lower 24 bits are the sbt offset, upper 8 bits are VkGeometryInstanceFlagsKHR */ + uint32_t sbt_offset_and_flags; + + /* The translation component is actually a pre-translation instead of a post-translation. If you + * want to get a proper matrix out of it you need to apply the directional component of the + * matrix to it. The pre-translation of the world->object matrix is the same as the + * post-translation of the object->world matrix so this way we can share data between both + * matrices. */ + float wto_matrix[12]; + float aabb[2][3]; + uint32_t instance_id; + uint32_t reserved[9]; +}; + +struct radv_bvh_box16_node { + uint32_t children[4]; + uint32_t coords[4][3]; +}; + +struct radv_bvh_box32_node { + uint32_t children[4]; + float coords[4][2][3]; + uint32_t reserved[4]; +}; + +void +radv_GetAccelerationStructureBuildSizesKHR( + VkDevice _device, VkAccelerationStructureBuildTypeKHR buildType, + const VkAccelerationStructureBuildGeometryInfoKHR *pBuildInfo, + const uint32_t *pMaxPrimitiveCounts, VkAccelerationStructureBuildSizesInfoKHR *pSizeInfo) +{ + uint64_t triangles = 0, boxes = 0, instances = 0; + + for (uint32_t i = 0; i < pBuildInfo->geometryCount; ++i) { + const VkAccelerationStructureGeometryKHR *geometry; + if (pBuildInfo->pGeometries) + geometry = &pBuildInfo->pGeometries[i]; + else + geometry = pBuildInfo->ppGeometries[i]; + + switch (geometry->geometryType) { + case VK_GEOMETRY_TYPE_TRIANGLES_KHR: + triangles += pMaxPrimitiveCounts[i]; + break; + case VK_GEOMETRY_TYPE_AABBS_KHR: + boxes += pMaxPrimitiveCounts[i]; + break; + case VK_GEOMETRY_TYPE_INSTANCES_KHR: + instances += pMaxPrimitiveCounts[i]; + break; + case VK_GEOMETRY_TYPE_MAX_ENUM_KHR: + unreachable("VK_GEOMETRY_TYPE_MAX_ENUM_KHR unhandled"); + } + } + + uint64_t children = boxes + instances + triangles; + uint64_t internal_nodes = 0; + while (children > 1) { + children = DIV_ROUND_UP(children, 4); + internal_nodes += children; + } + + /* The stray 128 is to ensure we have space for a header + * which we'd want to use for some metadata (like the + * total AABB of the BVH) */ + uint64_t size = boxes * 128 + instances * 128 + triangles * 64 + internal_nodes * 128 + 192; + + pSizeInfo->accelerationStructureSize = size; + + /* 2x the max number of nodes in a BVH layer (one uint32_t each) */ + pSizeInfo->updateScratchSize = pSizeInfo->buildScratchSize = + MAX2(4096, 2 * (boxes + instances + triangles) * sizeof(uint32_t)); +} + +VkResult +radv_CreateAccelerationStructureKHR(VkDevice _device, + const VkAccelerationStructureCreateInfoKHR *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkAccelerationStructureKHR *pAccelerationStructure) +{ + RADV_FROM_HANDLE(radv_device, device, _device); + RADV_FROM_HANDLE(radv_buffer, buffer, pCreateInfo->buffer); + struct radv_acceleration_structure *accel; + + accel = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*accel), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (accel == NULL) + return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + + vk_object_base_init(&device->vk, &accel->base, VK_OBJECT_TYPE_ACCELERATION_STRUCTURE_KHR); + + accel->mem_offset = buffer->offset + pCreateInfo->offset; + accel->size = pCreateInfo->size; + accel->bo = buffer->bo; + + *pAccelerationStructure = radv_acceleration_structure_to_handle(accel); + return VK_SUCCESS; +} + +void +radv_DestroyAccelerationStructureKHR(VkDevice _device, + VkAccelerationStructureKHR accelerationStructure, + const VkAllocationCallbacks *pAllocator) +{ + RADV_FROM_HANDLE(radv_device, device, _device); + RADV_FROM_HANDLE(radv_acceleration_structure, accel, accelerationStructure); + + if (!accel) + return; + + vk_object_base_finish(&accel->base); + vk_free2(&device->vk.alloc, pAllocator, accel); +} + +VkDeviceAddress +radv_GetAccelerationStructureDeviceAddressKHR( + VkDevice _device, const VkAccelerationStructureDeviceAddressInfoKHR *pInfo) +{ + RADV_FROM_HANDLE(radv_acceleration_structure, accel, pInfo->accelerationStructure); + return radv_accel_struct_get_va(accel); +} + +VkResult +radv_WriteAccelerationStructuresPropertiesKHR( + VkDevice _device, uint32_t accelerationStructureCount, + const VkAccelerationStructureKHR *pAccelerationStructures, VkQueryType queryType, + size_t dataSize, void *pData, size_t stride) +{ + RADV_FROM_HANDLE(radv_device, device, _device); + char *data_out = (char*)pData; + + for (uint32_t i = 0; i < accelerationStructureCount; ++i) { + RADV_FROM_HANDLE(radv_acceleration_structure, accel, pAccelerationStructures[i]); + const char *base_ptr = (const char *)device->ws->buffer_map(accel->bo); + if (!base_ptr) + return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + + const struct radv_accel_struct_header *header = (const void*)(base_ptr + accel->mem_offset); + if (stride * i + sizeof(VkDeviceSize) <= dataSize) { + uint64_t value; + switch (queryType) { + case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR: + value = header->compacted_size; + break; + case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR: + value = header->serialization_size; + break; + default: + unreachable("Unhandled acceleration structure query"); + } + *(VkDeviceSize *)(data_out + stride * i) = value; + } + device->ws->buffer_unmap(accel->bo); + } + return VK_SUCCESS; +} + +struct radv_bvh_build_ctx { + uint32_t *write_scratch; + char *base; + char *curr_ptr; +}; + +static void +build_triangles(struct radv_bvh_build_ctx *ctx, const VkAccelerationStructureGeometryKHR *geom, + const VkAccelerationStructureBuildRangeInfoKHR *range, unsigned geometry_id) +{ + const VkAccelerationStructureGeometryTrianglesDataKHR *tri_data = &geom->geometry.triangles; + VkTransformMatrixKHR matrix; + const char *index_data = (const char *)tri_data->indexData.hostAddress + range->primitiveOffset; + + if (tri_data->transformData.hostAddress) { + matrix = *(const VkTransformMatrixKHR *)((const char *)tri_data->transformData.hostAddress + + range->transformOffset); + } else { + matrix = (VkTransformMatrixKHR){ + .matrix = {{1.0, 0.0, 0.0, 0.0}, {0.0, 1.0, 0.0, 0.0}, {0.0, 0.0, 1.0, 0.0}}}; + } + + for (uint32_t p = 0; p < range->primitiveCount; ++p, ctx->curr_ptr += 64) { + struct radv_bvh_triangle_node *node = (void*)ctx->curr_ptr; + uint32_t node_offset = ctx->curr_ptr - ctx->base; + uint32_t node_id = node_offset >> 3; + *ctx->write_scratch++ = node_id; + + for (unsigned v = 0; v < 3; ++v) { + uint32_t v_index = range->firstVertex; + switch (tri_data->indexType) { + case VK_INDEX_TYPE_NONE_KHR: + v_index += p * 3 + v; + break; + case VK_INDEX_TYPE_UINT8_EXT: + v_index += *(const uint8_t *)index_data; + index_data += 1; + break; + case VK_INDEX_TYPE_UINT16: + v_index += *(const uint16_t *)index_data; + index_data += 2; + break; + case VK_INDEX_TYPE_UINT32: + v_index += *(const uint32_t *)index_data; + index_data += 4; + break; + case VK_INDEX_TYPE_MAX_ENUM: + unreachable("Unhandled VK_INDEX_TYPE_MAX_ENUM"); + break; + } + + const char *v_data = (const char *)tri_data->vertexData.hostAddress + v_index * tri_data->vertexStride; + float coords[4]; + switch (tri_data->vertexFormat) { + case VK_FORMAT_R32G32B32_SFLOAT: + coords[0] = *(const float *)(v_data + 0); + coords[1] = *(const float *)(v_data + 4); + coords[2] = *(const float *)(v_data + 8); + coords[3] = 1.0f; + break; + case VK_FORMAT_R32G32B32A32_SFLOAT: + coords[0] = *(const float *)(v_data + 0); + coords[1] = *(const float *)(v_data + 4); + coords[2] = *(const float *)(v_data + 8); + coords[3] = *(const float *)(v_data + 12); + break; + case VK_FORMAT_R16G16B16_SFLOAT: + coords[0] = _mesa_half_to_float(*(const uint16_t *)(v_data + 0)); + coords[1] = _mesa_half_to_float(*(const uint16_t *)(v_data + 2)); + coords[2] = _mesa_half_to_float(*(const uint16_t *)(v_data + 4)); + coords[3] = 1.0f; + break; + case VK_FORMAT_R16G16B16A16_SFLOAT: + coords[0] = _mesa_half_to_float(*(const uint16_t *)(v_data + 0)); + coords[1] = _mesa_half_to_float(*(const uint16_t *)(v_data + 2)); + coords[2] = _mesa_half_to_float(*(const uint16_t *)(v_data + 4)); + coords[3] = _mesa_half_to_float(*(const uint16_t *)(v_data + 6)); + break; + default: + unreachable("Unhandled vertex format in BVH build"); + } + + for (unsigned j = 0; j < 3; ++j) { + float r = 0; + for (unsigned k = 0; k < 4; ++k) + r += matrix.matrix[j][k] * coords[k]; + node->coords[v][j] = r; + } + + node->triangle_id = p; + node->geometry_id_and_flags = geometry_id | (geom->flags << 28); + + /* Seems to be needed for IJ, otherwise I = J = ? */ + node->id = 9; + } + } +} + +static VkResult +build_instances(struct radv_device *device, struct radv_bvh_build_ctx *ctx, + const VkAccelerationStructureGeometryKHR *geom, + const VkAccelerationStructureBuildRangeInfoKHR *range) +{ + const VkAccelerationStructureGeometryInstancesDataKHR *inst_data = &geom->geometry.instances; + + for (uint32_t p = 0; p < range->primitiveCount; ++p, ctx->curr_ptr += 128) { + const VkAccelerationStructureInstanceKHR *instance = + inst_data->arrayOfPointers + ? (((const VkAccelerationStructureInstanceKHR *const *)inst_data->data.hostAddress)[p]) + : &((const VkAccelerationStructureInstanceKHR *)inst_data->data.hostAddress)[p]; + if (!instance->accelerationStructureReference) { + continue; + } + + struct radv_bvh_instance_node *node = (void*)ctx->curr_ptr; + uint32_t node_offset = ctx->curr_ptr - ctx->base; + uint32_t node_id = (node_offset >> 3) | 6; + *ctx->write_scratch++ = node_id; + + float transform[16], inv_transform[16]; + memcpy(transform, &instance->transform.matrix, sizeof(instance->transform.matrix)); + transform[12] = transform[13] = transform[14] = 0.0f; + transform[15] = 1.0f; + + util_invert_mat4x4(inv_transform, transform); + memcpy(node->wto_matrix, inv_transform, sizeof(node->wto_matrix)); + node->wto_matrix[3] = transform[3]; + node->wto_matrix[7] = transform[7]; + node->wto_matrix[11] = transform[11]; + node->custom_instance_and_mask = instance->instanceCustomIndex | (instance->mask << 24); + node->sbt_offset_and_flags = + instance->instanceShaderBindingTableRecordOffset | (instance->flags << 24); + node->instance_id = p; + + RADV_FROM_HANDLE(radv_acceleration_structure, src_accel_struct, + (VkAccelerationStructureKHR)instance->accelerationStructureReference); + const void *src_base = device->ws->buffer_map(src_accel_struct->bo); + if (!src_base) + return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + + src_base = (const char *)src_base + src_accel_struct->mem_offset; + const struct radv_accel_struct_header *src_header = src_base; + node->base_ptr = radv_accel_struct_get_va(src_accel_struct) | src_header->root_node_offset; + + for (unsigned j = 0; j < 3; ++j) { + node->aabb[0][j] = instance->transform.matrix[j][3]; + node->aabb[1][j] = instance->transform.matrix[j][3]; + for (unsigned k = 0; k < 3; ++k) { + node->aabb[0][j] += MIN2(instance->transform.matrix[j][k] * src_header->aabb[0][k], + instance->transform.matrix[j][k] * src_header->aabb[1][k]); + node->aabb[1][j] += MAX2(instance->transform.matrix[j][k] * src_header->aabb[0][k], + instance->transform.matrix[j][k] * src_header->aabb[1][k]); + } + } + device->ws->buffer_unmap(src_accel_struct->bo); + } + return VK_SUCCESS; +} + +static void +build_aabbs(struct radv_bvh_build_ctx *ctx, const VkAccelerationStructureGeometryKHR *geom, + const VkAccelerationStructureBuildRangeInfoKHR *range, unsigned geometry_id) +{ + const VkAccelerationStructureGeometryAabbsDataKHR *aabb_data = &geom->geometry.aabbs; + + for (uint32_t p = 0; p < range->primitiveCount; ++p, ctx->curr_ptr += 64) { + struct radv_bvh_aabb_node *node = (void*)ctx->curr_ptr; + uint32_t node_offset = ctx->curr_ptr - ctx->base; + uint32_t node_id = (node_offset >> 3) | 6; + *ctx->write_scratch++ = node_id; + + const VkAabbPositionsKHR *aabb = + (const VkAabbPositionsKHR *)((const char *)aabb_data->data.hostAddress + + p * aabb_data->stride); + + node->aabb[0][0] = aabb->minX; + node->aabb[0][1] = aabb->minY; + node->aabb[0][2] = aabb->minZ; + node->aabb[1][0] = aabb->maxX; + node->aabb[1][1] = aabb->maxY; + node->aabb[1][2] = aabb->maxZ; + node->primitive_id = p; + node->geometry_id_and_flags = geometry_id; + } +} + +static uint32_t +leaf_node_count(const VkAccelerationStructureBuildGeometryInfoKHR *info, + const VkAccelerationStructureBuildRangeInfoKHR *ranges) +{ + uint32_t count = 0; + for (uint32_t i = 0; i < info->geometryCount; ++i) { + count += ranges[i].primitiveCount; + } + return count; +} + +static void +compute_bounds(const char *base_ptr, uint32_t node_id, float *bounds) +{ + for (unsigned i = 0; i < 3; ++i) + bounds[i] = INFINITY; + for (unsigned i = 0; i < 3; ++i) + bounds[3 + i] = -INFINITY; + + switch (node_id & 7) { + case 0: { + const struct radv_bvh_triangle_node *node = (const void*)(base_ptr + (node_id / 8 * 64)); + for (unsigned v = 0; v < 3; ++v) { + for (unsigned j = 0; j < 3; ++j) { + bounds[j] = MIN2(bounds[j], node->coords[v][j]); + bounds[3 + j] = MAX2(bounds[3 + j], node->coords[v][j]); + } + } + break; + } + case 5: { + const struct radv_bvh_box32_node *node = (const void*)(base_ptr + (node_id / 8 * 64)); + for (unsigned c2 = 0; c2 < 4; ++c2) { + if (isnan(node->coords[c2][0][0])) + continue; + for (unsigned j = 0; j < 3; ++j) { + bounds[j] = MIN2(bounds[j], node->coords[c2][0][j]); + bounds[3 + j] = MAX2(bounds[3 + j], node->coords[c2][1][j]); + } + } + break; + } + case 6: { + const struct radv_bvh_instance_node *node = (const void*)(base_ptr + (node_id / 8 * 64)); + for (unsigned j = 0; j < 3; ++j) { + bounds[j] = MIN2(bounds[j], node->aabb[0][j]); + bounds[3 + j] = MAX2(bounds[3 + j], node->aabb[1][j]); + } + break; + } + case 7: { + const struct radv_bvh_aabb_node *node = (const void*)(base_ptr + (node_id / 8 * 64)); + for (unsigned j = 0; j < 3; ++j) { + bounds[j] = MIN2(bounds[j], node->aabb[0][j]); + bounds[3 + j] = MAX2(bounds[3 + j], node->aabb[1][j]); + } + break; + } + } +} + +static VkResult +build_bvh(struct radv_device *device, const VkAccelerationStructureBuildGeometryInfoKHR *info, + const VkAccelerationStructureBuildRangeInfoKHR *ranges) +{ + RADV_FROM_HANDLE(radv_acceleration_structure, accel, info->dstAccelerationStructure); + VkResult result = VK_SUCCESS; + + uint32_t *scratch[2]; + scratch[0] = info->scratchData.hostAddress; + scratch[1] = scratch[0] + leaf_node_count(info, ranges); + + char *base_ptr = (char*)device->ws->buffer_map(accel->bo); + if (!base_ptr) + return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + + base_ptr = base_ptr + accel->mem_offset; + struct radv_accel_struct_header *header = (void*)base_ptr; + void *first_node_ptr = (char *)base_ptr + ALIGN(sizeof(*header), 64); + + struct radv_bvh_build_ctx ctx = {.write_scratch = scratch[0], + .base = base_ptr, + .curr_ptr = (char *)first_node_ptr + 128}; + + /* This initializes the leaf nodes of the BVH all at the same level. */ + for (uint32_t i = 0; i < info->geometryCount; ++i) { + const VkAccelerationStructureGeometryKHR *geom = + info->pGeometries ? &info->pGeometries[i] : info->ppGeometries[i]; + + switch (geom->geometryType) { + case VK_GEOMETRY_TYPE_TRIANGLES_KHR: + build_triangles(&ctx, geom, ranges + i, i); + break; + case VK_GEOMETRY_TYPE_AABBS_KHR: + build_aabbs(&ctx, geom, ranges + i, i); + break; + case VK_GEOMETRY_TYPE_INSTANCES_KHR: { + result = build_instances(device, &ctx, geom, ranges + i); + if (result != VK_SUCCESS) + goto fail; + break; + } + case VK_GEOMETRY_TYPE_MAX_ENUM_KHR: + unreachable("VK_GEOMETRY_TYPE_MAX_ENUM_KHR unhandled"); + } + } + + uint32_t node_counts[2] = {ctx.write_scratch - scratch[0], 0}; + unsigned d; + + /* + * This is the most naive BVH building algorithm I could think of: + * just iteratively builds each level from bottom to top with + * the children of each node being in-order and tightly packed. + * + * Is probably terrible for traversal but should be easy to build an + * equivalent GPU version. + */ + for (d = 0; node_counts[d & 1] > 1 || d == 0; ++d) { + uint32_t child_count = node_counts[d & 1]; + const uint32_t *children = scratch[d & 1]; + uint32_t *dst_ids = scratch[(d & 1) ^ 1]; + unsigned dst_count; + unsigned child_idx = 0; + for (dst_count = 0; child_idx < MAX2(1, child_count); ++dst_count, child_idx += 4) { + unsigned local_child_count = MIN2(4, child_count - child_idx); + uint32_t child_ids[4]; + float bounds[4][6]; + + for (unsigned c = 0; c < local_child_count; ++c) { + uint32_t id = children[child_idx + c]; + child_ids[c] = id; + + compute_bounds(base_ptr, id, bounds[c]); + } + + struct radv_bvh_box32_node *node; + + /* Put the root node at base_ptr so the id = 0, which allows some + * traversal optimizations. */ + if (child_idx == 0 && local_child_count == child_count) { + node = first_node_ptr; + header->root_node_offset = ((char *)first_node_ptr - (char *)base_ptr) / 64 * 8 + 5; + } else { + uint32_t dst_id = (ctx.curr_ptr - base_ptr) / 64; + dst_ids[dst_count] = dst_id * 8 + 5; + + node = (void*)ctx.curr_ptr; + ctx.curr_ptr += 128; + } + + for (unsigned c = 0; c < local_child_count; ++c) { + node->children[c] = child_ids[c]; + for (unsigned i = 0; i < 2; ++i) + for (unsigned j = 0; j < 3; ++j) + node->coords[c][i][j] = bounds[c][i * 3 + j]; + } + for (unsigned c = local_child_count; c < 4; ++c) { + for (unsigned i = 0; i < 2; ++i) + for (unsigned j = 0; j < 3; ++j) + node->coords[c][i][j] = NAN; + } + } + + node_counts[(d & 1) ^ 1] = dst_count; + } + + compute_bounds(base_ptr, header->root_node_offset, &header->aabb[0][0]); + + /* TODO init sizes and figure out what is needed for serialization. */ + +fail: + device->ws->buffer_unmap(accel->bo); + return result; +} + +VkResult +radv_BuildAccelerationStructuresKHR( + VkDevice _device, VkDeferredOperationKHR deferredOperation, uint32_t infoCount, + const VkAccelerationStructureBuildGeometryInfoKHR *pInfos, + const VkAccelerationStructureBuildRangeInfoKHR *const *ppBuildRangeInfos) +{ + RADV_FROM_HANDLE(radv_device, device, _device); + VkResult result = VK_SUCCESS; + + for (uint32_t i = 0; i < infoCount; ++i) { + result = build_bvh(device, pInfos + i, ppBuildRangeInfos[i]); + if (result != VK_SUCCESS) + break; + } + return result; +} diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index 610c8bb7653..b5abef0827d 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -2697,6 +2697,20 @@ radv_use_llvm_for_stage(struct radv_device *device, UNUSED gl_shader_stage stage return device->physical_device->use_llvm; } +struct radv_acceleration_structure { + struct vk_object_base base; + + struct radeon_winsys_bo *bo; + uint64_t mem_offset; + uint64_t size; +}; + +static inline uint64_t +radv_accel_struct_get_va(const struct radv_acceleration_structure *accel) +{ + return radv_buffer_get_va(accel->bo) + accel->mem_offset; +} + #define RADV_DEFINE_HANDLE_CASTS(__radv_type, __VkType) \ \ static inline struct __radv_type *__radv_type##_from_handle(__VkType _handle) \ @@ -2730,6 +2744,7 @@ RADV_DEFINE_HANDLE_CASTS(radv_instance, VkInstance) RADV_DEFINE_HANDLE_CASTS(radv_physical_device, VkPhysicalDevice) RADV_DEFINE_HANDLE_CASTS(radv_queue, VkQueue) +RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_acceleration_structure, VkAccelerationStructureKHR) RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_cmd_pool, VkCommandPool) RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_buffer, VkBuffer) RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_buffer_view, VkBufferView) _______________________________________________ mesa-commit mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/mesa-commit
