Hi Bas,
it seems like this chunk
+ nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader,
nir_intrinsic_store_ssbo);
+ store->src[0] = nir_src_for_ssa(available);
+ store->src[1] = nir_src_for_ssa(&dst_buf->dest.ssa);
+ store->src[2] = nir_src_for_ssa(nir_iadd(&b, output_base, nir_imul(&b,
elem_count, elem_size)));
+ nir_intrinsic_set_write_mask(store, 0x1);
+ store->num_components = 1;
+ nir_builder_instr_insert(&b, &store->instr);
would make a great helper function, as it is repeated five times in the
code and only the inputs for src[0] and src[2] change.
Similarly, you could simplify these longer sequences:
+ /* Store the availability bit if requested. */
+ nir_if *availability_if = nir_if_create(b.shader);
+ availability_if->condition = nir_src_for_ssa(nir_iand(&b, flags,
nir_imm_int(&b, VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)));
+ nir_cf_node_insert(b.cursor, &availability_if->cf_node);
+
+ b.cursor = nir_after_cf_list(&availability_if->then_list);
+
+ nir_intrinsic_instr *store =
nir_intrinsic_instr_create(b.shader, nir_intrinsic_store_ssbo);
+ store->src[0] = nir_src_for_ssa(available);
+ store->src[1] = nir_src_for_ssa(&dst_buf->dest.ssa);
+ store->src[2] = nir_src_for_ssa(nir_iadd(&b, output_base, nir_imul(&b,
elem_count, elem_size)));
+ nir_intrinsic_set_write_mask(store, 0x1);
+ store->num_components = 1;
+ nir_builder_instr_insert(&b, &store->instr);
+
+ b.cursor = nir_after_cf_node(&availability_if->cf_node);
which appear twice.
All the best
Michael
Am 11.04.2017 um 02:04 schrieb Bas Nieuwenhuizen:
The devil is in the shader again, otherwise this is
fairly straightforward.
The CTS contains no pipeline statistics copy to buffer
testcases, so I did a basic smoketest.
Signed-off-by: Bas Nieuwenhuizen
---
src/amd/vulkan/radv_device.c | 2 +-
src/amd/vulkan/radv_private.h | 2 +
src/amd/vulkan/radv_query.c | 414 +++---
3 files changed, 392 insertions(+), 26 deletions(-)
diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index 9e8faa3da9a..5f14394196a 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -483,7 +483,7 @@ void radv_GetPhysicalDeviceFeatures(
.textureCompressionASTC_LDR = false,
.textureCompressionBC = true,
.occlusionQueryPrecise= true,
- .pipelineStatisticsQuery = false,
+ .pipelineStatisticsQuery = true,
.vertexPipelineStoresAndAtomics = true,
.fragmentStoresAndAtomics = true,
.shaderTessellationAndGeometryPointSize = true,
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index b54a2537c8a..2cb8cdd8d84 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -443,6 +443,7 @@ struct radv_meta_state {
VkDescriptorSetLayout ds_layout;
VkPipelineLayout p_layout;
VkPipeline occlusion_query_pipeline;
+ VkPipeline pipeline_statistics_query_pipeline;
} query;
};
@@ -1379,6 +1380,7 @@ struct radv_query_pool {
uint32_t availability_offset;
char *ptr;
VkQueryType type;
+ uint32_t pipeline_stats_mask;
};
VkResult
diff --git a/src/amd/vulkan/radv_query.c b/src/amd/vulkan/radv_query.c
index dc1844adb51..2de484224bc 100644
--- a/src/amd/vulkan/radv_query.c
+++ b/src/amd/vulkan/radv_query.c
@@ -35,6 +35,9 @@
#include "radv_cs.h"
#include "sid.h"
+
+static const unsigned pipeline_statistics_indices[] = {7, 6, 3, 4, 5, 2, 1, 0,
8, 9, 10};
+
static unsigned get_max_db(struct radv_device *device)
{
unsigned num_db = device->physical_device->rad_info.num_render_backends;
@@ -269,14 +272,259 @@ build_occlusion_query_shader(struct radv_device *device)
{
return b.shader;
}
+static nir_shader *
+build_pipeline_statistics_query_shader(struct radv_device *device) {
+ /* the shader this builds is roughly
+*
+* push constants {
+* uint32_t flags;
+* uint32_t dst_stride;
+* uint32_t stats_mask;
+* uint32_t avail_offset;
+* };
+*
+* uint32_t src_stride = 11 * 16;
+*
+* location(binding = 0) buffer dst_buf;
+* location(binding = 1) buffer src_buf;
+*
+* void main() {
+* uint64_t src_offset = src_stride * global_id.x;
+* uint64_t dst_base = dst_stride * global_id.x;
+* uint64_t dst_offset = dst_base;
+* uint32_t elem_size = flags & VK_QUERY_RESULT_64_BIT ? 8 : 4;
+* uint32_t elem_count = stats_mask >> 16;
+