Re: [Mesa-dev] [PATCH 4/4] radv: Implement pipeline statistics queries.

2017-04-11 Thread Michael Schellenberger Costa

Hi Bas,

it seems like this chunk

+   nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, 
nir_intrinsic_store_ssbo);
+   store->src[0] = nir_src_for_ssa(available);
+   store->src[1] = nir_src_for_ssa(&dst_buf->dest.ssa);
+   store->src[2] = nir_src_for_ssa(nir_iadd(&b, output_base, nir_imul(&b, 
elem_count, elem_size)));
+   nir_intrinsic_set_write_mask(store, 0x1);
+   store->num_components = 1;
+   nir_builder_instr_insert(&b, &store->instr);


would make a great helper function, as it is repeated 5 times in the 
code and only the input for src[0] and src[2] changes.


Similarly you could simplify those longer sequences

+   /* Store the availability bit if requested. */
+   nir_if *availability_if = nir_if_create(b.shader);
+   availability_if->condition = nir_src_for_ssa(nir_iand(&b, flags, 
nir_imm_int(&b, VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)));
+   nir_cf_node_insert(b.cursor, &availability_if->cf_node);
+
+   b.cursor = nir_after_cf_list(&availability_if->then_list);
+
+   nir_store_for_ssbo(store, available, )nir_intrinsic_instr *store = 
nir_intrinsic_instr_create(b.shader, nir_intrinsic_store_ssbo);
+   store->src[0] = nir_src_for_ssa(available);
+   store->src[1] = nir_src_for_ssa(&dst_buf->dest.ssa);
+   store->src[2] = nir_src_for_ssa(nir_iadd(&b, output_base, nir_imul(&b, 
elem_count, elem_size)));
+   nir_intrinsic_set_write_mask(store, 0x1);
+   store->num_components = 1;
+   nir_builder_instr_insert(&b, &store->instr);
+
+   b.cursor = nir_after_cf_node(&availability_if->cf_node);

which appear twice.

All the best
Michael

Am 11.04.2017 um 02:04 schrieb Bas Nieuwenhuizen:

The devil is in the shader again, otherwise this is
fairly straightforward.

The CTS contains no pipeline statistics copy to buffer
testcases, so I did a basic smoketest.

Signed-off-by: Bas Nieuwenhuizen 
---
  src/amd/vulkan/radv_device.c  |   2 +-
  src/amd/vulkan/radv_private.h |   2 +
  src/amd/vulkan/radv_query.c   | 414 +++---
  3 files changed, 392 insertions(+), 26 deletions(-)

diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index 9e8faa3da9a..5f14394196a 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -483,7 +483,7 @@ void radv_GetPhysicalDeviceFeatures(
.textureCompressionASTC_LDR   = false,
.textureCompressionBC = true,
.occlusionQueryPrecise= true,
-   .pipelineStatisticsQuery  = false,
+   .pipelineStatisticsQuery  = true,
.vertexPipelineStoresAndAtomics   = true,
.fragmentStoresAndAtomics = true,
.shaderTessellationAndGeometryPointSize   = true,
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index b54a2537c8a..2cb8cdd8d84 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -443,6 +443,7 @@ struct radv_meta_state {
VkDescriptorSetLayout ds_layout;
VkPipelineLayout p_layout;
VkPipeline occlusion_query_pipeline;
+   VkPipeline pipeline_statistics_query_pipeline;
} query;
  };
  
@@ -1379,6 +1380,7 @@ struct radv_query_pool {

uint32_t availability_offset;
char *ptr;
VkQueryType type;
+   uint32_t pipeline_stats_mask;
  };
  
  VkResult

diff --git a/src/amd/vulkan/radv_query.c b/src/amd/vulkan/radv_query.c
index dc1844adb51..2de484224bc 100644
--- a/src/amd/vulkan/radv_query.c
+++ b/src/amd/vulkan/radv_query.c
@@ -35,6 +35,9 @@
  #include "radv_cs.h"
  #include "sid.h"
  
+

+static const unsigned pipeline_statistics_indices[] = {7, 6, 3, 4, 5, 2, 1, 0, 
8, 9, 10};
+
  static unsigned get_max_db(struct radv_device *device)
  {
unsigned num_db = device->physical_device->rad_info.num_render_backends;
@@ -269,14 +272,259 @@ build_occlusion_query_shader(struct radv_device *device) 
{
return b.shader;
  }
  
+static nir_shader *

+build_pipeline_statistics_query_shader(struct radv_device *device) {
+   /* the shader this builds is roughly
+*
+* push constants {
+*  uint32_t flags;
+*  uint32_t dst_stride;
+*  uint32_t stats_mask;
+*  uint32_t avail_offset;
+* };
+*
+* uint32_t src_stride = 11 * 16;
+*
+* location(binding = 0) buffer dst_buf;
+* location(binding = 1) buffer src_buf;
+*
+* void main() {
+*  uint64_t src_offset = src_stride * global_id.x;
+*  uint64_t dst_base = dst_stride * global_id.x;
+*  uint64_t dst_offset = dst_base;
+*  uint32_t elem_size = flags & VK_QUERY_RESULT_64_BIT ? 8 : 4;
+*  uint32_t elem_count = stats_mask >> 16;
+ 

[Mesa-dev] [PATCH 4/4] radv: Implement pipeline statistics queries.

2017-04-10 Thread Bas Nieuwenhuizen
The devil is in the shader again, otherwise this is
fairly straightforward.

The CTS contains no pipeline statistics copy to buffer
testcases, so I did a basic smoketest.

Signed-off-by: Bas Nieuwenhuizen 
---
 src/amd/vulkan/radv_device.c  |   2 +-
 src/amd/vulkan/radv_private.h |   2 +
 src/amd/vulkan/radv_query.c   | 414 +++---
 3 files changed, 392 insertions(+), 26 deletions(-)

diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index 9e8faa3da9a..5f14394196a 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -483,7 +483,7 @@ void radv_GetPhysicalDeviceFeatures(
.textureCompressionASTC_LDR   = false,
.textureCompressionBC = true,
.occlusionQueryPrecise= true,
-   .pipelineStatisticsQuery  = false,
+   .pipelineStatisticsQuery  = true,
.vertexPipelineStoresAndAtomics   = true,
.fragmentStoresAndAtomics = true,
.shaderTessellationAndGeometryPointSize   = true,
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index b54a2537c8a..2cb8cdd8d84 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -443,6 +443,7 @@ struct radv_meta_state {
VkDescriptorSetLayout ds_layout;
VkPipelineLayout p_layout;
VkPipeline occlusion_query_pipeline;
+   VkPipeline pipeline_statistics_query_pipeline;
} query;
 };
 
@@ -1379,6 +1380,7 @@ struct radv_query_pool {
uint32_t availability_offset;
char *ptr;
VkQueryType type;
+   uint32_t pipeline_stats_mask;
 };
 
 VkResult
diff --git a/src/amd/vulkan/radv_query.c b/src/amd/vulkan/radv_query.c
index dc1844adb51..2de484224bc 100644
--- a/src/amd/vulkan/radv_query.c
+++ b/src/amd/vulkan/radv_query.c
@@ -35,6 +35,9 @@
 #include "radv_cs.h"
 #include "sid.h"
 
+
+static const unsigned pipeline_statistics_indices[] = {7, 6, 3, 4, 5, 2, 1, 0, 
8, 9, 10};
+
 static unsigned get_max_db(struct radv_device *device)
 {
unsigned num_db = device->physical_device->rad_info.num_render_backends;
@@ -269,14 +272,259 @@ build_occlusion_query_shader(struct radv_device *device) 
{
return b.shader;
 }
 
+static nir_shader *
+build_pipeline_statistics_query_shader(struct radv_device *device) {
+   /* the shader this builds is roughly
+*
+* push constants {
+*  uint32_t flags;
+*  uint32_t dst_stride;
+*  uint32_t stats_mask;
+*  uint32_t avail_offset;
+* };
+*
+* uint32_t src_stride = 11 * 16;
+*
+* location(binding = 0) buffer dst_buf;
+* location(binding = 1) buffer src_buf;
+*
+* void main() {
+*  uint64_t src_offset = src_stride * global_id.x;
+*  uint64_t dst_base = dst_stride * global_id.x;
+*  uint64_t dst_offset = dst_base;
+*  uint32_t elem_size = flags & VK_QUERY_RESULT_64_BIT ? 8 : 4;
+*  uint32_t elem_count = stats_mask >> 16;
+*  uint32_t available = src_buf[avail_offset + 4 * global_id.x];
+*  if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) {
+*  dst_buf[dst_offset + elem_count * elem_size] = 
available;
+*  }
+*  if (available) {
+*  // repeat 11 times:
+*  if (stats_mask & (1 << 0)) {
+*  uint64_t start = src_buf[src_offset + 8 * 
indices[0]];
+*  uint64_t end = src_buf[src_offset + 8 * 
indices[0] + 0x58];
+*  uint64_t result = end - start;
+*  if (flags & VK_QUERY_RESULT_64_BIT)
+*  dst_buf[dst_offset] = result;
+*  else
+*  dst_buf[dst_offset] = (uint32_t)result;
+*  dst_offset += elem_size;
+*  }
+*  } else if (flags & VK_QUERY_RESULT_PARTIAL_BIT) {
+*  // Set everything to 0 as we don't know what is valid.
+*  for (int i = 0; i < elem_count; ++i)
+*  dst_buf[dst_base + elem_size * i] = 0;
+*  }
+* }
+*/
+   nir_builder b;
+   nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
+   b.shader->info->name = ralloc_strdup(b.shader, 
"pipeline_statistics_query");
+   b.shader->info->cs.local_size[0] = 64;
+   b.shader->info->cs.local_size[1] = 1;
+   b.shader->info->cs.local_size[2] = 1;
+
+   nir_variable *output_offset = nir_local_variable_create(b.impl, 
glsl_int_type(), "output_offset");
+
+   nir_ssa_def *