Re: [Mesa-dev] [PATCH] radeonsi: load streamout buffer descriptors before use (v2)

2016-09-14 Thread Nicolai Hähnle

Reviewed-by: Nicolai Hähnle 

On 13.09.2016 22:20, Marek Olšák wrote:

From: Marek Olšák 

v2: inline the code and remove the conditional that's a no-op now
---
 src/gallium/drivers/radeonsi/si_shader.c | 47 ++--
 1 file changed, 14 insertions(+), 33 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index be6fae7..d61f4ff 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -105,21 +105,20 @@ struct si_shader_context
unsigned uniform_md_kind;
LLVMValueRef empty_md;

LLVMValueRef const_buffers[SI_NUM_CONST_BUFFERS];
LLVMValueRef lds;
LLVMValueRef shader_buffers[SI_NUM_SHADER_BUFFERS];
LLVMValueRef sampler_views[SI_NUM_SAMPLERS];
LLVMValueRef sampler_states[SI_NUM_SAMPLERS];
LLVMValueRef fmasks[SI_NUM_SAMPLERS];
LLVMValueRef images[SI_NUM_IMAGES];
-   LLVMValueRef so_buffers[4];
LLVMValueRef esgs_ring;
LLVMValueRef gsvs_ring[4];
LLVMValueRef gs_next_vertex[4];
LLVMValueRef return_value;

LLVMTypeRef voidt;
LLVMTypeRef i1;
LLVMTypeRef i8;
LLVMTypeRef i32;
LLVMTypeRef i64;
@@ -2264,20 +2263,33 @@ static void si_dump_streamout(struct 
pipe_stream_output_info *so)
  * to buffers. */
 static void si_llvm_emit_streamout(struct si_shader_context *ctx,
   struct si_shader_output_values *outputs,
   unsigned noutput)
 {
struct pipe_stream_output_info *so = &ctx->shader->selector->so;
struct gallivm_state *gallivm = &ctx->radeon_bld.gallivm;
LLVMBuilderRef builder = gallivm->builder;
int i, j;
struct lp_build_if_state if_ctx;
+   LLVMValueRef so_buffers[4];
+   LLVMValueRef buf_ptr = LLVMGetParam(ctx->radeon_bld.main_fn,
+   SI_PARAM_RW_BUFFERS);
+
+   /* Load the descriptors. */
+   for (i = 0; i < 4; ++i) {
+   if (ctx->shader->selector->so.stride[i]) {
+   LLVMValueRef offset = lp_build_const_int32(gallivm,
+  
SI_VS_STREAMOUT_BUF0 + i);
+
+   so_buffers[i] = build_indexed_load_const(ctx, buf_ptr, 
offset);
+   }
+   }

/* Get bits [22:16], i.e. (so_param >> 16) & 127; */
LLVMValueRef so_vtx_count =
unpack_param(ctx, ctx->param_streamout_config, 16, 7);

LLVMValueRef tid = get_thread_id(ctx);

/* can_emit = tid < so_vtx_count; */
LLVMValueRef can_emit =
LLVMBuildICmp(builder, LLVMIntULT, tid, so_vtx_count, "");
@@ -2359,21 +2371,21 @@ static void si_llvm_emit_streamout(struct 
si_shader_context *ctx,
}
break;
}

LLVMValueRef can_emit_stream =
LLVMBuildICmp(builder, LLVMIntEQ,
  stream_id,
  lp_build_const_int32(gallivm, stream), 
"");

lp_build_if(&if_ctx_stream, gallivm, can_emit_stream);
-   build_tbuffer_store_dwords(ctx, 
ctx->so_buffers[buf_idx],
+   build_tbuffer_store_dwords(ctx, so_buffers[buf_idx],
   vdata, num_comps,
   so_write_offset[buf_idx],
   LLVMConstInt(ctx->i32, 0, 0),
   so->output[i].dst_offset*4);
lp_build_endif(&if_ctx_stream);
}
}
lp_build_endif(&if_ctx);
 }

@@ -5917,49 +5929,20 @@ static void preload_images(struct si_shader_context 
*ctx)
 lp_build_const_int32(gallivm, 
i));

if (info->images_writemask & (1 << i) &&
!(info->images_buffers & (1 << i)))
rsrc = force_dcc_off(ctx, rsrc);

ctx->images[i] = rsrc;
}
 }

-static void preload_streamout_buffers(struct si_shader_context *ctx)
-{
-   struct lp_build_tgsi_context *bld_base = &ctx->radeon_bld.soa.bld_base;
-   struct gallivm_state *gallivm = bld_base->base.gallivm;
-   unsigned i;
-
-   /* Streamout can only be used if the shader is compiled as VS. */
-   if (!ctx->shader->selector->so.num_outputs ||
-   (ctx->type == PIPE_SHADER_VERTEX &&
-(ctx->shader->key.vs.as_es ||
- ctx->shader->key.vs.as_ls)) ||
-   (ctx->type == PIPE_SHADER_TESS_EVAL &&
-ctx->shader->key.tes.as_es))
-   return;
-
-   LLVMValueRef buf_ptr = 

[Mesa-dev] [PATCH] radeonsi: load streamout buffer descriptors before use (v2)

2016-09-13 Thread Marek Olšák
From: Marek Olšák 

v2: inline the code and remove the conditional that's a no-op now
---
 src/gallium/drivers/radeonsi/si_shader.c | 47 ++--
 1 file changed, 14 insertions(+), 33 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index be6fae7..d61f4ff 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -105,21 +105,20 @@ struct si_shader_context
unsigned uniform_md_kind;
LLVMValueRef empty_md;
 
LLVMValueRef const_buffers[SI_NUM_CONST_BUFFERS];
LLVMValueRef lds;
LLVMValueRef shader_buffers[SI_NUM_SHADER_BUFFERS];
LLVMValueRef sampler_views[SI_NUM_SAMPLERS];
LLVMValueRef sampler_states[SI_NUM_SAMPLERS];
LLVMValueRef fmasks[SI_NUM_SAMPLERS];
LLVMValueRef images[SI_NUM_IMAGES];
-   LLVMValueRef so_buffers[4];
LLVMValueRef esgs_ring;
LLVMValueRef gsvs_ring[4];
LLVMValueRef gs_next_vertex[4];
LLVMValueRef return_value;
 
LLVMTypeRef voidt;
LLVMTypeRef i1;
LLVMTypeRef i8;
LLVMTypeRef i32;
LLVMTypeRef i64;
@@ -2264,20 +2263,33 @@ static void si_dump_streamout(struct 
pipe_stream_output_info *so)
  * to buffers. */
 static void si_llvm_emit_streamout(struct si_shader_context *ctx,
   struct si_shader_output_values *outputs,
   unsigned noutput)
 {
struct pipe_stream_output_info *so = &ctx->shader->selector->so;
struct gallivm_state *gallivm = &ctx->radeon_bld.gallivm;
LLVMBuilderRef builder = gallivm->builder;
int i, j;
struct lp_build_if_state if_ctx;
+   LLVMValueRef so_buffers[4];
+   LLVMValueRef buf_ptr = LLVMGetParam(ctx->radeon_bld.main_fn,
+   SI_PARAM_RW_BUFFERS);
+
+   /* Load the descriptors. */
+   for (i = 0; i < 4; ++i) {
+   if (ctx->shader->selector->so.stride[i]) {
+   LLVMValueRef offset = lp_build_const_int32(gallivm,
+  
SI_VS_STREAMOUT_BUF0 + i);
+
+   so_buffers[i] = build_indexed_load_const(ctx, buf_ptr, 
offset);
+   }
+   }
 
/* Get bits [22:16], i.e. (so_param >> 16) & 127; */
LLVMValueRef so_vtx_count =
unpack_param(ctx, ctx->param_streamout_config, 16, 7);
 
LLVMValueRef tid = get_thread_id(ctx);
 
/* can_emit = tid < so_vtx_count; */
LLVMValueRef can_emit =
LLVMBuildICmp(builder, LLVMIntULT, tid, so_vtx_count, "");
@@ -2359,21 +2371,21 @@ static void si_llvm_emit_streamout(struct 
si_shader_context *ctx,
}
break;
}
 
LLVMValueRef can_emit_stream =
LLVMBuildICmp(builder, LLVMIntEQ,
  stream_id,
  lp_build_const_int32(gallivm, 
stream), "");
 
lp_build_if(&if_ctx_stream, gallivm, can_emit_stream);
-   build_tbuffer_store_dwords(ctx, 
ctx->so_buffers[buf_idx],
+   build_tbuffer_store_dwords(ctx, so_buffers[buf_idx],
   vdata, num_comps,
   so_write_offset[buf_idx],
   LLVMConstInt(ctx->i32, 0, 0),
   so->output[i].dst_offset*4);
lp_build_endif(&if_ctx_stream);
}
}
lp_build_endif(&if_ctx);
 }
 
@@ -5917,49 +5929,20 @@ static void preload_images(struct si_shader_context 
*ctx)
 lp_build_const_int32(gallivm, 
i));
 
if (info->images_writemask & (1 << i) &&
!(info->images_buffers & (1 << i)))
rsrc = force_dcc_off(ctx, rsrc);
 
ctx->images[i] = rsrc;
}
 }
 
-static void preload_streamout_buffers(struct si_shader_context *ctx)
-{
-   struct lp_build_tgsi_context *bld_base = &ctx->radeon_bld.soa.bld_base;
-   struct gallivm_state *gallivm = bld_base->base.gallivm;
-   unsigned i;
-
-   /* Streamout can only be used if the shader is compiled as VS. */
-   if (!ctx->shader->selector->so.num_outputs ||
-   (ctx->type == PIPE_SHADER_VERTEX &&
-(ctx->shader->key.vs.as_es ||
- ctx->shader->key.vs.as_ls)) ||
-   (ctx->type == PIPE_SHADER_TESS_EVAL &&
-ctx->shader->key.tes.as_es))
-   return;
-
-   LLVMValueRef buf_ptr = LLVMGetParam(ctx->radeon_bld.main_fn,
-