---
src/gallium/drivers/radeonsi/si_shader.c | 79 ++++++++++++--------------------
1 file changed, 29 insertions(+), 50 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 72cf827..5b15ad4 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -825,12 +825,13 @@ static LLVMValueRef build_buffer_load(struct si_shader_context *ctx,
LLVMValueRef soffset,
unsigned inst_offset,
unsigned glc,
- unsigned slc)
+ unsigned slc,
+ bool need_range_checks)
{
struct gallivm_state *gallivm = &ctx->gallivm;
unsigned func = CLAMP(num_channels, 1, 3) - 1;
- if (HAVE_LLVM >= 0x309) {
+ if (need_range_checks && HAVE_LLVM >= 0x309) {
LLVMValueRef args[] = {
LLVMBuildBitCast(gallivm->builder, rsrc, ctx->v4i32,
""),
vindex ? vindex : LLVMConstInt(ctx->i32, 0, 0),
@@ -896,7 +897,7 @@ static LLVMValueRef build_buffer_load(struct si_shader_context *ctx,
static LLVMValueRef buffer_load(struct lp_build_tgsi_context *bld_base,
enum tgsi_opcode_type type, unsigned swizzle,
LLVMValueRef buffer, LLVMValueRef offset,
- LLVMValueRef base)
+ LLVMValueRef base, bool need_range_checks)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
struct gallivm_state *gallivm = bld_base->base.gallivm;
@@ -906,14 +907,14 @@ static LLVMValueRef buffer_load(struct lp_build_tgsi_context *bld_base,
if (swizzle == ~0) {
value = build_buffer_load(ctx, buffer, 4, NULL, base, offset,
- 0, 1, 0);
+ 0, 1, 0, need_range_checks);
return LLVMBuildBitCast(gallivm->builder, value, vec_type, "");
}
if (!tgsi_type_is_64bit(type)) {
value = build_buffer_load(ctx, buffer, 4, NULL, base, offset,
- 0, 1, 0);
+ 0, 1, 0, need_range_checks);
value = LLVMBuildBitCast(gallivm->builder, value, vec_type, "");
return LLVMBuildExtractElement(gallivm->builder, value,
@@ -921,10 +922,10 @@ static LLVMValueRef buffer_load(struct lp_build_tgsi_context *bld_base,
}
value = build_buffer_load(ctx, buffer, 1, NULL, base, offset,
- swizzle * 4, 1, 0);
+ swizzle * 4, 1, 0, need_range_checks);
value2 = build_buffer_load(ctx, buffer, 1, NULL, base, offset,
- swizzle * 4 + 4, 1, 0);
+ swizzle * 4 + 4, 1, 0, need_range_checks);
return si_llvm_emit_fetch_64bit(bld_base, type, value, value2);
}
@@ -1044,7 +1045,7 @@ static LLVMValueRef fetch_input_tes(
base = LLVMGetParam(ctx->main_fn, ctx->param_oc_lds);
addr = get_tcs_tes_buffer_address_from_reg(ctx, NULL, reg);
- return buffer_load(bld_base, type, swizzle, buffer, base, addr);
+ return buffer_load(bld_base, type, swizzle, buffer, base, addr, true);
}
static void store_output_tcs(struct lp_build_tgsi_context *bld_base,
@@ -1125,13 +1126,12 @@ static LLVMValueRef fetch_input_gs(
struct lp_build_context *uint = &ctx->soa.bld_base.uint_bld;
struct gallivm_state *gallivm = base->gallivm;
LLVMValueRef vtx_offset;
- LLVMValueRef args[9];
unsigned vtx_offset_param;
struct tgsi_shader_info *info = &shader->selector->info;
unsigned semantic_name = info->input_semantic_name[reg->Register.Index];
unsigned semantic_index =
info->input_semantic_index[reg->Register.Index];
unsigned param;
- LLVMValueRef value;
+ LLVMValueRef soffset, value;
if (swizzle != ~0 && semantic_name == TGSI_SEMANTIC_PRIMID)
return get_primitive_id(bld_base, swizzle);
@@ -1163,27 +1163,15 @@ static LLVMValueRef fetch_input_gs(
4);
param = si_shader_io_get_unique_index(semantic_name, semantic_index);
- args[0] = ctx->esgs_ring;
- args[1] = vtx_offset;
- args[2] = lp_build_const_int32(gallivm, (param * 4 + swizzle) * 256);
- args[3] = uint->zero;
- args[4] = uint->one; /* OFFEN */
- args[5] = uint->zero; /* IDXEN */
- args[6] = uint->one; /* GLC */
- args[7] = uint->zero; /* SLC */
- args[8] = uint->zero; /* TFE */
-
- value = lp_build_intrinsic(gallivm->builder,
- "llvm.SI.buffer.load.dword.i32.i32",
- ctx->i32, args, 9,
- LP_FUNC_ATTR_READONLY);
+ soffset = lp_build_const_int32(gallivm, (param * 4 + swizzle) * 256);
+
+ value = build_buffer_load(ctx, ctx->esgs_ring, 1, NULL,
+ vtx_offset, soffset, 0, 1, 0, false);
if (tgsi_type_is_64bit(type)) {
LLVMValueRef value2;
- args[2] = lp_build_const_int32(gallivm, (param * 4 + swizzle +
1) * 256);
- value2 = lp_build_intrinsic(gallivm->builder,
- "llvm.SI.buffer.load.dword.i32.i32",
- ctx->i32, args, 9,
- LP_FUNC_ATTR_READONLY);
+ soffset = lp_build_const_int32(gallivm, (param * 4 + swizzle +
1) * 256);
+ value2 = build_buffer_load(ctx, ctx->esgs_ring, 1, NULL,
+ vtx_offset, soffset, 0, 1, 0, false);
return si_llvm_emit_fetch_64bit(bld_base, type,
value, value2);
}
@@ -1709,7 +1697,7 @@ static void declare_system_value(
lp_build_const_int32(gallivm, param));
value = buffer_load(&radeon_bld->soa.bld_base, TGSI_TYPE_FLOAT,
- ~0, buffer, base, addr);
+ ~0, buffer, base, addr, true);
break;
}
@@ -6420,7 +6408,7 @@ si_generate_gs_copy_shader(struct si_screen *sscreen,
struct lp_build_context *uint = &bld_base->uint_bld;
struct si_shader_output_values *outputs;
struct tgsi_shader_info *gsinfo = &gs_selector->info;
- LLVMValueRef args[9];
+ LLVMValueRef voffset;
int i, r;
outputs = MALLOC(gsinfo->num_outputs * sizeof(outputs[0]));
@@ -6447,18 +6435,6 @@ si_generate_gs_copy_shader(struct si_screen *sscreen,
create_function(&ctx);
preload_ring_buffers(&ctx);
- args[0] = ctx.gsvs_ring[0];
- args[1] = lp_build_mul_imm(uint,
- LLVMGetParam(ctx.main_fn,
- ctx.param_vertex_id),
- 4);
- args[3] = uint->zero;
- args[4] = uint->one; /* OFFEN */
- args[5] = uint->zero; /* IDXEN */
- args[6] = uint->one; /* GLC */
- args[7] = uint->one; /* SLC */
- args[8] = uint->zero; /* TFE */
-
/* Fetch the vertex stream ID.*/
LLVMValueRef stream_id;
@@ -6468,6 +6444,9 @@ si_generate_gs_copy_shader(struct si_screen *sscreen,
stream_id = uint->zero;
/* Fill in output information. */
+ voffset = lp_build_mul_imm(uint, LLVMGetParam(ctx.main_fn,
+ ctx.param_vertex_id), 4);
+ /* Fetch vertex data from GSVS ring */
for (i = 0; i < gsinfo->num_outputs; ++i) {
outputs[i].semantic_name = gsinfo->output_semantic_name[i];
outputs[i].semantic_index = gsinfo->output_semantic_index[i];
@@ -6502,24 +6481,24 @@ si_generate_gs_copy_shader(struct si_screen *sscreen,
offset = 0;
for (i = 0; i < gsinfo->num_outputs; ++i) {
for (unsigned chan = 0; chan < 4; chan++) {
+ LLVMValueRef load, soffset;
if (!(gsinfo->output_usagemask[i] & (1 <<
chan)) ||
outputs[i].vertex_stream[chan] != stream) {
outputs[i].values[chan] =
ctx.soa.bld_base.base.undef;
continue;
}
- args[2] = lp_build_const_int32(
- gallivm,
+ soffset = lp_build_const_int32(gallivm,
offset *
gs_selector->gs_max_out_vertices * 16 * 4);
offset++;
+ load = build_buffer_load(&ctx,
ctx.gsvs_ring[0], 1,
+ NULL, voffset, soffset,
+ 0, 1, 1, false);
+
outputs[i].values[chan] =
LLVMBuildBitCast(gallivm->builder,
-
lp_build_intrinsic(gallivm->builder,
-
"llvm.SI.buffer.load.dword.i32.i32",
- ctx.i32, args,
9,
-
LP_FUNC_ATTR_READONLY),
- ctx.f32, "");
+ load, ctx.f32, "");
}
}