Because NIR can create non-vec4 variables when implementing component
packing, several input variables can share the same location, so we
need to make sure not to process the same slot more than once.
Also, we can drop the fs_attr_idx counter and just use driver_location.
---
src/gallium/drivers/radeonsi/si_shader_nir.c | 46 +++++++++++++++-------------
1 file changed, 25 insertions(+), 21 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c b/src/gallium/drivers/radeonsi/si_shader_nir.c
index 5d82715f7a..ec748c9679 100644
--- a/src/gallium/drivers/radeonsi/si_shader_nir.c
+++ b/src/gallium/drivers/radeonsi/si_shader_nir.c
@@ -437,46 +437,42 @@ si_lower_nir(struct si_shader_selector* sel)
NIR_PASS(progress, sel->nir, nir_opt_undef);
NIR_PASS(progress, sel->nir, nir_opt_conditional_discard);
if (sel->nir->options->max_unroll_iterations) {
NIR_PASS(progress, sel->nir, nir_opt_loop_unroll, 0);
}
} while (progress);
}
static void declare_nir_input_vs(struct si_shader_context *ctx,
- struct nir_variable *variable, unsigned rel,
+ struct nir_variable *variable,
LLVMValueRef out[4])
{
- si_llvm_load_input_vs(ctx, variable->data.driver_location / 4 + rel,
out);
+ si_llvm_load_input_vs(ctx, variable->data.driver_location / 4, out);
}
static void declare_nir_input_fs(struct si_shader_context *ctx,
- struct nir_variable *variable, unsigned rel,
- unsigned *fs_attr_idx,
+ struct nir_variable *variable,
+ unsigned input_index,
LLVMValueRef out[4])
{
- unsigned slot = variable->data.location + rel;
-
- assert(variable->data.location >= VARYING_SLOT_VAR0 || rel == 0);
-
+ unsigned slot = variable->data.location;
if (slot == VARYING_SLOT_POS) {
out[0] = LLVMGetParam(ctx->main_fn, SI_PARAM_POS_X_FLOAT);
out[1] = LLVMGetParam(ctx->main_fn, SI_PARAM_POS_Y_FLOAT);
out[2] = LLVMGetParam(ctx->main_fn, SI_PARAM_POS_Z_FLOAT);
out[3] = ac_build_fdiv(&ctx->ac, ctx->ac.f32_1,
LLVMGetParam(ctx->main_fn,
SI_PARAM_POS_W_FLOAT));
return;
}
- si_llvm_load_input_fs(ctx, *fs_attr_idx, out);
- (*fs_attr_idx)++;
+ si_llvm_load_input_fs(ctx, input_index, out);
}
static LLVMValueRef
si_nir_load_sampler_desc(struct ac_shader_abi *abi,
unsigned descriptor_set, unsigned base_index,
unsigned constant_index, LLVMValueRef dynamic_index,
enum ac_descriptor_type desc_type, bool image,
bool write)
{
struct si_shader_context *ctx = si_shader_context_from_abi(abi);
@@ -516,39 +512,47 @@ si_nir_load_sampler_desc(struct ac_shader_abi *abi,
index = LLVMBuildAdd(ctx->gallivm.builder, index,
LLVMConstInt(ctx->i32, SI_NUM_IMAGES / 2, 0), "");
return si_load_sampler_desc(ctx, list, index, desc_type);
}
bool si_nir_build_llvm(struct si_shader_context *ctx, struct nir_shader *nir)
{
struct tgsi_shader_info *info = &ctx->shader->selector->info;
- unsigned fs_attr_idx = 0;
+ uint64_t processed_outputs = 0;
nir_foreach_variable(variable, &nir->inputs) {
unsigned attrib_count =
glsl_count_attribute_slots(variable->type,
nir->info.stage == MESA_SHADER_VERTEX);
unsigned input_idx = variable->data.driver_location;
- for (unsigned i = 0; i < attrib_count; ++i) {
- LLVMValueRef data[4];
+ assert(attrib_count == 1);
- if (nir->info.stage == MESA_SHADER_VERTEX)
- declare_nir_input_vs(ctx, variable, i, data);
- else if (nir->info.stage == MESA_SHADER_FRAGMENT)
- declare_nir_input_fs(ctx, variable, i,
&fs_attr_idx, data);
+ LLVMValueRef data[4];
+ unsigned loc = variable->data.location;
- for (unsigned chan = 0; chan < 4; chan++) {
- ctx->inputs[input_idx + chan] =
- LLVMBuildBitCast(ctx->ac.builder,
data[chan], ctx->ac.i32, "");
- }
+ /* Packed components share the same location so skip
+ * them if we have already processed the location.
+ */
+ if (processed_outputs & ((uint64_t)1 << loc))
+ continue;
+
+ if (nir->info.stage == MESA_SHADER_VERTEX)
+ declare_nir_input_vs(ctx, variable, data);
+ else if (nir->info.stage == MESA_SHADER_FRAGMENT)
+ declare_nir_input_fs(ctx, variable, input_idx / 4,
data);
+
+ for (unsigned chan = 0; chan < 4; chan++) {
+ ctx->inputs[input_idx + chan] =
+ LLVMBuildBitCast(ctx->ac.builder, data[chan],
ctx->ac.i32, "");
}
+ processed_outputs |= ((uint64_t)1 << loc);
}
ctx->abi.inputs = &ctx->inputs[0];
ctx->abi.load_sampler_desc = si_nir_load_sampler_desc;
ctx->abi.clamp_shadow_reference = true;
ctx->num_samplers = util_last_bit(info->samplers_declared);
ctx->num_images = util_last_bit(info->images_declared);
ac_nir_translate(&ctx->ac, &ctx->abi, nir, NULL);
--
2.14.3
_______________________________________________
mesa-dev mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/mesa-dev