Signed-off-by: Alyssa Rosenzweig <aly...@rosenzweig.io> --- .../panfrost/midgard/midgard_compile.c | 151 +++--------------- .../panfrost/midgard/midgard_nir_algebraic.py | 12 +- 2 files changed, 37 insertions(+), 126 deletions(-)
diff --git a/src/gallium/drivers/panfrost/midgard/midgard_compile.c b/src/gallium/drivers/panfrost/midgard/midgard_compile.c index 25316cab053..4ed36c53dfc 100644 --- a/src/gallium/drivers/panfrost/midgard/midgard_compile.c +++ b/src/gallium/drivers/panfrost/midgard/midgard_compile.c @@ -715,58 +715,6 @@ midgard_nir_lower_fdot2_body(nir_builder *b, nir_alu_instr *alu) nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa, nir_src_for_ssa(sum)); } -/* Lower csel with mixed condition channels to mulitple csel instructions. For - * context, the csel ops on Midgard are vector in *outputs*, but not in - * *conditions*. So, if the condition is e.g. yyyy, a single op can select a - * vec4. But if the condition is e.g. xyzw, four ops are needed as the ISA - * can't cope with the divergent channels.*/ - -static void -midgard_nir_lower_mixed_csel_body(nir_builder *b, nir_alu_instr *alu) -{ - if (alu->op != nir_op_bcsel) - return; - - b->cursor = nir_before_instr(&alu->instr); - - /* Must be run before registering */ - assert(alu->dest.dest.is_ssa); - - /* Check for mixed condition */ - - unsigned comp = alu->src[0].swizzle[0]; - unsigned nr_components = alu->dest.dest.ssa.num_components; - - bool mixed = false; - - for (unsigned c = 1; c < nr_components; ++c) - mixed |= (alu->src[0].swizzle[c] != comp); - - if (!mixed) - return; - - /* We're mixed, so lower */ - - assert(nr_components <= 4); - nir_ssa_def *results[4]; - - nir_ssa_def *cond = nir_ssa_for_alu_src(b, alu, 0); - nir_ssa_def *choice0 = nir_ssa_for_alu_src(b, alu, 1); - nir_ssa_def *choice1 = nir_ssa_for_alu_src(b, alu, 2); - - for (unsigned c = 0; c < nr_components; ++c) { - results[c] = nir_bcsel(b, - nir_channel(b, cond, c), - nir_channel(b, choice0, c), - nir_channel(b, choice1, c)); - } - - /* Replace with our scalarized version */ - - nir_ssa_def *result = nir_vec(b, results, nr_components); - nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa, nir_src_for_ssa(result)); -} - static int 
midgard_nir_sysval_for_intrinsic(nir_intrinsic_instr *instr) { @@ -851,36 +799,6 @@ midgard_nir_lower_fdot2(nir_shader *shader) return progress; } -static bool -midgard_nir_lower_mixed_csel(nir_shader *shader) -{ - bool progress = false; - - nir_foreach_function(function, shader) { - if (!function->impl) continue; - - nir_builder _b; - nir_builder *b = &_b; - nir_builder_init(b, function->impl); - - nir_foreach_block(block, function->impl) { - nir_foreach_instr_safe(instr, block) { - if (instr->type != nir_instr_type_alu) continue; - - nir_alu_instr *alu = nir_instr_as_alu(instr); - midgard_nir_lower_mixed_csel_body(b, alu); - - progress |= true; - } - } - - nir_metadata_preserve(function->impl, nir_metadata_block_index | nir_metadata_dominance); - - } - - return progress; -} - static void optimise_nir(nir_shader *nir) { @@ -892,7 +810,6 @@ optimise_nir(nir_shader *nir) NIR_PASS(progress, nir, nir_lower_regs_to_ssa); NIR_PASS(progress, nir, midgard_nir_lower_fdot2); - NIR_PASS(progress, nir, midgard_nir_lower_mixed_csel); nir_lower_tex_options lower_tex_options = { .lower_rect = true @@ -957,6 +874,11 @@ optimise_nir(nir_shader *nir) } while (progress); NIR_PASS(progress, nir, nir_opt_algebraic_late); + + /* We implement booleans as 32-bit 0/~0 */ + NIR_PASS(progress, nir, nir_lower_bool_to_int32); + + /* Now that booleans are lowered, we can run our late opts */ NIR_PASS(progress, nir, midgard_nir_lower_algebraic_late); /* Lower mods for float ops only. 
Integer ops don't support modifiers @@ -967,9 +889,6 @@ optimise_nir(nir_shader *nir) NIR_PASS(progress, nir, nir_copy_prop); NIR_PASS(progress, nir, nir_opt_dce); - /* We implement booleans as 32-bit 0/~0 */ - NIR_PASS(progress, nir, nir_lower_bool_to_int32); - /* Take us out of SSA */ NIR_PASS(progress, nir, nir_lower_locals_to_regs); NIR_PASS(progress, nir, nir_convert_from_ssa, true); @@ -1189,7 +1108,6 @@ emit_indirect_offset(compiler_context *ctx, nir_src *src) case nir_op_##nir: \ op = midgard_alu_op_##_op; \ break; - static bool nir_is_fzero_constant(nir_src src) { @@ -1332,53 +1250,36 @@ emit_alu(compiler_context *ctx, nir_alu_instr *instr) break; } - /* For a few special csel cases not handled by NIR, we can opt to - * bitwise. Otherwise, we emit the condition and do a real csel */ - case nir_op_b32csel: { - if (nir_is_fzero_constant(instr->src[2].src)) { - /* (b ? v : 0) = (b & v) */ - op = midgard_alu_op_iand; - nr_inputs = 2; - } else if (nir_is_fzero_constant(instr->src[1].src)) { - /* (b ? 0 : v) = (!b ? v : 0) = (~b & v) = (v & ~b) */ - op = midgard_alu_op_iandnot; - nr_inputs = 2; - instr->src[1] = instr->src[0]; - instr->src[0] = instr->src[2]; - } else { - /* Midgard features both fcsel and icsel, depending on - * the type of the arguments/output. However, as long - * as we're careful we can _always_ use icsel and - * _never_ need fcsel, since the latter does additional - * floating-point-specific processing whereas the - * former just moves bits on the wire. It's not obvious - * why these are separate opcodes, save for the ability - * to do things like sat/pos/abs/neg for free */ - - op = midgard_alu_op_icsel; + /* Midgard features both fcsel and icsel, depending on + * the type of the arguments/output. However, as long + * as we're careful we can _always_ use icsel and + * _never_ need fcsel, since the latter does additional + * floating-point-specific processing whereas the + * former just moves bits on the wire. 
It's not obvious + * why these are separate opcodes, save for the ability + * to do things like sat/pos/abs/neg for free */ - /* csel works as a two-arg in Midgard, since the condition is hardcoded in r31.w */ - nr_inputs = 2; + op = midgard_alu_op_icsel; - /* Figure out which component the condition is in */ + /* csel works as a two-arg in Midgard, since the condition is hardcoded in r31.w */ + nr_inputs = 2; - unsigned comp = instr->src[0].swizzle[0]; + /* Mixed should have been lowered */ - /* Make sure NIR isn't throwing a mixed condition at us */ + unsigned comp = instr->src[0].swizzle[0]; - for (unsigned c = 1; c < nr_components; ++c) - assert(instr->src[0].swizzle[c] == comp); + for (unsigned c = 1; c < nr_components; ++c) + assert(instr->src[0].swizzle[c] == comp); - /* Emit the condition into r31.w */ - emit_condition(ctx, &instr->src[0].src, false, comp); + /* Emit the condition into r31.w */ + emit_condition(ctx, &instr->src[0].src, false, comp); - /* The condition is the first argument; move the other - * arguments up one to be a binary instruction for - * Midgard */ + /* The condition is the first argument; move the other + * arguments up one to be a binary instruction for + * Midgard */ - memmove(instr->src, instr->src + 1, 2 * sizeof(nir_alu_src)); - } + memmove(instr->src, instr->src + 1, 2 * sizeof(nir_alu_src)); break; } diff --git a/src/gallium/drivers/panfrost/midgard/midgard_nir_algebraic.py b/src/gallium/drivers/panfrost/midgard/midgard_nir_algebraic.py index 7c8cd06feed..6f93e95b517 100644 --- a/src/gallium/drivers/panfrost/midgard/midgard_nir_algebraic.py +++ b/src/gallium/drivers/panfrost/midgard/midgard_nir_algebraic.py @@ -28,6 +28,7 @@ import math a = 'a' b = 'b' +c = 'c' algebraic_late = [ # ineg must be lowered late, but only for integers; floats will try to @@ -35,8 +36,17 @@ algebraic_late = [ # a more standard lower_negate approach (('ineg', a), ('isub', 0, a)), -] + # These two special cases save space/an op compared to the actual csel op 
+ + (('b32csel', a, 'b@32', 0), ('iand', a, b)), + (('b32csel', a, 0, 'b@32'), ('iand', ('inot', a), b)), + + # Our csel op is scalar, but our bitwise is vector + (('b32csel', 'a(is_non_scalar_swizzle)', 'b@32', 'c@32'), + ('ior', ('iand', a, b), + ('iand', ('inot', a), c))), +] # Midgard scales fsin/fcos arguments by pi. # Pass must be run only once, after the main loop -- 2.20.1 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev