Module: Mesa Branch: main Commit: f974eacab3d239e9cd3c3f29f0b2589eb8563386 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=f974eacab3d239e9cd3c3f29f0b2589eb8563386
Author: Francisco Jerez <curroje...@riseup.net> Date: Fri Jul 22 17:11:52 2022 -0700 intel/compiler/xe2: Fix for the removal of most predication modes. Reworks: * Remove changes to fixup_nomask workaround since it applies only for Gfx12 family. Reviewed-by: Caio Oliveira <caio.olive...@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26860> --- src/intel/compiler/brw_disasm.c | 11 ++++++- src/intel/compiler/brw_eu_defines.h | 2 ++ src/intel/compiler/brw_fs.cpp | 41 +++++++++++++----------- src/intel/compiler/brw_fs_nir.cpp | 43 +++++++++++++++----------- src/intel/compiler/brw_lower_logical_sends.cpp | 1 + 5 files changed, 61 insertions(+), 37 deletions(-) diff --git a/src/intel/compiler/brw_disasm.c b/src/intel/compiler/brw_disasm.c index 02c7eaa79e4..c56d4e554ca 100644 --- a/src/intel/compiler/brw_disasm.c +++ b/src/intel/compiler/brw_disasm.c @@ -227,6 +227,12 @@ static const char *const pred_ctrl_align1[16] = { [BRW_PREDICATE_ALIGN1_ALL32H] = ".all32h", }; +static const char *const xe2_pred_ctrl[4] = { + [BRW_PREDICATE_NORMAL] = "", + [XE2_PREDICATE_ANY] = ".any", + [XE2_PREDICATE_ALL] = ".all", +}; + static const char *const thread_ctrl[4] = { [BRW_THREAD_NORMAL] = "", [BRW_THREAD_ATOMIC] = "atomic", @@ -2059,7 +2065,10 @@ brw_disassemble_inst(FILE *file, const struct brw_isa_info *isa, format(file, "f%"PRIu64".%"PRIu64, devinfo->ver >= 7 ? brw_inst_flag_reg_nr(devinfo, inst) : 0, brw_inst_flag_subreg_nr(devinfo, inst)); - if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { + if (devinfo->ver >= 20) { + err |= control(file, "predicate control", xe2_pred_ctrl, + brw_inst_pred_control(devinfo, inst), NULL); + } else if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { err |= control(file, "predicate control align1", pred_ctrl_align1, brw_inst_pred_control(devinfo, inst), NULL); } else { diff --git a/src/intel/compiler/brw_eu_defines.h b/src/intel/compiler/brw_eu_defines.h index 820ac0f4ef5..495c6662cfc 100644 --- a/src/intel/compiler/brw_eu_defines.h +++ b/src/intel/compiler/brw_eu_defines.h @@ -1032,6 +1032,8 @@ enum ENUM_PACKED brw_predicate { BRW_PREDICATE_ALIGN16_REPLICATE_W = 5, BRW_PREDICATE_ALIGN16_ANY4H = 6, BRW_PREDICATE_ALIGN16_ALL4H = 7, + XE2_PREDICATE_ANY = 2, + XE2_PREDICATE_ALL = 3 }; enum ENUM_PACKED brw_reg_file { diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index cdf4db7a67f..c91fb1dfe8b 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -984,22 +984,26 @@ fs_inst::size_read(int arg) const namespace { unsigned - predicate_width(brw_predicate predicate) + predicate_width(const intel_device_info *devinfo, brw_predicate predicate) { - switch (predicate) { - case BRW_PREDICATE_NONE: return 1; - case BRW_PREDICATE_NORMAL: return 1; - case BRW_PREDICATE_ALIGN1_ANY2H: return 2; - case BRW_PREDICATE_ALIGN1_ALL2H: return 2; - case BRW_PREDICATE_ALIGN1_ANY4H: return 4; - case BRW_PREDICATE_ALIGN1_ALL4H: return 4; - case BRW_PREDICATE_ALIGN1_ANY8H: return 8; - case BRW_PREDICATE_ALIGN1_ALL8H: return 8; - case BRW_PREDICATE_ALIGN1_ANY16H: return 16; - case BRW_PREDICATE_ALIGN1_ALL16H: return 16; - case BRW_PREDICATE_ALIGN1_ANY32H: return 32; - case BRW_PREDICATE_ALIGN1_ALL32H: return 32; - default: unreachable("Unsupported predicate"); + if (devinfo->ver >= 20) { + return 1; + } else { + switch (predicate) { + case BRW_PREDICATE_NONE: return 1; + case BRW_PREDICATE_NORMAL: return 1; + case BRW_PREDICATE_ALIGN1_ANY2H: return 2; + case BRW_PREDICATE_ALIGN1_ALL2H: return 2; + case BRW_PREDICATE_ALIGN1_ANY4H: return 4; + case BRW_PREDICATE_ALIGN1_ALL4H: return 4; + case BRW_PREDICATE_ALIGN1_ANY8H: return 8; + case BRW_PREDICATE_ALIGN1_ALL8H: return 8; + case BRW_PREDICATE_ALIGN1_ANY16H: return 16; + case BRW_PREDICATE_ALIGN1_ALL16H: return 16; + case BRW_PREDICATE_ALIGN1_ANY32H: return 32; + case BRW_PREDICATE_ALIGN1_ALL32H: return 32; + default: unreachable("Unsupported predicate"); + } } } @@ -1039,15 +1043,15 @@ namespace { unsigned fs_inst::flags_read(const intel_device_info *devinfo) const { - if (predicate == BRW_PREDICATE_ALIGN1_ANYV || - predicate == BRW_PREDICATE_ALIGN1_ALLV) { + if (devinfo->ver < 20 && (predicate == BRW_PREDICATE_ALIGN1_ANYV || + predicate == BRW_PREDICATE_ALIGN1_ALLV)) { /* The vertical predication modes combine corresponding bits from * f0.0 and f1.0 on Gfx7+, and f0.0 and f0.1 on older hardware. */ const unsigned shift = devinfo->ver >= 7 ? 4 : 2; return flag_mask(this, 1) << shift | flag_mask(this, 1); } else if (predicate) { - return flag_mask(this, predicate_width(predicate)); + return flag_mask(this, predicate_width(devinfo, predicate)); } else { unsigned mask = 0; for (int i = 0; i < sources; i++) { @@ -4607,6 +4611,7 @@ brw_emit_predicate_on_sample_mask(const fs_builder &bld, fs_inst *inst) assert(inst->predicate == BRW_PREDICATE_NORMAL); assert(!inst->predicate_inverse); assert(inst->flag_subreg == 0); + assert(s.devinfo->ver < 20); /* Combine the sample mask with the existing predicate by using a * vertical predication mode. */ diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index ccdd0fe7db8..ed67e626f9b 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -4176,7 +4176,8 @@ fs_nir_emit_fs_intrinsic(nir_to_brw_state &ntb, /* Only jump when the whole quad is demoted. For historical * reasons this is also used for discard. */ - jump->predicate = BRW_PREDICATE_ALIGN1_ANY4H; + jump->predicate = (devinfo->ver >= 20 ? XE2_PREDICATE_ANY : + BRW_PREDICATE_ALIGN1_ANY4H); } if (devinfo->ver < 7) @@ -7167,7 +7168,7 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, unreachable("not reached"); case nir_intrinsic_vote_any: { - const fs_builder ubld = bld.exec_all().group(1, 0); + const fs_builder ubld1 = bld.exec_all().group(1, 0); /* The any/all predicates do not consider channel enables. To prevent * dead channels from affecting the result, we initialize the flag with @@ -7175,10 +7176,10 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, */ if (s.dispatch_width == 32) { /* For SIMD32, we use a UD type so we fill both f0.0 and f0.1. */ - ubld.MOV(retype(brw_flag_reg(0, 0), BRW_REGISTER_TYPE_UD), - brw_imm_ud(0)); + ubld1.MOV(retype(brw_flag_reg(0, 0), BRW_REGISTER_TYPE_UD), + brw_imm_ud(0)); } else { - ubld.MOV(brw_flag_reg(0, 0), brw_imm_uw(0)); + ubld1.MOV(brw_flag_reg(0, 0), brw_imm_uw(0)); } bld.CMP(bld.null_reg_d(), get_nir_src(ntb, instr->src[0]), brw_imm_d(0), BRW_CONDITIONAL_NZ); @@ -7188,18 +7189,20 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, * getting garbage in the second half. Work around this by using a pair * of 1-wide MOVs and scattering the result. */ + const fs_builder ubld = devinfo->ver >= 20 ? bld.exec_all() : ubld1; fs_reg res1 = ubld.vgrf(BRW_REGISTER_TYPE_D); ubld.MOV(res1, brw_imm_d(0)); - set_predicate(s.dispatch_width == 8 ? BRW_PREDICATE_ALIGN1_ANY8H : + set_predicate(devinfo->ver >= 20 ? XE2_PREDICATE_ANY : + s.dispatch_width == 8 ? BRW_PREDICATE_ALIGN1_ANY8H : s.dispatch_width == 16 ? BRW_PREDICATE_ALIGN1_ANY16H : - BRW_PREDICATE_ALIGN1_ANY32H, + BRW_PREDICATE_ALIGN1_ANY32H, ubld.MOV(res1, brw_imm_d(-1))); bld.MOV(retype(dest, BRW_REGISTER_TYPE_D), component(res1, 0)); break; } case nir_intrinsic_vote_all: { - const fs_builder ubld = bld.exec_all().group(1, 0); + const fs_builder ubld1 = bld.exec_all().group(1, 0); /* The any/all predicates do not consider channel enables. To prevent * dead channels from affecting the result, we initialize the flag with @@ -7207,10 +7210,10 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, */ if (s.dispatch_width == 32) { /* For SIMD32, we use a UD type so we fill both f0.0 and f0.1. */ - ubld.MOV(retype(brw_flag_reg(0, 0), BRW_REGISTER_TYPE_UD), - brw_imm_ud(0xffffffff)); + ubld1.MOV(retype(brw_flag_reg(0, 0), BRW_REGISTER_TYPE_UD), + brw_imm_ud(0xffffffff)); } else { - ubld.MOV(brw_flag_reg(0, 0), brw_imm_uw(0xffff)); + ubld1.MOV(brw_flag_reg(0, 0), brw_imm_uw(0xffff)); } bld.CMP(bld.null_reg_d(), get_nir_src(ntb, instr->src[0]), brw_imm_d(0), BRW_CONDITIONAL_NZ); @@ -7220,11 +7223,13 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, * getting garbage in the second half. Work around this by using a pair * of 1-wide MOVs and scattering the result. */ + const fs_builder ubld = devinfo->ver >= 20 ? bld.exec_all() : ubld1; fs_reg res1 = ubld.vgrf(BRW_REGISTER_TYPE_D); ubld.MOV(res1, brw_imm_d(0)); - set_predicate(s.dispatch_width == 8 ? BRW_PREDICATE_ALIGN1_ALL8H : + set_predicate(devinfo->ver >= 20 ? XE2_PREDICATE_ALL : + s.dispatch_width == 8 ? BRW_PREDICATE_ALIGN1_ALL8H : s.dispatch_width == 16 ? BRW_PREDICATE_ALIGN1_ALL16H : - BRW_PREDICATE_ALIGN1_ALL32H, + BRW_PREDICATE_ALIGN1_ALL32H, ubld.MOV(res1, brw_imm_d(-1))); bld.MOV(retype(dest, BRW_REGISTER_TYPE_D), component(res1, 0)); @@ -7240,7 +7245,7 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, } fs_reg uniformized = bld.emit_uniformize(value); - const fs_builder ubld = bld.exec_all().group(1, 0); + const fs_builder ubld1 = bld.exec_all().group(1, 0); /* The any/all predicates do not consider channel enables. To prevent * dead channels from affecting the result, we initialize the flag with @@ -7248,10 +7253,10 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, */ if (s.dispatch_width == 32) { /* For SIMD32, we use a UD type so we fill both f0.0 and f0.1. */ - ubld.MOV(retype(brw_flag_reg(0, 0), BRW_REGISTER_TYPE_UD), + ubld1.MOV(retype(brw_flag_reg(0, 0), BRW_REGISTER_TYPE_UD), brw_imm_ud(0xffffffff)); } else { - ubld.MOV(brw_flag_reg(0, 0), brw_imm_uw(0xffff)); + ubld1.MOV(brw_flag_reg(0, 0), brw_imm_uw(0xffff)); } bld.CMP(bld.null_reg_d(), value, uniformized, BRW_CONDITIONAL_Z); @@ -7261,11 +7266,13 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, * getting garbage in the second half. Work around this by using a pair * of 1-wide MOVs and scattering the result. */ + const fs_builder ubld = devinfo->ver >= 20 ? bld.exec_all() : ubld1; fs_reg res1 = ubld.vgrf(BRW_REGISTER_TYPE_D); ubld.MOV(res1, brw_imm_d(0)); - set_predicate(s.dispatch_width == 8 ? BRW_PREDICATE_ALIGN1_ALL8H : + set_predicate(devinfo->ver >= 20 ? XE2_PREDICATE_ALL : + s.dispatch_width == 8 ? BRW_PREDICATE_ALIGN1_ALL8H : s.dispatch_width == 16 ? BRW_PREDICATE_ALIGN1_ALL16H : - BRW_PREDICATE_ALIGN1_ALL32H, + BRW_PREDICATE_ALIGN1_ALL32H, ubld.MOV(res1, brw_imm_d(-1))); bld.MOV(retype(dest, BRW_REGISTER_TYPE_D), component(res1, 0)); diff --git a/src/intel/compiler/brw_lower_logical_sends.cpp b/src/intel/compiler/brw_lower_logical_sends.cpp index c284ed7870b..7ea4d8fb09b 100644 --- a/src/intel/compiler/brw_lower_logical_sends.cpp +++ b/src/intel/compiler/brw_lower_logical_sends.cpp @@ -1514,6 +1514,7 @@ emit_predicate_on_vector_mask(const fs_builder &bld, fs_inst *inst) assert(inst->predicate == BRW_PREDICATE_NORMAL); assert(!inst->predicate_inverse); assert(inst->flag_subreg == 0); + assert(s.devinfo->ver < 20); /* Combine the vector mask with the existing predicate by using a * vertical predication mode. */