xe2: Fix for the removal of most predication modes.

GitLab Mirror Fri, 12 Jan 2024 12:58:33 -0800

Module: Mesa
Branch: main
Commit: f974eacab3d239e9cd3c3f29f0b2589eb8563386
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=f974eacab3d239e9cd3c3f29f0b2589eb8563386


Author: Francisco Jerez <[email protected]>
Date:   Fri Jul 22 17:11:52 2022 -0700

intel/compiler/xe2: Fix for the removal of most predication modes.

Reworks:
* Remove changes to the fixup_nomask workaround, since it applies only to
  the Gfx12 family.

Reviewed-by: Caio Oliveira <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26860>

---

 src/intel/compiler/brw_disasm.c                | 11 ++++++-
 src/intel/compiler/brw_eu_defines.h            |  2 ++
 src/intel/compiler/brw_fs.cpp                  | 41 +++++++++++++-----------
 src/intel/compiler/brw_fs_nir.cpp              | 43 +++++++++++++++-----------
 src/intel/compiler/brw_lower_logical_sends.cpp |  1 +
 5 files changed, 61 insertions(+), 37 deletions(-)

diff --git a/src/intel/compiler/brw_disasm.c b/src/intel/compiler/brw_disasm.c
index 02c7eaa79e4..c56d4e554ca 100644
--- a/src/intel/compiler/brw_disasm.c
+++ b/src/intel/compiler/brw_disasm.c
@@ -227,6 +227,12 @@ static const char *const pred_ctrl_align1[16] = {
    [BRW_PREDICATE_ALIGN1_ALL32H] = ".all32h",
 };
 
+static const char *const xe2_pred_ctrl[4] = {
+   [BRW_PREDICATE_NORMAL]        = "",
+   [XE2_PREDICATE_ANY]           = ".any",
+   [XE2_PREDICATE_ALL]           = ".all",
+};
+
 static const char *const thread_ctrl[4] = {
    [BRW_THREAD_NORMAL] = "",
    [BRW_THREAD_ATOMIC] = "atomic",
@@ -2059,7 +2065,10 @@ brw_disassemble_inst(FILE *file, const struct brw_isa_info *isa,
       format(file, "f%"PRIu64".%"PRIu64,
              devinfo->ver >= 7 ? brw_inst_flag_reg_nr(devinfo, inst) : 0,
              brw_inst_flag_subreg_nr(devinfo, inst));
-      if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
+      if (devinfo->ver >= 20) {
+         err |= control(file, "predicate control", xe2_pred_ctrl,
+                        brw_inst_pred_control(devinfo, inst), NULL);
+      } else if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
          err |= control(file, "predicate control align1", pred_ctrl_align1,
                         brw_inst_pred_control(devinfo, inst), NULL);
       } else {
diff --git a/src/intel/compiler/brw_eu_defines.h b/src/intel/compiler/brw_eu_defines.h
index 820ac0f4ef5..495c6662cfc 100644
--- a/src/intel/compiler/brw_eu_defines.h
+++ b/src/intel/compiler/brw_eu_defines.h
@@ -1032,6 +1032,8 @@ enum ENUM_PACKED brw_predicate {
    BRW_PREDICATE_ALIGN16_REPLICATE_W =  5,
    BRW_PREDICATE_ALIGN16_ANY4H       =  6,
    BRW_PREDICATE_ALIGN16_ALL4H       =  7,
+   XE2_PREDICATE_ANY = 2,
+   XE2_PREDICATE_ALL = 3
 };
 
 enum ENUM_PACKED brw_reg_file {
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index cdf4db7a67f..c91fb1dfe8b 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -984,22 +984,26 @@ fs_inst::size_read(int arg) const
 
 namespace {
    unsigned
-   predicate_width(brw_predicate predicate)
+   predicate_width(const intel_device_info *devinfo, brw_predicate predicate)
    {
-      switch (predicate) {
-      case BRW_PREDICATE_NONE:            return 1;
-      case BRW_PREDICATE_NORMAL:          return 1;
-      case BRW_PREDICATE_ALIGN1_ANY2H:    return 2;
-      case BRW_PREDICATE_ALIGN1_ALL2H:    return 2;
-      case BRW_PREDICATE_ALIGN1_ANY4H:    return 4;
-      case BRW_PREDICATE_ALIGN1_ALL4H:    return 4;
-      case BRW_PREDICATE_ALIGN1_ANY8H:    return 8;
-      case BRW_PREDICATE_ALIGN1_ALL8H:    return 8;
-      case BRW_PREDICATE_ALIGN1_ANY16H:   return 16;
-      case BRW_PREDICATE_ALIGN1_ALL16H:   return 16;
-      case BRW_PREDICATE_ALIGN1_ANY32H:   return 32;
-      case BRW_PREDICATE_ALIGN1_ALL32H:   return 32;
-      default: unreachable("Unsupported predicate");
+      if (devinfo->ver >= 20) {
+         return 1;
+      } else {
+         switch (predicate) {
+         case BRW_PREDICATE_NONE:            return 1;
+         case BRW_PREDICATE_NORMAL:          return 1;
+         case BRW_PREDICATE_ALIGN1_ANY2H:    return 2;
+         case BRW_PREDICATE_ALIGN1_ALL2H:    return 2;
+         case BRW_PREDICATE_ALIGN1_ANY4H:    return 4;
+         case BRW_PREDICATE_ALIGN1_ALL4H:    return 4;
+         case BRW_PREDICATE_ALIGN1_ANY8H:    return 8;
+         case BRW_PREDICATE_ALIGN1_ALL8H:    return 8;
+         case BRW_PREDICATE_ALIGN1_ANY16H:   return 16;
+         case BRW_PREDICATE_ALIGN1_ALL16H:   return 16;
+         case BRW_PREDICATE_ALIGN1_ANY32H:   return 32;
+         case BRW_PREDICATE_ALIGN1_ALL32H:   return 32;
+         default: unreachable("Unsupported predicate");
+         }
       }
    }
 
@@ -1039,15 +1043,15 @@ namespace {
 unsigned
 fs_inst::flags_read(const intel_device_info *devinfo) const
 {
-   if (predicate == BRW_PREDICATE_ALIGN1_ANYV ||
-       predicate == BRW_PREDICATE_ALIGN1_ALLV) {
+   if (devinfo->ver < 20 && (predicate == BRW_PREDICATE_ALIGN1_ANYV ||
+                             predicate == BRW_PREDICATE_ALIGN1_ALLV)) {
       /* The vertical predication modes combine corresponding bits from
        * f0.0 and f1.0 on Gfx7+, and f0.0 and f0.1 on older hardware.
        */
       const unsigned shift = devinfo->ver >= 7 ? 4 : 2;
       return flag_mask(this, 1) << shift | flag_mask(this, 1);
    } else if (predicate) {
-      return flag_mask(this, predicate_width(predicate));
+      return flag_mask(this, predicate_width(devinfo, predicate));
    } else {
       unsigned mask = 0;
       for (int i = 0; i < sources; i++) {
@@ -4607,6 +4611,7 @@ brw_emit_predicate_on_sample_mask(const fs_builder &bld, fs_inst *inst)
       assert(inst->predicate == BRW_PREDICATE_NORMAL);
       assert(!inst->predicate_inverse);
       assert(inst->flag_subreg == 0);
+      assert(s.devinfo->ver < 20);
       /* Combine the sample mask with the existing predicate by using a
        * vertical predication mode.
        */
diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp
index ccdd0fe7db8..ed67e626f9b 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -4176,7 +4176,8 @@ fs_nir_emit_fs_intrinsic(nir_to_brw_state &ntb,
          /* Only jump when the whole quad is demoted.  For historical
           * reasons this is also used for discard.
           */
-         jump->predicate = BRW_PREDICATE_ALIGN1_ANY4H;
+         jump->predicate = (devinfo->ver >= 20 ? XE2_PREDICATE_ANY :
+                            BRW_PREDICATE_ALIGN1_ANY4H);
       }
 
       if (devinfo->ver < 7)
@@ -7167,7 +7168,7 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb,
       unreachable("not reached");
 
    case nir_intrinsic_vote_any: {
-      const fs_builder ubld = bld.exec_all().group(1, 0);
+      const fs_builder ubld1 = bld.exec_all().group(1, 0);
 
       /* The any/all predicates do not consider channel enables. To prevent
        * dead channels from affecting the result, we initialize the flag with
@@ -7175,10 +7176,10 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb,
        */
       if (s.dispatch_width == 32) {
          /* For SIMD32, we use a UD type so we fill both f0.0 and f0.1. */
-         ubld.MOV(retype(brw_flag_reg(0, 0), BRW_REGISTER_TYPE_UD),
-                         brw_imm_ud(0));
+         ubld1.MOV(retype(brw_flag_reg(0, 0), BRW_REGISTER_TYPE_UD),
+                   brw_imm_ud(0));
       } else {
-         ubld.MOV(brw_flag_reg(0, 0), brw_imm_uw(0));
+         ubld1.MOV(brw_flag_reg(0, 0), brw_imm_uw(0));
       }
       bld.CMP(bld.null_reg_d(), get_nir_src(ntb, instr->src[0]), brw_imm_d(0), BRW_CONDITIONAL_NZ);
 
@@ -7188,18 +7189,20 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb,
        * getting garbage in the second half.  Work around this by using a pair
        * of 1-wide MOVs and scattering the result.
        */
+      const fs_builder ubld = devinfo->ver >= 20 ? bld.exec_all() : ubld1;
       fs_reg res1 = ubld.vgrf(BRW_REGISTER_TYPE_D);
       ubld.MOV(res1, brw_imm_d(0));
-      set_predicate(s.dispatch_width == 8  ? BRW_PREDICATE_ALIGN1_ANY8H :
+      set_predicate(devinfo->ver >= 20 ? XE2_PREDICATE_ANY :
+                    s.dispatch_width == 8  ? BRW_PREDICATE_ALIGN1_ANY8H :
                     s.dispatch_width == 16 ? BRW_PREDICATE_ALIGN1_ANY16H :
-                                              BRW_PREDICATE_ALIGN1_ANY32H,
+                                             BRW_PREDICATE_ALIGN1_ANY32H,
                     ubld.MOV(res1, brw_imm_d(-1)));
 
       bld.MOV(retype(dest, BRW_REGISTER_TYPE_D), component(res1, 0));
       break;
    }
    case nir_intrinsic_vote_all: {
-      const fs_builder ubld = bld.exec_all().group(1, 0);
+      const fs_builder ubld1 = bld.exec_all().group(1, 0);
 
       /* The any/all predicates do not consider channel enables. To prevent
        * dead channels from affecting the result, we initialize the flag with
@@ -7207,10 +7210,10 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb,
        */
       if (s.dispatch_width == 32) {
          /* For SIMD32, we use a UD type so we fill both f0.0 and f0.1. */
-         ubld.MOV(retype(brw_flag_reg(0, 0), BRW_REGISTER_TYPE_UD),
-                         brw_imm_ud(0xffffffff));
+         ubld1.MOV(retype(brw_flag_reg(0, 0), BRW_REGISTER_TYPE_UD),
+                   brw_imm_ud(0xffffffff));
       } else {
-         ubld.MOV(brw_flag_reg(0, 0), brw_imm_uw(0xffff));
+         ubld1.MOV(brw_flag_reg(0, 0), brw_imm_uw(0xffff));
       }
       bld.CMP(bld.null_reg_d(), get_nir_src(ntb, instr->src[0]), brw_imm_d(0), BRW_CONDITIONAL_NZ);
 
@@ -7220,11 +7223,13 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb,
        * getting garbage in the second half.  Work around this by using a pair
        * of 1-wide MOVs and scattering the result.
        */
+      const fs_builder ubld = devinfo->ver >= 20 ? bld.exec_all() : ubld1;
       fs_reg res1 = ubld.vgrf(BRW_REGISTER_TYPE_D);
       ubld.MOV(res1, brw_imm_d(0));
-      set_predicate(s.dispatch_width == 8  ? BRW_PREDICATE_ALIGN1_ALL8H :
+      set_predicate(devinfo->ver >= 20 ? XE2_PREDICATE_ALL :
+                    s.dispatch_width == 8  ? BRW_PREDICATE_ALIGN1_ALL8H :
                     s.dispatch_width == 16 ? BRW_PREDICATE_ALIGN1_ALL16H :
-                                              BRW_PREDICATE_ALIGN1_ALL32H,
+                                             BRW_PREDICATE_ALIGN1_ALL32H,
                     ubld.MOV(res1, brw_imm_d(-1)));
 
       bld.MOV(retype(dest, BRW_REGISTER_TYPE_D), component(res1, 0));
@@ -7240,7 +7245,7 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb,
       }
 
       fs_reg uniformized = bld.emit_uniformize(value);
-      const fs_builder ubld = bld.exec_all().group(1, 0);
+      const fs_builder ubld1 = bld.exec_all().group(1, 0);
 
       /* The any/all predicates do not consider channel enables. To prevent
        * dead channels from affecting the result, we initialize the flag with
@@ -7248,10 +7253,10 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb,
        */
       if (s.dispatch_width == 32) {
          /* For SIMD32, we use a UD type so we fill both f0.0 and f0.1. */
-         ubld.MOV(retype(brw_flag_reg(0, 0), BRW_REGISTER_TYPE_UD),
+         ubld1.MOV(retype(brw_flag_reg(0, 0), BRW_REGISTER_TYPE_UD),
                          brw_imm_ud(0xffffffff));
       } else {
-         ubld.MOV(brw_flag_reg(0, 0), brw_imm_uw(0xffff));
+         ubld1.MOV(brw_flag_reg(0, 0), brw_imm_uw(0xffff));
       }
       bld.CMP(bld.null_reg_d(), value, uniformized, BRW_CONDITIONAL_Z);
 
@@ -7261,11 +7266,13 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb,
        * getting garbage in the second half.  Work around this by using a pair
        * of 1-wide MOVs and scattering the result.
        */
+      const fs_builder ubld = devinfo->ver >= 20 ? bld.exec_all() : ubld1;
       fs_reg res1 = ubld.vgrf(BRW_REGISTER_TYPE_D);
       ubld.MOV(res1, brw_imm_d(0));
-      set_predicate(s.dispatch_width == 8  ? BRW_PREDICATE_ALIGN1_ALL8H :
+      set_predicate(devinfo->ver >= 20 ? XE2_PREDICATE_ALL :
+                    s.dispatch_width == 8  ? BRW_PREDICATE_ALIGN1_ALL8H :
                     s.dispatch_width == 16 ? BRW_PREDICATE_ALIGN1_ALL16H :
-                                              BRW_PREDICATE_ALIGN1_ALL32H,
+                                             BRW_PREDICATE_ALIGN1_ALL32H,
                     ubld.MOV(res1, brw_imm_d(-1)));
 
       bld.MOV(retype(dest, BRW_REGISTER_TYPE_D), component(res1, 0));
diff --git a/src/intel/compiler/brw_lower_logical_sends.cpp b/src/intel/compiler/brw_lower_logical_sends.cpp
index c284ed7870b..7ea4d8fb09b 100644
--- a/src/intel/compiler/brw_lower_logical_sends.cpp
+++ b/src/intel/compiler/brw_lower_logical_sends.cpp
@@ -1514,6 +1514,7 @@ emit_predicate_on_vector_mask(const fs_builder &bld, fs_inst *inst)
       assert(inst->predicate == BRW_PREDICATE_NORMAL);
       assert(!inst->predicate_inverse);
       assert(inst->flag_subreg == 0);
+      assert(s.devinfo->ver < 20);
       /* Combine the vector mask with the existing predicate by using a
        * vertical predication mode.
        */

Mesa (main): intel/compiler/xe2: Fix for the removal of most predication modes.

Reply via email to