Module: Mesa
Branch: main
Commit: 7db3f0b1c1739a5b09c46f743c32e160849c484d
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=7db3f0b1c1739a5b09c46f743c32e160849c484d

Author: Francisco Jerez <curroje...@riseup.net>
Date:   Thu Jul  7 14:42:21 2022 -0700

intel/compiler/xe2: Implement instruction compaction.

Reworks:
* Handle DPAS in has_3src_unmapped_bits.

Reviewed-by: Caio Oliveira <caio.olive...@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26860>

---

 src/intel/compiler/brw_eu_compact.c | 380 +++++++++++++++++++++++++++++++++---
 1 file changed, 351 insertions(+), 29 deletions(-)

diff --git a/src/intel/compiler/brw_eu_compact.c b/src/intel/compiler/brw_eu_compact.c
index 413d240af0b..077c68cb21c 100644
--- a/src/intel/compiler/brw_eu_compact.c
+++ b/src/intel/compiler/brw_eu_compact.c
@@ -859,6 +859,125 @@ static const uint16_t xehp_src1_index_table[16] = {
    0b100001000100, /*      -r[a]<1;1,0> */
 };
 
+static const uint32_t xe2_control_index_table[32] = { /* Xe2 (ver 20) control index table; 18b entries packed as in set_control_index() */
+   0b000000000000000100, /* (16|M0)               */
+   0b000000100000000000, /* (W) (1|M0)            */
+   0b000000000010000100, /* (16|M16)              */
+   0b000000000000000000, /* (1|M0)                */
+   0b000000100000000100, /* (W) (16|M0)           */
+   0b010000000000000100, /* (16|M0) (.ge)f0.0     */
+   0b010100000000000100, /* (16|M0) (.lt)f0.0     */
+   0b000000100000000010, /* (W) (4|M0)            */
+   0b000000000000000101, /* (32|M0)               */
+   0b000000100000000011, /* (W) (8|M0)            */
+   0b001100100000000000, /* (W) (1|M0) (.gt)f0.0  */
+   0b000010000000000100, /* (16|M0) (sat)         */
+   0b000100000000000100, /* (16|M0) (.eq)f0.0     */
+   0b000000100000000001, /* (W) (2|M0)            */
+   0b001100000000000100, /* (16|M0) (.gt)f0.0     */
+   0b000100100000000000, /* (W) (1|M0) (.eq)f0.0  */
+   0b010100100000000010, /* (W) (4|M0) (.lt)f0.0  */
+   0b010000100000000000, /* (W) (1|M0) (.ge)f0.0  */
+   0b010000100000000010, /* (W) (4|M0) (.ge)f0.0  */
+   0b010100100000000000, /* (W) (1|M0) (.lt)f0.0  */
+   0b001000000000000100, /* (16|M0) (.ne)f0.0     */
+   0b000000000100100100, /* (f2.0) (16|M0)        */
+   0b010100100000000011, /* (W) (8|M0) (.lt)f0.0  */
+   0b000000000100011100, /* (f1.1) (16|M0)        */
+   0b010000100000000011, /* (W) (8|M0) (.ge)f0.0  */
+   0b000000000100001100, /* (f0.1) (16|M0)        */
+   0b000000000100010100, /* (f1.0) (16|M0)        */
+   0b000000000100110100, /* (f3.0) (16|M0)        */
+   0b000000000100111100, /* (f3.1) (16|M0)        */
+   0b000000000100101100, /* (f2.1) (16|M0)        */
+   0b000000000100000100, /* (f0.0) (16|M0)        */
+   0b010100000000100100, /* (16|M0) (.lt)f2.0     */
+};
+
+static const uint32_t xe2_datatype_table[32] = { /* Xe2 (ver 20) datatype index table; 20b entries, installed by compaction_state_init() */
+   0b11010110100101010100, /* grf<1>:f grf:f grf:f    */
+   0b11010100100101010100, /* arf<1>:f grf:f grf:f    */
+   0b00000110100101010100, /* grf<1>:f grf:f arf:ub   */
+   0b00000110100001000100, /* grf<1>:ud grf:ud arf:ub */
+   0b01010110110101010100, /* grf<1>:f grf:f imm:f    */
+   0b11010010100101010100, /* grf<1>:f arf:f grf:f    */
+   0b10111110100011101110, /* grf<1>:q grf:q grf:q    */
+   0b00000000100000000000, /* arf<1>:ub arf:ub arf:ub */
+   0b01010110100101010100, /* grf<1>:f grf:f arf:f    */
+   0b00000010101001000100, /* grf<1>:ud imm:ud        */
+   0b00101110110011001100, /* grf<1>:d grf:d imm:w    */
+   0b11010000100101010100, /* arf<1>:f arf:f grf:f    */
+   0b01010100100101010100, /* arf<1>:f grf:f arf:f    */
+   0b01010100110101010100, /* arf<1>:f grf:f imm:f    */
+   0b00000010101101010100, /* grf<1>:f imm:f          */
+   0b00000110100011001100, /* grf<1>:d grf:d arf:ub   */
+   0b00101110110011101110, /* grf<1>:q grf:q imm:w    */
+   0b00000110100001100110, /* grf<1>:uq grf:uq arf:ub */
+   0b01010000100101010100, /* arf<1>:f arf:f arf:f    */
+   0b10110110100011001100, /* grf<1>:d grf:d grf:d    */
+   0b01010010100101010100, /* grf<1>:f arf:f arf:f    */
+   0b00000111000001000100, /* grf<2>:ud grf:ud arf:ub */
+   0b00110110110011001110, /* grf<1>:q grf:d imm:d    */
+   0b00101100110011001100, /* arf<1>:d grf:d imm:w    */
+   0b11011110100101110110, /* grf<1>:df grf:df grf:df */
+   0b01010010110101010100, /* grf<1>:f arf:f imm:f    */
+   0b10010110100001000100, /* grf<1>:ud grf:ud grf:ud */
+   0b00000010100001000100, /* grf<1>:ud arf:ud arf:ub */
+   0b00001110110001000100, /* grf<1>:ud grf:ud imm:uw */
+   0b00000010101010101100, /* grf<1>:d imm:w          */
+   0b01010000110101010100, /* arf<1>:f arf:f imm:f    */
+   0b00000100100001000100, /* arf<1>:ud grf:ud arf:ub */
+};
+
+static const uint16_t xe2_subreg_table[16] = { /* Xe2 (ver 20) subreg index table; 12b entries packed as in set_subreg_index() */
+   0b000000000000, /* .0 .0  */
+   0b000010000000, /* .0 .4  */
+   0b000000000100, /* .4 .0  */
+   0b010000000000, /* .0 .32 */
+   0b001000000000, /* .0 .16 */
+   0b000000001000, /* .8 .0  */
+   0b000100000000, /* .0 .8  */
+   0b010100000000, /* .0 .40 */
+   0b011000000000, /* .0 .48 */
+   0b000110000000, /* .0 .12 */
+   0b000000010000, /* .16 .0 */
+   0b011010000000, /* .0 .52 */
+   0b001100000000, /* .0 .24 */
+   0b011100000000, /* .0 .56 */
+   0b010110000000, /* .0 .44 */
+   0b010010000000, /* .0 .36 */
+};
+
+static const uint16_t xe2_src0_index_table[8] = { /* Xe2 (ver 20) src0 index table; 11b entries packed as in set_src0_index() */
+   0b00100000000, /* r<1;1,0>      */
+   0b00000000000, /* r<0;1,0>      */
+   0b01000000000, /* r<2;1,0>      */
+   0b00100000010, /* -r<1;1,0>     */
+   0b01100000000, /* r<4;1,0>      */
+   0b00100000001, /* (abs)r<1;1,0> */
+   0b00000000010, /* -r<0;1,0>     */
+   0b01001000000, /* r<2;4,0>      */
+};
+
+static const uint16_t xe2_src1_index_table[16] = { /* Xe2 (ver 20) src1 index table; 16b entries packed as in set_src1_index() */
+   0b0000100000000000, /* r<1;1,0>.0  */
+   0b0000000000000000, /* r<0;1,0>.0  */
+   0b1000100000000000, /* -r<1;1,0>.0 */
+   0b0000000000010000, /* r<0;1,0>.8  */
+   0b0000000000001000, /* r<0;1,0>.4  */
+   0b0000000000011000, /* r<0;1,0>.12 */
+   0b0000000001010000, /* r<0;1,0>.40 */
+   0b0000000001000000, /* r<0;1,0>.32 */
+   0b0000000000100000, /* r<0;1,0>.16 */
+   0b0000000001111000, /* r<0;1,0>.60 */
+   0b0000000000111000, /* r<0;1,0>.28 */
+   0b0000000000101000, /* r<0;1,0>.20 */
+   0b0000000001011000, /* r<0;1,0>.44 */
+   0b0000000001001000, /* r<0;1,0>.36 */
+   0b0000000001110000, /* r<0;1,0>.56 */
+   0b0000000000110000, /* r<0;1,0>.24 */
+};
+
 /* This is actually the control index table for Cherryview (26 bits), but the
  * only difference from Broadwell (24 bits) is that it has two extra 0-bits at
  * the start.
@@ -956,6 +1075,25 @@ static const uint64_t xehp_3src_control_index_table[32] = {
    0b0000101101111010101000000000000000011, /* dpas.8x* (8|M0)        grf<1>:f   :f  :bf  :bf          */
 };
 
+static const uint64_t xe2_3src_control_index_table[16] = { /* Xe2 (ver >= 20) 3-src control index table; 34b entries packed as in set_3src_control_index() */
+   0b0000010010100010101000000000000100, /* (16|M0) grf<1>:f :f :f :f      */
+   0b0000010010000010101000000000000100, /* (16|M0) arf<1>:f :f :f :f      */
+   0b0000010010100010101000100000000100, /* (W)(16|M0) grf<1>:f :f :f :f   */
+   0b0000010010000010101000100000000100, /* (W)(16|M0) arf<1>:f :f :f :f   */
+   0b0000011011100011101100000000000100, /* (16|M0) grf<1>:df :df :df :df  */
+   0b0000011011100011101100000010000100, /* (16|M16) grf<1>:df :df :df :df */
+   0b0000011011000011101100000000000100, /* (16|M0) arf<1>:df :df :df :df  */
+   0b0000010010100010101000000000000101, /* (32|M0) grf<1>:f :f :f :f      */
+   0b0000010010000010101000000000000101, /* (32|M0) arf<1>:f :f :f :f      */
+   0b0000010010000010101010000000000100, /* (16|M0) (sat)arf<1>:f :f :f :f */
+   0b0000010010100010101010000000000100, /* (16|M0) (sat)grf<1>:f :f :f :f */
+   0b0000011011000011101100000010000100, /* (16|M16) arf<1>:df :df :df :df */
+   0b0000010010100010101000100000000000, /* (W)(1|M0) grf<1>:f :f :f :f    */
+   0b0000010010100010001000000000000100, /* (16|M0) grf<1>:ud :ud :ud :ud  */
+   0b0000110110100110011000000000000101, /* (32|M0) grf<1>:d :d :d :d      */
+   0b0000011011000011101100000000000011, /* (8|M0) arf<1>:df :df :df :df   */
+};
+
 static const uint32_t gfx12_3src_source_index_table[32] = {
    0b100101100001100000000, /*  grf<0;0>   grf<8;1>  grf<0> */
    0b100101100001001000010, /*  arf<4;1>   grf<8;1>  grf<0> */
@@ -1029,6 +1167,25 @@ static const uint32_t xehp_3src_source_index_table[32] = {
    0b100100010010100000000, /* dpas.*x1  grf:d      grf:[u2,s2]  grf:[u4,s4] */
 };
 
+static const uint32_t xe2_3src_source_index_table[16] = { /* Xe2 (ver >= 20) 3-src source index table; 21b entries, selected in set_3src_source_index() */
+   0b101100000001100000001, /* grf<1;0> grf<1;0> grf<1>  */
+   0b101100000001000000001, /* arf<1;0> grf<1;0> grf<1>  */
+   0b100100000001100000000, /* grf<0;0> grf<1;0> grf<0>  */
+   0b100100000001000000001, /* arf<1;0> grf<1;0> grf<0>  */
+   0b100100000001100000001, /* grf<1;0> grf<1;0> grf<0>  */
+   0b100000000001100000000, /* grf<0;0> arf<1;0> grf<0>  */
+   0b100000000001100000001, /* grf<1;0> arf<1;0> grf<0>  */
+   0b101100000101100000001, /* grf<1;0> grf<1;0> -grf<1> */
+   0b101000000001100000001, /* grf<1;0> arf<1;0> grf<1>  */
+   0b101000000001000000001, /* arf<1;0> arf<1;0> grf<1>  */
+   0b100000000001000000001, /* arf<1;0> arf<1;0> grf<0>  */
+   0b100100000000100000000, /* grf<0;0> grf<0;0> grf<0>  */
+   0b100100000000100000001, /* grf<1;0> grf<0;0> grf<0>  */
+   0b101100000101000000001, /* arf<1;0> grf<1;0> -grf<1> */
+   0b100100010001100000001, /* grf<1;0> -grf<1;0> grf<0> */
+   0b100100010001000000001, /* arf<1;0> -grf<1;0> grf<0> */
+};
+
 static const uint32_t gfx12_3src_subreg_table[32] = {
    0b00000000000000000000, /* .0  .0  .0  .0  */
    0b00100000000000000000, /* .0  .0  .0  .4  */
@@ -1064,6 +1221,41 @@ static const uint32_t gfx12_3src_subreg_table[32] = {
    0b01000000000010000000, /* .0  .4  .0  .8  */
 };
 
+static const uint32_t xe2_3src_subreg_table[32] = { /* Xe2 (ver >= 20) 3-src subreg index table; 20b entries, selected in set_3src_subreg_index() */
+   0b00000000000000000000, /* .0 .0 .0 .0   */
+   0b00100000000000000000, /* .0 .0 .0 .8   */
+   0b10000000000000000000, /* .0 .0 .0 .32  */
+   0b00010000000000000000, /* .0 .0 .0 .4   */
+   0b11100000000000000000, /* .0 .0 .0 .56  */
+   0b01010000000000000000, /* .0 .0 .0 .20  */
+   0b10110000000000000000, /* .0 .0 .0 .44  */
+   0b01000000000011000000, /* .0 .12 .0 .16 */
+   0b01100000000000000000, /* .0 .0 .0 .24  */
+   0b10100000000000000000, /* .0 .0 .0 .40  */
+   0b11000000000000000000, /* .0 .0 .0 .48  */
+   0b01000000000000000000, /* .0 .0 .0 .16  */
+   0b01110000000110000000, /* .0 .24 .0 .28 */
+   0b10100000001001000000, /* .0 .36 .0 .40 */
+   0b11010000001100000000, /* .0 .48 .0 .52 */
+   0b01110000000000000000, /* .0 .0 .0 .28  */
+   0b11110000000000000000, /* .0 .0 .0 .60  */
+   0b10010000000000000000, /* .0 .0 .0 .36  */
+   0b00110000000000000000, /* .0 .0 .0 .12  */
+   0b00100000000010000000, /* .0 .8 .0 .8   */
+   0b00010000000001000000, /* .0 .4 .0 .4   */
+   0b00110000000011000000, /* .0 .12 .0 .12 */
+   0b11010000000000000000, /* .0 .0 .0 .52  */
+   0b00000000000001000000, /* .0 .4 .0 .0   */
+   0b00000101100000000000, /* .0 .0 .44 .0  */
+   0b00000100000000000000, /* .0 .0 .32 .0  */
+   0b00000000000010000000, /* .0 .8 .0 .0   */
+   0b00000000001100000000, /* .0 .48 .0 .0  */
+   0b00000000001101000000, /* .0 .52 .0 .0  */
+   0b00000110100000000000, /* .0 .0 .52 .0  */
+   0b00000000001000000000, /* .0 .32 .0 .0  */
+   0b00000000001111000000, /* .0 .60 .0 .0  */
+};
+
 struct compaction_state {
    const struct brw_isa_info *isa;
    const uint32_t *control_index_table;
@@ -1083,7 +1275,17 @@ set_control_index(const struct compaction_state *c,
    const struct intel_device_info *devinfo = c->isa->devinfo;
    uint32_t uncompacted; /* 17b/G45; 19b/IVB+; 21b/TGL+ */
 
-   if (devinfo->ver >= 12) {
+   if (devinfo->ver >= 20) {
+      uncompacted = (brw_inst_bits(src, 95, 92) << 14) | /*  4b */
+                    (brw_inst_bits(src, 34, 34) << 13) | /*  1b */
+                    (brw_inst_bits(src, 32, 32) << 12) | /*  1b */
+                    (brw_inst_bits(src, 31, 31) << 11) | /*  1b */
+                    (brw_inst_bits(src, 28, 28) << 10) | /*  1b */
+                    (brw_inst_bits(src, 27, 26) <<  8) | /*  2b */
+                    (brw_inst_bits(src, 25, 24) <<  6) | /*  2b */
+                    (brw_inst_bits(src, 23, 21) <<  3) | /*  3b */
+                    (brw_inst_bits(src, 20, 18));        /*  3b */
+   } else if (devinfo->ver >= 12) {
       uncompacted = (brw_inst_bits(src, 95, 92) << 17) | /*  4b */
                     (brw_inst_bits(src, 34, 34) << 16) | /*  1b */
                     (brw_inst_bits(src, 33, 33) << 15) | /*  1b */
@@ -1169,9 +1371,16 @@ set_subreg_index(const struct compaction_state *c, brw_compact_inst *dst,
                  const brw_inst *src, bool is_immediate)
 {
    const struct intel_device_info *devinfo = c->isa->devinfo;
-   uint16_t uncompacted; /* 15b */
-
-   if (devinfo->ver >= 12) {
+   const unsigned table_len = devinfo->ver >= 20 ?
+      ARRAY_SIZE(xe2_subreg_table) : ARRAY_SIZE(g45_subreg_table);
+   uint16_t uncompacted; /* 15b/G45+; 12b/Xe2+ */
+
+   if (devinfo->ver >= 20) {
+      uncompacted = (brw_inst_bits(src, 33, 33) << 0) |    /* 1b */
+                    (brw_inst_bits(src, 55, 51) << 1) |    /* 5b */
+                    (brw_inst_bits(src, 71, 67) << 6) |    /* 5b */
+                    (brw_inst_bits(src, 87, 87) << 11);    /* 1b */
+   } else if (devinfo->ver >= 12) {
       uncompacted = (brw_inst_bits(src, 55, 51) << 0) |    /* 5b */
                     (brw_inst_bits(src, 71, 67) << 5);     /* 5b */
 
@@ -1185,7 +1394,7 @@ set_subreg_index(const struct compaction_state *c, brw_compact_inst *dst,
          uncompacted |= brw_inst_bits(src, 100, 96) << 10; /* 5b */
    }
 
-   for (int i = 0; i < 32; i++) {
+   for (int i = 0; i < table_len; i++) {
       if (c->subreg_table[i] == uncompacted) {
          brw_compact_inst_set_subreg_index(devinfo, dst, i);
         return true;
@@ -1200,12 +1409,15 @@ set_src0_index(const struct compaction_state *c, brw_compact_inst *dst,
                const brw_inst *src)
 {
    const struct intel_device_info *devinfo = c->isa->devinfo;
-   uint16_t uncompacted; /* 12b */
+   uint16_t uncompacted; /* 12b/G45+; 11b/Xe2+ */
    int table_len;
 
    if (devinfo->ver >= 12) {
-      table_len = ARRAY_SIZE(gfx12_src0_index_table);
-      uncompacted = (brw_inst_bits(src, 87, 84) << 8) | /*  4b */
+      table_len = (devinfo->ver >= 20 ? ARRAY_SIZE(xe2_src0_index_table) :
+                   ARRAY_SIZE(gfx12_src0_index_table));
+      uncompacted = (devinfo->ver >= 20 ? 0 :
+                     brw_inst_bits(src, 87, 87) << 11) | /*  1b */
+                    (brw_inst_bits(src, 86, 84) << 8) | /*  3b */
                     (brw_inst_bits(src, 83, 81) << 5) | /*  3b */
                     (brw_inst_bits(src, 80, 80) << 4) | /*  1b */
                     (brw_inst_bits(src, 65, 64) << 2) | /*  2b */
@@ -1240,10 +1452,18 @@ set_src1_index(const struct compaction_state *c, brw_compact_inst *dst,
       }
       return true;
    } else {
-      uint16_t uncompacted; /* 12b */
+      uint16_t uncompacted; /* 12b/G45+ 16b/Xe2+ */
       int table_len;
 
-      if (devinfo->ver >= 12) {
+      if (devinfo->ver >= 20) {
+         table_len = ARRAY_SIZE(xe2_src1_index_table);
+         uncompacted = (brw_inst_bits(src, 121, 120) << 14) | /*  2b */
+                       (brw_inst_bits(src, 118, 116) << 11) | /*  3b */
+                       (brw_inst_bits(src, 115, 113) <<  8) | /*  3b */
+                       (brw_inst_bits(src, 112, 112) <<  7) | /*  1b */
+                       (brw_inst_bits(src, 103,  99) <<  2) | /*  5b */
+                       (brw_inst_bits(src,  97,  96));        /*  2b */
+      } else if (devinfo->ver >= 12) {
          table_len = ARRAY_SIZE(gfx12_src0_index_table);
          uncompacted = (brw_inst_bits(src, 121, 120) << 10) | /*  2b */
                        (brw_inst_bits(src, 119, 116) <<  6) | /*  4b */
@@ -1272,7 +1492,33 @@ set_3src_control_index(const struct intel_device_info *devinfo,
 {
    assert(devinfo->ver >= 8);
 
-   if (devinfo->verx10 >= 125) {
+   if (devinfo->ver >= 20) {
+      const uint64_t uncompacted =        /* 34b/Xe2+ */
+         (brw_inst_bits(src, 95, 92) << 30) | /*  4b */
+         (brw_inst_bits(src, 90, 88) << 27) | /*  3b */
+         (brw_inst_bits(src, 82, 80) << 24) | /*  3b */
+         (brw_inst_bits(src, 50, 50) << 23) | /*  1b */
+         0                                  | /*  1b */
+         (brw_inst_bits(src, 48, 48) << 21) | /*  1b */
+         (brw_inst_bits(src, 42, 40) << 18) | /*  3b */
+         (brw_inst_bits(src, 39, 39) << 17) | /*  1b */
+         (brw_inst_bits(src, 38, 36) << 14) | /*  3b */
+         (brw_inst_bits(src, 34, 34) << 13) | /*  1b */
+         (brw_inst_bits(src, 32, 32) << 12) | /*  1b */
+         (brw_inst_bits(src, 31, 31) << 11) | /*  1b */
+         (brw_inst_bits(src, 28, 28) << 10) | /*  1b */
+         (brw_inst_bits(src, 27, 26) <<  8) | /*  2b */
+         (brw_inst_bits(src, 25, 24) <<  6) | /*  2b */
+         (brw_inst_bits(src, 23, 21) <<  3) | /*  3b */
+         (brw_inst_bits(src, 20, 18));        /*  3b */
+
+      for (unsigned i = 0; i < ARRAY_SIZE(xe2_3src_control_index_table); i++) {
+         if (xe2_3src_control_index_table[i] == uncompacted) {
+            brw_compact_inst_set_3src_control_index(devinfo, dst, i);
+            return true;
+         }
+      }
+   } else if (devinfo->verx10 >= 125) {
       uint64_t uncompacted =             /* 37b/XeHP+ */
          (brw_inst_bits(src, 95, 92) << 33) | /*  4b */
          (brw_inst_bits(src, 90, 88) << 30) | /*  3b */
@@ -1372,11 +1618,13 @@ set_3src_source_index(const struct intel_device_info *devinfo,
          (brw_inst_bits(src,  35,  35));        /*  1b */
 
       const uint32_t *three_src_source_index_table =
-         devinfo->verx10 >= 125 ?
-         xehp_3src_source_index_table : gfx12_3src_source_index_table;
+         devinfo->ver >= 20 ? xe2_3src_source_index_table :
+         devinfo->verx10 >= 125 ? xehp_3src_source_index_table :
+         gfx12_3src_source_index_table;
       const uint32_t three_src_source_index_table_len =
+         devinfo->ver >= 20 ? ARRAY_SIZE(xe2_3src_source_index_table) :
          devinfo->verx10 >= 125 ? ARRAY_SIZE(xehp_3src_source_index_table) :
-                                  ARRAY_SIZE(gfx12_3src_source_index_table);
+         ARRAY_SIZE(gfx12_3src_source_index_table);
 
       for (unsigned i = 0; i < three_src_source_index_table_len; i++) {
          if (three_src_source_index_table[i] == uncompacted) {
@@ -1426,8 +1674,14 @@ set_3src_subreg_index(const struct intel_device_info *devinfo,
       (brw_inst_bits(src,  71,  67) <<  5) | /*  5b */
       (brw_inst_bits(src,  55,  51));        /*  5b */
 
-   for (unsigned i = 0; i < ARRAY_SIZE(gfx12_3src_subreg_table); i++) {
-      if (gfx12_3src_subreg_table[i] == uncompacted) {
+   const uint32_t *table = devinfo->ver >= 20 ? xe2_3src_subreg_table :
+                           gfx12_3src_subreg_table;
+   const uint32_t len =
+      devinfo->ver >= 20 ? ARRAY_SIZE(xe2_3src_subreg_table) :
+      ARRAY_SIZE(gfx12_3src_subreg_table);
+
+   for (unsigned i = 0; i < len; i++) {
+      if (table[i] == uncompacted) {
          brw_compact_inst_set_3src_subreg_index(devinfo, dst, i);
         return true;
       }
@@ -1473,14 +1727,22 @@ has_unmapped_bits(const struct brw_isa_info *isa, const brw_inst *src)
 }
 
 static bool
-has_3src_unmapped_bits(const struct intel_device_info *devinfo,
+has_3src_unmapped_bits(const struct brw_isa_info *isa,
                        const brw_inst *src)
 {
+   const struct intel_device_info *devinfo = isa->devinfo;
+
    /* Check for three-source instruction bits that don't map to any of the
     * fields of the compacted instruction.  All of them seem to be reserved
     * bits currently.
     */
-   if (devinfo->ver >= 12) {
+   ASSERTED enum opcode opcode = brw_inst_opcode(isa, src);
+   if (devinfo->ver >= 20) {
+      assert(opcode == BRW_OPCODE_DPAS || !brw_inst_bits(src, 49, 49));
+      assert(!brw_inst_bits(src, 33, 33));
+      assert(!brw_inst_bits(src, 7, 7));
+   } else if (devinfo->ver >= 12) {
+      assert(opcode == BRW_OPCODE_DPAS || !brw_inst_bits(src, 49, 49));
       assert(!brw_inst_bits(src, 7, 7));
    } else if (devinfo->ver >= 9 || devinfo->platform == INTEL_PLATFORM_CHV) {
       assert(!brw_inst_bits(src, 127, 127) &&
@@ -1501,12 +1763,13 @@ has_3src_unmapped_bits(const struct intel_device_info *devinfo,
 }
 
 static bool
-brw_try_compact_3src_instruction(const struct intel_device_info *devinfo,
+brw_try_compact_3src_instruction(const struct brw_isa_info *isa,
                                  brw_compact_inst *dst, const brw_inst *src)
 {
+   const struct intel_device_info *devinfo = isa->devinfo;
    assert(devinfo->ver >= 8);
 
-   if (has_3src_unmapped_bits(devinfo, src))
+   if (has_3src_unmapped_bits(isa, src))
       return false;
 
 #define compact(field) \
@@ -1827,7 +2090,7 @@ try_compact_instruction(const struct compaction_state *c,
    if (is_3src(c->isa, brw_inst_opcode(c->isa, src))) {
       if (devinfo->ver >= 8) {
          memset(&temp, 0, sizeof(temp));
-         if (brw_try_compact_3src_instruction(devinfo, &temp, src)) {
+         if (brw_try_compact_3src_instruction(c->isa, &temp, src)) {
             *dst = temp;
             return true;
          } else {
@@ -1939,7 +2202,17 @@ set_uncompacted_control(const struct compaction_state *c, brw_inst *dst,
    uint32_t uncompacted =
       c->control_index_table[brw_compact_inst_control_index(devinfo, src)];
 
-   if (devinfo->ver >= 12) {
+   if (devinfo->ver >= 20) {
+      brw_inst_set_bits(dst, 95, 92, (uncompacted >> 14) & 0xf);
+      brw_inst_set_bits(dst, 34, 34, (uncompacted >> 13) & 0x1);
+      brw_inst_set_bits(dst, 32, 32, (uncompacted >> 12) & 0x1);
+      brw_inst_set_bits(dst, 31, 31, (uncompacted >> 11) & 0x1);
+      brw_inst_set_bits(dst, 28, 28, (uncompacted >> 10) & 0x1);
+      brw_inst_set_bits(dst, 27, 26, (uncompacted >>  8) & 0x3);
+      brw_inst_set_bits(dst, 25, 24, (uncompacted >>  6) & 0x3);
+      brw_inst_set_bits(dst, 23, 21, (uncompacted >>  3) & 0x7);
+      brw_inst_set_bits(dst, 20, 18, (uncompacted >>  0) & 0x7);
+   } else if (devinfo->ver >= 12) {
       brw_inst_set_bits(dst, 95, 92, (uncompacted >> 17));
       brw_inst_set_bits(dst, 34, 34, (uncompacted >> 16) & 0x1);
       brw_inst_set_bits(dst, 33, 33, (uncompacted >> 15) & 0x1);
@@ -2002,7 +2275,12 @@ set_uncompacted_subreg(const struct compaction_state *c, brw_inst *dst,
    uint16_t uncompacted =
       c->subreg_table[brw_compact_inst_subreg_index(devinfo, src)];
 
-   if (devinfo->ver >= 12) {
+   if (devinfo->ver >= 20) {
+      brw_inst_set_bits(dst, 33, 33, (uncompacted >> 0) & 0x1);
+      brw_inst_set_bits(dst, 55, 51, (uncompacted >> 1) & 0x1f);
+      brw_inst_set_bits(dst, 71, 67, (uncompacted >> 6) & 0x1f);
+      brw_inst_set_bits(dst, 87, 87, (uncompacted >> 11) & 0x1);
+   } else if (devinfo->ver >= 12) {
       brw_inst_set_bits(dst, 103, 99, (uncompacted >> 10));
       brw_inst_set_bits(dst,  71, 67, (uncompacted >>  5) & 0x1f);
       brw_inst_set_bits(dst,  55, 51, (uncompacted >>  0) & 0x1f);
@@ -2022,7 +2300,9 @@ set_uncompacted_src0(const struct compaction_state *c, brw_inst *dst,
    uint16_t uncompacted = c->src0_index_table[compacted];
 
    if (devinfo->ver >= 12) {
-      brw_inst_set_bits(dst, 87, 84, (uncompacted >> 8));
+      if (devinfo->ver < 20)
+         brw_inst_set_bits(dst, 87, 87, (uncompacted >> 11) & 0x1);
+      brw_inst_set_bits(dst, 86, 84, (uncompacted >> 8) & 0x7);
       brw_inst_set_bits(dst, 83, 81, (uncompacted >> 5) & 0x7);
       brw_inst_set_bits(dst, 80, 80, (uncompacted >> 4) & 0x1);
       brw_inst_set_bits(dst, 65, 64, (uncompacted >> 2) & 0x3);
@@ -2040,7 +2320,14 @@ set_uncompacted_src1(const struct compaction_state *c, brw_inst *dst,
    uint16_t uncompacted =
       c->src1_index_table[brw_compact_inst_src1_index(devinfo, src)];
 
-   if (devinfo->ver >= 12) {
+   if (devinfo->ver >= 20) {
+      brw_inst_set_bits(dst, 121, 120, (uncompacted >> 14) & 0x3);
+      brw_inst_set_bits(dst, 118, 116, (uncompacted >> 11) & 0x7);
+      brw_inst_set_bits(dst, 115, 113, (uncompacted >>  8) & 0x7);
+      brw_inst_set_bits(dst, 112, 112, (uncompacted >>  7) & 0x1);
+      brw_inst_set_bits(dst, 103,  99, (uncompacted >>  2) & 0x1f);
+      brw_inst_set_bits(dst,  97,  96, (uncompacted >>  0) & 0x3);
+   } else if (devinfo->ver >= 12) {
       brw_inst_set_bits(dst, 121, 120, (uncompacted >> 10));
       brw_inst_set_bits(dst, 119, 116, (uncompacted >>  6) & 0xf);
       brw_inst_set_bits(dst, 115, 113, (uncompacted >>  3) & 0x7);
@@ -2058,7 +2345,28 @@ set_uncompacted_3src_control_index(const struct compaction_state *c,
    const struct intel_device_info *devinfo = c->isa->devinfo;
    assert(devinfo->ver >= 8);
 
-   if (devinfo->verx10 >= 125) {
+   if (devinfo->ver >= 20) {
+      uint64_t compacted = brw_compact_inst_3src_control_index(devinfo, src);
+      uint64_t uncompacted = xe2_3src_control_index_table[compacted];
+
+      brw_inst_set_bits(dst, 95, 92, (uncompacted >> 30) & 0xf);
+      brw_inst_set_bits(dst, 90, 88, (uncompacted >> 27) & 0x7);
+      brw_inst_set_bits(dst, 82, 80, (uncompacted >> 24) & 0x7);
+      brw_inst_set_bits(dst, 50, 50, (uncompacted >> 23) & 0x1);
+      brw_inst_set_bits(dst, 48, 48, (uncompacted >> 21) & 0x1);
+      brw_inst_set_bits(dst, 42, 40, (uncompacted >> 18) & 0x7);
+      brw_inst_set_bits(dst, 39, 39, (uncompacted >> 17) & 0x1);
+      brw_inst_set_bits(dst, 38, 36, (uncompacted >> 14) & 0x7);
+      brw_inst_set_bits(dst, 34, 34, (uncompacted >> 13) & 0x1);
+      brw_inst_set_bits(dst, 32, 32, (uncompacted >> 12) & 0x1);
+      brw_inst_set_bits(dst, 31, 31, (uncompacted >> 11) & 0x1);
+      brw_inst_set_bits(dst, 28, 28, (uncompacted >> 10) & 0x1);
+      brw_inst_set_bits(dst, 27, 26, (uncompacted >>  8) & 0x3);
+      brw_inst_set_bits(dst, 25, 24, (uncompacted >>  6) & 0x3);
+      brw_inst_set_bits(dst, 23, 21, (uncompacted >>  3) & 0x7);
+      brw_inst_set_bits(dst, 20, 18, (uncompacted >>  0) & 0x7);
+
+   } else if (devinfo->verx10 >= 125) {
       uint64_t compacted = brw_compact_inst_3src_control_index(devinfo, src);
       uint64_t uncompacted = xehp_3src_control_index_table[compacted];
 
@@ -2125,8 +2433,9 @@ set_uncompacted_3src_source_index(const struct intel_device_info *devinfo,
 
    if (devinfo->ver >= 12) {
       const uint32_t *three_src_source_index_table =
-         devinfo->verx10 >= 125 ?
-         xehp_3src_source_index_table : gfx12_3src_source_index_table;
+         devinfo->ver >= 20 ? xe2_3src_source_index_table :
+         devinfo->verx10 >= 125 ? xehp_3src_source_index_table :
+                                  gfx12_3src_source_index_table;
       uint32_t uncompacted = three_src_source_index_table[compacted];
 
       brw_inst_set_bits(dst, 114, 114, (uncompacted >> 20));
@@ -2171,7 +2480,8 @@ set_uncompacted_3src_subreg_index(const struct intel_device_info *devinfo,
    assert(devinfo->ver >= 12);
 
    uint32_t compacted = brw_compact_inst_3src_subreg_index(devinfo, src);
-   uint32_t uncompacted = gfx12_3src_subreg_table[compacted];
+   uint32_t uncompacted = (devinfo->ver >= 20 ? xe2_3src_subreg_table[compacted]:
+                           gfx12_3src_subreg_table[compacted]);
 
    brw_inst_set_bits(dst, 119, 115, (uncompacted >> 15));
    brw_inst_set_bits(dst, 103,  99, (uncompacted >> 10) & 0x1f);
@@ -2419,9 +2729,21 @@ compaction_state_init(struct compaction_state *c,
    assert(gfx12_src1_index_table[ARRAY_SIZE(gfx12_src1_index_table) - 1] != 0);
    assert(xehp_src0_index_table[ARRAY_SIZE(xehp_src0_index_table) - 1] != 0);
    assert(xehp_src1_index_table[ARRAY_SIZE(xehp_src1_index_table) - 1] != 0);
+   assert(xe2_control_index_table[ARRAY_SIZE(xe2_control_index_table) - 1] != 0);
+   assert(xe2_datatype_table[ARRAY_SIZE(xe2_datatype_table) - 1] != 0);
+   assert(xe2_subreg_table[ARRAY_SIZE(xe2_subreg_table) - 1] != 0);
+   assert(xe2_src0_index_table[ARRAY_SIZE(xe2_src0_index_table) - 1] != 0);
+   assert(xe2_src1_index_table[ARRAY_SIZE(xe2_src1_index_table) - 1] != 0);
 
    c->isa = isa;
    switch (devinfo->ver) {
+   case 20:
+      c->control_index_table = xe2_control_index_table;
+      c->datatype_table = xe2_datatype_table;
+      c->subreg_table = xe2_subreg_table;
+      c->src0_index_table = xe2_src0_index_table;
+      c->src1_index_table = xe2_src1_index_table;
+      break;
    case 12:
       c->control_index_table = gfx12_control_index_table;;
       c->datatype_table = gfx12_datatype_table;

Reply via email to