Module: Mesa
Branch: main
Commit: 7db3f0b1c1739a5b09c46f743c32e160849c484d
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=7db3f0b1c1739a5b09c46f743c32e160849c484d
Author: Francisco Jerez <curroje...@riseup.net> Date: Thu Jul 7 14:42:21 2022 -0700 intel/compiler/xe2: Implement instruction compaction. Reworks: * Handle DPAS in has_3src_unmapped_bits. Reviewed-by: Caio Oliveira <caio.olive...@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26860> --- src/intel/compiler/brw_eu_compact.c | 380 +++++++++++++++++++++++++++++++++--- 1 file changed, 351 insertions(+), 29 deletions(-) diff --git a/src/intel/compiler/brw_eu_compact.c b/src/intel/compiler/brw_eu_compact.c index 413d240af0b..077c68cb21c 100644 --- a/src/intel/compiler/brw_eu_compact.c +++ b/src/intel/compiler/brw_eu_compact.c @@ -859,6 +859,125 @@ static const uint16_t xehp_src1_index_table[16] = { 0b100001000100, /* -r[a]<1;1,0> */ }; +static const uint32_t xe2_control_index_table[32] = { + 0b000000000000000100, /* (16|M0) */ + 0b000000100000000000, /* (W) (1|M0) */ + 0b000000000010000100, /* (16|M16) */ + 0b000000000000000000, /* (1|M0) */ + 0b000000100000000100, /* (W) (16|M0) */ + 0b010000000000000100, /* (16|M0) (.ge)f0.0 */ + 0b010100000000000100, /* (16|M0) (.lt)f0.0 */ + 0b000000100000000010, /* (W) (4|M0) */ + 0b000000000000000101, /* (32|M0) */ + 0b000000100000000011, /* (W) (8|M0) */ + 0b001100100000000000, /* (W) (1|M0) (.gt)f0.0 */ + 0b000010000000000100, /* (16|M0) (sat) */ + 0b000100000000000100, /* (16|M0) (.eq)f0.0 */ + 0b000000100000000001, /* (W) (2|M0) */ + 0b001100000000000100, /* (16|M0) (.gt)f0.0 */ + 0b000100100000000000, /* (W) (1|M0) (.eq)f0.0 */ + 0b010100100000000010, /* (W) (4|M0) (.lt)f0.0 */ + 0b010000100000000000, /* (W) (1|M0) (.ge)f0.0 */ + 0b010000100000000010, /* (W) (4|M0) (.ge)f0.0 */ + 0b010100100000000000, /* (W) (1|M0) (.lt)f0.0 */ + 0b001000000000000100, /* (16|M0) (.ne)f0.0 */ + 0b000000000100100100, /* (f2.0) (16|M0) */ + 0b010100100000000011, /* (W) (8|M0) (.lt)f0.0 */ + 0b000000000100011100, /* (f1.1) (16|M0) */ + 0b010000100000000011, /* (W) (8|M0) (.ge)f0.0 */ + 0b000000000100001100, /* (f0.1) 
(16|M0) */ + 0b000000000100010100, /* (f1.0) (16|M0) */ + 0b000000000100110100, /* (f3.0) (16|M0) */ + 0b000000000100111100, /* (f3.1) (16|M0) */ + 0b000000000100101100, /* (f2.1) (16|M0) */ + 0b000000000100000100, /* (f0.0) (16|M0) */ + 0b010100000000100100, /* (16|M0) (.lt)f2.0 */ +}; + +static const uint32_t xe2_datatype_table[32] = { + 0b11010110100101010100, /* grf<1>:f grf:f grf:f */ + 0b11010100100101010100, /* arf<1>:f grf:f grf:f */ + 0b00000110100101010100, /* grf<1>:f grf:f arf:ub */ + 0b00000110100001000100, /* grf<1>:ud grf:ud arf:ub */ + 0b01010110110101010100, /* grf<1>:f grf:f imm:f */ + 0b11010010100101010100, /* grf<1>:f arf:f grf:f */ + 0b10111110100011101110, /* grf<1>:q grf:q grf:q */ + 0b00000000100000000000, /* arf<1>:ub arf:ub arf:ub */ + 0b01010110100101010100, /* grf<1>:f grf:f arf:f */ + 0b00000010101001000100, /* grf<1>:ud imm:ud */ + 0b00101110110011001100, /* grf<1>:d grf:d imm:w */ + 0b11010000100101010100, /* arf<1>:f arf:f grf:f */ + 0b01010100100101010100, /* arf<1>:f grf:f arf:f */ + 0b01010100110101010100, /* arf<1>:f grf:f imm:f */ + 0b00000010101101010100, /* grf<1>:f imm:f */ + 0b00000110100011001100, /* grf<1>:d grf:d arf:ub */ + 0b00101110110011101110, /* grf<1>:q grf:q imm:w */ + 0b00000110100001100110, /* grf<1>:uq grf:uq arf:ub */ + 0b01010000100101010100, /* arf<1>:f arf:f arf:f */ + 0b10110110100011001100, /* grf<1>:d grf:d grf:d */ + 0b01010010100101010100, /* grf<1>:f arf:f arf:f */ + 0b00000111000001000100, /* grf<2>:ud grf:ud arf:ub */ + 0b00110110110011001110, /* grf<1>:q grf:d imm:d */ + 0b00101100110011001100, /* arf<1>:d grf:d imm:w */ + 0b11011110100101110110, /* grf<1>:df grf:df grf:df */ + 0b01010010110101010100, /* grf<1>:f arf:f imm:f */ + 0b10010110100001000100, /* grf<1>:ud grf:ud grf:ud */ + 0b00000010100001000100, /* grf<1>:ud arf:ud arf:ub */ + 0b00001110110001000100, /* grf<1>:ud grf:ud imm:uw */ + 0b00000010101010101100, /* grf<1>:d imm:w */ + 0b01010000110101010100, /* arf<1>:f arf:f imm:f */ + 
0b00000100100001000100, /* arf<1>:ud grf:ud arf:ub */ +}; + +static const uint16_t xe2_subreg_table[16] = { + 0b000000000000, /* .0 .0 */ + 0b000010000000, /* .0 .4 */ + 0b000000000100, /* .4 .0 */ + 0b010000000000, /* .0 .32 */ + 0b001000000000, /* .0 .16 */ + 0b000000001000, /* .8 .0 */ + 0b000100000000, /* .0 .8 */ + 0b010100000000, /* .0 .40 */ + 0b011000000000, /* .0 .48 */ + 0b000110000000, /* .0 .12 */ + 0b000000010000, /* .16 .0 */ + 0b011010000000, /* .0 .52 */ + 0b001100000000, /* .0 .24 */ + 0b011100000000, /* .0 .56 */ + 0b010110000000, /* .0 .44 */ + 0b010010000000, /* .0 .36 */ +}; + +static const uint16_t xe2_src0_index_table[8] = { + 0b00100000000, /* r<1;1,0> */ + 0b00000000000, /* r<0;1,0> */ + 0b01000000000, /* r<2;1,0> */ + 0b00100000010, /* -r<1;1,0> */ + 0b01100000000, /* r<4;1,0> */ + 0b00100000001, /* (abs)r<1;1,0> */ + 0b00000000010, /* -r<0;1,0> */ + 0b01001000000, /* r<2;4,0> */ +}; + +static const uint16_t xe2_src1_index_table[16] = { + 0b0000100000000000, /* r<1;1,0>.0 */ + 0b0000000000000000, /* r<0;1,0>.0 */ + 0b1000100000000000, /* -r<1;1,0>.0 */ + 0b0000000000010000, /* r<0;1,0>.8 */ + 0b0000000000001000, /* r<0;1,0>.4 */ + 0b0000000000011000, /* r<0;1,0>.12 */ + 0b0000000001010000, /* r<0;1,0>.40 */ + 0b0000000001000000, /* r<0;1,0>.32 */ + 0b0000000000100000, /* r<0;1,0>.16 */ + 0b0000000001111000, /* r<0;1,0>.60 */ + 0b0000000000111000, /* r<0;1,0>.28 */ + 0b0000000000101000, /* r<0;1,0>.20 */ + 0b0000000001011000, /* r<0;1,0>.44 */ + 0b0000000001001000, /* r<0;1,0>.36 */ + 0b0000000001110000, /* r<0;1,0>.56 */ + 0b0000000000110000, /* r<0;1,0>.24 */ +}; + /* This is actually the control index table for Cherryview (26 bits), but the * only difference from Broadwell (24 bits) is that it has two extra 0-bits at * the start. 
@@ -956,6 +1075,25 @@ static const uint64_t xehp_3src_control_index_table[32] = { 0b0000101101111010101000000000000000011, /* dpas.8x* (8|M0) grf<1>:f :f :bf :bf */ }; +static const uint64_t xe2_3src_control_index_table[16] = { + 0b0000010010100010101000000000000100, /* (16|M0) grf<1>:f :f :f :f */ + 0b0000010010000010101000000000000100, /* (16|M0) arf<1>:f :f :f :f */ + 0b0000010010100010101000100000000100, /* (W)(16|M0) grf<1>:f :f :f :f */ + 0b0000010010000010101000100000000100, /* (W)(16|M0) arf<1>:f :f :f :f */ + 0b0000011011100011101100000000000100, /* (16|M0) grf<1>:df :df :df :df */ + 0b0000011011100011101100000010000100, /* (16|M16) grf<1>:df :df :df :df */ + 0b0000011011000011101100000000000100, /* (16|M0) arf<1>:df :df :df :df */ + 0b0000010010100010101000000000000101, /* (32|M0) grf<1>:f :f :f :f */ + 0b0000010010000010101000000000000101, /* (32|M0) arf<1>:f :f :f :f */ + 0b0000010010000010101010000000000100, /* (16|M0) (sat)arf<1>:f :f :f :f */ + 0b0000010010100010101010000000000100, /* (16|M0) (sat)grf<1>:f :f :f :f */ + 0b0000011011000011101100000010000100, /* (16|M16) arf<1>:df :df :df :df */ + 0b0000010010100010101000100000000000, /* (W)(1|M0) grf<1>:f :f :f :f */ + 0b0000010010100010001000000000000100, /* (16|M0) grf<1>:ud :ud :ud :ud */ + 0b0000110110100110011000000000000101, /* (32|M0) grf<1>:d :d :d :d */ + 0b0000011011000011101100000000000011, /* (8|M0) arf<1>:df :df :df :df */ +}; + static const uint32_t gfx12_3src_source_index_table[32] = { 0b100101100001100000000, /* grf<0;0> grf<8;1> grf<0> */ 0b100101100001001000010, /* arf<4;1> grf<8;1> grf<0> */ @@ -1029,6 +1167,25 @@ static const uint32_t xehp_3src_source_index_table[32] = { 0b100100010010100000000, /* dpas.*x1 grf:d grf:[u2,s2] grf:[u4,s4] */ }; +static const uint32_t xe2_3src_source_index_table[16] = { + 0b101100000001100000001, /* grf<1;0> grf<1;0> grf<1> */ + 0b101100000001000000001, /* arf<1;0> grf<1;0> grf<1> */ + 0b100100000001100000000, /* grf<0;0> grf<1;0> grf<0> */ + 
0b100100000001000000001, /* arf<1;0> grf<1;0> grf<0> */ + 0b100100000001100000001, /* grf<1;0> grf<1;0> grf<0> */ + 0b100000000001100000000, /* grf<0;0> arf<1;0> grf<0> */ + 0b100000000001100000001, /* grf<1;0> arf<1;0> grf<0> */ + 0b101100000101100000001, /* grf<1;0> grf<1;0> -grf<1> */ + 0b101000000001100000001, /* grf<1;0> arf<1;0> grf<1> */ + 0b101000000001000000001, /* arf<1;0> arf<1;0> grf<1> */ + 0b100000000001000000001, /* arf<1;0> arf<1;0> grf<0> */ + 0b100100000000100000000, /* grf<0;0> grf<0;0> grf<0> */ + 0b100100000000100000001, /* grf<1;0> grf<0;0> grf<0> */ + 0b101100000101000000001, /* arf<1;0> grf<1;0> -grf<1> */ + 0b100100010001100000001, /* grf<1;0> -grf<1;0> grf<0> */ + 0b100100010001000000001, /* arf<1;0> -grf<1;0> grf<0> */ +}; + static const uint32_t gfx12_3src_subreg_table[32] = { 0b00000000000000000000, /* .0 .0 .0 .0 */ 0b00100000000000000000, /* .0 .0 .0 .4 */ @@ -1064,6 +1221,41 @@ static const uint32_t gfx12_3src_subreg_table[32] = { 0b01000000000010000000, /* .0 .4 .0 .8 */ }; +static const uint32_t xe2_3src_subreg_table[32] = { + 0b00000000000000000000, /* .0 .0 .0 .0 */ + 0b00100000000000000000, /* .0 .0 .0 .8 */ + 0b10000000000000000000, /* .0 .0 .0 .32 */ + 0b00010000000000000000, /* .0 .0 .0 .4 */ + 0b11100000000000000000, /* .0 .0 .0 .56 */ + 0b01010000000000000000, /* .0 .0 .0 .20 */ + 0b10110000000000000000, /* .0 .0 .0 .44 */ + 0b01000000000011000000, /* .0 .12 .0 .16 */ + 0b01100000000000000000, /* .0 .0 .0 .24 */ + 0b10100000000000000000, /* .0 .0 .0 .40 */ + 0b11000000000000000000, /* .0 .0 .0 .48 */ + 0b01000000000000000000, /* .0 .0 .0 .16 */ + 0b01110000000110000000, /* .0 .24 .0 .28 */ + 0b10100000001001000000, /* .0 .36 .0 .40 */ + 0b11010000001100000000, /* .0 .48 .0 .52 */ + 0b01110000000000000000, /* .0 .0 .0 .28 */ + 0b11110000000000000000, /* .0 .0 .0 .60 */ + 0b10010000000000000000, /* .0 .0 .0 .36 */ + 0b00110000000000000000, /* .0 .0 .0 .12 */ + 0b00100000000010000000, /* .0 .8 .0 .8 */ + 
0b00010000000001000000, /* .0 .4 .0 .4 */ + 0b00110000000011000000, /* .0 .12 .0 .12 */ + 0b11010000000000000000, /* .0 .0 .0 .52 */ + 0b00000000000001000000, /* .0 .4 .0 .0 */ + 0b00000101100000000000, /* .0 .0 .44 .0 */ + 0b00000100000000000000, /* .0 .0 .32 .0 */ + 0b00000000000010000000, /* .0 .8 .0 .0 */ + 0b00000000001100000000, /* .0 .48 .0 .0 */ + 0b00000000001101000000, /* .0 .52 .0 .0 */ + 0b00000110100000000000, /* .0 .0 .52 .0 */ + 0b00000000001000000000, /* .0 .32 .0 .0 */ + 0b00000000001111000000, /* .0 .60 .0 .0 */ +}; + struct compaction_state { const struct brw_isa_info *isa; const uint32_t *control_index_table; @@ -1083,7 +1275,17 @@ set_control_index(const struct compaction_state *c, const struct intel_device_info *devinfo = c->isa->devinfo; uint32_t uncompacted; /* 17b/G45; 19b/IVB+; 21b/TGL+ */ - if (devinfo->ver >= 12) { + if (devinfo->ver >= 20) { + uncompacted = (brw_inst_bits(src, 95, 92) << 14) | /* 4b */ + (brw_inst_bits(src, 34, 34) << 13) | /* 1b */ + (brw_inst_bits(src, 32, 32) << 12) | /* 1b */ + (brw_inst_bits(src, 31, 31) << 11) | /* 1b */ + (brw_inst_bits(src, 28, 28) << 10) | /* 1b */ + (brw_inst_bits(src, 27, 26) << 8) | /* 2b */ + (brw_inst_bits(src, 25, 24) << 6) | /* 2b */ + (brw_inst_bits(src, 23, 21) << 3) | /* 3b */ + (brw_inst_bits(src, 20, 18)); /* 3b */ + } else if (devinfo->ver >= 12) { uncompacted = (brw_inst_bits(src, 95, 92) << 17) | /* 4b */ (brw_inst_bits(src, 34, 34) << 16) | /* 1b */ (brw_inst_bits(src, 33, 33) << 15) | /* 1b */ @@ -1169,9 +1371,16 @@ set_subreg_index(const struct compaction_state *c, brw_compact_inst *dst, const brw_inst *src, bool is_immediate) { const struct intel_device_info *devinfo = c->isa->devinfo; - uint16_t uncompacted; /* 15b */ - - if (devinfo->ver >= 12) { + const unsigned table_len = devinfo->ver >= 20 ? 
+ ARRAY_SIZE(xe2_subreg_table) : ARRAY_SIZE(g45_subreg_table); + uint16_t uncompacted; /* 15b/G45+; 12b/Xe2+ */ + + if (devinfo->ver >= 20) { + uncompacted = (brw_inst_bits(src, 33, 33) << 0) | /* 1b */ + (brw_inst_bits(src, 55, 51) << 1) | /* 5b */ + (brw_inst_bits(src, 71, 67) << 6) | /* 5b */ + (brw_inst_bits(src, 87, 87) << 11); /* 1b */ + } else if (devinfo->ver >= 12) { uncompacted = (brw_inst_bits(src, 55, 51) << 0) | /* 5b */ (brw_inst_bits(src, 71, 67) << 5); /* 5b */ @@ -1185,7 +1394,7 @@ set_subreg_index(const struct compaction_state *c, brw_compact_inst *dst, uncompacted |= brw_inst_bits(src, 100, 96) << 10; /* 5b */ } - for (int i = 0; i < 32; i++) { + for (int i = 0; i < table_len; i++) { if (c->subreg_table[i] == uncompacted) { brw_compact_inst_set_subreg_index(devinfo, dst, i); return true; @@ -1200,12 +1409,15 @@ set_src0_index(const struct compaction_state *c, brw_compact_inst *dst, const brw_inst *src) { const struct intel_device_info *devinfo = c->isa->devinfo; - uint16_t uncompacted; /* 12b */ + uint16_t uncompacted; /* 12b/G45+; 11b/Xe2+ */ int table_len; if (devinfo->ver >= 12) { - table_len = ARRAY_SIZE(gfx12_src0_index_table); - uncompacted = (brw_inst_bits(src, 87, 84) << 8) | /* 4b */ + table_len = (devinfo->ver >= 20 ? ARRAY_SIZE(xe2_src0_index_table) : + ARRAY_SIZE(gfx12_src0_index_table)); + uncompacted = (devinfo->ver >= 20 ? 
0 : + brw_inst_bits(src, 87, 87) << 11) | /* 1b */ + (brw_inst_bits(src, 86, 84) << 8) | /* 3b */ (brw_inst_bits(src, 83, 81) << 5) | /* 3b */ (brw_inst_bits(src, 80, 80) << 4) | /* 1b */ (brw_inst_bits(src, 65, 64) << 2) | /* 2b */ @@ -1240,10 +1452,18 @@ set_src1_index(const struct compaction_state *c, brw_compact_inst *dst, } return true; } else { - uint16_t uncompacted; /* 12b */ + uint16_t uncompacted; /* 12b/G45+ 16b/Xe2+ */ int table_len; - if (devinfo->ver >= 12) { + if (devinfo->ver >= 20) { + table_len = ARRAY_SIZE(xe2_src1_index_table); + uncompacted = (brw_inst_bits(src, 121, 120) << 14) | /* 2b */ + (brw_inst_bits(src, 118, 116) << 11) | /* 3b */ + (brw_inst_bits(src, 115, 113) << 8) | /* 3b */ + (brw_inst_bits(src, 112, 112) << 7) | /* 1b */ + (brw_inst_bits(src, 103, 99) << 2) | /* 5b */ + (brw_inst_bits(src, 97, 96)); /* 2b */ + } else if (devinfo->ver >= 12) { table_len = ARRAY_SIZE(gfx12_src0_index_table); uncompacted = (brw_inst_bits(src, 121, 120) << 10) | /* 2b */ (brw_inst_bits(src, 119, 116) << 6) | /* 4b */ @@ -1272,7 +1492,33 @@ set_3src_control_index(const struct intel_device_info *devinfo, { assert(devinfo->ver >= 8); - if (devinfo->verx10 >= 125) { + if (devinfo->ver >= 20) { + const uint64_t uncompacted = /* 34b/Xe2+ */ + (brw_inst_bits(src, 95, 92) << 30) | /* 4b */ + (brw_inst_bits(src, 90, 88) << 27) | /* 3b */ + (brw_inst_bits(src, 82, 80) << 24) | /* 3b */ + (brw_inst_bits(src, 50, 50) << 23) | /* 1b */ + 0 | /* 1b */ + (brw_inst_bits(src, 48, 48) << 21) | /* 1b */ + (brw_inst_bits(src, 42, 40) << 18) | /* 3b */ + (brw_inst_bits(src, 39, 39) << 17) | /* 1b */ + (brw_inst_bits(src, 38, 36) << 14) | /* 3b */ + (brw_inst_bits(src, 34, 34) << 13) | /* 1b */ + (brw_inst_bits(src, 32, 32) << 12) | /* 1b */ + (brw_inst_bits(src, 31, 31) << 11) | /* 1b */ + (brw_inst_bits(src, 28, 28) << 10) | /* 1b */ + (brw_inst_bits(src, 27, 26) << 8) | /* 2b */ + (brw_inst_bits(src, 25, 24) << 6) | /* 2b */ + (brw_inst_bits(src, 23, 21) << 3) | /* 3b 
*/ + (brw_inst_bits(src, 20, 18)); /* 3b */ + + for (unsigned i = 0; i < ARRAY_SIZE(xe2_3src_control_index_table); i++) { + if (xe2_3src_control_index_table[i] == uncompacted) { + brw_compact_inst_set_3src_control_index(devinfo, dst, i); + return true; + } + } + } else if (devinfo->verx10 >= 125) { uint64_t uncompacted = /* 37b/XeHP+ */ (brw_inst_bits(src, 95, 92) << 33) | /* 4b */ (brw_inst_bits(src, 90, 88) << 30) | /* 3b */ @@ -1372,11 +1618,13 @@ set_3src_source_index(const struct intel_device_info *devinfo, (brw_inst_bits(src, 35, 35)); /* 1b */ const uint32_t *three_src_source_index_table = - devinfo->verx10 >= 125 ? - xehp_3src_source_index_table : gfx12_3src_source_index_table; + devinfo->ver >= 20 ? xe2_3src_source_index_table : + devinfo->verx10 >= 125 ? xehp_3src_source_index_table : + gfx12_3src_source_index_table; const uint32_t three_src_source_index_table_len = + devinfo->ver >= 20 ? ARRAY_SIZE(xe2_3src_source_index_table) : devinfo->verx10 >= 125 ? ARRAY_SIZE(xehp_3src_source_index_table) : - ARRAY_SIZE(gfx12_3src_source_index_table); + ARRAY_SIZE(gfx12_3src_source_index_table); for (unsigned i = 0; i < three_src_source_index_table_len; i++) { if (three_src_source_index_table[i] == uncompacted) { @@ -1426,8 +1674,14 @@ set_3src_subreg_index(const struct intel_device_info *devinfo, (brw_inst_bits(src, 71, 67) << 5) | /* 5b */ (brw_inst_bits(src, 55, 51)); /* 5b */ - for (unsigned i = 0; i < ARRAY_SIZE(gfx12_3src_subreg_table); i++) { - if (gfx12_3src_subreg_table[i] == uncompacted) { + const uint32_t *table = devinfo->ver >= 20 ? xe2_3src_subreg_table : + gfx12_3src_subreg_table; + const uint32_t len = + devinfo->ver >= 20 ? 
ARRAY_SIZE(xe2_3src_subreg_table) : + ARRAY_SIZE(gfx12_3src_subreg_table); + + for (unsigned i = 0; i < len; i++) { + if (table[i] == uncompacted) { brw_compact_inst_set_3src_subreg_index(devinfo, dst, i); return true; } @@ -1473,14 +1727,22 @@ has_unmapped_bits(const struct brw_isa_info *isa, const brw_inst *src) } static bool -has_3src_unmapped_bits(const struct intel_device_info *devinfo, +has_3src_unmapped_bits(const struct brw_isa_info *isa, const brw_inst *src) { + const struct intel_device_info *devinfo = isa->devinfo; + /* Check for three-source instruction bits that don't map to any of the * fields of the compacted instruction. All of them seem to be reserved * bits currently. */ - if (devinfo->ver >= 12) { + ASSERTED enum opcode opcode = brw_inst_opcode(isa, src); + if (devinfo->ver >= 20) { + assert(opcode == BRW_OPCODE_DPAS || !brw_inst_bits(src, 49, 49)); + assert(!brw_inst_bits(src, 33, 33)); + assert(!brw_inst_bits(src, 7, 7)); + } else if (devinfo->ver >= 12) { + assert(opcode == BRW_OPCODE_DPAS || !brw_inst_bits(src, 49, 49)); assert(!brw_inst_bits(src, 7, 7)); } else if (devinfo->ver >= 9 || devinfo->platform == INTEL_PLATFORM_CHV) { assert(!brw_inst_bits(src, 127, 127) && @@ -1501,12 +1763,13 @@ has_3src_unmapped_bits(const struct intel_device_info *devinfo, } static bool -brw_try_compact_3src_instruction(const struct intel_device_info *devinfo, +brw_try_compact_3src_instruction(const struct brw_isa_info *isa, brw_compact_inst *dst, const brw_inst *src) { + const struct intel_device_info *devinfo = isa->devinfo; assert(devinfo->ver >= 8); - if (has_3src_unmapped_bits(devinfo, src)) + if (has_3src_unmapped_bits(isa, src)) return false; #define compact(field) \ @@ -1827,7 +2090,7 @@ try_compact_instruction(const struct compaction_state *c, if (is_3src(c->isa, brw_inst_opcode(c->isa, src))) { if (devinfo->ver >= 8) { memset(&temp, 0, sizeof(temp)); - if (brw_try_compact_3src_instruction(devinfo, &temp, src)) { + if 
(brw_try_compact_3src_instruction(c->isa, &temp, src)) { *dst = temp; return true; } else { @@ -1939,7 +2202,17 @@ set_uncompacted_control(const struct compaction_state *c, brw_inst *dst, uint32_t uncompacted = c->control_index_table[brw_compact_inst_control_index(devinfo, src)]; - if (devinfo->ver >= 12) { + if (devinfo->ver >= 20) { + brw_inst_set_bits(dst, 95, 92, (uncompacted >> 14) & 0xf); + brw_inst_set_bits(dst, 34, 34, (uncompacted >> 13) & 0x1); + brw_inst_set_bits(dst, 32, 32, (uncompacted >> 12) & 0x1); + brw_inst_set_bits(dst, 31, 31, (uncompacted >> 11) & 0x1); + brw_inst_set_bits(dst, 28, 28, (uncompacted >> 10) & 0x1); + brw_inst_set_bits(dst, 27, 26, (uncompacted >> 8) & 0x3); + brw_inst_set_bits(dst, 25, 24, (uncompacted >> 6) & 0x3); + brw_inst_set_bits(dst, 23, 21, (uncompacted >> 3) & 0x7); + brw_inst_set_bits(dst, 20, 18, (uncompacted >> 0) & 0x7); + } else if (devinfo->ver >= 12) { brw_inst_set_bits(dst, 95, 92, (uncompacted >> 17)); brw_inst_set_bits(dst, 34, 34, (uncompacted >> 16) & 0x1); brw_inst_set_bits(dst, 33, 33, (uncompacted >> 15) & 0x1); @@ -2002,7 +2275,12 @@ set_uncompacted_subreg(const struct compaction_state *c, brw_inst *dst, uint16_t uncompacted = c->subreg_table[brw_compact_inst_subreg_index(devinfo, src)]; - if (devinfo->ver >= 12) { + if (devinfo->ver >= 20) { + brw_inst_set_bits(dst, 33, 33, (uncompacted >> 0) & 0x1); + brw_inst_set_bits(dst, 55, 51, (uncompacted >> 1) & 0x1f); + brw_inst_set_bits(dst, 71, 67, (uncompacted >> 6) & 0x1f); + brw_inst_set_bits(dst, 87, 87, (uncompacted >> 11) & 0x1); + } else if (devinfo->ver >= 12) { brw_inst_set_bits(dst, 103, 99, (uncompacted >> 10)); brw_inst_set_bits(dst, 71, 67, (uncompacted >> 5) & 0x1f); brw_inst_set_bits(dst, 55, 51, (uncompacted >> 0) & 0x1f); @@ -2022,7 +2300,9 @@ set_uncompacted_src0(const struct compaction_state *c, brw_inst *dst, uint16_t uncompacted = c->src0_index_table[compacted]; if (devinfo->ver >= 12) { - brw_inst_set_bits(dst, 87, 84, (uncompacted >> 
8)); + if (devinfo->ver < 20) + brw_inst_set_bits(dst, 87, 87, (uncompacted >> 11) & 0x1); + brw_inst_set_bits(dst, 86, 84, (uncompacted >> 8) & 0x7); brw_inst_set_bits(dst, 83, 81, (uncompacted >> 5) & 0x7); brw_inst_set_bits(dst, 80, 80, (uncompacted >> 4) & 0x1); brw_inst_set_bits(dst, 65, 64, (uncompacted >> 2) & 0x3); @@ -2040,7 +2320,14 @@ set_uncompacted_src1(const struct compaction_state *c, brw_inst *dst, uint16_t uncompacted = c->src1_index_table[brw_compact_inst_src1_index(devinfo, src)]; - if (devinfo->ver >= 12) { + if (devinfo->ver >= 20) { + brw_inst_set_bits(dst, 121, 120, (uncompacted >> 14) & 0x3); + brw_inst_set_bits(dst, 118, 116, (uncompacted >> 11) & 0x7); + brw_inst_set_bits(dst, 115, 113, (uncompacted >> 8) & 0x7); + brw_inst_set_bits(dst, 112, 112, (uncompacted >> 7) & 0x1); + brw_inst_set_bits(dst, 103, 99, (uncompacted >> 2) & 0x1f); + brw_inst_set_bits(dst, 97, 96, (uncompacted >> 0) & 0x3); + } else if (devinfo->ver >= 12) { brw_inst_set_bits(dst, 121, 120, (uncompacted >> 10)); brw_inst_set_bits(dst, 119, 116, (uncompacted >> 6) & 0xf); brw_inst_set_bits(dst, 115, 113, (uncompacted >> 3) & 0x7); @@ -2058,7 +2345,28 @@ set_uncompacted_3src_control_index(const struct compaction_state *c, const struct intel_device_info *devinfo = c->isa->devinfo; assert(devinfo->ver >= 8); - if (devinfo->verx10 >= 125) { + if (devinfo->ver >= 20) { + uint64_t compacted = brw_compact_inst_3src_control_index(devinfo, src); + uint64_t uncompacted = xe2_3src_control_index_table[compacted]; + + brw_inst_set_bits(dst, 95, 92, (uncompacted >> 30) & 0xf); + brw_inst_set_bits(dst, 90, 88, (uncompacted >> 27) & 0x7); + brw_inst_set_bits(dst, 82, 80, (uncompacted >> 24) & 0x7); + brw_inst_set_bits(dst, 50, 50, (uncompacted >> 23) & 0x1); + brw_inst_set_bits(dst, 48, 48, (uncompacted >> 21) & 0x1); + brw_inst_set_bits(dst, 42, 40, (uncompacted >> 18) & 0x7); + brw_inst_set_bits(dst, 39, 39, (uncompacted >> 17) & 0x1); + brw_inst_set_bits(dst, 38, 36, (uncompacted >> 
14) & 0x7); + brw_inst_set_bits(dst, 34, 34, (uncompacted >> 13) & 0x1); + brw_inst_set_bits(dst, 32, 32, (uncompacted >> 12) & 0x1); + brw_inst_set_bits(dst, 31, 31, (uncompacted >> 11) & 0x1); + brw_inst_set_bits(dst, 28, 28, (uncompacted >> 10) & 0x1); + brw_inst_set_bits(dst, 27, 26, (uncompacted >> 8) & 0x3); + brw_inst_set_bits(dst, 25, 24, (uncompacted >> 6) & 0x3); + brw_inst_set_bits(dst, 23, 21, (uncompacted >> 3) & 0x7); + brw_inst_set_bits(dst, 20, 18, (uncompacted >> 0) & 0x7); + + } else if (devinfo->verx10 >= 125) { uint64_t compacted = brw_compact_inst_3src_control_index(devinfo, src); uint64_t uncompacted = xehp_3src_control_index_table[compacted]; @@ -2125,8 +2433,9 @@ set_uncompacted_3src_source_index(const struct intel_device_info *devinfo, if (devinfo->ver >= 12) { const uint32_t *three_src_source_index_table = - devinfo->verx10 >= 125 ? - xehp_3src_source_index_table : gfx12_3src_source_index_table; + devinfo->ver >= 20 ? xe2_3src_source_index_table : + devinfo->verx10 >= 125 ? xehp_3src_source_index_table : + gfx12_3src_source_index_table; uint32_t uncompacted = three_src_source_index_table[compacted]; brw_inst_set_bits(dst, 114, 114, (uncompacted >> 20)); @@ -2171,7 +2480,8 @@ set_uncompacted_3src_subreg_index(const struct intel_device_info *devinfo, assert(devinfo->ver >= 12); uint32_t compacted = brw_compact_inst_3src_subreg_index(devinfo, src); - uint32_t uncompacted = gfx12_3src_subreg_table[compacted]; + uint32_t uncompacted = (devinfo->ver >= 20 ? 
xe2_3src_subreg_table[compacted]: + gfx12_3src_subreg_table[compacted]); brw_inst_set_bits(dst, 119, 115, (uncompacted >> 15)); brw_inst_set_bits(dst, 103, 99, (uncompacted >> 10) & 0x1f); @@ -2419,9 +2729,21 @@ compaction_state_init(struct compaction_state *c, assert(gfx12_src1_index_table[ARRAY_SIZE(gfx12_src1_index_table) - 1] != 0); assert(xehp_src0_index_table[ARRAY_SIZE(xehp_src0_index_table) - 1] != 0); assert(xehp_src1_index_table[ARRAY_SIZE(xehp_src1_index_table) - 1] != 0); + assert(xe2_control_index_table[ARRAY_SIZE(xe2_control_index_table) - 1] != 0); + assert(xe2_datatype_table[ARRAY_SIZE(xe2_datatype_table) - 1] != 0); + assert(xe2_subreg_table[ARRAY_SIZE(xe2_subreg_table) - 1] != 0); + assert(xe2_src0_index_table[ARRAY_SIZE(xe2_src0_index_table) - 1] != 0); + assert(xe2_src1_index_table[ARRAY_SIZE(xe2_src1_index_table) - 1] != 0); c->isa = isa; switch (devinfo->ver) { + case 20: + c->control_index_table = xe2_control_index_table; + c->datatype_table = xe2_datatype_table; + c->subreg_table = xe2_subreg_table; + c->src0_index_table = xe2_src0_index_table; + c->src1_index_table = xe2_src1_index_table; + break; case 12: c->control_index_table = gfx12_control_index_table;; c->datatype_table = gfx12_datatype_table;