Module: Mesa
Branch: main
Commit: f68797ead72c29678a54aae457dbd9e9b7946588
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=f68797ead72c29678a54aae457dbd9e9b7946588

Author: Rhys Perry <[email protected]>
Date:   Tue Apr 27 12:11:37 2021 +0100

aco: create v_mac_legacy_f32/v_fmac_legacy_f32

Signed-off-by: Rhys Perry <[email protected]>
Reviewed-by: Timur Kristóf <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13436>

---

 src/amd/compiler/aco_ir.cpp                  |  1 +
 src/amd/compiler/aco_ir.h                    |  1 +
 src/amd/compiler/aco_opcodes.py              |  6 +++++-
 src/amd/compiler/aco_register_allocation.cpp | 16 ++++++++++++++++
 4 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/src/amd/compiler/aco_ir.cpp b/src/amd/compiler/aco_ir.cpp
index 523b25e619e..1b624e4b930 100644
--- a/src/amd/compiler/aco_ir.cpp
+++ b/src/amd/compiler/aco_ir.cpp
@@ -148,6 +148,7 @@ init_program(Program* program, Stage stage, const struct 
radv_shader_info* info,
    if (program->family == CHIP_TAHITI || program->family == CHIP_CARRIZO ||
        program->family == CHIP_HAWAII)
       program->dev.has_fast_fma32 = true;
+   program->dev.has_mac_legacy32 = program->chip_class <= GFX7 || 
program->chip_class >= GFX10;
 
    program->wgp_mode = wgp_mode;
 
diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h
index 39b2e3aa70f..4a44448d014 100644
--- a/src/amd/compiler/aco_ir.h
+++ b/src/amd/compiler/aco_ir.h
@@ -2047,6 +2047,7 @@ struct DeviceInfo {
    unsigned max_wave64_per_simd;
    unsigned simd_per_cu;
    bool has_fast_fma32 = false;
+   bool has_mac_legacy32 = false;
    bool xnack_enabled = false;
    bool sram_ecc_enabled = false;
 };
diff --git a/src/amd/compiler/aco_opcodes.py b/src/amd/compiler/aco_opcodes.py
index 16494a701c4..d95f37a21e7 100644
--- a/src/amd/compiler/aco_opcodes.py
+++ b/src/amd/compiler/aco_opcodes.py
@@ -674,7 +674,8 @@ VOP2 = {
    (0x03, 0x03, 0x01, 0x01, 0x03, "v_add_f32", True),
    (0x04, 0x04, 0x02, 0x02, 0x04, "v_sub_f32", True),
    (0x05, 0x05, 0x03, 0x03, 0x05, "v_subrev_f32", True),
-   (0x06, 0x06,   -1,   -1, 0x06, "v_mac_legacy_f32", True),
+   (0x06, 0x06,   -1,   -1, 0x06, "v_mac_legacy_f32", True), #GFX6,7,10
+   (  -1,   -1,   -1,   -1, 0x06, "v_fmac_legacy_f32", True), #GFX10.3+
    (0x07, 0x07, 0x04, 0x04, 0x07, "v_mul_legacy_f32", True),
    (0x08, 0x08, 0x05, 0x05, 0x08, "v_mul_f32", True),
    (0x09, 0x09, 0x06, 0x06, 0x09, "v_mul_i32_i24", False),
@@ -1686,6 +1687,9 @@ for ver in ['gfx9', 'gfx10']:
             # v_mad_legacy_f32 is replaced with v_fma_legacy_f32 on GFX10.3
             if ver == 'gfx10' and names == set(['v_mad_legacy_f32', 
'v_fma_legacy_f32']):
                 continue
+            # v_mac_legacy_f32 is replaced with v_fmac_legacy_f32 on GFX10.3
+            if ver == 'gfx10' and names == set(['v_mac_legacy_f32', 
'v_fmac_legacy_f32']):
+                continue
 
             print('%s and %s share the same opcode number (%s)' % 
(op_to_name[key], op.name, ver))
             sys.exit(1)
diff --git a/src/amd/compiler/aco_register_allocation.cpp 
b/src/amd/compiler/aco_register_allocation.cpp
index 088afaa307c..ab10b2f3bcc 100644
--- a/src/amd/compiler/aco_register_allocation.cpp
+++ b/src/amd/compiler/aco_register_allocation.cpp
@@ -2383,6 +2383,13 @@ get_affinities(ra_ctx& ctx, std::vector<IDSet>& 
live_out_per_block)
                   op = instr->operands[2];
                   break;
 
+               case aco_opcode::v_mad_legacy_f32:
+               case aco_opcode::v_fma_legacy_f32:
+                  if (instr->usesModifiers() || 
!ctx.program->dev.has_mac_legacy32)
+                     continue;
+                  op = instr->operands[2];
+                  break;
+
                default: continue;
                }
 
@@ -2577,6 +2584,8 @@ register_allocation(Program* program, std::vector<IDSet>& 
live_out_per_block, ra
               instr->opcode == aco_opcode::v_mad_legacy_f16 ||
               (instr->opcode == aco_opcode::v_fma_f16 && program->chip_class 
>= GFX10) ||
               (instr->opcode == aco_opcode::v_pk_fma_f16 && 
program->chip_class >= GFX10) ||
+              (instr->opcode == aco_opcode::v_mad_legacy_f32 && 
program->dev.has_mac_legacy32) ||
+              (instr->opcode == aco_opcode::v_fma_legacy_f32 && 
program->dev.has_mac_legacy32) ||
               (instr->opcode == aco_opcode::v_dot4_i32_i8 && program->family 
!= CHIP_VEGA20)) &&
              instr->operands[2].isTemp() && 
instr->operands[2].isKillBeforeDef() &&
              instr->operands[2].getTemp().type() == RegType::vgpr &&
@@ -2608,6 +2617,12 @@ register_allocation(Program* program, 
std::vector<IDSet>& live_out_per_block, ra
                case aco_opcode::v_fma_f16: instr->opcode = 
aco_opcode::v_fmac_f16; break;
                case aco_opcode::v_pk_fma_f16: instr->opcode = 
aco_opcode::v_pk_fmac_f16; break;
                case aco_opcode::v_dot4_i32_i8: instr->opcode = 
aco_opcode::v_dot4c_i32_i8; break;
+               case aco_opcode::v_mad_legacy_f32:
+                  instr->opcode = aco_opcode::v_mac_legacy_f32;
+                  break;
+               case aco_opcode::v_fma_legacy_f32:
+                  instr->opcode = aco_opcode::v_fmac_legacy_f32;
+                  break;
                default: break;
                }
             }
@@ -2617,6 +2632,7 @@ register_allocation(Program* program, std::vector<IDSet>& 
live_out_per_block, ra
          if (instr->opcode == aco_opcode::v_interp_p2_f32 ||
              instr->opcode == aco_opcode::v_mac_f32 || instr->opcode == 
aco_opcode::v_fmac_f32 ||
              instr->opcode == aco_opcode::v_mac_f16 || instr->opcode == 
aco_opcode::v_fmac_f16 ||
+             instr->opcode == aco_opcode::v_fmac_legacy_f32 ||
              instr->opcode == aco_opcode::v_pk_fmac_f16 ||
              instr->opcode == aco_opcode::v_writelane_b32 ||
              instr->opcode == aco_opcode::v_writelane_b32_e64 ||

Reply via email to