Author: vangthao95
Date: 2025-12-22T09:05:35-08:00
New Revision: 0e91db465e06a377349f0d5c25a8318045e3aa0b

URL: 
https://github.com/llvm/llvm-project/commit/0e91db465e06a377349f0d5c25a8318045e3aa0b
DIFF: 
https://github.com/llvm/llvm-project/commit/0e91db465e06a377349f0d5c25a8318045e3aa0b.diff

LOG: Revert "[AMDGPU][GlobalISel] Add RegBankLegalize support for G_FMAD, G_FMA (#…"

This reverts commit c471badd81a59f72820294e54c72c40922a38dcc.

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
    llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
    llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-ext-fma.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-ext-mul.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-fma-mul.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-sub-ext-mul.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-sub-ext-neg-mul.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-sub-mul.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-sub-neg-mul.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/fma.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/fmamix-constant-bus-violation.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/fmul.v2f16.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/madmix-constant-bus-violation.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fma.mir

Removed: 
    llvm/test/CodeGen/AMDGPU/GlobalISel/fmad.ll


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
index f21b87c8f92f0..cc31d7d5c55ac 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
@@ -684,12 +684,10 @@ bool RegBankLegalizeHelper::lowerSplitTo16(MachineInstr &MI) {
   Register Dst = MI.getOperand(0).getReg();
   assert(MRI.getType(Dst) == V2S16);
   unsigned Opc = MI.getOpcode();
-  unsigned NumOps = MI.getNumOperands();
   auto Flags = MI.getFlags();
 
-  auto [Op1Lo, Op1Hi] = unpackAExtTruncS16(MI.getOperand(1).getReg());
-
-  if (NumOps == 2) {
+  if (MI.getNumOperands() == 2) {
+    auto [Op1Lo, Op1Hi] = unpackAExtTruncS16(MI.getOperand(1).getReg());
     auto Lo = B.buildInstr(Opc, {SgprRB_S16}, {Op1Lo}, Flags);
     auto Hi = B.buildInstr(Opc, {SgprRB_S16}, {Op1Hi}, Flags);
     B.buildMergeLikeInstr(Dst, {Lo, Hi});
@@ -697,20 +695,11 @@ bool RegBankLegalizeHelper::lowerSplitTo16(MachineInstr &MI) {
     return true;
   }
 
+  assert(MI.getNumOperands() == 3);
+  auto [Op1Lo, Op1Hi] = unpackAExtTruncS16(MI.getOperand(1).getReg());
   auto [Op2Lo, Op2Hi] = unpackAExtTruncS16(MI.getOperand(2).getReg());
-
-  if (NumOps == 3) {
-    auto Lo = B.buildInstr(Opc, {SgprRB_S16}, {Op1Lo, Op2Lo}, Flags);
-    auto Hi = B.buildInstr(Opc, {SgprRB_S16}, {Op1Hi, Op2Hi}, Flags);
-    B.buildMergeLikeInstr(Dst, {Lo, Hi});
-    MI.eraseFromParent();
-    return true;
-  }
-
-  assert(NumOps == 4);
-  auto [Op3Lo, Op3Hi] = unpackAExtTruncS16(MI.getOperand(3).getReg());
-  auto Lo = B.buildInstr(Opc, {SgprRB_S16}, {Op1Lo, Op2Lo, Op3Lo}, Flags);
-  auto Hi = B.buildInstr(Opc, {SgprRB_S16}, {Op1Hi, Op2Hi, Op3Hi}, Flags);
+  auto Lo = B.buildInstr(Opc, {SgprRB_S16}, {Op1Lo, Op2Lo}, Flags);
+  auto Hi = B.buildInstr(Opc, {SgprRB_S16}, {Op1Hi, Op2Hi}, Flags);
   B.buildMergeLikeInstr(Dst, {Lo, Hi});
   MI.eraseFromParent();
   return true;
@@ -982,7 +971,6 @@ LLT RegBankLegalizeHelper::getTyFromID(RegBankLLTMappingApplyID ID) {
     return LLT::fixed_vector(2, 16);
   case SgprV2S32:
   case VgprV2S32:
-  case UniInVgprV2S32:
     return LLT::fixed_vector(2, 32);
   case SgprV4S32:
   case SgprV4S32_WF:
@@ -1086,7 +1074,6 @@ RegBankLegalizeHelper::getRegBankFromID(RegBankLLTMappingApplyID ID) {
   case UniInVgprS32:
   case UniInVgprS64:
   case UniInVgprV2S16:
-  case UniInVgprV2S32:
   case UniInVgprV4S32:
   case UniInVgprB32:
   case UniInVgprB64:
@@ -1222,7 +1209,6 @@ bool RegBankLegalizeHelper::applyMappingDst(
     case UniInVgprS32:
     case UniInVgprS64:
     case UniInVgprV2S16:
-    case UniInVgprV2S32:
     case UniInVgprV4S32: {
       assert(Ty == getTyFromID(MethodIDs[OpIdx]));
       assert(RB == SgprRB);

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
index dee8488855b7a..63135feb4ea16 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
@@ -120,8 +120,6 @@ bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID,
     return isAnyPtr(MRI.getType(Reg), 128) && MUI.isUniform(Reg);
   case UniV2S16:
     return MRI.getType(Reg) == LLT::fixed_vector(2, 16) && MUI.isUniform(Reg);
-  case UniV2S32:
-    return MRI.getType(Reg) == LLT::fixed_vector(2, 32) && MUI.isUniform(Reg);
   case UniB32:
     return MRI.getType(Reg).getSizeInBits() == 32 && MUI.isUniform(Reg);
   case UniB64:
@@ -162,8 +160,6 @@ bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID,
     return isAnyPtr(MRI.getType(Reg), 128) && MUI.isDivergent(Reg);
   case DivV2S16:
     return MRI.getType(Reg) == LLT::fixed_vector(2, 16) && MUI.isDivergent(Reg);
-  case DivV2S32:
-    return MRI.getType(Reg) == LLT::fixed_vector(2, 32) && MUI.isDivergent(Reg);
   case DivB32:
     return MRI.getType(Reg).getSizeInBits() == 32 && MUI.isDivergent(Reg);
   case DivB64:
@@ -972,30 +968,6 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
       .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}}, hasSALUFloat)
       .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}}, !hasSALUFloat);
 
-  addRulesForGOpcs({G_FMAD}, Standard)
-      .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16, Vgpr16}})
-      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16, Vgpr16}})
-      .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32, Vgpr32}})
-      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}});
-
-  addRulesForGOpcs({G_FMA}, Standard)
-      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16, Vgpr16}})
-      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}})
-      .Uni(S64, {{UniInVgprS64}, {Vgpr64, Vgpr64, Vgpr64}})
-      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr64, Vgpr64}})
-      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16, VgprV2S16}})
-      .Any({{UniV2S32}, {{UniInVgprV2S32}, {VgprV2S32, VgprV2S32, VgprV2S32}}})
-      .Any({{DivV2S32}, {{VgprV2S32}, {VgprV2S32, VgprV2S32, VgprV2S32}}})
-      .Uni(S16, {{Sgpr16}, {Sgpr16, Sgpr16, Sgpr16}}, hasSALUFloat)
-      .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16, Vgpr16}}, !hasSALUFloat)
-      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32, Sgpr32}}, hasSALUFloat)
-      .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32, Vgpr32}}, !hasSALUFloat)
-      .Uni(V2S16,
-           {{SgprV2S16}, {SgprV2S16, SgprV2S16, SgprV2S16}, ScalarizeToS16},
-           hasSALUFloat)
-      .Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16, VgprV2S16, VgprV2S16}},
-           !hasSALUFloat);
-
   // FNEG and FABS are either folded as source modifiers or can be selected as
   // bitwise XOR and AND with Mask. XOR and AND are available on SALU but for
   // targets without SALU float we still select them as VGPR since there would

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-ext-fma.ll 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-ext-fma.ll
index 48b6dd95bdc0d..b2b433167fe4d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-ext-fma.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-ext-fma.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx900 
--denormal-fp-math=preserve-sign < %s | FileCheck -check-prefix=GFX9-DENORM %s
-; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1010 < 
%s | FileCheck -check-prefix=GFX10 %s
-; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1010 
-fp-contract=fast < %s | FileCheck -check-prefix=GFX10-CONTRACT %s
-; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1010 
--denormal-fp-math=preserve-sign < %s | FileCheck -check-prefix=GFX10-DENORM %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 
--denormal-fp-math=preserve-sign < %s | FileCheck -check-prefix=GFX9-DENORM %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck 
-check-prefix=GFX10 %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 -fp-contract=fast < %s | 
FileCheck -check-prefix=GFX10-CONTRACT %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 
--denormal-fp-math=preserve-sign < %s | FileCheck -check-prefix=GFX10-DENORM %s
 
 ; fold (fadd (fma x, y, (fpext (fmul u, v))), z) -> (fma x, y, (fma (fpext u), 
(fpext v), z))
 define amdgpu_vs float @test_f16_f32_add_fma_ext_mul(float %x, float %y, float 
%z, half %u, half %v) {

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-ext-mul.ll 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-ext-mul.ll
index 21997e2224735..4d603f7487754 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-ext-mul.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-ext-mul.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx900 
--denormal-fp-math=preserve-sign < %s | FileCheck 
-check-prefix=GFX9-FAST-DENORM %s
-; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1010 
--denormal-fp-math=preserve-sign < %s | FileCheck 
-check-prefix=GFX10-FAST-DENORM %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 
--denormal-fp-math=preserve-sign < %s | FileCheck 
-check-prefix=GFX9-FAST-DENORM %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 
--denormal-fp-math=preserve-sign < %s | FileCheck 
-check-prefix=GFX10-FAST-DENORM %s
 
 ; fold (fadd fast (fpext (fmul fast x, y)), z) -> (fma (fpext x), (fpext y), z)
 ; fold (fadd fast x, (fpext (fmul fast y, z))) -> (fma (fpext y), (fpext z), x)
@@ -49,26 +49,21 @@ define amdgpu_vs <5 x float> 
@test_5xf16_5xf32_add_ext_mul(<5 x half> inreg %x,
 ; GFX9-FAST-DENORM-LABEL: test_5xf16_5xf32_add_ext_mul:
 ; GFX9-FAST-DENORM:       ; %bb.0: ; %.entry
 ; GFX9-FAST-DENORM-NEXT:    v_mov_b32_e32 v0, s3
+; GFX9-FAST-DENORM-NEXT:    v_mov_b32_e32 v1, s4
+; GFX9-FAST-DENORM-NEXT:    v_mov_b32_e32 v2, s5
 ; GFX9-FAST-DENORM-NEXT:    v_pk_mul_f16 v0, s0, v0
-; GFX9-FAST-DENORM-NEXT:    v_readfirstlane_b32 s0, v0
-; GFX9-FAST-DENORM-NEXT:    v_mov_b32_e32 v0, s4
-; GFX9-FAST-DENORM-NEXT:    v_pk_mul_f16 v0, s1, v0
-; GFX9-FAST-DENORM-NEXT:    v_readfirstlane_b32 s1, v0
-; GFX9-FAST-DENORM-NEXT:    v_mov_b32_e32 v0, s5
-; GFX9-FAST-DENORM-NEXT:    v_pk_mul_f16 v0, s2, v0
-; GFX9-FAST-DENORM-NEXT:    v_readfirstlane_b32 s2, v0
-; GFX9-FAST-DENORM-NEXT:    s_lshr_b32 s3, s0, 16
-; GFX9-FAST-DENORM-NEXT:    s_lshr_b32 s4, s1, 16
-; GFX9-FAST-DENORM-NEXT:    v_cvt_f32_f16_e32 v0, s0
-; GFX9-FAST-DENORM-NEXT:    v_cvt_f32_f16_e32 v1, s3
-; GFX9-FAST-DENORM-NEXT:    v_cvt_f32_f16_e32 v2, s1
-; GFX9-FAST-DENORM-NEXT:    v_cvt_f32_f16_e32 v3, s4
-; GFX9-FAST-DENORM-NEXT:    v_cvt_f32_f16_e32 v4, s2
-; GFX9-FAST-DENORM-NEXT:    v_add_f32_e32 v0, s6, v0
-; GFX9-FAST-DENORM-NEXT:    v_add_f32_e32 v1, s7, v1
-; GFX9-FAST-DENORM-NEXT:    v_add_f32_e32 v2, s8, v2
-; GFX9-FAST-DENORM-NEXT:    v_add_f32_e32 v3, s9, v3
-; GFX9-FAST-DENORM-NEXT:    v_add_f32_e32 v4, s10, v4
+; GFX9-FAST-DENORM-NEXT:    v_pk_mul_f16 v1, s1, v1
+; GFX9-FAST-DENORM-NEXT:    v_pk_mul_f16 v2, s2, v2
+; GFX9-FAST-DENORM-NEXT:    v_cvt_f32_f16_e32 v3, v0
+; GFX9-FAST-DENORM-NEXT:    v_cvt_f32_f16_sdwa v4, v0 dst_sel:DWORD 
dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX9-FAST-DENORM-NEXT:    v_cvt_f32_f16_e32 v5, v1
+; GFX9-FAST-DENORM-NEXT:    v_cvt_f32_f16_sdwa v6, v1 dst_sel:DWORD 
dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX9-FAST-DENORM-NEXT:    v_cvt_f32_f16_e32 v7, v2
+; GFX9-FAST-DENORM-NEXT:    v_add_f32_e32 v0, s6, v3
+; GFX9-FAST-DENORM-NEXT:    v_add_f32_e32 v1, s7, v4
+; GFX9-FAST-DENORM-NEXT:    v_add_f32_e32 v2, s8, v5
+; GFX9-FAST-DENORM-NEXT:    v_add_f32_e32 v3, s9, v6
+; GFX9-FAST-DENORM-NEXT:    v_add_f32_e32 v4, s10, v7
 ; GFX9-FAST-DENORM-NEXT:    ; return to shader part epilog
 ;
 ; GFX10-FAST-DENORM-LABEL: test_5xf16_5xf32_add_ext_mul:
@@ -99,29 +94,23 @@ define amdgpu_vs <6 x float> 
@test_6xf16_6xf32_add_ext_mul_rhs(<6 x half> inreg
 ; GFX9-FAST-DENORM-LABEL: test_6xf16_6xf32_add_ext_mul_rhs:
 ; GFX9-FAST-DENORM:       ; %bb.0: ; %.entry
 ; GFX9-FAST-DENORM-NEXT:    v_mov_b32_e32 v0, s3
+; GFX9-FAST-DENORM-NEXT:    v_mov_b32_e32 v1, s4
+; GFX9-FAST-DENORM-NEXT:    v_mov_b32_e32 v2, s5
 ; GFX9-FAST-DENORM-NEXT:    v_pk_mul_f16 v0, s0, v0
-; GFX9-FAST-DENORM-NEXT:    v_readfirstlane_b32 s0, v0
-; GFX9-FAST-DENORM-NEXT:    v_mov_b32_e32 v0, s4
-; GFX9-FAST-DENORM-NEXT:    v_pk_mul_f16 v0, s1, v0
-; GFX9-FAST-DENORM-NEXT:    v_readfirstlane_b32 s1, v0
-; GFX9-FAST-DENORM-NEXT:    v_mov_b32_e32 v0, s5
-; GFX9-FAST-DENORM-NEXT:    v_pk_mul_f16 v0, s2, v0
-; GFX9-FAST-DENORM-NEXT:    v_readfirstlane_b32 s2, v0
-; GFX9-FAST-DENORM-NEXT:    s_lshr_b32 s3, s0, 16
-; GFX9-FAST-DENORM-NEXT:    s_lshr_b32 s4, s1, 16
-; GFX9-FAST-DENORM-NEXT:    s_lshr_b32 s5, s2, 16
-; GFX9-FAST-DENORM-NEXT:    v_cvt_f32_f16_e32 v0, s0
-; GFX9-FAST-DENORM-NEXT:    v_cvt_f32_f16_e32 v1, s3
-; GFX9-FAST-DENORM-NEXT:    v_cvt_f32_f16_e32 v2, s1
-; GFX9-FAST-DENORM-NEXT:    v_cvt_f32_f16_e32 v3, s4
-; GFX9-FAST-DENORM-NEXT:    v_cvt_f32_f16_e32 v4, s2
-; GFX9-FAST-DENORM-NEXT:    v_cvt_f32_f16_e32 v5, s5
-; GFX9-FAST-DENORM-NEXT:    v_add_f32_e32 v0, s6, v0
-; GFX9-FAST-DENORM-NEXT:    v_add_f32_e32 v1, s7, v1
-; GFX9-FAST-DENORM-NEXT:    v_add_f32_e32 v2, s8, v2
-; GFX9-FAST-DENORM-NEXT:    v_add_f32_e32 v3, s9, v3
-; GFX9-FAST-DENORM-NEXT:    v_add_f32_e32 v4, s10, v4
-; GFX9-FAST-DENORM-NEXT:    v_add_f32_e32 v5, s11, v5
+; GFX9-FAST-DENORM-NEXT:    v_pk_mul_f16 v1, s1, v1
+; GFX9-FAST-DENORM-NEXT:    v_pk_mul_f16 v2, s2, v2
+; GFX9-FAST-DENORM-NEXT:    v_cvt_f32_f16_e32 v3, v0
+; GFX9-FAST-DENORM-NEXT:    v_cvt_f32_f16_sdwa v4, v0 dst_sel:DWORD 
dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX9-FAST-DENORM-NEXT:    v_cvt_f32_f16_e32 v5, v1
+; GFX9-FAST-DENORM-NEXT:    v_cvt_f32_f16_sdwa v6, v1 dst_sel:DWORD 
dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX9-FAST-DENORM-NEXT:    v_cvt_f32_f16_e32 v7, v2
+; GFX9-FAST-DENORM-NEXT:    v_cvt_f32_f16_sdwa v8, v2 dst_sel:DWORD 
dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX9-FAST-DENORM-NEXT:    v_add_f32_e32 v0, s6, v3
+; GFX9-FAST-DENORM-NEXT:    v_add_f32_e32 v1, s7, v4
+; GFX9-FAST-DENORM-NEXT:    v_add_f32_e32 v2, s8, v5
+; GFX9-FAST-DENORM-NEXT:    v_add_f32_e32 v3, s9, v6
+; GFX9-FAST-DENORM-NEXT:    v_add_f32_e32 v4, s10, v7
+; GFX9-FAST-DENORM-NEXT:    v_add_f32_e32 v5, s11, v8
 ; GFX9-FAST-DENORM-NEXT:    ; return to shader part epilog
 ;
 ; GFX10-FAST-DENORM-LABEL: test_6xf16_6xf32_add_ext_mul_rhs:

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-fma-mul.ll 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-fma-mul.ll
index 8183a4dec10ca..6ea0a9446ff9d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-fma-mul.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-fma-mul.ll
@@ -1,10 +1,10 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx900 
-fp-contract=fast < %s | FileCheck -check-prefix=GFX9-CONTRACT %s
-; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx900 
--denormal-fp-math=preserve-sign < %s | FileCheck -check-prefix=GFX9-DENORM %s
-; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1010 
-fp-contract=fast < %s | FileCheck -check-prefix=GFX10-CONTRACT %s
-; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1010 
--denormal-fp-math=preserve-sign < %s | FileCheck -check-prefix=GFX10-DENORM %s
-; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1100 
-mattr=-real-true16 -fp-contract=fast < %s | FileCheck 
-check-prefixes=GFX11-CONTRACT %s
-; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1100 
-mattr=-real-true16 --denormal-fp-math=preserve-sign < %s | FileCheck 
-check-prefixes=GFX11-DENORM %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 -fp-contract=fast < %s | 
FileCheck -check-prefix=GFX9-CONTRACT %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 
--denormal-fp-math=preserve-sign < %s | FileCheck -check-prefix=GFX9-DENORM %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 -fp-contract=fast < %s | 
FileCheck -check-prefix=GFX10-CONTRACT %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 
--denormal-fp-math=preserve-sign < %s | FileCheck -check-prefix=GFX10-DENORM %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 
-fp-contract=fast < %s | FileCheck -check-prefixes=GFX11-CONTRACT %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 
--denormal-fp-math=preserve-sign < %s | FileCheck -check-prefixes=GFX11-DENORM 
%s
 
 ; fadd (fma a, b, (fmul c, d)), e --> fma a, b, (fma c, d, e)
 ; fadd e, (fma a, b, (fmul c, d)) --> fma a, b, (fma c, d, e)

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul.ll 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul.ll
index 1e02f6308a0c5..3f6e3d81c52ad 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul.ll
@@ -1,10 +1,10 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx900 < %s 
| FileCheck -check-prefix=GFX9 %s
-; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx900 
-fp-contract=fast < %s | FileCheck -check-prefix=GFX9-CONTRACT %s
-; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx900 
--denormal-fp-math=preserve-sign < %s | FileCheck -check-prefix=GFX9-DENORM %s
-; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1010 < 
%s | FileCheck -check-prefix=GFX10 %s
-; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1010 
-fp-contract=fast < %s | FileCheck -check-prefix=GFX10-CONTRACT %s
-; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1010 
--denormal-fp-math=preserve-sign < %s | FileCheck -check-prefix=GFX10-DENORM %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck 
-check-prefix=GFX9 %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 -fp-contract=fast < %s | 
FileCheck -check-prefix=GFX9-CONTRACT %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 
--denormal-fp-math=preserve-sign < %s | FileCheck -check-prefix=GFX9-DENORM %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck 
-check-prefix=GFX10 %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 -fp-contract=fast < %s | 
FileCheck -check-prefix=GFX10-CONTRACT %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 
--denormal-fp-math=preserve-sign < %s | FileCheck -check-prefix=GFX10-DENORM %s
 
 define float @test_f32_add_mul(float %x, float %y, float %z) {
 ; GFX9-LABEL: test_f32_add_mul:

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-sub-ext-mul.ll 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-sub-ext-mul.ll
index 8879f7dc2b44c..4d6e60cbf6977 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-sub-ext-mul.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-sub-ext-mul.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx900 
--denormal-fp-math=preserve-sign < %s | FileCheck -check-prefix=GFX9-DENORM %s
-; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1010 
--denormal-fp-math=preserve-sign < %s | FileCheck -check-prefix=GFX10-DENORM %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 
--denormal-fp-math=preserve-sign < %s | FileCheck -check-prefix=GFX9-DENORM %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 
--denormal-fp-math=preserve-sign < %s | FileCheck -check-prefix=GFX10-DENORM %s
 
 ; fold (fsub (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), (fneg z))
 define amdgpu_vs float @test_f16_to_f32_sub_ext_mul(half %x, half %y, float 
%z) {

diff  --git 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-sub-ext-neg-mul.ll 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-sub-ext-neg-mul.ll
index df6c8dffba5ef..814a34754e883 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-sub-ext-neg-mul.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-sub-ext-neg-mul.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx900 
--denormal-fp-math=preserve-sign < %s | FileCheck -check-prefix=GFX9-DENORM %s
-; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1010 
--denormal-fp-math=preserve-sign < %s | FileCheck -check-prefix=GFX10-DENORM %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 
--denormal-fp-math=preserve-sign < %s | FileCheck -check-prefix=GFX9-DENORM %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 
--denormal-fp-math=preserve-sign < %s | FileCheck -check-prefix=GFX10-DENORM %s
 
 ; fold (fsub (fpext (fneg (fmul, x, y))), z) -> (fneg (fma (fpext x), (fpext 
y), z))
 define amdgpu_vs float @test_f16_to_f32_sub_ext_neg_mul(half %x, half %y, 
float %z) {

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-sub-mul.ll 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-sub-mul.ll
index d046b854fb0d8..99bdcdd1f31e5 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-sub-mul.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-sub-mul.ll
@@ -1,12 +1,12 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx900 < %s 
| FileCheck -check-prefix=GFX9 %s
-; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx900 
-fp-contract=fast < %s | FileCheck -check-prefix=GFX9-CONTRACT %s
-; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx900 
--denormal-fp-math=preserve-sign < %s | FileCheck -check-prefix=GFX9-DENORM %s
-; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1010 < 
%s | FileCheck -check-prefix=GFX10 %s
-; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1010 
-fp-contract=fast < %s | FileCheck -check-prefix=GFX10-CONTRACT %s
-; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1010 
--denormal-fp-math=preserve-sign < %s | FileCheck -check-prefix=GFX10-DENORM %s
-; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1100 
-mattr=-real-true16 -fp-contract=fast < %s | FileCheck 
-check-prefixes=GFX11-CONTRACT %s
-; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1100 
-mattr=-real-true16 --denormal-fp-math=preserve-sign < %s | FileCheck 
-check-prefixes=GFX11-DENORM %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck 
-check-prefix=GFX9 %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 -fp-contract=fast < %s | 
FileCheck -check-prefix=GFX9-CONTRACT %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 
--denormal-fp-math=preserve-sign < %s | FileCheck -check-prefix=GFX9-DENORM %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck 
-check-prefix=GFX10 %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 -fp-contract=fast < %s | 
FileCheck -check-prefix=GFX10-CONTRACT %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 
--denormal-fp-math=preserve-sign < %s | FileCheck -check-prefix=GFX10-DENORM %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 
-fp-contract=fast < %s | FileCheck -check-prefixes=GFX11-CONTRACT %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 
--denormal-fp-math=preserve-sign < %s | FileCheck -check-prefixes=GFX11-DENORM 
%s
 
 ; fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
 ; fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-sub-neg-mul.ll 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-sub-neg-mul.ll
index c0a828ecacbae..70f961e2777af 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-sub-neg-mul.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-sub-neg-mul.ll
@@ -1,10 +1,10 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx900 < %s 
| FileCheck -check-prefix=GFX9 %s
-; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx900 
-fp-contract=fast < %s | FileCheck -check-prefix=GFX9-CONTRACT %s
-; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx900 
--denormal-fp-math=preserve-sign < %s | FileCheck -check-prefix=GFX9-DENORM %s
-; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1010 < 
%s | FileCheck -check-prefix=GFX10 %s
-; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1010 
-fp-contract=fast < %s | FileCheck -check-prefix=GFX10-CONTRACT %s
-; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1010 
--denormal-fp-math=preserve-sign < %s | FileCheck -check-prefix=GFX10-DENORM %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck 
-check-prefix=GFX9 %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 -fp-contract=fast < %s | 
FileCheck -check-prefix=GFX9-CONTRACT %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 
--denormal-fp-math=preserve-sign < %s | FileCheck -check-prefix=GFX9-DENORM %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck 
-check-prefix=GFX10 %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 -fp-contract=fast < %s | 
FileCheck -check-prefix=GFX10-CONTRACT %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 
--denormal-fp-math=preserve-sign < %s | FileCheck -check-prefix=GFX10-DENORM %s
 
 ; fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
 define float @test_f32_sub_ext_neg_mul(float %x, float %y, float %z) {

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fma.ll 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/fma.ll
index 067704cfb4d80..0b09cabf25a16 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fma.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fma.ll
@@ -1,12 +1,10 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=tahiti < %s 
| FileCheck -check-prefix=GFX6 %s
-; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=fiji < %s | 
FileCheck -check-prefix=GFX8 %s
-; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx900 < %s 
| FileCheck -check-prefix=GFX9 %s
-; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx90a < %s 
| FileCheck -check-prefix=GFX90A %s
-; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1010 < 
%s | FileCheck -check-prefix=GFX10 %s
-; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1100 
-mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck 
-check-prefixes=GFX11,GFX11-TRUE16 %s
-; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1100 
-mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck 
-check-prefixes=GFX11,GFX11-FAKE16 %s
-; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1200 
-amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefix=GFX12 %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck 
-check-prefix=GFX6 %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=fiji < %s | FileCheck 
-check-prefix=GFX8 %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck 
-check-prefix=GFX9 %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck 
-check-prefix=GFX10 %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 
-amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11,GFX11-TRUE16 
%s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 
-amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 
%s
 
 define float @v_fma_f32(float %x, float %y, float %z) {
 ; GFX6-LABEL: v_fma_f32:
@@ -27,12 +25,6 @@ define float @v_fma_f32(float %x, float %y, float %z) {
 ; GFX9-NEXT:    v_fma_f32 v0, v0, v1, v2
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX90A-LABEL: v_fma_f32:
-; GFX90A:       ; %bb.0:
-; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT:    v_fma_f32 v0, v0, v1, v2
-; GFX90A-NEXT:    s_setpc_b64 s[30:31]
-;
 ; GFX10-LABEL: v_fma_f32:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -44,16 +36,6 @@ define float @v_fma_f32(float %x, float %y, float %z) {
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    v_fma_f32 v0, v0, v1, v2
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX12-LABEL: v_fma_f32:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT:    s_wait_expcnt 0x0
-; GFX12-NEXT:    s_wait_samplecnt 0x0
-; GFX12-NEXT:    s_wait_bvhcnt 0x0
-; GFX12-NEXT:    s_wait_kmcnt 0x0
-; GFX12-NEXT:    v_fma_f32 v0, v0, v1, v2
-; GFX12-NEXT:    s_setpc_b64 s[30:31]
   %fma = call float @llvm.fma.f32(float %x, float %y, float %z)
   ret float %fma
 }
@@ -80,12 +62,6 @@ define <2 x float> @v_fma_v2f32(<2 x float> %x, <2 x float> 
%y, <2 x float> %z)
 ; GFX9-NEXT:    v_fma_f32 v1, v1, v3, v5
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX90A-LABEL: v_fma_v2f32:
-; GFX90A:       ; %bb.0:
-; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT:    v_pk_fma_f32 v[0:1], v[0:1], v[2:3], v[4:5]
-; GFX90A-NEXT:    s_setpc_b64 s[30:31]
-;
 ; GFX10-LABEL: v_fma_v2f32:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -99,17 +75,6 @@ define <2 x float> @v_fma_v2f32(<2 x float> %x, <2 x float> 
%y, <2 x float> %z)
 ; GFX11-NEXT:    v_fma_f32 v0, v0, v2, v4
 ; GFX11-NEXT:    v_fma_f32 v1, v1, v3, v5
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX12-LABEL: v_fma_v2f32:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT:    s_wait_expcnt 0x0
-; GFX12-NEXT:    s_wait_samplecnt 0x0
-; GFX12-NEXT:    s_wait_bvhcnt 0x0
-; GFX12-NEXT:    s_wait_kmcnt 0x0
-; GFX12-NEXT:    v_fma_f32 v0, v0, v2, v4
-; GFX12-NEXT:    v_fma_f32 v1, v1, v3, v5
-; GFX12-NEXT:    s_setpc_b64 s[30:31]
   %fma = call <2 x float> @llvm.fma.v2f32(<2 x float> %x, <2 x float> %y, <2 x 
float> %z)
   ret <2 x float> %fma
 }
@@ -137,12 +102,6 @@ define half @v_fma_f16(half %x, half %y, half %z) {
 ; GFX9-NEXT:    v_fma_f16 v0, v0, v1, v2
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX90A-LABEL: v_fma_f16:
-; GFX90A:       ; %bb.0:
-; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT:    v_fma_f16 v0, v0, v1, v2
-; GFX90A-NEXT:    s_setpc_b64 s[30:31]
-;
 ; GFX10-LABEL: v_fma_f16:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -161,16 +120,6 @@ define half @v_fma_f16(half %x, half %y, half %z) {
 ; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-FAKE16-NEXT:    v_fma_f16 v0, v0, v1, v2
 ; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX12-LABEL: v_fma_f16:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT:    s_wait_expcnt 0x0
-; GFX12-NEXT:    s_wait_samplecnt 0x0
-; GFX12-NEXT:    s_wait_bvhcnt 0x0
-; GFX12-NEXT:    s_wait_kmcnt 0x0
-; GFX12-NEXT:    v_fma_f16 v0, v0, v1, v2
-; GFX12-NEXT:    s_setpc_b64 s[30:31]
   %fma = call half @llvm.fma.f16(half %x, half %y, half %z)
   ret half %fma
 }
@@ -198,12 +147,6 @@ define half @v_fma_f16_fneg_lhs(half %x, half %y, half %z) 
{
 ; GFX9-NEXT:    v_fma_f16 v0, -v0, v1, v2
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX90A-LABEL: v_fma_f16_fneg_lhs:
-; GFX90A:       ; %bb.0:
-; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT:    v_fma_f16 v0, -v0, v1, v2
-; GFX90A-NEXT:    s_setpc_b64 s[30:31]
-;
 ; GFX10-LABEL: v_fma_f16_fneg_lhs:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -221,16 +164,6 @@ define half @v_fma_f16_fneg_lhs(half %x, half %y, half %z) 
{
 ; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-FAKE16-NEXT:    v_fma_f16 v0, -v0, v1, v2
 ; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX12-LABEL: v_fma_f16_fneg_lhs:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT:    s_wait_expcnt 0x0
-; GFX12-NEXT:    s_wait_samplecnt 0x0
-; GFX12-NEXT:    s_wait_bvhcnt 0x0
-; GFX12-NEXT:    s_wait_kmcnt 0x0
-; GFX12-NEXT:    v_fma_f16 v0, -v0, v1, v2
-; GFX12-NEXT:    s_setpc_b64 s[30:31]
   %neg.x = fneg half %x
   %fma = call half @llvm.fma.f16(half %neg.x, half %y, half %z)
   ret half %fma
@@ -259,12 +192,6 @@ define half @v_fma_f16_fneg_rhs(half %x, half %y, half %z) 
{
 ; GFX9-NEXT:    v_fma_f16 v0, v0, -v1, v2
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX90A-LABEL: v_fma_f16_fneg_rhs:
-; GFX90A:       ; %bb.0:
-; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT:    v_fma_f16 v0, v0, -v1, v2
-; GFX90A-NEXT:    s_setpc_b64 s[30:31]
-;
 ; GFX10-LABEL: v_fma_f16_fneg_rhs:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -282,16 +209,6 @@ define half @v_fma_f16_fneg_rhs(half %x, half %y, half %z) 
{
 ; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-FAKE16-NEXT:    v_fma_f16 v0, v0, -v1, v2
 ; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX12-LABEL: v_fma_f16_fneg_rhs:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT:    s_wait_expcnt 0x0
-; GFX12-NEXT:    s_wait_samplecnt 0x0
-; GFX12-NEXT:    s_wait_bvhcnt 0x0
-; GFX12-NEXT:    s_wait_kmcnt 0x0
-; GFX12-NEXT:    v_fma_f16 v0, v0, -v1, v2
-; GFX12-NEXT:    s_setpc_b64 s[30:31]
   %neg.y = fneg half %y
   %fma = call half @llvm.fma.f16(half %x, half %neg.y, half %z)
   ret half %fma
@@ -320,12 +237,6 @@ define half @v_fma_f16_fneg_add(half %x, half %y, half %z) 
{
 ; GFX9-NEXT:    v_fma_f16 v0, v0, v1, -v2
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX90A-LABEL: v_fma_f16_fneg_add:
-; GFX90A:       ; %bb.0:
-; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT:    v_fma_f16 v0, v0, v1, -v2
-; GFX90A-NEXT:    s_setpc_b64 s[30:31]
-;
 ; GFX10-LABEL: v_fma_f16_fneg_add:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -343,16 +254,6 @@ define half @v_fma_f16_fneg_add(half %x, half %y, half %z) 
{
 ; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-FAKE16-NEXT:    v_fma_f16 v0, v0, v1, -v2
 ; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX12-LABEL: v_fma_f16_fneg_add:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT:    s_wait_expcnt 0x0
-; GFX12-NEXT:    s_wait_samplecnt 0x0
-; GFX12-NEXT:    s_wait_bvhcnt 0x0
-; GFX12-NEXT:    s_wait_kmcnt 0x0
-; GFX12-NEXT:    v_fma_f16 v0, v0, v1, -v2
-; GFX12-NEXT:    s_setpc_b64 s[30:31]
   %neg.z = fneg half %z
   %fma = call half @llvm.fma.f16(half %x, half %y, half %neg.z)
   ret half %fma
@@ -392,12 +293,6 @@ define <2 x half> @v_fma_v2f16(<2 x half> %x, <2 x half> 
%y, <2 x half> %z) {
 ; GFX9-NEXT:    v_pk_fma_f16 v0, v0, v1, v2
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX90A-LABEL: v_fma_v2f16:
-; GFX90A:       ; %bb.0:
-; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT:    v_pk_fma_f16 v0, v0, v1, v2
-; GFX90A-NEXT:    s_setpc_b64 s[30:31]
-;
 ; GFX10-LABEL: v_fma_v2f16:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -409,16 +304,6 @@ define <2 x half> @v_fma_v2f16(<2 x half> %x, <2 x half> 
%y, <2 x half> %z) {
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    v_pk_fma_f16 v0, v0, v1, v2
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX12-LABEL: v_fma_v2f16:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT:    s_wait_expcnt 0x0
-; GFX12-NEXT:    s_wait_samplecnt 0x0
-; GFX12-NEXT:    s_wait_bvhcnt 0x0
-; GFX12-NEXT:    s_wait_kmcnt 0x0
-; GFX12-NEXT:    v_pk_fma_f16 v0, v0, v1, v2
-; GFX12-NEXT:    s_setpc_b64 s[30:31]
   %fma = call <2 x half> @llvm.fma.v2f16(<2 x half> %x, <2 x half> %y, <2 x 
half> %z)
   ret <2 x half> %fma
 }
@@ -463,12 +348,6 @@ define <2 x half> @v_fma_v2f16_fneg_lhs(<2 x half> %x, <2 
x half> %y, <2 x half>
 ; GFX9-NEXT:    v_pk_fma_f16 v0, v0, v1, v2 neg_lo:[1,0,0] neg_hi:[1,0,0]
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX90A-LABEL: v_fma_v2f16_fneg_lhs:
-; GFX90A:       ; %bb.0:
-; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT:    v_pk_fma_f16 v0, v0, v1, v2 neg_lo:[1,0,0] neg_hi:[1,0,0]
-; GFX90A-NEXT:    s_setpc_b64 s[30:31]
-;
 ; GFX10-LABEL: v_fma_v2f16_fneg_lhs:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -480,16 +359,6 @@ define <2 x half> @v_fma_v2f16_fneg_lhs(<2 x half> %x, <2 
x half> %y, <2 x half>
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    v_pk_fma_f16 v0, v0, v1, v2 neg_lo:[1,0,0] neg_hi:[1,0,0]
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX12-LABEL: v_fma_v2f16_fneg_lhs:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT:    s_wait_expcnt 0x0
-; GFX12-NEXT:    s_wait_samplecnt 0x0
-; GFX12-NEXT:    s_wait_bvhcnt 0x0
-; GFX12-NEXT:    s_wait_kmcnt 0x0
-; GFX12-NEXT:    v_pk_fma_f16 v0, v0, v1, v2 neg_lo:[1,0,0] neg_hi:[1,0,0]
-; GFX12-NEXT:    s_setpc_b64 s[30:31]
   %x.fneg = fneg <2 x half> %x
   %fma = call <2 x half> @llvm.fma.v2f16(<2 x half> %x.fneg, <2 x half> %y, <2 
x half> %z)
   ret <2 x half> %fma
@@ -535,12 +404,6 @@ define <2 x half> @v_fma_v2f16_fneg_rhs(<2 x half> %x, <2 
x half> %y, <2 x half>
 ; GFX9-NEXT:    v_pk_fma_f16 v0, v0, v1, v2 neg_lo:[0,1,0] neg_hi:[0,1,0]
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX90A-LABEL: v_fma_v2f16_fneg_rhs:
-; GFX90A:       ; %bb.0:
-; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT:    v_pk_fma_f16 v0, v0, v1, v2 neg_lo:[0,1,0] neg_hi:[0,1,0]
-; GFX90A-NEXT:    s_setpc_b64 s[30:31]
-;
 ; GFX10-LABEL: v_fma_v2f16_fneg_rhs:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -552,16 +415,6 @@ define <2 x half> @v_fma_v2f16_fneg_rhs(<2 x half> %x, <2 
x half> %y, <2 x half>
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    v_pk_fma_f16 v0, v0, v1, v2 neg_lo:[0,1,0] neg_hi:[0,1,0]
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX12-LABEL: v_fma_v2f16_fneg_rhs:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT:    s_wait_expcnt 0x0
-; GFX12-NEXT:    s_wait_samplecnt 0x0
-; GFX12-NEXT:    s_wait_bvhcnt 0x0
-; GFX12-NEXT:    s_wait_kmcnt 0x0
-; GFX12-NEXT:    v_pk_fma_f16 v0, v0, v1, v2 neg_lo:[0,1,0] neg_hi:[0,1,0]
-; GFX12-NEXT:    s_setpc_b64 s[30:31]
   %y.fneg = fneg <2 x half> %y
   %fma = call <2 x half> @llvm.fma.v2f16(<2 x half> %x, <2 x half> %y.fneg, <2 
x half> %z)
   ret <2 x half> %fma
@@ -601,12 +454,6 @@ define <2 x half> @v_fma_v2f16_fneg_lhs_rhs(<2 x half> %x, 
<2 x half> %y, <2 x h
 ; GFX9-NEXT:    v_pk_fma_f16 v0, v0, v1, v2
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX90A-LABEL: v_fma_v2f16_fneg_lhs_rhs:
-; GFX90A:       ; %bb.0:
-; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT:    v_pk_fma_f16 v0, v0, v1, v2
-; GFX90A-NEXT:    s_setpc_b64 s[30:31]
-;
 ; GFX10-LABEL: v_fma_v2f16_fneg_lhs_rhs:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -618,16 +465,6 @@ define <2 x half> @v_fma_v2f16_fneg_lhs_rhs(<2 x half> %x, 
<2 x half> %y, <2 x h
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    v_pk_fma_f16 v0, v0, v1, v2
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX12-LABEL: v_fma_v2f16_fneg_lhs_rhs:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT:    s_wait_expcnt 0x0
-; GFX12-NEXT:    s_wait_samplecnt 0x0
-; GFX12-NEXT:    s_wait_bvhcnt 0x0
-; GFX12-NEXT:    s_wait_kmcnt 0x0
-; GFX12-NEXT:    v_pk_fma_f16 v0, v0, v1, v2
-; GFX12-NEXT:    s_setpc_b64 s[30:31]
   %x.fneg = fneg <2 x half> %x
   %y.fneg = fneg <2 x half> %y
   %fma = call <2 x half> @llvm.fma.v2f16(<2 x half> %x.fneg, <2 x half> 
%y.fneg, <2 x half> %z)
@@ -675,13 +512,6 @@ define <3 x half> @v_fma_v3f16(<3 x half> %x, <3 x half> 
%y, <3 x half> %z) {
 ; GFX9-NEXT:    v_pk_fma_f16 v1, v1, v3, v5
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX90A-LABEL: v_fma_v3f16:
-; GFX90A:       ; %bb.0:
-; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT:    v_pk_fma_f16 v0, v0, v2, v4
-; GFX90A-NEXT:    v_pk_fma_f16 v1, v1, v3, v5
-; GFX90A-NEXT:    s_setpc_b64 s[30:31]
-;
 ; GFX10-LABEL: v_fma_v3f16:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -695,17 +525,6 @@ define <3 x half> @v_fma_v3f16(<3 x half> %x, <3 x half> 
%y, <3 x half> %z) {
 ; GFX11-NEXT:    v_pk_fma_f16 v0, v0, v2, v4
 ; GFX11-NEXT:    v_pk_fma_f16 v1, v1, v3, v5
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX12-LABEL: v_fma_v3f16:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT:    s_wait_expcnt 0x0
-; GFX12-NEXT:    s_wait_samplecnt 0x0
-; GFX12-NEXT:    s_wait_bvhcnt 0x0
-; GFX12-NEXT:    s_wait_kmcnt 0x0
-; GFX12-NEXT:    v_pk_fma_f16 v0, v0, v2, v4
-; GFX12-NEXT:    v_pk_fma_f16 v1, v1, v3, v5
-; GFX12-NEXT:    s_setpc_b64 s[30:31]
   %fma = call <3 x half> @llvm.fma.v3f16(<3 x half> %x, <3 x half> %y, <3 x 
half> %z)
   ret <3 x half> %fma
 }
@@ -762,13 +581,6 @@ define <4 x half> @v_fma_v4f16(<4 x half> %x, <4 x half> 
%y, <4 x half> %z) {
 ; GFX9-NEXT:    v_pk_fma_f16 v1, v1, v3, v5
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX90A-LABEL: v_fma_v4f16:
-; GFX90A:       ; %bb.0:
-; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT:    v_pk_fma_f16 v0, v0, v2, v4
-; GFX90A-NEXT:    v_pk_fma_f16 v1, v1, v3, v5
-; GFX90A-NEXT:    s_setpc_b64 s[30:31]
-;
 ; GFX10-LABEL: v_fma_v4f16:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -782,17 +594,6 @@ define <4 x half> @v_fma_v4f16(<4 x half> %x, <4 x half> 
%y, <4 x half> %z) {
 ; GFX11-NEXT:    v_pk_fma_f16 v0, v0, v2, v4
 ; GFX11-NEXT:    v_pk_fma_f16 v1, v1, v3, v5
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX12-LABEL: v_fma_v4f16:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT:    s_wait_expcnt 0x0
-; GFX12-NEXT:    s_wait_samplecnt 0x0
-; GFX12-NEXT:    s_wait_bvhcnt 0x0
-; GFX12-NEXT:    s_wait_kmcnt 0x0
-; GFX12-NEXT:    v_pk_fma_f16 v0, v0, v2, v4
-; GFX12-NEXT:    v_pk_fma_f16 v1, v1, v3, v5
-; GFX12-NEXT:    s_setpc_b64 s[30:31]
   %fma = call <4 x half> @llvm.fma.v4f16(<4 x half> %x, <4 x half> %y, <4 x 
half> %z)
   ret <4 x half> %fma
 }
@@ -816,14 +617,6 @@ define double @v_fma_f64(double %x, double %y, double %z) {
 ; GFX9-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX90A-LABEL: v_fma_f64:
-; GFX90A:       ; %bb.0:
-; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT:    v_fmac_f64_e32 v[4:5], v[0:1], v[2:3]
-; GFX90A-NEXT:    v_mov_b32_e32 v0, v4
-; GFX90A-NEXT:    v_mov_b32_e32 v1, v5
-; GFX90A-NEXT:    s_setpc_b64 s[30:31]
-;
 ; GFX10-LABEL: v_fma_f64:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -835,16 +628,6 @@ define double @v_fma_f64(double %x, double %y, double %z) {
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX12-LABEL: v_fma_f64:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT:    s_wait_expcnt 0x0
-; GFX12-NEXT:    s_wait_samplecnt 0x0
-; GFX12-NEXT:    s_wait_bvhcnt 0x0
-; GFX12-NEXT:    s_wait_kmcnt 0x0
-; GFX12-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
-; GFX12-NEXT:    s_setpc_b64 s[30:31]
   %fma = call double @llvm.fma.f64(double %x, double %y, double %z)
   ret double %fma
 }
@@ -868,12 +651,6 @@ define double @v_fma_f64_fneg_all(double %x, double %y, 
double %z) {
 ; GFX9-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], -v[4:5]
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX90A-LABEL: v_fma_f64_fneg_all:
-; GFX90A:       ; %bb.0:
-; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], -v[4:5]
-; GFX90A-NEXT:    s_setpc_b64 s[30:31]
-;
 ; GFX10-LABEL: v_fma_f64_fneg_all:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -885,16 +662,6 @@ define double @v_fma_f64_fneg_all(double %x, double %y, 
double %z) {
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], -v[4:5]
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX12-LABEL: v_fma_f64_fneg_all:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT:    s_wait_expcnt 0x0
-; GFX12-NEXT:    s_wait_samplecnt 0x0
-; GFX12-NEXT:    s_wait_bvhcnt 0x0
-; GFX12-NEXT:    s_wait_kmcnt 0x0
-; GFX12-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], -v[4:5]
-; GFX12-NEXT:    s_setpc_b64 s[30:31]
   %neg.x = fneg double %x
   %neg.y = fneg double %y
   %neg.z = fneg double %z
@@ -924,17 +691,6 @@ define <2 x double> @v_fma_v2f64(<2 x double> %x, <2 x 
double> %y, <2 x double>
 ; GFX9-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[10:11]
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX90A-LABEL: v_fma_v2f64:
-; GFX90A:       ; %bb.0:
-; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT:    v_fmac_f64_e32 v[8:9], v[0:1], v[4:5]
-; GFX90A-NEXT:    v_fmac_f64_e32 v[10:11], v[2:3], v[6:7]
-; GFX90A-NEXT:    v_mov_b32_e32 v0, v8
-; GFX90A-NEXT:    v_mov_b32_e32 v1, v9
-; GFX90A-NEXT:    v_mov_b32_e32 v2, v10
-; GFX90A-NEXT:    v_mov_b32_e32 v3, v11
-; GFX90A-NEXT:    s_setpc_b64 s[30:31]
-;
 ; GFX10-LABEL: v_fma_v2f64:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -948,17 +704,6 @@ define <2 x double> @v_fma_v2f64(<2 x double> %x, <2 x 
double> %y, <2 x double>
 ; GFX11-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], v[8:9]
 ; GFX11-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[10:11]
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX12-LABEL: v_fma_v2f64:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT:    s_wait_expcnt 0x0
-; GFX12-NEXT:    s_wait_samplecnt 0x0
-; GFX12-NEXT:    s_wait_bvhcnt 0x0
-; GFX12-NEXT:    s_wait_kmcnt 0x0
-; GFX12-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], v[8:9]
-; GFX12-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[10:11]
-; GFX12-NEXT:    s_setpc_b64 s[30:31]
   %fma = call <2 x double> @llvm.fma.v2f64(<2 x double> %x, <2 x double> %y, 
<2 x double> %z)
   ret <2 x double> %fma
 }
@@ -982,12 +727,6 @@ define float @v_fma_f32_fabs_lhs(float %x, float %y, float 
%z) {
 ; GFX9-NEXT:    v_fma_f32 v0, |v0|, v1, v2
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX90A-LABEL: v_fma_f32_fabs_lhs:
-; GFX90A:       ; %bb.0:
-; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT:    v_fma_f32 v0, |v0|, v1, v2
-; GFX90A-NEXT:    s_setpc_b64 s[30:31]
-;
 ; GFX10-LABEL: v_fma_f32_fabs_lhs:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -999,16 +738,6 @@ define float @v_fma_f32_fabs_lhs(float %x, float %y, float 
%z) {
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    v_fma_f32 v0, |v0|, v1, v2
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX12-LABEL: v_fma_f32_fabs_lhs:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT:    s_wait_expcnt 0x0
-; GFX12-NEXT:    s_wait_samplecnt 0x0
-; GFX12-NEXT:    s_wait_bvhcnt 0x0
-; GFX12-NEXT:    s_wait_kmcnt 0x0
-; GFX12-NEXT:    v_fma_f32 v0, |v0|, v1, v2
-; GFX12-NEXT:    s_setpc_b64 s[30:31]
   %fabs.x = call float @llvm.fabs.f32(float %x)
   %fma = call float @llvm.fma.f32(float %fabs.x, float %y, float %z)
   ret float %fma
@@ -1033,12 +762,6 @@ define float @v_fma_f32_fabs_rhs(float %x, float %y, 
float %z) {
 ; GFX9-NEXT:    v_fma_f32 v0, v0, |v1|, v2
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX90A-LABEL: v_fma_f32_fabs_rhs:
-; GFX90A:       ; %bb.0:
-; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT:    v_fma_f32 v0, v0, |v1|, v2
-; GFX90A-NEXT:    s_setpc_b64 s[30:31]
-;
 ; GFX10-LABEL: v_fma_f32_fabs_rhs:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1050,16 +773,6 @@ define float @v_fma_f32_fabs_rhs(float %x, float %y, 
float %z) {
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    v_fma_f32 v0, v0, |v1|, v2
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX12-LABEL: v_fma_f32_fabs_rhs:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT:    s_wait_expcnt 0x0
-; GFX12-NEXT:    s_wait_samplecnt 0x0
-; GFX12-NEXT:    s_wait_bvhcnt 0x0
-; GFX12-NEXT:    s_wait_kmcnt 0x0
-; GFX12-NEXT:    v_fma_f32 v0, v0, |v1|, v2
-; GFX12-NEXT:    s_setpc_b64 s[30:31]
   %fabs.y = call float @llvm.fabs.f32(float %y)
   %fma = call float @llvm.fma.f32(float %x, float %fabs.y, float %z)
   ret float %fma
@@ -1084,12 +797,6 @@ define float @v_fma_f32_fabs_lhs_rhs(float %x, float %y, 
float %z) {
 ; GFX9-NEXT:    v_fma_f32 v0, |v0|, |v1|, v2
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX90A-LABEL: v_fma_f32_fabs_lhs_rhs:
-; GFX90A:       ; %bb.0:
-; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT:    v_fma_f32 v0, |v0|, |v1|, v2
-; GFX90A-NEXT:    s_setpc_b64 s[30:31]
-;
 ; GFX10-LABEL: v_fma_f32_fabs_lhs_rhs:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1101,16 +808,6 @@ define float @v_fma_f32_fabs_lhs_rhs(float %x, float %y, 
float %z) {
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    v_fma_f32 v0, |v0|, |v1|, v2
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX12-LABEL: v_fma_f32_fabs_lhs_rhs:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT:    s_wait_expcnt 0x0
-; GFX12-NEXT:    s_wait_samplecnt 0x0
-; GFX12-NEXT:    s_wait_bvhcnt 0x0
-; GFX12-NEXT:    s_wait_kmcnt 0x0
-; GFX12-NEXT:    v_fma_f32 v0, |v0|, |v1|, v2
-; GFX12-NEXT:    s_setpc_b64 s[30:31]
   %fabs.x = call float @llvm.fabs.f32(float %x)
   %fabs.y = call float @llvm.fabs.f32(float %y)
   %fma = call float @llvm.fma.f32(float %fabs.x, float %fabs.y, float %z)
@@ -1133,11 +830,6 @@ define amdgpu_ps float @v_fma_f32_sgpr_vgpr_vgpr(float 
inreg %x, float %y, float
 ; GFX9-NEXT:    v_fma_f32 v0, s0, v0, v1
 ; GFX9-NEXT:    ; return to shader part epilog
 ;
-; GFX90A-LABEL: v_fma_f32_sgpr_vgpr_vgpr:
-; GFX90A:       ; %bb.0:
-; GFX90A-NEXT:    v_fma_f32 v0, s0, v0, v1
-; GFX90A-NEXT:    ; return to shader part epilog
-;
 ; GFX10-LABEL: v_fma_f32_sgpr_vgpr_vgpr:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    v_fma_f32 v0, s0, v0, v1
@@ -1147,11 +839,6 @@ define amdgpu_ps float @v_fma_f32_sgpr_vgpr_vgpr(float 
inreg %x, float %y, float
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    v_fma_f32 v0, s0, v0, v1
 ; GFX11-NEXT:    ; return to shader part epilog
-;
-; GFX12-LABEL: v_fma_f32_sgpr_vgpr_vgpr:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    v_fma_f32 v0, s0, v0, v1
-; GFX12-NEXT:    ; return to shader part epilog
   %fma = call float @llvm.fma.f32(float %x, float %y, float %z)
   ret float %fma
 }
@@ -1172,11 +859,6 @@ define amdgpu_ps float @v_fma_f32_vgpr_sgpr_vgpr(float 
%x, float inreg %y, float
 ; GFX9-NEXT:    v_fma_f32 v0, v0, s0, v1
 ; GFX9-NEXT:    ; return to shader part epilog
 ;
-; GFX90A-LABEL: v_fma_f32_vgpr_sgpr_vgpr:
-; GFX90A:       ; %bb.0:
-; GFX90A-NEXT:    v_fma_f32 v0, s0, v0, v1
-; GFX90A-NEXT:    ; return to shader part epilog
-;
 ; GFX10-LABEL: v_fma_f32_vgpr_sgpr_vgpr:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    v_fma_f32 v0, s0, v0, v1
@@ -1186,11 +868,6 @@ define amdgpu_ps float @v_fma_f32_vgpr_sgpr_vgpr(float 
%x, float inreg %y, float
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    v_fma_f32 v0, s0, v0, v1
 ; GFX11-NEXT:    ; return to shader part epilog
-;
-; GFX12-LABEL: v_fma_f32_vgpr_sgpr_vgpr:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    v_fma_f32 v0, s0, v0, v1
-; GFX12-NEXT:    ; return to shader part epilog
   %fma = call float @llvm.fma.f32(float %x, float %y, float %z)
   ret float %fma
 }
@@ -1217,13 +894,6 @@ define amdgpu_ps float @v_fma_f32_sgpr_sgpr_sgpr(float 
inreg %x, float inreg %y,
 ; GFX9-NEXT:    v_fma_f32 v0, s0, v0, v1
 ; GFX9-NEXT:    ; return to shader part epilog
 ;
-; GFX90A-LABEL: v_fma_f32_sgpr_sgpr_sgpr:
-; GFX90A:       ; %bb.0:
-; GFX90A-NEXT:    v_mov_b32_e32 v0, s1
-; GFX90A-NEXT:    v_mov_b32_e32 v1, s2
-; GFX90A-NEXT:    v_fma_f32 v0, s0, v0, v1
-; GFX90A-NEXT:    ; return to shader part epilog
-;
 ; GFX10-LABEL: v_fma_f32_sgpr_sgpr_sgpr:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    v_mov_b32_e32 v0, s2
@@ -1235,12 +905,6 @@ define amdgpu_ps float @v_fma_f32_sgpr_sgpr_sgpr(float 
inreg %x, float inreg %y,
 ; GFX11-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX11-NEXT:    v_fma_f32 v0, s1, s0, v0
 ; GFX11-NEXT:    ; return to shader part epilog
-;
-; GFX12-LABEL: v_fma_f32_sgpr_sgpr_sgpr:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_fmac_f32 s2, s0, s1
-; GFX12-NEXT:    v_mov_b32_e32 v0, s2
-; GFX12-NEXT:    ; return to shader part epilog
   %fma = call float @llvm.fma.f32(float %x, float %y, float %z)
   ret float %fma
 }
@@ -1264,12 +928,6 @@ define float @v_fma_f32_fneg_lhs(float %x, float %y, 
float %z) {
 ; GFX9-NEXT:    v_fma_f32 v0, -v0, v1, v2
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX90A-LABEL: v_fma_f32_fneg_lhs:
-; GFX90A:       ; %bb.0:
-; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT:    v_fma_f32 v0, -v0, v1, v2
-; GFX90A-NEXT:    s_setpc_b64 s[30:31]
-;
 ; GFX10-LABEL: v_fma_f32_fneg_lhs:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1281,16 +939,6 @@ define float @v_fma_f32_fneg_lhs(float %x, float %y, 
float %z) {
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    v_fma_f32 v0, -v0, v1, v2
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX12-LABEL: v_fma_f32_fneg_lhs:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT:    s_wait_expcnt 0x0
-; GFX12-NEXT:    s_wait_samplecnt 0x0
-; GFX12-NEXT:    s_wait_bvhcnt 0x0
-; GFX12-NEXT:    s_wait_kmcnt 0x0
-; GFX12-NEXT:    v_fma_f32 v0, -v0, v1, v2
-; GFX12-NEXT:    s_setpc_b64 s[30:31]
   %neg.x = fneg float %x
   %fma = call float @llvm.fma.f32(float %neg.x, float %y, float %z)
   ret float %fma
@@ -1315,12 +963,6 @@ define float @v_fma_f32_fneg_rhs(float %x, float %y, 
float %z) {
 ; GFX9-NEXT:    v_fma_f32 v0, v0, -v1, v2
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX90A-LABEL: v_fma_f32_fneg_rhs:
-; GFX90A:       ; %bb.0:
-; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT:    v_fma_f32 v0, v0, -v1, v2
-; GFX90A-NEXT:    s_setpc_b64 s[30:31]
-;
 ; GFX10-LABEL: v_fma_f32_fneg_rhs:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1332,16 +974,6 @@ define float @v_fma_f32_fneg_rhs(float %x, float %y, 
float %z) {
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    v_fma_f32 v0, v0, -v1, v2
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX12-LABEL: v_fma_f32_fneg_rhs:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT:    s_wait_expcnt 0x0
-; GFX12-NEXT:    s_wait_samplecnt 0x0
-; GFX12-NEXT:    s_wait_bvhcnt 0x0
-; GFX12-NEXT:    s_wait_kmcnt 0x0
-; GFX12-NEXT:    v_fma_f32 v0, v0, -v1, v2
-; GFX12-NEXT:    s_setpc_b64 s[30:31]
   %neg.y = fneg float %y
   %fma = call float @llvm.fma.f32(float %x, float %neg.y, float %z)
   ret float %fma
@@ -1366,12 +998,6 @@ define float @v_fma_f32_fneg_z(float %x, float %y, float 
%z) {
 ; GFX9-NEXT:    v_fma_f32 v0, v0, v1, -v2
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX90A-LABEL: v_fma_f32_fneg_z:
-; GFX90A:       ; %bb.0:
-; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT:    v_fma_f32 v0, v0, v1, -v2
-; GFX90A-NEXT:    s_setpc_b64 s[30:31]
-;
 ; GFX10-LABEL: v_fma_f32_fneg_z:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1383,16 +1009,6 @@ define float @v_fma_f32_fneg_z(float %x, float %y, float 
%z) {
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    v_fma_f32 v0, v0, v1, -v2
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX12-LABEL: v_fma_f32_fneg_z:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT:    s_wait_expcnt 0x0
-; GFX12-NEXT:    s_wait_samplecnt 0x0
-; GFX12-NEXT:    s_wait_bvhcnt 0x0
-; GFX12-NEXT:    s_wait_kmcnt 0x0
-; GFX12-NEXT:    v_fma_f32 v0, v0, v1, -v2
-; GFX12-NEXT:    s_setpc_b64 s[30:31]
   %neg.z = fneg float %z
   %fma = call float @llvm.fma.f32(float %x, float %y, float %neg.z)
   ret float %fma
@@ -1414,11 +1030,6 @@ define amdgpu_ps float 
@dont_crash_after_fma_mix_select_attempt(float inreg %x,
 ; GFX9-NEXT:    v_fma_f32 v0, |s0|, v0, v1
 ; GFX9-NEXT:    ; return to shader part epilog
 ;
-; GFX90A-LABEL: dont_crash_after_fma_mix_select_attempt:
-; GFX90A:       ; %bb.0: ; %.entry
-; GFX90A-NEXT:    v_fma_f32 v0, |s0|, v0, v1
-; GFX90A-NEXT:    ; return to shader part epilog
-;
 ; GFX10-LABEL: dont_crash_after_fma_mix_select_attempt:
 ; GFX10:       ; %bb.0: ; %.entry
 ; GFX10-NEXT:    v_fma_f32 v0, |s0|, v0, v1
@@ -1428,331 +1039,12 @@ define amdgpu_ps float 
@dont_crash_after_fma_mix_select_attempt(float inreg %x,
 ; GFX11:       ; %bb.0: ; %.entry
 ; GFX11-NEXT:    v_fma_f32 v0, |s0|, v0, v1
 ; GFX11-NEXT:    ; return to shader part epilog
-;
-; GFX12-LABEL: dont_crash_after_fma_mix_select_attempt:
-; GFX12:       ; %bb.0: ; %.entry
-; GFX12-NEXT:    v_fma_f32 v0, |s0|, v0, v1
-; GFX12-NEXT:    ; return to shader part epilog
 .entry:
   %fabs.x = call contract float @llvm.fabs.f32(float %x)
   %fma = call float @llvm.fma.f32(float %fabs.x, float %y, float %z)
   ret float %fma
 }
 
-define amdgpu_ps half @fma_s16_uniform(half inreg %a, half inreg %b, half 
inreg %c) {
-; GFX6-LABEL: fma_s16_uniform:
-; GFX6:       ; %bb.0:
-; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, s0
-; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, s1
-; GFX6-NEXT:    v_cvt_f32_f16_e32 v2, s2
-; GFX6-NEXT:    v_fma_f32 v0, v0, v1, v2
-; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; GFX6-NEXT:    ; return to shader part epilog
-;
-; GFX8-LABEL: fma_s16_uniform:
-; GFX8:       ; %bb.0:
-; GFX8-NEXT:    v_mov_b32_e32 v0, s1
-; GFX8-NEXT:    v_mov_b32_e32 v1, s2
-; GFX8-NEXT:    v_fma_f16 v0, s0, v0, v1
-; GFX8-NEXT:    ; return to shader part epilog
-;
-; GFX9-LABEL: fma_s16_uniform:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    v_mov_b32_e32 v0, s1
-; GFX9-NEXT:    v_mov_b32_e32 v1, s2
-; GFX9-NEXT:    v_fma_f16 v0, s0, v0, v1
-; GFX9-NEXT:    ; return to shader part epilog
-;
-; GFX90A-LABEL: fma_s16_uniform:
-; GFX90A:       ; %bb.0:
-; GFX90A-NEXT:    v_mov_b32_e32 v0, s1
-; GFX90A-NEXT:    v_mov_b32_e32 v1, s2
-; GFX90A-NEXT:    v_fma_f16 v0, s0, v0, v1
-; GFX90A-NEXT:    ; return to shader part epilog
-;
-; GFX10-LABEL: fma_s16_uniform:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    v_mov_b32_e32 v0, s2
-; GFX10-NEXT:    v_fma_f16 v0, s1, s0, v0
-; GFX10-NEXT:    ; return to shader part epilog
-;
-; GFX11-TRUE16-LABEL: fma_s16_uniform:
-; GFX11-TRUE16:       ; %bb.0:
-; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v0.l, s2
-; GFX11-TRUE16-NEXT:    v_fmac_f16_e64 v0.l, s0, s1
-; GFX11-TRUE16-NEXT:    ; return to shader part epilog
-;
-; GFX11-FAKE16-LABEL: fma_s16_uniform:
-; GFX11-FAKE16:       ; %bb.0:
-; GFX11-FAKE16-NEXT:    v_mov_b32_e32 v0, s2
-; GFX11-FAKE16-NEXT:    v_fma_f16 v0, s1, s0, v0
-; GFX11-FAKE16-NEXT:    ; return to shader part epilog
-;
-; GFX12-LABEL: fma_s16_uniform:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_fmac_f16 s2, s0, s1
-; GFX12-NEXT:    v_mov_b32_e32 v0, s2
-; GFX12-NEXT:    ; return to shader part epilog
-  %fma = call half @llvm.fma.f16(half %a, half %b, half %c)
-  ret half %fma
-}
-
-define amdgpu_ps float @fma_s32_uniform(float inreg %a, float inreg %b, float 
inreg %c) {
-; GFX6-LABEL: fma_s32_uniform:
-; GFX6:       ; %bb.0:
-; GFX6-NEXT:    v_mov_b32_e32 v0, s1
-; GFX6-NEXT:    v_mov_b32_e32 v1, s2
-; GFX6-NEXT:    v_fma_f32 v0, s0, v0, v1
-; GFX6-NEXT:    ; return to shader part epilog
-;
-; GFX8-LABEL: fma_s32_uniform:
-; GFX8:       ; %bb.0:
-; GFX8-NEXT:    v_mov_b32_e32 v0, s1
-; GFX8-NEXT:    v_mov_b32_e32 v1, s2
-; GFX8-NEXT:    v_fma_f32 v0, s0, v0, v1
-; GFX8-NEXT:    ; return to shader part epilog
-;
-; GFX9-LABEL: fma_s32_uniform:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    v_mov_b32_e32 v0, s1
-; GFX9-NEXT:    v_mov_b32_e32 v1, s2
-; GFX9-NEXT:    v_fma_f32 v0, s0, v0, v1
-; GFX9-NEXT:    ; return to shader part epilog
-;
-; GFX90A-LABEL: fma_s32_uniform:
-; GFX90A:       ; %bb.0:
-; GFX90A-NEXT:    v_mov_b32_e32 v0, s1
-; GFX90A-NEXT:    v_mov_b32_e32 v1, s2
-; GFX90A-NEXT:    v_fma_f32 v0, s0, v0, v1
-; GFX90A-NEXT:    ; return to shader part epilog
-;
-; GFX10-LABEL: fma_s32_uniform:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    v_mov_b32_e32 v0, s2
-; GFX10-NEXT:    v_fma_f32 v0, s1, s0, v0
-; GFX10-NEXT:    ; return to shader part epilog
-;
-; GFX11-LABEL: fma_s32_uniform:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    v_mov_b32_e32 v0, s2
-; GFX11-NEXT:    v_fma_f32 v0, s1, s0, v0
-; GFX11-NEXT:    ; return to shader part epilog
-;
-; GFX12-LABEL: fma_s32_uniform:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_fmac_f32 s2, s0, s1
-; GFX12-NEXT:    v_mov_b32_e32 v0, s2
-; GFX12-NEXT:    ; return to shader part epilog
-  %fma = call float @llvm.fma.f32(float %a, float %b, float %c)
-  ret float %fma
-}
-
-define amdgpu_ps void @fma_s64_uniform(double inreg %a, double inreg %b, double inreg %c, ptr addrspace(1) %ptr) {
-; GFX6-LABEL: fma_s64_uniform:
-; GFX6:       ; %bb.0:
-; GFX6-NEXT:    v_mov_b32_e32 v2, s2
-; GFX6-NEXT:    v_mov_b32_e32 v4, s4
-; GFX6-NEXT:    v_mov_b32_e32 v3, s3
-; GFX6-NEXT:    v_mov_b32_e32 v5, s5
-; GFX6-NEXT:    v_fma_f64 v[2:3], s[0:1], v[2:3], v[4:5]
-; GFX6-NEXT:    s_mov_b32 s2, 0
-; GFX6-NEXT:    s_mov_b32 s3, 0xf000
-; GFX6-NEXT:    s_mov_b64 s[0:1], 0
-; GFX6-NEXT:    buffer_store_dwordx2 v[2:3], v[0:1], s[0:3], 0 addr64
-; GFX6-NEXT:    s_endpgm
-;
-; GFX8-LABEL: fma_s64_uniform:
-; GFX8:       ; %bb.0:
-; GFX8-NEXT:    v_mov_b32_e32 v2, s2
-; GFX8-NEXT:    v_mov_b32_e32 v4, s4
-; GFX8-NEXT:    v_mov_b32_e32 v3, s3
-; GFX8-NEXT:    v_mov_b32_e32 v5, s5
-; GFX8-NEXT:    v_fma_f64 v[2:3], s[0:1], v[2:3], v[4:5]
-; GFX8-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
-; GFX8-NEXT:    s_endpgm
-;
-; GFX9-LABEL: fma_s64_uniform:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    v_mov_b32_e32 v2, s2
-; GFX9-NEXT:    v_mov_b32_e32 v4, s4
-; GFX9-NEXT:    v_mov_b32_e32 v3, s3
-; GFX9-NEXT:    v_mov_b32_e32 v5, s5
-; GFX9-NEXT:    v_fma_f64 v[2:3], s[0:1], v[2:3], v[4:5]
-; GFX9-NEXT:    global_store_dwordx2 v[0:1], v[2:3], off
-; GFX9-NEXT:    s_endpgm
-;
-; GFX90A-LABEL: fma_s64_uniform:
-; GFX90A:       ; %bb.0:
-; GFX90A-NEXT:    v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1]
-; GFX90A-NEXT:    v_pk_mov_b32 v[4:5], s[4:5], s[4:5] op_sel:[0,1]
-; GFX90A-NEXT:    v_fmac_f64_e32 v[4:5], s[0:1], v[2:3]
-; GFX90A-NEXT:    global_store_dwordx2 v[0:1], v[4:5], off
-; GFX90A-NEXT:    s_endpgm
-;
-; GFX10-LABEL: fma_s64_uniform:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    v_mov_b32_e32 v2, s4
-; GFX10-NEXT:    v_mov_b32_e32 v3, s5
-; GFX10-NEXT:    v_fma_f64 v[2:3], s[0:1], s[2:3], v[2:3]
-; GFX10-NEXT:    global_store_dwordx2 v[0:1], v[2:3], off
-; GFX10-NEXT:    s_endpgm
-;
-; GFX11-LABEL: fma_s64_uniform:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    v_dual_mov_b32 v2, s4 :: v_dual_mov_b32 v3, s5
-; GFX11-NEXT:    v_fma_f64 v[2:3], s[0:1], s[2:3], v[2:3]
-; GFX11-NEXT:    global_store_b64 v[0:1], v[2:3], off
-; GFX11-NEXT:    s_endpgm
-;
-; GFX12-LABEL: fma_s64_uniform:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    v_dual_mov_b32 v2, s4 :: v_dual_mov_b32 v3, s5
-; GFX12-NEXT:    v_fma_f64 v[2:3], s[0:1], s[2:3], v[2:3]
-; GFX12-NEXT:    global_store_b64 v[0:1], v[2:3], off
-; GFX12-NEXT:    s_endpgm
-  %fma = call double @llvm.fma.f64(double %a, double %b, double %c)
-  store double %fma, ptr addrspace(1) %ptr
-  ret void
-}
-
-define amdgpu_ps <2 x half> @fma_v2s16_uniform(<2 x half> inreg %a, <2 x half> inreg %b, <2 x half> inreg %c) {
-; GFX6-LABEL: fma_v2s16_uniform:
-; GFX6:       ; %bb.0:
-; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, s0
-; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, s2
-; GFX6-NEXT:    v_cvt_f32_f16_e32 v2, s4
-; GFX6-NEXT:    v_cvt_f32_f16_e32 v3, s1
-; GFX6-NEXT:    v_cvt_f32_f16_e32 v4, s3
-; GFX6-NEXT:    v_cvt_f32_f16_e32 v5, s5
-; GFX6-NEXT:    v_fma_f32 v0, v0, v1, v2
-; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; GFX6-NEXT:    v_fma_f32 v1, v3, v4, v5
-; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
-; GFX6-NEXT:    ; return to shader part epilog
-;
-; GFX8-LABEL: fma_v2s16_uniform:
-; GFX8:       ; %bb.0:
-; GFX8-NEXT:    v_mov_b32_e32 v0, s1
-; GFX8-NEXT:    v_mov_b32_e32 v1, s2
-; GFX8-NEXT:    s_lshr_b32 s4, s1, 16
-; GFX8-NEXT:    s_lshr_b32 s5, s2, 16
-; GFX8-NEXT:    v_fma_f16 v0, s0, v0, v1
-; GFX8-NEXT:    s_lshr_b32 s3, s0, 16
-; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
-; GFX8-NEXT:    v_mov_b32_e32 v0, s4
-; GFX8-NEXT:    v_mov_b32_e32 v1, s5
-; GFX8-NEXT:    v_fma_f16 v0, s3, v0, v1
-; GFX8-NEXT:    v_readfirstlane_b32 s1, v0
-; GFX8-NEXT:    s_and_b32 s1, 0xffff, s1
-; GFX8-NEXT:    s_and_b32 s0, 0xffff, s0
-; GFX8-NEXT:    s_lshl_b32 s1, s1, 16
-; GFX8-NEXT:    s_or_b32 s0, s0, s1
-; GFX8-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8-NEXT:    ; return to shader part epilog
-;
-; GFX9-LABEL: fma_v2s16_uniform:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    v_mov_b32_e32 v0, s1
-; GFX9-NEXT:    v_mov_b32_e32 v1, s2
-; GFX9-NEXT:    v_pk_fma_f16 v0, s0, v0, v1
-; GFX9-NEXT:    ; return to shader part epilog
-;
-; GFX90A-LABEL: fma_v2s16_uniform:
-; GFX90A:       ; %bb.0:
-; GFX90A-NEXT:    v_mov_b32_e32 v0, s1
-; GFX90A-NEXT:    v_mov_b32_e32 v1, s2
-; GFX90A-NEXT:    v_pk_fma_f16 v0, s0, v0, v1
-; GFX90A-NEXT:    ; return to shader part epilog
-;
-; GFX10-LABEL: fma_v2s16_uniform:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    v_mov_b32_e32 v0, s2
-; GFX10-NEXT:    v_pk_fma_f16 v0, s0, s1, v0
-; GFX10-NEXT:    ; return to shader part epilog
-;
-; GFX11-LABEL: fma_v2s16_uniform:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    v_mov_b32_e32 v0, s2
-; GFX11-NEXT:    v_pk_fma_f16 v0, s0, s1, v0
-; GFX11-NEXT:    ; return to shader part epilog
-;
-; GFX12-LABEL: fma_v2s16_uniform:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_lshr_b32 s3, s0, 16
-; GFX12-NEXT:    s_lshr_b32 s4, s1, 16
-; GFX12-NEXT:    s_lshr_b32 s5, s2, 16
-; GFX12-NEXT:    s_fmac_f16 s2, s0, s1
-; GFX12-NEXT:    s_fmac_f16 s5, s3, s4
-; GFX12-NEXT:    s_pack_ll_b32_b16 s0, s2, s5
-; GFX12-NEXT:    v_mov_b32_e32 v0, s0
-; GFX12-NEXT:    ; return to shader part epilog
-  %fma = call <2 x half> @llvm.fma.v2f16(<2 x half> %a, <2 x half> %b, <2 x half> %c)
-  ret <2 x half> %fma
-}
-
-define amdgpu_ps <2 x float> @fma_v2s32_uniform(<2 x float> inreg %a, <2 x float> inreg %b, <2 x float> inreg %c) {
-; GFX6-LABEL: fma_v2s32_uniform:
-; GFX6:       ; %bb.0:
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
-; GFX6-NEXT:    v_mov_b32_e32 v1, s4
-; GFX6-NEXT:    v_fma_f32 v0, s0, v0, v1
-; GFX6-NEXT:    v_mov_b32_e32 v1, s3
-; GFX6-NEXT:    v_mov_b32_e32 v2, s5
-; GFX6-NEXT:    v_fma_f32 v1, s1, v1, v2
-; GFX6-NEXT:    ; return to shader part epilog
-;
-; GFX8-LABEL: fma_v2s32_uniform:
-; GFX8:       ; %bb.0:
-; GFX8-NEXT:    v_mov_b32_e32 v0, s2
-; GFX8-NEXT:    v_mov_b32_e32 v1, s4
-; GFX8-NEXT:    v_fma_f32 v0, s0, v0, v1
-; GFX8-NEXT:    v_mov_b32_e32 v1, s3
-; GFX8-NEXT:    v_mov_b32_e32 v2, s5
-; GFX8-NEXT:    v_fma_f32 v1, s1, v1, v2
-; GFX8-NEXT:    ; return to shader part epilog
-;
-; GFX9-LABEL: fma_v2s32_uniform:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    v_mov_b32_e32 v0, s2
-; GFX9-NEXT:    v_mov_b32_e32 v1, s4
-; GFX9-NEXT:    v_fma_f32 v0, s0, v0, v1
-; GFX9-NEXT:    v_mov_b32_e32 v1, s3
-; GFX9-NEXT:    v_mov_b32_e32 v2, s5
-; GFX9-NEXT:    v_fma_f32 v1, s1, v1, v2
-; GFX9-NEXT:    ; return to shader part epilog
-;
-; GFX90A-LABEL: fma_v2s32_uniform:
-; GFX90A:       ; %bb.0:
-; GFX90A-NEXT:    v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
-; GFX90A-NEXT:    v_pk_mov_b32 v[2:3], s[4:5], s[4:5] op_sel:[0,1]
-; GFX90A-NEXT:    v_pk_fma_f32 v[0:1], s[0:1], v[0:1], v[2:3]
-; GFX90A-NEXT:    ; return to shader part epilog
-;
-; GFX10-LABEL: fma_v2s32_uniform:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    v_mov_b32_e32 v0, s4
-; GFX10-NEXT:    v_mov_b32_e32 v1, s5
-; GFX10-NEXT:    v_fma_f32 v0, s2, s0, v0
-; GFX10-NEXT:    v_fma_f32 v1, s3, s1, v1
-; GFX10-NEXT:    ; return to shader part epilog
-;
-; GFX11-LABEL: fma_v2s32_uniform:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
-; GFX11-NEXT:    v_fma_f32 v0, s2, s0, v0
-; GFX11-NEXT:    v_fma_f32 v1, s3, s1, v1
-; GFX11-NEXT:    ; return to shader part epilog
-;
-; GFX12-LABEL: fma_v2s32_uniform:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_fmac_f32 s4, s0, s2
-; GFX12-NEXT:    s_fmac_f32 s5, s1, s3
-; GFX12-NEXT:    v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
-; GFX12-NEXT:    ; return to shader part epilog
-  %fma = call <2 x float> @llvm.fma.v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c)
-  ret <2 x float> %fma
-}
-
 declare half @llvm.fma.f16(half, half, half) #0
 declare float @llvm.fma.f32(float, float, float) #0
 declare double @llvm.fma.f64(double, double, double) #0

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fmad.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fmad.ll
deleted file mode 100644
index 4907ee16a4978..0000000000000
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fmad.ll
+++ /dev/null
@@ -1,95 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=fiji --denormal-fp-math=preserve-sign -o - %s | FileCheck -check-prefix=GFX8 %s
-; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 --denormal-fp-math=preserve-sign -o - %s | FileCheck -check-prefix=GFX9 %s
-; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 --denormal-fp-math=preserve-sign -o - %s | FileCheck -check-prefix=GFX10 %s
-
-define amdgpu_ps float @fmad_s32_uniform(float inreg %a, float inreg %b, float inreg %c) {
-; GFX8-LABEL: fmad_s32_uniform:
-; GFX8:       ; %bb.0:
-; GFX8-NEXT:    v_mov_b32_e32 v0, s1
-; GFX8-NEXT:    v_mov_b32_e32 v1, s2
-; GFX8-NEXT:    v_mad_f32 v0, s0, v0, v1
-; GFX8-NEXT:    ; return to shader part epilog
-;
-; GFX9-LABEL: fmad_s32_uniform:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    v_mov_b32_e32 v0, s1
-; GFX9-NEXT:    v_mov_b32_e32 v1, s2
-; GFX9-NEXT:    v_mad_f32 v0, s0, v0, v1
-; GFX9-NEXT:    ; return to shader part epilog
-;
-; GFX10-LABEL: fmad_s32_uniform:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    v_mov_b32_e32 v0, s2
-; GFX10-NEXT:    v_mad_f32 v0, s1, s0, v0
-; GFX10-NEXT:    ; return to shader part epilog
-  %mul = fmul float %a, %b
-  %result = fadd float %mul, %c
-  ret float %result
-}
-
-define amdgpu_ps float @fmad_s32_div(float %a, float %b, float %c) {
-; GFX8-LABEL: fmad_s32_div:
-; GFX8:       ; %bb.0:
-; GFX8-NEXT:    v_mad_f32 v0, v0, v1, v2
-; GFX8-NEXT:    ; return to shader part epilog
-;
-; GFX9-LABEL: fmad_s32_div:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    v_mad_f32 v0, v0, v1, v2
-; GFX9-NEXT:    ; return to shader part epilog
-;
-; GFX10-LABEL: fmad_s32_div:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    v_mad_f32 v0, v0, v1, v2
-; GFX10-NEXT:    ; return to shader part epilog
-  %mul = fmul float %a, %b
-  %result = fadd float %mul, %c
-  ret float %result
-}
-
-define amdgpu_ps half @fmad_s16_uniform(half inreg %a, half inreg %b, half inreg %c) {
-; GFX8-LABEL: fmad_s16_uniform:
-; GFX8:       ; %bb.0:
-; GFX8-NEXT:    v_mov_b32_e32 v0, s1
-; GFX8-NEXT:    v_mov_b32_e32 v1, s2
-; GFX8-NEXT:    v_mad_f16 v0, s0, v0, v1
-; GFX8-NEXT:    ; return to shader part epilog
-;
-; GFX9-LABEL: fmad_s16_uniform:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    v_mov_b32_e32 v0, s1
-; GFX9-NEXT:    v_mov_b32_e32 v1, s2
-; GFX9-NEXT:    v_mad_legacy_f16 v0, s0, v0, v1
-; GFX9-NEXT:    ; return to shader part epilog
-;
-; GFX10-LABEL: fmad_s16_uniform:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    v_mul_f16_e64 v0, s0, s1
-; GFX10-NEXT:    v_add_f16_e32 v0, s2, v0
-; GFX10-NEXT:    ; return to shader part epilog
-  %mul = fmul half %a, %b
-  %result = fadd half %mul, %c
-  ret half %result
-}
-
-define amdgpu_ps half @fmad_s16_div(half %a, half %b, half %c) {
-; GFX8-LABEL: fmad_s16_div:
-; GFX8:       ; %bb.0:
-; GFX8-NEXT:    v_mad_f16 v0, v0, v1, v2
-; GFX8-NEXT:    ; return to shader part epilog
-;
-; GFX9-LABEL: fmad_s16_div:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    v_mad_legacy_f16 v0, v0, v1, v2
-; GFX9-NEXT:    ; return to shader part epilog
-;
-; GFX10-LABEL: fmad_s16_div:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    v_mul_f16_e32 v0, v0, v1
-; GFX10-NEXT:    v_add_f16_e32 v0, v0, v2
-; GFX10-NEXT:    ; return to shader part epilog
-  %mul = fmul half %a, %b
-  %result = fadd half %mul, %c
-  ret half %result
-}

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fmamix-constant-bus-violation.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fmamix-constant-bus-violation.ll
index cc2a8ee11f180..dc4545bd82ae2 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fmamix-constant-bus-violation.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fmamix-constant-bus-violation.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx908 %s -o - | FileCheck %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx908 %s -o - | FileCheck %s
 
 define float @test_fmamix_constant_bus_violation_sss(i32 inreg %val.0, i32 inreg %val.1, i32 inreg %val.2) #0 {
 ; CHECK-LABEL: test_fmamix_constant_bus_violation_sss:

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fmul.v2f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fmul.v2f16.ll
index 1e7c7dcb620a5..1220c0e3b1ead 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fmul.v2f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fmul.v2f16.ll
@@ -1,8 +1,10 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
-; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s
-; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
-; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefix=GFX10 %s
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefix=GFX10 %s
+
+; TODO: Switch test to use -new-reg-bank-select after adding G_FNEG support.
 
 define <2 x half> @v_fmul_v2f16(<2 x half> %a, <2 x half> %b) {
 ; GFX9-LABEL: v_fmul_v2f16:

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/madmix-constant-bus-violation.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/madmix-constant-bus-violation.ll
index 52425323332dd..2351bf2d6e876 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/madmix-constant-bus-violation.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/madmix-constant-bus-violation.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx900 %s -o - | FileCheck %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 %s -o - | FileCheck %s
 
 define float @test_fmamix_constant_bus_violation_sss(i32 inreg %val.0, i32 inreg %val.1, i32 inreg %val.2) #0 {
 ; CHECK-LABEL: test_fmamix_constant_bus_violation_sss:

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fma.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fma.mir
index 9dfc7700e2c80..d63fc07ada772 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fma.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fma.mir
@@ -1,5 +1,6 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -run-pass="amdgpu-regbankselect,amdgpu-regbanklegalize" %s -verify-machineinstrs -o - | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s
 
 ---
 name: fma_sss
@@ -14,7 +15,10 @@ body: |
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
     ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
-    ; CHECK-NEXT: [[FMA:%[0-9]+]]:sgpr(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]]
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
+    ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32)
+    ; CHECK-NEXT: [[FMA:%[0-9]+]]:vgpr(s32) = G_FMA [[COPY3]], [[COPY4]], [[COPY5]]
     %0:_(s32) = COPY $sgpr0
     %1:_(s32) = COPY $sgpr1
     %2:_(s32) = COPY $sgpr2


        
_______________________________________________
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

Reply via email to