https://github.com/hidekisaito updated https://github.com/llvm/llvm-project/pull/190916
>From 78167056825b6811532de36825b3687e0cbe0079 Mon Sep 17 00:00:00 2001 From: Hideki Saito <[email protected]> Date: Wed, 8 Apr 2026 00:28:38 -0500 Subject: [PATCH] [AMDGPU] First installment of IGLP_OPT 4 (MFMAValuSpacingOpt) implementation Add MFMAValuSpacingOpt strategy in pre-RA scheduler. Tries to schedule equal number of VALU MIR instructions between MFMAs. Assisted-by: Cursor Made-with: Cursor --- clang/test/CodeGenOpenCL/builtins-amdgcn.cl | 6 +- llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 14 +++- llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp | 83 +++++++++++++++++-- llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.h | 1 + llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 36 +++++++- .../iglp-opt4-mfma-valu-spacing-scheduler.ll | 70 ++++++++++++++++ .../iglp-opt4-mfma-valu-spacing-scheduler.mir | 35 ++++++++ llvm/test/Verifier/AMDGPU/intrinsic-immarg.ll | 12 +++ 8 files changed, 241 insertions(+), 16 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/iglp-opt4-mfma-valu-spacing-scheduler.ll create mode 100644 llvm/test/CodeGen/AMDGPU/iglp-opt4-mfma-valu-spacing-scheduler.mir diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl index 0fc40d396a87d..cd4cb103291dd 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl @@ -980,14 +980,16 @@ void test_sched_group_barrier() // CHECK-LABEL: @test_iglp_opt // CHECK: {{.*}}call{{.*}} void @llvm.amdgcn.iglp.opt(i32 0) // CHECK: {{.*}}call{{.*}} void @llvm.amdgcn.iglp.opt(i32 1) +// CHECK: {{.*}}call{{.*}} void @llvm.amdgcn.iglp.opt(i32 2) +// CHECK: {{.*}}call{{.*}} void @llvm.amdgcn.iglp.opt(i32 3) // CHECK: {{.*}}call{{.*}} void @llvm.amdgcn.iglp.opt(i32 4) -// CHECK: {{.*}}call{{.*}} void @llvm.amdgcn.iglp.opt(i32 15) void test_iglp_opt() { __builtin_amdgcn_iglp_opt(0); __builtin_amdgcn_iglp_opt(1); + __builtin_amdgcn_iglp_opt(2); + __builtin_amdgcn_iglp_opt(3); __builtin_amdgcn_iglp_opt(4); - __builtin_amdgcn_iglp_opt(15); } 
// CHECK-LABEL: @test_s_sleep diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 9528fb2b446bc..5a99607082ff5 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -379,11 +379,17 @@ def int_amdgcn_sched_group_barrier : ClangBuiltin<"__builtin_amdgcn_sched_group_ [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>, IntrNoMem, IntrHasSideEffects, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree]>; -// Scheduler optimization hint. -// MASK = 0: Small gemm opt +// Scheduler optimization hint: immediate selects the IGLP strategy in +// AMDGPUIGroupLP.cpp (createIGLPStrategy). Must be a constant in [0,4]. Mappings: +// 0 - MFMA small-GEMM scheduling (MFMASmallGemmOpt). +// 1 - MFMA small-GEMM single-wave variant (MFMASmallGemmSingleWaveOpt). +// 2 - MFMAExpInterleaveOpt (TRANS/MFMA scheduling pipeline; see AMDGPUIGroupLP.cpp). +// 3 - MFMAExpSimpleInterleaveOpt (simpler TRANS-then-MFMA interleave pattern). +// 4 - MFMA (or WMMA) / VALU spacing (MFMAValuSpacingOpt). 
def int_amdgcn_iglp_opt : ClangBuiltin<"__builtin_amdgcn_iglp_opt">, - Intrinsic<[], [llvm_i32_ty], [ImmArg<ArgIndex<0>>, IntrNoMem, IntrHasSideEffects, IntrConvergent, - IntrWillReturn, IntrNoCallback, IntrNoFree]>; + Intrinsic<[], [llvm_i32_ty], + [ImmArg<ArgIndex<0>>, Range<ArgIndex<0>, 0, 5>, IntrNoMem, IntrHasSideEffects, + IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree]>; def int_amdgcn_s_waitcnt : ClangBuiltin<"__builtin_amdgcn_s_waitcnt">, Intrinsic<[], [llvm_i32_ty], [ImmArg<ArgIndex<0>>, IntrNoMem, IntrHasSideEffects, IntrWillReturn, IntrNoCallback, IntrNoFree]>; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp b/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp index df778704bb5d4..f044f49473d17 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp @@ -890,6 +890,79 @@ bool MFMASmallGemmOpt::applyIGLPStrategy( return true; } +/// Whether \p MI matches \c SchedGroupMask::VALU classification (e.g. barrier +/// mask \c 0x2) +static bool matchesSchedGroupValu(const MachineInstr &MI, + const SIInstrInfo *TII) { + if (MI.isMetaInstruction()) + return false; + // Some memory instructions may be marked as VALU (e.g. BUFFER_LOAD_*_LDS). + // For our purposes, these shall not be classified as VALU as this results + // in unexpected behavior. + return TII->isVALU(MI) && !TII->isMFMAorWMMA(MI) && !TII->isTRANS(MI) && + !MI.mayLoadOrStore(); +} + +/// Interleave MFMA/WMMA with VALU slots: each repeating stage is one MFMA (or +/// WMMA), then up to N VALU ops per gap where N = floor(#VALU / #MFMA) in this +/// schedule region (same predicate as \c matchesSchedGroupValu), at least 1. +/// Template length uses MFMACount * 3 for slack, like MFMASmallGemmOpt. +/// \p IsBottomUp is false so SchedGroup pipeline order matches forward program +/// order (MFMA before its VALU gap). 
+class MFMAValuSpacingOpt final : public IGLPStrategy { +public: + bool applyIGLPStrategy( + DenseMap<int, SUnitsToCandidateSGsMap> &SyncedInstrs, + DenseMap<int, SmallVector<SchedGroup, 4>> &SyncedSchedGroups, + AMDGPU::SchedulingPhase Phase) override; + + bool shouldApplyStrategy(ScheduleDAGInstrs *DAG, + AMDGPU::SchedulingPhase Phase) override { + for (const MachineInstr &I : *DAG) + if (TII->isMFMAorWMMA(I)) + return true; + return false; + } + + MFMAValuSpacingOpt(ScheduleDAGInstrs *DAG, const SIInstrInfo *TII) + : IGLPStrategy(DAG, TII) { + IsBottomUp = false; + } +}; + +bool MFMAValuSpacingOpt::applyIGLPStrategy( + DenseMap<int, SUnitsToCandidateSGsMap> &SyncedInstrs, + DenseMap<int, SmallVector<SchedGroup, 4>> &SyncedSchedGroups, + AMDGPU::SchedulingPhase Phase) { + unsigned MFMACount = 0; + unsigned ValuCount = 0; + for (const MachineInstr &I : *DAG) { + if (TII->isMFMAorWMMA(I)) + ++MFMACount; + else if (matchesSchedGroupValu(I, TII)) + ++ValuCount; + } + + unsigned ValuGap = 1; + if (MFMACount > 0 && ValuCount > MFMACount) { + ValuGap = ValuCount / MFMACount; + } + + const unsigned PipelineSyncID = 0; + SchedGroup *SG = nullptr; + for (unsigned I = 0; I < MFMACount * 3; ++I) { + SG = &SyncedSchedGroups[PipelineSyncID].emplace_back( + SchedGroupMask::MFMA, 1, PipelineSyncID, DAG, TII); + SG->findCandidateSUnits(SyncedInstrs[SG->getSyncID()]); + + SG = &SyncedSchedGroups[PipelineSyncID].emplace_back( + SchedGroupMask::VALU, ValuGap, PipelineSyncID, DAG, TII); + SG->findCandidateSUnits(SyncedInstrs[SG->getSyncID()]); + } + + return true; +} + class MFMAExpInterleaveOpt final : public IGLPStrategy { private: // The count of TRANS SUs involved in the interleaved pipeline @@ -2321,6 +2394,8 @@ createIGLPStrategy(IGLPStrategyID ID, ScheduleDAGInstrs *DAG, return std::make_unique<MFMAExpInterleaveOpt>(DAG, TII); case MFMAExpSimpleInterleaveID: return std::make_unique<MFMAExpSimpleInterleaveOpt>(DAG, TII); + case MFMAValuSpacingOptID: + return 
std::make_unique<MFMAValuSpacingOpt>(DAG, TII); } llvm_unreachable("Unknown IGLPStrategyID"); @@ -2449,12 +2524,8 @@ bool SchedGroup::canAddMI(const MachineInstr &MI) const { Result = !MI.mayLoadOrStore(); else if (((SGMask & SchedGroupMask::VALU) != SchedGroupMask::NONE) && - TII->isVALU(MI) && !TII->isMFMAorWMMA(MI) && !TII->isTRANS(MI)) { - // Some memory instructions may be marked as VALU (e.g. BUFFER_LOAD_*_LDS). - // For our purposes, these shall not be classified as VALU as this results - // in unexpected behavior. - Result = !MI.mayLoadOrStore(); - } + matchesSchedGroupValu(MI, TII)) + Result = true; else if (((SGMask & SchedGroupMask::SALU) != SchedGroupMask::NONE) && TII->isSALU(MI)) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.h b/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.h index 1cb45ebc315f5..d2c343a460f24 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.h @@ -24,6 +24,7 @@ enum IGLPStrategyID : int { MFMASmallGemmSingleWaveOptID = 1, MFMAExpInterleaveID = 2, MFMAExpSimpleInterleaveID = 3, + MFMAValuSpacingOptID = 4, }; } // namespace AMDGPU diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp index ad24bad1fd5d7..f48f362940101 100644 --- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp +++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp @@ -3237,11 +3237,27 @@ void GCNScheduleDAGMILive::setTargetOccupancy(unsigned TargetOccupancy) { MFI.limitOccupancy(MinOccupancy); } -static bool hasIGLPInstrs(ScheduleDAGInstrs *DAG) { +/// Scan for IGLP "mutation-only" instructions (SCHED_GROUP_BARRIER or IGLP_OPT). +/// Returns {any in region, strategy ID if the first one found is IGLP_OPT}. 
+static std::pair<bool, std::optional<AMDGPU::IGLPStrategyID>> +hasIGLPInstrs(ScheduleDAGInstrs *DAG) { const SIInstrInfo *SII = static_cast<const SIInstrInfo *>(DAG->TII); - return any_of(*DAG, [SII](MachineBasicBlock::iterator MI) { + auto It = find_if(*DAG, [SII](MachineBasicBlock::iterator MI) { return SII->isIGLPMutationOnly(MI->getOpcode()); }); + if (It == DAG->end()) + return {false, std::nullopt}; + if (It->getOpcode() == AMDGPU::IGLP_OPT) + return {true, static_cast<AMDGPU::IGLPStrategyID>( + It->getOperand(0).getImm())}; + return {true, std::nullopt}; +} + +static bool hasSchedBarrier(ScheduleDAGInstrs *DAG) { + return any_of(*DAG, [](MachineBasicBlock::iterator MI) { + unsigned Opc = MI->getOpcode(); + return Opc == AMDGPU::SCHED_BARRIER || Opc == AMDGPU::SCHED_GROUP_BARRIER; + }); } GCNPostScheduleDAGMILive::GCNPostScheduleDAGMILive( @@ -3250,8 +3266,20 @@ GCNPostScheduleDAGMILive::GCNPostScheduleDAGMILive( : ScheduleDAGMI(C, std::move(S), RemoveKillFlags) {} void GCNPostScheduleDAGMILive::schedule() { - HasIGLPInstrs = hasIGLPInstrs(this); - if (HasIGLPInstrs) { + auto [HasIGLP, Strategy] = hasIGLPInstrs(this); + HasIGLPInstrs = HasIGLP; + if (HasIGLP) { + // MFMAValuSpacingOpt is a pre-RA strategy whose interleaving is correct + // after the initial machine scheduler. The post-RA scheduler would undo + // the reordering, so preserve the pre-RA schedule by skipping here. + // When SCHED_[GROUP_]BARRIER coexists with IGLP_OPT, IGroupLP ignores the + // IGLP_OPT (they are mutually exclusive), so let post-RA scheduling proceed + // normally. 
+ if (Strategy == AMDGPU::MFMAValuSpacingOptID && + !hasSchedBarrier(this)) { + HasIGLPInstrs = false; + return; + } SavedMutations.clear(); SavedMutations.swap(Mutations); addMutation(createIGroupLPDAGMutation(AMDGPU::SchedulingPhase::PostRA)); diff --git a/llvm/test/CodeGen/AMDGPU/iglp-opt4-mfma-valu-spacing-scheduler.ll b/llvm/test/CodeGen/AMDGPU/iglp-opt4-mfma-valu-spacing-scheduler.ll new file mode 100644 index 0000000000000..744daf9f47da8 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/iglp-opt4-mfma-valu-spacing-scheduler.ll @@ -0,0 +1,70 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; Full codegen on gfx950. Two MFMAs fed by loaded floats; three independent +; i32 muls stored to a second buffer. sched.barrier(0) isolates the MUL+MFMA +; region so that address-computation VALUs don't inflate the VALU gap in +; MFMAValuSpacingOpt. +; +; With iglp_opt(4) the expected MFMA/VALU interleaving (ValuGap=1) is: +; MFMA, MUL, MFMA, MUL, MUL +; +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck %s + +declare <4 x float> @llvm.amdgcn.mfma.f32.4x4x1f32(float, float, <4 x float>, i32, i32, i32) +declare i32 @llvm.amdgcn.workitem.id.x() +declare void @llvm.amdgcn.iglp.opt(i32 immarg) +declare void @llvm.amdgcn.sched.barrier(i32 immarg) + +define amdgpu_kernel void @mfma_valu_iglp4(ptr addrspace(1) %p, ptr addrspace(1) %q) #0 { +; CHECK-LABEL: mfma_valu_iglp4: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; CHECK-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; CHECK-NEXT: v_lshlrev_b32_e32 v8, 2, v0 +; CHECK-NEXT: v_mov_b32_e32 v9, 0 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: global_load_dwordx2 v[6:7], v8, s[0:1] +; CHECK-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x0 +; CHECK-NEXT: ; sched_barrier mask(0x00000000) +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: v_mov_b64_e32 v[0:1], s[4:5] +; CHECK-NEXT: v_mov_b64_e32 v[2:3], s[6:7] +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: s_nop 0 +; 
CHECK-NEXT: v_mfma_f32_4x4x1_16b_f32 v[0:3], v6, v7, v[0:3] +; CHECK-NEXT: v_mul_lo_u32 v4, v6, v6 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: v_mfma_f32_4x4x1_16b_f32 v[0:3], v6, v7, v[0:3] +; CHECK-NEXT: v_mul_lo_u32 v5, v6, v7 +; CHECK-NEXT: v_mul_lo_u32 v6, v7, v7 +; CHECK-NEXT: ; iglp_opt mask(0x00000004) +; CHECK-NEXT: ; sched_barrier mask(0x00000000) +; CHECK-NEXT: s_nop 1 +; CHECK-NEXT: global_store_dwordx4 v9, v[0:3], s[0:1] +; CHECK-NEXT: global_store_dwordx3 v8, v[4:6], s[2:3] +; CHECK-NEXT: s_endpgm +entry: + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %t = load <4 x float>, ptr addrspace(1) %p + %gep0 = getelementptr inbounds float, ptr addrspace(1) %p, i32 %tid + %gep1 = getelementptr inbounds float, ptr addrspace(1) %gep0, i32 1 + %f0 = load float, ptr addrspace(1) %gep0 + %f1 = load float, ptr addrspace(1) %gep1 + %i0 = bitcast float %f0 to i32 + %i1 = bitcast float %f1 to i32 + call void @llvm.amdgcn.sched.barrier(i32 0) + %m0 = mul nsw i32 %i0, %i0 + %m1 = mul nsw i32 %i0, %i1 + %m2 = mul nsw i32 %i1, %i1 + call void @llvm.amdgcn.iglp.opt(i32 4) + %mai = tail call <4 x float> @llvm.amdgcn.mfma.f32.4x4x1f32(float %f0, float %f1, <4 x float> %t, i32 0, i32 0, i32 0) + %mai2 = tail call <4 x float> @llvm.amdgcn.mfma.f32.4x4x1f32(float %f0, float %f1, <4 x float> %mai, i32 0, i32 0, i32 0) + call void @llvm.amdgcn.sched.barrier(i32 0) + store <4 x float> %mai2, ptr addrspace(1) %p + %qgep0 = getelementptr inbounds i32, ptr addrspace(1) %q, i32 %tid + %qgep1 = getelementptr inbounds i32, ptr addrspace(1) %qgep0, i32 1 + %qgep2 = getelementptr inbounds i32, ptr addrspace(1) %qgep0, i32 2 + store i32 %m0, ptr addrspace(1) %qgep0 + store i32 %m1, ptr addrspace(1) %qgep1 + store i32 %m2, ptr addrspace(1) %qgep2 + ret void +} diff --git a/llvm/test/CodeGen/AMDGPU/iglp-opt4-mfma-valu-spacing-scheduler.mir b/llvm/test/CodeGen/AMDGPU/iglp-opt4-mfma-valu-spacing-scheduler.mir new file mode 100644 index 0000000000000..7f173ddc8e5aa --- /dev/null +++ 
b/llvm/test/CodeGen/AMDGPU/iglp-opt4-mfma-valu-spacing-scheduler.mir @@ -0,0 +1,35 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# Pre-RA machine scheduler with IGroupLP / MFMAValuSpacingOpt (iglp_opt(4)). +# With IGLP_OPT 4 the expected MFMA/VALU interleaving (ValuGap=1) is: +# MFMA, MUL, MFMA, MUL, MUL +# +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -run-pass=machine-scheduler -o - %s | FileCheck %s + +--- +name: mfma_valu_iglp4 +tracksRegLiveness: true +body: | + bb.0: + ; CHECK-LABEL: name: mfma_valu_iglp4 + ; CHECK: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[V_MFMA_F32_4X4X1F32_e64_:%[0-9]+]]:areg_128_align2 = V_MFMA_F32_4X4X1F32_e64 [[DEF]], [[DEF1]], [[DEF2]], 0, 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[DEF]], [[DEF]], implicit $exec + ; CHECK-NEXT: [[V_MFMA_F32_4X4X1F32_e64_1:%[0-9]+]]:areg_128_align2 = V_MFMA_F32_4X4X1F32_e64 [[DEF]], [[DEF1]], [[V_MFMA_F32_4X4X1F32_e64_]], 0, 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MUL_LO_U32_e64_1:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[DEF]], [[DEF1]], implicit $exec + ; CHECK-NEXT: [[V_MUL_LO_U32_e64_2:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[DEF1]], [[DEF1]], implicit $exec + ; CHECK-NEXT: IGLP_OPT 4 + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_MUL_LO_U32_e64_]], implicit [[V_MUL_LO_U32_e64_1]], implicit [[V_MUL_LO_U32_e64_2]], implicit [[V_MFMA_F32_4X4X1F32_e64_1]] + %0:vgpr_32 = IMPLICIT_DEF + %1:vgpr_32 = IMPLICIT_DEF + %2:areg_128_align2 = IMPLICIT_DEF + %3:vgpr_32 = nsw V_MUL_LO_U32_e64 %0, %0, implicit $exec + %4:vgpr_32 = nsw V_MUL_LO_U32_e64 %0, %1, implicit $exec + %5:vgpr_32 = nsw V_MUL_LO_U32_e64 %1, %1, implicit $exec + %6:areg_128_align2 = V_MFMA_F32_4X4X1F32_e64 %0, %1, %2, 0, 0, 0, implicit $mode, implicit $exec + %7:areg_128_align2 = 
V_MFMA_F32_4X4X1F32_e64 %0, %1, %6, 0, 0, 0, implicit $mode, implicit $exec + IGLP_OPT 4 + S_ENDPGM 0, implicit %3, implicit %4, implicit %5, implicit %7 + +... diff --git a/llvm/test/Verifier/AMDGPU/intrinsic-immarg.ll b/llvm/test/Verifier/AMDGPU/intrinsic-immarg.ll index 17a4a8d89c5c1..8ac1cfbe45c93 100644 --- a/llvm/test/Verifier/AMDGPU/intrinsic-immarg.ll +++ b/llvm/test/Verifier/AMDGPU/intrinsic-immarg.ll @@ -651,3 +651,15 @@ define void @test_mfma_f32_32x32x1f32(float %arg0, float %arg1, <32 x float> %ar ret void } + +declare void @llvm.amdgcn.iglp.opt(i32 immarg) +define void @iglp_opt_invalid(i32 %mask) { + ; CHECK: immarg value 5 out of range [0, 5) + ; CHECK-NEXT: call void @llvm.amdgcn.iglp.opt(i32 5) + call void @llvm.amdgcn.iglp.opt(i32 5) + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %mask + ; CHECK-NEXT: call void @llvm.amdgcn.iglp.opt(i32 %mask) + call void @llvm.amdgcn.iglp.opt(i32 %mask) + ret void +} _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
