llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-backend-amdgpu Author: Shilei Tian (shiltian) <details> <summary>Changes</summary> --- Patch is 164.14 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/123995.diff 31 Files Affected: - (modified) llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp (+74-51) - (modified) llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll (+3-3) - (modified) llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll (+6-6) - (modified) llvm/test/CodeGen/AMDGPU/annotate-existing-abi-attributes.ll (+10-10) - (modified) llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll (+39-41) - (modified) llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll (+14-14) - (modified) llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll (+10-10) - (modified) llvm/test/CodeGen/AMDGPU/attr-amdgpu-max-num-workgroups-propagate.ll (+25-27) - (modified) llvm/test/CodeGen/AMDGPU/attributor-flatscratchinit.ll (+12-12) - (modified) llvm/test/CodeGen/AMDGPU/attributor-loop-issue-58639.ll (+2-3) - (modified) llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll (+3-4) - (modified) llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll (+2-2) - (modified) llvm/test/CodeGen/AMDGPU/implicitarg-offset-attributes.ll (+16-16) - (modified) llvm/test/CodeGen/AMDGPU/indirect-call-set-from-other-function.ll (+2-2) - (modified) llvm/test/CodeGen/AMDGPU/inline-attr.ll (+6-6) - (modified) llvm/test/CodeGen/AMDGPU/issue120256-annotate-constexpr-addrspacecast.ll (+2-2) - (modified) llvm/test/CodeGen/AMDGPU/pal-simple-indirect-call.ll (+2-2) - (modified) llvm/test/CodeGen/AMDGPU/propagate-flat-work-group-size.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/propagate-waves-per-eu.ll (+18-19) - (modified) llvm/test/CodeGen/AMDGPU/recursive_global_initializer.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/remove-no-kernel-id-attribute.ll (+7-8) - (modified) llvm/test/CodeGen/AMDGPU/simple-indirect-call-2.ll (+6-6) - (modified) llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll (+2-2) - (modified) llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/uniform-work-group-multistep.ll (+5-6) - (modified) llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll (+2-2) - (modified) llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll (+2-2) - (modified) llvm/test/CodeGen/AMDGPU/uniform-work-group-propagate-attribute.ll (+4-4) - (modified) llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll (+2-2) - (modified) llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll (+1-1) ``````````diff diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp index 2bc68cf2fd6a4a..dc5ba1afcfdf7a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp @@ -1109,74 +1109,38 @@ struct AAAMDWavesPerEU : public AAAMDSizeRangeAttribute { Function *F = getAssociatedFunction(); auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache()); - auto TakeRange = [&](std::pair<unsigned, unsigned> R) { - auto [Min, Max] = R; - ConstantRange Range(APInt(32, Min), APInt(32, Max + 1)); - IntegerRangeState RangeState(Range); - clampStateAndIndicateChange(this->getState(), RangeState); - indicateOptimisticFixpoint(); - }; - - std::pair<unsigned, unsigned> MaxWavesPerEURange{ - 1U, InfoCache.getMaxWavesPerEU(*F)}; - // If the attribute exists, we will honor it if it is not the default. if (auto Attr = InfoCache.getWavesPerEUAttr(*F)) { + std::pair<unsigned, unsigned> MaxWavesPerEURange{ + 1U, InfoCache.getMaxWavesPerEU(*F)}; if (*Attr != MaxWavesPerEURange) { - TakeRange(*Attr); + auto [Min, Max] = *Attr; + ConstantRange Range(APInt(32, Min), APInt(32, Max + 1)); + IntegerRangeState RangeState(Range); + clampStateAndIndicateChange(this->getState(), RangeState); + indicateOptimisticFixpoint(); return; } } - // Unlike AAAMDFlatWorkGroupSize, it's getting trickier here. Since the - // calculation of waves per EU involves flat work group size, we can't - // simply use an assumed flat work group size as a start point, because the - // update of flat work group size is in an inverse direction of waves per - // EU. However, we can still do something if it is an entry function. Since - // an entry function is a terminal node, and flat work group size either - // from attribute or default will be used anyway, we can take that value and - // calculate the waves per EU based on it. This result can't be updated by - // no means, but that could still allow us to propagate it. - if (AMDGPU::isEntryFunctionCC(F->getCallingConv())) { - std::pair<unsigned, unsigned> FlatWorkGroupSize; - if (auto Attr = InfoCache.getFlatWorkGroupSizeAttr(*F)) - FlatWorkGroupSize = *Attr; - else - FlatWorkGroupSize = InfoCache.getDefaultFlatWorkGroupSize(*F); - TakeRange(InfoCache.getEffectiveWavesPerEU(*F, MaxWavesPerEURange, - FlatWorkGroupSize)); - } + if (AMDGPU::isEntryFunctionCC(F->getCallingConv())) + indicatePessimisticFixpoint(); } ChangeStatus updateImpl(Attributor &A) override { - auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache()); ChangeStatus Change = ChangeStatus::UNCHANGED; auto CheckCallSite = [&](AbstractCallSite CS) { Function *Caller = CS.getInstruction()->getFunction(); - Function *Func = getAssociatedFunction(); - LLVM_DEBUG(dbgs() << '[' << getName() << "] Call " << Caller->getName() - << "->" << Func->getName() << '\n'); - const auto *CallerInfo = A.getAAFor<AAAMDWavesPerEU>( *this, IRPosition::function(*Caller), DepClassTy::REQUIRED); - const auto *AssumedGroupSize = A.getAAFor<AAAMDFlatWorkGroupSize>( - *this, IRPosition::function(*Func), DepClassTy::REQUIRED); - if (!CallerInfo || !AssumedGroupSize || !CallerInfo->isValidState() || - !AssumedGroupSize->isValidState()) + if (!CallerInfo || !CallerInfo->isValidState()) return false; - - unsigned Min, Max; - std::tie(Min, Max) = InfoCache.getEffectiveWavesPerEU( - *Caller, - {CallerInfo->getAssumed().getLower().getZExtValue(), - CallerInfo->getAssumed().getUpper().getZExtValue() - 1}, - {AssumedGroupSize->getAssumed().getLower().getZExtValue(), - AssumedGroupSize->getAssumed().getUpper().getZExtValue() - 1}); - ConstantRange CallerRange(APInt(32, Min), APInt(32, Max + 1)); + unsigned Min = CallerInfo->getAssumed().getLower().getZExtValue(); + unsigned Max = CallerInfo->getAssumed().getUpper().getZExtValue(); + ConstantRange CallerRange(APInt(32, Min), APInt(32, Max)); IntegerRangeState CallerRangeState(CallerRange); Change |= clampStateAndIndicateChange(this->getState(), CallerRangeState); - return true; }; @@ -1329,6 +1293,59 @@ static void addPreloadKernArgHint(Function &F, TargetMachine &TM) { } } +static void checkWavesPerEU(Module &M, TargetMachine &TM) { + for (Function &F : M) { + const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F); + + auto FlatWgrpSizeAttr = + AMDGPU::getIntegerPairAttribute(F, "amdgpu-flat-work-group-size"); + auto WavesPerEUAttr = AMDGPU::getIntegerPairAttribute( + F, "amdgpu-waves-per-eu", /*OnlyFirstRequired=*/true); + + unsigned MinWavesPerEU = ST.getMinWavesPerEU(); + unsigned MaxWavesPerEU = ST.getMaxWavesPerEU(); + + unsigned MinFlatWgrpSize = 1U; + unsigned MaxFlatWgrpSize = 1024U; + if (FlatWgrpSizeAttr.has_value()) { + MinFlatWgrpSize = FlatWgrpSizeAttr->first; + MaxFlatWgrpSize = *(FlatWgrpSizeAttr->second); + } + + // Start with the max range. + unsigned Min = MinWavesPerEU; + unsigned Max = MaxWavesPerEU; + + // If the attribute exists, set them to the value from the attribute. + if (WavesPerEUAttr.has_value()) { + Min = WavesPerEUAttr->first; + if (WavesPerEUAttr->second.has_value()) + Max = *(WavesPerEUAttr->second); + } + + // Compute the range from flat workgroup size. + auto [MinFromFlatWgrpSize, MaxFromFlatWgrpSize] = + ST.getWavesPerEU(F, std::make_pair(MinFlatWgrpSize, MaxFlatWgrpSize)); + + // For the lower bound, we have to "tighten" it. + Min = std::max(Min, MinFromFlatWgrpSize); + // For the upper bound, we have to "extend" it. + Max = std::max(Max, MaxFromFlatWgrpSize); + + // Clamp the range to the max range. + Min = std::max(Min, MinWavesPerEU); + Max = std::min(Max, MaxWavesPerEU); + + // Update the attribute if it is not the max. + if (Min != MinWavesPerEU || Max != MaxWavesPerEU) { + SmallString<10> Buffer; + raw_svector_ostream OS(Buffer); + OS << Min << ',' << Max; + F.addFnAttr("amdgpu-waves-per-eu", OS.str()); + } + } +} + static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM, AMDGPUAttributorOptions Options, ThinOrFullLTOPhase LTOPhase) { @@ -1421,8 +1438,14 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM, } } - ChangeStatus Change = A.run(); - return Change == ChangeStatus::CHANGED; + bool Changed = A.run() == ChangeStatus::CHANGED; + + if (Changed && (LTOPhase == ThinOrFullLTOPhase::None || + LTOPhase == ThinOrFullLTOPhase::FullLTOPostLink || + LTOPhase == ThinOrFullLTOPhase::ThinLTOPostLink)) + checkWavesPerEU(M, TM); + + return Changed; } class AMDGPUAttributorLegacy : public ModulePass { diff --git a/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll b/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll index d316e10037757b..51f0348c050cd9 100644 --- a/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll +++ b/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll @@ -232,9 +232,9 @@ attributes #1 = { nounwind } ; AKF_HSA: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } ; AKF_HSA: attributes #[[ATTR1]] = { nounwind } ;. -; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } -; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) "amdgpu-waves-per-eu"="4,10" } +; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } ;. ; AKF_HSA: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500} ;. diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll index 33e7e7a7a019e3..e28caf8f455e21 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll @@ -254,11 +254,11 @@ define amdgpu_kernel void @indirect_calls_none_agpr(i1 %cond) { attributes #0 = { "amdgpu-no-agpr" } ;. -; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR2]] = { "target-cpu"="gfx90a" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR3:[0-9]+]] = { convergent nocallback nofree nosync nounwind willreturn memory(none) "target-cpu"="gfx90a" } -; CHECK: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx90a" } -; CHECK: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) "target-cpu"="gfx90a" } +; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,8" "target-cpu"="gfx90a" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,8" "target-cpu"="gfx90a" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR2]] = { "amdgpu-waves-per-eu"="4,8" "target-cpu"="gfx90a" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR3:[0-9]+]] = { convergent nocallback nofree nosync nounwind willreturn memory(none) "amdgpu-waves-per-eu"="4,8" "target-cpu"="gfx90a" } +; CHECK: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "amdgpu-waves-per-eu"="4,8" "target-cpu"="gfx90a" } +; CHECK: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) "amdgpu-waves-per-eu"="4,8" "target-cpu"="gfx90a" } ; CHECK: attributes #[[ATTR6]] = { "amdgpu-no-agpr" } ;. diff --git a/llvm/test/CodeGen/AMDGPU/annotate-existing-abi-attributes.ll b/llvm/test/CodeGen/AMDGPU/annotate-existing-abi-attributes.ll index 7e0208cd1f45aa..28722021e0448f 100644 --- a/llvm/test/CodeGen/AMDGPU/annotate-existing-abi-attributes.ll +++ b/llvm/test/CodeGen/AMDGPU/annotate-existing-abi-attributes.ll @@ -117,14 +117,14 @@ define void @call_no_dispatch_id() { ret void } ;. -; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-workitem-id-y" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR2]] = { "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR3]] = { "amdgpu-no-workgroup-id-x" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR4]] = { "amdgpu-no-workgroup-id-y" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR5]] = { "amdgpu-no-workgroup-id-z" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR6]] = { "amdgpu-no-dispatch-ptr" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR7]] = { "amdgpu-no-queue-ptr" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR8]] = { "amdgpu-no-implicitarg-ptr" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR9]] = { "amdgpu-no-dispatch-id" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-workitem-id-x" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-workitem-id-y" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR2]] = { "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR3]] = { "amdgpu-no-workgroup-id-x" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR4]] = { "amdgpu-no-workgroup-id-y" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR5]] = { "amdgpu-no-workgroup-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR6]] = { "amdgpu-no-dispatch-ptr" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR7]] = { "amdgpu-no-queue-ptr" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR8]] = { "amdgpu-no-implicitarg-ptr" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR9]] = { "amdgpu-no-dispatch-id" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } ;. diff --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll index ea3f08ede2c5dc..da0f710abe08cf 100644 --- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll +++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll @@ -688,7 +688,7 @@ define void @func_call_asm() #3 { ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_call_asm ; ATTRIBUTOR_HSA-SAME: () #[[ATTR16]] { -; ATTRIBUTOR_HSA-NEXT: call void asm sideeffect "", ""() #[[ATTR26:[0-9]+]] +; ATTRIBUTOR_HSA-NEXT: call void asm sideeffect "", ""() #[[ATTR24:[0-9]+]] ; ATTRIBUTOR_HSA-NEXT: ret void ; call void asm sideeffect "", ""() #3 @@ -717,7 +717,7 @@ define amdgpu_kernel void @func_kern_defined() #3 { ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_kern_defined -; ATTRIBUTOR_HSA-SAME: () #[[ATTR17:[0-9]+]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR16]] { ; ATTRIBUTOR_HSA-NEXT: call void @defined.func() ; ATTRIBUTOR_HSA-NEXT: ret void ; @@ -845,7 +845,7 @@ define amdgpu_kernel void @kern_sanitize_address() #4 { ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_sanitize_address -; ATTRIBUTOR_HSA-SAME: () #[[ATTR18:[0-9]+]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR17:[0-9]+]] { ; ATTRIBUTOR_HSA-NEXT: store volatile i32 0, ptr addrspace(1) null, align 4 ; ATTRIBUTOR_HSA-NEXT: ret void ; @@ -861,7 +861,7 @@ define void @func_sanitize_address() #4 { ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_sanitize_address -; ATTRIBUTOR_HSA-SAME: () #[[ATTR18]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR17]] { ; ATTRIBUTOR_HSA-NEXT: store volatile i32 0, ptr addrspace(1) null, align 4 ; ATTRIBUTOR_HSA-NEXT: ret void ; @@ -877,7 +877,7 @@ define void @func_indirect_sanitize_address() #3 { ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_sanitize_address -; ATTRIBUTOR_HSA-SAME: () #[[ATTR19:[0-9]+]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR18:[0-9]+]] { ; ATTRIBUTOR_HSA-NEXT: call void @func_sanitize_address() ; ATTRIBUTOR_HSA-NEXT: ret void ; @@ -893,7 +893,7 @@ define amdgpu_kernel void @kern_indirect_sanitize_address() #3 { ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_indirect_sanitize_address -; ATTRIBUTOR_HSA-SAME: () #[[ATTR19]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR18]] { ; ATTRIBUTOR_HSA-NEXT: call void @func_sanitize_address() ; ATTRIBUTOR_HSA-NEXT: ret void ; @@ -928,7 +928,7 @@ define internal void @enqueue_block_def() #6 { ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@enqueue_block_def -; ATTRIBUTOR_HSA-SAME: () #[[ATTR22:[0-9]+]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR21:[0-9]+]] { ; ATTRIBUTOR_HSA-NEXT: ret void ; ret void @@ -941,7 +941,7 @@ define amdgpu_kernel void @kern_call_enqueued_block_decl() { ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_call_enqueued_block_decl -; ATTRIBUTOR_HSA-SAME: () #[[ATTR23:[0-9]+]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR22:[0-9]+]] { ; ATTRIBUTOR_HSA-NEXT: call void @enqueue_block_decl() ; ATTRIBUTOR_HSA-NEXT: ret void ; @@ -956,7 +956,7 @@ define amdgpu_kernel void @kern_call_enqueued_block_def() { ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_call_enqueued_block_def -; ATTRIBUTOR_HSA-SAME: () #[[ATTR24:[0-9]+]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR23:[0-9]+]] { ; ATTRIBUTOR_HSA-NEXT: call void @enqueue_block_def() ; ATTRIBUTOR_HSA-NEXT: ret void ; @@ -969,7 +969,7 @@ define void @unused_enqueue_block() { ; AKF_HSA-NEXT: ret void ... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/123995 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits