================ @@ -1333,6 +1308,56 @@ static void addPreloadKernArgHint(Function &F, TargetMachine &TM) { } } +/// The final check and update of the attribute 'amdgpu-waves-per-eu' based on +/// the determined 'amdgpu-flat-work-group-size' attribute. We can't do this +/// during attributor run because the two attributes grow in opposite direction, +/// we should not use any intermediate value to calculate waves per eu until we +/// have a determined flat workgroup size. +static void updateWavesPerEU(Module &M, TargetMachine &TM) { + for (Function &F : M) { + const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F); + + auto FlatWgrpSizeAttr = + AMDGPU::getIntegerPairAttribute(F, "amdgpu-flat-work-group-size"); + + unsigned MinWavesPerEU = ST.getMinWavesPerEU(); + unsigned MaxWavesPerEU = ST.getMaxWavesPerEU(); + + unsigned MinFlatWgrpSize = ST.getMinFlatWorkGroupSize(); + unsigned MaxFlatWgrpSize = ST.getMaxFlatWorkGroupSize(); + if (FlatWgrpSizeAttr.has_value()) { + MinFlatWgrpSize = FlatWgrpSizeAttr->first; + MaxFlatWgrpSize = *(FlatWgrpSizeAttr->second); + } + + // Start with the max range. + unsigned Min = MinWavesPerEU; + unsigned Max = MinWavesPerEU; + + // Compute the range from flat workgroup size. `getWavesPerEU` will also + // account for the 'amdgpu-waves-per-eu' attribute. + auto [MinFromFlatWgrpSize, MaxFromFlatWgrpSize] = + ST.getWavesPerEU(F, std::make_pair(MinFlatWgrpSize, MaxFlatWgrpSize)); ---------------- arsenm wrote:
```suggestion ST.getWavesPerEU(F, {MinFlatWgrpSize, MaxFlatWgrpSize}); ``` https://github.com/llvm/llvm-project/pull/123995 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits