llvmorg-github-actions[bot] wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-llvm-analysis Author: Shilei Tian (shiltian) <details> <summary>Changes</summary> --- Patch is 58.50 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/200989.diff 25 Files Affected: - (modified) clang/include/clang/Basic/DiagnosticSemaKinds.td (+3) - (modified) clang/lib/CodeGen/Targets/AMDGPU.cpp (+5-5) - (modified) clang/lib/Sema/SemaDeclAttr.cpp (+35) - (modified) clang/test/CodeGenOpenCL/amdgpu-attrs.cl (+3-4) - (modified) clang/test/SemaOpenCL/amdgpu-attrs.cl (+3) - (modified) llvm/docs/AMDGPUUsage.rst (+3) - (modified) llvm/lib/IR/Verifier.cpp (+78) - (modified) llvm/test/Analysis/UniformityAnalysis/AMDGPU/workitem-intrinsics.ll (+7-7) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-amdgcn.workitem.id.mir (+6-6) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll (+3-3) - (modified) llvm/test/CodeGen/AMDGPU/call-reqd-group-size.ll (+6-6) - (modified) llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll (+2-1) - (modified) llvm/test/CodeGen/AMDGPU/implicit-arg-v5-opt.ll (+11-7) - (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id.ll (+3-3) - (modified) llvm/test/CodeGen/AMDGPU/reqd-work-group-size.ll (+5-5) - (modified) llvm/test/CodeGen/AMDGPU/simplifydemandedbits-recursion.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/uniform-load-from-tid.ll (+8-8) - (modified) llvm/test/CodeGen/AMDGPU/zext-lid.ll (+4-2) - (modified) llvm/test/Transforms/InstCombine/AMDGPU/mbcnt-negative-cases.ll (+8-8) - (modified) llvm/test/Transforms/InstCombine/AMDGPU/mbcnt-wave32-optimizations.ll (+12-12) - (modified) llvm/test/Transforms/InstCombine/AMDGPU/mbcnt-wave64-optimizations.ll (+7-7) - (added) llvm/test/Verifier/AMDGPU/reqd-work-group-size.ll (+78) - (modified) mlir/lib/Target/LLVMIR/Dialect/ROCDL/ROCDLToLLVMIRTranslation.cpp (+35) - (modified) mlir/test/Target/LLVMIR/rocdl.mlir (+11) ``````````diff diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 077aace321264..75d84b4129e8e 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -3697,6 +3697,9 @@ def err_swift_abi_parameter_wrong_type : Error< def err_attribute_argument_invalid : Error< "%0 attribute argument is invalid: %select{max must be 0 since min is 0|" "min must not be greater than max}1">; +def err_attribute_amdgpu_flat_work_group_size_mismatch : Error< + "'amdgpu_flat_work_group_size' attribute must match " + "'reqd_work_group_size' product">; def err_attribute_argument_is_zero : Error< "%0 attribute must be greater than 0">; def warn_attribute_argument_n_negative : Warning< diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp index a3a596bb9d822..7b37f3f7f9b6e 100644 --- a/clang/lib/CodeGen/Targets/AMDGPU.cpp +++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp @@ -731,15 +731,15 @@ void CodeGenModule::handleAMDGPUFlatWorkGroupSizeAttr( auto Eval = [&](Expr *E) { return E->EvaluateKnownConstInt(getContext()).getExtValue(); }; - if (FlatWGS) { + if (ReqdWGS) { + Min = Max = Eval(ReqdWGS->getXDim()) * Eval(ReqdWGS->getYDim()) * + Eval(ReqdWGS->getZDim()); + } else if (FlatWGS) { Min = Eval(FlatWGS->getMin()); Max = Eval(FlatWGS->getMax()); } - if (ReqdWGS && Min == 0 && Max == 0) - Min = Max = Eval(ReqdWGS->getXDim()) * Eval(ReqdWGS->getYDim()) * - Eval(ReqdWGS->getZDim()); - if (Min != 0) { + if (Min != 0 || ReqdWGS) { assert(Min <= Max && "Min must be less than or equal Max"); if (MinThreadsVal) diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index ae04d3855f01c..c1b95a9dac24d 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -8439,6 +8439,40 @@ static bool isKernelDecl(Decl *D) { D->hasAttr<CUDAGlobalAttr>(); } +static void checkAMDGPUReqdWorkGroupSize(Sema &S, Decl *D) { + if (!S.Context.getTargetInfo().getTriple().isAMDGPU()) + return; + + const auto *Flat = D->getAttr<AMDGPUFlatWorkGroupSizeAttr>(); + const auto *Reqd = D->getAttr<ReqdWorkGroupSizeAttr>(); + if (!Flat || !Reqd) + return; + + auto Eval = [&](Expr *E) -> std::optional<uint64_t> { + if (E->isValueDependent()) + return std::nullopt; + std::optional<llvm::APSInt> V = E->getIntegerConstantExpr(S.Context); + if (!V) + return std::nullopt; + return V->getZExtValue(); + }; + + std::optional<uint64_t> X = Eval(Reqd->getXDim()); + std::optional<uint64_t> Y = Eval(Reqd->getYDim()); + std::optional<uint64_t> Z = Eval(Reqd->getZDim()); + std::optional<uint64_t> Min = Eval(Flat->getMin()); + std::optional<uint64_t> Max = Eval(Flat->getMax()); + if (!X || !Y || !Z || !Min || !Max) + return; + + uint64_t Product = *X * *Y * *Z; + if (*Min != Product || *Max != Product) { + S.Diag(Flat->getLocation(), + diag::err_attribute_amdgpu_flat_work_group_size_mismatch); + D->setInvalidDecl(); + } +} + void Sema::ProcessDeclAttributeList( Scope *S, Decl *D, const ParsedAttributesView &AttrList, const ProcessDeclAttributeOptions &Options) { @@ -8502,6 +8536,7 @@ void Sema::ProcessDeclAttributeList( D->setInvalidDecl(); } } + checkAMDGPUReqdWorkGroupSize(*this, D); // CUDA/HIP: restrict explicit CUDA target attributes on deduction guides. // diff --git a/clang/test/CodeGenOpenCL/amdgpu-attrs.cl b/clang/test/CodeGenOpenCL/amdgpu-attrs.cl index 606566e070c41..8251658edb631 100644 --- a/clang/test/CodeGenOpenCL/amdgpu-attrs.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-attrs.cl @@ -134,9 +134,9 @@ __attribute__((reqd_work_group_size(32, 2, 1))) // expected-no-diagnostics kernel void reqd_work_group_size_32_2_1() { // CHECK: define{{.*}} amdgpu_kernel void @reqd_work_group_size_32_2_1() [[FLAT_WORK_GROUP_SIZE_64_64:#[0-9]+]] } -__attribute__((reqd_work_group_size(32, 2, 1), amdgpu_flat_work_group_size(16, 128))) // expected-no-diagnostics -kernel void reqd_work_group_size_32_2_1_flat_work_group_size_16_128() { -// CHECK: define{{.*}} amdgpu_kernel void @reqd_work_group_size_32_2_1_flat_work_group_size_16_128() [[FLAT_WORK_GROUP_SIZE_16_128:#[0-9]+]] +__attribute__((reqd_work_group_size(32, 2, 1), amdgpu_flat_work_group_size(64, 64))) // expected-no-diagnostics +kernel void reqd_work_group_size_32_2_1_flat_work_group_size_64_64() { +// CHECK: define{{.*}} amdgpu_kernel void @reqd_work_group_size_32_2_1_flat_work_group_size_64_64() [[FLAT_WORK_GROUP_SIZE_64_64]] } __attribute__((amdgpu_max_num_work_groups(1, 1, 1))) // expected-no-diagnostics @@ -203,7 +203,6 @@ kernel void default_kernel() { // CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64]] = {{.*}} "amdgpu-flat-work-group-size"="32,64" // CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_64_64]] = {{.*}} "amdgpu-flat-work-group-size"="64,64" -// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_16_128]] = {{.*}} "amdgpu-flat-work-group-size"="16,128" // CHECK-DAG: attributes [[WAVES_PER_EU_2]] = {{.*}} "amdgpu-flat-work-group-size"="1,256" "amdgpu-waves-per-eu"="2" diff --git a/clang/test/SemaOpenCL/amdgpu-attrs.cl b/clang/test/SemaOpenCL/amdgpu-attrs.cl index a6177798f4819..9321c2f83e01c 100644 --- a/clang/test/SemaOpenCL/amdgpu-attrs.cl +++ b/clang/test/SemaOpenCL/amdgpu-attrs.cl @@ -51,6 +51,9 @@ __attribute__((amdgpu_waves_per_eu(0, 4))) kernel void kernel_waves_per_eu_0_4() __attribute__((amdgpu_flat_work_group_size(64, 32))) kernel void kernel_flat_work_group_size_64_32() {} // expected-error {{'amdgpu_flat_work_group_size' attribute argument is invalid: min must not be greater than max}} __attribute__((amdgpu_waves_per_eu(4, 2))) kernel void kernel_waves_per_eu_4_2() {} // expected-error {{'amdgpu_waves_per_eu' attribute argument is invalid: min must not be greater than max}} +__attribute__((reqd_work_group_size(32, 2, 1), amdgpu_flat_work_group_size(16, 128))) kernel void kernel_reqd_work_group_size_32_2_1_flat_work_group_size_16_128() {} // expected-error {{'amdgpu_flat_work_group_size' attribute must match 'reqd_work_group_size' product}} +__attribute__((amdgpu_flat_work_group_size(16, 128), reqd_work_group_size(32, 2, 1))) kernel void kernel_flat_work_group_size_16_128_reqd_work_group_size_32_2_1() {} // expected-error {{'amdgpu_flat_work_group_size' attribute must match 'reqd_work_group_size' product}} + __attribute__((amdgpu_waves_per_eu(2, 4, 8))) kernel void kernel_waves_per_eu_2_4_8() {} // expected-error {{'amdgpu_waves_per_eu' attribute takes no more than 2 arguments}} __attribute__((amdgpu_flat_work_group_size(0, 0))) kernel void kernel_flat_work_group_size_0_0() {} diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst index 61841716792ef..03ee0a0709443 100644 --- a/llvm/docs/AMDGPUUsage.rst +++ b/llvm/docs/AMDGPUUsage.rst @@ -2249,6 +2249,9 @@ The AMDGPU backend supports the following LLVM IR attributes. "amdgpu-flat-work-group-size"="min,max" Specify the minimum and maximum flat work group sizes that will be specified when the kernel is dispatched. Generated by the ``amdgpu_flat_work_group_size`` CLANG attribute [CLANG-ATTR]_. + If the ``reqd_work_group_size`` metadata is present, the product + of its three workgroup size dimensions must match both ``min`` + and ``max``. The IR implied default value is 1,1024. Clang may emit this attribute with more restrictive bounds depending on language defaults. If the actual block or workgroup size exceeds the limit at any point during diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index 96f228f88a46c..072008a01c955 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -123,10 +123,12 @@ #include "llvm/Support/ModRef.h" #include "llvm/Support/TimeProfiler.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/TargetParser/Triple.h" #include "llvm/Transforms/Coroutines/CoroInstr.h" #include <algorithm> #include <cassert> #include <cstdint> +#include <limits> #include <memory> #include <optional> #include <queue> @@ -645,6 +647,7 @@ class Verifier : public InstVisitor<Verifier>, VerifierSupport { void verifyFunctionAttrs(FunctionType *FT, AttributeList Attrs, const Value *V, bool IsIntrinsic, bool IsInlineAsm); void verifyFunctionMetadata(ArrayRef<std::pair<unsigned, MDNode *>> MDs); + void verifyAMDGPUReqdWorkGroupSize(const Function &F); void verifyUnknownProfileMetadata(MDNode *MD); void visitConstantExprsRecursively(const Constant *EntryC); void visitConstantExpr(const ConstantExpr *CE); @@ -2814,6 +2817,80 @@ void Verifier::verifyFunctionMetadata( } } +void Verifier::verifyAMDGPUReqdWorkGroupSize(const Function &F) { + if (!TT.isAMDGPU()) + return; + + MDNode *ReqdWorkGroupSize = F.getMetadata("reqd_work_group_size"); + if (!ReqdWorkGroupSize) + return; + + Check(ReqdWorkGroupSize->getNumOperands() == 3, + "reqd_work_group_size must have exactly three operands", &F, + ReqdWorkGroupSize); + if (ReqdWorkGroupSize->getNumOperands() != 3) + return; + + uint64_t Product = 1; + for (unsigned I = 0; I != 3; ++I) { + ConstantInt *C = + mdconst::dyn_extract<ConstantInt>(ReqdWorkGroupSize->getOperand(I)); + Check(C, "reqd_work_group_size operands must be integer constants", &F, + ReqdWorkGroupSize); + if (!C) + return; + + const APInt &Value = C->getValue(); + Check(Value.getActiveBits() <= 64, + "reqd_work_group_size operands must fit in 64 bits", &F, + ReqdWorkGroupSize); + if (Value.getActiveBits() > 64) + return; + + uint64_t Dim = Value.getZExtValue(); + Check(Dim == 0 || Product <= std::numeric_limits<uint64_t>::max() / Dim, + "reqd_work_group_size product must fit in 64 bits", &F, + ReqdWorkGroupSize); + if (Dim != 0 && Product > std::numeric_limits<uint64_t>::max() / Dim) + return; + Product *= Dim; + } + + Attribute FlatWorkGroupSize = F.getFnAttribute("amdgpu-flat-work-group-size"); + Check(FlatWorkGroupSize.isValid(), + "reqd_work_group_size requires amdgpu-flat-work-group-size", &F, + ReqdWorkGroupSize); + if (!FlatWorkGroupSize.isValid()) + return; + + Check(FlatWorkGroupSize.isStringAttribute(), + "amdgpu-flat-work-group-size must be a string attribute", &F); + if (!FlatWorkGroupSize.isStringAttribute()) + return; + + auto ParseUnsigned = [](StringRef S, uint64_t &Value) { + S = S.trim(); + return !S.empty() && !S.starts_with("-") && !S.getAsInteger(0, Value); + }; + + StringRef AttrValue = FlatWorkGroupSize.getValueAsString(); + std::pair<StringRef, StringRef> Values = AttrValue.split(','); + uint64_t Min = 0; + uint64_t Max = 0; + bool Parsed = !Values.second.contains(',') && + ParseUnsigned(Values.first, Min) && + ParseUnsigned(Values.second, Max); + Check(Parsed, + "amdgpu-flat-work-group-size must be a pair of unsigned integers", &F); + if (!Parsed) + return; + + Check(Min == Product && Max == Product, + "amdgpu-flat-work-group-size must equal the product of " + "reqd_work_group_size operands", + &F, ReqdWorkGroupSize); +} + void Verifier::visitConstantExprsRecursively(const Constant *EntryC) { if (EntryC->getNumOperands() == 0) return; @@ -3284,6 +3361,7 @@ void Verifier::visitFunction(const Function &F) { F.getAllMetadata(MDs); assert(F.hasMetadata() != MDs.empty() && "Bit out-of-sync"); verifyFunctionMetadata(MDs); + verifyAMDGPUReqdWorkGroupSize(F); // Check validity of the personality function if (F.hasPersonalityFn()) { diff --git a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/workitem-intrinsics.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/workitem-intrinsics.ll index fbd855b0d691b..96d14dcde2ebc 100644 --- a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/workitem-intrinsics.ll +++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/workitem-intrinsics.ll @@ -67,7 +67,7 @@ define amdgpu_kernel void @workitem_id_z_singlethreaded() #2 { ; CHECK-LABEL: UniformityInfo for function 'workitem_id_x_singlethreaded_md': ; CHECK-NOT: DIVERGENT -define amdgpu_kernel void @workitem_id_x_singlethreaded_md() "amdgpu-no-wwm" !reqd_work_group_size !0 { +define amdgpu_kernel void @workitem_id_x_singlethreaded_md() "amdgpu-flat-work-group-size"="1,1" "amdgpu-no-wwm" !reqd_work_group_size !0 { %id.x = call i32 @llvm.amdgcn.workitem.id.x() store volatile i32 %id.x, ptr addrspace(1) undef ret void @@ -75,7 +75,7 @@ define amdgpu_kernel void @workitem_id_x_singlethreaded_md() "amdgpu-no-wwm" !re ; CHECK-LABEL: UniformityInfo for function 'workitem_id_y_singlethreaded_md': ; CHECK-NOT: DIVERGENT -define amdgpu_kernel void @workitem_id_y_singlethreaded_md() !reqd_work_group_size !0 { +define amdgpu_kernel void @workitem_id_y_singlethreaded_md() "amdgpu-flat-work-group-size"="1,1" !reqd_work_group_size !0 { %id.x = call i32 @llvm.amdgcn.workitem.id.y() store volatile i32 %id.x, ptr addrspace(1) undef ret void @@ -83,7 +83,7 @@ define amdgpu_kernel void @workitem_id_y_singlethreaded_md() !reqd_work_group_si ; CHECK-LABEL: UniformityInfo for function 'workitem_id_z_singlethreaded_md': ; CHECK-NOT: DIVERGENT -define amdgpu_kernel void @workitem_id_z_singlethreaded_md() !reqd_work_group_size !0 { +define amdgpu_kernel void @workitem_id_z_singlethreaded_md() "amdgpu-flat-work-group-size"="1,1" !reqd_work_group_size !0 { %id.x = call i32 @llvm.amdgcn.workitem.id.y() store volatile i32 %id.x, ptr addrspace(1) undef ret void @@ -91,7 +91,7 @@ define amdgpu_kernel void @workitem_id_z_singlethreaded_md() !reqd_work_group_si ; CHECK-LABEL: UniformityInfo for function 'workitem_id_x_not_singlethreaded_dimx': ; CHECK: DIVERGENT: %id.x = call i32 @llvm.amdgcn.workitem.id.x() -define amdgpu_kernel void @workitem_id_x_not_singlethreaded_dimx() !reqd_work_group_size !1 { +define amdgpu_kernel void @workitem_id_x_not_singlethreaded_dimx() "amdgpu-flat-work-group-size"="2,2" !reqd_work_group_size !1 { %id.x = call i32 @llvm.amdgcn.workitem.id.x() store volatile i32 %id.x, ptr addrspace(1) undef ret void @@ -99,7 +99,7 @@ define amdgpu_kernel void @workitem_id_x_not_singlethreaded_dimx() !reqd_work_gr ; CHECK-LABEL: UniformityInfo for function 'workitem_id_x_not_singlethreaded_dimy': ; CHECK: DIVERGENT: %id.x = call i32 @llvm.amdgcn.workitem.id.x() -define amdgpu_kernel void @workitem_id_x_not_singlethreaded_dimy() !reqd_work_group_size !2 { +define amdgpu_kernel void @workitem_id_x_not_singlethreaded_dimy() "amdgpu-flat-work-group-size"="2,2" !reqd_work_group_size !2 { %id.x = call i32 @llvm.amdgcn.workitem.id.x() store volatile i32 %id.x, ptr addrspace(1) undef ret void @@ -107,7 +107,7 @@ define amdgpu_kernel void @workitem_id_x_not_singlethreaded_dimy() !reqd_work_gr ; CHECK-LABEL: UniformityInfo for function 'workitem_id_x_not_singlethreaded_dimz': ; CHECK: DIVERGENT: %id.x = call i32 @llvm.amdgcn.workitem.id.x() -define amdgpu_kernel void @workitem_id_x_not_singlethreaded_dimz() !reqd_work_group_size !3 { +define amdgpu_kernel void @workitem_id_x_not_singlethreaded_dimz() "amdgpu-flat-work-group-size"="2,2" !reqd_work_group_size !3 { %id.x = call i32 @llvm.amdgcn.workitem.id.x() store volatile i32 %id.x, ptr addrspace(1) undef ret void @@ -115,7 +115,7 @@ define amdgpu_kernel void @workitem_id_x_not_singlethreaded_dimz() !reqd_work_gr ; CHECK-LABEL: UniformityInfo for function 'workitem_id_z_uniform_len_1' ; CHECK-NOT: DIVERGENT -define amdgpu_kernel void @workitem_id_z_uniform_len_1(ptr %o) !reqd_work_group_size !4 { +define amdgpu_kernel void @workitem_id_z_uniform_len_1(ptr %o) "amdgpu-flat-work-group-size"="64,64" !reqd_work_group_size !4 { %id.z = call i32 @llvm.amdgcn.workitem.id.z() store i32 %id.z, ptr %o ret void diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll index d69515591ecee..c66ef8f64df25 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll @@ -1234,7 +1234,7 @@ declare i32 @llvm.amdgcn.workitem.id.x() #1 declare i32 @llvm.amdgcn.workitem.id.y() #1 declare i32 @llvm.amdgcn.workitem.id.z() #1 -attributes #0 = { nounwind } +attributes #0 = { nounwind "amdgpu-flat-work-group-size"="64,64" } attributes #1 = { nounwind readnone speculatable willreturn } !llvm.module.flags = !{!6} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-amdgcn.workitem.id.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-amdgcn.workitem.id.mir index 915139b590fd4..8fcb3b6892a0e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-amdgcn.workitem.id.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-amdgcn.workitem.id.mir @@ -2,27 +2,27 @@ # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx90a -run-pass=legalizer -o - %s | FileCheck -check-prefix=GCN %s --- | - define amdgpu_kernel void @test_workitem_id_x_unpacked() !reqd_work_group_size !0 { + define amdgpu_kernel void @test_workitem_id_x_unpacked() "amdgpu-flat-work-group-size"="8192,8192" !reqd_work_group_size !0 { ret void } - define amdgpu_kernel void @test_workitem_id_y_unpacked() !reqd_work_group_size !0 { + define amdgpu_kernel void @test_workitem_id_y_unpacked() "amdgpu-flat-work-group-size"="8192,8192" !reqd_work_group_size !0 { ret void } - define amdgpu_kernel void @test_workitem_id_z_unpacked() !reqd_work_group_size !0 { + define amdgpu_kernel void @test_workitem_id_z_unpacked() "amdgpu-flat-work-group-size"="8192,8192" !reqd_work_group_size !0 { ret void } - define amdgpu_kernel void @test_workitem_id_x_packed() !reqd_work_group_size !0 { + define amdgpu_kernel void @test_workitem_id_x_packed() "amdgpu-flat-work-group-size"="8192,8192" !reqd_work_group_size !0 { ret void } - define amdgpu_kernel void @test_workitem_id_y_packed() !reqd_work_group_size !0 { + define amdgpu_kernel void @test_workitem_id_y_packed() "amdgpu-flat-work-group-size"="8192,8192" !reqd_work_group_size !0 { ret void } - define amdgpu_kernel void @test_workitem_id_z_packed() !reqd_work_group_size !0 { + define amdgpu_kernel void @test_workitem_id_z_packed() "amdgpu-flat-work-group-size"="8192,8192" !reqd_work_group_size !0 { ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll index ef933fc7fdd4a..f84cdc32f1e86 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll @@ -141,7 +141,7 @@ define void @test_workitem_id_z_func(ptr addrspace(1) %out) #1 { ; ALL: flat_store_{{dword|b32}} v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] ; ALL: flat_store_{{dword|b32}} v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] -define amdgpu_kernel void @test_reqd_workgroup_size_x_only(ptr %out) !reqd_work_group_size !0 { +define amdgpu_kernel void @test_reqd_workgroup_size_x_only(ptr %out) "amdgpu-flat-work-group-size"="64,64" !reqd_work_group_size !0 { %id.x = call i32 @llvm.amdgcn.workitem.id.x() %id.y = call i32 @llvm.amdgcn.workitem.id.y() %id.z = call i32 @llvm.amdgcn.workitem.id.z() @@ -163,7 +163,7 @@ define amdgpu_kernel void @test_reqd_workgroup_size_x_only(ptr %out) !reqd_work_ ; PACKED: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MASKED]] ; ALL: flat_store_{{dword|b32}} v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] -define amdgpu_kernel void @test_reqd_workgroup_size_y_only(ptr %out) !reqd_work_group_size !1 { +define amdgpu_kernel void @test_reqd_workgroup_size_y_only(ptr %out) "amdgpu-flat-work-group-size"="64,64" !reqd_work_group_size !1 { %id.x = call i32 @llvm.amdgcn... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/200989 _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
