llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-backend-amdgpu Author: Jun Wang (jwanggit86) <details> <summary>Changes</summary> A new function attribute named amdgpu-num-work-groups is added. This attribute allows programmers to let the compiler know the number of workgroups to be launched and do optimizations based on that information. --- Full diff: https://github.com/llvm/llvm-project/pull/75647.diff 13 Files Affected: - (modified) clang/include/clang/Basic/Attr.td (+7) - (modified) clang/include/clang/Basic/AttrDocs.td (+23) - (modified) clang/lib/CodeGen/Targets/AMDGPU.cpp (+7) - (modified) clang/lib/Sema/SemaDeclAttr.cpp (+13) - (modified) clang/test/Misc/pragma-attribute-supported-attributes-list.test (+1) - (modified) llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp (+4) - (modified) llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp (+6) - (modified) llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h (+3) - (modified) llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp (+1) - (modified) llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h (+9) - (modified) llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp (+15) - (modified) llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h (+8) - (added) llvm/test/CodeGen/AMDGPU/attr-amdgpu-num-work-groups.ll (+82) ``````````diff diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index 5943583d92773a..605fcbbff027b9 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -2011,6 +2011,13 @@ def AMDGPUNumVGPR : InheritableAttr { let Subjects = SubjectList<[Function], ErrorDiag, "kernel functions">; } +def AMDGPUNumWorkGroups : InheritableAttr { + let Spellings = [Clang<"amdgpu_num_work_groups", 0>]; + let Args = [UnsignedArgument<"NumWorkGroups">]; + let Documentation = [AMDGPUNumWorkGroupsDocs]; + let Subjects = SubjectList<[Function], ErrorDiag, "kernel functions">; +} + def AMDGPUKernelCall : DeclOrTypeAttr { let Spellings = [Clang<"amdgpu_kernel">]; let Documentation = [Undocumented]; diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index 77950ab6d877ea..0bf3ccf367284c 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -2693,6 +2693,29 @@ An error will be given if: }]; } +def AMDGPUNumWorkGroupsDocs : Documentation { + let Category = DocCatAMDGPUAttributes; + let Content = [{ +The number of work groups specifies the number of work groups when the kernel +is dispatched. + +Clang supports the +``__attribute__((amdgpu_num_work_groups(<num>)))`` attribute for the +AMDGPU target. This attribute may be attached to a kernel function definition +and is an optimization hint. + +``<num>`` parameter specifies the number of work groups. + +If specified, the AMDGPU target backend might be able to produce better machine +code. + +An error will be given if: + - Specified values violate subtarget specifications; + - Specified values are not compatible with values provided through other + attributes. + }]; +} + def DocCatCallingConvs : DocumentationCategory<"Calling Conventions"> { let Content = [{ Clang supports several different calling conventions, depending on the target diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp index 03ac6b78598fc8..11a0835f37f4a9 100644 --- a/clang/lib/CodeGen/Targets/AMDGPU.cpp +++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp @@ -356,6 +356,13 @@ void AMDGPUTargetCodeGenInfo::setFunctionDeclAttributes( if (NumVGPR != 0) F->addFnAttr("amdgpu-num-vgpr", llvm::utostr(NumVGPR)); } + + if (const auto *Attr = FD->getAttr<AMDGPUNumWorkGroupsAttr>()) { + uint32_t NumWG = Attr->getNumWorkGroups(); + + if (NumWG != 0) + F->addFnAttr("amdgpu-num-work-groups", llvm::utostr(NumWG)); + } } /// Emits control constants used to change per-architecture behaviour in the diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index 5b29b05dee54b3..3737dd256aff02 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -8051,6 +8051,16 @@ static void handleAMDGPUNumVGPRAttr(Sema &S, Decl *D, const ParsedAttr &AL) { D->addAttr(::new (S.Context) AMDGPUNumVGPRAttr(S.Context, AL, NumVGPR)); } +static void handleAMDGPUNumWorkGroupsAttr(Sema &S, Decl *D, + const ParsedAttr &AL) { + uint32_t NumWG = 0; + Expr *NumWGExpr = AL.getArgAsExpr(0); + if (!checkUInt32Argument(S, AL, NumWGExpr, NumWG)) + return; + + D->addAttr(::new (S.Context) AMDGPUNumWorkGroupsAttr(S.Context, AL, NumWG)); +} + static void handleX86ForceAlignArgPointerAttr(Sema &S, Decl *D, const ParsedAttr &AL) { // If we try to apply it to a function pointer, don't warn, but don't @@ -9058,6 +9068,9 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL, case ParsedAttr::AT_AMDGPUNumVGPR: handleAMDGPUNumVGPRAttr(S, D, AL); break; + case ParsedAttr::AT_AMDGPUNumWorkGroups: + handleAMDGPUNumWorkGroupsAttr(S, D, AL); + break; case ParsedAttr::AT_AVRSignal: handleAVRSignalAttr(S, D, AL); break; diff --git a/clang/test/Misc/pragma-attribute-supported-attributes-list.test b/clang/test/Misc/pragma-attribute-supported-attributes-list.test index bdfda430eea86c..d42bb52cc8bcfa 100644 --- a/clang/test/Misc/pragma-attribute-supported-attributes-list.test +++ b/clang/test/Misc/pragma-attribute-supported-attributes-list.test @@ -6,6 +6,7 @@ // CHECK-NEXT: AMDGPUFlatWorkGroupSize (SubjectMatchRule_function) // CHECK-NEXT: AMDGPUNumSGPR (SubjectMatchRule_function) // CHECK-NEXT: AMDGPUNumVGPR (SubjectMatchRule_function) +// CHECK-NEXT: AMDGPUNumWorkGroups (SubjectMatchRule_function) // CHECK-NEXT: AMDGPUWavesPerEU (SubjectMatchRule_function) // CHECK-NEXT: AVRSignal (SubjectMatchRule_function) // CHECK-NEXT: AbiTag (SubjectMatchRule_record_not_is_union, SubjectMatchRule_variable, SubjectMatchRule_function, SubjectMatchRule_namespace) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp index b51a876750b58b..b9ede45e174a7d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp @@ -494,6 +494,10 @@ MetadataStreamerMsgPackV4::getHSAKernelProps(const MachineFunction &MF, Kern[".max_flat_workgroup_size"] = Kern.getDocument()->getNode(MFI.getMaxFlatWorkGroupSize()); + unsigned NumWG = MFI.getNumWorkGroups(); + if (NumWG != 0) { + Kern[".num_work_groups"] = Kern.getDocument()->getNode(NumWG); + } Kern[".sgpr_spill_count"] = Kern.getDocument()->getNode(MFI.getNumSpilledSGPRs()); Kern[".vgpr_spill_count"] = diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp index f19c5766856408..d7f5c456706ecd 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -1108,3 +1108,9 @@ void GCNUserSGPRUsageInfo::allocKernargPreloadSGPRs(unsigned NumSGPRs) { unsigned GCNUserSGPRUsageInfo::getNumFreeUserSGPRs() { return AMDGPU::getMaxNumUserSGPRs(ST) - NumUsedUserSGPRs; } + +unsigned AMDGPUSubtarget::getNumWorkGroups(const Function &F) const { + const unsigned Default = 0; + return AMDGPU::getUnsignedIntegerAttribute(F, "amdgpu-num-work-groups", Default); +} + diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h index b72697973be7a1..b791399c38dff8 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -288,6 +288,9 @@ class AMDGPUSubtarget { /// 2) dimension. unsigned getMaxWorkitemID(const Function &Kernel, unsigned Dimension) const; + /// Return the number of work groups for the function. + unsigned getNumWorkGroups(const Function &F) const; + /// Return true if only a single workitem can be active in a wave. bool isSingleLaneExecution(const Function &Kernel) const; diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp index 48c341917ddec7..2f483e18544a78 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -46,6 +46,7 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const Function &F, const GCNSubtarget &ST = *static_cast<const GCNSubtarget *>(STI); FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F); WavesPerEU = ST.getWavesPerEU(F); + NumWorkGroups = ST.getNumWorkGroups(F); Occupancy = ST.computeOccupancy(F, getLDSSize()); CallingConv::ID CC = F.getCallingConv(); diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h index 7ff50c80081d30..fc244552f40da8 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -426,6 +426,9 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction, const AMDGPUGWSResourcePseudoSourceValue GWSResourcePSV; + // Default/requested number of work groups for the function. + unsigned NumWorkGroups = 0; + private: unsigned NumUserSGPRs = 0; unsigned NumSystemSGPRs = 0; @@ -1094,6 +1097,12 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction, // \returns true if a function needs or may need AGPRs. bool usesAGPRs(const MachineFunction &MF) const; + + /// \returns Default/requested number of work groups for this function. + unsigned getNumWorkGroups() const { + return NumWorkGroups; + } + }; } // end namespace llvm diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 4edd7960bd8c40..82e3bca7ab73b1 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -1221,6 +1221,21 @@ getIntegerPairAttribute(const Function &F, StringRef Name, return Ints; } +unsigned getUnsignedIntegerAttribute(const Function &F, StringRef Name, unsigned Default) { + Attribute A = F.getFnAttribute(Name); + if (!A.isStringAttribute()) + return Default; + + LLVMContext &Ctx = F.getContext(); + unsigned IntVal = Default; + StringRef Str = A.getValueAsString(); + if (Str.trim().getAsInteger(0, IntVal)) { + Ctx.emitError("can't parse integer attribute " + Name); + return Default; + } + return IntVal; +} + unsigned getVmcntBitMask(const IsaVersion &Version) { return (1 << (getVmcntBitWidthLo(Version.Major) + getVmcntBitWidthHi(Version.Major))) - diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index 3c9f330cbcded9..c54c1638fa97a1 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -818,6 +818,14 @@ bool shouldEmitConstantsToTextSection(const Triple &TT); /// to integer. int getIntegerAttribute(const Function &F, StringRef Name, int Default); +/// \returns Unsigned Integer value requested using \p F's \p Name attribute. +/// +/// \returns \p Default if attribute is not present. +/// +/// \returns \p Default and emits error if requested value cannot be converted +/// to integer. +unsigned getUnsignedIntegerAttribute(const Function &F, StringRef Name, unsigned Default); + /// \returns A pair of integer values requested using \p F's \p Name attribute /// in "first[,second]" format ("second" is optional unless \p OnlyFirstRequired /// is false). diff --git a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-num-work-groups.ll b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-num-work-groups.ll new file mode 100644 index 00000000000000..315cd7dc0c0d91 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-num-work-groups.ll @@ -0,0 +1,82 @@ +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s + +; Attribute not specified. +; CHECK-LABEL: {{^}}empty_no_attribute: +define amdgpu_kernel void @empty_no_attribute() { +entry: + ret void +} + +; Ignore if number of work groups is 0. +; CHECK-LABEL: {{^}}empty_num_work_groups_0: +define amdgpu_kernel void @empty_num_work_groups_0() #0 { +entry: + ret void +} +attributes #0 = {"amdgpu-num-work-groups"="0"} + +; Exactly 1 work group. +; CHECK-LABEL: {{^}}empty_num_work_groups_1: +define amdgpu_kernel void @empty_num_work_groups_1() #1 { +entry: + ret void +} +attributes #1 = {"amdgpu-num-work-groups"="1"} + +; Exactly 5 work groups. +; CHECK-LABEL: {{^}}empty_num_work_groups_5: +define amdgpu_kernel void @empty_num_work_groups_5() #2 { +entry: + ret void +} +attributes #2 = {"amdgpu-num-work-groups"="5"} + +; Exactly 32 work groups. +; CHECK-LABEL: {{^}}empty_num_work_groups_32: +define amdgpu_kernel void @empty_num_work_groups_32() #3 { +entry: + ret void +} +attributes #3 = {"amdgpu-num-work-groups"="32"} + +; Exactly 50 work groups. +; CHECK-LABEL: {{^}}empty_num_work_groups_50: +define amdgpu_kernel void @empty_num_work_groups_50() #4 { +entry: + ret void +} +attributes #4 = {"amdgpu-num-work-groups"="50"} + +; Exactly 256 work groups. +; CHECK-LABEL: {{^}}empty_num_work_groups_256: +define amdgpu_kernel void @empty_num_work_groups_256() #5 { +entry: + ret void +} +attributes #5 = {"amdgpu-num-work-groups"="256"} + +; Exactly 1024 work groups. +; CHECK-LABEL: {{^}}empty_num_work_groups_1024: +define amdgpu_kernel void @empty_num_work_groups_1024() #6 { +entry: + ret void +} +attributes #6 = {"amdgpu-num-work-groups"="1024"} + +; CHECK: .amdgpu_metadata +; CHECK: .name: empty_no_attribute +; CHECK-NEXT: .private_segment_fixed_size: 0 +; CHECK: .name: empty_num_work_groups_0 +; CHECK-NEXT: .private_segment_fixed_size: 0 +; CHECK: .name: empty_num_work_groups_1 +; CHECK-NEXT: .num_work_groups: 1 +; CHECK: .name: empty_num_work_groups_5 +; CHECK-NEXT: .num_work_groups: 5 +; CHECK: .name: empty_num_work_groups_32 +; CHECK-NEXT: .num_work_groups: 32 +; CHECK: .name: empty_num_work_groups_50 +; CHECK-NEXT: .num_work_groups: 50 +; CHECK: .name: empty_num_work_groups_256 +; CHECK-NEXT: .num_work_groups: 256 +; CHECK: .name: empty_num_work_groups_1024 +; CHECK-NEXT: .num_work_groups: 1024 `````````` </details> https://github.com/llvm/llvm-project/pull/75647 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits