https://github.com/labrinea updated https://github.com/llvm/llvm-project/pull/150267
>From 0575957f6c7f9524778f85220c426be6e56f9714 Mon Sep 17 00:00:00 2001 From: Alexandros Lamprineas <alexandros.lamprin...@arm.com> Date: Thu, 26 Jun 2025 15:52:11 +0100 Subject: [PATCH 1/4] [FMV][AArch64] Allow user to override version priority. Implements https://github.com/ARM-software/acle/pull/404 This allows the user to specify "featA+featB;priority=[1-255]" where priority=255 means highest priority. If the explicit priority string is omitted then the priority of "featA+featB" is implied, which is lower than priority=1. Internally this gets expanded using special FMV features P0 ... P7 which can encode up to 256-1 priority levels (excluding all zeros). These do not have a corresponding detection bit at pos FEAT_#enum, so I made this field optional in FMVInfo. Also they don't affect the codegen or name mangling of versioned functions. --- .../clang/Basic/DiagnosticSemaKinds.td | 6 + clang/include/clang/Sema/SemaARM.h | 3 +- clang/include/clang/Sema/SemaRISCV.h | 3 +- clang/lib/CodeGen/Targets/AArch64.cpp | 7 +- clang/lib/Sema/SemaARM.cpp | 63 +++++- clang/lib/Sema/SemaDeclAttr.cpp | 7 +- clang/lib/Sema/SemaRISCV.cpp | 4 +- clang/test/AST/attr-target-version.c | 30 ++- .../AArch64/fmv-duplicate-mangled-name.c | 16 ++ .../CodeGen/AArch64/fmv-explicit-priority.c | 193 ++++++++++++++++++ clang/test/Sema/attr-target-clones-aarch64.c | 10 + clang/test/Sema/attr-target-version.c | 9 + .../llvm/Analysis/TargetTransformInfo.h | 6 +- .../llvm/Analysis/TargetTransformInfoImpl.h | 4 + .../TargetParser/AArch64FeatPriorities.inc | 11 +- .../llvm/TargetParser/AArch64TargetParser.h | 11 +- llvm/lib/Analysis/TargetTransformInfo.cpp | 4 + llvm/lib/Target/AArch64/AArch64FMV.td | 11 + .../AArch64/AArch64TargetTransformInfo.cpp | 17 +- .../AArch64/AArch64TargetTransformInfo.h | 1 + llvm/lib/TargetParser/AArch64TargetParser.cpp | 31 ++- llvm/lib/Transforms/IPO/GlobalOpt.cpp | 22 +- .../TableGen/Basic/ARMTargetDefEmitter.cpp | 9 +- 23 files changed, 427 insertions(+), 51 deletions(-) 
create mode 100644 clang/test/CodeGen/AArch64/fmv-explicit-priority.c diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 116341f4b66d5..c002608cb6ecd 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -12781,6 +12781,12 @@ def warn_target_clone_duplicate_options def warn_target_clone_no_impact_options : Warning<"version list contains entries that don't impact code generation">, InGroup<FunctionMultiVersioning>; +def warn_version_priority_out_of_range + : Warning<"version priority '%0' is outside the allowed range [1-255]; ignoring priority">, + InGroup<FunctionMultiVersioning>; +def warn_invalid_default_version_priority + : Warning<"priority of default version cannot be overridden; ignoring priority">, + InGroup<FunctionMultiVersioning>; // three-way comparison operator diagnostics def err_implied_comparison_category_type_not_found : Error< diff --git a/clang/include/clang/Sema/SemaARM.h b/clang/include/clang/Sema/SemaARM.h index 104992e8826c3..66eb87c568c8f 100644 --- a/clang/include/clang/Sema/SemaARM.h +++ b/clang/include/clang/Sema/SemaARM.h @@ -92,7 +92,8 @@ class SemaARM : public SemaBase { /// false otherwise. 
bool areLaxCompatibleSveTypes(QualType FirstType, QualType SecondType); - bool checkTargetVersionAttr(const StringRef Str, const SourceLocation Loc); + bool checkTargetVersionAttr(const StringRef Param, const SourceLocation Loc, + SmallString<64> &NewParam); bool checkTargetClonesAttr(SmallVectorImpl<StringRef> &Params, SmallVectorImpl<SourceLocation> &Locs, SmallVectorImpl<SmallString<64>> &NewParams); diff --git a/clang/include/clang/Sema/SemaRISCV.h b/clang/include/clang/Sema/SemaRISCV.h index 844cc3ce4a440..863b8a143f48a 100644 --- a/clang/include/clang/Sema/SemaRISCV.h +++ b/clang/include/clang/Sema/SemaRISCV.h @@ -56,7 +56,8 @@ class SemaRISCV : public SemaBase { std::unique_ptr<sema::RISCVIntrinsicManager> IntrinsicManager; - bool checkTargetVersionAttr(const StringRef Param, const SourceLocation Loc); + bool checkTargetVersionAttr(const StringRef Param, const SourceLocation Loc, + SmallString<64> &NewParam); bool checkTargetClonesAttr(SmallVectorImpl<StringRef> &Params, SmallVectorImpl<SourceLocation> &Locs, SmallVectorImpl<SmallString<64>> &NewParams); diff --git a/clang/lib/CodeGen/Targets/AArch64.cpp b/clang/lib/CodeGen/Targets/AArch64.cpp index 289f8a9dcf211..89d4c83d727ed 100644 --- a/clang/lib/CodeGen/Targets/AArch64.cpp +++ b/clang/lib/CodeGen/Targets/AArch64.cpp @@ -1338,9 +1338,10 @@ void AArch64ABIInfo::appendAttributeMangling(StringRef AttrStr, llvm::SmallDenseSet<StringRef, 8> UniqueFeats; for (auto &Feat : Features) - if (auto Ext = llvm::AArch64::parseFMVExtension(Feat)) - if (UniqueFeats.insert(Ext->Name).second) - Out << 'M' << Ext->Name; + if (getTarget().doesFeatureAffectCodeGen(Feat)) + if (auto Ext = llvm::AArch64::parseFMVExtension(Feat)) + if (UniqueFeats.insert(Ext->Name).second) + Out << 'M' << Ext->Name; } std::unique_ptr<TargetCodeGenInfo> diff --git a/clang/lib/Sema/SemaARM.cpp b/clang/lib/Sema/SemaARM.cpp index e09c35296ef3b..167790b77d791 100644 --- a/clang/lib/Sema/SemaARM.cpp +++ b/clang/lib/Sema/SemaARM.cpp @@ -1579,19 
+1579,53 @@ bool SemaARM::areLaxCompatibleSveTypes(QualType FirstType, IsLaxCompatible(SecondType, FirstType); } +static void appendFeature(StringRef Feat, SmallString<64> &Buffer) { + if (!Buffer.empty()) + Buffer.append("+"); + Buffer.append(Feat); +} + +static void convertPriorityString(unsigned Priority, + SmallString<64> &NewParam) { + StringRef PriorityString[8] = {"P0", "P1", "P2", "P3", + "P4", "P5", "P6", "P7"}; + + assert(Priority > 0 && Priority < 256 && "priority out of range"); + // Convert priority=[1-255] -> P0 + ... + P7 + for (unsigned BitPos = 0; BitPos < 8; ++BitPos) + if (Priority & (1U << BitPos)) + appendFeature(PriorityString[BitPos], NewParam); +} + bool SemaARM::checkTargetVersionAttr(const StringRef Param, - const SourceLocation Loc) { + const SourceLocation Loc, + SmallString<64> &NewParam) { using namespace DiagAttrParams; + auto [LHS, RHS] = Param.split(';'); + bool IsDefault = false; llvm::SmallVector<StringRef, 8> Features; - Param.split(Features, '+'); + LHS.split(Features, '+'); for (StringRef Feat : Features) { Feat = Feat.trim(); if (Feat == "default") - continue; - if (!getASTContext().getTargetInfo().validateCpuSupports(Feat)) + IsDefault = true; + else if (!getASTContext().getTargetInfo().validateCpuSupports(Feat)) return Diag(Loc, diag::warn_unsupported_target_attribute) << Unsupported << None << Feat << TargetVersion; + appendFeature(Feat, NewParam); + } + + if (!RHS.empty() && RHS.consume_front("priority=")) { + if (IsDefault) + Diag(Loc, diag::warn_invalid_default_version_priority); + else { + unsigned Digit; + if (RHS.getAsInteger(0, Digit) || Digit < 1 || Digit > 255) + Diag(Loc, diag::warn_version_priority_out_of_range) << RHS; + else + convertPriorityString(Digit, NewParam); + } } return false; } @@ -1613,15 +1647,20 @@ bool SemaARM::checkTargetClonesAttr( const StringRef Param = Params[I].trim(); const SourceLocation &Loc = Locs[I]; - if (Param.empty()) + auto [LHS, RHS] = Param.split(';'); + bool HasPriority = 
!RHS.empty() && RHS.consume_front("priority="); + + if (LHS.empty()) return Diag(Loc, diag::warn_unsupported_target_attribute) << Unsupported << None << "" << TargetClones; - if (Param == "default") { + if (LHS == "default") { if (HasDefault) Diag(Loc, diag::warn_target_clone_duplicate_options); else { - NewParams.push_back(Param); + if (HasPriority) + Diag(Loc, diag::warn_invalid_default_version_priority); + NewParams.push_back(LHS); HasDefault = true; } continue; @@ -1630,7 +1669,7 @@ bool SemaARM::checkTargetClonesAttr( bool HasCodeGenImpact = false; llvm::SmallVector<StringRef, 8> Features; llvm::SmallVector<StringRef, 8> ValidFeatures; - Param.split(Features, '+'); + LHS.split(Features, '+'); for (StringRef Feat : Features) { Feat = Feat.trim(); if (!getASTContext().getTargetInfo().validateCpuSupports(Feat)) { @@ -1660,6 +1699,14 @@ bool SemaARM::checkTargetClonesAttr( continue; } + if (HasPriority) { + unsigned Digit; + if (RHS.getAsInteger(0, Digit) || Digit < 1 || Digit > 255) + Diag(Loc, diag::warn_version_priority_out_of_range) << RHS; + else + convertPriorityString(Digit, NewParam); + } + // Valid non-default argument. 
NewParams.push_back(NewParam); HasNonDefault = true; diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index 7a185106e4c6e..7726639ed987c 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -3343,19 +3343,20 @@ bool Sema::checkTargetAttr(SourceLocation LiteralLoc, StringRef AttrStr) { static void handleTargetVersionAttr(Sema &S, Decl *D, const ParsedAttr &AL) { StringRef Param; SourceLocation Loc; + SmallString<64> NewParam; if (!S.checkStringLiteralArgumentAttr(AL, 0, Param, &Loc)) return; if (S.Context.getTargetInfo().getTriple().isAArch64()) { - if (S.ARM().checkTargetVersionAttr(Param, Loc)) + if (S.ARM().checkTargetVersionAttr(Param, Loc, NewParam)) return; } else if (S.Context.getTargetInfo().getTriple().isRISCV()) { - if (S.RISCV().checkTargetVersionAttr(Param, Loc)) + if (S.RISCV().checkTargetVersionAttr(Param, Loc, NewParam)) return; } TargetVersionAttr *NewAttr = - ::new (S.Context) TargetVersionAttr(S.Context, AL, Param); + ::new (S.Context) TargetVersionAttr(S.Context, AL, NewParam); D->addAttr(NewAttr); } diff --git a/clang/lib/Sema/SemaRISCV.cpp b/clang/lib/Sema/SemaRISCV.cpp index 7b16d080603bf..9bbcd4680f895 100644 --- a/clang/lib/Sema/SemaRISCV.cpp +++ b/clang/lib/Sema/SemaRISCV.cpp @@ -1646,7 +1646,8 @@ bool SemaRISCV::isValidFMVExtension(StringRef Ext) { } bool SemaRISCV::checkTargetVersionAttr(const StringRef Param, - const SourceLocation Loc) { + const SourceLocation Loc, + SmallString<64> &NewParam) { using namespace DiagAttrParams; llvm::SmallVector<StringRef, 8> AttrStrs; @@ -1692,6 +1693,7 @@ bool SemaRISCV::checkTargetVersionAttr(const StringRef Param, return Diag(Loc, diag::warn_unsupported_target_attribute) << Unsupported << None << Param << TargetVersion; + NewParam = Param; return false; } diff --git a/clang/test/AST/attr-target-version.c b/clang/test/AST/attr-target-version.c index b537f5e685a31..c216cd6d1a28a 100644 --- a/clang/test/AST/attr-target-version.c +++ 
b/clang/test/AST/attr-target-version.c @@ -2,7 +2,29 @@ int __attribute__((target_version("sve2-bitperm + sha2"))) foov(void) { return 1; } int __attribute__((target_clones(" lse + fp + sha3 ", "default"))) fooc(void) { return 2; } -// CHECK: TargetVersionAttr -// CHECK: sve2-bitperm + sha2 -// CHECK: TargetClonesAttr -// CHECK: fp+lse+sha3 default + +int __attribute__((target_version("aes;priority=1"))) explicit_priority(void) { return 1; } +int __attribute__((target_version("bf16;priority=2"))) explicit_priority(void) { return 2; } +int __attribute__((target_version("crc;priority=4"))) explicit_priority(void) { return 4; } +int __attribute__((target_version("dpb2;priority=8"))) explicit_priority(void) { return 8; } +int __attribute__((target_version("fp16fml;priority=16"))) explicit_priority(void) { return 16; } +int __attribute__((target_version("dotprod;priority=32"))) explicit_priority(void) { return 32; } +int __attribute__((target_version("sve;priority=64"))) explicit_priority(void) { return 64; } +int __attribute__((target_version("mops;priority=128"))) explicit_priority(void) { return 128; } + +int __attribute__((target_clones("simd;priority=255", "default"))) explicit_priority(void) { + return 0; +} + +// CHECK: TargetVersionAttr {{.*}} "sve2-bitperm+sha2" +// CHECK: TargetClonesAttr {{.*}} fp+lse+sha3 default + +// CHECK: TargetVersionAttr {{.*}} "aes+P0" +// CHECK: TargetVersionAttr {{.*}} "bf16+P1" +// CHECK: TargetVersionAttr {{.*}} "crc+P2" +// CHECK: TargetVersionAttr {{.*}} "dpb2+P3" +// CHECK: TargetVersionAttr {{.*}} "fp16fml+P4" +// CHECK: TargetVersionAttr {{.*}} "dotprod+P5" +// CHECK: TargetVersionAttr {{.*}} "sve+P6" +// CHECK: TargetVersionAttr {{.*}} "mops+P7" +// CHECK: TargetClonesAttr {{.*}} simd+P0+P1+P2+P3+P4+P5+P6+P7 default diff --git a/clang/test/CodeGen/AArch64/fmv-duplicate-mangled-name.c b/clang/test/CodeGen/AArch64/fmv-duplicate-mangled-name.c index e7e611e09542e..ebe5b75cf7946 100644 --- 
a/clang/test/CodeGen/AArch64/fmv-duplicate-mangled-name.c +++ b/clang/test/CodeGen/AArch64/fmv-duplicate-mangled-name.c @@ -1,5 +1,7 @@ // RUN: %clang_cc1 -triple aarch64-linux-gnu -verify -emit-llvm-only %s -DCHECK_IMPLICIT_DEFAULT // RUN: %clang_cc1 -triple aarch64-linux-gnu -verify -emit-llvm-only %s -DCHECK_EXPLICIT_DEFAULT +// RUN: %clang_cc1 -triple aarch64-linux-gnu -verify -emit-llvm-only %s -DCHECK_EXPLICIT_VERSION_PRIORITY +// RUN: %clang_cc1 -triple aarch64-linux-gnu -verify -emit-llvm-only %s -DCHECK_EXPLICIT_CLONES_PRIORITY #if defined(CHECK_IMPLICIT_DEFAULT) @@ -21,4 +23,18 @@ __attribute__((target_version("default"))) int explicit_default_bad(void) { retu // expected-note@-2 {{previous definition is here}} __attribute__((target_clones("aes", "lse", "default"))) int explicit_default_bad(void) { return 1; } +#elif defined(CHECK_EXPLICIT_VERSION_PRIORITY) + +__attribute__((target_version("aes"))) int explicit_version_priority(void) { return 0; } +// expected-error@+2 {{definition with same mangled name 'explicit_version_priority._Maes' as another definition}} +// expected-note@-2 {{previous definition is here}} +__attribute__((target_version("aes;priority=10"))) int explicit_version_priority(void) { return 1; } + +#elif defined(CHECK_EXPLICIT_CLONES_PRIORITY) + +__attribute__((target_version("aes;priority=20"))) int explicit_clones_priority(void) { return 0; } +// expected-error@+2 {{definition with same mangled name 'explicit_clones_priority._Maes' as another definition}} +// expected-note@-2 {{previous definition is here}} +__attribute__((target_clones("aes;priority=5", "lse"))) int explicit_clones_priority(void) { return 1; } + #endif diff --git a/clang/test/CodeGen/AArch64/fmv-explicit-priority.c b/clang/test/CodeGen/AArch64/fmv-explicit-priority.c new file mode 100644 index 0000000000000..437221c95542b --- /dev/null +++ b/clang/test/CodeGen/AArch64/fmv-explicit-priority.c @@ -0,0 +1,193 @@ +// NOTE: Assertions have been autogenerated by 
utils/update_cc_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals --include-generated-funcs +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -O3 -fno-inline -emit-llvm -o - %s | FileCheck %s + +__attribute__((target_version("lse;priority=30"))) int foo(void) { return 1; } +__attribute__((target_version("sve2;priority=20"))) int foo(void) { return 2; } +__attribute__((target_version("sve;priority=10"))) int foo(void) { return 3; } +__attribute__((target_version( "default"))) int foo(void) { return 0; } + +__attribute__((target_clones("lse+sve2;priority=3", "lse;priority=2", "sve;priority=1", "default"))) +int fmv_caller(void) { return foo(); } + + +__attribute__((target_version("aes"))) int bar(void) { return 1; } +__attribute__((target_version("sm4;priority=5"))) int bar(void) { return 2; } +__attribute__((target_version("default"))) int bar(void) { return 0; } + +__attribute__((target("aes"))) int regular_caller_aes() { return bar(); } +__attribute__((target("sm4"))) int regular_caller_sm4() { return bar(); } +//. +// CHECK: @__aarch64_cpu_features = external dso_local local_unnamed_addr global { i64 } +// CHECK: @foo = weak_odr ifunc i32 (), ptr @foo.resolver +// CHECK: @fmv_caller = weak_odr ifunc i32 (), ptr @fmv_caller.resolver +// CHECK: @bar = weak_odr ifunc i32 (), ptr @bar.resolver +//. 
+// CHECK: Function Attrs: mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none) +// CHECK-LABEL: define {{[^@]+}}@foo._Mlse +// CHECK-SAME: () #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 1 +// +// +// CHECK: Function Attrs: mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none) vscale_range(1,16) +// CHECK-LABEL: define {{[^@]+}}@foo._Msve2 +// CHECK-SAME: () #[[ATTR1:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 2 +// +// +// CHECK: Function Attrs: mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none) vscale_range(1,16) +// CHECK-LABEL: define {{[^@]+}}@foo._Msve +// CHECK-SAME: () #[[ATTR2:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 3 +// +// +// CHECK: Function Attrs: mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none) +// CHECK-LABEL: define {{[^@]+}}@foo.default +// CHECK-SAME: () #[[ATTR3:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 0 +// +// +// CHECK: Function Attrs: mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none) vscale_range(1,16) +// CHECK-LABEL: define {{[^@]+}}@fmv_caller._MlseMsve2 +// CHECK-SAME: () #[[ATTR4:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[CALL:%.*]] = tail call i32 @foo._Mlse() +// CHECK-NEXT: ret i32 [[CALL]] +// +// +// CHECK: Function Attrs: mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none) vscale_range(1,16) +// CHECK-LABEL: define {{[^@]+}}@fmv_caller._Mlse +// CHECK-SAME: () #[[ATTR5:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[CALL:%.*]] = tail call i32 @foo._Mlse() +// CHECK-NEXT: ret i32 [[CALL]] +// +// +// CHECK: Function Attrs: noinline nounwind vscale_range(1,16) +// CHECK-LABEL: define {{[^@]+}}@fmv_caller._Msve +// CHECK-SAME: () #[[ATTR6:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[CALL:%.*]] = tail call i32 @foo() #[[ATTR12:[0-9]+]] +// CHECK-NEXT: ret i32 [[CALL]] +// +// +// 
CHECK: Function Attrs: mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none) vscale_range(1,16) +// CHECK-LABEL: define {{[^@]+}}@fmv_caller.default +// CHECK-SAME: () #[[ATTR7:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[CALL:%.*]] = tail call i32 @foo.default() +// CHECK-NEXT: ret i32 [[CALL]] +// +// +// CHECK: Function Attrs: mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none) +// CHECK-LABEL: define {{[^@]+}}@bar._Maes +// CHECK-SAME: () #[[ATTR8:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 1 +// +// +// CHECK: Function Attrs: mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none) +// CHECK-LABEL: define {{[^@]+}}@bar._Msm4 +// CHECK-SAME: () #[[ATTR9:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 2 +// +// +// CHECK: Function Attrs: mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none) +// CHECK-LABEL: define {{[^@]+}}@bar.default +// CHECK-SAME: () #[[ATTR3]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 0 +// +// +// CHECK: Function Attrs: noinline nounwind +// CHECK-LABEL: define {{[^@]+}}@regular_caller_aes +// CHECK-SAME: () local_unnamed_addr #[[ATTR10:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[CALL:%.*]] = tail call i32 @bar() #[[ATTR12]] +// CHECK-NEXT: ret i32 [[CALL]] +// +// +// CHECK: Function Attrs: mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none) +// CHECK-LABEL: define {{[^@]+}}@regular_caller_sm4 +// CHECK-SAME: () local_unnamed_addr #[[ATTR11:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[CALL:%.*]] = tail call i32 @bar._Msm4() +// CHECK-NEXT: ret i32 [[CALL]] +// +// +// CHECK-LABEL: define {{[^@]+}}@foo.resolver() comdat { +// CHECK-NEXT: resolver_entry: +// CHECK-NEXT: tail call void @__init_cpu_features_resolver() +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 128 +// CHECK-NEXT: 
[[DOTNOT:%.*]] = icmp eq i64 [[TMP1]], 0 +// CHECK-NEXT: br i1 [[DOTNOT]], label [[RESOLVER_ELSE:%.*]], label [[COMMON_RET:%.*]] +// CHECK: common.ret: +// CHECK-NEXT: [[COMMON_RET_OP:%.*]] = phi ptr [ @foo._Mlse, [[RESOLVER_ENTRY:%.*]] ], [ @foo._Msve2, [[RESOLVER_ELSE]] ], [ [[FOO__MSVE_FOO_DEFAULT:%.*]], [[RESOLVER_ELSE2:%.*]] ] +// CHECK-NEXT: ret ptr [[COMMON_RET_OP]] +// CHECK: resolver_else: +// CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP0]], 69793284352 +// CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[TMP2]], 69793284352 +// CHECK-NEXT: br i1 [[TMP3]], label [[COMMON_RET]], label [[RESOLVER_ELSE2]] +// CHECK: resolver_else2: +// CHECK-NEXT: [[TMP4:%.*]] = and i64 [[TMP0]], 1073807616 +// CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[TMP4]], 1073807616 +// CHECK-NEXT: [[FOO__MSVE_FOO_DEFAULT]] = select i1 [[TMP5]], ptr @foo._Msve, ptr @foo.default +// CHECK-NEXT: br label [[COMMON_RET]] +// +// +// CHECK-LABEL: define {{[^@]+}}@fmv_caller.resolver() comdat { +// CHECK-NEXT: resolver_entry: +// CHECK-NEXT: tail call void @__init_cpu_features_resolver() +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 69793284480 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 69793284480 +// CHECK-NEXT: br i1 [[TMP2]], label [[COMMON_RET:%.*]], label [[RESOLVER_ELSE:%.*]] +// CHECK: common.ret: +// CHECK-NEXT: [[COMMON_RET_OP:%.*]] = phi ptr [ @fmv_caller._MlseMsve2, [[RESOLVER_ENTRY:%.*]] ], [ @fmv_caller._Mlse, [[RESOLVER_ELSE]] ], [ [[FMV_CALLER__MSVE_FMV_CALLER_DEFAULT:%.*]], [[RESOLVER_ELSE2:%.*]] ] +// CHECK-NEXT: ret ptr [[COMMON_RET_OP]] +// CHECK: resolver_else: +// CHECK-NEXT: [[TMP3:%.*]] = and i64 [[TMP0]], 128 +// CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i64 [[TMP3]], 0 +// CHECK-NEXT: br i1 [[DOTNOT]], label [[RESOLVER_ELSE2]], label [[COMMON_RET]] +// CHECK: resolver_else2: +// CHECK-NEXT: [[TMP4:%.*]] = and i64 [[TMP0]], 1073807616 +// CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[TMP4]], 1073807616 
+// CHECK-NEXT: [[FMV_CALLER__MSVE_FMV_CALLER_DEFAULT]] = select i1 [[TMP5]], ptr @fmv_caller._Msve, ptr @fmv_caller.default +// CHECK-NEXT: br label [[COMMON_RET]] +// +// +// CHECK-LABEL: define {{[^@]+}}@bar.resolver() comdat { +// CHECK-NEXT: resolver_entry: +// CHECK-NEXT: tail call void @__init_cpu_features_resolver() +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 800 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 800 +// CHECK-NEXT: [[TMP3:%.*]] = and i64 [[TMP0]], 33536 +// CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[TMP3]], 33536 +// CHECK-NEXT: [[BAR__MAES_BAR_DEFAULT:%.*]] = select i1 [[TMP4]], ptr @bar._Maes, ptr @bar.default +// CHECK-NEXT: [[COMMON_RET_OP:%.*]] = select i1 [[TMP2]], ptr @bar._Msm4, ptr [[BAR__MAES_BAR_DEFAULT]] +// CHECK-NEXT: ret ptr [[COMMON_RET_OP]] +// +//. +// CHECK: attributes #[[ATTR0]] = { mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none) "fmv-features"="P1,P2,P3,P4,lse" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+lse" } +// CHECK: attributes #[[ATTR1]] = { mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none) vscale_range(1,16) "fmv-features"="P2,P4,sve2" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+sve,+sve2" } +// CHECK: attributes #[[ATTR2]] = { mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none) vscale_range(1,16) "fmv-features"="P1,P3,sve" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+sve" } +// CHECK: attributes #[[ATTR3]] = { mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none) "fmv-features" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +// CHECK: attributes #[[ATTR4]] = { mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none) vscale_range(1,16) 
"fmv-features"="P0,P1,lse,sve2" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+lse,+sve,+sve2" } +// CHECK: attributes #[[ATTR5]] = { mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none) vscale_range(1,16) "fmv-features"="P1,lse" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+lse" } +// CHECK: attributes #[[ATTR6]] = { noinline nounwind vscale_range(1,16) "fmv-features"="P0,sve" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+sve" } +// CHECK: attributes #[[ATTR7]] = { mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none) vscale_range(1,16) "fmv-features" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +// CHECK: attributes #[[ATTR8]] = { mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none) "fmv-features"="aes" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+aes,+fp-armv8,+neon" } +// CHECK: attributes #[[ATTR9]] = { mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none) "fmv-features"="P0,P2,sm4" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+neon,+sm4" } +// CHECK: attributes #[[ATTR10]] = { noinline nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+aes,+fp-armv8,+neon" } +// CHECK: attributes #[[ATTR11]] = { mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none) "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+neon,+sm4" } +// CHECK: attributes #[[ATTR12]] = { nounwind } +//. +// CHECK: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} +// CHECK: [[META1:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} +//. 
diff --git a/clang/test/Sema/attr-target-clones-aarch64.c b/clang/test/Sema/attr-target-clones-aarch64.c index 93d87cef54569..d3354915737c2 100644 --- a/clang/test/Sema/attr-target-clones-aarch64.c +++ b/clang/test/Sema/attr-target-clones-aarch64.c @@ -80,3 +80,13 @@ int useage(void) { int __attribute__((target_clones("sve2-sha3+ssbs", "sm4"))) mv_after_use(void) { return 1; } // expected-error@+1 {{'main' cannot be a multiversioned function}} int __attribute__((target_clones("i8mm"))) main() { return 1; } + +//expected-warning@+2 {{unsupported 'priority=10' in the 'target_clones' attribute string; 'target_clones' attribute ignored}} +//expected-warning@+1 {{version list contains entries that don't impact code generation}} +int __attribute__((target_clones("priority=10;aes", "default"))) priority_before_features(void) { return 0; } + +//expected-warning@+1 {{version priority '0' is outside the allowed range [1-255]; ignoring priority}} +int __attribute__((target_clones("aes;priority=0", "default"))) priority_out_of_range(void) { return 0; } + +//expected-warning@+1 {{priority of default version cannot be overridden; ignoring priority}} +int __attribute__((target_clones("aes", "default;priority=10"))) priority_default_version(void) { return 0; } diff --git a/clang/test/Sema/attr-target-version.c b/clang/test/Sema/attr-target-version.c index d062212848daf..22e15aae890ba 100644 --- a/clang/test/Sema/attr-target-version.c +++ b/clang/test/Sema/attr-target-version.c @@ -117,3 +117,12 @@ int unspec_args_implicit_default_first(); int __attribute__((target_version("aes"))) unspec_args_implicit_default_first() { return -1; } // expected-note@+1 {{function multiversioning caused by this declaration}} int __attribute__((target_version("default"))) unspec_args_implicit_default_first() { return 0; } + +//expected-warning@+1 {{unsupported 'priority=10' in the 'target_version' attribute string; 'target_version' attribute ignored}} +int 
__attribute__((target_version("priority=10;aes"))) priority_before_features(void) { return 0; } + +//expected-warning@+1 {{version priority '256' is outside the allowed range [1-255]; ignoring priority}} +int __attribute__((target_version("aes;priority=256"))) priority_out_of_range(void) { return 0; } + +//expected-warning@+1 {{priority of default version cannot be overridden; ignoring priority}} +int __attribute__((target_version("default;priority=10"))) priority_default_version(void) { return 0; } diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h index 53c91bfe16804..51602f32d5102 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -1930,9 +1930,13 @@ class TargetTransformInfo { LLVM_ABI bool hasArmWideBranch(bool Thumb) const; /// Returns a bitmask constructed from the target-features or fmv-features - /// metadata of a function. + /// metadata of a function corresponding to its Arch Extensions. LLVM_ABI APInt getFeatureMask(const Function &F) const; + /// Returns a bitmask constructed from the target-features or fmv-features + /// metadata of a function corresponding to its FMV priority. + LLVM_ABI APInt getPriorityMask(const Function &F) const; + /// Returns true if this is an instance of a function with multiple versions. 
LLVM_ABI bool isMultiversionedFunction(const Function &F) const; diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index e879712121b59..5a8952117c0d7 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -1135,6 +1135,10 @@ class TargetTransformInfoImplBase { return APInt::getZero(32); } + virtual APInt getPriorityMask(const Function &F) const { + return APInt::getZero(32); + } + virtual bool isMultiversionedFunction(const Function &F) const { return false; } diff --git a/llvm/include/llvm/TargetParser/AArch64FeatPriorities.inc b/llvm/include/llvm/TargetParser/AArch64FeatPriorities.inc index f2bad28ada93e..f0291926bbe75 100644 --- a/llvm/include/llvm/TargetParser/AArch64FeatPriorities.inc +++ b/llvm/include/llvm/TargetParser/AArch64FeatPriorities.inc @@ -59,7 +59,16 @@ enum FeatPriorities { PRIOR_SME_I64, PRIOR_SME2, PRIOR_MOPS, - PRIOR_CSSC + PRIOR_CSSC, + PRIOR_MAX, + PRIOR_P0 = 120, + PRIOR_P1, + PRIOR_P2, + PRIOR_P3, + PRIOR_P4, + PRIOR_P5, + PRIOR_P6, + PRIOR_P7 }; #endif diff --git a/llvm/include/llvm/TargetParser/AArch64TargetParser.h b/llvm/include/llvm/TargetParser/AArch64TargetParser.h index 8e83b04681f58..4c9fb17104c6a 100644 --- a/llvm/include/llvm/TargetParser/AArch64TargetParser.h +++ b/llvm/include/llvm/TargetParser/AArch64TargetParser.h @@ -42,6 +42,8 @@ struct CpuInfo; static_assert(FEAT_MAX < 62, "Number of features in CPUFeatures are limited to 62 entries"); +static_assert(PRIOR_MAX < 120, "FeatPriorities is limited to 120 entries"); + // Each ArchExtKind correponds directly to a possible -target-feature. #define EMIT_ARCHEXTKIND_ENUM #include "llvm/TargetParser/AArch64TargetParserDef.inc" @@ -72,12 +74,13 @@ struct ExtensionInfo { struct FMVInfo { StringRef Name; // The target_version/target_clones spelling. - CPUFeatures FeatureBit; // Index of the bit in the FMV feature bitset. 
+ std::optional<CPUFeatures> + FeatureBit; // Index of the bit in the FMV feature bitset. FeatPriorities PriorityBit; // Index of the bit in the FMV priority bitset. std::optional<ArchExtKind> ID; // The architecture extension to enable. - FMVInfo(StringRef Name, CPUFeatures FeatureBit, FeatPriorities PriorityBit, - std::optional<ArchExtKind> ID) - : Name(Name), FeatureBit(FeatureBit), PriorityBit(PriorityBit), ID(ID) {}; + FMVInfo(StringRef Name, std::optional<CPUFeatures> FeatureBit, + FeatPriorities PriorityBit, std::optional<ArchExtKind> ID) + : Name(Name), FeatureBit(FeatureBit), PriorityBit(PriorityBit), ID(ID){}; }; LLVM_ABI const std::vector<FMVInfo> &getFMVInfo(); diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp index 4f04209cf4cfc..885e8a38e61d6 100644 --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -1427,6 +1427,10 @@ APInt TargetTransformInfo::getFeatureMask(const Function &F) const { return TTIImpl->getFeatureMask(F); } +APInt TargetTransformInfo::getPriorityMask(const Function &F) const { + return TTIImpl->getPriorityMask(F); +} + bool TargetTransformInfo::isMultiversionedFunction(const Function &F) const { return TTIImpl->isMultiversionedFunction(F); } diff --git a/llvm/lib/Target/AArch64/AArch64FMV.td b/llvm/lib/Target/AArch64/AArch64FMV.td index b0f76ec6a6480..12939997401ac 100644 --- a/llvm/lib/Target/AArch64/AArch64FMV.td +++ b/llvm/lib/Target/AArch64/AArch64FMV.td @@ -83,3 +83,14 @@ def : FMVExtension<"sve2-sha3", "SVE_SHA3">; def : FMVExtension<"sve2-sm4", "SVE_SM4">; def : FMVExtension<"wfxt", "WFXT">; def : FMVExtension<"cssc", "CSSC">; + +// Extensions which allow the user to override version priority. +// 8-bits allow 256-1 priority levels (excluding all zeros). 
+def : FMVExtension<"P0", "P0">; +def : FMVExtension<"P1", "P1">; +def : FMVExtension<"P2", "P2">; +def : FMVExtension<"P3", "P3">; +def : FMVExtension<"P4", "P4">; +def : FMVExtension<"P5", "P5">; +def : FMVExtension<"P6", "P6">; +def : FMVExtension<"P7", "P7">; diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index 24ef92f5835fe..16fcc7891b8b2 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -245,12 +245,23 @@ static bool hasPossibleIncompatibleOps(const Function *F, return false; } -APInt AArch64TTIImpl::getFeatureMask(const Function &F) const { +static void extractAttrFeatures(const Function &F, const AArch64TTIImpl *TTI, + SmallVectorImpl<StringRef> &Features) { StringRef AttributeStr = - isMultiversionedFunction(F) ? "fmv-features" : "target-features"; + TTI->isMultiversionedFunction(F) ? "fmv-features" : "target-features"; StringRef FeatureStr = F.getFnAttribute(AttributeStr).getValueAsString(); - SmallVector<StringRef, 8> Features; FeatureStr.split(Features, ","); +} + +APInt AArch64TTIImpl::getFeatureMask(const Function &F) const { + SmallVector<StringRef, 8> Features; + extractAttrFeatures(F, this, Features); + return AArch64::getCpuSupportsMask(Features); +} + +APInt AArch64TTIImpl::getPriorityMask(const Function &F) const { + SmallVector<StringRef, 8> Features; + extractAttrFeatures(F, this, Features); return AArch64::getFMVPriority(Features); } diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h index 647b242d74fb3..f1e0002f602b2 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h @@ -90,6 +90,7 @@ class AArch64TTIImpl final : public BasicTTIImplBase<AArch64TTIImpl> { unsigned DefaultCallPenalty) const override; APInt getFeatureMask(const Function &F) const 
override; + APInt getPriorityMask(const Function &F) const override; bool isMultiversionedFunction(const Function &F) const override; diff --git a/llvm/lib/TargetParser/AArch64TargetParser.cpp b/llvm/lib/TargetParser/AArch64TargetParser.cpp index 7e3583275a734..2c0211b3a2919 100644 --- a/llvm/lib/TargetParser/AArch64TargetParser.cpp +++ b/llvm/lib/TargetParser/AArch64TargetParser.cpp @@ -55,21 +55,30 @@ std::optional<AArch64::FMVInfo> lookupFMVByID(AArch64::ArchExtKind ExtID) { return {}; } +std::optional<AArch64::FMVInfo> getFMVInfoFrom(StringRef Feature) { + std::optional<AArch64::FMVInfo> FMV = AArch64::parseFMVExtension(Feature); + if (!FMV && Feature.starts_with('+')) + if (std::optional<AArch64::ExtensionInfo> Ext = + AArch64::targetFeatureToExtension(Feature)) + FMV = lookupFMVByID(Ext->ID); + return FMV; +} + APInt AArch64::getFMVPriority(ArrayRef<StringRef> Features) { // Transitively enable the Arch Extensions which correspond to each feature. ExtensionSet FeatureBits; + APInt PriorityMask = APInt::getZero(128); for (const StringRef Feature : Features) { - std::optional<FMVInfo> FMV = parseFMVExtension(Feature); - if (!FMV && Feature.starts_with('+')) { - if (std::optional<ExtensionInfo> Info = targetFeatureToExtension(Feature)) - FMV = lookupFMVByID(Info->ID); + if (std::optional<FMVInfo> FMV = getFMVInfoFrom(Feature)) { + // FMV feature without a corresponding Arch Extension may affect priority + if (FMV->ID) + FeatureBits.enable(*FMV->ID); + else + PriorityMask.setBit(FMV->PriorityBit); } - if (FMV && FMV->ID) - FeatureBits.enable(*FMV->ID); } // Construct a bitmask for all the transitively enabled Arch Extensions. - APInt PriorityMask = APInt::getZero(128); for (const FMVInfo &Info : getFMVInfo()) if (Info.ID && FeatureBits.Enabled.test(*Info.ID)) PriorityMask.setBit(Info.PriorityBit); @@ -81,15 +90,15 @@ APInt AArch64::getCpuSupportsMask(ArrayRef<StringRef> Features) { // Transitively enable the Arch Extensions which correspond to each feature. 
ExtensionSet FeatureBits; for (const StringRef Feature : Features) - if (std::optional<FMVInfo> Info = parseFMVExtension(Feature)) - if (Info->ID) - FeatureBits.enable(*Info->ID); + if (std::optional<FMVInfo> FMV = getFMVInfoFrom(Feature)) + if (FMV->ID) + FeatureBits.enable(*FMV->ID); // Construct a bitmask for all the transitively enabled Arch Extensions. APInt FeaturesMask = APInt::getZero(128); for (const FMVInfo &Info : getFMVInfo()) if (Info.ID && FeatureBits.Enabled.test(*Info.ID)) - FeaturesMask.setBit(Info.FeatureBit); + FeaturesMask.setBit(*Info.FeatureBit); return FeaturesMask; } diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp index bdda4980c1005..689ab2cb99db4 100644 --- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp +++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp @@ -2528,8 +2528,10 @@ static bool OptimizeNonTrivialIFuncs( Module &M, function_ref<TargetTransformInfo &(Function &)> GetTTI) { bool Changed = false; - // Cache containing the mask constructed from a function's target features. + // Cache containing the feature mask constructed from a function's metadata. DenseMap<Function *, APInt> FeatureMask; + // Cache containing the priority mask constructed from a function's metadata. + DenseMap<Function *, APInt> PriorityMask; for (GlobalIFunc &IF : M.ifuncs()) { if (IF.isInterposable()) @@ -2559,16 +2561,19 @@ static bool OptimizeNonTrivialIFuncs( LLVM_DEBUG(dbgs() << "Statically resolving calls to function " << Resolver->getName() << "\n"); - // Cache the feature mask for each callee. + // Cache the masks for each callee. 
for (Function *Callee : Callees) { - auto [It, Inserted] = FeatureMask.try_emplace(Callee); - if (Inserted) - It->second = TTI.getFeatureMask(*Callee); + auto [FeatIt, FeatInserted] = FeatureMask.try_emplace(Callee); + if (FeatInserted) + FeatIt->second = TTI.getFeatureMask(*Callee); + auto [PriorIt, PriorInserted] = PriorityMask.try_emplace(Callee); + if (PriorInserted) + PriorIt->second = TTI.getPriorityMask(*Callee); } // Sort the callee versions in decreasing priority order. sort(Callees, [&](auto *LHS, auto *RHS) { - return FeatureMask[LHS].ugt(FeatureMask[RHS]); + return PriorityMask[LHS].ugt(PriorityMask[RHS]); }); // Find the callsites and cache the feature mask for each caller. @@ -2581,6 +2586,9 @@ static bool OptimizeNonTrivialIFuncs( auto [FeatIt, FeatInserted] = FeatureMask.try_emplace(Caller); if (FeatInserted) FeatIt->second = TTI.getFeatureMask(*Caller); + auto [PriorIt, PriorInserted] = PriorityMask.try_emplace(Caller); + if (PriorInserted) + PriorIt->second = TTI.getPriorityMask(*Caller); auto [CallIt, CallInserted] = CallSites.try_emplace(Caller); if (CallInserted) Callers.push_back(Caller); @@ -2591,7 +2599,7 @@ static bool OptimizeNonTrivialIFuncs( // Sort the caller versions in decreasing priority order. 
sort(Callers, [&](auto *LHS, auto *RHS) { - return FeatureMask[LHS].ugt(FeatureMask[RHS]); + return PriorityMask[LHS].ugt(PriorityMask[RHS]); }); auto implies = [](APInt A, APInt B) { return B.isSubsetOf(A); }; diff --git a/llvm/utils/TableGen/Basic/ARMTargetDefEmitter.cpp b/llvm/utils/TableGen/Basic/ARMTargetDefEmitter.cpp index 3f284ee1b1032..4368551676939 100644 --- a/llvm/utils/TableGen/Basic/ARMTargetDefEmitter.cpp +++ b/llvm/utils/TableGen/Basic/ARMTargetDefEmitter.cpp @@ -159,12 +159,15 @@ static void emitARMTargetDef(const RecordKeeper &RK, raw_ostream &OS) { << " if(I.size()) return I;\n" << " I.reserve(" << FMVExts.size() << ");\n"; for (const Record *Rec : FMVExts) { + auto FeatName = Rec->getValueAsString("BackendFeature"); + const Record *FeatRec = ExtensionMap[FeatName]; OS << " I.emplace_back("; OS << "\"" << Rec->getValueAsString("Name") << "\""; - OS << ", " << Rec->getValueAsString("FeatureBit"); + if (FeatRec) + OS << ", " << Rec->getValueAsString("FeatureBit"); + else + OS << ", std::nullopt"; OS << ", " << Rec->getValueAsString("PriorityBit"); - auto FeatName = Rec->getValueAsString("BackendFeature"); - const Record *FeatRec = ExtensionMap[FeatName]; if (FeatRec) OS << ", " << FeatRec->getValueAsString("ArchExtKindSpelling").upper(); else >From b932c05e25522e815eb4e2d30844042543f15286 Mon Sep 17 00:00:00 2001 From: Alexandros Lamprineas <alexandros.lamprin...@arm.com> Date: Wed, 13 Aug 2025 12:07:15 +0300 Subject: [PATCH 2/4] Update AArch64TargetParser.h clang format --- llvm/include/llvm/TargetParser/AArch64TargetParser.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/include/llvm/TargetParser/AArch64TargetParser.h b/llvm/include/llvm/TargetParser/AArch64TargetParser.h index 4c9fb17104c6a..b4b291b402197 100644 --- a/llvm/include/llvm/TargetParser/AArch64TargetParser.h +++ b/llvm/include/llvm/TargetParser/AArch64TargetParser.h @@ -80,7 +80,7 @@ struct FMVInfo { std::optional<ArchExtKind> ID; // The architecture extension 
to enable. FMVInfo(StringRef Name, std::optional<CPUFeatures> FeatureBit, FeatPriorities PriorityBit, std::optional<ArchExtKind> ID) - : Name(Name), FeatureBit(FeatureBit), PriorityBit(PriorityBit), ID(ID){}; + : Name(Name), FeatureBit(FeatureBit), PriorityBit(PriorityBit), ID(ID) {}; }; LLVM_ABI const std::vector<FMVInfo> &getFMVInfo(); >From 79243c408b964cd9087927e3cd388b32c1937097 Mon Sep 17 00:00:00 2001 From: Alexandros Lamprineas <alexandros.lamprin...@arm.com> Date: Wed, 13 Aug 2025 13:39:10 +0300 Subject: [PATCH 3/4] Update SemaARM.cpp update comment --- clang/lib/Sema/SemaARM.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/Sema/SemaARM.cpp b/clang/lib/Sema/SemaARM.cpp index 167790b77d791..5742d59f52636 100644 --- a/clang/lib/Sema/SemaARM.cpp +++ b/clang/lib/Sema/SemaARM.cpp @@ -1591,7 +1591,7 @@ static void convertPriorityString(unsigned Priority, "P4", "P5", "P6", "P7"}; assert(Priority > 0 && Priority < 256 && "priority out of range"); - // Convert priority=[1-31] -> P0 + ... + P4 + // Convert priority=[1-255] -> P0 + ... 
+ P7 for (unsigned BitPos = 0; BitPos < 8; ++BitPos) if (Priority & (1U << BitPos)) appendFeature(PriorityString[BitPos], NewParam); >From 1ffde1537494ba9af75f6d58360ac83791755fa3 Mon Sep 17 00:00:00 2001 From: Alexandros Lamprineas <alexandros.lamprin...@arm.com> Date: Sat, 13 Sep 2025 17:06:39 +0100 Subject: [PATCH 4/4] Add a test for static resolution of calls --- .../Transforms/GlobalOpt/resolve-fmv-ifunc.ll | 80 ++++++++++++++++++- 1 file changed, 79 insertions(+), 1 deletion(-) diff --git a/llvm/test/Transforms/GlobalOpt/resolve-fmv-ifunc.ll b/llvm/test/Transforms/GlobalOpt/resolve-fmv-ifunc.ll index 4b6a19d3f05cf..1bf10423d798e 100644 --- a/llvm/test/Transforms/GlobalOpt/resolve-fmv-ifunc.ll +++ b/llvm/test/Transforms/GlobalOpt/resolve-fmv-ifunc.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter "call i32 @(test_single_bb_resolver|test_multi_bb_resolver|test_caller_feats_not_implied|test_non_fmv_caller|test_priority|test_alternative_names)" --version 4 +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter "call i32 @(test_single_bb_resolver|test_multi_bb_resolver|test_caller_feats_not_implied|test_non_fmv_caller|test_priority|test_alternative_names|test_explicit_priority)" --version 4 ; REQUIRES: aarch64-registered-target @@ -13,6 +13,7 @@ $test_caller_feats_not_implied.resolver = comdat any $test_non_fmv_caller.resolver = comdat any $test_priority.resolver = comdat any $test_alternative_names.resolver = comdat any +$test_explicit_priority.resolver = comdat any @__aarch64_cpu_features = external local_unnamed_addr global { i64 } @@ -22,6 +23,7 @@ $test_alternative_names.resolver = comdat any @test_non_fmv_caller = weak_odr ifunc i32 (), ptr @test_non_fmv_caller.resolver @test_priority = weak_odr ifunc i32 (), ptr @test_priority.resolver @test_alternative_names = weak_odr ifunc i32 (), ptr @test_alternative_names.resolver +@test_explicit_priority = weak_odr 
ifunc i32 (), ptr @test_explicit_priority.resolver declare void @__init_cpu_features_resolver() local_unnamed_addr @@ -348,6 +350,76 @@ entry: ret i32 %call } +declare i32 @test_explicit_priority._Mmops() #18 +declare i32 @test_explicit_priority._Msve2() #19 +declare i32 @test_explicit_priority._Msve() #20 +declare i32 @test_explicit_priority.default() #0 + +define weak_odr ptr @test_explicit_priority.resolver() comdat { +; CHECK-LABEL: define weak_odr ptr @test_explicit_priority.resolver() comdat { +resolver_entry: + tail call void @__init_cpu_features_resolver() + %0 = load i64, ptr @__aarch64_cpu_features, align 8 + %1 = and i64 %0, 1073807616 + %2 = icmp eq i64 %1, 1073807616 + br i1 %2, label %common.ret, label %resolver_else + +common.ret: ; preds = %resolver_else2, %resolver_else, %resolver_entry + %common.ret.op = phi ptr [ @test_explicit_priority._Msve, %resolver_entry ], [ @test_explicit_priority._Msve2, %resolver_else ], [ %test_explicit_priority._Mmops.test_explicit_priority.default, %resolver_else2 ] + ret ptr %common.ret.op + +resolver_else: ; preds = %resolver_entry + %3 = and i64 %0, 69793284352 + %4 = icmp eq i64 %3, 69793284352 + br i1 %4, label %common.ret, label %resolver_else2 + +resolver_else2: ; preds = %resolver_else + %5 = and i64 %0, 576460752303423488 + %.not = icmp eq i64 %5, 0 + %test_explicit_priority._Mmops.test_explicit_priority.default = select i1 %.not, ptr @test_explicit_priority.default, ptr @test_explicit_priority._Mmops + br label %common.ret +} + +define i32 @caller8._MmopsMsve2() #21 { +; CHECK-LABEL: define i32 @caller8._MmopsMsve2( +; CHECK-SAME: ) local_unnamed_addr #[[ATTR20:[0-9]+]] { +; CHECK: [[CALL:%.*]] = tail call i32 @test_explicit_priority._Msve2() +; +entry: + %call = tail call i32 @test_explicit_priority() + ret i32 %call +} + +define i32 @caller8._Mmops() #22 { +; CHECK-LABEL: define i32 @caller8._Mmops( +; CHECK-SAME: ) local_unnamed_addr #[[ATTR21:[0-9]+]] { +; CHECK: [[CALL:%.*]] = tail call i32 
@test_explicit_priority() +; +entry: + %call = tail call i32 @test_explicit_priority() + ret i32 %call +} + +define i32 @caller8._Msve() #23 { +; CHECK-LABEL: define i32 @caller8._Msve( +; CHECK-SAME: ) local_unnamed_addr #[[ATTR22:[0-9]+]] { +; CHECK: [[CALL:%.*]] = tail call i32 @test_explicit_priority._Msve() +; +entry: + %call = tail call i32 @test_explicit_priority() + ret i32 %call +} + +define i32 @caller8.default() #0 { +; CHECK-LABEL: define i32 @caller8.default( +; CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { +; CHECK: [[CALL:%.*]] = tail call i32 @test_explicit_priority() +; +entry: + %call = tail call i32 @test_explicit_priority() + ret i32 %call +} + attributes #0 = { "fmv-features" } attributes #1 = { "fmv-features"="sve" } attributes #2 = { "fmv-features"="sve2" } @@ -366,3 +438,9 @@ attributes #14 = { "fmv-features"="dpb2,frintts" } attributes #15 = { "fmv-features"="flagm2,frintts" } attributes #16 = { "fmv-features"="rcpc2" } attributes #17 = { "fmv-features"="frintts" } +attributes #18 = { "fmv-features"="P1,P3,mops" } ; priority=10 +attributes #19 = { "fmv-features"="P2,P4,sve2" } ; priority=20 +attributes #20 = { "fmv-features"="P1,P2,P3,P4,sve" } ; priority=30 +attributes #21 = { "fmv-features"="P1,P4,P5,mops,sve2" } ; priority=50 +attributes #22 = { "fmv-features"="P2,P5,P6,mops" } ; priority=100 +attributes #23 = { "fmv-features"="P3,P6,P7,sve" } ; priority=200 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits