https://github.com/w2yehia updated https://github.com/llvm/llvm-project/pull/177428
>From 2dc3d49b5c6c216c9115dc7e931d9bf1a74f17fe Mon Sep 17 00:00:00 2001 From: Wael Yehia <[email protected]> Date: Tue, 23 Sep 2025 14:22:57 -0400 Subject: [PATCH 01/21] refactor EmitPPCBuiltinCpu --- clang/lib/CodeGen/CodeGenFunction.h | 1 + clang/lib/CodeGen/TargetBuiltins/PPC.cpp | 62 +++++++++++++----------- 2 files changed, 36 insertions(+), 27 deletions(-) diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 226950ab599e3..aecc4ec40aa97 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -4889,6 +4889,7 @@ class CodeGenFunction : public CodeGenTypeCache { llvm::Value *BuildVector(ArrayRef<llvm::Value *> Ops); llvm::Value *EmitX86BuiltinExpr(unsigned BuiltinID, const CallExpr *E); + llvm::Value *EmitPPCBuiltinCpu(unsigned BuiltinID, llvm::Type *ReturnType, StringRef CPUStr); llvm::Value *EmitPPCBuiltinExpr(unsigned BuiltinID, const CallExpr *E); llvm::Value *EmitAMDGPUBuiltinExpr(unsigned BuiltinID, const CallExpr *E); llvm::Value *EmitHLSLBuiltinExpr(unsigned BuiltinID, const CallExpr *E, diff --git a/clang/lib/CodeGen/TargetBuiltins/PPC.cpp b/clang/lib/CodeGen/TargetBuiltins/PPC.cpp index 6568959351a5d..8360a17c470dd 100644 --- a/clang/lib/CodeGen/TargetBuiltins/PPC.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/PPC.cpp @@ -70,31 +70,18 @@ static llvm::Value *emitPPCLoadReserveIntrinsic(CodeGenFunction &CGF, return CI; } -Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, - const CallExpr *E) { - // Do not emit the builtin arguments in the arguments of a function call, - // because the evaluation order of function arguments is not specified in C++. - // This is important when testing to ensure the arguments are emitted in the - // same order every time. 
Eg: - // Instead of: - // return Builder.CreateFDiv(EmitScalarExpr(E->getArg(0)), - // EmitScalarExpr(E->getArg(1)), "swdiv"); - // Use: - // Value *Op0 = EmitScalarExpr(E->getArg(0)); - // Value *Op1 = EmitScalarExpr(E->getArg(1)); - // return Builder.CreateFDiv(Op0, Op1, "swdiv") - - Intrinsic::ID ID = Intrinsic::not_intrinsic; +Value *CodeGenFunction::EmitPPCBuiltinCpu( + unsigned BuiltinID, llvm::Type *ReturnType, StringRef CPUStr) { #include "llvm/TargetParser/PPCTargetParser.def" auto GenAIXPPCBuiltinCpuExpr = [&](unsigned SupportMethod, unsigned FieldIdx, unsigned Mask, CmpInst::Predicate CompOp, unsigned OpValue) -> Value * { if (SupportMethod == BUILTIN_PPC_FALSE) - return llvm::ConstantInt::getFalse(ConvertType(E->getType())); + return llvm::ConstantInt::getFalse(ReturnType); if (SupportMethod == BUILTIN_PPC_TRUE) - return llvm::ConstantInt::getTrue(ConvertType(E->getType())); + return llvm::ConstantInt::getTrue(ReturnType); assert(SupportMethod <= SYS_CALL && "Invalid value for SupportMethod."); @@ -137,12 +124,7 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, ConstantInt::get(IsValueType64Bit ? Int64Ty : Int32Ty, OpValue)); }; - switch (BuiltinID) { - default: return nullptr; - - case Builtin::BI__builtin_cpu_is: { - const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts(); - StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString(); + if (BuiltinID == Builtin::BI__builtin_cpu_is) { llvm::Triple Triple = getTarget().getTriple(); typedef std::tuple<unsigned, unsigned, unsigned, unsigned> CPUInfo; @@ -170,7 +152,7 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, "Invalid CPU name. 
Missed by SemaChecking?"); if (LinuxSupportMethod == BUILTIN_PPC_FALSE) - return llvm::ConstantInt::getFalse(ConvertType(E->getType())); + return llvm::ConstantInt::getFalse(ReturnType); Value *Op0 = llvm::ConstantInt::get(Int32Ty, PPC_FAWORD_CPUID); llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_fixed_addr_ld); @@ -178,10 +160,8 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, return Builder.CreateICmpEQ(TheCall, llvm::ConstantInt::get(Int32Ty, LinuxIDValue)); } - case Builtin::BI__builtin_cpu_supports: { + else if (BuiltinID == Builtin::BI__builtin_cpu_supports) { llvm::Triple Triple = getTarget().getTriple(); - const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts(); - StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString(); if (Triple.isOSAIX()) { typedef std::tuple<unsigned, unsigned, unsigned, CmpInst::Predicate, unsigned> @@ -218,7 +198,35 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, #undef PPC_FAWORD_HWCAP2 #undef PPC_FAWORD_CPUID } + else + assert(0 && "unexpected builtin"); +} +Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, + const CallExpr *E) { + // Do not emit the builtin arguments in the arguments of a function call, + // because the evaluation order of function arguments is not specified in C++. + // This is important when testing to ensure the arguments are emitted in the + // same order every time. 
Eg: + // Instead of: + // return Builder.CreateFDiv(EmitScalarExpr(E->getArg(0)), + // EmitScalarExpr(E->getArg(1)), "swdiv"); + // Use: + // Value *Op0 = EmitScalarExpr(E->getArg(0)); + // Value *Op1 = EmitScalarExpr(E->getArg(1)); + // return Builder.CreateFDiv(Op0, Op1, "swdiv") + + Intrinsic::ID ID = Intrinsic::not_intrinsic; + + switch (BuiltinID) { + default: return nullptr; + + case Builtin::BI__builtin_cpu_is: + case Builtin::BI__builtin_cpu_supports: { + const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts(); + StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString(); + return EmitPPCBuiltinCpu(BuiltinID, ConvertType(E->getType()), CPUStr); + } // __builtin_ppc_get_timebase is GCC 4.8+'s PowerPC-specific name for what we // call __builtin_readcyclecounter. case PPC::BI__builtin_ppc_get_timebase: >From 6e4997e5b00442e3d73d81c047a34875bfdef294 Mon Sep 17 00:00:00 2001 From: Wael Yehia <[email protected]> Date: Wed, 16 Jul 2025 20:02:11 +0000 Subject: [PATCH 02/21] clang codegen for target_clones --- clang/include/clang/Basic/TargetInfo.h | 2 +- clang/include/clang/Sema/SemaPPC.h | 4 ++ clang/lib/AST/ASTContext.cpp | 2 + clang/lib/Basic/Targets/PPC.cpp | 36 +++++++++++++ clang/lib/Basic/Targets/PPC.h | 4 ++ clang/lib/CodeGen/CodeGenFunction.cpp | 70 +++++++++++++++++++++++++- clang/lib/CodeGen/CodeGenFunction.h | 3 ++ clang/lib/CodeGen/CodeGenModule.cpp | 10 ++-- clang/lib/CodeGen/Targets/PPC.cpp | 47 +++++++++++++++++ clang/lib/Sema/SemaDeclAttr.cpp | 4 ++ clang/lib/Sema/SemaPPC.cpp | 56 +++++++++++++++++++++ 11 files changed, 231 insertions(+), 7 deletions(-) diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h index ec6cd2be7c3c5..68160e9bd9b29 100644 --- a/clang/include/clang/Basic/TargetInfo.h +++ b/clang/include/clang/Basic/TargetInfo.h @@ -1567,7 +1567,7 @@ class TargetInfo : public TransferrableTargetInfo, /// which requires support for cpu_supports and cpu_is functionality. 
bool supportsMultiVersioning() const { return getTriple().isX86() || getTriple().isAArch64() || - getTriple().isRISCV(); + getTriple().isRISCV() || getTriple().isOSBinFormatXCOFF(); } /// Identify whether this target supports IFuncs. diff --git a/clang/include/clang/Sema/SemaPPC.h b/clang/include/clang/Sema/SemaPPC.h index f8edecc4fcb7b..0cf6ba7ff29dd 100644 --- a/clang/include/clang/Sema/SemaPPC.h +++ b/clang/include/clang/Sema/SemaPPC.h @@ -53,6 +53,10 @@ class SemaPPC : public SemaBase { // vector double vec_xxpermdi(vector double, vector double, int); // vector short vec_xxsldwi(vector short, vector short, int); bool BuiltinVSX(CallExpr *TheCall); + + bool checkTargetClonesAttr(SmallVectorImpl<StringRef> &Params, + SmallVectorImpl<SourceLocation> &Locs, + SmallVectorImpl<SmallString<64>> &NewParams); }; } // namespace clang diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index 3f63420cae91e..6f5784d7d4a64 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -15033,6 +15033,8 @@ void ASTContext::getFunctionFeatureMap(llvm::StringMap<bool> &FeatureMap, StringRef VersionStr = TC->getFeatureStr(GD.getMultiVersionIndex()); if (VersionStr.starts_with("arch=")) TargetCPU = VersionStr.drop_front(sizeof("arch=") - 1); + else if (Target->getTriple().isOSAIX() && VersionStr.starts_with("cpu=")) // TODO make a function that extracts CPU from a feature string + TargetCPU = VersionStr.drop_front(sizeof("cpu=") - 1); else if (VersionStr != "default") Features.push_back((StringRef{"+"} + VersionStr).str()); Target->initFeatureMap(FeatureMap, getDiagnostics(), TargetCPU, Features); diff --git a/clang/lib/Basic/Targets/PPC.cpp b/clang/lib/Basic/Targets/PPC.cpp index a37a68ad91724..7f8005db3cd3e 100644 --- a/clang/lib/Basic/Targets/PPC.cpp +++ b/clang/lib/Basic/Targets/PPC.cpp @@ -678,6 +678,42 @@ void PPCTargetInfo::setFeatureEnabled(llvm::StringMap<bool> &Features, } } +ParsedTargetAttr PPCTargetInfo::parseTargetAttr(StringRef 
Features) const { + ParsedTargetAttr Ret; + if (Features == "default") + return Ret; + SmallVector<StringRef, 1> AttrFeatures; + Features.split(AttrFeatures, ","); + + // Grab the various features and prepend a "+" to turn on the feature to + // the backend and add them to our existing set of features. + for (auto &Feature : AttrFeatures) { + // Go ahead and trim whitespace rather than either erroring or + // accepting it weirdly. + Feature = Feature.trim(); + + // While we're here iterating check for a different target cpu. + if (Feature.starts_with("cpu=")) { + assert(Ret.CPU.empty()); + Ret.CPU = Feature.split("=").second.trim(); + } else assert(0); +// else if (Feature.starts_with("tune=")) { +// if (!Ret.Tune.empty()) +// Ret.Duplicate = "tune="; +// else +// Ret.Tune = Feature.split("=").second.trim(); +// } else if (Feature.starts_with("no-")) +// Ret.Features.push_back("-" + Feature.split("-").second.str()); +// else +// Ret.Features.push_back("+" + Feature.str()); + } + return Ret; +} + +llvm::APInt PPCTargetInfo::getFMVPriority(ArrayRef<StringRef> Features) const { + return llvm::APInt(32, Features.empty() ? 0 : 1); +} + // Make sure that registers are added in the correct array index which should be // the DWARF number for PPC registers. 
const char *const PPCTargetInfo::GCCRegNames[] = { diff --git a/clang/lib/Basic/Targets/PPC.h b/clang/lib/Basic/Targets/PPC.h index 664c9e15d8d18..6f90ff1f5d57c 100644 --- a/clang/lib/Basic/Targets/PPC.h +++ b/clang/lib/Basic/Targets/PPC.h @@ -199,6 +199,10 @@ class LLVM_LIBRARY_VISIBILITY PPCTargetInfo : public TargetInfo { bool supportsTargetAttributeTune() const override { return true; } + ParsedTargetAttr parseTargetAttr(StringRef Str) const override; + + llvm::APInt getFMVPriority(ArrayRef<StringRef> Features) const override; + ArrayRef<const char *> getGCCRegNames() const override; ArrayRef<TargetInfo::GCCRegAlias> getGCCRegAliases() const override; diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp index 61128316963ac..e4ef527a536d3 100644 --- a/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/clang/lib/CodeGen/CodeGenFunction.cpp @@ -45,6 +45,7 @@ #include "llvm/IR/Instruction.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IntrinsicsPowerPC.h" #include "llvm/IR/MDBuilder.h" #include "llvm/Support/CRC.h" #include "llvm/Support/xxhash.h" @@ -3027,12 +3028,77 @@ void CodeGenFunction::EmitMultiVersionResolver( case llvm::Triple::riscv64: EmitRISCVMultiVersionResolver(Resolver, Options); return; - + case llvm::Triple::ppc: + case llvm::Triple::ppc64: + if (getContext().getTargetInfo().getTriple().isOSAIX()) { + EmitPPCAIXMultiVersionResolver(Resolver, Options); + return; + } + [[fallthrough]]; default: - assert(false && "Only implemented for x86, AArch64 and RISC-V targets"); + assert(false && "Only implemented for x86, AArch64, RISC-V, and PowerPC targets"); + } +} + +/* + * Desc_t *foo_desc = ppc_get_function_descriptor(&foo); + * if (foo_desc->addr == ppc_get_function_entry(&foo)) { + * FuncPtr fp = resolver(); + * __c11_atomic_store((_Atomic FuncPtr *)&foo_desc->addr, fp, 0); + * } + * return ((int (*)(int)) foo_desc)(a); + */ +void CodeGenFunction::EmitPPCAIXMultiVersionResolver( + 
llvm::Function *Resolver, ArrayRef<FMVResolverOption> Options) { + + llvm::PointerType *PtrTy = Builder.getPtrTy(); + // entry: + llvm::BasicBlock *CurBlock = createBasicBlock("entry", Resolver); + + SmallVector<std::pair<llvm::Value *, llvm::BasicBlock *>, 3> PhiArgs; + for (const FMVResolverOption &RO : Options) { + Builder.SetInsertPoint(CurBlock); + // The 'default' or 'generic' case. + if (!RO.Architecture && RO.Features.empty()) { + // if.default: + // %fmv.default = call ptr @getEntryPoint(ptr noundef @foo_default) + // br label %resolver_exit + assert(&RO == Options.end() - 1 && "Default or Generic case must be last"); + Builder.CreateRet(RO.Function); + break; + } + // if.else_n: + // %is_version_n = __builtin_cpu_supports(version_n) + // br i1 %is_version_n, label %if.version_n, label %if.default + // + // if.version_n: + // %fmv.version.n = call ptr @getEntryPoint(ptr noundef @foo_version_n) + // br label %resolver_exit + assert(RO.Features.size() == 1 && "for now one feature requirement per version"); + llvm::Value *Condition; + if (RO.Features[0].starts_with("cpu=")) { + Condition = EmitPPCBuiltinCpu(Builtin::BI__builtin_cpu_is, Builder.getInt1Ty(), RO.Features[0].split("=").second.trim()); + } else { + Condition = EmitPPCBuiltinCpu(Builtin::BI__builtin_cpu_supports, Builder.getInt1Ty(), RO.Features[0]); + } + llvm::BasicBlock *ThenBlock = createBasicBlock("if.version", Resolver); + CurBlock = createBasicBlock("if.else", Resolver); + Builder.CreateCondBr(Condition, ThenBlock, CurBlock); + + Builder.SetInsertPoint(ThenBlock); + Builder.CreateRet(RO.Function); } + + // If no generic/default, emit an unreachable. 
+// Builder.SetInsertPoint(CurBlock); +// llvm::CallInst *TrapCall = EmitTrapCall(llvm::Intrinsic::trap); +// TrapCall->setDoesNotReturn(); +// TrapCall->setDoesNotThrow(); +// Builder.CreateUnreachable(); +// Builder.ClearInsertionPoint(); } + void CodeGenFunction::EmitRISCVMultiVersionResolver( llvm::Function *Resolver, ArrayRef<FMVResolverOption> Options) { diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index aecc4ec40aa97..7fb1b634f857f 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -5557,6 +5557,9 @@ class CodeGenFunction : public CodeGenTypeCache { void EmitRISCVMultiVersionResolver(llvm::Function *Resolver, ArrayRef<FMVResolverOption> Options); + void EmitPPCAIXMultiVersionResolver(llvm::Function *Resolver, + ArrayRef<FMVResolverOption> Options); + private: QualType getVarArgType(const Expr *Arg); diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index d50c9605a30b3..576b4c6ba0f3e 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -3008,9 +3008,10 @@ bool CodeGenModule::GetCPUAndFeaturesAttributes(GlobalDecl GD, // While we populated the feature map above, we still need to // get and parse the target attribute so we can get the cpu for // the function. - if (TD) { - ParsedTargetAttr ParsedAttr = - Target.parseTargetAttr(TD->getFeaturesStr()); + StringRef FeatureStr = TD ? TD->getFeaturesStr() : + (TC ? 
TC->getFeatureStr(GD.getMultiVersionIndex()) : StringRef()); + if (!FeatureStr.empty()) { + ParsedTargetAttr ParsedAttr = Target.parseTargetAttr(FeatureStr); if (!ParsedAttr.CPU.empty() && getTarget().isValidCPUName(ParsedAttr.CPU)) { TargetCPU = ParsedAttr.CPU; @@ -4795,7 +4796,8 @@ void CodeGenModule::emitMultiVersionFunctions() { if (auto *IFunc = dyn_cast<llvm::GlobalIFunc>(ResolverConstant)) { ResolverConstant = IFunc->getResolver(); if (FD->isTargetClonesMultiVersion() && - !getTarget().getTriple().isAArch64()) { + !getTarget().getTriple().isAArch64() && + !getTarget().getTriple().isOSAIX()) { std::string MangledName = getMangledNameImpl( *this, GD, FD, /*OmitMultiVersionMangling=*/true); if (!GetGlobalValue(MangledName + ".ifunc")) { diff --git a/clang/lib/CodeGen/Targets/PPC.cpp b/clang/lib/CodeGen/Targets/PPC.cpp index 35e7655646ade..bc357e0908a5e 100644 --- a/clang/lib/CodeGen/Targets/PPC.cpp +++ b/clang/lib/CodeGen/Targets/PPC.cpp @@ -128,8 +128,55 @@ class AIXABIInfo : public ABIInfo { RValue EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty, AggValueSlot Slot) const override; + + using ABIInfo::appendAttributeMangling; + void appendAttributeMangling(TargetClonesAttr *Attr, unsigned Index, + raw_ostream &Out) const override; + void appendAttributeMangling(StringRef AttrStr, + raw_ostream &Out) const override; }; +void AIXABIInfo::appendAttributeMangling(TargetClonesAttr *Attr, + unsigned Index, + raw_ostream &Out) const { + appendAttributeMangling(Attr->getFeatureStr(Index), Out); +} + +void AIXABIInfo::appendAttributeMangling(StringRef AttrStr, + raw_ostream &Out) const { + if (AttrStr == "default") { + Out << ".default"; + return; + } + + Out << '.'; + const TargetInfo &TI = CGT.getTarget(); + ParsedTargetAttr Info = TI.parseTargetAttr(AttrStr); + + llvm::sort(Info.Features, [&TI](StringRef LHS, StringRef RHS) { + // Multiversioning doesn't allow "no-${feature}", so we can + // only have "+" prefixes here. 
+ assert(LHS.starts_with("+") && RHS.starts_with("+") && + "Features should always have a prefix."); + return TI.getFMVPriority({LHS.substr(1)}) + .ugt(TI.getFMVPriority({RHS.substr(1)})); + }); + + bool IsFirst = true; + if (!Info.CPU.empty()) { + IsFirst = false; + Out << "cpu_" << Info.CPU; + } + + assert(Info.Features.empty() && "unhandled case"); + for (StringRef Feat : Info.Features) { + if (!IsFirst) + Out << '_'; + IsFirst = false; + Out << Feat.substr(1); + } +} + class AIXTargetCodeGenInfo : public TargetCodeGenInfo { const bool Is64Bit; diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index bee42cce09aca..9c4dea25b53ab 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -54,6 +54,7 @@ #include "clang/Sema/SemaObjC.h" #include "clang/Sema/SemaOpenCL.h" #include "clang/Sema/SemaOpenMP.h" +#include "clang/Sema/SemaPPC.h" #include "clang/Sema/SemaRISCV.h" #include "clang/Sema/SemaSYCL.h" #include "clang/Sema/SemaSwift.h" @@ -3623,6 +3624,9 @@ static void handleTargetClonesAttr(Sema &S, Decl *D, const ParsedAttr &AL) { if (S.X86().checkTargetClonesAttr(Params, Locations, NewParams, AL.getLoc())) return; + } else if (S.Context.getTargetInfo().getTriple().isOSAIX()) { + if (S.PPC().checkTargetClonesAttr(Params, Locations, NewParams)) + return; } Params.clear(); for (auto &SmallStr : NewParams) diff --git a/clang/lib/Sema/SemaPPC.cpp b/clang/lib/Sema/SemaPPC.cpp index 149c564bd5b84..2fd6a3e911fd1 100644 --- a/clang/lib/Sema/SemaPPC.cpp +++ b/clang/lib/Sema/SemaPPC.cpp @@ -562,4 +562,60 @@ bool SemaPPC::BuiltinVSX(CallExpr *TheCall) { return false; } +bool SemaPPC::checkTargetClonesAttr( + SmallVectorImpl<StringRef> &Params, SmallVectorImpl<SourceLocation> &Locs, + SmallVectorImpl<SmallString<64>> &NewParams) { + using namespace DiagAttrParams; + + assert(Params.size() == Locs.size() && + "Mismatch between number of string parameters and locations"); + + bool HasDefault = false; + bool HasComma = 
false; + for (unsigned I = 0, E = Params.size(); I < E; ++I) { + const StringRef Param = Params[I].trim(); + const SourceLocation &Loc = Locs[I]; + + if (Param.empty() || Param.ends_with(',')) + return Diag(Loc, diag::warn_unsupported_target_attribute) + << Unsupported << None << "" << TargetClones; + + if (Param.contains(',')) + HasComma = true; + + StringRef LHS; + StringRef RHS = Param; + do { + std::tie(LHS, RHS) = RHS.split(','); + LHS = LHS.trim(); + const SourceLocation &CurLoc = + Loc.getLocWithOffset(LHS.data() - Param.data()); + + if (LHS.starts_with("cpu=")) { + if (!getASTContext().getTargetInfo().isValidCPUName( + LHS.drop_front(sizeof("cpu=") - 1))) + return Diag(CurLoc, diag::warn_unsupported_target_attribute) + << Unsupported << CPU << LHS.drop_front(sizeof("cpu=") - 1) + << TargetClones; + } else if (LHS == "default") + HasDefault = true; + else if (!getASTContext().getTargetInfo().isValidFeatureName(LHS) || + getASTContext().getTargetInfo().getFMVPriority(LHS) == 0) + return Diag(CurLoc, diag::warn_unsupported_target_attribute) + << Unsupported << None << LHS << TargetClones; + + if (llvm::is_contained(NewParams, LHS)) + Diag(CurLoc, diag::warn_target_clone_duplicate_options); + // Note: Add even if there are duplicates, since it changes name mangling. 
+ NewParams.push_back(LHS); + } while (!RHS.empty()); + } + if (HasComma && Params.size() > 1) + Diag(Locs[0], diag::warn_target_clone_mixed_values); + + if (!HasDefault) + return Diag(Locs[0], diag::err_target_clone_must_have_default); + + return false; +} } // namespace clang >From bdd750ef734e025f53313f19b579b223db719888 Mon Sep 17 00:00:00 2001 From: Wael Yehia <[email protected]> Date: Sat, 27 Sep 2025 23:26:33 -0400 Subject: [PATCH 03/21] ignore target_clones on a declaration and internalize the resolver and the clones --- clang/lib/CodeGen/CodeGenModule.cpp | 40 ++++++++++++++++++++++++----- 1 file changed, 34 insertions(+), 6 deletions(-) diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 576b4c6ba0f3e..00c82a7b70ef9 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -2092,6 +2092,19 @@ static bool isUniqueInternalLinkageDecl(GlobalDecl GD, (CGM.getFunctionLinkage(GD) == llvm::GlobalValue::InternalLinkage); } +// On certain platforms, a declared (but not defined) FMV shall be treated +// like a regular non-FMV function. 
+static bool IgnoreFMVOnADeclaration(const llvm::Triple &Triple, const FunctionDecl *FD) { + if (!FD->isMultiVersion()) + return false; + + if (Triple.isOSAIX()) { + assert(FD->isTargetClonesMultiVersion()); + return !FD->isDefined(); + } + return false; +} + static std::string getMangledNameImpl(CodeGenModule &CGM, GlobalDecl GD, const NamedDecl *ND, bool OmitMultiVersionMangling = false) { @@ -2141,8 +2154,9 @@ static std::string getMangledNameImpl(CodeGenModule &CGM, GlobalDecl GD, Out << CGM.getModuleNameHash(); } - if (const auto *FD = dyn_cast<FunctionDecl>(ND)) - if (FD->isMultiVersion() && !OmitMultiVersionMangling) { + if (const auto *FD = dyn_cast<FunctionDecl>(ND)) { + if (FD->isMultiVersion() && !OmitMultiVersionMangling && + !IgnoreFMVOnADeclaration(CGM.getTriple(), FD)) { switch (FD->getMultiVersionKind()) { case MultiVersionKind::CPUDispatch: case MultiVersionKind::CPUSpecific: @@ -2179,6 +2193,7 @@ static std::string getMangledNameImpl(CodeGenModule &CGM, GlobalDecl GD, llvm_unreachable("None multiversion type isn't valid here"); } } + } // Make unique name for device side static file-scope variable for HIP. if (CGM.getContext().shouldExternalize(ND) && @@ -4710,7 +4725,8 @@ getFMVPriority(const TargetInfo &TI, static llvm::GlobalValue::LinkageTypes getMultiversionLinkage(CodeGenModule &CGM, GlobalDecl GD) { const FunctionDecl *FD = cast<FunctionDecl>(GD.getDecl()); - if (FD->getFormalLinkage() == Linkage::Internal) + if (FD->getFormalLinkage() == Linkage::Internal || + CGM.getTriple().isOSAIX()) return llvm::GlobalValue::InternalLinkage; return llvm::GlobalValue::WeakODRLinkage; } @@ -4744,7 +4760,7 @@ void CodeGenModule::emitMultiVersionFunctions() { // For AArch64, a resolver is only emitted if a function marked with // target_version("default")) or target_clones("default") is defined // in this TU. For other architectures it is always emitted. 
- bool ShouldEmitResolver = !getTarget().getTriple().isAArch64(); + bool ShouldEmitResolver = !getTriple().isAArch64(); SmallVector<CodeGenFunction::FMVResolverOption, 10> Options; llvm::DenseMap<llvm::Function *, const FunctionDecl *> DeclMap; @@ -5063,8 +5079,11 @@ llvm::Constant *CodeGenModule::GetOrCreateMultiVersionResolver(GlobalDecl GD) { llvm::Constant *Resolver = GetOrCreateLLVMFunction( MangledName + ".resolver", ResolverType, GlobalDecl{}, /*ForVTable=*/false); + + auto Linkage = getTriple().isOSAIX() ? getFunctionLinkage(GD) : getMultiversionLinkage(*this, GD); + llvm::GlobalIFunc *GIF = - llvm::GlobalIFunc::create(DeclTy, AS, getMultiversionLinkage(*this, GD), + llvm::GlobalIFunc::create(DeclTy, AS, Linkage, "", Resolver, &getModule()); GIF->setName(ResolverName); SetCommonAttributes(FD, GIF); @@ -5084,7 +5103,9 @@ llvm::Constant *CodeGenModule::GetOrCreateMultiVersionResolver(GlobalDecl GD) { void CodeGenModule::setMultiVersionResolverAttributes(llvm::Function *Resolver, GlobalDecl GD) { const NamedDecl *D = dyn_cast_or_null<NamedDecl>(GD.getDecl()); - Resolver->setLinkage(getMultiversionLinkage(*this, GD)); + + auto ResolverLinkage = getTriple().isOSAIX() ? llvm::GlobalValue::InternalLinkage : getMultiversionLinkage(*this, GD); + Resolver->setLinkage(ResolverLinkage); // Function body has to be emitted before calling setGlobalVisibility // for Resolver to be considered as definition. 
@@ -5163,6 +5184,10 @@ llvm::Constant *CodeGenModule::GetOrCreateLLVMFunction( AddDeferredMultiVersionResolverToEmit(GD); NameWithoutMultiVersionMangling = getMangledNameImpl( *this, GD, FD, /*OmitMultiVersionMangling=*/true); + } else if (IgnoreFMVOnADeclaration(getTriple(), FD)) { + // TODO this might not be necessary after fix in getMangledNameImpl + NameWithoutMultiVersionMangling = getMangledNameImpl( + *this, GD, FD, /*OmitMultiVersionMangling=*/true); } else return GetOrCreateMultiVersionResolver(GD); } @@ -6623,6 +6648,9 @@ void CodeGenModule::EmitGlobalFunctionDefinition(GlobalDecl GD, auto *Fn = cast<llvm::Function>(GV); setFunctionLinkage(GD, Fn); + if (getTriple().isOSAIX() && D->isTargetClonesMultiVersion()) + Fn->setLinkage(llvm::GlobalValue::InternalLinkage); + // FIXME: this is redundant with part of setFunctionDefinitionAttributes setGVProperties(Fn, GD); >From 104732c74757e123f682f872939d8a49ca3dd89f Mon Sep 17 00:00:00 2001 From: Wael Yehia <[email protected]> Date: Tue, 30 Sep 2025 15:27:07 -0400 Subject: [PATCH 04/21] fix PPCTargetInfo::parseTargetAttr --- clang/lib/Basic/Targets/PPC.cpp | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/clang/lib/Basic/Targets/PPC.cpp b/clang/lib/Basic/Targets/PPC.cpp index 7f8005db3cd3e..dd51b46727a6a 100644 --- a/clang/lib/Basic/Targets/PPC.cpp +++ b/clang/lib/Basic/Targets/PPC.cpp @@ -694,18 +694,20 @@ ParsedTargetAttr PPCTargetInfo::parseTargetAttr(StringRef Features) const { // While we're here iterating check for a different target cpu. 
if (Feature.starts_with("cpu=")) { - assert(Ret.CPU.empty()); - Ret.CPU = Feature.split("=").second.trim(); - } else assert(0); -// else if (Feature.starts_with("tune=")) { -// if (!Ret.Tune.empty()) -// Ret.Duplicate = "tune="; -// else -// Ret.Tune = Feature.split("=").second.trim(); -// } else if (Feature.starts_with("no-")) -// Ret.Features.push_back("-" + Feature.split("-").second.str()); -// else -// Ret.Features.push_back("+" + Feature.str()); + if (!Ret.CPU.empty()) + Ret.Duplicate = "cpu="; + else + Ret.CPU = Feature.split("=").second.trim(); + } + else if (Feature.starts_with("tune=")) { + if (!Ret.Tune.empty()) + Ret.Duplicate = "tune="; + else + Ret.Tune = Feature.split("=").second.trim(); + } else if (Feature.starts_with("no-")) + Ret.Features.push_back("-" + Feature.split("-").second.str()); + else + Ret.Features.push_back("+" + Feature.str()); } return Ret; } >From 72ced461a47aa2ace5de58e27eb4cd2aa0e58cd5 Mon Sep 17 00:00:00 2001 From: Wael Yehia <[email protected]> Date: Tue, 30 Sep 2025 19:38:35 +0000 Subject: [PATCH 05/21] fix Sema/attr-target.c --- clang/test/Sema/attr-target.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/clang/test/Sema/attr-target.c b/clang/test/Sema/attr-target.c index 65ece3c27d299..ddf6654632187 100644 --- a/clang/test/Sema/attr-target.c +++ b/clang/test/Sema/attr-target.c @@ -75,15 +75,13 @@ int __attribute__((target("tune=pwr8"))) baz(void) { return 4; } //expected-warning@+1 {{unsupported 'fpmath=' in the 'target' attribute string; 'target' attribute ignored}} int __attribute__((target("fpmath=387"))) walrus(void) { return 4; } //expected-warning@+1 {{unknown CPU 'hiss' in the 'target' attribute string; 'target' attribute ignored}} -int __attribute__((target("float128,arch=hiss"))) meow(void) { return 4; } +int __attribute__((target("float128,cpu=hiss"))) meow(void) { return 4; } // no warning, same as saying 'nothing'. 
-int __attribute__((target("arch="))) turtle(void) { return 4; } +int __attribute__((target("cpu="))) turtle(void) { return 4; } //expected-warning@+1 {{unknown CPU 'hiss' in the 'target' attribute string; 'target' attribute ignored}} -int __attribute__((target("arch=hiss,arch=woof"))) pine_tree(void) { return 4; } -//expected-warning@+1 {{duplicate 'arch=' in the 'target' attribute string; 'target' attribute ignored}} -int __attribute__((target("arch=pwr9,arch=pwr10"))) oak_tree(void) { return 4; } -//expected-warning@+1 {{unsupported 'branch-protection' in the 'target' attribute string; 'target' attribute ignored}} -int __attribute__((target("branch-protection=none"))) birch_tree(void) { return 5; } +int __attribute__((target("cpu=hiss,cpu=woof"))) pine_tree(void) { return 4; } +//expected-warning@+1 {{duplicate 'cpu=' in the 'target' attribute string; 'target' attribute ignored}} +int __attribute__((target("cpu=pwr9,cpu=pwr10"))) oak_tree(void) { return 4; } //expected-warning@+1 {{unknown tune CPU 'hiss' in the 'target' attribute string; 'target' attribute ignored}} int __attribute__((target("tune=hiss,tune=woof"))) apple_tree(void) { return 4; } >From 7d29d4358b1514367a464d5e20dce832e6f3d9a6 Mon Sep 17 00:00:00 2001 From: Wael Yehia <[email protected]> Date: Tue, 7 Oct 2025 23:11:21 +0000 Subject: [PATCH 06/21] clang-format --- clang/lib/AST/ASTContext.cpp | 5 +++- clang/lib/Basic/Targets/PPC.cpp | 3 +-- clang/lib/CodeGen/CodeGenFunction.cpp | 31 ++++++++++++++---------- clang/lib/CodeGen/CodeGenFunction.h | 5 ++-- clang/lib/CodeGen/CodeGenModule.cpp | 29 ++++++++++++---------- clang/lib/CodeGen/TargetBuiltins/PPC.cpp | 14 +++++------ clang/lib/CodeGen/Targets/PPC.cpp | 5 ++-- 7 files changed, 51 insertions(+), 41 deletions(-) diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index 6f5784d7d4a64..2152b685411fa 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -15033,7 +15033,10 @@ void 
ASTContext::getFunctionFeatureMap(llvm::StringMap<bool> &FeatureMap, StringRef VersionStr = TC->getFeatureStr(GD.getMultiVersionIndex()); if (VersionStr.starts_with("arch=")) TargetCPU = VersionStr.drop_front(sizeof("arch=") - 1); - else if (Target->getTriple().isOSAIX() && VersionStr.starts_with("cpu=")) // TODO make a function that extracts CPU from a feature string + else if (Target->getTriple().isOSAIX() && + VersionStr.starts_with( + "cpu=")) // TODO make a function that extracts CPU from a + // feature string TargetCPU = VersionStr.drop_front(sizeof("cpu=") - 1); else if (VersionStr != "default") Features.push_back((StringRef{"+"} + VersionStr).str()); diff --git a/clang/lib/Basic/Targets/PPC.cpp b/clang/lib/Basic/Targets/PPC.cpp index dd51b46727a6a..5b86f84264905 100644 --- a/clang/lib/Basic/Targets/PPC.cpp +++ b/clang/lib/Basic/Targets/PPC.cpp @@ -698,8 +698,7 @@ ParsedTargetAttr PPCTargetInfo::parseTargetAttr(StringRef Features) const { Ret.Duplicate = "cpu="; else Ret.CPU = Feature.split("=").second.trim(); - } - else if (Feature.starts_with("tune=")) { + } else if (Feature.starts_with("tune=")) { if (!Ret.Tune.empty()) Ret.Duplicate = "tune="; else diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp index e4ef527a536d3..882eea2f52361 100644 --- a/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/clang/lib/CodeGen/CodeGenFunction.cpp @@ -3036,7 +3036,8 @@ void CodeGenFunction::EmitMultiVersionResolver( } [[fallthrough]]; default: - assert(false && "Only implemented for x86, AArch64, RISC-V, and PowerPC targets"); + assert(false && + "Only implemented for x86, AArch64, RISC-V, and PowerPC targets"); } } @@ -3049,7 +3050,7 @@ void CodeGenFunction::EmitMultiVersionResolver( * return ((int (*)(int)) foo_desc)(a); */ void CodeGenFunction::EmitPPCAIXMultiVersionResolver( - llvm::Function *Resolver, ArrayRef<FMVResolverOption> Options) { + llvm::Function *Resolver, ArrayRef<FMVResolverOption> Options) { llvm::PointerType *PtrTy = 
Builder.getPtrTy(); // entry: @@ -3063,7 +3064,8 @@ void CodeGenFunction::EmitPPCAIXMultiVersionResolver( // if.default: // %fmv.default = call ptr @getEntryPoint(ptr noundef @foo_default) // br label %resolver_exit - assert(&RO == Options.end() - 1 && "Default or Generic case must be last"); + assert(&RO == Options.end() - 1 && + "Default or Generic case must be last"); Builder.CreateRet(RO.Function); break; } @@ -3074,12 +3076,16 @@ void CodeGenFunction::EmitPPCAIXMultiVersionResolver( // if.version_n: // %fmv.version.n = call ptr @getEntryPoint(ptr noundef @foo_version_n) // br label %resolver_exit - assert(RO.Features.size() == 1 && "for now one feature requirement per version"); + assert(RO.Features.size() == 1 && + "for now one feature requirement per version"); llvm::Value *Condition; if (RO.Features[0].starts_with("cpu=")) { - Condition = EmitPPCBuiltinCpu(Builtin::BI__builtin_cpu_is, Builder.getInt1Ty(), RO.Features[0].split("=").second.trim()); + Condition = + EmitPPCBuiltinCpu(Builtin::BI__builtin_cpu_is, Builder.getInt1Ty(), + RO.Features[0].split("=").second.trim()); } else { - Condition = EmitPPCBuiltinCpu(Builtin::BI__builtin_cpu_supports, Builder.getInt1Ty(), RO.Features[0]); + Condition = EmitPPCBuiltinCpu(Builtin::BI__builtin_cpu_supports, + Builder.getInt1Ty(), RO.Features[0]); } llvm::BasicBlock *ThenBlock = createBasicBlock("if.version", Resolver); CurBlock = createBasicBlock("if.else", Resolver); @@ -3090,15 +3096,14 @@ void CodeGenFunction::EmitPPCAIXMultiVersionResolver( } // If no generic/default, emit an unreachable. 
-// Builder.SetInsertPoint(CurBlock); -// llvm::CallInst *TrapCall = EmitTrapCall(llvm::Intrinsic::trap); -// TrapCall->setDoesNotReturn(); -// TrapCall->setDoesNotThrow(); -// Builder.CreateUnreachable(); -// Builder.ClearInsertionPoint(); + // Builder.SetInsertPoint(CurBlock); + // llvm::CallInst *TrapCall = EmitTrapCall(llvm::Intrinsic::trap); + // TrapCall->setDoesNotReturn(); + // TrapCall->setDoesNotThrow(); + // Builder.CreateUnreachable(); + // Builder.ClearInsertionPoint(); } - void CodeGenFunction::EmitRISCVMultiVersionResolver( llvm::Function *Resolver, ArrayRef<FMVResolverOption> Options) { diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 7fb1b634f857f..226e22cb3992e 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -4889,7 +4889,8 @@ class CodeGenFunction : public CodeGenTypeCache { llvm::Value *BuildVector(ArrayRef<llvm::Value *> Ops); llvm::Value *EmitX86BuiltinExpr(unsigned BuiltinID, const CallExpr *E); - llvm::Value *EmitPPCBuiltinCpu(unsigned BuiltinID, llvm::Type *ReturnType, StringRef CPUStr); + llvm::Value *EmitPPCBuiltinCpu(unsigned BuiltinID, llvm::Type *ReturnType, + StringRef CPUStr); llvm::Value *EmitPPCBuiltinExpr(unsigned BuiltinID, const CallExpr *E); llvm::Value *EmitAMDGPUBuiltinExpr(unsigned BuiltinID, const CallExpr *E); llvm::Value *EmitHLSLBuiltinExpr(unsigned BuiltinID, const CallExpr *E, @@ -5558,7 +5559,7 @@ class CodeGenFunction : public CodeGenTypeCache { ArrayRef<FMVResolverOption> Options); void EmitPPCAIXMultiVersionResolver(llvm::Function *Resolver, - ArrayRef<FMVResolverOption> Options); + ArrayRef<FMVResolverOption> Options); private: QualType getVarArgType(const Expr *Arg); diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 00c82a7b70ef9..a2c81ab00b021 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -2094,7 +2094,8 @@ static bool 
isUniqueInternalLinkageDecl(GlobalDecl GD, // On certain platforms, a declared (but not defined) FMV shall be treated // like a regular non-FMV function. -static bool IgnoreFMVOnADeclaration(const llvm::Triple &Triple, const FunctionDecl *FD) { +static bool IgnoreFMVOnADeclaration(const llvm::Triple &Triple, + const FunctionDecl *FD) { if (!FD->isMultiVersion()) return false; @@ -2156,7 +2157,7 @@ static std::string getMangledNameImpl(CodeGenModule &CGM, GlobalDecl GD, if (const auto *FD = dyn_cast<FunctionDecl>(ND)) { if (FD->isMultiVersion() && !OmitMultiVersionMangling && - !IgnoreFMVOnADeclaration(CGM.getTriple(), FD)) { + !IgnoreFMVOnADeclaration(CGM.getTriple(), FD)) { switch (FD->getMultiVersionKind()) { case MultiVersionKind::CPUDispatch: case MultiVersionKind::CPUSpecific: @@ -3023,8 +3024,9 @@ bool CodeGenModule::GetCPUAndFeaturesAttributes(GlobalDecl GD, // While we populated the feature map above, we still need to // get and parse the target attribute so we can get the cpu for // the function. - StringRef FeatureStr = TD ? TD->getFeaturesStr() : - (TC ? TC->getFeatureStr(GD.getMultiVersionIndex()) : StringRef()); + StringRef FeatureStr = + TD ? TD->getFeaturesStr() + : (TC ? 
TC->getFeatureStr(GD.getMultiVersionIndex()) : StringRef()); if (!FeatureStr.empty()) { ParsedTargetAttr ParsedAttr = Target.parseTargetAttr(FeatureStr); if (!ParsedAttr.CPU.empty() && @@ -4725,8 +4727,7 @@ getFMVPriority(const TargetInfo &TI, static llvm::GlobalValue::LinkageTypes getMultiversionLinkage(CodeGenModule &CGM, GlobalDecl GD) { const FunctionDecl *FD = cast<FunctionDecl>(GD.getDecl()); - if (FD->getFormalLinkage() == Linkage::Internal || - CGM.getTriple().isOSAIX()) + if (FD->getFormalLinkage() == Linkage::Internal || CGM.getTriple().isOSAIX()) return llvm::GlobalValue::InternalLinkage; return llvm::GlobalValue::WeakODRLinkage; } @@ -4813,7 +4814,7 @@ void CodeGenModule::emitMultiVersionFunctions() { ResolverConstant = IFunc->getResolver(); if (FD->isTargetClonesMultiVersion() && !getTarget().getTriple().isAArch64() && - !getTarget().getTriple().isOSAIX()) { + !getTarget().getTriple().isOSAIX()) { std::string MangledName = getMangledNameImpl( *this, GD, FD, /*OmitMultiVersionMangling=*/true); if (!GetGlobalValue(MangledName + ".ifunc")) { @@ -5080,11 +5081,11 @@ llvm::Constant *CodeGenModule::GetOrCreateMultiVersionResolver(GlobalDecl GD) { MangledName + ".resolver", ResolverType, GlobalDecl{}, /*ForVTable=*/false); - auto Linkage = getTriple().isOSAIX() ? getFunctionLinkage(GD) : getMultiversionLinkage(*this, GD); + auto Linkage = getTriple().isOSAIX() ? getFunctionLinkage(GD) + : getMultiversionLinkage(*this, GD); - llvm::GlobalIFunc *GIF = - llvm::GlobalIFunc::create(DeclTy, AS, Linkage, - "", Resolver, &getModule()); + llvm::GlobalIFunc *GIF = llvm::GlobalIFunc::create(DeclTy, AS, Linkage, "", + Resolver, &getModule()); GIF->setName(ResolverName); SetCommonAttributes(FD, GIF); if (ResolverGV) @@ -5104,7 +5105,9 @@ void CodeGenModule::setMultiVersionResolverAttributes(llvm::Function *Resolver, GlobalDecl GD) { const NamedDecl *D = dyn_cast_or_null<NamedDecl>(GD.getDecl()); - auto ResolverLinkage = getTriple().isOSAIX() ? 
llvm::GlobalValue::InternalLinkage : getMultiversionLinkage(*this, GD); + auto ResolverLinkage = getTriple().isOSAIX() + ? llvm::GlobalValue::InternalLinkage + : getMultiversionLinkage(*this, GD); Resolver->setLinkage(ResolverLinkage); // Function body has to be emitted before calling setGlobalVisibility @@ -6649,7 +6652,7 @@ void CodeGenModule::EmitGlobalFunctionDefinition(GlobalDecl GD, setFunctionLinkage(GD, Fn); if (getTriple().isOSAIX() && D->isTargetClonesMultiVersion()) - Fn->setLinkage(llvm::GlobalValue::InternalLinkage); + Fn->setLinkage(llvm::GlobalValue::InternalLinkage); // FIXME: this is redundant with part of setFunctionDefinitionAttributes setGVProperties(Fn, GD); diff --git a/clang/lib/CodeGen/TargetBuiltins/PPC.cpp b/clang/lib/CodeGen/TargetBuiltins/PPC.cpp index 8360a17c470dd..a730de0fb856d 100644 --- a/clang/lib/CodeGen/TargetBuiltins/PPC.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/PPC.cpp @@ -70,8 +70,9 @@ static llvm::Value *emitPPCLoadReserveIntrinsic(CodeGenFunction &CGF, return CI; } -Value *CodeGenFunction::EmitPPCBuiltinCpu( - unsigned BuiltinID, llvm::Type *ReturnType, StringRef CPUStr) { +Value *CodeGenFunction::EmitPPCBuiltinCpu(unsigned BuiltinID, + llvm::Type *ReturnType, + StringRef CPUStr) { #include "llvm/TargetParser/PPCTargetParser.def" auto GenAIXPPCBuiltinCpuExpr = [&](unsigned SupportMethod, unsigned FieldIdx, @@ -159,8 +160,7 @@ Value *CodeGenFunction::EmitPPCBuiltinCpu( Value *TheCall = Builder.CreateCall(F, {Op0}, "cpu_is"); return Builder.CreateICmpEQ(TheCall, llvm::ConstantInt::get(Int32Ty, LinuxIDValue)); - } - else if (BuiltinID == Builtin::BI__builtin_cpu_supports) { + } else if (BuiltinID == Builtin::BI__builtin_cpu_supports) { llvm::Triple Triple = getTarget().getTriple(); if (Triple.isOSAIX()) { typedef std::tuple<unsigned, unsigned, unsigned, CmpInst::Predicate, @@ -197,8 +197,7 @@ Value *CodeGenFunction::EmitPPCBuiltinCpu( #undef PPC_FAWORD_HWCAP #undef PPC_FAWORD_HWCAP2 #undef PPC_FAWORD_CPUID - } - else + } else 
assert(0 && "unexpected builtin"); } @@ -219,7 +218,8 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, Intrinsic::ID ID = Intrinsic::not_intrinsic; switch (BuiltinID) { - default: return nullptr; + default: + return nullptr; case Builtin::BI__builtin_cpu_is: case Builtin::BI__builtin_cpu_supports: { diff --git a/clang/lib/CodeGen/Targets/PPC.cpp b/clang/lib/CodeGen/Targets/PPC.cpp index bc357e0908a5e..61d110e3c7ae3 100644 --- a/clang/lib/CodeGen/Targets/PPC.cpp +++ b/clang/lib/CodeGen/Targets/PPC.cpp @@ -136,8 +136,7 @@ class AIXABIInfo : public ABIInfo { raw_ostream &Out) const override; }; -void AIXABIInfo::appendAttributeMangling(TargetClonesAttr *Attr, - unsigned Index, +void AIXABIInfo::appendAttributeMangling(TargetClonesAttr *Attr, unsigned Index, raw_ostream &Out) const { appendAttributeMangling(Attr->getFeatureStr(Index), Out); } @@ -159,7 +158,7 @@ void AIXABIInfo::appendAttributeMangling(StringRef AttrStr, assert(LHS.starts_with("+") && RHS.starts_with("+") && "Features should always have a prefix."); return TI.getFMVPriority({LHS.substr(1)}) - .ugt(TI.getFMVPriority({RHS.substr(1)})); + .ugt(TI.getFMVPriority({RHS.substr(1)})); }); bool IsFirst = true; >From e7d14991b3fc6fa028ecc91049698b8eaf880ee7 Mon Sep 17 00:00:00 2001 From: Wael Yehia <[email protected]> Date: Fri, 6 Feb 2026 04:12:47 +0000 Subject: [PATCH 07/21] normalize the CPU name on the target* attribute --- clang/lib/Basic/Targets/PPC.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/clang/lib/Basic/Targets/PPC.cpp b/clang/lib/Basic/Targets/PPC.cpp index 5b86f84264905..53c7af1b57767 100644 --- a/clang/lib/Basic/Targets/PPC.cpp +++ b/clang/lib/Basic/Targets/PPC.cpp @@ -708,6 +708,8 @@ ParsedTargetAttr PPCTargetInfo::parseTargetAttr(StringRef Features) const { else Ret.Features.push_back("+" + Feature.str()); } + Ret.CPU = llvm::PPC::normalizeCPUName(Ret.CPU); + Ret.Tune = llvm::PPC::normalizeCPUName(Ret.Tune); return Ret; } >From 2501fcbe5da5631d4f9eec5a5798e4178af93dae 
Mon Sep 17 00:00:00 2001 From: Wael Yehia <[email protected]> Date: Tue, 3 Mar 2026 17:10:55 +0000 Subject: [PATCH 08/21] limit support to cpu-only versions, and implement getFMVPriority --- clang/lib/AST/ASTContext.cpp | 13 ++++++----- clang/lib/Basic/Targets/PPC.cpp | 18 ++++++++++++++- clang/lib/CodeGen/CodeGenFunction.cpp | 32 +++++++++++++-------------- clang/lib/CodeGen/Targets/PPC.cpp | 24 ++++---------------- clang/lib/Sema/SemaPPC.cpp | 13 ++++++----- 5 files changed, 52 insertions(+), 48 deletions(-) diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index 2152b685411fa..98463ab05e7a7 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -15028,16 +15028,19 @@ void ASTContext::getFunctionFeatureMap(llvm::StringMap<bool> &FeatureMap, Target->getTargetOpts().FeaturesAsWritten.begin(), Target->getTargetOpts().FeaturesAsWritten.end()); Target->initFeatureMap(FeatureMap, getDiagnostics(), TargetCPU, Features); + } else if (Target->getTriple().isOSAIX()) { + std::vector<std::string> Features; + StringRef VersionStr = TC->getFeatureStr(GD.getMultiVersionIndex()); + if (VersionStr.starts_with("cpu=")) + TargetCPU = VersionStr.drop_front(sizeof("cpu=") - 1); + else + assert(VersionStr == "default"); + Target->initFeatureMap(FeatureMap, getDiagnostics(), TargetCPU, Features); } else { std::vector<std::string> Features; StringRef VersionStr = TC->getFeatureStr(GD.getMultiVersionIndex()); if (VersionStr.starts_with("arch=")) TargetCPU = VersionStr.drop_front(sizeof("arch=") - 1); - else if (Target->getTriple().isOSAIX() && - VersionStr.starts_with( - "cpu=")) // TODO make a function that extracts CPU from a - // feature string - TargetCPU = VersionStr.drop_front(sizeof("cpu=") - 1); else if (VersionStr != "default") Features.push_back((StringRef{"+"} + VersionStr).str()); Target->initFeatureMap(FeatureMap, getDiagnostics(), TargetCPU, Features); diff --git a/clang/lib/Basic/Targets/PPC.cpp 
b/clang/lib/Basic/Targets/PPC.cpp index 53c7af1b57767..fbff0af711b13 100644 --- a/clang/lib/Basic/Targets/PPC.cpp +++ b/clang/lib/Basic/Targets/PPC.cpp @@ -714,7 +714,23 @@ ParsedTargetAttr PPCTargetInfo::parseTargetAttr(StringRef Features) const { } llvm::APInt PPCTargetInfo::getFMVPriority(ArrayRef<StringRef> Features) const { - return llvm::APInt(32, Features.empty() ? 0 : 1); + if (Features.empty()) + return llvm::APInt(32, 0); + assert(Features.size() == 1 && "one feature/cpu per clone on PowerPC"); + ParsedTargetAttr ParsedAttr = parseTargetAttr(Features[0]); + if (!ParsedAttr.CPU.empty()) { + StringRef CPU = llvm::PPC::normalizeCPUName(ParsedAttr.CPU); + int Priority = llvm::StringSwitch<int>(CPU) + .Case("pwr7", 1) + .Case("pwr8", 2) + .Case("pwr9", 3) + .Case("pwr10", 4) + .Case("pwr11", 5) + .Default(0); + return llvm::APInt(32, Priority); + } + assert(false && "unimplemented"); + return llvm::APInt(32, 0); } // Make sure that registers are added in the correct array index which should be diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp index 882eea2f52361..5897123f4019d 100644 --- a/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/clang/lib/CodeGen/CodeGenFunction.cpp @@ -3041,13 +3041,8 @@ void CodeGenFunction::EmitMultiVersionResolver( } } -/* - * Desc_t *foo_desc = ppc_get_function_descriptor(&foo); - * if (foo_desc->addr == ppc_get_function_entry(&foo)) { - * FuncPtr fp = resolver(); - * __c11_atomic_store((_Atomic FuncPtr *)&foo_desc->addr, fp, 0); - * } - * return ((int (*)(int)) foo_desc)(a); +/** + * */ void CodeGenFunction::EmitPPCAIXMultiVersionResolver( llvm::Function *Resolver, ArrayRef<FMVResolverOption> Options) { @@ -3078,15 +3073,20 @@ void CodeGenFunction::EmitPPCAIXMultiVersionResolver( // br label %resolver_exit assert(RO.Features.size() == 1 && "for now one feature requirement per version"); - llvm::Value *Condition; - if (RO.Features[0].starts_with("cpu=")) { - Condition = - 
EmitPPCBuiltinCpu(Builtin::BI__builtin_cpu_is, Builder.getInt1Ty(), - RO.Features[0].split("=").second.trim()); - } else { - Condition = EmitPPCBuiltinCpu(Builtin::BI__builtin_cpu_supports, - Builder.getInt1Ty(), RO.Features[0]); - } + + assert(RO.Features[0].starts_with("cpu=")); + StringRef CPU = RO.Features[0].split("=").second.trim(); + StringRef Feature = llvm::StringSwitch<StringRef>(CPU) + .Cases({"power7","pwr7"}, "arch_2_06") + .Cases({"power8","pwr8"}, "arch_2_07") + .Cases({"power9","pwr9"}, "arch_3_00") + .Cases({"power10","pwr10"}, "arch_3_1") + .Cases({"power11","pwr11"}, "arch_3_1") + .Default("error"); + + llvm::Value *Condition = EmitPPCBuiltinCpu( + Builtin::BI__builtin_cpu_supports, Builder.getInt1Ty(), Feature); + llvm::BasicBlock *ThenBlock = createBasicBlock("if.version", Resolver); CurBlock = createBasicBlock("if.else", Resolver); Builder.CreateCondBr(Condition, ThenBlock, CurBlock); diff --git a/clang/lib/CodeGen/Targets/PPC.cpp b/clang/lib/CodeGen/Targets/PPC.cpp index 61d110e3c7ae3..6694725277144 100644 --- a/clang/lib/CodeGen/Targets/PPC.cpp +++ b/clang/lib/CodeGen/Targets/PPC.cpp @@ -148,32 +148,16 @@ void AIXABIInfo::appendAttributeMangling(StringRef AttrStr, return; } - Out << '.'; const TargetInfo &TI = CGT.getTarget(); ParsedTargetAttr Info = TI.parseTargetAttr(AttrStr); - llvm::sort(Info.Features, [&TI](StringRef LHS, StringRef RHS) { - // Multiversioning doesn't allow "no-${feature}", so we can - // only have "+" prefixes here. 
- assert(LHS.starts_with("+") && RHS.starts_with("+") && - "Features should always have a prefix."); - return TI.getFMVPriority({LHS.substr(1)}) - .ugt(TI.getFMVPriority({RHS.substr(1)})); - }); - - bool IsFirst = true; if (!Info.CPU.empty()) { - IsFirst = false; - Out << "cpu_" << Info.CPU; + assert(Info.Features.empty() && "cannot have both a CPU and a feature"); + Out << ".cpu_" << Info.CPU; + return; } - assert(Info.Features.empty() && "unhandled case"); - for (StringRef Feat : Info.Features) { - if (!IsFirst) - Out << '_'; - IsFirst = false; - Out << Feat.substr(1); - } + assert(false && "specifying target features on an FMV is not supported on AIX"); } class AIXTargetCodeGenInfo : public TargetCodeGenInfo { diff --git a/clang/lib/Sema/SemaPPC.cpp b/clang/lib/Sema/SemaPPC.cpp index 2fd6a3e911fd1..46c9cf172fbc6 100644 --- a/clang/lib/Sema/SemaPPC.cpp +++ b/clang/lib/Sema/SemaPPC.cpp @@ -597,16 +597,17 @@ bool SemaPPC::checkTargetClonesAttr( return Diag(CurLoc, diag::warn_unsupported_target_attribute) << Unsupported << CPU << LHS.drop_front(sizeof("cpu=") - 1) << TargetClones; - } else if (LHS == "default") + } else if (LHS == "default") { HasDefault = true; - else if (!getASTContext().getTargetInfo().isValidFeatureName(LHS) || - getASTContext().getTargetInfo().getFMVPriority(LHS) == 0) + } else if (!getASTContext().getTargetInfo().isValidFeatureName(LHS) || + getASTContext().getTargetInfo().getFMVPriority(LHS) == 0) { return Diag(CurLoc, diag::warn_unsupported_target_attribute) << Unsupported << None << LHS << TargetClones; - - if (llvm::is_contained(NewParams, LHS)) + } + if (llvm::is_contained(NewParams, LHS)) { Diag(CurLoc, diag::warn_target_clone_duplicate_options); - // Note: Add even if there are duplicates, since it changes name mangling. 
+ continue; + } NewParams.push_back(LHS); } while (!RHS.empty()); } >From 0fe0be1a63026ba0cd076fb579d9805e289517ba Mon Sep 17 00:00:00 2001 From: Wael Yehia <[email protected]> Date: Mon, 9 Mar 2026 18:35:26 +0000 Subject: [PATCH 09/21] Handle case when an FMV function is declared, used, then defined by: fixing getMangledNameImpl such that it does not need to special case for FMV declarations because GetOrCreateLLVMFunction already can return the non-mangled name of declared FMV functions --- clang/lib/CodeGen/CodeGenModule.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index a2c81ab00b021..0760b92af3dfe 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -2156,8 +2156,7 @@ static std::string getMangledNameImpl(CodeGenModule &CGM, GlobalDecl GD, } if (const auto *FD = dyn_cast<FunctionDecl>(ND)) { - if (FD->isMultiVersion() && !OmitMultiVersionMangling && - !IgnoreFMVOnADeclaration(CGM.getTriple(), FD)) { + if (FD->isMultiVersion() && !OmitMultiVersionMangling) { switch (FD->getMultiVersionKind()) { case MultiVersionKind::CPUDispatch: case MultiVersionKind::CPUSpecific: @@ -5188,7 +5187,6 @@ llvm::Constant *CodeGenModule::GetOrCreateLLVMFunction( NameWithoutMultiVersionMangling = getMangledNameImpl( *this, GD, FD, /*OmitMultiVersionMangling=*/true); } else if (IgnoreFMVOnADeclaration(getTriple(), FD)) { - // TODO this might not be necessary after fix in getMangledNameImpl NameWithoutMultiVersionMangling = getMangledNameImpl( *this, GD, FD, /*OmitMultiVersionMangling=*/true); } else >From e6f34146aa90f417633ac19386b2ebac64354ead Mon Sep 17 00:00:00 2001 From: Wael Yehia <[email protected]> Date: Tue, 3 Mar 2026 19:04:28 -0500 Subject: [PATCH 10/21] test test test --- clang/lib/CodeGen/CodeGenFunction.cpp | 23 +++- clang/test/CodeGen/attr-target-clones-ppc.c | 116 ++++++++++++++++++++ 2 files changed, 133 insertions(+), 6 
deletions(-) create mode 100644 clang/test/CodeGen/attr-target-clones-ppc.c diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp index 5897123f4019d..6287095211099 100644 --- a/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/clang/lib/CodeGen/CodeGenFunction.cpp @@ -3042,7 +3042,20 @@ void CodeGenFunction::EmitMultiVersionResolver( } /** + * define internal ptr @foo.resolver() { + * entry: + * %is_version_1 = __builtin_cpu_supports(version_1) + * br i1 %1, label %if.version_1, label %if.else_2 * + * if.version_1: + * ret ptr @foo.version_1 + * + * if.else_2: + * %is_version_2 = __builtin_cpu_supports(version_2) + * ... + * if.else: ; preds = %entry + * ret ptr @foo.default + * } */ void CodeGenFunction::EmitPPCAIXMultiVersionResolver( llvm::Function *Resolver, ArrayRef<FMVResolverOption> Options) { @@ -3056,9 +3069,8 @@ void CodeGenFunction::EmitPPCAIXMultiVersionResolver( Builder.SetInsertPoint(CurBlock); // The 'default' or 'generic' case. if (!RO.Architecture && RO.Features.empty()) { - // if.default: - // %fmv.default = call ptr @getEntryPoint(ptr noundef @foo_default) - // br label %resolver_exit + // if.else: + // ret ptr @foo.default assert(&RO == Options.end() - 1 && "Default or Generic case must be last"); Builder.CreateRet(RO.Function); @@ -3066,11 +3078,10 @@ void CodeGenFunction::EmitPPCAIXMultiVersionResolver( } // if.else_n: // %is_version_n = __builtin_cpu_supports(version_n) - // br i1 %is_version_n, label %if.version_n, label %if.default + // br i1 %is_version_n, label %if.version_n, label %if.else_n+1 // // if.version_n: - // %fmv.version.n = call ptr @getEntryPoint(ptr noundef @foo_version_n) - // br label %resolver_exit + // ret ptr @foo_version_n assert(RO.Features.size() == 1 && "for now one feature requirement per version"); diff --git a/clang/test/CodeGen/attr-target-clones-ppc.c b/clang/test/CodeGen/attr-target-clones-ppc.c new file mode 100644 index 0000000000000..08d54a391b58f --- /dev/null +++ 
b/clang/test/CodeGen/attr-target-clones-ppc.c @@ -0,0 +1,116 @@ +// RUN: %clang_cc1 -triple powerpc-ibm-aix-xcoff -target-cpu pwr7 -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -triple powerpc64-ibm-aix-xcoff -target-cpu pwr7 -emit-llvm %s -o - | FileCheck %s + +// CHECK: @internal = internal ifunc i32 (), ptr @internal.resolver +// CHECK: @foo = ifunc i32 (), ptr @foo.resolver +// CHECK: @foo_dupes = ifunc void (), ptr @foo_dupes.resolver +// CHECK: @unused = ifunc void (), ptr @unused.resolver +// CHECK: @foo_inline = linkonce ifunc i32 (), ptr @foo_inline.resolver +// CHECK: @foo_ref_then_def = ifunc i32 (), ptr @foo_ref_then_def.resolver +// CHECK: @foo_priority = ifunc i32 (i32), ptr @foo_priority.resolver +// CHEECK: @isa_level = ifunc i32 (i32), ptr @isa_level.resolver + + +static int __attribute__((target_clones("cpu=power10, default"))) internal(void) { return 0; } +int use(void) { return internal(); } +// CHECK: define internal ptr @internal.resolver() + +int __attribute__((target_clones("cpu=power10, default"))) foo(void) { return 0; } +// CHECK: define internal {{.*}}i32 @foo.cpu_pwr10() #[[#ATTR_P10:]] +// CHECK: define internal {{.*}}i32 @foo.default() #[[#ATTR_P7:]] +// CHECK: define internal ptr @foo.resolver() +// CHECK: ret ptr @foo.cpu_pwr10 +// CHECK: ret ptr @foo.default + +__attribute__((target_clones("default,default ,cpu=pwr8"))) void foo_dupes(void) {} +// CHECK: define internal void @foo_dupes.default() #[[#ATTR_P7]] +// CHECK: define internal void @foo_dupes.cpu_pwr8() #[[#ATTR_P8:]] +// CHECK: define internal ptr @foo_dupes.resolver() +// CHECK: ret ptr @foo_dupes.cpu_pwr8 +// CHECK: ret ptr @foo_dupes.default + +void bar2(void) { + // CHECK: define {{.*}}void @bar2() + foo_dupes(); + // CHECK: call void @foo_dupes() +} + +int bar(void) { + // CHECK: define {{.*}}i32 @bar() + return foo(); + // CHECK: call {{.*}}i32 @foo() +} + +void __attribute__((target_clones("default, cpu=pwr9"))) unused(void) {} +// CHECK: define internal 
void @unused.default() #[[#ATTR_P7]] +// CHECK: define internal void @unused.cpu_pwr9() #[[#ATTR_P9:]] +// CHECK: define internal ptr @unused.resolver() +// CHECK: ret ptr @unused.cpu_pwr9 +// CHECK: ret ptr @unused.default + +int __attribute__((target_clones("cpu=power10, default"))) inherited(void); +int inherited(void) { return 0; } +// CHECK: define internal {{.*}}i32 @inherited.cpu_pwr10() #[[#ATTR_P10]] +// CHECK: define internal {{.*}}i32 @inherited.default() #[[#ATTR_P7]] +// CHECK: define internal ptr @inherited.resolver() +// CHECK: ret ptr @inherited.cpu_pwr10 +// CHECK: ret ptr @inherited.default + + +int test_inherited(void) { + // CHECK: define {{.*}}i32 @test_inherited() + return inherited(); + // CHECK: call {{.*}}i32 @inherited() +} + +inline int __attribute__((target_clones("default,cpu=pwr8"))) +foo_inline(void) { return 0; } +int __attribute__((target_clones("cpu=pwr7,default"))) +foo_ref_then_def(void); + +int bar3(void) { + // CHECK: define {{.*}}i32 @bar3() + return foo_inline() + foo_ref_then_def(); + // CHECK: call {{.*}}i32 @foo_inline() + // CHECK: call {{.*}}i32 @foo_ref_then_def() +} + +// CHECK: define internal ptr @foo_inline.resolver() +// CHECK: ret ptr @foo_inline.cpu_pwr8 +// CHECK: ret ptr @foo_inline.default + +int __attribute__((target_clones("cpu=pwr7,default"))) +foo_ref_then_def(void){ return 0; } +// CHECK: define internal ptr @foo_ref_then_def.resolver() +// CHECK: ret ptr @foo_ref_then_def.cpu_pwr7 +// CHECK: ret ptr @foo_ref_then_def.default + +int __attribute__((target_clones("default", "cpu=pwr8"))) +foo_unused_no_defn(void); +// CHECK-NOT: foo_unused_no_defn + +int __attribute__((target_clones("default", "cpu=pwr9"))) +foo_used_no_defn(void); + +int test_foo_used_no_defn(void) { + // CHECK: define {{.*}}i32 @test_foo_used_no_defn() + return foo_used_no_defn(); + // CHECK: call {{.*}}i32 @foo_used_no_defn() +} +// CHECK: declare {{.*}}i32 @foo_used_no_defn() + +// test that the CPU checks are done in most to least 
restrictive (highest to lowest CPU) +int __attribute__((target_clones("cpu=pwr7", "cpu=pwr9", "default", "cpu=pwr8"))) +foo_priority(int x) { return x & (x - 1); } +// CHECK: define internal ptr @foo_priority.resolver() +// CHECK: ret ptr @foo_priority.cpu_pwr9 +// CHECK: ret ptr @foo_priority.cpu_pwr8 +// CHECK: ret ptr @foo_priority.cpu_pwr7 +// CHECK: ret ptr @foo_priority.default + + +// CHECK: attributes #[[#ATTR_P7]] = {{.*}} "target-cpu"="pwr7" +// CHECK: attributes #[[#ATTR_P10]] = {{.*}} "target-cpu"="pwr10" +// CHECK: attributes #[[#ATTR_P8]] = {{.*}} "target-cpu"="pwr8" +// CHECK: attributes #[[#ATTR_P9]] = {{.*}} "target-cpu"="pwr9" + >From 3ef7c2773de0a3a9b6422e4395d5abcc61fb7a62 Mon Sep 17 00:00:00 2001 From: Wael Yehia <[email protected]> Date: Tue, 10 Mar 2026 04:02:06 +0000 Subject: [PATCH 11/21] clang-format --- clang/lib/CodeGen/CodeGenFunction.cpp | 12 ++++++------ clang/lib/CodeGen/Targets/PPC.cpp | 2 +- clang/lib/Sema/SemaPPC.cpp | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp index 6287095211099..84030cb421a40 100644 --- a/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/clang/lib/CodeGen/CodeGenFunction.cpp @@ -3088,12 +3088,12 @@ void CodeGenFunction::EmitPPCAIXMultiVersionResolver( assert(RO.Features[0].starts_with("cpu=")); StringRef CPU = RO.Features[0].split("=").second.trim(); StringRef Feature = llvm::StringSwitch<StringRef>(CPU) - .Cases({"power7","pwr7"}, "arch_2_06") - .Cases({"power8","pwr8"}, "arch_2_07") - .Cases({"power9","pwr9"}, "arch_3_00") - .Cases({"power10","pwr10"}, "arch_3_1") - .Cases({"power11","pwr11"}, "arch_3_1") - .Default("error"); + .Cases({"power7", "pwr7"}, "arch_2_06") + .Cases({"power8", "pwr8"}, "arch_2_07") + .Cases({"power9", "pwr9"}, "arch_3_00") + .Cases({"power10", "pwr10"}, "arch_3_1") + .Cases({"power11", "pwr11"}, "arch_3_1") + .Default("error"); llvm::Value *Condition = EmitPPCBuiltinCpu(
Builtin::BI__builtin_cpu_supports, Builder.getInt1Ty(), Feature); diff --git a/clang/lib/CodeGen/Targets/PPC.cpp b/clang/lib/CodeGen/Targets/PPC.cpp index 6694725277144..ab069bfbd1b51 100644 --- a/clang/lib/CodeGen/Targets/PPC.cpp +++ b/clang/lib/CodeGen/Targets/PPC.cpp @@ -157,7 +157,7 @@ void AIXABIInfo::appendAttributeMangling(StringRef AttrStr, return; } - assert(false && "specifying target features on an FMV is not supported on AIX"); + assert(0 && "specifying target features on an FMV is unsupported on AIX"); } class AIXTargetCodeGenInfo : public TargetCodeGenInfo { diff --git a/clang/lib/Sema/SemaPPC.cpp b/clang/lib/Sema/SemaPPC.cpp index 46c9cf172fbc6..705cfa4c4d739 100644 --- a/clang/lib/Sema/SemaPPC.cpp +++ b/clang/lib/Sema/SemaPPC.cpp @@ -600,7 +600,7 @@ bool SemaPPC::checkTargetClonesAttr( } else if (LHS == "default") { HasDefault = true; } else if (!getASTContext().getTargetInfo().isValidFeatureName(LHS) || - getASTContext().getTargetInfo().getFMVPriority(LHS) == 0) { + getASTContext().getTargetInfo().getFMVPriority(LHS) == 0) { return Diag(CurLoc, diag::warn_unsupported_target_attribute) << Unsupported << None << LHS << TargetClones; } >From de2f0abe4472c9dc0f5f7325e757d61d63b6f930 Mon Sep 17 00:00:00 2001 From: Wael Yehia <[email protected]> Date: Tue, 10 Mar 2026 14:49:11 +0000 Subject: [PATCH 12/21] add IR codegen test --- clang/test/CodeGen/attr-target-clones-ppc.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/clang/test/CodeGen/attr-target-clones-ppc.c b/clang/test/CodeGen/attr-target-clones-ppc.c index 08d54a391b58f..05a9b788f701a 100644 --- a/clang/test/CodeGen/attr-target-clones-ppc.c +++ b/clang/test/CodeGen/attr-target-clones-ppc.c @@ -99,16 +99,31 @@ int test_foo_used_no_defn(void) { } // CHECK: declare {{.*}}i32 @foo_used_no_defn() -// test that the CPU checks are done in most to least restrictive (highest to lowest CPU) -int __attribute__((target_clones("cpu=pwr7", "cpu=pwr9", "default",
"cpu=pwr8"))) +// Test that the CPU conditions are checked from the most to the least +// restrictive (highest to lowest CPU). Also test the codegen for the +// conditions +int __attribute__((target_clones("cpu=pwr10", "cpu=pwr7", "cpu=pwr9", "default", "cpu=pwr8"))) foo_priority(int x) { return x & (x - 1); } // CHECK: define internal ptr @foo_priority.resolver() +// CHECK-NEXT: entry +// if (__builtin_cpu_supports("arch_3_1")) return &foo_priority.cpu_pwr10; +// CHECK-NEXT: %[[#L1:]] = load i32, {{.*}} ptr @_system_configuration, i32 0, i32 1) +// CHECK-NEXT: icmp uge i32 %[[#L1]], 262144 +// CHECK: ret ptr @foo_priority.cpu_pwr10 +// if (__builtin_cpu_supports("arch_3_00")) return &foo_priority.cpu_pwr9; +// CHECK: %[[#L2:]] = load i32, {{.*}} ptr @_system_configuration, i32 0, i32 1) +// CHECK-NEXT: icmp uge i32 %[[#L2]], 131072 // CHECK: ret ptr @foo_priority.cpu_pwr9 +// if (__builtin_cpu_supports("arch_2_07")) return &foo_priority.cpu_pwr8; +// CHECK: %[[#L3:]] = load i32, {{.*}} ptr @_system_configuration, i32 0, i32 1) +// CHECK-NEXT: icmp uge i32 %[[#L3]], 65536 // CHECK: ret ptr @foo_priority.cpu_pwr8 +// if (__builtin_cpu_supports("arch_2_06")) return &foo_priority.cpu_pwr7; +// CHECK: %[[#L4:]] = load i32, {{.*}} ptr @_system_configuration, i32 0, i32 1) +// CHECK-NEXT: icmp uge i32 %[[#L4]], 32768 // CHECK: ret ptr @foo_priority.cpu_pwr7 // CHECK: ret ptr @foo_priority.default - // CHECK: attributes #[[#ATTR_P7]] = {{.*}} "target-cpu"="pwr7" // CHECK: attributes #[[#ATTR_P10]] = {{.*}} "target-cpu"="pwr10" // CHECK: attributes #[[#ATTR_P8]] = {{.*}} "target-cpu"="pwr8" >From 5c91dfce8b510c80d0195d16a52fcf2ffe812cea Mon Sep 17 00:00:00 2001 From: Wael Yehia <[email protected]> Date: Tue, 10 Mar 2026 15:17:18 +0000 Subject: [PATCH 13/21] code review --- clang/include/clang/Basic/TargetInfo.h | 2 +- clang/lib/CodeGen/TargetBuiltins/PPC.cpp | 57 ++++++++++++------------ 2 files changed, 30 insertions(+), 29 deletions(-) diff --git
a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h index 68160e9bd9b29..b08631baf2532 100644 --- a/clang/include/clang/Basic/TargetInfo.h +++ b/clang/include/clang/Basic/TargetInfo.h @@ -1567,7 +1567,7 @@ class TargetInfo : public TransferrableTargetInfo, /// which requires support for cpu_supports and cpu_is functionality. bool supportsMultiVersioning() const { return getTriple().isX86() || getTriple().isAArch64() || - getTriple().isRISCV() || getTriple().isOSBinFormatXCOFF(); + getTriple().isRISCV() || getTriple().isOSAIX(); } /// Identify whether this target supports IFuncs. diff --git a/clang/lib/CodeGen/TargetBuiltins/PPC.cpp b/clang/lib/CodeGen/TargetBuiltins/PPC.cpp index a730de0fb856d..e87b987eeb340 100644 --- a/clang/lib/CodeGen/TargetBuiltins/PPC.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/PPC.cpp @@ -73,6 +73,8 @@ static llvm::Value *emitPPCLoadReserveIntrinsic(CodeGenFunction &CGF, Value *CodeGenFunction::EmitPPCBuiltinCpu(unsigned BuiltinID, llvm::Type *ReturnType, StringRef CPUStr) { + assert(BuiltinID == Builtin::BI__builtin_cpu_is || + BuiltinID == Builtin::BI__builtin_cpu_supports); #include "llvm/TargetParser/PPCTargetParser.def" auto GenAIXPPCBuiltinCpuExpr = [&](unsigned SupportMethod, unsigned FieldIdx, @@ -160,45 +162,44 @@ Value *CodeGenFunction::EmitPPCBuiltinCpu(unsigned BuiltinID, Value *TheCall = Builder.CreateCall(F, {Op0}, "cpu_is"); return Builder.CreateICmpEQ(TheCall, llvm::ConstantInt::get(Int32Ty, LinuxIDValue)); - } else if (BuiltinID == Builtin::BI__builtin_cpu_supports) { - llvm::Triple Triple = getTarget().getTriple(); - if (Triple.isOSAIX()) { - typedef std::tuple<unsigned, unsigned, unsigned, CmpInst::Predicate, - unsigned> - CPUSupportType; - auto [SupportMethod, FieldIdx, Mask, CompOp, Value] = - static_cast<CPUSupportType>(StringSwitch<CPUSupportType>(CPUStr) + } + // else BuiltinID == Builtin::BI__builtin_cpu_supports + llvm::Triple Triple = getTarget().getTriple(); + if (Triple.isOSAIX()) { + 
typedef std::tuple<unsigned, unsigned, unsigned, CmpInst::Predicate, + unsigned> + CPUSupportType; + auto [SupportMethod, FieldIdx, Mask, CompOp, Value] = + static_cast<CPUSupportType>( + StringSwitch<CPUSupportType>(CPUStr) #define PPC_AIX_FEATURE(NAME, DESC, SUPPORT_METHOD, INDEX, MASK, COMP_OP, \ VALUE) \ .Case(NAME, {SUPPORT_METHOD, INDEX, MASK, COMP_OP, VALUE}) #include "llvm/TargetParser/PPCTargetParser.def" - .Default({BUILTIN_PPC_FALSE, 0, 0, - CmpInst::Predicate(), 0})); - return GenAIXPPCBuiltinCpuExpr(SupportMethod, FieldIdx, Mask, CompOp, - Value); - } + .Default({BUILTIN_PPC_FALSE, 0, 0, CmpInst::Predicate(), 0})); + return GenAIXPPCBuiltinCpuExpr(SupportMethod, FieldIdx, Mask, CompOp, + Value); + } - assert(Triple.isOSLinux() && - "__builtin_cpu_supports() is only supported for AIX and Linux."); - auto [FeatureWord, BitMask] = - StringSwitch<std::pair<unsigned, unsigned>>(CPUStr) + assert(Triple.isOSLinux() && + "__builtin_cpu_supports() is only supported for AIX and Linux."); + auto [FeatureWord, BitMask] = + StringSwitch<std::pair<unsigned, unsigned>>(CPUStr) #define PPC_LNX_FEATURE(Name, Description, EnumName, Bitmask, FA_WORD) \ .Case(Name, {FA_WORD, Bitmask}) #include "llvm/TargetParser/PPCTargetParser.def" - .Default({0, 0}); - if (!BitMask) - return Builder.getFalse(); - Value *Op0 = llvm::ConstantInt::get(Int32Ty, FeatureWord); - llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_fixed_addr_ld); - Value *TheCall = Builder.CreateCall(F, {Op0}, "cpu_supports"); - Value *Mask = - Builder.CreateAnd(TheCall, llvm::ConstantInt::get(Int32Ty, BitMask)); - return Builder.CreateICmpNE(Mask, llvm::Constant::getNullValue(Int32Ty)); + .Default({0, 0}); + if (!BitMask) + return Builder.getFalse(); + Value *Op0 = llvm::ConstantInt::get(Int32Ty, FeatureWord); + llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_fixed_addr_ld); + Value *TheCall = Builder.CreateCall(F, {Op0}, "cpu_supports"); + Value *Mask = + Builder.CreateAnd(TheCall, 
llvm::ConstantInt::get(Int32Ty, BitMask)); + return Builder.CreateICmpNE(Mask, llvm::Constant::getNullValue(Int32Ty)); #undef PPC_FAWORD_HWCAP #undef PPC_FAWORD_HWCAP2 #undef PPC_FAWORD_CPUID - } else - assert(0 && "unexpected builtin"); } Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, >From 4f77cb543089f529f851236eea4d33b0970ce6ef Mon Sep 17 00:00:00 2001 From: Wael Yehia <[email protected]> Date: Tue, 10 Mar 2026 16:34:59 +0000 Subject: [PATCH 14/21] inline the only call to IgnoreFMVOnADeclaration --- clang/lib/CodeGen/CodeGenModule.cpp | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 0760b92af3dfe..6e8e3682dcc3f 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -2092,20 +2092,6 @@ static bool isUniqueInternalLinkageDecl(GlobalDecl GD, (CGM.getFunctionLinkage(GD) == llvm::GlobalValue::InternalLinkage); } -// On certain platforms, a declared (but not defined) FMV shall be treated -// like a regular non-FMV function. -static bool IgnoreFMVOnADeclaration(const llvm::Triple &Triple, - const FunctionDecl *FD) { - if (!FD->isMultiVersion()) - return false; - - if (Triple.isOSAIX()) { - assert(FD->isTargetClonesMultiVersion()); - return !FD->isDefined(); - } - return false; -} - static std::string getMangledNameImpl(CodeGenModule &CGM, GlobalDecl GD, const NamedDecl *ND, bool OmitMultiVersionMangling = false) { @@ -5186,7 +5172,13 @@ llvm::Constant *CodeGenModule::GetOrCreateLLVMFunction( AddDeferredMultiVersionResolverToEmit(GD); NameWithoutMultiVersionMangling = getMangledNameImpl( *this, GD, FD, /*OmitMultiVersionMangling=*/true); - } else if (IgnoreFMVOnADeclaration(getTriple(), FD)) { + } + // On AIX, a declared (but not defined) FMV shall be treated like a + // regular non-FMV function. 
If a definition is later seen, then + // GetOrCreateMultiVersionResolver will get called (when processing said + // definition) which will replace the IR declaration we're creating here + // with the FMV ifunc. + else if (getTriple().isOSAIX() && !FD->isDefined()) { NameWithoutMultiVersionMangling = getMangledNameImpl( *this, GD, FD, /*OmitMultiVersionMangling=*/true); } else >From c21bc94021edf2a908a5f80ac0d63221e47777c1 Mon Sep 17 00:00:00 2001 From: Wael Yehia <[email protected]> Date: Tue, 10 Mar 2026 17:01:28 +0000 Subject: [PATCH 15/21] code review: add const to parameters --- clang/include/clang/Sema/SemaPPC.h | 4 ++-- clang/lib/Sema/SemaPPC.cpp | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/clang/include/clang/Sema/SemaPPC.h b/clang/include/clang/Sema/SemaPPC.h index 0cf6ba7ff29dd..8dcd638d3f722 100644 --- a/clang/include/clang/Sema/SemaPPC.h +++ b/clang/include/clang/Sema/SemaPPC.h @@ -54,8 +54,8 @@ class SemaPPC : public SemaBase { // vector short vec_xxsldwi(vector short, vector short, int); bool BuiltinVSX(CallExpr *TheCall); - bool checkTargetClonesAttr(SmallVectorImpl<StringRef> &Params, - SmallVectorImpl<SourceLocation> &Locs, + bool checkTargetClonesAttr(const SmallVectorImpl<StringRef> &Params, + const SmallVectorImpl<SourceLocation> &Locs, SmallVectorImpl<SmallString<64>> &NewParams); }; } // namespace clang diff --git a/clang/lib/Sema/SemaPPC.cpp b/clang/lib/Sema/SemaPPC.cpp index 705cfa4c4d739..11c306d1fb49f 100644 --- a/clang/lib/Sema/SemaPPC.cpp +++ b/clang/lib/Sema/SemaPPC.cpp @@ -563,7 +563,8 @@ bool SemaPPC::BuiltinVSX(CallExpr *TheCall) { } bool SemaPPC::checkTargetClonesAttr( - SmallVectorImpl<StringRef> &Params, SmallVectorImpl<SourceLocation> &Locs, + const SmallVectorImpl<StringRef> &Params, + const SmallVectorImpl<SourceLocation> &Locs, SmallVectorImpl<SmallString<64>> &NewParams) { using namespace DiagAttrParams; >From 81b9dcf138749233964ee8fbb8d5ce7f4b349363 Mon Sep 17 00:00:00 2001 From: Wael Yehia <[email 
protected]> Date: Tue, 10 Mar 2026 17:17:39 +0000 Subject: [PATCH 16/21] checkTargetClonesAttr: compute TargetInfo once --- clang/lib/Sema/SemaPPC.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/clang/lib/Sema/SemaPPC.cpp b/clang/lib/Sema/SemaPPC.cpp index 11c306d1fb49f..9ed9216aad7b0 100644 --- a/clang/lib/Sema/SemaPPC.cpp +++ b/clang/lib/Sema/SemaPPC.cpp @@ -571,6 +571,7 @@ bool SemaPPC::checkTargetClonesAttr( assert(Params.size() == Locs.size() && "Mismatch between number of string parameters and locations"); + auto &TargetInfo = getASTContext().getTargetInfo(); bool HasDefault = false; bool HasComma = false; for (unsigned I = 0, E = Params.size(); I < E; ++I) { @@ -593,15 +594,14 @@ bool SemaPPC::checkTargetClonesAttr( Loc.getLocWithOffset(LHS.data() - Param.data()); if (LHS.starts_with("cpu=")) { - if (!getASTContext().getTargetInfo().isValidCPUName( - LHS.drop_front(sizeof("cpu=") - 1))) + if (!TargetInfo.isValidCPUName(LHS.drop_front(sizeof("cpu=") - 1))) return Diag(CurLoc, diag::warn_unsupported_target_attribute) << Unsupported << CPU << LHS.drop_front(sizeof("cpu=") - 1) << TargetClones; } else if (LHS == "default") { HasDefault = true; - } else if (!getASTContext().getTargetInfo().isValidFeatureName(LHS) || - getASTContext().getTargetInfo().getFMVPriority(LHS) == 0) { + } else if (!TargetInfo.isValidFeatureName(LHS) || + TargetInfo.getFMVPriority(LHS) == 0) { return Diag(CurLoc, diag::warn_unsupported_target_attribute) << Unsupported << None << LHS << TargetClones; } >From d86700a941d96f96275dba6002fbd4fd62644efe Mon Sep 17 00:00:00 2001 From: Wael Yehia <[email protected]> Date: Tue, 10 Mar 2026 22:46:51 -0400 Subject: [PATCH 17/21] normalize CPU during Sema fix Sema and create ppc target_clones tests based on the x86 test --- clang/include/clang/Sema/SemaPPC.h | 3 +- clang/lib/Basic/Targets/PPC.cpp | 7 ++ clang/lib/Basic/Targets/PPC.h | 1 + clang/lib/Sema/SemaDeclAttr.cpp | 3 +- clang/lib/Sema/SemaPPC.cpp | 20 +++- 
clang/test/Sema/attr-target-clones-ppc.c | 130 +++++++++++++++++++++++ 6 files changed, 157 insertions(+), 7 deletions(-) create mode 100644 clang/test/Sema/attr-target-clones-ppc.c diff --git a/clang/include/clang/Sema/SemaPPC.h b/clang/include/clang/Sema/SemaPPC.h index 8dcd638d3f722..9dad80acc1747 100644 --- a/clang/include/clang/Sema/SemaPPC.h +++ b/clang/include/clang/Sema/SemaPPC.h @@ -56,7 +56,8 @@ class SemaPPC : public SemaBase { bool checkTargetClonesAttr(const SmallVectorImpl<StringRef> &Params, const SmallVectorImpl<SourceLocation> &Locs, - SmallVectorImpl<SmallString<64>> &NewParams); + SmallVectorImpl<SmallString<64>> &NewParams, + SourceLocation AttrLoc); }; } // namespace clang diff --git a/clang/lib/Basic/Targets/PPC.cpp b/clang/lib/Basic/Targets/PPC.cpp index fbff0af711b13..0f498c382f5d0 100644 --- a/clang/lib/Basic/Targets/PPC.cpp +++ b/clang/lib/Basic/Targets/PPC.cpp @@ -678,6 +678,13 @@ void PPCTargetInfo::setFeatureEnabled(llvm::StringMap<bool> &Features, } } +bool PPCTargetInfo::isValidFeatureName(StringRef Name) const { + if (Name.empty()) + return false; + // TODO: filter out unknown features + return true; +} + ParsedTargetAttr PPCTargetInfo::parseTargetAttr(StringRef Features) const { ParsedTargetAttr Ret; if (Features == "default") diff --git a/clang/lib/Basic/Targets/PPC.h b/clang/lib/Basic/Targets/PPC.h index 6f90ff1f5d57c..9dc501d33c95f 100644 --- a/clang/lib/Basic/Targets/PPC.h +++ b/clang/lib/Basic/Targets/PPC.h @@ -199,6 +199,7 @@ class LLVM_LIBRARY_VISIBILITY PPCTargetInfo : public TargetInfo { bool supportsTargetAttributeTune() const override { return true; } + bool isValidFeatureName(StringRef Name) const override; ParsedTargetAttr parseTargetAttr(StringRef Str) const override; llvm::APInt getFMVPriority(ArrayRef<StringRef> Features) const override; diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index 9c4dea25b53ab..b9504883dbe58 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ 
b/clang/lib/Sema/SemaDeclAttr.cpp @@ -3625,7 +3625,8 @@ static void handleTargetClonesAttr(Sema &S, Decl *D, const ParsedAttr &AL) { AL.getLoc())) return; } else if (S.Context.getTargetInfo().getTriple().isOSAIX()) { - if (S.PPC().checkTargetClonesAttr(Params, Locations, NewParams)) + if (S.PPC().checkTargetClonesAttr(Params, Locations, NewParams, + AL.getLoc())) return; } Params.clear(); diff --git a/clang/lib/Sema/SemaPPC.cpp b/clang/lib/Sema/SemaPPC.cpp index 9ed9216aad7b0..4daaceabbf2fc 100644 --- a/clang/lib/Sema/SemaPPC.cpp +++ b/clang/lib/Sema/SemaPPC.cpp @@ -22,6 +22,7 @@ #include "clang/Basic/TargetInfo.h" #include "clang/Sema/Sema.h" #include "llvm/ADT/APSInt.h" +#include "llvm/TargetParser/PPCTargetParser.h" namespace clang { @@ -562,10 +563,10 @@ bool SemaPPC::BuiltinVSX(CallExpr *TheCall) { return false; } -bool SemaPPC::checkTargetClonesAttr( - const SmallVectorImpl<StringRef> &Params, - const SmallVectorImpl<SourceLocation> &Locs, - SmallVectorImpl<SmallString<64>> &NewParams) { +bool SemaPPC::checkTargetClonesAttr(const SmallVectorImpl<StringRef> &Params, + const SmallVectorImpl<SourceLocation> &Locs, + SmallVectorImpl<SmallString<64>> &NewParams, + SourceLocation AttrLoc) { using namespace DiagAttrParams; assert(Params.size() == Locs.size() && @@ -604,6 +605,15 @@ bool SemaPPC::checkTargetClonesAttr( TargetInfo.getFMVPriority(LHS) == 0) { return Diag(CurLoc, diag::warn_unsupported_target_attribute) << Unsupported << None << LHS << TargetClones; + } else + assert(0 && "specifying target-features on target clones not supported yet"); + + SmallString<64> CPU; + if (LHS.starts_with("cpu=")) { + CPU.append("cpu="); + CPU.append( + llvm::PPC::normalizeCPUName(LHS.drop_front(sizeof("cpu=") - 1))); + LHS = CPU.str(); } if (llvm::is_contained(NewParams, LHS)) { Diag(CurLoc, diag::warn_target_clone_duplicate_options); @@ -616,7 +626,7 @@ bool SemaPPC::checkTargetClonesAttr( Diag(Locs[0], diag::warn_target_clone_mixed_values); if (!HasDefault) - return 
Diag(Locs[0], diag::err_target_clone_must_have_default); + return Diag(AttrLoc, diag::err_target_clone_must_have_default); return false; } diff --git a/clang/test/Sema/attr-target-clones-ppc.c b/clang/test/Sema/attr-target-clones-ppc.c new file mode 100644 index 0000000000000..2f6aadde528fb --- /dev/null +++ b/clang/test/Sema/attr-target-clones-ppc.c @@ -0,0 +1,130 @@ +// RUN: %clang_cc1 -triple powerpc-ibm-aix-xcoff -fsyntax-only -verify %s +// RUN: %clang_cc1 -triple powerpc64-ibm-aix-xcoff -fsyntax-only -verify %s + +// expected-error@+1 {{'target_clones' multiversioning requires a default target}} +void __attribute__((target_clones("cpu=pwr7"))) +no_default(void); + +// expected-error@+2 {{'target_clones' and 'target' attributes are not compatible}} +// expected-note@+1 {{conflicting attribute is here}} +void __attribute__((target("cpu=pwr7"), target_clones("cpu=pwr8"))) +ignored_attr(void); + +// expected-error@+2 {{'target' and 'target_clones' attributes are not compatible}} +// expected-note@+1 {{conflicting attribute is here}} +void __attribute__((target_clones("default", "cpu=pwr8"), target("cpu=pwr7"))) +ignored_attr2(void); + +int __attribute__((target_clones("cpu=pwr9", "default"))) redecl4(void); +// expected-error@+3 {{'target_clones' attribute does not match previous declaration}} +// expected-note@-2 {{previous declaration is here}} +int __attribute__((target_clones("cpu=pwr7", "default"))) +redecl4(void) { return 1; } + +int __attribute__((target_clones("cpu=pwr7", "default"))) redecl7(void); +// expected-error@+2 {{multiversioning attributes cannot be combined}} +// expected-note@-2 {{previous declaration is here}} +int __attribute__((target("cpu=pwr8"))) redecl7(void) { return 1; } + +int __attribute__((target("cpu=pwr9"))) redef2(void) { return 1; } +// expected-error@+2 {{multiversioning attributes cannot be combined}} +// expected-note@-2 {{previous declaration is here}} +int __attribute__((target_clones("cpu=pwr7", "default"))) redef2(void) { 
return 1; } + +int __attribute__((target_clones("cpu=pwr9,default"))) redef3(void) { return 1; } +// expected-error@+2 {{redefinition of 'redef3'}} +// expected-note@-2 {{previous definition is here}} +int __attribute__((target_clones("cpu=pwr9,default"))) redef3(void) { return 1; } + +// Duplicates are allowed +// expected-warning@+2 {{mixing 'target_clones' specifier mechanisms is permitted for GCC compatibility}} +// expected-warning@+1 2 {{version list contains duplicate entries}} +int __attribute__((target_clones("cpu=pwr9,cpu=power9", "cpu=power9, default"))) +dupes(void) { return 1; } + +// expected-warning@+1 {{unsupported '' in the 'target_clones' attribute string;}} +void __attribute__((target_clones(""))) +empty_target_1(void); +// expected-warning@+1 {{unsupported '' in the 'target_clones' attribute string;}} +void __attribute__((target_clones(",default"))) +empty_target_2(void); +// expected-warning@+1 {{unsupported '' in the 'target_clones' attribute string;}} +void __attribute__((target_clones("default,"))) +empty_target_3(void); +// expected-warning@+1 {{unsupported '' in the 'target_clones' attribute string;}} +void __attribute__((target_clones("default, ,cpu=pwr7"))) +empty_target_4(void); + +// expected-warning@+1 {{unsupported '' in the 'target_clones' attribute string;}} +void __attribute__((target_clones("default,cpu=pwr7", ""))) +empty_target_5(void); + +// expected-warning@+1 {{version list contains duplicate entries}} +void __attribute__((target_clones("default", "default"))) +dupe_default(void); + +// expected-warning@+1 {{version list contains duplicate entries}} +void __attribute__((target_clones("cpu=pwr9,cpu=power9,default"))) +dupe_normal(void); + +// expected-error@+2 {{attribute 'target_clones' cannot appear more than once on a declaration}} +// expected-note@+1 {{conflicting attribute is here}} +void __attribute__((target_clones("cpu=pwr7,default"), target_clones("cpu=pwr8,default"))) +dupe_normal2(void); + +int mv_after_use(void); 
+int useage(void) { + return mv_after_use(); +} +// expected-error@+1 {{function declaration cannot become a multiversioned function after first usage}} +int __attribute__((target_clones("cpu=pwr9", "default"))) mv_after_use(void) { return 1; } + +void bad_overload1(void) __attribute__((target_clones("cpu=pwr8", "default"))); +// expected-error@+2 {{conflicting types for 'bad_overload1'}} +// expected-note@-2 {{previous declaration is here}} +void bad_overload1(int p) {} + +void bad_overload2(int p) {} +// expected-error@+2 {{conflicting types for 'bad_overload2'}} +// expected-note@-2 {{previous definition is here}} +void bad_overload2(void) __attribute__((target_clones("cpu=pwr8", "default"))); + +void bad_overload3(void) __attribute__((target_clones("cpu=pwr8", "default"))); +// expected-error@+2 {{conflicting types for 'bad_overload3'}} +// expected-note@-2 {{previous declaration is here}} +void bad_overload3(int) __attribute__((target_clones("cpu=pwr8", "default"))); + + +void good_overload1(void) __attribute__((target_clones("cpu=pwr7", "cpu=power10", "default"))); +void __attribute__((__overloadable__)) good_overload1(int p) {} + +// expected-error@+1 {{attribute 'target_clones' multiversioning cannot be combined with attribute 'overloadable'}} +void __attribute__((__overloadable__)) good_overload2(void) __attribute__((target_clones("cpu=pwr7", "default"))); +void good_overload2(int p) {} +// expected-error@+1 {{attribute 'target_clones' multiversioning cannot be combined with attribute 'overloadable'}} +void __attribute__((__overloadable__)) good_overload3(void) __attribute__((target_clones("cpu=pwr7", "default"))); +// expected-error@+1 {{attribute 'target_clones' multiversioning cannot be combined with attribute 'overloadable'}} +void __attribute__((__overloadable__)) good_overload3(int) __attribute__((target_clones("cpu=pwr7", "default"))); + +void good_overload4(void) __attribute__((target_clones("cpu=pwr7", "default"))); +// expected-error@+1 
{{attribute 'target_clones' multiversioning cannot be combined with attribute 'overloadable'}} +void __attribute__((__overloadable__)) good_overload4(int) __attribute__((target_clones("cpu=pwr7", "default"))); + +// expected-error@+1 {{attribute 'target_clones' multiversioning cannot be combined with attribute 'overloadable'}} +void __attribute__((__overloadable__)) good_overload5(void) __attribute__((target_clones("cpu=pwr7", "default"))); +void good_overload5(int) __attribute__((target_clones("cpu=pwr7", "default"))); + + +void good_isa_level(int) __attribute__((target_clones("default", "cpu=pwr7", "cpu=pwr8", "cpu=pwr9", "cpu=pwr10"))); + +// expected-warning@+1 {{unsupported CPU 'bad-cpu' in the 'target_clones' attribute string; 'target_clones' attribute ignored}} +void bad_cpu(int) __attribute__((target_clones("default", "cpu=bad-cpu"))); + + +// expected-error@+1 {{'target_clones' multiversioning requires a default target}} +void __attribute__((target_clones())) +gh173684_empty_attribute_args(void); + +// expected-error@+1 {{'target_clones' multiversioning requires a default target}} +void __attribute__((target_clones)) +gh173684_empty_attribute_args_2(void); >From 1dbae6934e27f427024bc0f422654db732e1e5c6 Mon Sep 17 00:00:00 2001 From: Wael Yehia <[email protected]> Date: Wed, 11 Mar 2026 18:39:35 +0000 Subject: [PATCH 18/21] now that we normalize CPU on target_clones in Sema, remove normalization in codegen --- clang/lib/Basic/Targets/PPC.cpp | 9 ++------- clang/lib/CodeGen/CodeGenFunction.cpp | 10 +++++----- 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/clang/lib/Basic/Targets/PPC.cpp b/clang/lib/Basic/Targets/PPC.cpp index 0f498c382f5d0..788d2d25b6cd5 100644 --- a/clang/lib/Basic/Targets/PPC.cpp +++ b/clang/lib/Basic/Targets/PPC.cpp @@ -536,8 +536,7 @@ bool PPCTargetInfo::initFeatureMap( const llvm::Triple &TheTriple = getTriple(); std::optional<llvm::StringMap<bool>> FeaturesOpt = - llvm::PPC::getPPCDefaultTargetFeatures(TheTriple, - 
llvm::PPC::normalizeCPUName(CPU)); + llvm::PPC::getPPCDefaultTargetFeatures(TheTriple, CPU); if (FeaturesOpt) Features = FeaturesOpt.value(); @@ -699,7 +698,6 @@ ParsedTargetAttr PPCTargetInfo::parseTargetAttr(StringRef Features) const { // accepting it weirdly. Feature = Feature.trim(); - // While we're here iterating check for a different target cpu. if (Feature.starts_with("cpu=")) { if (!Ret.CPU.empty()) Ret.Duplicate = "cpu="; @@ -715,8 +713,6 @@ ParsedTargetAttr PPCTargetInfo::parseTargetAttr(StringRef Features) const { else Ret.Features.push_back("+" + Feature.str()); } - Ret.CPU = llvm::PPC::normalizeCPUName(Ret.CPU); - Ret.Tune = llvm::PPC::normalizeCPUName(Ret.Tune); return Ret; } @@ -726,8 +722,7 @@ llvm::APInt PPCTargetInfo::getFMVPriority(ArrayRef<StringRef> Features) const { assert(Features.size() == 1 && "one feature/cpu per clone on PowerPC"); ParsedTargetAttr ParsedAttr = parseTargetAttr(Features[0]); if (!ParsedAttr.CPU.empty()) { - StringRef CPU = llvm::PPC::normalizeCPUName(ParsedAttr.CPU); - int Priority = llvm::StringSwitch<int>(CPU) + int Priority = llvm::StringSwitch<int>(ParsedAttr.CPU) .Case("pwr7", 1) .Case("pwr8", 2) .Case("pwr9", 3) diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp index 84030cb421a40..b896a8d8767c9 100644 --- a/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/clang/lib/CodeGen/CodeGenFunction.cpp @@ -3088,11 +3088,11 @@ void CodeGenFunction::EmitPPCAIXMultiVersionResolver( assert(RO.Features[0].starts_with("cpu=")); StringRef CPU = RO.Features[0].split("=").second.trim(); StringRef Feature = llvm::StringSwitch<StringRef>(CPU) - .Cases({"power7", "pwr7"}, "arch_2_06") - .Cases({"power8", "pwr8"}, "arch_2_07") - .Cases({"power9", "pwr9"}, "arch_3_00") - .Cases({"power10", "pwr10"}, "arch_3_1") - .Cases({"power11", "pwr11"}, "arch_3_1") + .Case("pwr7", "arch_2_06") + .Case("pwr8", "arch_2_07") + .Case("pwr9", "arch_3_00") + .Case("pwr10", "arch_3_1") + .Case("pwr11", "arch_3_1") 
.Default("error"); llvm::Value *Condition = EmitPPCBuiltinCpu( >From 2b6ca07c94d45f5d40d1247150a49c01d83dd3c8 Mon Sep 17 00:00:00 2001 From: Wael Yehia <[email protected]> Date: Wed, 11 Mar 2026 23:14:36 +0000 Subject: [PATCH 19/21] diagnose non-cpu strings in target_clones in Sema --- clang/lib/CodeGen/CodeGenModule.cpp | 2 +- clang/lib/Sema/SemaPPC.cpp | 8 +++----- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 6e8e3682dcc3f..ebf1dfc8b00aa 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -5177,7 +5177,7 @@ llvm::Constant *CodeGenModule::GetOrCreateLLVMFunction( // regular non-FMV function. If a definition is later seen, then // GetOrCreateMultiVersionResolver will get called (when processing said // definition) which will replace the IR declaration we're creating here - // with the FMV ifunc. + // with the FMV ifunc (see replaceDeclarationWith). else if (getTriple().isOSAIX() && !FD->isDefined()) { NameWithoutMultiVersionMangling = getMangledNameImpl( *this, GD, FD, /*OmitMultiVersionMangling=*/true); diff --git a/clang/lib/Sema/SemaPPC.cpp b/clang/lib/Sema/SemaPPC.cpp index 4daaceabbf2fc..974552417bc4d 100644 --- a/clang/lib/Sema/SemaPPC.cpp +++ b/clang/lib/Sema/SemaPPC.cpp @@ -601,13 +601,11 @@ bool SemaPPC::checkTargetClonesAttr(const SmallVectorImpl<StringRef> &Params, << TargetClones; } else if (LHS == "default") { HasDefault = true; - } else if (!TargetInfo.isValidFeatureName(LHS) || - TargetInfo.getFMVPriority(LHS) == 0) { + } else { + // it's a feature string, but not supported yet. 
return Diag(CurLoc, diag::warn_unsupported_target_attribute) << Unsupported << None << LHS << TargetClones; - } else - assert(0 && "specifying target-features on target clones not supported yet"); - + } SmallString<64> CPU; if (LHS.starts_with("cpu=")) { CPU.append("cpu="); >From 3b7e5b9ecb2ffcd3df88c37e1b117b24697912d8 Mon Sep 17 00:00:00 2001 From: Wael Yehia <[email protected]> Date: Wed, 11 Mar 2026 20:42:03 -0400 Subject: [PATCH 20/21] create PPCTargetInfo::isTargetClonesSupportedCPU to filter out unsupported CPUs during Sema --- clang/lib/Sema/SemaPPC.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/clang/lib/Sema/SemaPPC.cpp b/clang/lib/Sema/SemaPPC.cpp index 974552417bc4d..3418b1447de9b 100644 --- a/clang/lib/Sema/SemaPPC.cpp +++ b/clang/lib/Sema/SemaPPC.cpp @@ -595,10 +595,13 @@ bool SemaPPC::checkTargetClonesAttr(const SmallVectorImpl<StringRef> &Params, Loc.getLocWithOffset(LHS.data() - Param.data()); if (LHS.starts_with("cpu=")) { - if (!TargetInfo.isValidCPUName(LHS.drop_front(sizeof("cpu=") - 1))) + StringRef CPUStr = LHS.drop_front(sizeof("cpu=") - 1); + if (!TargetInfo.isValidCPUName(CPUStr)) return Diag(CurLoc, diag::warn_unsupported_target_attribute) - << Unsupported << CPU << LHS.drop_front(sizeof("cpu=") - 1) - << TargetClones; + << Unknown << CPU << CPUStr << TargetClones; + else if (!TargetInfo.validateCpuIs(CPUStr)) + return Diag(CurLoc, diag::warn_unsupported_target_attribute) + << Unsupported << CPU << CPUStr << TargetClones; } else if (LHS == "default") { HasDefault = true; } else { >From 2cfce29f13b1a5e0209e4457afbf9e580e1f7ffe Mon Sep 17 00:00:00 2001 From: Wael Yehia <[email protected]> Date: Thu, 12 Mar 2026 03:10:25 +0000 Subject: [PATCH 21/21] fix test --- clang/test/Sema/attr-target-clones-ppc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/clang/test/Sema/attr-target-clones-ppc.c b/clang/test/Sema/attr-target-clones-ppc.c index 2f6aadde528fb..96acc974320b0 100644 --- 
a/clang/test/Sema/attr-target-clones-ppc.c +++ b/clang/test/Sema/attr-target-clones-ppc.c @@ -117,9 +117,11 @@ void good_overload5(int) __attribute__((target_clones("cpu=pwr7", "default"))); void good_isa_level(int) __attribute__((target_clones("default", "cpu=pwr7", "cpu=pwr8", "cpu=pwr9", "cpu=pwr10"))); -// expected-warning@+1 {{unsupported CPU 'bad-cpu' in the 'target_clones' attribute string; 'target_clones' attribute ignored}} +// expected-warning@+1 {{unknown CPU 'bad-cpu' in the 'target_clones' attribute string; 'target_clones' attribute ignored}} void bad_cpu(int) __attribute__((target_clones("default", "cpu=bad-cpu"))); +// expected-warning@+1 {{unsupported CPU 'pwr3' in the 'target_clones' attribute string; 'target_clones' attribute ignored}} +void bad_cpu(int) __attribute__((target_clones("default", "cpu=pwr3"))); // expected-error@+1 {{'target_clones' multiversioning requires a default target}} void __attribute__((target_clones())) _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
