[PATCH] D55527: Normalize GlobalDecls when used with CPUDispatch

Erich Keane via Phabricator via cfe-commits Wed, 12 Dec 2018 07:14:31 -0800

erichkeane updated this revision to Diff 177853.
erichkeane added a comment.


As mentioned, this update addresses the @rsmith comments that I thought were doable without further feedback.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D55527/new/

https://reviews.llvm.org/D55527

Files:
  include/clang/AST/GlobalDecl.h
  include/clang/Basic/Attr.td
  lib/CodeGen/CodeGenModule.cpp
  lib/CodeGen/CodeGenModule.h
  test/CodeGen/attr-cpuspecific.c

Index: test/CodeGen/attr-cpuspecific.c
===================================================================
--- test/CodeGen/attr-cpuspecific.c
+++ test/CodeGen/attr-cpuspecific.c
@@ -29,21 +29,7 @@
 
 ATTR(cpu_dispatch(ivybridge, knl))
 void TwoVersions(void);
-// LINUX: define void ()* @TwoVersions.resolver()
-// LINUX: call void @__cpu_indicator_init
-// LINUX: ret void ()* @TwoVersions.Z
-// LINUX: ret void ()* @TwoVersions.S
-// LINUX: call void @llvm.trap
-// LINUX: unreachable
-
-// WINDOWS: define dso_local void @TwoVersions()
-// WINDOWS: call void @__cpu_indicator_init()
-// WINDOWS: call void @TwoVersions.Z()
-// WINDOWS-NEXT: ret void
-// WINDOWS: call void @TwoVersions.S()
-// WINDOWS-NEXT: ret void
-// WINDOWS: call void @llvm.trap
-// WINDOWS: unreachable
+// Resolvers are emitted at the end, so the check lines are at the bottom.
 
 ATTR(cpu_specific(ivybridge))
 void TwoVersions(void){}
@@ -82,6 +68,59 @@
 // has an extra config to emit!
 ATTR(cpu_dispatch(ivybridge, knl, atom))
 void TwoVersionsSameAttr(void);
+// Resolvers are emitted at the end, so the check lines are at the bottom.
+
+ATTR(cpu_dispatch(atom, ivybridge, knl))
+void ThreeVersionsSameAttr(void){}
+// Resolvers are emitted at the end, so the check lines are at the bottom.
+
+// No Cpu Specific options.
+ATTR(cpu_dispatch(atom, ivybridge, knl))
+void NoSpecifics(void);
+// Resolvers are emitted at the end, so the check lines are at the bottom.
+
+ATTR(cpu_dispatch(atom, generic, ivybridge, knl))
+void HasGeneric(void);
+// Resolvers are emitted at the end, so the check lines are at the bottom.
+
+ATTR(cpu_dispatch(atom, generic, ivybridge, knl))
+void HasParams(int i, double d);
+// Resolvers are emitted at the end, so the check lines are at the bottom.
+
+ATTR(cpu_dispatch(atom, generic, ivybridge, knl))
+int HasParamsAndReturn(int i, double d);
+// Resolvers are emitted at the end, so the check lines are at the bottom.
+
+ATTR(cpu_dispatch(atom, generic, pentium))
+int GenericAndPentium(int i, double d);
+// Resolvers are emitted at the end, so the check lines are at the bottom.
+
+ATTR(cpu_dispatch(atom, pentium))
+int DispatchFirst(void);
+// Resolvers are emitted at the end, so the check lines are at the bottom.
+
+ATTR(cpu_specific(atom))
+int DispatchFirst(void) {return 0;}
+ATTR(cpu_specific(pentium))
+int DispatchFirst(void) {return 1;}
+// Resolver emission causes these to be emitted, so they happen later.
+
+// LINUX: define void ()* @TwoVersions.resolver()
+// LINUX: call void @__cpu_indicator_init
+// LINUX: ret void ()* @TwoVersions.Z
+// LINUX: ret void ()* @TwoVersions.S
+// LINUX: call void @llvm.trap
+// LINUX: unreachable
+
+// WINDOWS: define dso_local void @TwoVersions()
+// WINDOWS: call void @__cpu_indicator_init()
+// WINDOWS: call void @TwoVersions.Z()
+// WINDOWS-NEXT: ret void
+// WINDOWS: call void @TwoVersions.S()
+// WINDOWS-NEXT: ret void
+// WINDOWS: call void @llvm.trap
+// WINDOWS: unreachable
+
 // LINUX: define void ()* @TwoVersionsSameAttr.resolver()
 // LINUX: ret void ()* @TwoVersionsSameAttr.Z
 // LINUX: ret void ()* @TwoVersionsSameAttr.S
@@ -99,8 +138,6 @@
 // WINDOWS: call void @llvm.trap
 // WINDOWS: unreachable
 
-ATTR(cpu_dispatch(atom, ivybridge, knl))
-void ThreeVersionsSameAttr(void){}
 // LINUX: define void ()* @ThreeVersionsSameAttr.resolver()
 // LINUX: call void @__cpu_indicator_init
 // LINUX: ret void ()* @ThreeVersionsSameAttr.Z
@@ -120,9 +157,16 @@
 // WINDOWS: call void @llvm.trap
 // WINDOWS: unreachable
 
-// No Cpu Specific options.
-ATTR(cpu_dispatch(atom, ivybridge, knl))
-void NoSpecifics(void);
+// LINUX: define i32 @DispatchFirst.O
+// LINUX: ret i32 0
+// LINUX: define i32 @DispatchFirst.B
+// LINUX: ret i32 1
+
+// WINDOWS: define dso_local i32 @DispatchFirst.O()
+// WINDOWS: ret i32 0
+// WINDOWS: define dso_local i32 @DispatchFirst.B
+// WINDOWS: ret i32 1
+
 // LINUX: define void ()* @NoSpecifics.resolver()
 // LINUX: call void @__cpu_indicator_init
 // LINUX: ret void ()* @NoSpecifics.Z
@@ -142,8 +186,6 @@
 // WINDOWS: call void @llvm.trap
 // WINDOWS: unreachable
 
-ATTR(cpu_dispatch(atom, generic, ivybridge, knl))
-void HasGeneric(void);
 // LINUX: define void ()* @HasGeneric.resolver()
 // LINUX: call void @__cpu_indicator_init
 // LINUX: ret void ()* @HasGeneric.Z
@@ -164,8 +206,6 @@
 // WINDOWS-NEXT: ret void
 // WINDOWS-NOT: call void @llvm.trap
 
-ATTR(cpu_dispatch(atom, generic, ivybridge, knl))
-void HasParams(int i, double d);
 // LINUX: define void (i32, double)* @HasParams.resolver()
 // LINUX: call void @__cpu_indicator_init
 // LINUX: ret void (i32, double)* @HasParams.Z
@@ -186,8 +226,6 @@
 // WINDOWS-NEXT: ret void
 // WINDOWS-NOT: call void @llvm.trap
 
-ATTR(cpu_dispatch(atom, generic, ivybridge, knl))
-int HasParamsAndReturn(int i, double d);
 // LINUX: define i32 (i32, double)* @HasParamsAndReturn.resolver()
 // LINUX: call void @__cpu_indicator_init
 // LINUX: ret i32 (i32, double)* @HasParamsAndReturn.Z
@@ -208,8 +246,6 @@
 // WINDOWS-NEXT: ret i32 %[[RET]]
 // WINDOWS-NOT: call void @llvm.trap
 
-ATTR(cpu_dispatch(atom, generic, pentium))
-int GenericAndPentium(int i, double d);
 // LINUX: define i32 (i32, double)* @GenericAndPentium.resolver()
 // LINUX: call void @__cpu_indicator_init
 // LINUX: ret i32 (i32, double)* @GenericAndPentium.O
@@ -226,8 +262,6 @@
 // WINDOWS-NOT: call i32 @GenericAndPentium.A
 // WINDOWS-NOT: call void @llvm.trap
 
-ATTR(cpu_dispatch(atom, pentium))
-int DispatchFirst(void);
 // LINUX: define i32 ()* @DispatchFirst.resolver
 // LINUX: ret i32 ()* @DispatchFirst.O
 // LINUX: ret i32 ()* @DispatchFirst.B
@@ -238,22 +272,6 @@
 // WINDOWS: %[[RET:.+]] = musttail call i32 @DispatchFirst.B()
 // WINDOWS-NEXT: ret i32 %[[RET]]
 
-ATTR(cpu_specific(atom))
-int DispatchFirst(void) {return 0;}
-// LINUX: define i32 @DispatchFirst.O
-// LINUX: ret i32 0
-
-// WINDOWS: define dso_local i32 @DispatchFirst.O()
-// WINDOWS: ret i32 0
-
-ATTR(cpu_specific(pentium))
-int DispatchFirst(void) {return 1;}
-// LINUX: define i32 @DispatchFirst.B
-// LINUX: ret i32 1
-
-// WINDOWS: define dso_local i32 @DispatchFirst.B
-// WINDOWS: ret i32 1
-
 // CHECK: attributes #[[S]] = {{.*}}"target-features"="+avx,+cmov,+f16c,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave"
 // CHECK: attributes #[[K]] = {{.*}}"target-features"="+adx,+avx,+avx2,+avx512cd,+avx512er,+avx512f,+avx512pf,+bmi,+cmov,+f16c,+fma,+lzcnt,+mmx,+movbe,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave"
 // CHECK: attributes #[[O]] = {{.*}}"target-features"="+cmov,+mmx,+movbe,+sse,+sse2,+sse3,+ssse3,+x87"
Index: lib/CodeGen/CodeGenModule.h
===================================================================
--- lib/CodeGen/CodeGenModule.h
+++ lib/CodeGen/CodeGenModule.h
@@ -1324,7 +1324,10 @@
   void EmitGlobalVarDefinition(const VarDecl *D, bool IsTentative = false);
   void EmitAliasDefinition(GlobalDecl GD);
   void emitIFuncDefinition(GlobalDecl GD);
-  void emitCPUDispatchDefinition(GlobalDecl GD);
+  void emitTargetMultiVersionResolver(GlobalDecl GD);
+  llvm::Function *getCPUDispatchResolverFunction(GlobalDecl GD,
+                                                 llvm::Type *&DeclTy);
+  void emitCPUDispatchResolver(GlobalDecl GD);
   void EmitObjCPropertyImplementations(const ObjCImplementationDecl *D);
   void EmitObjCIvarInitializations(ObjCImplementationDecl *D);
 
Index: lib/CodeGen/CodeGenModule.cpp
===================================================================
--- lib/CodeGen/CodeGenModule.cpp
+++ lib/CodeGen/CodeGenModule.cpp
@@ -892,15 +892,28 @@
   return (Twine('.') + Twine(Target.CPUSpecificManglingCharacter(Name))).str();
 }
 
-static void AppendCPUSpecificCPUDispatchMangling(const CodeGenModule &CGM,
-                                                 const CPUSpecificAttr *Attr,
-                                                 unsigned CPUIndex,
-                                                 raw_ostream &Out) {
+static void AppendCPUSpecificCPUDispatchMangling(
+    const CodeGenModule &CGM, const CPUSpecificAttr *SpecAttr,
+    const CPUDispatchAttr *DispAttr, unsigned CPUOrdinal, raw_ostream &Out) {
+  assert((!SpecAttr || !DispAttr) && "Cannot be both dispatch and specific");
   // cpu_specific gets the current name, dispatch gets the resolver if IFunc is
   // supported.
-  if (Attr)
-    Out << getCPUSpecificMangling(CGM, Attr->getCPUName(CPUIndex)->getName());
-  else if (CGM.getTarget().supportsIFunc())
+  if (SpecAttr && CPUOrdinal)
+    Out << getCPUSpecificMangling(CGM,
+                                  SpecAttr->getCPUName(CPUOrdinal)->getName());
+  else if (SpecAttr && !CPUOrdinal) {
+    // This symbol should never be emitted; however, it still needs a unique
+    // name. To accomplish that, we simply append all of the CPUs and .ifunc
+    // to it so it is more easily identified and is unique.
+    for (const IdentifierInfo *II : SpecAttr->cpus())
+      Out << "." << getCPUSpecificMangling(CGM, II->getName());
+    Out << ".ifunc";
+  } else if (CPUOrdinal) {
+    // This should be the materialized version of an externally declared version
+    // of this function.
+    Out << getCPUSpecificMangling(CGM,
+                                  DispAttr->getCPUName(CPUOrdinal)->getName());
+  } else if (CGM.getTarget().supportsIFunc())
     Out << ".resolver";
 }
 
@@ -971,7 +984,8 @@
       case MultiVersionKind::CPUSpecific:
         AppendCPUSpecificCPUDispatchMangling(CGM,
                                              FD->getAttr<CPUSpecificAttr>(),
-                                             GD.getMultiVersionIndex(), Out);
+                                             FD->getAttr<CPUDispatchAttr>(),
+                                             GD.getMultiVersionOrdinal(), Out);
         break;
       case MultiVersionKind::Target:
         AppendTargetMangling(CGM, FD->getAttr<TargetAttr>(), Out);
@@ -2125,9 +2139,19 @@
   if (Global->hasAttr<IFuncAttr>())
     return emitIFuncDefinition(GD);
 
-  // If this is a cpu_dispatch multiversion function, emit the resolver.
-  if (Global->hasAttr<CPUDispatchAttr>())
-    return emitCPUDispatchDefinition(GD);
+  // If this is a cpu_dispatch multiversion function, designate it for emission
+  // at the end of the Translation Unit.
+  if (Global->hasAttr<CPUDispatchAttr>()) {
+    MultiVersionFuncs.push_back(GD);
+    return;
+  }
+
+  const auto *SpecAttr = Global->getAttr<CPUSpecificAttr>();
+  if (SpecAttr && GD.getMultiVersionOrdinal() == 0) {
+    for (unsigned I = 1; I <= SpecAttr->cpus_size(); ++I)
+      EmitGlobal(GD.getWithMultiVersionOrdinal(I));
+    return;
+  }
 
   // If this is CUDA, be selective about which declarations we emit.
   if (LangOpts.CUDA) {
@@ -2420,13 +2444,26 @@
 
 void CodeGenModule::EmitMultiVersionFunctionDefinition(GlobalDecl GD,
                                                        llvm::GlobalValue *GV) {
+  // The usage of GlobalDecls containing a CPUDispatch/CPUSpecific function is
+  // a touch subtle. The value of the Ordinal is significant in differentiating
+  // each version. The meaning of each of the 4 combinations is as follows:
+  // (CPUDispatchFunc, 0): This refers to the resolver itself.
+  // (CPUDispatchFunc, N): refers to an externally declared (outside of this
+  //   Module) symbol corresponding to the Nth CPU in the CPUDispatchAttr's CPU
+  //   list.
+  // (CPUSpecificFunc, 0): This is a placeholder used to refer to the
+  //   collection of function versions represented by the function declaration.
+  //   These symbols should never be emitted, since usages will be replaced by
+  //   the call to the resolver/ifunc directly.
+  // (CPUSpecificFunc, N): refers to the internal (inside this Module) symbol
+  //   for the Nth CPU in the CPUSpecificAttr's CPU list.
   const auto *FD = cast<FunctionDecl>(GD.getDecl());
 
   if (FD->isCPUSpecificMultiVersion()) {
-    auto *Spec = FD->getAttr<CPUSpecificAttr>();
-    for (unsigned I = 0; I < Spec->cpus_size(); ++I)
-      EmitGlobalFunctionDefinition(GD.getWithMultiVersionIndex(I), nullptr);
     // Requires multiple emits.
+    const auto *Spec = FD->getAttr<CPUSpecificAttr>();
+    for (unsigned I = 1; I <= Spec->cpus_size(); ++I)
+      EmitGlobalFunctionDefinition(GD.getWithMultiVersionOrdinal(I), nullptr);
   } else
     EmitGlobalFunctionDefinition(GD, GV);
 }
@@ -2489,129 +2526,155 @@
   return Priority;
 }
 
-void CodeGenModule::emitMultiVersionFunctions() {
-  for (GlobalDecl GD : MultiVersionFuncs) {
-    SmallVector<CodeGenFunction::MultiVersionResolverOption, 10> Options;
-    const FunctionDecl *FD = cast<FunctionDecl>(GD.getDecl());
-    getContext().forEachMultiversionedFunctionVersion(
-        FD, [this, &GD, &Options](const FunctionDecl *CurFD) {
-          GlobalDecl CurGD{
-              (CurFD->isDefined() ? CurFD->getDefinition() : CurFD)};
-          StringRef MangledName = getMangledName(CurGD);
-          llvm::Constant *Func = GetGlobalValue(MangledName);
-          if (!Func) {
-            if (CurFD->isDefined()) {
-              EmitGlobalFunctionDefinition(CurGD, nullptr);
-              Func = GetGlobalValue(MangledName);
-            } else {
-              const CGFunctionInfo &FI =
-                  getTypes().arrangeGlobalDeclaration(GD);
-              llvm::FunctionType *Ty = getTypes().GetFunctionType(FI);
-              Func = GetAddrOfFunction(CurGD, Ty, /*ForVTable=*/false,
-                                       /*DontDefer=*/false, ForDefinition);
-            }
-            assert(Func && "This should have just been created");
+void CodeGenModule::emitTargetMultiVersionResolver(GlobalDecl GD) {
+  SmallVector<CodeGenFunction::MultiVersionResolverOption, 10> Options;
+  const FunctionDecl *FD = cast<FunctionDecl>(GD.getDecl());
+  assert(FD->isTargetMultiVersion() && "Not a target multiversion function?");
+
+  getContext().forEachMultiversionedFunctionVersion(
+      FD, [this, &GD, &Options](const FunctionDecl *CurFD) {
+        GlobalDecl CurGD{(CurFD->isDefined() ? CurFD->getDefinition() : CurFD)};
+        StringRef MangledName = getMangledName(CurGD);
+        llvm::Constant *Func = GetGlobalValue(MangledName);
+        if (!Func) {
+          if (CurFD->isDefined()) {
+            EmitGlobalFunctionDefinition(CurGD, nullptr);
+            Func = GetGlobalValue(MangledName);
+          } else {
+            const CGFunctionInfo &FI = getTypes().arrangeGlobalDeclaration(GD);
+            llvm::FunctionType *Ty = getTypes().GetFunctionType(FI);
+            Func = GetAddrOfFunction(CurGD, Ty, /*ForVTable=*/false,
+                                     /*DontDefer=*/false, ForDefinition);
           }
+          assert(Func && "This should have just been created");
+        }
 
-          const auto *TA = CurFD->getAttr<TargetAttr>();
-          llvm::SmallVector<StringRef, 8> Feats;
-          TA->getAddedFeatures(Feats);
+        const auto *TA = CurFD->getAttr<TargetAttr>();
+        llvm::SmallVector<StringRef, 8> Feats;
+        TA->getAddedFeatures(Feats);
 
-          Options.emplace_back(cast<llvm::Function>(Func),
-                               TA->getArchitecture(), Feats);
-        });
+        Options.emplace_back(cast<llvm::Function>(Func), TA->getArchitecture(),
+                             Feats);
+      });
+  llvm::Function *ResolverFunc;
+  const TargetInfo &TI = getTarget();
 
-    llvm::Function *ResolverFunc;
-    const TargetInfo &TI = getTarget();
+  if (TI.supportsIFunc() || FD->isTargetMultiVersion())
+    ResolverFunc = cast<llvm::Function>(
+        GetGlobalValue((getMangledName(GD) + ".resolver").str()));
+  else
+    ResolverFunc = cast<llvm::Function>(GetGlobalValue(getMangledName(GD)));
 
-    if (TI.supportsIFunc() || FD->isTargetMultiVersion())
-      ResolverFunc = cast<llvm::Function>(
-          GetGlobalValue((getMangledName(GD) + ".resolver").str()));
-    else
-      ResolverFunc = cast<llvm::Function>(GetGlobalValue(getMangledName(GD)));
+  if (supportsCOMDAT())
+    ResolverFunc->setComdat(
+        getModule().getOrInsertComdat(ResolverFunc->getName()));
+
+  std::stable_sort(
+      Options.begin(), Options.end(),
+      [&TI](const CodeGenFunction::MultiVersionResolverOption &LHS,
+            const CodeGenFunction::MultiVersionResolverOption &RHS) {
+        return TargetMVPriority(TI, LHS) > TargetMVPriority(TI, RHS);
+      });
+  CodeGenFunction CGF(*this);
+  CGF.EmitMultiVersionResolver(ResolverFunc, Options);
+}
 
-    if (supportsCOMDAT())
-      ResolverFunc->setComdat(
-          getModule().getOrInsertComdat(ResolverFunc->getName()));
+void CodeGenModule::emitMultiVersionFunctions() {
+  for (GlobalDecl GD : MultiVersionFuncs) {
+    const auto *FD = cast<FunctionDecl>(GD.getDecl());
+    assert(FD && FD->isMultiVersion() && "Not a multiversion function?");
 
-    std::stable_sort(
-        Options.begin(), Options.end(),
-        [&TI](const CodeGenFunction::MultiVersionResolverOption &LHS,
-              const CodeGenFunction::MultiVersionResolverOption &RHS) {
-          return TargetMVPriority(TI, LHS) > TargetMVPriority(TI, RHS);
-        });
-    CodeGenFunction CGF(*this);
-    CGF.EmitMultiVersionResolver(ResolverFunc, Options);
+    switch (FD->getMultiVersionKind()) {
+    case MultiVersionKind::Target:
+      emitTargetMultiVersionResolver(GD);
+      break;
+    case MultiVersionKind::CPUDispatch:
+      emitCPUDispatchResolver(GD);
+      break;
+    case MultiVersionKind::CPUSpecific:
+    case MultiVersionKind::None:
+      llvm_unreachable("Function does not cause a multiversion resolver.");
+    }
   }
 }
 
-void CodeGenModule::emitCPUDispatchDefinition(GlobalDecl GD) {
+// Helper function to get the llvm::Function object for the resolver so that
+// it can be filled in.
+llvm::Function *
+CodeGenModule::getCPUDispatchResolverFunction(GlobalDecl GD,
+                                              llvm::Type *&DeclTy) {
   const auto *FD = cast<FunctionDecl>(GD.getDecl());
-  assert(FD && "Not a FunctionDecl?");
-  const auto *DD = FD->getAttr<CPUDispatchAttr>();
-  assert(DD && "Not a cpu_dispatch Function?");
   QualType CanonTy = Context.getCanonicalType(FD->getType());
-  llvm::Type *DeclTy = getTypes().ConvertFunctionType(CanonTy, FD);
-
+  DeclTy = getTypes().ConvertFunctionType(CanonTy, FD);
   if (const auto *CXXFD = dyn_cast<CXXMethodDecl>(FD)) {
-    const CGFunctionInfo &FInfo = getTypes().arrangeCXXMethodDeclaration(CXXFD);
+    const CGFunctionInfo &FInfo =
+        getTypes().arrangeCXXMethodDeclaration(CXXFD);
     DeclTy = getTypes().GetFunctionType(FInfo);
   }
-
   StringRef ResolverName = getMangledName(GD);
-
   llvm::Type *ResolverType;
   GlobalDecl ResolverGD;
   if (getTarget().supportsIFunc())
     ResolverType = llvm::FunctionType::get(
-        llvm::PointerType::get(DeclTy,
-                               Context.getTargetAddressSpace(FD->getType())),
+        llvm::PointerType::get(
+            DeclTy, getContext().getTargetAddressSpace(FD->getType())),
         false);
   else {
     ResolverType = DeclTy;
     ResolverGD = GD;
   }
 
-  auto *ResolverFunc = cast<llvm::Function>(GetOrCreateLLVMFunction(
+  return cast<llvm::Function>(GetOrCreateLLVMFunction(
       ResolverName, ResolverType, ResolverGD, /*ForVTable=*/false));
+}
 
+void CodeGenModule::emitCPUDispatchResolver(GlobalDecl GD) {
   SmallVector<CodeGenFunction::MultiVersionResolverOption, 10> Options;
   const TargetInfo &Target = getTarget();
-  unsigned Index = 0;
+  const auto *FD = cast<FunctionDecl>(GD.getDecl());
+  assert(FD->isCPUDispatchMultiVersion() &&
+         "Not a target multiversion function?");
+  const auto *DD = FD->getAttr<CPUDispatchAttr>();
+  llvm::Type *DeclTy = nullptr;
+  llvm::Function *ResolverFunc = getCPUDispatchResolverFunction(GD, DeclTy);
+
+  unsigned Ordinal = 1;
   for (const IdentifierInfo *II : DD->cpus()) {
-    // Get the name of the target function so we can look it up/create it.
-    std::string MangledName = getMangledNameImpl(*this, GD, FD, true) +
-                              getCPUSpecificMangling(*this, II->getName());
-
-    llvm::Constant *Func = GetGlobalValue(MangledName);
-
-    if (!Func) {
-      GlobalDecl ExistingDecl = Manglings.lookup(MangledName);
-      if (ExistingDecl.getDecl() &&
-          ExistingDecl.getDecl()->getAsFunction()->isDefined()) {
-        EmitGlobalFunctionDefinition(ExistingDecl, nullptr);
-        Func = GetGlobalValue(MangledName);
-      } else {
-        if (!ExistingDecl.getDecl())
-          ExistingDecl = GD.getWithMultiVersionIndex(Index);
+    GlobalDecl ImplDecl;
+    // If the function version was declared/defined in the translation unit, use
+    // that one.
+    getContext().forEachMultiversionedFunctionVersion(
+        FD, [&ImplDecl, II](const FunctionDecl *CurFD) {
+          CPUSpecificAttr *SA = CurFD->getAttr<CPUSpecificAttr>();
+          unsigned Ord;
+          if (SA && (Ord = SA->getCPUNameOrdinal(II->getName())))
+            ImplDecl =
+                GlobalDecl(CurFD->isDefined() ? CurFD->getDefinition()
+                                              : CurFD->getCanonicalDecl(),
+                           Ord);
+        });
 
-      Func = GetOrCreateLLVMFunction(
-          MangledName, DeclTy, ExistingDecl,
-          /*ForVTable=*/false, /*DontDefer=*/true,
-          /*IsThunk=*/false, llvm::AttributeList(), ForDefinition);
-      }
-    }
+    if (!ImplDecl.getDecl())
+      ImplDecl = GD.getWithMultiVersionOrdinal(Ordinal);
+    else if (ImplDecl.getDecl()->getAsFunction()->isDefined())
+        EmitGlobalFunctionDefinition(ImplDecl, nullptr);
+
+    auto *Func = cast<llvm::Function>(
+        GetAddrOfFunction(ImplDecl, DeclTy, /*ForVTable=*/false,
+                          /*DontDefer*/ false, ForDefinition));
+    assert(Func && "This should have just been created");
 
     llvm::SmallVector<StringRef, 32> Features;
     Target.getCPUSpecificCPUDispatchFeatures(II->getName(), Features);
     llvm::transform(Features, Features.begin(),
                     [](StringRef Str) { return Str.substr(1); });
-    Features.erase(std::remove_if(
-        Features.begin(), Features.end(), [&Target](StringRef Feat) {
-          return !Target.validateCpuSupports(Feat);
-        }), Features.end());
+    Features.erase(std::remove_if(Features.begin(), Features.end(),
+                                  [&Target](StringRef Feat) {
+                                    return !Target.validateCpuSupports(Feat);
+                                  }),
+                   Features.end());
     Options.emplace_back(cast<llvm::Function>(Func), StringRef{}, Features);
-    ++Index;
+    ++Ordinal;
   }
 
   llvm::sort(
@@ -2620,7 +2683,6 @@
         return CodeGenFunction::GetX86CpuSupportsMask(LHS.Conditions.Features) >
                CodeGenFunction::GetX86CpuSupportsMask(RHS.Conditions.Features);
       });
-
   // If the list contains multiple 'default' versions, such as when it contains
   // 'pentium' and 'generic', don't emit the call to the generic one (since we
   // always run on at least a 'pentium'). We do this by deleting the 'least
@@ -5450,7 +5512,7 @@
   } else if (const auto *SD = FD->getAttr<CPUSpecificAttr>()) {
     llvm::SmallVector<StringRef, 32> FeaturesTmp;
     Target.getCPUSpecificCPUDispatchFeatures(
-        SD->getCPUName(GD.getMultiVersionIndex())->getName(), FeaturesTmp);
+        SD->getCPUName(GD.getMultiVersionOrdinal())->getName(), FeaturesTmp);
     std::vector<std::string> Features(FeaturesTmp.begin(), FeaturesTmp.end());
     Target.initFeatureMap(FeatureMap, getDiags(), TargetCPU, Features);
   } else {
Index: include/clang/Basic/Attr.td
===================================================================
--- include/clang/Basic/Attr.td
+++ include/clang/Basic/Attr.td
@@ -893,8 +893,21 @@
   let Subjects = SubjectList<[Function]>;
   let Documentation = [CPUSpecificCPUDispatchDocs];
   let AdditionalMembers = [{
-    IdentifierInfo *getCPUName(unsigned Index) const {
-      return *(cpus_begin() + Index);
+    // Gets the ordinal of the requested CPU name, or 0 if it isn't in the cpu
+    // list.
+    unsigned getCPUNameOrdinal(StringRef Name) const {
+      unsigned Ord = 1;
+      for (const IdentifierInfo *II : cpus()) {
+        if (II->isStr(Name))
+          return Ord;
+        ++Ord;
+      }
+      return 0;
+    }
+
+    const IdentifierInfo *getCPUName(unsigned Ordinal) const {
+      assert(Ordinal > 0 && "Invalid Ordinal");
+      return *(cpus_begin() + Ordinal - 1);
     }
   }];
 }
@@ -904,6 +917,12 @@
   let Args = [VariadicIdentifierArgument<"Cpus">];
   let Subjects = SubjectList<[Function]>;
   let Documentation = [CPUSpecificCPUDispatchDocs];
+  let AdditionalMembers = [{
+    const IdentifierInfo *getCPUName(unsigned Ordinal) const {
+      assert(Ordinal > 0 && "Invalid Ordinal");
+      return *(cpus_begin() + Ordinal - 1);
+    }
+  }];
 }
 
 // CUDA attributes are spelled __attribute__((attr)) or __declspec(__attr__),
Index: include/clang/AST/GlobalDecl.h
===================================================================
--- include/clang/AST/GlobalDecl.h
+++ include/clang/AST/GlobalDecl.h
@@ -34,7 +34,7 @@
 /// a VarDecl, a FunctionDecl or a BlockDecl.
 class GlobalDecl {
   llvm::PointerIntPair<const Decl *, 2> Value;
-  unsigned MultiVersionIndex = 0;
+  unsigned MultiVersionOrdinal = 0;
 
   void Init(const Decl *D) {
     assert(!isa<CXXConstructorDecl>(D) && "Use other ctor with ctor decls!");
@@ -46,8 +46,8 @@
 public:
   GlobalDecl() = default;
   GlobalDecl(const VarDecl *D) { Init(D);}
-  GlobalDecl(const FunctionDecl *D, unsigned MVIndex = 0)
-      : MultiVersionIndex(MVIndex) {
+  GlobalDecl(const FunctionDecl *D, unsigned MVOrd = 0)
+      : MultiVersionOrdinal(MVOrd) {
     Init(D);
   }
   GlobalDecl(const BlockDecl *D) { Init(D); }
@@ -61,7 +61,7 @@
     GlobalDecl CanonGD;
     CanonGD.Value.setPointer(Value.getPointer()->getCanonicalDecl());
     CanonGD.Value.setInt(Value.getInt());
-    CanonGD.MultiVersionIndex = MultiVersionIndex;
+    CanonGD.MultiVersionOrdinal = MultiVersionOrdinal;
 
     return CanonGD;
   }
@@ -78,17 +78,17 @@
     return static_cast<CXXDtorType>(Value.getInt());
   }
 
-  unsigned getMultiVersionIndex() const {
+  unsigned getMultiVersionOrdinal() const {
     assert(isa<FunctionDecl>(getDecl()) &&
            !isa<CXXConstructorDecl>(getDecl()) &&
            !isa<CXXDestructorDecl>(getDecl()) &&
            "Decl is not a plain FunctionDecl!");
-    return MultiVersionIndex;
+    return MultiVersionOrdinal;
   }
 
   friend bool operator==(const GlobalDecl &LHS, const GlobalDecl &RHS) {
     return LHS.Value == RHS.Value &&
-           LHS.MultiVersionIndex == RHS.MultiVersionIndex;
+           LHS.MultiVersionOrdinal== RHS.MultiVersionOrdinal;
   }
 
   void *getAsOpaquePtr() const { return Value.getOpaqueValue(); }
@@ -105,13 +105,13 @@
     return Result;
   }
 
-  GlobalDecl getWithMultiVersionIndex(unsigned Index) {
+  GlobalDecl getWithMultiVersionOrdinal(unsigned Ordinal) {
     assert(isa<FunctionDecl>(getDecl()) &&
            !isa<CXXConstructorDecl>(getDecl()) &&
            !isa<CXXDestructorDecl>(getDecl()) &&
            "Decl is not a plain FunctionDecl!");
     GlobalDecl Result(*this);
-    Result.MultiVersionIndex = Index;
+    Result.MultiVersionOrdinal = Ordinal;
     return Result;
   }
 };

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D55527: Normalize GlobalDecls when used with CPUDispatch

Reply via email to