serge-sans-paille updated this revision to Diff 376812.
serge-sans-paille added a comment.

+ extra test case
+ use internal linkage instead of available_externally linkage for the generated clone
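
For reference, a minimal sketch of the source pattern this targets (it mirrors the new tests; the helper name copy_bytes is only illustrative): an always_inline, gnu_inline redefinition of a library builtin, whose body now lands in an internal <name>.inline clone while the external symbol is kept around in case the function's address is taken.

  typedef unsigned long size_t;

  // Redefinition of the memcpy builtin: the body is emitted under the
  // internal clone memcpy.inline, and an external memcpy symbol is kept
  // for address-taken uses.
  extern inline __attribute__((always_inline)) __attribute__((gnu_inline))
  void *memcpy(void *dst, const void *src, size_t n) {
    return __builtin_memcpy(dst, src, n);
  }

  void *copy_bytes(void *dst, const void *src, size_t n) {
    return memcpy(dst, src, n); // direct call is lowered through memcpy.inline
  }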


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D111009/new/

https://reviews.llvm.org/D111009

Files:
  clang/lib/AST/Decl.cpp
  clang/lib/CodeGen/CGExpr.cpp
  clang/lib/CodeGen/CodeGenFunction.cpp
  clang/test/CodeGen/memcmp-inline-builtin-to-asm.c
  clang/test/CodeGen/memcpy-inline-builtin-no-extern.c
  clang/test/CodeGen/memcpy-inline-builtin.c
  clang/test/CodeGen/memcpy-nobuiltin.c
  clang/test/CodeGen/memcpy-nobuiltin.inc
  clang/test/CodeGen/pr9614.c

Index: clang/test/CodeGen/pr9614.c
===================================================================
--- clang/test/CodeGen/pr9614.c
+++ clang/test/CodeGen/pr9614.c
@@ -32,14 +32,14 @@
 
 // CHECK-LABEL: define{{.*}} void @f()
 // CHECK: call void @foo()
-// CHECK: call i32 @abs(i32 %0)
+// CHECK: call i32 @abs(i32 0)
 // CHECK: call i8* @strrchr(
 // CHECK: call void @llvm.prefetch.p0i8(
 // CHECK: call i8* @memchr(
 // CHECK: ret void
 
 // CHECK: declare void @foo()
+// CHECK: declare i32 @abs(i32
 // CHECK: declare i8* @strrchr(i8*, i32)
 // CHECK: declare i8* @memchr(
-// CHECK: declare i32 @abs(i32
 // CHECK: declare void @llvm.prefetch.p0i8(
Index: clang/test/CodeGen/memcpy-nobuiltin.inc
===================================================================
--- clang/test/CodeGen/memcpy-nobuiltin.inc
+++ clang/test/CodeGen/memcpy-nobuiltin.inc
@@ -2,7 +2,7 @@
 extern void *memcpy(void *dest, void const *from, size_t n);
 
 #ifdef WITH_DECL
-inline __attribute__((always_inline)) void *memcpy(void *dest, void const *from, size_t n) {
+inline __attribute__((always_inline)) __attribute__((gnu_inline)) void *memcpy(void *dest, void const *from, size_t n) {
   char const *ifrom = from;
   char *idest = dest;
   while (n--)
@@ -11,7 +11,7 @@
 }
 #endif
 #ifdef WITH_SELF_REFERENCE_DECL
-inline __attribute__((always_inline)) void *memcpy(void *dest, void const *from, size_t n) {
+inline __attribute__((always_inline)) __attribute__((gnu_inline)) void *memcpy(void *dest, void const *from, size_t n) {
   if (n != 0)
     memcpy(dest, from, n);
   return dest;
Index: clang/test/CodeGen/memcpy-nobuiltin.c
===================================================================
--- clang/test/CodeGen/memcpy-nobuiltin.c
+++ clang/test/CodeGen/memcpy-nobuiltin.c
@@ -1,11 +1,11 @@
 // RUN: %clang_cc1 -verify -S -emit-llvm -o- %s -isystem %S -DWITH_DECL | FileCheck --check-prefix=CHECK-WITH-DECL %s
 // RUN: %clang_cc1 -verify -S -emit-llvm -o- %s -isystem %S -UWITH_DECL | FileCheck --check-prefix=CHECK-NO-DECL %s
-// RUN: %clang_cc1 -verify -S -emit-llvm -o- %s -isystem %S -DWITH_SELF_REFERENCE_DECL | FileCheck --check-prefix=CHECK-SELF-REF-DECL %s
+// RUN: %clang_cc1 -verify -S -emit-llvm -o- %s -isystem %S -disable-llvm-passes -DWITH_SELF_REFERENCE_DECL | FileCheck --check-prefix=CHECK-SELF-REF-DECL %s
 //
 // CHECK-WITH-DECL-NOT: @llvm.memcpy
 // CHECK-NO-DECL: @llvm.memcpy
 // CHECK-SELF-REF-DECL-LABEL: define {{.*}}i8* @memcpy.inline
-// CHECK-SELF-REF-DECL:       @memcpy(
+// CHECK-SELF-REF-DECL:       @llvm.memcpy.{{.*}}(
 //
 #include <memcpy-nobuiltin.inc>
 void test(void *dest, void const *from, size_t n) {
Index: clang/test/CodeGen/memcpy-inline-builtin.c
===================================================================
--- clang/test/CodeGen/memcpy-inline-builtin.c
+++ clang/test/CodeGen/memcpy-inline-builtin.c
@@ -32,13 +32,39 @@
 // CHECK-NEXT:    store i8* [[TMP0]], i8** [[A_ADDR_I]], align 8
 // CHECK-NEXT:    store i8* [[TMP1]], i8** [[B_ADDR_I]], align 8
 // CHECK-NEXT:    store i64 [[TMP2]], i64* [[C_ADDR_I]], align 8
-// CHECK-NEXT:    call void asm sideeffect "# memcpy.inline marker", "~{dirflag},~{fpsr},~{flags}"() #[[ATTR2:[0-9]+]], !srcloc !2
+// CHECK-NEXT:    call void asm sideeffect "# memcpy.inline marker", "~{dirflag},~{fpsr},~{flags}"() #[[ATTR4:[0-9]+]], !srcloc !2
 // CHECK-NEXT:    [[TMP3:%.*]] = load i8*, i8** [[A_ADDR_I]], align 8
 // CHECK-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[B_ADDR_I]], align 8
 // CHECK-NEXT:    [[TMP5:%.*]] = load i64, i64* [[C_ADDR_I]], align 8
-// CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[TMP3]], i8* align 1 [[TMP4]], i64 [[TMP5]], i1 false) #[[ATTR2]]
+// CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[TMP3]], i8* align 1 [[TMP4]], i64 [[TMP5]], i1 false) #[[ATTR4]]
+// CHECK-NEXT:    ret i8* [[TMP3]]
+//
+void *foo(void *a, const void *b, size_t c) {
+  return memcpy(a, b, c);
+}
+
+// CHECK-LABEL: @bar(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i8*, align 8
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca i8*, align 8
+// CHECK-NEXT:    [[C_ADDR:%.*]] = alloca i64, align 8
+// CHECK-NEXT:    [[CPY:%.*]] = alloca i8* (i8*, i8*, i64)*, align 8
+// CHECK-NEXT:    store i8* [[A:%.*]], i8** [[A_ADDR]], align 8
+// CHECK-NEXT:    store i8* [[B:%.*]], i8** [[B_ADDR]], align 8
+// CHECK-NEXT:    store i64 [[C:%.*]], i64* [[C_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load i64, i64* [[C_ADDR]], align 8
+// CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i64 [[TMP0]], 10
+// CHECK-NEXT:    [[TMP1:%.*]] = zext i1 [[CMP]] to i64
+// CHECK-NEXT:    [[COND:%.*]] = select i1 [[CMP]], i8* (i8*, i8*, i64)* @memcpy, i8* (i8*, i8*, i64)* @foo
+// CHECK-NEXT:    store i8* (i8*, i8*, i64)* [[COND]], i8* (i8*, i8*, i64)** [[CPY]], align 8
+// CHECK-NEXT:    [[TMP2:%.*]] = load i8* (i8*, i8*, i64)*, i8* (i8*, i8*, i64)** [[CPY]], align 8
+// CHECK-NEXT:    [[TMP3:%.*]] = load i8*, i8** [[A_ADDR]], align 8
+// CHECK-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[B_ADDR]], align 8
+// CHECK-NEXT:    [[TMP5:%.*]] = load i64, i64* [[C_ADDR]], align 8
+// CHECK-NEXT:    [[CALL:%.*]] = call i8* [[TMP2]](i8* [[TMP3]], i8* [[TMP4]], i64 [[TMP5]])
 // CHECK-NEXT:    ret void
 //
-void foo(void *a, const void *b, size_t c) {
-  memcpy(a, b, c);
+void bar(void *a, const void *b, size_t c) {
+  void *(*cpy)(void *, const void *, size_t) = c > 10 ? memcpy : foo;
+  cpy(a, b, c);
 }
Index: clang/test/CodeGen/memcpy-inline-builtin-no-extern.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/memcpy-inline-builtin-no-extern.c
@@ -0,0 +1,29 @@
+// RUN: %clang_cc1 -triple x86_64 -S -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s
+//
+// Verifies that clang-generated *.inline functions are flagged as internal.
+
+typedef unsigned long size_t;
+
+#define AVAILABLE_EXTERNALLY extern inline __attribute__((always_inline)) __attribute__((gnu_inline))
+// Clang recognizes an inline builtin and renames it to memcmp.inline to prevent conflicts with builtins.
+AVAILABLE_EXTERNALLY int memcmp(const void *a, const void *b, size_t c) {
+  return __builtin_memcmp(a, b, c);
+}
+
+// CHECK: internal{{.*}}memcmp.inline
+int bar(const void *a, const void *b, size_t c) {
+  return memcmp(a, b, c);
+}
+
+// Note that extern has been omitted here.
+#define TRIPLE_INLINE inline __attribute__((always_inline)) __attribute__((gnu_inline))
+
+// Clang recognizes an inline builtin and renames it to memcpy.inline to prevent conflicts with builtins.
+TRIPLE_INLINE void *memcpy(void *a, const void *b, size_t c) {
+  return __builtin_memcpy(a, b, c);
+}
+
+// CHECK: internal{{.*}}memcpy.inline
+void *foo(void *a, const void *b, size_t c) {
+  return memcpy(a, b, c);
+}
Index: clang/test/CodeGen/memcmp-inline-builtin-to-asm.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/memcmp-inline-builtin-to-asm.c
@@ -0,0 +1,36 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+
+// RUN: %clang_cc1 -triple x86_64 -S -emit-llvm -o - %s | opt -S -verify | FileCheck %s
+//
+// Verifies that clang detects the inline version of memcmp and uses it instead of the builtin.
+
+typedef unsigned long size_t;
+
+// Clang requires these attributes for a function to be redefined.
+#define AVAILABLE_EXTERNALLY extern inline __attribute__((always_inline)) __attribute__((gnu_inline))
+
+const void *con_unify_unimap_p1;
+
+AVAILABLE_EXTERNALLY
+int memcmp(const void *p, const void *q, unsigned long size) {
+  return __builtin_memcmp(p, q, size);
+}
+
+// CHECK-LABEL: @con_unify_unimap_q1(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[P_ADDR_I:%.*]] = alloca i8*, align 8
+// CHECK-NEXT:    [[Q_ADDR_I:%.*]] = alloca i8*, align 8
+// CHECK-NEXT:    [[SIZE_ADDR_I:%.*]] = alloca i64, align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load i8*, i8** @con_unify_unimap_p1, align 8
+// CHECK-NEXT:    store i8* [[TMP0]], i8** [[P_ADDR_I]], align 8
+// CHECK-NEXT:    store i8* bitcast (i32 ()* @con_unify_unimap_q1 to i8*), i8** [[Q_ADDR_I]], align 8
+// CHECK-NEXT:    store i64 4, i64* [[SIZE_ADDR_I]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = load i8*, i8** [[P_ADDR_I]], align 8
+// CHECK-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[Q_ADDR_I]], align 8
+// CHECK-NEXT:    [[TMP3:%.*]] = load i64, i64* [[SIZE_ADDR_I]], align 8
+// CHECK-NEXT:    [[CALL_I:%.*]] = call i32 @memcmp(i8* [[TMP1]], i8* [[TMP2]], i64 [[TMP3]]) #[[ATTR3:[0-9]+]]
+// CHECK-NEXT:    ret i32 [[CALL_I]]
+//
+int con_unify_unimap_q1(void) {
+  return memcmp(con_unify_unimap_p1, con_unify_unimap_q1, sizeof(int));
+}
Index: clang/lib/CodeGen/CodeGenFunction.cpp
===================================================================
--- clang/lib/CodeGen/CodeGenFunction.cpp
+++ clang/lib/CodeGen/CodeGenFunction.cpp
@@ -45,6 +45,7 @@
 #include "llvm/Support/CRC.h"
 #include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h"
 #include "llvm/Transforms/Utils/PromoteMemToReg.h"
+
 using namespace clang;
 using namespace CodeGen;
 
@@ -1294,10 +1295,22 @@
   FunctionArgList Args;
   QualType ResTy = BuildFunctionArgList(GD, Args);
 
-  // Give a different name to inline builtin to avoid conflict with actual
-  // builtins.
-  if (FD->isInlineBuiltinDeclaration() && Fn)
-    Fn->setName(Fn->getName() + ".inline");
+  // When generating code for a builtin with an inline declaration, use a
+  // mangled name to hold the actual body, while keeping an external definition
+  // in case the function pointer is referenced somewhere.
+  if (FD->isInlineBuiltinDeclaration() && Fn) {
+    std::string FDInlineName = (Fn->getName() + ".inline").str();
+    llvm::Module *M = Fn->getParent();
+    llvm::Function *Clone = M->getFunction(FDInlineName);
+    if (!Clone) {
+      Clone = llvm::Function::Create(Fn->getFunctionType(),
+                                     llvm::GlobalValue::InternalLinkage,
+                                     Fn->getAddressSpace(), FDInlineName, M);
+      Clone->addFnAttr(llvm::Attribute::AlwaysInline);
+    }
+    Fn->setLinkage(llvm::GlobalValue::ExternalLinkage);
+    Fn = Clone;
+  }
 
   // Check if we should generate debug info for this function.
   if (FD->hasAttr<NoDebugAttr>()) {
Index: clang/lib/CodeGen/CGExpr.cpp
===================================================================
--- clang/lib/CodeGen/CGExpr.cpp
+++ clang/lib/CodeGen/CGExpr.cpp
@@ -4891,12 +4891,28 @@
   const FunctionDecl *FD = cast<FunctionDecl>(GD.getDecl());
 
   if (auto builtinID = FD->getBuiltinID()) {
-    // Replaceable builtin provide their own implementation of a builtin. Unless
-    // we are in the builtin implementation itself, don't call the actual
-    // builtin. If we are in the builtin implementation, avoid trivial infinite
+    std::string FDInlineName = (FD->getName() + ".inline").str();
+    // When directly calling an inline builtin, call it through its mangled
+    // name to make it clear it's not the actual builtin.
+    if (FD->isInlineBuiltinDeclaration() &&
+        CGF.CurFn->getName() != FDInlineName) {
+      llvm::Constant *CalleePtr = EmitFunctionDeclPointer(CGF.CGM, GD);
+      llvm::Function *Fn = llvm::cast<llvm::Function>(CalleePtr);
+      llvm::Module &M = CGF.CGM.getModule();
+      llvm::Function *Clone = M.getFunction(FDInlineName);
+      if (!Clone) {
+        Clone = llvm::Function::Create(Fn->getFunctionType(),
+                                       llvm::GlobalValue::InternalLinkage,
+                                       Fn->getAddressSpace(), FDInlineName, &M);
+        Clone->addFnAttr(llvm::Attribute::AlwaysInline);
+      }
+      return CGCallee::forDirect(Clone, GD);
+    }
+
+    // Replaceable builtins provide their own implementation of a builtin. If we
+    // are in an inline builtin implementation, avoid trivial infinite
     // recursion.
-    if (!FD->isInlineBuiltinDeclaration() ||
-        CGF.CurFn->getName() == FD->getName())
+    else
       return CGCallee::forBuiltin(builtinID, FD);
   }
 
@@ -4905,6 +4921,7 @@
       FD->hasAttr<CUDAGlobalAttr>())
     CalleePtr = CGF.CGM.getCUDARuntime().getKernelStub(
         cast<llvm::GlobalValue>(CalleePtr->stripPointerCasts()));
+
   return CGCallee::forDirect(CalleePtr, GD);
 }
 
Index: clang/lib/AST/Decl.cpp
===================================================================
--- clang/lib/AST/Decl.cpp
+++ clang/lib/AST/Decl.cpp
@@ -3177,7 +3177,8 @@
 
   const FunctionDecl *Definition;
   return hasBody(Definition) && Definition->isInlineSpecified() &&
-         Definition->hasAttr<AlwaysInlineAttr>();
+         Definition->hasAttr<AlwaysInlineAttr>() &&
+         Definition->hasAttr<GNUInlineAttr>();
 }
 
 bool FunctionDecl::isDestroyingOperatorDelete() const {