This revision was automatically updated to reflect the committed changes.
Closed by commit rL371269: Use musttail for variadic method thunks when 
possible (authored by rnk, committed by ).
Herald added a project: LLVM.
Herald added a subscriber: llvm-commits.

Changed prior to commit:
  https://reviews.llvm.org/D67028?vs=218995&id=219193#toc

Repository:
  rL LLVM

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D67028/new/

https://reviews.llvm.org/D67028

Files:
  cfe/trunk/lib/CodeGen/CGVTables.cpp
  cfe/trunk/test/CodeGenCXX/linetable-virtual-variadic.cpp
  cfe/trunk/test/CodeGenCXX/ms-thunks-variadic-return.cpp
  cfe/trunk/test/CodeGenCXX/thunks.cpp

Index: cfe/trunk/test/CodeGenCXX/linetable-virtual-variadic.cpp
===================================================================
--- cfe/trunk/test/CodeGenCXX/linetable-virtual-variadic.cpp
+++ cfe/trunk/test/CodeGenCXX/linetable-virtual-variadic.cpp
@@ -1,5 +1,6 @@
-// RUN: %clang_cc1 -triple x86_64-apple-darwin -emit-llvm -debug-info-kind=line-tables-only %s -o - | FileCheck %s
-// RUN: %clang_cc1 -triple x86_64-apple-darwin -emit-llvm -debug-info-kind=line-directives-only %s -o - | FileCheck %s
+// Sparc64 is used because AArch64 and X86_64 would both use musttail.
+// RUN: %clang_cc1 -triple sparc64-linux-gnu -emit-llvm -debug-info-kind=line-tables-only %s -o - | FileCheck %s
+// RUN: %clang_cc1 -triple sparc64-linux-gnu -emit-llvm -debug-info-kind=line-directives-only %s -o - | FileCheck %s
 // Crasher for PR22929.
 class Base {
   virtual void VariadicFunction(...);
Index: cfe/trunk/test/CodeGenCXX/thunks.cpp
===================================================================
--- cfe/trunk/test/CodeGenCXX/thunks.cpp
+++ cfe/trunk/test/CodeGenCXX/thunks.cpp
@@ -1,6 +1,20 @@
-// RUN: %clang_cc1 %s -triple=x86_64-pc-linux-gnu -munwind-tables -emit-llvm -o - | FileCheck --check-prefix=CHECK --check-prefix=CHECK-NONOPT %s
-// RUN: %clang_cc1 %s -triple=x86_64-pc-linux-gnu -debug-info-kind=standalone -dwarf-version=5 -munwind-tables -emit-llvm -o - | FileCheck --check-prefix=CHECK --check-prefix=CHECK-NONOPT --check-prefix=CHECK-DBG %s
-// RUN: %clang_cc1 %s -triple=x86_64-pc-linux-gnu -munwind-tables -emit-llvm -o - -O1 -disable-llvm-passes | FileCheck --check-prefix=CHECK --check-prefix=CHECK-OPT %s
+// Sparc64 doesn't support musttail (yet), so it uses method cloning for
+// variadic thunks. Use it for testing.
+// RUN: %clang_cc1 %s -triple=sparc64-pc-linux-gnu -munwind-tables -emit-llvm -o - \
+// RUN:     | FileCheck --check-prefixes=CHECK,CHECK-CLONE,CHECK-NONOPT %s
+// RUN: %clang_cc1 %s -triple=sparc64-pc-linux-gnu -debug-info-kind=standalone -dwarf-version=5 -munwind-tables -emit-llvm -o - \
+// RUN:     | FileCheck --check-prefixes=CHECK,CHECK-CLONE,CHECK-NONOPT,CHECK-DBG %s
+// RUN: %clang_cc1 %s -triple=sparc64-pc-linux-gnu -munwind-tables -emit-llvm -o - -O1 -disable-llvm-passes \
+// RUN:     | FileCheck --check-prefixes=CHECK,CHECK-CLONE,CHECK-OPT %s
+
+// Test x86_64, which uses musttail for variadic thunks.
+// RUN: %clang_cc1 %s -triple=x86_64-pc-linux-gnu -munwind-tables -emit-llvm -o - -O1 -disable-llvm-passes \
+// RUN:     | FileCheck --check-prefixes=CHECK,CHECK-TAIL,CHECK-OPT %s
+
+// Finally, reuse these tests for the MS ABI.
+// RUN: %clang_cc1 %s -triple=x86_64-windows-msvc -munwind-tables -emit-llvm -o - -O1 -disable-llvm-passes \
+// RUN:     | FileCheck --check-prefixes=WIN64 %s
+
 
 namespace Test1 {
 
@@ -23,6 +37,11 @@
 // CHECK-LABEL: define void @_ZThn8_N5Test11C1fEv(
 // CHECK-DBG-NOT: dbg.declare
 // CHECK: ret void
+//
+// WIN64-LABEL: define dso_local void @"?f@C@Test1@@UEAAXXZ"(
+// WIN64-LABEL: define linkonce_odr dso_local void @"?f@C@Test1@@W7EAAXXZ"(
+// WIN64: getelementptr i8, i8* {{.*}}, i32 -8
+// WIN64: ret void
 void C::f() { }
 
 }
@@ -45,6 +64,10 @@
 // CHECK: ret void
 void B::f() { }
 
+// No thunk is used for this case in the MS ABI.
+// WIN64-LABEL: define dso_local void @"?f@B@Test2@@UEAAXXZ"(
+// WIN64-NOT: define {{.*}} void @"?f@B@Test2
+
 }
 
 namespace Test3 {
@@ -65,6 +88,7 @@
 };
 
 // CHECK: define %{{.*}}* @_ZTch0_v0_n24_N5Test31B1fEv(
+// WIN64: define weak_odr dso_local %{{.*}} @"?f@B@Test3@@QEAAPEAUV1@2@XZ"(
 V2 *B::f() { return 0; }
 
 }
@@ -92,6 +116,10 @@
 // CHECK: ret void
 void C::f() { }
 
+// Visibility doesn't matter on COFF, but whatever. We could add an ELF test
+// mode later.
+// WIN64-LABEL: define protected void @"?f@C@Test4@@UEAAXXZ"(
+// WIN64-LABEL: define linkonce_odr protected void @"?f@C@Test4@@W7EAAXXZ"(
 }
 
 // Check that the thunk gets internal linkage.
@@ -119,6 +147,8 @@
     c.f();
   }
 }
+// Not sure why this isn't delayed like in Itanium.
+// WIN64-LABEL: define internal void @"?f@C@?A0xAEF74CE7@Test4B@@UEAAXXZ"(
 
 namespace Test5 {
 
@@ -134,6 +164,7 @@
 void f(B b) {
   b.f();
 }
+// No thunk in MS ABI in this case.
 }
 
 namespace Test6 {
@@ -178,6 +209,10 @@
   // CHECK: {{call void @_ZN5Test66Thunks1fEv.*sret}}
   // CHECK: ret void
   X Thunks::f() { return X(); }
+
+  // WIN64-LABEL: define linkonce_odr dso_local void @"?f@Thunks@Test6@@WBA@EAA?AUX@2@XZ"({{.*}} sret %{{.*}})
+  // WIN64-NOT: memcpy
+  // WIN64: tail call void @"?f@Thunks@Test6@@UEAA?AUX@2@XZ"({{.*}} sret %{{.*}})
 }
 
 namespace Test7 {
@@ -224,6 +259,8 @@
   // CHECK-NOT: memcpy
   // CHECK: ret void
   void testD() { D d; }
+
+  // MS C++ ABI doesn't use a thunk, so this case isn't interesting.
 }
 
 namespace Test8 {
@@ -241,6 +278,8 @@
   // CHECK-NOT: memcpy
   // CHECK: ret void
   void C::bar(NonPOD var) {}
+
+  // MS C++ ABI doesn't use a thunk, so this case isn't interesting.
 }
 
 // PR7241: Emitting thunks for a method shouldn't require the vtable for
@@ -287,6 +326,16 @@
   // CHECK: define {{.*}} @_ZTch0_v0_n32_N6Test111C1fEv(
   // CHECK-DBG-NOT: dbg.declare
   // CHECK: ret
+
+  // WIN64-LABEL: define dso_local %{{.*}}* @"?f@C@Test11@@UEAAPEAU12@XZ"(i8*
+
+  // WIN64-LABEL: define weak_odr dso_local %{{.*}}* @"?f@C@Test11@@QEAAPEAUA@2@XZ"(i8*
+  // WIN64: call %{{.*}}* @"?f@C@Test11@@UEAAPEAU12@XZ"(i8* %{{.*}})
+  //
+  // Match the vbtable return adjustment.
+  // WIN64: load i32*, i32** %{{[^,]*}}, align 8
+  // WIN64: getelementptr inbounds i32, i32* %{{[^,]*}}, i32 1
+  // WIN64: load i32, i32* %{{[^,]*}}, align 4
 }
 
 // Varargs thunk test.
@@ -301,7 +350,8 @@
     virtual void c();
     virtual C* f(int x, ...);
   };
-  C* C::f(int x, ...) { return this; }
+  C* makeC();
+  C* C::f(int x, ...) { return makeC(); }
 
   // C::f
   // CHECK: define {{.*}} @_ZN6Test121C1fEiz
@@ -312,6 +362,28 @@
   // CHECK-DBG-NOT: dbg.declare
   // CHECK: getelementptr inbounds i8, i8* {{.*}}, i64 -8
   // CHECK: getelementptr inbounds i8, i8* {{.*}}, i64 8
+
+  // The vtable layout goes:
+  // C vtable in A:
+  // - f impl, no adjustment
+  // C vtable in B:
+  // - f thunk 2, covariant, clone
+  // - f thunk 2, musttail this adjust to impl
+  // FIXME: The weak_odr linkage is probably not necessary and just an artifact
+  // of Itanium ABI details.
+  // WIN64-LABEL: define dso_local {{.*}} @"?f@C@Test12@@UEAAPEAU12@HZZ"(
+  // WIN64: call %{{.*}}* @"?makeC@Test12@@YAPEAUC@1@XZ"()
+  //
+  // This thunk needs return adjustment, clone.
+  // WIN64-LABEL: define weak_odr dso_local {{.*}} @"?f@C@Test12@@W7EAAPEAUB@2@HZZ"(
+  // WIN64: call %{{.*}}* @"?makeC@Test12@@YAPEAUC@1@XZ"()
+  // WIN64: getelementptr inbounds i8, i8* %{{.*}}, i32 8
+  //
+  // Musttail call back to the A implementation after this adjustment from B to A.
+  // WIN64-LABEL: define linkonce_odr dso_local %{{.*}}* @"?f@C@Test12@@W7EAAPEAU12@HZZ"(
+  // WIN64: getelementptr i8, i8* %{{[^,]*}}, i32 -8
+  // WIN64: musttail call {{.*}} @"?f@C@Test12@@UEAAPEAU12@HZZ"(
+  C c;
 }
 
 // PR13832
@@ -339,6 +411,17 @@
   // CHECK: getelementptr inbounds i8, i8* {{.*}}, i64 -24
   // CHECK: getelementptr inbounds i8, i8* {{.*}}, i64 8
   // CHECK: ret %"struct.Test13::D"*
+
+  // WIN64-LABEL: define weak_odr dso_local dereferenceable(32) %"struct.Test13::D"* @"?foo1@D@Test13@@$4PPPPPPPE@A@EAAAEAUB1@2@XZ"(
+  //    This adjustment.
+  // WIN64: getelementptr inbounds i8, i8* {{.*}}, i64 -12
+  //    Call implementation.
+  // WIN64: call {{.*}} @"?foo1@D@Test13@@UEAAAEAU12@XZ"(i8* {{.*}})
+  //    Virtual + nonvirtual return adjustment.
+  // WIN64: load i32*, i32** %{{[^,]*}}, align 8
+  // WIN64: getelementptr inbounds i32, i32* %{{[^,]*}}, i32 1
+  // WIN64: load i32, i32* %{{[^,]*}}, align 4
+  // WIN64: getelementptr inbounds i8, i8* %{{[^,]*}}, i32 %{{[^,]*}}
 }
 
 namespace Test14 {
@@ -374,9 +457,16 @@
   void C::c() {}
 
   // C::c
-  // CHECK: declare void @_ZN6Test151C1fEiz
+  // CHECK-CLONE: declare void @_ZN6Test151C1fEiz
   // non-virtual thunk to C::f
-  // CHECK: declare void @_ZThn8_N6Test151C1fEiz
+  // CHECK-CLONE: declare void @_ZThn8_N6Test151C1fEiz
+
+  // If we have musttail, then we emit the thunk as available_externally.
+  // CHECK-TAIL: declare void @_ZN6Test151C1fEiz
+  // CHECK-TAIL: define available_externally void @_ZThn8_N6Test151C1fEiz({{.*}})
+  // CHECK-TAIL: musttail call void (%"struct.Test15::C"*, i32, ...) @_ZN6Test151C1fEiz({{.*}}, ...)
+
+  // MS C++ ABI doesn't use a thunk, so this case isn't interesting.
 }
 
 namespace Test16 {
@@ -398,6 +488,33 @@
 // CHECK: ret void
 }
 
+namespace Test17 {
+class A {
+  virtual void f(const char *, ...);
+};
+class B {
+  virtual void f(const char *, ...);
+};
+class C : A, B {
+  virtual void anchor();
+  void f(const char *, ...) override;
+};
+// Key method and object anchor vtable for Itanium and MSVC.
+void C::anchor() {}
+C c;
+
+// CHECK-CLONE-LABEL: declare void @_ZThn8_N6Test171C1fEPKcz(
+
+// CHECK-TAIL-LABEL: define available_externally void @_ZThn8_N6Test171C1fEPKcz(
+// CHECK-TAIL: getelementptr inbounds i8, i8* %{{.*}}, i64 -8
+// CHECK-TAIL: musttail call {{.*}} @_ZN6Test171C1fEPKcz({{.*}}, ...)
+
+// MSVC-LABEL: define linkonce_odr dso_local void @"?f@C@Test17@@G7EAAXPEBDZZ"
+// MSVC-SAME: (%"class.Test17::C"* %this, i8* %[[ARG:[^,]+]], ...)
+// MSVC: getelementptr i8, i8* %{{.*}}, i32 -8
+// MSVC: musttail call void (%"class.Test17::C"*, i8*, ...) @"?f@C@Test17@@EEAAXPEBDZZ"(%"class.Test17::C"* %{{.*}}, i8* %[[ARG]], ...)
+}
+
 /**** The following has to go at the end of the file ****/
 
 // checking without opt
@@ -421,5 +538,9 @@
 // CHECK-OPT-LABEL: define linkonce_odr void @_ZN6Test101C3fooEv
 // CHECK-OPT-LABEL: define linkonce_odr void @_ZThn8_N6Test101C3fooEv
 
+// This is from Test10:
+// WIN64-LABEL: define linkonce_odr dso_local void @"?foo@C@Test10@@UEAAXXZ"(
+// WIN64-LABEL: define linkonce_odr dso_local void @"?foo@C@Test10@@W7EAAXXZ"(
+
 // CHECK-NONOPT: attributes [[NUW]] = { noinline nounwind optnone uwtable{{.*}} }
 // CHECK-OPT: attributes [[NUW]] = { nounwind uwtable{{.*}} }
Index: cfe/trunk/test/CodeGenCXX/ms-thunks-variadic-return.cpp
===================================================================
--- cfe/trunk/test/CodeGenCXX/ms-thunks-variadic-return.cpp
+++ cfe/trunk/test/CodeGenCXX/ms-thunks-variadic-return.cpp
@@ -0,0 +1,13 @@
+// RUN: %clang_cc1 -fno-rtti-data -triple x86_64-windows-msvc -emit-llvm-only %s -verify
+
+// Verify that we error out on this return adjusting thunk that we can't emit.
+
+struct A {
+  virtual A *clone(const char *f, ...) = 0;
+};
+struct B : virtual A {
+  // expected-error@+1 2 {{cannot compile this return-adjusting thunk with variadic arguments yet}}
+  B *clone(const char *f, ...) override;
+};
+struct C : B { int c; };
+C c;
Index: cfe/trunk/lib/CodeGen/CGVTables.cpp
===================================================================
--- cfe/trunk/lib/CodeGen/CGVTables.cpp
+++ cfe/trunk/lib/CodeGen/CGVTables.cpp
@@ -166,6 +166,15 @@
   llvm::Value *Callee = CGM.GetAddrOfFunction(GD, Ty, /*ForVTable=*/true);
   llvm::Function *BaseFn = cast<llvm::Function>(Callee);
 
+  // Cloning can't work if we don't have a definition. The Microsoft ABI may
+  // require thunks when a definition is not available. Emit an error in these
+  // cases.
+  if (!MD->isDefined()) {
+    CGM.ErrorUnsupported(MD, "return-adjusting thunk with variadic arguments");
+    return Fn;
+  }
+  assert(!BaseFn->isDeclaration() && "cannot clone undefined variadic method");
+
   // Clone to thunk.
   llvm::ValueToValueMapTy VMap;
 
@@ -201,6 +210,8 @@
   Builder.SetInsertPoint(&*ThisStore);
   llvm::Value *AdjustedThisPtr =
       CGM.getCXXABI().performThisAdjustment(*this, ThisPtr, Thunk.This);
+  AdjustedThisPtr = Builder.CreateBitCast(AdjustedThisPtr,
+                                          ThisStore->getOperand(0)->getType());
   ThisStore->setOperand(0, AdjustedThisPtr);
 
   if (!Thunk.Return.isEmpty()) {
@@ -291,14 +302,17 @@
                           *this, LoadCXXThisAddress(), Thunk->This)
           : LoadCXXThis();
 
-  if (CurFnInfo->usesInAlloca() || IsUnprototyped) {
-    // We don't handle return adjusting thunks, because they require us to call
-    // the copy constructor.  For now, fall through and pretend the return
-    // adjustment was empty so we don't crash.
+  // If perfect forwarding is required a variadic method, a method using
+  // inalloca, or an unprototyped thunk, use musttail. Emit an error if this
+  // thunk requires a return adjustment, since that is impossible with musttail.
+  if (CurFnInfo->usesInAlloca() || CurFnInfo->isVariadic() || IsUnprototyped) {
     if (Thunk && !Thunk->Return.isEmpty()) {
       if (IsUnprototyped)
         CGM.ErrorUnsupported(
             MD, "return-adjusting thunk with incomplete parameter type");
+      else if (CurFnInfo->isVariadic())
+        llvm_unreachable("shouldn't try to emit musttail return-adjusting "
+                         "thunks for variadic functions");
       else
         CGM.ErrorUnsupported(
             MD, "non-trivial argument copy for return-adjusting thunk");
@@ -549,16 +563,32 @@
 
   CGM.SetLLVMFunctionAttributesForDefinition(GD.getDecl(), ThunkFn);
 
+  // Thunks for variadic methods are special because in general variadic
+  // arguments cannot be perferctly forwarded. In the general case, clang
+  // implements such thunks by cloning the original function body. However, for
+  // thunks with no return adjustment on targets that support musttail, we can
+  // use musttail to perfectly forward the variadic arguments.
+  bool ShouldCloneVarArgs = false;
   if (!IsUnprototyped && ThunkFn->isVarArg()) {
-    // Varargs thunks are special; we can't just generate a call because
-    // we can't copy the varargs.  Our implementation is rather
-    // expensive/sucky at the moment, so don't generate the thunk unless
-    // we have to.
-    // FIXME: Do something better here; GenerateVarArgsThunk is extremely ugly.
+    ShouldCloneVarArgs = true;
+    if (TI.Return.isEmpty()) {
+      switch (CGM.getTriple().getArch()) {
+      case llvm::Triple::x86_64:
+      case llvm::Triple::x86:
+      case llvm::Triple::aarch64:
+        ShouldCloneVarArgs = false;
+        break;
+      default:
+        break;
+      }
+    }
+  }
+
+  if (ShouldCloneVarArgs) {
     if (UseAvailableExternallyLinkage)
       return ThunkFn;
-    ThunkFn = CodeGenFunction(CGM).GenerateVarArgsThunk(ThunkFn, FnInfo, GD,
-                                                        TI);
+    ThunkFn =
+        CodeGenFunction(CGM).GenerateVarArgsThunk(ThunkFn, FnInfo, GD, TI);
   } else {
     // Normal thunk body generation.
     CodeGenFunction(CGM).generateThunk(ThunkFn, FnInfo, GD, TI, IsUnprototyped);
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to