================
@@ -811,6 +813,72 @@ void 
CodeGenVTables::addVTableComponent(ConstantArrayBuilder &builder,
         return builder.add(
             llvm::ConstantExpr::getNullValue(CGM.GlobalsInt8PtrTy));
       }
+      // For an implicit H+D virtual dtor of an explicit template
+      // instantiation, force-emitting the dtor body on device can pull
+      // host-only callees (e.g. via libstdc++ destructor chains). Skip
+      // unless device code has already referenced the dtor or any ctor
+      // of the class (a ctor implies an instance, hence possible
+      // polymorphic delete through this vtable).
+      if (CGM.getLangOpts().CUDAIsDevice) {
+        if (const auto *Dtor = dyn_cast<CXXDestructorDecl>(MD)) {
+          const auto *HAttr = Dtor->getAttr<CUDAHostAttr>();
+          const auto *DAttr = Dtor->getAttr<CUDADeviceAttr>();
+          bool IsImplicitHD =
+              HAttr && DAttr && HAttr->isImplicit() && DAttr->isImplicit();
+          const auto *Spec =
+              dyn_cast<ClassTemplateSpecializationDecl>(Dtor->getParent());
+          bool IsExplicitInst =
+              Spec && (Spec->getTemplateSpecializationKind() ==
+                           TSK_ExplicitInstantiationDeclaration ||
+                       Spec->getTemplateSpecializationKind() ==
+                           TSK_ExplicitInstantiationDefinition);
+          bool ClassUsedOnDevice = CGM.GetGlobalValue(CGM.getMangledName(GD));
+          if (IsImplicitHD && IsExplicitInst && !ClassUsedOnDevice) {
+            for (const auto *Ctor : Spec->ctors()) {
+              if (CGM.GetGlobalValue(
+                      CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete))) ||
+                  CGM.GetGlobalValue(
+                      CGM.getMangledName(GlobalDecl(Ctor, Ctor_Base)))) {
+                ClassUsedOnDevice = true;
+                break;
+              }
+            }
+          }
+          if (IsImplicitHD && IsExplicitInst && !ClassUsedOnDevice) {
+            if (IsThunk)
+              nextVTableThunkIndex++;
+            // Emit a per-dtor trap stub instead of NULL so that, if the
+            // heuristic is wrong, the crash backtrace names the offending
+            // destructor.
+            SmallString<128> StubName(CGM.getLangOpts().HIP
+                                          ? "__clang_hip_unreachable_dtor."
+                                          : "__clang_cuda_unreachable_dtor.");
+            StubName += CGM.getMangledName(GD);
+            llvm::Module &M = CGM.getModule();
+            llvm::Function *Stub = M.getFunction(StubName);
+            if (!Stub) {
+              llvm::FunctionType *FT = llvm::FunctionType::get(
+                  CGM.VoidTy, {CGM.GlobalsInt8PtrTy}, /*isVarArg=*/false);
+              Stub = llvm::Function::Create(
+                  FT, llvm::GlobalValue::InternalLinkage, StubName, &M);
----------------
yxsamliu wrote:

The redesign removes this inline stub. The trap body is now emitted into the
existing llvm::Function for the dtor itself, so this code is no longer needed.

https://github.com/llvm/llvm-project/pull/197214
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to