Prince781 created this revision.
Prince781 added reviewers: ABataev, rsmith.
Herald added subscribers: cfe-commits, jfb.
Herald added a reviewer: jdoerfert.
Herald added a project: clang.

For a static TLS variable that is only visible inside a function, clang
generates the initializer only inside the body of the function where the
variable is declared. However, the variable can be referenced on a thread
that never calls that function: this happens when a scope referring to the
variable is outlined into a separate function (for example a lambda body or
an OpenMP parallel region) that is executed on a new thread. On that thread
the variable is then used without its initializer ever having run. Here are
two examples that demonstrate this:

  // Reproducer 1 (std::thread): a function-local `static thread_local` object
  // is read from a lambda that runs on a second thread.
  #include <thread>
  #include <iostream>
  
  struct Object {
      int i;
      // A constructed Object always has i == 3, so any other printed value
      // means the constructor did not run for that thread's copy of the object.
      Object() : i(3) {}
  };
  
  int main(void) {
      // Function-local TLS variable; its initializer is emitted only in main().
      static thread_local Object o;
  
      // Main thread: executed after the declaration above.
      std::cout << "[main] o.i = " << o.i << std::endl;
      // The lambda body is a separate function executed on the new thread. It
      // refers to `o`, but that thread never runs the initializer in main().
      std::thread t([] { std::cout << "[new thread] o.i = " << o.i << 
std::endl; });
      t.join();
  }



  // Reproducer 2 (OpenMP): the same function-local `static thread_local`
  // object is read from inside a parallel region.
  #include <iostream>
  #include <omp.h>
  
  struct Object {
      int i;
      // A constructed Object always has i == 3, so any other printed value
      // means the constructor did not run for that thread's copy of the object.
      Object() : i(3) {}
  };
  
  int main(void) {
      // Function-local TLS variable; its initializer is emitted only in main().
      static thread_local Object o;
  
      // The parallel region is executed by a team of threads; the additional
      // threads reach the reference to `o` without having run the declaration
      // above. `critical` lets one thread at a time execute the print statement.
      #pragma omp parallel
      #pragma omp critical
      std::cout << "[" << omp_get_thread_num() << "] o.i = " << o.i << 
std::endl;
  }

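With this patch, every function that references a static TLS variable
declared in a different function also gets an initializer for that variable.
The initializer is generated once per referenced variable in each such
function, no matter how many times the function refers to it.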


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D66122

Files:
  clang/lib/CodeGen/CGExpr.cpp
  clang/lib/CodeGen/CodeGenFunction.cpp
  clang/lib/CodeGen/CodeGenFunction.h
  clang/test/CodeGenCXX/cxx11-thread-local.cpp

Index: clang/test/CodeGenCXX/cxx11-thread-local.cpp
===================================================================
--- clang/test/CodeGenCXX/cxx11-thread-local.cpp
+++ clang/test/CodeGenCXX/cxx11-thread-local.cpp
@@ -268,15 +268,37 @@
   return this->n;
 }
 
+namespace static_tls_in_lambda {
+  struct X {
+    X() {}
+  };
+
+
+  X (*f())() {
+    static thread_local X x;
+
+    return [] {
+            return x;
+    };
+  }
+
+  auto y = f();
+
+  void g() { y(); }
+}
+
 namespace {
 thread_local int anon_i{1};
 }
 void set_anon_i() {
   anon_i = 2;
 }
+
+
 // LINUX-LABEL: define internal i32* @_ZTWN12_GLOBAL__N_16anon_iE()
 // DARWIN-LABEL: define internal cxx_fast_tlscc i32* @_ZTWN12_GLOBAL__N_16anon_iE()
 
+
 // LINUX: define internal void @[[V_M_INIT]]()
 // DARWIN: define internal cxx_fast_tlscc void @[[V_M_INIT]]()
 // LINUX-SAME: comdat($_ZN1VIiE1mE)
@@ -290,6 +312,8 @@
 // CHECK: store i64 1, i64* @_ZGVN1VIiE1mE
 // CHECK: br label
 
+
+
 // LINUX: define internal void @[[X_M_INIT]]()
 // DARWIN: define internal cxx_fast_tlscc void @[[X_M_INIT]]()
 // LINUX-SAME: comdat($_ZN1XIiE1mE)
@@ -303,6 +327,14 @@
 // CHECK: store i64 1, i64* @_ZGVN1XIiE1mE
 // CHECK: br label
 
+// CHECK: define internal void @"_ZZN20static_tls_in_lambda1fEvENK3$_1clEv"
+// CHECK: %[[static_tls_guard_val:.*]] = load i8, i8* @_ZGVZN20static_tls_in_lambda1fEvE1x
+// CHECK: %[[static_tls_guard_init:.*]] = icmp eq i8 %[[static_tls_guard_val]], 0
+// CHECK: br i1 %[[static_tls_guard_init]],
+// CHECK: call void @_ZN20static_tls_in_lambda1XC1Ev(%"struct.static_tls_in_lambda::X"* @_ZZN20static_tls_in_lambda1fEvE1x)
+// CHECK: store i8 1, i8* @_ZGVZN20static_tls_in_lambda1fEvE1x, align 1
+
+
 // CHECK: define {{.*}}@[[GLOBAL_INIT:.*]]()
 // CHECK: call void @[[C_INIT]]()
 // CHECK: call void @[[E_INIT]]()
Index: clang/lib/CodeGen/CodeGenFunction.h
===================================================================
--- clang/lib/CodeGen/CodeGenFunction.h
+++ clang/lib/CodeGen/CodeGenFunction.h
@@ -467,6 +467,10 @@
   /// should emit cleanups.
   bool CurFuncIsThunk = false;
 
+  /// Static thread-local variables referenced by the current function that
+  /// were declared in a different function.
+  llvm::SmallSet<const VarDecl *, 32> ForeignStaticTLSVars;
+
   /// In ARC, whether we should autorelease the return value.
   bool AutoreleaseResult = false;
 
Index: clang/lib/CodeGen/CodeGenFunction.cpp
===================================================================
--- clang/lib/CodeGen/CodeGenFunction.cpp
+++ clang/lib/CodeGen/CodeGenFunction.cpp
@@ -31,12 +31,16 @@
 #include "clang/Basic/TargetInfo.h"
 #include "clang/CodeGen/CGFunctionInfo.h"
 #include "clang/Frontend/FrontendDiagnostic.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CFG.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/Dominators.h"
+#include "llvm/IR/GlobalVariable.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/MDBuilder.h"
 #include "llvm/IR/Operator.h"
 #include "llvm/Transforms/Utils/PromoteMemToReg.h"
+#include "llvm/IR/ValueSymbolTable.h"
 using namespace clang;
 using namespace CodeGen;
 
@@ -384,6 +388,64 @@
     CGBuilderTy(*this, AllocaInsertPt).CreateCall(FrameEscapeFn, EscapeArgs);
   }
 
+  // Emit initializers for static thread-local variables that we referenced
+  // but that are declared in another function; they may be uninitialized on
+  // entry if this function executes on a separate thread.  For example, when
+  // we're emitting the lambda in the following code:
+  // 
+  // class Object {
+  //   int init;
+  //   Object() : init(1) {}
+  // };
+  //
+  // main() {
+  //    static thread_local Object var;
+  //    std::thread([] {
+  //        ...emit initializer for var here...
+  //    });
+  // }
+  // 
+  // or another example:
+  //
+  // main() {
+  //    static Object var;
+  //    #pragma omp threadprivate(var)
+  //    #pragma omp parallel
+  //    {
+  //        ...emit initializer for var here...
+  //    }
+  // }
+  for (const VarDecl *VD : ForeignStaticTLSVars) {
+    assert(VD->isStaticLocal() && "expected a static local VarDecl");
+
+    // CUDA's local and local static __shared__ variables should not
+    // have any non-empty initializers. This is ensured by Sema.
+    // Whatever initializer such variable may have when it gets here is
+    // a no-op and should not be emitted.
+    bool isCudaSharedVar = getLangOpts().CUDA && getLangOpts().CUDAIsDevice &&
+      VD->hasAttr<CUDASharedAttr>();
+    // If this value has an initializer, and it's thread-local, emit it.
+    if (VD->getInit() && !isCudaSharedVar) {
+      auto *GV = dyn_cast<llvm::GlobalVariable>(CGM.getStaticLocalDeclAddress(VD));
+      auto IP = Builder.saveAndClearIP();
+      llvm::BasicBlock *BBParent = AllocaInsertPt->getParent();
+      llvm::Instruction *INext = AllocaInsertPt->getNextNonDebugInstruction();
+      llvm::BasicBlock *BBNext = BBParent->splitBasicBlock(INext, BBParent->getName() + ".next");
+
+      INext = AllocaInsertPt->getNextNonDebugInstruction();
+
+      Builder.SetInsertPoint(BBParent);
+      // the global variable shouldn't change, as this function should've
+      // been called first when generating the parent function
+      AddInitializerToStaticVarDecl(*VD, GV);
+      if (INext != BBParent->getTerminator()) {
+        INext->eraseFromParent();
+        Builder.CreateBr(BBNext);
+      }
+      Builder.restoreIP(IP);
+    }
+  }
+
   // Remove the AllocaInsertPt instruction, which is just a convenience for us.
   llvm::Instruction *Ptr = AllocaInsertPt;
   AllocaInsertPt = nullptr;
Index: clang/lib/CodeGen/CGExpr.cpp
===================================================================
--- clang/lib/CodeGen/CGExpr.cpp
+++ clang/lib/CodeGen/CGExpr.cpp
@@ -2614,7 +2614,12 @@
           *VD, CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false)),
                      getContext().getDeclAlign(VD));
 
-    // No other cases for now.
+      // Add to ForeignStaticTLSVars if this is a thread-local variable
+      // declared in a different function.
+      const Decl *DC = cast<Decl>(VD->getDeclContext());
+      if (DC != CurGD.getDecl() && VD->getTLSKind() == VarDecl::TLS_Dynamic)
+          ForeignStaticTLSVars.insert(VD);
+     // No other cases for now.
     } else {
       llvm_unreachable("DeclRefExpr for Decl not entered in LocalDeclMap?");
     }
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to