Prince781 created this revision. Prince781 added reviewers: ABataev, rsmith. Herald added subscribers: cfe-commits, jfb. Herald added a reviewer: jdoerfert. Herald added a project: clang.
For static TLS vars that are only visible inside a function, clang will only generate an initializer inside the body of the function where the variable was declared. However, it is possible for the variable to be referenced on a thread that never executes the function it was declared in, if a scope referring to the variable gets outlined into a function that is executed on a new thread. In that case the new thread's copy of the variable is used without ever being initialized. Here are two examples that demonstrate this:

  #include <thread>
  #include <iostream>

  struct Object {
    int i;
    Object() : i(3) {}
  };

  int main(void) {
    static thread_local Object o;

    std::cout << "[main] o.i = " << o.i << std::endl;
    std::thread t([] { std::cout << "[new thread] o.i = " << o.i << std::endl; });
    t.join();
  }

and

  #include <iostream>
  #include <omp.h>

  struct Object {
    int i;
    Object() : i(3) {}
  };

  int main(void) {
    static thread_local Object o;

  #pragma omp parallel
  #pragma omp critical
    std::cout << "[" << omp_get_thread_num() << "] o.i = " << o.i << std::endl;
  }

In this patch, we generate an initializer in a function for every unique reference to a static TLS var that was declared in a different function.

Repository: rG LLVM Github Monorepo

https://reviews.llvm.org/D66122

Files:
  clang/lib/CodeGen/CGExpr.cpp
  clang/lib/CodeGen/CodeGenFunction.cpp
  clang/lib/CodeGen/CodeGenFunction.h
  clang/test/CodeGenCXX/cxx11-thread-local.cpp
Index: clang/test/CodeGenCXX/cxx11-thread-local.cpp =================================================================== --- clang/test/CodeGenCXX/cxx11-thread-local.cpp +++ clang/test/CodeGenCXX/cxx11-thread-local.cpp @@ -268,15 +268,37 @@ return this->n; } +namespace static_tls_in_lambda { + struct X { + X() {} + }; + + + X (*f())() { + static thread_local X x; + + return [] { + return x; + }; + } + + auto y = f(); + + void g() { y(); } +} + namespace { thread_local int anon_i{1}; } void set_anon_i() { anon_i = 2; } + + // LINUX-LABEL: define internal i32* @_ZTWN12_GLOBAL__N_16anon_iE() // DARWIN-LABEL: define internal cxx_fast_tlscc i32* @_ZTWN12_GLOBAL__N_16anon_iE() + // LINUX: define internal void @[[V_M_INIT]]() // DARWIN: define internal cxx_fast_tlscc void @[[V_M_INIT]]() // LINUX-SAME: comdat($_ZN1VIiE1mE) @@ -290,6 +312,8 @@ // CHECK: store i64 1, i64* @_ZGVN1VIiE1mE // CHECK: br label + + // LINUX: define internal void @[[X_M_INIT]]() // DARWIN: define internal cxx_fast_tlscc void @[[X_M_INIT]]() // LINUX-SAME: comdat($_ZN1XIiE1mE) @@ -303,6 +327,14 @@ // CHECK: store i64 1, i64* @_ZGVN1XIiE1mE // CHECK: br label +// CHECK: define internal void @"_ZZN20static_tls_in_lambda1fEvENK3$_1clEv" +// CHECK: %[[static_tls_guard_val:.*]] = load i8, i8* @_ZGVZN20static_tls_in_lambda1fEvE1x +// CHECK: %[[static_tls_guard_init:.*]] = icmp eq i8 %[[static_tls_guard_val]], 0 +// CHECK: br i1 %[[static_tls_guard_init]], +// CHECK: call void @_ZN20static_tls_in_lambda1XC1Ev(%"struct.static_tls_in_lambda::X"* @_ZZN20static_tls_in_lambda1fEvE1x) +// CHECK: store i8 1, i8* @_ZGVZN20static_tls_in_lambda1fEvE1x, align 1 + + // CHECK: define {{.*}}@[[GLOBAL_INIT:.*]]() // CHECK: call void @[[C_INIT]]() // CHECK: call void @[[E_INIT]]() Index: clang/lib/CodeGen/CodeGenFunction.h =================================================================== --- clang/lib/CodeGen/CodeGenFunction.h +++ clang/lib/CodeGen/CodeGenFunction.h @@ -467,6 +467,10 @@ /// should emit cleanups. 
bool CurFuncIsThunk = false; + /// static thread-local variables we've referenced that were declared in a + /// parent function. + llvm::SmallSet<const VarDecl *, 32> ForeignStaticTLSVars; + /// In ARC, whether we should autorelease the return value. bool AutoreleaseResult = false; Index: clang/lib/CodeGen/CodeGenFunction.cpp =================================================================== --- clang/lib/CodeGen/CodeGenFunction.cpp +++ clang/lib/CodeGen/CodeGenFunction.cpp @@ -31,12 +31,16 @@ #include "clang/Basic/TargetInfo.h" #include "clang/CodeGen/CGFunctionInfo.h" #include "clang/Frontend/FrontendDiagnostic.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CFG.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" +#include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Operator.h" #include "llvm/Transforms/Utils/PromoteMemToReg.h" +#include "llvm/IR/ValueSymbolTable.h" using namespace clang; using namespace CodeGen; @@ -384,6 +388,64 @@ CGBuilderTy(*this, AllocaInsertPt).CreateCall(FrameEscapeFn, EscapeArgs); } + // Emit initializers for static local variables that we referenced that are + // declared in another function, which may be uninitialized on entry if this + // function may execute on a separate thread. For example, when we're + // emitting the lambda in the following code: + // + // class Object { + // int init; + // Object() : init(1) {} + // }; + // + // main() { + // static thread_local Object var; + // std::thread([] { + // ...emit initializer for var here... + // }); + // } + // + // or another example: + // + // main() { + // static Object var; + // #pragma omp threadprivate(var) + // #pragma omp parallel + // { + // ...emit initializer for var here... 
+ // } + // } + for (const VarDecl *VD : ForeignStaticTLSVars) { + assert(VD->isStaticLocal() && "expected a static local VarDecl"); + + // CUDA's local and local static __shared__ variables should not + // have any non-empty initializers. This is ensured by Sema. + // Whatever initializer such variable may have when it gets here is + // a no-op and should not be emitted. + bool isCudaSharedVar = getLangOpts().CUDA && getLangOpts().CUDAIsDevice && + VD->hasAttr<CUDASharedAttr>(); + // If this value has an initializer, and it's thread-local, emit it. + if (VD->getInit() && !isCudaSharedVar) { + auto *GV = dyn_cast<llvm::GlobalVariable>(CGM.getStaticLocalDeclAddress(VD)); + auto IP = Builder.saveAndClearIP(); + llvm::BasicBlock *BBParent = AllocaInsertPt->getParent(); + llvm::Instruction *INext = AllocaInsertPt->getNextNonDebugInstruction(); + llvm::BasicBlock *BBNext = BBParent->splitBasicBlock(INext, BBParent->getName() + ".next"); + + INext = AllocaInsertPt->getNextNonDebugInstruction(); + + Builder.SetInsertPoint(BBParent); + // the global variable shouldn't change, as this function should've + // been called first when generating the parent function + AddInitializerToStaticVarDecl(*VD, GV); + if (INext != BBParent->getTerminator()) { + INext->eraseFromParent(); + Builder.CreateBr(BBNext); + } + Builder.restoreIP(IP); + } + } + // Remove the AllocaInsertPt instruction, which is just a convenience for us. llvm::Instruction *Ptr = AllocaInsertPt; AllocaInsertPt = nullptr; Index: clang/lib/CodeGen/CGExpr.cpp =================================================================== --- clang/lib/CodeGen/CGExpr.cpp +++ clang/lib/CodeGen/CGExpr.cpp @@ -2614,7 +2614,12 @@ *VD, CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false)), getContext().getDeclAlign(VD)); - // No other cases for now. 
+ // add to ForeignStaticVarDecls if this is a thread-local variable + // declared in a different function + const Decl *DC = cast<Decl>(VD->getDeclContext()); + if (DC != CurGD.getDecl() && VD->getTLSKind() == VarDecl::TLS_Dynamic) + ForeignStaticTLSVars.insert(VD); + // No other cases for now. } else { llvm_unreachable("DeclRefExpr for Decl not entered in LocalDeclMap?"); }
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits