Hi kcc, eugenis,
This change introduces llvm.asan.globals metadata, which can be
used by the frontend to provide additional information about the LLVM
global variables, namely:
1) source location (file/line/column) of corresponding globals in the user code.
These locations will be available at runtime, and will be printed in the
error report if needed, even if the binary is built without debug info.
2) whether the global is dynamically initialized. This replaces
llvm.asan.dynamically_initialized_globals metadata used to detect init-order
bugs.
3) whether the global is blacklisted. This is a first step to move all blacklist
functionality to the frontend.
Note: This is an ABI-breaking change. All users of the ASan runtime library will
have to emit the new layout of the __asan_global structure (it now has one more
field).
If you're OK with this change, I will commit it with extra ASan output tests,
checking that we're actually reporting source locations for global variables,
function-static variables and simple string literals.
http://reviews.llvm.org/D4203
Files:
lib/Transforms/Instrumentation/AddressSanitizer.cpp
projects/compiler-rt/lib/asan/asan_interface_internal.h
projects/compiler-rt/lib/asan/asan_report.cc
test/Instrumentation/AddressSanitizer/instrument_global.ll
test/Instrumentation/AddressSanitizer/instrument_initializer_metadata.ll
tools/clang/lib/CodeGen/CGDecl.cpp
tools/clang/lib/CodeGen/CodeGenModule.cpp
tools/clang/lib/CodeGen/CodeGenModule.h
tools/clang/test/CodeGen/sanitize-init-order.cpp
Index: lib/Transforms/Instrumentation/AddressSanitizer.cpp
===================================================================
--- lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -16,6 +16,7 @@
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallString.h"
@@ -215,28 +216,88 @@
"Number of optimized accesses to global vars");
namespace {
-/// A set of dynamically initialized globals extracted from metadata.
-class SetOfDynamicallyInitializedGlobals {
+/// Frontend-provided metadata for global variables.
+class GlobalsMetadata {
public:
- void Init(Module& M) {
- // Clang generates metadata identifying all dynamically initialized globals.
- NamedMDNode *DynamicGlobals =
- M.getNamedMetadata("llvm.asan.dynamically_initialized_globals");
- if (!DynamicGlobals)
+ void init(Module& M) {
+ assert(!inited_);
+ inited_ = true;
+ NamedMDNode *Globals = M.getNamedMetadata("llvm.asan.globals");
+ if (!Globals)
return;
- for (const auto MDN : DynamicGlobals->operands()) {
- assert(MDN->getNumOperands() == 1);
- Value *VG = MDN->getOperand(0);
- // The optimizer may optimize away a global entirely, in which case we
- // cannot instrument access to it.
- if (!VG)
+ for (auto MDN : Globals->operands()) {
+ // Format of the metadata node for the global:
+ // {
+ // global,
+ // source_location,
+ // i1 is_dynamically_initialized,
+ // i1 is_blacklisted
+ // }
+ assert(MDN->getNumOperands() == 4);
+ Value *V = MDN->getOperand(0);
+ // The optimizer may optimize away a global entirely.
+ if (!V)
continue;
- DynInitGlobals.insert(cast<GlobalVariable>(VG));
+ GlobalVariable *GV = cast<GlobalVariable>(V);
+ // We may already know the source location for GV, if it was merged
+ // with another global.
+ if (!SourceLocation.count(GV)) {
+ if (Value *Loc = MDN->getOperand(1)) {
+ GlobalVariable *GVLoc = cast<GlobalVariable>(Loc);
+ SourceLocation[GV] = GVLoc;
+ addSourceLocationGlobal(GVLoc);
+ }
+ }
+ ConstantInt *IsDynInit = cast<ConstantInt>(MDN->getOperand(2));
+ if (IsDynInit->isOne())
+ DynInitGlobals.insert(GV);
+ ConstantInt *IsBlacklisted = cast<ConstantInt>(MDN->getOperand(3));
+ if (IsBlacklisted->isOne())
+ BlacklistedGlobals.insert(GV);
}
}
- bool Contains(GlobalVariable *G) { return DynInitGlobals.count(G) != 0; }
+
+ GlobalVariable *getSourceLocation(GlobalVariable *G) const {
+ auto Pos = SourceLocation.find(G);
+ return (Pos != SourceLocation.end()) ? Pos->second : nullptr;
+ }
+
+ /// Check if the global is dynamically initialized.
+ bool isDynInit(GlobalVariable *G) const {
+ return DynInitGlobals.count(G);
+ }
+
+ /// Check if the global was blacklisted.
+ bool isBlacklisted(GlobalVariable *G) const {
+ return BlacklistedGlobals.count(G);
+ }
+
+ /// Check if the global was generated to describe source location of another
+ /// global (we don't want to instrument them).
+ bool isSourceLocationGlobal(GlobalVariable *G) const {
+ return LocationGlobals.count(G);
+ }
+
private:
- SmallSet<GlobalValue*, 32> DynInitGlobals;
+ bool inited_ = false;
+ DenseMap<GlobalVariable*, GlobalVariable*> SourceLocation;
+ DenseSet<GlobalVariable*> DynInitGlobals;
+ DenseSet<GlobalVariable*> BlacklistedGlobals;
+ DenseSet<GlobalVariable*> LocationGlobals;
+
+ void addSourceLocationGlobal(GlobalVariable *SourceLocGV) {
+ // Source location global is a struct with layout:
+ // {
+ // filename,
+ // i32 line_number,
+ // i32 column_number,
+ // }
+ LocationGlobals.insert(SourceLocGV);
+ ConstantStruct *Contents =
+ cast<ConstantStruct>(SourceLocGV->getInitializer());
+ GlobalVariable *FilenameGV = cast<GlobalVariable>(Contents->getOperand(0));
+ LocationGlobals.insert(FilenameGV);
+ }
};
/// This struct defines the shadow mapping using the rule:
@@ -351,7 +412,7 @@
*AsanMemoryAccessCallbackSized[2];
Function *AsanMemmove, *AsanMemcpy, *AsanMemset;
InlineAsm *EmptyAsm;
- SetOfDynamicallyInitializedGlobals DynamicallyInitializedGlobals;
+ GlobalsMetadata GlobalsMD;
friend struct FunctionStackPoisoner;
};
@@ -381,7 +442,7 @@
SmallString<64> BlacklistFile;
std::unique_ptr<SpecialCaseList> BL;
- SetOfDynamicallyInitializedGlobals DynamicallyInitializedGlobals;
+ GlobalsMetadata GlobalsMD;
Type *IntptrTy;
LLVMContext *C;
const DataLayout *DL;
@@ -659,7 +720,7 @@
// If a global variable does not have dynamic initialization we don't
// have to instrument it. However, if a global does not have initializer
// at all, we assume it has dynamic initializer (in other TU).
- return G->hasInitializer() && !DynamicallyInitializedGlobals.Contains(G);
+ return G->hasInitializer() && !GlobalsMD.isDynInit(G);
}
void
@@ -866,7 +927,11 @@
Type *Ty = cast<PointerType>(G->getType())->getElementType();
DEBUG(dbgs() << "GLOBAL: " << *G << "\n");
+ // FIXME: Don't use the blacklist here, all the data should be collected
+ // by the frontend and passed in globals metadata.
if (BL->isIn(*G)) return false;
+ if (GlobalsMD.isBlacklisted(G)) return false;
+ if (GlobalsMD.isSourceLocationGlobal(G)) return false;
if (!Ty->isSized()) return false;
if (!G->hasInitializer()) return false;
if (GlobalWasGeneratedByAsan(G)) return false; // Our own global.
@@ -967,7 +1032,7 @@
// trailing redzones. It also creates a function that poisons
// redzones and inserts this function into llvm.global_ctors.
bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) {
- DynamicallyInitializedGlobals.Init(M);
+ GlobalsMD.init(M);
SmallVector<GlobalVariable *, 16> GlobalsToChange;
@@ -986,10 +1051,11 @@
// const char *name;
// const char *module_name;
// size_t has_dynamic_init;
+ // void *source_location;
// We initialize an array of such structures and pass it to a run-time call.
- StructType *GlobalStructTy = StructType::get(IntptrTy, IntptrTy,
- IntptrTy, IntptrTy,
- IntptrTy, IntptrTy, NULL);
+ StructType *GlobalStructTy =
+ StructType::get(IntptrTy, IntptrTy, IntptrTy, IntptrTy, IntptrTy,
+ IntptrTy, IntptrTy, NULL);
SmallVector<Constant *, 16> Initializers(n);
bool HasDynamicallyInitializedGlobals = false;
@@ -1002,6 +1068,7 @@
for (size_t i = 0; i < n; i++) {
static const uint64_t kMaxGlobalRedzone = 1 << 18;
GlobalVariable *G = GlobalsToChange[i];
+
PointerType *PtrTy = cast<PointerType>(G->getType());
Type *Ty = PtrTy->getElementType();
uint64_t SizeInBytes = DL->getTypeAllocSize(Ty);
@@ -1017,9 +1084,6 @@
RightRedzoneSize += MinRZ - (SizeInBytes % MinRZ);
assert(((RightRedzoneSize + SizeInBytes) % MinRZ) == 0);
Type *RightRedZoneTy = ArrayType::get(IRB.getInt8Ty(), RightRedzoneSize);
- // Determine whether this global should be poisoned in initialization.
- bool GlobalHasDynamicInitializer =
- DynamicallyInitializedGlobals.Contains(G);
StructType *NewTy = StructType::get(Ty, RightRedZoneTy, NULL);
Constant *NewInitializer = ConstantStruct::get(
@@ -1048,17 +1112,20 @@
NewGlobal->takeName(G);
G->eraseFromParent();
+ bool GlobalHasDynamicInitializer = GlobalsMD.isDynInit(G);
+ GlobalVariable *SourceLoc = GlobalsMD.getSourceLocation(G);
+
Initializers[i] = ConstantStruct::get(
- GlobalStructTy,
- ConstantExpr::getPointerCast(NewGlobal, IntptrTy),
+ GlobalStructTy, ConstantExpr::getPointerCast(NewGlobal, IntptrTy),
ConstantInt::get(IntptrTy, SizeInBytes),
ConstantInt::get(IntptrTy, SizeInBytes + RightRedzoneSize),
ConstantExpr::getPointerCast(Name, IntptrTy),
ConstantExpr::getPointerCast(ModuleName, IntptrTy),
ConstantInt::get(IntptrTy, GlobalHasDynamicInitializer),
+ SourceLoc ? ConstantExpr::getPointerCast(SourceLoc, IntptrTy)
+ : ConstantInt::get(IntptrTy, 0),
NULL);
- // Populate the first and last globals declared in this TU.
if (ClInitializers && GlobalHasDynamicInitializer)
HasDynamicallyInitializedGlobals = true;
@@ -1186,7 +1253,7 @@
report_fatal_error("data layout missing");
DL = &DLP->getDataLayout();
- DynamicallyInitializedGlobals.Init(M);
+ GlobalsMD.init(M);
C = &(M.getContext());
LongSize = DL->getPointerSizeInBits();
Index: test/Instrumentation/AddressSanitizer/instrument_initializer_metadata.ll
===================================================================
--- test/Instrumentation/AddressSanitizer/instrument_initializer_metadata.ll
+++ test/Instrumentation/AddressSanitizer/instrument_initializer_metadata.ll
@@ -7,9 +7,11 @@
@YYY = global i32 0, align 4 ; W/o dynamic initializer.
; Clang will emit the following metadata identifying @xxx as dynamically
; initialized.
-!0 = metadata !{i32* @xxx}
-!1 = metadata !{i32* @XXX}
-!llvm.asan.dynamically_initialized_globals = !{!0, !1}
+!0 = metadata !{i32* @xxx, null, i1 true, i1 false}
+!1 = metadata !{i32* @XXX, null, i1 true, i1 false}
+!2 = metadata !{i32* @yyy, null, i1 false, i1 false}
+!3 = metadata !{i32* @YYY, null, i1 false, i1 false}
+!llvm.asan.globals = !{!0, !1, !2, !3}
define i32 @initializer() uwtable {
entry:
Index: test/Instrumentation/AddressSanitizer/instrument_global.ll
===================================================================
--- test/Instrumentation/AddressSanitizer/instrument_global.ll
+++ test/Instrumentation/AddressSanitizer/instrument_global.ll
@@ -68,8 +68,8 @@
}
-!llvm.asan.dynamically_initialized_globals = !{!0}
-!0 = metadata !{[10 x i32]* @GlobDy}
+!llvm.asan.globals = !{!0}
+!0 = metadata !{[10 x i32]* @GlobDy, null, i1 true, i1 false}
; CHECK-LABEL: define internal void @asan.module_ctor
; CHECK-NOT: ret
Index: tools/clang/lib/CodeGen/CGDecl.cpp
===================================================================
--- tools/clang/lib/CodeGen/CGDecl.cpp
+++ tools/clang/lib/CodeGen/CGDecl.cpp
@@ -345,6 +345,8 @@
DMEntry = castedAddr;
CGM.setStaticLocalDeclAddress(&D, castedAddr);
+ CGM.reportGlobalToASan(var, D.getLocation());
+
// Emit global variable debug descriptor for static vars.
CGDebugInfo *DI = getDebugInfo();
if (DI &&
Index: tools/clang/lib/CodeGen/CodeGenModule.h
===================================================================
--- tools/clang/lib/CodeGen/CodeGenModule.h
+++ tools/clang/lib/CodeGen/CodeGenModule.h
@@ -1001,6 +1001,9 @@
const SanitizerOptions &getSanOpts() const { return SanOpts; }
+ void reportGlobalToASan(llvm::GlobalVariable *GV, SourceLocation Loc,
+ bool IsDynInit = false);
+
void addDeferredVTable(const CXXRecordDecl *RD) {
DeferredVTables.push_back(RD);
}
Index: tools/clang/lib/CodeGen/CodeGenModule.cpp
===================================================================
--- tools/clang/lib/CodeGen/CodeGenModule.cpp
+++ tools/clang/lib/CodeGen/CodeGenModule.cpp
@@ -1929,23 +1929,59 @@
if (NeedsGlobalCtor || NeedsGlobalDtor)
EmitCXXGlobalVarDeclInitFunc(D, GV, NeedsGlobalCtor);
- // If we are compiling with ASan, add metadata indicating dynamically
- // initialized (and not blacklisted) globals.
- if (SanOpts.Address && NeedsGlobalCtor &&
- !SanitizerBlacklist->isIn(*GV, "init")) {
- llvm::NamedMDNode *DynamicInitializers = TheModule.getOrInsertNamedMetadata(
- "llvm.asan.dynamically_initialized_globals");
- llvm::Value *GlobalToAdd[] = { GV };
- llvm::MDNode *ThisGlobal = llvm::MDNode::get(VMContext, GlobalToAdd);
- DynamicInitializers->addOperand(ThisGlobal);
- }
+ reportGlobalToASan(GV, D->getLocation(), NeedsGlobalCtor);
// Emit global variable debug information.
if (CGDebugInfo *DI = getModuleDebugInfo())
if (getCodeGenOpts().getDebugInfo() >= CodeGenOptions::LimitedDebugInfo)
DI->EmitGlobalVariable(GV, D);
}
+void CodeGenModule::reportGlobalToASan(llvm::GlobalVariable *GV,
+ SourceLocation Loc, bool IsDynInit) {
+ if (!SanOpts.Address)
+ return;
+ IsDynInit &= !SanitizerBlacklist->isIn(*GV, "init");
+ bool IsBlacklisted = SanitizerBlacklist->isIn(*GV);
+
+ llvm::LLVMContext &LLVMCtx = TheModule.getContext();
+
+ llvm::GlobalVariable *LocDescr = nullptr;
+ if (!IsBlacklisted) {
+ // Don't generate source location if a global is blacklisted - it won't
+ // be instrumented anyway.
+ PresumedLoc PLoc = Context.getSourceManager().getPresumedLoc(Loc);
+ if (PLoc.isValid()) {
+ llvm::Constant *LocData[] = {
+ GetAddrOfConstantCString(PLoc.getFilename()),
+ llvm::ConstantInt::get(llvm::Type::getInt32Ty(LLVMCtx), PLoc.getLine()),
+ llvm::ConstantInt::get(llvm::Type::getInt32Ty(LLVMCtx),
+ PLoc.getColumn()),
+ };
+ auto LocStruct = llvm::ConstantStruct::getAnon(LocData);
+ LocDescr = new llvm::GlobalVariable(TheModule, LocStruct->getType(), true,
+ llvm::GlobalValue::PrivateLinkage,
+ LocStruct, ".asan_loc_descr");
+ LocDescr->setUnnamedAddr(true);
+ // Add LocDescr to llvm.compiler.used, so that it won't be removed by
+ // the optimizer before the ASan instrumentation pass.
+ addCompilerUsedGlobal(LocDescr);
+ }
+ }
+
+ llvm::Value *GlobalMetadata[] = {
+ GV,
+ LocDescr,
+ llvm::ConstantInt::get(llvm::Type::getInt1Ty(LLVMCtx), IsDynInit),
+ llvm::ConstantInt::get(llvm::Type::getInt1Ty(LLVMCtx), IsBlacklisted)
+ };
+
+ llvm::MDNode *ThisGlobal = llvm::MDNode::get(VMContext, GlobalMetadata);
+ llvm::NamedMDNode *AsanGlobals =
+ TheModule.getOrInsertNamedMetadata("llvm.asan.globals");
+ AsanGlobals->addOperand(ThisGlobal);
+}
+
static bool isVarDeclStrongDefinition(const VarDecl *D, bool NoCommon) {
// Don't give variables common linkage if -fno-common was specified unless it
// was overridden by a NoCommon attribute.
@@ -2750,6 +2786,8 @@
auto GV = GenerateStringLiteral(C, LT, *this, GlobalVariableName, Alignment);
if (Entry)
Entry->setValue(GV);
+
+ reportGlobalToASan(GV, S->getStrTokenLoc(0));
return GV;
}
Index: tools/clang/test/CodeGen/sanitize-init-order.cpp
===================================================================
--- tools/clang/test/CodeGen/sanitize-init-order.cpp
+++ tools/clang/test/CodeGen/sanitize-init-order.cpp
@@ -27,7 +27,12 @@
// Check that ASan init-order checking ignores structs with trivial default
// constructor.
-// CHECK: !llvm.asan.dynamically_initialized_globals = !{[[GLOB:![0-9]+]]}
-// CHECK: [[GLOB]] = metadata !{%struct.PODWithCtorAndDtor
-
-// BLACKLIST-NOT: llvm.asan.dynamically_initialized_globals
+// CHECK: !llvm.asan.globals = !{![[GLOB_1:[0-9]+]], ![[GLOB_2:[0-9]+]], ![[GLOB_3:[0-9]]]}
+// CHECK: ![[GLOB_1]] = metadata !{%struct.PODStruct* {{.*}}, i1 false, i1 false}
+// CHECK: ![[GLOB_2]] = metadata !{%struct.PODWithDtor* {{.*}}, i1 false, i1 false}
+// CHECK: ![[GLOB_3]] = metadata !{%struct.PODWithCtorAndDtor* {{.*}}, i1 true, i1 false}
+
+// BLACKLIST: !llvm.asan.globals = !{![[GLOB_1:[0-9]+]], ![[GLOB_2:[0-9]+]], ![[GLOB_3:[0-9]]]}
+// BLACKLIST: ![[GLOB_1]] = metadata !{%struct.PODStruct* {{.*}}, i1 false, i1 false}
+// BLACKLIST: ![[GLOB_2]] = metadata !{%struct.PODWithDtor* {{.*}}, i1 false, i1 false}
+// BLACKLIST: ![[GLOB_3]] = metadata !{%struct.PODWithCtorAndDtor* {{.*}}, i1 false, i1 false}
Index: projects/compiler-rt/lib/asan/asan_report.cc
===================================================================
--- projects/compiler-rt/lib/asan/asan_report.cc
+++ projects/compiler-rt/lib/asan/asan_report.cc
@@ -212,6 +212,26 @@
(char *)g.beg);
}
+static const char *GlobalFilename(const __asan_global &g) {
+ const char *res = g.module_name;
+ // Prefer the filename from source location, if is available.
+ if (g.location)
+ res = g.location->filename;
+ CHECK(res);
+ return res;
+}
+
+static void PrintGlobalLocation(InternalScopedString *str,
+ const __asan_global &g) {
+ str->append("%s", GlobalFilename(g));
+ if (!g.location)
+ return;
+ if (g.location->line_no)
+ str->append(":%d", g.location->line_no);
+ if (g.location->column_no)
+ str->append(":%d", g.location->column_no);
+}
+
bool DescribeAddressRelativeToGlobal(uptr addr, uptr size,
const __asan_global &g) {
static const uptr kMinimalDistanceFromAnotherGlobal = 64;
@@ -232,8 +252,10 @@
// Can it happen?
str.append("%p is located %zd bytes inside", (void *)addr, addr - g.beg);
}
- str.append(" of global variable '%s' from '%s' (0x%zx) of size %zu\n",
- MaybeDemangleGlobalName(g.name), g.module_name, g.beg, g.size);
+ str.append(" of global variable '%s' defined in '",
+ MaybeDemangleGlobalName(g.name));
+ PrintGlobalLocation(&str, g);
+ str.append("' (0x%zx) of size %zu\n", g.beg, g.size);
str.append("%s", d.EndLocation());
PrintGlobalNameIfASCII(&str, g);
Printf("%s", str.data());
@@ -741,11 +763,14 @@
Printf("%s", d.Warning());
Report("ERROR: AddressSanitizer: odr-violation (%p):\n", g1->beg);
Printf("%s", d.EndWarning());
- Printf(" [1] size=%zd %s %s\n", g1->size, g1->name, g1->module_name);
- Printf(" [2] size=%zd %s %s\n", g2->size, g2->name, g2->module_name);
+ InternalScopedString g1_loc(256), g2_loc(256);
+ PrintGlobalLocation(&g1_loc, *g1);
+ PrintGlobalLocation(&g2_loc, *g2);
+ Printf(" [1] size=%zd %s %s\n", g1->size, g1->name, g1_loc.data());
+ Printf(" [2] size=%zd %s %s\n", g2->size, g2->name, g2_loc.data());
Report("HINT: if you don't care about these warnings you may set "
"ASAN_OPTIONS=detect_odr_violation=0\n");
- ReportErrorSummary("odr-violation", g1->module_name, 0, g1->name);
+ ReportErrorSummary("odr-violation", g1_loc.data(), 0, g1->name);
}
// ----------------------- CheckForInvalidPointerPair ----------- {{{1
Index: projects/compiler-rt/lib/asan/asan_interface_internal.h
===================================================================
--- projects/compiler-rt/lib/asan/asan_interface_internal.h
+++ projects/compiler-rt/lib/asan/asan_interface_internal.h
@@ -33,6 +33,14 @@
SANITIZER_INTERFACE_ATTRIBUTE void __asan_init_v3();
#define __asan_init __asan_init_v3
+ // This structure is used to describe the source location of a place where
+ // global was defined.
+ struct __asan_global_source_location {
+ const char *filename;
+ int line_no;
+ int column_no;
+ };
+
// This structure describes an instrumented global variable.
struct __asan_global {
uptr beg; // The address of the global.
@@ -42,6 +50,8 @@
const char *module_name; // Module name as a C string. This pointer is a
// unique identifier of a module.
uptr has_dynamic_init; // Non-zero if the global has dynamic initializer.
+ __asan_global_source_location *location; // Source location of a global,
+ // or NULL if it is unknown.
};
// These two functions should be called by the instrumented code.
_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits