llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang Author: Marco Elver (melver) <details> <summary>Changes</summary> Implement the TypeHashPointerSplit mode: This mode assigns a token ID based on the hash of the allocated type's name, where the top half ID-space is reserved for types that contain pointers and the bottom half for types that do not contain pointers. This mode with max tokens of 2 (`-falloc-token-max=2`) may also be valuable for heap hardening strategies that simply separate pointer types from non-pointer types. Make it the new default mode. Link: https://discourse.llvm.org/t/rfc-a-framework-for-allocator-partitioning-hints/87434 --- This change is part of the following series: 1. https://github.com/llvm/llvm-project/pull/156838 2. https://github.com/llvm/llvm-project/pull/156839 3. https://github.com/llvm/llvm-project/pull/156840 4. https://github.com/llvm/llvm-project/pull/156841 5. https://github.com/llvm/llvm-project/pull/156842 --- Full diff: https://github.com/llvm/llvm-project/pull/156840.diff 7 Files Affected: - (modified) clang/docs/AllocToken.rst (+7-2) - (modified) clang/lib/CodeGen/CGExpr.cpp (+64-3) - (added) clang/test/CodeGenCXX/alloc-token-pointer.cpp (+177) - (modified) llvm/lib/Transforms/Instrumentation/AllocToken.cpp (+51-6) - (modified) llvm/test/Instrumentation/AllocToken/extralibfuncs.ll (+1-1) - (modified) llvm/test/Instrumentation/AllocToken/nonlibcalls.ll (+1-1) - (modified) llvm/test/Instrumentation/AllocToken/remark.ll (+1-1) ``````````diff diff --git a/clang/docs/AllocToken.rst b/clang/docs/AllocToken.rst index a7bb8877f371b..fb354d6738ea3 100644 --- a/clang/docs/AllocToken.rst +++ b/clang/docs/AllocToken.rst @@ -31,13 +31,18 @@ Token Assignment Mode The default mode to calculate tokens is: -* *TypeHash* (mode=2): This mode assigns a token ID based on the hash of - the allocated type's name. +* *TypeHashPointerSplit* (mode=3): This mode assigns a token ID based on + the hash of the allocated type's name, where the top half ID-space is + reserved for types that contain pointers and the bottom half for types that + do not contain pointers. Other token ID assignment modes are supported, but they may be subject to change or removal. These may (experimentally) be selected with ``-mllvm -alloc-token-mode=<mode>``: +* *TypeHash* (mode=2): This mode assigns a token ID based on the hash of + the allocated type's name. + * *Random* (mode=1): This mode assigns a statically-determined random token ID to each allocation site. diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index 14e4d648428ef..e7a0e7696e204 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -1277,14 +1277,75 @@ void CodeGenFunction::EmitAllocTokenHint(llvm::CallBase *CB, assert(SanOpts.has(SanitizerKind::AllocToken) && "Only needed with -fsanitize=alloc-token"); + llvm::MDBuilder MDB(getLLVMContext()); + + // Get unique type name. PrintingPolicy Policy(CGM.getContext().getLangOpts()); Policy.SuppressTagKeyword = true; Policy.FullyQualifiedName = true; std::string TypeName = AllocType.getCanonicalType().getAsString(Policy); - auto *TypeMDS = llvm::MDString::get(CGM.getLLVMContext(), TypeName); + auto *TypeNameMD = MDB.createString(TypeName); + + // Check if QualType contains a pointer. Implements a simple DFS to + // recursively check if a type contains a pointer type. + llvm::SmallPtrSet<const RecordDecl *, 4> VisitedRD; + bool IncompleteType = false; + auto TypeContainsPtr = [&](auto &&self, QualType T) -> bool { + QualType CanonicalType = T.getCanonicalType(); + if (CanonicalType->isPointerType()) + return true; // base case + + // Look through typedef chain to check for special types. + for (QualType CurrentT = T; const auto *TT = CurrentT->getAs<TypedefType>(); + CurrentT = TT->getDecl()->getUnderlyingType()) { + const IdentifierInfo *II = TT->getDecl()->getIdentifier(); + if (!II) + continue; + // Special Case: Syntactically uintptr_t is not a pointer; semantically, + // however, very likely used as such. Therefore, classify uintptr_t as a + // pointer, too. + if (II->isStr("uintptr_t")) + return true; + } + + // The type is an array; check the element type. + if (const ArrayType *AT = CanonicalType->getAsArrayTypeUnsafe()) + return self(self, AT->getElementType()); + // The type is a struct, class, or union. + if (const RecordDecl *RD = CanonicalType->getAsRecordDecl()) { + if (!RD->isCompleteDefinition()) { + IncompleteType = true; + return false; + } + if (!VisitedRD.insert(RD).second) + return false; // already visited + // Check all fields. + for (const FieldDecl *Field : RD->fields()) { + if (self(self, Field->getType())) + return true; + } + // For C++ classes, also check base classes. + if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) { + // Polymorphic types require a vptr. + if (CXXRD->isPolymorphic()) + return true; + for (const CXXBaseSpecifier &Base : CXXRD->bases()) { + if (self(self, Base.getType())) + return true; + } + } + } + return false; + }; + const bool ContainsPtr = TypeContainsPtr(TypeContainsPtr, AllocType); + if (!ContainsPtr && IncompleteType) + return; + auto *ContainsPtrC = Builder.getInt1(ContainsPtr); + auto *ContainsPtrMD = MDB.createConstant(ContainsPtrC); - // Format: !{<type-name>} - auto *MDN = llvm::MDNode::get(CGM.getLLVMContext(), {TypeMDS}); + // Format: !{<type-name>, <contains-pointer>} + auto *MDN = + llvm::MDNode::get(CGM.getLLVMContext(), {TypeNameMD, ContainsPtrMD}); CB->setMetadata(llvm::LLVMContext::MD_alloc_token_hint, MDN); } diff --git a/clang/test/CodeGenCXX/alloc-token-pointer.cpp b/clang/test/CodeGenCXX/alloc-token-pointer.cpp new file mode 100644 index 0000000000000..7adb75c7afebb --- /dev/null +++ b/clang/test/CodeGenCXX/alloc-token-pointer.cpp @@ -0,0 +1,177 @@ +// Check -fsanitize=alloc-token TypeHashPointerSplit mode with only 2 +// tokens so we effectively only test the contains-pointer logic. +// +// RUN: %clang_cc1 -fsanitize=alloc-token -falloc-token-max=2 -triple x86_64-linux-gnu -std=c++20 -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -O -fsanitize=alloc-token -falloc-token-max=2 -triple x86_64-linux-gnu -std=c++20 -emit-llvm %s -o - | FileCheck %s + +#include "../Analysis/Inputs/system-header-simulator-cxx.h" + +typedef __UINTPTR_TYPE__ uintptr_t; + +extern "C" { +void *malloc(size_t size); +} + +// CHECK-LABEL: @_Z15test_malloc_intv( +void *test_malloc_int() { + // CHECK: call{{.*}} ptr @__alloc_token_malloc(i64 noundef 4, i64 0) + int *a = (int *)malloc(sizeof(int)); + *a = 42; + return a; +} + +// CHECK-LABEL: @_Z15test_malloc_ptrv( +int **test_malloc_ptr() { + // FIXME: This should not be token ID 0! + // CHECK: call{{.*}} ptr @__alloc_token_malloc(i64 noundef 8, i64 0) + int **a = (int **)malloc(sizeof(int*)); + *a = nullptr; + return a; +} + +// CHECK-LABEL: @_Z12test_new_intv( +int *test_new_int() { + // CHECK: call {{.*}} ptr @__alloc_token_Znwm(i64 noundef 4, i64 0){{.*}} !alloc_token_hint + return new int; +} + +// CHECK-LABEL: @_Z20test_new_ulong_arrayv( +unsigned long *test_new_ulong_array() { + // CHECK: call {{.*}} ptr @__alloc_token_Znam(i64 noundef 80, i64 0){{.*}} !alloc_token_hint + return new unsigned long[10]; +} + +// CHECK-LABEL: @_Z12test_new_ptrv( +int **test_new_ptr() { + // CHECK: call {{.*}} ptr @__alloc_token_Znwm(i64 noundef 8, i64 1){{.*}} !alloc_token_hint + return new int*; +} + +// CHECK-LABEL: @_Z18test_new_ptr_arrayv( +int **test_new_ptr_array() { + // CHECK: call {{.*}} ptr @__alloc_token_Znam(i64 noundef 80, i64 1){{.*}} !alloc_token_hint + return new int*[10]; +} + +struct ContainsPtr { + int a; + char *buf; +}; + +// CHECK-LABEL: @_Z27test_malloc_struct_with_ptrv( +ContainsPtr *test_malloc_struct_with_ptr() { + // FIXME: This should not be token ID 0! + // CHECK: call{{.*}} ptr @__alloc_token_malloc(i64 noundef 16, i64 0) + ContainsPtr *c = (ContainsPtr *)malloc(sizeof(ContainsPtr)); + return c; +} + +// CHECK-LABEL: @_Z33test_malloc_struct_array_with_ptrv( +ContainsPtr *test_malloc_struct_array_with_ptr() { + // FIXME: This should not be token ID 0! + // CHECK: call{{.*}} ptr @__alloc_token_malloc(i64 noundef 160, i64 0) + ContainsPtr *c = (ContainsPtr *)malloc(10 * sizeof(ContainsPtr)); + return c; +} + +// CHECK-LABEL: @_Z32test_operatornew_struct_with_ptrv( +ContainsPtr *test_operatornew_struct_with_ptr() { + // FIXME: This should not be token ID 0! + // CHECK: call {{.*}} ptr @__alloc_token_Znwm(i64 noundef 16, i64 0) + ContainsPtr *c = (ContainsPtr *)__builtin_operator_new(sizeof(ContainsPtr)); + return c; +} + +// CHECK-LABEL: @_Z38test_operatornew_struct_array_with_ptrv( +ContainsPtr *test_operatornew_struct_array_with_ptr() { + // FIXME: This should not be token ID 0! + // CHECK: call {{.*}} ptr @__alloc_token_Znwm(i64 noundef 160, i64 0) + ContainsPtr *c = (ContainsPtr *)__builtin_operator_new(10 * sizeof(ContainsPtr)); + return c; +} + +// CHECK-LABEL: @_Z33test_operatornew_struct_with_ptr2v( +ContainsPtr *test_operatornew_struct_with_ptr2() { + // FIXME: This should not be token ID 0! + // CHECK: call {{.*}} ptr @__alloc_token_Znwm(i64 noundef 16, i64 0) + ContainsPtr *c = (ContainsPtr *)__builtin_operator_new(sizeof(*c)); + return c; +} + +// CHECK-LABEL: @_Z39test_operatornew_struct_array_with_ptr2v( +ContainsPtr *test_operatornew_struct_array_with_ptr2() { + // FIXME: This should not be token ID 0! + // CHECK: call {{.*}} ptr @__alloc_token_Znwm(i64 noundef 160, i64 0) + ContainsPtr *c = (ContainsPtr *)__builtin_operator_new(10 * sizeof(*c)); + return c; +} + +// CHECK-LABEL: @_Z24test_new_struct_with_ptrv( +ContainsPtr *test_new_struct_with_ptr() { + // CHECK: call {{.*}} ptr @__alloc_token_Znwm(i64 noundef 16, i64 1){{.*}} !alloc_token_hint + return new ContainsPtr; +} + +// CHECK-LABEL: @_Z30test_new_struct_array_with_ptrv( +ContainsPtr *test_new_struct_array_with_ptr() { + // CHECK: call {{.*}} ptr @__alloc_token_Znam(i64 noundef 160, i64 1){{.*}} !alloc_token_hint + return new ContainsPtr[10]; +} + +class TestClass { +public: + void Foo(); + ~TestClass(); + int data[16]; +}; + +// CHECK-LABEL: @_Z14test_new_classv( +TestClass *test_new_class() { + // CHECK: call {{.*}} ptr @__alloc_token_Znwm(i64 noundef 64, i64 0){{.*}} !alloc_token_hint + return new TestClass(); +} + +// CHECK-LABEL: @_Z20test_new_class_arrayv( +TestClass *test_new_class_array() { + // CHECK: call {{.*}} ptr @__alloc_token_Znam(i64 noundef 648, i64 0){{.*}} !alloc_token_hint + return new TestClass[10]; +} + +// Test that we detect that virtual classes have implicit vtable pointer. +class VirtualTestClass { +public: + virtual void Foo(); + virtual ~VirtualTestClass(); + int data[16]; +}; + +// CHECK-LABEL: @_Z22test_new_virtual_classv( +VirtualTestClass *test_new_virtual_class() { + // CHECK: call {{.*}} ptr @__alloc_token_Znwm(i64 noundef 72, i64 1){{.*}} !alloc_token_hint + return new VirtualTestClass(); +} + +// CHECK-LABEL: @_Z28test_new_virtual_class_arrayv( +VirtualTestClass *test_new_virtual_class_array() { + // CHECK: call {{.*}} ptr @__alloc_token_Znam(i64 noundef 728, i64 1){{.*}} !alloc_token_hint + return new VirtualTestClass[10]; +} + +// uintptr_t is treated as a pointer. +struct MyStructUintptr { + int a; + uintptr_t ptr; +}; + +// CHECK-LABEL: @_Z18test_uintptr_isptrv( +MyStructUintptr *test_uintptr_isptr() { + // CHECK: call {{.*}} ptr @__alloc_token_Znwm(i64 noundef 16, i64 1) + return new MyStructUintptr; +} + +using uptr = uintptr_t; +// CHECK-LABEL: @_Z19test_uintptr_isptr2v( +uptr *test_uintptr_isptr2() { + // CHECK: call {{.*}} ptr @__alloc_token_Znwm(i64 noundef 8, i64 1) + return new uptr; +} diff --git a/llvm/lib/Transforms/Instrumentation/AllocToken.cpp b/llvm/lib/Transforms/Instrumentation/AllocToken.cpp index 4ea68470ca684..74cda227d50a7 100644 --- a/llvm/lib/Transforms/Instrumentation/AllocToken.cpp +++ b/llvm/lib/Transforms/Instrumentation/AllocToken.cpp @@ -70,6 +70,11 @@ enum class TokenMode : unsigned { /// Token ID based on allocated type hash. TypeHash = 2, + /// Token ID based on allocated type hash, where the top half ID-space is + /// reserved for types that contain pointers and the bottom half for types + /// that do not contain pointers. + TypeHashPointerSplit = 3, + // Mode count - keep last ModeCount }; @@ -89,7 +94,7 @@ struct ModeParser : public cl::parser<unsigned> { cl::opt<unsigned, false, ModeParser> ClMode("alloc-token-mode", cl::desc("Token assignment mode"), cl::Hidden, - cl::init(static_cast<unsigned>(TokenMode::TypeHash))); + cl::init(static_cast<unsigned>(TokenMode::TypeHashPointerSplit))); cl::opt<std::string> ClFuncPrefix("alloc-token-prefix", cl::desc("The allocation function prefix"), @@ -142,16 +147,23 @@ STATISTIC(NumAllocations, "Allocations found"); /// Returns the !alloc_token_hint metadata if available. /// -/// Expected format is: !{<type-name>} +/// Expected format is: !{<type-name>, <contains-pointer>} MDNode *getAllocTokenHintMetadata(const CallBase &CB) { MDNode *Ret = CB.getMetadata(LLVMContext::MD_alloc_token_hint); if (!Ret) return nullptr; - assert(Ret->getNumOperands() == 1 && "bad !alloc_token_hint"); + assert(Ret->getNumOperands() == 2 && "bad !alloc_token_hint"); assert(isa<MDString>(Ret->getOperand(0))); + assert(isa<ConstantAsMetadata>(Ret->getOperand(1))); return Ret; } +bool containsPointer(const MDNode *MD) { + ConstantAsMetadata *C = cast<ConstantAsMetadata>(MD->getOperand(1)); + auto *CI = cast<ConstantInt>(C->getValue()); + return CI->getValue().getBoolValue(); +} + class ModeBase { public: explicit ModeBase(uint64_t MaxTokens) : MaxTokens(MaxTokens) {} @@ -198,12 +210,20 @@ class TypeHashMode : public ModeBase { using ModeBase::ModeBase; uint64_t operator()(const CallBase &CB, OptimizationRemarkEmitter &ORE) { + const auto [N, H] = getHash(CB, ORE); + return N ? boundedToken(H) : H; + } + +protected: + std::pair<MDNode *, uint64_t> getHash(const CallBase &CB, + OptimizationRemarkEmitter &ORE) { if (MDNode *N = getAllocTokenHintMetadata(CB)) { MDString *S = cast<MDString>(N->getOperand(0)); - return boundedToken(xxHash64(S->getString())); + return {N, xxHash64(S->getString())}; } + // Fallback. remarkNoHint(CB, ORE); - return ClFallbackToken; + return {nullptr, ClFallbackToken}; } /// Remark that there was no precise type information. @@ -219,6 +239,26 @@ class TypeHashMode : public ModeBase { } }; +/// Implementation for TokenMode::TypeHashPointerSplit. +class TypeHashPointerSplitMode : public TypeHashMode { +public: + using TypeHashMode::TypeHashMode; + + uint64_t operator()(const CallBase &CB, OptimizationRemarkEmitter &ORE) { + if (MaxTokens == 1) + return 0; + const uint64_t HalfTokens = + (MaxTokens ? MaxTokens : std::numeric_limits<uint64_t>::max()) / 2; + const auto [N, H] = getHash(CB, ORE); + if (!N) + return H; // fallback token + uint64_t Hash = H % HalfTokens; // base hash + if (containsPointer(N)) + Hash += HalfTokens; + return Hash; + } +}; + // Apply opt overrides. AllocTokenOptions &&transformOptionsFromCl(AllocTokenOptions &&Opts) { if (!Opts.MaxTokens.has_value()) @@ -244,6 +284,9 @@ class AllocToken { case TokenMode::TypeHash: Mode.emplace<TypeHashMode>(*Options.MaxTokens); break; + case TokenMode::TypeHashPointerSplit: + Mode.emplace<TypeHashPointerSplitMode>(*Options.MaxTokens); + break; case TokenMode::ModeCount: llvm_unreachable(""); break; @@ -281,7 +324,9 @@ class AllocToken { // Cache for replacement functions. DenseMap<std::pair<LibFunc, uint64_t>, FunctionCallee> TokenAllocFunctions; // Selected mode. - std::variant<IncrementMode, RandomMode, TypeHashMode> Mode; + std::variant<IncrementMode, RandomMode, TypeHashMode, + TypeHashPointerSplitMode> + Mode; }; bool AllocToken::instrumentFunction(Function &F) { diff --git a/llvm/test/Instrumentation/AllocToken/extralibfuncs.ll b/llvm/test/Instrumentation/AllocToken/extralibfuncs.ll index 37c6d6463ffd2..94fc12b20caff 100644 --- a/llvm/test/Instrumentation/AllocToken/extralibfuncs.ll +++ b/llvm/test/Instrumentation/AllocToken/extralibfuncs.ll @@ -29,4 +29,4 @@ entry: ret ptr %ptr1 } -!0 = !{!"int"} +!0 = !{!"int", i1 0} diff --git a/llvm/test/Instrumentation/AllocToken/nonlibcalls.ll b/llvm/test/Instrumentation/AllocToken/nonlibcalls.ll index be99e5ef14b16..b933c0bd352e9 100644 --- a/llvm/test/Instrumentation/AllocToken/nonlibcalls.ll +++ b/llvm/test/Instrumentation/AllocToken/nonlibcalls.ll @@ -60,4 +60,4 @@ entry: ret ptr %ptr1 } -!0 = !{!"int"} +!0 = !{!"int", i1 0} diff --git a/llvm/test/Instrumentation/AllocToken/remark.ll b/llvm/test/Instrumentation/AllocToken/remark.ll index d5ecfc41bace5..d2bc273060403 100644 --- a/llvm/test/Instrumentation/AllocToken/remark.ll +++ b/llvm/test/Instrumentation/AllocToken/remark.ll @@ -24,4 +24,4 @@ entry: ret ptr %ptr1 } -!0 = !{!"int"} +!0 = !{!"int", i1 0} `````````` </details> https://github.com/llvm/llvm-project/pull/156840 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits