https://github.com/dingxiangfei2009 updated https://github.com/llvm/llvm-project/pull/188001
>From cca93e30b6fa358c6923363637ac9c6d7b9405fa Mon Sep 17 00:00:00 2001 From: Xiangfei Ding <[email protected]> Date: Thu, 19 Mar 2026 16:04:41 +0000 Subject: [PATCH] MSan: poison the default-init allocation before calling constructors This change is to align with the standards pertaining to reserved global placement `new`s in the default initialisation style. Signed-off-by: Xiangfei Ding <[email protected]> --- clang/lib/CodeGen/CGExprCXX.cpp | 93 +++++++++++++++++++ clang/test/CXX/drs/cwg1748.cpp | 20 ++-- clang/test/CodeGenCXX/new.cpp | 10 +- .../test/CodeGenCXX/sanitize-default-init.cpp | 41 ++++++++ 4 files changed, 157 insertions(+), 7 deletions(-) create mode 100644 clang/test/CodeGenCXX/sanitize-default-init.cpp diff --git a/clang/lib/CodeGen/CGExprCXX.cpp b/clang/lib/CodeGen/CGExprCXX.cpp index 82300c3ede183..1b22915583689 100644 --- a/clang/lib/CodeGen/CGExprCXX.cpp +++ b/clang/lib/CodeGen/CGExprCXX.cpp @@ -17,8 +17,11 @@ #include "CodeGenFunction.h" #include "ConstantEmitter.h" #include "TargetInfo.h" +#include "clang/AST/ExprCXX.h" #include "clang/Basic/CodeGenOptions.h" +#include "clang/Basic/Sanitizers.h" #include "clang/CodeGen/CGFunctionInfo.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/Intrinsics.h" using namespace clang; @@ -1489,6 +1492,7 @@ class CallDeleteDuringNew final : public EHScopeStack::Cleanup { EmitNewDeleteCall(CGF, OperatorDelete, FPT, DeleteArgs); } }; + } // namespace /// Enter a cleanup to call 'operator delete' if the initializer in a @@ -1553,6 +1557,86 @@ static void EnterNewDeleteCleanup(CodeGenFunction &CGF, const CXXNewExpr *E, CGF.initFullExprCleanup(); } +namespace { +void PoisonTrivialField(CodeGenFunction &CGF, QualType const &Ty, + Address Dest) { + CharUnits Size = CGF.getContext().getTypeSizeInChars(Ty); + llvm::Value *SizeVal = CGF.CGM.getSize(Size); + llvm::Value *PoisonByte = llvm::PoisonValue::get(CGF.Builder.getInt8Ty()); + CGF.Builder.CreateMemSet(Dest, PoisonByte, SizeVal, Ty.isVolatileQualified()); +} + +void PoisonArrayLValue(CodeGenFunction &CGF, QualType const &ElementQualTy, + llvm::Type *ElementTy, LValue const &Dest, + llvm::Value *numElements); + +void PoisonLValueRecursive(CodeGenFunction &CGF, QualType const &Ty, + LValue const &Dest) { + if (Ty.isTriviallyCopyableType(CGF.getContext()) || Ty->isReferenceType()) { + return PoisonTrivialField(CGF, Ty, Dest.getAddress()); + } + + auto *RD = Ty->castAsCXXRecordDecl(); + assert(RD && + "type is not trivially copyable but it is not a record type either"); + for (auto *FD : RD->fields()) { + // There is no need to poison unnamed fields. + if (FD->isUnnamedBitField()) { + continue; + } + QualType FieldTy = FD->getType(); + LValue FieldLV = CGF.EmitLValueForField(Dest, FD); + if (FieldTy->isRecordType()) { + return PoisonLValueRecursive(CGF, FieldTy, FieldLV); + } + if (auto *AQualTy = dyn_cast<clang::ArrayType>(FieldTy)) { + if (auto *ATy = + dyn_cast<llvm::ArrayType>(CGF.ConvertTypeForMem(FieldTy))) { + if (uint64_t NumArrayElements = ATy->getNumElements()) { + PoisonArrayLValue( + CGF, AQualTy->getElementType(), ATy->getElementType(), FieldLV, + llvm::ConstantInt::get(CGF.SizeTy, NumArrayElements)); + } + } + return; + } + // Every other case is trivial to poison. + PoisonTrivialField(CGF, FieldTy, FieldLV.getAddress()); + } +} + +void PoisonArrayLValue(CodeGenFunction &CGF, QualType const &ElementQualTy, + llvm::Type *ElementTy, LValue const &Dest, + llvm::Value *NumElements) { + auto ElementAlign = Dest.getAlignment().alignmentOfArrayElement( + CGF.getContext().getTypeSizeInChars(ElementQualTy)); + + auto &Builder = CGF.Builder; + llvm::Value *BeginPtr = Dest.emitRawPointer(CGF); + llvm::Value *EndPtr = Builder.CreateInBoundsGEP( + ElementTy, BeginPtr, NumElements, "arraypoison.end"); + llvm::Value *One = llvm::ConstantInt::get(CGF.SizeTy, 1); + auto *EntryBB = Builder.GetInsertBlock(); + // The loop head. + auto *BodyBB = CGF.createBasicBlock("arraypoison.body"); + CGF.EmitBlock(BodyBB); + auto *CurElementPtr = + Builder.CreatePHI(BeginPtr->getType(), 2, "arraypoison.cur"); + CurElementPtr->addIncoming(BeginPtr, EntryBB); + LValue ElementDest = CGF.MakeAddrLValue( + Address(CurElementPtr, ElementTy, ElementAlign), ElementQualTy); + PoisonLValueRecursive(CGF, ElementQualTy, ElementDest); + llvm::Value *NextElementPtr = Builder.CreateInBoundsGEP( + ElementTy, CurElementPtr, One, "arraypoison.next"); + llvm::Value *Done = + Builder.CreateICmpEQ(NextElementPtr, EndPtr, "arraypoison.done"); + auto *EndBB = CGF.createBasicBlock("arraypoison.end"); + Builder.CreateCondBr(Done, EndBB, BodyBB); + CurElementPtr->addIncoming(NextElementPtr, Builder.GetInsertBlock()); + CGF.EmitBlock(EndBB); +} +} // namespace + llvm::Value *CodeGenFunction::EmitCXXNewExpr(const CXXNewExpr *E) { // The element type being allocated. QualType allocType = getContext().getBaseElementType(E->getAllocatedType()); @@ -1611,6 +1695,15 @@ llvm::Value *CodeGenFunction::EmitCXXNewExpr(const CXXNewExpr *E) { allocatorArgs.add(RValue::get(allocation, *this), arg->getType()); } + if (E->getInitializationStyle() == CXXNewInitializationStyle::None) { + auto *ElemTy = ConvertTypeForMem(allocType); + auto Dest = MakeAddrLValue(allocation.withElementType(ElemTy), allocType); + if (numElements) { + PoisonArrayLValue(*this, allocType, ElemTy, Dest, numElements); + } else { + PoisonLValueRecursive(*this, allocType, Dest); + } + } } else { const FunctionProtoType *allocatorType = allocator->getType()->castAs<FunctionProtoType>(); diff --git a/clang/test/CXX/drs/cwg1748.cpp b/clang/test/CXX/drs/cwg1748.cpp index a0fe737539392..aeee0bb6b3558 100644 --- a/clang/test/CXX/drs/cwg1748.cpp +++ b/clang/test/CXX/drs/cwg1748.cpp @@ -21,6 +21,7 @@ struct X { X(); }; // perform a null check. // CHECK-LABEL: define {{.*}} @_Z1fPv( +// CHECK: call void @llvm.memset{{.*}}(ptr {{.*}}, {{.*}} poison, {{.*}}) // CHECK-NOT: call // CHECK-NOT: icmp{{.*}} null // CHECK-NOT: br i1 @@ -29,10 +30,17 @@ struct X { X(); }; X *f(void *p) { return new (p) X; } // CHECK-LABEL: define {{.*}} @_Z1gPv( -// CHECK-NOT: call -// CHECK-NOT: icmp{{.*}} null -// CHECK-NOT: br i1 -// CHECK: call void @_ZN1XC1Ev( -// CHECK: br i1 -// CHECK: } +// CHECK: [[ARRAY_POISON_END:%arraypoison\.end.*]] = getelementptr inbounds [[A:.+]], ptr [[P:%.+]], i{{.+}} 5 +// CHECK: [[LARRAY_POISON_BODY:arraypoison\.body.*]]: +// CHECK-NEXT: [[ARRAY_POISON_CUR:%arraypoison\.cur.*]] = phi ptr [ [[P]], %entry ], [ [[ARRAY_POISON_NEXT:%arraypoison\.next.*]], %[[LARRAY_POISON_BODY]] ] +// CHECK: call void @llvm.memset.p0.i64(ptr align 1 [[ARRAY_POISON_CUR]], i8 poison, i64 1, i1 false) +// CHECK: br i1 {{.+}}, label %[[LARRAY_POISON_END:arraypoison\.end.*]], label %[[LARRAY_POISON_BODY]] +// CHECK: [[LARRAY_POISON_END]]: +// CHECK: arrayctor.loop{{.*}}: +// CHECK-NOT: call +// CHECK-NOT: icmp{{.*}} null +// CHECK-NOT: br i1 +// CHECK: call void @_ZN1XC1Ev( +// CHECK: br i1 +// CHECK: } X *g(void *p) { return new (p) X[5]; } diff --git a/clang/test/CodeGenCXX/new.cpp b/clang/test/CodeGenCXX/new.cpp index af225529c494e..bfa9eb4bfdea4 100644 --- a/clang/test/CodeGenCXX/new.cpp +++ b/clang/test/CodeGenCXX/new.cpp @@ -223,7 +223,11 @@ namespace test15 { // CHECK: [[P:%.*]] = load ptr, ptr // CHECK-NOT: icmp eq ptr [[P]], null // CHECK-NOT: br i1 - // CHECK-NEXT: [[END:%.*]] = getelementptr inbounds [[A:.*]], ptr [[P]], i64 5 + // CHECK: [[ARRAY_POISON_END:%arraypoison\.end.*]] = getelementptr inbounds [[A:.*]], ptr [[P]], i64 5 + // CHECK: [[LARRAY_POISON_BODY:arraypoison\.body.*]]: + // CHECK-NEXT: [[ARRAY_POISON_CUR:%arraypoison\.cur.*]] = phi ptr [ [[P]], %entry ], [ [[ARRAY_POISON_NEXT:%arraypoison\.next.*]], %[[LARRAY_POISON_BODY]] ] + // CHECK: arraypoison.end{{.*}}: + // CHECK-NEXT: [[END:%.*]] = getelementptr inbounds [[A]], ptr [[P]], i64 5 // CHECK-NEXT: br label // CHECK: [[CUR:%.*]] = phi ptr [ [[P]], {{%.*}} ], [ [[NEXT:%.*]], {{%.*}} ] // CHECK-NEXT: call void @_ZN6test151AC1Ev(ptr {{[^,]*}} [[CUR]]) @@ -257,6 +261,10 @@ namespace test15 { // CHECK: [[N:%.*]] = load i32, ptr // CHECK-NEXT: [[T0:%.*]] = sext i32 [[N]] to i64 // CHECK-NEXT: [[P:%.*]] = load ptr, ptr + // CHECK: [[ARRAY_POISON_END:%arraypoison\.end.*]] = getelementptr inbounds [[A]], ptr [[P]], i64 [[T0]] + // CHECK: [[LARRAY_POISON_BODY:arraypoison\.body.*]]: + // CHECK-NEXT: [[ARRAY_POISON_CUR:%arraypoison\.cur.*]] = phi ptr [ [[P]], %entry ], [ [[ARRAY_POISON_NEXT:%arraypoison\.next.*]], %[[LARRAY_POISON_BODY]] ] + // CHECK: arraypoison.end{{.*}}: // CHECK-NEXT: [[ISEMPTY:%.*]] = icmp eq i64 [[T0]], 0 // CHECK-NEXT: br i1 [[ISEMPTY]], // CHECK: [[END:%.*]] = getelementptr inbounds [[A]], ptr [[P]], i64 [[T0]] diff --git a/clang/test/CodeGenCXX/sanitize-default-init.cpp b/clang/test/CodeGenCXX/sanitize-default-init.cpp new file mode 100644 index 0000000000000..fc3c2fd658fc6 --- /dev/null +++ b/clang/test/CodeGenCXX/sanitize-default-init.cpp @@ -0,0 +1,41 @@ +// RUN: %clang_cc1 -x c++ -fsanitize=memory -std=c++11 -triple=x86_64-pc-linux -emit-llvm -o - %s | FileCheck %s + +/// Sanitise the placement new with default initialisation style. + +namespace std { + using size_t = decltype(sizeof(0)); +} + +void *operator new(std::size_t, void *p) noexcept { return p; } + +struct Simple { + int x; +}; + +struct WithCtor { + int x; + int y[4]; + WithCtor() { + bool flag = x > 0; /// This is UB + } +}; + +// CHECK-LABEL: define {{.*}} i32 @main() +int main() { + { + Simple s; + // CHECK: [[S:%.+]] = alloca %struct.Simple, align 4 + // CHECK: [[W:%.+]] = alloca %struct.WithCtor, align 4 + s.x = 42; + // CHECK: {{%.+}} = call ptr @__msan_memset(ptr [[S]], i32 poison, i64 4) + new (&s) Simple; + bool flag = s.x == 42; /// This is UB + } + { + WithCtor w; + w.x = 42; + // CHECK: {{%.+}} = call ptr @__msan_memset(ptr [[W]], i32 poison, i64 20) + auto *ptr = new (&w) WithCtor; /// This is UB + // CHECK: call void @_ZN8WithCtorC1Ev + } +} _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
