Author: Andy Kaylor Date: 2026-03-23T11:47:16-07:00 New Revision: 729480c4aec6ea8a18149fd3c73de57aaa677d2a
URL: https://github.com/llvm/llvm-project/commit/729480c4aec6ea8a18149fd3c73de57aaa677d2a DIFF: https://github.com/llvm/llvm-project/commit/729480c4aec6ea8a18149fd3c73de57aaa677d2a.diff LOG: [CIR] Generalize cxx alloc new size handling (#187790) The non-constant size handling in `emitCXXNewAllocSize` was making the incorrect assumption that the default behavior of the size value being explicitly cast to size_t would be the only behavior we'd see. This is actually only true with C++14 and later. To properly handle earlier standards, we need the more robust checking that classic codegen does in the equivalent function. This change adds that handling. Assisted-by: Cursor / claude-4.6-opus-high Added: clang/test/CIR/CodeGen/cleanup-scope-return-in-loop.cpp clang/test/CIR/CodeGen/new-array-size-conv.cpp Modified: clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp clang/lib/CIR/CodeGen/CIRGenFunction.cpp clang/lib/CIR/CodeGen/CIRGenFunction.h Removed: ################################################################################ diff --git a/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp b/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp index 03298faf79ce3..30a833564ec2f 100644 --- a/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp @@ -534,39 +534,97 @@ static mlir::Value emitCXXNewAllocSize(CIRGenFunction &cgf, const CXXNewExpr *e, // Create a value for the variable number of elements numElements = cgf.emitScalarExpr(*e->getArraySize()); auto numElementsType = mlir::cast<cir::IntType>(numElements.getType()); - [[maybe_unused]] unsigned numElementsWidth = numElementsType.getWidth(); - - // We might need check for overflow. - - mlir::Value hasOverflow; - // Classic codegen checks for the size variable being signed, having a - // smaller width than size_t, and having a larger width than size_t. - // However, the AST implicitly casts the size variable to size_t so none of - // these conditions will ever be met. - assert( - !(*e->getArraySize())->getType()->isSignedIntegerOrEnumerationType() && - (numElementsWidth == sizeWidth) && - (numElements.getType() == cgf.sizeTy) && - "Expected array size to be implicitly cast to size_t!"); - - // There are up to three conditions we need to test for: - // 1) if minElements > 0, we need to check whether numElements is smaller + unsigned numElementsWidth = numElementsType.getWidth(); + + // The number of elements can have an arbitrary integer type; + // essentially, we need to multiply it by a constant factor, add a + // cookie size, and verify that the result is representable as a + // size_t. That's just a gloss, though, and it's wrong in one + // important way: if the count is negative, it's an error even if + // the cookie size would bring the total size >= 0. + bool isSigned = + (*e->getArraySize())->getType()->isSignedIntegerOrEnumerationType(); + + // There are up to five conditions we need to test for: + // 1) if isSigned, we need to check whether numElements is negative; + // 2) if numElementsWidth > sizeWidth, we need to check whether + // numElements is larger than something representable in size_t; + // 3) if minElements > 0, we need to check whether numElements is smaller // than that. - // 2) we need to compute + // 4) we need to compute // sizeWithoutCookie := numElements * typeSizeMultiplier // and check whether it overflows; and - // 3) if we need a cookie, we need to compute + // 5) if we need a cookie, we need to compute // size := sizeWithoutCookie + cookieSize // and check whether it overflows. + mlir::Value hasOverflow; + + // If numElementsWidth > sizeWidth, then one way or another, we're + // going to have to do a comparison for (2), and this happens to + // take care of (1), too. + if (numElementsWidth > sizeWidth) { + llvm::APInt threshold = + llvm::APInt::getOneBitSet(numElementsWidth, sizeWidth); + + // Use an unsigned comparison regardless of the sign of numElements. + mlir::Value unsignedNumElements = numElements; + if (isSigned) + unsignedNumElements = cgf.getBuilder().createIntCast( + numElements, cgf.getBuilder().getUIntNTy(numElementsWidth)); + + mlir::Value thresholdV = + cgf.getBuilder().getConstInt(loc, threshold, /*isUnsigned=*/true); + hasOverflow = cgf.getBuilder().createCompare( + loc, cir::CmpOpKind::ge, unsignedNumElements, thresholdV); + numElements = cgf.getBuilder().createIntCast( + unsignedNumElements, mlir::cast<cir::IntType>(cgf.sizeTy)); + + // Otherwise, if we're signed, we want to sext up to size_t. + } else if (isSigned) { + if (numElementsWidth < sizeWidth) + numElements = cgf.getBuilder().createIntCast( + numElements, cgf.getBuilder().getSIntNTy(sizeWidth)); + + // If there's a non-1 type size multiplier, then we can do the + // signedness check at the same time as we do the multiply + // because a negative number times anything will cause an + // unsigned overflow. Otherwise, we have to do it here. But at + // least in this case, we can subsume the >= minElements check. + if (typeSizeMultiplier == 1) + hasOverflow = cgf.getBuilder().createCompare( + loc, cir::CmpOpKind::lt, numElements, + cgf.getBuilder().getConstInt(loc, numElements.getType(), + minElements)); + + numElements = cgf.getBuilder().createIntCast( + numElements, mlir::cast<cir::IntType>(cgf.sizeTy)); + + // Otherwise, zext up to size_t if necessary. + } else if (numElementsWidth < sizeWidth) { + numElements = cgf.getBuilder().createIntCast( + numElements, mlir::cast<cir::IntType>(cgf.sizeTy)); + } + + assert(numElements.getType() == cgf.sizeTy); + if (minElements) { // Don't allow allocation of fewer elements than we have initializers. if (!hasOverflow) { - // FIXME: Avoid creating this twice. It may happen above. mlir::Value minElementsV = cgf.getBuilder().getConstInt( loc, llvm::APInt(sizeWidth, minElements)); hasOverflow = cgf.getBuilder().createCompare(loc, cir::CmpOpKind::lt, numElements, minElementsV); + } else if (numElementsWidth > sizeWidth) { + // The other existing overflow subsumes this check. + // We do an unsigned comparison, since any signed value < -1 is + // taken care of either above or below. + mlir::Value minElementsV = cgf.getBuilder().getConstInt( + loc, llvm::APInt(sizeWidth, minElements)); + hasOverflow = cgf.getBuilder().createOr( + loc, hasOverflow, + cgf.getBuilder().createCompare(loc, cir::CmpOpKind::lt, numElements, + minElementsV)); } } diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.cpp b/clang/lib/CIR/CodeGen/CIRGenFunction.cpp index 4862dacfdeb1e..58fe699eeff1f 100644 --- a/clang/lib/CIR/CodeGen/CIRGenFunction.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenFunction.cpp @@ -344,13 +344,23 @@ void CIRGenFunction::LexicalScope::cleanup() { return; // Get rid of any empty block at the end of the scope. - bool entryBlock = builder.getInsertionBlock()->isEntryBlock(); - if (!entryBlock && curBlock->empty()) { + bool isEntryBlock = builder.getInsertionBlock()->isEntryBlock(); + if (!isEntryBlock && curBlock->empty()) { curBlock->erase(); for (mlir::Block *retBlock : retBlocks) { if (retBlock->getUses().empty()) retBlock->erase(); } + // The empty block was created by a terminator (return/break/continue) + // and is now erased. If there are pending cleanup scopes (from variables + // with destructors), we need to pop them and ensure the containing scope + // block gets a proper terminator (e.g. cir.yield). Without this, the + // cleanup-scope-op popping that would otherwise happen in + // ~RunCleanupsScope leaves the scope block without a terminator. + if (hasPendingCleanups()) { + builder.setInsertionPointToEnd(entryBlock); + insertCleanupAndLeave(entryBlock); + } return; } diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h index a3eb000871013..153bc19ab2f31 100644 --- a/clang/lib/CIR/CodeGen/CIRGenFunction.h +++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h @@ -1037,6 +1037,12 @@ class CIRGenFunction : public CIRGenTypeCache { performCleanup = false; cgf.currentCleanupStackDepth = oldCleanupStackDepth; } + + /// Whether there are any pending cleanups that have been pushed since + /// this scope was entered. + bool hasPendingCleanups() const { + return cgf.ehStack.stable_begin() != cleanupStackDepth; + } }; // Cleanup stack depth of the RunCleanupsScope that was pushed most recently. diff --git a/clang/test/CIR/CodeGen/cleanup-scope-return-in-loop.cpp b/clang/test/CIR/CodeGen/cleanup-scope-return-in-loop.cpp new file mode 100644 index 0000000000000..38e0f314ee52f --- /dev/null +++ b/clang/test/CIR/CodeGen/cleanup-scope-return-in-loop.cpp @@ -0,0 +1,58 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s -check-prefix=CIR +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll +// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll +// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM + +struct Struk { + ~Struk(); + bool check(); +}; + +// Test that a for-loop body containing a local variable with a destructor +// followed by 'continue' and 'return' produces a properly terminated +// cir.scope. This exercises the interaction between LexicalScope cleanup, +// CleanupScopeOp popping, and empty-block erasure. + +int test_cleanup_return_in_loop(int n) { + for (int i = 0; i < n; i++) { + Struk s; + if (s.check()) + continue; + return i; + } + return -1; +} + +// CIR: cir.func {{.*}} @_Z27test_cleanup_return_in_loopi +// CIR: cir.scope { +// CIR: cir.for : cond { +// CIR: } body { +// CIR: cir.scope { +// CIR: cir.cleanup.scope { +// CIR: cir.continue +// CIR: cir.return +// CIR: } cleanup normal { +// CIR: cir.call @_ZN5StrukD1Ev +// CIR: cir.yield +// CIR: cir.yield +// CIR: } step { +// CIR: cir.return + +// LLVM: define dso_local noundef i32 @_Z27test_cleanup_return_in_loopi(i32 noundef %{{.*}}) +// LLVM: [[LOOP_COND:.*]]: +// LLVM: %[[ICMP:.*]] = icmp slt i32 +// LLVM-NEXT: br i1 %[[ICMP]] +// LLVM: [[LOOP_BODY:.*]]: +// LLVM: %[[CALL:.*]] = call noundef {{.*}} @_ZN5Struk5checkEv( +// LLVM-NEXT: br i1 %[[CALL]] +// LLVM: [[CLEANUP:.*]]: +// LLVM: call void @_ZN5StrukD1Ev( +// LLVM: switch i32 %{{.*}}, label %{{.*}} [ +// LLVM: ] +// LLVM: [[LOOP_STEP:.*]]: +// LLVM: add nsw i32 %{{.*}}, 1 +// LLVM: [[POST_LOOP:.*]]: +// LLVM: store i32 -1, ptr %{{.*}}, align 4 +// LLVM: ret i32 diff --git a/clang/test/CIR/CodeGen/new-array-size-conv.cpp b/clang/test/CIR/CodeGen/new-array-size-conv.cpp new file mode 100644 index 0000000000000..e109b99b91af2 --- /dev/null +++ b/clang/test/CIR/CodeGen/new-array-size-conv.cpp @@ -0,0 +1,70 @@ +// RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s +// RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t-cir.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t-cir.ll %s +// RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s + +// Tests for array new expressions where the array size is not implicitly +// converted to size_t by Sema (pre-C++14 behavior). The CIR codegen must +// handle the signed/width conversion itself. + +typedef __typeof__(sizeof(int)) size_t; + +// Sized non-allocating (placement) new. +void *operator new[](size_t, void *p) noexcept { return p; } + +struct S { + int x; + S(); +}; + +// Signed int array size with multi-byte element (typeSizeMultiplier != 1). +// The sign extension is done and the multiply overflow catches negative values. +void t_new_signed_size(int n) { + auto p = new double[n]; +} + +// CIR-LABEL: cir.func {{.*}} @_Z17t_new_signed_sizei +// CIR: %[[N:.*]] = cir.load{{.*}} %[[ARG_ALLOCA:.*]] +// CIR: %[[N_SEXT:.*]] = cir.cast integral %[[N]] : !s32i -> !s64i +// CIR: %[[N_SIZE_T:.*]] = cir.cast integral %[[N_SEXT]] : !s64i -> !u64i +// CIR: %[[ELEMENT_SIZE:.*]] = cir.const #cir.int<8> : !u64i +// CIR: %[[RESULT:.*]], %[[OVERFLOW:.*]] = cir.mul.overflow %[[N_SIZE_T]], %[[ELEMENT_SIZE]] : !u64i -> !u64i +// CIR: %[[ALL_ONES:.*]] = cir.const #cir.int<18446744073709551615> : !u64i +// CIR: %[[ALLOC_SIZE:.*]] = cir.select if %[[OVERFLOW]] then %[[ALL_ONES]] else %[[RESULT]] + +// LLVM-LABEL: define{{.*}} void @_Z17t_new_signed_sizei +// LLVM: %[[N:.*]] = load i32, ptr %{{.+}} +// LLVM: %[[N_SIZE_T:.*]] = sext i32 %[[N]] to i64 +// LLVM: %[[MUL_OVERFLOW:.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %[[N_SIZE_T]], i64 8) + +// OGCG-LABEL: define{{.*}} void @_Z17t_new_signed_sizei +// OGCG: %[[N:.*]] = load i32, ptr %{{.+}} +// OGCG: %[[N_SIZE_T:.*]] = sext i32 %[[N]] to i64 +// OGCG: %[[MUL_OVERFLOW:.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %[[N_SIZE_T]], i64 8) + +// Signed int array size with single-byte element (typeSizeMultiplier == 1). +// A signed comparison catches negative values directly. +void t_new_signed_size_char(int n) { + auto p = new char[n]; +} + +// CIR-LABEL: cir.func {{.*}} @_Z22t_new_signed_size_chari +// CIR: %[[N:.*]] = cir.load{{.*}} %[[ARG_ALLOCA:.*]] +// CIR: %[[N_SEXT:.*]] = cir.cast integral %[[N]] : !s32i -> !s64i +// CIR: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CIR: %[[IS_NEG:.*]] = cir.cmp lt %[[N_SEXT]], %[[ZERO]] : !s64i +// CIR: %[[N_SIZE_T:.*]] = cir.cast integral %[[N_SEXT]] : !s64i -> !u64i +// CIR: %[[ALL_ONES:.*]] = cir.const #cir.int<18446744073709551615> : !u64i +// CIR: %[[ALLOC_SIZE:.*]] = cir.select if %[[IS_NEG]] then %[[ALL_ONES]] else %[[N_SIZE_T]] + +// LLVM-LABEL: define{{.*}} void @_Z22t_new_signed_size_chari +// LLVM: %[[N:.*]] = load i32, ptr %{{.+}} +// LLVM: %[[N_SIZE_T:.*]] = sext i32 %[[N]] to i64 +// LLVM: %[[IS_NEG:.*]] = icmp slt i64 %[[N_SIZE_T]], 0 + +// OGCG-LABEL: define{{.*}} void @_Z22t_new_signed_size_chari +// OGCG: %[[N:.*]] = load i32, ptr %{{.+}} +// OGCG: %[[N_SIZE_T:.*]] = sext i32 %[[N]] to i64 +// OGCG: %[[IS_NEG:.*]] = icmp slt i64 %[[N_SIZE_T]], 0 _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
