https://github.com/balazs-benics-sonarsource updated https://github.com/llvm/llvm-project/pull/156668
From 5d776f7eec057c51e8d9346492b1d4f4128bd1be Mon Sep 17 00:00:00 2001 From: Balazs Benics <balazs.ben...@sonarsource.com> Date: Wed, 3 Sep 2025 15:18:41 +0200 Subject: [PATCH 1/3] [analyzer] Canonicalize the Decls of FieldRegions When calculating the offset of a FieldRegion, we need to find out which field index the given field refers to. Previously, if for some reason the field was not found, then the `Idx` passed to `Layout.getFieldOffset` was out of bounds and caused undefined behavior when dereferenced an out of bounds element in `ASTVector::FieldOffsets::operator[]`, which asserts this in debug builds, but exposes the undefined behavior in release builds. In this patch, I refactored how we enumerate the fields, and gracefully handle the scenario where the field is not found. That case is still bad, but at least it should not expose the undefined behavior in release builds, and should assert earlier in debug builds than before. The motivational case was transformed into a regression test, that would fail if no canonicalization would happen when creating a FieldRegion. This was reduced from a production crash. In the test case, due to how modules work, there would be multiple copies of the same template specialization in the AST. This could lead into inconsistent state when the FieldRegion's Decl was different to the RecordDecl's field - because one referred to the first and the other to the second. This made `calculateOffset` fail to compute the field index, triggering the undefined behavior in production. While this inconsistency gets fixed now, I think the assertion is still warranted to avoid potential undefined behavior in release builds. CPP-6691,CPP-6849 Co-authored-by: Marco Borgeaud <marco.borge...@sonarsource.com> --- clang/lib/StaticAnalyzer/Core/MemRegion.cpp | 23 +++++++----- .../explicit-templ-inst-crash-in-modules.cppm | 35 +++++++++++++++++++ 2 files changed, 50 insertions(+), 8 deletions(-) create mode 100644 clang/test/Analysis/modules/explicit-templ-inst-crash-in-modules.cppm diff --git a/clang/lib/StaticAnalyzer/Core/MemRegion.cpp b/clang/lib/StaticAnalyzer/Core/MemRegion.cpp index 5f271963e3d09..f68fb1e6df759 100644 --- a/clang/lib/StaticAnalyzer/Core/MemRegion.cpp +++ b/clang/lib/StaticAnalyzer/Core/MemRegion.cpp @@ -1271,7 +1271,7 @@ const SymbolicRegion *MemRegionManager::getSymbolicHeapRegion(SymbolRef Sym) { const FieldRegion* MemRegionManager::getFieldRegion(const FieldDecl *d, const SubRegion* superRegion){ - return getSubRegion<FieldRegion>(d, superRegion); + return getSubRegion<FieldRegion>(d->getCanonicalDecl(), superRegion); } const ObjCIvarRegion* @@ -1704,16 +1704,23 @@ static RegionOffset calculateOffset(const MemRegion *R) { if (SymbolicOffsetBase) continue; - // Get the field number. - unsigned idx = 0; - for (RecordDecl::field_iterator FI = RD->field_begin(), - FE = RD->field_end(); FI != FE; ++FI, ++idx) { - if (FR->getDecl() == *FI) - break; + auto MaybeFieldIdx = [FR, RD]() -> std::optional<unsigned> { + assert(FR->getDecl()->getCanonicalDecl() == FR->getDecl()); + for (auto [Idx, Field] : llvm::enumerate(RD->fields())) { + if (FR->getDecl() == Field->getCanonicalDecl()) + return Idx; + } + return std::nullopt; + }(); + + if (!MaybeFieldIdx.has_value()) { + assert(false && "Field not found"); + goto Finish; // Invalid offset. } + const ASTRecordLayout &Layout = R->getContext().getASTRecordLayout(RD); // This is offset in bits. - Offset += Layout.getFieldOffset(idx); + Offset += Layout.getFieldOffset(MaybeFieldIdx.value()); break; } } diff --git a/clang/test/Analysis/modules/explicit-templ-inst-crash-in-modules.cppm b/clang/test/Analysis/modules/explicit-templ-inst-crash-in-modules.cppm new file mode 100644 index 0000000000000..6eec29c7187ba --- /dev/null +++ b/clang/test/Analysis/modules/explicit-templ-inst-crash-in-modules.cppm @@ -0,0 +1,35 @@ +// RUN: rm -rf %t +// RUN: mkdir -p %t +// RUN: split-file %s %t +// +// DEFINE: %{common-flags}= -std=c++20 -I %t -fprebuilt-module-path=%t +// +// RUN: %clang_cc1 %{common-flags} %t/other.cppm -emit-module-interface -o %t/other.pcm +// RUN: %clang_analyze_cc1 -analyzer-checker=core %{common-flags} %t/entry.cppm -verify + + +//--- MyStruct.h +template <typename T> struct MyStruct { + T data = 0; +}; +template struct MyStruct<int>; // Explicit template instantiation. + +//--- other.cppm +module; +#include "MyStruct.h" +export module other; +static void implicit_instantiate_MyStruct() { + MyStruct<int> var; + (void)var; +} + +//--- entry.cppm +// expected-no-diagnostics +module; +#include "MyStruct.h" +module other; + +void entry_point() { + MyStruct<int> var; // no-crash + (void)var; +} From 2d18911419310ef16e16e5d567b112f2c8d3805e Mon Sep 17 00:00:00 2001 From: Balazs Benics <balazs.ben...@sonarsource.com> Date: Wed, 3 Sep 2025 16:34:06 +0200 Subject: [PATCH 2/3] Honor LLVM variable casing --- .../clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h | 4 ++-- clang/lib/StaticAnalyzer/Core/MemRegion.cpp | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h index 89d306fb94046..c59413abc352d 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h @@ -1548,8 +1548,8 @@ class MemRegionManager { /// a specified FieldDecl. 'superRegion' corresponds to the containing /// memory region (which typically represents the memory representing /// a structure or class). - const FieldRegion *getFieldRegion(const FieldDecl *fd, - const SubRegion* superRegion); + const FieldRegion *getFieldRegion(const FieldDecl *FD, + const SubRegion *SuperRegion); const FieldRegion *getFieldRegionWithSuper(const FieldRegion *FR, const SubRegion *superRegion) { diff --git a/clang/lib/StaticAnalyzer/Core/MemRegion.cpp b/clang/lib/StaticAnalyzer/Core/MemRegion.cpp index f68fb1e6df759..29073047695b0 100644 --- a/clang/lib/StaticAnalyzer/Core/MemRegion.cpp +++ b/clang/lib/StaticAnalyzer/Core/MemRegion.cpp @@ -1268,10 +1268,10 @@ const SymbolicRegion *MemRegionManager::getSymbolicHeapRegion(SymbolRef Sym) { return getSubRegion<SymbolicRegion>(Sym, getHeapRegion()); } -const FieldRegion* -MemRegionManager::getFieldRegion(const FieldDecl *d, - const SubRegion* superRegion){ - return getSubRegion<FieldRegion>(d->getCanonicalDecl(), superRegion); +const FieldRegion * +MemRegionManager::getFieldRegion(const FieldDecl *FD, + const SubRegion *SuperRegion) { + return getSubRegion<FieldRegion>(FD->getCanonicalDecl(), SuperRegion); } const ObjCIvarRegion* From 1d289652b54d22db2566e63391c86501ecaefff5 Mon Sep 17 00:00:00 2001 From: Balazs Benics <balazs.ben...@sonarsource.com> Date: Wed, 3 Sep 2025 16:35:27 +0200 Subject: [PATCH 3/3] Hoist assert out --- clang/lib/StaticAnalyzer/Core/MemRegion.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/StaticAnalyzer/Core/MemRegion.cpp b/clang/lib/StaticAnalyzer/Core/MemRegion.cpp index 29073047695b0..f20e79ae675a4 100644 --- a/clang/lib/StaticAnalyzer/Core/MemRegion.cpp +++ b/clang/lib/StaticAnalyzer/Core/MemRegion.cpp @@ -1704,8 +1704,8 @@ static RegionOffset calculateOffset(const MemRegion *R) { if (SymbolicOffsetBase) continue; + assert(FR->getDecl()->getCanonicalDecl() == FR->getDecl()); auto MaybeFieldIdx = [FR, RD]() -> std::optional<unsigned> { - assert(FR->getDecl()->getCanonicalDecl() == FR->getDecl()); for (auto [Idx, Field] : llvm::enumerate(RD->fields())) { if (FR->getDecl() == Field->getCanonicalDecl()) return Idx; _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits