https://github.com/marlus updated https://github.com/llvm/llvm-project/pull/204139
>From 055db081f85fe617fa1025e2bf582338e4d747bd Mon Sep 17 00:00:00 2001 From: Marlus Cadanus da Costa <[email protected]> Date: Tue, 16 Jun 2026 08:42:14 -0400 Subject: [PATCH 01/10] [Clang] Fix offsetof sign-extending unsigned array indices >= 128 When evaluating __builtin_offsetof with an unsigned integer array index (e.g. uint8_t, uint16_t) whose value has the high bit set, Clang was calling getSExtValue() on the APSInt index, which sign-extends the value and produces a large bogus offset. Fix this by checking whether the APSInt is unsigned and using getZExtValue() in that case instead. Fixes #199319 --- clang/lib/AST/ExprConstant.cpp | 3 +- clang/test/Sema/offsetof-unsigned-index.c | 25 +++++++++++++++ .../test/SemaCXX/offsetof-unsigned-index.cpp | 31 +++++++++++++++++++ 3 files changed, 58 insertions(+), 1 deletion(-) create mode 100644 clang/test/Sema/offsetof-unsigned-index.c create mode 100644 clang/test/SemaCXX/offsetof-unsigned-index.cpp diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index bc98c0d86bb65..a2a3349b21baf 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -19284,7 +19284,8 @@ bool IntExprEvaluator::VisitOffsetOfExpr(const OffsetOfExpr *OOE) { return Error(OOE); CurrentType = AT->getElementType(); CharUnits ElementSize = Info.Ctx.getTypeSizeInChars(CurrentType); - Result += IdxResult.getSExtValue() * ElementSize; + Result += (IdxResult.isUnsigned() ? 
(int64_t)IdxResult.getZExtValue() + : IdxResult.getSExtValue()) * ElementSize; break; } diff --git a/clang/test/Sema/offsetof-unsigned-index.c b/clang/test/Sema/offsetof-unsigned-index.c new file mode 100644 index 0000000000000..480e486fbad72 --- /dev/null +++ b/clang/test/Sema/offsetof-unsigned-index.c @@ -0,0 +1,25 @@ +// RUN: %clang_cc1 -fsyntax-only -verify %s -triple x86_64-linux-gnu +// RUN: %clang_cc1 -fsyntax-only -verify %s -triple x86_64-linux-gnu -fexperimental-new-constant-interpreter + +// expected-no-diagnostics + +// Test that offsetof correctly zero-extends unsigned array indices >= 128. +// Previously, Clang would sign-extend uint8_t indices >= 128, producing +// a large bogus offset value instead of the correct one. +// https://github.com/llvm/llvm-project/issues/199319 + +#include <stdint.h> +#include <stddef.h> + +struct MyStruct { + void *ptrs[256]; +}; + +_Static_assert(__builtin_offsetof(struct MyStruct, ptrs[(uint8_t)127]) == 127 * sizeof(void *), + "offsetof with uint8_t index 127 should be correct"); + +_Static_assert(__builtin_offsetof(struct MyStruct, ptrs[(uint8_t)128]) == 128 * sizeof(void *), + "offsetof with uint8_t index 128 should be correctly zero-extended, not sign-extended"); + +_Static_assert(__builtin_offsetof(struct MyStruct, ptrs[(uint8_t)255]) == 255 * sizeof(void *), + "offsetof with uint8_t index 255 should be correctly zero-extended, not sign-extended"); diff --git a/clang/test/SemaCXX/offsetof-unsigned-index.cpp b/clang/test/SemaCXX/offsetof-unsigned-index.cpp new file mode 100644 index 0000000000000..50bf50ef0fc35 --- /dev/null +++ b/clang/test/SemaCXX/offsetof-unsigned-index.cpp @@ -0,0 +1,31 @@ +// RUN: %clang_cc1 -fsyntax-only -verify %s -triple x86_64-linux-gnu -std=c++11 +// RUN: %clang_cc1 -fsyntax-only -verify %s -triple x86_64-linux-gnu -std=c++11 -fexperimental-new-constant-interpreter + +// expected-no-diagnostics + +// Test that offsetof correctly zero-extends unsigned array indices >= 128. 
+// Previously, Clang would sign-extend uint8_t/uint16_t indices whose high bit +// was set, producing a large bogus offset value instead of the correct one. +// https://github.com/llvm/llvm-project/issues/199319 + +#include <cstdint> +#include <cstddef> + +struct MyStruct { + void *ptrs[256]; +}; + +// uint8_t index: values >= 128 were incorrectly sign-extended +static_assert(__builtin_offsetof(MyStruct, ptrs[(uint8_t)127]) == 127 * sizeof(void *), + "offsetof with uint8_t index 127 should be correct"); +static_assert(__builtin_offsetof(MyStruct, ptrs[(uint8_t)128]) == 128 * sizeof(void *), + "offsetof with uint8_t index 128 should be correctly zero-extended"); +static_assert(__builtin_offsetof(MyStruct, ptrs[(uint8_t)255]) == 255 * sizeof(void *), + "offsetof with uint8_t index 255 should be correctly zero-extended"); + +// uint16_t index: values >= 32768 were also affected +struct BigStruct { + char data[65536]; +}; +static_assert(__builtin_offsetof(BigStruct, data[(uint16_t)32768]) == 32768, + "offsetof with uint16_t index 32768 should be correctly zero-extended"); >From 71f80764449046ef26d2ad399fcd2853359a2a6a Mon Sep 17 00:00:00 2001 From: Cadanus da Costa <[email protected]> Date: Tue, 16 Jun 2026 14:52:53 -0400 Subject: [PATCH 02/10] Apply clang-format --- clang/lib/AST/ExprConstant.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index a2a3349b21baf..eb76c10076669 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -19285,7 +19285,8 @@ bool IntExprEvaluator::VisitOffsetOfExpr(const OffsetOfExpr *OOE) { CurrentType = AT->getElementType(); CharUnits ElementSize = Info.Ctx.getTypeSizeInChars(CurrentType); Result += (IdxResult.isUnsigned() ? 
(int64_t)IdxResult.getZExtValue() - : IdxResult.getSExtValue()) * ElementSize; + : IdxResult.getSExtValue()) * + ElementSize; break; } >From d23b24d816caca1c90bf7b279736f05eb059a1f8 Mon Sep 17 00:00:00 2001 From: Marlus Cadanus da Costa <[email protected]> Date: Tue, 16 Jun 2026 15:02:17 -0400 Subject: [PATCH 03/10] Keep ByteCode compiler in sync: fix offsetof sign-extending unsigned array indices --- clang/lib/AST/ByteCode/Compiler.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/clang/lib/AST/ByteCode/Compiler.cpp b/clang/lib/AST/ByteCode/Compiler.cpp index 638e6ecafb295..48425d26aeb22 100644 --- a/clang/lib/AST/ByteCode/Compiler.cpp +++ b/clang/lib/AST/ByteCode/Compiler.cpp @@ -3801,8 +3801,14 @@ bool Compiler<Emitter>::VisitOffsetOfExpr(const OffsetOfExpr *E) { if (!this->visit(ArrayIndexExpr)) return false; - // Cast to Sint64. + // Cast to Sint64. For unsigned types, cast to Uint64 first to + // avoid sign-extending values with the high bit set (e.g. uint8_t >= 128). 
if (IndexT != PT_Sint64) { + if (!isSignedType(IndexT) && IndexT != PT_Uint64) { + if (!this->emitCast(IndexT, PT_Uint64, E)) + return false; + IndexT = PT_Uint64; + } if (!this->emitCast(IndexT, PT_Sint64, E)) return false; } >From fcd22f3cf909345718f2820c9f6d3ad3ef806250 Mon Sep 17 00:00:00 2001 From: Marlus Cadanus da Costa <[email protected]> Date: Tue, 16 Jun 2026 16:00:44 -0400 Subject: [PATCH 04/10] Reject negative and oversized offsetof array indices --- clang/lib/AST/ExprConstant.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index eb76c10076669..578aba948c1d7 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -19284,6 +19284,11 @@ bool IntExprEvaluator::VisitOffsetOfExpr(const OffsetOfExpr *OOE) { return Error(OOE); CurrentType = AT->getElementType(); CharUnits ElementSize = Info.Ctx.getTypeSizeInChars(CurrentType); + // Reject negative indices and indices too large to fit in int64_t, + // to avoid sign-extension issues or crashes in getZExtValue(). + if (IdxResult.isSigned() ? IdxResult.isNegative() + : IdxResult.ugt(APSInt::getMaxValue(64, /*Unsigned=*/false))) + return Error(OOE); Result += (IdxResult.isUnsigned() ? (int64_t)IdxResult.getZExtValue() : IdxResult.getSExtValue()) * ElementSize; >From 224d40a02ca3ae64857122cf0279e37a817cb5b0 Mon Sep 17 00:00:00 2001 From: Marlus Cadanus da Costa <[email protected]> Date: Tue, 16 Jun 2026 16:09:59 -0400 Subject: [PATCH 05/10] Reject negative and oversized offsetof array indices in ByteCode interpreter Apply the same guards to InterpretOffsetOf in the ByteCode interpreter (clang/lib/AST/ByteCode/InterpBuiltin.cpp) as were added to ExprConstant.cpp: reject negative indices and unsigned indices that exceed INT64_MAX. In the ByteCode path, indices arrive as int64_t after the Uint64->Sint64 cast chain in Compiler.cpp. 
A negative int64_t covers both explicitly negative signed indices and unsigned values >= 0x8000000000000000 (which wrap to negative after the cast), so a single Index < 0 guard handles both cases. Also extend the test files with expected-error cases for negative and __uint128_t indices. --- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 8 ++++++++ clang/test/Sema/offsetof-unsigned-index.c | 11 +++++++++-- clang/test/SemaCXX/offsetof-unsigned-index.cpp | 10 ++++++++-- 3 files changed, 25 insertions(+), 4 deletions(-) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 55907bf11506b..91432e204d3e2 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -6544,6 +6544,14 @@ bool InterpretOffsetOf(InterpState &S, CodePtr OpPC, const OffsetOfExpr *E, // When generating bytecode, we put all the index expressions as Sint64 on // the stack. int64_t Index = ArrayIndices[ArrayIndex]; + // Reject negative indices and unsigned indices that wrapped to negative + // after the Uint64->Sint64 cast (e.g. __uint128_t >= 0x8000000000000000). + if (Index < 0) { + S.FFDiag(S.Current->getLocation(OpPC), + diag::note_invalid_subexpr_in_const_expr) + << S.Current->getRange(OpPC); + return false; + } const ArrayType *AT = S.getASTContext().getAsArrayType(CurrentType); if (!AT) return false; diff --git a/clang/test/Sema/offsetof-unsigned-index.c b/clang/test/Sema/offsetof-unsigned-index.c index 480e486fbad72..55f1e08f3a535 100644 --- a/clang/test/Sema/offsetof-unsigned-index.c +++ b/clang/test/Sema/offsetof-unsigned-index.c @@ -1,11 +1,10 @@ // RUN: %clang_cc1 -fsyntax-only -verify %s -triple x86_64-linux-gnu // RUN: %clang_cc1 -fsyntax-only -verify %s -triple x86_64-linux-gnu -fexperimental-new-constant-interpreter -// expected-no-diagnostics - // Test that offsetof correctly zero-extends unsigned array indices >= 128. 
// Previously, Clang would sign-extend uint8_t indices >= 128, producing // a large bogus offset value instead of the correct one. +// Also tests that negative indices and oversized __uint128_t indices are rejected. // https://github.com/llvm/llvm-project/issues/199319 #include <stdint.h> @@ -15,6 +14,7 @@ struct MyStruct { void *ptrs[256]; }; +// Unsigned indices that were previously sign-extended must be zero-extended. _Static_assert(__builtin_offsetof(struct MyStruct, ptrs[(uint8_t)127]) == 127 * sizeof(void *), "offsetof with uint8_t index 127 should be correct"); @@ -23,3 +23,10 @@ _Static_assert(__builtin_offsetof(struct MyStruct, ptrs[(uint8_t)128]) == 128 * _Static_assert(__builtin_offsetof(struct MyStruct, ptrs[(uint8_t)255]) == 255 * sizeof(void *), "offsetof with uint8_t index 255 should be correctly zero-extended, not sign-extended"); + +// Negative indices must be rejected. +struct NegIdxStruct { int a; int x[1]; }; +_Static_assert(__builtin_offsetof(struct NegIdxStruct, x[-1]) == 0, ""); // expected-error {{not an integral constant expression}} expected-note {{subexpression not valid in a constant expression}} + +// __uint128_t indices >= 0x8000000000000000 must be rejected. 
+_Static_assert(__builtin_offsetof(struct NegIdxStruct, x[(__uint128_t)0x8000000000000000]) == 0, ""); // expected-error {{not an integral constant expression}} expected-note {{subexpression not valid in a constant expression}} diff --git a/clang/test/SemaCXX/offsetof-unsigned-index.cpp b/clang/test/SemaCXX/offsetof-unsigned-index.cpp index 50bf50ef0fc35..2cea1f32be472 100644 --- a/clang/test/SemaCXX/offsetof-unsigned-index.cpp +++ b/clang/test/SemaCXX/offsetof-unsigned-index.cpp @@ -1,11 +1,10 @@ // RUN: %clang_cc1 -fsyntax-only -verify %s -triple x86_64-linux-gnu -std=c++11 // RUN: %clang_cc1 -fsyntax-only -verify %s -triple x86_64-linux-gnu -std=c++11 -fexperimental-new-constant-interpreter -// expected-no-diagnostics - // Test that offsetof correctly zero-extends unsigned array indices >= 128. // Previously, Clang would sign-extend uint8_t/uint16_t indices whose high bit // was set, producing a large bogus offset value instead of the correct one. +// Also tests that negative indices and oversized __uint128_t indices are rejected. // https://github.com/llvm/llvm-project/issues/199319 #include <cstdint> @@ -29,3 +28,10 @@ struct BigStruct { }; static_assert(__builtin_offsetof(BigStruct, data[(uint16_t)32768]) == 32768, "offsetof with uint16_t index 32768 should be correctly zero-extended"); + +// Negative indices must be rejected. +struct NegIdxStruct { int a; int x[1]; }; +static_assert(__builtin_offsetof(NegIdxStruct, x[-1]) == 0, ""); // expected-error {{not an integral constant expression}} expected-note {{subexpression not valid in a constant expression}} + +// __uint128_t indices >= 0x8000000000000000 must be rejected. 
+static_assert(__builtin_offsetof(NegIdxStruct, x[(__uint128_t)0x8000000000000000]) == 0, ""); // expected-error {{not an integral constant expression}} expected-note {{subexpression not valid in a constant expression}} >From 1861378a79242dd66140bcd1042d12374eaad18d Mon Sep 17 00:00:00 2001 From: Marlus Cadanus da Costa <[email protected]> Date: Tue, 16 Jun 2026 16:20:17 -0400 Subject: [PATCH 06/10] clang-format: break long line in VisitOffsetOfExpr --- clang/lib/AST/ExprConstant.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 578aba948c1d7..307b5db0b231f 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -19286,8 +19286,9 @@ bool IntExprEvaluator::VisitOffsetOfExpr(const OffsetOfExpr *OOE) { CharUnits ElementSize = Info.Ctx.getTypeSizeInChars(CurrentType); // Reject negative indices and indices too large to fit in int64_t, // to avoid sign-extension issues or crashes in getZExtValue(). + APSInt MaxIdx = APSInt::getMaxValue(64, /*Unsigned=*/false); if (IdxResult.isSigned() ? IdxResult.isNegative() - : IdxResult.ugt(APSInt::getMaxValue(64, /*Unsigned=*/false))) + : IdxResult.ugt(MaxIdx)) return Error(OOE); Result += (IdxResult.isUnsigned() ? 
(int64_t)IdxResult.getZExtValue() : IdxResult.getSExtValue()) * >From bee3053414d1ff74bb12366112f62008233e1e15 Mon Sep 17 00:00:00 2001 From: Marlus Cadanus da Costa <[email protected]> Date: Wed, 17 Jun 2026 09:40:26 -0400 Subject: [PATCH 07/10] Fix offsetof overflow and AP index handling --- clang/lib/AST/ByteCode/Compiler.cpp | 7 +++++-- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 12 +++++++----- clang/lib/AST/ExprConstant.cpp | 23 ++++++++++++++++------- 3 files changed, 28 insertions(+), 14 deletions(-) diff --git a/clang/lib/AST/ByteCode/Compiler.cpp b/clang/lib/AST/ByteCode/Compiler.cpp index 48425d26aeb22..159b910f82302 100644 --- a/clang/lib/AST/ByteCode/Compiler.cpp +++ b/clang/lib/AST/ByteCode/Compiler.cpp @@ -3801,8 +3801,11 @@ bool Compiler<Emitter>::VisitOffsetOfExpr(const OffsetOfExpr *E) { if (!this->visit(ArrayIndexExpr)) return false; - // Cast to Sint64. For unsigned types, cast to Uint64 first to - // avoid sign-extending values with the high bit set (e.g. uint8_t >= 128). + // Cast to Sint64. For unsigned types, cast to Uint64 first to avoid + // sign-extending values with the high bit set (e.g. uint8_t >= 128). + // AP types cannot be safely narrowed to Sint64; fail constant evaluation. + if (IndexT == PT_IntAP || IndexT == PT_IntAPS) + return false; if (IndexT != PT_Sint64) { if (!isSignedType(IndexT) && IndexT != PT_Uint64) { if (!this->emitCast(IndexT, PT_Uint64, E)) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 91432e204d3e2..07af289f99243 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -6541,11 +6541,7 @@ bool InterpretOffsetOf(InterpState &S, CodePtr OpPC, const OffsetOfExpr *E, break; } case OffsetOfNode::Array: { - // When generating bytecode, we put all the index expressions as Sint64 on - // the stack. 
int64_t Index = ArrayIndices[ArrayIndex]; - // Reject negative indices and unsigned indices that wrapped to negative - // after the Uint64->Sint64 cast (e.g. __uint128_t >= 0x8000000000000000). if (Index < 0) { S.FFDiag(S.Current->getLocation(OpPC), diag::note_invalid_subexpr_in_const_expr) @@ -6557,7 +6553,13 @@ bool InterpretOffsetOf(InterpState &S, CodePtr OpPC, const OffsetOfExpr *E, return false; CurrentType = AT->getElementType(); CharUnits ElementSize = S.getASTContext().getTypeSizeInChars(CurrentType); - Result += Index * ElementSize; + int64_t ElemSize = ElementSize.getQuantity(); + if (Index != 0 && ElemSize > llvm::maxIntN(64) / Index) + return false; + int64_t Offset = Index * ElemSize; + if (Result.getQuantity() > llvm::maxIntN(64) - Offset) + return false; + Result += CharUnits::fromQuantity(Offset); ++ArrayIndex; break; } diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 307b5db0b231f..3b96c884a50f0 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -19284,15 +19284,24 @@ bool IntExprEvaluator::VisitOffsetOfExpr(const OffsetOfExpr *OOE) { return Error(OOE); CurrentType = AT->getElementType(); CharUnits ElementSize = Info.Ctx.getTypeSizeInChars(CurrentType); - // Reject negative indices and indices too large to fit in int64_t, - // to avoid sign-extension issues or crashes in getZExtValue(). + // Reject negative indices, indices too large to fit in int64_t, + // and overflow in the offset computation. APSInt MaxIdx = APSInt::getMaxValue(64, /*Unsigned=*/false); - if (IdxResult.isSigned() ? IdxResult.isNegative() - : IdxResult.ugt(MaxIdx)) + if (IdxResult.isSigned() + ? (IdxResult.isNegative() || IdxResult.sgt(MaxIdx)) + : IdxResult.ugt(MaxIdx)) return Error(OOE); - Result += (IdxResult.isUnsigned() ? (int64_t)IdxResult.getZExtValue() - : IdxResult.getSExtValue()) * - ElementSize; + int64_t IdxVal = IdxResult.isUnsigned() + ? 
(int64_t)IdxResult.getZExtValue() + : IdxResult.getSExtValue(); + int64_t ElemSize = ElementSize.getQuantity(); + if (IdxVal != 0 && + ElemSize > std::numeric_limits<int64_t>::max() / IdxVal) + return Error(OOE); + int64_t Offset = IdxVal * ElemSize; + if (Result.getQuantity() > std::numeric_limits<int64_t>::max() - Offset) + return Error(OOE); + Result += CharUnits::fromQuantity(Offset); break; } >From e526016bdeeb2662fc6c0864f26ef3af8d1bf379 Mon Sep 17 00:00:00 2001 From: Marlus Cadanus da Costa <[email protected]> Date: Wed, 17 Jun 2026 10:06:38 -0400 Subject: [PATCH 08/10] Add tests for __uint128_t > UINT64_MAX and multiply overflow --- clang/test/Sema/offsetof-unsigned-index.c | 10 ++++++++++ clang/test/SemaCXX/offsetof-unsigned-index.cpp | 10 ++++++++++ 2 files changed, 20 insertions(+) diff --git a/clang/test/Sema/offsetof-unsigned-index.c b/clang/test/Sema/offsetof-unsigned-index.c index 55f1e08f3a535..1b34ebe60443f 100644 --- a/clang/test/Sema/offsetof-unsigned-index.c +++ b/clang/test/Sema/offsetof-unsigned-index.c @@ -30,3 +30,13 @@ _Static_assert(__builtin_offsetof(struct NegIdxStruct, x[-1]) == 0, ""); // expe // __uint128_t indices >= 0x8000000000000000 must be rejected. _Static_assert(__builtin_offsetof(struct NegIdxStruct, x[(__uint128_t)0x8000000000000000]) == 0, ""); // expected-error {{not an integral constant expression}} expected-note {{subexpression not valid in a constant expression}} + +// __uint128_t indices > UINT64_MAX must be rejected (e.g. adding another zero: +// old code would truncate 2^64 to 0 via PT_Uint64 cast, silently producing a +// wrong result instead of an error). +_Static_assert(__builtin_offsetof(struct NegIdxStruct, x[((__uint128_t)1 << 64)]) == 0, ""); // expected-error {{not an integral constant expression}} expected-note {{subexpression not valid in a constant expression}} + +// A uint64_t index that causes index*sizeof(element) to overflow int64_t must +// be rejected. 
4611686018427387904 (2^62) * sizeof(short) (2) == 2^63 > INT64_MAX. +struct ShortArray { short data[2]; }; +_Static_assert(__builtin_offsetof(struct ShortArray, data[(uint64_t)4611686018427387904ULL]) == 0, ""); // expected-error {{not an integral constant expression}} expected-note {{subexpression not valid in a constant expression}} diff --git a/clang/test/SemaCXX/offsetof-unsigned-index.cpp b/clang/test/SemaCXX/offsetof-unsigned-index.cpp index 2cea1f32be472..2b3995d405705 100644 --- a/clang/test/SemaCXX/offsetof-unsigned-index.cpp +++ b/clang/test/SemaCXX/offsetof-unsigned-index.cpp @@ -35,3 +35,13 @@ static_assert(__builtin_offsetof(NegIdxStruct, x[-1]) == 0, ""); // expected-err // __uint128_t indices >= 0x8000000000000000 must be rejected. static_assert(__builtin_offsetof(NegIdxStruct, x[(__uint128_t)0x8000000000000000]) == 0, ""); // expected-error {{not an integral constant expression}} expected-note {{subexpression not valid in a constant expression}} + +// __uint128_t indices > UINT64_MAX must be rejected (e.g. adding another zero: +// old code would truncate 2^64 to 0 via PT_Uint64 cast, silently producing a +// wrong result instead of an error). +static_assert(__builtin_offsetof(NegIdxStruct, x[((__uint128_t)1 << 64)]) == 0, ""); // expected-error {{not an integral constant expression}} expected-note {{subexpression not valid in a constant expression}} + +// A uint64_t index that causes index*sizeof(element) to overflow int64_t must +// be rejected. 4611686018427387904 (2^62) * sizeof(short) (2) == 2^63 > INT64_MAX. 
+struct ShortArray { short data[2]; }; +static_assert(__builtin_offsetof(ShortArray, data[(uint64_t)4611686018427387904ULL]) == 0, ""); // expected-error {{not an integral constant expression}} expected-note {{subexpression not valid in a constant expression}} >From b30608ccf58263ec66b266ac6064c1b04a9942c7 Mon Sep 17 00:00:00 2001 From: Marlus Cadanus da Costa <[email protected]> Date: Fri, 19 Jun 2026 09:54:59 -0400 Subject: [PATCH 09/10] Handle small AP-typed offsetof indices correctly --- clang/lib/AST/ByteCode/Compiler.cpp | 16 +++++++++++++--- clang/test/Sema/offsetof-unsigned-index.c | 5 +++++ clang/test/SemaCXX/offsetof-unsigned-index.cpp | 5 +++++ 3 files changed, 23 insertions(+), 3 deletions(-) diff --git a/clang/lib/AST/ByteCode/Compiler.cpp b/clang/lib/AST/ByteCode/Compiler.cpp index 159b910f82302..e954e2915bfc0 100644 --- a/clang/lib/AST/ByteCode/Compiler.cpp +++ b/clang/lib/AST/ByteCode/Compiler.cpp @@ -3799,13 +3799,23 @@ bool Compiler<Emitter>::VisitOffsetOfExpr(const OffsetOfExpr *E) { continue; } + if (IndexT == PT_IntAP || IndexT == PT_IntAPS) { + // AP types (e.g. __uint128_t, __int128) cannot be safely cast to + // Sint64. Evaluate the constant and push it directly as Sint64. + Expr::EvalResult EvalResult; + if (!ArrayIndexExpr->EvaluateAsInt(EvalResult, Ctx.getASTContext())) + return false; + llvm::APSInt IdxVal = EvalResult.Val.getInt(); + if (IdxVal.isNegative() || !IdxVal.isSignedIntN(64)) + return false; + if (!this->emitConstSint64((int64_t)IdxVal.getZExtValue(), E)) + return false; + continue; + } if (!this->visit(ArrayIndexExpr)) return false; // Cast to Sint64. For unsigned types, cast to Uint64 first to avoid // sign-extending values with the high bit set (e.g. uint8_t >= 128). - // AP types cannot be safely narrowed to Sint64; fail constant evaluation. 
- if (IndexT == PT_IntAP || IndexT == PT_IntAPS) - return false; if (IndexT != PT_Sint64) { if (!isSignedType(IndexT) && IndexT != PT_Uint64) { if (!this->emitCast(IndexT, PT_Uint64, E)) diff --git a/clang/test/Sema/offsetof-unsigned-index.c b/clang/test/Sema/offsetof-unsigned-index.c index 1b34ebe60443f..57bce06a25c98 100644 --- a/clang/test/Sema/offsetof-unsigned-index.c +++ b/clang/test/Sema/offsetof-unsigned-index.c @@ -31,6 +31,11 @@ _Static_assert(__builtin_offsetof(struct NegIdxStruct, x[-1]) == 0, ""); // expe // __uint128_t indices >= 0x8000000000000000 must be rejected. _Static_assert(__builtin_offsetof(struct NegIdxStruct, x[(__uint128_t)0x8000000000000000]) == 0, ""); // expected-error {{not an integral constant expression}} expected-note {{subexpression not valid in a constant expression}} +// Small __uint128_t values that fit in int64_t must work correctly. +_Static_assert(__builtin_offsetof(struct NegIdxStruct, x[(__uint128_t)0]) == + __builtin_offsetof(struct NegIdxStruct, x), + "offsetof with __uint128_t index 0 should work"); + // __uint128_t indices > UINT64_MAX must be rejected (e.g. adding another zero: // old code would truncate 2^64 to 0 via PT_Uint64 cast, silently producing a // wrong result instead of an error). diff --git a/clang/test/SemaCXX/offsetof-unsigned-index.cpp b/clang/test/SemaCXX/offsetof-unsigned-index.cpp index 2b3995d405705..880c45dec0e3e 100644 --- a/clang/test/SemaCXX/offsetof-unsigned-index.cpp +++ b/clang/test/SemaCXX/offsetof-unsigned-index.cpp @@ -36,6 +36,11 @@ static_assert(__builtin_offsetof(NegIdxStruct, x[-1]) == 0, ""); // expected-err // __uint128_t indices >= 0x8000000000000000 must be rejected. static_assert(__builtin_offsetof(NegIdxStruct, x[(__uint128_t)0x8000000000000000]) == 0, ""); // expected-error {{not an integral constant expression}} expected-note {{subexpression not valid in a constant expression}} +// Small __uint128_t values that fit in int64_t must work correctly. 
+static_assert(__builtin_offsetof(NegIdxStruct, x[(__uint128_t)0]) == + __builtin_offsetof(NegIdxStruct, x), + "offsetof with __uint128_t index 0 should work"); + // __uint128_t indices > UINT64_MAX must be rejected (e.g. adding another zero: // old code would truncate 2^64 to 0 via PT_Uint64 cast, silently producing a // wrong result instead of an error). >From d55a576884e468527b202fd07e4e412acb43a8d1 Mon Sep 17 00:00:00 2001 From: Marlus Cadanus da Costa <[email protected]> Date: Tue, 23 Jun 2026 09:49:51 -0400 Subject: [PATCH 10/10] Fix offsetof AP index handling: add CastNoOverflow opcode and APInt crash fix - ExprConstant.cpp: replace sgt/ugt(MaxIdx) with getActiveBits() > 63 to avoid APInt bit-width assertion crash when comparing 128-bit values - Opcodes.td: add APOnlyTypeClass (IntAP, IntAPS) and CastNoOverflow opcode - Interp.h: implement CastNoOverflow -- pops AP value, rejects negative or values that don't fit in int64_t (activeBits > 63), pushes as Sint64 - Compiler.cpp: for AP types, emit visit + CastNoOverflow instead of calling EvaluateAsInt (which is not allowed in the compiler phase) --- clang/lib/AST/ByteCode/Compiler.cpp | 10 ++-------- clang/lib/AST/ByteCode/Interp.h | 17 +++++++++++++++++ clang/lib/AST/ByteCode/Opcodes.td | 10 ++++++++++ clang/lib/AST/ExprConstant.cpp | 5 +---- clang/test/Sema/offsetof-unsigned-index.c | 3 +++ clang/test/SemaCXX/offsetof-unsigned-index.cpp | 3 +++ 6 files changed, 36 insertions(+), 12 deletions(-) diff --git a/clang/lib/AST/ByteCode/Compiler.cpp b/clang/lib/AST/ByteCode/Compiler.cpp index e954e2915bfc0..23c4bd4108a2f 100644 --- a/clang/lib/AST/ByteCode/Compiler.cpp +++ b/clang/lib/AST/ByteCode/Compiler.cpp @@ -3800,15 +3800,9 @@ bool Compiler<Emitter>::VisitOffsetOfExpr(const OffsetOfExpr *E) { } if (IndexT == PT_IntAP || IndexT == PT_IntAPS) { - // AP types (e.g. __uint128_t, __int128) cannot be safely cast to - // Sint64. Evaluate the constant and push it directly as Sint64. 
- Expr::EvalResult EvalResult; - if (!ArrayIndexExpr->EvaluateAsInt(EvalResult, Ctx.getASTContext())) + if (!this->visit(ArrayIndexExpr)) return false; - llvm::APSInt IdxVal = EvalResult.Val.getInt(); - if (IdxVal.isNegative() || !IdxVal.isSignedIntN(64)) - return false; - if (!this->emitConstSint64((int64_t)IdxVal.getZExtValue(), E)) + if (!this->emitCastNoOverflow(IndexT, E)) return false; continue; } diff --git a/clang/lib/AST/ByteCode/Interp.h b/clang/lib/AST/ByteCode/Interp.h index ad807816aa904..c8709ce0f960b 100644 --- a/clang/lib/AST/ByteCode/Interp.h +++ b/clang/lib/AST/ByteCode/Interp.h @@ -2877,6 +2877,23 @@ bool CastAPS(InterpState &S, CodePtr OpPC, uint32_t BitWidth) { return true; } +// Cast an AP integer to Sint64, failing constant evaluation if the value is +// negative or too large to fit (i.e. truncation would change the value). +template <PrimType Name, class T = typename PrimConv<Name>::T> +bool CastNoOverflow(InterpState &S, CodePtr OpPC) { + T Source = S.Stk.pop<T>(); + APSInt Val = Source.toAPSInt(); + if (Val.isNegative() || Val.getActiveBits() > 63) { + S.FFDiag(S.Current->getLocation(OpPC), + diag::note_invalid_subexpr_in_const_expr) + << S.Current->getRange(OpPC); + return false; + } + S.Stk.push<Integral<64, true>>( + Integral<64, true>::from((int64_t)Val.getZExtValue())); + return true; +} + template <PrimType Name, class T = typename PrimConv<Name>::T> bool CastIntegralFloating(InterpState &S, CodePtr OpPC, const llvm::fltSemantics *Sem, uint32_t FPOI) { diff --git a/clang/lib/AST/ByteCode/Opcodes.td b/clang/lib/AST/ByteCode/Opcodes.td index e350d7b2e547d..b375cb5f6b34d 100644 --- a/clang/lib/AST/ByteCode/Opcodes.td +++ b/clang/lib/AST/ByteCode/Opcodes.td @@ -748,6 +748,16 @@ def CastAPS : Opcode { let HasGroup = 1; } +def APOnlyTypeClass : TypeClass { + let Types = [IntAP, IntAPS]; +} + +// Cast from an AP integer type to Sint64, failing if the value doesn't fit. 
+def CastNoOverflow : Opcode { + let Types = [APOnlyTypeClass]; + let HasGroup = 1; +} + // Cast an integer to a floating type def CastIntegralFloating : Opcode { let Types = [AluTypeClass]; diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 3b96c884a50f0..efdc777127bef 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -19286,10 +19286,7 @@ bool IntExprEvaluator::VisitOffsetOfExpr(const OffsetOfExpr *OOE) { CharUnits ElementSize = Info.Ctx.getTypeSizeInChars(CurrentType); // Reject negative indices, indices too large to fit in int64_t, // and overflow in the offset computation. - APSInt MaxIdx = APSInt::getMaxValue(64, /*Unsigned=*/false); - if (IdxResult.isSigned() - ? (IdxResult.isNegative() || IdxResult.sgt(MaxIdx)) - : IdxResult.ugt(MaxIdx)) + if (IdxResult.isNegative() || IdxResult.getActiveBits() > 63) return Error(OOE); int64_t IdxVal = IdxResult.isUnsigned() ? (int64_t)IdxResult.getZExtValue() diff --git a/clang/test/Sema/offsetof-unsigned-index.c b/clang/test/Sema/offsetof-unsigned-index.c index 57bce06a25c98..8eb764710baf5 100644 --- a/clang/test/Sema/offsetof-unsigned-index.c +++ b/clang/test/Sema/offsetof-unsigned-index.c @@ -35,6 +35,9 @@ _Static_assert(__builtin_offsetof(struct NegIdxStruct, x[(__uint128_t)0x80000000 _Static_assert(__builtin_offsetof(struct NegIdxStruct, x[(__uint128_t)0]) == __builtin_offsetof(struct NegIdxStruct, x), "offsetof with __uint128_t index 0 should work"); +_Static_assert(__builtin_offsetof(struct NegIdxStruct, x[(__uint128_t)1]) == + __builtin_offsetof(struct NegIdxStruct, x) + sizeof(int), + "offsetof with __uint128_t index 1 should work"); // __uint128_t indices > UINT64_MAX must be rejected (e.g. 
adding another zero: // old code would truncate 2^64 to 0 via PT_Uint64 cast, silently producing a diff --git a/clang/test/SemaCXX/offsetof-unsigned-index.cpp b/clang/test/SemaCXX/offsetof-unsigned-index.cpp index 880c45dec0e3e..e4c041bf5d7e3 100644 --- a/clang/test/SemaCXX/offsetof-unsigned-index.cpp +++ b/clang/test/SemaCXX/offsetof-unsigned-index.cpp @@ -40,6 +40,9 @@ static_assert(__builtin_offsetof(NegIdxStruct, x[(__uint128_t)0x8000000000000000 static_assert(__builtin_offsetof(NegIdxStruct, x[(__uint128_t)0]) == __builtin_offsetof(NegIdxStruct, x), "offsetof with __uint128_t index 0 should work"); +static_assert(__builtin_offsetof(NegIdxStruct, x[(__uint128_t)1]) == + __builtin_offsetof(NegIdxStruct, x) + sizeof(int), + "offsetof with __uint128_t index 1 should work"); // __uint128_t indices > UINT64_MAX must be rejected (e.g. adding another zero: // old code would truncate 2^64 to 0 via PT_Uint64 cast, silently producing a _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
