Author: Oliver Hunt Date: 2025-12-24T00:30:17-08:00 New Revision: c0f4a8a7fcc759fb390ae420f877a30027e1f936
URL: https://github.com/llvm/llvm-project/commit/c0f4a8a7fcc759fb390ae420f877a30027e1f936 DIFF: https://github.com/llvm/llvm-project/commit/c0f4a8a7fcc759fb390ae420f877a30027e1f936.diff LOG: [clang][bytecode] Fix incorrect handling of arithmetic on string literals (#173212) The fast path for string literals fails to consider the offset of the pointer into an array of chars, this change simply adds the required checks and bounds corrections prior to calling strnlen. Fixes #173175 Added: clang/test/AST/ByteCode/char-buffer-arithmetic.c clang/test/Sema/constant-string-evaluation.c Modified: clang/lib/AST/ByteCode/Context.cpp Removed: ################################################################################ diff --git a/clang/lib/AST/ByteCode/Context.cpp b/clang/lib/AST/ByteCode/Context.cpp index 74ec986e49ca7..208fcb2a2732e 100644 --- a/clang/lib/AST/ByteCode/Context.cpp +++ b/clang/lib/AST/ByteCode/Context.cpp @@ -294,13 +294,15 @@ bool Context::evaluateStrlen(State &Parent, const Expr *E, uint64_t &Result) { if (!FieldDesc->isPrimitiveArray()) return false; - if (Ptr.isDummy() || Ptr.isUnknownSizeArray()) + if (Ptr.isDummy() || Ptr.isUnknownSizeArray() || Ptr.isPastEnd()) return false; unsigned N = Ptr.getNumElems(); if (Ptr.elemSize() == 1) { - Result = strnlen(reinterpret_cast<const char *>(Ptr.getRawAddress()), N); - return Result != N; + unsigned Size = N - Ptr.getIndex(); + Result = + strnlen(reinterpret_cast<const char *>(Ptr.getRawAddress()), Size); + return Result != Size; } PrimType ElemT = FieldDesc->getPrimType(); diff --git a/clang/test/AST/ByteCode/char-buffer-arithmetic.c b/clang/test/AST/ByteCode/char-buffer-arithmetic.c new file mode 100644 index 0000000000000..cf3aa70689163 --- /dev/null +++ b/clang/test/AST/ByteCode/char-buffer-arithmetic.c @@ -0,0 +1,37 @@ +// RUN: %clang_cc1 -std=c23 -triple x86_64 -verify -fsyntax-only -Wfortify-source -Wno-string-plus-int %s +// RUN: %clang_cc1 -std=c23 -triple x86_64 -verify -fsyntax-only 
-Wfortify-source -Wno-string-plus-int -fexperimental-new-constant-interpreter %s + +void test(char *c) { + // We test offsets 0 to 4. + // 0: a no op of course + // 1,2: these result in a different length string than the buffer size + // 3: the last position: this hits ptr+object_size being a valid pointer, + // but not dereferencable + // 4: completely invalid pointer + __builtin_strcat(c, "42" + 0); + __builtin_strcat(c, "42" + 1); + __builtin_strcat(c, "42" + 2); + __builtin_strcat(c, "42" + 3); + __builtin_strcat(c, "42" + 4); + _Static_assert(__builtin_strlen("42" + 0) == 2); + + // A test without a null terminator, this captures incorrect size computation + // and incorrectly specifying the buffer size to strlen. + char buffer[1]; + static const char test_buffer[] = {'4','2'}; + __builtin_strcpy(buffer, test_buffer + 0); + __builtin_strcpy(buffer, test_buffer + 1); + // Note: these show that we will not issue a fortify warning when the source + // buffer is not null terminated. + __builtin_strcpy(buffer, test_buffer + 2); + __builtin_strcpy(buffer, test_buffer + 3); + + // Verifying strlen computes from the correct starting point. 
+ _Static_assert(__builtin_strlen("42" + 1) == 1); + _Static_assert(__builtin_strlen("42" + 2) == 0); + _Static_assert(__builtin_strlen("42" + 3)); + // expected-error@-1 {{static assertion expression is not an integral constant expression}} + _Static_assert(__builtin_strlen("42" + 4)); + // expected-error@-1 {{static assertion expression is not an integral constant expression}} + // expected-note@-2 {{cannot refer to element 4 of array of 3 elements in a constant expression}} +} diff --git a/clang/test/Sema/constant-string-evaluation.c b/clang/test/Sema/constant-string-evaluation.c new file mode 100644 index 0000000000000..7cc5fc888933e --- /dev/null +++ b/clang/test/Sema/constant-string-evaluation.c @@ -0,0 +1,155 @@ +// RUN: %clang_cc1 -x c -std=c2x -fsyntax-only -verify -Wno-string-plus-int -Wno-unused-value %s +// RUN: %clang_cc1 -x c -std=c2x -fsyntax-only -verify -Wno-string-plus-int -Wno-unused-value %s -fexperimental-new-constant-interpreter +// RUN: %clang_cc1 -x c++ -std=c++23 -fsyntax-only -verify -Wno-string-plus-int -Wno-unused-value %s +// RUN: %clang_cc1 -x c++ -std=c++23 -fsyntax-only -verify -Wno-string-plus-int -Wno-unused-value %s -fexperimental-new-constant-interpreter + +void test(void) { + char buffer[10]; + __builtin_sprintf(buffer, "%%d%d%d"+0, 1); + // expected-warning@-1 {{more '%' conversions than data arguments}} + __builtin_sprintf(buffer, "%%d%d%d"+1, 1); + // expected-warning@-1 {{more '%' conversions than data arguments}} + __builtin_sprintf(buffer, "%%d%d%d"+2, 1); + // expected-warning@-1 {{more '%' conversions than data arguments}} + __builtin_sprintf(buffer, "%%d%d%d"+3, 1); + // expected-warning@-1 {{more '%' conversions than data arguments}} + __builtin_sprintf(buffer, "%%d%d%d"+4, 1); + __builtin_sprintf(buffer, "%%d%d%d"+5, 1); + __builtin_sprintf(buffer, "%%d%d%d"+6, 1); + // expected-warning@-1 {{data argument not used by format string}} + __builtin_sprintf(buffer, "%%d%d%d"+7, 1); + // expected-warning@-1 {{format string 
is empty}} + __builtin_sprintf(buffer, "%%d%d%d"+8, 1); + // TODO: we should probably warning about the format string being out of bounds + + __builtin_sprintf(buffer, "%%d%d%d"+0, 1, 2); + __builtin_sprintf(buffer, "%%d%d%d"+1, 1, 2); + // expected-warning@-1 {{more '%' conversions than data arguments}} + __builtin_sprintf(buffer, "%%d%d%d"+2, 1, 2); + __builtin_sprintf(buffer, "%%d%d%d"+3, 1, 2); + __builtin_sprintf(buffer, "%%d%d%d"+4, 1, 2); + // expected-warning@-1 {{data argument not used by format string}} + __builtin_sprintf(buffer, "%%d%d%d"+5, 1, 2); + // expected-warning@-1 {{data argument not used by format string}} + __builtin_sprintf(buffer, "%%d%d%d"+6, 1, 2); + // expected-warning@-1 {{data argument not used by format string}} + __builtin_sprintf(buffer, "%%d%d%d"+7, 1, 2); + // expected-warning@-1 {{format string is empty}} + __builtin_sprintf(buffer, "%%d%d%d"+8, 1, 2); + __builtin_sprintf(buffer, "%%d%d%d"+9, 1, 2); + + __builtin_sprintf(buffer, "%%d%d%d"+0, 1, 2, 3); + // expected-warning@-1 {{data argument not used by format string}} + __builtin_sprintf(buffer, "%%d%d%d"+1, 1, 2, 3); + __builtin_sprintf(buffer, "%%d%d%d"+2, 1, 2, 3); + // expected-warning@-1 {{data argument not used by format string}} + __builtin_sprintf(buffer, "%%d%d%d"+3, 1, 2, 3); + // expected-warning@-1 {{data argument not used by format string}} + __builtin_sprintf(buffer, "%%d%d%d"+4, 1, 2, 3); + // expected-warning@-1 {{data argument not used by format string}} + __builtin_sprintf(buffer, "%%d%d%d"+5, 1, 2, 3); + // expected-warning@-1 {{data argument not used by format string}} + __builtin_sprintf(buffer, "%%d%d%d"+6, 1, 2, 3); + // expected-warning@-1 {{data argument not used by format string}} + __builtin_sprintf(buffer, "%%d%d%d"+7, 1, 2, 3); + // expected-warning@-1 {{format string is empty}} + __builtin_sprintf(buffer, "%%d%d%d"+8, 1, 2, 3); + __builtin_sprintf(buffer, "%%d%d%d"+9, 1, 2, 3); + static const char format_string[] = {'%', '%', 'd', '%', 'd', '%', 
'd'}; + __builtin_sprintf(buffer, format_string+0, 1); + __builtin_sprintf(buffer, format_string+1, 1); + __builtin_sprintf(buffer, format_string+2, 1); + __builtin_sprintf(buffer, format_string+3, 1); + __builtin_sprintf(buffer, format_string+4, 1); + __builtin_sprintf(buffer, format_string+5, 1); + __builtin_sprintf(buffer, format_string+6, 1); + __builtin_sprintf(buffer, format_string+7, 1); + #ifdef __cplusplus + static constexpr char ce_format_string[] = {'%', '%', 'd', '%', 'd', '%', 'd'}; + __builtin_sprintf(buffer, ce_format_string+0, 1); + __builtin_sprintf(buffer, ce_format_string+1, 1); + __builtin_sprintf(buffer, ce_format_string+2, 1); + __builtin_sprintf(buffer, ce_format_string+3, 1); + __builtin_sprintf(buffer, ce_format_string+4, 1); + __builtin_sprintf(buffer, ce_format_string+5, 1); + __builtin_sprintf(buffer, ce_format_string+6, 1); + __builtin_sprintf(buffer, ce_format_string+7, 1); + #endif +} + +#ifdef __cplusplus +template <class FormatStringSource> bool test_template() { + char buffer[10]; + __builtin_sprintf(buffer, FormatStringSource::format(0), 1); // #template_test1 + __builtin_sprintf(buffer, FormatStringSource::format(1), 1); // #template_test2 + __builtin_sprintf(buffer, FormatStringSource::format(2), 1); // #template_test3 + __builtin_sprintf(buffer, FormatStringSource::format(3), 1); // #template_test4 + __builtin_sprintf(buffer, FormatStringSource::format(4), 1); // #template_test5 + __builtin_sprintf(buffer, FormatStringSource::format(5), 1); // #template_test6 + __builtin_sprintf(buffer, FormatStringSource::format(6), 1); // #template_test7 + __builtin_sprintf(buffer, FormatStringSource::format(7), 1); // #template_test8 + __builtin_sprintf(buffer, FormatStringSource::format(8), 1); // #template_test9 + return true; +} + +struct LiteralFormatStr { + static consteval const char *format(int N) { + return "%%d%d%d" + N; // #LiteralFormatStrLiteral + } +}; + +struct ConstLiteralFormatStr { + static constexpr const char *formatStr = 
"%%d%d%d"; // #ConstLiteralFormatStrLiteral + static consteval const char *format(int N) { + return formatStr + N; + } +}; + +struct NullTerminatedArrayFormatStr { + static constexpr char formatStr[] = {'%', '%', 'd', '%', 'd', '%', 'd', 0}; + static consteval const char *format(int N) { + return formatStr + N; + } +}; + +struct NoNullTerminatedArrayFormatStr { + static constexpr char formatStr[] = {'%', '%', 'd', '%', 'd', '%', 'd'}; + static consteval const char *format(int N) { + return formatStr + N; // #NoNullTerminatedArrayFormatStr_format + } +}; + +void test_templates() { + test_template<LiteralFormatStr>(); + // expected-note@-1 {{in instantiation of function template specialization 'test_template<LiteralFormatStr>' requested here}} + // expected-warning@#template_test1 {{more '%' conversions than data arguments}} + // expected-warning@#template_test2 {{more '%' conversions than data arguments}} + // expected-warning@#template_test3 {{more '%' conversions than data arguments}} + // expected-warning@#template_test4 {{more '%' conversions than data arguments}} + // expected-warning@#template_test5 {{more '%' conversions than data arguments}} + // expected-warning@#template_test6 {{more '%' conversions than data arguments}} + // expected-warning@#template_test7 {{more '%' conversions than data arguments}} + // expected-warning@#template_test8 {{more '%' conversions than data arguments}} + // expected-warning@#template_test9 {{more '%' conversions than data arguments}} + // expected-note@#LiteralFormatStrLiteral 9 {{format string is defined here}} + test_template<ConstLiteralFormatStr>(); + // expected-note@-1 {{in instantiation of function template specialization 'test_template<ConstLiteralFormatStr>' requested here}} + // expected-warning@#template_test1 {{more '%' conversions than data arguments}} + // expected-warning@#template_test2 {{more '%' conversions than data arguments}} + // expected-warning@#template_test3 {{more '%' conversions than data 
arguments}} + // expected-warning@#template_test4 {{more '%' conversions than data arguments}} + // expected-warning@#template_test5 {{more '%' conversions than data arguments}} + // expected-warning@#template_test6 {{more '%' conversions than data arguments}} + // expected-warning@#template_test7 {{more '%' conversions than data arguments}} + // expected-warning@#template_test8 {{more '%' conversions than data arguments}} + // expected-warning@#template_test9 {{more '%' conversions than data arguments}} + // expected-note@#ConstLiteralFormatStrLiteral 9 {{format string is defined here}} + test_template<NullTerminatedArrayFormatStr>(); + test_template<NoNullTerminatedArrayFormatStr>(); + // expected-note@-1 {{in instantiation of function template specialization 'test_template<NoNullTerminatedArrayFormatStr>' requested here}} + // expected-note@#NoNullTerminatedArrayFormatStr_format {{cannot refer to element 8 of array of 7 elements in a constant expression}} + // expected-error@#template_test9 {{call to consteval function 'NoNullTerminatedArrayFormatStr::format' is not a constant expression}} + // expected-note@#template_test9 {{in call to 'format(8)'}} +} + +#endif _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
