https://github.com/hazohelet updated https://github.com/llvm/llvm-project/pull/65969:
>From 80d8743441ce1e25f19dbc48bce480b5becfa208 Mon Sep 17 00:00:00 2001 From: Takuya Shimizu <shimizu2...@gmail.com> Date: Tue, 12 Sep 2023 00:39:44 +0900 Subject: [PATCH 1/3] [clang][Sema] Stop format size estimator upon %p to adapt to linux kernel's extension GCC stops counting format string's size when it sees %p format in order to avoid `Wformat-truncation` false positive against Linux kernel's format extension (%pOF, for example). This change makes clang's behavior align with GCC's. As requested in https://github.com/llvm/llvm-project/issues/64871 --- clang/include/clang/AST/FormatString.h | 6 ++++++ clang/lib/AST/PrintfFormatString.cpp | 8 +++++++- clang/lib/Sema/SemaChecking.cpp | 10 ++++++++++ clang/test/Sema/warn-fortify-source.c | 16 +++++++++++++--- 4 files changed, 36 insertions(+), 4 deletions(-) diff --git a/clang/include/clang/AST/FormatString.h b/clang/include/clang/AST/FormatString.h index 5c4ad9baaef608c..97ce3cfe0b25c68 100644 --- a/clang/include/clang/AST/FormatString.h +++ b/clang/include/clang/AST/FormatString.h @@ -742,6 +742,12 @@ class FormatStringHandler { return true; } + virtual bool + ShouldStopOnFormatSpecifier(const analyze_printf::PrintfSpecifier &FS, + unsigned RemainLen) { + return false; + } + /// Handle mask types whose sizes are not between one and eight bytes. virtual void handleInvalidMaskType(StringRef MaskType) {} diff --git a/clang/lib/AST/PrintfFormatString.cpp b/clang/lib/AST/PrintfFormatString.cpp index f0b9d0ecaf23461..750f584d8d37a76 100644 --- a/clang/lib/AST/PrintfFormatString.cpp +++ b/clang/lib/AST/PrintfFormatString.cpp @@ -429,7 +429,13 @@ bool clang::analyze_format_string::ParsePrintfString(FormatStringHandler &H, // we can recover from? if (!FSR.hasValue()) continue; - // We have a format specifier. Pass it to the callback. + // We have a format specifier. + // In case the handler needs to abort parsing upon specific specifiers + if (H.ShouldStopOnFormatSpecifier( + FSR.getValue(), static_cast<unsigned>(E - FSR.getStart()))) { + return false; + } + // Pass the format specifier to the callback. if (!H.HandlePrintfSpecifier(FSR.getValue(), FSR.getStart(), I - FSR.getStart(), Target)) return true; diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 3b4ac613da76aa8..1a6ff6cfa19b661 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -860,6 +860,16 @@ class EstimateSizeFormatHandler : Size(std::min(Format.find(0), Format.size()) + 1 /* null byte always written by sprintf */) {} + bool ShouldStopOnFormatSpecifier(const analyze_printf::PrintfSpecifier &FS, + unsigned RemainLen) override { + if (FS.getConversionSpecifier().getKind() == + analyze_printf::PrintfConversionSpecifier::pArg) { + assert(Size >= RemainLen && "no underflow"); + Size -= RemainLen; + return true; + } + return false; + } bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS, const char *, unsigned SpecifierLen, const TargetInfo &) override { diff --git a/clang/test/Sema/warn-fortify-source.c b/clang/test/Sema/warn-fortify-source.c index 5ed9782b26fb788..fba6c718be4a33a 100644 --- a/clang/test/Sema/warn-fortify-source.c +++ b/clang/test/Sema/warn-fortify-source.c @@ -85,7 +85,7 @@ void call_memset(void) { __builtin_memset(buf, 0xff, 11); // expected-warning {{'memset' will always overflow; destination buffer has size 10, but size argument is 11}} } -void call_snprintf(double d) { +void call_snprintf(double d, int *ptr) { char buf[10]; __builtin_snprintf(buf, 10, "merp"); __builtin_snprintf(buf, 11, "merp"); // expected-warning {{'snprintf' size argument is too large; destination buffer has size 10, but size argument is 11}} @@ -96,6 +96,11 @@ void call_snprintf(double d) { __builtin_snprintf(buf, 1, "%.1000g", d); // expected-warning {{'snprintf' will always be truncated; specified size is 1, but format string expands to at least 2}} __builtin_snprintf(buf, 5, "%.1000g", d); __builtin_snprintf(buf, 5, "%.1000G", d); + char node_name[6]; + __builtin_snprintf(node_name, sizeof(node_name), "%pOFn", ptr); + __builtin_snprintf(node_name, sizeof(node_name), "12345%pOFn", ptr); + __builtin_snprintf(node_name, sizeof(node_name), "123456%pOFn", ptr); // expected-warning {{'snprintf' will always be truncated; specified size is 6, but format string expands to at least 7}} + __builtin_snprintf(node_name, sizeof(node_name), "1234567%pOFn", ptr); // expected-warning {{'snprintf' will always be truncated; specified size is 6, but format string expands to at least 8}} } void call_vsnprintf(void) { @@ -110,6 +115,11 @@ void call_vsnprintf(void) { __builtin_vsnprintf(buf, 1, "%.1000g", list); // expected-warning {{'vsnprintf' will always be truncated; specified size is 1, but format string expands to at least 2}} __builtin_vsnprintf(buf, 5, "%.1000g", list); __builtin_vsnprintf(buf, 5, "%.1000G", list); + char node_name[6]; + __builtin_snprintf(node_name, sizeof(node_name), "%pOFn", list); + __builtin_snprintf(node_name, sizeof(node_name), "12345%pOFn", list); + __builtin_snprintf(node_name, sizeof(node_name), "123456%pOFn", list); // expected-warning {{'snprintf' will always be truncated; specified size is 6, but format string expands to at least 7}} + __builtin_snprintf(node_name, sizeof(node_name), "1234567%pOFn", list); // expected-warning {{'snprintf' will always be truncated; specified size is 6, but format string expands to at least 8}} } void call_sprintf_chk(char *buf) { @@ -149,7 +159,7 @@ void call_sprintf_chk(char *buf) { __builtin___sprintf_chk(buf, 1, 4, "%#x", 9); __builtin___sprintf_chk(buf, 1, 3, "%#x", 9); // expected-warning {{'sprintf' will always overflow; destination buffer has size 3, but format string expands to at least 4}} __builtin___sprintf_chk(buf, 1, 4, "%p", (void *)9); - __builtin___sprintf_chk(buf, 1, 3, "%p", (void *)9); // expected-warning {{'sprintf' will always overflow; destination buffer has size 3, but format string expands to at least 4}} + __builtin___sprintf_chk(buf, 1, 3, "%p", (void *)9); __builtin___sprintf_chk(buf, 1, 3, "%+d", 9); __builtin___sprintf_chk(buf, 1, 2, "%+d", 9); // expected-warning {{'sprintf' will always overflow; destination buffer has size 2, but format string expands to at least 3}} __builtin___sprintf_chk(buf, 1, 3, "% i", 9); @@ -186,7 +196,7 @@ void call_sprintf(void) { sprintf(buf, "12%#x", 9); sprintf(buf, "123%#x", 9); // expected-warning {{'sprintf' will always overflow; destination buffer has size 6, but format string expands to at least 7}} sprintf(buf, "12%p", (void *)9); - sprintf(buf, "123%p", (void *)9); // expected-warning {{'sprintf' will always overflow; destination buffer has size 6, but format string expands to at least 7}} + sprintf(buf, "123%p", (void *)9); sprintf(buf, "123%+d", 9); sprintf(buf, "1234%+d", 9); // expected-warning {{'sprintf' will always overflow; destination buffer has size 6, but format string expands to at least 7}} sprintf(buf, "123% i", 9); >From 82d09fabb82aa7eec9e34dffa5d3ff934d0746aa Mon Sep 17 00:00:00 2001 From: Takuya Shimizu <shimizu2...@gmail.com> Date: Tue, 12 Sep 2023 17:09:45 +0900 Subject: [PATCH 2/3] Address comments from nickdesaulniers NFC stylistic changes --- clang/lib/AST/PrintfFormatString.cpp | 2 +- clang/lib/Sema/SemaChecking.cpp | 19 ++++++++++++------- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/clang/lib/AST/PrintfFormatString.cpp b/clang/lib/AST/PrintfFormatString.cpp index 750f584d8d37a76..bcbbcc3882ab57f 100644 --- a/clang/lib/AST/PrintfFormatString.cpp +++ b/clang/lib/AST/PrintfFormatString.cpp @@ -430,7 +430,7 @@ bool clang::analyze_format_string::ParsePrintfString(FormatStringHandler &H, if (!FSR.hasValue()) continue; // We have a format specifier. - // In case the handler needs to abort parsing upon specific specifiers + // In case the handler needs to abort parsing upon specific specifiers. if (H.ShouldStopOnFormatSpecifier( FSR.getValue(), static_cast<unsigned>(E - FSR.getStart()))) { return false; diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 1a6ff6cfa19b661..cf48ddd5ae1b896 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -862,13 +862,18 @@ class EstimateSizeFormatHandler bool ShouldStopOnFormatSpecifier(const analyze_printf::PrintfSpecifier &FS, unsigned RemainLen) override { - if (FS.getConversionSpecifier().getKind() == - analyze_printf::PrintfConversionSpecifier::pArg) { - assert(Size >= RemainLen && "no underflow"); - Size -= RemainLen; - return true; - } - return false; + if (FS.getConversionSpecifier().getKind() != + analyze_printf::PrintfConversionSpecifier::pArg) + return false; + // Linux Kernel has its own extension for %p format specifier (%pOF, for + // example) + // Kernel Document: https://docs.kernel.org/core-api/printk-formats.html + // If the format specifier is %p, format string size estimator should stop + // so as not to emit false-positive against kernel codes. + // This behavior aligns with GCC's + assert(Size >= RemainLen && "no underflow"); + Size -= RemainLen; + return true; } bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS, const char *, unsigned SpecifierLen, >From 84481f55b1cd3c37acc063d36b3515328aa08b20 Mon Sep 17 00:00:00 2001 From: Takuya Shimizu <shimizu2...@gmail.com> Date: Tue, 12 Sep 2023 21:13:59 +0900 Subject: [PATCH 3/3] Check subsequent character to say whether or not the format is likely to be linux extension --- clang/docs/ReleaseNotes.rst | 4 ++ clang/include/clang/AST/FormatString.h | 3 +- clang/lib/AST/PrintfFormatString.cpp | 4 +- clang/lib/Sema/SemaChecking.cpp | 62 ++++++++++++++++++++++---- clang/test/Sema/warn-fortify-source.c | 4 +- 5 files changed, 63 insertions(+), 14 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index cc8b2c3808933cb..b0d544c39d73f1a 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -163,6 +163,10 @@ Improvements to Clang's diagnostics - Clang's ``-Wfortify-source`` now diagnoses ``snprintf`` call that is known to result in string truncation. (`#64871: <https://github.com/llvm/llvm-project/issues/64871>`_). + Clang stops calculating the minimum length of the output format string upon %p + if the succeeding character can be a part of the format specifier in linux's + format extension in order to avoid false positive warnings against the linux + code base. Also clang no longer emits false positive warnings about the output length of ``%g`` format specifier. - Clang now emits ``-Wcast-qual`` for functional-style cast expressions. diff --git a/clang/include/clang/AST/FormatString.h b/clang/include/clang/AST/FormatString.h index 97ce3cfe0b25c68..81bbeb00b0766d8 100644 --- a/clang/include/clang/AST/FormatString.h +++ b/clang/include/clang/AST/FormatString.h @@ -744,7 +744,8 @@ class FormatStringHandler { virtual bool ShouldStopOnFormatSpecifier(const analyze_printf::PrintfSpecifier &FS, - unsigned RemainLen) { + const char *StartSpecifier, const char *I, + const char *E) { return false; } diff --git a/clang/lib/AST/PrintfFormatString.cpp b/clang/lib/AST/PrintfFormatString.cpp index bcbbcc3882ab57f..2297135a96e8976 100644 --- a/clang/lib/AST/PrintfFormatString.cpp +++ b/clang/lib/AST/PrintfFormatString.cpp @@ -431,10 +431,8 @@ bool clang::analyze_format_string::ParsePrintfString(FormatStringHandler &H, continue; // We have a format specifier. // In case the handler needs to abort parsing upon specific specifiers. - if (H.ShouldStopOnFormatSpecifier( - FSR.getValue(), static_cast<unsigned>(E - FSR.getStart()))) { + if (H.ShouldStopOnFormatSpecifier(FSR.getValue(), FSR.getStart(), I, E)) return false; - } // Pass the format specifier to the callback. if (!H.HandlePrintfSpecifier(FSR.getValue(), FSR.getStart(), I - FSR.getStart(), Target)) diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index cf48ddd5ae1b896..9ac200497ccc181 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -851,6 +851,50 @@ class ScanfDiagnosticFormatHandler } }; +/// `I` points to the next character of `%p` format. +/// This functon checks if the subsequent character can be linux kernel's +/// extnded format specifier +static inline constexpr bool canBeLinuxFormatExtension(const char *I, + const char *E) { + assert(I < E && "format string not yet exhausted"); + // Kernel Document: https://docs.kernel.org/core-api/printk-formats.html + switch (*I) { + default: + return false; + case 'S': + case 's': + case 'B': + case 'R': + case 'r': + case 'h': + case 'b': + case 'M': + case 'm': + case 'I': + case 'i': + case 'E': + case 'U': + case 'V': + case 'K': + case 'N': + case '4': + case 'a': + case 'd': + case 't': + case 'C': + case 'D': + case 'g': + case 'G': + case 'O': + case 'f': + case 'x': + case 'e': + case 'u': + case 'k': + return true; + } +} + class EstimateSizeFormatHandler : public analyze_format_string::FormatStringHandler { size_t Size; @@ -861,16 +905,18 @@ class EstimateSizeFormatHandler 1 /* null byte always written by sprintf */) {} bool ShouldStopOnFormatSpecifier(const analyze_printf::PrintfSpecifier &FS, - unsigned RemainLen) override { - if (FS.getConversionSpecifier().getKind() != - analyze_printf::PrintfConversionSpecifier::pArg) - return false; + const char *StartSpecifier, const char *I, + const char *E) override { // Linux Kernel has its own extension for %p format specifier (%pOF, for // example) - // Kernel Document: https://docs.kernel.org/core-api/printk-formats.html - // If the format specifier is %p, format string size estimator should stop - // so as not to emit false-positive against kernel codes. - // This behavior aligns with GCC's + // If the format is likely to be linux kernel extension, we should stop + // the format size estimation so as not to emit false positive in linux + // codes. + if (FS.getConversionSpecifier().getKind() != + analyze_printf::PrintfConversionSpecifier::pArg || + I == E || !canBeLinuxFormatExtension(I, E)) + return false; + unsigned RemainLen = static_cast<unsigned>(E - StartSpecifier); assert(Size >= RemainLen && "no underflow"); Size -= RemainLen; return true; diff --git a/clang/test/Sema/warn-fortify-source.c b/clang/test/Sema/warn-fortify-source.c index fba6c718be4a33a..6ad507861562f96 100644 --- a/clang/test/Sema/warn-fortify-source.c +++ b/clang/test/Sema/warn-fortify-source.c @@ -159,7 +159,7 @@ void call_sprintf_chk(char *buf) { __builtin___sprintf_chk(buf, 1, 4, "%#x", 9); __builtin___sprintf_chk(buf, 1, 3, "%#x", 9); // expected-warning {{'sprintf' will always overflow; destination buffer has size 3, but format string expands to at least 4}} __builtin___sprintf_chk(buf, 1, 4, "%p", (void *)9); - __builtin___sprintf_chk(buf, 1, 3, "%p", (void *)9); + __builtin___sprintf_chk(buf, 1, 3, "%p", (void *)9); // expected-warning {{'sprintf' will always overflow; destination buffer has size 3, but format string expands to at least 4}} __builtin___sprintf_chk(buf, 1, 3, "%+d", 9); __builtin___sprintf_chk(buf, 1, 2, "%+d", 9); // expected-warning {{'sprintf' will always overflow; destination buffer has size 2, but format string expands to at least 3}} __builtin___sprintf_chk(buf, 1, 3, "% i", 9); @@ -196,7 +196,7 @@ void call_sprintf(void) { sprintf(buf, "12%#x", 9); sprintf(buf, "123%#x", 9); // expected-warning {{'sprintf' will always overflow; destination buffer has size 6, but format string expands to at least 7}} sprintf(buf, "12%p", (void *)9); - sprintf(buf, "123%p", (void *)9); + sprintf(buf, "123%p", (void *)9); // expected-warning {{'sprintf' will always overflow; destination buffer has size 6, but format string expands to at least 7}} sprintf(buf, "123%+d", 9); sprintf(buf, "1234%+d", 9); // expected-warning {{'sprintf' will always overflow; destination buffer has size 6, but format string expands to at least 7}} sprintf(buf, "123% i", 9); _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits