https://github.com/zeyi2 updated 
https://github.com/llvm/llvm-project/pull/169215

>From 65513c1712bf0d62ec02b6f7c8fae723b9d0f877 Mon Sep 17 00:00:00 2001
From: mtx <[email protected]>
Date: Sun, 23 Nov 2025 22:15:15 +0800
Subject: [PATCH 1/6] [clang-tidy] Fix OOB access in `FormatStringConverter`
 with signed chars

---
 .../clang-tidy/utils/FormatStringConverter.cpp             | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/clang-tools-extra/clang-tidy/utils/FormatStringConverter.cpp b/clang-tools-extra/clang-tidy/utils/FormatStringConverter.cpp
index 23dae04916e9b..a3af9504e6542 100644
--- a/clang-tools-extra/clang-tidy/utils/FormatStringConverter.cpp
+++ b/clang-tools-extra/clang-tidy/utils/FormatStringConverter.cpp
@@ -700,6 +700,7 @@ void FormatStringConverter::finalizeFormatText() {
 /// Append literal parts of the format text, reinstating escapes as required.
 void FormatStringConverter::appendFormatText(const StringRef Text) {
   for (const char Ch : Text) {
+    const unsigned char UCh = static_cast<unsigned char>(Ch);
     if (Ch == '\a')
       StandardFormatString += "\\a";
     else if (Ch == '\b')
@@ -724,10 +725,10 @@ void FormatStringConverter::appendFormatText(const StringRef Text) {
     } else if (Ch == '}') {
       StandardFormatString += "}}";
       FormatStringNeededRewriting = true;
-    } else if (Ch < 32) {
+    } else if (UCh < 32) {
       StandardFormatString += "\\x";
-      StandardFormatString += llvm::hexdigit(Ch >> 4, true);
-      StandardFormatString += llvm::hexdigit(Ch & 0xf, true);
+      StandardFormatString += llvm::hexdigit(UCh >> 4, true);
+      StandardFormatString += llvm::hexdigit(UCh & 0xf, true);
     } else
       StandardFormatString += Ch;
   }

>From 785cf305295e09e4838a9b1514397d176f8f6b24 Mon Sep 17 00:00:00 2001
From: mtx <[email protected]>
Date: Sun, 23 Nov 2025 22:30:55 +0800
Subject: [PATCH 2/6] ~

---
 clang-tools-extra/clang-tidy/utils/FormatStringConverter.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang-tools-extra/clang-tidy/utils/FormatStringConverter.cpp b/clang-tools-extra/clang-tidy/utils/FormatStringConverter.cpp
index a3af9504e6542..d210b000dfd33 100644
--- a/clang-tools-extra/clang-tidy/utils/FormatStringConverter.cpp
+++ b/clang-tools-extra/clang-tidy/utils/FormatStringConverter.cpp
@@ -700,7 +700,7 @@ void FormatStringConverter::finalizeFormatText() {
 /// Append literal parts of the format text, reinstating escapes as required.
 void FormatStringConverter::appendFormatText(const StringRef Text) {
   for (const char Ch : Text) {
-    const unsigned char UCh = static_cast<unsigned char>(Ch);
+    const auto UCh = static_cast<unsigned char>(Ch);
     if (Ch == '\a')
       StandardFormatString += "\\a";
     else if (Ch == '\b')

>From 574e84a3a2fdb39fa9e89118f99d43e05055e792 Mon Sep 17 00:00:00 2001
From: mtx <[email protected]>
Date: Sun, 23 Nov 2025 23:03:19 +0800
Subject: [PATCH 3/6] Add testcase and release notes

---
 clang-tools-extra/docs/ReleaseNotes.rst            |  7 ++++---
 .../checkers/modernize/use-std-print.cpp           | 14 ++++++++++++++
 2 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst
index a6f80e3721db1..644c5cb573cf7 100644
--- a/clang-tools-extra/docs/ReleaseNotes.rst
+++ b/clang-tools-extra/docs/ReleaseNotes.rst
@@ -69,7 +69,7 @@ Potentially Breaking Changes
   - `CharTypdefsToIgnore` to `CharTypedefsToIgnore` in
     :doc:`bugprone-signed-char-misuse
     <clang-tidy/checks/bugprone/signed-char-misuse>`
-  
+
 - Modified the custom message format of :doc:`bugprone-unsafe-functions
   <clang-tidy/checks/bugprone/unsafe-functions>` by assigning a special meaning
   to the character ``>`` at the start of the value of the option
@@ -394,7 +394,7 @@ Changes in existing checks
   <clang-tidy/checks/bugprone/unhandled-self-assignment>` check by adding
   an additional matcher that generalizes the copy-and-swap idiom pattern
   detection.
-  
+
 - Improved :doc:`bugprone-unsafe-functions
   <clang-tidy/checks/bugprone/unsafe-functions>` check by hiding the default
   suffix when the reason starts with the character `>` in the `CustomFunctions`
@@ -497,7 +497,8 @@ Changes in existing checks
 - Improved :doc:`modernize-use-std-print
   <clang-tidy/checks/modernize/use-std-print>` check to correctly match
   when the format string is converted to a different type by an implicit
-  constructor call.
+  constructor call, and fixed a crash when handling format strings
+  containing non-ASCII characters.
 
 - Improved :doc:`performance-unnecessary-copy-initialization
   <clang-tidy/checks/performance/unnecessary-copy-initialization>` by printing
diff --git a/clang-tools-extra/test/clang-tidy/checkers/modernize/use-std-print.cpp b/clang-tools-extra/test/clang-tidy/checkers/modernize/use-std-print.cpp
index ec37f077df7fc..a48f4dcb98b86 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/modernize/use-std-print.cpp
+++ b/clang-tools-extra/test/clang-tidy/checkers/modernize/use-std-print.cpp
@@ -54,6 +54,13 @@ void printf_deceptive_newline() {
   // CHECK-FIXES: std::println("Hello");
 }
 
+void printf_utf8_text() {
+  // Non-ASCII UTF-8 in format string should not crash.
+  printf("你好世界\n");
+  // CHECK-MESSAGES: [[@LINE-1]]:3: warning: use 'std::println' instead of 'printf' [modernize-use-std-print]
+  // CHECK-FIXES: std::println("你好世界");
+}
+
 void printf_crlf_newline() {
   printf("Hello\r\n");
   // CHECK-MESSAGES: [[@LINE-1]]:3: warning: use 'std::print' instead of 'printf' [modernize-use-std-print]
@@ -303,6 +310,13 @@ void fprintf_simple() {
   // CHECK-FIXES: std::print(stderr, "Hello");
 }
 
+void fprintf_utf8_text() {
+  // Non-ASCII UTF-8 in format string should not crash.
+  fprintf(stderr, "你好世界\n");
+  // CHECK-MESSAGES: [[@LINE-1]]:3: warning: use 'std::println' instead of 'fprintf' [modernize-use-std-print]
+  // CHECK-FIXES: std::println(stderr, "你好世界");
+}
+
 void std_printf_simple() {
   std::printf("std::Hello");
   // CHECK-MESSAGES: [[@LINE-1]]:3: warning: use 'std::print' instead of 'printf' [modernize-use-std-print]

>From 9ce0e7d4d6987e39fe7b0d6d280d5118700d1eab Mon Sep 17 00:00:00 2001
From: mtx <[email protected]>
Date: Sun, 23 Nov 2025 23:35:07 +0800
Subject: [PATCH 4/6] Fix encoding

---
 .../clang-tidy/checkers/modernize/use-std-print.cpp  | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/clang-tools-extra/test/clang-tidy/checkers/modernize/use-std-print.cpp b/clang-tools-extra/test/clang-tidy/checkers/modernize/use-std-print.cpp
index a48f4dcb98b86..184d8aa09639a 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/modernize/use-std-print.cpp
+++ b/clang-tools-extra/test/clang-tidy/checkers/modernize/use-std-print.cpp
@@ -55,10 +55,10 @@ void printf_deceptive_newline() {
 }
 
 void printf_utf8_text() {
-  // Non-ASCII UTF-8 in format string should not crash.
-  printf("你好世界\n");
+  // Hex encodes U+4F60 U+597D U+4E16 U+754C (你好世界) in UTF-8
+  printf("\xE4\xBD\xA0\xE5\xA5\xBD\xE4\xB8\x96\xE7\x95\x8C\n");
   // CHECK-MESSAGES: [[@LINE-1]]:3: warning: use 'std::println' instead of 'printf' [modernize-use-std-print]
-  // CHECK-FIXES: std::println("你好世界");
+  // CHECK-FIXES: std::println("\xE4\xBD\xA0\xE5\xA5\xBD\xE4\xB8\x96\xE7\x95\x8C");
 }
 
 void printf_crlf_newline() {
@@ -311,10 +311,10 @@ void fprintf_simple() {
 }
 
 void fprintf_utf8_text() {
-  // Non-ASCII UTF-8 in format string should not crash.
-  fprintf(stderr, "你好世界\n");
+  // Hex encodes U+4F60 U+597D U+4E16 U+754C (你好世界) in UTF-8
+  fprintf(stderr, "\xE4\xBD\xA0\xE5\xA5\xBD\xE4\xB8\x96\xE7\x95\x8C\n");
   // CHECK-MESSAGES: [[@LINE-1]]:3: warning: use 'std::println' instead of 'fprintf' [modernize-use-std-print]
-  // CHECK-FIXES: std::println(stderr, "你好世界");
+  // CHECK-FIXES: std::println(stderr, "\xE4\xBD\xA0\xE5\xA5\xBD\xE4\xB8\x96\xE7\x95\x8C");
 }
 
 void std_printf_simple() {

>From 1b2e271ea393f8241d4d45e1e8038d530ec04c35 Mon Sep 17 00:00:00 2001
From: mtx <[email protected]>
Date: Mon, 24 Nov 2025 10:19:08 +0800
Subject: [PATCH 5/6] Fix test under Linux

---
 .../test/clang-tidy/checkers/modernize/use-std-print.cpp      | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/clang-tools-extra/test/clang-tidy/checkers/modernize/use-std-print.cpp b/clang-tools-extra/test/clang-tidy/checkers/modernize/use-std-print.cpp
index 184d8aa09639a..9cf88f1a69364 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/modernize/use-std-print.cpp
+++ b/clang-tools-extra/test/clang-tidy/checkers/modernize/use-std-print.cpp
@@ -58,7 +58,7 @@ void printf_utf8_text() {
   // Hex encodes U+4F60 U+597D U+4E16 U+754C (你好世界) in UTF-8
   printf("\xE4\xBD\xA0\xE5\xA5\xBD\xE4\xB8\x96\xE7\x95\x8C\n");
   // CHECK-MESSAGES: [[@LINE-1]]:3: warning: use 'std::println' instead of 'printf' [modernize-use-std-print]
-  // CHECK-FIXES: std::println("\xE4\xBD\xA0\xE5\xA5\xBD\xE4\xB8\x96\xE7\x95\x8C");
+  // CHECK-FIXES: std::println("你好世界");
 }
 
 void printf_crlf_newline() {
@@ -314,7 +314,7 @@ void fprintf_utf8_text() {
   // Hex encodes U+4F60 U+597D U+4E16 U+754C (你好世界) in UTF-8
   fprintf(stderr, "\xE4\xBD\xA0\xE5\xA5\xBD\xE4\xB8\x96\xE7\x95\x8C\n");
   // CHECK-MESSAGES: [[@LINE-1]]:3: warning: use 'std::println' instead of 'fprintf' [modernize-use-std-print]
-  // CHECK-FIXES: std::println(stderr, "\xE4\xBD\xA0\xE5\xA5\xBD\xE4\xB8\x96\xE7\x95\x8C");
+  // CHECK-FIXES: std::println(stderr, "你好世界");
 }
 
 void std_printf_simple() {

>From 5dd20c61acec4e26321828f099c79033a1059254 Mon Sep 17 00:00:00 2001
From: mtx <[email protected]>
Date: Mon, 24 Nov 2025 10:48:37 +0800
Subject: [PATCH 6/6] Fix CI

---
 .github/workflows/premerge.yaml                             | 6 ++++--
 .../test/clang-tidy/checkers/modernize/use-std-print.cpp    | 6 ++----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/premerge.yaml b/.github/workflows/premerge.yaml
index daf88b5b22125..8c3f644fc360f 100644
--- a/.github/workflows/premerge.yaml
+++ b/.github/workflows/premerge.yaml
@@ -145,7 +145,7 @@ jobs:
       - name: Compute Projects
         id: vars
         run: |
-          source <(git diff --name-only HEAD~1...HEAD | python .ci/compute_projects.py)
+          source <(git diff --name-only HEAD~1...HEAD | python -X utf8 .ci/compute_projects.py)
 
           if [[ "${projects_to_build}" == "" ]]; then
             echo "No projects to build"
@@ -169,9 +169,11 @@ jobs:
           GITHUB_PR_NUMBER: ${{ github.event.pull_request.number }}
         run: |
           call C:\\BuildTools\\Common7\\Tools\\VsDevCmd.bat -arch=amd64 -host_arch=amd64
+          set PYTHONUTF8=1
+          set PYTHONIOENCODING=utf-8
           # See the comments above in the Linux job for why we define each of
           # these environment variables.
-          bash -c "export SCCACHE_GCS_BUCKET=$CACHE_GCS_BUCKET; export SCCACHE_GCS_RW_MODE=READ_WRITE; export SCCACHE_IDLE_TIMEOUT=0; mkdir artifacts; SCCACHE_LOG=info SCCACHE_ERROR_LOG=$(pwd)/artifacts/sccache.log sccache --start-server; .ci/monolithic-windows.sh \"${{ steps.vars.outputs.windows-projects }}\" \"${{ steps.vars.outputs.windows-check-targets }}\" \"${{ steps.vars.outputs.windows-runtimes }}\" \"${{ steps.vars.outputs.windows-runtimes-check-targets }}\""
+          bash -c "export PYTHONUTF8=1; export PYTHONIOENCODING=utf-8; export SCCACHE_GCS_BUCKET=$CACHE_GCS_BUCKET; export SCCACHE_GCS_RW_MODE=READ_WRITE; export SCCACHE_IDLE_TIMEOUT=0; mkdir artifacts; SCCACHE_LOG=info SCCACHE_ERROR_LOG=$(pwd)/artifacts/sccache.log sccache --start-server; .ci/monolithic-windows.sh \"${{ steps.vars.outputs.windows-projects }}\" \"${{ steps.vars.outputs.windows-check-targets }}\" \"${{ steps.vars.outputs.windows-runtimes }}\" \"${{ steps.vars.outputs.windows-runtimes-check-targets }}\""
       - name: Upload Artifacts
         # In some cases, Github will fail to upload the artifact. We want to
         # continue anyways as a failed artifact upload is an infra failure, not
diff --git a/clang-tools-extra/test/clang-tidy/checkers/modernize/use-std-print.cpp b/clang-tools-extra/test/clang-tidy/checkers/modernize/use-std-print.cpp
index 9cf88f1a69364..63972cc0fd25e 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/modernize/use-std-print.cpp
+++ b/clang-tools-extra/test/clang-tidy/checkers/modernize/use-std-print.cpp
@@ -55,8 +55,7 @@ void printf_deceptive_newline() {
 }
 
 void printf_utf8_text() {
-  // Hex encodes U+4F60 U+597D U+4E16 U+754C (你好世界) in UTF-8
-  printf("\xE4\xBD\xA0\xE5\xA5\xBD\xE4\xB8\x96\xE7\x95\x8C\n");
+  printf("你好世界\n");
   // CHECK-MESSAGES: [[@LINE-1]]:3: warning: use 'std::println' instead of 'printf' [modernize-use-std-print]
   // CHECK-FIXES: std::println("你好世界");
 }
@@ -311,8 +310,7 @@ void fprintf_simple() {
 }
 
 void fprintf_utf8_text() {
-  // Hex encodes U+4F60 U+597D U+4E16 U+754C (你好世界) in UTF-8
-  fprintf(stderr, "\xE4\xBD\xA0\xE5\xA5\xBD\xE4\xB8\x96\xE7\x95\x8C\n");
+  fprintf(stderr, "你好世界\n");
   // CHECK-MESSAGES: [[@LINE-1]]:3: warning: use 'std::println' instead of 'fprintf' [modernize-use-std-print]
   // CHECK-FIXES: std::println(stderr, "你好世界");
 }

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to