Author: argothiel
Date: 2026-05-15T08:03:53+02:00
New Revision: 4ee6d62446a702ce507e8712aa68e2052056d3b1

URL: 
https://github.com/llvm/llvm-project/commit/4ee6d62446a702ce507e8712aa68e2052056d3b1
DIFF: 
https://github.com/llvm/llvm-project/commit/4ee6d62446a702ce507e8712aa68e2052056d3b1.diff

LOG: [clangd][Lex][NFC] Use valid non-ASCII identifiers in tests (#197826)

Several tests used "ab🙂cd" / "🙂cd" as multi-byte UTF-8 example
identifiers. The smiley, however, is not actually among the allowed
identifier characters, and Clang only accepts it as an extension (with a
warning).

Switch to identifiers that are valid per [lex.name]:
- "naïve", with a non-ASCII char in the middle of the identifier,
- "æon", with a non-ASCII char at the start of the identifier,
- "café", with a non-ASCII char at the end of the identifier.

The 2-byte characters are handled the same way as the original 4-byte
emoji; no functional change here.

Added: 
    

Modified: 
    clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
    clang-tools-extra/clangd/unittests/FuzzyMatchTests.cpp
    clang/unittests/Lex/LexerTest.cpp

Removed: 
    


################################################################################
diff --git a/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp b/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
index f3a432a3b2632..7a6e70c39740b 100644
--- a/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
+++ b/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
@@ -4129,8 +4129,8 @@ TEST(CompletionTest, ReplaceRange) {
   EXPECT_EQ(Completions.ReplaceRange, A.range("replace"));
 
   // Cursor mid-word with UTF-8 continuation: replace extends past UTF-8.
-  const char *MidWordUTF8 = "struct S { int ab🙂cd; }; void f() { S s; "
-                            "s.$replace[[$insert[[ab^]]🙂cd]]; }";
+  const char *MidWordUTF8 = "struct S { int naïve; }; void f() { S s; "
+                            "s.$replace[[$insert[[na^]]ïve]]; }";
   Completions = completions(MidWordUTF8, /*IndexSymbols=*/{}, Opts);
   A = Annotations(MidWordUTF8);
   EXPECT_EQ(Completions.InsertRange, A.range("insert"));
@@ -4196,7 +4196,7 @@ TEST(CompletionTest, ReplaceRangeNoCompile) {
   EXPECT_EQ(Results.ReplaceRange, A.range("replace"));
 
   // ASCII heuristic stops at non-ASCII: replace doesn't extend past UTF-8.
-  const char *MidWordUTF8 = "auto x = $replace[[$insert[[ab^]]]]🙂cd";
+  const char *MidWordUTF8 = "auto x = $replace[[$insert[[na^]]]]ïve";
   Results = completionsNoCompile(MidWordUTF8, /*IndexSymbols=*/{}, Opts);
   A = Annotations(MidWordUTF8);
   EXPECT_EQ(Results.InsertRange, A.range("insert"));
@@ -4590,8 +4590,7 @@ TEST(CompletionTest, CommentParamName) {
   }
   {
     // With */ and UTF-8 suffix: replace extends past UTF-8 to */.
-    const std::string WithUTF8(Code +
-                               "fun(/*$replace[[$insert[[fo^]]o🙂=*/]])");
+    const std::string WithUTF8(Code + "fun(/*$replace[[$insert[[ca^]]fé=*/]])");
     const CodeCompleteResult Results = completions(WithUTF8, {}, ReplaceOpts);
     const Annotations A(WithUTF8);
     EXPECT_EQ(Results.InsertRange, A.range("insert"));

diff --git a/clang-tools-extra/clangd/unittests/FuzzyMatchTests.cpp b/clang-tools-extra/clangd/unittests/FuzzyMatchTests.cpp
index 5efbfcd8d3e93..ad32623dbe26b 100644
--- a/clang-tools-extra/clangd/unittests/FuzzyMatchTests.cpp
+++ b/clang-tools-extra/clangd/unittests/FuzzyMatchTests.cpp
@@ -305,8 +305,8 @@ TEST(FuzzyMatch, Segmentation) {
               returns("+--+---+------"));
   EXPECT_THAT(segment("t3h PeNgU1N oF d00m!!!!!!!!"), //
               returns("+-- +-+-+-+ ++ +---        "));
-  EXPECT_THAT(segment("ab🙂cd"), //
-              returns("+-------"));
+  EXPECT_THAT(segment("naïve"),   //
+              returns("+-----")); // Six bytes: ï is encoded as two bytes.
 }
 
 } // namespace

diff --git a/clang/unittests/Lex/LexerTest.cpp b/clang/unittests/Lex/LexerTest.cpp
index 52c3522f1ed53..eaac861fa5e2c 100644
--- a/clang/unittests/Lex/LexerTest.cpp
+++ b/clang/unittests/Lex/LexerTest.cpp
@@ -878,8 +878,8 @@ TEST_F(LexerTest, FindEndOfIdentifierContinuation) {
 
   // UTF-8 identifier characters.
   LangOpts.CPlusPlus = true;
-  EXPECT_EQ(Measure("ab🙂cd", 2), 6u); // '🙂' (4 bytes) + "cd".
-  EXPECT_EQ(Measure("🙂cd", 0), 6u);   // Starts with '🙂'.
+  EXPECT_EQ(Measure("naïve", 2), 4u); // 'ï' (2 bytes) + "ve".
+  EXPECT_EQ(Measure("æon", 0), 4u);   // Starts with 'æ' (2 bytes).
 
   // Dollar sign (requires DollarIdents).
   LangOpts.DollarIdents = true;


        
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to