https://github.com/Thibault-Monnier updated 
https://github.com/llvm/llvm-project/pull/180819

>From faa899a6ce518c1176f2bf59f199eb42e59d840e Mon Sep 17 00:00:00 2001
From: Thibault-Monnier <[email protected]>
Date: Tue, 10 Feb 2026 19:41:47 +0100
Subject: [PATCH 1/2] Try prioritizing skipping space

---
 clang/lib/Lex/Lexer.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp
index 1498657047bd6..483cca32e08a2 100644
--- a/clang/lib/Lex/Lexer.cpp
+++ b/clang/lib/Lex/Lexer.cpp
@@ -2533,8 +2533,8 @@ bool Lexer::SkipWhitespace(Token &Result, const char *CurPtr,
 
   // Skip consecutive spaces efficiently.
   while (true) {
-    // Skip horizontal whitespace very aggressively.
-    while (isHorizontalWhitespace(Char))
+    // Skip horizontal whitespace, especially space, very aggressively.
+    while (LLVM_LIKELY(Char == ' ') || isHorizontalWhitespace(Char))
       Char = *++CurPtr;
 
     // Otherwise if we have something other than whitespace, we're done.
@@ -3756,10 +3756,10 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) {
   const char *CurPtr = BufferPtr;
 
   // Small amounts of horizontal whitespace is very common between tokens.
-  if (isHorizontalWhitespace(*CurPtr)) {
+  if (LLVM_LIKELY(*CurPtr == ' ') || isHorizontalWhitespace(*CurPtr)) {
     do {
       ++CurPtr;
-    } while (isHorizontalWhitespace(*CurPtr));
+    } while (LLVM_LIKELY(*CurPtr == ' ') || isHorizontalWhitespace(*CurPtr));
 
     // If we are keeping whitespace and other tokens, just return what we just
     // skipped.  The next lexer invocation will return the token after the

>From 805b053f8003709679122a1bf639534f0defb70e Mon Sep 17 00:00:00 2001
From: Thibault-Monnier <[email protected]>
Date: Sat, 14 Feb 2026 15:23:11 +0100
Subject: [PATCH 2/2] Optimize isHorizontalWhitespace instead of at usage

---
 clang/include/clang/Basic/CharInfo.h | 4 ++--
 clang/lib/Lex/Lexer.cpp              | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/clang/include/clang/Basic/CharInfo.h b/clang/include/clang/Basic/CharInfo.h
index 87626eeb8a700..93143f2ab0daa 100644
--- a/clang/include/clang/Basic/CharInfo.h
+++ b/clang/include/clang/Basic/CharInfo.h
@@ -89,8 +89,8 @@ LLVM_READONLY inline bool isAsciiIdentifierContinue(unsigned char c,
 ///
 /// Note that this returns false for '\\0'.
 LLVM_READONLY inline bool isHorizontalWhitespace(unsigned char c) {
-  using namespace charinfo;
-  return (InfoTable[c] & (CHAR_HORZ_WS|CHAR_SPACE)) != 0;
+  return LLVM_LIKELY(c == ' ') || c == '\t' ||
+         LLVM_UNLIKELY(c == '\f' || c == '\v');
 }
 
 /// Returns true if this character is vertical ASCII whitespace: '\\n', '\\r'.
diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp
index 483cca32e08a2..fc2747f31cee8 100644
--- a/clang/lib/Lex/Lexer.cpp
+++ b/clang/lib/Lex/Lexer.cpp
@@ -2534,7 +2534,7 @@ bool Lexer::SkipWhitespace(Token &Result, const char *CurPtr,
   // Skip consecutive spaces efficiently.
   while (true) {
     // Skip horizontal whitespace, especially space, very aggressively.
-    while (LLVM_LIKELY(Char == ' ') || isHorizontalWhitespace(Char))
+    while (isHorizontalWhitespace(Char))
       Char = *++CurPtr;
 
     // Otherwise if we have something other than whitespace, we're done.
@@ -3756,10 +3756,10 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) {
   const char *CurPtr = BufferPtr;
 
   // Small amounts of horizontal whitespace is very common between tokens.
-  if (LLVM_LIKELY(*CurPtr == ' ') || isHorizontalWhitespace(*CurPtr)) {
+  if (isHorizontalWhitespace(*CurPtr)) {
     do {
       ++CurPtr;
-    } while (LLVM_LIKELY(*CurPtr == ' ') || isHorizontalWhitespace(*CurPtr));
+    } while (isHorizontalWhitespace(*CurPtr));
 
     // If we are keeping whitespace and other tokens, just return what we just
     // skipped.  The next lexer invocation will return the token after the

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to