https://github.com/Thibault-Monnier updated https://github.com/llvm/llvm-project/pull/180819
>From faa899a6ce518c1176f2bf59f199eb42e59d840e Mon Sep 17 00:00:00 2001 From: Thibault-Monnier <[email protected]> Date: Tue, 10 Feb 2026 19:41:47 +0100 Subject: [PATCH 1/2] Try prioritizing skipping space --- clang/lib/Lex/Lexer.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp index 1498657047bd6..483cca32e08a2 100644 --- a/clang/lib/Lex/Lexer.cpp +++ b/clang/lib/Lex/Lexer.cpp @@ -2533,8 +2533,8 @@ bool Lexer::SkipWhitespace(Token &Result, const char *CurPtr, // Skip consecutive spaces efficiently. while (true) { - // Skip horizontal whitespace very aggressively. - while (isHorizontalWhitespace(Char)) + // Skip horizontal whitespace, especially space, very aggressively. + while (LLVM_LIKELY(Char == ' ') || isHorizontalWhitespace(Char)) Char = *++CurPtr; // Otherwise if we have something other than whitespace, we're done. @@ -3756,10 +3756,10 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { const char *CurPtr = BufferPtr; // Small amounts of horizontal whitespace is very common between tokens. - if (isHorizontalWhitespace(*CurPtr)) { + if (LLVM_LIKELY(*CurPtr == ' ') || isHorizontalWhitespace(*CurPtr)) { do { ++CurPtr; - } while (isHorizontalWhitespace(*CurPtr)); + } while (LLVM_LIKELY(*CurPtr == ' ') || isHorizontalWhitespace(*CurPtr)); // If we are keeping whitespace and other tokens, just return what we just // skipped. The next lexer invocation will return the token after the >From 805b053f8003709679122a1bf639534f0defb70e Mon Sep 17 00:00:00 2001 From: Thibault-Monnier <[email protected]> Date: Sat, 14 Feb 2026 15:23:11 +0100 Subject: [PATCH 2/2] Optimize isHorizontalWhitespace instead of at usage --- clang/include/clang/Basic/CharInfo.h | 4 ++-- clang/lib/Lex/Lexer.cpp | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/clang/include/clang/Basic/CharInfo.h b/clang/include/clang/Basic/CharInfo.h index 87626eeb8a700..93143f2ab0daa 100644 --- a/clang/include/clang/Basic/CharInfo.h +++ b/clang/include/clang/Basic/CharInfo.h @@ -89,8 +89,8 @@ LLVM_READONLY inline bool isAsciiIdentifierContinue(unsigned char c, /// /// Note that this returns false for '\\0'. LLVM_READONLY inline bool isHorizontalWhitespace(unsigned char c) { - using namespace charinfo; - return (InfoTable[c] & (CHAR_HORZ_WS|CHAR_SPACE)) != 0; + return LLVM_LIKELY(c == ' ') || c == '\t' || + LLVM_UNLIKELY(c == '\f' || c == '\v'); } /// Returns true if this character is vertical ASCII whitespace: '\\n', '\\r'. diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp index 483cca32e08a2..fc2747f31cee8 100644 --- a/clang/lib/Lex/Lexer.cpp +++ b/clang/lib/Lex/Lexer.cpp @@ -2534,7 +2534,7 @@ bool Lexer::SkipWhitespace(Token &Result, const char *CurPtr, // Skip consecutive spaces efficiently. while (true) { // Skip horizontal whitespace, especially space, very aggressively. - while (LLVM_LIKELY(Char == ' ') || isHorizontalWhitespace(Char)) + while (isHorizontalWhitespace(Char)) Char = *++CurPtr; // Otherwise if we have something other than whitespace, we're done. @@ -3756,10 +3756,10 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { const char *CurPtr = BufferPtr; // Small amounts of horizontal whitespace is very common between tokens. - if (LLVM_LIKELY(*CurPtr == ' ') || isHorizontalWhitespace(*CurPtr)) { + if (isHorizontalWhitespace(*CurPtr)) { do { ++CurPtr; - } while (LLVM_LIKELY(*CurPtr == ' ') || isHorizontalWhitespace(*CurPtr)); + } while (isHorizontalWhitespace(*CurPtr)); // If we are keeping whitespace and other tokens, just return what we just // skipped. The next lexer invocation will return the token after the _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
