[clang] [clang-format][NFC] Clean up FormatTokenLexer (PR #203825)

via cfe-commits Sun, 14 Jun 2026 22:09:12 -0700

https://github.com/owenca created https://github.com/llvm/llvm-project/pull/203825


None

>From 2f8395569d25b6bcf32e4d0575a8ea14297ebe0c Mon Sep 17 00:00:00 2001
From: Owen Pan <[email protected]>
Date: Sun, 14 Jun 2026 22:07:31 -0700
Subject: [PATCH] [clang-format][NFC] Clean up FormatTokenLexer

---
 clang/lib/Format/FormatTokenLexer.cpp | 20 +++++++++++++-------
 clang/lib/Format/FormatTokenLexer.h   |  4 ----
 2 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/clang/lib/Format/FormatTokenLexer.cpp b/clang/lib/Format/FormatTokenLexer.cpp
index 92571c012bdb2..bf26ef04fe88f 100644
--- a/clang/lib/Format/FormatTokenLexer.cpp
+++ b/clang/lib/Format/FormatTokenLexer.cpp
@@ -33,9 +33,7 @@ FormatTokenLexer::FormatTokenLexer(
       LangOpts(getFormattingLangOpts(Style)), SourceMgr(SourceMgr), ID(ID),
       Style(Style), IdentTable(IdentTable), Keywords(IdentTable),
       Encoding(Encoding), Allocator(Allocator), FirstInLineIndex(0),
-      FormattingDisabled(false), FormatOffRegex(Style.OneLineFormatOffRegex),
-      MacroBlockBeginRegex(Style.MacroBlockBegin),
-      MacroBlockEndRegex(Style.MacroBlockEnd), VerilogProtectedBlock(false) {
+      FormattingDisabled(false), VerilogProtectedBlock(false) {
   Lex = std::make_unique<Lexer>(ID, SourceMgr.getBufferOrFake(ID), SourceMgr,
                                 LangOpts);
   Lex->SetKeepWhitespaceMode(true);
@@ -88,12 +86,14 @@ FormatTokenLexer::FormatTokenLexer(
 ArrayRef<FormatToken *> FormatTokenLexer::lex() {
   assert(Tokens.empty());
   assert(FirstInLineIndex == 0);
+
   enum { FO_None, FO_CurrentLine, FO_NextLine } FormatOff = FO_None;
+  llvm::Regex FormatOffRegex(Style.OneLineFormatOffRegex);
   do {
     Tokens.push_back(getNextToken());
+
     auto &Tok = *Tokens.back();
-    const auto NewlinesBefore = Tok.NewlinesBefore;
-    switch (FormatOff) {
+    switch (const auto NewlinesBefore = Tok.NewlinesBefore; FormatOff) {
     case FO_NextLine:
       if (NewlinesBefore > 1) {
         FormatOff = FO_None;
@@ -125,13 +125,16 @@ ArrayRef<FormatToken *> FormatTokenLexer::lex() {
         }
       }
     }
+
     if (Style.isJavaScript()) {
       tryParseJSRegexLiteral();
       handleTemplateStrings();
     } else if (Style.isTextProto()) {
       tryParsePythonComment();
     }
+
     tryMergePreviousTokens();
+
     if (Style.isCSharp()) {
       // This needs to come after tokens have been merged so that C#
       // string literals are correctly identified.
@@ -140,9 +143,11 @@ ArrayRef<FormatToken *> FormatTokenLexer::lex() {
       handleTableGenMultilineString();
       handleTableGenNumericLikeIdentifier();
     }
+
     if (Tokens.back()->NewlinesBefore > 0 || Tokens.back()->IsMultiline)
       FirstInLineIndex = Tokens.size() - 1;
   } while (Tokens.back()->isNot(tok::eof));
+
   if (Style.InsertNewlineAtEOF) {
     auto &TokEOF = *Tokens.back();
     if (TokEOF.NewlinesBefore == 0) {
@@ -150,6 +155,7 @@ ArrayRef<FormatToken *> FormatTokenLexer::lex() {
       TokEOF.OriginalColumn = 0;
     }
   }
+
   return Tokens;
 }
 
@@ -1466,9 +1472,9 @@ FormatToken *FormatTokenLexer::getNextToken() {
         FormatTok->Tok.setKind(tok::kw_if);
       }
     } else if (FormatTok->is(tok::identifier)) {
-      if (MacroBlockBeginRegex.match(Text))
+      if (llvm::Regex(Style.MacroBlockBegin).match(Text))
         FormatTok->setType(TT_MacroBlockBegin);
-      else if (MacroBlockEndRegex.match(Text))
+      else if (llvm::Regex(Style.MacroBlockEnd).match(Text))
         FormatTok->setType(TT_MacroBlockEnd);
       else if (MacrosSkippedByRemoveParentheses.contains(Identifier))
         FormatTok->setFinalizedType(TT_FunctionLikeMacro);
diff --git a/clang/lib/Format/FormatTokenLexer.h b/clang/lib/Format/FormatTokenLexer.h
index 9f5b735efe1d0..9c80eaacf1f45 100644
--- a/clang/lib/Format/FormatTokenLexer.h
+++ b/clang/lib/Format/FormatTokenLexer.h
@@ -135,10 +135,6 @@ class FormatTokenLexer {
       TemplateNames, TypeNames, VariableTemplates;
 
   bool FormattingDisabled;
-  llvm::Regex FormatOffRegex; // For one line.
-
-  llvm::Regex MacroBlockBeginRegex;
-  llvm::Regex MacroBlockEndRegex;
 
   // The next line is a Verilog protected block that should not be split into
   // tokens. Set at the 'pragma protect' line. Cleared at the next line.

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [clang-format][NFC] Clean up FormatTokenLexer (PR #203825)

Reply via email to