https://github.com/owenca created https://github.com/llvm/llvm-project/pull/203825
None >From 2f8395569d25b6bcf32e4d0575a8ea14297ebe0c Mon Sep 17 00:00:00 2001 From: Owen Pan <[email protected]> Date: Sun, 14 Jun 2026 22:07:31 -0700 Subject: [PATCH] [clang-format][NFC] Clean up FormatTokenLexer --- clang/lib/Format/FormatTokenLexer.cpp | 20 +++++++++++++------- clang/lib/Format/FormatTokenLexer.h | 4 ---- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/clang/lib/Format/FormatTokenLexer.cpp b/clang/lib/Format/FormatTokenLexer.cpp index 92571c012bdb2..bf26ef04fe88f 100644 --- a/clang/lib/Format/FormatTokenLexer.cpp +++ b/clang/lib/Format/FormatTokenLexer.cpp @@ -33,9 +33,7 @@ FormatTokenLexer::FormatTokenLexer( LangOpts(getFormattingLangOpts(Style)), SourceMgr(SourceMgr), ID(ID), Style(Style), IdentTable(IdentTable), Keywords(IdentTable), Encoding(Encoding), Allocator(Allocator), FirstInLineIndex(0), - FormattingDisabled(false), FormatOffRegex(Style.OneLineFormatOffRegex), - MacroBlockBeginRegex(Style.MacroBlockBegin), - MacroBlockEndRegex(Style.MacroBlockEnd), VerilogProtectedBlock(false) { + FormattingDisabled(false), VerilogProtectedBlock(false) { Lex = std::make_unique<Lexer>(ID, SourceMgr.getBufferOrFake(ID), SourceMgr, LangOpts); Lex->SetKeepWhitespaceMode(true); @@ -88,12 +86,14 @@ FormatTokenLexer::FormatTokenLexer( ArrayRef<FormatToken *> FormatTokenLexer::lex() { assert(Tokens.empty()); assert(FirstInLineIndex == 0); + enum { FO_None, FO_CurrentLine, FO_NextLine } FormatOff = FO_None; + llvm::Regex FormatOffRegex(Style.OneLineFormatOffRegex); do { Tokens.push_back(getNextToken()); + auto &Tok = *Tokens.back(); - const auto NewlinesBefore = Tok.NewlinesBefore; - switch (FormatOff) { + switch (const auto NewlinesBefore = Tok.NewlinesBefore; FormatOff) { case FO_NextLine: if (NewlinesBefore > 1) { FormatOff = FO_None; @@ -125,13 +125,16 @@ ArrayRef<FormatToken *> FormatTokenLexer::lex() { } } } + if (Style.isJavaScript()) { tryParseJSRegexLiteral(); handleTemplateStrings(); } else if (Style.isTextProto()) { tryParsePythonComment(); } + tryMergePreviousTokens(); + if (Style.isCSharp()) { // This needs to come after tokens have been merged so that C# // string literals are correctly identified. @@ -140,9 +143,11 @@ ArrayRef<FormatToken *> FormatTokenLexer::lex() { handleTableGenMultilineString(); handleTableGenNumericLikeIdentifier(); } + if (Tokens.back()->NewlinesBefore > 0 || Tokens.back()->IsMultiline) FirstInLineIndex = Tokens.size() - 1; } while (Tokens.back()->isNot(tok::eof)); + if (Style.InsertNewlineAtEOF) { auto &TokEOF = *Tokens.back(); if (TokEOF.NewlinesBefore == 0) { @@ -150,6 +155,7 @@ ArrayRef<FormatToken *> FormatTokenLexer::lex() { TokEOF.OriginalColumn = 0; } } + return Tokens; } @@ -1466,9 +1472,9 @@ FormatToken *FormatTokenLexer::getNextToken() { FormatTok->Tok.setKind(tok::kw_if); } } else if (FormatTok->is(tok::identifier)) { - if (MacroBlockBeginRegex.match(Text)) + if (llvm::Regex(Style.MacroBlockBegin).match(Text)) FormatTok->setType(TT_MacroBlockBegin); - else if (MacroBlockEndRegex.match(Text)) + else if (llvm::Regex(Style.MacroBlockEnd).match(Text)) FormatTok->setType(TT_MacroBlockEnd); else if (MacrosSkippedByRemoveParentheses.contains(Identifier)) FormatTok->setFinalizedType(TT_FunctionLikeMacro); diff --git a/clang/lib/Format/FormatTokenLexer.h b/clang/lib/Format/FormatTokenLexer.h index 9f5b735efe1d0..9c80eaacf1f45 100644 --- a/clang/lib/Format/FormatTokenLexer.h +++ b/clang/lib/Format/FormatTokenLexer.h @@ -135,10 +135,6 @@ class FormatTokenLexer { TemplateNames, TypeNames, VariableTemplates; bool FormattingDisabled; - llvm::Regex FormatOffRegex; // For one line. - - llvm::Regex MacroBlockBeginRegex; - llvm::Regex MacroBlockEndRegex; // The next line is a Verilog protected block that should not be split into // tokens. Set at the 'pragma protect' line. Cleared at the next line. _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
