klimek updated this revision to Diff 499777.
klimek added a comment.

Add comment.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D144170/new/

https://reviews.llvm.org/D144170

Files:
  clang/include/clang/Format/Format.h
  clang/lib/Format/ContinuationIndenter.cpp
  clang/lib/Format/Format.cpp
  clang/lib/Format/FormatToken.h
  clang/lib/Format/MacroExpander.cpp
  clang/lib/Format/Macros.h
  clang/lib/Format/TokenAnalyzer.cpp
  clang/lib/Format/TokenAnalyzer.h
  clang/lib/Format/TokenAnnotator.cpp
  clang/lib/Format/TokenAnnotator.h
  clang/lib/Format/UnwrappedLineFormatter.cpp
  clang/lib/Format/UnwrappedLineParser.cpp
  clang/lib/Format/UnwrappedLineParser.h
  clang/lib/Format/WhitespaceManager.cpp
  clang/unittests/Format/FormatTest.cpp
  clang/unittests/Format/TestLexer.h

Index: clang/unittests/Format/TestLexer.h
===================================================================
--- clang/unittests/Format/TestLexer.h
+++ clang/unittests/Format/TestLexer.h
@@ -72,7 +72,8 @@
   TokenList annotate(llvm::StringRef Code) {
     FormatTokenLexer Lex = getNewLexer(Code);
     auto Tokens = Lex.lex();
-    UnwrappedLineParser Parser(Style, Lex.getKeywords(), 0, Tokens, *this);
+    UnwrappedLineParser Parser(SourceMgr.get(), Style, Lex.getKeywords(), 0,
+                               Tokens, *this, Allocator, IdentTable);
     Parser.parse();
     TokenAnnotator Annotator(Style, Lex.getKeywords());
     for (auto &Line : UnwrappedLines) {
Index: clang/unittests/Format/FormatTest.cpp
===================================================================
--- clang/unittests/Format/FormatTest.cpp
+++ clang/unittests/Format/FormatTest.cpp
@@ -22568,6 +22568,230 @@
                "aaaallvm::outs()\n    <<");
 }
 
+TEST_F(FormatTest, UnexpandConfiguredMacros) {
+  FormatStyle Style = getLLVMStyle();
+  Style.Macros.push_back("CLASS=class C {");
+  Style.Macros.push_back("SEMI=;");
+  Style.Macros.push_back("STMT=f();");
+  Style.Macros.push_back("ID(x)=x");
+  Style.Macros.push_back("ID3(x, y, z)=x y z");
+  Style.Macros.push_back("CALL(x)=f([] { x })");
+  Style.Macros.push_back("ASSIGN_OR_RETURN(a, b, c)=a = (b) || (c)");
+  Style.Macros.push_back("MOCK_METHOD(r, n, a, s)=r n a s");
+
+  verifyFormat("ID(nested(a(b, c), d))", Style);
+  verifyFormat("CLASS\n"
+               "  a *b;\n"
+               "};",
+               Style);
+  verifyFormat("SEMI\n"
+               "SEMI\n"
+               "SEMI",
+               Style);
+  verifyFormat("STMT\n"
+               "STMT\n"
+               "STMT",
+               Style);
+  verifyFormat("void f() { ID(a *b); }", Style);
+  verifyFormat(R"(ID(
+    { ID(a *b); });
+)",
+               Style);
+  verifyIncompleteFormat(R"(ID3({, ID(a *b),
+  ;
+  });
+)",
+                         Style);
+
+  verifyFormat("ID(CALL(CALL(return a * b;)));", Style);
+
+  verifyFormat("ASSIGN_OR_RETURN(MySomewhatLongType *variable,\n"
+               "                 MySomewhatLongFunction(SomethingElse()));\n",
+               Style);
+
+  verifyFormat(R"(
+#define MACRO(a, b) ID(a + b)
+)",
+               Style);
+  EXPECT_EQ(R"(
+int a;
+int b;
+int c;
+int d;
+int e;
+int f;
+ID(
+    namespace foo {
+    int a;
+    }
+) // namespace k
+)",
+            format(R"(
+int a;
+int b;
+int c;
+int d;
+int e;
+int f;
+ID(namespace foo { int a; })  // namespace k
+)",
+                   Style));
+  verifyFormat(R"(ID(
+    //
+    ({ ; }))
+)",
+               Style);
+
+  Style.ColumnLimit = 35;
+  // FIXME: Arbitrary formatting of macros where the end of the logical
+  // line is in the middle of a macro call are not working yet.
+  verifyFormat(R"(ID(
+    void f();
+    void)
+ID(g) ID(()) ID(
+    ;
+    void g();)
+)",
+               Style);
+
+  Style.ColumnLimit = 10;
+  verifyFormat("STMT\n"
+               "STMT\n"
+               "STMT",
+               Style);
+
+  EXPECT_EQ(R"(
+ID(CALL(CALL(
+    a *b)));
+)",
+            format(R"(
+ID(CALL(CALL(a * b)));
+)",
+                   Style));
+
+  // FIXME: If we want to support unbalanced braces or parens from macro
+  // expansions we need to re-think how we propagate errors in
+  // TokenAnnotator::parseLine; for investigation, switching the inner loop of
+  // TokenAnnotator::parseLine to return LT_Other instead of LT_Invalid in case
+  // of !consumeToken() changes the formatting of the test below and makes it
+  // believe it has a fully correct formatting.
+  EXPECT_EQ(R"(
+ID3(
+    {
+    CLASS
+    a *b;
+    };
+    },
+    ID(x *y);
+    ,
+    STMT
+    STMT
+    STMT)
+void f();
+)",
+            format(R"(
+ID3({CLASS a*b; };}, ID(x*y);, STMT STMT STMT)
+void f();
+)",
+                   Style));
+
+  verifyFormat("ID(a(\n"
+               "#ifdef A\n"
+               "    b, c\n"
+               "#else\n"
+               "    d(e)\n"
+               "#endif\n"
+               "    ))",
+               Style);
+  Style.ColumnLimit = 80;
+  verifyFormat(R"(ASSIGN_OR_RETURN(
+    // Comment
+    a b, c);
+)",
+               Style);
+  Style.ColumnLimit = 30;
+  verifyFormat(R"(ASSIGN_OR_RETURN(
+    // Comment
+    //
+    a b,
+    xxxxxxxxxxxx(
+        yyyyyyyyyyyyyyyyy,
+        zzzzzzzzzzzzzzzzzz),
+    f([]() {
+      a();
+      b();
+    }));
+)",
+               Style);
+  verifyFormat(R"(int a = []() {
+  ID(
+      x;
+      y;
+      z;)
+  ;
+}();
+)",
+               Style);
+  EXPECT_EQ(
+      R"(ASSIGN_OR_RETURN((
+====
+#))
+})",
+      format(R"(ASSIGN_OR_RETURN((
+====
+#))
+})",
+             Style, SC_ExpectIncomplete));
+  EXPECT_EQ(R"(ASSIGN_OR_RETURN(
+}
+(
+====
+#),
+a))",
+            format(R"(ASSIGN_OR_RETURN(
+}
+(
+====
+#),
+a))",
+                   Style, SC_ExpectIncomplete));
+  EXPECT_EQ(R"(ASSIGN_OR_RETURN(a
+//
+====
+#
+                 <))",
+            format(R"(ASSIGN_OR_RETURN(a
+//
+====
+#
+                 <))",
+                   Style));
+  verifyFormat("class C {\n"
+               "  MOCK_METHOD(R, f,\n"
+               "              (a *b, c *d),\n"
+               "              (override));\n"
+               "};",
+               Style);
+}
+
+TEST_F(FormatTest, KeepParensWhenExpandingObjectLikeMacros) {
+  FormatStyle Style = getLLVMStyle();
+  Style.Macros.push_back("FN=class C { int f");
+  verifyFormat("void f() { FN(a *b);\n"
+               "  };\n"
+               "}",
+               Style);
+}
+
+TEST_F(FormatTest, DoesNotExpandFunctionLikeMacrosWithoutParens) {
+  FormatStyle Style = getLLVMStyle();
+  Style.Macros.push_back("CLASS()=class C {");
+  verifyFormat("CLASS void f();\n"
+               "}\n"
+               ";",
+               Style);
+}
+
 TEST_F(FormatTest, HandleUnbalancedImplicitBracesAcrossPPBranches) {
   std::string code = "#if A\n"
                      "#if B\n"
Index: clang/lib/Format/WhitespaceManager.cpp
===================================================================
--- clang/lib/Format/WhitespaceManager.cpp
+++ clang/lib/Format/WhitespaceManager.cpp
@@ -49,7 +49,7 @@
                                           unsigned Spaces,
                                           unsigned StartOfTokenColumn,
                                           bool IsAligned, bool InPPDirective) {
-  if (Tok.Finalized)
+  if (Tok.Finalized || (Tok.MacroCtx && Tok.MacroCtx->Role == MR_ExpandedArg))
     return;
   Tok.setDecision((Newlines > 0) ? FD_Break : FD_Continue);
   Changes.push_back(Change(Tok, /*CreateReplacement=*/true, Tok.WhitespaceRange,
@@ -60,7 +60,7 @@
 
 void WhitespaceManager::addUntouchableToken(const FormatToken &Tok,
                                             bool InPPDirective) {
-  if (Tok.Finalized)
+  if (Tok.Finalized || (Tok.MacroCtx && Tok.MacroCtx->Role == MR_ExpandedArg))
     return;
   Changes.push_back(Change(Tok, /*CreateReplacement=*/false,
                            Tok.WhitespaceRange, /*Spaces=*/0,
@@ -84,7 +84,7 @@
     const FormatToken &Tok, unsigned Offset, unsigned ReplaceChars,
     StringRef PreviousPostfix, StringRef CurrentPrefix, bool InPPDirective,
     unsigned Newlines, int Spaces) {
-  if (Tok.Finalized)
+  if (Tok.Finalized || (Tok.MacroCtx && Tok.MacroCtx->Role == MR_ExpandedArg))
     return;
   SourceLocation Start = Tok.getStartOfNonWhitespace().getLocWithOffset(Offset);
   Changes.push_back(
Index: clang/lib/Format/UnwrappedLineParser.h
===================================================================
--- clang/lib/Format/UnwrappedLineParser.h
+++ clang/lib/Format/UnwrappedLineParser.h
@@ -15,10 +15,14 @@
 #ifndef LLVM_CLANG_LIB_FORMAT_UNWRAPPEDLINEPARSER_H
 #define LLVM_CLANG_LIB_FORMAT_UNWRAPPEDLINEPARSER_H
 
+#include "Encoding.h"
 #include "FormatToken.h"
+#include "Macros.h"
 #include "clang/Basic/IdentifierTable.h"
 #include "clang/Format/Format.h"
+#include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseSet.h"
 #include "llvm/Support/Regex.h"
 #include <list>
 #include <stack>
@@ -76,6 +80,18 @@
   unsigned FirstStartColumn = 0;
 };
 
+/// Interface for users of the UnwrappedLineParser to receive the parsed lines.
+/// Parsing a single snippet of code can lead to multiple runs, where each
+/// run is a coherent view of the file.
+/// For example, different runs are generated:
+/// - for different combinations of #if blocks
+/// - for code where macros are expanded and the code with the original
+///   macro calls.
+/// Some tokens will only be visible in a subset of the runs.
+/// For each run, \c UnwrappedLineParser will call \c consumeUnwrappedLine
+/// for each parsed unwrapped line, and then \c finishRun to indicate
+/// that the set of unwrapped lines before is one coherent view of the
+/// code snippet to be formatted.
 class UnwrappedLineConsumer {
 public:
   virtual ~UnwrappedLineConsumer() {}
@@ -87,10 +103,12 @@
 
 class UnwrappedLineParser {
 public:
-  UnwrappedLineParser(const FormatStyle &Style,
+  UnwrappedLineParser(SourceManager &SourceMgr, const FormatStyle &Style,
                       const AdditionalKeywords &Keywords,
                       unsigned FirstStartColumn, ArrayRef<FormatToken *> Tokens,
-                      UnwrappedLineConsumer &Callback);
+                      UnwrappedLineConsumer &Callback,
+                      llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,
+                      IdentifierTable &IdentTable);
 
   void parse();
 
@@ -193,6 +211,7 @@
   unsigned parseVerilogHierarchyHeader();
   void parseVerilogTable();
   void parseVerilogCaseLabel();
+  llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1> parseMacroCall();
 
   // Used by addUnwrappedLine to denote whether to keep or remove a level
   // when resetting the line state.
@@ -236,16 +255,49 @@
 
   bool isOnNewLine(const FormatToken &FormatTok);
 
+  // Returns whether there is a macro expansion in the line, i.e. a token that
+  // was expanded from a macro call.
+  bool containsExpansion(const UnwrappedLine &Line);
+
   // Compute hash of the current preprocessor branch.
   // This is used to identify the different branches, and thus track if block
   // open and close in the same branch.
   size_t computePPHash() const;
 
+  bool parsingPPDirective() { return CurrentLines != &Lines; }
+
   // FIXME: We are constantly running into bugs where Line.Level is incorrectly
   // subtracted from beyond 0. Introduce a method to subtract from Line.Level
   // and use that everywhere in the Parser.
   std::unique_ptr<UnwrappedLine> Line;
 
+  // Lines that are created by macro expansion.
+  // When formatting code containing macro calls, we first format the expanded
+  // lines to set the token types correctly. Afterwards, we format the
+  // reconstructed macro calls, re-using the token types determined in the first
+  // step.
+  // ExpandedLines will be reset every time we create a new LineAndExpansion
+  // instance once a line containing macro calls has been parsed.
+  SmallVector<UnwrappedLine, 8> CurrentExpandedLines;
+
+  // Maps from the first token of a top-level UnwrappedLine that contains
+  // a macro call to the replacement UnwrappedLines expanded from the macro
+  // call.
+  llvm::DenseMap<FormatToken *, SmallVector<UnwrappedLine, 8>> ExpandedLines;
+
+  // Map from the macro identifier to a line containing the full unexpanded
+  // macro call.
+  llvm::DenseMap<FormatToken *, std::unique_ptr<UnwrappedLine>> Unexpanded;
+
+  // For recursive macro expansions, trigger reconstruction only on the
+  // outermost expansion.
+  bool InExpansion = false;
+
+  // Set while we reconstruct a macro call.
+  // For reconstruction, we feed the expanded lines into the reconstructor
+  // until it is finished.
+  std::optional<MacroCallReconstructor> Reconstruct;
+
   // Comments are sorted into unwrapped lines by whether they are in the same
   // line as the previous token, or not. If not, they belong to the next token.
   // Since the next token might already be in a new unwrapped line, we need to
@@ -345,13 +397,17 @@
   // does not start at the beginning of the file.
   unsigned FirstStartColumn;
 
+  MacroExpander Macros;
+
   friend class ScopedLineState;
   friend class CompoundStatementIndenter;
 };
 
 struct UnwrappedLineNode {
   UnwrappedLineNode() : Tok(nullptr) {}
-  UnwrappedLineNode(FormatToken *Tok) : Tok(Tok) {}
+  UnwrappedLineNode(FormatToken *Tok,
+                    llvm::ArrayRef<UnwrappedLine> Children = {})
+      : Tok(Tok), Children(Children.begin(), Children.end()) {}
 
   FormatToken *Tok;
   SmallVector<UnwrappedLine, 0> Children;
Index: clang/lib/Format/UnwrappedLineParser.cpp
===================================================================
--- clang/lib/Format/UnwrappedLineParser.cpp
+++ clang/lib/Format/UnwrappedLineParser.cpp
@@ -14,11 +14,15 @@
 
 #include "UnwrappedLineParser.h"
 #include "FormatToken.h"
+#include "FormatTokenLexer.h"
 #include "FormatTokenSource.h"
+#include "Macros.h"
 #include "TokenAnnotator.h"
 #include "clang/Basic/TokenKinds.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_os_ostream.h"
 #include "llvm/Support/raw_ostream.h"
 
 #include <algorithm>
@@ -143,11 +147,12 @@
   unsigned OldLineLevel;
 };
 
-UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
-                                         const AdditionalKeywords &Keywords,
-                                         unsigned FirstStartColumn,
-                                         ArrayRef<FormatToken *> Tokens,
-                                         UnwrappedLineConsumer &Callback)
+UnwrappedLineParser::UnwrappedLineParser(
+    SourceManager &SourceMgr, const FormatStyle &Style,
+    const AdditionalKeywords &Keywords, unsigned FirstStartColumn,
+    ArrayRef<FormatToken *> Tokens, UnwrappedLineConsumer &Callback,
+    llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,
+    IdentifierTable &IdentTable)
     : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
       CurrentLines(&Lines), Style(Style), Keywords(Keywords),
       CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
@@ -155,7 +160,8 @@
       IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
                        ? IG_Rejected
                        : IG_Inited),
-      IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {}
+      IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn),
+      Macros(Style.Macros, SourceMgr, Style, Allocator, IdentTable) {}
 
 void UnwrappedLineParser::reset() {
   PPBranchLevel = -1;
@@ -173,6 +179,15 @@
   NestedTooDeep.clear();
   PPStack.clear();
   Line->FirstStartColumn = FirstStartColumn;
+
+  if (!Unexpanded.empty())
+    for (FormatToken *Token : AllTokens)
+      Token->MacroCtx.reset();
+  CurrentExpandedLines.clear();
+  ExpandedLines.clear();
+  Unexpanded.clear();
+  InExpansion = false;
+  Reconstruct.reset();
 }
 
 void UnwrappedLineParser::parse() {
@@ -196,12 +211,36 @@
     }
 
     // Create line with eof token.
+    assert(FormatTok->is(tok::eof));
     pushToken(FormatTok);
     addUnwrappedLine();
 
-    for (const UnwrappedLine &Line : Lines)
-      Callback.consumeUnwrappedLine(Line);
+    // In a first run, format everything with the lines containing macro calls
+    // replaced by the expansion.
+    if (!ExpandedLines.empty()) {
+      LLVM_DEBUG(llvm::dbgs() << "Expanded lines:\n");
+      for (const auto &Line : Lines) {
+        if (!Line.Tokens.empty()) {
+          auto it = ExpandedLines.find(Line.Tokens.begin()->Tok);
+          if (it != ExpandedLines.end()) {
+            for (const auto &Expanded : it->second) {
+              LLVM_DEBUG(printDebugInfo(Expanded));
+              Callback.consumeUnwrappedLine(Expanded);
+            }
+            continue;
+          }
+        }
+        LLVM_DEBUG(printDebugInfo(Line));
+        Callback.consumeUnwrappedLine(Line);
+      }
+      Callback.finishRun();
+    }
 
+    LLVM_DEBUG(llvm::dbgs() << "Unwrapped lines:\n");
+    for (const UnwrappedLine &Line : Lines) {
+      LLVM_DEBUG(printDebugInfo(Line));
+      Callback.consumeUnwrappedLine(Line);
+    }
     Callback.finishRun();
     Lines.clear();
     while (!PPLevelBranchIndex.empty() &&
@@ -724,7 +763,7 @@
     parseParens();
 
   size_t NbPreprocessorDirectives =
-      CurrentLines == &Lines ? PreprocessorDirectives.size() : 0;
+      !parsingPPDirective() ? PreprocessorDirectives.size() : 0;
   addUnwrappedLine();
   size_t OpeningLineIndex =
       CurrentLines->empty()
@@ -4152,12 +4191,25 @@
   Line->Level = OrigLevel;
 }
 
+bool UnwrappedLineParser::containsExpansion(const UnwrappedLine &Line) {
+  for (const auto &N : Line.Tokens) {
+    if (N.Tok->MacroCtx)
+      return true;
+    for (const UnwrappedLine &Child : N.Children)
+      if (containsExpansion(Child))
+        return true;
+  }
+  return false;
+}
+
 void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) {
   if (Line->Tokens.empty())
     return;
   LLVM_DEBUG({
-    if (CurrentLines == &Lines)
+    if (!parsingPPDirective()) {
+      llvm::dbgs() << "Adding unwrapped line:\n";
       printDebugInfo(*Line);
+    }
   });
 
   // If this line closes a block when in Whitesmiths mode, remember that
@@ -4168,7 +4220,39 @@
       Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex &&
       Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
 
-  CurrentLines->push_back(std::move(*Line));
+  // If the current line was expanded from a macro call, we use it to
+  // reconstruct an unwrapped line from the structure of the expanded unwrapped
+  // line and the unexpanded token stream.
+  if (!parsingPPDirective() && !InExpansion && containsExpansion(*Line)) {
+    if (!Reconstruct)
+      Reconstruct.emplace(Line->Level, Unexpanded);
+    Reconstruct->addLine(*Line);
+
+    // While the reconstructed unexpanded lines are stored in the normal
+    // flow of lines, the expanded lines are stored on the side to be analyzed
+    // in an extra step.
+    CurrentExpandedLines.push_back(std::move(*Line));
+
+    if (Reconstruct->finished()) {
+      UnwrappedLine Reconstructed = std::move(*Reconstruct).takeResult();
+      assert(!Reconstructed.Tokens.empty() &&
+             "Reconstructed must at least contain the macro identifier.");
+      assert(!parsingPPDirective());
+      LLVM_DEBUG({
+        llvm::dbgs() << "Adding unexpanded line:\n";
+        printDebugInfo(Reconstructed);
+      });
+      ExpandedLines[Reconstructed.Tokens.begin()->Tok] = CurrentExpandedLines;
+      Lines.push_back(std::move(Reconstructed));
+      CurrentExpandedLines.clear();
+      Reconstruct.reset();
+    }
+  } else {
+    // At the top level we only get here when no unexpansion is going on, or
+    // when conditional formatting led to unfinished macro reconstructions.
+    assert(!Reconstruct || (CurrentLines != &Lines) || PPStack.size() > 0);
+    CurrentLines->push_back(std::move(*Line));
+  }
   Line->Tokens.clear();
   Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
   Line->FirstStartColumn = 0;
@@ -4176,7 +4260,7 @@
 
   if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove)
     --Line->Level;
-  if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
+  if (!parsingPPDirective() && !PreprocessorDirectives.empty()) {
     CurrentLines->append(
         std::make_move_iterator(PreprocessorDirectives.begin()),
         std::make_move_iterator(PreprocessorDirectives.end()));
@@ -4470,6 +4554,56 @@
       continue;
     }
 
+    if (FormatTok->is(tok::identifier) &&
+        Macros.defined(FormatTok->TokenText) &&
+        (Macros.objectLike(FormatTok->TokenText) ||
+         Tokens->peekNextToken()->is(tok::l_paren)) &&
+        // FIXME: Allow expanding macros in preprocessor directives.
+        !Line->InPPDirective) {
+      FormatToken *ID = FormatTok;
+
+      // To correctly parse the code, we need to replace the tokens of the macro
+      // call with its expansion.
+      auto PreCall = std::move(Line);
+      Line.reset(new UnwrappedLine);
+      bool OldInExpansion = InExpansion;
+      InExpansion = true;
+      // We parse the macro call into a new line.
+      auto Args = parseMacroCall();
+      LLVM_DEBUG({
+        if (Args.size() != Macros.getArity(ID->TokenText)) {
+          llvm::dbgs() << "Macro " << ID->TokenText << " takes "
+                       << Macros.getArity(ID->TokenText)
+                       << " arguments, but got " << Args.size() << "\n";
+        }
+      });
+      InExpansion = OldInExpansion;
+      assert(Line->Tokens.front().Tok == ID);
+      // And remember the unexpanded macro call tokens.
+      Unexpanded[ID] = std::move(Line);
+
+      // Next, we insert the expanded tokens in the token stream at the current
+      // position, and continue parsing.
+      Line = std::move(PreCall);
+      SmallVector<FormatToken *, 8> New = Macros.expand(ID, Args);
+      if (!New.empty())
+        FormatTok = Tokens->insertTokens(New);
+
+      LLVM_DEBUG({
+        llvm::dbgs() << "Call: " << ID->TokenText << "(";
+        for (const auto &Arg : Args)
+          for (const auto &T : Arg)
+            llvm::dbgs() << T->TokenText << " ";
+        llvm::dbgs() << ")\n";
+      });
+      LLVM_DEBUG({
+        llvm::dbgs() << "Expanded: ";
+        for (const auto &T : New)
+          llvm::dbgs() << T->TokenText << " ";
+        llvm::dbgs() << "\n";
+      });
+    }
+
     if (!FormatTok->is(tok::comment)) {
       distributeComments(Comments, FormatTok);
       Comments.clear();
@@ -4483,6 +4617,67 @@
   Comments.clear();
 }
 
+namespace {
+template <typename Iterator>
+void pushTokens(Iterator Begin, Iterator End,
+                llvm::SmallVectorImpl<FormatToken *> &Into) {
+  for (auto I = Begin; I != End; ++I) {
+    Into.push_back(I->Tok);
+    for (const auto &Child : I->Children)
+      pushTokens(Child.Tokens.begin(), Child.Tokens.end(), Into);
+  }
+}
+} // namespace
+
+llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>
+UnwrappedLineParser::parseMacroCall() {
+  llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1> Args;
+  assert(Line->Tokens.empty());
+  FormatToken *ID = FormatTok;
+  nextToken();
+  if (Macros.objectLike(ID->TokenText))
+    return Args;
+  assert(FormatTok->is(tok::l_paren));
+  nextToken();
+  auto ArgStart = std::prev(Line->Tokens.end());
+
+  int Parens = 0;
+  do {
+    switch (FormatTok->Tok.getKind()) {
+    case tok::l_paren:
+      ++Parens;
+      nextToken();
+      break;
+    case tok::r_paren: {
+      if (Parens > 0) {
+        --Parens;
+        nextToken();
+        break;
+      }
+      Args.push_back({});
+      pushTokens(std::next(ArgStart), Line->Tokens.end(), Args.back());
+      nextToken();
+      return Args;
+    }
+    case tok::comma: {
+      if (Parens > 0) {
+        nextToken();
+        break;
+      }
+      Args.push_back({});
+      pushTokens(std::next(ArgStart), Line->Tokens.end(), Args.back());
+      nextToken();
+      ArgStart = std::prev(Line->Tokens.end());
+      break;
+    }
+    default:
+      nextToken();
+      break;
+    }
+  } while (!eof());
+  return {};
+}
+
 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
   Line->Tokens.push_back(UnwrappedLineNode(Tok));
   if (MustBreakBeforeNextToken) {
Index: clang/lib/Format/UnwrappedLineFormatter.cpp
===================================================================
--- clang/lib/Format/UnwrappedLineFormatter.cpp
+++ clang/lib/Format/UnwrappedLineFormatter.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "UnwrappedLineFormatter.h"
+#include "FormatToken.h"
 #include "NamespaceEndCommentsFixer.h"
 #include "WhitespaceManager.h"
 #include "llvm/Support/Debug.h"
@@ -918,9 +919,22 @@
 
 static void markFinalized(FormatToken *Tok) {
   for (; Tok; Tok = Tok->Next) {
-    Tok->Finalized = true;
-    for (AnnotatedLine *Child : Tok->Children)
-      markFinalized(Child->First);
+    if (Tok->MacroCtx && Tok->MacroCtx->Role == MR_ExpandedArg) {
+      // In the first pass we format all macro arguments in the expanded token
+      // stream. Instead of finalizing the macro arguments, we mark that they
+      // will be modified as unexpanded arguments (as part of the macro call
+      // formatting) in the next pass.
+      Tok->MacroCtx->Role = MR_UnexpandedArg;
+      // Reset whether spaces are required before this token, as that is context
+      // dependent, and that context may change when formatting the macro call.
+      // For example, given M(x) -> 2 * x, and the macro call M(var),
+      // the token 'var' will have SpacesRequiredBefore = 1 after being
+      // formatted as part of the expanded macro, but SpacesRequiredBefore = 0
+      // for its position within the macro call.
+      Tok->SpacesRequiredBefore = 0;
+    } else {
+      Tok->Finalized = true;
+    }
   }
 }
 
@@ -975,15 +989,15 @@
   bool formatChildren(LineState &State, bool NewLine, bool DryRun,
                       unsigned &Penalty) {
     const FormatToken *LBrace = State.NextToken->getPreviousNonComment();
+    bool HasLBrace = LBrace && LBrace->is(tok::l_brace) && LBrace->is(BK_Block);
     FormatToken &Previous = *State.NextToken->Previous;
-    if (!LBrace || LBrace->isNot(tok::l_brace) || LBrace->isNot(BK_Block) ||
-        Previous.Children.size() == 0) {
+    if (Previous.Children.size() == 0 || (!HasLBrace && !LBrace->MacroParent)) {
       // The previous token does not open a block. Nothing to do. We don't
       // assert so that we can simply call this function for all tokens.
       return true;
     }
 
-    if (NewLine) {
+    if (NewLine || Previous.MacroParent) {
       const ParenState &P = State.Stack.back();
 
       int AdditionalIndent =
@@ -1349,11 +1363,12 @@
       NextLine = Joiner.getNextMergedLine(DryRun, IndentTracker);
       unsigned ColumnLimit = getColumnLimit(TheLine.InPPDirective, NextLine);
       bool FitsIntoOneLine =
-          TheLine.Last->TotalLength + Indent <= ColumnLimit ||
-          (TheLine.Type == LT_ImportStatement &&
-           (!Style.isJavaScript() || !Style.JavaScriptWrapImports)) ||
-          (Style.isCSharp() &&
-           TheLine.InPPDirective); // don't split #regions in C#
+          !TheLine.ContainsMacroCall &&
+          (TheLine.Last->TotalLength + Indent <= ColumnLimit ||
+           (TheLine.Type == LT_ImportStatement &&
+            (!Style.isJavaScript() || !Style.JavaScriptWrapImports)) ||
+           (Style.isCSharp() &&
+            TheLine.InPPDirective)); // don't split #regions in C#
       if (Style.ColumnLimit == 0) {
         NoColumnLimitLineFormatter(Indenter, Whitespaces, Style, this)
             .formatLine(TheLine, NextStartColumn + Indent,
Index: clang/lib/Format/TokenAnnotator.h
===================================================================
--- clang/lib/Format/TokenAnnotator.h
+++ clang/lib/Format/TokenAnnotator.h
@@ -65,20 +65,32 @@
     // left them in a different state.
     First->Previous = nullptr;
     FormatToken *Current = First;
+    addChildren(Line.Tokens.front(), Current);
     for (const UnwrappedLineNode &Node : llvm::drop_begin(Line.Tokens)) {
+      if (Node.Tok->MacroParent)
+        ContainsMacroCall = true;
       Current->Next = Node.Tok;
       Node.Tok->Previous = Current;
       Current = Current->Next;
-      Current->Children.clear();
-      for (const auto &Child : Node.Children) {
-        Children.push_back(new AnnotatedLine(Child));
-        Current->Children.push_back(Children.back());
-      }
+      addChildren(Node, Current);
+      // FIXME: if we add children, previous will point to the token before
+      // the children; changing this requires significant changes across
+      // clang-format.
     }
     Last = Current;
     Last->Next = nullptr;
   }
 
+  void addChildren(const UnwrappedLineNode &Node, FormatToken *Current) {
+    Current->Children.clear();
+    for (const auto &Child : Node.Children) {
+      Children.push_back(new AnnotatedLine(Child));
+      if (Children.back()->ContainsMacroCall)
+        ContainsMacroCall = true;
+      Current->Children.push_back(Children.back());
+    }
+  }
+
   ~AnnotatedLine() {
     for (AnnotatedLine *Child : Children)
       delete Child;
@@ -149,6 +161,9 @@
   bool MightBeFunctionDecl;
   bool IsMultiVariableDeclStmt;
 
+  /// \c True if this line contains a macro call for which an expansion exists.
+  bool ContainsMacroCall = false;
+
   /// \c True if this line should be formatted, i.e. intersects directly or
   /// indirectly with one of the input ranges.
   bool Affected;
Index: clang/lib/Format/TokenAnnotator.cpp
===================================================================
--- clang/lib/Format/TokenAnnotator.cpp
+++ clang/lib/Format/TokenAnnotator.cpp
@@ -2595,6 +2595,13 @@
       // Consume operators with higher precedence.
       parse(Precedence + 1);
 
+      // Do not assign fake parenthesis to tokens that are part of an
+      // unexpanded macro call. The line within the macro call contains
+      // the parenthesis and commas, and we will not find operators within
+      // that structure.
+      if (Current && Current->MacroParent)
+        break;
+
       int CurrentPrecedence = getCurrentPrecedence();
 
       if (Precedence == CurrentPrecedence && Current &&
@@ -4225,8 +4232,12 @@
       Left.isOneOf(TT_TrailingReturnArrow, TT_LambdaArrow)) {
     return true;
   }
-  if (Left.is(tok::comma) && !Right.is(TT_OverloadedOperatorLParen))
+  if (Left.is(tok::comma) && !Right.is(TT_OverloadedOperatorLParen) &&
+      // In an unexpanded macro call we only find the parentheses and commas
+      // in a line; the commas and closing parenthesis do not require a space.
+      (Left.Children.empty() || !Left.MacroParent)) {
     return true;
+  }
   if (Right.is(tok::comma))
     return false;
   if (Right.is(TT_ObjCBlockLParen))
Index: clang/lib/Format/TokenAnalyzer.h
===================================================================
--- clang/lib/Format/TokenAnalyzer.h
+++ clang/lib/Format/TokenAnalyzer.h
@@ -46,7 +46,7 @@
 
   FileID getFileID() const { return ID; }
 
-  const SourceManager &getSourceManager() const { return SM; }
+  SourceManager &getSourceManager() const { return SM; }
 
   ArrayRef<CharSourceRange> getCharRanges() const { return CharRanges; }
 
Index: clang/lib/Format/TokenAnalyzer.cpp
===================================================================
--- clang/lib/Format/TokenAnalyzer.cpp
+++ clang/lib/Format/TokenAnalyzer.cpp
@@ -104,12 +104,12 @@
   IdentifierTable IdentTable(getFormattingLangOpts(Style));
   FormatTokenLexer Lex(Env.getSourceManager(), Env.getFileID(),
                        Env.getFirstStartColumn(), Style, Encoding, Allocator,
-
                        IdentTable);
   ArrayRef<FormatToken *> Toks(Lex.lex());
   SmallVector<FormatToken *, 10> Tokens(Toks.begin(), Toks.end());
-  UnwrappedLineParser Parser(Style, Lex.getKeywords(),
-                             Env.getFirstStartColumn(), Tokens, *this);
+  UnwrappedLineParser Parser(Env.getSourceManager(), Style, Lex.getKeywords(),
+                             Env.getFirstStartColumn(), Tokens, *this,
+                             Allocator, IdentTable);
   Parser.parse();
   assert(UnwrappedLines.back().empty());
   unsigned Penalty = 0;
Index: clang/lib/Format/Macros.h
===================================================================
--- clang/lib/Format/Macros.h
+++ clang/lib/Format/Macros.h
@@ -113,6 +113,9 @@
   /// subsequent parentheses.
   bool objectLike(llvm::StringRef Name) const;
 
+  /// Returns the arity of the macro \p Name.
+  unsigned getArity(llvm::StringRef Name) const;
+
   /// Returns the expanded stream of format tokens for \p ID, where
   /// each element in \p Args is a positional argument to the macro call.
   llvm::SmallVector<FormatToken *, 8> expand(FormatToken *ID,
@@ -149,7 +152,7 @@
 ///
 /// After this point, the state of the spelled/expanded stream is "in sync"
 /// (both at the start of an UnwrappedLine, with no macros open), so the
-/// Unexpander can be thrown away and parsing can continue.
+/// Reconstructor can be thrown away and parsing can continue.
 ///
 /// Given a mapping from the macro name identifier token in the macro call
 /// to the tokens of the macro call, for example:
Index: clang/lib/Format/MacroExpander.cpp
===================================================================
--- clang/lib/Format/MacroExpander.cpp
+++ clang/lib/Format/MacroExpander.cpp
@@ -145,6 +145,10 @@
   }
 }
 
+unsigned MacroExpander::getArity(llvm::StringRef Name) const {
+  return Definitions.find(Name)->second.Params.size();
+}
+
 bool MacroExpander::defined(llvm::StringRef Name) const {
   return Definitions.find(Name) != Definitions.end();
 }
Index: clang/lib/Format/FormatToken.h
===================================================================
--- clang/lib/Format/FormatToken.h
+++ clang/lib/Format/FormatToken.h
@@ -373,6 +373,11 @@
   /// binary operator.
   TokenType getType() const { return Type; }
   void setType(TokenType T) {
+    // If this token is a macro argument while formatting an unexpanded macro
+    // call, we do not change its type any more - the type was deduced from
+    // formatting the expanded macro stream already.
+    if (MacroCtx && MacroCtx->Role == MR_UnexpandedArg)
+      return;
     assert((!TypeIsFinalized || T == Type) &&
            "Please use overwriteFixedType to change a fixed type.");
     Type = T;
Index: clang/lib/Format/Format.cpp
===================================================================
--- clang/lib/Format/Format.cpp
+++ clang/lib/Format/Format.cpp
@@ -1036,6 +1036,7 @@
     IO.mapOptional("UseTab", Style.UseTab);
     IO.mapOptional("WhitespaceSensitiveMacros",
                    Style.WhitespaceSensitiveMacros);
+    IO.mapOptional("Macros", Style.Macros);
 
     // If AlwaysBreakAfterDefinitionReturnType was specified but
     // AlwaysBreakAfterReturnType was not, initialize the latter from the
Index: clang/lib/Format/ContinuationIndenter.cpp
===================================================================
--- clang/lib/Format/ContinuationIndenter.cpp
+++ clang/lib/Format/ContinuationIndenter.cpp
@@ -18,6 +18,7 @@
 #include "WhitespaceManager.h"
 #include "clang/Basic/OperatorPrecedence.h"
 #include "clang/Basic/SourceManager.h"
+#include "clang/Basic/TokenKinds.h"
 #include "clang/Format/Format.h"
 #include "llvm/ADT/StringSet.h"
 #include "llvm/Support/Debug.h"
@@ -739,9 +740,14 @@
   if (Previous.is(TT_TemplateString) && Previous.opensScope())
     CurrentState.NoLineBreak = true;
 
+  // Align following lines within parentheses / brackets if configured.
+  // Note: This doesn't apply to macro expansion lines, which are MACRO( , , )
+  // with args as children of the '(' and ',' tokens. It does not make sense to
+  // align the commas with the opening paren.
   if (Style.AlignAfterOpenBracket != FormatStyle::BAS_DontAlign &&
       !CurrentState.IsCSharpGenericTypeConstraint && Previous.opensScope() &&
       Previous.isNot(TT_ObjCMethodExpr) && Previous.isNot(TT_RequiresClause) &&
+      !(Current.MacroParent && Previous.MacroParent) &&
       (Current.isNot(TT_LineComment) || Previous.is(BK_BracedInit))) {
     CurrentState.Indent = State.Column + Spaces;
     CurrentState.IsAligned = true;
Index: clang/include/clang/Format/Format.h
===================================================================
--- clang/include/clang/Format/Format.h
+++ clang/include/clang/Format/Format.h
@@ -2745,6 +2745,23 @@
   /// \version 3.7
   std::string MacroBlockEnd;
 
+  /// A list of macros of the form \c <definition>=<expansion> .
+  ///
+  /// Code will be parsed with macros expanded, and formatting will try to best
+  /// match the structure of the expanded call.
+  ///
+  /// For example, with the macro "A(x)=x", the code
+  /// \code
+  ///    A(a * b);
+  /// \endcode
+  /// will be formatted as a declaration of the variable \c b of type \c A*
+  /// (depending on pointer-binding rules)
+  /// \code
+  ///    A(a* b);
+  /// \endcode
+  /// instead of as multiplication.
+  std::vector<std::string> Macros;
+
   /// The maximum number of consecutive empty lines to keep.
   /// \code
   ///    MaxEmptyLinesToKeep: 1         vs.     MaxEmptyLinesToKeep: 0
@@ -4306,7 +4323,8 @@
            StatementAttributeLikeMacros == R.StatementAttributeLikeMacros &&
            StatementMacros == R.StatementMacros && TabWidth == R.TabWidth &&
            TypenameMacros == R.TypenameMacros && UseTab == R.UseTab &&
-           WhitespaceSensitiveMacros == R.WhitespaceSensitiveMacros;
+           WhitespaceSensitiveMacros == R.WhitespaceSensitiveMacros &&
+           Macros == R.Macros;
   }
 
   std::optional<FormatStyle> GetLanguageStyle(LanguageKind Language) const;
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to