klimek created this revision.
klimek added a reviewer: sammccall.
Herald added a project: All.
klimek requested review of this revision.
Herald added a project: clang.

Add configuration to specify macros.
Macros will be expanded, and the code will be parsed and annotated
in the expanded state. In a second step, the formatting decisions
in the annotated expanded code will be reconstructed onto the
original unexpanded macro call.

Eventually, this will allow us to remove special-case code for
various macro options we accumulated over the years in favor of
one principled mechanism.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D144170

Files:
  clang/include/clang/Format/Format.h
  clang/lib/Format/ContinuationIndenter.cpp
  clang/lib/Format/Format.cpp
  clang/lib/Format/FormatToken.h
  clang/lib/Format/Macros.h
  clang/lib/Format/TokenAnalyzer.cpp
  clang/lib/Format/TokenAnalyzer.h
  clang/lib/Format/TokenAnnotator.cpp
  clang/lib/Format/TokenAnnotator.h
  clang/lib/Format/UnwrappedLineFormatter.cpp
  clang/lib/Format/UnwrappedLineParser.cpp
  clang/lib/Format/UnwrappedLineParser.h
  clang/lib/Format/WhitespaceManager.cpp
  clang/unittests/Format/FormatTest.cpp
  clang/unittests/Format/TestLexer.h

Index: clang/unittests/Format/TestLexer.h
===================================================================
--- clang/unittests/Format/TestLexer.h
+++ clang/unittests/Format/TestLexer.h
@@ -72,7 +72,8 @@
   TokenList annotate(llvm::StringRef Code) {
     FormatTokenLexer Lex = getNewLexer(Code);
     auto Tokens = Lex.lex();
-    UnwrappedLineParser Parser(Style, Lex.getKeywords(), 0, Tokens, *this);
+    UnwrappedLineParser Parser(SourceMgr.get(), Style, Lex.getKeywords(), 0,
+                               Tokens, *this, Allocator, IdentTable);
     Parser.parse();
     TokenAnnotator Annotator(Style, Lex.getKeywords());
     for (auto &Line : UnwrappedLines) {
Index: clang/unittests/Format/FormatTest.cpp
===================================================================
--- clang/unittests/Format/FormatTest.cpp
+++ clang/unittests/Format/FormatTest.cpp
@@ -66,7 +66,8 @@
 
   void _verifyFormat(const char *File, int Line, llvm::StringRef Expected,
                      llvm::StringRef Code,
-                     const FormatStyle &Style = getLLVMStyle()) {
+                     const FormatStyle &Style = getLLVMStyle(),
+                     bool MessUp = true) {
     ScopedTrace t(File, Line, ::testing::Message() << Code.str());
     EXPECT_EQ(Expected.str(), format(Expected, Style))
         << "Expected code is not stable";
@@ -76,20 +77,24 @@
       // needs to be checked for Objective-C++ as well.
       FormatStyle ObjCStyle = Style;
       ObjCStyle.Language = FormatStyle::LK_ObjC;
-      EXPECT_EQ(Expected.str(), format(test::messUp(Code), ObjCStyle));
+      EXPECT_EQ(Expected.str(),
+                format(MessUp ? test::messUp(Code) : Code, ObjCStyle));
     }
   }
 
   void _verifyFormat(const char *File, int Line, llvm::StringRef Code,
-                     const FormatStyle &Style = getLLVMStyle()) {
-    _verifyFormat(File, Line, Code, test::messUp(Code), Style);
+                     const FormatStyle &Style = getLLVMStyle(),
+                     bool MessUp = true) {
+    _verifyFormat(File, Line, Code, MessUp ? test::messUp(Code) : Code, Style,
+                  MessUp);
   }
 
   void _verifyIncompleteFormat(const char *File, int Line, llvm::StringRef Code,
-                               const FormatStyle &Style = getLLVMStyle()) {
+                               const FormatStyle &Style = getLLVMStyle(),
+                               bool MessUp = true) {
     ScopedTrace t(File, Line, ::testing::Message() << Code.str());
-    EXPECT_EQ(Code.str(),
-              format(test::messUp(Code), Style, SC_ExpectIncomplete));
+    EXPECT_EQ(Code.str(), format(MessUp ? test::messUp(Code) : Code, Style,
+                                 SC_ExpectIncomplete));
   }
 
   void _verifyIndependentOfContext(const char *File, int Line,
@@ -22568,6 +22573,189 @@
                "aaaallvm::outs()\n    <<");
 }
 
+TEST_F(FormatTest, UnexpandConfiguredMacros) {
+  FormatStyle Style = getLLVMStyle();
+  Style.Macros.push_back("CLASS=class C {");
+  Style.Macros.push_back("SEMI=;");
+  Style.Macros.push_back("STMT=f();");
+  Style.Macros.push_back("ID(x)=x");
+  Style.Macros.push_back("ID3(x, y, z)=x y z");
+  Style.Macros.push_back("CALL(x)=f([] { x })");
+  Style.Macros.push_back("ASSIGN_OR_RETURN(a, b, c)=a = (b) || (c)");
+
+  verifyFormat("ID(nested(a(b, c), d))", Style);
+  verifyFormat("CLASS\n"
+               "  a *b;\n"
+               "};",
+               Style);
+  verifyFormat("SEMI\n"
+               "SEMI\n"
+               "SEMI",
+               Style);
+  verifyFormat("STMT\n"
+               "STMT\n"
+               "STMT",
+               Style);
+  verifyFormat("void f() { ID(a *b); }", Style);
+  verifyFormat(R"(ID(
+    { ID(a *b); });
+)",
+               Style);
+  verifyIncompleteFormat(R"(ID3({, ID(a *b),
+  ;
+  });
+)",
+                         Style);
+
+  verifyFormat("ID(CALL(CALL(return a * b;)));", Style);
+
+  verifyFormat("ASSIGN_OR_RETURN(MySomewhatLongType *variable,\n"
+               "                 MySomewhatLongFunction(SomethingElse()));\n",
+               Style);
+
+  verifyFormat(R"(
+#define MACRO(a, b) ID(a + b)
+)",
+               Style);
+  EXPECT_EQ(R"(
+int a;
+int b;
+int c;
+int d;
+int e;
+int f;
+ID(
+    namespace foo {
+    int a;
+    }
+) // namespace k
+)",
+            format(R"(
+int a;
+int b;
+int c;
+int d;
+int e;
+int f;
+ID(namespace foo { int a; })  // namespace k
+)",
+                   Style));
+  verifyFormat(R"(ID(
+    //
+    ({ ; }))
+)",
+               Style);
+
+  Style.ColumnLimit = 35;
+  // FIXME: Arbitrary formatting of macros where the end of the logical
+  // line is in the middle of a macro call is not working yet.
+  verifyFormat(R"(ID(
+    void f();
+    void)
+ID(g) ID(()) ID(
+    ;
+    void g();)
+)",
+               Style);
+
+  Style.ColumnLimit = 10;
+  verifyFormat("STMT\n"
+               "STMT\n"
+               "STMT",
+               Style);
+
+  EXPECT_EQ(R"(
+ID(CALL(CALL(
+    a *b)));
+)",
+            format(R"(
+ID(CALL(CALL(a * b)));
+)",
+                   Style));
+
+  // FIXME: If we want to support unbalanced braces or parens from macro
+  // expansions we need to re-think how we propagate errors in
+  // TokenAnnotator::parseLine; for investigation, switching the inner loop of
+  // TokenAnnotator::parseLine to return LT_Other instead of LT_Invalid in case
+  // of !consumeToken() changes the formatting of the test below and makes it
+  // believe it has a fully correct formatting.
+  EXPECT_EQ(R"(
+ID3(
+    {
+    CLASS
+    a *b;
+    };
+    },
+    ID(x *y);
+    ,
+    STMT
+    STMT
+    STMT)
+void f();
+)",
+            format(R"(
+ID3({CLASS a*b; };}, ID(x*y);, STMT STMT STMT)
+void f();
+)",
+                   Style));
+
+  verifyFormat("ID(a(\n"
+               "#ifdef A\n"
+               "    b, c\n"
+               "#else\n"
+               "    d(e)\n"
+               "#endif\n"
+               "    ))",
+               Style);
+  Style.ColumnLimit = 80;
+  verifyFormat(R"(ASSIGN_OR_RETURN(
+    // Comment
+    a b, c);
+)",
+               Style);
+  Style.ColumnLimit = 30;
+  verifyFormat(R"(ASSIGN_OR_RETURN(
+    // Comment
+    //
+    a b,
+    xxxxxxxxxxxx(
+        yyyyyyyyyyyyyyyyy,
+        zzzzzzzzzzzzzzzzzz),
+    f([]() {
+      a();
+      b();
+    }));
+)",
+               Style);
+  verifyFormat(R"(int a = []() {
+  ID(
+      x;
+      y;
+      z;)
+  ;
+}();
+)",
+               Style);
+  verifyIncompleteFormat(R"(ASSIGN_OR_RETURN((
+====
+#))
+})",
+                         Style, /*MessUp=*/false);
+  verifyIncompleteFormat(R"(ASSIGN_OR_RETURN(
+}
+(
+====
+#),
+a))",
+                         Style, /*MessUp=*/false);
+  verifyFormat(R"(ASSIGN_OR_RETURN(a
+//
+====
+#
+                 <))",
+               Style, /*MessUp=*/false);
+}
+
 TEST_F(FormatTest, HandleUnbalancedImplicitBracesAcrossPPBranches) {
   std::string code = "#if A\n"
                      "#if B\n"
Index: clang/lib/Format/WhitespaceManager.cpp
===================================================================
--- clang/lib/Format/WhitespaceManager.cpp
+++ clang/lib/Format/WhitespaceManager.cpp
@@ -49,7 +49,7 @@
                                           unsigned Spaces,
                                           unsigned StartOfTokenColumn,
                                           bool IsAligned, bool InPPDirective) {
-  if (Tok.Finalized)
+  if (Tok.Finalized || (Tok.MacroCtx && Tok.MacroCtx->Role == MR_ExpandedArg))
     return;
   Tok.setDecision((Newlines > 0) ? FD_Break : FD_Continue);
   Changes.push_back(Change(Tok, /*CreateReplacement=*/true, Tok.WhitespaceRange,
@@ -60,7 +60,7 @@
 
 void WhitespaceManager::addUntouchableToken(const FormatToken &Tok,
                                             bool InPPDirective) {
-  if (Tok.Finalized)
+  if (Tok.Finalized || (Tok.MacroCtx && Tok.MacroCtx->Role == MR_ExpandedArg))
     return;
   Changes.push_back(Change(Tok, /*CreateReplacement=*/false,
                            Tok.WhitespaceRange, /*Spaces=*/0,
@@ -84,7 +84,7 @@
     const FormatToken &Tok, unsigned Offset, unsigned ReplaceChars,
     StringRef PreviousPostfix, StringRef CurrentPrefix, bool InPPDirective,
     unsigned Newlines, int Spaces) {
-  if (Tok.Finalized)
+  if (Tok.Finalized || (Tok.MacroCtx && Tok.MacroCtx->Role == MR_ExpandedArg))
     return;
   SourceLocation Start = Tok.getStartOfNonWhitespace().getLocWithOffset(Offset);
   Changes.push_back(
Index: clang/lib/Format/UnwrappedLineParser.h
===================================================================
--- clang/lib/Format/UnwrappedLineParser.h
+++ clang/lib/Format/UnwrappedLineParser.h
@@ -15,10 +15,14 @@
 #ifndef LLVM_CLANG_LIB_FORMAT_UNWRAPPEDLINEPARSER_H
 #define LLVM_CLANG_LIB_FORMAT_UNWRAPPEDLINEPARSER_H
 
+#include "Encoding.h"
 #include "FormatToken.h"
+#include "Macros.h"
 #include "clang/Basic/IdentifierTable.h"
 #include "clang/Format/Format.h"
+#include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseSet.h"
 #include "llvm/Support/Regex.h"
 #include <list>
 #include <stack>
@@ -71,6 +75,11 @@
   /// line with the corresponding closing brace.
   size_t MatchingClosingBlockLineIndex = kInvalidIndex;
 
+  void resetIndexes() {
+    MatchingOpeningBlockLineIndex = kInvalidIndex;
+    MatchingClosingBlockLineIndex = kInvalidIndex;
+  }
+
   static const size_t kInvalidIndex = -1;
 
   unsigned FirstStartColumn = 0;
@@ -84,13 +93,17 @@
 };
 
 class FormatTokenSource;
+class MacroCallReconstructor;
 
 class UnwrappedLineParser {
 public:
-  UnwrappedLineParser(const FormatStyle &Style,
+  UnwrappedLineParser(SourceManager &SourceMgr, const FormatStyle &Style,
                       const AdditionalKeywords &Keywords,
                       unsigned FirstStartColumn, ArrayRef<FormatToken *> Tokens,
-                      UnwrappedLineConsumer &Callback);
+                      UnwrappedLineConsumer &Callback,
+                      llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,
+                      IdentifierTable &IdentTable);
+  ~UnwrappedLineParser();
 
   void parse();
 
@@ -193,6 +206,7 @@
   unsigned parseVerilogHierarchyHeader();
   void parseVerilogTable();
   void parseVerilogCaseLabel();
+  llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1> parseMacroCall();
 
   // Used by addUnwrappedLine to denote whether to keep or remove a level
   // when resetting the line state.
@@ -236,6 +250,10 @@
 
   bool isOnNewLine(const FormatToken &FormatTok);
 
+  // Returns whether there is a macro expansion in the line, i.e. a token that
+  // was expanded from a macro call.
+  bool containsExpansion(const UnwrappedLine &Line);
+
   // Compute hash of the current preprocessor branch.
   // This is used to identify the different branches, and thus track if block
   // open and close in the same branch.
@@ -246,6 +264,26 @@
   // and use that everywhere in the Parser.
   std::unique_ptr<UnwrappedLine> Line;
 
+  // Lines that are created by macro expansion.
+  // When formatting code containing macro calls, we first format the expanded
+  // lines to set the token types correctly. Afterwards, we format the
+  // reconstructed macro calls, re-using the token types determined in the first
+  // step.
+  SmallVector<UnwrappedLine, 8> ExpandedLines;
+
+  // Map from the macro identifier to a line containing the full unexpanded
+  // macro call.
+  llvm::DenseMap<FormatToken *, std::unique_ptr<UnwrappedLine>> Unexpanded;
+
+  // For recursive macro expansions, trigger reconstruction only on the
+  // outermost expansion.
+  bool InExpansion = false;
+
+  // Non-null while we reconstruct a macro call.
+  // For reconstruction, we feed the expanded lines into the reconstructor
+  // until it is finished.
+  std::unique_ptr<MacroCallReconstructor> Reconstruct;
+
   // Comments are sorted into unwrapped lines by whether they are in the same
   // line as the previous token, or not. If not, they belong to the next token.
   // Since the next token might already be in a new unwrapped line, we need to
@@ -345,13 +383,17 @@
   // does not start at the beginning of the file.
   unsigned FirstStartColumn;
 
+  MacroExpander Macros;
+
   friend class ScopedLineState;
   friend class CompoundStatementIndenter;
 };
 
 struct UnwrappedLineNode {
   UnwrappedLineNode() : Tok(nullptr) {}
-  UnwrappedLineNode(FormatToken *Tok) : Tok(Tok) {}
+  UnwrappedLineNode(FormatToken *Tok,
+                    llvm::ArrayRef<UnwrappedLine> Children = {})
+      : Tok(Tok), Children(Children.begin(), Children.end()) {}
 
   FormatToken *Tok;
   SmallVector<UnwrappedLine, 0> Children;
Index: clang/lib/Format/UnwrappedLineParser.cpp
===================================================================
--- clang/lib/Format/UnwrappedLineParser.cpp
+++ clang/lib/Format/UnwrappedLineParser.cpp
@@ -14,11 +14,15 @@
 
 #include "UnwrappedLineParser.h"
 #include "FormatToken.h"
+#include "FormatTokenLexer.h"
 #include "FormatTokenSource.h"
+#include "Macros.h"
 #include "TokenAnnotator.h"
 #include "clang/Basic/TokenKinds.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_os_ostream.h"
 #include "llvm/Support/raw_ostream.h"
 
 #include <algorithm>
@@ -143,11 +147,12 @@
   unsigned OldLineLevel;
 };
 
-UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
-                                         const AdditionalKeywords &Keywords,
-                                         unsigned FirstStartColumn,
-                                         ArrayRef<FormatToken *> Tokens,
-                                         UnwrappedLineConsumer &Callback)
+UnwrappedLineParser::UnwrappedLineParser(
+    SourceManager &SourceMgr, const FormatStyle &Style,
+    const AdditionalKeywords &Keywords, unsigned FirstStartColumn,
+    ArrayRef<FormatToken *> Tokens, UnwrappedLineConsumer &Callback,
+    llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,
+    IdentifierTable &IdentTable)
     : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
       CurrentLines(&Lines), Style(Style), Keywords(Keywords),
       CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
@@ -155,7 +160,10 @@
       IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
                        ? IG_Rejected
                        : IG_Inited),
-      IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {}
+      IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn),
+      Macros(Style.Macros, SourceMgr, Style, Allocator, IdentTable) {}
+
+UnwrappedLineParser::~UnwrappedLineParser() {}
 
 void UnwrappedLineParser::reset() {
   PPBranchLevel = -1;
@@ -173,6 +181,14 @@
   NestedTooDeep.clear();
   PPStack.clear();
   Line->FirstStartColumn = FirstStartColumn;
+
+  if (!Unexpanded.empty())
+    for (FormatToken *Token : AllTokens)
+      Token->MacroCtx.reset();
+  ExpandedLines.clear();
+  Unexpanded.clear();
+  InExpansion = false;
+  Reconstruct.reset();
 }
 
 void UnwrappedLineParser::parse() {
@@ -196,12 +212,24 @@
     }
 
     // Create line with eof token.
+    assert(FormatTok->is(tok::eof));
     pushToken(FormatTok);
     addUnwrappedLine();
 
-    for (const UnwrappedLine &Line : Lines)
-      Callback.consumeUnwrappedLine(Line);
+    if (!ExpandedLines.empty()) {
+      LLVM_DEBUG(llvm::dbgs() << "Expanded lines:\n");
+      for (const auto &Line : ExpandedLines) {
+        LLVM_DEBUG(printDebugInfo(Line));
+        Callback.consumeUnwrappedLine(Line);
+      }
+      Callback.finishRun();
+    }
 
+    LLVM_DEBUG(llvm::dbgs() << "Unwrapped lines:\n");
+    for (const UnwrappedLine &Line : Lines) {
+      LLVM_DEBUG(printDebugInfo(Line));
+      Callback.consumeUnwrappedLine(Line);
+    }
     Callback.finishRun();
     Lines.clear();
     while (!PPLevelBranchIndex.empty() &&
@@ -4152,12 +4180,25 @@
   Line->Level = OrigLevel;
 }
 
+bool UnwrappedLineParser::containsExpansion(const UnwrappedLine &Line) {
+  for (const auto &N : Line.Tokens) {
+    if (N.Tok->MacroCtx)
+      return true;
+    for (const UnwrappedLine &Child : N.Children)
+      if (containsExpansion(Child))
+        return true;
+  }
+  return false;
+}
+
 void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) {
   if (Line->Tokens.empty())
     return;
   LLVM_DEBUG({
-    if (CurrentLines == &Lines)
+    if (CurrentLines == &Lines) {
+      llvm::dbgs() << "Adding unwrapped line:\n";
       printDebugInfo(*Line);
+    }
   });
 
   // If this line closes a block when in Whitesmiths mode, remember that
@@ -4168,7 +4209,44 @@
       Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex &&
       Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
 
-  CurrentLines->push_back(std::move(*Line));
+  // If the current line was expanded from a macro call, we use it to
+  // reconstruct an unwrapped line from the structure of the expanded unwrapped
+  // line and the unexpanded token stream.
+  if (CurrentLines == &Lines && !InExpansion && containsExpansion(*Line)) {
+    if (!Reconstruct) {
+      Reconstruct =
+          std::make_unique<MacroCallReconstructor>(Line->Level, Unexpanded);
+    }
+    Reconstruct->addLine(*Line);
+    if (Reconstruct->finished()) {
+      UnwrappedLine Reconstructed = std::move(*Reconstruct).takeResult();
+      assert(!Reconstructed.Tokens.empty() &&
+             "Reconstructed must at least contain the macro identifier.");
+      LLVM_DEBUG({
+        if (CurrentLines == &Lines) {
+          llvm::dbgs() << "Adding unexpanded line:\n";
+          printDebugInfo(Reconstructed);
+        }
+      });
+      CurrentLines->push_back(std::move(Reconstructed));
+      Reconstruct.reset();
+    }
+    // FIXME: We format the expanded lines in an extra step that does not give
+    // the formatter all unwrapped lines, thus the indexes are invalid; to allow
+    // all features during expanded line formatting, recalculate the indexes
+    // based on the available expanded lines where possible.
+    Line->resetIndexes();
+
+    // While the reconstructed unexpanded lines are stored in the normal
+    // flow of lines, the expanded lines are stored on the side to be analyzed
+    // in an extra step.
+    ExpandedLines.push_back(std::move(*Line));
+  } else {
+    // At the top level we only get here when no unexpansion is going on, or
+    // when conditional formatting led to unfinished macro reconstructions.
+    assert(!Reconstruct || (CurrentLines != &Lines) || PPStack.size() > 0);
+    CurrentLines->push_back(std::move(*Line));
+  }
   Line->Tokens.clear();
   Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
   Line->FirstStartColumn = 0;
@@ -4470,6 +4548,47 @@
       continue;
     }
 
+    if (FormatTok->is(tok::identifier) &&
+        Macros.defined(FormatTok->TokenText) &&
+        // FIXME: Allow expanding macros in preprocessor directives.
+        !Line->InPPDirective) {
+      FormatToken *ID = FormatTok;
+
+      // To correctly parse the code, we need to replace the tokens of the macro
+      // call with its expansion.
+      auto PreCall = std::move(Line);
+      Line.reset(new UnwrappedLine);
+      bool OldInExpansion = InExpansion;
+      InExpansion = true;
+      // We parse the macro call into a new line.
+      auto Args = parseMacroCall();
+      InExpansion = OldInExpansion;
+      assert(Line->Tokens.front().Tok == ID);
+      // And remember the unexpanded macro call tokens.
+      Unexpanded[ID] = std::move(Line);
+
+      // Next, we insert the expanded tokens in the token stream at the current
+      // position, and continue parsing.
+      Line = std::move(PreCall);
+      SmallVector<FormatToken *, 8> New = Macros.expand(ID, Args);
+      if (!New.empty())
+        FormatTok = Tokens->insertTokens(New);
+
+      LLVM_DEBUG({
+        llvm::dbgs() << "Call: " << ID->TokenText << "(";
+        for (const auto &Arg : Args)
+          for (const auto &T : Arg)
+            llvm::dbgs() << T->TokenText << " ";
+        llvm::dbgs() << ")\n";
+      });
+      LLVM_DEBUG({
+        llvm::dbgs() << "Expanded: ";
+        for (const auto &T : New)
+          llvm::dbgs() << T->TokenText << " ";
+        llvm::dbgs() << "\n";
+      });
+    }
+
     if (!FormatTok->is(tok::comment)) {
       distributeComments(Comments, FormatTok);
       Comments.clear();
@@ -4483,6 +4602,65 @@
   Comments.clear();
 }
 
+namespace {
+template <typename Iterator>
+void pushTokens(Iterator Begin, Iterator End,
+                llvm::SmallVectorImpl<FormatToken *> &Into) {
+  for (auto I = Begin; I != End; ++I) {
+    Into.push_back(I->Tok);
+    for (const auto &Child : I->Children)
+      pushTokens(Child.Tokens.begin(), Child.Tokens.end(), Into);
+  }
+}
+} // namespace
+
+llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>
+UnwrappedLineParser::parseMacroCall() {
+  llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1> Args;
+  assert(Line->Tokens.empty());
+  nextToken();
+  if (FormatTok->isNot(tok::l_paren))
+    return Args;
+  nextToken();
+  auto ArgStart = std::prev(Line->Tokens.end());
+
+  int Parens = 0;
+  do {
+    switch (FormatTok->Tok.getKind()) {
+    case tok::l_paren:
+      ++Parens;
+      nextToken();
+      break;
+    case tok::r_paren: {
+      if (Parens > 0) {
+        --Parens;
+        nextToken();
+        break;
+      }
+      Args.push_back({});
+      pushTokens(std::next(ArgStart), Line->Tokens.end(), Args.back());
+      nextToken();
+      return Args;
+    }
+    case tok::comma: {
+      if (Parens > 0) {
+        nextToken();
+        break;
+      }
+      Args.push_back({});
+      pushTokens(std::next(ArgStart), Line->Tokens.end(), Args.back());
+      nextToken();
+      ArgStart = std::prev(Line->Tokens.end());
+      break;
+    }
+    default:
+      nextToken();
+      break;
+    }
+  } while (!eof());
+  return {};
+}
+
 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
   Line->Tokens.push_back(UnwrappedLineNode(Tok));
   if (MustBreakBeforeNextToken) {
Index: clang/lib/Format/UnwrappedLineFormatter.cpp
===================================================================
--- clang/lib/Format/UnwrappedLineFormatter.cpp
+++ clang/lib/Format/UnwrappedLineFormatter.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "UnwrappedLineFormatter.h"
+#include "FormatToken.h"
 #include "NamespaceEndCommentsFixer.h"
 #include "WhitespaceManager.h"
 #include "llvm/Support/Debug.h"
@@ -918,9 +919,12 @@
 
 static void markFinalized(FormatToken *Tok) {
   for (; Tok; Tok = Tok->Next) {
-    Tok->Finalized = true;
-    for (AnnotatedLine *Child : Tok->Children)
-      markFinalized(Child->First);
+    if (Tok->MacroCtx && Tok->MacroCtx->Role == MR_ExpandedArg) {
+      Tok->MacroCtx->Role = MR_UnexpandedArg;
+      Tok->SpacesRequiredBefore = 0;
+    } else {
+      Tok->Finalized = true;
+    }
   }
 }
 
@@ -975,15 +979,15 @@
   bool formatChildren(LineState &State, bool NewLine, bool DryRun,
                       unsigned &Penalty) {
     const FormatToken *LBrace = State.NextToken->getPreviousNonComment();
+    bool HasLBrace = LBrace && LBrace->is(tok::l_brace) && LBrace->is(BK_Block);
     FormatToken &Previous = *State.NextToken->Previous;
-    if (!LBrace || LBrace->isNot(tok::l_brace) || LBrace->isNot(BK_Block) ||
-        Previous.Children.size() == 0) {
+    if (Previous.Children.size() == 0 || (!HasLBrace && !LBrace->MacroParent)) {
       // The previous token does not open a block. Nothing to do. We don't
       // assert so that we can simply call this function for all tokens.
       return true;
     }
 
-    if (NewLine) {
+    if (NewLine || Previous.MacroParent) {
       const ParenState &P = State.Stack.back();
 
       int AdditionalIndent =
@@ -1349,11 +1353,12 @@
       NextLine = Joiner.getNextMergedLine(DryRun, IndentTracker);
       unsigned ColumnLimit = getColumnLimit(TheLine.InPPDirective, NextLine);
       bool FitsIntoOneLine =
-          TheLine.Last->TotalLength + Indent <= ColumnLimit ||
-          (TheLine.Type == LT_ImportStatement &&
-           (!Style.isJavaScript() || !Style.JavaScriptWrapImports)) ||
-          (Style.isCSharp() &&
-           TheLine.InPPDirective); // don't split #regions in C#
+          !TheLine.ContainsMacroCall &&
+          (TheLine.Last->TotalLength + Indent <= ColumnLimit ||
+           (TheLine.Type == LT_ImportStatement &&
+            (!Style.isJavaScript() || !Style.JavaScriptWrapImports)) ||
+           (Style.isCSharp() &&
+            TheLine.InPPDirective)); // don't split #regions in C#
       if (Style.ColumnLimit == 0) {
         NoColumnLimitLineFormatter(Indenter, Whitespaces, Style, this)
             .formatLine(TheLine, NextStartColumn + Indent,
Index: clang/lib/Format/TokenAnnotator.h
===================================================================
--- clang/lib/Format/TokenAnnotator.h
+++ clang/lib/Format/TokenAnnotator.h
@@ -65,20 +65,32 @@
     // left them in a different state.
     First->Previous = nullptr;
     FormatToken *Current = First;
+    addChildren(Line.Tokens.front(), Current);
     for (const UnwrappedLineNode &Node : llvm::drop_begin(Line.Tokens)) {
+      if (Node.Tok->MacroParent)
+        ContainsMacroCall = true;
       Current->Next = Node.Tok;
       Node.Tok->Previous = Current;
       Current = Current->Next;
-      Current->Children.clear();
-      for (const auto &Child : Node.Children) {
-        Children.push_back(new AnnotatedLine(Child));
-        Current->Children.push_back(Children.back());
-      }
+      addChildren(Node, Current);
+      // FIXME: if we add children, previous will point to the token before
+      // the children; changing this requires significant changes across
+      // clang-format.
     }
     Last = Current;
     Last->Next = nullptr;
   }
 
+  void addChildren(const UnwrappedLineNode &Node, FormatToken *Current) {
+    Current->Children.clear();
+    for (const auto &Child : Node.Children) {
+      Children.push_back(new AnnotatedLine(Child));
+      if (Children.back()->ContainsMacroCall)
+        ContainsMacroCall = true;
+      Current->Children.push_back(Children.back());
+    }
+  }
+
   ~AnnotatedLine() {
     for (AnnotatedLine *Child : Children)
       delete Child;
@@ -149,6 +161,9 @@
   bool MightBeFunctionDecl;
   bool IsMultiVariableDeclStmt;
 
+  /// \c True if this line contains a macro call for which an expansion exists.
+  bool ContainsMacroCall = false;
+
   /// \c True if this line should be formatted, i.e. intersects directly or
   /// indirectly with one of the input ranges.
   bool Affected;
Index: clang/lib/Format/TokenAnnotator.cpp
===================================================================
--- clang/lib/Format/TokenAnnotator.cpp
+++ clang/lib/Format/TokenAnnotator.cpp
@@ -2595,6 +2595,13 @@
       // Consume operators with higher precedence.
       parse(Precedence + 1);
 
+      // Do not assign fake parenthesis to tokens that are part of an
+      // unexpanded macro call. The line within the macro call contains
+      // the parenthesis and commas, and we will not find operators within
+      // that structure.
+      if (Current && Current->MacroParent)
+        break;
+
       int CurrentPrecedence = getCurrentPrecedence();
 
       if (Precedence == CurrentPrecedence && Current &&
@@ -4225,8 +4232,12 @@
       Left.isOneOf(TT_TrailingReturnArrow, TT_LambdaArrow)) {
     return true;
   }
-  if (Left.is(tok::comma) && !Right.is(TT_OverloadedOperatorLParen))
+  if (Left.is(tok::comma) && !Right.is(TT_OverloadedOperatorLParen) &&
+      // In an unexpanded macro call we only find the parentheses and commas
+      // in a line; the commas and closing parenthesis do not require a space.
+      (Left.Children.empty() || !Left.MacroParent)) {
     return true;
+  }
   if (Right.is(tok::comma))
     return false;
   if (Right.is(TT_ObjCBlockLParen))
Index: clang/lib/Format/TokenAnalyzer.h
===================================================================
--- clang/lib/Format/TokenAnalyzer.h
+++ clang/lib/Format/TokenAnalyzer.h
@@ -46,7 +46,7 @@
 
   FileID getFileID() const { return ID; }
 
-  const SourceManager &getSourceManager() const { return SM; }
+  SourceManager &getSourceManager() const { return SM; }
 
   ArrayRef<CharSourceRange> getCharRanges() const { return CharRanges; }
 
Index: clang/lib/Format/TokenAnalyzer.cpp
===================================================================
--- clang/lib/Format/TokenAnalyzer.cpp
+++ clang/lib/Format/TokenAnalyzer.cpp
@@ -104,12 +104,12 @@
   IdentifierTable IdentTable(getFormattingLangOpts(Style));
   FormatTokenLexer Lex(Env.getSourceManager(), Env.getFileID(),
                        Env.getFirstStartColumn(), Style, Encoding, Allocator,
-
                        IdentTable);
   ArrayRef<FormatToken *> Toks(Lex.lex());
   SmallVector<FormatToken *, 10> Tokens(Toks.begin(), Toks.end());
-  UnwrappedLineParser Parser(Style, Lex.getKeywords(),
-                             Env.getFirstStartColumn(), Tokens, *this);
+  UnwrappedLineParser Parser(Env.getSourceManager(), Style, Lex.getKeywords(),
+                             Env.getFirstStartColumn(), Tokens, *this,
+                             Allocator, IdentTable);
   Parser.parse();
   assert(UnwrappedLines.back().empty());
   unsigned Penalty = 0;
Index: clang/lib/Format/Macros.h
===================================================================
--- clang/lib/Format/Macros.h
+++ clang/lib/Format/Macros.h
@@ -149,7 +149,7 @@
 ///
 /// After this point, the state of the spelled/expanded stream is "in sync"
 /// (both at the start of an UnwrappedLine, with no macros open), so the
-/// Unexpander can be thrown away and parsing can continue.
+/// Reconstructor can be thrown away and parsing can continue.
 ///
 /// Given a mapping from the macro name identifier token in the macro call
 /// to the tokens of the macro call, for example:
Index: clang/lib/Format/FormatToken.h
===================================================================
--- clang/lib/Format/FormatToken.h
+++ clang/lib/Format/FormatToken.h
@@ -373,6 +373,11 @@
   /// binary operator.
   TokenType getType() const { return Type; }
   void setType(TokenType T) {
+    // If this token is a macro argument while formatting an unexpanded macro
+    // call, we do not change its type any more - the type was deduced from
+    // formatting the expanded macro stream already.
+    if (MacroCtx && MacroCtx->Role == MR_UnexpandedArg)
+      return;
     assert((!TypeIsFinalized || T == Type) &&
            "Please use overwriteFixedType to change a fixed type.");
     Type = T;
Index: clang/lib/Format/Format.cpp
===================================================================
--- clang/lib/Format/Format.cpp
+++ clang/lib/Format/Format.cpp
@@ -1036,6 +1036,7 @@
     IO.mapOptional("UseTab", Style.UseTab);
     IO.mapOptional("WhitespaceSensitiveMacros",
                    Style.WhitespaceSensitiveMacros);
+    IO.mapOptional("Macros", Style.Macros);
 
     // If AlwaysBreakAfterDefinitionReturnType was specified but
     // AlwaysBreakAfterReturnType was not, initialize the latter from the
Index: clang/lib/Format/ContinuationIndenter.cpp
===================================================================
--- clang/lib/Format/ContinuationIndenter.cpp
+++ clang/lib/Format/ContinuationIndenter.cpp
@@ -18,6 +18,7 @@
 #include "WhitespaceManager.h"
 #include "clang/Basic/OperatorPrecedence.h"
 #include "clang/Basic/SourceManager.h"
+#include "clang/Basic/TokenKinds.h"
 #include "clang/Format/Format.h"
 #include "llvm/ADT/StringSet.h"
 #include "llvm/Support/Debug.h"
@@ -739,9 +740,15 @@
   if (Previous.is(TT_TemplateString) && Previous.opensScope())
     CurrentState.NoLineBreak = true;
 
+  // Align following lines within parenthesis / brackets if configured.
+  // For a line of macro parents, the commas that follow the opening parenthesis
+  // in the line come after the opening parenthesis' children - we want to align
+  // the comma with the previous token's children instead of the opening
+  // parenthesis.
   if (Style.AlignAfterOpenBracket != FormatStyle::BAS_DontAlign &&
       !CurrentState.IsCSharpGenericTypeConstraint && Previous.opensScope() &&
       Previous.isNot(TT_ObjCMethodExpr) && Previous.isNot(TT_RequiresClause) &&
+      !(Current.MacroParent && Previous.MacroParent) &&
       (Current.isNot(TT_LineComment) || Previous.is(BK_BracedInit))) {
     CurrentState.Indent = State.Column + Spaces;
     CurrentState.IsAligned = true;
Index: clang/include/clang/Format/Format.h
===================================================================
--- clang/include/clang/Format/Format.h
+++ clang/include/clang/Format/Format.h
@@ -2745,6 +2745,23 @@
   /// \version 3.7
   std::string MacroBlockEnd;
 
+  /// A list of macros of the form \c <definition>=<expansion> .
+  ///
+  /// Code will be parsed with macros expanded, and formatting will try to best
+  /// match the structure of the expanded call.
+  ///
+  /// For example, with the macro "A(x)=x", the code
+  /// \code
+  ///    A(a * b);
+  /// \endcode
+  /// will be formatted as a declaration of the variable \c b of type \c a*
+  /// (depending on pointer-binding rules)
+  /// \code
+  ///    A(a* b);
+  /// \endcode
+  /// instead of as multiplication.
+  std::vector<std::string> Macros;
+
   /// The maximum number of consecutive empty lines to keep.
   /// \code
   ///    MaxEmptyLinesToKeep: 1         vs.     MaxEmptyLinesToKeep: 0
@@ -4306,7 +4323,8 @@
            StatementAttributeLikeMacros == R.StatementAttributeLikeMacros &&
            StatementMacros == R.StatementMacros && TabWidth == R.TabWidth &&
            TypenameMacros == R.TypenameMacros && UseTab == R.UseTab &&
-           WhitespaceSensitiveMacros == R.WhitespaceSensitiveMacros;
+           WhitespaceSensitiveMacros == R.WhitespaceSensitiveMacros &&
+           Macros == R.Macros;
   }
 
   std::optional<FormatStyle> GetLanguageStyle(LanguageKind Language) const;
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to