[PATCH] D88299: [clang-format] Add MacroUnexpander.

Manuel Klimek via Phabricator via cfe-commits Wed, 28 Oct 2020 05:37:25 -0700

klimek updated this revision to Diff 301253.
klimek marked 3 inline comments as done.
klimek added a comment.


Adapted based on review comments.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D88299/new/

https://reviews.llvm.org/D88299

Files:
  clang/lib/Format/CMakeLists.txt
  clang/lib/Format/FormatToken.h
  clang/lib/Format/MacroUnexpander.cpp
  clang/lib/Format/Macros.h
  clang/unittests/Format/CMakeLists.txt
  clang/unittests/Format/MacroUnexpanderTest.cpp

Index: clang/unittests/Format/MacroUnexpanderTest.cpp
===================================================================
--- /dev/null
+++ clang/unittests/Format/MacroUnexpanderTest.cpp
@@ -0,0 +1,650 @@
+#include "../../lib/Format/Macros.h"
+#include "../../lib/Format/UnwrappedLineParser.h"
+#include "TestLexer.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include <map>
+#include <memory>
+#include <vector>
+
+namespace clang {
+namespace format {
+namespace {
+
+using UnexpandedMap = std::map<FormatToken *, std::unique_ptr<UnwrappedLine>>;
+
+// Keeps track of a sequence of macro expansions.
+//
+// The expanded tokens are accessible via getTokens(), while a map of macro call
+// identifier token to unexpanded token stream is accessible via
+// getUnexpanded().
+class Expansion {
+public:
+  Expansion(TestLexer &Lex, MacroExpander &Macros) : Lex(Lex), Macros(Macros) {}
+
+  // Appends the token stream obtained from expanding the macro Name given
+  // the provided arguments, to be later retrieved with getTokens().
+  // Returns the list of tokens making up the unexpanded macro call.
+  TokenList
+  expand(llvm::StringRef Name,
+         const SmallVector<llvm::SmallVector<FormatToken *, 8>, 1> &Args) {
+    auto *ID = Lex.id(Name);
+    auto UnexpandedLine = std::make_unique<UnwrappedLine>();
+    UnexpandedLine->Tokens.push_back(ID);
+    if (!Args.empty()) {
+      UnexpandedLine->Tokens.push_back(Lex.id("("));
+      for (auto I = Args.begin(), E = Args.end(); I != E; ++I) {
+        if (I != Args.begin())
+          UnexpandedLine->Tokens.push_back(Lex.id(","));
+        UnexpandedLine->Tokens.insert(UnexpandedLine->Tokens.end(), I->begin(),
+                                      I->end());
+      }
+      UnexpandedLine->Tokens.push_back(Lex.id(")"));
+    }
+    Unexpanded[ID] = std::move(UnexpandedLine);
+
+    auto Expanded = uneof(Macros.expand(ID, Args));
+    Tokens.append(Expanded.begin(), Expanded.end());
+
+    TokenList UnexpandedTokens;
+    for (const UnwrappedLineNode &Node : Unexpanded[ID]->Tokens) {
+      UnexpandedTokens.push_back(Node.Tok);
+    }
+    return UnexpandedTokens;
+  }
+
+  TokenList expand(llvm::StringRef Name,
+                   const std::vector<std::string> &Args = {}) {
+    return expand(Name, lexArgs(Args));
+  }
+
+  const UnexpandedMap &getUnexpanded() const { return Unexpanded; }
+
+  const TokenList &getTokens() const { return Tokens; }
+
+private:
+  llvm::SmallVector<TokenList, 1>
+  lexArgs(const std::vector<std::string> &Args) {
+    llvm::SmallVector<TokenList, 1> Result;
+    for (const auto &Arg : Args) {
+      Result.push_back(uneof(Lex.lex(Arg)));
+    }
+    return Result;
+  }
+  std::map<FormatToken *, std::unique_ptr<UnwrappedLine>> Unexpanded;
+  llvm::SmallVector<FormatToken *, 8> Tokens;
+  TestLexer &Lex;
+  MacroExpander &Macros;
+};
+
+struct Chunk {
+  Chunk(llvm::ArrayRef<FormatToken *> Tokens)
+      : Tokens(Tokens.begin(), Tokens.end()) {}
+  Chunk(llvm::ArrayRef<UnwrappedLine> Children)
+      : Children(Children.begin(), Children.end()) {}
+  llvm::SmallVector<UnwrappedLineNode, 1> Tokens;
+  llvm::SmallVector<UnwrappedLine, 0> Children;
+};
+
+bool tokenMatches(const FormatToken *Left, const FormatToken *Right) {
+  if (Left->getType() == Right->getType() &&
+      Left->TokenText == Right->TokenText)
+    return true;
+  llvm::dbgs() << Left->TokenText << " != " << Right->TokenText << "\n";
+  return false;
+}
+
+// Allows to produce chunks of a token list by typing the code of equal tokens.
+//
+// Created from a list of tokens, users call "consume" to get the next chunk
+// of tokens, checking that they match the written code.
+struct Matcher {
+  Matcher(const TokenList &Tokens, TestLexer &Lex)
+      : Tokens(Tokens), It(this->Tokens.begin()), Lex(Lex) {}
+
+  Chunk consume(StringRef Tokens) {
+    TokenList Result;
+    for (const FormatToken *Token : uneof(Lex.lex(Tokens))) {
+      assert(tokenMatches(*It, Token));
+      Result.push_back(*It);
+      ++It;
+    }
+    return Chunk(Result);
+  }
+
+  TokenList Tokens;
+  TokenList::iterator It;
+  TestLexer &Lex;
+};
+
+UnexpandedMap mergeUnexpanded(const UnexpandedMap &M1,
+                              const UnexpandedMap &M2) {
+  UnexpandedMap Result;
+  for (const auto &KV : M1) {
+    Result[KV.first] = std::make_unique<UnwrappedLine>(*KV.second);
+  }
+  for (const auto &KV : M2) {
+    Result[KV.first] = std::make_unique<UnwrappedLine>(*KV.second);
+  }
+  return Result;
+}
+
+class MacroUnexpanderTest : public ::testing::Test {
+public:
+  MacroUnexpanderTest() : Lex(Allocator, Buffers) {}
+
+  std::unique_ptr<MacroExpander>
+  createExpander(const std::vector<std::string> &MacroDefinitions) {
+    return std::make_unique<MacroExpander>(MacroDefinitions,
+                                           Lex.SourceMgr.get(), Lex.Style,
+                                           Lex.Allocator, Lex.IdentTable);
+  }
+
+  UnwrappedLine line(llvm::ArrayRef<FormatToken *> Tokens) {
+    UnwrappedLine Result;
+    for (FormatToken *Tok : Tokens) {
+      Result.Tokens.push_back(UnwrappedLineNode(Tok));
+    }
+    return Result;
+  }
+
+  UnwrappedLine line(llvm::StringRef Text) { return line({lex(Text)}); }
+
+  UnwrappedLine line(llvm::ArrayRef<Chunk> Chunks) {
+    UnwrappedLine Result;
+    for (const Chunk &Chunk : Chunks) {
+      Result.Tokens.insert(Result.Tokens.end(), Chunk.Tokens.begin(),
+                           Chunk.Tokens.end());
+      assert(!Result.Tokens.empty());
+      Result.Tokens.back().Children.append(Chunk.Children.begin(),
+                                           Chunk.Children.end());
+    }
+    return Result;
+  }
+
+  TokenList lex(llvm::StringRef Text) { return uneof(Lex.lex(Text)); }
+
+  Chunk tokens(llvm::StringRef Text) { return Chunk(lex(Text)); }
+
+  Chunk children(llvm::ArrayRef<UnwrappedLine> Children) {
+    return Chunk(Children);
+  }
+
+  llvm::SpecificBumpPtrAllocator<FormatToken> Allocator;
+  std::vector<std::unique_ptr<llvm::MemoryBuffer>> Buffers;
+  TestLexer Lex;
+};
+
+bool matchesTokens(const UnwrappedLine &L1, const UnwrappedLine &L2) {
+  if (L1.Tokens.size() != L2.Tokens.size())
+    return false;
+  for (auto L1It = L1.Tokens.begin(), L2It = L2.Tokens.begin();
+       L1It != L1.Tokens.end(); ++L1It, ++L2It) {
+    if (L1It->Tok != L2It->Tok)
+      return false;
+    if (L1It->Children.size() != L2It->Children.size())
+      return false;
+    for (auto L1ChildIt = L1It->Children.begin(),
+              L2ChildIt = L2It->Children.begin();
+         L1ChildIt != L1It->Children.end(); ++L1ChildIt, ++L2ChildIt) {
+      if (!matchesTokens(*L1ChildIt, *L2ChildIt))
+        return false;
+    }
+  }
+  return true;
+}
+MATCHER_P(matchesLine, line, "") { return matchesTokens(arg, line); }
+
+TEST_F(MacroUnexpanderTest, Identifier) {
+  auto Macros = createExpander({"X=x"});
+  Expansion Exp(Lex, *Macros);
+  TokenList Call = Exp.expand("X");
+
+  MacroUnexpander Unexp(0, Exp.getUnexpanded());
+  Unexp.addLine(line(Exp.getTokens()));
+  EXPECT_TRUE(Unexp.finished());
+  UnwrappedLine Result = Unexp.getResult();
+  Matcher U(Call, Lex);
+  EXPECT_THAT(Unexp.getResult(), matchesLine(line(U.consume("X"))));
+}
+
+TEST_F(MacroUnexpanderTest, NestedLineWithinCall) {
+  auto Macros = createExpander({"C(a)=class C { a; };"});
+  Expansion Exp(Lex, *Macros);
+  TokenList Call = Exp.expand("C", {"void f()"});
+
+  MacroUnexpander Unexp(0, Exp.getUnexpanded());
+  Matcher E(Exp.getTokens(), Lex);
+  Unexp.addLine(line(E.consume("class C {")));
+  Unexp.addLine(line(E.consume("void f();")));
+  Unexp.addLine(line(E.consume("};")));
+  EXPECT_TRUE(Unexp.finished());
+  Matcher U(Call, Lex);
+  EXPECT_THAT(Unexp.getResult(), matchesLine(line(U.consume("C(void f())"))));
+}
+
+TEST_F(MacroUnexpanderTest, StatementSequence) {
+  auto Macros = createExpander({"SEMI=;"});
+  Expansion Exp(Lex, *Macros);
+  TokenList Call1 = Exp.expand("SEMI");
+  TokenList Call2 = Exp.expand("SEMI");
+  TokenList Call3 = Exp.expand("SEMI");
+
+  MacroUnexpander Unexp(0, Exp.getUnexpanded());
+  Matcher E(Exp.getTokens(), Lex);
+  Unexp.addLine(line(E.consume(";")));
+  Unexp.addLine(line(E.consume(";")));
+  Unexp.addLine(line(E.consume(";")));
+  EXPECT_TRUE(Unexp.finished());
+  Matcher U1(Call1, Lex);
+  Matcher U2(Call2, Lex);
+  Matcher U3(Call3, Lex);
+  EXPECT_THAT(Unexp.getResult(),
+              matchesLine(line(
+                  {U1.consume("SEMI"),
+                   children({line({U2.consume("SEMI"),
+                                   children({line(U3.consume("SEMI"))})})})})));
+}
+
+TEST_F(MacroUnexpanderTest, NestedBlock) {
+  auto Macros = createExpander({"ID(x)=x"});
+  // Test: ID({ ID(a *b); })
+  // 1. expand ID(a *b) -> a *b
+  Expansion Exp1(Lex, *Macros);
+  TokenList Call1 = Exp1.expand("ID", {"a *b"});
+  // 2. expand ID({ a *b; })
+  TokenList Arg;
+  Arg.push_back(Lex.id("{"));
+  Arg.append(Exp1.getTokens().begin(), Exp1.getTokens().end());
+  Arg.push_back(Lex.id(";"));
+  Arg.push_back(Lex.id("}"));
+  Expansion Exp2(Lex, *Macros);
+  TokenList Call2 = Exp2.expand("ID", {Arg});
+
+  // Consume as-if formatted:
+  // {
+  //   a *b;
+  // }
+  UnexpandedMap Unexpanded =
+      mergeUnexpanded(Exp1.getUnexpanded(), Exp2.getUnexpanded());
+  MacroUnexpander Unexp(0, Unexpanded);
+  Matcher E(Exp2.getTokens(), Lex);
+  Unexp.addLine(line(E.consume("{")));
+  Unexp.addLine(line(E.consume("a *b;")));
+  Unexp.addLine(line(E.consume("}")));
+  EXPECT_TRUE(Unexp.finished());
+
+  // Expect lines:
+  // ID({
+  //   ID(a *b);
+  // })
+  Matcher U1(Call1, Lex);
+  Matcher U2(Call2, Lex);
+  auto Chunk2Start = U2.consume("ID(");
+  auto Chunk2LBrace = U2.consume("{");
+  U2.consume("a *b");
+  auto Chunk2Mid = U2.consume(";");
+  auto Chunk2RBrace = U2.consume("}");
+  auto Chunk2End = U2.consume(")");
+  auto Chunk1 = U1.consume("ID(a *b)");
+
+  auto Expected = line({Chunk2Start,
+                        children({
+                            line(Chunk2LBrace),
+                            line({Chunk1, Chunk2Mid}),
+                            line(Chunk2RBrace),
+                        }),
+                        Chunk2End});
+  EXPECT_THAT(Unexp.getResult(), matchesLine(Expected));
+}
+
+TEST_F(MacroUnexpanderTest, NestedChildBlocks) {
+  auto Macros = createExpander({"ID(x)=x", "CALL(x)=f([] { x })"});
+  // Test: ID(CALL(CALL(return a * b;)))
+  // 1. expand CALL(return a * b;)
+  Expansion Exp1(Lex, *Macros);
+  TokenList Call1 = Exp1.expand("CALL", {"return a * b;"});
+  // 2. expand CALL(f([] { return a * b; }))
+  Expansion Exp2(Lex, *Macros);
+  TokenList Call2 = Exp2.expand("CALL", {Exp1.getTokens()});
+  // 3. expand ID({ f([] { f([] { return a * b; }) }) })
+  TokenList Arg3;
+  Arg3.push_back(Lex.id("{"));
+  Arg3.append(Exp2.getTokens().begin(), Exp2.getTokens().end());
+  Arg3.push_back(Lex.id("}"));
+  Expansion Exp3(Lex, *Macros);
+  TokenList Call3 = Exp3.expand("ID", {Arg3});
+
+  // Consume as-if formatted in three unwrapped lines:
+  // 0: {
+  // 1:   f([] {
+  //        f([] {
+  //          return a * b;
+  //        })
+  //      })
+  // 2: }
+  UnexpandedMap Unexpanded = mergeUnexpanded(
+      Exp1.getUnexpanded(),
+      mergeUnexpanded(Exp2.getUnexpanded(), Exp3.getUnexpanded()));
+  MacroUnexpander Unexp(0, Unexpanded);
+  Matcher E(Exp3.getTokens(), Lex);
+  Unexp.addLine(line(E.consume("{")));
+  Unexp.addLine(
+      line({E.consume("f([] {"),
+            children({line({E.consume("f([] {"),
+                            children({line(E.consume("return a * b;"))}),
+                            E.consume("})")})}),
+            E.consume("})")}));
+  Unexp.addLine(line(E.consume("}")));
+  EXPECT_TRUE(Unexp.finished());
+
+  // Expect lines:
+  // ID(
+  //   {
+  //   CALL(CALL(return a * b;))
+  //   }
+  // )
+  Matcher U1(Call1, Lex);
+  Matcher U2(Call2, Lex);
+  Matcher U3(Call3, Lex);
+  auto Chunk3Start = U3.consume("ID(");
+  auto Chunk3LBrace = U3.consume("{");
+  U3.consume("f([] { f([] { return a * b; }) })");
+  auto Chunk3RBrace = U3.consume("}");
+  auto Chunk3End = U3.consume(")");
+  auto Chunk2Start = U2.consume("CALL(");
+  U2.consume("f([] { return a * b; })");
+  auto Chunk2End = U2.consume(")");
+  auto Chunk1 = U1.consume("CALL(return a * b;)");
+
+  auto Expected = line({
+      Chunk3Start,
+      children({
+          line(Chunk3LBrace),
+          line({
+              Chunk2Start,
+              Chunk1,
+              Chunk2End,
+          }),
+          line(Chunk3RBrace),
+      }),
+      Chunk3End,
+  });
+  EXPECT_THAT(Unexp.getResult(), matchesLine(Expected));
+}
+
+TEST_F(MacroUnexpanderTest, NestedChildrenMultipleArguments) {
+  auto Macros = createExpander({"CALL(a, b)=f([] { a; b; })"});
+  Expansion Exp(Lex, *Macros);
+  TokenList Call = Exp.expand("CALL", {std::string("int a"), "int b"});
+
+  MacroUnexpander Unexp(0, Exp.getUnexpanded());
+  Matcher E(Exp.getTokens(), Lex);
+  Unexp.addLine(line({
+      E.consume("f([] {"),
+      children({
+          line(E.consume("int a;")),
+          line(E.consume("int b;")),
+      }),
+      E.consume("})"),
+  }));
+  EXPECT_TRUE(Unexp.finished());
+  Matcher U(Call, Lex);
+  auto Expected = line(U.consume("CALL(int a, int b)"));
+  EXPECT_THAT(Unexp.getResult(), matchesLine(Expected));
+}
+
+TEST_F(MacroUnexpanderTest, ReverseOrderArgumentsInExpansion) {
+  auto Macros = createExpander({"CALL(a, b)=b + a"});
+  Expansion Exp(Lex, *Macros);
+  TokenList Call = Exp.expand("CALL", {std::string("x"), "y"});
+
+  MacroUnexpander Unexp(0, Exp.getUnexpanded());
+  Matcher E(Exp.getTokens(), Lex);
+  Unexp.addLine(line(E.consume("y + x")));
+  EXPECT_TRUE(Unexp.finished());
+  Matcher U(Call, Lex);
+  auto Expected = line(U.consume("CALL(x, y)"));
+  EXPECT_THAT(Unexp.getResult(), matchesLine(Expected));
+}
+
+TEST_F(MacroUnexpanderTest, MultipleToplevelUnwrappedLines) {
+  auto Macros = createExpander({"ID(a, b)=a b"});
+  Expansion Exp(Lex, *Macros);
+  TokenList Call = Exp.expand("ID", {std::string("x; x"), "y"});
+
+  MacroUnexpander Unexp(0, Exp.getUnexpanded());
+  Matcher E(Exp.getTokens(), Lex);
+  Unexp.addLine(line(E.consume("x;")));
+  Unexp.addLine(line(E.consume("x y")));
+  EXPECT_TRUE(Unexp.finished());
+  Matcher U(Call, Lex);
+  auto Expected = line({
+      U.consume("ID("),
+      children({
+          line(U.consume("x;")),
+          line(U.consume("x")),
+      }),
+      U.consume(", y)"),
+  });
+  EXPECT_THAT(Unexp.getResult(), matchesLine(Expected));
+}
+
+TEST_F(MacroUnexpanderTest, NestedCallsMultipleLines) {
+  auto Macros = createExpander({"ID(x)=x"});
+  // Test: ID({ID(a * b);})
+  // 1. expand ID(a * b)
+  Expansion Exp1(Lex, *Macros);
+  TokenList Call1 = Exp1.expand("ID", {"a * b"});
+  // 2. expand ID({ a * b; })
+  Expansion Exp2(Lex, *Macros);
+  TokenList Arg2;
+  Arg2.push_back(Lex.id("{"));
+  Arg2.append(Exp1.getTokens().begin(), Exp1.getTokens().end());
+  Arg2.push_back(Lex.id(";"));
+  Arg2.push_back(Lex.id("}"));
+  TokenList Call2 = Exp2.expand("ID", {Arg2});
+
+  // Consume as-if formatted in three unwrapped lines:
+  // 0: {
+  // 1:   a * b;
+  // 2: }
+  UnexpandedMap Unexpanded =
+      mergeUnexpanded(Exp1.getUnexpanded(), Exp2.getUnexpanded());
+  MacroUnexpander Unexp(0, Unexpanded);
+  Matcher E(Exp2.getTokens(), Lex);
+  Unexp.addLine(line(E.consume("{")));
+  Unexp.addLine(line(E.consume("a * b;")));
+  Unexp.addLine(line(E.consume("}")));
+  EXPECT_TRUE(Unexp.finished());
+
+  // Expect lines:
+  // ID(
+  //     {
+  //     ID(a * b);
+  //     }
+  // )
+  Matcher U1(Call1, Lex);
+  Matcher U2(Call2, Lex);
+  auto Chunk2Start = U2.consume("ID(");
+  auto Chunk2LBrace = U2.consume("{");
+  U2.consume("a * b");
+  auto Chunk2Semi = U2.consume(";");
+  auto Chunk2RBrace = U2.consume("}");
+  auto Chunk2End = U2.consume(")");
+  auto Chunk1 = U1.consume("ID(a * b)");
+
+  auto Expected = line({
+      Chunk2Start,
+      children({
+          line({Chunk2LBrace}),
+          line({Chunk1, Chunk2Semi}),
+          line({Chunk2RBrace}),
+      }),
+      Chunk2End,
+  });
+  EXPECT_THAT(Unexp.getResult(), matchesLine(Expected));
+}
+
+TEST_F(MacroUnexpanderTest, ParentOutsideMacroCall) {
+  auto Macros = createExpander({"ID(a)=a"});
+  Expansion Exp(Lex, *Macros);
+  TokenList Call = Exp.expand("ID", {std::string("x; y; z;")});
+
+  auto Prefix = tokens("int a = []() {");
+  auto Postfix = tokens("}();");
+  MacroUnexpander Unexp(0, Exp.getUnexpanded());
+  Matcher E(Exp.getTokens(), Lex);
+  Unexp.addLine(line({
+      Prefix,
+      children({
+          line(E.consume("x;")),
+          line(E.consume("y;")),
+          line(E.consume("z;")),
+      }),
+      Postfix,
+  }));
+  EXPECT_TRUE(Unexp.finished());
+  Matcher U(Call, Lex);
+  auto Expected = line({
+      Prefix,
+      children({
+          line({
+              U.consume("ID("),
+              children({
+                  line(U.consume("x;")),
+                  line(U.consume("y;")),
+                  line(U.consume("z;")),
+              }),
+              U.consume(")"),
+          }),
+      }),
+      Postfix,
+  });
+  EXPECT_THAT(Unexp.getResult(), matchesLine(Expected));
+}
+
+TEST_F(MacroUnexpanderTest, UnusedMacroArguments) {
+  auto Macros = createExpander({"X=x"});
+  Expansion Exp(Lex, *Macros);
+  TokenList Call = Exp.expand("X", {"a", "b", "c"});
+
+  MacroUnexpander Unexp(0, Exp.getUnexpanded());
+  Unexp.addLine(line(Exp.getTokens()));
+  EXPECT_TRUE(Unexp.finished());
+  UnwrappedLine Result = Unexp.getResult();
+  Matcher U(Call, Lex);
+  EXPECT_THAT(Unexp.getResult(), matchesLine(line(U.consume("X(a, b, c)"))));
+}
+
+TEST_F(MacroUnexpanderTest, UnusedEmptyMacroArgument) {
+  auto Macros = createExpander({"X=x"});
+  Expansion Exp(Lex, *Macros);
+  TokenList Call = Exp.expand("X", {std::string("")});
+
+  MacroUnexpander Unexp(0, Exp.getUnexpanded());
+  Matcher E(Exp.getTokens(), Lex);
+  auto Semi = tokens(";");
+  Unexp.addLine(line({E.consume("x"), Semi}));
+  EXPECT_TRUE(Unexp.finished());
+  UnwrappedLine Result = Unexp.getResult();
+  Matcher U(Call, Lex);
+  EXPECT_THAT(Unexp.getResult(), matchesLine(line({U.consume("X()"), Semi})));
+}
+
+TEST_F(MacroUnexpanderTest, ChildrenSplitAcrossArguments) {
+  auto Macros = createExpander({"CALL(a, b)=f([]() a b)"});
+  Expansion Exp(Lex, *Macros);
+  TokenList Call = Exp.expand("CALL", {std::string("{ a;"), "b; }"});
+
+  MacroUnexpander Unexp(0, Exp.getUnexpanded());
+  Matcher E(Exp.getTokens(), Lex);
+  Unexp.addLine(line({
+      E.consume("f([]() {"),
+      children({
+          line(E.consume("a;")),
+          line(E.consume("b;")),
+      }),
+      E.consume("})"),
+  }));
+  EXPECT_TRUE(Unexp.finished());
+  Matcher U(Call, Lex);
+  auto Expected = line({
+      U.consume("CALL({"),
+      children(line(U.consume("a;"))),
+      U.consume(", b; })"),
+  });
+  EXPECT_THAT(Unexp.getResult(), matchesLine(Expected));
+}
+
+TEST_F(MacroUnexpanderTest, ChildrenAfterMacroCall) {
+  auto Macros = createExpander({"CALL(a, b)=f([]() a b"});
+  Expansion Exp(Lex, *Macros);
+  TokenList Call = Exp.expand("CALL", {std::string("{ a"), "b"});
+
+  MacroUnexpander Unexp(0, Exp.getUnexpanded());
+  Matcher E(Exp.getTokens(), Lex);
+  auto Semi = tokens(";");
+  auto SecondLine = tokens("c d;");
+  auto ThirdLine = tokens("e f;");
+  auto Postfix = tokens("})");
+  Unexp.addLine(line({
+      E.consume("f([]() {"),
+      children({
+          line({E.consume("a b"), Semi}),
+          line(SecondLine),
+          line(ThirdLine),
+      }),
+      Postfix,
+  }));
+  EXPECT_TRUE(Unexp.finished());
+  Matcher U(Call, Lex);
+  auto Expected = line({
+      U.consume("CALL({"),
+      children(line(U.consume("a"))),
+      U.consume(", b)"),
+      Semi,
+      children(line({
+          SecondLine,
+          children(line({
+              ThirdLine,
+              Postfix,
+          })),
+      })),
+  });
+  EXPECT_THAT(Unexp.getResult(), matchesLine(Expected));
+}
+
+TEST_F(MacroUnexpanderTest, InvalidCodeSplittingBracesAcrossArgs) {
+  auto Macros = createExpander({"M(a, b)=(a) (b)"});
+  Expansion Exp(Lex, *Macros);
+  TokenList Call = Exp.expand("M", {std::string("{"), "x", ""});
+
+  MacroUnexpander Unexp(0, Exp.getUnexpanded());
+  Matcher E(Exp.getTokens(), Lex);
+  auto Prefix = tokens("({");
+  Unexp.addLine(line({
+      Prefix,
+      children({
+          line({
+              E.consume("({"),
+              children({line(E.consume(")(x)"))}),
+          }),
+      }),
+  }));
+  EXPECT_TRUE(Unexp.finished());
+  Matcher U(Call, Lex);
+  auto Expected = line({
+      Prefix,
+      children({line(U.consume("M({,x,)"))}),
+  });
+  EXPECT_THAT(Unexp.getResult(), matchesLine(Expected));
+}
+
+} // namespace
+} // namespace format
+} // namespace clang
Index: clang/unittests/Format/CMakeLists.txt
===================================================================
--- clang/unittests/Format/CMakeLists.txt
+++ clang/unittests/Format/CMakeLists.txt
@@ -16,6 +16,7 @@
   FormatTestTableGen.cpp
   FormatTestTextProto.cpp
   MacroExpanderTest.cpp
+  MacroUnexpanderTest.cpp
   NamespaceEndCommentsFixerTest.cpp
   SortImportsTestJS.cpp
   SortImportsTestJava.cpp
Index: clang/lib/Format/Macros.h
===================================================================
--- clang/lib/Format/Macros.h
+++ clang/lib/Format/Macros.h
@@ -37,26 +37,22 @@
 #ifndef CLANG_LIB_FORMAT_MACROS_H
 #define CLANG_LIB_FORMAT_MACROS_H
 
+#include <list>
+#include <map>
 #include <string>
-#include <unordered_map>
 #include <vector>
 
-#include "Encoding.h"
 #include "FormatToken.h"
 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
 
-namespace llvm {
-class MemoryBuffer;
-} // namespace llvm
-
 namespace clang {
-class IdentifierTable;
-class SourceManager;
-
 namespace format {
-struct FormatStyle;
+
+struct UnwrappedLine;
+struct UnwrappedLineNode;
 
 /// Takes a set of macro definitions as strings and allows expanding calls to
 /// those macros.
@@ -135,6 +131,204 @@
   llvm::StringMap<Definition> Definitions;
 };
 
+/// Converts a stream of unwrapped lines of expanded tokens into unwrapped lines
+/// containing the tokens of the macro call in a way that the resulting
+/// unwrapped lines of the macro call best resemble the split of unwrapped lines
+/// in the input.
+///
+/// Given a mapping from the macro name identifier token in the macro call
+/// to the tokens of the macro call, for example:
+/// CLASSA -> CLASSA({public: void x();})
+///
+/// When getting the formatted lines of the expansion via the \c addLine method
+/// (each '->' specifies a call to \c addLine ):
+/// -> class A {
+/// -> public:
+/// ->   void x();
+/// -> };
+///
+/// Creates the tree of unwrapped lines containing the macro call tokens so that
+/// the macro call tokens fit the semantic structure of the expanded formatted
+/// lines:
+/// -> CLASSA({
+/// -> public:
+/// ->   void x();
+/// -> })
+class MacroUnexpander {
+public:
+  /// Create an Unexpander whose resulting unexpanded line will start at
+  /// \p Level, unexpanding using the map from name identifier token
+  /// to the corresponding tokens of the original macro call.
+  MacroUnexpander(unsigned Level,
+                  const std::map<FormatToken *, std::unique_ptr<UnwrappedLine>>
+                      &Unexpanded);
+
+  /// For the given \p Line, match all occurences of tokens expanded from a
+  /// macro to unwrapped lines in the original macro call so that the resulting
+  /// tree of unwrapped lines best resembles the structure of unwrapped lines
+  /// passed in via \c addLine.
+  void addLine(const UnwrappedLine &Line);
+
+  /// Check whether at the current state there is no open macro expansion
+  /// that needs to be processed to finish an macro call.
+  bool finished() { return State == InProgress && Unexpanded.empty(); }
+
+  /// Retrieve the formatted \c UnwrappedLine containing the orginal
+  /// macro calls, formatted according to the expanded token stream received
+  /// via \c addLine().
+  /// Generally, this line tries to have the same structure as the expanded,
+  /// formatted unwrapped lines handed in via \c addLine(), with the exception
+  /// that for multiple top-level lines, each subsequent line will be the
+  /// child of the last token in its predecessor.
+  /// If a token in a macro argument is a child of a token in the expansion,
+  /// the parent will be the corresponding token in the macro call.
+  /// For example:
+  /// #define C(a, b) class C { a b
+  /// C(int x;, int y;) would expand to class C { int x; int y; where in a
+  /// formatted line "int x;" and "int y;" would both be new separate lines (at
+  /// different indentation levels). In the result, "int x;" will be a child of
+  /// the opening parenthesis in "C(" and "int y;" will be a child of the ","
+  /// token.
+  UnwrappedLine getResult();
+
+private:
+  void forEachToken(
+      const UnwrappedLine &Line,
+      const std::function<void(FormatToken *, FormatToken *, bool)> &Call,
+      FormatToken *Parent = nullptr);
+  void add(FormatToken *Token, FormatToken *OriginalParent, bool First);
+  void prepareParent(FormatToken *OriginalParent, bool First);
+  FormatToken *getParentInOutput(FormatToken *Parent);
+  void unexpand(FormatToken *Token);
+  void startUnexpansion(FormatToken *Token);
+  bool continueUnexpansionUntil(FormatToken *Token);
+  void endUnexpansion(FormatToken *Token);
+  bool processNextUnexpanded();
+  void finalize();
+
+  struct Line;
+
+  void pushToken(FormatToken *Token, Line *L = nullptr);
+  UnwrappedLine createUnwrappedLine(const Line &Line, int Level);
+  void debug(const Line &Line, int Level);
+  Line &parentLine();
+  Line *currentLine();
+
+  enum UnexpanderState {
+    Start,      // No macro expansion was found in the input yet.
+    InProgress, // During a macro unexpansion.
+    Finalized,  // Past macro unexpansion, the result is finalized.
+  };
+  UnexpanderState State = Start;
+
+  // Node in which we build up the resulting unwrapped line; this type is
+  // analogous to UnwrappedLineNode.
+  struct LineNode {
+    LineNode() = default;
+    LineNode(FormatToken *Tok) : Tok(Tok) {}
+    FormatToken *Tok = nullptr;
+    llvm::SmallVector<std::unique_ptr<Line>, 4> Children;
+  };
+
+  // Line in which we build up the resulting unwrapped line.
+  // FIXME: Investigate changing UnwrappedLine to a pointer type and using it
+  // instead of rolling our own type.
+  struct Line {
+    llvm::SmallVector<std::unique_ptr<LineNode>, 4> Tokens;
+  };
+
+  // The line in which we collect the resulting unexpanded output.
+  // To reduce special cases in the algorithm, the first level of the line
+  // contains a single null token that has the unexpanded incoming
+  // lines as children.
+  // In the end, we stich the lines together so that each subsequent line
+  // is a child of the last token of the previous line. This is necessary
+  // in order to format the overall expression as a single logical line -
+  // if we created separate lines, we'd format them with their own top-level
+  // indent depending on the semantic structure, which is not desired.
+  Line Output;
+
+  // Stack of currently "open" lines, where each line's predecessor's last
+  // token is the parent token for that line.
+  llvm::SmallVector<Line *, 4> Lines;
+
+  // Maps from the original token to the token that takes its place in the
+  // unexpanded token stream in terms of parent-child relationships.
+  // Note that it might take multiple steps to arrive at the correct
+  // parent in the output.
+  // Given: C(a, b) []() { a; b; }
+  // And a call: C(f(), g())
+  // The structure in the incoming formatted unwrapped line will be:
+  // []() {
+  //      |- f();
+  //      \- g();
+  // }
+  // with f and g being children of the opening brace.
+  // In the unexpanded call:
+  // C(f(), g())
+  //  \- f()
+  //      \- g()
+  // We want f to be a child of the opening parenthesis and g to be a child
+  // of the comma token in the macro call.
+  // Thus, we map
+  // { -> (
+  // and add
+  // ( -> ,
+  // once we're past the comma in the unexpansion.
+  llvm::DenseMap<FormatToken *, FormatToken *> TokenToParentInOutput;
+
+  // Keeps track of a single expansion while we're unexpanding tokens it
+  // generated.
+  struct Expansion {
+    // The identifier token of the macro call.
+    FormatToken *ID;
+    // Our current position in the unexpansion.
+    std::list<UnwrappedLineNode>::iterator I;
+    // The end of the unexpanded token sequence.
+    std::list<UnwrappedLineNode>::iterator End;
+  };
+
+  // Stack of unexpanded macro calls for which we're in the middle
+  // of an expansion.
+  llvm::SmallVector<Expansion, 2> Unexpanded;
+
+  struct MacroCallState {
+    Line *Line;
+    FormatToken *RedirectParentFrom;
+    FormatToken *RedirectParentTo;
+  };
+
+  // Keeps track of the lines into which the opening brace/parenthesis &
+  // argument separating commas for each level in the macro call go in order to
+  // put the corresponding closing brace/parenthesis into the same line in the
+  // output and keep track of which parents in the expanded token stream map to
+  // which tokens in the unexpanded stream.
+  // When an opening brace/parenthesis has children, we want the structure of
+  // the output line to be:
+  // |- MACRO
+  // |- (
+  // |  \- <argument>
+  // |- ,
+  // |  \- <argument>
+  // \- )
+  llvm::SmallVector<MacroCallState, 4> MacroCallStructure;
+
+  // For each token, the previous token in the resulting unexpanded token
+  // stream.
+  llvm::DenseMap<FormatToken *, LineNode *> TokenToPrevious;
+
+  // The previous token's node we put into the resulting unexpanded token
+  // stream.
+  LineNode *PreviousNode = nullptr;
+
+  // Level the generated UnwrappedLine will be at.
+  const unsigned Level;
+
+  // Maps from identifier of the macro call to an unwrapped line containing
+  // all tokens of the macro call.
+  const std::map<FormatToken *, std::unique_ptr<UnwrappedLine>> &IdToUnexpanded;
+};
+
 } // namespace format
 } // namespace clang
 
Index: clang/lib/Format/MacroUnexpander.cpp
===================================================================
--- /dev/null
+++ clang/lib/Format/MacroUnexpander.cpp
@@ -0,0 +1,481 @@
+//===--- MacroUnexpander.cpp - Format C++ code ------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains the implementation of MacroUnexpander, which fits an
+/// unexpanded macro call to a parsed set of UnwrappedLines.
+///
+//===----------------------------------------------------------------------===//
+
+#include "Macros.h"
+
+#include "UnwrappedLineParser.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "format-unexpand"
+
+namespace clang {
+namespace format {
+
+MacroUnexpander::MacroUnexpander(
+    unsigned Level,
+    const std::map<FormatToken *, std::unique_ptr<UnwrappedLine>> &Unexpanded)
+    : Level(Level), IdToUnexpanded(Unexpanded) {
+  Output.Tokens.push_back(std::make_unique<LineNode>());
+  Lines.push_back(&Output);
+}
+
+void MacroUnexpander::addLine(const UnwrappedLine &Line) {
+  assert(State != Finalized);
+  LLVM_DEBUG(llvm::dbgs() << "Unex: new line...\n");
+  forEachToken(Line, [&](FormatToken *Token, FormatToken *Parent, bool First) {
+    add(Token, Parent, First);
+  });
+}
+
+UnwrappedLine MacroUnexpander::getResult() {
+  finalize();
+  UnwrappedLine Final =
+      createUnwrappedLine(*Output.Tokens.front()->Children.front(), Level);
+  assert(!Final.Tokens.empty());
+  return Final;
+}
+
+// Call \p Call for each token in the unwrapped line given, passing
+// the token, its parent and whether it is the first token in the line.
+void MacroUnexpander::forEachToken(
+    const UnwrappedLine &Line,
+    const std::function<void(FormatToken *, FormatToken *, bool)> &Call,
+    FormatToken *Parent) {
+  bool First = true;
+  for (const auto &N : Line.Tokens) {
+    Call(N.Tok, Parent, First);
+    First = false;
+    for (const auto &Child : N.Children) {
+      forEachToken(Child, Call, N.Tok);
+    }
+  }
+}
+
+// Unexand \p Token, given its parent \p OriginalParent in the incoming
+// unwrapped line. \p First specifies whether it is the first token in a given
+// unwrapped line.
+void MacroUnexpander::add(FormatToken *Token, FormatToken *OriginalParent,
+                          bool First) {
+  LLVM_DEBUG(
+      llvm::dbgs() << "Unex: Token: " << Token->TokenText << ", Parent: "
+                   << (OriginalParent ? OriginalParent->TokenText : "<null>")
+                   << ", First: " << First << "\n");
+  // In order to be able to find the correct parent in the unexpanded token
+  // stream, we need to continue the unexpansion until we find the right token
+  // if it is part of the unexpanded token stream.
+  // Note that hidden tokens can be part of the unexpanded stream in nested
+  // macro calls.
+  // For example, given: BRACED(a) {a}
+  // And the call: BRACED(BRACED(x))
+  // The outer macro call will be BRACED({a}), and the hidden tokens '{' and
+  // '}' can be found in the unexpanded macro stream of that level.
+  if (!Unexpanded.empty() && Token->MacroCtx &&
+      (Token->MacroCtx->Role != MR_Hidden ||
+       Unexpanded.size() != Token->MacroCtx->ExpandedFrom.size())) {
+    if (continueUnexpansionUntil(Token))
+      First = true;
+  }
+
+  prepareParent(OriginalParent, First);
+
+  if (!Token->MacroCtx) {
+    // If this token was not generated by a macro call, we add it to the current
+    // line.
+    pushToken(Token);
+  } else {
+    // Otherwise add the unexpanded equivalent of the token.
+    unexpand(Token);
+  }
+}
+
+// Ensures that:
+// Lines.back() is the line that has \p OriginalParent or its unexpanded
+// replacement token as a parent (when possible) - that is, the last token in
+// \c Lines[Lines.size()-2] is the parent of Lines.back() in the
+// unexpanded unwrapped line.
+void MacroUnexpander::prepareParent(FormatToken *OriginalParent, bool First) {
+  // We want to find the parent in the new unwrapped line, where the original
+  // parent might have been replaced by an unxpansion.
+  FormatToken *Parent = getParentInOutput(OriginalParent);
+  LLVM_DEBUG(llvm::dbgs() << "Unex: New parent: "
+                          << (Parent ? Parent->TokenText : "<null>") << "\n");
+
+  FormatToken *OpenMacroParent = nullptr;
+  if (!MacroCallStructure.empty()) {
+    // Inside a macro expansion, it is possible to lose track of the correct
+    // parent - either because it is already popped, for example because it was
+    // in a different macro argument (e.g. M({, })), or when we work on invalid
+    // code.
+    // Thus, we use the innermost macro call's parent as the parent at which
+    // we stop; this allows us to stay within the macro expansion and keeps
+    // any problems confined to the extent of the macro call.
+    OpenMacroParent =
+        getParentInOutput(MacroCallStructure.back().RedirectParentTo);
+  }
+  if (First || (!Lines.back()->Tokens.empty() &&
+                Parent == Lines.back()->Tokens.back()->Tok)) {
+    // If we are at the first token in a new line, we want to also
+    // create a new line in the resulting unexpanded unwrapped line.
+    while (Lines.back()->Tokens.empty() ||
+           (Parent != Lines.back()->Tokens.back()->Tok &&
+            Lines.back()->Tokens.back()->Tok != OpenMacroParent)) {
+      Lines.pop_back();
+      assert(!Lines.empty());
+    }
+    assert(!Lines.empty());
+    Lines.back()->Tokens.back()->Children.push_back(std::make_unique<Line>());
+    Lines.push_back(&*Lines.back()->Tokens.back()->Children.back());
+  } else if (parentLine().Tokens.back()->Tok != Parent) {
+    // If we're not the first token in a new line, pop lines until we find
+    // the child of \c Parent in the stack.
+    while (Parent != parentLine().Tokens.back()->Tok &&
+           parentLine().Tokens.back()->Tok &&
+           parentLine().Tokens.back()->Tok != OpenMacroParent) {
+      Lines.pop_back();
+      assert(!Lines.empty());
+    }
+  }
+  assert(!Lines.empty());
+}
+
+// For a given \p Parent in the incoming expanded token stream, find the
+// corresponding parent in the output.
+FormatToken *MacroUnexpander::getParentInOutput(FormatToken *Parent) {
+  auto I = TokenToParentInOutput.find(Parent);
+  if (I == TokenToParentInOutput.end())
+    return Parent;
+  for (; I != TokenToParentInOutput.end();
+       I = TokenToParentInOutput.find(Parent)) {
+    Parent = I->second;
+  }
+  // If we use a different token than the parent in the expanded token stream
+  // as parent, mark it as a special parent, so the formatting code knows it
+  // needs to have its children formatted.
+  Parent->MacroParent = true;
+  return Parent;
+}
+
+// Unexpand a \p Token that was expanded from a macro call.
+void MacroUnexpander::unexpand(FormatToken *Token) {
+  assert(Token->MacroCtx);
+  // A single token can be the only result of a macro call:
+  // Given: ID(x, y) ;
+  // And the call: ID(<some>, <tokens>)
+  // ';' in the expanded stream will unexpand all of ID(<some>, <tokens>).
+  if (Token->MacroCtx->StartOfExpansion) {
+    startUnexpansion(Token);
+    // If the order of tokens in the expanded token stream is not the
+    // same as the order of tokens in the unexpanded stream, we need
+    // to unexpand tokens that arrive later in the stream.
+    if (Token->MacroCtx->Role != MR_Hidden) {
+      continueUnexpansionUntil(Token);
+    }
+  }
+  assert(!Unexpanded.empty());
+  if (Unexpanded.back().I != Unexpanded.back().End) {
+    assert(Unexpanded.size() == Token->MacroCtx->ExpandedFrom.size());
+    if (Token->MacroCtx->Role != MR_Hidden) {
+      // The current token in the unexpanded token stream must be the token
+      // we're looking for - we either arrive here after startUnexpansion,
+      // which initiates the stream to the first token, or after
+      // continueExpansionUntil skipped until the expected token in the
+      // unexpanded stream at the start of add(...).
+      assert(Unexpanded.back().I->Tok == Token);
+      processNextUnexpanded();
+    } else if (!currentLine()->Tokens.empty()) {
+      // FIXME:assert(!currentLine()->Tokens.empty());
+      // Map all hidden tokens to the last visible token in the output.
+      // If the hidden token is a parent, we'll use the last visible
+      // token as the parent of the hidden token's children.
+      TokenToParentInOutput[Token] = currentLine()->Tokens.back()->Tok;
+    } else {
+      for (auto I = Lines.rbegin(), E = Lines.rend(); I != E; ++I) {
+        if (!(*I)->Tokens.empty()) {
+          TokenToParentInOutput[Token] = (*I)->Tokens.back()->Tok;
+
+          break;
+        }
+      }
+      llvm::dbgs() << "CURRENT LINE TOKENS EMPTY!!\n";
+    }
+  }
+  if (Token->MacroCtx->EndOfExpansion)
+    endUnexpansion(Token);
+}
+
+// Given a \p Token that starts an expansion, unexpand the beginning of the
+// macro call.
+// For example, given: ID(x) x
+// And the call: ID(int a)
+// Unexpands: ID(
+void MacroUnexpander::startUnexpansion(FormatToken *Token) {
+  assert(Token->MacroCtx);
+  assert(!Token->MacroCtx->ExpandedFrom.empty());
+  assert(Unexpanded.size() <= Token->MacroCtx->ExpandedFrom.size());
+#ifndef NDEBUG
+  // Check that the token's unexpansion stack matches our current unexpansion
+  // stack.
+  for (size_t I = 0; I < Unexpanded.size(); ++I) {
+    assert(Unexpanded[I].ID ==
+           Token->MacroCtx
+               ->ExpandedFrom[Token->MacroCtx->ExpandedFrom.size() - 1 - I]);
+  }
+#endif
+  // Start unexpansion for all calls for which this token is the first token
+  // generated by the call.
+  for (size_t I = Unexpanded.size(); I < Token->MacroCtx->ExpandedFrom.size();
+       ++I) {
+    FormatToken *ID =
+        Token->MacroCtx
+            ->ExpandedFrom[Token->MacroCtx->ExpandedFrom.size() - 1 - I];
+    // We found a macro call to be unexpanded; the next time our unexpansion
+    // stack is empty we know we finished an unexpansion.
+    State = InProgress;
+    // Put the unexpanded macro call's token into our unexpansion stack.
+    auto IU = IdToUnexpanded.find(ID);
+    assert(IU != IdToUnexpanded.end());
+    Unexpanded.push_back(
+        {ID, IU->second->Tokens.begin(), IU->second->Tokens.end()});
+    // Process the macro call's identifier.
+    processNextUnexpanded();
+    if (Unexpanded.back().I == Unexpanded.back().End)
+      continue;
+    assert(Unexpanded.back().I->Tok->is(tok::l_paren));
+    // Process the optional opening parenthesis.
+    processNextUnexpanded();
+  }
+}
+
+// Add all tokens in the unexpansion stream to the output until we find the
+// given \p Token.
+bool MacroUnexpander::continueUnexpansionUntil(FormatToken *Token) {
+  assert(!Unexpanded.empty());
+  bool PassedMacroComma = false;
+  // FIXME: If Token was already expanded earlier, due to
+  // a change in order, we will not find it, but need to
+  // skip it.
+  for (; Unexpanded.back().I != Unexpanded.back().End &&
+         Unexpanded.back().I->Tok != Token;) {
+    PassedMacroComma = processNextUnexpanded() || PassedMacroComma;
+  }
+  assert(Unexpanded.back().I == Unexpanded.back().End ||
+         Unexpanded.back().I->Tok == Token);
+  return PassedMacroComma;
+}
+
+// End all unexpansions for which \p Token is the final token.
+void MacroUnexpander::endUnexpansion(FormatToken *Token) {
+  assert(!Token->MacroCtx ||
+         (Unexpanded.size() >= Token->MacroCtx->EndOfExpansion));
+  if (!Token->MacroCtx)
+    return;
+  for (size_t I = 0; I < Token->MacroCtx->EndOfExpansion; ++I) {
+#ifndef NDEBUG
+    // Check all remaining tokens but the final closing parenthesis and optional
+    // trailing comment were already expanded at an inner expansion level.
+    for (auto T = Unexpanded.back().I; T != Unexpanded.back().End; ++T) {
+      FormatToken *Token = T->Tok;
+      bool ClosingParen = (std::next(T) == Unexpanded.back().End ||
+                           std::next(T)->Tok->isTrailingComment()) &&
+                          !Token->MacroCtx && Token->is(tok::r_paren);
+      bool TrailingComment = Token->isTrailingComment();
+      bool PreviousLevel =
+          Token->MacroCtx &&
+          (Unexpanded.size() < Token->MacroCtx->ExpandedFrom.size());
+      if (!ClosingParen && !TrailingComment && !PreviousLevel) {
+        llvm::dbgs() << "At token: " << Token->TokenText << "\n";
+      }
+      // In addition to the following cases, we can also run into this
+      // when a macro call had more arguments than expected; in that case,
+      // the comma and the remaining tokens in the macro call will potentially
+      // end up in the line when we finishe the expansion.
+      // FIXME: Add the information which arguments are unused, and assert
+      // one of the cases below plus unexpanded macro argument tokens.
+      // assert(ClosingParen || TrailingComment || PreviousLevel);
+    }
+#endif
+    // Expand the closing parenthesis, if it exists, including an optional
+    // trailing comment.
+    for (auto T = Unexpanded.back().I; T != Unexpanded.back().End; ++T) {
+      processNextUnexpanded();
+    }
+    Unexpanded.pop_back();
+  }
+}
+
+// If visible, add the next token of the unexpanded token sequence to the
+// output.
+bool MacroUnexpander::processNextUnexpanded() {
+  FormatToken *Token = Unexpanded.back().I->Tok;
+  // llvm::dbgs() << "ProcessNext: " << Token->TokenText << "\n";
+  ++Unexpanded.back().I;
+  if (Token->MacroCtx) {
+    // Skip tokens that are not part of the macro call.
+    if (Token->MacroCtx->Role == MR_Hidden) {
+      return false;
+    }
+    // Skip tokens we already expanded during an inner unexpansion.
+    // For example, given: ID(x) {x}
+    // And the call: ID(ID(f))
+    // We get two unexpansions:
+    // ID(f) -> {f}
+    // ID({f}) -> {{f}}
+    // We unexpand f during the first unexpansion, and skip it during the second
+    // unexpansion.
+    if (Unexpanded.size() < Token->MacroCtx->ExpandedFrom.size()) {
+      return false;
+    }
+  }
+  // Put the parentheses and commas of a macro call into the same line;
+  // if the arguments produce new unwrapped lines, they will become children
+  // of the corresponding opening parenthesis or comma tokens in the
+  // unexpanded call.
+  if (!Token->MacroCtx && Token->isOneOf(tok::comma, tok::r_paren) &&
+      !MacroCallStructure.empty()) {
+    if (Token->is(tok::comma)) {
+      TokenToParentInOutput
+          [MacroCallStructure.back().Line->Tokens.back()->Tok] = Token;
+      Token->MacroParent = true;
+      pushToken(Token, MacroCallStructure.back().Line);
+      prepareParent(Token, /*First=*/true);
+      return true;
+    }
+    assert(Token->is(tok::r_paren));
+    pushToken(Token, MacroCallStructure.back().Line);
+    TokenToParentInOutput.erase(MacroCallStructure.back().RedirectParentFrom);
+    MacroCallStructure.pop_back();
+    if (!MacroCallStructure.empty()) {
+      // We might have overwritten the previous state's parent.
+      TokenToParentInOutput[MacroCallStructure.back().RedirectParentFrom] =
+          MacroCallStructure.back().RedirectParentTo;
+    }
+    return false;
+  }
+  if (!Token->MacroCtx && Token->is(tok::l_paren)) {
+    MacroCallStructure.push_back(
+        {currentLine(), parentLine().Tokens.back()->Tok, Token});
+    TokenToParentInOutput[MacroCallStructure.back().RedirectParentFrom] = Token;
+    pushToken(Token);
+    prepareParent(Token, /*First=*/true);
+    Token->MacroParent = true;
+    return false;
+  }
+  // Note that any tokens that are tagged with MR_None have been passed as
+  // arguments to the macro that have not been expanded, for example:
+  // Given: ID(X) x
+  // When calling: ID(a, b)
+  // 'b' will be part of the unexpanded token stream, but tagged MR_None.
+  // Given that erroring out in this case would be disruptive, we continue
+  // pushing the (unformatted) token.
+  // FIXME: This can lead to unfortunate formatting decisions - give the user
+  // a hint that their macro definition is broken.
+  pushToken(Token);
+  return false;
+}
+
+void MacroUnexpander::finalize() {
+  if (State == Finalized)
+    return;
+  assert(State == InProgress && Unexpanded.empty());
+  State = Finalized;
+
+  // We created corresponding unwrapped lines for each incoming line as children
+  // the the toplevel null token.
+  assert(Output.Tokens.size() == 1 && !Output.Tokens.front()->Children.empty());
+
+  // The first line becomes the top level line in the resuling unwrapped line.
+  auto *I = Output.Tokens.front()->Children.begin();
+  ++I;
+  LLVM_DEBUG({
+    llvm::dbgs() << "Finalizing unexpanded lines:\n";
+    debug(Output, 0);
+  });
+  for (auto *E = Output.Tokens.front()->Children.end(); I != E; ++I) {
+    if ((*I)->Tokens.empty())
+      continue;
+
+    // Every subsequent line will become a child of the last token in the
+    // previous line, which is the token prior to the first token in the line.
+    auto L = TokenToPrevious.find((*I)->Tokens.front()->Tok);
+    assert(L != TokenToPrevious.end());
+    assert(L->second->Children.empty());
+    L->second->Children.push_back(std::move(*I));
+
+    // Mark the previous line's last token as generated by a macro expansion
+    // so the formatting algorithm can take that into account.
+    L->second->Tok->MacroParent = true;
+  }
+  Output.Tokens.front()->Children.resize(1);
+}
+
+void MacroUnexpander::pushToken(FormatToken *Token, Line *L) {
+  L = L ? L : currentLine();
+  LLVM_DEBUG(llvm::dbgs() << "-> " << Token->TokenText << "\n");
+  L->Tokens.push_back(std::make_unique<LineNode>(Token));
+  if (PreviousNode != nullptr) {
+    assert(TokenToPrevious.find(Token) == TokenToPrevious.end());
+    TokenToPrevious[Token] = PreviousNode;
+  }
+  PreviousNode = &*L->Tokens.back();
+}
+
+UnwrappedLine MacroUnexpander::createUnwrappedLine(const Line &Line,
+                                                   int Level) {
+  UnwrappedLine Result;
+  Result.Level = Level;
+  for (const auto &N : Line.Tokens) {
+    Result.Tokens.push_back(N->Tok);
+    UnwrappedLineNode &Current = Result.Tokens.back();
+    for (const auto &Child : N->Children) {
+      if (Child->Tokens.empty())
+        continue;
+      Current.Children.push_back(createUnwrappedLine(*Child, Level + 1));
+    }
+    if (Current.Children.size() == 1 &&
+        Current.Tok->isOneOf(tok::l_paren, tok::comma)) {
+      Result.Tokens.splice(Result.Tokens.end(),
+                           Current.Children.front().Tokens);
+      Current.Children.clear();
+    }
+  }
+  return Result;
+}
+
+void MacroUnexpander::debug(const Line &Line, int Level) {
+  for (int i = 0; i < Level; ++i)
+    llvm::dbgs() << " ";
+  for (const auto &N : Line.Tokens) {
+    if (!N)
+      continue;
+    if (N->Tok)
+      llvm::dbgs() << N->Tok->TokenText << " ";
+    for (const auto &Child : N->Children) {
+      llvm::dbgs() << "\n";
+      debug(*Child, Level + 1);
+      for (int i = 0; i < Level; ++i)
+        llvm::dbgs() << " ";
+    }
+  }
+  llvm::dbgs() << "\n";
+}
+
+MacroUnexpander::Line &MacroUnexpander::parentLine() {
+  return **std::prev(std::prev(Lines.end()));
+}
+
+MacroUnexpander::Line *MacroUnexpander::currentLine() { return Lines.back(); }
+
+} // namespace format
+} // namespace clang
\ No newline at end of file
Index: clang/lib/Format/FormatToken.h
===================================================================
--- clang/lib/Format/FormatToken.h
+++ clang/lib/Format/FormatToken.h
@@ -436,6 +436,15 @@
   // in a configured macro expansion.
   llvm::Optional<MacroExpansion> MacroCtx;
 
+  /// When macro expansion introduces nodes with children, those are marked as
+  /// \c MacroParent.
+  /// FIXME: The formatting code currently hard-codes the assumption that
+  /// child nodes are introduced by blocks following an opening brace.
+  /// This is deeply baked into the code and disentangling this will require
+  /// signficant refactorings. \c MacroParent allows us to special-case the
+  /// cases in which we treat parents as block-openers for now.
+  bool MacroParent = false;
+
   bool is(tok::TokenKind Kind) const { return Tok.is(Kind); }
   bool is(TokenType TT) const { return getType() == TT; }
   bool is(const IdentifierInfo *II) const {
Index: clang/lib/Format/CMakeLists.txt
===================================================================
--- clang/lib/Format/CMakeLists.txt
+++ clang/lib/Format/CMakeLists.txt
@@ -8,6 +8,7 @@
   FormatToken.cpp
   FormatTokenLexer.cpp
   MacroExpander.cpp
+  MacroUnexpander.cpp
   NamespaceEndCommentsFixer.cpp
   SortJavaScriptImports.cpp
   TokenAnalyzer.cpp

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D88299: [clang-format] Add MacroUnexpander.

Reply via email to