arphaman created this revision.
arphaman added reviewers: dexonsmith, Bigcheese, dblaikie, vsapsai, sammccall, 
rsmith, bruno.
Herald added subscribers: jkorous, mgorny.

This patch introduces a dependency directives source minimizer to clang that 
minimizes header and source files to the minimum necessary preprocessor 
directives for evaluating includes. It reduces the source down to `#define`, 
`#include`, `#import`, `@import`, and any conditional preprocessor logic that 
contains one of those.

The source minimizer works by lexing the input with a custom fast lexer that 
recognizes the preprocessor directives it cares about, and emitting those 
directives in the minimized source. It ignores source code, comments, and 
normalizes whitespace. It gives up and fails if it sees any directives that it 
doesn't recognize as valid (e.g. `#define 0`).

In addition to the source minimizer this patch adds a 
`print-dependency-directives-minimized-source` CC1 option that allows you to 
invoke the minimizer using clang directly.

There are a couple of known issues with the source minimizer:

- It fails to detect `@import` that was formed in a macro expansion. We are 
planning to add a warning to discourage this use.
- It fails to detect `_Pragma ("clang import")`. We will probably 
add a warning to discourage this use.
- It assumes raw string literals are valid when minimizing source without 
respecting language mode.

This is based on code that was included in the original WIP patch I posted 
before the dev meeting: https://reviews.llvm.org/D53354 . It's based on the 
original filter-to-includes code written by @dexonsmith . We are planning to 
use it to implement fast dependency scanning for explicit module builds.


Repository:
  rC Clang

https://reviews.llvm.org/D55463

Files:
  include/clang/Basic/DiagnosticFrontendKinds.td
  include/clang/Driver/CC1Options.td
  include/clang/Frontend/FrontendActions.h
  include/clang/Frontend/FrontendOptions.h
  include/clang/Lex/DependencyDirectivesSourceMinimizer.h
  lib/Frontend/CompilerInvocation.cpp
  lib/Frontend/FrontendActions.cpp
  lib/FrontendTool/ExecuteCompilerInvocation.cpp
  lib/Lex/CMakeLists.txt
  lib/Lex/DependencyDirectivesSourceMinimizer.cpp
  test/Frontend/minimize_source_to_dependency_directives.c
  test/Frontend/minimize_source_to_dependency_directives_error.c
  unittests/Lex/CMakeLists.txt
  unittests/Lex/DependencyDirectivesSourceMinimizerTest.cpp

Index: unittests/Lex/DependencyDirectivesSourceMinimizerTest.cpp
===================================================================
--- /dev/null
+++ unittests/Lex/DependencyDirectivesSourceMinimizerTest.cpp
@@ -0,0 +1,463 @@
+//===- unittests/Lex/DependencyDirectivesSourceMinimizer.cpp -  -----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Lex/DependencyDirectivesSourceMinimizer.h"
+#include "llvm/ADT/SmallString.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+using namespace clang;
+using namespace clang::minimize_source_to_dependency_directives;
+
+namespace clang {
+
+bool minimizeSourceToDependencyDirectives(StringRef Input,
+                                          SmallVectorImpl<char> &Out) {
+  SmallVector<minimize_source_to_dependency_directives::Token, 32> Tokens;
+  return minimizeSourceToDependencyDirectives(Input, Out, Tokens);
+}
+
+} // end namespace clang
+
+namespace {
+
+TEST(MinimizeSourceToDependencyDirectivesTest, Empty) {
+  SmallVector<char, 128> Out;
+  SmallVector<Token, 4> Tokens;
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("", Out, Tokens));
+  EXPECT_TRUE(Out.empty());
+  ASSERT_EQ(1u, Tokens.size());
+  ASSERT_EQ(pp_eof, Tokens.back().K);
+
+  ASSERT_FALSE(
+      minimizeSourceToDependencyDirectives("abc def\nxyz", Out, Tokens));
+  EXPECT_TRUE(Out.empty());
+  ASSERT_EQ(1u, Tokens.size());
+  ASSERT_EQ(pp_eof, Tokens.back().K);
+}
+
+TEST(MinimizeSourceToDependencyDirectivesTest, AllTokens) {
+  SmallVector<char, 128> Out;
+  SmallVector<Token, 4> Tokens;
+
+  ASSERT_FALSE(
+      minimizeSourceToDependencyDirectives("#define A\n"
+                                           "#undef A\n"
+                                           "#endif\n"
+                                           "#if A\n"
+                                           "#ifdef A\n"
+                                           "#ifndef A\n"
+                                           "#elif A\n"
+                                           "#else\n"
+                                           "#include <A>\n"
+                                           "#include_next <A>\n"
+                                           "#__include_macros <A>\n"
+                                           "#import <A>\n"
+                                           "@import A;\n"
+                                           "#pragma clang module import A\n",
+                                           Out, Tokens));
+  EXPECT_EQ(pp_define, Tokens[0].K);
+  EXPECT_EQ(pp_undef, Tokens[1].K);
+  EXPECT_EQ(pp_endif, Tokens[2].K);
+  EXPECT_EQ(pp_if, Tokens[3].K);
+  EXPECT_EQ(pp_ifdef, Tokens[4].K);
+  EXPECT_EQ(pp_ifndef, Tokens[5].K);
+  EXPECT_EQ(pp_elif, Tokens[6].K);
+  EXPECT_EQ(pp_else, Tokens[7].K);
+  EXPECT_EQ(pp_include, Tokens[8].K);
+  EXPECT_EQ(pp_include_next, Tokens[9].K);
+  EXPECT_EQ(pp___include_macros, Tokens[10].K);
+  EXPECT_EQ(pp_import, Tokens[11].K);
+  EXPECT_EQ(pp_at_import, Tokens[12].K);
+  EXPECT_EQ(pp_pragma_import, Tokens[13].K);
+  EXPECT_EQ(pp_eof, Tokens[14].K);
+}
+
+TEST(MinimizeSourceToDependencyDirectivesTest, Define) {
+  SmallVector<char, 128> Out;
+  SmallVector<Token, 4> Tokens;
+
+  ASSERT_FALSE(
+      minimizeSourceToDependencyDirectives("#define MACRO", Out, Tokens));
+  EXPECT_STREQ("#define MACRO\n", Out.data());
+  ASSERT_EQ(2u, Tokens.size());
+  ASSERT_EQ(pp_define, Tokens.front().K);
+}
+
+TEST(MinimizeSourceToDependencyDirectivesTest, DefineSpacing) {
+  SmallVector<char, 128> Out;
+
+  ASSERT_FALSE(
+      minimizeSourceToDependencyDirectives("#define MACRO\n\n\n", Out));
+  EXPECT_STREQ("#define MACRO\n", Out.data());
+
+  ASSERT_FALSE(
+      minimizeSourceToDependencyDirectives("#define MACRO \n\n\n", Out));
+  EXPECT_STREQ("#define MACRO\n", Out.data());
+
+  ASSERT_FALSE(
+      minimizeSourceToDependencyDirectives("#define MACRO a \n\n\n", Out));
+  EXPECT_STREQ("#define MACRO a\n", Out.data());
+
+  ASSERT_FALSE(
+      minimizeSourceToDependencyDirectives("#define   MACRO\n\n\n", Out));
+  EXPECT_STREQ("#define MACRO\n", Out.data());
+}
+
+TEST(MinimizeSourceToDependencyDirectivesTest, DefineMacroArguments) {
+  SmallVector<char, 128> Out;
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define MACRO()", Out));
+  EXPECT_STREQ("#define MACRO()\n", Out.data());
+
+  ASSERT_FALSE(
+      minimizeSourceToDependencyDirectives("#define MACRO(a, b...)", Out));
+  EXPECT_STREQ("#define MACRO(a,b...)\n", Out.data());
+
+  ASSERT_FALSE(
+      minimizeSourceToDependencyDirectives("#define MACRO content", Out));
+  EXPECT_STREQ("#define MACRO content\n", Out.data());
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives(
+      "#define MACRO   con  tent   ", Out));
+  EXPECT_STREQ("#define MACRO con  tent\n", Out.data());
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives(
+      "#define MACRO()   con  tent   ", Out));
+  EXPECT_STREQ("#define MACRO() con  tent\n", Out.data());
+}
+
+TEST(MinimizeSourceToDependencyDirectivesTest, DefineInvalidMacroArguments) {
+  SmallVector<char, 128> Out;
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define MACRO((a))", Out));
+  EXPECT_STREQ("#define MACRO(/* invalid */\n", Out.data());
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define MACRO(", Out));
+  EXPECT_STREQ("#define MACRO(/* invalid */\n", Out.data());
+
+  ASSERT_FALSE(
+      minimizeSourceToDependencyDirectives("#define MACRO(a * b)", Out));
+  EXPECT_STREQ("#define MACRO(/* invalid */\n", Out.data());
+}
+
+TEST(MinimizeSourceToDependencyDirectivesTest, DefineHorizontalWhitespace) {
+  SmallVector<char, 128> Out;
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives(
+      "#define MACRO(\t)\tcon \t tent\t", Out));
+  EXPECT_STREQ("#define MACRO() con \t tent\n", Out.data());
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives(
+      "#define MACRO(\f)\fcon \f tent\f", Out));
+  EXPECT_STREQ("#define MACRO() con \f tent\n", Out.data());
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives(
+      "#define MACRO(\v)\vcon \v tent\v", Out));
+  EXPECT_STREQ("#define MACRO() con \v tent\n", Out.data());
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives(
+      "#define MACRO \t\v\f\v\t con\f\t\vtent\v\f \v", Out));
+  EXPECT_STREQ("#define MACRO con\f\t\vtent\n", Out.data());
+}
+
+TEST(MinimizeSourceToDependencyDirectivesTest, DefineMultilineArgs) {
+  SmallVector<char, 128> Out;
+
+  ASSERT_FALSE(
+      minimizeSourceToDependencyDirectives("#define MACRO(a        \\\n"
+                                           "              )",
+                                           Out));
+  EXPECT_STREQ("#define MACRO(a)\n", Out.data());
+
+  ASSERT_FALSE(
+      minimizeSourceToDependencyDirectives("#define MACRO(a,       \\\n"
+                                           "              b)       \\\n"
+                                           "        call((a),      \\\n"
+                                           "             (b))",
+                                           Out));
+  EXPECT_STREQ("#define MACRO(a,b) call((a),(b))\n", Out.data());
+}
+
+TEST(MinimizeSourceToDependencyDirectivesTest,
+     DefineMultilineArgsCarriageReturn) {
+  SmallVector<char, 128> Out;
+
+  ASSERT_FALSE(
+      minimizeSourceToDependencyDirectives("#define MACRO(a,       \\\r"
+                                           "              b)       \\\r"
+                                           "        call((a),      \\\r"
+                                           "             (b))",
+                                           Out));
+  EXPECT_STREQ("#define MACRO(a,b) call((a),(b))\n", Out.data());
+}
+
+TEST(MinimizeSourceToDependencyDirectivesTest,
+     DefineMultilineArgsCarriageReturnNewline) {
+  SmallVector<char, 128> Out;
+
+  ASSERT_FALSE(
+      minimizeSourceToDependencyDirectives("#define MACRO(a,       \\\r\n"
+                                           "              b)       \\\r\n"
+                                           "        call((a),      \\\r\n"
+                                           "             (b))",
+                                           Out));
+  EXPECT_STREQ("#define MACRO(a,b) call((a),(b))\n", Out.data());
+}
+
+TEST(MinimizeSourceToDependencyDirectivesTest,
+     DefineMultilineArgsNewlineCarriageReturn) {
+  SmallVector<char, 128> Out;
+
+  ASSERT_FALSE(
+      minimizeSourceToDependencyDirectives("#define MACRO(a,       \\\n\r"
+                                           "              b)       \\\n\r"
+                                           "        call((a),      \\\n\r"
+                                           "             (b))",
+                                           Out));
+  EXPECT_STREQ("#define MACRO(a,b) call((a),(b))\n", Out.data());
+}
+
+TEST(MinimizeSourceToDependencyDirectivesTest, DefineNumber) {
+  SmallVector<char, 128> Out;
+
+  ASSERT_TRUE(minimizeSourceToDependencyDirectives("#define 0\n", Out));
+}
+
+TEST(MinimizeSourceToDependencyDirectivesTest, DefineNoName) {
+  SmallVector<char, 128> Out;
+
+  ASSERT_TRUE(minimizeSourceToDependencyDirectives("#define &\n", Out));
+}
+
+TEST(MinimizeSourceToDependencyDirectivesTest, DefineNoWhitespace) {
+  SmallVector<char, 128> Out;
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define AND&\n", Out));
+  EXPECT_STREQ("#define AND &\n", Out.data());
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define AND\\\n"
+                                                    "&\n",
+                                                    Out));
+  EXPECT_STREQ("#define AND &\n", Out.data());
+}
+
+TEST(MinimizeSourceToDependencyDirectivesTest, MultilineComment) {
+  SmallVector<char, 128> Out;
+
+  ASSERT_FALSE(
+      minimizeSourceToDependencyDirectives("#define MACRO a/*\n"
+                                           "  /*\n"
+                                           "#define MISSING abc\n"
+                                           "  /*\n"
+                                           "  /* something */ \n"
+                                           "#include  /* \"def\" */ <abc> \n",
+                                           Out));
+  EXPECT_STREQ("#define MACRO a\n"
+               "#include <abc>\n",
+               Out.data());
+}
+
+TEST(MinimizeSourceToDependencyDirectivesTest, MultilineCommentInStrings) {
+  SmallVector<char, 128> Out;
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define MACRO1 \"/*\"\n"
+                                                    "#define MACRO2 \"*/\"\n",
+                                                    Out));
+  EXPECT_STREQ("#define MACRO1 \"/*\"\n"
+               "#define MACRO2 \"*/\"\n",
+               Out.data());
+}
+
+TEST(MinimizeSourceToDependencyDirectivesTest, Ifdef) {
+  SmallVector<char, 128> Out;
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("#ifdef A\n"
+                                                    "#define B\n"
+                                                    "#endif\n",
+                                                    Out));
+  EXPECT_STREQ("#ifdef A\n"
+               "#define B\n"
+               "#endif\n",
+               Out.data());
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("#ifdef A\n"
+                                                    "#define B\n"
+                                                    "#elif B\n"
+                                                    "#define C\n"
+                                                    "#elif C\n"
+                                                    "#define D\n"
+                                                    "#else\n"
+                                                    "#define E\n"
+                                                    "#endif\n",
+                                                    Out));
+  EXPECT_STREQ("#ifdef A\n"
+               "#define B\n"
+               "#elif B\n"
+               "#define C\n"
+               "#elif C\n"
+               "#define D\n"
+               "#else\n"
+               "#define E\n"
+               "#endif\n",
+               Out.data());
+}
+
+TEST(MinimizeSourceToDependencyDirectivesTest, EmptyIfdef) {
+  SmallVector<char, 128> Out;
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("#ifdef A\n"
+                                                    "#elif B\n"
+                                                    "#elif C\n"
+                                                    "#else D\n"
+                                                    "#endif\n",
+                                                    Out));
+  EXPECT_STREQ("", Out.data());
+}
+
+TEST(MinimizeSourceToDependencyDirectivesTest, Pragma) {
+  SmallVector<char, 128> Out;
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("#pragma A\n", Out));
+  EXPECT_STREQ("", Out.data());
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("#pragma clang\n", Out));
+  EXPECT_STREQ("", Out.data());
+
+  ASSERT_FALSE(
+      minimizeSourceToDependencyDirectives("#pragma clang module\n", Out));
+  EXPECT_STREQ("", Out.data());
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives(
+      "#pragma clang module impor\n", Out));
+  EXPECT_STREQ("", Out.data());
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives(
+      "#pragma clang module import\n", Out));
+  EXPECT_STREQ("#pragma clang module import\n", Out.data());
+}
+
+TEST(MinimizeSourceToDependencyDirectivesTest, Include) {
+  SmallVector<char, 128> Out;
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("#include \"A\"\n", Out));
+  EXPECT_STREQ("#include \"A\"\n", Out.data());
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("#include <A>\n", Out));
+  EXPECT_STREQ("#include <A>\n", Out.data());
+
+  ASSERT_FALSE(
+      minimizeSourceToDependencyDirectives("#include_next <A>\n", Out));
+  EXPECT_STREQ("#include_next <A>\n", Out.data());
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("#import <A>\n", Out));
+  EXPECT_STREQ("#import <A>\n", Out.data());
+
+  ASSERT_FALSE(
+      minimizeSourceToDependencyDirectives("#__include_macros <A>\n", Out));
+  EXPECT_STREQ("#__include_macros <A>\n", Out.data());
+}
+
+TEST(MinimizeSourceToDependencyDirectivesTest, AtImport) {
+  SmallVector<char, 128> Out;
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("@import A;\n", Out));
+  EXPECT_STREQ("@import A;\n", Out.data());
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives(" @ import  A;\n", Out));
+  EXPECT_STREQ("@import A;\n", Out.data());
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("@import A\n;", Out));
+  EXPECT_STREQ("@import A;\n", Out.data());
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("@import A.B;\n", Out));
+  EXPECT_STREQ("@import A.B;\n", Out.data());
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives(
+      "@import /*x*/ A /*x*/ . /*x*/ B /*x*/ \n /*x*/ ; /*x*/", Out));
+  EXPECT_STREQ("@import A.B;\n", Out.data());
+}
+
+TEST(MinimizeSourceToDependencyDirectivesTest, AtImportFailures) {
+  SmallVector<char, 128> Out;
+
+  ASSERT_TRUE(minimizeSourceToDependencyDirectives("@import A\n", Out));
+  ASSERT_TRUE(minimizeSourceToDependencyDirectives("@import MACRO(A);\n", Out));
+  ASSERT_TRUE(minimizeSourceToDependencyDirectives("@import \" \";\n", Out));
+}
+
+TEST(MinimizeSourceToDependencyDirectivesTest, RawStringLiteral) {
+  SmallVector<char, 128> Out;
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("#ifndef GUARD\n"
+                                                    "#define GUARD\n"
+                                                    "R\"()\"\n"
+                                                    "#endif\n",
+                                                    Out));
+  EXPECT_STREQ("#ifndef GUARD\n"
+               "#define GUARD\n"
+               "#endif\n",
+               Out.data());
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives(
+      "#ifndef GUARD\n"
+      "#define GUARD\n"
+      R"raw(static constexpr char bytes[] = R"(-?:\,[]{}#&*!|>'"%@`)";)raw"
+      "\n"
+      "#endif\n",
+      Out));
+  EXPECT_STREQ("#ifndef GUARD\n"
+               "#define GUARD\n"
+               "#endif\n",
+               Out.data());
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives(
+      "#ifndef GUARD\n"
+      "#define GUARD\n"
+      R"raw(static constexpr char bytes[] = R"abc(-?:\,[]{}#&*!|>'"%@`)abc";)raw"
+      "\n"
+      "#endif\n",
+      Out));
+  EXPECT_STREQ("#ifndef GUARD\n"
+               "#define GUARD\n"
+               "#endif\n",
+               Out.data());
+}
+
+TEST(MinimizeSourceToDependencyDirectivesTest, SplitIdentifier) {
+  SmallVector<char, 128> Out;
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("#if\\\n"
+                                                    "ndef GUARD\n"
+                                                    "#define GUARD\n"
+                                                    "#endif\n",
+                                                    Out));
+  EXPECT_STREQ("#ifndef GUARD\n"
+               "#define GUARD\n"
+               "#endif\n",
+               Out.data());
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define GUA\\\n"
+                                                    "RD\n",
+                                                    Out));
+  EXPECT_STREQ("#define GUARD\n", Out.data());
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define GUA\\\r"
+                                                    "RD\n",
+                                                    Out));
+  EXPECT_STREQ("#define GUARD\n", Out.data());
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define GUA\\\n"
+                                                    "           RD\n",
+                                                    Out));
+  EXPECT_STREQ("#define GUA RD\n", Out.data());
+}
+
+} // end anonymous namespace
Index: unittests/Lex/CMakeLists.txt
===================================================================
--- unittests/Lex/CMakeLists.txt
+++ unittests/Lex/CMakeLists.txt
@@ -3,6 +3,7 @@
   )
 
 add_clang_unittest(LexTests
+  DependencyDirectivesSourceMinimizerTest.cpp
   HeaderMapTest.cpp
   HeaderSearchTest.cpp
   LexerTest.cpp
Index: test/Frontend/minimize_source_to_dependency_directives_error.c
===================================================================
--- /dev/null
+++ test/Frontend/minimize_source_to_dependency_directives_error.c
@@ -0,0 +1,4 @@
+// RUN: not %clang_cc1 -print-dependency-directives-minimized-source %s 2>&1 | FileCheck %s
+
+#define 0 0
+// CHECK: dependency directives minimization failed for given source
Index: test/Frontend/minimize_source_to_dependency_directives.c
===================================================================
--- /dev/null
+++ test/Frontend/minimize_source_to_dependency_directives.c
@@ -0,0 +1,14 @@
+// RUN: %clang_cc1 -print-dependency-directives-minimized-source %s > %t
+// RUN: echo END. >> %t
+// RUN: FileCheck < %t %s
+
+#ifdef FOO
+#include "a.h"
+#else
+void skipThisCode();
+#endif
+
+// CHECK:      #ifdef FOO
+// CHECK-NEXT: #include "a.h"
+// CHECK-NEXT: #endif
+// CHECK-NEXT: END.
Index: lib/Lex/DependencyDirectivesSourceMinimizer.cpp
===================================================================
--- /dev/null
+++ lib/Lex/DependencyDirectivesSourceMinimizer.cpp
@@ -0,0 +1,697 @@
+//===- DependencyDirectivesSourceMinimizer.cpp -  -------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This is the implementation for minimizing header and source files to the
+/// minimum necessary preprocessor directives for evaluating includes. It
+/// reduces the source down to #define, #include, #import, @import, and any
+/// conditional preprocessor logic that contains one of those.
+///
+//===----------------------------------------------------------------------===//
+
+#include "clang/Lex/DependencyDirectivesSourceMinimizer.h"
+#include "clang/Basic/CharInfo.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/Support/MemoryBuffer.h"
+
+using namespace llvm;
+using namespace clang;
+using namespace clang::minimize_source_to_dependency_directives;
+
+namespace {
+
+struct Lexer {
+  SmallVectorImpl<char> &Out;
+  SmallVectorImpl<Token> &Tokens;
+
+  Lexer(SmallVectorImpl<char> &Out, SmallVectorImpl<Token> &Tokens)
+      : Out(Out), Tokens(Tokens) {}
+
+  bool lex(StringRef Bytes);
+
+  StringMap<char> SplitIds;
+
+private:
+  struct IdInfo {
+    const char *Last;
+    StringRef Name;
+  };
+
+  /// Lex an identifier.
+  ///
+  /// \pre First points at a valid identifier head.
+  LLVM_NODISCARD IdInfo lexIdentifier(const char *First, const char *const End);
+  LLVM_NODISCARD bool isNextIdentifier(StringRef Id, const char *&First,
+                                       const char *const End);
+  LLVM_NODISCARD bool lexImpl(const char *First, const char *const End);
+  LLVM_NODISCARD bool lexPPLine(const char *&First, const char *const End);
+  LLVM_NODISCARD bool lexAt(const char *&First, const char *const End);
+  LLVM_NODISCARD bool lexDefine(const char *&First, const char *const End);
+  LLVM_NODISCARD bool lexPragma(const char *&First, const char *const End);
+  LLVM_NODISCARD bool lexEndif(const char *&First, const char *const End);
+  LLVM_NODISCARD bool lexDefault(TokenKind Kind, StringRef Directive,
+                                 const char *&First, const char *const End);
+  Token &makeToken(TokenKind K) {
+    Tokens.emplace_back(K, Out.size());
+    return Tokens.back();
+  }
+  void popToken() {
+    Out.resize(Tokens.back().Offset);
+    Tokens.pop_back();
+  }
+  TokenKind top() const { return Tokens.empty() ? pp_none : Tokens.back().K; }
+
+  Lexer &put(char Byte) {
+    Out.push_back(Byte);
+    return *this;
+  }
+  Lexer &append(StringRef S) { return append(S.begin(), S.end()); }
+  Lexer &append(const char *First, const char *Last) {
+    Out.append(First, Last);
+    return *this;
+  }
+
+  void printToNewline(const char *&First, const char *const End);
+  void printAdjacentModuleNameParts(const char *&First, const char *const End);
+  LLVM_NODISCARD bool printAtImportBody(const char *&First,
+                                        const char *const End);
+  void printDirectiveBody(const char *&First, const char *const End);
+  void printAdjacentMacroArgs(const char *&First, const char *const End);
+  LLVM_NODISCARD bool printMacroArgs(const char *&First, const char *const End);
+};
+
+} // end anonymous namespace
+
+static void skipOverSpaces(const char *&First, const char *const End) {
+  while (First != End && isHorizontalWhitespace(*First))
+    ++First;
+}
+
+/// Check whether the quote at \p Current opens a raw string literal, i.e. is
+/// preceded by R, uR, UR, LR or u8R that does not continue an identifier.
+/// Never reads before \p First.
+LLVM_NODISCARD static bool isRawStringLiteral(const char *First,
+                                              const char *Current) {
+  assert(First <= Current);
+
+  // Check if we can even back up.
+  if (*Current != '\"' || First == Current)
+    return false;
+  // Check for an "R".
+  --Current;
+  if (*Current != 'R')
+    return false;
+  if (First == Current || !isIdentifierBody(*--Current))
+    return true;
+  // Check for a prefix of "u", "U", or "L".
+  if (*Current == 'u' || *Current == 'U' || *Current == 'L')
+    return First == Current || !isIdentifierBody(*--Current);
+  // Check for a prefix of "u8"; step back onto the 'u' before testing it.
+  if (*Current != '8' || First == Current || *--Current != 'u')
+    return false;
+  return First == Current || !isIdentifierBody(*--Current);
+}
+
+static void skipRawString(const char *&First, const char *const End) {
+  assert(First[0] == '\"');
+  assert(First[-1] == 'R');
+
+  const char *Last = ++First;
+  while (Last != End && *Last != '(')
+    ++Last;
+  if (Last == End) {
+    First = Last; // Hit the end... just give up.
+    return;
+  }
+
+  StringRef Terminator(First, Last - First);
+  for (;;) {
+    // Move First to just past the next ")".
+    First = Last;
+    while (First != End && *First != ')')
+      ++First;
+    if (First == End)
+      return;
+    ++First;
+
+    // Look ahead for the terminator sequence.
+    Last = First;
+    while (Last != End && size_t(Last - First) < Terminator.size() &&
+           Terminator[Last - First] == *Last)
+      ++Last;
+
+    // Check if we hit it (or the end of the file).
+    if (Last == End) {
+      First = Last;
+      return;
+    }
+    if (size_t(Last - First) < Terminator.size())
+      continue;
+    if (*Last != '\"')
+      continue;
+    First = Last + 1;
+    return;
+  }
+}
+
+static void skipString(const char *&First, const char *const End) {
+  assert(*First == '\'' || *First == '\"');
+  const char Terminator = *First;
+  for (++First; First != End && *First != Terminator; ++First)
+    if (*First == '\\')
+      if (++First == End)
+        return;
+  if (First != End)
+    ++First; // Finish off the string.
+}
+
+static void skipNewline(const char *&First, const char *End) {
+  assert(isVerticalWhitespace(*First));
+  ++First;
+  if (First == End)
+    return;
+
+  // Check for "\n\r" and "\r\n".
+  if (LLVM_UNLIKELY(isVerticalWhitespace(*First) && First[-1] != First[0]))
+    ++First;
+}
+
+/// Advance \p First to the newline that ends the current line (or to \p End),
+/// treating a backslash immediately before a newline as a continuation.
+static void skipToNewlineRaw(const char *&First, const char *const End) {
+  for (;;) {
+    if (First == End)
+      return;
+    if (isVerticalWhitespace(*First))
+      return;
+    while (!isVerticalWhitespace(*First))
+      if (++First == End)
+        return;
+    if (First[-1] != '\\')
+      return;
+
+    // Consume the entire newline ("\n", "\r", "\r\n" or "\n\r") and go on.
+    skipNewline(First, End);
+  }
+}
+
+static const char *reverseOverSpaces(const char *First, const char *Last) {
+  while (First != Last && isHorizontalWhitespace(Last[-1]))
+    --Last;
+  return Last;
+}
+
+static void skipLineComment(const char *&First, const char *const End) {
+  assert(First[0] == '/' && First[1] == '/');
+  First += 2;
+  skipToNewlineRaw(First, End);
+}
+
+static void skipBlockComment(const char *&First, const char *const End) {
+  assert(First[0] == '/' && First[1] == '*');
+  if (End - First < 4) {
+    First = End;
+    return;
+  }
+  for (First += 3; First != End; ++First)
+    if (First[-1] == '*' && First[0] == '/') {
+      ++First;
+      return;
+    }
+}
+
+static void skipLine(const char *&First, const char *const End) {
+  do {
+    assert(First <= End);
+    if (First == End)
+      return;
+
+    if (isVerticalWhitespace(*First)) {
+      skipNewline(First, End);
+      return;
+    }
+    const char *Start = First;
+    while (First != End && !isVerticalWhitespace(*First)) {
+      // Iterate over strings correctly to avoid comments and newlines.
+      if (*First == '\"' || *First == '\'') {
+        if (isRawStringLiteral(Start, First))
+          skipRawString(First, End);
+        else
+          skipString(First, End);
+        continue;
+      }
+
+      // Iterate over comments correctly.
+      if (*First != '/' || End - First < 2) {
+        ++First;
+        continue;
+      }
+
+      if (First[1] == '/') {
+        // "//...".
+        skipLineComment(First, End);
+        continue;
+      }
+
+      if (First[1] != '*') {
+        ++First;
+        continue;
+      }
+
+      // "/*...*/".
+      skipBlockComment(First, End);
+    }
+    if (First == End)
+      return;
+
+    // Skip over the newline.
+    assert(isVerticalWhitespace(*First));
+    skipNewline(First, End);
+  } while (First[-2] == '\\'); // Continue past line-continuations.
+}
+
+void Lexer::printToNewline(const char *&First, const char *const End) {
+  while (First != End && !isVerticalWhitespace(*First)) {
+    const char *Last = First;
+    do {
+      // Iterate over strings correctly to avoid comments and newlines.
+      if (*Last == '\"' || *Last == '\'') {
+        if (LLVM_UNLIKELY(isRawStringLiteral(First, Last)))
+          skipRawString(Last, End);
+        else
+          skipString(Last, End);
+        continue;
+      }
+      if (*Last != '/' || End - Last < 2) {
+        ++Last;
+        continue; // Gather the rest up to print verbatim.
+      }
+
+      if (Last[1] != '/' && Last[1] != '*') {
+        ++Last;
+        continue;
+      }
+
+      // Deal with "//..." and "/*...*/".
+      append(First, reverseOverSpaces(First, Last));
+      First = Last;
+
+      if (Last[1] == '/') {
+        skipLineComment(First, End);
+        return;
+      }
+
+      put(' ');
+      skipBlockComment(First, End);
+      skipOverSpaces(First, End);
+      Last = First;
+    } while (Last != End && !isVerticalWhitespace(*Last));
+
+    // Print out the string.
+    if (Last == End || Last == First || Last[-1] != '\\') {
+      append(First, reverseOverSpaces(First, Last));
+      return;
+    }
+
+    // Print up to the backslash, backing up over spaces.
+    append(First, reverseOverSpaces(First, Last - 1));
+
+    First = Last;
+    skipNewline(First, End);
+    skipOverSpaces(First, End);
+  }
+}
+
+static void skipWhitespace(const char *&First, const char *const End) {
+  for (;;) { // Repeat until something other than spaces/comments is found.
+    assert(First <= End);
+    skipOverSpaces(First, End);
+
+    if (End - First < 2)
+      return; // Too short for a comment or a line continuation.
+
+    if (First[0] == '\\' && isVerticalWhitespace(First[1])) {
+      skipNewline(++First, End); // Line continuation: keep skipping.
+      continue;
+    }
+
+    // Anything other than '/' cannot start a comment, so stop here.
+    if (First[0] != '/')
+      return;
+
+    // "// ...".
+    if (First[1] == '/') {
+      skipLineComment(First, End);
+      return; // A line comment ends the scan.
+    }
+
+    // A '/' not followed by '/' or '*' cannot be a comment.
+    if (First[1] != '*')
+      return;
+
+    // "/*...*/": skip it and look for more whitespace after it.
+    skipBlockComment(First, End);
+  }
+}
+
+void Lexer::printAdjacentModuleNameParts(const char *&First,
+                                         const char *const End) {
+  // *First is always consumed; after it, extend the run over any following
+  // identifier characters and '.' separators, then print the whole run.
+  const char *Stop = First + 1;
+  while (Stop != End && (isIdentifierBody(*Stop) || *Stop == '.'))
+    ++Stop;
+  append(First, Stop);
+  First = Stop;
+}
+
+bool Lexer::printAtImportBody(const char *&First, const char *const End) {
+  for (;;) {
+    skipWhitespace(First, End);
+    if (First == End)
+      return true; // Error: input ended before the ';'.
+
+    if (isVerticalWhitespace(*First)) {
+      skipNewline(First, End);
+      continue; // The module name may span several lines.
+    }
+
+    // Found the terminating semicolon: print it, end the line, and succeed.
+    if (*First == ';') {
+      put(*First++).put('\n');
+      return false;
+    }
+
+    // Don't handle macro expansions inside @import for now.
+    if (!isIdentifierBody(*First) && *First != '.')
+      return true; // Error: unexpected character in the module name.
+
+    printAdjacentModuleNameParts(First, End);
+  }
+}
+
+void Lexer::printDirectiveBody(const char *&First, const char *const End) {
+  skipWhitespace(First, End); // Skip initial whitespace.
+  printToNewline(First, End); // Emit the body with comments removed.
+  while (Out.back() == ' ') // Trim trailing spaces left in the output.
+    Out.pop_back();
+  put('\n'); // Every printed directive ends with exactly one newline.
+}
+
+LLVM_NODISCARD static const char *lexRawIdentifier(const char *First,
+                                                   const char *const End) {
+  assert(First != End && isIdentifierBody(*First) && "invalid identifier");
+  const char *Last = First + 1; // *First was validated by the assert above.
+  while (Last != End && isIdentifierBody(*Last))
+    ++Last; // Extend over the contiguous identifier characters.
+  return Last; // One past the final identifier character.
+}
+
+LLVM_NODISCARD static const char *
+getIdentifierContinuation(const char *First, const char *const End) {
+  // A continuation needs a backslash, a newline, and at least one more byte.
+  const bool IsSplice =
+      End - First >= 3 && First[0] == '\\' && isVerticalWhitespace(First[1]);
+  if (!IsSplice)
+    return nullptr;
+  const char *Rest = First + 1; // Step over the backslash.
+  skipNewline(Rest, End);
+  return Rest != End && isIdentifierBody(*Rest) ? Rest : nullptr;
+}
+
+Lexer::IdInfo Lexer::lexIdentifier(const char *First, const char *const End) {
+  const char *Last = lexRawIdentifier(First, End);
+  const char *Next = getIdentifierContinuation(Last, End);
+  if (LLVM_LIKELY(!Next)) // Common case: the name is a slice of the input.
+    return IdInfo{Last, StringRef(First, Last - First)};
+
+  // Slow path: join the pieces of an identifier split by line continuations.
+  SmallVector<char, 64> Id(First, Last);
+  while (Next) {
+    Last = lexRawIdentifier(Next, End);
+    Id.append(Next, Last);
+    Next = getIdentifierContinuation(Last, End);
+  }
+  return IdInfo{
+      Last, // Name comes from the SplitIds key, not the local buffer Id.
+      SplitIds.try_emplace(StringRef(Id.begin(), Id.size()), 0).first->first()};
+}
+
+void Lexer::printAdjacentMacroArgs(const char *&First, const char *const End) {
+  // *First is always consumed; after it, copy the unbroken run of identifier
+  // characters, '.' and ',' that follows.
+  const char *Stop = First + 1;
+  for (; Stop != End; ++Stop)
+    if (!isIdentifierBody(*Stop) && *Stop != '.' && *Stop != ',')
+      break;
+  append(First, Stop);
+  First = Stop;
+}
+
+bool Lexer::printMacroArgs(const char *&First, const char *const End) {
+  assert(*First == '(');
+  put(*First++); // Emit the opening '('.
+  for (;;) {
+    skipWhitespace(First, End);
+    if (First == End)
+      return true; // Error: no closing ')' before the end of input.
+
+    if (*First == ')') {
+      put(*First++);
+      return false; // Success: the ')' has been printed.
+    }
+
+    // Intentionally liberal: accept identifier characters, '.' and ','.
+    if (!(isIdentifierBody(*First) || *First == '.' || *First == ','))
+      return true; // Error: unexpected character in the parameter list.
+
+    printAdjacentMacroArgs(First, End);
+  }
+}
+
+/// Looks for an identifier starting from First, after skipping whitespace.
+///
+/// Updates "First" to just past the next identifier, if any.  Returns true iff
+/// the identifier matches "Id".
+bool Lexer::isNextIdentifier(StringRef Id, const char *&First,
+                             const char *const End) {
+  skipWhitespace(First, End);
+  if (First == End || !isIdentifierHead(*First))
+    return false; // No identifier here; First stays after the whitespace.
+
+  IdInfo FoundId = lexIdentifier(First, End);
+  First = FoundId.Last; // Consumed even when the name does not match.
+  return FoundId.Name == Id;
+}
+
+bool Lexer::lexAt(const char *&First, const char *const End) {
+  // Handle "@import"; any other use of '@' is skipped without error.
+  ++First; // Skip over '@'.
+  if (!isNextIdentifier("import", First, End)) {
+    skipLine(First, End);
+    return false;
+  }
+  makeToken(pp_at_import);
+  append("@import ");
+  if (printAtImportBody(First, End))
+    return true; // Error: Could not find semi-colon.
+  skipWhitespace(First, End);
+  if (First == End)
+    return false; // End of input right after the import is fine.
+  if (!isVerticalWhitespace(*First))
+    return true; // Error: Nothing expected after semi-colon.
+
+  skipNewline(First, End);
+  return false;
+}
+
+bool Lexer::lexDefine(const char *&First, const char *const End) {
+  makeToken(pp_define); // Record the token, then emit the directive name.
+  append("#define ");
+  skipWhitespace(First, End);
+  // "#define" at end of input leaves First == End; never dereference that.
+  if (First == End || !isIdentifierHead(*First))
+    return true; // Error: Don't understand this #define.
+
+  IdInfo Id = lexIdentifier(First, End);
+  const char *Last = Id.Last;
+  append(Id.Name);
+  if (Last == End)
+    return false; // Macro name at end of input: nothing more to print.
+  if (*Last == '(') { // Function-like macro: '(' directly follows the name.
+    size_t Size = Out.size();
+    if (printMacroArgs(Last, End)) {
+      // Be robust to bad macro arguments, since they can show up in disabled
+      // code.
+      Out.resize(Size); // Drop whatever part of the argument list was printed.
+      append("(/* invalid */\n");
+      skipLine(Last, End);
+      return false;
+    }
+  }
+  skipWhitespace(Last, End);
+  if (Last == End)
+    return false;
+  if (!isVerticalWhitespace(*Last))
+    put(' '); // Separate the name/parameters from a non-empty body.
+  printDirectiveBody(Last, End);
+  First = Last;
+  return false;
+}
+
+bool Lexer::lexPragma(const char *&First, const char *const End) {
+  // #pragma.
+  if (!isNextIdentifier("clang", First, End)) {
+    skipLine(First, End);
+    return false; // Not an error: other pragmas are simply dropped.
+  }
+
+  // #pragma clang.
+  if (!isNextIdentifier("module", First, End)) {
+    skipLine(First, End);
+    return false;
+  }
+
+  // #pragma clang module.
+  if (!isNextIdentifier("import", First, End)) {
+    skipLine(First, End);
+    return false;
+  }
+
+  // #pragma clang module import: the only pragma form that is printed.
+  makeToken(pp_pragma_import);
+  append("#pragma clang module import ");
+  printDirectiveBody(First, End);
+  return false;
+}
+
+bool Lexer::lexEndif(const char *&First, const char *const End) {
+  // Strip out "#else" if it's empty (i.e. it is still the top token).
+  if (top() == pp_else)
+    popToken();
+
+  // Strip out every "#elif" that is empty.
+  while (top() == pp_elif)
+    popToken();
+
+  // If "#if" is empty, strip it and skip the "#endif".
+  if (top() == pp_if || top() == pp_ifdef || top() == pp_ifndef) {
+    popToken();
+    skipLine(First, End);
+    return false; // The whole conditional block produced no output.
+  }
+
+  return lexDefault(pp_endif, "endif", First, End); // Otherwise keep "#endif".
+}
+
+bool Lexer::lexDefault(TokenKind Kind, StringRef Directive, const char *&First,
+                       const char *const End) {
+  makeToken(Kind);
+  put('#').append(Directive).put(' '); // Emits "#<Directive> ".
+  printDirectiveBody(First, End); // Then the rest of the line, cleaned up.
+  return false; // This path never reports an error.
+}
+
+bool Lexer::lexPPLine(const char *&First, const char *const End) {
+  assert(First != End);
+
+  skipWhitespace(First, End);
+  assert(First <= End);
+  if (First == End)
+    return false; // Only whitespace/comments remained.
+
+  if (*First != '#' && *First != '@') {
+    skipLine(First, End); // Not a directive: drop the whole line.
+    assert(First <= End);
+    return false;
+  }
+
+  // Handle "@import".
+  if (*First == '@')
+    return lexAt(First, End);
+
+  // Handle preprocessing directives.
+  ++First; // Skip over '#'.
+  skipWhitespace(First, End);
+
+  if (First == End)
+    return true; // Error: Invalid preprocessor directive.
+
+  if (!isIdentifierHead(*First)) {
+    skipLine(First, End); // '#' not followed by a directive name: ignored.
+    return false;
+  }
+
+  // Figure out the token.
+  IdInfo Id = lexIdentifier(First, End);
+  First = Id.Last;
+  auto Kind = llvm::StringSwitch<TokenKind>(Id.Name)
+                  .Case("include", pp_include)
+                  .Case("__include_macros", pp___include_macros)
+                  .Case("define", pp_define)
+                  .Case("undef", pp_undef)
+                  .Case("import", pp_import)
+                  .Case("include_next", pp_include_next)
+                  .Case("if", pp_if)
+                  .Case("ifdef", pp_ifdef)
+                  .Case("ifndef", pp_ifndef)
+                  .Case("elif", pp_elif)
+                  .Case("else", pp_else)
+                  .Case("endif", pp_endif)
+                  .Case("pragma", pp_pragma_import) // Refined by lexPragma.
+                  .Default(pp_none);
+  if (Kind == pp_none) {
+    skipLine(First, End); // Unrecognized directives are dropped silently.
+    return false;
+  }
+
+  if (Kind == pp_endif)
+    return lexEndif(First, End);
+
+  if (Kind == pp_define)
+    return lexDefine(First, End);
+
+  if (Kind == pp_pragma_import)
+    return lexPragma(First, End);
+
+  // Everything else is printed as "#<name> <body>".
+  return lexDefault(Kind, Id.Name, First, End);
+}
+
+bool Lexer::lexImpl(const char *First, const char *const End) {
+  bool Failed = false; // Set as soon as any line fails to lex.
+  while (!Failed && First != End)
+    Failed = lexPPLine(First, End);
+  return Failed;
+}
+
+bool Lexer::lex(StringRef Bytes) {
+  bool Error = lexImpl(Bytes.begin(), Bytes.end());
+
+  if (!Error) {
+    // Add a trailing newline and an EOF on success.
+    if (!Out.empty() && Out.back() != '\n')
+      Out.push_back('\n');
+    makeToken(pp_eof);
+  }
+
+  // Null-terminate the output. This way the memory buffer that's passed to
+  // Clang will not have to worry about the terminating '\0'.
+  Out.push_back(0);
+  Out.pop_back(); // The '\0' is not counted in Out.size().
+  return Error; // True on failure, matching the public entry point.
+}
+
+bool clang::minimizeSourceToDependencyDirectives(
+    StringRef Input, SmallVectorImpl<char> &Output,
+    SmallVectorImpl<Token> &Tokens) {
+  Output.clear(); // Both output vectors are reset before lexing.
+  Tokens.clear();
+  return Lexer(Output, Tokens).lex(Input); // True means minimization failed.
+}
Index: lib/Lex/CMakeLists.txt
===================================================================
--- lib/Lex/CMakeLists.txt
+++ lib/Lex/CMakeLists.txt
@@ -3,6 +3,7 @@
 set(LLVM_LINK_COMPONENTS support)
 
 add_clang_library(clangLex
+  DependencyDirectivesSourceMinimizer.cpp
   HeaderMap.cpp
   HeaderSearch.cpp
   Lexer.cpp
Index: lib/FrontendTool/ExecuteCompilerInvocation.cpp
===================================================================
--- lib/FrontendTool/ExecuteCompilerInvocation.cpp
+++ lib/FrontendTool/ExecuteCompilerInvocation.cpp
@@ -117,6 +117,8 @@
   case RunAnalysis:            Action = "RunAnalysis"; break;
 #endif
   case RunPreprocessorOnly:    return llvm::make_unique<PreprocessOnlyAction>();
+  case PrintDependencyDirectivesSourceMinimizerOutput:
+    return llvm::make_unique<PrintDependencyDirectivesSourceMinimizerAction>();
   }
 
 #if !CLANG_ENABLE_ARCMT || !CLANG_ENABLE_STATIC_ANALYZER \
Index: lib/Frontend/FrontendActions.cpp
===================================================================
--- lib/Frontend/FrontendActions.cpp
+++ lib/Frontend/FrontendActions.cpp
@@ -15,6 +15,7 @@
 #include "clang/Frontend/FrontendDiagnostic.h"
 #include "clang/Frontend/MultiplexConsumer.h"
 #include "clang/Frontend/Utils.h"
+#include "clang/Lex/DependencyDirectivesSourceMinimizer.h"
 #include "clang/Lex/HeaderSearch.h"
 #include "clang/Lex/Preprocessor.h"
 #include "clang/Lex/PreprocessorOptions.h"
@@ -908,3 +909,19 @@
 
   OS << "}";
 }
+
+void PrintDependencyDirectivesSourceMinimizerAction::ExecuteAction() {
+  CompilerInstance &CI = getCompilerInstance();
+  auto Buffer = CI.getFileManager().getBufferForFile(getCurrentFile());
+  if (!Buffer)
+    return; // NOTE(review): a read failure is silent here; confirm intended.
+  llvm::SmallString<1024> Output;
+  llvm::SmallVector<minimize_source_to_dependency_directives::Token, 32> Toks;
+  if (minimizeSourceToDependencyDirectives((*Buffer)->getBuffer(), Output,
+                                           Toks)) { // True means failure.
+    CI.getDiagnostics().Report(
+        diag::err_minimize_source_to_dependency_directives_failed);
+    return;
+  }
+  llvm::outs() << Output; // Print the minimized source on success.
+}
Index: lib/Frontend/CompilerInvocation.cpp
===================================================================
--- lib/Frontend/CompilerInvocation.cpp
+++ lib/Frontend/CompilerInvocation.cpp
@@ -1650,6 +1650,10 @@
       Opts.ProgramAction = frontend::MigrateSource; break;
     case OPT_Eonly:
       Opts.ProgramAction = frontend::RunPreprocessorOnly; break;
+    case OPT_print_dependency_directives_minimized_source:
+      Opts.ProgramAction =
+          frontend::PrintDependencyDirectivesSourceMinimizerOutput;
+      break;
     }
   }
 
@@ -3042,6 +3046,7 @@
   case frontend::PrintPreprocessedInput:
   case frontend::RewriteMacros:
   case frontend::RunPreprocessorOnly:
+  case frontend::PrintDependencyDirectivesSourceMinimizerOutput:
     return true;
   }
   llvm_unreachable("invalid frontend action");
Index: include/clang/Lex/DependencyDirectivesSourceMinimizer.h
===================================================================
--- /dev/null
+++ include/clang/Lex/DependencyDirectivesSourceMinimizer.h
@@ -0,0 +1,81 @@
+//===- clang/Lex/DependencyDirectivesSourceMinimizer.h -  ----------*- C++ -*-//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This is the interface for minimizing header and source files to the
+/// minimum necessary preprocessor directives for evaluating includes. It
+/// reduces the source down to #define, #include, #import, @import, and any
+/// conditional preprocessor logic that contains one of those.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LEX_DEPENDENCY_DIRECTIVES_SOURCE_MINIMIZER_H
+#define LLVM_CLANG_LEX_DEPENDENCY_DIRECTIVES_SOURCE_MINIMIZER_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+
+namespace clang {
+namespace minimize_source_to_dependency_directives {
+
+/// Represents the kind of preprocessor directive that is tracked by the source
+/// minimizer in its token output.
+enum TokenKind {
+  pp_none, // No recognized directive.
+  pp_include, // "#include"
+  pp___include_macros, // "#__include_macros"
+  pp_define, // "#define"
+  pp_undef, // "#undef"
+  pp_import, // "#import"
+  pp_at_import, // "@import"
+  pp_pragma_import, // "#pragma clang module import"
+  pp_include_next, // "#include_next"
+  pp_if, // "#if"
+  pp_ifdef, // "#ifdef"
+  pp_ifndef, // "#ifndef"
+  pp_elif, // "#elif"
+  pp_else, // "#else"
+  pp_endif, // "#endif"
+  pp_eof, // Appended after the input was minimized successfully.
+};
+
+/// Represents a simplified token that's lexed as part of the source
+/// minimization. It's used to track the location of various preprocessor
+/// directives that could potentially have an effect on the dependencies.
+struct Token {
+  /// The kind of token.
+  TokenKind K = pp_none;
+
+  /// Offset into the output byte stream of where the directive begins.
+  int Offset = -1;
+
+  Token(TokenKind K, int Offset) : K(K), Offset(Offset) {}
+};
+
+} // end namespace minimize_source_to_dependency_directives
+
+/// Minimize the input down to the preprocessor directives that might have
+/// an effect on the dependencies for a compilation unit.
+///
+/// This function deletes all non-preprocessor code, and strips anything that
+/// can't affect what gets included. It canonicalizes whitespace where
+/// convenient to stabilize the output against formatting changes in the input.
+///
+/// Clears the output vectors at the beginning of the call.
+///
+/// \returns false on success, true on error.
+bool minimizeSourceToDependencyDirectives(
+    llvm::StringRef Input, llvm::SmallVectorImpl<char> &Output,
+    llvm::SmallVectorImpl<minimize_source_to_dependency_directives::Token>
+        &Tokens);
+
+} // end namespace clang
+
+#endif // LLVM_CLANG_LEX_DEPENDENCY_DIRECTIVES_SOURCE_MINIMIZER_H
Index: include/clang/Frontend/FrontendOptions.h
===================================================================
--- include/clang/Frontend/FrontendOptions.h
+++ include/clang/Frontend/FrontendOptions.h
@@ -128,7 +128,10 @@
   MigrateSource,
 
   /// Just lex, no output.
-  RunPreprocessorOnly
+  RunPreprocessorOnly,
+
+  /// Print the output of the dependency directives source minimizer.
+  PrintDependencyDirectivesSourceMinimizerOutput
 };
 
 } // namespace frontend
Index: include/clang/Frontend/FrontendActions.h
===================================================================
--- include/clang/Frontend/FrontendActions.h
+++ include/clang/Frontend/FrontendActions.h
@@ -247,6 +247,17 @@
   bool usesPreprocessorOnly() const override { return true; }
 };
 
+class PrintDependencyDirectivesSourceMinimizerAction : public FrontendAction {
+protected:
+  void ExecuteAction() override; // Minimizes the current file and prints it.
+  std::unique_ptr<ASTConsumer> CreateASTConsumer(CompilerInstance &,
+                                                 StringRef) override {
+    return nullptr; // This action supplies no AST consumer.
+  }
+
+  bool usesPreprocessorOnly() const override { return true; }
+};
+
 //===----------------------------------------------------------------------===//
 // Preprocessor Actions
 //===----------------------------------------------------------------------===//
Index: include/clang/Driver/CC1Options.td
===================================================================
--- include/clang/Driver/CC1Options.td
+++ include/clang/Driver/CC1Options.td
@@ -580,6 +580,9 @@
   HelpText<"Migrate source code">;
 def compiler_options_dump : Flag<["-"], "compiler-options-dump">,
   HelpText<"Dump the compiler configuration options">;
+def print_dependency_directives_minimized_source : Flag<["-"],
+  "print-dependency-directives-minimized-source">,
+  HelpText<"Print the output of the dependency directives source minimizer">;
 }
 
 def emit_llvm_uselists : Flag<["-"], "emit-llvm-uselists">,
Index: include/clang/Basic/DiagnosticFrontendKinds.td
===================================================================
--- include/clang/Basic/DiagnosticFrontendKinds.td
+++ include/clang/Basic/DiagnosticFrontendKinds.td
@@ -274,6 +274,9 @@
   "as the %select{aliasee|resolver}2">,
   InGroup<IgnoredAttributes>;
 
+def err_minimize_source_to_dependency_directives_failed : Error<
+  "dependency directives minimization failed for given source">;
+
 let CategoryName = "Instrumentation Issue" in {
 def warn_profile_data_out_of_date : Warning<
   "profile data may be out of date: of %0 function%s0, %1 %plural{1:has|:have}1"
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to