This revision was automatically updated to reflect the committed changes.
Closed by commit rL362459: Add clang source minimizer that reduces source to 
directives (authored by arphaman, committed by ).
Herald added a project: LLVM.
Herald added a subscriber: llvm-commits.

Changed prior to commit:
  https://reviews.llvm.org/D55463?vs=200376&id=202814#toc

Repository:
  rL LLVM

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D55463/new/

https://reviews.llvm.org/D55463

Files:
  cfe/trunk/include/clang/Basic/DiagnosticLexKinds.td
  cfe/trunk/include/clang/Driver/CC1Options.td
  cfe/trunk/include/clang/Frontend/FrontendActions.h
  cfe/trunk/include/clang/Frontend/FrontendOptions.h
  cfe/trunk/include/clang/Lex/DependencyDirectivesSourceMinimizer.h
  cfe/trunk/lib/Frontend/CompilerInvocation.cpp
  cfe/trunk/lib/Frontend/FrontendActions.cpp
  cfe/trunk/lib/FrontendTool/ExecuteCompilerInvocation.cpp
  cfe/trunk/lib/Lex/CMakeLists.txt
  cfe/trunk/lib/Lex/DependencyDirectivesSourceMinimizer.cpp
  cfe/trunk/test/Frontend/minimize_source_to_dependency_directives.c
  
cfe/trunk/test/Lexer/minimize_source_to_dependency_directives_at_import_extra_tokens.m
  
cfe/trunk/test/Lexer/minimize_source_to_dependency_directives_at_import_missing_semi.m
  
cfe/trunk/test/Lexer/minimize_source_to_dependency_directives_invalid_macro_name.c
  cfe/trunk/unittests/Lex/CMakeLists.txt
  cfe/trunk/unittests/Lex/DependencyDirectivesSourceMinimizerTest.cpp

Index: cfe/trunk/lib/Lex/CMakeLists.txt
===================================================================
--- cfe/trunk/lib/Lex/CMakeLists.txt
+++ cfe/trunk/lib/Lex/CMakeLists.txt
@@ -3,6 +3,7 @@
 set(LLVM_LINK_COMPONENTS support)
 
 add_clang_library(clangLex
+  DependencyDirectivesSourceMinimizer.cpp
   HeaderMap.cpp
   HeaderSearch.cpp
   Lexer.cpp
Index: cfe/trunk/lib/Lex/DependencyDirectivesSourceMinimizer.cpp
===================================================================
--- cfe/trunk/lib/Lex/DependencyDirectivesSourceMinimizer.cpp
+++ cfe/trunk/lib/Lex/DependencyDirectivesSourceMinimizer.cpp
@@ -0,0 +1,756 @@
+//===- DependencyDirectivesSourceMinimizer.cpp -  -------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This is the implementation for minimizing header and source files to the
+/// minimum necessary preprocessor directives for evaluating includes. It
+/// reduces the source down to #define, #include, #import, @import, and any
+/// conditional preprocessor logic that contains one of those.
+///
+//===----------------------------------------------------------------------===//
+
+#include "clang/Lex/DependencyDirectivesSourceMinimizer.h"
+#include "clang/Basic/CharInfo.h"
+#include "clang/Basic/Diagnostic.h"
+#include "clang/Lex/LexDiagnostic.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/Support/MemoryBuffer.h"
+
+using namespace llvm;
+using namespace clang;
+using namespace clang::minimize_source_to_dependency_directives;
+
+namespace {
+
+struct Minimizer {
+  /// Minimized output.
+  SmallVectorImpl<char> &Out;
+  /// The known tokens encountered during the minimization.
+  SmallVectorImpl<Token> &Tokens;
+
+  Minimizer(SmallVectorImpl<char> &Out, SmallVectorImpl<Token> &Tokens,
+            StringRef Input, DiagnosticsEngine *Diags,
+            SourceLocation InputSourceLoc)
+      : Out(Out), Tokens(Tokens), Input(Input), Diags(Diags),
+        InputSourceLoc(InputSourceLoc) {}
+
+  /// Lex the provided source and emit the minimized output.
+  ///
+  /// \returns True on error.
+  bool minimize();
+
+private:
+  struct IdInfo {
+    const char *Last;
+    StringRef Name;
+  };
+
+  /// Lex an identifier.
+  ///
+  /// \pre First points at a valid identifier head.
+  LLVM_NODISCARD IdInfo lexIdentifier(const char *First, const char *const End);
+  LLVM_NODISCARD bool isNextIdentifier(StringRef Id, const char *&First,
+                                       const char *const End);
+  LLVM_NODISCARD bool minimizeImpl(const char *First, const char *const End);
+  LLVM_NODISCARD bool lexPPLine(const char *&First, const char *const End);
+  LLVM_NODISCARD bool lexAt(const char *&First, const char *const End);
+  LLVM_NODISCARD bool lexDefine(const char *&First, const char *const End);
+  LLVM_NODISCARD bool lexPragma(const char *&First, const char *const End);
+  LLVM_NODISCARD bool lexEndif(const char *&First, const char *const End);
+  LLVM_NODISCARD bool lexDefault(TokenKind Kind, StringRef Directive,
+                                 const char *&First, const char *const End);
+  Token &makeToken(TokenKind K) {
+    Tokens.emplace_back(K, Out.size());
+    return Tokens.back();
+  }
+  void popToken() {
+    Out.resize(Tokens.back().Offset);
+    Tokens.pop_back();
+  }
+  TokenKind top() const { return Tokens.empty() ? pp_none : Tokens.back().K; }
+
+  Minimizer &put(char Byte) {
+    Out.push_back(Byte);
+    return *this;
+  }
+  Minimizer &append(StringRef S) { return append(S.begin(), S.end()); }
+  Minimizer &append(const char *First, const char *Last) {
+    Out.append(First, Last);
+    return *this;
+  }
+
+  void printToNewline(const char *&First, const char *const End);
+  void printAdjacentModuleNameParts(const char *&First, const char *const End);
+  LLVM_NODISCARD bool printAtImportBody(const char *&First,
+                                        const char *const End);
+  void printDirectiveBody(const char *&First, const char *const End);
+  void printAdjacentMacroArgs(const char *&First, const char *const End);
+  LLVM_NODISCARD bool printMacroArgs(const char *&First, const char *const End);
+
+  /// Reports a diagnostic if the diagnostic engine is provided. Always returns
+  /// true at the end.
+  bool reportError(const char *CurPtr, unsigned Err);
+
+  StringMap<char> SplitIds;
+  StringRef Input;
+  DiagnosticsEngine *Diags;
+  SourceLocation InputSourceLoc;
+};
+
+} // end anonymous namespace
+
+bool Minimizer::reportError(const char *CurPtr, unsigned Err) {
+  if (!Diags)
+    return true;
+  assert(CurPtr >= Input.data() && "invalid buffer ptr");
+  Diags->Report(InputSourceLoc.getLocWithOffset(CurPtr - Input.data()), Err);
+  return true;
+}
+
+static void skipOverSpaces(const char *&First, const char *const End) {
+  while (First != End && isHorizontalWhitespace(*First))
+    ++First;
+}
+
+LLVM_NODISCARD static bool isRawStringLiteral(const char *First,
+                                              const char *Current) {
+  assert(First <= Current);
+
+  // Check if we can even back up.
+  if (*Current != '\"' || First == Current)
+    return false;
+
+  // Check for an "R".
+  --Current;
+  if (*Current != 'R')
+    return false;
+  if (First == Current || !isIdentifierBody(*--Current))
+    return true;
+
+  // Check for a prefix of "u", "U", or "L".
+  if (*Current == 'u' || *Current == 'U' || *Current == 'L')
+    return First == Current || !isIdentifierBody(*--Current);
+
+  // Check for a prefix of "u8".
+  if (*Current != '8' || First == Current || *Current-- != 'u')
+    return false;
+  return First == Current || !isIdentifierBody(*--Current);
+}
+
+static void skipRawString(const char *&First, const char *const End) {
+  assert(First[0] == '\"');
+  assert(First[-1] == 'R');
+
+  const char *Last = ++First;
+  while (Last != End && *Last != '(')
+    ++Last;
+  if (Last == End) {
+    First = Last; // Hit the end... just give up.
+    return;
+  }
+
+  StringRef Terminator(First, Last - First);
+  for (;;) {
+    // Move First to just past the next ")".
+    First = Last;
+    while (First != End && *First != ')')
+      ++First;
+    if (First == End)
+      return;
+    ++First;
+
+    // Look ahead for the terminator sequence.
+    Last = First;
+    while (Last != End && size_t(Last - First) < Terminator.size() &&
+           Terminator[Last - First] == *Last)
+      ++Last;
+
+    // Check if we hit it (or the end of the file).
+    if (Last == End) {
+      First = Last;
+      return;
+    }
+    if (size_t(Last - First) < Terminator.size())
+      continue;
+    if (*Last != '\"')
+      continue;
+    First = Last + 1;
+    return;
+  }
+}
+
+static void skipString(const char *&First, const char *const End) {
+  assert(*First == '\'' || *First == '\"');
+  const char Terminator = *First;
+  for (++First; First != End && *First != Terminator; ++First)
+    if (*First == '\\')
+      if (++First == End)
+        return;
+  if (First != End)
+    ++First; // Finish off the string.
+}
+
+static void skipNewline(const char *&First, const char *End) {
+  assert(isVerticalWhitespace(*First));
+  ++First;
+  if (First == End)
+    return;
+
+  // Check for "\n\r" and "\r\n".
+  if (LLVM_UNLIKELY(isVerticalWhitespace(*First) && First[-1] != First[0]))
+    ++First;
+}
+
+static void skipToNewlineRaw(const char *&First, const char *const End) {
+  for (;;) {
+    if (First == End)
+      return;
+
+    if (isVerticalWhitespace(*First))
+      return;
+
+    while (!isVerticalWhitespace(*First))
+      if (++First == End)
+        return;
+
+    if (First[-1] != '\\')
+      return;
+
+    ++First; // Keep going...
+  }
+}
+
+static const char *reverseOverSpaces(const char *First, const char *Last) {
+  assert(First <= Last);
+  while (First != Last && isHorizontalWhitespace(Last[-1]))
+    --Last;
+  return Last;
+}
+
+static void skipLineComment(const char *&First, const char *const End) {
+  assert(First[0] == '/' && First[1] == '/');
+  First += 2;
+  skipToNewlineRaw(First, End);
+}
+
+static void skipBlockComment(const char *&First, const char *const End) {
+  assert(First[0] == '/' && First[1] == '*');
+  if (End - First < 4) {
+    First = End;
+    return;
+  }
+  for (First += 3; First != End; ++First)
+    if (First[-1] == '*' && First[0] == '/') {
+      ++First;
+      return;
+    }
+}
+
+/// \returns True if the current single quotation mark character is a C++ 14
+/// digit separator.
+static bool isQuoteCppDigitSeparator(const char *const Start,
+                                     const char *const Cur,
+                                     const char *const End) {
+  assert(*Cur == '\'' && "expected quotation character");
+  // skipLine called in places where we don't expect a valid number
+  // body before `start` on the same line, so always return false at the start.
+  if (Start == Cur)
+    return false;
+  // The previous character must be a valid PP number character.
+  if (!isPreprocessingNumberBody(*(Cur - 1)))
+    return false;
+  // The next character should be a valid identifier body character.
+  return (Cur + 1) < End && isIdentifierBody(*(Cur + 1));
+}
+
+static void skipLine(const char *&First, const char *const End) {
+  do {
+    assert(First <= End);
+    if (First == End)
+      return;
+
+    if (isVerticalWhitespace(*First)) {
+      skipNewline(First, End);
+      return;
+    }
+    const char *Start = First;
+    while (First != End && !isVerticalWhitespace(*First)) {
+      // Iterate over strings correctly to avoid comments and newlines.
+      if (*First == '\"' ||
+          (*First == '\'' && !isQuoteCppDigitSeparator(Start, First, End))) {
+        if (isRawStringLiteral(Start, First))
+          skipRawString(First, End);
+        else
+          skipString(First, End);
+        continue;
+      }
+
+      // Iterate over comments correctly.
+      if (*First != '/' || End - First < 2) {
+        ++First;
+        continue;
+      }
+
+      if (First[1] == '/') {
+        // "//...".
+        skipLineComment(First, End);
+        continue;
+      }
+
+      if (First[1] != '*') {
+        ++First;
+        continue;
+      }
+
+      // "/*...*/".
+      skipBlockComment(First, End);
+    }
+    if (First == End)
+      return;
+
+    // Skip over the newline.
+    assert(isVerticalWhitespace(*First));
+    skipNewline(First, End);
+  } while (First[-2] == '\\'); // Continue past line-continuations.
+}
+
+static void skipDirective(StringRef Name, const char *&First,
+                          const char *const End) {
+  if (llvm::StringSwitch<bool>(Name)
+          .Case("warning", true)
+          .Case("error", true)
+          .Default(false))
+    // Do not process quotes or comments.
+    skipToNewlineRaw(First, End);
+  else
+    skipLine(First, End);
+}
+
+void Minimizer::printToNewline(const char *&First, const char *const End) {
+  while (First != End && !isVerticalWhitespace(*First)) {
+    const char *Last = First;
+    do {
+      // Iterate over strings correctly to avoid comments and newlines.
+      if (*Last == '\"' || *Last == '\'') {
+        if (LLVM_UNLIKELY(isRawStringLiteral(First, Last)))
+          skipRawString(Last, End);
+        else
+          skipString(Last, End);
+        continue;
+      }
+      if (*Last != '/' || End - Last < 2) {
+        ++Last;
+        continue; // Gather the rest up to print verbatim.
+      }
+
+      if (Last[1] != '/' && Last[1] != '*') {
+        ++Last;
+        continue;
+      }
+
+      // Deal with "//..." and "/*...*/".
+      append(First, reverseOverSpaces(First, Last));
+      First = Last;
+
+      if (Last[1] == '/') {
+        skipLineComment(First, End);
+        return;
+      }
+
+      put(' ');
+      skipBlockComment(First, End);
+      skipOverSpaces(First, End);
+      Last = First;
+    } while (Last != End && !isVerticalWhitespace(*Last));
+
+    // Print out the string.
+    if (Last == End || Last == First || Last[-1] != '\\') {
+      append(First, reverseOverSpaces(First, Last));
+      return;
+    }
+
+    // Print up to the backslash, backing up over spaces.
+    append(First, reverseOverSpaces(First, Last - 1));
+
+    First = Last;
+    skipNewline(First, End);
+    skipOverSpaces(First, End);
+  }
+}
+
+static void skipWhitespace(const char *&First, const char *const End) {
+  for (;;) {
+    assert(First <= End);
+    skipOverSpaces(First, End);
+
+    if (End - First < 2)
+      return;
+
+    if (First[0] == '\\' && isVerticalWhitespace(First[1])) {
+      skipNewline(++First, End);
+      continue;
+    }
+
+    // Check for a non-comment character.
+    if (First[0] != '/')
+      return;
+
+    // "// ...".
+    if (First[1] == '/') {
+      skipLineComment(First, End);
+      return;
+    }
+
+    // Cannot be a comment.
+    if (First[1] != '*')
+      return;
+
+    // "/*...*/".
+    skipBlockComment(First, End);
+  }
+}
+
+void Minimizer::printAdjacentModuleNameParts(const char *&First,
+                                             const char *const End) {
+  // Skip over parts of the body.
+  const char *Last = First;
+  do
+    ++Last;
+  while (Last != End && (isIdentifierBody(*Last) || *Last == '.'));
+  append(First, Last);
+  First = Last;
+}
+
+bool Minimizer::printAtImportBody(const char *&First, const char *const End) {
+  for (;;) {
+    skipWhitespace(First, End);
+    if (First == End)
+      return true;
+
+    if (isVerticalWhitespace(*First)) {
+      skipNewline(First, End);
+      continue;
+    }
+
+    // Found a semicolon.
+    if (*First == ';') {
+      put(*First++).put('\n');
+      return false;
+    }
+
+    // Don't handle macro expansions inside @import for now.
+    if (!isIdentifierBody(*First) && *First != '.')
+      return true;
+
+    printAdjacentModuleNameParts(First, End);
+  }
+}
+
+void Minimizer::printDirectiveBody(const char *&First, const char *const End) {
+  skipWhitespace(First, End); // Skip initial whitespace.
+  printToNewline(First, End);
+  while (Out.back() == ' ')
+    Out.pop_back();
+  put('\n');
+}
+
+LLVM_NODISCARD static const char *lexRawIdentifier(const char *First,
+                                                   const char *const End) {
+  assert(isIdentifierBody(*First) && "invalid identifer");
+  const char *Last = First + 1;
+  while (Last != End && isIdentifierBody(*Last))
+    ++Last;
+  return Last;
+}
+
+LLVM_NODISCARD static const char *
+getIdentifierContinuation(const char *First, const char *const End) {
+  if (End - First < 3 || First[0] != '\\' || !isVerticalWhitespace(First[1]))
+    return nullptr;
+
+  ++First;
+  skipNewline(First, End);
+  if (First == End)
+    return nullptr;
+  return isIdentifierBody(First[0]) ? First : nullptr;
+}
+
+Minimizer::IdInfo Minimizer::lexIdentifier(const char *First,
+                                           const char *const End) {
+  const char *Last = lexRawIdentifier(First, End);
+  const char *Next = getIdentifierContinuation(Last, End);
+  if (LLVM_LIKELY(!Next))
+    return IdInfo{Last, StringRef(First, Last - First)};
+
+  // Slow path, where identifiers are split over lines.
+  SmallVector<char, 64> Id(First, Last);
+  while (Next) {
+    Last = lexRawIdentifier(Next, End);
+    Id.append(Next, Last);
+    Next = getIdentifierContinuation(Last, End);
+  }
+  return IdInfo{
+      Last,
+      SplitIds.try_emplace(StringRef(Id.begin(), Id.size()), 0).first->first()};
+}
+
+void Minimizer::printAdjacentMacroArgs(const char *&First,
+                                       const char *const End) {
+  // Skip over parts of the body.
+  const char *Last = First;
+  do
+    ++Last;
+  while (Last != End &&
+         (isIdentifierBody(*Last) || *Last == '.' || *Last == ','));
+  append(First, Last);
+  First = Last;
+}
+
+bool Minimizer::printMacroArgs(const char *&First, const char *const End) {
+  assert(*First == '(');
+  put(*First++);
+  for (;;) {
+    skipWhitespace(First, End);
+    if (First == End)
+      return true;
+
+    if (*First == ')') {
+      put(*First++);
+      return false;
+    }
+
+    // This is intentionally fairly liberal.
+    if (!(isIdentifierBody(*First) || *First == '.' || *First == ','))
+      return true;
+
+    printAdjacentMacroArgs(First, End);
+  }
+}
+
+/// Looks for an identifier starting from Last.
+///
+/// Updates "First" to just past the next identifier, if any.  Returns true iff
+/// the identifier matches "Id".
+bool Minimizer::isNextIdentifier(StringRef Id, const char *&First,
+                                 const char *const End) {
+  skipWhitespace(First, End);
+  if (First == End || !isIdentifierHead(*First))
+    return false;
+
+  IdInfo FoundId = lexIdentifier(First, End);
+  First = FoundId.Last;
+  return FoundId.Name == Id;
+}
+
+bool Minimizer::lexAt(const char *&First, const char *const End) {
+  // Handle "@import".
+  const char *ImportLoc = First++;
+  if (!isNextIdentifier("import", First, End)) {
+    skipLine(First, End);
+    return false;
+  }
+  makeToken(decl_at_import);
+  append("@import ");
+  if (printAtImportBody(First, End))
+    return reportError(
+        ImportLoc, diag::err_dep_source_minimizer_missing_sema_after_at_import);
+  skipWhitespace(First, End);
+  if (First == End)
+    return false;
+  if (!isVerticalWhitespace(*First))
+    return reportError(
+        ImportLoc, diag::err_dep_source_minimizer_unexpected_tokens_at_import);
+  skipNewline(First, End);
+  return false;
+}
+
+bool Minimizer::lexDefine(const char *&First, const char *const End) {
+  makeToken(pp_define);
+  append("#define ");
+  skipWhitespace(First, End);
+
+  if (!isIdentifierHead(*First))
+    return reportError(First, diag::err_pp_macro_not_identifier);
+
+  IdInfo Id = lexIdentifier(First, End);
+  const char *Last = Id.Last;
+  append(Id.Name);
+  if (Last == End)
+    return false;
+  if (*Last == '(') {
+    size_t Size = Out.size();
+    if (printMacroArgs(Last, End)) {
+      // Be robust to bad macro arguments, since they can show up in disabled
+      // code.
+      Out.resize(Size);
+      append("(/* invalid */\n");
+      skipLine(Last, End);
+      return false;
+    }
+  }
+  skipWhitespace(Last, End);
+  if (Last == End)
+    return false;
+  if (!isVerticalWhitespace(*Last))
+    put(' ');
+  printDirectiveBody(Last, End);
+  First = Last;
+  return false;
+}
+
+bool Minimizer::lexPragma(const char *&First, const char *const End) {
+  // #pragma.
+  if (!isNextIdentifier("clang", First, End)) {
+    skipLine(First, End);
+    return false;
+  }
+
+  // #pragma clang.
+  if (!isNextIdentifier("module", First, End)) {
+    skipLine(First, End);
+    return false;
+  }
+
+  // #pragma clang module.
+  if (!isNextIdentifier("import", First, End)) {
+    skipLine(First, End);
+    return false;
+  }
+
+  // #pragma clang module import.
+  makeToken(pp_pragma_import);
+  append("#pragma clang module import ");
+  printDirectiveBody(First, End);
+  return false;
+}
+
+bool Minimizer::lexEndif(const char *&First, const char *const End) {
+  // Strip out "#else" if it's empty.
+  if (top() == pp_else)
+    popToken();
+
+  // Strip out "#elif" if they're empty.
+  while (top() == pp_elif)
+    popToken();
+
+  // If "#if" is empty, strip it and skip the "#endif".
+  if (top() == pp_if || top() == pp_ifdef || top() == pp_ifndef) {
+    popToken();
+    skipLine(First, End);
+    return false;
+  }
+
+  return lexDefault(pp_endif, "endif", First, End);
+}
+
+bool Minimizer::lexDefault(TokenKind Kind, StringRef Directive,
+                           const char *&First, const char *const End) {
+  makeToken(Kind);
+  put('#').append(Directive).put(' ');
+  printDirectiveBody(First, End);
+  return false;
+}
+
+bool Minimizer::lexPPLine(const char *&First, const char *const End) {
+  assert(First != End);
+
+  skipWhitespace(First, End);
+  assert(First <= End);
+  if (First == End)
+    return false;
+
+  if (*First != '#' && *First != '@') {
+    skipLine(First, End);
+    assert(First <= End);
+    return false;
+  }
+
+  // Handle "@import".
+  if (*First == '@')
+    return lexAt(First, End);
+
+  // Handle preprocessing directives.
+  ++First; // Skip over '#'.
+  skipWhitespace(First, End);
+
+  if (First == End)
+    return reportError(First, diag::err_pp_expected_eol);
+
+  if (!isIdentifierHead(*First)) {
+    skipLine(First, End);
+    return false;
+  }
+
+  // Figure out the token.
+  IdInfo Id = lexIdentifier(First, End);
+  First = Id.Last;
+  auto Kind = llvm::StringSwitch<TokenKind>(Id.Name)
+                  .Case("include", pp_include)
+                  .Case("__include_macros", pp___include_macros)
+                  .Case("define", pp_define)
+                  .Case("undef", pp_undef)
+                  .Case("import", pp_import)
+                  .Case("include_next", pp_include_next)
+                  .Case("if", pp_if)
+                  .Case("ifdef", pp_ifdef)
+                  .Case("ifndef", pp_ifndef)
+                  .Case("elif", pp_elif)
+                  .Case("else", pp_else)
+                  .Case("endif", pp_endif)
+                  .Case("pragma", pp_pragma_import)
+                  .Default(pp_none);
+  if (Kind == pp_none) {
+    skipDirective(Id.Name, First, End);
+    return false;
+  }
+
+  if (Kind == pp_endif)
+    return lexEndif(First, End);
+
+  if (Kind == pp_define)
+    return lexDefine(First, End);
+
+  if (Kind == pp_pragma_import)
+    return lexPragma(First, End);
+
+  // Everything else.
+  return lexDefault(Kind, Id.Name, First, End);
+}
+
+bool Minimizer::minimizeImpl(const char *First, const char *const End) {
+  while (First != End)
+    if (lexPPLine(First, End))
+      return true;
+  return false;
+}
+
+bool Minimizer::minimize() {
+  bool Error = minimizeImpl(Input.begin(), Input.end());
+
+  if (!Error) {
+    // Add a trailing newline and an EOF on success.
+    if (!Out.empty() && Out.back() != '\n')
+      Out.push_back('\n');
+    makeToken(pp_eof);
+  }
+
+  // Null-terminate the output. This way the memory buffer that's passed to
+  // Clang will not have to worry about the terminating '\0'.
+  Out.push_back(0);
+  Out.pop_back();
+  return Error;
+}
+
+bool clang::minimizeSourceToDependencyDirectives(
+    StringRef Input, SmallVectorImpl<char> &Output,
+    SmallVectorImpl<Token> &Tokens, DiagnosticsEngine *Diags,
+    SourceLocation InputSourceLoc) {
+  Output.clear();
+  Tokens.clear();
+  return Minimizer(Output, Tokens, Input, Diags, InputSourceLoc).minimize();
+}
Index: cfe/trunk/lib/Frontend/CompilerInvocation.cpp
===================================================================
--- cfe/trunk/lib/Frontend/CompilerInvocation.cpp
+++ cfe/trunk/lib/Frontend/CompilerInvocation.cpp
@@ -1696,6 +1696,10 @@
       Opts.ProgramAction = frontend::MigrateSource; break;
     case OPT_Eonly:
       Opts.ProgramAction = frontend::RunPreprocessorOnly; break;
+    case OPT_print_dependency_directives_minimized_source:
+      Opts.ProgramAction =
+          frontend::PrintDependencyDirectivesSourceMinimizerOutput;
+      break;
     }
   }
 
@@ -3116,6 +3120,7 @@
   case frontend::PrintPreprocessedInput:
   case frontend::RewriteMacros:
   case frontend::RunPreprocessorOnly:
+  case frontend::PrintDependencyDirectivesSourceMinimizerOutput:
     return true;
   }
   llvm_unreachable("invalid frontend action");
Index: cfe/trunk/lib/Frontend/FrontendActions.cpp
===================================================================
--- cfe/trunk/lib/Frontend/FrontendActions.cpp
+++ cfe/trunk/lib/Frontend/FrontendActions.cpp
@@ -14,6 +14,7 @@
 #include "clang/Frontend/FrontendDiagnostic.h"
 #include "clang/Frontend/MultiplexConsumer.h"
 #include "clang/Frontend/Utils.h"
+#include "clang/Lex/DependencyDirectivesSourceMinimizer.h"
 #include "clang/Lex/HeaderSearch.h"
 #include "clang/Lex/Preprocessor.h"
 #include "clang/Lex/PreprocessorOptions.h"
@@ -23,8 +24,8 @@
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/Path.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/YAMLTraits.h"
+#include "llvm/Support/raw_ostream.h"
 #include <memory>
 #include <system_error>
 
@@ -908,3 +909,33 @@
 
   OS << "}";
 }
+
+void PrintDependencyDirectivesSourceMinimizerAction::ExecuteAction() {
+  CompilerInstance &CI = getCompilerInstance();
+  SourceManager &SM = CI.getPreprocessor().getSourceManager();
+  const llvm::MemoryBuffer *FromFile = SM.getBuffer(SM.getMainFileID());
+
+  llvm::SmallString<1024> Output;
+  llvm::SmallVector<minimize_source_to_dependency_directives::Token, 32> Toks;
+  if (minimizeSourceToDependencyDirectives(
+          FromFile->getBuffer(), Output, Toks, &CI.getDiagnostics(),
+          SM.getLocForStartOfFile(SM.getMainFileID()))) {
+    assert(CI.getDiagnostics().hasErrorOccurred() &&
+           "no errors reported for failure");
+
+    // Preprocess the source when verifying the diagnostics to capture the
+    // 'expected' comments.
+    if (CI.getDiagnosticOpts().VerifyDiagnostics) {
+      // Make sure we don't emit new diagnostics!
+      CI.getDiagnostics().setSuppressAllDiagnostics();
+      Preprocessor &PP = getCompilerInstance().getPreprocessor();
+      PP.EnterMainSourceFile();
+      Token Tok;
+      do {
+        PP.Lex(Tok);
+      } while (Tok.isNot(tok::eof));
+    }
+    return;
+  }
+  llvm::outs() << Output;
+}
Index: cfe/trunk/lib/FrontendTool/ExecuteCompilerInvocation.cpp
===================================================================
--- cfe/trunk/lib/FrontendTool/ExecuteCompilerInvocation.cpp
+++ cfe/trunk/lib/FrontendTool/ExecuteCompilerInvocation.cpp
@@ -116,6 +116,8 @@
   case RunAnalysis:            Action = "RunAnalysis"; break;
 #endif
   case RunPreprocessorOnly:    return llvm::make_unique<PreprocessOnlyAction>();
+  case PrintDependencyDirectivesSourceMinimizerOutput:
+    return llvm::make_unique<PrintDependencyDirectivesSourceMinimizerAction>();
   }
 
 #if !CLANG_ENABLE_ARCMT || !CLANG_ENABLE_STATIC_ANALYZER \
Index: cfe/trunk/unittests/Lex/CMakeLists.txt
===================================================================
--- cfe/trunk/unittests/Lex/CMakeLists.txt
+++ cfe/trunk/unittests/Lex/CMakeLists.txt
@@ -3,6 +3,7 @@
   )
 
 add_clang_unittest(LexTests
+  DependencyDirectivesSourceMinimizerTest.cpp
   HeaderMapTest.cpp
   HeaderSearchTest.cpp
   LexerTest.cpp
Index: cfe/trunk/unittests/Lex/DependencyDirectivesSourceMinimizerTest.cpp
===================================================================
--- cfe/trunk/unittests/Lex/DependencyDirectivesSourceMinimizerTest.cpp
+++ cfe/trunk/unittests/Lex/DependencyDirectivesSourceMinimizerTest.cpp
@@ -0,0 +1,508 @@
+//===- unittests/Lex/DependencyDirectivesSourceMinimizer.cpp -  -----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Lex/DependencyDirectivesSourceMinimizer.h"
+#include "llvm/ADT/SmallString.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+using namespace clang;
+using namespace clang::minimize_source_to_dependency_directives;
+
+namespace clang {
+
+bool minimizeSourceToDependencyDirectives(StringRef Input,
+                                          SmallVectorImpl<char> &Out) {
+  SmallVector<minimize_source_to_dependency_directives::Token, 32> Tokens;
+  return minimizeSourceToDependencyDirectives(Input, Out, Tokens);
+}
+
+} // end namespace clang
+
+namespace {
+
+TEST(MinimizeSourceToDependencyDirectivesTest, Empty) {
+  SmallVector<char, 128> Out;
+  SmallVector<Token, 4> Tokens;
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("", Out, Tokens));
+  EXPECT_TRUE(Out.empty());
+  ASSERT_EQ(1u, Tokens.size());
+  ASSERT_EQ(pp_eof, Tokens.back().K);
+
+  ASSERT_FALSE(
+      minimizeSourceToDependencyDirectives("abc def\nxyz", Out, Tokens));
+  EXPECT_TRUE(Out.empty());
+  ASSERT_EQ(1u, Tokens.size());
+  ASSERT_EQ(pp_eof, Tokens.back().K);
+}
+
+TEST(MinimizeSourceToDependencyDirectivesTest, AllTokens) {
+  SmallVector<char, 128> Out;
+  SmallVector<Token, 4> Tokens;
+
+  ASSERT_FALSE(
+      minimizeSourceToDependencyDirectives("#define A\n"
+                                           "#undef A\n"
+                                           "#endif\n"
+                                           "#if A\n"
+                                           "#ifdef A\n"
+                                           "#ifndef A\n"
+                                           "#elif A\n"
+                                           "#else\n"
+                                           "#include <A>\n"
+                                           "#include_next <A>\n"
+                                           "#__include_macros <A>\n"
+                                           "#import <A>\n"
+                                           "@import A;\n"
+                                           "#pragma clang module import A\n",
+                                           Out, Tokens));
+  EXPECT_EQ(pp_define, Tokens[0].K);
+  EXPECT_EQ(pp_undef, Tokens[1].K);
+  EXPECT_EQ(pp_endif, Tokens[2].K);
+  EXPECT_EQ(pp_if, Tokens[3].K);
+  EXPECT_EQ(pp_ifdef, Tokens[4].K);
+  EXPECT_EQ(pp_ifndef, Tokens[5].K);
+  EXPECT_EQ(pp_elif, Tokens[6].K);
+  EXPECT_EQ(pp_else, Tokens[7].K);
+  EXPECT_EQ(pp_include, Tokens[8].K);
+  EXPECT_EQ(pp_include_next, Tokens[9].K);
+  EXPECT_EQ(pp___include_macros, Tokens[10].K);
+  EXPECT_EQ(pp_import, Tokens[11].K);
+  EXPECT_EQ(decl_at_import, Tokens[12].K);
+  EXPECT_EQ(pp_pragma_import, Tokens[13].K);
+  EXPECT_EQ(pp_eof, Tokens[14].K);
+}
+
+TEST(MinimizeSourceToDependencyDirectivesTest, Define) {
+  SmallVector<char, 128> Out;
+  SmallVector<Token, 4> Tokens;
+
+  ASSERT_FALSE(
+      minimizeSourceToDependencyDirectives("#define MACRO", Out, Tokens));
+  EXPECT_STREQ("#define MACRO\n", Out.data());
+  ASSERT_EQ(2u, Tokens.size());
+  ASSERT_EQ(pp_define, Tokens.front().K);
+}
+
+TEST(MinimizeSourceToDependencyDirectivesTest, DefineSpacing) {
+  SmallVector<char, 128> Out;
+
+  ASSERT_FALSE(
+      minimizeSourceToDependencyDirectives("#define MACRO\n\n\n", Out));
+  EXPECT_STREQ("#define MACRO\n", Out.data());
+
+  ASSERT_FALSE(
+      minimizeSourceToDependencyDirectives("#define MACRO \n\n\n", Out));
+  EXPECT_STREQ("#define MACRO\n", Out.data());
+
+  ASSERT_FALSE(
+      minimizeSourceToDependencyDirectives("#define MACRO a \n\n\n", Out));
+  EXPECT_STREQ("#define MACRO a\n", Out.data());
+
+  ASSERT_FALSE(
+      minimizeSourceToDependencyDirectives("#define   MACRO\n\n\n", Out));
+  EXPECT_STREQ("#define MACRO\n", Out.data());
+}
+
+TEST(MinimizeSourceToDependencyDirectivesTest, DefineMacroArguments) {
+  SmallVector<char, 128> Out;
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define MACRO()", Out));
+  EXPECT_STREQ("#define MACRO()\n", Out.data());
+
+  ASSERT_FALSE(
+      minimizeSourceToDependencyDirectives("#define MACRO(a, b...)", Out));
+  EXPECT_STREQ("#define MACRO(a,b...)\n", Out.data());
+
+  ASSERT_FALSE(
+      minimizeSourceToDependencyDirectives("#define MACRO content", Out));
+  EXPECT_STREQ("#define MACRO content\n", Out.data());
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives(
+      "#define MACRO   con  tent   ", Out));
+  EXPECT_STREQ("#define MACRO con  tent\n", Out.data());
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives(
+      "#define MACRO()   con  tent   ", Out));
+  EXPECT_STREQ("#define MACRO() con  tent\n", Out.data());
+}
+
+TEST(MinimizeSourceToDependencyDirectivesTest, DefineInvalidMacroArguments) {
+  SmallVector<char, 128> Out;
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define MACRO((a))", Out));
+  EXPECT_STREQ("#define MACRO(/* invalid */\n", Out.data());
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define MACRO(", Out));
+  EXPECT_STREQ("#define MACRO(/* invalid */\n", Out.data());
+
+  ASSERT_FALSE(
+      minimizeSourceToDependencyDirectives("#define MACRO(a * b)", Out));
+  EXPECT_STREQ("#define MACRO(/* invalid */\n", Out.data());
+}
+
+TEST(MinimizeSourceToDependencyDirectivesTest, DefineHorizontalWhitespace) {
+  SmallVector<char, 128> Out;
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives(
+      "#define MACRO(\t)\tcon \t tent\t", Out));
+  EXPECT_STREQ("#define MACRO() con \t tent\n", Out.data());
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives(
+      "#define MACRO(\f)\fcon \f tent\f", Out));
+  EXPECT_STREQ("#define MACRO() con \f tent\n", Out.data());
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives(
+      "#define MACRO(\v)\vcon \v tent\v", Out));
+  EXPECT_STREQ("#define MACRO() con \v tent\n", Out.data());
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives(
+      "#define MACRO \t\v\f\v\t con\f\t\vtent\v\f \v", Out));
+  EXPECT_STREQ("#define MACRO con\f\t\vtent\n", Out.data());
+}
+
+TEST(MinimizeSourceToDependencyDirectivesTest, DefineMultilineArgs) {
+  SmallVector<char, 128> Out;
+
+  ASSERT_FALSE(
+      minimizeSourceToDependencyDirectives("#define MACRO(a        \\\n"
+                                           "              )",
+                                           Out));
+  EXPECT_STREQ("#define MACRO(a)\n", Out.data());
+
+  ASSERT_FALSE(
+      minimizeSourceToDependencyDirectives("#define MACRO(a,       \\\n"
+                                           "              b)       \\\n"
+                                           "        call((a),      \\\n"
+                                           "             (b))",
+                                           Out));
+  EXPECT_STREQ("#define MACRO(a,b) call((a),(b))\n", Out.data());
+}
+
+TEST(MinimizeSourceToDependencyDirectivesTest,
+     DefineMultilineArgsCarriageReturn) {
+  SmallVector<char, 128> Out;
+
+  ASSERT_FALSE(
+      minimizeSourceToDependencyDirectives("#define MACRO(a,       \\\r"
+                                           "              b)       \\\r"
+                                           "        call((a),      \\\r"
+                                           "             (b))",
+                                           Out));
+  EXPECT_STREQ("#define MACRO(a,b) call((a),(b))\n", Out.data());
+}
+
+TEST(MinimizeSourceToDependencyDirectivesTest,
+     DefineMultilineArgsCarriageReturnNewline) {
+  SmallVector<char, 128> Out;
+
+  ASSERT_FALSE(
+      minimizeSourceToDependencyDirectives("#define MACRO(a,       \\\r\n"
+                                           "              b)       \\\r\n"
+                                           "        call((a),      \\\r\n"
+                                           "             (b))",
+                                           Out));
+  EXPECT_STREQ("#define MACRO(a,b) call((a),(b))\n", Out.data());
+}
+
+TEST(MinimizeSourceToDependencyDirectivesTest,
+     DefineMultilineArgsNewlineCarriageReturn) {
+  SmallVector<char, 128> Out;
+
+  ASSERT_FALSE(
+      minimizeSourceToDependencyDirectives("#define MACRO(a,       \\\n\r"
+                                           "              b)       \\\n\r"
+                                           "        call((a),      \\\n\r"
+                                           "             (b))",
+                                           Out));
+  EXPECT_STREQ("#define MACRO(a,b) call((a),(b))\n", Out.data());
+}
+
+TEST(MinimizeSourceToDependencyDirectivesTest, DefineNumber) {
+  SmallVector<char, 128> Out;
+
+  ASSERT_TRUE(minimizeSourceToDependencyDirectives("#define 0\n", Out));
+}
+
+TEST(MinimizeSourceToDependencyDirectivesTest, DefineNoName) {
+  SmallVector<char, 128> Out;
+
+  ASSERT_TRUE(minimizeSourceToDependencyDirectives("#define &\n", Out));
+}
+
+TEST(MinimizeSourceToDependencyDirectivesTest, DefineNoWhitespace) {
+  SmallVector<char, 128> Out;
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define AND&\n", Out));
+  EXPECT_STREQ("#define AND &\n", Out.data());
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define AND\\\n"
+                                                    "&\n",
+                                                    Out));
+  EXPECT_STREQ("#define AND &\n", Out.data());
+}
+
+TEST(MinimizeSourceToDependencyDirectivesTest, MultilineComment) {
+  SmallVector<char, 128> Out;
+
+  ASSERT_FALSE(
+      minimizeSourceToDependencyDirectives("#define MACRO a/*\n"
+                                           "  /*\n"
+                                           "#define MISSING abc\n"
+                                           "  /*\n"
+                                           "  /* something */ \n"
+                                           "#include  /* \"def\" */ <abc> \n",
+                                           Out));
+  EXPECT_STREQ("#define MACRO a\n"
+               "#include <abc>\n",
+               Out.data());
+}
+
+TEST(MinimizeSourceToDependencyDirectivesTest, MultilineCommentInStrings) {
+  SmallVector<char, 128> Out;
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define MACRO1 \"/*\"\n"
+                                                    "#define MACRO2 \"*/\"\n",
+                                                    Out));
+  EXPECT_STREQ("#define MACRO1 \"/*\"\n"
+               "#define MACRO2 \"*/\"\n",
+               Out.data());
+}
+
+TEST(MinimizeSourceToDependencyDirectivesTest, Ifdef) {
+  SmallVector<char, 128> Out;
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("#ifdef A\n"
+                                                    "#define B\n"
+                                                    "#endif\n",
+                                                    Out));
+  EXPECT_STREQ("#ifdef A\n"
+               "#define B\n"
+               "#endif\n",
+               Out.data());
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("#ifdef A\n"
+                                                    "#define B\n"
+                                                    "#elif B\n"
+                                                    "#define C\n"
+                                                    "#elif C\n"
+                                                    "#define D\n"
+                                                    "#else\n"
+                                                    "#define E\n"
+                                                    "#endif\n",
+                                                    Out));
+  EXPECT_STREQ("#ifdef A\n"
+               "#define B\n"
+               "#elif B\n"
+               "#define C\n"
+               "#elif C\n"
+               "#define D\n"
+               "#else\n"
+               "#define E\n"
+               "#endif\n",
+               Out.data());
+}
+
+TEST(MinimizeSourceToDependencyDirectivesTest, EmptyIfdef) {
+  SmallVector<char, 128> Out;
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("#ifdef A\n"
+                                                    "#elif B\n"
+                                                    "#elif C\n"
+                                                    "#else D\n"
+                                                    "#endif\n",
+                                                    Out));
+  EXPECT_STREQ("", Out.data());
+}
+
+TEST(MinimizeSourceToDependencyDirectivesTest, Pragma) {
+  SmallVector<char, 128> Out;
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("#pragma A\n", Out));
+  EXPECT_STREQ("", Out.data());
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("#pragma clang\n", Out));
+  EXPECT_STREQ("", Out.data());
+
+  ASSERT_FALSE(
+      minimizeSourceToDependencyDirectives("#pragma clang module\n", Out));
+  EXPECT_STREQ("", Out.data());
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives(
+      "#pragma clang module impor\n", Out));
+  EXPECT_STREQ("", Out.data());
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives(
+      "#pragma clang module import\n", Out));
+  EXPECT_STREQ("#pragma clang module import\n", Out.data());
+}
+
+TEST(MinimizeSourceToDependencyDirectivesTest, Include) {
+  SmallVector<char, 128> Out;
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("#include \"A\"\n", Out));
+  EXPECT_STREQ("#include \"A\"\n", Out.data());
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("#include <A>\n", Out));
+  EXPECT_STREQ("#include <A>\n", Out.data());
+
+  ASSERT_FALSE(
+      minimizeSourceToDependencyDirectives("#include_next <A>\n", Out));
+  EXPECT_STREQ("#include_next <A>\n", Out.data());
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("#import <A>\n", Out));
+  EXPECT_STREQ("#import <A>\n", Out.data());
+
+  ASSERT_FALSE(
+      minimizeSourceToDependencyDirectives("#__include_macros <A>\n", Out));
+  EXPECT_STREQ("#__include_macros <A>\n", Out.data());
+}
+
+TEST(MinimizeSourceToDependencyDirectivesTest, AtImport) {
+  SmallVector<char, 128> Out;
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("@import A;\n", Out));
+  EXPECT_STREQ("@import A;\n", Out.data());
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives(" @ import  A;\n", Out));
+  EXPECT_STREQ("@import A;\n", Out.data());
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("@import A\n;", Out));
+  EXPECT_STREQ("@import A;\n", Out.data());
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("@import A.B;\n", Out));
+  EXPECT_STREQ("@import A.B;\n", Out.data());
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives(
+      "@import /*x*/ A /*x*/ . /*x*/ B /*x*/ \n /*x*/ ; /*x*/", Out));
+  EXPECT_STREQ("@import A.B;\n", Out.data());
+}
+
+TEST(MinimizeSourceToDependencyDirectivesTest, AtImportFailures) {
+  SmallVector<char, 128> Out;
+
+  ASSERT_TRUE(minimizeSourceToDependencyDirectives("@import A\n", Out));
+  ASSERT_TRUE(minimizeSourceToDependencyDirectives("@import MACRO(A);\n", Out));
+  ASSERT_TRUE(minimizeSourceToDependencyDirectives("@import \" \";\n", Out));
+}
+
+TEST(MinimizeSourceToDependencyDirectivesTest, RawStringLiteral) {
+  SmallVector<char, 128> Out;
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("#ifndef GUARD\n"
+                                                    "#define GUARD\n"
+                                                    "R\"()\"\n"
+                                                    "#endif\n",
+                                                    Out));
+  EXPECT_STREQ("#ifndef GUARD\n"
+               "#define GUARD\n"
+               "#endif\n",
+               Out.data());
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives(
+      "#ifndef GUARD\n"
+      "#define GUARD\n"
+      R"raw(static constexpr char bytes[] = R"(-?:\,[]{}#&*!|>'"%@`)";)raw"
+      "\n"
+      "#endif\n",
+      Out));
+  EXPECT_STREQ("#ifndef GUARD\n"
+               "#define GUARD\n"
+               "#endif\n",
+               Out.data());
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives(
+      "#ifndef GUARD\n"
+      "#define GUARD\n"
+      R"raw(static constexpr char bytes[] = R"abc(-?:\,[]{}#&*!|>'"%@`)abc";)raw"
+      "\n"
+      "#endif\n",
+      Out));
+  EXPECT_STREQ("#ifndef GUARD\n"
+               "#define GUARD\n"
+               "#endif\n",
+               Out.data());
+}
+
+TEST(MinimizeSourceToDependencyDirectivesTest, SplitIdentifier) {
+  SmallVector<char, 128> Out;
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("#if\\\n"
+                                                    "ndef GUARD\n"
+                                                    "#define GUARD\n"
+                                                    "#endif\n",
+                                                    Out));
+  EXPECT_STREQ("#ifndef GUARD\n"
+               "#define GUARD\n"
+               "#endif\n",
+               Out.data());
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define GUA\\\n"
+                                                    "RD\n",
+                                                    Out));
+  EXPECT_STREQ("#define GUARD\n", Out.data());
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define GUA\\\r"
+                                                    "RD\n",
+                                                    Out));
+  EXPECT_STREQ("#define GUARD\n", Out.data());
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define GUA\\\n"
+                                                    "           RD\n",
+                                                    Out));
+  EXPECT_STREQ("#define GUA RD\n", Out.data());
+}
+
+TEST(MinimizeSourceToDependencyDirectivesTest, PoundWarningAndError) {
+  SmallVector<char, 128> Out;
+
+  for (auto Source : {
+           "#warning '\n#include <t.h>\n",
+           "#warning \"\n#include <t.h>\n",
+           "#warning /*\n#include <t.h>\n",
+           "#warning \\\n#include <t.h>\n#include <t.h>\n",
+           "#error '\n#include <t.h>\n",
+           "#error \"\n#include <t.h>\n",
+           "#error /*\n#include <t.h>\n",
+           "#error \\\n#include <t.h>\n#include <t.h>\n",
+       }) {
+    ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out));
+    EXPECT_STREQ("#include <t.h>\n", Out.data());
+  }
+
+  for (auto Source : {
+           "#warning \\\n#include <t.h>\n",
+           "#error \\\n#include <t.h>\n",
+           "#if MACRO\n#warning '\n#endif\n",
+           "#if MACRO\n#warning \"\n#endif\n",
+           "#if MACRO\n#warning /*\n#endif\n",
+           "#if MACRO\n#error '\n#endif\n",
+           "#if MACRO\n#error \"\n#endif\n",
+           "#if MACRO\n#error /*\n#endif\n",
+       }) {
+    ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out));
+    EXPECT_STREQ("", Out.data());
+  }
+}
+
+TEST(MinimizeSourceToDependencyDirectivesTest, CharacterLiteral) {
+  SmallVector<char, 128> Out;
+
+  StringRef Source = R"(
+#include <bob>
+int a = 0'1;
+int b = 0xfa'af'fa;
+int c = 12 ' ';
+#include <foo>
+)";
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out));
+  EXPECT_STREQ("#include <bob>\n#include <foo>\n", Out.data());
+}
+
+} // end anonymous namespace
Index: cfe/trunk/include/clang/Basic/DiagnosticLexKinds.td
===================================================================
--- cfe/trunk/include/clang/Basic/DiagnosticLexKinds.td
+++ cfe/trunk/include/clang/Basic/DiagnosticLexKinds.td
@@ -818,4 +818,13 @@
 
 }
 
+let CategoryName = "Dependency Directive Source Minimization Issue" in {
+
+def err_dep_source_minimizer_missing_sema_after_at_import : Error<
+  "could not find ';' after @import">;
+def err_dep_source_minimizer_unexpected_tokens_at_import : Error<
+  "unexpected extra tokens at end of @import declaration">;
+
+}
+
 }
Index: cfe/trunk/include/clang/Frontend/FrontendActions.h
===================================================================
--- cfe/trunk/include/clang/Frontend/FrontendActions.h
+++ cfe/trunk/include/clang/Frontend/FrontendActions.h
@@ -240,6 +240,17 @@
   bool usesPreprocessorOnly() const override { return true; }
 };
 
+class PrintDependencyDirectivesSourceMinimizerAction : public FrontendAction {
+protected:
+  void ExecuteAction() override;
+  std::unique_ptr<ASTConsumer> CreateASTConsumer(CompilerInstance &,
+                                                 StringRef) override {
+    return nullptr;
+  }
+
+  bool usesPreprocessorOnly() const override { return true; }
+};
+
 //===----------------------------------------------------------------------===//
 // Preprocessor Actions
 //===----------------------------------------------------------------------===//
Index: cfe/trunk/include/clang/Frontend/FrontendOptions.h
===================================================================
--- cfe/trunk/include/clang/Frontend/FrontendOptions.h
+++ cfe/trunk/include/clang/Frontend/FrontendOptions.h
@@ -128,7 +128,10 @@
   MigrateSource,
 
   /// Just lex, no output.
-  RunPreprocessorOnly
+  RunPreprocessorOnly,
+
+  /// Print the output of the dependency directives source minimizer.
+  PrintDependencyDirectivesSourceMinimizerOutput
 };
 
 } // namespace frontend
Index: cfe/trunk/include/clang/Driver/CC1Options.td
===================================================================
--- cfe/trunk/include/clang/Driver/CC1Options.td
+++ cfe/trunk/include/clang/Driver/CC1Options.td
@@ -612,6 +612,9 @@
   HelpText<"Migrate source code">;
 def compiler_options_dump : Flag<["-"], "compiler-options-dump">,
   HelpText<"Dump the compiler configuration options">;
+def print_dependency_directives_minimized_source : Flag<["-"],
+  "print-dependency-directives-minimized-source">,
+  HelpText<"Print the output of the dependency directives source minimizer">;
 }
 
 def emit_llvm_uselists : Flag<["-"], "emit-llvm-uselists">,
Index: cfe/trunk/include/clang/Lex/DependencyDirectivesSourceMinimizer.h
===================================================================
--- cfe/trunk/include/clang/Lex/DependencyDirectivesSourceMinimizer.h
+++ cfe/trunk/include/clang/Lex/DependencyDirectivesSourceMinimizer.h
@@ -0,0 +1,88 @@
+//===- clang/Lex/DependencyDirectivesSourceMinimizer.h -  ----------*- C++ -*-//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This is the interface for minimizing header and source files to the
+/// minimum necessary preprocessor directives for evaluating includes. It
+/// reduces the source down to #define, #include, #import, @import, and any
+/// conditional preprocessor logic that contains one of those.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LEX_DEPENDENCY_DIRECTIVES_SOURCE_MINIMIZER_H
+#define LLVM_CLANG_LEX_DEPENDENCY_DIRECTIVES_SOURCE_MINIMIZER_H
+
+#include "clang/Basic/SourceLocation.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+
+namespace clang {
+
+class DiagnosticsEngine;
+
+namespace minimize_source_to_dependency_directives {
+
+/// Represents the kind of preprocessor directive or a module declaration that
+/// is tracked by the source minimizer in its token output.
+enum TokenKind {
+  pp_none,
+  pp_include,
+  pp___include_macros,
+  pp_define,
+  pp_undef,
+  pp_import,
+  pp_pragma_import,
+  pp_include_next,
+  pp_if,
+  pp_ifdef,
+  pp_ifndef,
+  pp_elif,
+  pp_else,
+  pp_endif,
+  decl_at_import,
+  pp_eof,
+};
+
+/// Represents a simplified token that's lexed as part of the source
+/// minimization. It's used to track the location of various preprocessor
+/// directives that could potentially have an effect on the depedencies.
+struct Token {
+  /// The kind of token.
+  TokenKind K = pp_none;
+
+  /// Offset into the output byte stream of where the directive begins.
+  int Offset = -1;
+
+  Token(TokenKind K, int Offset) : K(K), Offset(Offset) {}
+};
+
+} // end namespace minimize_source_to_dependency_directives
+
+/// Minimize the input down to the preprocessor directives that might have
+/// an effect on the dependencies for a compilation unit.
+///
+/// This function deletes all non-preprocessor code, and strips anything that
+/// can't affect what gets included. It canonicalizes whitespace where
+/// convenient to stabilize the output against formatting changes in the input.
+///
+/// Clears the output vectors at the beginning of the call.
+///
+/// \returns false on success, true on error. If the diagnostic engine is not
+/// null, an appropriate error is reported using the given input location
+/// with the offset that corresponds to the minimizer's current buffer offset.
+bool minimizeSourceToDependencyDirectives(
+    llvm::StringRef Input, llvm::SmallVectorImpl<char> &Output,
+    llvm::SmallVectorImpl<minimize_source_to_dependency_directives::Token>
+        &Tokens,
+    DiagnosticsEngine *Diags = nullptr,
+    SourceLocation InputSourceLoc = SourceLocation());
+
+} // end namespace clang
+
+#endif // LLVM_CLANG_LEX_DEPENDENCY_DIRECTIVES_SOURCE_MINIMIZER_H
Index: cfe/trunk/test/Frontend/minimize_source_to_dependency_directives.c
===================================================================
--- cfe/trunk/test/Frontend/minimize_source_to_dependency_directives.c
+++ cfe/trunk/test/Frontend/minimize_source_to_dependency_directives.c
@@ -0,0 +1,14 @@
+// RUN: %clang_cc1 -print-dependency-directives-minimized-source %s > %t
+// RUN: echo END. >> %t
+// RUN: FileCheck < %t %s
+
+#ifdef FOO
+#include "a.h"
+#else
+void skipThisCode();
+#endif
+
+// CHECK:      #ifdef FOO
+// CHECK-NEXT: #include "a.h"
+// CHECK-NEXT: #endif
+// CHECK-NEXT: END.
Index: cfe/trunk/test/Lexer/minimize_source_to_dependency_directives_at_import_missing_semi.m
===================================================================
--- cfe/trunk/test/Lexer/minimize_source_to_dependency_directives_at_import_missing_semi.m
+++ cfe/trunk/test/Lexer/minimize_source_to_dependency_directives_at_import_missing_semi.m
@@ -0,0 +1,3 @@
+// RUN: %clang_cc1 -verify -print-dependency-directives-minimized-source %s 2>&1
+
+@import x // expected-error {{could not find ';' after @import}}
Index: cfe/trunk/test/Lexer/minimize_source_to_dependency_directives_invalid_macro_name.c
===================================================================
--- cfe/trunk/test/Lexer/minimize_source_to_dependency_directives_invalid_macro_name.c
+++ cfe/trunk/test/Lexer/minimize_source_to_dependency_directives_invalid_macro_name.c
@@ -0,0 +1,3 @@
+// RUN: %clang_cc1 -verify -print-dependency-directives-minimized-source %s 2>&1
+
+#define 0 0 // expected-error {{macro name must be an identifier}}
Index: cfe/trunk/test/Lexer/minimize_source_to_dependency_directives_at_import_extra_tokens.m
===================================================================
--- cfe/trunk/test/Lexer/minimize_source_to_dependency_directives_at_import_extra_tokens.m
+++ cfe/trunk/test/Lexer/minimize_source_to_dependency_directives_at_import_extra_tokens.m
@@ -0,0 +1,3 @@
+// RUN: %clang_cc1 -verify -print-dependency-directives-minimized-source %s 2>&1
+
+@import x; a // expected-error {{unexpected extra tokens at end of @import declaration}}
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
  • [PATCH] D55463: Introduce a so... Alex Lorenz via Phabricator via cfe-commits

Reply via email to