ioeric created this revision.
ioeric added a reviewer: ilya-biryukov.
Herald added subscribers: cfe-commits, klimek.

The class will be moved into libToolingCore as followup.

The new behaviors in this patch:

- New #include is inserted in the right position in a #include block to

preserver sorted #includes. This is best effort - only works when the
block is already sorted.

- When inserting multiple #includes to the end of a file which doesn't

end with a "\n" character, a "\n" will be prepended to each #include.
This is a special and rare case that was previously handled. This is now
relaxed to avoid complexity as it's rare in practice.


Repository:
  rC Clang

https://reviews.llvm.org/D46180

Files:
  lib/Format/Format.cpp
  unittests/Format/CleanupTest.cpp

Index: unittests/Format/CleanupTest.cpp
===================================================================
--- unittests/Format/CleanupTest.cpp
+++ unittests/Format/CleanupTest.cpp
@@ -471,19 +471,77 @@
   EXPECT_EQ(Expected, apply(Code, Replaces));
 }
 
+TEST_F(CleanUpReplacementsTest, InsertIntoBlockSorted) {
+  std::string Code = "#include \"x/fix.h\"\n"
+                     "#include \"a.h\"\n"
+                     "#include \"c.h\"\n"
+                     "#include <memory>\n";
+  std::string Expected = "#include \"x/fix.h\"\n"
+                         "#include \"a.h\"\n"
+                         "#include \"b.h\"\n"
+                         "#include \"c.h\"\n"
+                         "#include <memory>\n";
+  tooling::Replacements Replaces =
+      toReplacements({createInsertion("#include \"b.h\"")});
+  EXPECT_EQ(Expected, apply(Code, Replaces));
+}
+
+TEST_F(CleanUpReplacementsTest, InsertIntoFirstBlockOfSameKind) {
+  std::string Code = "#include \"x/fix.h\"\n"
+                     "#include \"c.h\"\n"
+                     "#include \"e.h\"\n"
+                     "#include \"f.h\"\n"
+                     "#include <memory>\n"
+                     "#include <vector>\n"
+                     "#include \"m.h\"\n"
+                     "#include \"n.h\"\n";
+  std::string Expected = "#include \"x/fix.h\"\n"
+                         "#include \"c.h\"\n"
+                         "#include \"d.h\"\n"
+                         "#include \"e.h\"\n"
+                         "#include \"f.h\"\n"
+                         "#include <memory>\n"
+                         "#include <vector>\n"
+                         "#include \"m.h\"\n"
+                         "#include \"n.h\"\n";
+  tooling::Replacements Replaces =
+      toReplacements({createInsertion("#include \"d.h\"")});
+  EXPECT_EQ(Expected, apply(Code, Replaces));
+}
+
+TEST_F(CleanUpReplacementsTest, InsertIntoSystemBlockSorted) {
+  std::string Code = "#include \"x/fix.h\"\n"
+                     "#include \"a.h\"\n"
+                     "#include \"c.h\"\n"
+                     "#include <a>\n"
+                     "#include <z>\n";
+  std::string Expected = "#include \"x/fix.h\"\n"
+                         "#include \"a.h\"\n"
+                         "#include \"c.h\"\n"
+                         "#include <a>\n"
+                         "#include <vector>\n"
+                         "#include <z>\n";
+  tooling::Replacements Replaces =
+      toReplacements({createInsertion("#include <vector>")});
+  EXPECT_EQ(Expected, apply(Code, Replaces));
+}
+
+
 TEST_F(CleanUpReplacementsTest, InsertMultipleIncludesLLVMStyle) {
   std::string Code = "#include \"x/fix.h\"\n"
                      "#include \"a.h\"\n"
                      "#include \"b.h\"\n"
+                     "#include \"z.h\"\n"
                      "#include \"clang/Format/Format.h\"\n"
                      "#include <memory>\n";
   std::string Expected = "#include \"x/fix.h\"\n"
                          "#include \"a.h\"\n"
                          "#include \"b.h\"\n"
                          "#include \"new/new.h\"\n"
+                         "#include \"z.h\"\n"
                          "#include \"clang/Format/Format.h\"\n"
-                         "#include <memory>\n"
-                         "#include <list>\n";
+                         "#include <list>\n"
+                         "#include <memory>\n";
   tooling::Replacements Replaces =
       toReplacements({createInsertion("#include <list>"),
                       createInsertion("#include \"new/new.h\"")});
@@ -517,12 +575,12 @@
                      "#include \"z/b.h\"\n";
   std::string Expected = "#include \"x/fix.h\"\n"
                          "\n"
-                         "#include <vector>\n"
                          "#include <list>\n"
+                         "#include <vector>\n"
                          "\n"
+                         "#include \"x/x.h\"\n"
                          "#include \"y/a.h\"\n"
-                         "#include \"z/b.h\"\n"
-                         "#include \"x/x.h\"\n";
+                         "#include \"z/b.h\"\n";
   tooling::Replacements Replaces =
       toReplacements({createInsertion("#include <list>"),
                       createInsertion("#include \"x/x.h\"")});
@@ -776,8 +834,10 @@
 
 TEST_F(CleanUpReplacementsTest, NoNewLineAtTheEndOfCodeMultipleInsertions) {
   std::string Code = "#include <map>";
+  // FIXME: a better behavior is to only append on newline to Code, but this
+  // case should be rare in practice.
   std::string Expected =
-      "#include <map>\n#include <string>\n#include <vector>\n";
+      "#include <map>\n#include <string>\n\n#include <vector>\n";
   tooling::Replacements Replaces =
       toReplacements({createInsertion("#include <string>"),
                       createInsertion("#include <vector>")});
@@ -795,14 +855,11 @@
   EXPECT_EQ(Expected, apply(Code, Replaces));
 }
 
-TEST_F(CleanUpReplacementsTest, AddIncludesWithDifferentForms) {
+TEST_F(CleanUpReplacementsTest, NoAddSameIncludesWithDifferentForms) {
   std::string Code = "#include \"a.h\"\n"
                      "#include <vector>\n";
-  // FIXME: this might not be the best behavior.
   std::string Expected = "#include \"a.h\"\n"
-                         "#include \"vector\"\n"
-                         "#include <vector>\n"
-                         "#include <a.h>\n";
+                         "#include <vector>\n";
   tooling::Replacements Replaces =
       toReplacements({createInsertion("#include \"vector\""),
                       createInsertion("#include <a.h>")});
Index: lib/Format/Format.cpp
===================================================================
--- lib/Format/Format.cpp
+++ lib/Format/Format.cpp
@@ -41,6 +41,7 @@
 #include <algorithm>
 #include <memory>
 #include <string>
+#include <unordered_map>
 
 #define DEBUG_TYPE "format-formatter"
 
@@ -1687,7 +1688,7 @@
   // Returns the priority of the category which \p IncludeName belongs to.
   // If \p CheckMainHeader is true and \p IncludeName is a main header, returns
   // 0. Otherwise, returns the priority of the matching category or INT_MAX.
-  int getIncludePriority(StringRef IncludeName, bool CheckMainHeader) {
+  int getIncludePriority(StringRef IncludeName, bool CheckMainHeader) const {
     int Ret = INT_MAX;
     for (unsigned i = 0, e = CategoryRegexs.size(); i != e; ++i)
       if (CategoryRegexs[i].match(IncludeName)) {
@@ -1720,7 +1721,7 @@
   bool IsMainFile;
   StringRef FileName;
   StringRef FileStem;
-  SmallVector<llvm::Regex, 4> CategoryRegexs;
+  mutable SmallVector<llvm::Regex, 4> CategoryRegexs;
 };
 
 const char IncludeRegexPattern[] =
@@ -1984,103 +1985,117 @@
       });
 }
 
-bool isDeletedHeader(llvm::StringRef HeaderName,
-                     const std::set<llvm::StringRef> &HeadersToDelete) {
-  return HeadersToDelete.count(HeaderName) ||
-         HeadersToDelete.count(HeaderName.trim("\"<>"));
-}
+/// Generates replacements for inserting or deleting #include headers in a file.
+class HeaderIncludes {
+public:
+  HeaderIncludes(llvm::StringRef FileName, llvm::StringRef Code,
+                 const FormatStyle &Style);
+
+  /// Inserts an #include directive of \p IncludeName into the code. If \p
+  /// IncludeName is quoted e.g. <...> or "...", it will be #included as is;
+  /// by default,  it is quoted with "".
+  ///
+  /// When searching for points to insert new header, this ignores #include's
+  /// after the #include block(s) in the beginning of a file to avoid inserting
+  /// headers into code sections where new #include's should not be added by
+  /// default. These code sections include:
+  ///   - raw string literals (containing #include).
+  ///   - #if blocks.
+  ///   - Special #include's among declarations (e.g. functions).
+  ///
+  /// Returns a replacement that inserts the new header into a suitable #include
+  /// block (#include order is preserved with best effort e.g. if they are
+  /// already sorted; caller should consider sorting the #includes with
+  /// clang-format after insertions). If \p IncludeName already exists, this
+  /// returns None.
+  llvm::Optional<tooling::Replacement> insert(llvm::StringRef IncludeName) const;
+
+  /// Removes all existing #includes of \p Header. If \p Header is not quoted
+  /// (e.g. without <> or ""), #include of \p Header with both quotations will
+  /// be removed.
+  tooling::Replacements remove(llvm::StringRef Header) const;
 
-// FIXME: insert empty lines between newly created blocks.
-tooling::Replacements
-fixCppIncludeInsertions(StringRef Code, const tooling::Replacements &Replaces,
-                        const FormatStyle &Style) {
-  if (!Style.isCpp())
-    return Replaces;
+private:
+  struct Include {
+    Include(StringRef Name, tooling::Range R) : Name(Name), R(R) {}
+
+    // An include header quoted with either <> or "".
+    std::string Name;
+    // The range of the whole line of include directive including any eading
+    // whitespaces and trailing comment.
+    tooling::Range R;
+  };
 
-  tooling::Replacements HeaderInsertions;
-  std::set<llvm::StringRef> HeadersToDelete;
-  tooling::Replacements Result;
-  for (const auto &R : Replaces) {
-    if (isHeaderInsertion(R)) {
-      // Replacements from \p Replaces must be conflict-free already, so we can
-      // simply consume the error.
-      llvm::consumeError(HeaderInsertions.add(R));
-    } else if (isHeaderDeletion(R)) {
-      HeadersToDelete.insert(R.getReplacementText());
-    } else if (R.getOffset() == UINT_MAX) {
-      llvm::errs() << "Insertions other than header #include insertion are "
-                      "not supported! "
-                   << R.getReplacementText() << "\n";
-    } else {
-      llvm::consumeError(Result.add(R));
-    }
-  }
-  if (HeaderInsertions.empty() && HeadersToDelete.empty())
-    return Replaces;
+  void addExistingInclude(Include IncludeToAdd, unsigned NextLineOffset);
 
-  llvm::Regex IncludeRegex(IncludeRegexPattern);
-  llvm::Regex DefineRegex(R"(^[\t\ ]*#[\t\ ]*define[\t\ ]*[^\\]*$)");
-  SmallVector<StringRef, 4> Matches;
+  StringRef FileName;
+  StringRef Code;
 
-  StringRef FileName = Replaces.begin()->getFilePath();
-  IncludeCategoryManager Categories(Style, FileName);
+  // Map from include name (quotation trimmed) to a list of existing includes (in case
+  // there are more than one) with the name in the current file.
+  // <x> and "x" will be treated as the same header when deleting #includes.
+  llvm::StringMap<llvm::SmallVector<Include, 1>> ExistingIncludes;
+
+  std::unordered_map<int, llvm::SmallVector<const Include *, 8>>
+      IncludesByPriority;
 
+  int FirstIncludeOffset;
+  // All new headers should be inserted after this offset.
+  unsigned MinInsertOffset;
+  // Code with the first MinInsertOffset characters trimmed.
+  StringRef TrimmedCode;
+  // Max insertion offset in the original code.
+  unsigned MaxInsertOffset;
+  IncludeCategoryManager Categories;
   // Record the offset of the end of the last include in each category.
-  std::map<int, int> CategoryEndOffsets;
+  std::unordered_map<int, int> CategoryEndOffsets;
+
   // All possible priorities.
-  // Add 0 for main header and INT_MAX for headers that are not in any category.
-  std::set<int> Priorities = {0, INT_MAX};
+  std::set<int> Priorities;
+
+  // Matches a whole #include directive.
+  llvm::Regex IncludeRegex;
+};
+
+HeaderIncludes::HeaderIncludes(StringRef FileName, StringRef Code,
+                               const FormatStyle &Style)
+    : FileName(FileName), Code(Code), FirstIncludeOffset(-1),
+      MinInsertOffset(
+          getOffsetAfterHeaderGuardsAndComments(FileName, Code, Style)),
+      TrimmedCode(Code.drop_front(MinInsertOffset)),
+      MaxInsertOffset(MinInsertOffset + getMaxHeaderInsertionOffset(
+                                            FileName, TrimmedCode, Style)),
+      Categories(Style, FileName),
+      IncludeRegex(llvm::Regex(IncludeRegexPattern)) {
+  // Add 0 for main header and INT_MAX for headers that are not in any
+  // category.
+  Priorities = {0, INT_MAX};
   for (const auto &Category : Style.IncludeCategories)
     Priorities.insert(Category.Priority);
-  int FirstIncludeOffset = -1;
-  // All new headers should be inserted after this offset.
-  unsigned MinInsertOffset =
-      getOffsetAfterHeaderGuardsAndComments(FileName, Code, Style);
-  StringRef TrimmedCode = Code.drop_front(MinInsertOffset);
-  // Max insertion offset in the original code.
-  unsigned MaxInsertOffset =
-      MinInsertOffset +
-      getMaxHeaderInsertionOffset(FileName, TrimmedCode, Style);
   SmallVector<StringRef, 32> Lines;
   TrimmedCode.split(Lines, '\n');
+
   unsigned Offset = MinInsertOffset;
   unsigned NextLineOffset;
-  std::set<StringRef> ExistingIncludes;
+  SmallVector<StringRef, 4> Matches;
   for (auto Line : Lines) {
     NextLineOffset = std::min(Code.size(), Offset + Line.size() + 1);
     if (IncludeRegex.match(Line, &Matches)) {
-      // The header name with quotes or angle brackets.
-      StringRef IncludeName = Matches[2];
-      ExistingIncludes.insert(IncludeName);
-      // Only record the offset of current #include if we can insert after it.
-      if (Offset <= MaxInsertOffset) {
-        int Category = Categories.getIncludePriority(
-            IncludeName, /*CheckMainHeader=*/FirstIncludeOffset < 0);
-        CategoryEndOffsets[Category] = NextLineOffset;
-        if (FirstIncludeOffset < 0)
-          FirstIncludeOffset = Offset;
-      }
-      if (isDeletedHeader(IncludeName, HeadersToDelete)) {
-        // If this is the last line without trailing newline, we need to make
-        // sure we don't delete across the file boundary.
-        unsigned Length = std::min(Line.size() + 1, Code.size() - Offset);
-        llvm::Error Err =
-            Result.add(tooling::Replacement(FileName, Offset, Length, ""));
-        if (Err) {
-          // Ignore the deletion on conflict.
-          llvm::errs() << "Failed to add header deletion replacement for "
-                       << IncludeName << ": " << llvm::toString(std::move(Err))
-                       << "\n";
-        }
-      }
+      // If this is the last line without trailing newline, we need to make
+      // sure we don't delete across the file boundary.
+      addExistingInclude(
+          Include(Matches[2],
+                  tooling::Range(
+                      Offset, std::min(Line.size() + 1, Code.size() - Offset))),
+          NextLineOffset);
     }
     Offset = NextLineOffset;
   }
 
   // Populate CategoryEndOfssets:
   // - Ensure that CategoryEndOffset[Highest] is always populated.
-  // - If CategoryEndOffset[Priority] isn't set, use the next higher value that
-  //   is set, up to CategoryEndOffset[Highest].
+  // - If CategoryEndOffset[Priority] isn't set, use the next higher value
+  // that is set, up to CategoryEndOffset[Highest].
   auto Highest = Priorities.begin();
   if (CategoryEndOffsets.find(*Highest) == CategoryEndOffsets.end()) {
     if (FirstIncludeOffset >= 0)
@@ -2095,39 +2110,156 @@
   for (auto I = ++Priorities.begin(), E = Priorities.end(); I != E; ++I)
     if (CategoryEndOffsets.find(*I) == CategoryEndOffsets.end())
       CategoryEndOffsets[*I] = CategoryEndOffsets[*std::prev(I)];
+}
+
+inline StringRef trimInclude(StringRef IncludeName) {
+  return IncludeName.trim("\"<>");
+}
 
-  bool NeedNewLineAtEnd = !Code.empty() && Code.back() != '\n';
+// \p Offset: the start of the line following this include directive.
+void HeaderIncludes::addExistingInclude(Include IncludeToAdd,
+                                        unsigned NextLineOffset) {
+  auto Iter =
+      ExistingIncludes.try_emplace(trimInclude(IncludeToAdd.Name)).first;
+  Iter->second.push_back(std::move(IncludeToAdd));
+  auto &CurInclude = Iter->second.back();
+  // The header name with quotes or angle brackets.
+  // Only record the offset of current #include if we can insert after it.
+  if (CurInclude.R.getOffset() <= MaxInsertOffset) {
+    int Priority = Categories.getIncludePriority(
+        CurInclude.Name, /*CheckMainHeader=*/FirstIncludeOffset < 0);
+    CategoryEndOffsets[Priority] = NextLineOffset;
+    IncludesByPriority[Priority].push_back(&CurInclude);
+    if (FirstIncludeOffset < 0)
+      FirstIncludeOffset = CurInclude.R.getOffset();
+  }
+}
+
+llvm::Optional<tooling::Replacement>
+HeaderIncludes::insert(llvm::StringRef IncludeName) const {
+  std::string TrimmedName = trimInclude(IncludeName);
+  // If a <header> ("header") already exists in code, "header" (<header>) with
+  // different quotation will not be inserted.
+  if (ExistingIncludes.find(TrimmedName) != ExistingIncludes.end())
+    return llvm::None;
+  std::string Quoted =
+      (IncludeName.startswith("<") || IncludeName.startswith("\""))
+          ? IncludeName.str()
+          : ("\"" + IncludeName + "\"").str();
+  StringRef QuotedName = Quoted;
+  int Priority = Categories.getIncludePriority(
+      QuotedName, /*CheckMainHeader=*/FirstIncludeOffset < 0);
+  auto CatOffset = CategoryEndOffsets.find(Priority);
+  assert(CatOffset != CategoryEndOffsets.end());
+  unsigned InsertOffset = CatOffset->second; // Fall back offset
+  auto Iter = IncludesByPriority.find(Priority);
+  if (Iter != IncludesByPriority.end()) {
+    for (const auto *Inc : Iter->second) {
+      if (QuotedName < Inc->Name) {
+        InsertOffset = Inc->R.getOffset();
+        break;
+      }
+    }
+  }
+  assert(InsertOffset <= Code.size());
+  std::string NewInclude = ("#include " + QuotedName + "\n").str();
+  // When inserting headers at end of the code, also append '\n' to the code
+  // if it does not end with '\n'.
+  // FIXME: when inserting multiple #includes at the end of code, only one
+  // newline should be added.
+  if (InsertOffset == Code.size() && (!Code.empty() && Code.back() != '\n'))
+    NewInclude = "\n" + NewInclude;
+  return tooling::Replacement(FileName, InsertOffset, 0, NewInclude);
+}
+
+tooling::Replacements
+HeaderIncludes::remove(llvm::StringRef IncludeName) const {
+  StringRef TrimmedName = trimInclude(IncludeName);
+  tooling::Replacements Result;
+  auto Iter = ExistingIncludes.find(TrimmedName);
+  if (Iter == ExistingIncludes.end())
+    return Result;
+  for (const auto &Inc : Iter->second) {
+    assert(trimInclude(Inc.Name) == TrimmedName);
+    // If IncludeName is not quoted, then both quotations are deleted.
+    if (IncludeName != TrimmedName &&
+        IncludeName != Inc.Name) // Different quotations, skip.
+      continue;
+    llvm::Error Err = Result.add(tooling::Replacement(
+        FileName, Inc.R.getOffset(), Inc.R.getLength(), ""));
+    if (Err) {
+      auto ErrMsg = "Unexpected conflicts in #include deletions: " +
+                    llvm::toString(std::move(Err));
+      llvm_unreachable(ErrMsg.c_str());
+    }
+  }
+  return Result;
+}
+
+// FIXME: insert empty lines between newly created blocks.
+tooling::Replacements
+fixCppIncludeInsertions(StringRef Code, const tooling::Replacements &Replaces,
+                        const FormatStyle &Style) {
+  if (!Style.isCpp())
+    return Replaces;
+
+  tooling::Replacements HeaderInsertions;
+  std::set<llvm::StringRef> HeadersToDelete;
+  tooling::Replacements Result;
+  for (const auto &R : Replaces) {
+    if (isHeaderInsertion(R)) {
+      // Replacements from \p Replaces must be conflict-free already, so we can
+      // simply consume the error.
+      llvm::consumeError(HeaderInsertions.add(R));
+    } else if (isHeaderDeletion(R)) {
+      HeadersToDelete.insert(R.getReplacementText());
+    } else if (R.getOffset() == UINT_MAX) {
+      llvm::errs() << "Insertions other than header #include insertion are "
+                      "not supported! "
+                   << R.getReplacementText() << "\n";
+    } else {
+      llvm::consumeError(Result.add(R));
+    }
+  }
+  if (HeaderInsertions.empty() && HeadersToDelete.empty())
+    return Replaces;
+
+
+  StringRef FileName = Replaces.begin()->getFilePath();
+  HeaderIncludes Includes(FileName, Code, Style);
+
+  for (const auto &Header : HeadersToDelete) {
+    tooling::Replacements Replaces = Includes.remove(Header);
+    for (const auto &R : Replaces) {
+      auto Err = Result.add(R);
+      if (Err) {
+        // Ignore the deletion on conflict.
+        llvm::errs() << "Failed to add header deletion replacement for "
+                     << Header << ": " << llvm::toString(std::move(Err))
+                     << "\n";
+      }
+    }
+  }
+
+  llvm::Regex IncludeRegex = llvm::Regex(IncludeRegexPattern);
+  llvm::SmallVector<StringRef, 4> Matches;
   for (const auto &R : HeaderInsertions) {
     auto IncludeDirective = R.getReplacementText();
     bool Matched = IncludeRegex.match(IncludeDirective, &Matches);
     assert(Matched && "Header insertion replacement must have replacement text "
                       "'#include ...'");
     (void)Matched;
     auto IncludeName = Matches[2];
-    if (ExistingIncludes.find(IncludeName) != ExistingIncludes.end()) {
-      DEBUG(llvm::dbgs() << "Skip adding existing include : " << IncludeName
-                         << "\n");
-      continue;
-    }
-    int Category =
-        Categories.getIncludePriority(IncludeName, /*CheckMainHeader=*/true);
-    Offset = CategoryEndOffsets[Category];
-    std::string NewInclude = !IncludeDirective.endswith("\n")
-                                 ? (IncludeDirective + "\n").str()
-                                 : IncludeDirective.str();
-    // When inserting headers at end of the code, also append '\n' to the code
-    // if it does not end with '\n'.
-    if (NeedNewLineAtEnd && Offset == Code.size()) {
-      NewInclude = "\n" + NewInclude;
-      NeedNewLineAtEnd = false;
-    }
-    auto NewReplace = tooling::Replacement(FileName, Offset, 0, NewInclude);
-    auto Err = Result.add(NewReplace);
-    if (Err) {
-      llvm::consumeError(std::move(Err));
-      unsigned NewOffset = Result.getShiftedCodePosition(Offset);
-      NewReplace = tooling::Replacement(FileName, NewOffset, 0, NewInclude);
-      Result = Result.merge(tooling::Replacements(NewReplace));
+    auto Replace = Includes.insert(IncludeName);
+    if (Replace) {
+      auto Err = Result.add(*Replace);
+      if (Err) {
+        llvm::consumeError(std::move(Err));
+        unsigned NewOffset = Result.getShiftedCodePosition(Replace->getOffset());
+        auto Shifted = tooling::Replacement(FileName, NewOffset, 0,
+                                            Replace->getReplacementText());
+        Result = Result.merge(tooling::Replacements(Shifted));
+      }
     }
   }
   return Result;
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to