ilya-biryukov created this revision.
ilya-biryukov added a reviewer: sammccall.
Herald added a project: clang.

This change makes sure we have a single mapping for each macro expansion,
even if the result of expansion was empty.

To achieve that, we take information from PPCallbacks::MacroExpands into
account. Previously we relied only on source locations of expanded tokens.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D62953

Files:
  clang/include/clang/Tooling/Syntax/Tokens.h
  clang/lib/Tooling/Syntax/Tokens.cpp
  clang/unittests/Tooling/Syntax/TokensTest.cpp

Index: clang/unittests/Tooling/Syntax/TokensTest.cpp
===================================================================
--- clang/unittests/Tooling/Syntax/TokensTest.cpp
+++ clang/unittests/Tooling/Syntax/TokensTest.cpp
@@ -420,7 +420,9 @@
   spelled tokens:
     # define EMPTY # define EMPTY_FUNC ( X ) EMPTY EMPTY_FUNC ( 1 + 2 + 3 )
   mappings:
-    ['#'_0, '<eof>'_18) => ['<eof>'_0, '<eof>'_0)
+    ['#'_0, 'EMPTY'_9) => ['<eof>'_0, '<eof>'_0)
+    ['EMPTY'_9, 'EMPTY_FUNC'_10) => ['<eof>'_0, '<eof>'_0)
+    ['EMPTY_FUNC'_10, '<eof>'_18) => ['<eof>'_0, '<eof>'_0)
 )"},
       // File ends with a macro replacement.
       {R"cpp(
Index: clang/lib/Tooling/Syntax/Tokens.cpp
===================================================================
--- clang/lib/Tooling/Syntax/Tokens.cpp
+++ clang/lib/Tooling/Syntax/Tokens.cpp
@@ -14,6 +14,7 @@
 #include "clang/Basic/SourceLocation.h"
 #include "clang/Basic/SourceManager.h"
 #include "clang/Basic/TokenKinds.h"
+#include "clang/Lex/PPCallbacks.h"
 #include "clang/Lex/Preprocessor.h"
 #include "clang/Lex/Token.h"
 #include "llvm/ADT/ArrayRef.h"
@@ -226,6 +227,33 @@
   return Tokens;
 }
 
+class TokenCollector::Callbacks : public PPCallbacks {
+public:
+  Callbacks(TokenCollector &C) : C(&C) {}
+
+  /// Disabled instance will stop reporting anything to TokenCollector.
+  void disable() { C = nullptr; }
+
+  void MacroExpands(const clang::Token &MacroNameTok, const MacroDefinition &MD,
+                    SourceRange Range, const MacroArgs *Args) override {
+    if (!C)
+      return;
+    // Do not record recursive expansions.
+    if (!MacroNameTok.getLocation().isFileID() ||
+        (LastExpansionEnd.isValid() &&
+         C->PP.getSourceManager().isBeforeInTranslationUnit(Range.getBegin(),
+                                                            LastExpansionEnd)))
+      return;
+    C->Mappings[Range.getBegin().getRawEncoding()] = Range.getEnd();
+    LastExpansionEnd = Range.getEnd();
+  }
+  // FIXME: handle #pragma, #include, etc.
+private:
+  TokenCollector *C;
+  /// Used to detect recursive macro expansions.
+  SourceLocation LastExpansionEnd;
+};
+
 /// Fills in the TokenBuffer by tracing the run of a preprocessor. The
 /// implementation tracks the tokens, macro expansions and directives coming
 /// from the preprocessor and:
@@ -253,15 +281,19 @@
     );
     Expanded.push_back(syntax::Token(T));
   });
+  // And locations of macro calls.
+  auto CB = llvm::make_unique<Callbacks>(*this);
+  this->CB = CB.get();
+  PP.addPPCallbacks(std::move(CB));
 }
 
 /// Builds mappings and spelled tokens in the TokenBuffer based on the expanded
 /// token stream.
 class TokenCollector::Builder {
 public:
-  Builder(std::vector<syntax::Token> Expanded, const SourceManager &SM,
-          const LangOptions &LangOpts)
-      : Result(SM), SM(SM), LangOpts(LangOpts) {
+  Builder(std::vector<syntax::Token> Expanded, SpelledMappings Mappings,
+          const SourceManager &SM, const LangOptions &LangOpts)
+      : Result(SM), Mappings(std::move(Mappings)), SM(SM), LangOpts(LangOpts) {
     Result.ExpandedTokens = std::move(Expanded);
   }
 
@@ -330,22 +362,17 @@
 
     fillGapUntil(File, SpelledRange.getBegin(), I);
 
-    TokenBuffer::Mapping M;
-    // Skip the spelled macro tokens.
-    std::tie(M.BeginSpelled, M.EndSpelled) =
-        consumeSpelledUntil(File, SpelledRange.getEnd().getLocWithOffset(1));
     // Skip all expanded tokens from the same macro expansion.
-    M.BeginExpanded = I;
+    unsigned BeginExpanded = I;
     for (; I + 1 < Result.ExpandedTokens.size(); ++I) {
       auto NextL = Result.ExpandedTokens[I + 1].location();
       if (!NextL.isMacroID() ||
           SM.getExpansionLoc(NextL) != SpelledRange.getBegin())
         break;
     }
-    M.EndExpanded = I + 1;
-
-    // Add a resulting mapping.
-    File.Mappings.push_back(M);
+    unsigned EndExpanded = I + 1;
+    consumeMapping(File, SM.getFileOffset(SpelledRange.getEnd()), BeginExpanded,
+                   EndExpanded, NextSpelled[FID]);
   }
 
   /// Initializes TokenBuffer::Files and fills spelled tokens and expanded
@@ -367,69 +394,108 @@
     }
   }
 
-  /// Consumed spelled tokens until location L is reached (token starting at L
-  /// is not included). Returns the indicies of the consumed range.
-  std::pair</*Begin*/ unsigned, /*End*/ unsigned>
-  consumeSpelledUntil(TokenBuffer::MarkedFile &File, SourceLocation L) {
-    assert(L.isFileID());
-    FileID FID;
-    unsigned Offset;
-    std::tie(FID, Offset) = SM.getDecomposedLoc(L);
+  void consumeEmptyMapping(TokenBuffer::MarkedFile &File, unsigned EndOffset,
+                           unsigned ExpandedIndex, unsigned &SpelledIndex) {
+    consumeMapping(File, EndOffset, ExpandedIndex, ExpandedIndex, SpelledIndex);
+  }
 
-    // (!) we update the index in-place.
-    unsigned &SpelledI = NextSpelled[FID];
-    unsigned Before = SpelledI;
-    for (; SpelledI < File.SpelledTokens.size() &&
-           SM.getFileOffset(File.SpelledTokens[SpelledI].location()) < Offset;
-         ++SpelledI) {
-    }
-    return std::make_pair(Before, /*After*/ SpelledI);
-  };
+  /// Consumes spelled tokens that form a macro expansion and adds a entry to
+  /// the resulting token buffer.
+  /// (!) SpelledIndex is updated in-place.
+  void consumeMapping(TokenBuffer::MarkedFile &File, unsigned EndOffset,
+                      unsigned BeginExpanded, unsigned EndExpanded,
+                      unsigned &SpelledIndex) {
+    // We need to record this mapping before continuing.
+    unsigned MappingBegin = SpelledIndex;
+    ++SpelledIndex;
+
+    bool HitMapping =
+        tryConsumeSpelledUntil(File, EndOffset + 1, SpelledIndex).hasValue();
+    (void)HitMapping;
+    assert(!HitMapping && "recursive macro expansion?");
+
+    TokenBuffer::Mapping M;
+    M.BeginExpanded = BeginExpanded;
+    M.EndExpanded = EndExpanded;
+    M.BeginSpelled = MappingBegin;
+    M.EndSpelled = SpelledIndex;
+
+    File.Mappings.push_back(M);
+  }
 
   /// Consumes spelled tokens until location \p L is reached and adds a mapping
   /// covering the consumed tokens. The mapping will point to an empty expanded
   /// range at position \p ExpandedIndex.
   void fillGapUntil(TokenBuffer::MarkedFile &File, SourceLocation L,
                     unsigned ExpandedIndex) {
-    unsigned BeginSpelledGap, EndSpelledGap;
-    std::tie(BeginSpelledGap, EndSpelledGap) = consumeSpelledUntil(File, L);
-    if (BeginSpelledGap == EndSpelledGap)
-      return; // No gap.
-    TokenBuffer::Mapping M;
-    M.BeginSpelled = BeginSpelledGap;
-    M.EndSpelled = EndSpelledGap;
-    M.BeginExpanded = M.EndExpanded = ExpandedIndex;
-    File.Mappings.push_back(M);
+    assert(L.isFileID());
+    FileID FID;
+    unsigned Offset;
+    std::tie(FID, Offset) = SM.getDecomposedLoc(L);
+
+    unsigned &SpelledIndex = NextSpelled[FID];
+    unsigned MappingBegin = SpelledIndex;
+    while (true) {
+      auto EndLoc = tryConsumeSpelledUntil(File, Offset, SpelledIndex);
+      if (SpelledIndex != MappingBegin) {
+        TokenBuffer::Mapping M;
+        M.BeginSpelled = MappingBegin;
+        M.EndSpelled = SpelledIndex;
+        M.BeginExpanded = M.EndExpanded = ExpandedIndex;
+        File.Mappings.push_back(M);
+      }
+      if (!EndLoc)
+        break;
+      consumeEmptyMapping(File, SM.getFileOffset(*EndLoc), ExpandedIndex,
+                          SpelledIndex);
+
+      MappingBegin = SpelledIndex;
+    }
   };
 
+  /// Consumes spelled tokens until it reaches Offset or a mapping boundary,
+  /// i.e. a name of a macro expansion or the start '#' token of a PP directive.
+  /// (!) NextSpelled is updated in place.
+  ///
+  /// returns None if \p Offset was reached, otherwise returns the end location
+  /// of a mapping that starts at \p NextSpelled.
+  llvm::Optional<SourceLocation>
+  tryConsumeSpelledUntil(TokenBuffer::MarkedFile &File, unsigned Offset,
+                         unsigned &NextSpelled) {
+    for (; NextSpelled < File.SpelledTokens.size(); ++NextSpelled) {
+      auto L = File.SpelledTokens[NextSpelled].location();
+      if (Offset <= SM.getFileOffset(L))
+        return llvm::None; // reached the offset we are looking for.
+      auto Mapping = Mappings.find(L.getRawEncoding());
+      if (Mapping != Mappings.end())
+        return Mapping->second; // found a mapping before the offset.
+    }
+    return llvm::None; // no more tokens, we "reached" the offset.
+  }
+
   /// Adds empty mappings for unconsumed spelled tokens at the end of each file.
   void fillGapsAtEndOfFiles() {
     for (auto &F : Result.Files) {
-      unsigned Next = NextSpelled[F.first];
-      if (F.second.SpelledTokens.size() == Next)
-        continue; // All spelled tokens are accounted for.
-
-      // Record a mapping for the gap at the end of the spelled tokens.
-      TokenBuffer::Mapping M;
-      M.BeginSpelled = Next;
-      M.EndSpelled = F.second.SpelledTokens.size();
-      M.BeginExpanded = F.second.EndExpanded;
-      M.EndExpanded = F.second.EndExpanded;
-
-      F.second.Mappings.push_back(M);
+      if (F.second.SpelledTokens.empty())
+        continue;
+      fillGapUntil(F.second, F.second.SpelledTokens.back().endLocation(),
+                   F.second.EndExpanded);
     }
   }
 
   TokenBuffer Result;
   /// For each file, a position of the next spelled token we will consume.
   llvm::DenseMap<FileID, unsigned> NextSpelled;
+  SpelledMappings Mappings;
   const SourceManager &SM;
   const LangOptions &LangOpts;
 };
 
 TokenBuffer TokenCollector::consume() && {
   PP.setTokenWatcher(nullptr);
-  return Builder(std::move(Expanded), PP.getSourceManager(), PP.getLangOpts())
+  CB->disable();
+  return Builder(std::move(Expanded), std::move(Mappings),
+                 PP.getSourceManager(), PP.getLangOpts())
       .build();
 }
 
Index: clang/include/clang/Tooling/Syntax/Tokens.h
===================================================================
--- clang/include/clang/Tooling/Syntax/Tokens.h
+++ clang/include/clang/Tooling/Syntax/Tokens.h
@@ -208,6 +208,8 @@
   ///        #pragma, etc.
   llvm::ArrayRef<syntax::Token> spelledTokens(FileID FID) const;
 
+  const SourceManager &sourceManager() const { return *SourceMgr; }
+
   std::string dumpForTests() const;
 
 private:
@@ -291,9 +293,18 @@
   LLVM_NODISCARD TokenBuffer consume() &&;
 
 private:
+  /// Maps from a start location to an end location of each mapping.
+  ///   1. range from '#' to the last token in the line for PP directives,
+  ///   2. macro name and arguments for macro expansions.
+  using SpelledMappings =
+      llvm::DenseMap</*SourceLocation*/ int, SourceLocation>;
   class Builder;
+  class Callbacks;
+
   std::vector<syntax::Token> Expanded;
+  SpelledMappings Mappings;
   Preprocessor &PP;
+  Callbacks *CB;
 };
 
 } // namespace syntax
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to