[PATCH] D134685: Fix SourceManager::isBeforeInTranslationUnit bug with token-pasting

Sam McCall via Phabricator via cfe-commits Wed, 05 Oct 2022 09:29:23 -0700

This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
sammccall marked an inline comment as done.
Closed by commit rG41b51007e637: Fix SourceManager::isBeforeInTranslationUnit 
bug with token-pasting (authored by sammccall).


Changed prior to commit:
  https://reviews.llvm.org/D134685?vs=463053&id=465437#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D134685/new/

https://reviews.llvm.org/D134685

Files:
  clang-tools-extra/clangd/unittests/SelectionTests.cpp
  clang/include/clang/Basic/SourceManager.h
  clang/lib/Basic/SourceManager.cpp
  clang/unittests/Basic/SourceManagerTest.cpp

Index: clang/unittests/Basic/SourceManagerTest.cpp
===================================================================
--- clang/unittests/Basic/SourceManagerTest.cpp
+++ clang/unittests/Basic/SourceManagerTest.cpp
@@ -171,6 +171,83 @@
   EXPECT_TRUE(SourceMgr.isBeforeInTranslationUnit(idLoc, macroExpEndLoc));
 }
 
+TEST_F(SourceManagerTest, isBeforeInTranslationUnitWithTokenSplit) {
+  const char *main = R"cpp(
+    #define ID(X) X
+    ID(
+      ID(a >> b)
+      c
+    )
+  )cpp";
+
+  SourceMgr.setMainFileID(
+      SourceMgr.createFileID(llvm::MemoryBuffer::getMemBuffer(main)));
+
+  TrivialModuleLoader ModLoader;
+  HeaderSearch HeaderInfo(std::make_shared<HeaderSearchOptions>(), SourceMgr,
+                          Diags, LangOpts, &*Target);
+  Preprocessor PP(std::make_shared<PreprocessorOptions>(), Diags, LangOpts,
+                  SourceMgr, HeaderInfo, ModLoader,
+                  /*IILookup =*/nullptr,
+                  /*OwnsHeaderSearch =*/false);
+  PP.Initialize(*Target);
+  PP.EnterMainSourceFile();
+  llvm::SmallString<8> Scratch;
+
+  std::vector<Token> toks;
+  while (1) {
+    Token tok;
+    PP.Lex(tok);
+    if (tok.is(tok::eof))
+      break;
+    toks.push_back(tok);
+  }
+
+  // Make sure we got the tokens that we expected.
+  ASSERT_EQ(4U, toks.size()) << "a >> b c";
+  // Sanity check their order.
+  for (unsigned I = 0; I < toks.size() - 1; ++I) {
+    EXPECT_TRUE(SourceMgr.isBeforeInTranslationUnit(toks[I].getLocation(),
+                                                    toks[I + 1].getLocation()));
+    EXPECT_FALSE(SourceMgr.isBeforeInTranslationUnit(toks[I + 1].getLocation(),
+                                                     toks[I].getLocation()));
+  }
+
+  // Split the >> into two > tokens, as happens when parsing nested templates.
+  unsigned RightShiftIndex = 1;
+  SourceLocation RightShift = toks[RightShiftIndex].getLocation();
+  EXPECT_EQ(">>", Lexer::getSpelling(SourceMgr.getSpellingLoc(RightShift),
+                                     Scratch, SourceMgr, LangOpts));
+  SourceLocation Greater1 = PP.SplitToken(RightShift, /*Length=*/1);
+  SourceLocation Greater2 = RightShift.getLocWithOffset(1);
+  EXPECT_TRUE(Greater1.isMacroID());
+  EXPECT_EQ(">", Lexer::getSpelling(SourceMgr.getSpellingLoc(Greater1), Scratch,
+                                    SourceMgr, LangOpts));
+  EXPECT_EQ(">", Lexer::getSpelling(SourceMgr.getSpellingLoc(Greater2), Scratch,
+                                    SourceMgr, LangOpts));
+  EXPECT_EQ(SourceMgr.getImmediateExpansionRange(Greater1).getBegin(),
+            RightShift);
+
+  for (unsigned I = 0; I < toks.size(); ++I) {
+    SCOPED_TRACE("Token " + std::to_string(I));
+    // Right-shift is the parent of Greater1, so it compares less.
+    EXPECT_EQ(
+        SourceMgr.isBeforeInTranslationUnit(toks[I].getLocation(), Greater1),
+        I <= RightShiftIndex);
+    EXPECT_EQ(
+        SourceMgr.isBeforeInTranslationUnit(toks[I].getLocation(), Greater2),
+        I <= RightShiftIndex);
+    EXPECT_EQ(
+        SourceMgr.isBeforeInTranslationUnit(Greater1, toks[I].getLocation()),
+        RightShiftIndex < I);
+    EXPECT_EQ(
+        SourceMgr.isBeforeInTranslationUnit(Greater2, toks[I].getLocation()),
+        RightShiftIndex < I);
+  }
+  EXPECT_TRUE(SourceMgr.isBeforeInTranslationUnit(Greater1, Greater2));
+  EXPECT_FALSE(SourceMgr.isBeforeInTranslationUnit(Greater2, Greater1));
+}
+
 TEST_F(SourceManagerTest, getColumnNumber) {
   const char *Source =
     "int x;\n"
Index: clang/lib/Basic/SourceManager.cpp
===================================================================
--- clang/lib/Basic/SourceManager.cpp
+++ clang/lib/Basic/SourceManager.cpp
@@ -1992,6 +1992,7 @@
   // This is a magic number for limiting the cache size.  It was experimentally
   // derived from a small Objective-C project (where the cache filled
   // out to ~250 items).  We can make it larger if necessary.
+  // FIXME: this is almost certainly full these days. Use an LRU cache?
   enum { MagicCacheSize = 300 };
   IsBeforeInTUCacheKey Key(LFID, RFID);
 
@@ -2000,7 +2001,7 @@
   // use.  When they update the value, the cache will get automatically
   // updated as well.
   if (IBTUCache.size() < MagicCacheSize)
-    return IBTUCache[Key];
+    return IBTUCache.try_emplace(Key, LFID, RFID).first->second;
 
   // Otherwise, do a lookup that will not construct a new value.
   InBeforeInTUCache::iterator I = IBTUCache.find(Key);
@@ -2008,6 +2009,7 @@
     return I->second;
 
   // Fall back to the overflow value.
+  IBTUCacheOverflow.setQueryFIDs(LFID, RFID);
   return IBTUCacheOverflow;
 }
 
@@ -2082,43 +2084,62 @@
 
   // If we are comparing a source location with multiple locations in the same
   // file, we get a big win by caching the result.
-  if (IsBeforeInTUCache.isCacheValid(LOffs.first, ROffs.first))
+  if (IsBeforeInTUCache.isCacheValid())
     return std::make_pair(
         true, IsBeforeInTUCache.getCachedResult(LOffs.second, ROffs.second));
 
-  // Okay, we missed in the cache, start updating the cache for this query.
-  IsBeforeInTUCache.setQueryFIDs(LOffs.first, ROffs.first,
-                          /*isLFIDBeforeRFID=*/LOffs.first.ID < ROffs.first.ID);
-
+  // Okay, we missed in the cache, we'll compute the answer and populate it.
   // We need to find the common ancestor. The only way of doing this is to
   // build the complete include chain for one and then walking up the chain
   // of the other looking for a match.
-  // We use a map from FileID to Offset to store the chain. Easier than writing
-  // a custom set hash info that only depends on the first part of a pair.
-  using LocSet = llvm::SmallDenseMap<FileID, unsigned, 16>;
-  LocSet LChain;
+
+  // A location within a FileID on the path up from LOffs to the main file.
+  struct Entry {
+    unsigned Offset;
+    FileID ParentFID; // Used for breaking ties.
+  };
+  llvm::SmallDenseMap<FileID, Entry, 16> LChain;
+
+  FileID Parent;
   do {
-    LChain.insert(LOffs);
+    LChain.try_emplace(LOffs.first, Entry{LOffs.second, Parent});
     // We catch the case where LOffs is in a file included by ROffs and
     // quit early. The other way round unfortunately remains suboptimal.
-  } while (LOffs.first != ROffs.first && !MoveUpIncludeHierarchy(LOffs, *this));
-  LocSet::iterator I;
-  while((I = LChain.find(ROffs.first)) == LChain.end()) {
-    if (MoveUpIncludeHierarchy(ROffs, *this))
-      break; // Met at topmost file.
-  }
-  if (I != LChain.end())
-    LOffs = *I;
+    if (LOffs.first == ROffs.first)
+      break;
+    Parent = LOffs.first;
+  } while (!MoveUpIncludeHierarchy(LOffs, *this));
 
-  // If we exited because we found a nearest common ancestor, compare the
-  // locations within the common file and cache them.
-  if (LOffs.first == ROffs.first) {
-    IsBeforeInTUCache.setCommonLoc(LOffs.first, LOffs.second, ROffs.second);
-    return std::make_pair(
-        true, IsBeforeInTUCache.getCachedResult(LOffs.second, ROffs.second));
-  }
-  // Clear the lookup cache, it depends on a common location.
-  IsBeforeInTUCache.clear();
+  Parent = FileID();
+  do {
+    auto I = LChain.find(ROffs.first);
+    if (I != LChain.end()) {
+      // Compare the locations within the common file and cache them.
+      LOffs.first = I->first;
+      LOffs.second = I->second.Offset;
+      // The relative order of LParent and RParent is a tiebreaker when
+      // - locs expand to the same location (occurs in macro arg expansion)
+      // - one loc is a parent of the other (we consider the parent as "first")
+      // For the parent to be first, the invalid file ID must compare smaller.
+      // However loaded FileIDs are <0, so we perform *unsigned* comparison!
+      // This changes the relative order of local vs loaded FileIDs, but it
+      // doesn't matter as these are never mixed in macro expansion.
+      unsigned LParent = I->second.ParentFID.ID;
+      unsigned RParent = Parent.ID;
+      assert((LOffs.second != ROffs.second) || (LParent == 0 || RParent == 0) ||
+             isInSameSLocAddrSpace(getComposedLoc(I->second.ParentFID, 0),
+                                   getComposedLoc(Parent, 0), nullptr) &&
+                 "Mixed local/loaded FileIDs with same include location?");
+      IsBeforeInTUCache.setCommonLoc(LOffs.first, LOffs.second, ROffs.second,
+                                     LParent < RParent);
+      return std::make_pair(
+          true, IsBeforeInTUCache.getCachedResult(LOffs.second, ROffs.second));
+    }
+    Parent = ROffs.first;
+  } while (!MoveUpIncludeHierarchy(ROffs, *this));
+
+  // If we found no match, we're not in the same TU.
+  // We don't cache this, but it is rare.
   return std::make_pair(false, false);
 }
 
Index: clang/include/clang/Basic/SourceManager.h
===================================================================
--- clang/include/clang/Basic/SourceManager.h
+++ clang/include/clang/Basic/SourceManager.h
@@ -542,10 +542,10 @@
   /// If these match up with a subsequent query, the result can be reused.
   FileID LQueryFID, RQueryFID;
 
-  /// True if LQueryFID was created before RQueryFID.
+  /// The relative order of FileIDs that the CommonFID *immediately* includes.
   ///
   /// This is used to compare macro expansion locations.
-  bool IsLQFIDBeforeRQFID;
+  bool LChildBeforeRChild;
 
   /// The file found in common between the two \#include traces, i.e.,
   /// the nearest common ancestor of the \#include tree.
@@ -559,12 +559,17 @@
   unsigned LCommonOffset, RCommonOffset;
 
 public:
+  InBeforeInTUCacheEntry() = default;
+  InBeforeInTUCacheEntry(FileID L, FileID R) : LQueryFID(L), RQueryFID(R) {
+    assert(L != R);
+  }
+
   /// Return true if the currently cached values match up with
   /// the specified LHS/RHS query.
   ///
   /// If not, we can't use the cache.
-  bool isCacheValid(FileID LHS, FileID RHS) const {
-    return LQueryFID == LHS && RQueryFID == RHS;
+  bool isCacheValid() const {
+    return CommonFID.isValid();
   }
 
   /// If the cache is valid, compute the result given the
@@ -581,29 +586,28 @@
     // one of the locations points at the inclusion/expansion point of the other
     // in which case its FileID will come before the other.
     if (LOffset == ROffset)
-      return IsLQFIDBeforeRQFID;
+      return LChildBeforeRChild;
 
     return LOffset < ROffset;
   }
 
   /// Set up a new query.
-  void setQueryFIDs(FileID LHS, FileID RHS, bool isLFIDBeforeRFID) {
+  /// If it matches the old query, we can keep the cached answer.
+  void setQueryFIDs(FileID LHS, FileID RHS) {
     assert(LHS != RHS);
-    LQueryFID = LHS;
-    RQueryFID = RHS;
-    IsLQFIDBeforeRQFID = isLFIDBeforeRFID;
-  }
-
-  void clear() {
-    LQueryFID = RQueryFID = FileID();
-    IsLQFIDBeforeRQFID = false;
+    if (LQueryFID != LHS || RQueryFID != RHS) {
+      LQueryFID = LHS;
+      RQueryFID = RHS;
+      CommonFID = FileID();
+    }
   }
 
   void setCommonLoc(FileID commonFID, unsigned lCommonOffset,
-                    unsigned rCommonOffset) {
+                    unsigned rCommonOffset, bool LParentBeforeRParent) {
     CommonFID = commonFID;
     LCommonOffset = lCommonOffset;
     RCommonOffset = rCommonOffset;
+    LChildBeforeRChild = LParentBeforeRParent;
   }
 };
 
Index: clang-tools-extra/clangd/unittests/SelectionTests.cpp
===================================================================
--- clang-tools-extra/clangd/unittests/SelectionTests.cpp
+++ clang-tools-extra/clangd/unittests/SelectionTests.cpp
@@ -706,8 +706,24 @@
   Test = Annotations(Case);
   AST = TestTU::withCode(Test.code()).build();
   T = makeSelectionTree(Case, AST);
-
   EXPECT_EQ("IntegerLiteral", T.commonAncestor()->kind());
+
+  // Reduced from private bug involving RETURN_IF_ERROR.
+  // Due to >>-splitting and a bug in isBeforeInTranslationUnit, the inner
+  // S<int> would claim way too many tokens.
+  Case = R"cpp(
+    #define ID(x) x
+    template <typename T> class S {};
+    ID(
+      ID(S<S<int>> x);
+      int ^y;
+    )
+  )cpp";
+  Test = Annotations(Case);
+  AST = TestTU::withCode(Test.code()).build();
+  T = makeSelectionTree(Case, AST);
+  // not TemplateSpecializationTypeLoc!
+  EXPECT_EQ("VarDecl", T.commonAncestor()->kind());
 }
 
 TEST(SelectionTest, Implicit) {

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D134685: Fix SourceManager::isBeforeInTranslationUnit bug with token-pasting

Reply via email to