[PATCH] D72874: [clangd] Add a textual fallback for go-to-definition

Nathan Ridge via Phabricator via cfe-commits Thu, 16 Jan 2020 13:37:14 -0800

nridge created this revision.
nridge added a reviewer: sammccall.
Herald added subscribers: cfe-commits, usaxena95, kadircet, arphaman, jkorous, 
MaskRay, ilya-biryukov.
Herald added a project: clang.


This facilitates performing go-to-definition inside comments, strings,
invalid code, and dependent contexts where AST-based heuristics are
insufficient.

Fixes https://github.com/clangd/clangd/issues/241


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D72874

Files:
  clang-tools-extra/clangd/SourceCode.cpp
  clang-tools-extra/clangd/SourceCode.h
  clang-tools-extra/clangd/XRefs.cpp
  clang-tools-extra/clangd/unittests/XRefsTests.cpp

Index: clang-tools-extra/clangd/unittests/XRefsTests.cpp
===================================================================
--- clang-tools-extra/clangd/unittests/XRefsTests.cpp
+++ clang-tools-extra/clangd/unittests/XRefsTests.cpp
@@ -585,6 +585,51 @@
   }
 }
 
+TEST(LocateSymbol, Textual) {
+  const char *Tests[] = {
+      R"cpp(// Comment
+        struct [[Foo]] {};
+        // Comment mentioning F^oo
+      )cpp",
+      R"cpp(// String
+        struct [[Foo]] {};
+        const char* = "String literal mentioning F^oo";
+      )cpp",
+      R"cpp(// Invalid code
+        int [[foo]](int);
+        int var = f^oo();
+      )cpp",
+      R"cpp(// Dependent type
+        struct Foo {
+          void [[uniqueMethodName]]();
+        };
+        template <typename T>
+        void f(T t) {
+          t->u^niqueMethodName();
+        }
+      )cpp"};
+
+  for (const char *Test : Tests) {
+    Annotations T(Test);
+    llvm::Optional<Range> WantDecl;
+    if (!T.ranges().empty())
+      WantDecl = T.range();
+
+    auto TU = TestTU::withCode(T.code());
+
+    auto AST = TU.build();
+    auto Index = TU.index();
+    auto Results = locateSymbolAt(AST, T.point(), Index.get());
+
+    if (!WantDecl) {
+      EXPECT_THAT(Results, IsEmpty()) << Test;
+    } else {
+      ASSERT_THAT(Results, ::testing::SizeIs(1)) << Test;
+      EXPECT_EQ(Results[0].PreferredDeclaration.range, *WantDecl) << Test;
+    }
+  }
+}
+
 TEST(LocateSymbol, Ambiguous) {
   auto T = Annotations(R"cpp(
     struct Foo {
@@ -659,6 +704,27 @@
                                    Sym("baz", T.range("StaticOverload2"))));
 }
 
+TEST(LocateSymbol, TextualAmbiguous) {
+  auto T = Annotations(R"cpp(
+        struct Foo {
+          void $FooLoc[[uniqueMethodName]]();
+        };
+        struct Bar {
+          void $BarLoc[[uniqueMethodName]]();
+        };
+        template <typename T>
+        void f(T t) {
+          t->u^niqueMethodName();
+        }
+      )cpp");
+  auto TU = TestTU::withCode(T.code());
+  auto AST = TU.build();
+  auto Index = TU.index();
+  EXPECT_THAT(locateSymbolAt(AST, T.point(), Index.get()),
+              UnorderedElementsAre(Sym("uniqueMethodName", T.range("FooLoc")),
+                                   Sym("uniqueMethodName", T.range("BarLoc"))));
+}
+
 TEST(LocateSymbol, TemplateTypedefs) {
   auto T = Annotations(R"cpp(
     template <class T> struct function {};
Index: clang-tools-extra/clangd/XRefs.cpp
===================================================================
--- clang-tools-extra/clangd/XRefs.cpp
+++ clang-tools-extra/clangd/XRefs.cpp
@@ -187,6 +187,32 @@
   return Result;
 }
 
+std::vector<LocatedSymbol> navigationFallback(ParsedAST &AST,
+                                              const SymbolIndex *Index,
+                                              Position Pos,
+                                              const std::string &MainFilePath) {
+  const auto &SM = AST.getSourceManager();
+  auto SourceRange = getWordAtPosition(Pos, SM, AST.getLangOpts());
+  auto QueryString = toSourceCode(SM, SourceRange);
+  // Choose a limit that's large enough that it contains the user's desired
+  // target even in the presence of some false positives, but small enough that
+  // it doesn't generate too much noise.
+  int Limit = 5;
+  auto Symbols = getWorkspaceSymbols(QueryString, Limit, Index, MainFilePath);
+  if (!Symbols) {
+    elog("Workspace symbols failed", Symbols.takeError());
+  }
+  std::vector<LocatedSymbol> Result;
+  for (auto &Sym : *Symbols) {
+    LocatedSymbol Located;
+    Located.Name = Sym.name;
+    Located.PreferredDeclaration = Sym.location;
+    // TODO: Populate Definition?
+    Result.push_back(std::move(Located));
+  }
+  return Result;
+}
+
 std::vector<LocatedSymbol> locateSymbolAt(ParsedAST &AST, Position Pos,
                                           const SymbolIndex *Index) {
   const auto &SM = AST.getSourceManager();
@@ -318,6 +344,10 @@
     });
   }
 
+  if (Result.empty()) {
+    return navigationFallback(AST, Index, Pos, *MainFilePath);
+  }
+
   return Result;
 }
 
Index: clang-tools-extra/clangd/SourceCode.h
===================================================================
--- clang-tools-extra/clangd/SourceCode.h
+++ clang-tools-extra/clangd/SourceCode.h
@@ -68,7 +68,7 @@
 /// FIXME: This should return an error if the location is invalid.
 Position sourceLocToPosition(const SourceManager &SM, SourceLocation Loc);
 
-/// Returns the taken range at \p TokLoc.
+/// Returns the token range at \p TokLoc.
 llvm::Optional<Range> getTokenRange(const SourceManager &SM,
                                     const LangOptions &LangOpts,
                                     SourceLocation TokLoc);
@@ -86,6 +86,13 @@
                                         const SourceManager &SM,
                                         const LangOptions &LangOpts);
 
+/// Get the source range of the raw word at a specified \p Pos in the main file.
+/// This is similar to the token at the specified position, but for positions
+/// inside comments and strings, it only returns a single word rather than
+/// the entire comment or string token.
+SourceRange getWordAtPosition(const Position &Pos, const SourceManager &SM,
+                              const LangOptions &LangOpts);
+
 /// Returns true iff \p Loc is inside the main file. This function handles
 /// file & macro locations. For macro locations, returns iff the macro is being
 /// expanded inside the main file.
Index: clang-tools-extra/clangd/SourceCode.cpp
===================================================================
--- clang-tools-extra/clangd/SourceCode.cpp
+++ clang-tools-extra/clangd/SourceCode.cpp
@@ -272,6 +272,42 @@
   return Other;
 }
 
+SourceLocation getRawWordBegin(SourceLocation Loc, const SourceManager &SM,
+                               const LangOptions &LangOpts) {
+  llvm::StringRef Buf = SM.getBufferData(SM.getMainFileID());
+  FileID FID;
+  unsigned Offset;
+  std::tie(FID, Offset) = SM.getDecomposedLoc(Loc);
+  unsigned Start = Offset;
+  while (Start > 0 && Lexer::isIdentifierBodyChar(Buf[Start - 1], LangOpts)) {
+    --Start;
+  }
+  return SM.getComposedLoc(FID, Start);
+}
+
+SourceLocation getRawWordEnd(SourceLocation Loc, const SourceManager &SM,
+                             const LangOptions &LangOpts) {
+  llvm::StringRef Buf = SM.getBufferData(SM.getMainFileID());
+  FileID FID;
+  unsigned Offset;
+  std::tie(FID, Offset) = SM.getDecomposedLoc(Loc);
+  unsigned End = Offset;
+  while (End < Buf.size() && Lexer::isIdentifierBodyChar(Buf[End], LangOpts))
+    ++End;
+  return SM.getComposedLoc(FID, End);
+}
+
+SourceLocation getEndOfIdentifier(SourceLocation Loc, const SourceManager &SM,
+                                  const LangOptions &LangOpts) {
+  if (!Loc.isValid())
+    return SourceLocation{};
+  bool Raw = getTokenFlavor(Loc, SM, LangOpts) == Other;
+  if (Raw) {
+    return getRawWordEnd(Loc, SM, LangOpts);
+  }
+  return Lexer::getLocForEndOfToken(Loc, 0, SM, LangOpts);
+}
+
 } // namespace
 
 SourceLocation getBeginningOfIdentifier(const Position &Pos,
@@ -311,8 +347,9 @@
     // For interesting token, we return the beginning of the token.
     if (CurrentKind == Identifier || CurrentKind == Operator)
       return CurrentTokBeginning;
-    // otherwise, we return the original loc.
-    return InputLoc;
+    // Otherwise, return the beginning of the raw word at the given position.
+    // This facilitates selecting identifiers in comments and strings.
+    return getRawWordBegin(InputLoc, SM, LangOpts);
   }
 
   // Whitespace is not interesting.
@@ -337,6 +374,13 @@
   return InputLoc;
 }
 
+SourceRange getWordAtPosition(const Position &Pos, const SourceManager &SM,
+                              const LangOptions &LangOpts) {
+  SourceLocation Begin = getBeginningOfIdentifier(Pos, SM, LangOpts);
+  SourceLocation End = getEndOfIdentifier(Begin, SM, LangOpts);
+  return {Begin, End};
+}
+
 bool isValidFileRange(const SourceManager &Mgr, SourceRange R) {
   if (!R.getBegin().isValid() || !R.getEnd().isValid())
     return false;
@@ -793,97 +837,99 @@
 
   NamespaceEvent Event;
   lex(Code, format::getFormattingLangOpts(Style),
-      [&](const clang::Token &Tok,const SourceManager &SM) {
-    Event.Pos = sourceLocToPosition(SM, Tok.getLocation());
-    switch (Tok.getKind()) {
-    case tok::raw_identifier:
-      // In raw mode, this could be a keyword or a name.
-      switch (State) {
-      case UsingNamespace:
-      case UsingNamespaceName:
-        NSName.append(Tok.getRawIdentifier());
-        State = UsingNamespaceName;
-        break;
-      case Namespace:
-      case NamespaceName:
-        NSName.append(Tok.getRawIdentifier());
-        State = NamespaceName;
-        break;
-      case Using:
-        State =
-            (Tok.getRawIdentifier() == "namespace") ? UsingNamespace : Default;
-        break;
-      case Default:
-        NSName.clear();
-        if (Tok.getRawIdentifier() == "namespace")
-          State = Namespace;
-        else if (Tok.getRawIdentifier() == "using")
-          State = Using;
-        break;
-      }
-      break;
-    case tok::coloncolon:
-      // This can come at the beginning or in the middle of a namespace name.
-      switch (State) {
-      case UsingNamespace:
-      case UsingNamespaceName:
-        NSName.append("::");
-        State = UsingNamespaceName;
-        break;
-      case NamespaceName:
-        NSName.append("::");
-        State = NamespaceName;
-        break;
-      case Namespace: // Not legal here.
-      case Using:
-      case Default:
-        State = Default;
-        break;
-      }
-      break;
-    case tok::l_brace:
-      // Record which { started a namespace, so we know when } ends one.
-      if (State == NamespaceName) {
-        // Parsed: namespace <name> {
-        BraceStack.push_back(true);
-        Enclosing.push_back(NSName);
-        Event.Trigger = NamespaceEvent::BeginNamespace;
-        Event.Payload = llvm::join(Enclosing, "::");
-        Callback(Event);
-      } else {
-        // This case includes anonymous namespaces (State = Namespace).
-        // For our purposes, they're not namespaces and we ignore them.
-        BraceStack.push_back(false);
-      }
-      State = Default;
-      break;
-    case tok::r_brace:
-      // If braces are unmatched, we're going to be confused, but don't crash.
-      if (!BraceStack.empty()) {
-        if (BraceStack.back()) {
-          // Parsed: } // namespace
-          Enclosing.pop_back();
-          Event.Trigger = NamespaceEvent::EndNamespace;
-          Event.Payload = llvm::join(Enclosing, "::");
-          Callback(Event);
+      [&](const clang::Token &Tok, const SourceManager &SM) {
+        Event.Pos = sourceLocToPosition(SM, Tok.getLocation());
+        switch (Tok.getKind()) {
+        case tok::raw_identifier:
+          // In raw mode, this could be a keyword or a name.
+          switch (State) {
+          case UsingNamespace:
+          case UsingNamespaceName:
+            NSName.append(Tok.getRawIdentifier());
+            State = UsingNamespaceName;
+            break;
+          case Namespace:
+          case NamespaceName:
+            NSName.append(Tok.getRawIdentifier());
+            State = NamespaceName;
+            break;
+          case Using:
+            State = (Tok.getRawIdentifier() == "namespace") ? UsingNamespace
+                                                            : Default;
+            break;
+          case Default:
+            NSName.clear();
+            if (Tok.getRawIdentifier() == "namespace")
+              State = Namespace;
+            else if (Tok.getRawIdentifier() == "using")
+              State = Using;
+            break;
+          }
+          break;
+        case tok::coloncolon:
+          // This can come at the beginning or in the middle of a namespace
+          // name.
+          switch (State) {
+          case UsingNamespace:
+          case UsingNamespaceName:
+            NSName.append("::");
+            State = UsingNamespaceName;
+            break;
+          case NamespaceName:
+            NSName.append("::");
+            State = NamespaceName;
+            break;
+          case Namespace: // Not legal here.
+          case Using:
+          case Default:
+            State = Default;
+            break;
+          }
+          break;
+        case tok::l_brace:
+          // Record which { started a namespace, so we know when } ends one.
+          if (State == NamespaceName) {
+            // Parsed: namespace <name> {
+            BraceStack.push_back(true);
+            Enclosing.push_back(NSName);
+            Event.Trigger = NamespaceEvent::BeginNamespace;
+            Event.Payload = llvm::join(Enclosing, "::");
+            Callback(Event);
+          } else {
+            // This case includes anonymous namespaces (State = Namespace).
+            // For our purposes, they're not namespaces and we ignore them.
+            BraceStack.push_back(false);
+          }
+          State = Default;
+          break;
+        case tok::r_brace:
+          // If braces are unmatched, we're going to be confused, but don't
+          // crash.
+          if (!BraceStack.empty()) {
+            if (BraceStack.back()) {
+              // Parsed: } // namespace
+              Enclosing.pop_back();
+              Event.Trigger = NamespaceEvent::EndNamespace;
+              Event.Payload = llvm::join(Enclosing, "::");
+              Callback(Event);
+            }
+            BraceStack.pop_back();
+          }
+          break;
+        case tok::semi:
+          if (State == UsingNamespaceName) {
+            // Parsed: using namespace <name> ;
+            Event.Trigger = NamespaceEvent::UsingDirective;
+            Event.Payload = std::move(NSName);
+            Callback(Event);
+          }
+          State = Default;
+          break;
+        default:
+          State = Default;
+          break;
         }
-        BraceStack.pop_back();
-      }
-      break;
-    case tok::semi:
-      if (State == UsingNamespaceName) {
-        // Parsed: using namespace <name> ;
-        Event.Trigger = NamespaceEvent::UsingDirective;
-        Event.Payload = std::move(NSName);
-        Callback(Event);
-      }
-      State = Default;
-      break;
-    default:
-      State = Default;
-      break;
-    }
-  });
+      });
 }
 
 // Returns the prefix namespaces of NS: {"" ... NS}.

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D72874: [clangd] Add a textual fallback for go-to-definition

Reply via email to