nridge updated this revision to Diff 247529.
nridge added a comment.

Address some review comments


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D72874/new/

https://reviews.llvm.org/D72874

Files:
  clang-tools-extra/clangd/SourceCode.cpp
  clang-tools-extra/clangd/SourceCode.h
  clang-tools-extra/clangd/XRefs.cpp
  clang-tools-extra/clangd/unittests/XRefsTests.cpp

Index: clang-tools-extra/clangd/unittests/XRefsTests.cpp
===================================================================
--- clang-tools-extra/clangd/unittests/XRefsTests.cpp
+++ clang-tools-extra/clangd/unittests/XRefsTests.cpp
@@ -586,6 +586,52 @@
   }
 }
 
+TEST(LocateSymbol, Textual) {
+  const char *Tests[] = {
+      R"cpp(// Comment
+        struct [[MyClass]] {};
+        // Comment mentioning M^yClass
+      )cpp",
+      R"cpp(// String
+        struct [[MyClass]] {};
+        const char* s = "String literal mentioning M^yClass";
+      )cpp",
+      R"cpp(// Invalid code
+        /*error-ok*/
+        int [[myFunction]](int);
+        int var = m^yFunction();
+      )cpp",
+      R"cpp(// Dependent type
+        struct Foo {
+          void [[uniqueMethodName]]();
+        };
+        template <typename T>
+        void f(T t) {
+          t->u^niqueMethodName();
+        }
+      )cpp"};
+
+  for (const char *Test : Tests) {
+    Annotations T(Test);
+    llvm::Optional<Range> WantDecl;
+    if (!T.ranges().empty())
+      WantDecl = T.range();
+
+    auto TU = TestTU::withCode(T.code());
+
+    auto AST = TU.build();
+    auto Index = TU.index();
+    auto Results = locateSymbolAt(AST, T.point(), Index.get());
+
+    if (!WantDecl) {
+      EXPECT_THAT(Results, IsEmpty()) << Test;
+    } else {
+      ASSERT_THAT(Results, ::testing::SizeIs(1)) << Test;
+      EXPECT_EQ(Results[0].PreferredDeclaration.range, *WantDecl) << Test;
+    }
+  }
+}
+
 TEST(LocateSymbol, Ambiguous) {
   auto T = Annotations(R"cpp(
     struct Foo {
@@ -660,6 +706,27 @@
                                    Sym("baz", T.range("StaticOverload2"))));
 }
 
+TEST(LocateSymbol, TextualAmbiguous) {
+  auto T = Annotations(R"cpp(
+        struct Foo {
+          void $FooLoc[[uniqueMethodName]]();
+        };
+        struct Bar {
+          void $BarLoc[[uniqueMethodName]]();
+        };
+        template <typename T>
+        void f(T t) {
+          t->u^niqueMethodName();
+        }
+      )cpp");
+  auto TU = TestTU::withCode(T.code());
+  auto AST = TU.build();
+  auto Index = TU.index();
+  EXPECT_THAT(locateSymbolAt(AST, T.point(), Index.get()),
+              UnorderedElementsAre(Sym("uniqueMethodName", T.range("FooLoc")),
+                                   Sym("uniqueMethodName", T.range("BarLoc"))));
+}
+
 TEST(LocateSymbol, TemplateTypedefs) {
   auto T = Annotations(R"cpp(
     template <class T> struct function {};
Index: clang-tools-extra/clangd/XRefs.cpp
===================================================================
--- clang-tools-extra/clangd/XRefs.cpp
+++ clang-tools-extra/clangd/XRefs.cpp
@@ -10,9 +10,11 @@
 #include "CodeCompletionStrings.h"
 #include "FindSymbols.h"
 #include "FindTarget.h"
+#include "FuzzyMatch.h"
 #include "Logger.h"
 #include "ParsedAST.h"
 #include "Protocol.h"
+#include "Quality.h"
 #include "Selection.h"
 #include "SourceCode.h"
 #include "URI.h"
@@ -45,6 +47,7 @@
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/raw_ostream.h"
+#include <cctype>
 
 namespace clang {
 namespace clangd {
@@ -191,6 +194,116 @@
   return Result;
 }
 
+bool isLikelyToBeIdentifier(StringRef Word) {
+  // Word contains underscore
+  if (Word.contains('_')) {
+    return true;
+  }
+  // Word contains capital letter other than at beginning
+  if (Word.substr(1).find_if([](char C) { return std::isupper(C); }) !=
+      StringRef::npos) {
+    return true;
+  }
+  // FIXME: There are other signals we could listen for.
+  // Some of these require inspecting the surroundings of the word as well.
+  //   - mid-sentence Capitalization
+  //   - markup like quotes / backticks / brackets / "\p"
+  //   - word is used as an identifier in nearby token (very close if very
+  //     short, anywhere in file if longer)
+  //   - word has a qualifier (foo::bar)
+  return false;
+}
+
+using ScoredLocatedSymbol = std::pair<float, LocatedSymbol>;
+struct ScoredSymbolGreater {
+  bool operator()(const ScoredLocatedSymbol &L, const ScoredLocatedSymbol &R) {
+    if (L.first != R.first)
+      return L.first > R.first;
+    return L.second.Name < R.second.Name; // Earlier name is better.
+  }
+};
+
+std::vector<LocatedSymbol> navigationFallback(ParsedAST &AST,
+                                              const SymbolIndex *Index,
+                                              Position Pos,
+                                              const std::string &MainFilePath) {
+  const auto &SM = AST.getSourceManager();
+  auto SourceRange = getWordAtPosition(Pos, SM, AST.getLangOpts());
+  auto QueryString = toSourceCode(SM, SourceRange);
+  if (!isLikelyToBeIdentifier(QueryString)) {
+    return {};
+  }
+
+  FuzzyFindRequest Req;
+  Req.Query = QueryString.str();
+  Req.ProximityPaths = {MainFilePath};
+  // FIXME: Set Req.Scopes to the lexically enclosing scopes.
+  // For extra strictness, consider AnyScope=false.
+  Req.AnyScope = true;
+  // Choose a limit that's large enough that it contains the user's desired
+  // target even in the presence of some false positives, but small enough that
+  // it doesn't generate too much noise.
+  Req.Limit = 5;
+  TopN<ScoredLocatedSymbol, ScoredSymbolGreater> Top(*Req.Limit);
+  FuzzyMatcher Filter(Req.Query);
+  auto MainFileURI = URIForFile::canonicalize(MainFilePath, MainFilePath);
+  bool HaveResultInMainFile = false;
+  Index->fuzzyFind(Req, [&](const Symbol &Sym) {
+    auto Loc = symbolToLocation(Sym, MainFilePath);
+    if (!Loc) {
+      log("Navigation fallback: {0}", Loc.takeError());
+      return;
+    }
+
+    // For now, only consider exact name matches, including case.
+    // This is to avoid too many false positives.
+    // We could relax this in the future if we make the query more accurate
+    // by other means.
+    if (Sym.Name != QueryString)
+      return;
+
+    if (Loc->uri == MainFileURI)
+      HaveResultInMainFile = true;
+
+    std::string Scope = std::string(Sym.Scope);
+    llvm::StringRef ScopeRef = Scope;
+    ScopeRef.consume_back("::");
+    LocatedSymbol Located;
+    Located.Name = (Sym.Name + Sym.TemplateSpecializationArgs).str();
+    Located.PreferredDeclaration = *Loc;
+    // FIXME: Populate Definition?
+
+    SymbolQualitySignals Quality;
+    Quality.merge(Sym);
+    SymbolRelevanceSignals Relevance;
+    Relevance.Name = Sym.Name;
+    Relevance.Query = SymbolRelevanceSignals::Generic;
+    if (auto NameMatch = Filter.match(Sym.Name))
+      Relevance.NameMatch = *NameMatch;
+    else {
+      log("Navigation fallback: {0} didn't match query {1}", Sym.Name,
+          Filter.pattern());
+      return;
+    }
+    Relevance.merge(Sym);
+    auto Score =
+        evaluateSymbolAndRelevance(Quality.evaluate(), Relevance.evaluate());
+    dlog("Navigation fallback: {0}{1} = {2}\n{3}{4}\n", Sym.Scope, Sym.Name,
+         Score, Quality, Relevance);
+
+    Top.push({Score, std::move(Located)});
+  });
+  std::vector<LocatedSymbol> Result;
+  for (auto &Res : std::move(Top).items())
+    Result.push_back(std::move(Res.second));
+  // If we have more than 3 results, and none from the current file, don't
+  // return anything, as confidence is too low.
+  // FIXME: Alternatively, try a stricter query?
+  if (Result.size() > 3 && !HaveResultInMainFile)
+    return {};
+  return Result;
+}
+
 std::vector<LocatedSymbol> locateSymbolAt(ParsedAST &AST, Position Pos,
                                           const SymbolIndex *Index) {
   const auto &SM = AST.getSourceManager();
@@ -341,6 +454,10 @@
     });
   }
 
+  if (Result.empty()) {
+    return navigationFallback(AST, Index, Pos, *MainFilePath);
+  }
+
   return Result;
 }
 
Index: clang-tools-extra/clangd/SourceCode.h
===================================================================
--- clang-tools-extra/clangd/SourceCode.h
+++ clang-tools-extra/clangd/SourceCode.h
@@ -68,7 +68,7 @@
 /// FIXME: This should return an error if the location is invalid.
 Position sourceLocToPosition(const SourceManager &SM, SourceLocation Loc);
 
-/// Returns the taken range at \p TokLoc.
+/// Returns the token range at \p TokLoc.
 llvm::Optional<Range> getTokenRange(const SourceManager &SM,
                                     const LangOptions &LangOpts,
                                     SourceLocation TokLoc);
@@ -86,6 +86,13 @@
                                         const SourceManager &SM,
                                         const LangOptions &LangOpts);
 
+/// Get the source range of the raw word at a specified \p Pos in the main file.
+/// This is similar to the token at the specified position, but for positions
+/// inside comments and strings, it only returns a single word rather than
+/// the entire comment or string token.
+SourceRange getWordAtPosition(const Position &Pos, const SourceManager &SM,
+                              const LangOptions &LangOpts);
+
 /// Returns true iff \p Loc is inside the main file. This function handles
 /// file & macro locations. For macro locations, returns iff the macro is being
 /// expanded inside the main file.
Index: clang-tools-extra/clangd/SourceCode.cpp
===================================================================
--- clang-tools-extra/clangd/SourceCode.cpp
+++ clang-tools-extra/clangd/SourceCode.cpp
@@ -272,6 +272,42 @@
   return Other;
 }
 
+SourceLocation getRawWordBegin(SourceLocation Loc, const SourceManager &SM,
+                               const LangOptions &LangOpts) {
+  llvm::StringRef Buf = SM.getBufferData(SM.getMainFileID());
+  FileID FID;
+  unsigned Offset;
+  std::tie(FID, Offset) = SM.getDecomposedLoc(Loc);
+  unsigned Start = Offset;
+  while (Start > 0 && Lexer::isIdentifierBodyChar(Buf[Start - 1], LangOpts)) {
+    --Start;
+  }
+  return SM.getComposedLoc(FID, Start);
+}
+
+SourceLocation getRawWordEnd(SourceLocation Loc, const SourceManager &SM,
+                             const LangOptions &LangOpts) {
+  llvm::StringRef Buf = SM.getBufferData(SM.getMainFileID());
+  FileID FID;
+  unsigned Offset;
+  std::tie(FID, Offset) = SM.getDecomposedLoc(Loc);
+  unsigned End = Offset;
+  while (End < Buf.size() && Lexer::isIdentifierBodyChar(Buf[End], LangOpts))
+    ++End;
+  return SM.getComposedLoc(FID, End);
+}
+
+SourceLocation getEndOfIdentifier(SourceLocation Loc, const SourceManager &SM,
+                                  const LangOptions &LangOpts) {
+  if (!Loc.isValid())
+    return SourceLocation{};
+  bool Raw = getTokenFlavor(Loc, SM, LangOpts) == Other;
+  if (Raw) {
+    return getRawWordEnd(Loc, SM, LangOpts);
+  }
+  return Lexer::getLocForEndOfToken(Loc, 0, SM, LangOpts);
+}
+
 } // namespace
 
 SourceLocation getBeginningOfIdentifier(const Position &Pos,
@@ -311,8 +347,9 @@
     // For interesting token, we return the beginning of the token.
     if (CurrentKind == Identifier || CurrentKind == Operator)
       return CurrentTokBeginning;
-    // otherwise, we return the original loc.
-    return InputLoc;
+    // Otherwise, return the beginning of the raw word at the given position.
+    // This facilitates selecting identifiers in comments and strings.
+    return getRawWordBegin(InputLoc, SM, LangOpts);
   }
 
   // Whitespace is not interesting.
@@ -337,6 +374,13 @@
   return InputLoc;
 }
 
+SourceRange getWordAtPosition(const Position &Pos, const SourceManager &SM,
+                              const LangOptions &LangOpts) {
+  SourceLocation Begin = getBeginningOfIdentifier(Pos, SM, LangOpts);
+  SourceLocation End = getEndOfIdentifier(Begin, SM, LangOpts);
+  return {Begin, End};
+}
+
 bool isValidFileRange(const SourceManager &Mgr, SourceRange R) {
   if (!R.getBegin().isValid() || !R.getEnd().isValid())
     return false;
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to