ioeric updated this revision to Diff 151322.
ioeric added a comment.

- Rebase...


Repository:
  rCTE Clang Tools Extra

https://reviews.llvm.org/D47935

Files:
  clangd/CodeComplete.cpp
  clangd/Quality.cpp
  clangd/Quality.h
  unittests/clangd/QualityTests.cpp
  unittests/clangd/TestFS.cpp
  unittests/clangd/URITests.cpp

Index: unittests/clangd/URITests.cpp
===================================================================
--- unittests/clangd/URITests.cpp
+++ unittests/clangd/URITests.cpp
@@ -14,46 +14,20 @@
 
 namespace clang {
 namespace clangd {
+
+// Force the unittest URI scheme to be linked,
+extern volatile int UnittestSchemeAnchorSource;
+static int LLVM_ATTRIBUTE_UNUSED UnittestSchemeAnchorDest =
+    UnittestSchemeAnchorSource;
+
 namespace {
 
 using ::testing::AllOf;
 
 MATCHER_P(Scheme, S, "") { return arg.scheme() == S; }
 MATCHER_P(Authority, A, "") { return arg.authority() == A; }
 MATCHER_P(Body, B, "") { return arg.body() == B; }
 
-// Assume all files in the schema have a "test-root/" root directory, and the
-// schema path is the relative path to the root directory.
-// So the schema of "/some-dir/test-root/x/y/z" is "test:x/y/z".
-class TestScheme : public URIScheme {
-public:
-  static const char *Scheme;
-
-  static const char *TestRoot;
-
-  llvm::Expected<std::string>
-  getAbsolutePath(llvm::StringRef /*Authority*/, llvm::StringRef Body,
-                  llvm::StringRef HintPath) const override {
-    auto Pos = HintPath.find(TestRoot);
-    assert(Pos != llvm::StringRef::npos);
-    return (HintPath.substr(0, Pos + llvm::StringRef(TestRoot).size()) + Body)
-        .str();
-  }
-
-  llvm::Expected<URI>
-  uriFromAbsolutePath(llvm::StringRef AbsolutePath) const override {
-    auto Pos = AbsolutePath.find(TestRoot);
-    assert(Pos != llvm::StringRef::npos);
-    return URI(Scheme, /*Authority=*/"",
-               AbsolutePath.substr(Pos + llvm::StringRef(TestRoot).size()));
-  }
-};
-
-const char *TestScheme::Scheme = "unittest";
-const char *TestScheme::TestRoot = "/test-root/";
-
-static URISchemeRegistry::Add<TestScheme> X(TestScheme::Scheme, "Test schema");
-
 std::string createOrDie(llvm::StringRef AbsolutePath,
                         llvm::StringRef Scheme = "file") {
   auto Uri = URI::create(AbsolutePath, Scheme);
Index: unittests/clangd/TestFS.cpp
===================================================================
--- unittests/clangd/TestFS.cpp
+++ unittests/clangd/TestFS.cpp
@@ -7,6 +7,7 @@
 //
 //===----------------------------------------------------------------------===//
 #include "TestFS.h"
+#include "URI.h"
 #include "llvm/Support/Errc.h"
 #include "gtest/gtest.h"
 
@@ -62,5 +63,50 @@
   return Path.str();
 }
 
+// Assume all files in the schema have a "test-root/" root directory, and the
+// schema path is the relative path to the root directory.
+// So the schema of "/some-dir/test-root/x/y/z" is "test:x/y/z".
+class TestScheme : public URIScheme {
+public:
+  static const char *Scheme;
+
+  static const char *TestRoot;
+
+  llvm::Expected<std::string>
+  getAbsolutePath(llvm::StringRef /*Authority*/, llvm::StringRef Body,
+                  llvm::StringRef HintPath) const override {
+    auto Pos = HintPath.find(TestRoot);
+    assert(Pos != llvm::StringRef::npos);
+    return (HintPath.substr(0, Pos + llvm::StringRef(TestRoot).size()) + Body)
+        .str();
+  }
+
+  llvm::Expected<URI>
+  uriFromAbsolutePath(llvm::StringRef AbsolutePath) const override {
+    auto Pos = AbsolutePath.find(TestRoot);
+    if (Pos == llvm::StringRef::npos)
+      return llvm::make_error<llvm::StringError>(
+          llvm::Twine("Directory ") + TestRoot + " not found in path " +
+              AbsolutePath,
+          llvm::inconvertibleErrorCode());
+
+    return URI(Scheme, /*Authority=*/"",
+               AbsolutePath.substr(Pos + llvm::StringRef(TestRoot).size()));
+  }
+};
+
+const char *TestScheme::Scheme = "unittest";
+#ifdef _WIN32
+const char *TestScheme::TestRoot = "\\test-root\\";
+#else
+const char *TestScheme::TestRoot = "/test-root/";
+#endif
+
+static URISchemeRegistry::Add<TestScheme> X(TestScheme::Scheme, "Test schema");
+
+// This anchor is used to force the linker to link in the generated object file
+// and thus register the plugin.
+volatile int UnittestSchemeAnchorSource = 0;
+
 } // namespace clangd
 } // namespace clang
Index: unittests/clangd/QualityTests.cpp
===================================================================
--- unittests/clangd/QualityTests.cpp
+++ unittests/clangd/QualityTests.cpp
@@ -18,12 +18,19 @@
 //===----------------------------------------------------------------------===//
 
 #include "Quality.h"
+#include "TestFS.h"
 #include "TestTU.h"
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
 
 namespace clang {
 namespace clangd {
+
+// Force the unittest URI scheme to be linked,
+extern volatile int UnittestSchemeAnchorSource;
+static int LLVM_ATTRIBUTE_UNUSED UnittestSchemeAnchorDest =
+    UnittestSchemeAnchorSource;
+
 namespace {
 
 TEST(QualityTests, SymbolQualitySignalExtraction) {
@@ -87,13 +94,14 @@
 
   Relevance = {};
   Relevance.merge(CodeCompletionResult(&findDecl(AST, "main"), 42));
-  EXPECT_FLOAT_EQ(Relevance.ProximityScore, 1.0) << "Decl in current file";
+  EXPECT_FLOAT_EQ(Relevance.SemaProximityScore, 1.0) << "Decl in current file";
   Relevance = {};
   Relevance.merge(CodeCompletionResult(&findDecl(AST, "header"), 42));
-  EXPECT_FLOAT_EQ(Relevance.ProximityScore, 0.0) << "Decl from header";
+  EXPECT_FLOAT_EQ(Relevance.SemaProximityScore, 0.6) << "Decl from header";
   Relevance = {};
   Relevance.merge(CodeCompletionResult(&findDecl(AST, "header_main"), 42));
-  EXPECT_FLOAT_EQ(Relevance.ProximityScore, 1.0) << "Current file and header";
+  EXPECT_FLOAT_EQ(Relevance.SemaProximityScore, 1.0)
+      << "Current file and header";
 
   Relevance = {};
   Relevance.merge(CodeCompletionResult(&findAnyDecl(AST, "X"), 42));
@@ -145,9 +153,14 @@
   EXPECT_LT(PoorNameMatch.evaluate(), Default.evaluate());
 
   SymbolRelevanceSignals WithProximity;
-  WithProximity.ProximityScore = 0.2f;
+  WithProximity.SemaProximityScore = 0.2f;
   EXPECT_GT(WithProximity.evaluate(), Default.evaluate());
 
+  SymbolRelevanceSignals WithBetterProximity;
+  WithBetterProximity.IndexProximityScore = 0.3f;
+  WithBetterProximity.SemaProximityScore = 0.2f;
+  EXPECT_GT(WithBetterProximity.evaluate(), WithProximity.evaluate());
+
   SymbolRelevanceSignals Scoped;
   Scoped.Scope = SymbolRelevanceSignals::FileScope;
   EXPECT_EQ(Scoped.evaluate(), Default.evaluate());
@@ -167,6 +180,78 @@
   EXPECT_LT(sortText(0, "a"), sortText(0, "z"));
 }
 
+// {a,b,c} becomes /clangd-test/a/b/c
+std::string joinPaths(llvm::ArrayRef<StringRef> Parts) {
+  return testPath(
+      llvm::join(Parts.begin(), Parts.end(), llvm::sys::path::get_separator()));
+}
+
+static constexpr float ProximityBase = 0.7;
+
+// Calculates a proximity score for an index symbol with declaration file
+// SymPath with the given URI scheme.
+float IndexProximity(const FileProximityMatcher &Matcher, StringRef SymPath,
+                     StringRef Scheme = "file") {
+  Symbol Sym;
+  std::string Storage;
+  auto U = URI::create(SymPath, Scheme);
+  EXPECT_TRUE(static_cast<bool>(U)) << llvm::toString(U.takeError());
+  Storage = U->toString();
+  Sym.CanonicalDeclaration.FileURI = Storage;
+
+  SymbolRelevanceSignals Relevance;
+  Relevance.FileProximityMatch = &Matcher;
+  Relevance.merge(Sym);
+  return Relevance.IndexProximityScore;
+}
+
+TEST(QualityTests, IndexSymbolProximityScores) {
+  FileProximityMatcher Matcher(
+      /*ProximityPaths=*/{joinPaths({"a", "b", "c", "d", "x"})});
+
+  EXPECT_FLOAT_EQ(IndexProximity(Matcher, joinPaths({"a", "b", "c", "d", "x"})),
+                  1);
+  EXPECT_FLOAT_EQ(IndexProximity(Matcher, joinPaths({"a", "b", "c", "d", "y"})),
+                  ProximityBase);
+  EXPECT_FLOAT_EQ(IndexProximity(Matcher, joinPaths({"a", "y", "z"})),
+                  std::pow(ProximityBase, 5));
+  EXPECT_FLOAT_EQ(
+      IndexProximity(Matcher, joinPaths({"a", "b", "c", "d", "e", "y"})),
+      std::pow(ProximityBase, 2));
+  EXPECT_FLOAT_EQ(
+      IndexProximity(Matcher, joinPaths({"a", "b", "m", "n", "o", "y"})),
+      std::pow(ProximityBase, 5));
+  EXPECT_FLOAT_EQ(
+      IndexProximity(Matcher, joinPaths({"a", "t", "m", "n", "o", "y"})),
+      std::pow(ProximityBase, 6));
+  // Note the common directory is /clang-test/
+  EXPECT_FLOAT_EQ(IndexProximity(Matcher, joinPaths({"m", "n", "o", "p", "y"})),
+                  std::pow(ProximityBase, 7));
+  // Different URI schemes.
+  EXPECT_FLOAT_EQ(IndexProximity(Matcher,
+                                 joinPaths({"a", "b", "test-root", "a", "y"}),
+                                 "unittest"),
+                  0.0);
+}
+
+TEST(QualityTests, IndexSymbolProximityScoresWithTestURI) {
+  FileProximityMatcher Matcher(
+      /*ProximityPaths=*/{joinPaths({"a", "test-root", "b", "c", "x"})});
+  EXPECT_FLOAT_EQ(
+      IndexProximity(Matcher, joinPaths({"remote", "test-root", "b", "c", "x"}),
+                     "unittest"),
+      1);
+  EXPECT_FLOAT_EQ(IndexProximity(Matcher,
+                                 joinPaths({"remote", "test-root", "b", "y"}),
+                                 "unittest"),
+                  std::pow(ProximityBase, 2));
+  // unittest:///b/c/x vs unittest:///m/n/y. No common directory.
+  EXPECT_FLOAT_EQ(
+      IndexProximity(Matcher, joinPaths({"remote", "test-root", "m", "n", "y"}),
+                     "unittest"),
+      0);
+}
+
 } // namespace
 } // namespace clangd
 } // namespace clang
Index: clangd/Quality.h
===================================================================
--- clangd/Quality.h
+++ clangd/Quality.h
@@ -26,6 +26,7 @@
 //===---------------------------------------------------------------------===//
 #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_QUALITY_H
 #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_QUALITY_H
+#include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/StringRef.h"
 #include <algorithm>
 #include <functional>
@@ -67,13 +68,37 @@
 llvm::raw_ostream &operator<<(llvm::raw_ostream &,
                               const SymbolQualitySignals &);
 
+class FileProximityMatcher {
+  public:
+    /// \p ProximityPaths are used to compute proximity scores from symbol's
+    /// declaring file. The best score will be used.
+    explicit FileProximityMatcher(
+        llvm::ArrayRef<llvm::StringRef> ProximityPaths);
+
+    /// Calculates the best proximity score from proximity paths to the symbol's
+    /// URI. When a path cannot be encoded into the same scheme as \p SymbolURI,
+    /// the proximity will be 0.
+    float uriProximity(llvm::StringRef SymbolURI) const;
+
+  private:
+    llvm::SmallVector<std::string, 2> ProximityPaths;
+    friend llvm::raw_ostream &operator<<(llvm::raw_ostream &,
+                                         const FileProximityMatcher &);
+};
+
 /// Attributes of a symbol-query pair that affect how much we like it.
 struct SymbolRelevanceSignals {
   /// 0-1+ fuzzy-match score for unqualified name. Must be explicitly assigned.
   float NameMatch = 1;
   bool Forbidden = false; // Unavailable (e.g const) or inaccessible (private).
+
+  const FileProximityMatcher *FileProximityMatch = nullptr;
+  /// Proximity between the best index symbol and the query. [0-1], 1 is
+  /// closest.
+  float IndexProximityScore = 0;
+  llvm::StringRef IndexSymbolURI;
   /// Proximity between best declaration and the query. [0-1], 1 is closest.
-  float ProximityScore = 0;
+  float SemaProximityScore = 0;
 
   // An approximate measure of where we expect the symbol to be used.
   enum AccessibleScope {
@@ -93,7 +118,7 @@
 
   // Condense these signals down to a single number, higher is better.
   float evaluate() const;
-};
+  };
 llvm::raw_ostream &operator<<(llvm::raw_ostream &,
                               const SymbolRelevanceSignals &);
 
Index: clangd/Quality.cpp
===================================================================
--- clangd/Quality.cpp
+++ clangd/Quality.cpp
@@ -7,6 +7,7 @@
 //
 //===---------------------------------------------------------------------===//
 #include "Quality.h"
+#include "URI.h"
 #include "index/Index.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/Basic/CharInfo.h"
@@ -162,6 +163,63 @@
   return OS;
 }
 
+/// Calculates a proximity score from \p From and \p To, which are URI strings
+/// that have the same scheme. This does not parse URI. A URI (sans "<scheme>:")
+/// is split into chunks by '/' and each chunk is considered a file/directory.
+/// For example, "uri:///a/b/c" will be treated as /a/b/c
+static float uriProximity(StringRef From, StringRef To) {
+  auto SchemeSplitFrom = From.split(':');
+  auto SchemeSplitTo = To.split(':');
+  assert((SchemeSplitFrom.first == SchemeSplitTo.first) &&
+         "URIs must have the same scheme in order to compute proximity.");
+  auto Split = [](StringRef URIWithoutScheme) {
+    SmallVector<StringRef, 8> Split;
+    URIWithoutScheme.split(Split, '/', /*MaxSplit=*/-1, /*KeepEmpty=*/false);
+    return Split;
+  };
+  SmallVector<StringRef, 8> Fs = Split(SchemeSplitFrom.second);
+  SmallVector<StringRef, 8> Ts = Split(SchemeSplitTo.second);
+  auto F = Fs.begin(), T = Ts.begin(), FE = Fs.end(), TE = Ts.end();
+  for (; F != FE && T != TE && *F == *T; ++F, ++T) {
+  }
+  if (F == Fs.begin() && T == Ts.begin()) // No common directory.
+    return 0;
+  // We penalize for traversing up and down from \p From to \p To but penalize
+  // less for traversing down because subprojects are more closely related than
+  // superprojects.
+  int UpDist = FE - F;
+  int DownDist = TE - T;
+  return std::pow(0.7, UpDist + DownDist/2);
+}
+
+FileProximityMatcher::FileProximityMatcher(ArrayRef<StringRef> ProximityPaths)
+    : ProximityPaths(ProximityPaths.begin(), ProximityPaths.end()) {}
+
+float FileProximityMatcher::uriProximity(StringRef SymbolURI) const {
+  float Score = 0;
+  if (!ProximityPaths.empty() && !SymbolURI.empty()) {
+    for (const auto &Path : ProximityPaths)
+      // Only calculate proximity score for two URIs with the same scheme so
+      // that the computation can be purely text-based and thus avoid expensive
+      // URI encoding/decoding.
+      if (auto U = URI::create(Path, SymbolURI.split(':').first)) {
+        Score = std::max(Score, clangd::uriProximity(U->toString(), SymbolURI));
+      } else {
+        llvm::consumeError(U.takeError());
+      }
+  }
+  return Score;
+}
+
+llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
+                              const FileProximityMatcher &M) {
+  OS << formatv("=== File proximity matcher ===\n");
+  OS << formatv(
+      "\tProximityPaths:\n\t\t{0}\n",
+      llvm::join(M.ProximityPaths.begin(), M.ProximityPaths.end(), "\n\t\t"));
+  return OS;
+}
+
 static SymbolRelevanceSignals::AccessibleScope
 ComputeScope(const NamedDecl &D) {
   bool InClass = false;
@@ -182,19 +240,25 @@
 void SymbolRelevanceSignals::merge(const Symbol &IndexResult) {
   // FIXME: Index results always assumed to be at global scope. If Scope becomes
   // relevant to non-completion requests, we should recognize class members etc.
+
+  IndexSymbolURI = IndexResult.CanonicalDeclaration.FileURI;
+  if (FileProximityMatch)
+    IndexProximityScore = std::max(
+        FileProximityMatch->uriProximity(IndexSymbolURI), IndexProximityScore);
 }
 
 void SymbolRelevanceSignals::merge(const CodeCompletionResult &SemaCCResult) {
   if (SemaCCResult.Availability == CXAvailability_NotAvailable ||
       SemaCCResult.Availability == CXAvailability_NotAccessible)
     Forbidden = true;
 
   if (SemaCCResult.Declaration) {
-    // We boost things that have decls in the main file.
-    // The real proximity scores would be more general when we have them.
+    // We boost things that have decls in the main file. We give a fixed score
+    // for all other declarations in sema as they are already included in the
+    // translation unit.
     float DeclProximity =
-        hasDeclInMainFile(*SemaCCResult.Declaration) ? 1.0 : 0.0;
-    ProximityScore = std::max(DeclProximity, ProximityScore);
+        hasDeclInMainFile(*SemaCCResult.Declaration) ? 1.0 : 0.6;
+    SemaProximityScore = std::max(DeclProximity, SemaProximityScore);
   }
 
   // Declarations are scoped, others (like macros) are assumed global.
@@ -212,7 +276,7 @@
 
   // Proximity scores are [0,1] and we translate them into a multiplier in the
   // range from 1 to 2.
-  Score *= 1 + ProximityScore;
+  Score *= 1 + std::max(IndexProximityScore, SemaProximityScore);
 
   // Symbols like local variables may only be referenced within their scope.
   // Conversely if we're in that scope, it's likely we'll reference them.
@@ -236,11 +300,14 @@
 
   return Score;
 }
+
 raw_ostream &operator<<(raw_ostream &OS, const SymbolRelevanceSignals &S) {
   OS << formatv("=== Symbol relevance: {0}\n", S.evaluate());
   OS << formatv("\tName match: {0}\n", S.NameMatch);
   OS << formatv("\tForbidden: {0}\n", S.Forbidden);
-  OS << formatv("\tProximity: {0}\n", S.ProximityScore);
+  OS << formatv("\tIndexProximity: {0}\n", S.IndexProximityScore);
+  OS << formatv("\tIndexSymbolURI: {0}\n", S.IndexSymbolURI);
+  OS << formatv("\tSemaProximity: {0}\n", S.SemaProximityScore);
   OS << formatv("\tQuery type: {0}\n", static_cast<int>(S.Query));
   OS << formatv("\tScope: {0}\n", static_cast<int>(S.Scope));
   return OS;
Index: clangd/CodeComplete.cpp
===================================================================
--- clangd/CodeComplete.cpp
+++ clangd/CodeComplete.cpp
@@ -848,11 +848,17 @@
   bool Incomplete = false; // Would more be available with a higher limit?
   llvm::Optional<FuzzyMatcher> Filter;       // Initialized once Sema runs.
   std::unique_ptr<IncludeInserter> Includes; // Initialized once compiler runs.
+  FileProximityMatcher FileProximityMatch;
 
 public:
   // A CodeCompleteFlow object is only useful for calling run() exactly once.
   CodeCompleteFlow(PathRef FileName, const CodeCompleteOptions &Opts)
-      : FileName(FileName), Opts(Opts) {}
+      : FileName(FileName), Opts(Opts),
+        // FIXME: also use path of the main header corresponding to FileName to
+        // calculate the file proximity, which would capture include/ and src/
+        // project setup where headers and implementations are not in the same
+        // directory.
+        FileProximityMatch({FileName}) {}
 
   CompletionList run(const SemaCompleteInput &SemaCCInput) && {
     trace::Span Tracer("CodeCompleteFlow");
@@ -993,6 +999,7 @@
     SymbolQualitySignals Quality;
     SymbolRelevanceSignals Relevance;
     Relevance.Query = SymbolRelevanceSignals::CodeComplete;
+    Relevance.FileProximityMatch = &FileProximityMatch;
     if (auto FuzzyScore = fuzzyScore(C))
       Relevance.NameMatch = *FuzzyScore;
     else
@@ -1020,7 +1027,7 @@
                << "CodeComplete: " << C.Name << (IndexResult ? " (index)" : "")
                << (SemaResult ? " (sema)" : "") << " = " << Scores.finalScore
                << "\n"
-               << Quality << Relevance << "\n");
+               << Quality << FileProximityMatch << Relevance << "\n");
 
     NSema += bool(SemaResult);
     NIndex += bool(IndexResult);
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to