Author: sammccall Date: Mon May 6 03:25:10 2019 New Revision: 360030 URL: http://llvm.org/viewvc/llvm-project?rev=360030&view=rev Log: [clangd] Boost code completion results that were named in the last few lines.
Summary: The hope is this will catch a few patterns with repetition: SomeClass* S = ^SomeClass::Create() int getFrobnicator() { return ^frobnicator_; } // discard the factory, it's no longer valid. ^MyFactory.reset(); Without triggering antipatterns too often: return Point(x.first, x.^second); I'm going to gather some data on whether this turns out to be a win overall. Subscribers: ilya-biryukov, MaskRay, jkorous, arphaman, jfb, kadircet, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D61537 Modified: clang-tools-extra/trunk/clangd/CodeComplete.cpp clang-tools-extra/trunk/clangd/FindSymbols.cpp clang-tools-extra/trunk/clangd/Quality.cpp clang-tools-extra/trunk/clangd/Quality.h clang-tools-extra/trunk/clangd/SourceCode.cpp clang-tools-extra/trunk/clangd/SourceCode.h clang-tools-extra/trunk/clangd/test/completion-auto-trigger.test clang-tools-extra/trunk/clangd/unittests/CodeCompleteTests.cpp clang-tools-extra/trunk/clangd/unittests/QualityTests.cpp clang-tools-extra/trunk/clangd/unittests/SourceCodeTests.cpp Modified: clang-tools-extra/trunk/clangd/CodeComplete.cpp URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/CodeComplete.cpp?rev=360030&r1=360029&r2=360030&view=diff ============================================================================== --- clang-tools-extra/trunk/clangd/CodeComplete.cpp (original) +++ clang-tools-extra/trunk/clangd/CodeComplete.cpp Mon May 6 03:25:10 2019 @@ -54,7 +54,9 @@ #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/Error.h" #include "llvm/Support/Format.h" #include "llvm/Support/FormatVariadic.h" @@ -1215,6 +1217,7 @@ class CodeCompleteFlow { llvm::Optional<OpaqueType> PreferredType; // Initialized once Sema runs. // Whether to query symbols from any scope. Initialized once Sema runs. bool AllScopes = false; + llvm::StringSet<> ContextWords; // Include-insertion and proximity scoring rely on the include structure. // This is available after Sema has run. llvm::Optional<IncludeInserter> Inserter; // Available during runWithSema. @@ -1237,6 +1240,7 @@ public: trace::Span Tracer("CodeCompleteFlow"); HeuristicPrefix = guessCompletionPrefix(SemaCCInput.Contents, SemaCCInput.Offset); + populateContextWords(SemaCCInput.Contents); if (Opts.Index && SpecFuzzyFind && SpecFuzzyFind->CachedReq.hasValue()) { assert(!SpecFuzzyFind->Result.valid()); SpecReq = speculativeFuzzyFindRequestForCompletion( @@ -1323,6 +1327,7 @@ public: trace::Span Tracer("CodeCompleteWithoutSema"); // Fill in fields normally set by runWithSema() HeuristicPrefix = guessCompletionPrefix(Content, Offset); + populateContextWords(Content); CCContextKind = CodeCompletionContext::CCC_Recovery; Filter = FuzzyMatcher(HeuristicPrefix.Name); auto Pos = offsetToPosition(Content, Offset); @@ -1380,6 +1385,24 @@ public: } private: + void populateContextWords(llvm::StringRef Content) { + // Take last 3 lines before the completion point. + unsigned RangeEnd = HeuristicPrefix.Qualifier.begin() - Content.data(), + RangeBegin = RangeEnd; + for (size_t I = 0; I < 3 && RangeBegin > 0; ++I) { + auto PrevNL = Content.rfind('\n', RangeBegin - 1); + if (PrevNL == StringRef::npos) { + RangeBegin = 0; + break; + } + RangeBegin = PrevNL + 1; + } + + ContextWords = collectWords(Content.slice(RangeBegin, RangeEnd)); + dlog("Completion context words: {0}", + llvm::join(ContextWords.keys(), ", ")); + } + // This is called by run() once Sema code completion is done, but before the // Sema data structures are torn down. It does all the real work. CodeCompleteResult runWithSema() { @@ -1563,12 +1586,14 @@ private: SymbolQualitySignals Quality; SymbolRelevanceSignals Relevance; Relevance.Context = CCContextKind; + Relevance.Name = Bundle.front().Name; Relevance.Query = SymbolRelevanceSignals::CodeComplete; Relevance.FileProximityMatch = FileProximity.getPointer(); if (ScopeProximity) Relevance.ScopeProximityMatch = ScopeProximity.getPointer(); if (PreferredType) Relevance.HadContextType = true; + Relevance.ContextWords = &ContextWords; auto &First = Bundle.front(); if (auto FuzzyScore = fuzzyScore(First)) Modified: clang-tools-extra/trunk/clangd/FindSymbols.cpp URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/FindSymbols.cpp?rev=360030&r1=360029&r2=360030&view=diff ============================================================================== --- clang-tools-extra/trunk/clangd/FindSymbols.cpp (original) +++ clang-tools-extra/trunk/clangd/FindSymbols.cpp Mon May 6 03:25:10 2019 @@ -100,6 +100,7 @@ getWorkspaceSymbols(llvm::StringRef Quer SymbolQualitySignals Quality; Quality.merge(Sym); SymbolRelevanceSignals Relevance; + Relevance.Name = Sym.Name; Relevance.Query = SymbolRelevanceSignals::Generic; if (auto NameMatch = Filter.match(Sym.Name)) Relevance.NameMatch = *NameMatch; Modified: clang-tools-extra/trunk/clangd/Quality.cpp URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/Quality.cpp?rev=360030&r1=360029&r2=360030&view=diff ============================================================================== --- clang-tools-extra/trunk/clangd/Quality.cpp (original) +++ clang-tools-extra/trunk/clangd/Quality.cpp Mon May 6 03:25:10 2019 @@ -336,6 +336,15 @@ static float scopeBoost(ScopeDistance &D return std::max(0.65, 2.0 * std::pow(0.6, D / 2.0)); } +static llvm::Optional<llvm::StringRef> +wordMatching(llvm::StringRef Name, const llvm::StringSet<> *ContextWords) { + if (ContextWords) + for (const auto& Word : ContextWords->keys()) + if (Name.contains_lower(Word)) + return Word; + return llvm::None; +} + float SymbolRelevanceSignals::evaluate() const { float Score = 1; @@ -357,6 +366,9 @@ float SymbolRelevanceSignals::evaluate() Score *= SemaSaysInScope ? 2.0 : scopeBoost(*ScopeProximityMatch, SymbolScope); + if (wordMatching(Name, ContextWords)) + Score *= 1.5; + // Symbols like local variables may only be referenced within their scope. // Conversely if we're in that scope, it's likely we'll reference them. if (Query == CodeComplete) { @@ -413,7 +425,12 @@ float SymbolRelevanceSignals::evaluate() llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const SymbolRelevanceSignals &S) { OS << llvm::formatv("=== Symbol relevance: {0}\n", S.evaluate()); + OS << llvm::formatv("\tName: {0}\n", S.Name); OS << llvm::formatv("\tName match: {0}\n", S.NameMatch); + if (S.ContextWords) + OS << llvm::formatv( + "\tMatching context word: {0}\n", + wordMatching(S.Name, S.ContextWords).getValueOr("<none>")); OS << llvm::formatv("\tForbidden: {0}\n", S.Forbidden); OS << llvm::formatv("\tNeedsFixIts: {0}\n", S.NeedsFixIts); OS << llvm::formatv("\tIsInstanceMember: {0}\n", S.IsInstanceMember); Modified: clang-tools-extra/trunk/clangd/Quality.h URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/Quality.h?rev=360030&r1=360029&r2=360030&view=diff ============================================================================== --- clang-tools-extra/trunk/clangd/Quality.h (original) +++ clang-tools-extra/trunk/clangd/Quality.h Mon May 6 03:25:10 2019 @@ -32,13 +32,14 @@ #include "clang/Sema/CodeCompleteConsumer.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSet.h" #include <algorithm> #include <functional> #include <vector> namespace llvm { class raw_ostream; -} +} // namespace llvm namespace clang { class CodeCompletionResult; @@ -84,8 +85,12 @@ llvm::raw_ostream &operator<<(llvm::raw_ /// Attributes of a symbol-query pair that affect how much we like it. struct SymbolRelevanceSignals { + /// The name of the symbol (for ContextWords). Must be explicitly assigned. + llvm::StringRef Name; /// 0-1+ fuzzy-match score for unqualified name. Must be explicitly assigned. float NameMatch = 1; + /// Lowercase words relevant to the context (e.g. near the completion point). + llvm::StringSet<>* ContextWords = nullptr; bool Forbidden = false; // Unavailable (e.g const) or inaccessible (private). /// Whether fixits needs to be applied for that completion or not. bool NeedsFixIts = false; Modified: clang-tools-extra/trunk/clangd/SourceCode.cpp URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/SourceCode.cpp?rev=360030&r1=360029&r2=360030&view=diff ============================================================================== --- clang-tools-extra/trunk/clangd/SourceCode.cpp (original) +++ clang-tools-extra/trunk/clangd/SourceCode.cpp Mon May 6 03:25:10 2019 @@ -8,6 +8,7 @@ #include "SourceCode.h" #include "Context.h" +#include "FuzzyMatch.h" #include "Logger.h" #include "Protocol.h" #include "clang/AST/ASTContext.h" @@ -18,6 +19,7 @@ #include "llvm/ADT/None.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Errc.h" #include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" @@ -602,5 +604,43 @@ std::vector<std::string> visibleNamespac return Found; } +llvm::StringSet<> collectWords(llvm::StringRef Content) { + // We assume short words are not significant. + // We may want to consider other stopwords, e.g. language keywords. + // (A very naive implementation showed no benefit, but lexing might do better) + static constexpr int MinWordLength = 4; + + std::vector<CharRole> Roles(Content.size()); + calculateRoles(Content, Roles); + + llvm::StringSet<> Result; + llvm::SmallString<256> Word; + auto Flush = [&] { + if (Word.size() >= MinWordLength) { + for (char &C : Word) + C = llvm::toLower(C); + Result.insert(Word); + } + Word.clear(); + }; + for (unsigned I = 0; I < Content.size(); ++I) { + switch (Roles[I]) { + case Head: + Flush(); + LLVM_FALLTHROUGH; + case Tail: + Word.push_back(Content[I]); + break; + case Unknown: + case Separator: + Flush(); + break; + } + } + Flush(); + + return Result; +} + } // namespace clangd } // namespace clang Modified: clang-tools-extra/trunk/clangd/SourceCode.h URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/SourceCode.h?rev=360030&r1=360029&r2=360030&view=diff ============================================================================== --- clang-tools-extra/trunk/clangd/SourceCode.h (original) +++ clang-tools-extra/trunk/clangd/SourceCode.h Mon May 6 03:25:10 2019 @@ -20,6 +20,7 @@ #include "clang/Basic/SourceManager.h" #include "clang/Format/Format.h" #include "clang/Tooling/Core/Replacement.h" +#include "llvm/ADT/StringSet.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/SHA1.h" @@ -165,6 +166,13 @@ cleanupAndFormat(StringRef Code, const t llvm::StringMap<unsigned> collectIdentifiers(llvm::StringRef Content, const format::FormatStyle &Style); +/// Collects words from the source code. +/// Unlike collectIdentifiers: +/// - also finds text in comments: +/// - splits text into words +/// - drops stopwords like "get" and "for" +llvm::StringSet<> collectWords(llvm::StringRef Content); + /// Heuristically determine namespaces visible at a point, without parsing Code. /// This considers using-directives and enclosing namespace-declarations that /// are visible (and not obfuscated) in the file itself (not headers). Modified: clang-tools-extra/trunk/clangd/test/completion-auto-trigger.test URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/test/completion-auto-trigger.test?rev=360030&r1=360029&r2=360030&view=diff ============================================================================== --- clang-tools-extra/trunk/clangd/test/completion-auto-trigger.test (original) +++ clang-tools-extra/trunk/clangd/test/completion-auto-trigger.test Mon May 6 03:25:10 2019 @@ -23,7 +23,7 @@ # CHECK-NEXT: "insertTextFormat": 1, # CHECK-NEXT: "kind": 5, # CHECK-NEXT: "label": " size", -# CHECK-NEXT: "sortText": "3eacccccsize", +# CHECK-NEXT: "sortText": "{{.*}}size", # CHECK-NEXT: "textEdit": { # CHECK-NEXT: "newText": "size", # CHECK-NEXT: "range": { @@ -45,7 +45,7 @@ # CHECK-NEXT: "insertTextFormat": 1, # CHECK-NEXT: "kind": 10, # CHECK-NEXT: "label": " default_capacity", -# CHECK-NEXT: "sortText": "3fd70a3ddefault_capacity", +# CHECK-NEXT: "sortText": "{{.*}}default_capacity", # CHECK-NEXT: "textEdit": { # CHECK-NEXT: "newText": "default_capacity", # CHECK-NEXT: "range": { @@ -84,7 +84,7 @@ # CHECK-NEXT: "insertTextFormat": 1, # CHECK-NEXT: "kind": 6, # CHECK-NEXT: "label": " ns_member", -# CHECK-NEXT: "sortText": "3f2cccccns_member", +# CHECK-NEXT: "sortText": "{{.*}}ns_member", # CHECK-NEXT: "textEdit": { # CHECK-NEXT: "newText": "ns_member", # CHECK-NEXT: "range": { Modified: clang-tools-extra/trunk/clangd/unittests/CodeCompleteTests.cpp URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/unittests/CodeCompleteTests.cpp?rev=360030&r1=360029&r2=360030&view=diff ============================================================================== --- clang-tools-extra/trunk/clangd/unittests/CodeCompleteTests.cpp (original) +++ clang-tools-extra/trunk/clangd/unittests/CodeCompleteTests.cpp Mon May 6 03:25:10 2019 @@ -174,6 +174,7 @@ struct ClassWithMembers { int BBB(); int CCC(); }; + int main() { ClassWithMembers().^ } )cpp", /*IndexSymbols=*/{}, Opts); @@ -324,7 +325,7 @@ TEST(CompletionTest, CompletionOptions) } } -TEST(CompletionTest, Priorities) { +TEST(CompletionTest, Accessible) { auto Internal = completions(R"cpp( class Foo { public: void pub(); @@ -334,7 +335,7 @@ TEST(CompletionTest, Priorities) { void Foo::pub() { this->^ } )cpp"); EXPECT_THAT(Internal.Completions, - HasSubsequence(Named("priv"), Named("prot"), Named("pub"))); + AllOf(Has("priv"), Has("prot"), Has("pub"))); auto External = completions(R"cpp( class Foo { @@ -502,6 +503,21 @@ TEST(CompletionTest, ReferencesAffectRan HasSubsequence(Named("absl"), Named("absb"))); } +TEST(CompletionTest, ContextWords) { + auto Results = completions(R"cpp( + enum class Color { RED, YELLOW, BLUE }; + + // (blank lines so the definition above isn't "context") + + // "It was a yellow car," he said. "Big yellow car, new." + auto Finish = Color::^ + )cpp"); + // Yellow would normally sort last (alphabetic). + // But the recent mention shuold bump it up. + ASSERT_THAT(Results.Completions, + HasSubsequence(Named("YELLOW"), Named("BLUE"))); +} + TEST(CompletionTest, GlobalQualified) { auto Results = completions( R"cpp( Modified: clang-tools-extra/trunk/clangd/unittests/QualityTests.cpp URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/unittests/QualityTests.cpp?rev=360030&r1=360029&r2=360030&view=diff ============================================================================== --- clang-tools-extra/trunk/clangd/unittests/QualityTests.cpp (original) +++ clang-tools-extra/trunk/clangd/unittests/QualityTests.cpp Mon May 6 03:25:10 2019 @@ -292,6 +292,16 @@ TEST(QualityTests, SymbolRelevanceSignal SymbolRelevanceSignals InBaseClass; InBaseClass.InBaseClass = true; EXPECT_LT(InBaseClass.evaluate(), Default.evaluate()); + + llvm::StringSet<> Words = {"one", "two", "three"}; + SymbolRelevanceSignals WithoutMatchingWord; + WithoutMatchingWord.ContextWords = &Words; + WithoutMatchingWord.Name = "four"; + EXPECT_EQ(WithoutMatchingWord.evaluate(), Default.evaluate()); + SymbolRelevanceSignals WithMatchingWord; + WithMatchingWord.ContextWords = &Words; + WithMatchingWord.Name = "TheTwoTowers"; + EXPECT_GT(WithMatchingWord.evaluate(), Default.evaluate()); } TEST(QualityTests, ScopeProximity) { Modified: clang-tools-extra/trunk/clangd/unittests/SourceCodeTests.cpp URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/unittests/SourceCodeTests.cpp?rev=360030&r1=360029&r2=360030&view=diff ============================================================================== --- clang-tools-extra/trunk/clangd/unittests/SourceCodeTests.cpp (original) +++ clang-tools-extra/trunk/clangd/unittests/SourceCodeTests.cpp Mon May 6 03:25:10 2019 @@ -22,6 +22,7 @@ namespace { using llvm::Failed; using llvm::HasValue; +using ::testing::UnorderedElementsAreArray; MATCHER_P2(Pos, Line, Col, "") { return arg.line == int(Line) && arg.character == int(Col); @@ -322,6 +323,19 @@ TEST(SourceCodeTests, CollectIdentifiers EXPECT_EQ(IDs["foo"], 2u); } +TEST(SourceCodeTests, CollectWords) { + auto Words = collectWords(R"cpp( + #define FIZZ_BUZZ + // this is a comment + std::string getSomeText() { return "magic word"; } + )cpp"); + std::set<std::string> ActualWords(Words.keys().begin(), Words.keys().end()); + std::set<std::string> ExpectedWords = {"define", "fizz", "buzz", "this", + "comment", "string", "some", "text", + "return", "magic", "word"}; + EXPECT_EQ(ActualWords, ExpectedWords); +} + TEST(SourceCodeTests, VisibleNamespaces) { std::vector<std::pair<const char *, std::vector<std::string>>> Cases = { { _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits