[PATCH] D113995: [clangd] Dex Trigrams: Improve query trigram generation

2021-12-07 Thread Kirill Bobyrev via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rG976a74d7d2db: [clangd] Dex Trigrams: Improve query trigram 
generation (authored by kbobyrev).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D113995/new/

https://reviews.llvm.org/D113995

Files:
  clang-tools-extra/clangd/index/dex/Trigram.cpp
  clang-tools-extra/clangd/unittests/DexTests.cpp

Index: clang-tools-extra/clangd/unittests/DexTests.cpp
===
--- clang-tools-extra/clangd/unittests/DexTests.cpp
+++ clang-tools-extra/clangd/unittests/DexTests.cpp
@@ -386,30 +386,35 @@
   trigramsAre({"c", "cl", "cla", "lan", "ang", "ngd"}));
 
   EXPECT_THAT(identifierTrigramTokens("abc_def"),
-  trigramsAre({"a", "ab", "ad", "abc", "abd", "ade", "bcd", "bde",
-   "cde", "def"}));
+  trigramsAre({"a", "d", "ab", "ad", "de", "abc", "abd", "ade",
+   "bcd", "bde", "cde", "def"}));
 
   EXPECT_THAT(identifierTrigramTokens("a_b_c_d_e_"),
-  trigramsAre({"a", "a_", "ab", "abc", "bcd", "cde"}));
+  trigramsAre({"a", "b", "ab", "bc", "abc", "bcd", "cde"}));
 
   EXPECT_THAT(identifierTrigramTokens("unique_ptr"),
-  trigramsAre({"u", "un", "up", "uni", "unp", "upt", "niq", "nip",
-   "npt", "iqu", "iqp", "ipt", "que", "qup", "qpt",
-   "uep", "ept", "ptr"}));
+  trigramsAre({"u",   "p",   "un",  "up",  "pt",  "uni", "unp",
+   "upt", "niq", "nip", "npt", "iqu", "iqp", "ipt",
+   "que", "qup", "qpt", "uep", "ept", "ptr"}));
 
-  EXPECT_THAT(
-  identifierTrigramTokens("TUDecl"),
-  trigramsAre({"t", "tu", "td", "tud", "tde", "ude", "dec", "ecl"}));
+  EXPECT_THAT(identifierTrigramTokens("TUDecl"),
+  trigramsAre({"t", "d", "tu", "td", "de", "tud", "tde", "ude",
+   "dec", "ecl"}));
 
   EXPECT_THAT(identifierTrigramTokens("IsOK"),
-  trigramsAre({"i", "is", "io", "iso", "iok", "sok"}));
+  trigramsAre({"i", "o", "is", "ok", "io", "iso", "iok", "sok"}));
 
-  EXPECT_THAT(
-  identifierTrigramTokens("abc_defGhij__klm"),
-  trigramsAre({"a",   "ab",  "ad",  "abc", "abd", "ade", "adg", "bcd",
-   "bde", "bdg", "cde", "cdg", "def", "deg", "dgh", "dgk",
-   "efg", "egh", "egk", "fgh", "fgk", "ghi", "ghk", "gkl",
-   "hij", "hik", "hkl", "ijk", "ikl", "jkl", "klm"}));
+  EXPECT_THAT(identifierTrigramTokens("_pb"),
+  trigramsAre({"_", "_p", "p", "pb"}));
+  EXPECT_THAT(identifierTrigramTokens("__pb"),
+  trigramsAre({"_", "_p", "p", "pb"}));
+
+  EXPECT_THAT(identifierTrigramTokens("abc_defGhij__klm"),
+  trigramsAre({"a",   "d",   "ab",  "ad",  "dg",  "de",  "abc",
+   "abd", "ade", "adg", "bcd", "bde", "bdg", "cde",
+   "cdg", "def", "deg", "dgh", "dgk", "efg", "egh",
+   "egk", "fgh", "fgk", "ghi", "ghk", "gkl", "hij",
+   "hik", "hkl", "ijk", "ikl", "jkl", "klm"}));
 }
 
 TEST(DexTrigrams, QueryTrigrams) {
@@ -419,8 +424,16 @@
 
   EXPECT_THAT(generateQueryTrigrams(""), trigramsAre({}));
   EXPECT_THAT(generateQueryTrigrams("_"), trigramsAre({"_"}));
-  EXPECT_THAT(generateQueryTrigrams("__"), trigramsAre({"__"}));
-  EXPECT_THAT(generateQueryTrigrams("___"), trigramsAre({}));
+  EXPECT_THAT(generateQueryTrigrams("__"), trigramsAre({"_"}));
+  EXPECT_THAT(generateQueryTrigrams("___"), trigramsAre({"_"}));
+
+  EXPECT_THAT(generateQueryTrigrams("m_"), trigramsAre({"m"}));
+
+  EXPECT_THAT(generateQueryTrigrams("p_b"), trigramsAre({"pb"}));
+  EXPECT_THAT(generateQueryTrigrams("pb_"), trigramsAre({"pb"}));
+  EXPECT_THAT(generateQueryTrigrams("_p"), trigramsAre({"_p"}));
+  EXPECT_THAT(generateQueryTrigrams("_pb_"), trigramsAre({"pb"}));
+  EXPECT_THAT(generateQueryTrigrams("__pb"), trigramsAre({"pb"}));
 
   EXPECT_THAT(generateQueryTrigrams("X86"), trigramsAre({"x86"}));
 
@@ -525,25 +538,45 @@
 }
 
 TEST(DexTest, ShortQuery) {
-  auto I = Dex::build(generateSymbols({"OneTwoThreeFour"}), RefSlab(),
+  auto I = Dex::build(generateSymbols({"_OneTwoFourSix"}), RefSlab(),
   RelationSlab());
   FuzzyFindRequest Req;
   Req.AnyScope = true;
   bool Incomplete;
 
-  EXPECT_THAT(match(*I, Req, ), ElementsAre("OneTwoThreeFour"));
+  EXPECT_THAT(match(*I, Req, ), ElementsAre("_OneTwoFourSix"));
   EXPECT_FALSE(Incomplete) << "Empty string is not a short query";
 
-  Req.Query = "t";
-  EXPECT_THAT(match(*I, Req, ), ElementsAre());
-  EXPECT_TRUE(Incomplete) << "Short queries have different semantics";
+  Req.Query = "o";
+  EXPECT_THAT(match(*I, Req, ), ElementsAre("_OneTwoFourSix"));
+  EXPECT_TRUE(Incomplete) << "Using first head as 

[PATCH] D113995: [clangd] Dex Trigrams: Improve query trigram generation

2021-12-07 Thread Kirill Bobyrev via Phabricator via cfe-commits
kbobyrev updated this revision to Diff 392380.
kbobyrev marked an inline comment as done.
kbobyrev added a comment.

No problem, thank you!


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D113995/new/

https://reviews.llvm.org/D113995

Files:
  clang-tools-extra/clangd/index/dex/Trigram.cpp
  clang-tools-extra/clangd/unittests/DexTests.cpp

Index: clang-tools-extra/clangd/unittests/DexTests.cpp
===
--- clang-tools-extra/clangd/unittests/DexTests.cpp
+++ clang-tools-extra/clangd/unittests/DexTests.cpp
@@ -386,30 +386,35 @@
   trigramsAre({"c", "cl", "cla", "lan", "ang", "ngd"}));
 
   EXPECT_THAT(identifierTrigramTokens("abc_def"),
-  trigramsAre({"a", "ab", "ad", "abc", "abd", "ade", "bcd", "bde",
-   "cde", "def"}));
+  trigramsAre({"a", "d", "ab", "ad", "de", "abc", "abd", "ade",
+   "bcd", "bde", "cde", "def"}));
 
   EXPECT_THAT(identifierTrigramTokens("a_b_c_d_e_"),
-  trigramsAre({"a", "a_", "ab", "abc", "bcd", "cde"}));
+  trigramsAre({"a", "b", "ab", "bc", "abc", "bcd", "cde"}));
 
   EXPECT_THAT(identifierTrigramTokens("unique_ptr"),
-  trigramsAre({"u", "un", "up", "uni", "unp", "upt", "niq", "nip",
-   "npt", "iqu", "iqp", "ipt", "que", "qup", "qpt",
-   "uep", "ept", "ptr"}));
+  trigramsAre({"u",   "p",   "un",  "up",  "pt",  "uni", "unp",
+   "upt", "niq", "nip", "npt", "iqu", "iqp", "ipt",
+   "que", "qup", "qpt", "uep", "ept", "ptr"}));
 
-  EXPECT_THAT(
-  identifierTrigramTokens("TUDecl"),
-  trigramsAre({"t", "tu", "td", "tud", "tde", "ude", "dec", "ecl"}));
+  EXPECT_THAT(identifierTrigramTokens("TUDecl"),
+  trigramsAre({"t", "d", "tu", "td", "de", "tud", "tde", "ude",
+   "dec", "ecl"}));
 
   EXPECT_THAT(identifierTrigramTokens("IsOK"),
-  trigramsAre({"i", "is", "io", "iso", "iok", "sok"}));
+  trigramsAre({"i", "o", "is", "ok", "io", "iso", "iok", "sok"}));
 
-  EXPECT_THAT(
-  identifierTrigramTokens("abc_defGhij__klm"),
-  trigramsAre({"a",   "ab",  "ad",  "abc", "abd", "ade", "adg", "bcd",
-   "bde", "bdg", "cde", "cdg", "def", "deg", "dgh", "dgk",
-   "efg", "egh", "egk", "fgh", "fgk", "ghi", "ghk", "gkl",
-   "hij", "hik", "hkl", "ijk", "ikl", "jkl", "klm"}));
+  EXPECT_THAT(identifierTrigramTokens("_pb"),
+  trigramsAre({"_", "_p", "p", "pb"}));
+  EXPECT_THAT(identifierTrigramTokens("__pb"),
+  trigramsAre({"_", "_p", "p", "pb"}));
+
+  EXPECT_THAT(identifierTrigramTokens("abc_defGhij__klm"),
+  trigramsAre({"a",   "d",   "ab",  "ad",  "dg",  "de",  "abc",
+   "abd", "ade", "adg", "bcd", "bde", "bdg", "cde",
+   "cdg", "def", "deg", "dgh", "dgk", "efg", "egh",
+   "egk", "fgh", "fgk", "ghi", "ghk", "gkl", "hij",
+   "hik", "hkl", "ijk", "ikl", "jkl", "klm"}));
 }
 
 TEST(DexTrigrams, QueryTrigrams) {
@@ -419,8 +424,16 @@
 
   EXPECT_THAT(generateQueryTrigrams(""), trigramsAre({}));
   EXPECT_THAT(generateQueryTrigrams("_"), trigramsAre({"_"}));
-  EXPECT_THAT(generateQueryTrigrams("__"), trigramsAre({"__"}));
-  EXPECT_THAT(generateQueryTrigrams("___"), trigramsAre({}));
+  EXPECT_THAT(generateQueryTrigrams("__"), trigramsAre({"_"}));
+  EXPECT_THAT(generateQueryTrigrams("___"), trigramsAre({"_"}));
+
+  EXPECT_THAT(generateQueryTrigrams("m_"), trigramsAre({"m"}));
+
+  EXPECT_THAT(generateQueryTrigrams("p_b"), trigramsAre({"pb"}));
+  EXPECT_THAT(generateQueryTrigrams("pb_"), trigramsAre({"pb"}));
+  EXPECT_THAT(generateQueryTrigrams("_p"), trigramsAre({"_p"}));
+  EXPECT_THAT(generateQueryTrigrams("_pb_"), trigramsAre({"pb"}));
+  EXPECT_THAT(generateQueryTrigrams("__pb"), trigramsAre({"pb"}));
 
   EXPECT_THAT(generateQueryTrigrams("X86"), trigramsAre({"x86"}));
 
@@ -525,25 +538,45 @@
 }
 
 TEST(DexTest, ShortQuery) {
-  auto I = Dex::build(generateSymbols({"OneTwoThreeFour"}), RefSlab(),
+  auto I = Dex::build(generateSymbols({"_OneTwoFourSix"}), RefSlab(),
   RelationSlab());
   FuzzyFindRequest Req;
   Req.AnyScope = true;
   bool Incomplete;
 
-  EXPECT_THAT(match(*I, Req, ), ElementsAre("OneTwoThreeFour"));
+  EXPECT_THAT(match(*I, Req, ), ElementsAre("_OneTwoFourSix"));
   EXPECT_FALSE(Incomplete) << "Empty string is not a short query";
 
-  Req.Query = "t";
-  EXPECT_THAT(match(*I, Req, ), ElementsAre());
-  EXPECT_TRUE(Incomplete) << "Short queries have different semantics";
+  Req.Query = "o";
+  EXPECT_THAT(match(*I, Req, ), ElementsAre("_OneTwoFourSix"));
+  EXPECT_TRUE(Incomplete) << "Using first head as unigram";
+
+  Req.Query = "_o";
+  

[PATCH] D113995: [clangd] Dex Trigrams: Improve query trigram generation

2021-12-07 Thread Sam McCall via Phabricator via cfe-commits
sammccall accepted this revision.
sammccall added a comment.
This revision is now accepted and ready to land.

Sorry about the delay!




Comment at: clang-tools-extra/clangd/index/dex/Trigram.cpp:46
   // not available then 0 is stored.
-  std::vector> Next(LowercaseIdentifier.size());
+  llvm::SmallVector> Next(LowercaseIdentifier.size());
   unsigned NextTail = 0, NextHead = 0;

the default size of this vector is 6 - if we care about the optimization we 
probably want to double it or so?

(Roles above is fine, it's 40)


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D113995/new/

https://reviews.llvm.org/D113995

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D113995: [clangd] Dex Trigrams: Improve query trigram generation

2021-11-30 Thread Kirill Bobyrev via Phabricator via cfe-commits
kbobyrev updated this revision to Diff 390643.
kbobyrev added a comment.

Remove refactoring leftovers.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D113995/new/

https://reviews.llvm.org/D113995

Files:
  clang-tools-extra/clangd/index/dex/Trigram.cpp
  clang-tools-extra/clangd/unittests/DexTests.cpp

Index: clang-tools-extra/clangd/unittests/DexTests.cpp
===
--- clang-tools-extra/clangd/unittests/DexTests.cpp
+++ clang-tools-extra/clangd/unittests/DexTests.cpp
@@ -386,30 +386,35 @@
   trigramsAre({"c", "cl", "cla", "lan", "ang", "ngd"}));
 
   EXPECT_THAT(identifierTrigramTokens("abc_def"),
-  trigramsAre({"a", "ab", "ad", "abc", "abd", "ade", "bcd", "bde",
-   "cde", "def"}));
+  trigramsAre({"a", "d", "ab", "ad", "de", "abc", "abd", "ade",
+   "bcd", "bde", "cde", "def"}));
 
   EXPECT_THAT(identifierTrigramTokens("a_b_c_d_e_"),
-  trigramsAre({"a", "a_", "ab", "abc", "bcd", "cde"}));
+  trigramsAre({"a", "b", "ab", "bc", "abc", "bcd", "cde"}));
 
   EXPECT_THAT(identifierTrigramTokens("unique_ptr"),
-  trigramsAre({"u", "un", "up", "uni", "unp", "upt", "niq", "nip",
-   "npt", "iqu", "iqp", "ipt", "que", "qup", "qpt",
-   "uep", "ept", "ptr"}));
+  trigramsAre({"u",   "p",   "un",  "up",  "pt",  "uni", "unp",
+   "upt", "niq", "nip", "npt", "iqu", "iqp", "ipt",
+   "que", "qup", "qpt", "uep", "ept", "ptr"}));
 
-  EXPECT_THAT(
-  identifierTrigramTokens("TUDecl"),
-  trigramsAre({"t", "tu", "td", "tud", "tde", "ude", "dec", "ecl"}));
+  EXPECT_THAT(identifierTrigramTokens("TUDecl"),
+  trigramsAre({"t", "d", "tu", "td", "de", "tud", "tde", "ude",
+   "dec", "ecl"}));
 
   EXPECT_THAT(identifierTrigramTokens("IsOK"),
-  trigramsAre({"i", "is", "io", "iso", "iok", "sok"}));
+  trigramsAre({"i", "o", "is", "ok", "io", "iso", "iok", "sok"}));
 
-  EXPECT_THAT(
-  identifierTrigramTokens("abc_defGhij__klm"),
-  trigramsAre({"a",   "ab",  "ad",  "abc", "abd", "ade", "adg", "bcd",
-   "bde", "bdg", "cde", "cdg", "def", "deg", "dgh", "dgk",
-   "efg", "egh", "egk", "fgh", "fgk", "ghi", "ghk", "gkl",
-   "hij", "hik", "hkl", "ijk", "ikl", "jkl", "klm"}));
+  EXPECT_THAT(identifierTrigramTokens("_pb"),
+  trigramsAre({"_", "_p", "p", "pb"}));
+  EXPECT_THAT(identifierTrigramTokens("__pb"),
+  trigramsAre({"_", "_p", "p", "pb"}));
+
+  EXPECT_THAT(identifierTrigramTokens("abc_defGhij__klm"),
+  trigramsAre({"a",   "d",   "ab",  "ad",  "dg",  "de",  "abc",
+   "abd", "ade", "adg", "bcd", "bde", "bdg", "cde",
+   "cdg", "def", "deg", "dgh", "dgk", "efg", "egh",
+   "egk", "fgh", "fgk", "ghi", "ghk", "gkl", "hij",
+   "hik", "hkl", "ijk", "ikl", "jkl", "klm"}));
 }
 
 TEST(DexTrigrams, QueryTrigrams) {
@@ -419,8 +424,16 @@
 
   EXPECT_THAT(generateQueryTrigrams(""), trigramsAre({}));
   EXPECT_THAT(generateQueryTrigrams("_"), trigramsAre({"_"}));
-  EXPECT_THAT(generateQueryTrigrams("__"), trigramsAre({"__"}));
-  EXPECT_THAT(generateQueryTrigrams("___"), trigramsAre({}));
+  EXPECT_THAT(generateQueryTrigrams("__"), trigramsAre({"_"}));
+  EXPECT_THAT(generateQueryTrigrams("___"), trigramsAre({"_"}));
+
+  EXPECT_THAT(generateQueryTrigrams("m_"), trigramsAre({"m"}));
+
+  EXPECT_THAT(generateQueryTrigrams("p_b"), trigramsAre({"pb"}));
+  EXPECT_THAT(generateQueryTrigrams("pb_"), trigramsAre({"pb"}));
+  EXPECT_THAT(generateQueryTrigrams("_p"), trigramsAre({"_p"}));
+  EXPECT_THAT(generateQueryTrigrams("_pb_"), trigramsAre({"pb"}));
+  EXPECT_THAT(generateQueryTrigrams("__pb"), trigramsAre({"pb"}));
 
   EXPECT_THAT(generateQueryTrigrams("X86"), trigramsAre({"x86"}));
 
@@ -525,25 +538,45 @@
 }
 
 TEST(DexTest, ShortQuery) {
-  auto I = Dex::build(generateSymbols({"OneTwoThreeFour"}), RefSlab(),
+  auto I = Dex::build(generateSymbols({"_OneTwoFourSix"}), RefSlab(),
   RelationSlab());
   FuzzyFindRequest Req;
   Req.AnyScope = true;
   bool Incomplete;
 
-  EXPECT_THAT(match(*I, Req, ), ElementsAre("OneTwoThreeFour"));
+  EXPECT_THAT(match(*I, Req, ), ElementsAre("_OneTwoFourSix"));
   EXPECT_FALSE(Incomplete) << "Empty string is not a short query";
 
-  Req.Query = "t";
-  EXPECT_THAT(match(*I, Req, ), ElementsAre());
-  EXPECT_TRUE(Incomplete) << "Short queries have different semantics";
+  Req.Query = "o";
+  EXPECT_THAT(match(*I, Req, ), ElementsAre("_OneTwoFourSix"));
+  EXPECT_TRUE(Incomplete) << "Using first head as unigram";
+
+  Req.Query = "_o";
+  EXPECT_THAT(match(*I, Req, ), 

[PATCH] D113995: [clangd] Dex Trigrams: Improve query trigram generation

2021-11-30 Thread Kirill Bobyrev via Phabricator via cfe-commits
kbobyrev updated this revision to Diff 390641.
kbobyrev marked 5 inline comments as done.
kbobyrev added a comment.

Address review comments.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D113995/new/

https://reviews.llvm.org/D113995

Files:
  clang-tools-extra/clangd/index/dex/Trigram.cpp
  clang-tools-extra/clangd/unittests/DexTests.cpp

Index: clang-tools-extra/clangd/unittests/DexTests.cpp
===
--- clang-tools-extra/clangd/unittests/DexTests.cpp
+++ clang-tools-extra/clangd/unittests/DexTests.cpp
@@ -386,30 +386,35 @@
   trigramsAre({"c", "cl", "cla", "lan", "ang", "ngd"}));
 
   EXPECT_THAT(identifierTrigramTokens("abc_def"),
-  trigramsAre({"a", "ab", "ad", "abc", "abd", "ade", "bcd", "bde",
-   "cde", "def"}));
+  trigramsAre({"a", "d", "ab", "ad", "de", "abc", "abd", "ade",
+   "bcd", "bde", "cde", "def"}));
 
   EXPECT_THAT(identifierTrigramTokens("a_b_c_d_e_"),
-  trigramsAre({"a", "a_", "ab", "abc", "bcd", "cde"}));
+  trigramsAre({"a", "b", "ab", "bc", "abc", "bcd", "cde"}));
 
   EXPECT_THAT(identifierTrigramTokens("unique_ptr"),
-  trigramsAre({"u", "un", "up", "uni", "unp", "upt", "niq", "nip",
-   "npt", "iqu", "iqp", "ipt", "que", "qup", "qpt",
-   "uep", "ept", "ptr"}));
+  trigramsAre({"u",   "p",   "un",  "up",  "pt",  "uni", "unp",
+   "upt", "niq", "nip", "npt", "iqu", "iqp", "ipt",
+   "que", "qup", "qpt", "uep", "ept", "ptr"}));
 
-  EXPECT_THAT(
-  identifierTrigramTokens("TUDecl"),
-  trigramsAre({"t", "tu", "td", "tud", "tde", "ude", "dec", "ecl"}));
+  EXPECT_THAT(identifierTrigramTokens("TUDecl"),
+  trigramsAre({"t", "d", "tu", "td", "de", "tud", "tde", "ude",
+   "dec", "ecl"}));
 
   EXPECT_THAT(identifierTrigramTokens("IsOK"),
-  trigramsAre({"i", "is", "io", "iso", "iok", "sok"}));
+  trigramsAre({"i", "o", "is", "ok", "io", "iso", "iok", "sok"}));
 
-  EXPECT_THAT(
-  identifierTrigramTokens("abc_defGhij__klm"),
-  trigramsAre({"a",   "ab",  "ad",  "abc", "abd", "ade", "adg", "bcd",
-   "bde", "bdg", "cde", "cdg", "def", "deg", "dgh", "dgk",
-   "efg", "egh", "egk", "fgh", "fgk", "ghi", "ghk", "gkl",
-   "hij", "hik", "hkl", "ijk", "ikl", "jkl", "klm"}));
+  EXPECT_THAT(identifierTrigramTokens("_pb"),
+  trigramsAre({"_", "_p", "p", "pb"}));
+  EXPECT_THAT(identifierTrigramTokens("__pb"),
+  trigramsAre({"_", "_p", "p", "pb"}));
+
+  EXPECT_THAT(identifierTrigramTokens("abc_defGhij__klm"),
+  trigramsAre({"a",   "d",   "ab",  "ad",  "dg",  "de",  "abc",
+   "abd", "ade", "adg", "bcd", "bde", "bdg", "cde",
+   "cdg", "def", "deg", "dgh", "dgk", "efg", "egh",
+   "egk", "fgh", "fgk", "ghi", "ghk", "gkl", "hij",
+   "hik", "hkl", "ijk", "ikl", "jkl", "klm"}));
 }
 
 TEST(DexTrigrams, QueryTrigrams) {
@@ -419,8 +424,16 @@
 
   EXPECT_THAT(generateQueryTrigrams(""), trigramsAre({}));
   EXPECT_THAT(generateQueryTrigrams("_"), trigramsAre({"_"}));
-  EXPECT_THAT(generateQueryTrigrams("__"), trigramsAre({"__"}));
-  EXPECT_THAT(generateQueryTrigrams("___"), trigramsAre({}));
+  EXPECT_THAT(generateQueryTrigrams("__"), trigramsAre({"_"}));
+  EXPECT_THAT(generateQueryTrigrams("___"), trigramsAre({"_"}));
+
+  EXPECT_THAT(generateQueryTrigrams("m_"), trigramsAre({"m"}));
+
+  EXPECT_THAT(generateQueryTrigrams("p_b"), trigramsAre({"pb"}));
+  EXPECT_THAT(generateQueryTrigrams("pb_"), trigramsAre({"pb"}));
+  EXPECT_THAT(generateQueryTrigrams("_p"), trigramsAre({"_p"}));
+  EXPECT_THAT(generateQueryTrigrams("_pb_"), trigramsAre({"pb"}));
+  EXPECT_THAT(generateQueryTrigrams("__pb"), trigramsAre({"pb"}));
 
   EXPECT_THAT(generateQueryTrigrams("X86"), trigramsAre({"x86"}));
 
@@ -525,25 +538,45 @@
 }
 
 TEST(DexTest, ShortQuery) {
-  auto I = Dex::build(generateSymbols({"OneTwoThreeFour"}), RefSlab(),
+  auto I = Dex::build(generateSymbols({"_OneTwoFourSix"}), RefSlab(),
   RelationSlab());
   FuzzyFindRequest Req;
   Req.AnyScope = true;
   bool Incomplete;
 
-  EXPECT_THAT(match(*I, Req, ), ElementsAre("OneTwoThreeFour"));
+  EXPECT_THAT(match(*I, Req, ), ElementsAre("_OneTwoFourSix"));
   EXPECT_FALSE(Incomplete) << "Empty string is not a short query";
 
-  Req.Query = "t";
-  EXPECT_THAT(match(*I, Req, ), ElementsAre());
-  EXPECT_TRUE(Incomplete) << "Short queries have different semantics";
+  Req.Query = "o";
+  EXPECT_THAT(match(*I, Req, ), ElementsAre("_OneTwoFourSix"));
+  EXPECT_TRUE(Incomplete) << "Using first head as unigram";
+
+  Req.Query = "_o";
+  

[PATCH] D113995: [clangd] Dex Trigrams: Improve query trigram generation

2021-11-30 Thread Sam McCall via Phabricator via cfe-commits
sammccall added inline comments.



Comment at: clang-tools-extra/clangd/index/dex/Trigram.cpp:47
+  const size_t NPOS = std::numeric_limits::max();
+  llvm::SmallVector> 
Next(LowercaseIdentifier.size());
+  size_t NextTail = NPOS, NextHead = NPOS;

you've changed unsigned -> size_t here, can you revert that please?



Comment at: clang-tools-extra/clangd/index/dex/Trigram.cpp:91
+std::tie(NextTail, NextHead) = Next[Position];
+if (NextTail != NPOS)
+  Out(Trigram(LowercaseIdentifier[Position],

You're not treating head vs tail differently, this should just be iterating 
over Next as above.



Comment at: clang-tools-extra/clangd/index/dex/Trigram.cpp:47
+  const size_t NPOS = std::numeric_limits::max();
+  llvm::SmallVector> 
Next(LowercaseIdentifier.size());
+  size_t NextTail = NPOS, NextHead = NPOS;

kbobyrev wrote:
> sammccall wrote:
> > why the change from array to pair and 0 to NPOS?
> - Array of two elements is confusing: we had it since we had 3 elements, with 
> 2 it doesn't make sense anymore. Also, makes it easier to decompose the items 
> (`std::tie(Tail, Head)` vs `size_t Tail = Next[I][0], Head = Next[I][1]`).
> - NPOS is more explicit than 0 in our intent: 0 is a valid index and it feels 
> awkward to use a valid index as NOT_FOUND. Yes, it's the one that can't 
> appear in the Next but it's not clear from the code and requires some 
> explanation for understanding. Why NPOS can't be found in `Next` is obvious.
I disagree on the style points here and they don't seem particularly related to 
the patch. Do they need to be included here?

> Array of two elements is confusing

Well, I disagree here: the difference between array and pair is 
homogeneous vs heterogeneous. The usage here is almost entirely homogeneous.
3 vs 2 doesn't make a difference, we could have used `tuple`, same 
would apply.

> Also, makes it easier to decompose the items

As far as ergonomics go, this comes at the cost of not being able to iterate 
over them, which we do much more often. (And AFAICT you never need NextTail 
decomposed.)

`NPOS` is not a common convention so I don't think it's more obvious. 


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D113995/new/

https://reviews.llvm.org/D113995

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D113995: [clangd] Dex Trigrams: Improve query trigram generation

2021-11-30 Thread Kirill Bobyrev via Phabricator via cfe-commits
kbobyrev added inline comments.



Comment at: clang-tools-extra/clangd/index/dex/Trigram.cpp:47
+  const size_t NPOS = std::numeric_limits::max();
+  llvm::SmallVector> 
Next(LowercaseIdentifier.size());
+  size_t NextTail = NPOS, NextHead = NPOS;

sammccall wrote:
> why the change from array to pair and 0 to NPOS?
- Array of two elements is confusing: we had it since we had 3 elements, with 2 
it doesn't make sense anymore. Also, makes it easier to decompose the items 
(`std::tie(Tail, Head)` vs `size_t Tail = Next[I][0], Head = Next[I][1]`).
- NPOS is more explicit than 0 in our intent: 0 is a valid index and it feels 
awkward to use a valid index as NOT_FOUND. Yes, it's the one that can't appear 
in the Next but it's not clear from the code and requires some explanation for 
understanding. Why NPOS can't be found in `Next` is obvious.



Comment at: clang-tools-extra/clangd/index/dex/Trigram.cpp:102
+  LowercaseIdentifier[NextHead]));
+Position = NextHead;
+  }

sammccall wrote:
> (if the change to NPOS is just to avoid hitting Position == 0 on the first 
> iteration, you could check it here instead)
It's not only for that but this was the trigger, having the `Position == 0` 
check here makes the logic slightly confusing, I think it's better to just use 
NPOS instead for the reasons above.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D113995/new/

https://reviews.llvm.org/D113995

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D113995: [clangd] Dex Trigrams: Improve query trigram generation

2021-11-30 Thread Kirill Bobyrev via Phabricator via cfe-commits
kbobyrev updated this revision to Diff 390627.
kbobyrev marked 3 inline comments as done.
kbobyrev added a comment.

Address review comments.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D113995/new/

https://reviews.llvm.org/D113995

Files:
  clang-tools-extra/clangd/index/dex/Trigram.cpp
  clang-tools-extra/clangd/unittests/DexTests.cpp

Index: clang-tools-extra/clangd/unittests/DexTests.cpp
===
--- clang-tools-extra/clangd/unittests/DexTests.cpp
+++ clang-tools-extra/clangd/unittests/DexTests.cpp
@@ -386,30 +386,35 @@
   trigramsAre({"c", "cl", "cla", "lan", "ang", "ngd"}));
 
   EXPECT_THAT(identifierTrigramTokens("abc_def"),
-  trigramsAre({"a", "ab", "ad", "abc", "abd", "ade", "bcd", "bde",
-   "cde", "def"}));
+  trigramsAre({"a", "d", "ab", "ad", "de", "abc", "abd", "ade",
+   "bcd", "bde", "cde", "def"}));
 
   EXPECT_THAT(identifierTrigramTokens("a_b_c_d_e_"),
-  trigramsAre({"a", "a_", "ab", "abc", "bcd", "cde"}));
+  trigramsAre({"a", "b", "ab", "bc", "abc", "bcd", "cde"}));
 
   EXPECT_THAT(identifierTrigramTokens("unique_ptr"),
-  trigramsAre({"u", "un", "up", "uni", "unp", "upt", "niq", "nip",
-   "npt", "iqu", "iqp", "ipt", "que", "qup", "qpt",
-   "uep", "ept", "ptr"}));
+  trigramsAre({"u",   "p",   "un",  "up",  "pt",  "uni", "unp",
+   "upt", "niq", "nip", "npt", "iqu", "iqp", "ipt",
+   "que", "qup", "qpt", "uep", "ept", "ptr"}));
 
-  EXPECT_THAT(
-  identifierTrigramTokens("TUDecl"),
-  trigramsAre({"t", "tu", "td", "tud", "tde", "ude", "dec", "ecl"}));
+  EXPECT_THAT(identifierTrigramTokens("TUDecl"),
+  trigramsAre({"t", "d", "tu", "td", "de", "tud", "tde", "ude",
+   "dec", "ecl"}));
 
   EXPECT_THAT(identifierTrigramTokens("IsOK"),
-  trigramsAre({"i", "is", "io", "iso", "iok", "sok"}));
+  trigramsAre({"i", "o", "is", "ok", "io", "iso", "iok", "sok"}));
 
-  EXPECT_THAT(
-  identifierTrigramTokens("abc_defGhij__klm"),
-  trigramsAre({"a",   "ab",  "ad",  "abc", "abd", "ade", "adg", "bcd",
-   "bde", "bdg", "cde", "cdg", "def", "deg", "dgh", "dgk",
-   "efg", "egh", "egk", "fgh", "fgk", "ghi", "ghk", "gkl",
-   "hij", "hik", "hkl", "ijk", "ikl", "jkl", "klm"}));
+  EXPECT_THAT(identifierTrigramTokens("_pb"),
+  trigramsAre({"_", "_p", "p", "pb"}));
+  EXPECT_THAT(identifierTrigramTokens("__pb"),
+  trigramsAre({"_", "_p", "p", "pb"}));
+
+  EXPECT_THAT(identifierTrigramTokens("abc_defGhij__klm"),
+  trigramsAre({"a",   "d",   "ab",  "ad",  "dg",  "de",  "abc",
+   "abd", "ade", "adg", "bcd", "bde", "bdg", "cde",
+   "cdg", "def", "deg", "dgh", "dgk", "efg", "egh",
+   "egk", "fgh", "fgk", "ghi", "ghk", "gkl", "hij",
+   "hik", "hkl", "ijk", "ikl", "jkl", "klm"}));
 }
 
 TEST(DexTrigrams, QueryTrigrams) {
@@ -419,8 +424,16 @@
 
   EXPECT_THAT(generateQueryTrigrams(""), trigramsAre({}));
   EXPECT_THAT(generateQueryTrigrams("_"), trigramsAre({"_"}));
-  EXPECT_THAT(generateQueryTrigrams("__"), trigramsAre({"__"}));
-  EXPECT_THAT(generateQueryTrigrams("___"), trigramsAre({}));
+  EXPECT_THAT(generateQueryTrigrams("__"), trigramsAre({"_"}));
+  EXPECT_THAT(generateQueryTrigrams("___"), trigramsAre({"_"}));
+
+  EXPECT_THAT(generateQueryTrigrams("m_"), trigramsAre({"m"}));
+
+  EXPECT_THAT(generateQueryTrigrams("p_b"), trigramsAre({"pb"}));
+  EXPECT_THAT(generateQueryTrigrams("pb_"), trigramsAre({"pb"}));
+  EXPECT_THAT(generateQueryTrigrams("_p"), trigramsAre({"_p"}));
+  EXPECT_THAT(generateQueryTrigrams("_pb_"), trigramsAre({"pb"}));
+  EXPECT_THAT(generateQueryTrigrams("__pb"), trigramsAre({"pb"}));
 
   EXPECT_THAT(generateQueryTrigrams("X86"), trigramsAre({"x86"}));
 
@@ -525,25 +538,45 @@
 }
 
 TEST(DexTest, ShortQuery) {
-  auto I = Dex::build(generateSymbols({"OneTwoThreeFour"}), RefSlab(),
+  auto I = Dex::build(generateSymbols({"_OneTwoFourSix"}), RefSlab(),
   RelationSlab());
   FuzzyFindRequest Req;
   Req.AnyScope = true;
   bool Incomplete;
 
-  EXPECT_THAT(match(*I, Req, ), ElementsAre("OneTwoThreeFour"));
+  EXPECT_THAT(match(*I, Req, ), ElementsAre("_OneTwoFourSix"));
   EXPECT_FALSE(Incomplete) << "Empty string is not a short query";
 
-  Req.Query = "t";
-  EXPECT_THAT(match(*I, Req, ), ElementsAre());
-  EXPECT_TRUE(Incomplete) << "Short queries have different semantics";
+  Req.Query = "o";
+  EXPECT_THAT(match(*I, Req, ), ElementsAre("_OneTwoFourSix"));
+  EXPECT_TRUE(Incomplete) << "Using first head as unigram";
+
+  Req.Query = "_o";
+  

[PATCH] D113995: [clangd] Dex Trigrams: Improve query trigram generation

2021-11-29 Thread Sam McCall via Phabricator via cfe-commits
sammccall added a comment.

Basically LG now, some of the changes are a bit mysterious to me though




Comment at: clang-tools-extra/clangd/index/dex/Trigram.cpp:47
+  const size_t NPOS = std::numeric_limits::max();
+  llvm::SmallVector> 
Next(LowercaseIdentifier.size());
+  size_t NextTail = NPOS, NextHead = NPOS;

why the change from array to pair and 0 to NPOS?



Comment at: clang-tools-extra/clangd/index/dex/Trigram.cpp:82
+  //
+  // - First separator + first head
+  // - First head

Fist -> first
You're missing first separator only

I think examples might be clearer than a description of these.



Comment at: clang-tools-extra/clangd/index/dex/Trigram.cpp:102
+  LowercaseIdentifier[NextHead]));
+Position = NextHead;
+  }

(if the change to NPOS is just to avoid hitting Position == 0 on the first 
iteration, you could check it here instead)



Comment at: clang-tools-extra/clangd/index/dex/Trigram.cpp:148
 
+  // For queries with very few letters, emulate what generateIdentifierTrigrams
+  // outputs for the beginning of the Identifier.

Since the query trigram corresponds so closely to the query for short queries, 
I think it makes more sense to consider it the other way: 
generateIdentifierTrigrams is deciding which queries it wants to match and 
emulating this function.



Comment at: clang-tools-extra/clangd/index/dex/Trigram.cpp:151
+  if (UniqueTrigrams.empty()) {
+// If a bigram can't be formed, we might prepend a separator.
+std::string Result(1, LowercaseQuery.front());

nit: by prepending a separator, we form a bigram!
If a bigram can't be formed out of letters/numbers...


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D113995/new/

https://reviews.llvm.org/D113995

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D113995: [clangd] Dex Trigrams: Improve query trigram generation

2021-11-29 Thread Kirill Bobyrev via Phabricator via cfe-commits
kbobyrev updated this revision to Diff 390298.
kbobyrev added a comment.

Slightly improve docs.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D113995/new/

https://reviews.llvm.org/D113995

Files:
  clang-tools-extra/clangd/index/dex/Trigram.cpp
  clang-tools-extra/clangd/unittests/DexTests.cpp

Index: clang-tools-extra/clangd/unittests/DexTests.cpp
===
--- clang-tools-extra/clangd/unittests/DexTests.cpp
+++ clang-tools-extra/clangd/unittests/DexTests.cpp
@@ -386,30 +386,35 @@
   trigramsAre({"c", "cl", "cla", "lan", "ang", "ngd"}));
 
   EXPECT_THAT(identifierTrigramTokens("abc_def"),
-  trigramsAre({"a", "ab", "ad", "abc", "abd", "ade", "bcd", "bde",
-   "cde", "def"}));
+  trigramsAre({"a", "d", "ab", "ad", "de", "abc", "abd", "ade",
+   "bcd", "bde", "cde", "def"}));
 
   EXPECT_THAT(identifierTrigramTokens("a_b_c_d_e_"),
-  trigramsAre({"a", "a_", "ab", "abc", "bcd", "cde"}));
+  trigramsAre({"a", "b", "ab", "bc", "abc", "bcd", "cde"}));
 
   EXPECT_THAT(identifierTrigramTokens("unique_ptr"),
-  trigramsAre({"u", "un", "up", "uni", "unp", "upt", "niq", "nip",
-   "npt", "iqu", "iqp", "ipt", "que", "qup", "qpt",
-   "uep", "ept", "ptr"}));
+  trigramsAre({"u",   "p",   "un",  "up",  "pt",  "uni", "unp",
+   "upt", "niq", "nip", "npt", "iqu", "iqp", "ipt",
+   "que", "qup", "qpt", "uep", "ept", "ptr"}));
 
-  EXPECT_THAT(
-  identifierTrigramTokens("TUDecl"),
-  trigramsAre({"t", "tu", "td", "tud", "tde", "ude", "dec", "ecl"}));
+  EXPECT_THAT(identifierTrigramTokens("TUDecl"),
+  trigramsAre({"t", "d", "tu", "td", "de", "tud", "tde", "ude",
+   "dec", "ecl"}));
 
   EXPECT_THAT(identifierTrigramTokens("IsOK"),
-  trigramsAre({"i", "is", "io", "iso", "iok", "sok"}));
+  trigramsAre({"i", "o", "is", "ok", "io", "iso", "iok", "sok"}));
 
-  EXPECT_THAT(
-  identifierTrigramTokens("abc_defGhij__klm"),
-  trigramsAre({"a",   "ab",  "ad",  "abc", "abd", "ade", "adg", "bcd",
-   "bde", "bdg", "cde", "cdg", "def", "deg", "dgh", "dgk",
-   "efg", "egh", "egk", "fgh", "fgk", "ghi", "ghk", "gkl",
-   "hij", "hik", "hkl", "ijk", "ikl", "jkl", "klm"}));
+  EXPECT_THAT(identifierTrigramTokens("_pb"),
+  trigramsAre({"_", "_p", "p", "pb"}));
+  EXPECT_THAT(identifierTrigramTokens("__pb"),
+  trigramsAre({"_", "_p", "p", "pb"}));
+
+  EXPECT_THAT(identifierTrigramTokens("abc_defGhij__klm"),
+  trigramsAre({"a",   "d",   "ab",  "ad",  "dg",  "de",  "abc",
+   "abd", "ade", "adg", "bcd", "bde", "bdg", "cde",
+   "cdg", "def", "deg", "dgh", "dgk", "efg", "egh",
+   "egk", "fgh", "fgk", "ghi", "ghk", "gkl", "hij",
+   "hik", "hkl", "ijk", "ikl", "jkl", "klm"}));
 }
 
 TEST(DexTrigrams, QueryTrigrams) {
@@ -419,8 +424,16 @@
 
   EXPECT_THAT(generateQueryTrigrams(""), trigramsAre({}));
   EXPECT_THAT(generateQueryTrigrams("_"), trigramsAre({"_"}));
-  EXPECT_THAT(generateQueryTrigrams("__"), trigramsAre({"__"}));
-  EXPECT_THAT(generateQueryTrigrams("___"), trigramsAre({}));
+  EXPECT_THAT(generateQueryTrigrams("__"), trigramsAre({"_"}));
+  EXPECT_THAT(generateQueryTrigrams("___"), trigramsAre({"_"}));
+
+  EXPECT_THAT(generateQueryTrigrams("m_"), trigramsAre({"m"}));
+
+  EXPECT_THAT(generateQueryTrigrams("p_b"), trigramsAre({"pb"}));
+  EXPECT_THAT(generateQueryTrigrams("pb_"), trigramsAre({"pb"}));
+  EXPECT_THAT(generateQueryTrigrams("_p"), trigramsAre({"_p"}));
+  EXPECT_THAT(generateQueryTrigrams("_pb_"), trigramsAre({"pb"}));
+  EXPECT_THAT(generateQueryTrigrams("__pb"), trigramsAre({"pb"}));
 
   EXPECT_THAT(generateQueryTrigrams("X86"), trigramsAre({"x86"}));
 
@@ -525,25 +538,45 @@
 }
 
 TEST(DexTest, ShortQuery) {
-  auto I = Dex::build(generateSymbols({"OneTwoThreeFour"}), RefSlab(),
+  auto I = Dex::build(generateSymbols({"_OneTwoFourSix"}), RefSlab(),
   RelationSlab());
   FuzzyFindRequest Req;
   Req.AnyScope = true;
   bool Incomplete;
 
-  EXPECT_THAT(match(*I, Req, ), ElementsAre("OneTwoThreeFour"));
+  EXPECT_THAT(match(*I, Req, ), ElementsAre("_OneTwoFourSix"));
   EXPECT_FALSE(Incomplete) << "Empty string is not a short query";
 
-  Req.Query = "t";
-  EXPECT_THAT(match(*I, Req, ), ElementsAre());
-  EXPECT_TRUE(Incomplete) << "Short queries have different semantics";
+  Req.Query = "o";
+  EXPECT_THAT(match(*I, Req, ), ElementsAre("_OneTwoFourSix"));
+  EXPECT_TRUE(Incomplete) << "Using first head as unigram";
+
+  Req.Query = "_o";
+  EXPECT_THAT(match(*I, Req, ), 

[PATCH] D113995: [clangd] Dex Trigrams: Improve query trigram generation

2021-11-29 Thread Kirill Bobyrev via Phabricator via cfe-commits
kbobyrev updated this revision to Diff 390297.
kbobyrev marked 5 inline comments as done.
kbobyrev added a comment.

Address review comments.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D113995/new/

https://reviews.llvm.org/D113995

Files:
  clang-tools-extra/clangd/index/dex/Trigram.cpp
  clang-tools-extra/clangd/unittests/DexTests.cpp

Index: clang-tools-extra/clangd/unittests/DexTests.cpp
===
--- clang-tools-extra/clangd/unittests/DexTests.cpp
+++ clang-tools-extra/clangd/unittests/DexTests.cpp
@@ -386,30 +386,35 @@
   trigramsAre({"c", "cl", "cla", "lan", "ang", "ngd"}));
 
   EXPECT_THAT(identifierTrigramTokens("abc_def"),
-  trigramsAre({"a", "ab", "ad", "abc", "abd", "ade", "bcd", "bde",
-   "cde", "def"}));
+  trigramsAre({"a", "d", "ab", "ad", "de", "abc", "abd", "ade",
+   "bcd", "bde", "cde", "def"}));
 
   EXPECT_THAT(identifierTrigramTokens("a_b_c_d_e_"),
-  trigramsAre({"a", "a_", "ab", "abc", "bcd", "cde"}));
+  trigramsAre({"a", "b", "ab", "bc", "abc", "bcd", "cde"}));
 
   EXPECT_THAT(identifierTrigramTokens("unique_ptr"),
-  trigramsAre({"u", "un", "up", "uni", "unp", "upt", "niq", "nip",
-   "npt", "iqu", "iqp", "ipt", "que", "qup", "qpt",
-   "uep", "ept", "ptr"}));
+  trigramsAre({"u",   "p",   "un",  "up",  "pt",  "uni", "unp",
+   "upt", "niq", "nip", "npt", "iqu", "iqp", "ipt",
+   "que", "qup", "qpt", "uep", "ept", "ptr"}));
 
-  EXPECT_THAT(
-  identifierTrigramTokens("TUDecl"),
-  trigramsAre({"t", "tu", "td", "tud", "tde", "ude", "dec", "ecl"}));
+  EXPECT_THAT(identifierTrigramTokens("TUDecl"),
+  trigramsAre({"t", "d", "tu", "td", "de", "tud", "tde", "ude",
+   "dec", "ecl"}));
 
   EXPECT_THAT(identifierTrigramTokens("IsOK"),
-  trigramsAre({"i", "is", "io", "iso", "iok", "sok"}));
+  trigramsAre({"i", "o", "is", "ok", "io", "iso", "iok", "sok"}));
 
-  EXPECT_THAT(
-  identifierTrigramTokens("abc_defGhij__klm"),
-  trigramsAre({"a",   "ab",  "ad",  "abc", "abd", "ade", "adg", "bcd",
-   "bde", "bdg", "cde", "cdg", "def", "deg", "dgh", "dgk",
-   "efg", "egh", "egk", "fgh", "fgk", "ghi", "ghk", "gkl",
-   "hij", "hik", "hkl", "ijk", "ikl", "jkl", "klm"}));
+  EXPECT_THAT(identifierTrigramTokens("_pb"),
+  trigramsAre({"_", "_p", "p", "pb"}));
+  EXPECT_THAT(identifierTrigramTokens("__pb"),
+  trigramsAre({"_", "_p", "p", "pb"}));
+
+  EXPECT_THAT(identifierTrigramTokens("abc_defGhij__klm"),
+  trigramsAre({"a",   "d",   "ab",  "ad",  "dg",  "de",  "abc",
+   "abd", "ade", "adg", "bcd", "bde", "bdg", "cde",
+   "cdg", "def", "deg", "dgh", "dgk", "efg", "egh",
+   "egk", "fgh", "fgk", "ghi", "ghk", "gkl", "hij",
+   "hik", "hkl", "ijk", "ikl", "jkl", "klm"}));
 }
 
 TEST(DexTrigrams, QueryTrigrams) {
@@ -419,8 +424,16 @@
 
   EXPECT_THAT(generateQueryTrigrams(""), trigramsAre({}));
   EXPECT_THAT(generateQueryTrigrams("_"), trigramsAre({"_"}));
-  EXPECT_THAT(generateQueryTrigrams("__"), trigramsAre({"__"}));
-  EXPECT_THAT(generateQueryTrigrams("___"), trigramsAre({}));
+  EXPECT_THAT(generateQueryTrigrams("__"), trigramsAre({"_"}));
+  EXPECT_THAT(generateQueryTrigrams("___"), trigramsAre({"_"}));
+
+  EXPECT_THAT(generateQueryTrigrams("m_"), trigramsAre({"m"}));
+
+  EXPECT_THAT(generateQueryTrigrams("p_b"), trigramsAre({"pb"}));
+  EXPECT_THAT(generateQueryTrigrams("pb_"), trigramsAre({"pb"}));
+  EXPECT_THAT(generateQueryTrigrams("_p"), trigramsAre({"_p"}));
+  EXPECT_THAT(generateQueryTrigrams("_pb_"), trigramsAre({"pb"}));
+  EXPECT_THAT(generateQueryTrigrams("__pb"), trigramsAre({"pb"}));
 
   EXPECT_THAT(generateQueryTrigrams("X86"), trigramsAre({"x86"}));
 
@@ -525,25 +538,45 @@
 }
 
 TEST(DexTest, ShortQuery) {
-  auto I = Dex::build(generateSymbols({"OneTwoThreeFour"}), RefSlab(),
+  auto I = Dex::build(generateSymbols({"_OneTwoFourSix"}), RefSlab(),
   RelationSlab());
   FuzzyFindRequest Req;
   Req.AnyScope = true;
   bool Incomplete;
 
-  EXPECT_THAT(match(*I, Req, ), ElementsAre("OneTwoThreeFour"));
+  EXPECT_THAT(match(*I, Req, ), ElementsAre("_OneTwoFourSix"));
   EXPECT_FALSE(Incomplete) << "Empty string is not a short query";
 
-  Req.Query = "t";
-  EXPECT_THAT(match(*I, Req, ), ElementsAre());
-  EXPECT_TRUE(Incomplete) << "Short queries have different semantics";
+  Req.Query = "o";
+  EXPECT_THAT(match(*I, Req, ), ElementsAre("_OneTwoFourSix"));
+  EXPECT_TRUE(Incomplete) << "Using first head as unigram";
+
+  Req.Query = "_o";
+  

[PATCH] D113995: [clangd] Dex Trigrams: Improve query trigram generation

2021-11-28 Thread Sam McCall via Phabricator via cfe-commits
sammccall added inline comments.



Comment at: clang-tools-extra/clangd/index/dex/Trigram.cpp:71
   }
-  // Emit short-query trigrams: FooBar -> f, fo, fb.
-  if (!LowercaseIdentifier.empty())
-Out(Trigram(LowercaseIdentifier[0]));
-  if (LowercaseIdentifier.size() >= 2)
-Out(Trigram(LowercaseIdentifier[0], LowercaseIdentifier[1]));
-  for (size_t I = 1; I < LowercaseIdentifier.size(); ++I)
-if (Roles[I] == Head) {
-  Out(Trigram(LowercaseIdentifier[0], LowercaseIdentifier[I]));
+  // Short queries semantics are different: emit incomplete trigrams for them.
+  //

It'd be good to describe what the semantics actually are somewhere.
As it is, this comment describes the algorithm used to generate the trigrams, 
and the other function refers to this one.



Comment at: clang-tools-extra/clangd/index/dex/Trigram.cpp:74
+  // First, the first separator if it is preceding the alphanumeric symbols.
+  size_t FirstSeparator = std::numeric_limits::max();
+  for (size_t I = 0; I < LowercaseIdentifier.size(); ++I) {

This seems overly complicated.
If a separator precedes the alphanums, then it's at the front.
Moreover its Next table seems to be set up correctly.

So I believe this all amounts to: if the first char is a separator, then we 
allow starting our partial trigrams at 0, first head, second head. Else just 
first head & second head. And we just follow the next table.



Comment at: clang-tools-extra/clangd/index/dex/Trigram.cpp:87
+  //
+  // - First head
+  // - Fist head + subsequent tail

This looks a lot like the Next table. Can't we use that? (Second head + third 
head is missing, but I don't see why)



Comment at: clang-tools-extra/clangd/index/dex/Trigram.cpp:147
+
+  unsigned ValidSymbols =
+  llvm::count_if(Roles, [](CharRole R) { return R == Head || R == Tail; });

both "valid" and "symbols" are confusing here.

Do we actually need this variable?
Seems like we can detect the case at the end, if we generated no trigrams.



Comment at: clang-tools-extra/clangd/index/dex/Trigram.cpp:152
+  if (ValidSymbols < 3) {
+// If a bigram can't be formed, we might prepend a separator.
+std::string Letters = Roles.front() == Separator && ValidSymbols < 2

I'd suggest always including the separator, and then using 
StringRef(Letters).take_back(2). Then we don't need ValidSymbols here either.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D113995/new/

https://reviews.llvm.org/D113995

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D113995: [clangd] Dex Trigrams: Improve query trigram generation

2021-11-19 Thread Kirill Bobyrev via Phabricator via cfe-commits
kbobyrev updated this revision to Diff 388461.
kbobyrev added a comment.

Add improvements for identifier trigram generation and make query & id
generators consistent with each other.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D113995/new/

https://reviews.llvm.org/D113995

Files:
  clang-tools-extra/clangd/index/dex/Trigram.cpp
  clang-tools-extra/clangd/unittests/DexTests.cpp

Index: clang-tools-extra/clangd/unittests/DexTests.cpp
===
--- clang-tools-extra/clangd/unittests/DexTests.cpp
+++ clang-tools-extra/clangd/unittests/DexTests.cpp
@@ -386,30 +386,35 @@
   trigramsAre({"c", "cl", "cla", "lan", "ang", "ngd"}));
 
   EXPECT_THAT(identifierTrigramTokens("abc_def"),
-  trigramsAre({"a", "ab", "ad", "abc", "abd", "ade", "bcd", "bde",
-   "cde", "def"}));
+  trigramsAre({"a", "d", "ab", "ad", "de", "abc", "abd", "ade",
+   "bcd", "bde", "cde", "def"}));
 
   EXPECT_THAT(identifierTrigramTokens("a_b_c_d_e_"),
-  trigramsAre({"a", "a_", "ab", "abc", "bcd", "cde"}));
+  trigramsAre({"a", "b", "ab", "abc", "bcd", "cde"}));
 
   EXPECT_THAT(identifierTrigramTokens("unique_ptr"),
-  trigramsAre({"u", "un", "up", "uni", "unp", "upt", "niq", "nip",
-   "npt", "iqu", "iqp", "ipt", "que", "qup", "qpt",
-   "uep", "ept", "ptr"}));
+  trigramsAre({"u",   "p",   "un",  "up",  "pt",  "uni", "unp",
+   "upt", "niq", "nip", "npt", "iqu", "iqp", "ipt",
+   "que", "qup", "qpt", "uep", "ept", "ptr"}));
 
-  EXPECT_THAT(
-  identifierTrigramTokens("TUDecl"),
-  trigramsAre({"t", "tu", "td", "tud", "tde", "ude", "dec", "ecl"}));
+  EXPECT_THAT(identifierTrigramTokens("TUDecl"),
+  trigramsAre({"t", "d", "tu", "td", "de", "tud", "tde", "ude",
+   "dec", "ecl"}));
 
   EXPECT_THAT(identifierTrigramTokens("IsOK"),
-  trigramsAre({"i", "is", "io", "iso", "iok", "sok"}));
+  trigramsAre({"i", "o", "is", "ok", "io", "iso", "iok", "sok"}));
 
-  EXPECT_THAT(
-  identifierTrigramTokens("abc_defGhij__klm"),
-  trigramsAre({"a",   "ab",  "ad",  "abc", "abd", "ade", "adg", "bcd",
-   "bde", "bdg", "cde", "cdg", "def", "deg", "dgh", "dgk",
-   "efg", "egh", "egk", "fgh", "fgk", "ghi", "ghk", "gkl",
-   "hij", "hik", "hkl", "ijk", "ikl", "jkl", "klm"}));
+  EXPECT_THAT(identifierTrigramTokens("_pb"),
+  trigramsAre({"_", "_p", "p", "pb"}));
+  EXPECT_THAT(identifierTrigramTokens("__pb"),
+  trigramsAre({"_", "_p", "p", "pb"}));
+
+  EXPECT_THAT(identifierTrigramTokens("abc_defGhij__klm"),
+  trigramsAre({"a",   "d",   "ab",  "ad",  "de",  "abc", "abd",
+   "ade", "adg", "bcd", "bde", "bdg", "cde", "cdg",
+   "def", "deg", "dgh", "dgk", "efg", "egh", "egk",
+   "fgh", "fgk", "ghi", "ghk", "gkl", "hij", "hik",
+   "hkl", "ijk", "ikl", "jkl", "klm"}));
 }
 
 TEST(DexTrigrams, QueryTrigrams) {
@@ -419,8 +424,16 @@
 
   EXPECT_THAT(generateQueryTrigrams(""), trigramsAre({}));
   EXPECT_THAT(generateQueryTrigrams("_"), trigramsAre({"_"}));
-  EXPECT_THAT(generateQueryTrigrams("__"), trigramsAre({"__"}));
-  EXPECT_THAT(generateQueryTrigrams("___"), trigramsAre({}));
+  EXPECT_THAT(generateQueryTrigrams("__"), trigramsAre({"_"}));
+  EXPECT_THAT(generateQueryTrigrams("___"), trigramsAre({"_"}));
+
+  EXPECT_THAT(generateQueryTrigrams("m_"), trigramsAre({"m"}));
+
+  EXPECT_THAT(generateQueryTrigrams("p_b"), trigramsAre({"pb"}));
+  EXPECT_THAT(generateQueryTrigrams("pb_"), trigramsAre({"pb"}));
+  EXPECT_THAT(generateQueryTrigrams("_p"), trigramsAre({"_p"}));
+  EXPECT_THAT(generateQueryTrigrams("_pb_"), trigramsAre({"pb"}));
+  EXPECT_THAT(generateQueryTrigrams("__pb"), trigramsAre({"pb"}));
 
   EXPECT_THAT(generateQueryTrigrams("X86"), trigramsAre({"x86"}));
 
@@ -525,25 +538,45 @@
 }
 
 TEST(DexTest, ShortQuery) {
-  auto I = Dex::build(generateSymbols({"OneTwoThreeFour"}), RefSlab(),
+  auto I = Dex::build(generateSymbols({"_OneTwoFourSix"}), RefSlab(),
   RelationSlab());
   FuzzyFindRequest Req;
   Req.AnyScope = true;
   bool Incomplete;
 
-  EXPECT_THAT(match(*I, Req, ), ElementsAre("OneTwoThreeFour"));
+  EXPECT_THAT(match(*I, Req, ), ElementsAre("_OneTwoFourSix"));
   EXPECT_FALSE(Incomplete) << "Empty string is not a short query";
 
-  Req.Query = "t";
+  Req.Query = "o";
+  EXPECT_THAT(match(*I, Req, ), ElementsAre("_OneTwoFourSix"));
+  EXPECT_TRUE(Incomplete) << "Using first head as unigram";
+
+  Req.Query = "_o";
+  EXPECT_THAT(match(*I, Req, ), ElementsAre("_OneTwoFourSix"));
+  EXPECT_TRUE(Incomplete) << "Using 

[PATCH] D113995: [clangd] Dex Trigrams: Improve query trigram generation

2021-11-16 Thread Kirill Bobyrev via Phabricator via cfe-commits
kbobyrev updated this revision to Diff 387615.
kbobyrev added a comment.

Add a small comment.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D113995/new/

https://reviews.llvm.org/D113995

Files:
  clang-tools-extra/clangd/index/dex/Trigram.cpp
  clang-tools-extra/clangd/unittests/DexTests.cpp


Index: clang-tools-extra/clangd/unittests/DexTests.cpp
===
--- clang-tools-extra/clangd/unittests/DexTests.cpp
+++ clang-tools-extra/clangd/unittests/DexTests.cpp
@@ -404,6 +404,9 @@
   EXPECT_THAT(identifierTrigramTokens("IsOK"),
   trigramsAre({"i", "is", "io", "iso", "iok", "sok"}));
 
+  EXPECT_THAT(identifierTrigramTokens("_pb"), trigramsAre({"_", "_p"}));
+  EXPECT_THAT(identifierTrigramTokens("__pb"), trigramsAre({"_", "__", "_p"}));
+
   EXPECT_THAT(
   identifierTrigramTokens("abc_defGhij__klm"),
   trigramsAre({"a",   "ab",  "ad",  "abc", "abd", "ade", "adg", "bcd",
@@ -422,6 +425,14 @@
   EXPECT_THAT(generateQueryTrigrams("__"), trigramsAre({"__"}));
   EXPECT_THAT(generateQueryTrigrams("___"), trigramsAre({}));
 
+  EXPECT_THAT(generateQueryTrigrams("m_"), trigramsAre({"m_"}));
+
+  EXPECT_THAT(generateQueryTrigrams("p_b"), trigramsAre({"pb"}));
+  EXPECT_THAT(generateQueryTrigrams("pb_"), trigramsAre({"pb"}));
+  EXPECT_THAT(generateQueryTrigrams("_p"), trigramsAre({"_p"}));
+  EXPECT_THAT(generateQueryTrigrams("_pb_"), trigramsAre({"_p"}));
+  EXPECT_THAT(generateQueryTrigrams("__pb"), trigramsAre({"_p"}));
+
   EXPECT_THAT(generateQueryTrigrams("X86"), trigramsAre({"x86"}));
 
   EXPECT_THAT(generateQueryTrigrams("clangd"),
@@ -545,6 +556,18 @@
   Req.Query = "ttf";
   EXPECT_THAT(match(*I, Req, ), ElementsAre("OneTwoThreeFour"));
   EXPECT_FALSE(Incomplete) << "3-char string is not a short query";
+
+  I = Dex::build(generateSymbols({"tok::kw_builtin_va_arg", "bar::whatever"}),
+ RefSlab(), RelationSlab());
+
+  Req.Query = "kw_";
+  EXPECT_THAT(match(*I, Req, ),
+  ElementsAre("tok::kw_builtin_va_arg"));
+  EXPECT_FALSE(Incomplete) << "kw_ is enough to match the whole symbol";
+  Req.Scopes = {"tok::"};
+  EXPECT_THAT(match(*I, Req, ),
+  ElementsAre("tok::kw_builtin_va_arg"));
+  EXPECT_FALSE(Incomplete) << "kw_ is enough to match the whole symbol";
 }
 
 TEST(DexTest, MatchQualifiedNamesWithoutSpecificScope) {
Index: clang-tools-extra/clangd/index/dex/Trigram.cpp
===
--- clang-tools-extra/clangd/index/dex/Trigram.cpp
+++ clang-tools-extra/clangd/index/dex/Trigram.cpp
@@ -101,17 +101,43 @@
 std::vector generateQueryTrigrams(llvm::StringRef Query) {
   if (Query.empty())
 return {};
-  std::string LowercaseQuery = Query.lower();
-  if (Query.size() < 3) // short-query trigrams only
-return {Token(Token::Kind::Trigram, LowercaseQuery)};
 
   // Apply fuzzy matching text segmentation.
   std::vector Roles(Query.size());
   calculateRoles(Query, llvm::makeMutableArrayRef(Roles.data(), Query.size()));
 
+  std::string LowercaseQuery = Query.lower();
+
+  if (LowercaseQuery.size() < 3) // short-query trigrams only.
+return {Token(Token::Kind::Trigram, LowercaseQuery)};
+
+  unsigned ValidSymbols =
+  llvm::count_if(Roles, [](CharRole R) { return R == Head || R == Tail; });
+  // If the query does not have any alphanumeric symbols, don't restrict the
+  // result to the names.
+  if (ValidSymbols == 0)
+return {};
+  // For queries with very few letters, emulate what generateIdentifierTrigrams
+  // outputs for the beginning of the Identifier.
+  if (ValidSymbols < 3) {
+std::string Letters =
+Roles.front() == Separator ? std::string(1, Query.front()) : "";
+for (unsigned I = 0; I < LowercaseQuery.size(); ++I) {
+  if (Roles[I] == Head || Roles[I] == Tail) {
+Letters += LowercaseQuery[I];
+// Similar to the identifier trigram generation, stop here for the
+// queries starting with the separator, i.e. "_va" will only output
+// "_v" here, identifier trigram generator will output "_" and "_v"
+if (Roles.front() == Separator)
+  break;
+  }
+}
+return {Token(Token::Kind::Trigram, Letters)};
+  }
+
   llvm::DenseSet UniqueTrigrams;
   std::string Chars;
-  for (unsigned I = 0; I < Query.size(); ++I) {
+  for (unsigned I = 0; I < LowercaseQuery.size(); ++I) {
 if (Roles[I] != Head && Roles[I] != Tail)
   continue; // Skip delimiters.
 Chars.push_back(LowercaseQuery[I]);


Index: clang-tools-extra/clangd/unittests/DexTests.cpp
===
--- clang-tools-extra/clangd/unittests/DexTests.cpp
+++ clang-tools-extra/clangd/unittests/DexTests.cpp
@@ -404,6 +404,9 @@
   EXPECT_THAT(identifierTrigramTokens("IsOK"),
   trigramsAre({"i", "is", "io", "iso", "iok", "sok"}));
 
+  

[PATCH] D113995: [clangd] Dex Trigrams: Improve query trigram generation

2021-11-16 Thread Kirill Bobyrev via Phabricator via cfe-commits
kbobyrev created this revision.
kbobyrev added a reviewer: sammccall.
Herald added subscribers: usaxena95, arphaman.
kbobyrev requested review of this revision.
Herald added subscribers: cfe-commits, MaskRay, ilya-biryukov.
Herald added a project: clang-tools-extra.

These are the trigrams for queries right now:

- "va" -> {Trigram("va")}
- "va_" -> {} (empty)

This is suboptimal since the resulting query will discard the query information
and return all symbols, some of which will be later be scored expensively
(fuzzy matching score). This is related to
https://github.com/clangd/clangd/issues/39 but does not fix it. Accidentally,
because of that incorrect behavior, when user types "tok::va" there are no
results (the issue is that `tok::kw___builtin_va_arg` does not have "va" token)
but when "tok::va_" is typed, expected result (`tok::kw___builtin_va_arg`)
shows up by accident. This is because the dex query transformer will only
lookup symbols within the `tok::` namespace. There won't be many, so the
returned results will contain symbol we need; this symbol will be filtered out
by the expensive checks and that will be displayed in the editor.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D113995

Files:
  clang-tools-extra/clangd/index/dex/Trigram.cpp
  clang-tools-extra/clangd/unittests/DexTests.cpp


Index: clang-tools-extra/clangd/unittests/DexTests.cpp
===
--- clang-tools-extra/clangd/unittests/DexTests.cpp
+++ clang-tools-extra/clangd/unittests/DexTests.cpp
@@ -404,6 +404,9 @@
   EXPECT_THAT(identifierTrigramTokens("IsOK"),
   trigramsAre({"i", "is", "io", "iso", "iok", "sok"}));
 
+  EXPECT_THAT(identifierTrigramTokens("_pb"), trigramsAre({"_", "_p"}));
+  EXPECT_THAT(identifierTrigramTokens("__pb"), trigramsAre({"_", "__", "_p"}));
+
   EXPECT_THAT(
   identifierTrigramTokens("abc_defGhij__klm"),
   trigramsAre({"a",   "ab",  "ad",  "abc", "abd", "ade", "adg", "bcd",
@@ -422,6 +425,14 @@
   EXPECT_THAT(generateQueryTrigrams("__"), trigramsAre({"__"}));
   EXPECT_THAT(generateQueryTrigrams("___"), trigramsAre({}));
 
+  EXPECT_THAT(generateQueryTrigrams("m_"), trigramsAre({"m_"}));
+
+  EXPECT_THAT(generateQueryTrigrams("p_b"), trigramsAre({"pb"}));
+  EXPECT_THAT(generateQueryTrigrams("pb_"), trigramsAre({"pb"}));
+  EXPECT_THAT(generateQueryTrigrams("_p"), trigramsAre({"_p"}));
+  EXPECT_THAT(generateQueryTrigrams("_pb_"), trigramsAre({"_p"}));
+  EXPECT_THAT(generateQueryTrigrams("__pb"), trigramsAre({"_p"}));
+
   EXPECT_THAT(generateQueryTrigrams("X86"), trigramsAre({"x86"}));
 
   EXPECT_THAT(generateQueryTrigrams("clangd"),
@@ -545,6 +556,18 @@
   Req.Query = "ttf";
   EXPECT_THAT(match(*I, Req, ), ElementsAre("OneTwoThreeFour"));
   EXPECT_FALSE(Incomplete) << "3-char string is not a short query";
+
+  I = Dex::build(generateSymbols({"tok::kw_builtin_va_arg", "bar::whatever"}),
+ RefSlab(), RelationSlab());
+
+  Req.Query = "kw_";
+  EXPECT_THAT(match(*I, Req, ),
+  ElementsAre("tok::kw_builtin_va_arg"));
+  EXPECT_FALSE(Incomplete) << "kw_ is enough to match the whole symbol";
+  Req.Scopes = {"tok::"};
+  EXPECT_THAT(match(*I, Req, ),
+  ElementsAre("tok::kw_builtin_va_arg"));
+  EXPECT_FALSE(Incomplete) << "kw_ is enough to match the whole symbol";
 }
 
 TEST(DexTest, MatchQualifiedNamesWithoutSpecificScope) {
Index: clang-tools-extra/clangd/index/dex/Trigram.cpp
===
--- clang-tools-extra/clangd/index/dex/Trigram.cpp
+++ clang-tools-extra/clangd/index/dex/Trigram.cpp
@@ -101,17 +101,42 @@
 std::vector generateQueryTrigrams(llvm::StringRef Query) {
   if (Query.empty())
 return {};
-  std::string LowercaseQuery = Query.lower();
-  if (Query.size() < 3) // short-query trigrams only
-return {Token(Token::Kind::Trigram, LowercaseQuery)};
 
   // Apply fuzzy matching text segmentation.
   std::vector Roles(Query.size());
   calculateRoles(Query, llvm::makeMutableArrayRef(Roles.data(), Query.size()));
 
+  std::string LowercaseQuery = Query.lower();
+
+  if (LowercaseQuery.size() < 3) // short-query trigrams only.
+return {Token(Token::Kind::Trigram, LowercaseQuery)};
+
+  unsigned ValidSymbols =
+  llvm::count_if(Roles, [](CharRole R) { return R == Head || R == Tail; });
+  // If the query does not have any alphanumeric symbols, don't restrict the
+  // result to the names.
+  if (ValidSymbols == 0)
+return {};
+  //
+  if (ValidSymbols < 3) {
+std::string Letters =
+Roles.front() == Separator ? std::string(1, Query.front()) : "";
+for (unsigned I = 0; I < LowercaseQuery.size(); ++I) {
+  if (Roles[I] == Head || Roles[I] == Tail) {
+Letters += LowercaseQuery[I];
+// Similar to the identifier trigram generation, stop here for the
+// queries starting with the separator, i.e. "_va" will only output
+// "_v" here,