This is an automated email from the ASF dual-hosted git repository.
hossman pushed a commit to branch branch_9x
in repository https://gitbox.apache.org/repos/asf/solr.git
The following commit(s) were added to refs/heads/branch_9x by this push:
new ae66b3fe505 SOLR-17335: New "vectorSimilarity" QParser for matching
documents based on a minimum vector similarity threshold
ae66b3fe505 is described below
commit ae66b3fe505e7d8f4af515e763d53a80e2affcf1
Author: Chris Hostetter <[email protected]>
AuthorDate: Fri Jun 21 10:53:20 2024 -0700
SOLR-17335: New "vectorSimilarity" QParser for matching documents based on
a minimum vector similarity threshold
(cherry picked from commit b49f3d076942186d924588e602e976d13647a331)
---
solr/CHANGES.txt | 2 +
.../java/org/apache/solr/search/QParserPlugin.java | 2 +
...QParser.java => AbstractVectorQParserBase.java} | 52 +-
.../org/apache/solr/search/neural/KnnQParser.java | 183 +-----
.../search/neural/VectorSimilarityQParser.java | 75 +++
.../neural/VectorSimilarityQParserPlugin.java | 33 ++
.../org/apache/solr/search/QueryEqualityTest.java | 74 +++
.../apache/solr/search/neural/KnnQParserTest.java | 2 +-
...rTest.java => VectorSimilarityQParserTest.java} | 620 +++++++++------------
.../query-guide/pages/dense-vector-search.adoc | 103 +++-
10 files changed, 562 insertions(+), 584 deletions(-)
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index eefa86795dd..412fdd1b6a2 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -17,6 +17,8 @@ New Features
* SOLR-17277: Circuit breakers may now be configured in a "soft" or "warnOnly"
mode in order to more easily test out new thresholds. Soft breakers will log
out a
message on each relevant request when tripped, but will not otherwise impact
or short circuit the requests. (Jason Gerlowski)
+* SOLR-17335: New "vectorSimilarity" QParser for matching documents based on a
minimum vector similarity threshold. (hossman)
+
Improvements
---------------------
* SOLR-17137: Enable Prometheus exporter to communicate with SSL protected
Solr. (Eivind Bergstøl via Eric Pugh)
diff --git a/solr/core/src/java/org/apache/solr/search/QParserPlugin.java
b/solr/core/src/java/org/apache/solr/search/QParserPlugin.java
index 99cb9efb0c7..8146c585cf0 100644
--- a/solr/core/src/java/org/apache/solr/search/QParserPlugin.java
+++ b/solr/core/src/java/org/apache/solr/search/QParserPlugin.java
@@ -31,6 +31,7 @@ import org.apache.solr.search.join.HashRangeQParserPlugin;
import org.apache.solr.search.mlt.MLTContentQParserPlugin;
import org.apache.solr.search.mlt.MLTQParserPlugin;
import org.apache.solr.search.neural.KnnQParserPlugin;
+import org.apache.solr.search.neural.VectorSimilarityQParserPlugin;
import org.apache.solr.util.plugin.NamedListInitializedPlugin;
public abstract class QParserPlugin implements NamedListInitializedPlugin,
SolrInfoBean {
@@ -89,6 +90,7 @@ public abstract class QParserPlugin implements
NamedListInitializedPlugin, SolrI
map.put(HashRangeQParserPlugin.NAME, new HashRangeQParserPlugin());
map.put(RankQParserPlugin.NAME, new RankQParserPlugin());
map.put(KnnQParserPlugin.NAME, new KnnQParserPlugin());
+ map.put(VectorSimilarityQParserPlugin.NAME, new
VectorSimilarityQParserPlugin());
standardPlugins = Collections.unmodifiableMap(map);
}
diff --git a/solr/core/src/java/org/apache/solr/search/neural/KnnQParser.java
b/solr/core/src/java/org/apache/solr/search/neural/AbstractVectorQParserBase.java
similarity index 83%
copy from solr/core/src/java/org/apache/solr/search/neural/KnnQParser.java
copy to
solr/core/src/java/org/apache/solr/search/neural/AbstractVectorQParserBase.java
index 06e6f25afaa..d680b64f6f5 100644
--- a/solr/core/src/java/org/apache/solr/search/neural/KnnQParser.java
+++
b/solr/core/src/java/org/apache/solr/search/neural/AbstractVectorQParserBase.java
@@ -33,60 +33,50 @@ import org.apache.solr.search.QueryParsing;
import org.apache.solr.search.QueryUtils;
import org.apache.solr.search.SyntaxError;
-public class KnnQParser extends QParser {
+public abstract class AbstractVectorQParserBase extends QParser {
static final String PRE_FILTER = "preFilter";
static final String EXCLUDE_TAGS = "excludeTags";
static final String INCLUDE_TAGS = "includeTags";
- // retrieve the top K results based on the distance similarity function
- static final String TOP_K = "topK";
- static final int DEFAULT_TOP_K = 10;
+ private final String denseVectorFieldName;
+ private final String vectorToSearch;
- /**
- * Constructor for the QParser
- *
- * @param qstr The part of the query string specific to this parser
- * @param localParams The set of parameters that are specific to this
QParser. See
- *
https://solr.apache.org/guide/solr/latest/query-guide/local-params.html
- * @param params The rest of the {@link SolrParams}
- * @param req The original {@link SolrQueryRequest}.
- */
- public KnnQParser(String qstr, SolrParams localParams, SolrParams params,
SolrQueryRequest req) {
+ public AbstractVectorQParserBase(
+ String qstr, SolrParams localParams, SolrParams params, SolrQueryRequest
req) {
super(qstr, localParams, params, req);
+ vectorToSearch = localParams.get(QueryParsing.V);
+ denseVectorFieldName = localParams.get(QueryParsing.F);
}
- @Override
- public Query parse() throws SyntaxError {
- String denseVectorField = localParams.get(QueryParsing.F);
- String vectorToSearch = localParams.get(QueryParsing.V);
- int topK = localParams.getInt(TOP_K, DEFAULT_TOP_K);
-
- if (denseVectorField == null || denseVectorField.isEmpty()) {
+ protected String getVectorToSearch() {
+ if (vectorToSearch == null || vectorToSearch.isEmpty()) {
throw new SolrException(
- SolrException.ErrorCode.BAD_REQUEST, "the Dense Vector field 'f' is
missing");
+ SolrException.ErrorCode.BAD_REQUEST, "the Dense Vector value 'v' to
search is missing");
}
+ return vectorToSearch;
+ }
- if (vectorToSearch == null || vectorToSearch.isEmpty()) {
+ protected String getFieldName() {
+ if (denseVectorFieldName == null || denseVectorFieldName.isEmpty()) {
throw new SolrException(
- SolrException.ErrorCode.BAD_REQUEST, "the Dense Vector value 'v' to
search is missing");
+ SolrException.ErrorCode.BAD_REQUEST, "the Dense Vector field 'f' is
missing");
}
+ return denseVectorFieldName;
+ }
- SchemaField schemaField =
req.getCore().getLatestSchema().getField(denseVectorField);
+ protected static DenseVectorField getCheckedFieldType(SchemaField
schemaField) {
FieldType fieldType = schemaField.getType();
if (!(fieldType instanceof DenseVectorField)) {
throw new SolrException(
SolrException.ErrorCode.BAD_REQUEST,
- "only DenseVectorField is compatible with Knn Query Parser");
+ "only DenseVectorField is compatible with Vector Query Parsers");
}
- DenseVectorField denseVectorType = (DenseVectorField) fieldType;
-
- return denseVectorType.getKnnVectorQuery(
- schemaField.getName(), vectorToSearch, topK, getFilterQuery());
+ return (DenseVectorField) fieldType;
}
- private Query getFilterQuery() throws SolrException, SyntaxError {
+ protected Query getFilterQuery() throws SolrException, SyntaxError {
// Default behavior of FQ wrapping, and suitability of some local params
// depends on wether we are a sub-query or not
diff --git a/solr/core/src/java/org/apache/solr/search/neural/KnnQParser.java
b/solr/core/src/java/org/apache/solr/search/neural/KnnQParser.java
index 06e6f25afaa..166dada5b7f 100644
--- a/solr/core/src/java/org/apache/solr/search/neural/KnnQParser.java
+++ b/solr/core/src/java/org/apache/solr/search/neural/KnnQParser.java
@@ -16,204 +16,31 @@
*/
package org.apache.solr.search.neural;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.List;
import org.apache.lucene.search.Query;
-import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.SolrParams;
-import org.apache.solr.common.util.StrUtils;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.schema.DenseVectorField;
-import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.SchemaField;
-import org.apache.solr.search.QParser;
-import org.apache.solr.search.QueryParsing;
-import org.apache.solr.search.QueryUtils;
import org.apache.solr.search.SyntaxError;
-public class KnnQParser extends QParser {
-
- static final String PRE_FILTER = "preFilter";
- static final String EXCLUDE_TAGS = "excludeTags";
- static final String INCLUDE_TAGS = "includeTags";
+public class KnnQParser extends AbstractVectorQParserBase {
// retrieve the top K results based on the distance similarity function
static final String TOP_K = "topK";
static final int DEFAULT_TOP_K = 10;
- /**
- * Constructor for the QParser
- *
- * @param qstr The part of the query string specific to this parser
- * @param localParams The set of parameters that are specific to this
QParser. See
- *
https://solr.apache.org/guide/solr/latest/query-guide/local-params.html
- * @param params The rest of the {@link SolrParams}
- * @param req The original {@link SolrQueryRequest}.
- */
public KnnQParser(String qstr, SolrParams localParams, SolrParams params,
SolrQueryRequest req) {
super(qstr, localParams, params, req);
}
@Override
public Query parse() throws SyntaxError {
- String denseVectorField = localParams.get(QueryParsing.F);
- String vectorToSearch = localParams.get(QueryParsing.V);
- int topK = localParams.getInt(TOP_K, DEFAULT_TOP_K);
-
- if (denseVectorField == null || denseVectorField.isEmpty()) {
- throw new SolrException(
- SolrException.ErrorCode.BAD_REQUEST, "the Dense Vector field 'f' is
missing");
- }
-
- if (vectorToSearch == null || vectorToSearch.isEmpty()) {
- throw new SolrException(
- SolrException.ErrorCode.BAD_REQUEST, "the Dense Vector value 'v' to
search is missing");
- }
-
- SchemaField schemaField =
req.getCore().getLatestSchema().getField(denseVectorField);
- FieldType fieldType = schemaField.getType();
- if (!(fieldType instanceof DenseVectorField)) {
- throw new SolrException(
- SolrException.ErrorCode.BAD_REQUEST,
- "only DenseVectorField is compatible with Knn Query Parser");
- }
-
- DenseVectorField denseVectorType = (DenseVectorField) fieldType;
+ final SchemaField schemaField =
req.getCore().getLatestSchema().getField(getFieldName());
+ final DenseVectorField denseVectorType = getCheckedFieldType(schemaField);
+ final String vectorToSearch = getVectorToSearch();
+ final int topK = localParams.getInt(TOP_K, DEFAULT_TOP_K);
return denseVectorType.getKnnVectorQuery(
schemaField.getName(), vectorToSearch, topK, getFilterQuery());
}
-
- private Query getFilterQuery() throws SolrException, SyntaxError {
-
- // Default behavior of FQ wrapping, and suitability of some local params
- // depends on wether we are a sub-query or not
- final boolean isSubQuery = recurseCount != 0;
-
- // include/exclude tags for global fqs to wrap;
- // Check these up front for error handling if combined with `fq` local
param.
- final List<String> includedGlobalFQTags = getLocalParamTags(INCLUDE_TAGS);
- final List<String> excludedGlobalFQTags = getLocalParamTags(EXCLUDE_TAGS);
- final boolean haveGlobalFQTags =
- !(includedGlobalFQTags.isEmpty() && excludedGlobalFQTags.isEmpty());
-
- if (haveGlobalFQTags) {
- // Some early error handling of incompatible options...
-
- if (isFilter()) { // this knn query is itself a filter query
- throw new SolrException(
- SolrException.ErrorCode.BAD_REQUEST,
- "Knn Query Parser used as a filter does not support "
- + INCLUDE_TAGS
- + " or "
- + EXCLUDE_TAGS
- + " localparams");
- }
-
- if (isSubQuery) { // this knn query is a sub-query of a broader query
(possibly disjunction)
- throw new SolrException(
- SolrException.ErrorCode.BAD_REQUEST,
- "Knn Query Parser used as a sub-query does not support "
- + INCLUDE_TAGS
- + " or "
- + EXCLUDE_TAGS
- + " localparams");
- }
- }
-
- // Explicit local params specifying the filter(s) to wrap
- final String[] preFilters = getLocalParams().getParams(PRE_FILTER);
- if (null != preFilters) {
-
- // We don't particularly care if preFilters is empty, the usage below
will still work,
- // but SolrParams API says it should be null not empty...
- assert 0 != preFilters.length
- : "SolrParams.getParams should return null, never zero len array";
-
- if (haveGlobalFQTags) {
- throw new SolrException(
- SolrException.ErrorCode.BAD_REQUEST,
- "Knn Query Parser does not support combining "
- + PRE_FILTER
- + " localparam with either "
- + INCLUDE_TAGS
- + " or "
- + EXCLUDE_TAGS
- + " localparams");
- }
-
- final List<Query> preFilterQueries = new ArrayList<>(preFilters.length);
- for (String f : preFilters) {
- final QParser parser = subQuery(f, null);
- parser.setIsFilter(true);
-
- // maybe null, ie: `preFilter=""`
- final Query filter = parser.getQuery();
- if (null != filter) {
- preFilterQueries.add(filter);
- }
- }
- try {
- return req.getSearcher().getProcessedFilter(null,
preFilterQueries).filter;
- } catch (IOException e) {
- throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
- }
- }
-
- // No explicit `preFilter` localparams specifying what we should filter on.
- //
- // So now, if we're either a filter or a subquery, we have to default to
- // not wrapping anything...
- if (isFilter() || isSubQuery) {
- return null;
- }
-
- // At this point we now are a (regular) query and can wrap global `fq`
filters...
- try {
- // Start by assuming we wrap all global filters,
- // then adjust our list based on include/exclude tag params
- List<Query> globalFQs = QueryUtils.parseFilterQueries(req);
-
- // Adjust our globalFQs based on any include/exclude we may have
- if (!includedGlobalFQTags.isEmpty()) {
- // NOTE: Even if no FQs match the specified tag(s) the fact that tags
were specified
- // means we should replace globalFQs (even with a possibly empty list)
- globalFQs = new ArrayList<>(QueryUtils.getTaggedQueries(req,
includedGlobalFQTags));
- }
- if (null != excludedGlobalFQTags) {
- globalFQs.removeAll(QueryUtils.getTaggedQueries(req,
excludedGlobalFQTags));
- }
-
- return req.getSearcher().getProcessedFilter(null, globalFQs).filter;
-
- } catch (IOException e) {
- throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
- }
- }
-
- /**
- * @return set (possibly empty) of tags specified in the given local param
- * @see StrUtils#splitSmart
- * @see QueryUtils#getTaggedQueries
- * @see #localParams
- */
- private List<String> getLocalParamTags(final String param) {
- final String[] strVals = localParams.getParams(param);
- if (null == strVals) {
- return Collections.emptyList();
- }
- final List<String> tags = new ArrayList<>(strVals.length * 2);
- for (String val : strVals) {
- // This ensures parity w/how QParser constructor builds tagMap,
- // and that empty strings will make it into our List (for "include
nothing")
- if (0 < val.indexOf(',')) {
- tags.addAll(StrUtils.splitSmart(val, ','));
- } else {
- tags.add(val);
- }
- }
- return tags;
- }
}
diff --git
a/solr/core/src/java/org/apache/solr/search/neural/VectorSimilarityQParser.java
b/solr/core/src/java/org/apache/solr/search/neural/VectorSimilarityQParser.java
new file mode 100644
index 00000000000..e3ec2f242f7
--- /dev/null
+++
b/solr/core/src/java/org/apache/solr/search/neural/VectorSimilarityQParser.java
@@ -0,0 +1,75 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.search.neural;
+
+import org.apache.lucene.index.VectorEncoding;
+import org.apache.lucene.search.ByteVectorSimilarityQuery;
+import org.apache.lucene.search.FloatVectorSimilarityQuery;
+import org.apache.lucene.search.Query;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.common.params.SolrParams;
+import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.schema.DenseVectorField;
+import org.apache.solr.schema.SchemaField;
+import org.apache.solr.search.SyntaxError;
+import org.apache.solr.util.vector.DenseVectorParser;
+
+public class VectorSimilarityQParser extends AbstractVectorQParserBase {
+
+ // retrieve the top results based on the distance similarity function
thresholds
+ static final String MIN_RETURN = "minReturn";
+ static final String MIN_TRAVERSE = "minTraverse";
+
+ static final float DEFAULT_MIN_TRAVERSE = Float.NEGATIVE_INFINITY;
+
+ public VectorSimilarityQParser(
+ String qstr, SolrParams localParams, SolrParams params, SolrQueryRequest
req) {
+ super(qstr, localParams, params, req);
+ }
+
+ @Override
+ public Query parse() throws SyntaxError {
+ final String fieldName = getFieldName();
+ final SchemaField schemaField =
req.getCore().getLatestSchema().getField(fieldName);
+ final DenseVectorField denseVectorType = getCheckedFieldType(schemaField);
+ final String vectorToSearch = getVectorToSearch();
+ final float minTraverse = localParams.getFloat(MIN_TRAVERSE,
DEFAULT_MIN_TRAVERSE);
+ final Float minReturn = localParams.getFloat(MIN_RETURN);
+ if (null == minReturn) {
+ throw new SolrException(
+ SolrException.ErrorCode.BAD_REQUEST,
+ MIN_RETURN + " is required to use Vector Similarity QParser");
+ }
+
+ final DenseVectorParser vectorBuilder =
+ denseVectorType.getVectorBuilder(vectorToSearch,
DenseVectorParser.BuilderPhase.QUERY);
+
+ final VectorEncoding vectorEncoding = denseVectorType.getVectorEncoding();
+ switch (vectorEncoding) {
+ case FLOAT32:
+ return new FloatVectorSimilarityQuery(
+ fieldName, vectorBuilder.getFloatVector(), minTraverse, minReturn,
getFilterQuery());
+ case BYTE:
+ return new ByteVectorSimilarityQuery(
+ fieldName, vectorBuilder.getByteVector(), minTraverse, minReturn,
getFilterQuery());
+ default:
+ throw new SolrException(
+ SolrException.ErrorCode.SERVER_ERROR,
+ "Unexpected state. Vector Encoding: " + vectorEncoding);
+ }
+ }
+}
diff --git
a/solr/core/src/java/org/apache/solr/search/neural/VectorSimilarityQParserPlugin.java
b/solr/core/src/java/org/apache/solr/search/neural/VectorSimilarityQParserPlugin.java
new file mode 100644
index 00000000000..4109d0df388
--- /dev/null
+++
b/solr/core/src/java/org/apache/solr/search/neural/VectorSimilarityQParserPlugin.java
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.search.neural;
+
+import org.apache.solr.common.params.SolrParams;
+import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.search.QParser;
+import org.apache.solr.search.QParserPlugin;
+
+/** A neural query parser to run min-similarity search on Dense Vector fields.
*/
+public class VectorSimilarityQParserPlugin extends QParserPlugin {
+ public static final String NAME = "vectorSimilarity";
+
+ @Override
+ public QParser createParser(
+ String qstr, SolrParams localParams, SolrParams params, SolrQueryRequest
req) {
+ return new VectorSimilarityQParser(qstr, localParams, params, req);
+ }
+}
diff --git a/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java
b/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java
index 653c0935879..6e675771560 100644
--- a/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java
+++ b/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java
@@ -1469,6 +1469,80 @@ public class QueryEqualityTest extends SolrTestCaseJ4 {
}
}
+ public void testQueryVecSim() throws Exception {
+ SolrInputDocument doc = new SolrInputDocument();
+ doc.addField("id", "0");
+ doc.addField("vector", Arrays.asList(1, 2, 3, 4));
+ assertU(adoc(doc));
+ assertU(commit());
+
+ final String common = "!vectorSimilarity minReturn=0.3 f=vector";
+ final String qvec = "[1.0,2.0,3.0,4.0]";
+
+ try (SolrQueryRequest req0 = req()) {
+
+ // no filters
+ final Query fqNull =
+ assertQueryEqualsAndReturn(
+ "vectorSimilarity",
+ req0,
+ "{" + common + "}" + qvec,
+ "{" + common + " minTraverse='-Infinity'}" + qvec,
+ "{" + common + " preFilter=''}" + qvec,
+ "{" + common + " v=" + qvec + "}");
+
+ try (SolrQueryRequest req1 = req("fq", "{!tag=t1}id:1", "xxx", "id:1")) {
+ // either global fq, or (same) preFilter as localparam
+ final Query fqOne =
+ assertQueryEqualsAndReturn(
+ "vectorSimilarity",
+ req1,
+ "{" + common + "}" + qvec,
+ "{" + common + " includeTags=t1}" + qvec,
+ "{" + common + " preFilter='id:1'}" + qvec,
+ "{" + common + " preFilter=$xxx}" + qvec,
+ "{" + common + " v=" + qvec + "}");
+ QueryUtils.checkUnequal(fqNull, fqOne);
+
+ try (SolrQueryRequest req2 = req("fq", "{!tag=t2}id:2", "xxx", "id:1",
"yyy", "")) {
+ // override global fq with local param to use different preFilter
+ final Query fqOneOverride =
+ assertQueryEqualsAndReturn(
+ "vectorSimilarity",
+ req2,
+ "{" + common + " preFilter='id:1'}" + qvec,
+ "{" + common + " preFilter=$xxx}" + qvec);
+ QueryUtils.checkEqual(fqOne, fqOneOverride);
+
+ // override global fq with local param to use no preFilters
+ final Query fqNullOverride =
+ assertQueryEqualsAndReturn(
+ "vectorSimilarity",
+ req2,
+ "{" + common + " preFilter=''}" + qvec,
+ "{" + common + " excludeTags=t2}" + qvec,
+ "{" + common + " preFilter=$yyy}" + qvec);
+ QueryUtils.checkEqual(fqNull, fqNullOverride);
+ }
+ }
+
+ try (SolrQueryRequest reqPostFilter = req("fq", "{!tag=post frange
cache=false l=0}9.9")) {
+ // global post-filter fq should always be ignored
+ final Query fqPostFilter =
+ assertQueryEqualsAndReturn(
+ "vectorSimilarity",
+ reqPostFilter,
+ "{" + common + "}" + qvec,
+ "{" + common + " includeTags=post}" + qvec);
+ QueryUtils.checkEqual(fqNull, fqPostFilter);
+ }
+
+ } finally {
+ delQ("id:0");
+ assertU(commit());
+ }
+ }
+
/**
* NOTE: defType is not only used to pick the parser, but also to record the
parser being tested
* for coverage sanity checking
diff --git
a/solr/core/src/test/org/apache/solr/search/neural/KnnQParserTest.java
b/solr/core/src/test/org/apache/solr/search/neural/KnnQParserTest.java
index ccd0e3ecc44..f5d5668a7e5 100644
--- a/solr/core/src/test/org/apache/solr/search/neural/KnnQParserTest.java
+++ b/solr/core/src/test/org/apache/solr/search/neural/KnnQParserTest.java
@@ -178,7 +178,7 @@ public class KnnQParserTest extends SolrTestCaseJ4 {
assertQEx(
"Incorrect vector field type should throw Exception",
- "only DenseVectorField is compatible with Knn Query Parser",
+ "only DenseVectorField is compatible with Vector Query Parsers",
req(CommonParams.Q, "{!knn f=id topK=10}" + vectorToSearch, "fl",
"id"),
SolrException.ErrorCode.BAD_REQUEST);
}
diff --git
a/solr/core/src/test/org/apache/solr/search/neural/KnnQParserTest.java
b/solr/core/src/test/org/apache/solr/search/neural/VectorSimilarityQParserTest.java
similarity index 56%
copy from solr/core/src/test/org/apache/solr/search/neural/KnnQParserTest.java
copy to
solr/core/src/test/org/apache/solr/search/neural/VectorSimilarityQParserTest.java
index ccd0e3ecc44..cf6aeb65973 100644
--- a/solr/core/src/test/org/apache/solr/search/neural/KnnQParserTest.java
+++
b/solr/core/src/test/org/apache/solr/search/neural/VectorSimilarityQParserTest.java
@@ -16,12 +16,10 @@
*/
package org.apache.solr.search.neural;
-import static org.apache.solr.search.neural.KnnQParser.DEFAULT_TOP_K;
-
import java.util.ArrayList;
import java.util.Arrays;
-import java.util.Collections;
import java.util.List;
+import org.apache.lucene.index.VectorSimilarityFunction;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrInputDocument;
@@ -32,7 +30,7 @@ import org.junit.After;
import org.junit.Before;
import org.junit.Test;
-public class KnnQParserTest extends SolrTestCaseJ4 {
+public class VectorSimilarityQParserTest extends SolrTestCaseJ4 {
String IDField = "id";
String vectorField = "vector";
String vectorField2 = "vector2";
@@ -115,71 +113,14 @@ public class KnnQParserTest extends SolrTestCaseJ4 {
deleteCore();
}
- @Test
- public void incorrectTopK_shouldThrowException() {
- String vectorToSearch = "[1.0, 2.0, 3.0, 4.0]";
-
- assertQEx(
- "String topK should throw Exception",
- "For input string: \"string\"",
- req(CommonParams.Q, "{!knn f=vector topK=string}" + vectorToSearch,
"fl", "id"),
- SolrException.ErrorCode.BAD_REQUEST);
-
- assertQEx(
- "Double topK should throw Exception",
- "For input string: \"4.5\"",
- req(CommonParams.Q, "{!knn f=vector topK=4.5}" + vectorToSearch, "fl",
"id"),
- SolrException.ErrorCode.BAD_REQUEST);
- }
-
- @Test
- public void topKMissing_shouldReturnDefaultTopK() {
- String vectorToSearch = "[1.0, 2.0, 3.0, 4.0]";
-
- assertQ(
- req(CommonParams.Q, "{!knn f=vector}" + vectorToSearch, "fl", "id"),
- "//result[@numFound='" + DEFAULT_TOP_K + "']",
- "//result/doc[1]/str[@name='id'][.='1']",
- "//result/doc[2]/str[@name='id'][.='4']",
- "//result/doc[3]/str[@name='id'][.='2']",
- "//result/doc[4]/str[@name='id'][.='10']",
- "//result/doc[5]/str[@name='id'][.='3']",
- "//result/doc[6]/str[@name='id'][.='7']",
- "//result/doc[7]/str[@name='id'][.='5']",
- "//result/doc[8]/str[@name='id'][.='6']",
- "//result/doc[9]/str[@name='id'][.='9']",
- "//result/doc[10]/str[@name='id'][.='8']");
- }
-
- @Test
- public void topK_shouldReturnOnlyTopKResults() {
- String vectorToSearch = "[1.0, 2.0, 3.0, 4.0]";
-
- assertQ(
- req(CommonParams.Q, "{!knn f=vector topK=5}" + vectorToSearch, "fl",
"id"),
- "//result[@numFound='5']",
- "//result/doc[1]/str[@name='id'][.='1']",
- "//result/doc[2]/str[@name='id'][.='4']",
- "//result/doc[3]/str[@name='id'][.='2']",
- "//result/doc[4]/str[@name='id'][.='10']",
- "//result/doc[5]/str[@name='id'][.='3']");
-
- assertQ(
- req(CommonParams.Q, "{!knn f=vector topK=3}" + vectorToSearch, "fl",
"id"),
- "//result[@numFound='3']",
- "//result/doc[1]/str[@name='id'][.='1']",
- "//result/doc[2]/str[@name='id'][.='4']",
- "//result/doc[3]/str[@name='id'][.='2']");
- }
-
@Test
public void incorrectVectorFieldType_shouldThrowException() {
String vectorToSearch = "[1.0, 2.0, 3.0, 4.0]";
assertQEx(
"Incorrect vector field type should throw Exception",
- "only DenseVectorField is compatible with Knn Query Parser",
- req(CommonParams.Q, "{!knn f=id topK=10}" + vectorToSearch, "fl",
"id"),
+ "only DenseVectorField is compatible with ",
+ req(CommonParams.Q, "{!vectorSimilarity f=id}" + vectorToSearch, "fl",
"id"),
SolrException.ErrorCode.BAD_REQUEST);
}
@@ -190,7 +131,7 @@ public class KnnQParserTest extends SolrTestCaseJ4 {
assertQEx(
"Undefined vector field should throw Exception",
"undefined field: \"notExistent\"",
- req(CommonParams.Q, "{!knn f=notExistent topK=10}" + vectorToSearch,
"fl", "id"),
+ req(CommonParams.Q, "{!vectorSimilarity f=notExistent}" +
vectorToSearch, "fl", "id"),
SolrException.ErrorCode.BAD_REQUEST);
}
@@ -201,7 +142,7 @@ public class KnnQParserTest extends SolrTestCaseJ4 {
assertQEx(
"missing vector field should throw Exception",
"the Dense Vector field 'f' is missing",
- req(CommonParams.Q, "{!knn topK=10}" + vectorToSearch, "fl", "id"),
+ req(CommonParams.Q, "{!vectorSimilarity}" + vectorToSearch, "fl",
"id"),
SolrException.ErrorCode.BAD_REQUEST);
}
@@ -210,113 +151,45 @@ public class KnnQParserTest extends SolrTestCaseJ4 {
String vectorToSearch = "[1.0, 2.0, 3.0, 4.0]";
assertQ(
- req(CommonParams.Q, "{!knn f=vector2 topK=5}" + vectorToSearch, "fl",
"id"),
+ req(
+ CommonParams.Q,
+ "{!vectorSimilarity f=vector2 minReturn=0.8}" + vectorToSearch,
+ "fl",
+ "id"),
"//result[@numFound='3']",
"//result/doc[1]/str[@name='id'][.='11']",
"//result/doc[2]/str[@name='id'][.='13']",
"//result/doc[3]/str[@name='id'][.='12']");
}
- @Test
- public void highDimensionFloatVectorField_shouldSearchOnThatField() {
- int highDimension = 2048;
- List<SolrInputDocument> docsToIndex =
this.prepareHighDimensionFloatVectorsDocs(highDimension);
- for (SolrInputDocument doc : docsToIndex) {
- assertU(adoc(doc));
- }
- assertU(commit());
-
- float[] highDimensionalityQueryVector = new float[highDimension];
- for (int i = 0; i < highDimension; i++) {
- highDimensionalityQueryVector[i] = i;
- }
- String vectorToSearch = Arrays.toString(highDimensionalityQueryVector);
-
- assertQ(
- req(CommonParams.Q, "{!knn f=2048_float_vector topK=1}" +
vectorToSearch, "fl", "id"),
- "//result[@numFound='1']",
- "//result/doc[1]/str[@name='id'][.='1']");
- }
-
- @Test
- public void highDimensionByteVectorField_shouldSearchOnThatField() {
- int highDimension = 2048;
- List<SolrInputDocument> docsToIndex =
this.prepareHighDimensionByteVectorsDocs(highDimension);
- for (SolrInputDocument doc : docsToIndex) {
- assertU(adoc(doc));
- }
- assertU(commit());
-
- byte[] highDimensionalityQueryVector = new byte[highDimension];
- for (int i = 0; i < highDimension; i++) {
- highDimensionalityQueryVector[i] = (byte) (i % 127);
- }
- String vectorToSearch = Arrays.toString(highDimensionalityQueryVector);
-
- assertQ(
- req(CommonParams.Q, "{!knn f=2048_byte_vector topK=1}" +
vectorToSearch, "fl", "id"),
- "//result[@numFound='1']",
- "//result/doc[1]/str[@name='id'][.='1']");
- }
-
- private List<SolrInputDocument> prepareHighDimensionFloatVectorsDocs(int
highDimension) {
- int docsCount = 13;
- String field = "2048_float_vector";
- List<SolrInputDocument> docs = new ArrayList<>(docsCount);
-
- for (int i = 1; i < docsCount + 1; i++) {
- SolrInputDocument doc = new SolrInputDocument();
- doc.addField(IDField, i);
- docs.add(doc);
- }
-
- for (int i = 0; i < docsCount; i++) {
- List<Integer> highDimensionalityVector = new ArrayList<>();
- for (int j = i * highDimension; j < highDimension; j++) {
- highDimensionalityVector.add(j);
- }
- docs.get(i).addField(field, highDimensionalityVector);
- }
- Collections.reverse(docs);
- return docs;
- }
-
- private List<SolrInputDocument> prepareHighDimensionByteVectorsDocs(int
highDimension) {
- int docsCount = 13;
- String field = "2048_byte_vector";
- List<SolrInputDocument> docs = new ArrayList<>(docsCount);
-
- for (int i = 1; i < docsCount + 1; i++) {
- SolrInputDocument doc = new SolrInputDocument();
- doc.addField(IDField, i);
- docs.add(doc);
- }
-
- for (int i = 0; i < docsCount; i++) {
- List<Integer> highDimensionalityVector = new ArrayList<>();
- for (int j = i * highDimension; j < highDimension; j++) {
- highDimensionalityVector.add(j % 127);
- }
- docs.get(i).addField(field, highDimensionalityVector);
- }
- Collections.reverse(docs);
- return docs;
- }
-
@Test
public void vectorByteEncodingField_shouldSearchOnThatField() {
String vectorToSearch = "[2, 2, 1, 3]";
+ float minR =
+ VectorSimilarityFunction.COSINE.compare(new byte[] {2, 2, 1, 3}, new
byte[] {1, 2, 1, 2});
assertQ(
- req(CommonParams.Q, "{!knn f=vector_byte_encoding topK=2}" +
vectorToSearch, "fl", "id"),
+ req(
+ CommonParams.Q,
+ "{!vectorSimilarity f=vector_byte_encoding minReturn=" + minR +
"}" + vectorToSearch,
+ "fl",
+ "id"),
"//result[@numFound='2']",
"//result/doc[1]/str[@name='id'][.='2']",
"//result/doc[2]/str[@name='id'][.='3']");
vectorToSearch = "[8, 3, 2, 4]";
+ minR =
+ VectorSimilarityFunction.COSINE.compare(new byte[] {8, 3, 2, 4}, new
byte[] {7, 2, 1, 3});
assertQ(
- req(CommonParams.Q, "{!knn f=vector_byte_encoding topK=2}" +
vectorToSearch, "fl", "id"),
+ req(
+ CommonParams.Q,
+ "{!vectorSimilarity f=vector_byte_encoding minReturn=" + minR +
"}" + vectorToSearch,
+ "fl",
+ "id,score,vector_byte_encoding",
+ "indent",
+ "true"),
"//result[@numFound='2']",
"//result/doc[1]/str[@name='id'][.='8']",
"//result/doc[2]/str[@name='id'][.='4']");
@@ -329,7 +202,11 @@ public class KnnQParserTest extends SolrTestCaseJ4 {
assertQEx(
"incorrect vector element: '8.3'. The expected format is:'[b1,b2..b3]'
where each element b is a byte (-128 to 127)",
"incorrect vector element: '8.3'. The expected format is:'[b1,b2..b3]'
where each element b is a byte (-128 to 127)",
- req(CommonParams.Q, "{!knn f=vector_byte_encoding topK=10}" +
vectorToSearch, "fl", "id"),
+ req(
+ CommonParams.Q,
+ "{!vectorSimilarity f=vector_byte_encoding minReturn=0.0}" +
vectorToSearch,
+ "fl",
+ "id"),
SolrException.ErrorCode.BAD_REQUEST);
}
@@ -341,7 +218,11 @@ public class KnnQParserTest extends SolrTestCaseJ4 {
assertQEx(
"incorrect vector element: ' -129'. The expected format
is:'[b1,b2..b3]' where each element b is a byte (-128 to 127)",
"incorrect vector element: ' -129'. The expected format
is:'[b1,b2..b3]' where each element b is a byte (-128 to 127)",
- req(CommonParams.Q, "{!knn f=vector_byte_encoding topK=10}" +
vectorToSearch, "fl", "id"),
+ req(
+ CommonParams.Q,
+ "{!vectorSimilarity f=vector_byte_encoding minReturn=0.0}" +
vectorToSearch,
+ "fl",
+ "id"),
SolrException.ErrorCode.BAD_REQUEST);
vectorToSearch = "[1, 3, 156, 5]";
@@ -349,7 +230,11 @@ public class KnnQParserTest extends SolrTestCaseJ4 {
assertQEx(
"incorrect vector element: ' 156'. The expected format
is:'[b1,b2..b3]' where each element b is a byte (-128 to 127)",
"incorrect vector element: ' 156'. The expected format
is:'[b1,b2..b3]' where each element b is a byte (-128 to 127)",
- req(CommonParams.Q, "{!knn f=vector_byte_encoding topK=10}" +
vectorToSearch, "fl", "id"),
+ req(
+ CommonParams.Q,
+ "{!vectorSimilarity f=vector_byte_encoding minReturn=0.0}" +
vectorToSearch,
+ "fl",
+ "id"),
SolrException.ErrorCode.BAD_REQUEST);
}
@@ -358,7 +243,7 @@ public class KnnQParserTest extends SolrTestCaseJ4 {
assertQEx(
"missing vector to search should throw Exception",
"the Dense Vector value 'v' to search is missing",
- req(CommonParams.Q, "{!knn f=vector topK=10}", "fl", "id"),
+ req(CommonParams.Q, "{!vectorSimilarity f=vector minReturn=0.0}",
"fl", "id"),
SolrException.ErrorCode.BAD_REQUEST);
}
@@ -368,14 +253,22 @@ public class KnnQParserTest extends SolrTestCaseJ4 {
assertQEx(
"missing vector to search should throw Exception",
"incorrect vector dimension. The vector value has size 3 while it is
expected a vector with size 4",
- req(CommonParams.Q, "{!knn f=vector topK=10}" + vectorToSearch, "fl",
"id"),
+ req(
+ CommonParams.Q,
+ "{!vectorSimilarity f=vector minReturn=0.0}" + vectorToSearch,
+ "fl",
+ "id"),
SolrException.ErrorCode.BAD_REQUEST);
vectorToSearch = "[2.0, 4.4,,]";
assertQEx(
"incorrect vector to search should throw Exception",
"incorrect vector dimension. The vector value has size 2 while it is
expected a vector with size 4",
- req(CommonParams.Q, "{!knn f=vector topK=10}" + vectorToSearch, "fl",
"id"),
+ req(
+ CommonParams.Q,
+ "{!vectorSimilarity f=vector minReturn=0.0}" + vectorToSearch,
+ "fl",
+ "id"),
SolrException.ErrorCode.BAD_REQUEST);
}
@@ -385,35 +278,55 @@ public class KnnQParserTest extends SolrTestCaseJ4 {
assertQEx(
"incorrect vector to search should throw Exception",
"incorrect vector format. The expected format is:'[f1,f2..f3]' where
each element f is a float",
- req(CommonParams.Q, "{!knn f=vector topK=10}" + vectorToSearch, "fl",
"id"),
+ req(
+ CommonParams.Q,
+ "{!vectorSimilarity f=vector minReturn=0.0}" + vectorToSearch,
+ "fl",
+ "id"),
SolrException.ErrorCode.BAD_REQUEST);
vectorToSearch = "[2.0, 4.4, 3.5, 6.4";
assertQEx(
"incorrect vector to search should throw Exception",
"incorrect vector format. The expected format is:'[f1,f2..f3]' where
each element f is a float",
- req(CommonParams.Q, "{!knn f=vector topK=10}" + vectorToSearch, "fl",
"id"),
+ req(
+ CommonParams.Q,
+ "{!vectorSimilarity f=vector minReturn=0.0}" + vectorToSearch,
+ "fl",
+ "id"),
SolrException.ErrorCode.BAD_REQUEST);
vectorToSearch = "2.0, 4.4, 3.5, 6.4]";
assertQEx(
"incorrect vector to search should throw Exception",
"incorrect vector format. The expected format is:'[f1,f2..f3]' where
each element f is a float",
- req(CommonParams.Q, "{!knn f=vector topK=10}" + vectorToSearch, "fl",
"id"),
+ req(
+ CommonParams.Q,
+ "{!vectorSimilarity f=vector minReturn=0.0}" + vectorToSearch,
+ "fl",
+ "id"),
SolrException.ErrorCode.BAD_REQUEST);
vectorToSearch = "[2.0, 4.4, 3.5, stringElement]";
assertQEx(
"incorrect vector to search should throw Exception",
"incorrect vector element: ' stringElement'. The expected format
is:'[f1,f2..f3]' where each element f is a float",
- req(CommonParams.Q, "{!knn f=vector topK=10}" + vectorToSearch, "fl",
"id"),
+ req(
+ CommonParams.Q,
+ "{!vectorSimilarity f=vector minReturn=0.0}" + vectorToSearch,
+ "fl",
+ "id"),
SolrException.ErrorCode.BAD_REQUEST);
vectorToSearch = "[2.0, 4.4, , ]";
assertQEx(
"incorrect vector to search should throw Exception",
"incorrect vector element: ' '. The expected format is:'[f1,f2..f3]'
where each element f is a float",
- req(CommonParams.Q, "{!knn f=vector topK=10}" + vectorToSearch, "fl",
"id"),
+ req(
+ CommonParams.Q,
+ "{!vectorSimilarity f=vector minReturn=0.0}" + vectorToSearch,
+ "fl",
+ "id"),
SolrException.ErrorCode.BAD_REQUEST);
}
@@ -422,8 +335,12 @@ public class KnnQParserTest extends SolrTestCaseJ4 {
String vectorToSearch = "[1.0, 2.0, 3.0, 4.0]";
assertQ(
- req(CommonParams.Q, "{!knn f=vector topK=10}" + vectorToSearch, "fl",
"id"),
- "//result[@numFound='10']",
+ req(
+ CommonParams.Q,
+ "{!vectorSimilarity f=vector minReturn=0.8}" + vectorToSearch,
+ "fl",
+ "id"),
+ "//result[@numFound='8']",
"//result/doc[1]/str[@name='id'][.='1']",
"//result/doc[2]/str[@name='id'][.='4']",
"//result/doc[3]/str[@name='id'][.='2']",
@@ -431,38 +348,20 @@ public class KnnQParserTest extends SolrTestCaseJ4 {
"//result/doc[5]/str[@name='id'][.='3']",
"//result/doc[6]/str[@name='id'][.='7']",
"//result/doc[7]/str[@name='id'][.='5']",
- "//result/doc[8]/str[@name='id'][.='6']",
- "//result/doc[9]/str[@name='id'][.='9']",
- "//result/doc[10]/str[@name='id'][.='8']");
- }
-
- @Test
- public void
knnQueryUsedInFilter_shouldFilterResultsBeforeTheQueryExecution() {
- String vectorToSearch = "[1.0, 2.0, 3.0, 4.0]";
- assertQ(
- req(
- CommonParams.Q,
- "id:(3 4 9 2)",
- "fq",
- "{!knn f=vector topK=4}" + vectorToSearch,
- "fl",
- "id"),
- "//result[@numFound='2']",
- "//result/doc[1]/str[@name='id'][.='2']",
- "//result/doc[2]/str[@name='id'][.='4']");
+ "//result/doc[8]/str[@name='id'][.='6']");
}
@Test
- public void
knnQueryUsedInFilters_shouldFilterResultsBeforeTheQueryExecution() {
+ public void
vecSimQueryUsedInFilters_shouldFilterResultsBeforeTheQueryExecution() {
String vectorToSearch = "[1.0, 2.0, 3.0, 4.0]";
- // topK=4 -> 1,4,2,10
+ // minReturn=0.8 -> 1,4,2,10,3,7,5,6
assertQ(
req(
CommonParams.Q,
"id:(3 4 9 2)",
"fq",
- "{!knn f=vector topK=4}" + vectorToSearch,
+ "{!vectorSimilarity f=vector minReturn=0.8}" + vectorToSearch,
"fq",
"id:(4 20 9)",
"fl",
@@ -472,35 +371,36 @@ public class KnnQParserTest extends SolrTestCaseJ4 {
}
@Test
- public void
knnQueryUsedInFiltersWithPreFilter_shouldFilterResultsBeforeTheQueryExecution()
{
+ public void
vecSimQueryUsedInFiltersWithPreFilter_shouldFilterResultsBeforeTheQueryExecution()
{
String vectorToSearch = "[1.0, 2.0, 3.0, 4.0]";
- // topK=4 w/localparam preFilter -> 1,4,7,9
+ // minReturn=0.8 w/localparam preFilter -> 1,4,7
assertQ(
req(
CommonParams.Q,
- "id:(3 4 9 2)",
+ "id:(3 4 7 2)",
"fq",
- "{!knn f=vector topK=4 preFilter='id:(1 4 7 8 9)'}" +
vectorToSearch,
+ "{!vectorSimilarity f=vector minReturn=0.8 preFilter='id:(1 4 7 8
9)'}"
+ + vectorToSearch,
"fq",
- "id:(4 20 9)",
+ "id:(4 20 7)",
"fl",
"id"),
"//result[@numFound='2']",
"//result/doc[1]/str[@name='id'][.='4']",
- "//result/doc[2]/str[@name='id'][.='9']");
+ "//result/doc[2]/str[@name='id'][.='7']");
}
@Test
- public void knnQueryUsedInFilters_rejectIncludeExclude() {
+ public void vecSimQueryUsedInFilters_rejectIncludeExclude() {
String vectorToSearch = "[1.0, 2.0, 3.0, 4.0]";
for (String fq :
Arrays.asList(
- "{!knn f=vector topK=5 includeTags=xxx}" + vectorToSearch,
- "{!knn f=vector topK=5 excludeTags=xxx}" + vectorToSearch)) {
+ "{!vectorSimilarity f=vector minReturn=0.8 includeTags=xxx}" +
vectorToSearch,
+ "{!vectorSimilarity f=vector minReturn=0.8 excludeTags=xxx}" +
vectorToSearch)) {
assertQEx(
- "fq={!knn...} incompatible with include/exclude localparams",
+ "fq={!vectorSimilarity...} incompatible with include/exclude
localparams",
"used as a filter does not support",
req("q", "*:*", "fq", fq),
SolrException.ErrorCode.BAD_REQUEST);
@@ -508,88 +408,97 @@ public class KnnQParserTest extends SolrTestCaseJ4 {
}
@Test
- public void knnQueryAsSubQuery() {
+ public void vecSimQueryAsSubQuery() {
final SolrParams common = params("fl", "id", "vec", "[1.0, 2.0, 3.0,
4.0]");
- final String filt = "id:(2 4 7 9 8 20 3)";
+ final String filt = "id:(2 4 7 9 8 20)";
- // When knn parser is a subquery, it should not pre-filter on any global
fq params
- // topK -> 1,4,2,10,3 -> fq -> 4,2,3
+ // When vecSim parser is a subquery, it should not pre-filter on any
global fq params
+ // minReturn -> 1,4,2,10,3,7,5,6 -> fq -> 4,2,7
assertQ(
- req(common, "fq", filt, "q", "*:* AND {!knn f=vector topK=5 v=$vec}"),
+ req(common, "fq", filt, "q", "*:* AND {!vectorSimilarity f=vector
minReturn=0.8 v=$vec}"),
"//result[@numFound='3']",
"//result/doc[1]/str[@name='id'][.='4']",
"//result/doc[2]/str[@name='id'][.='2']",
- "//result/doc[3]/str[@name='id'][.='3']");
- // topK -> 1,4,2,10,3 + '8' -> fq -> 4,2,3,8
+ "//result/doc[3]/str[@name='id'][.='7']");
+ // minReturn -> 1,4,2,10,3,7,5,6 + '8' -> fq -> 4,2,7,8
assertQ(
- req(common, "fq", filt, "q", "id:8^=0.01 OR {!knn f=vector topK=5
v=$vec}"),
+ req(
+ common,
+ "fq",
+ filt,
+ "q",
+ "id:8^=0.01 OR {!vectorSimilarity f=vector minReturn=0.8 v=$vec}"),
"//result[@numFound='4']",
"//result/doc[1]/str[@name='id'][.='4']",
"//result/doc[2]/str[@name='id'][.='2']",
- "//result/doc[3]/str[@name='id'][.='3']",
+ "//result/doc[3]/str[@name='id'][.='7']",
"//result/doc[4]/str[@name='id'][.='8']");
}
@Test
- public void knnQueryAsSubQuery_withPreFilter() {
+ public void vecSimQueryAsSubQuery_withPreFilter() {
final SolrParams common = params("fl", "id", "vec", "[1.0, 2.0, 3.0,
4.0]");
final String filt = "id:(2 4 7 9 8 20 3)";
- // knn subquery should still accept `preFilter` local param
- // filt -> topK -> 4,2,3,7,9
+ // vecSim subquery should still accept `preFilter` local param
+ // filt -> minReturn -> 4,2,3,7
assertQ(
- req(common, "q", "*:* AND {!knn f=vector topK=5 preFilter='" + filt +
"' v=$vec}"),
- "//result[@numFound='5']",
+ req(
+ common,
+ "q",
+ "*:* AND {!vectorSimilarity f=vector minReturn=0.8 preFilter='" +
filt + "' v=$vec}"),
+ "//result[@numFound='4']",
"//result/doc[1]/str[@name='id'][.='4']",
"//result/doc[2]/str[@name='id'][.='2']",
"//result/doc[3]/str[@name='id'][.='3']",
- "//result/doc[4]/str[@name='id'][.='7']",
- "//result/doc[5]/str[@name='id'][.='9']");
+ "//result/doc[4]/str[@name='id'][.='7']");
// it should not pre-filter on any global fq params
- // filt -> topK -> 4,2,3,7,9 -> fq -> 3,9
+ // filt -> minReturn -> 4,2,3,7 -> fq -> 3,7
assertQ(
req(
common,
"fq",
- "id:(1 9 20 3 5 6 8)",
+ "id:(1 9 20 3 5 7 8)",
"q",
- "*:* AND {!knn f=vector topK=5 preFilter='" + filt + "' v=$vec}"),
+ "*:* AND {!vectorSimilarity f=vector minReturn=0.8 preFilter='" +
filt + "' v=$vec}"),
"//result[@numFound='2']",
"//result/doc[1]/str[@name='id'][.='3']",
- "//result/doc[2]/str[@name='id'][.='9']");
- // filt -> topK -> 4,2,3,7,9 + '8' -> fq -> 8,3,9
+ "//result/doc[2]/str[@name='id'][.='7']");
+ // filt -> minReturn -> 4,2,3,7 + '8' -> fq -> 8,3,7
assertQ(
req(
common,
"fq",
- "id:(1 9 20 3 5 6 8)",
+ "id:(1 9 20 3 5 7 8)",
"q",
- "id:8^=100 OR {!knn f=vector topK=5 preFilter='" + filt + "'
v=$vec}"),
+ "id:8^=100 OR {!vectorSimilarity f=vector minReturn=0.8
preFilter='"
+ + filt
+ + "' v=$vec}"),
"//result[@numFound='3']",
"//result/doc[1]/str[@name='id'][.='8']",
"//result/doc[2]/str[@name='id'][.='3']",
- "//result/doc[3]/str[@name='id'][.='9']");
+ "//result/doc[3]/str[@name='id'][.='7']");
}
@Test
- public void knnQueryAsSubQuery_rejectIncludeExclude() {
+ public void vecSimQueryAsSubQuery_rejectIncludeExclude() {
final SolrParams common = params("fl", "id", "vec", "[1.0, 2.0, 3.0,
4.0]");
- for (String knn :
+ for (String subq :
Arrays.asList(
- "{!knn f=vector topK=5 includeTags=xxx v=$vec}",
- "{!knn f=vector topK=5 excludeTags=xxx v=$vec}")) {
+ "{!vectorSimilarity f=vector minReturn=0.8 includeTags=xxx
v=$vec}",
+ "{!vectorSimilarity f=vector minReturn=0.8 excludeTags=xxx
v=$vec}")) {
assertQEx(
- "knn as subquery incompatible with include/exclude localparams",
+ "vecSim as subquery incompatible with include/exclude localparams",
"used as a sub-query does not support",
- req(common, "q", "*:* OR " + knn),
+ req(common, "q", "*:* OR " + subq),
SolrException.ErrorCode.BAD_REQUEST);
}
}
@Test
- public void knnQueryWithFilterQuery_singlePreFilterEquivilence() {
+ public void vecSimQueryWithFilterQuery_singlePreFilterEquivilence() {
final String vectorToSearch = "[1.0, 2.0, 3.0, 4.0]";
final SolrParams common = params("fl", "id");
@@ -597,12 +506,23 @@ public class KnnQParserTest extends SolrTestCaseJ4 {
final String filt = "id:(1 2 7 20)";
for (SolrQueryRequest req :
Arrays.asList(
- req(common, "q", "{!knn f=vector topK=10}" + vectorToSearch, "fq",
filt),
- req(common, "q", "{!knn f=vector preFilter=\"" + filt + "\"
topK=10}" + vectorToSearch),
req(
common,
"q",
- "{!knn f=vector preFilter=$my_filt topK=10}" + vectorToSearch,
+ "{!vectorSimilarity f=vector minReturn=0.8}" + vectorToSearch,
+ "fq",
+ filt),
+ req(
+ common,
+ "q",
+ "{!vectorSimilarity f=vector preFilter=\""
+ + filt
+ + "\" minReturn=0.8}"
+ + vectorToSearch),
+ req(
+ common,
+ "q",
+ "{!vectorSimilarity f=vector preFilter=$my_filt
minReturn=0.8}" + vectorToSearch,
"my_filt",
filt))) {
assertQ(
@@ -615,29 +535,42 @@ public class KnnQParserTest extends SolrTestCaseJ4 {
}
@Test
- public void knnQueryWithFilterQuery_multiPreFilterEquivilence() {
+ public void vecSimQueryWithFilterQuery_multiPreFilterEquivilence() {
final String vectorToSearch = "[1.0, 2.0, 3.0, 4.0]";
final SolrParams common = params("fl", "id");
// these requests should be equivalent
final String fx = "id:(3 4 9 2 1 )"; // 1 & 10 dropped from intersection
final String fy = "id:(3 4 9 2 10)";
+ final String minR = "minReturn=0.8"; // should exclude 9
for (SolrQueryRequest req :
Arrays.asList(
- req(common, "q", "{!knn f=vector topK=4}" + vectorToSearch, "fq",
fx, "fq", fy),
req(
common,
"q",
- "{!knn f=vector preFilter=\""
+ "{!vectorSimilarity f=vector " + minR + "}" + vectorToSearch,
+ "fq",
+ fx,
+ "fq",
+ fy),
+ req(
+ common,
+ "q",
+ "{!vectorSimilarity f=vector preFilter=\""
+ fx
+ "\" preFilter=\""
+ fy
- + "\" topK=4}"
+ + "\" "
+ + minR
+ + "}"
+ vectorToSearch),
req(
common,
"q",
- "{!knn f=vector preFilter=$fx preFilter=$fy topK=4}" +
vectorToSearch,
+ "{!vectorSimilarity f=vector preFilter=$fx preFilter=$fy "
+ + minR
+ + "}"
+ + vectorToSearch,
"fx",
fx,
"fy",
@@ -645,50 +578,55 @@ public class KnnQParserTest extends SolrTestCaseJ4 {
req(
common,
"q",
- "{!knn f=vector preFilter=$multi_filt topK=4}" +
vectorToSearch,
+ "{!vectorSimilarity f=vector preFilter=$multi_filt " + minR +
"}" + vectorToSearch,
"multi_filt",
fx,
"multi_filt",
fy))) {
assertQ(
req,
- "//result[@numFound='4']",
+ "//result[@numFound='3']",
"//result/doc[1]/str[@name='id'][.='4']",
"//result/doc[2]/str[@name='id'][.='2']",
- "//result/doc[3]/str[@name='id'][.='3']",
- "//result/doc[4]/str[@name='id'][.='9']");
+ "//result/doc[3]/str[@name='id'][.='3']");
}
}
@Test
- public void knnQueryWithPreFilter_rejectIncludeExclude() {
+ public void vecSimQueryWithPreFilter_rejectIncludeExclude() {
final String vectorToSearch = "[1.0, 2.0, 3.0, 4.0]";
assertQEx(
- "knn preFilter localparm incompatible with include/exclude
localparams",
+ "vecSim preFilter localparm incompatible with include/exclude
localparams",
"does not support combining preFilter localparam with either",
// shouldn't matter if global fq w/tag even exists, usage is an error
- req("q", "{!knn f=vector preFilter='id:1' includeTags=xxx}" +
vectorToSearch),
+ req(
+ "q",
+ "{!vectorSimilarity f=vector minReturn=0.8 preFilter='id:1'
includeTags=xxx}"
+ + vectorToSearch),
SolrException.ErrorCode.BAD_REQUEST);
assertQEx(
- "knn preFilter localparm incompatible with include/exclude
localparams",
+ "vecSim preFilter localparm incompatible with include/exclude
localparams",
"does not support combining preFilter localparam with either",
// shouldn't matter if global fq w/tag even exists, usage is an error
- req("q", "{!knn f=vector preFilter='id:1' excludeTags=xxx}" +
vectorToSearch),
+ req(
+ "q",
+ "{!vectorSimilarity f=vector minReturn=0.8 preFilter='id:1'
excludeTags=xxx}"
+ + vectorToSearch),
SolrException.ErrorCode.BAD_REQUEST);
}
@Test
- public void
knnQueryWithFilterQuery_preFilterLocalParamOverridesGlobalFilters() {
+ public void
vecSimQueryWithFilterQuery_preFilterLocalParamOverridesGlobalFilters() {
final String vectorToSearch = "[1.0, 2.0, 3.0, 4.0]";
// trivial case: empty preFilter localparam means no pre-filtering
assertQ(
req(
- "q", "{!knn f=vector preFilter='' topK=5}" + vectorToSearch,
+ "q", "{!vectorSimilarity f=vector preFilter='' minReturn=0.8}" +
vectorToSearch,
"fq", "-id:4",
"fl", "id"),
- "//result[@numFound='4']",
+ "//result[@numFound='7']",
"//result/doc[1]/str[@name='id'][.='1']",
"//result/doc[2]/str[@name='id'][.='2']",
"//result/doc[3]/str[@name='id'][.='10']",
@@ -697,18 +635,19 @@ public class KnnQParserTest extends SolrTestCaseJ4 {
// localparam prefiltering, global fqs applied independently
assertQ(
req(
- "q", "{!knn f=vector preFilter='id:(3 4 9 2 7 8)' topK=5}" +
vectorToSearch,
+ "q",
+ "{!vectorSimilarity f=vector preFilter='id:(3 4 9 2 7 8)'
minReturn=0.8}"
+ + vectorToSearch,
"fq", "-id:4",
"fl", "id"),
- "//result[@numFound='4']",
+ "//result[@numFound='3']",
"//result/doc[1]/str[@name='id'][.='2']",
"//result/doc[2]/str[@name='id'][.='3']",
- "//result/doc[3]/str[@name='id'][.='7']",
- "//result/doc[4]/str[@name='id'][.='9']");
+ "//result/doc[3]/str[@name='id'][.='7']");
}
@Test
- public void knnQueryWithFilterQuery_localParamIncludeExcludeTags() {
+ public void vecSimQueryWithFilterQuery_localParamIncludeExcludeTags() {
final String vectorToSearch = "[1.0, 2.0, 3.0, 4.0]";
final SolrParams common =
params(
@@ -717,26 +656,36 @@ public class KnnQParserTest extends SolrTestCaseJ4 {
"fq", "{!tag=yy,aa}id:(1 2 3 4 5 6 7)");
// These req's are equivalent: pre-filter everything
- // So only 7,6,5 are viable for topK=5
+ // So only 7,6,5 are viable for minReturn=0.8
for (SolrQueryRequest req :
Arrays.asList(
// default behavior is all fq's pre-filter,
- req(common, "q", "{!knn f=vector topK=5}" + vectorToSearch),
+ req(common, "q", "{!vectorSimilarity f=vector minReturn=0.8}" +
vectorToSearch),
// diff ways of explicitly requesting both fq params
- req(common, "q", "{!knn f=vector includeTags=aa topK=5}" +
vectorToSearch),
req(
common,
"q",
- "{!knn f=vector includeTags=aa excludeTags='' topK=5}" +
vectorToSearch),
+ "{!vectorSimilarity f=vector includeTags=aa minReturn=0.8}" +
vectorToSearch),
req(
common,
"q",
- "{!knn f=vector includeTags=aa excludeTags=bogus topK=5}" +
vectorToSearch),
+ "{!vectorSimilarity f=vector includeTags=aa excludeTags=''
minReturn=0.8}"
+ + vectorToSearch),
req(
common,
"q",
- "{!knn f=vector includeTags=xx includeTags=yy topK=5}" +
vectorToSearch),
- req(common, "q", "{!knn f=vector includeTags=xx,yy,bogus topK=5}"
+ vectorToSearch))) {
+ "{!vectorSimilarity f=vector includeTags=aa excludeTags=bogus
minReturn=0.8}"
+ + vectorToSearch),
+ req(
+ common,
+ "q",
+ "{!vectorSimilarity f=vector includeTags=xx includeTags=yy
minReturn=0.8}"
+ + vectorToSearch),
+ req(
+ common,
+ "q",
+ "{!vectorSimilarity f=vector includeTags=xx,yy,bogus
minReturn=0.8}"
+ + vectorToSearch))) {
assertQ(
req,
"//result[@numFound='3']",
@@ -747,88 +696,124 @@ public class KnnQParserTest extends SolrTestCaseJ4 {
}
@Test
- public void knnQueryWithFilterQuery_localParamsDisablesAllPreFiltering() {
+ public void vecSimQueryWithFilterQuery_localParamsDisablesAllPreFiltering() {
final String vectorToSearch = "[1.0, 2.0, 3.0, 4.0]";
final SolrParams common =
params(
"fl", "id",
- "fq", "{!tag=xx,aa}id:(5 6 7 8 9 10)",
- "fq", "{!tag=yy,aa}id:(1 2 3 4 5 6 7)");
+ "fq", "{!tag=xx,aa}id:(11 7 8 9 10)",
+ "fq", "{!tag=yy,aa}id:(1 2 3 4 12 7)");
// These req's are equivalent: pre-filter nothing
- // So 1,4,2,10,3,7 are the topK=6
+ // So 1,4,2,10,3,7,5,6 are the minReturn=0.8
// Only 7 matches both of the the regular fq params
for (SolrQueryRequest req :
Arrays.asList(
// explicit local empty preFilter
- req(common, "q", "{!knn f=vector preFilter='' topK=6}" +
vectorToSearch),
+ req(
+ common,
+ "q",
+ "{!vectorSimilarity f=vector preFilter='' minReturn=0.8}" +
vectorToSearch),
// diff ways of explicitly including none of the global fq params
- req(common, "q", "{!knn f=vector includeTags='' topK=6}" +
vectorToSearch),
- req(common, "q", "{!knn f=vector includeTags=bogus topK=6}" +
vectorToSearch),
+ req(
+ common,
+ "q",
+ "{!vectorSimilarity f=vector includeTags='' minReturn=0.8}" +
vectorToSearch),
+ req(
+ common,
+ "q",
+ "{!vectorSimilarity f=vector includeTags=bogus minReturn=0.8}"
+ vectorToSearch),
// diff ways of explicitly excluding all of the global fq params
- req(common, "q", "{!knn f=vector excludeTags=aa topK=6}" +
vectorToSearch),
req(
common,
"q",
- "{!knn f=vector includeTags=aa excludeTags=aa topK=6}" +
vectorToSearch),
+ "{!vectorSimilarity f=vector excludeTags=aa minReturn=0.8}" +
vectorToSearch),
+ req(
+ common,
+ "q",
+ "{!vectorSimilarity f=vector includeTags=aa excludeTags=aa
minReturn=0.8}"
+ + vectorToSearch),
+ req(
+ common,
+ "q",
+ "{!vectorSimilarity f=vector includeTags=aa excludeTags=xx,yy
minReturn=0.8}"
+ + vectorToSearch),
+ req(
+ common,
+ "q",
+ "{!vectorSimilarity f=vector includeTags=xx,yy excludeTags=aa
minReturn=0.8}"
+ + vectorToSearch),
+ req(
+ common,
+ "q",
+ "{!vectorSimilarity f=vector excludeTags=xx,yy minReturn=0.8}"
+ vectorToSearch),
req(
common,
"q",
- "{!knn f=vector includeTags=aa excludeTags=xx,yy topK=6}" +
vectorToSearch),
+ "{!vectorSimilarity f=vector excludeTags=aa minReturn=0.8}" +
vectorToSearch),
req(
common,
"q",
- "{!knn f=vector includeTags=xx,yy excludeTags=aa topK=6}" +
vectorToSearch),
- req(common, "q", "{!knn f=vector excludeTags=xx,yy topK=6}" +
vectorToSearch),
- req(common, "q", "{!knn f=vector excludeTags=aa topK=6}" +
vectorToSearch),
+ "{!vectorSimilarity f=vector excludeTags=xx excludeTags=yy
minReturn=0.8}"
+ + vectorToSearch),
req(
common,
"q",
- "{!knn f=vector excludeTags=xx excludeTags=yy topK=6}" +
vectorToSearch),
+ "{!vectorSimilarity f=vector excludeTags=xx
excludeTags=yy,bogus minReturn=0.8}"
+ + vectorToSearch),
req(
common,
"q",
- "{!knn f=vector excludeTags=xx excludeTags=yy,bogus topK=6}" +
vectorToSearch),
- req(common, "q", "{!knn f=vector excludeTags=xx,yy,bogus topK=6}"
+ vectorToSearch))) {
+ "{!vectorSimilarity f=vector excludeTags=xx,yy,bogus
minReturn=0.8}"
+ + vectorToSearch))) {
assertQ(req, "//result[@numFound='1']",
"//result/doc[1]/str[@name='id'][.='7']");
}
}
@Test
- public void knnQueryWithFilterQuery_localParamCombinedIncludeExcludeTags() {
+ public void
vecSimQueryWithFilterQuery_localParamCombinedIncludeExcludeTags() {
final String vectorToSearch = "[1.0, 2.0, 3.0, 4.0]";
final SolrParams common =
params(
"fl", "id",
- "fq", "{!tag=xx,aa}id:(5 6 7 8 9 10)",
- "fq", "{!tag=yy,aa}id:(1 2 3 4 5 6 7)");
+ "fq", "{!tag=xx,aa}id:(11 7 8 9 10)",
+ "fq", "{!tag=yy,aa}id:(1 2 3 4 12 7)");
// These req's are equivalent: prefilter only the 'yy' fq
- // So 1,4,2,3,7 are in the topK=5.
+ // So 1,4,2,3,7 are in the minReturn=0.8
// Only 7 matches the regular 'xx' fq param
for (SolrQueryRequest req :
Arrays.asList(
// diff ways of only using the 'yy' filter
- req(common, "q", "{!knn f=vector includeTags=yy,bogus topK=5}" +
vectorToSearch),
req(
common,
"q",
- "{!knn f=vector includeTags=yy excludeTags='' topK=5}" +
vectorToSearch),
- req(common, "q", "{!knn f=vector excludeTags=xx,bogus topK=5}" +
vectorToSearch),
+ "{!vectorSimilarity f=vector includeTags=yy,bogus
minReturn=0.8}" + vectorToSearch),
+ req(
+ common,
+ "q",
+ "{!vectorSimilarity f=vector includeTags=yy excludeTags=''
minReturn=0.8}"
+ + vectorToSearch),
+ req(
+ common,
+ "q",
+ "{!vectorSimilarity f=vector excludeTags=xx,bogus
minReturn=0.8}" + vectorToSearch),
req(
common,
"q",
- "{!knn f=vector includeTags=yy excludeTags=xx topK=5}" +
vectorToSearch),
+ "{!vectorSimilarity f=vector includeTags=yy excludeTags=xx
minReturn=0.8}"
+ + vectorToSearch),
req(
common,
"q",
- "{!knn f=vector includeTags=aa excludeTags=xx topK=5}" +
vectorToSearch))) {
+ "{!vectorSimilarity f=vector includeTags=aa excludeTags=xx
minReturn=0.8}"
+ + vectorToSearch))) {
assertQ(req, "//result[@numFound='1']",
"//result/doc[1]/str[@name='id'][.='7']");
}
}
@Test
- public void knnQueryWithMultiSelectFaceting_excludeTags() {
+ public void vecSimQueryWithMultiSelectFaceting_excludeTags() {
// NOTE: faceting on id is not very realistic,
// but it confirms what we care about re:filters w/o needing extra fields.
final String facet_xpath =
"//lst[@name='facet_fields']/lst[@name='id']/int";
@@ -838,7 +823,7 @@ public class KnnQParserTest extends SolrTestCaseJ4 {
params(
"fl", "id",
"indent", "true",
- "q", "{!knn f=vector topK=5 excludeTags=facet_click v=$vec}",
+ "q", "{!vectorSimilarity f=vector minReturn=0.9
excludeTags=facet_click v=$vec}",
"vec", vectorToSearch,
// mimicing "inStock:true"
"fq", "-id:(2 3)",
@@ -864,7 +849,6 @@ public class KnnQParserTest extends SolrTestCaseJ4 {
// drill down on a single facet constraint
// multi-select means facet counts shouldn't change
- // (this proves the knn isn't pre-filtering on the 'facet_click' fq)
assertQ(
req(common, "fq", "{!tag=facet_click}id:(4)"),
"//result[@numFound='1']",
@@ -878,7 +862,6 @@ public class KnnQParserTest extends SolrTestCaseJ4 {
// drill down on an additional facet constraint
// multi-select means facet counts shouldn't change
- // (this proves the knn isn't pre-filtering on the 'facet_click' fq)
assertQ(
req(common, "fq", "{!tag=facet_click}id:(4 5)"),
"//result[@numFound='2']",
@@ -892,63 +875,12 @@ public class KnnQParserTest extends SolrTestCaseJ4 {
facet_xpath + "[@name='5'][.='1']");
}
- @Test
- public void knnQueryWithCostlyFq_shouldPerformKnnSearchWithPostFilter() {
- String vectorToSearch = "[1.0, 2.0, 3.0, 4.0]";
-
- assertQ(
- req(
- CommonParams.Q,
- "{!knn f=vector topK=10}" + vectorToSearch,
- "fq",
- "{!frange cache=false l=0.99}$q",
- "fl",
- "*,score"),
- "//result[@numFound='5']",
- "//result/doc[1]/str[@name='id'][.='1']",
- "//result/doc[2]/str[@name='id'][.='4']",
- "//result/doc[3]/str[@name='id'][.='2']",
- "//result/doc[4]/str[@name='id'][.='10']",
- "//result/doc[5]/str[@name='id'][.='3']");
- }
-
- @Test
- public void
knnQueryWithFilterQueries_shouldPerformKnnSearchWithPreFiltersAndPostFilters() {
- String vectorToSearch = "[1.0, 2.0, 3.0, 4.0]";
-
- assertQ(
- req(
- CommonParams.Q,
- "{!knn f=vector topK=4}" + vectorToSearch,
- "fq",
- "id:(3 4 9 2)",
- "fq",
- "{!frange cache=false l=0.99}$q",
- "fl",
- "id"),
- "//result[@numFound='2']",
- "//result/doc[1]/str[@name='id'][.='4']",
- "//result/doc[2]/str[@name='id'][.='2']");
- }
-
- @Test
- public void
knnQueryWithNegativeFilterQuery_shouldPerformKnnSearchInPreFilteredResults() {
- String vectorToSearch = "[1.0, 2.0, 3.0, 4.0]";
- assertQ(
- req(CommonParams.Q, "{!knn f=vector topK=4}" + vectorToSearch, "fq",
"-id:4", "fl", "id"),
- "//result[@numFound='4']",
- "//result/doc[1]/str[@name='id'][.='1']",
- "//result/doc[2]/str[@name='id'][.='2']",
- "//result/doc[3]/str[@name='id'][.='10']",
- "//result/doc[4]/str[@name='id'][.='3']");
- }
-
/**
* See {@link
org.apache.solr.search.ReRankQParserPlugin.ReRankQueryRescorer#combine(float,
* boolean, float)}} for more details.
*/
@Test
- public void knnQueryAsRerank_shouldAddSimilarityFunctionScore() {
+ public void vecSimQueryAsRerank_shouldAddSimilarityFunctionScore() {
String vectorToSearch = "[1.0, 2.0, 3.0, 4.0]";
assertQ(
@@ -958,7 +890,7 @@ public class KnnQParserTest extends SolrTestCaseJ4 {
"rq",
"{!rerank reRankQuery=$rqq reRankDocs=4 reRankWeight=1}",
"rqq",
- "{!knn f=vector topK=4}" + vectorToSearch,
+ "{!vectorSimilarity f=vector minReturn=0.8}" + vectorToSearch,
"fl",
"id"),
"//result[@numFound='4']",
diff --git
a/solr/solr-ref-guide/modules/query-guide/pages/dense-vector-search.adoc
b/solr/solr-ref-guide/modules/query-guide/pages/dense-vector-search.adoc
index ac96ef827bf..35f5f094c68 100644
--- a/solr/solr-ref-guide/modules/query-guide/pages/dense-vector-search.adoc
+++ b/solr/solr-ref-guide/modules/query-guide/pages/dense-vector-search.adoc
@@ -237,14 +237,12 @@ client.add(Arrays.asList(d1, d2));
--
== Query Time
-This is the Apache Solr query approach designed to support dense vector search:
-=== knn Query Parser
-The `knn` k-nearest neighbors query parser allows to find the k-nearest
documents to the target vector according to indexed dense vectors in the given
field. The set of documents can be Pre-Filtered to reduce the number of vector
distance calculations that must be computed, and ensure the best `topK` are
returned.
+Apache Solr provides two query parsers that work with dense vector fields,
each supporting a different way of matching documents based on vector
similarity: the `knn` query parser and the `vectorSimilarity` query parser.
-The score for a retrieved document is the approximate distance to the target
vector(defined by the similarityFunction configured at indexing time).
+Both parsers return scores for retrieved documents that are the approximate
distance to the target vector (defined by the similarityFunction configured at
indexing time) and both support "Pre-Filtering" the document graph to reduce
the number of candidate vectors evaluated (without needing to compute their
vector similarity distances).
-It takes the following parameters:
+Common parameters for both query parsers are:
`f`::
+
@@ -255,15 +253,6 @@ s|Required |Default: none
+
The `DenseVectorField` to search in.
-`topK`::
-+
-[%autowidth,frame=none]
-|===
-|Optional |Default: 10
-|===
-+
-How many k-nearest results to return.
-
`preFilter`::
+
[%autowidth,frame=none]
@@ -293,22 +282,73 @@ Indicates that only `fq` filters with the specified `tag`
should be considered f
Indicates that `fq` filters with the specified `tag` should be excluded from
consideration for implicit Pre-Filtering. Must not be combined with
`preFilter`.
-Here's how to run a simple KNN search:
+=== knn Query Parser
+
+The `knn` k-nearest neighbors query parser matches k-nearest documents to the
target vector.
+
+In addition to the common parameters described above, it takes the following
parameters:
+
+`topK`::
++
+[%autowidth,frame=none]
+|===
+|Optional |Default: 10
+|===
++
+How many k-nearest results to return.
+
+Here's an example of a simple `knn` search:
[source,text]
?q={!knn f=vector topK=10}[1.0, 2.0, 3.0, 4.0]
The search results retrieved are the k=10 nearest documents to the vector in
input `[1.0, 2.0, 3.0, 4.0]`, ranked by the `similarityFunction` configured at
indexing time.
+=== vectorSimilarity Query Parser
+
+The `vectorSimilarity` vector similarity query parser matches documents whose
similarity with the target vector is above a minimum threshold.
-==== Explicit KNN Pre-Filtering
+In addition to the common parameters described above, it takes the following
parameters:
-The `knn` query parser's `preFilter` parameter can be specified to reduce the
number of candidate documents evaluated for the k-nearest distance calculation:
+
+`minReturn`::
++
+[%autowidth,frame=none]
+|===
+s|Required |Default: none
+|===
++
+Minimum similarity threshold of nodes in the graph to be returned as matches.
+
+`minTraverse`::
++
+[%autowidth,frame=none]
+|===
+|Optional |Default: -Infinity
+|===
++
+Minimum similarity of nodes in the graph to continue traversal of their
neighbors.
+
+Here's an example of a simple `vectorSimilarity` search:
+
+[source,text]
+?q={!vectorSimilarity f=vector minReturn=0.7}[1.0, 2.0, 3.0, 4.0]
+
+The search results retrieved are all documents whose similarity with the input
vector `[1.0, 2.0, 3.0, 4.0]` is at least `0.7` based on the
`similarityFunction` configured at indexing time.
+
+
+=== Graph Pre-Filtering
+
+Pre-Filtering the set of candidate documents considered when walking the graph
can be specified either explicitly, or implicitly (based on existing `fq`
params) depending on how and when these dense vector query parsers are used.
+
+==== Explicit Pre-Filtering
+
+The `preFilter` parameter can be specified explicitly to reduce the number of
candidate documents evaluated for the distance calculation:
[source,text]
-?q={!knn f=vector topK=10 preFilter=inStock:true}[1.0, 2.0, 3.0, 4.0]
+?q={!vectorSimilarity f=vector minReturn=0.7 preFilter=inStock:true}[1.0, 2.0,
3.0, 4.0]
-In the above example, only documents matching the Pre-Filter `inStock:true`
will be candidates for consideration when evaluating the k-nearest search
against the specified vector.
+In the above example, only documents matching the Pre-Filter `inStock:true`
will be candidates for consideration when evaluating the `vectorSimilarity`
search against the specified vector.
The `preFilter` parameter may be blank (ex: `preFilter=""`) to indicate that
no Pre-Filtering should be performed; or it may be multi-valued -- either
through repetition, or via duplicated
xref:local-params.adoc#parameter-dereferencing[Parameter References].
@@ -324,22 +364,22 @@ These two examples are equivalent:
&knnPreFilter=inStock:true
----
-==== Implicit KNN Pre-Filtering
+==== Implicit Pre-Filtering
-While the `preFilter` parameter may be explicitly specified on *_any_* usage
of the `knn` query parser, the default Pre-Filtering behavior (when no
`preFilter` parameter is specified) will vary based on how the `knn` query
parser is used:
+While the `preFilter` parameter may be explicitly specified on *_any_* usage
of the `knn` or `vectorSimilarity` query parsers, the default Pre-Filtering
behavior (when no `preFilter` parameter is specified) will vary based on how
the query parser is used:
-* When used as the main `q` param: `fq` filters in the request (that are not
xref:common-query-parameters.adoc#cache-local-parameter[Solr Post Filters])
will be combined to form an implicit KNN Pre-Filter.
+* When used as the main `q` param: `fq` filters in the request (that are not
xref:common-query-parameters.adoc#cache-local-parameter[Solr Post Filters])
will be combined to form an implicit Graph Pre-Filter.
** This default behavior optimizes the number of vector distance calculations
considered, eliminating documents that would eventually be excluded by an `fq`
filter anyway.
** `includeTags` and `excludeTags` may be used to limit the set of `fq`
filters used in the Pre-Filter.
-* When used as an `fq` param, or as a subquery clause in a larger query: No
implicit Pre-Filter is used.
+* When a vector search query parser is used as an `fq` param, or as a subquery
clause in a larger query: No implicit Pre-Filter is used.
** `includeTags` and `excludeTags` must not be used in these situations.
-The example request below shows two usages of the `knn` query parser that will
get _no_ implicit Pre-Filtering from any of the `fq` parameters, because
neither usage is as the main `q` param:
+The example request below shows two usages of vector query parsers that will
get _no_ implicit Pre-Filtering from any of the `fq` parameters, because
neither usage is as the main `q` param:
[source,text]
----
-?q=(color_str:red OR {!knn f=color_vector topK=10 v="[1.0, 2.0, 3.0, 4.0]"})
+?q=(color_str:red OR {!vectorSimilarity f=color_vector minReturn=0.7 v="[1.0,
2.0, 3.0, 4.0]"})
&fq={!knn f=title_vector topK=10}[9.0, 8.0, 7.0, 6.0]
&fq=inStock:true
----
@@ -363,21 +403,24 @@ If we modify the above request to add tags to the `fq`
parameters, we can specif
&fq={!tag=for_knn}inStock:true
----
-In this example, only the `inStock:true` filter will be used for KNN
Pre-Filtering to find the the `topK=10` documents, and the `category:AAA`
filter will be applied independently; possibly resulting in less then 10 total
matches.
+In this example, only the `inStock:true` filter will be used for Pre-Filtering
to find the `topK=10` documents, and the `category:AAA` filter will be
applied independently; possibly resulting in fewer than 10 total matches.
Some use cases where `includeTags` and/or `excludeTags` may be more useful
than an explicit `preFilter` parameter:
-* You have some `fq` parameters that are
xref:configuration-guide:requesthandlers-searchcomponents.adoc#paramsets-and-useparams[re-used
on many requests] (even when you don't use the `knn` parser) that you wish to
be used as KNN Pre-Filters when you _do_ use the `knn` query parser.
-* You typically want all `fq` params to be used as KNN Pre-Filters, but when
users "drill down" on Facets, you want the `fq` parameters you add to be
excluded from the KNN Pre-Filtering so that the result set gets smaller;
instead of just computing a new `topK` set.
+* You have some `fq` parameters that are
xref:configuration-guide:requesthandlers-searchcomponents.adoc#paramsets-and-useparams[re-used
on many requests] (even when you don't search dense vector fields) that
you wish to be used as Pre-Filters when you _do_ search dense vector fields.
+* You typically want all `fq` params to be used as graph Pre-Filters on your
`knn` queries, but when users "drill down" on Facets, you want the `fq`
parameters you add to be excluded from the Pre-Filtering so that the result set
gets smaller; instead of just computing a new `topK` set.
-==== Usage as Re-Ranking Query
-The `knn` query parser can be used to rerank first pass query results:
+=== Usage in Re-Ranking Query
+
+Both dense vector search query parsers can be used to rerank first pass query
results:
+
[source,text]
&q=id:(3 4 9 2)&rq={!rerank reRankQuery=$rqq reRankDocs=4
reRankWeight=1}&rqq={!knn f=vector topK=10}[1.0, 2.0, 3.0, 4.0]
+
[IMPORTANT]
====
When using `knn` in re-ranking pay attention to the `topK` parameter.