This is an automated email from the ASF dual-hosted git repository. hossman pushed a commit to branch SOLR-17335 in repository https://gitbox.apache.org/repos/asf/solr.git
commit 4b137703ed2c14ec0375c9ca9b6131d3ff479b4a Author: Chris Hostetter <[email protected]> AuthorDate: Mon Jun 17 15:11:54 2024 -0700 SOLR-17335: New 'vecSim' QParser --- .../java/org/apache/solr/search/QParserPlugin.java | 2 + ...QParser.java => AbstractVectorQParserBase.java} | 52 +- .../org/apache/solr/search/neural/KnnQParser.java | 183 +------ .../apache/solr/search/neural/VecSimQParser.java | 75 +++ .../solr/search/neural/VecSimQParserPlugin.java | 33 ++ .../org/apache/solr/search/QueryEqualityTest.java | 74 +++ .../apache/solr/search/neural/KnnQParserTest.java | 2 +- ...{KnnQParserTest.java => VecSimQParserTest.java} | 532 ++++++++------------- .../query-guide/pages/dense-vector-search.adoc | 103 ++-- 9 files changed, 472 insertions(+), 584 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/search/QParserPlugin.java b/solr/core/src/java/org/apache/solr/search/QParserPlugin.java index 99cb9efb0c7..544ecc5540a 100644 --- a/solr/core/src/java/org/apache/solr/search/QParserPlugin.java +++ b/solr/core/src/java/org/apache/solr/search/QParserPlugin.java @@ -31,6 +31,7 @@ import org.apache.solr.search.join.HashRangeQParserPlugin; import org.apache.solr.search.mlt.MLTContentQParserPlugin; import org.apache.solr.search.mlt.MLTQParserPlugin; import org.apache.solr.search.neural.KnnQParserPlugin; +import org.apache.solr.search.neural.VecSimQParserPlugin; import org.apache.solr.util.plugin.NamedListInitializedPlugin; public abstract class QParserPlugin implements NamedListInitializedPlugin, SolrInfoBean { @@ -89,6 +90,7 @@ public abstract class QParserPlugin implements NamedListInitializedPlugin, SolrI map.put(HashRangeQParserPlugin.NAME, new HashRangeQParserPlugin()); map.put(RankQParserPlugin.NAME, new RankQParserPlugin()); map.put(KnnQParserPlugin.NAME, new KnnQParserPlugin()); + map.put(VecSimQParserPlugin.NAME, new VecSimQParserPlugin()); standardPlugins = Collections.unmodifiableMap(map); } diff --git 
a/solr/core/src/java/org/apache/solr/search/neural/KnnQParser.java b/solr/core/src/java/org/apache/solr/search/neural/AbstractVectorQParserBase.java similarity index 83% copy from solr/core/src/java/org/apache/solr/search/neural/KnnQParser.java copy to solr/core/src/java/org/apache/solr/search/neural/AbstractVectorQParserBase.java index 252a4fcabc7..4cafb45744e 100644 --- a/solr/core/src/java/org/apache/solr/search/neural/KnnQParser.java +++ b/solr/core/src/java/org/apache/solr/search/neural/AbstractVectorQParserBase.java @@ -33,60 +33,50 @@ import org.apache.solr.search.QueryParsing; import org.apache.solr.search.QueryUtils; import org.apache.solr.search.SyntaxError; -public class KnnQParser extends QParser { +public abstract class AbstractVectorQParserBase extends QParser { static final String PRE_FILTER = "preFilter"; static final String EXCLUDE_TAGS = "excludeTags"; static final String INCLUDE_TAGS = "includeTags"; - // retrieve the top K results based on the distance similarity function - static final String TOP_K = "topK"; - static final int DEFAULT_TOP_K = 10; + private final String denseVectorFieldName; + private final String vectorToSearch; - /** - * Constructor for the QParser - * - * @param qstr The part of the query string specific to this parser - * @param localParams The set of parameters that are specific to this QParser. See - * https://solr.apache.org/guide/solr/latest/query-guide/local-params.html - * @param params The rest of the {@link SolrParams} - * @param req The original {@link SolrQueryRequest}. 
- */ - public KnnQParser(String qstr, SolrParams localParams, SolrParams params, SolrQueryRequest req) { + public AbstractVectorQParserBase( + String qstr, SolrParams localParams, SolrParams params, SolrQueryRequest req) { super(qstr, localParams, params, req); + vectorToSearch = localParams.get(QueryParsing.V); + denseVectorFieldName = localParams.get(QueryParsing.F); } - @Override - public Query parse() throws SyntaxError { - String denseVectorField = localParams.get(QueryParsing.F); - String vectorToSearch = localParams.get(QueryParsing.V); - int topK = localParams.getInt(TOP_K, DEFAULT_TOP_K); - - if (denseVectorField == null || denseVectorField.isEmpty()) { + protected String getVectorToSearch() { + if (vectorToSearch == null || vectorToSearch.isEmpty()) { throw new SolrException( - SolrException.ErrorCode.BAD_REQUEST, "the Dense Vector field 'f' is missing"); + SolrException.ErrorCode.BAD_REQUEST, "the Dense Vector value 'v' to search is missing"); } + return vectorToSearch; + } - if (vectorToSearch == null || vectorToSearch.isEmpty()) { + protected String getFieldName() { + if (denseVectorFieldName == null || denseVectorFieldName.isEmpty()) { throw new SolrException( - SolrException.ErrorCode.BAD_REQUEST, "the Dense Vector value 'v' to search is missing"); + SolrException.ErrorCode.BAD_REQUEST, "the Dense Vector field 'f' is missing"); } + return denseVectorFieldName; + } - SchemaField schemaField = req.getCore().getLatestSchema().getField(denseVectorField); + protected static DenseVectorField getCheckedFieldType(SchemaField schemaField) { FieldType fieldType = schemaField.getType(); if (!(fieldType instanceof DenseVectorField)) { throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, - "only DenseVectorField is compatible with Knn Query Parser"); + "only DenseVectorField is compatible with Vector Query Parsers"); } - DenseVectorField denseVectorType = (DenseVectorField) fieldType; - - return denseVectorType.getKnnVectorQuery( - schemaField.getName(), 
vectorToSearch, topK, getFilterQuery()); + return (DenseVectorField) fieldType; } - private Query getFilterQuery() throws SolrException, SyntaxError { + protected Query getFilterQuery() throws SolrException, SyntaxError { // Default behavior of FQ wrapping, and suitability of some local params // depends on wether we are a sub-query or not diff --git a/solr/core/src/java/org/apache/solr/search/neural/KnnQParser.java b/solr/core/src/java/org/apache/solr/search/neural/KnnQParser.java index 252a4fcabc7..166dada5b7f 100644 --- a/solr/core/src/java/org/apache/solr/search/neural/KnnQParser.java +++ b/solr/core/src/java/org/apache/solr/search/neural/KnnQParser.java @@ -16,204 +16,31 @@ */ package org.apache.solr.search.neural; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; import org.apache.lucene.search.Query; -import org.apache.solr.common.SolrException; import org.apache.solr.common.params.SolrParams; -import org.apache.solr.common.util.StrUtils; import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.schema.DenseVectorField; -import org.apache.solr.schema.FieldType; import org.apache.solr.schema.SchemaField; -import org.apache.solr.search.QParser; -import org.apache.solr.search.QueryParsing; -import org.apache.solr.search.QueryUtils; import org.apache.solr.search.SyntaxError; -public class KnnQParser extends QParser { - - static final String PRE_FILTER = "preFilter"; - static final String EXCLUDE_TAGS = "excludeTags"; - static final String INCLUDE_TAGS = "includeTags"; +public class KnnQParser extends AbstractVectorQParserBase { // retrieve the top K results based on the distance similarity function static final String TOP_K = "topK"; static final int DEFAULT_TOP_K = 10; - /** - * Constructor for the QParser - * - * @param qstr The part of the query string specific to this parser - * @param localParams The set of parameters that are specific to this QParser. 
See - * https://solr.apache.org/guide/solr/latest/query-guide/local-params.html - * @param params The rest of the {@link SolrParams} - * @param req The original {@link SolrQueryRequest}. - */ public KnnQParser(String qstr, SolrParams localParams, SolrParams params, SolrQueryRequest req) { super(qstr, localParams, params, req); } @Override public Query parse() throws SyntaxError { - String denseVectorField = localParams.get(QueryParsing.F); - String vectorToSearch = localParams.get(QueryParsing.V); - int topK = localParams.getInt(TOP_K, DEFAULT_TOP_K); - - if (denseVectorField == null || denseVectorField.isEmpty()) { - throw new SolrException( - SolrException.ErrorCode.BAD_REQUEST, "the Dense Vector field 'f' is missing"); - } - - if (vectorToSearch == null || vectorToSearch.isEmpty()) { - throw new SolrException( - SolrException.ErrorCode.BAD_REQUEST, "the Dense Vector value 'v' to search is missing"); - } - - SchemaField schemaField = req.getCore().getLatestSchema().getField(denseVectorField); - FieldType fieldType = schemaField.getType(); - if (!(fieldType instanceof DenseVectorField)) { - throw new SolrException( - SolrException.ErrorCode.BAD_REQUEST, - "only DenseVectorField is compatible with Knn Query Parser"); - } - - DenseVectorField denseVectorType = (DenseVectorField) fieldType; + final SchemaField schemaField = req.getCore().getLatestSchema().getField(getFieldName()); + final DenseVectorField denseVectorType = getCheckedFieldType(schemaField); + final String vectorToSearch = getVectorToSearch(); + final int topK = localParams.getInt(TOP_K, DEFAULT_TOP_K); return denseVectorType.getKnnVectorQuery( schemaField.getName(), vectorToSearch, topK, getFilterQuery()); } - - private Query getFilterQuery() throws SolrException, SyntaxError { - - // Default behavior of FQ wrapping, and suitability of some local params - // depends on wether we are a sub-query or not - final boolean isSubQuery = recurseCount != 0; - - // include/exclude tags for global fqs to wrap; - 
// Check these up front for error handling if combined with `fq` local param. - final List<String> includedGlobalFQTags = getLocalParamTags(INCLUDE_TAGS); - final List<String> excludedGlobalFQTags = getLocalParamTags(EXCLUDE_TAGS); - final boolean haveGlobalFQTags = - !(includedGlobalFQTags.isEmpty() && excludedGlobalFQTags.isEmpty()); - - if (haveGlobalFQTags) { - // Some early error handling of incompatible options... - - if (isFilter()) { // this knn query is itself a filter query - throw new SolrException( - SolrException.ErrorCode.BAD_REQUEST, - "Knn Query Parser used as a filter does not support " - + INCLUDE_TAGS - + " or " - + EXCLUDE_TAGS - + " localparams"); - } - - if (isSubQuery) { // this knn query is a sub-query of a broader query (possibly disjunction) - throw new SolrException( - SolrException.ErrorCode.BAD_REQUEST, - "Knn Query Parser used as a sub-query does not support " - + INCLUDE_TAGS - + " or " - + EXCLUDE_TAGS - + " localparams"); - } - } - - // Explicit local params specifying the filter(s) to wrap - final String[] preFilters = getLocalParams().getParams(PRE_FILTER); - if (null != preFilters) { - - // We don't particularly care if preFilters is empty, the usage below will still work, - // but SolrParams API says it should be null not empty... 
- assert 0 != preFilters.length - : "SolrParams.getParams should return null, never zero len array"; - - if (haveGlobalFQTags) { - throw new SolrException( - SolrException.ErrorCode.BAD_REQUEST, - "Knn Query Parser does not support combining " - + PRE_FILTER - + " localparam with either " - + INCLUDE_TAGS - + " or " - + EXCLUDE_TAGS - + " localparams"); - } - - final List<Query> preFilterQueries = new ArrayList<>(preFilters.length); - for (String f : preFilters) { - final QParser parser = subQuery(f, null); - parser.setIsFilter(true); - - // maybe null, ie: `preFilter=""` - final Query filter = parser.getQuery(); - if (null != filter) { - preFilterQueries.add(filter); - } - } - try { - return req.getSearcher().getProcessedFilter(preFilterQueries).filter; - } catch (IOException e) { - throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e); - } - } - - // No explicit `preFilter` localparams specifying what we should filter on. - // - // So now, if we're either a filter or a subquery, we have to default to - // not wrapping anything... - if (isFilter() || isSubQuery) { - return null; - } - - // At this point we now are a (regular) query and can wrap global `fq` filters... 
- try { - // Start by assuming we wrap all global filters, - // then adjust our list based on include/exclude tag params - List<Query> globalFQs = QueryUtils.parseFilterQueries(req); - - // Adjust our globalFQs based on any include/exclude we may have - if (!includedGlobalFQTags.isEmpty()) { - // NOTE: Even if no FQs match the specified tag(s) the fact that tags were specified - // means we should replace globalFQs (even with a possibly empty list) - globalFQs = new ArrayList<>(QueryUtils.getTaggedQueries(req, includedGlobalFQTags)); - } - if (null != excludedGlobalFQTags) { - globalFQs.removeAll(QueryUtils.getTaggedQueries(req, excludedGlobalFQTags)); - } - - return req.getSearcher().getProcessedFilter(globalFQs).filter; - - } catch (IOException e) { - throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e); - } - } - - /** - * @return set (possibly empty) of tags specified in the given local param - * @see StrUtils#splitSmart - * @see QueryUtils#getTaggedQueries - * @see #localParams - */ - private List<String> getLocalParamTags(final String param) { - final String[] strVals = localParams.getParams(param); - if (null == strVals) { - return Collections.emptyList(); - } - final List<String> tags = new ArrayList<>(strVals.length * 2); - for (String val : strVals) { - // This ensures parity w/how QParser constructor builds tagMap, - // and that empty strings will make it into our List (for "include nothing") - if (0 < val.indexOf(',')) { - tags.addAll(StrUtils.splitSmart(val, ',')); - } else { - tags.add(val); - } - } - return tags; - } } diff --git a/solr/core/src/java/org/apache/solr/search/neural/VecSimQParser.java b/solr/core/src/java/org/apache/solr/search/neural/VecSimQParser.java new file mode 100644 index 00000000000..2dab7bd7a10 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/search/neural/VecSimQParser.java @@ -0,0 +1,75 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.search.neural; + +import org.apache.lucene.index.VectorEncoding; +import org.apache.lucene.search.ByteVectorSimilarityQuery; +import org.apache.lucene.search.FloatVectorSimilarityQuery; +import org.apache.lucene.search.Query; +import org.apache.solr.common.SolrException; +import org.apache.solr.common.params.SolrParams; +import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.schema.DenseVectorField; +import org.apache.solr.schema.SchemaField; +import org.apache.solr.search.SyntaxError; +import org.apache.solr.util.vector.DenseVectorParser; + +public class VecSimQParser extends AbstractVectorQParserBase { + + // retrieve the top results based on the distance similarity function thresholds + static final String MIN_RETURN = "minReturn"; + static final String MIN_TRAVERSE = "minTraverse"; + + static final float DEFAULT_MIN_TRAVERSE = Float.NEGATIVE_INFINITY; + + public VecSimQParser( + String qstr, SolrParams localParams, SolrParams params, SolrQueryRequest req) { + super(qstr, localParams, params, req); + } + + @Override + public Query parse() throws SyntaxError { + final String fieldName = getFieldName(); + final SchemaField schemaField = req.getCore().getLatestSchema().getField(fieldName); + final DenseVectorField denseVectorType = 
getCheckedFieldType(schemaField); + final String vectorToSearch = getVectorToSearch(); + final float minT = localParams.getFloat(MIN_TRAVERSE, DEFAULT_MIN_TRAVERSE); + final Float minR = localParams.getFloat(MIN_RETURN); + if (null == minR) { + throw new SolrException( + SolrException.ErrorCode.BAD_REQUEST, + MIN_RETURN + " is requried to use Vector Similarity QParser"); + } + + final DenseVectorParser vectorBuilder = + denseVectorType.getVectorBuilder(vectorToSearch, DenseVectorParser.BuilderPhase.QUERY); + + final VectorEncoding vectorEncoding = denseVectorType.getVectorEncoding(); + switch (vectorEncoding) { + case FLOAT32: + return new FloatVectorSimilarityQuery( + fieldName, vectorBuilder.getFloatVector(), minT, minR, getFilterQuery()); + case BYTE: + return new ByteVectorSimilarityQuery( + fieldName, vectorBuilder.getByteVector(), minT, minR, getFilterQuery()); + default: + throw new SolrException( + SolrException.ErrorCode.SERVER_ERROR, + "Unexpected state. Vector Encoding: " + vectorEncoding); + } + } +} diff --git a/solr/core/src/java/org/apache/solr/search/neural/VecSimQParserPlugin.java b/solr/core/src/java/org/apache/solr/search/neural/VecSimQParserPlugin.java new file mode 100644 index 00000000000..49a1ea9a192 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/search/neural/VecSimQParserPlugin.java @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.search.neural; + +import org.apache.solr.common.params.SolrParams; +import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.search.QParser; +import org.apache.solr.search.QParserPlugin; + +/** A neural query parser to run min-similarity search on Dense Vector fields. */ +public class VecSimQParserPlugin extends QParserPlugin { + public static final String NAME = "vecSim"; + + @Override + public QParser createParser( + String qstr, SolrParams localParams, SolrParams params, SolrQueryRequest req) { + return new VecSimQParser(qstr, localParams, params, req); + } +} diff --git a/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java b/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java index 653c0935879..1fe5eee3247 100644 --- a/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java +++ b/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java @@ -1469,6 +1469,80 @@ public class QueryEqualityTest extends SolrTestCaseJ4 { } } + public void testQueryVecSim() throws Exception { + SolrInputDocument doc = new SolrInputDocument(); + doc.addField("id", "0"); + doc.addField("vector", Arrays.asList(1, 2, 3, 4)); + assertU(adoc(doc)); + assertU(commit()); + + final String common = "!vecSim minReturn=0.3 f=vector"; + final String qvec = "[1.0,2.0,3.0,4.0]"; + + try (SolrQueryRequest req0 = req()) { + + // no filters + final Query fqNull = + assertQueryEqualsAndReturn( + "vecSim", + req0, + "{" + common + "}" + qvec, + "{" + common + " minTraverse='-Infinity'}" + qvec, + "{" + 
common + " preFilter=''}" + qvec, + "{" + common + " v=" + qvec + "}"); + + try (SolrQueryRequest req1 = req("fq", "{!tag=t1}id:1", "xxx", "id:1")) { + // either global fq, or (same) preFilter as localparam + final Query fqOne = + assertQueryEqualsAndReturn( + "vecSim", + req1, + "{" + common + "}" + qvec, + "{" + common + " includeTags=t1}" + qvec, + "{" + common + " preFilter='id:1'}" + qvec, + "{" + common + " preFilter=$xxx}" + qvec, + "{" + common + " v=" + qvec + "}"); + QueryUtils.checkUnequal(fqNull, fqOne); + + try (SolrQueryRequest req2 = req("fq", "{!tag=t2}id:2", "xxx", "id:1", "yyy", "")) { + // override global fq with local param to use different preFilter + final Query fqOneOverride = + assertQueryEqualsAndReturn( + "vecSim", + req2, + "{" + common + " preFilter='id:1'}" + qvec, + "{" + common + " preFilter=$xxx}" + qvec); + QueryUtils.checkEqual(fqOne, fqOneOverride); + + // override global fq with local param to use no preFilters + final Query fqNullOverride = + assertQueryEqualsAndReturn( + "vecSim", + req2, + "{" + common + " preFilter=''}" + qvec, + "{" + common + " excludeTags=t2}" + qvec, + "{" + common + " preFilter=$yyy}" + qvec); + QueryUtils.checkEqual(fqNull, fqNullOverride); + } + } + + try (SolrQueryRequest reqPostFilter = req("fq", "{!tag=post frange cache=false l=0}9.9")) { + // global post-filter fq should always be ignored + final Query fqPostFilter = + assertQueryEqualsAndReturn( + "vecSim", + reqPostFilter, + "{" + common + "}" + qvec, + "{" + common + " includeTags=post}" + qvec); + QueryUtils.checkEqual(fqNull, fqPostFilter); + } + + } finally { + delQ("id:0"); + assertU(commit()); + } + } + /** * NOTE: defType is not only used to pick the parser, but also to record the parser being tested * for coverage sanity checking diff --git a/solr/core/src/test/org/apache/solr/search/neural/KnnQParserTest.java b/solr/core/src/test/org/apache/solr/search/neural/KnnQParserTest.java index ccd0e3ecc44..f5d5668a7e5 100644 --- 
a/solr/core/src/test/org/apache/solr/search/neural/KnnQParserTest.java +++ b/solr/core/src/test/org/apache/solr/search/neural/KnnQParserTest.java @@ -178,7 +178,7 @@ public class KnnQParserTest extends SolrTestCaseJ4 { assertQEx( "Incorrect vector field type should throw Exception", - "only DenseVectorField is compatible with Knn Query Parser", + "only DenseVectorField is compatible with Vector Query Parsers", req(CommonParams.Q, "{!knn f=id topK=10}" + vectorToSearch, "fl", "id"), SolrException.ErrorCode.BAD_REQUEST); } diff --git a/solr/core/src/test/org/apache/solr/search/neural/KnnQParserTest.java b/solr/core/src/test/org/apache/solr/search/neural/VecSimQParserTest.java similarity index 57% copy from solr/core/src/test/org/apache/solr/search/neural/KnnQParserTest.java copy to solr/core/src/test/org/apache/solr/search/neural/VecSimQParserTest.java index ccd0e3ecc44..9a7f68521cb 100644 --- a/solr/core/src/test/org/apache/solr/search/neural/KnnQParserTest.java +++ b/solr/core/src/test/org/apache/solr/search/neural/VecSimQParserTest.java @@ -16,12 +16,10 @@ */ package org.apache.solr.search.neural; -import static org.apache.solr.search.neural.KnnQParser.DEFAULT_TOP_K; - import java.util.ArrayList; import java.util.Arrays; -import java.util.Collections; import java.util.List; +import org.apache.lucene.index.VectorSimilarityFunction; import org.apache.solr.SolrTestCaseJ4; import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrInputDocument; @@ -32,7 +30,7 @@ import org.junit.After; import org.junit.Before; import org.junit.Test; -public class KnnQParserTest extends SolrTestCaseJ4 { +public class VecSimQParserTest extends SolrTestCaseJ4 { String IDField = "id"; String vectorField = "vector"; String vectorField2 = "vector2"; @@ -115,71 +113,14 @@ public class KnnQParserTest extends SolrTestCaseJ4 { deleteCore(); } - @Test - public void incorrectTopK_shouldThrowException() { - String vectorToSearch = "[1.0, 2.0, 3.0, 4.0]"; - - assertQEx( - 
"String topK should throw Exception", - "For input string: \"string\"", - req(CommonParams.Q, "{!knn f=vector topK=string}" + vectorToSearch, "fl", "id"), - SolrException.ErrorCode.BAD_REQUEST); - - assertQEx( - "Double topK should throw Exception", - "For input string: \"4.5\"", - req(CommonParams.Q, "{!knn f=vector topK=4.5}" + vectorToSearch, "fl", "id"), - SolrException.ErrorCode.BAD_REQUEST); - } - - @Test - public void topKMissing_shouldReturnDefaultTopK() { - String vectorToSearch = "[1.0, 2.0, 3.0, 4.0]"; - - assertQ( - req(CommonParams.Q, "{!knn f=vector}" + vectorToSearch, "fl", "id"), - "//result[@numFound='" + DEFAULT_TOP_K + "']", - "//result/doc[1]/str[@name='id'][.='1']", - "//result/doc[2]/str[@name='id'][.='4']", - "//result/doc[3]/str[@name='id'][.='2']", - "//result/doc[4]/str[@name='id'][.='10']", - "//result/doc[5]/str[@name='id'][.='3']", - "//result/doc[6]/str[@name='id'][.='7']", - "//result/doc[7]/str[@name='id'][.='5']", - "//result/doc[8]/str[@name='id'][.='6']", - "//result/doc[9]/str[@name='id'][.='9']", - "//result/doc[10]/str[@name='id'][.='8']"); - } - - @Test - public void topK_shouldReturnOnlyTopKResults() { - String vectorToSearch = "[1.0, 2.0, 3.0, 4.0]"; - - assertQ( - req(CommonParams.Q, "{!knn f=vector topK=5}" + vectorToSearch, "fl", "id"), - "//result[@numFound='5']", - "//result/doc[1]/str[@name='id'][.='1']", - "//result/doc[2]/str[@name='id'][.='4']", - "//result/doc[3]/str[@name='id'][.='2']", - "//result/doc[4]/str[@name='id'][.='10']", - "//result/doc[5]/str[@name='id'][.='3']"); - - assertQ( - req(CommonParams.Q, "{!knn f=vector topK=3}" + vectorToSearch, "fl", "id"), - "//result[@numFound='3']", - "//result/doc[1]/str[@name='id'][.='1']", - "//result/doc[2]/str[@name='id'][.='4']", - "//result/doc[3]/str[@name='id'][.='2']"); - } - @Test public void incorrectVectorFieldType_shouldThrowException() { String vectorToSearch = "[1.0, 2.0, 3.0, 4.0]"; assertQEx( "Incorrect vector field type should throw Exception", - "only 
DenseVectorField is compatible with Knn Query Parser", - req(CommonParams.Q, "{!knn f=id topK=10}" + vectorToSearch, "fl", "id"), + "only DenseVectorField is compatible with ", + req(CommonParams.Q, "{!vecSim f=id}" + vectorToSearch, "fl", "id"), SolrException.ErrorCode.BAD_REQUEST); } @@ -190,7 +131,7 @@ public class KnnQParserTest extends SolrTestCaseJ4 { assertQEx( "Undefined vector field should throw Exception", "undefined field: \"notExistent\"", - req(CommonParams.Q, "{!knn f=notExistent topK=10}" + vectorToSearch, "fl", "id"), + req(CommonParams.Q, "{!vecSim f=notExistent}" + vectorToSearch, "fl", "id"), SolrException.ErrorCode.BAD_REQUEST); } @@ -201,7 +142,7 @@ public class KnnQParserTest extends SolrTestCaseJ4 { assertQEx( "missing vector field should throw Exception", "the Dense Vector field 'f' is missing", - req(CommonParams.Q, "{!knn topK=10}" + vectorToSearch, "fl", "id"), + req(CommonParams.Q, "{!vecSim}" + vectorToSearch, "fl", "id"), SolrException.ErrorCode.BAD_REQUEST); } @@ -210,113 +151,41 @@ public class KnnQParserTest extends SolrTestCaseJ4 { String vectorToSearch = "[1.0, 2.0, 3.0, 4.0]"; assertQ( - req(CommonParams.Q, "{!knn f=vector2 topK=5}" + vectorToSearch, "fl", "id"), + req(CommonParams.Q, "{!vecSim f=vector2 minReturn=0.8}" + vectorToSearch, "fl", "id"), "//result[@numFound='3']", "//result/doc[1]/str[@name='id'][.='11']", "//result/doc[2]/str[@name='id'][.='13']", "//result/doc[3]/str[@name='id'][.='12']"); } - @Test - public void highDimensionFloatVectorField_shouldSearchOnThatField() { - int highDimension = 2048; - List<SolrInputDocument> docsToIndex = this.prepareHighDimensionFloatVectorsDocs(highDimension); - for (SolrInputDocument doc : docsToIndex) { - assertU(adoc(doc)); - } - assertU(commit()); - - float[] highDimensionalityQueryVector = new float[highDimension]; - for (int i = 0; i < highDimension; i++) { - highDimensionalityQueryVector[i] = i; - } - String vectorToSearch = Arrays.toString(highDimensionalityQueryVector); - 
- assertQ( - req(CommonParams.Q, "{!knn f=2048_float_vector topK=1}" + vectorToSearch, "fl", "id"), - "//result[@numFound='1']", - "//result/doc[1]/str[@name='id'][.='1']"); - } - - @Test - public void highDimensionByteVectorField_shouldSearchOnThatField() { - int highDimension = 2048; - List<SolrInputDocument> docsToIndex = this.prepareHighDimensionByteVectorsDocs(highDimension); - for (SolrInputDocument doc : docsToIndex) { - assertU(adoc(doc)); - } - assertU(commit()); - - byte[] highDimensionalityQueryVector = new byte[highDimension]; - for (int i = 0; i < highDimension; i++) { - highDimensionalityQueryVector[i] = (byte) (i % 127); - } - String vectorToSearch = Arrays.toString(highDimensionalityQueryVector); - - assertQ( - req(CommonParams.Q, "{!knn f=2048_byte_vector topK=1}" + vectorToSearch, "fl", "id"), - "//result[@numFound='1']", - "//result/doc[1]/str[@name='id'][.='1']"); - } - - private List<SolrInputDocument> prepareHighDimensionFloatVectorsDocs(int highDimension) { - int docsCount = 13; - String field = "2048_float_vector"; - List<SolrInputDocument> docs = new ArrayList<>(docsCount); - - for (int i = 1; i < docsCount + 1; i++) { - SolrInputDocument doc = new SolrInputDocument(); - doc.addField(IDField, i); - docs.add(doc); - } - - for (int i = 0; i < docsCount; i++) { - List<Integer> highDimensionalityVector = new ArrayList<>(); - for (int j = i * highDimension; j < highDimension; j++) { - highDimensionalityVector.add(j); - } - docs.get(i).addField(field, highDimensionalityVector); - } - Collections.reverse(docs); - return docs; - } - - private List<SolrInputDocument> prepareHighDimensionByteVectorsDocs(int highDimension) { - int docsCount = 13; - String field = "2048_byte_vector"; - List<SolrInputDocument> docs = new ArrayList<>(docsCount); - - for (int i = 1; i < docsCount + 1; i++) { - SolrInputDocument doc = new SolrInputDocument(); - doc.addField(IDField, i); - docs.add(doc); - } - - for (int i = 0; i < docsCount; i++) { - List<Integer> 
highDimensionalityVector = new ArrayList<>(); - for (int j = i * highDimension; j < highDimension; j++) { - highDimensionalityVector.add(j % 127); - } - docs.get(i).addField(field, highDimensionalityVector); - } - Collections.reverse(docs); - return docs; - } - @Test public void vectorByteEncodingField_shouldSearchOnThatField() { String vectorToSearch = "[2, 2, 1, 3]"; + float minR = + VectorSimilarityFunction.COSINE.compare(new byte[] {2, 2, 1, 3}, new byte[] {1, 2, 1, 2}); assertQ( - req(CommonParams.Q, "{!knn f=vector_byte_encoding topK=2}" + vectorToSearch, "fl", "id"), + req( + CommonParams.Q, + "{!vecSim f=vector_byte_encoding minReturn=" + minR + "}" + vectorToSearch, + "fl", + "id"), "//result[@numFound='2']", "//result/doc[1]/str[@name='id'][.='2']", "//result/doc[2]/str[@name='id'][.='3']"); vectorToSearch = "[8, 3, 2, 4]"; + minR = + VectorSimilarityFunction.COSINE.compare(new byte[] {8, 3, 2, 4}, new byte[] {7, 2, 1, 3}); assertQ( - req(CommonParams.Q, "{!knn f=vector_byte_encoding topK=2}" + vectorToSearch, "fl", "id"), + req( + CommonParams.Q, + "{!vecSim f=vector_byte_encoding minReturn=" + minR + "}" + vectorToSearch, + "fl", + "id,score,vector_byte_encoding", + "indent", + "true"), "//result[@numFound='2']", "//result/doc[1]/str[@name='id'][.='8']", "//result/doc[2]/str[@name='id'][.='4']"); @@ -329,7 +198,11 @@ public class KnnQParserTest extends SolrTestCaseJ4 { assertQEx( "incorrect vector element: '8.3'. The expected format is:'[b1,b2..b3]' where each element b is a byte (-128 to 127)", "incorrect vector element: '8.3'. 
The expected format is:'[b1,b2..b3]' where each element b is a byte (-128 to 127)", - req(CommonParams.Q, "{!knn f=vector_byte_encoding topK=10}" + vectorToSearch, "fl", "id"), + req( + CommonParams.Q, + "{!vecSim f=vector_byte_encoding minReturn=0.0}" + vectorToSearch, + "fl", + "id"), SolrException.ErrorCode.BAD_REQUEST); } @@ -341,7 +214,11 @@ public class KnnQParserTest extends SolrTestCaseJ4 { assertQEx( "incorrect vector element: ' -129'. The expected format is:'[b1,b2..b3]' where each element b is a byte (-128 to 127)", "incorrect vector element: ' -129'. The expected format is:'[b1,b2..b3]' where each element b is a byte (-128 to 127)", - req(CommonParams.Q, "{!knn f=vector_byte_encoding topK=10}" + vectorToSearch, "fl", "id"), + req( + CommonParams.Q, + "{!vecSim f=vector_byte_encoding minReturn=0.0}" + vectorToSearch, + "fl", + "id"), SolrException.ErrorCode.BAD_REQUEST); vectorToSearch = "[1, 3, 156, 5]"; @@ -349,7 +226,11 @@ public class KnnQParserTest extends SolrTestCaseJ4 { assertQEx( "incorrect vector element: ' 156'. The expected format is:'[b1,b2..b3]' where each element b is a byte (-128 to 127)", "incorrect vector element: ' 156'. 
The expected format is:'[b1,b2..b3]' where each element b is a byte (-128 to 127)", - req(CommonParams.Q, "{!knn f=vector_byte_encoding topK=10}" + vectorToSearch, "fl", "id"), + req( + CommonParams.Q, + "{!vecSim f=vector_byte_encoding minReturn=0.0}" + vectorToSearch, + "fl", + "id"), SolrException.ErrorCode.BAD_REQUEST); } @@ -358,7 +239,7 @@ public class KnnQParserTest extends SolrTestCaseJ4 { assertQEx( "missing vector to search should throw Exception", "the Dense Vector value 'v' to search is missing", - req(CommonParams.Q, "{!knn f=vector topK=10}", "fl", "id"), + req(CommonParams.Q, "{!vecSim f=vector minReturn=0.0}", "fl", "id"), SolrException.ErrorCode.BAD_REQUEST); } @@ -368,14 +249,14 @@ public class KnnQParserTest extends SolrTestCaseJ4 { assertQEx( "missing vector to search should throw Exception", "incorrect vector dimension. The vector value has size 3 while it is expected a vector with size 4", - req(CommonParams.Q, "{!knn f=vector topK=10}" + vectorToSearch, "fl", "id"), + req(CommonParams.Q, "{!vecSim f=vector minReturn=0.0}" + vectorToSearch, "fl", "id"), SolrException.ErrorCode.BAD_REQUEST); vectorToSearch = "[2.0, 4.4,,]"; assertQEx( "incorrect vector to search should throw Exception", "incorrect vector dimension. The vector value has size 2 while it is expected a vector with size 4", - req(CommonParams.Q, "{!knn f=vector topK=10}" + vectorToSearch, "fl", "id"), + req(CommonParams.Q, "{!vecSim f=vector minReturn=0.0}" + vectorToSearch, "fl", "id"), SolrException.ErrorCode.BAD_REQUEST); } @@ -385,35 +266,35 @@ public class KnnQParserTest extends SolrTestCaseJ4 { assertQEx( "incorrect vector to search should throw Exception", "incorrect vector format. 
The expected format is:'[f1,f2..f3]' where each element f is a float", - req(CommonParams.Q, "{!knn f=vector topK=10}" + vectorToSearch, "fl", "id"), + req(CommonParams.Q, "{!vecSim f=vector minReturn=0.0}" + vectorToSearch, "fl", "id"), SolrException.ErrorCode.BAD_REQUEST); vectorToSearch = "[2.0, 4.4, 3.5, 6.4"; assertQEx( "incorrect vector to search should throw Exception", "incorrect vector format. The expected format is:'[f1,f2..f3]' where each element f is a float", - req(CommonParams.Q, "{!knn f=vector topK=10}" + vectorToSearch, "fl", "id"), + req(CommonParams.Q, "{!vecSim f=vector minReturn=0.0}" + vectorToSearch, "fl", "id"), SolrException.ErrorCode.BAD_REQUEST); vectorToSearch = "2.0, 4.4, 3.5, 6.4]"; assertQEx( "incorrect vector to search should throw Exception", "incorrect vector format. The expected format is:'[f1,f2..f3]' where each element f is a float", - req(CommonParams.Q, "{!knn f=vector topK=10}" + vectorToSearch, "fl", "id"), + req(CommonParams.Q, "{!vecSim f=vector minReturn=0.0}" + vectorToSearch, "fl", "id"), SolrException.ErrorCode.BAD_REQUEST); vectorToSearch = "[2.0, 4.4, 3.5, stringElement]"; assertQEx( "incorrect vector to search should throw Exception", "incorrect vector element: ' stringElement'. The expected format is:'[f1,f2..f3]' where each element f is a float", - req(CommonParams.Q, "{!knn f=vector topK=10}" + vectorToSearch, "fl", "id"), + req(CommonParams.Q, "{!vecSim f=vector minReturn=0.0}" + vectorToSearch, "fl", "id"), SolrException.ErrorCode.BAD_REQUEST); vectorToSearch = "[2.0, 4.4, , ]"; assertQEx( "incorrect vector to search should throw Exception", "incorrect vector element: ' '. 
The expected format is:'[f1,f2..f3]' where each element f is a float", - req(CommonParams.Q, "{!knn f=vector topK=10}" + vectorToSearch, "fl", "id"), + req(CommonParams.Q, "{!vecSim f=vector minReturn=0.0}" + vectorToSearch, "fl", "id"), SolrException.ErrorCode.BAD_REQUEST); } @@ -422,8 +303,8 @@ public class KnnQParserTest extends SolrTestCaseJ4 { String vectorToSearch = "[1.0, 2.0, 3.0, 4.0]"; assertQ( - req(CommonParams.Q, "{!knn f=vector topK=10}" + vectorToSearch, "fl", "id"), - "//result[@numFound='10']", + req(CommonParams.Q, "{!vecSim f=vector minReturn=0.8}" + vectorToSearch, "fl", "id"), + "//result[@numFound='8']", "//result/doc[1]/str[@name='id'][.='1']", "//result/doc[2]/str[@name='id'][.='4']", "//result/doc[3]/str[@name='id'][.='2']", @@ -431,38 +312,20 @@ public class KnnQParserTest extends SolrTestCaseJ4 { "//result/doc[5]/str[@name='id'][.='3']", "//result/doc[6]/str[@name='id'][.='7']", "//result/doc[7]/str[@name='id'][.='5']", - "//result/doc[8]/str[@name='id'][.='6']", - "//result/doc[9]/str[@name='id'][.='9']", - "//result/doc[10]/str[@name='id'][.='8']"); + "//result/doc[8]/str[@name='id'][.='6']"); } @Test - public void knnQueryUsedInFilter_shouldFilterResultsBeforeTheQueryExecution() { + public void vecSimQueryUsedInFilters_shouldFilterResultsBeforeTheQueryExecution() { String vectorToSearch = "[1.0, 2.0, 3.0, 4.0]"; - assertQ( - req( - CommonParams.Q, - "id:(3 4 9 2)", - "fq", - "{!knn f=vector topK=4}" + vectorToSearch, - "fl", - "id"), - "//result[@numFound='2']", - "//result/doc[1]/str[@name='id'][.='2']", - "//result/doc[2]/str[@name='id'][.='4']"); - } - @Test - public void knnQueryUsedInFilters_shouldFilterResultsBeforeTheQueryExecution() { - String vectorToSearch = "[1.0, 2.0, 3.0, 4.0]"; - - // topK=4 -> 1,4,2,10 + // minReturn=0.8 -> 1,4,2,10,3,7,5,6 assertQ( req( CommonParams.Q, "id:(3 4 9 2)", "fq", - "{!knn f=vector topK=4}" + vectorToSearch, + "{!vecSim f=vector minReturn=0.8}" + vectorToSearch, "fq", "id:(4 20 9)", "fl", @@ 
-472,35 +335,35 @@ public class KnnQParserTest extends SolrTestCaseJ4 { } @Test - public void knnQueryUsedInFiltersWithPreFilter_shouldFilterResultsBeforeTheQueryExecution() { + public void vecSimQueryUsedInFiltersWithPreFilter_shouldFilterResultsBeforeTheQueryExecution() { String vectorToSearch = "[1.0, 2.0, 3.0, 4.0]"; - // topK=4 w/localparam preFilter -> 1,4,7,9 + // minReturn=0.8 w/localparam preFilter -> 1,4,7 assertQ( req( CommonParams.Q, - "id:(3 4 9 2)", + "id:(3 4 7 2)", "fq", - "{!knn f=vector topK=4 preFilter='id:(1 4 7 8 9)'}" + vectorToSearch, + "{!vecSim f=vector minReturn=0.8 preFilter='id:(1 4 7 8 9)'}" + vectorToSearch, "fq", - "id:(4 20 9)", + "id:(4 20 7)", "fl", "id"), "//result[@numFound='2']", "//result/doc[1]/str[@name='id'][.='4']", - "//result/doc[2]/str[@name='id'][.='9']"); + "//result/doc[2]/str[@name='id'][.='7']"); } @Test - public void knnQueryUsedInFilters_rejectIncludeExclude() { + public void vecSimQueryUsedInFilters_rejectIncludeExclude() { String vectorToSearch = "[1.0, 2.0, 3.0, 4.0]"; for (String fq : Arrays.asList( - "{!knn f=vector topK=5 includeTags=xxx}" + vectorToSearch, - "{!knn f=vector topK=5 excludeTags=xxx}" + vectorToSearch)) { + "{!vecSim f=vector minReturn=0.8 includeTags=xxx}" + vectorToSearch, + "{!vecSim f=vector minReturn=0.8 excludeTags=xxx}" + vectorToSearch)) { assertQEx( - "fq={!knn...} incompatible with include/exclude localparams", + "fq={!vecSim...} incompatible with include/exclude localparams", "used as a filter does not support", req("q", "*:*", "fq", fq), SolrException.ErrorCode.BAD_REQUEST); @@ -508,88 +371,90 @@ public class KnnQParserTest extends SolrTestCaseJ4 { } @Test - public void knnQueryAsSubQuery() { + public void vecSimQueryAsSubQuery() { final SolrParams common = params("fl", "id", "vec", "[1.0, 2.0, 3.0, 4.0]"); - final String filt = "id:(2 4 7 9 8 20 3)"; + final String filt = "id:(2 4 7 9 8 20)"; - // When knn parser is a subquery, it should not pre-filter on any global fq params - // 
topK -> 1,4,2,10,3 -> fq -> 4,2,3 + // When vecSim parser is a subquery, it should not pre-filter on any global fq params + // minReturn -> 1,4,2,10,3,7,5,6 -> fq -> 4,2,7 assertQ( - req(common, "fq", filt, "q", "*:* AND {!knn f=vector topK=5 v=$vec}"), + req(common, "fq", filt, "q", "*:* AND {!vecSim f=vector minReturn=0.8 v=$vec}"), "//result[@numFound='3']", "//result/doc[1]/str[@name='id'][.='4']", "//result/doc[2]/str[@name='id'][.='2']", - "//result/doc[3]/str[@name='id'][.='3']"); - // topK -> 1,4,2,10,3 + '8' -> fq -> 4,2,3,8 + "//result/doc[3]/str[@name='id'][.='7']"); + // minReturn -> 1,4,2,10,3,7,5,6 + '8' -> fq -> 4,2,7,8 assertQ( - req(common, "fq", filt, "q", "id:8^=0.01 OR {!knn f=vector topK=5 v=$vec}"), + req(common, "fq", filt, "q", "id:8^=0.01 OR {!vecSim f=vector minReturn=0.8 v=$vec}"), "//result[@numFound='4']", "//result/doc[1]/str[@name='id'][.='4']", "//result/doc[2]/str[@name='id'][.='2']", - "//result/doc[3]/str[@name='id'][.='3']", + "//result/doc[3]/str[@name='id'][.='7']", "//result/doc[4]/str[@name='id'][.='8']"); } @Test - public void knnQueryAsSubQuery_withPreFilter() { + public void vecSimQueryAsSubQuery_withPreFilter() { final SolrParams common = params("fl", "id", "vec", "[1.0, 2.0, 3.0, 4.0]"); final String filt = "id:(2 4 7 9 8 20 3)"; - // knn subquery should still accept `preFilter` local param - // filt -> topK -> 4,2,3,7,9 + // vecSim subquery should still accept `preFilter` local param + // filt -> minReturn -> 4,2,3,7 assertQ( - req(common, "q", "*:* AND {!knn f=vector topK=5 preFilter='" + filt + "' v=$vec}"), - "//result[@numFound='5']", + req( + common, + "q", + "*:* AND {!vecSim f=vector minReturn=0.8 preFilter='" + filt + "' v=$vec}"), + "//result[@numFound='4']", "//result/doc[1]/str[@name='id'][.='4']", "//result/doc[2]/str[@name='id'][.='2']", "//result/doc[3]/str[@name='id'][.='3']", - "//result/doc[4]/str[@name='id'][.='7']", - "//result/doc[5]/str[@name='id'][.='9']"); + 
"//result/doc[4]/str[@name='id'][.='7']"); // it should not pre-filter on any global fq params - // filt -> topK -> 4,2,3,7,9 -> fq -> 3,9 + // filt -> minReturn -> 4,2,3,7 -> fq -> 3,7 assertQ( req( common, "fq", - "id:(1 9 20 3 5 6 8)", + "id:(1 9 20 3 5 7 8)", "q", - "*:* AND {!knn f=vector topK=5 preFilter='" + filt + "' v=$vec}"), + "*:* AND {!vecSim f=vector minReturn=0.8 preFilter='" + filt + "' v=$vec}"), "//result[@numFound='2']", "//result/doc[1]/str[@name='id'][.='3']", - "//result/doc[2]/str[@name='id'][.='9']"); - // filt -> topK -> 4,2,3,7,9 + '8' -> fq -> 8,3,9 + "//result/doc[2]/str[@name='id'][.='7']"); + // filt -> minReturn -> 4,2,3,7 + '8' -> fq -> 8,3,7 assertQ( req( common, "fq", - "id:(1 9 20 3 5 6 8)", + "id:(1 9 20 3 5 7 8)", "q", - "id:8^=100 OR {!knn f=vector topK=5 preFilter='" + filt + "' v=$vec}"), + "id:8^=100 OR {!vecSim f=vector minReturn=0.8 preFilter='" + filt + "' v=$vec}"), "//result[@numFound='3']", "//result/doc[1]/str[@name='id'][.='8']", "//result/doc[2]/str[@name='id'][.='3']", - "//result/doc[3]/str[@name='id'][.='9']"); + "//result/doc[3]/str[@name='id'][.='7']"); } @Test - public void knnQueryAsSubQuery_rejectIncludeExclude() { + public void vecSimQueryAsSubQuery_rejectIncludeExclude() { final SolrParams common = params("fl", "id", "vec", "[1.0, 2.0, 3.0, 4.0]"); - for (String knn : + for (String subq : Arrays.asList( - "{!knn f=vector topK=5 includeTags=xxx v=$vec}", - "{!knn f=vector topK=5 excludeTags=xxx v=$vec}")) { + "{!vecSim f=vector minReturn=0.8 includeTags=xxx v=$vec}", + "{!vecSim f=vector minReturn=0.8 excludeTags=xxx v=$vec}")) { assertQEx( - "knn as subquery incompatible with include/exclude localparams", + "vecSim as subquery incompatible with include/exclude localparams", "used as a sub-query does not support", - req(common, "q", "*:* OR " + knn), + req(common, "q", "*:* OR " + subq), SolrException.ErrorCode.BAD_REQUEST); } } @Test - public void knnQueryWithFilterQuery_singlePreFilterEquivilence() { + 
public void vecSimQueryWithFilterQuery_singlePreFilterEquivilence() { final String vectorToSearch = "[1.0, 2.0, 3.0, 4.0]"; final SolrParams common = params("fl", "id"); @@ -597,12 +462,15 @@ public class KnnQParserTest extends SolrTestCaseJ4 { final String filt = "id:(1 2 7 20)"; for (SolrQueryRequest req : Arrays.asList( - req(common, "q", "{!knn f=vector topK=10}" + vectorToSearch, "fq", filt), - req(common, "q", "{!knn f=vector preFilter=\"" + filt + "\" topK=10}" + vectorToSearch), + req(common, "q", "{!vecSim f=vector minReturn=0.8}" + vectorToSearch, "fq", filt), + req( + common, + "q", + "{!vecSim f=vector preFilter=\"" + filt + "\" minReturn=0.8}" + vectorToSearch), req( common, "q", - "{!knn f=vector preFilter=$my_filt topK=10}" + vectorToSearch, + "{!vecSim f=vector preFilter=$my_filt minReturn=0.8}" + vectorToSearch, "my_filt", filt))) { assertQ( @@ -615,29 +483,39 @@ public class KnnQParserTest extends SolrTestCaseJ4 { } @Test - public void knnQueryWithFilterQuery_multiPreFilterEquivilence() { + public void vecSimQueryWithFilterQuery_multiPreFilterEquivilence() { final String vectorToSearch = "[1.0, 2.0, 3.0, 4.0]"; final SolrParams common = params("fl", "id"); // these requests should be equivalent final String fx = "id:(3 4 9 2 1 )"; // 1 & 10 dropped from intersection final String fy = "id:(3 4 9 2 10)"; + final String minR = "minReturn=0.8"; // should exclude 9 for (SolrQueryRequest req : Arrays.asList( - req(common, "q", "{!knn f=vector topK=4}" + vectorToSearch, "fq", fx, "fq", fy), req( common, "q", - "{!knn f=vector preFilter=\"" + "{!vecSim f=vector " + minR + "}" + vectorToSearch, + "fq", + fx, + "fq", + fy), + req( + common, + "q", + "{!vecSim f=vector preFilter=\"" + fx + "\" preFilter=\"" + fy - + "\" topK=4}" + + "\" " + + minR + + "}" + vectorToSearch), req( common, "q", - "{!knn f=vector preFilter=$fx preFilter=$fy topK=4}" + vectorToSearch, + "{!vecSim f=vector preFilter=$fx preFilter=$fy " + minR + "}" + vectorToSearch, "fx", fx, 
"fy", @@ -645,50 +523,53 @@ public class KnnQParserTest extends SolrTestCaseJ4 { req( common, "q", - "{!knn f=vector preFilter=$multi_filt topK=4}" + vectorToSearch, + "{!vecSim f=vector preFilter=$multi_filt " + minR + "}" + vectorToSearch, "multi_filt", fx, "multi_filt", fy))) { assertQ( req, - "//result[@numFound='4']", + "//result[@numFound='3']", "//result/doc[1]/str[@name='id'][.='4']", "//result/doc[2]/str[@name='id'][.='2']", - "//result/doc[3]/str[@name='id'][.='3']", - "//result/doc[4]/str[@name='id'][.='9']"); + "//result/doc[3]/str[@name='id'][.='3']"); } } @Test - public void knnQueryWithPreFilter_rejectIncludeExclude() { + public void vecSimQueryWithPreFilter_rejectIncludeExclude() { final String vectorToSearch = "[1.0, 2.0, 3.0, 4.0]"; assertQEx( - "knn preFilter localparm incompatible with include/exclude localparams", + "vecSim preFilter localparm incompatible with include/exclude localparams", "does not support combining preFilter localparam with either", // shouldn't matter if global fq w/tag even exists, usage is an error - req("q", "{!knn f=vector preFilter='id:1' includeTags=xxx}" + vectorToSearch), + req( + "q", + "{!vecSim f=vector minReturn=0.8 preFilter='id:1' includeTags=xxx}" + vectorToSearch), SolrException.ErrorCode.BAD_REQUEST); assertQEx( - "knn preFilter localparm incompatible with include/exclude localparams", + "vecSim preFilter localparm incompatible with include/exclude localparams", "does not support combining preFilter localparam with either", // shouldn't matter if global fq w/tag even exists, usage is an error - req("q", "{!knn f=vector preFilter='id:1' excludeTags=xxx}" + vectorToSearch), + req( + "q", + "{!vecSim f=vector minReturn=0.8 preFilter='id:1' excludeTags=xxx}" + vectorToSearch), SolrException.ErrorCode.BAD_REQUEST); } @Test - public void knnQueryWithFilterQuery_preFilterLocalParamOverridesGlobalFilters() { + public void vecSimQueryWithFilterQuery_preFilterLocalParamOverridesGlobalFilters() { final String 
vectorToSearch = "[1.0, 2.0, 3.0, 4.0]"; // trivial case: empty preFilter localparam means no pre-filtering assertQ( req( - "q", "{!knn f=vector preFilter='' topK=5}" + vectorToSearch, + "q", "{!vecSim f=vector preFilter='' minReturn=0.8}" + vectorToSearch, "fq", "-id:4", "fl", "id"), - "//result[@numFound='4']", + "//result[@numFound='7']", "//result/doc[1]/str[@name='id'][.='1']", "//result/doc[2]/str[@name='id'][.='2']", "//result/doc[3]/str[@name='id'][.='10']", @@ -697,18 +578,17 @@ public class KnnQParserTest extends SolrTestCaseJ4 { // localparam prefiltering, global fqs applied independently assertQ( req( - "q", "{!knn f=vector preFilter='id:(3 4 9 2 7 8)' topK=5}" + vectorToSearch, + "q", "{!vecSim f=vector preFilter='id:(3 4 9 2 7 8)' minReturn=0.8}" + vectorToSearch, "fq", "-id:4", "fl", "id"), - "//result[@numFound='4']", + "//result[@numFound='3']", "//result/doc[1]/str[@name='id'][.='2']", "//result/doc[2]/str[@name='id'][.='3']", - "//result/doc[3]/str[@name='id'][.='7']", - "//result/doc[4]/str[@name='id'][.='9']"); + "//result/doc[3]/str[@name='id'][.='7']"); } @Test - public void knnQueryWithFilterQuery_localParamIncludeExcludeTags() { + public void vecSimQueryWithFilterQuery_localParamIncludeExcludeTags() { final String vectorToSearch = "[1.0, 2.0, 3.0, 4.0]"; final SolrParams common = params( @@ -717,26 +597,30 @@ public class KnnQParserTest extends SolrTestCaseJ4 { "fq", "{!tag=yy,aa}id:(1 2 3 4 5 6 7)"); // These req's are equivalent: pre-filter everything - // So only 7,6,5 are viable for topK=5 + // So only 7,6,5 are viable for minReturn=0.8 for (SolrQueryRequest req : Arrays.asList( // default behavior is all fq's pre-filter, - req(common, "q", "{!knn f=vector topK=5}" + vectorToSearch), + req(common, "q", "{!vecSim f=vector minReturn=0.8}" + vectorToSearch), // diff ways of explicitly requesting both fq params - req(common, "q", "{!knn f=vector includeTags=aa topK=5}" + vectorToSearch), + req(common, "q", "{!vecSim f=vector includeTags=aa 
minReturn=0.8}" + vectorToSearch), req( common, "q", - "{!knn f=vector includeTags=aa excludeTags='' topK=5}" + vectorToSearch), + "{!vecSim f=vector includeTags=aa excludeTags='' minReturn=0.8}" + vectorToSearch), req( common, "q", - "{!knn f=vector includeTags=aa excludeTags=bogus topK=5}" + vectorToSearch), + "{!vecSim f=vector includeTags=aa excludeTags=bogus minReturn=0.8}" + + vectorToSearch), req( common, "q", - "{!knn f=vector includeTags=xx includeTags=yy topK=5}" + vectorToSearch), - req(common, "q", "{!knn f=vector includeTags=xx,yy,bogus topK=5}" + vectorToSearch))) { + "{!vecSim f=vector includeTags=xx includeTags=yy minReturn=0.8}" + vectorToSearch), + req( + common, + "q", + "{!vecSim f=vector includeTags=xx,yy,bogus minReturn=0.8}" + vectorToSearch))) { assertQ( req, "//result[@numFound='3']", @@ -747,88 +631,101 @@ public class KnnQParserTest extends SolrTestCaseJ4 { } @Test - public void knnQueryWithFilterQuery_localParamsDisablesAllPreFiltering() { + public void vecSimQueryWithFilterQuery_localParamsDisablesAllPreFiltering() { final String vectorToSearch = "[1.0, 2.0, 3.0, 4.0]"; final SolrParams common = params( "fl", "id", - "fq", "{!tag=xx,aa}id:(5 6 7 8 9 10)", - "fq", "{!tag=yy,aa}id:(1 2 3 4 5 6 7)"); + "fq", "{!tag=xx,aa}id:(11 7 8 9 10)", + "fq", "{!tag=yy,aa}id:(1 2 3 4 12 7)"); // These req's are equivalent: pre-filter nothing - // So 1,4,2,10,3,7 are the topK=6 + // So 1,4,2,10,3,7,5,6 are the minReturn=0.8 // Only 7 matches both of the the regular fq params for (SolrQueryRequest req : Arrays.asList( // explicit local empty preFilter - req(common, "q", "{!knn f=vector preFilter='' topK=6}" + vectorToSearch), + req(common, "q", "{!vecSim f=vector preFilter='' minReturn=0.8}" + vectorToSearch), // diff ways of explicitly including none of the global fq params - req(common, "q", "{!knn f=vector includeTags='' topK=6}" + vectorToSearch), - req(common, "q", "{!knn f=vector includeTags=bogus topK=6}" + vectorToSearch), + req(common, "q", 
"{!vecSim f=vector includeTags='' minReturn=0.8}" + vectorToSearch), + req(common, "q", "{!vecSim f=vector includeTags=bogus minReturn=0.8}" + vectorToSearch), // diff ways of explicitly excluding all of the global fq params - req(common, "q", "{!knn f=vector excludeTags=aa topK=6}" + vectorToSearch), + req(common, "q", "{!vecSim f=vector excludeTags=aa minReturn=0.8}" + vectorToSearch), + req( + common, + "q", + "{!vecSim f=vector includeTags=aa excludeTags=aa minReturn=0.8}" + vectorToSearch), req( common, "q", - "{!knn f=vector includeTags=aa excludeTags=aa topK=6}" + vectorToSearch), + "{!vecSim f=vector includeTags=aa excludeTags=xx,yy minReturn=0.8}" + + vectorToSearch), req( common, "q", - "{!knn f=vector includeTags=aa excludeTags=xx,yy topK=6}" + vectorToSearch), + "{!vecSim f=vector includeTags=xx,yy excludeTags=aa minReturn=0.8}" + + vectorToSearch), + req(common, "q", "{!vecSim f=vector excludeTags=xx,yy minReturn=0.8}" + vectorToSearch), + req(common, "q", "{!vecSim f=vector excludeTags=aa minReturn=0.8}" + vectorToSearch), req( common, "q", - "{!knn f=vector includeTags=xx,yy excludeTags=aa topK=6}" + vectorToSearch), - req(common, "q", "{!knn f=vector excludeTags=xx,yy topK=6}" + vectorToSearch), - req(common, "q", "{!knn f=vector excludeTags=aa topK=6}" + vectorToSearch), + "{!vecSim f=vector excludeTags=xx excludeTags=yy minReturn=0.8}" + vectorToSearch), req( common, "q", - "{!knn f=vector excludeTags=xx excludeTags=yy topK=6}" + vectorToSearch), + "{!vecSim f=vector excludeTags=xx excludeTags=yy,bogus minReturn=0.8}" + + vectorToSearch), req( common, "q", - "{!knn f=vector excludeTags=xx excludeTags=yy,bogus topK=6}" + vectorToSearch), - req(common, "q", "{!knn f=vector excludeTags=xx,yy,bogus topK=6}" + vectorToSearch))) { + "{!vecSim f=vector excludeTags=xx,yy,bogus minReturn=0.8}" + vectorToSearch))) { assertQ(req, "//result[@numFound='1']", "//result/doc[1]/str[@name='id'][.='7']"); } } @Test - public void 
knnQueryWithFilterQuery_localParamCombinedIncludeExcludeTags() { + public void vecSimQueryWithFilterQuery_localParamCombinedIncludeExcludeTags() { final String vectorToSearch = "[1.0, 2.0, 3.0, 4.0]"; final SolrParams common = params( "fl", "id", - "fq", "{!tag=xx,aa}id:(5 6 7 8 9 10)", - "fq", "{!tag=yy,aa}id:(1 2 3 4 5 6 7)"); + "fq", "{!tag=xx,aa}id:(11 7 8 9 10)", + "fq", "{!tag=yy,aa}id:(1 2 3 4 12 7)"); // These req's are equivalent: prefilter only the 'yy' fq - // So 1,4,2,3,7 are in the topK=5. + // So 1,4,2,3,7 are in the minReturn=0.8 // Only 7 matches the regular 'xx' fq param for (SolrQueryRequest req : Arrays.asList( // diff ways of only using the 'yy' filter - req(common, "q", "{!knn f=vector includeTags=yy,bogus topK=5}" + vectorToSearch), req( common, "q", - "{!knn f=vector includeTags=yy excludeTags='' topK=5}" + vectorToSearch), - req(common, "q", "{!knn f=vector excludeTags=xx,bogus topK=5}" + vectorToSearch), + "{!vecSim f=vector includeTags=yy,bogus minReturn=0.8}" + vectorToSearch), req( common, "q", - "{!knn f=vector includeTags=yy excludeTags=xx topK=5}" + vectorToSearch), + "{!vecSim f=vector includeTags=yy excludeTags='' minReturn=0.8}" + vectorToSearch), req( common, "q", - "{!knn f=vector includeTags=aa excludeTags=xx topK=5}" + vectorToSearch))) { + "{!vecSim f=vector excludeTags=xx,bogus minReturn=0.8}" + vectorToSearch), + req( + common, + "q", + "{!vecSim f=vector includeTags=yy excludeTags=xx minReturn=0.8}" + vectorToSearch), + req( + common, + "q", + "{!vecSim f=vector includeTags=aa excludeTags=xx minReturn=0.8}" + + vectorToSearch))) { assertQ(req, "//result[@numFound='1']", "//result/doc[1]/str[@name='id'][.='7']"); } } @Test - public void knnQueryWithMultiSelectFaceting_excludeTags() { + public void vecSimQueryWithMultiSelectFaceting_excludeTags() { // NOTE: faceting on id is not very realistic, // but it confirms what we care about re:filters w/o needing extra fields. 
final String facet_xpath = "//lst[@name='facet_fields']/lst[@name='id']/int"; @@ -838,7 +735,7 @@ public class KnnQParserTest extends SolrTestCaseJ4 { params( "fl", "id", "indent", "true", - "q", "{!knn f=vector topK=5 excludeTags=facet_click v=$vec}", + "q", "{!vecSim f=vector minReturn=0.9 excludeTags=facet_click v=$vec}", "vec", vectorToSearch, // mimicing "inStock:true" "fq", "-id:(2 3)", @@ -864,7 +761,6 @@ public class KnnQParserTest extends SolrTestCaseJ4 { // drill down on a single facet constraint // multi-select means facet counts shouldn't change - // (this proves the knn isn't pre-filtering on the 'facet_click' fq) assertQ( req(common, "fq", "{!tag=facet_click}id:(4)"), "//result[@numFound='1']", @@ -878,7 +774,6 @@ public class KnnQParserTest extends SolrTestCaseJ4 { // drill down on an additional facet constraint // multi-select means facet counts shouldn't change - // (this proves the knn isn't pre-filtering on the 'facet_click' fq) assertQ( req(common, "fq", "{!tag=facet_click}id:(4 5)"), "//result[@numFound='2']", @@ -892,63 +787,12 @@ public class KnnQParserTest extends SolrTestCaseJ4 { facet_xpath + "[@name='5'][.='1']"); } - @Test - public void knnQueryWithCostlyFq_shouldPerformKnnSearchWithPostFilter() { - String vectorToSearch = "[1.0, 2.0, 3.0, 4.0]"; - - assertQ( - req( - CommonParams.Q, - "{!knn f=vector topK=10}" + vectorToSearch, - "fq", - "{!frange cache=false l=0.99}$q", - "fl", - "*,score"), - "//result[@numFound='5']", - "//result/doc[1]/str[@name='id'][.='1']", - "//result/doc[2]/str[@name='id'][.='4']", - "//result/doc[3]/str[@name='id'][.='2']", - "//result/doc[4]/str[@name='id'][.='10']", - "//result/doc[5]/str[@name='id'][.='3']"); - } - - @Test - public void knnQueryWithFilterQueries_shouldPerformKnnSearchWithPreFiltersAndPostFilters() { - String vectorToSearch = "[1.0, 2.0, 3.0, 4.0]"; - - assertQ( - req( - CommonParams.Q, - "{!knn f=vector topK=4}" + vectorToSearch, - "fq", - "id:(3 4 9 2)", - "fq", - "{!frange cache=false 
l=0.99}$q", - "fl", - "id"), - "//result[@numFound='2']", - "//result/doc[1]/str[@name='id'][.='4']", - "//result/doc[2]/str[@name='id'][.='2']"); - } - - @Test - public void knnQueryWithNegativeFilterQuery_shouldPerformKnnSearchInPreFilteredResults() { - String vectorToSearch = "[1.0, 2.0, 3.0, 4.0]"; - assertQ( - req(CommonParams.Q, "{!knn f=vector topK=4}" + vectorToSearch, "fq", "-id:4", "fl", "id"), - "//result[@numFound='4']", - "//result/doc[1]/str[@name='id'][.='1']", - "//result/doc[2]/str[@name='id'][.='2']", - "//result/doc[3]/str[@name='id'][.='10']", - "//result/doc[4]/str[@name='id'][.='3']"); - } - /** * See {@link org.apache.solr.search.ReRankQParserPlugin.ReRankQueryRescorer#combine(float, * boolean, float)}} for more details. */ @Test - public void knnQueryAsRerank_shouldAddSimilarityFunctionScore() { + public void vecSimQueryAsRerank_shouldAddSimilarityFunctionScore() { String vectorToSearch = "[1.0, 2.0, 3.0, 4.0]"; assertQ( @@ -958,7 +802,7 @@ public class KnnQParserTest extends SolrTestCaseJ4 { "rq", "{!rerank reRankQuery=$rqq reRankDocs=4 reRankWeight=1}", "rqq", - "{!knn f=vector topK=4}" + vectorToSearch, + "{!vecSim f=vector minReturn=0.8}" + vectorToSearch, "fl", "id"), "//result[@numFound='4']", diff --git a/solr/solr-ref-guide/modules/query-guide/pages/dense-vector-search.adoc b/solr/solr-ref-guide/modules/query-guide/pages/dense-vector-search.adoc index ac96ef827bf..df761eeaf8d 100644 --- a/solr/solr-ref-guide/modules/query-guide/pages/dense-vector-search.adoc +++ b/solr/solr-ref-guide/modules/query-guide/pages/dense-vector-search.adoc @@ -237,14 +237,12 @@ client.add(Arrays.asList(d1, d2)); -- == Query Time -This is the Apache Solr query approach designed to support dense vector search: -=== knn Query Parser -The `knn` k-nearest neighbors query parser allows to find the k-nearest documents to the target vector according to indexed dense vectors in the given field. 
The set of documents can be Pre-Filtered to reduce the number of vector distance calculations that must be computed, and ensure the best `topK` are returned. +Apache Solr provides two query parsers that work with dense vector fields, that each support different ways of matching documents based on vector similarity: The `knn` query parser, and the `vecSim` query parser. -The score for a retrieved document is the approximate distance to the target vector(defined by the similarityFunction configured at indexing time). +Both parsers return scores for retrieved documents that are the approximate distance to the target vector (defined by the similarityFunction configured at indexing time) and both support "Pre-Filtering" the document graph to reduce the number of candidate vectors evaluated (without needing to compute their vector similarity distances). -It takes the following parameters: +Common parameters for both query parsers are: `f`:: + @@ -255,15 +253,6 @@ s|Required |Default: none + The `DenseVectorField` to search in. -`topK`:: -+ -[%autowidth,frame=none] -|=== -|Optional |Default: 10 -|=== -+ -How many k-nearest results to return. - `preFilter`:: + [%autowidth,frame=none] @@ -293,22 +282,73 @@ Indicates that only `fq` filters with the specified `tag` should be considered f Indicates that `fq` filters with the specified `tag` should be excluded from consideration for implicit Pre-Filtering. Must not be combined with `preFilter`. -Here's how to run a simple KNN search: +=== knn Query Parser + +The `knn` k-nearest neighbors query parser matches k-nearest documents to the target vector. + +In addition to the common parameters described above, it takes the following parameters: + +`topK`:: ++ +[%autowidth,frame=none] +|=== +|Optional |Default: 10 +|=== ++ +How many k-nearest results to return.
+ +Here's an example of a simple `knn` search: [source,text] ?q={!knn f=vector topK=10}[1.0, 2.0, 3.0, 4.0] The search results retrieved are the k=10 nearest documents to the vector in input `[1.0, 2.0, 3.0, 4.0]`, ranked by the `similarityFunction` configured at indexing time. +=== vecSim Query Parser + +The `vecSim` vector similarity query parser matches documents whose similarity with the target vector is above a minimum threshold. -==== Explicit KNN Pre-Filtering +In addition to the common parameters described above, it takes the following parameters: -The `knn` query parser's `preFilter` parameter can be specified to reduce the number of candidate documents evaluated for the k-nearest distance calculation: + +`minReturn`:: ++ +[%autowidth,frame=none] +|=== +s|Required |Default: none +|=== ++ +Minimum similarity threshold of nodes in the graph to be returned as matches. + +`minTraverse`:: ++ +[%autowidth,frame=none] +|=== +|Optional |Default: -Infinity +|=== ++ +Minimum similarity of nodes in the graph to continue traversal of their neighbors. + +Here's an example of a simple `vecSim` search: + +[source,text] +?q={!vecSim f=vector minReturn=0.7}[1.0, 2.0, 3.0, 4.0] + +The search results retrieved are all documents whose similarity with the input vector `[1.0, 2.0, 3.0, 4.0]` is at least `0.7` based on the `similarityFunction` configured at indexing time. + + +=== Graph Pre-Filtering + +Pre-Filtering the set of candidate documents considered when walking the graph can be specified either explicitly, or implicitly (based on existing `fq` params) depending on how and when these dense vector query parsers are used.
+ +==== Explicit Pre-Filtering + +The `preFilter` parameter can be specified explicitly to reduce the number of candidate documents evaluated for the distance calculation: [source,text] -?q={!knn f=vector topK=10 preFilter=inStock:true}[1.0, 2.0, 3.0, 4.0] +?q={!vecSim f=vector minReturn=0.7 preFilter=inStock:true}[1.0, 2.0, 3.0, 4.0] -In the above example, only documents matching the Pre-Filter `inStock:true` will be candidates for consideration when evaluating the k-nearest search against the specified vector. +In the above example, only documents matching the Pre-Filter `inStock:true` will be candidates for consideration when evaluating the `vecSim` search against the specified vector. The `preFilter` parameter may be blank (ex: `preFilter=""`) to indicate that no Pre-Filtering should be performed; or it may be multi-valued -- either through repetition, or via duplicated xref:local-params.adoc#parameter-dereferencing[Parameter References]. @@ -324,22 +364,22 @@ These two examples are equivalent: &knnPreFilter=inStock:true ---- -==== Implicit KNN Pre-Filtering +==== Implicit Pre-Filtering -While the `preFilter` parameter may be explicitly specified on *_any_* usage of the `knn` query parser, the default Pre-Filtering behavior (when no `preFilter` parameter is specified) will vary based on how the `knn` query parser is used: +While the `preFilter` parameter may be explicitly specified on *_any_* usage of the `knn` or `vecSim` query parsers, the default Pre-Filtering behavior (when no `preFilter` parameter is specified) will vary based on how the query parser is used: -* When used as the main `q` param: `fq` filters in the request (that are not xref:common-query-parameters.adoc#cache-local-parameter[Solr Post Filters]) will be combined to form an implicit KNN Pre-Filter.
+* When used as the main `q` param: `fq` filters in the request (that are not xref:common-query-parameters.adoc#cache-local-parameter[Solr Post Filters]) will be combined to form an implicit Graph Pre-Filter. ** This default behavior optimizes the number of vector distance calculations considered, eliminating documents that would eventually be excluded by an `fq` filter anyway. ** `includeTags` and `excludeTags` may be used to limit the set of `fq` filters used in the Pre-Filter. -* When used as an `fq` param, or as a subquery clause in a larger query: No implicit Pre-Filter is used. +* When a vector search query parser is used as an `fq` param, or as a subquery clause in a larger query: No implicit Pre-Filter is used. ** `includeTags` and `excludeTags` must not be used in these situations. -The example request below shows two usages of the `knn` query parser that will get _no_ implicit Pre-Filtering from any of the `fq` parameters, because neither usage is as the main `q` param: +The example request below shows two usages of vector query parsers that will get _no_ implicit Pre-Filtering from any of the `fq` parameters, because neither usage is as the main `q` param: [source,text] ---- -?q=(color_str:red OR {!knn f=color_vector topK=10 v="[1.0, 2.0, 3.0, 4.0]"}) +?q=(color_str:red OR {!vecSim f=color_vector minReturn=0.7 v="[1.0, 2.0, 3.0, 4.0]"}) &fq={!knn f=title_vector topK=10}[9.0, 8.0, 7.0, 6.0] &fq=inStock:true ---- @@ -363,21 +403,24 @@ If we modify the above request to add tags to the `fq` parameters, we can specif &fq={!tag=for_knn}inStock:true ---- -In this example, only the `inStock:true` filter will be used for KNN Pre-Filtering to find the the `topK=10` documents, and the `category:AAA` filter will be applied independently; possibly resulting in less then 10 total matches. 
+In this example, only the `inStock:true` filter will be used for Pre-Filtering to find the `topK=10` documents, and the `category:AAA` filter will be applied independently; possibly resulting in fewer than 10 total matches. Some use cases where `includeTags` and/or `excludeTags` may be more useful than an explicit `preFilter` parameter: -* You have some `fq` parameters that are xref:configuration-guide:requesthandlers-searchcomponents.adoc#paramsets-and-useparams[re-used on many requests] (even when you don't use the `knn` parser) that you wish to be used as KNN Pre-Filters when you _do_ use the `knn` query parser. -* You typically want all `fq` params to be used as KNN Pre-Filters, but when users "drill down" on Facets, you want the `fq` parameters you add to be excluded from the KNN Pre-Filtering so that the result set gets smaller; instead of just computing a new `topK` set. +* You have some `fq` parameters that are xref:configuration-guide:requesthandlers-searchcomponents.adoc#paramsets-and-useparams[re-used on many requests] (even when you don't search dense vector fields) that you wish to be used as Pre-Filters when you _do_ search dense vector fields. +* You typically want all `fq` params to be used as graph Pre-Filters on your `knn` queries, but when users "drill down" on Facets, you want the `fq` parameters you add to be excluded from the Pre-Filtering so that the result set gets smaller; instead of just computing a new `topK` set. -==== Usage as Re-Ranking Query -The `knn` query parser can be used to rerank first pass query results: +=== Usage in Re-Ranking Query + +Both dense vector search query parsers can be used to rerank first pass query results: + [source,text] &q=id:(3 4 9 2)&rq={!rerank reRankQuery=$rqq reRankDocs=4 reRankWeight=1}&rqq={!knn f=vector topK=10}[1.0, 2.0, 3.0, 4.0] + [IMPORTANT] ==== When using `knn` in re-ranking pay attention to the `topK` parameter.
