This is an automated email from the ASF dual-hosted git repository.
abenedetti pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/solr.git
The following commit(s) were added to refs/heads/main by this push:
new 4f02148b1f7 SOLR-17814: Add support for PatienceKnnVectorQuery (#3644)
4f02148b1f7 is described below
commit 4f02148b1f7fda769dcb74bd8e5f77983910f67a
Author: Ilaria Petreti <[email protected]>
AuthorDate: Tue Sep 23 11:50:22 2025 +0200
SOLR-17814: Add support for PatienceKnnVectorQuery (#3644)
* Added support for PatienceKnnVectorQuery
* Addressed comments after the review with Alessandro Benedetti
* Moved the logic inside the DenseVectorField
* Updated documentation
* Made the EarlyTerminationParams static
---
solr/CHANGES.txt | 2 +
.../org/apache/solr/schema/DenseVectorField.java | 35 +++-
.../org/apache/solr/search/neural/KnnQParser.java | 57 ++++-
.../apache/solr/search/neural/KnnQParserTest.java | 231 +++++++++++++++++++++
.../search/TextToVectorQParserTest.java | 26 +++
.../query-guide/pages/dense-vector-search.adoc | 47 +++++
6 files changed, 393 insertions(+), 5 deletions(-)
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 57e206adaf6..abe7b8f5563 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -23,6 +23,8 @@ New Features
* SOLR-17023: Use Modern NLP Models from Apache OpenNLP with Solr (Jeff
Zemerick, Eric Pugh)
+* SOLR-17814: Add support for PatienceKnnVectorQuery. (Ilaria Petreti via
Alessandro Benedetti)
+
Improvements
---------------------
diff --git a/solr/core/src/java/org/apache/solr/schema/DenseVectorField.java
b/solr/core/src/java/org/apache/solr/schema/DenseVectorField.java
index c355e167a76..22d0add817c 100644
--- a/solr/core/src/java/org/apache/solr/schema/DenseVectorField.java
+++ b/solr/core/src/java/org/apache/solr/schema/DenseVectorField.java
@@ -39,12 +39,14 @@ import
org.apache.lucene.queries.function.valuesource.ByteKnnVectorFieldSource;
import
org.apache.lucene.queries.function.valuesource.FloatKnnVectorFieldSource;
import org.apache.lucene.search.KnnByteVectorQuery;
import org.apache.lucene.search.KnnFloatVectorQuery;
+import org.apache.lucene.search.PatienceKnnVectorQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.SortField;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.hnsw.HnswGraph;
import org.apache.solr.common.SolrException;
import org.apache.solr.search.QParser;
+import org.apache.solr.search.neural.KnnQParser.EarlyTerminationParams;
import org.apache.solr.uninverting.UninvertingReader;
import org.apache.solr.util.vector.ByteDenseVectorParser;
import org.apache.solr.util.vector.DenseVectorParser;
@@ -371,17 +373,42 @@ public class DenseVectorField extends FloatPointField {
}
public Query getKnnVectorQuery(
- String fieldName, String vectorToSearch, int topK, Query filterQuery) {
+ String fieldName,
+ String vectorToSearch,
+ int topK,
+ Query filterQuery,
+ EarlyTerminationParams earlyTermination) {
DenseVectorParser vectorBuilder =
getVectorBuilder(vectorToSearch, DenseVectorParser.BuilderPhase.QUERY);
switch (vectorEncoding) {
case FLOAT32:
- return new KnnFloatVectorQuery(
- fieldName, vectorBuilder.getFloatVector(), topK, filterQuery);
+ KnnFloatVectorQuery knnFloatVectorQuery =
+ new KnnFloatVectorQuery(fieldName, vectorBuilder.getFloatVector(),
topK, filterQuery);
+ if (earlyTermination.isEnabled()) {
+ return (earlyTermination.getSaturationThreshold() != null
+ && earlyTermination.getPatience() != null)
+ ? PatienceKnnVectorQuery.fromFloatQuery(
+ knnFloatVectorQuery,
+ earlyTermination.getSaturationThreshold(),
+ earlyTermination.getPatience())
+ : PatienceKnnVectorQuery.fromFloatQuery(knnFloatVectorQuery);
+ }
+ return knnFloatVectorQuery;
case BYTE:
- return new KnnByteVectorQuery(fieldName,
vectorBuilder.getByteVector(), topK, filterQuery);
+ KnnByteVectorQuery knnByteVectorQuery =
+ new KnnByteVectorQuery(fieldName, vectorBuilder.getByteVector(),
topK, filterQuery);
+ if (earlyTermination.isEnabled()) {
+ return (earlyTermination.getSaturationThreshold() != null
+ && earlyTermination.getPatience() != null)
+ ? PatienceKnnVectorQuery.fromByteQuery(
+ knnByteVectorQuery,
+ earlyTermination.getSaturationThreshold(),
+ earlyTermination.getPatience())
+ : PatienceKnnVectorQuery.fromByteQuery(knnByteVectorQuery);
+ }
+ return knnByteVectorQuery;
default:
throw new SolrException(
SolrException.ErrorCode.SERVER_ERROR,
diff --git a/solr/core/src/java/org/apache/solr/search/neural/KnnQParser.java
b/solr/core/src/java/org/apache/solr/search/neural/KnnQParser.java
index b6d9f2541cd..189069805cd 100644
--- a/solr/core/src/java/org/apache/solr/search/neural/KnnQParser.java
+++ b/solr/core/src/java/org/apache/solr/search/neural/KnnQParser.java
@@ -16,7 +16,9 @@
*/
package org.apache.solr.search.neural;
+import java.util.Optional;
import org.apache.lucene.search.Query;
+import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.schema.DenseVectorField;
@@ -29,10 +31,63 @@ public class KnnQParser extends AbstractVectorQParserBase {
protected static final String TOP_K = "topK";
protected static final int DEFAULT_TOP_K = 10;
+ // Parameters for PatienceKnnVectorQuery, a version of the knn vector query that exits early
+ // when the HNSW queue saturates over a {@code saturationThreshold} for more than
+ // {@code patience} times.
+ protected static final String EARLY_TERMINATION = "earlyTermination";
+ protected static final boolean DEFAULT_EARLY_TERMINATION = false;
+ protected static final String SATURATION_THRESHOLD = "saturationThreshold";
+ protected static final String PATIENCE = "patience";
+
public KnnQParser(String qstr, SolrParams localParams, SolrParams params,
SolrQueryRequest req) {
super(qstr, localParams, params, req);
}
+  /**
+   * Immutable value holder for the early-termination options of a knn query.
+   *
+   * <p>The threshold and patience are kept as boxed types so that {@code null} can represent a
+   * value that was not supplied.
+   */
+  public static class EarlyTerminationParams {
+    private final boolean enabled;
+    private final Double saturationThreshold;
+    private final Integer patience;
+
+    /**
+     * @param enabled whether early termination is requested
+     * @param saturationThreshold the saturation threshold, or {@code null} when not supplied
+     * @param patience the patience, or {@code null} when not supplied
+     */
+    public EarlyTerminationParams(boolean enabled, Double saturationThreshold, Integer patience) {
+      this.patience = patience;
+      this.saturationThreshold = saturationThreshold;
+      this.enabled = enabled;
+    }
+
+    /** Returns the {@code enabled} flag passed at construction time. */
+    public boolean isEnabled() {
+      return this.enabled;
+    }
+
+    /** Returns the saturation threshold passed at construction time; may be {@code null}. */
+    public Double getSaturationThreshold() {
+      return this.saturationThreshold;
+    }
+
+    /** Returns the patience passed at construction time; may be {@code null}. */
+    public Integer getPatience() {
+      return this.patience;
+    }
+  }
+
+  /**
+   * Reads the {@code earlyTermination}, {@code saturationThreshold} and {@code patience} local
+   * params and bundles them into an {@link EarlyTerminationParams}.
+   *
+   * <p>Early termination is reported as enabled when the {@code earlyTermination} flag is true or
+   * when both {@code saturationThreshold} and {@code patience} are supplied.
+   *
+   * @return the parsed early-termination settings; threshold and patience are {@code null} when
+   *     absent from the local params
+   * @throws NumberFormatException if {@code saturationThreshold} or {@code patience} cannot be
+   *     parsed as a double / int respectively
+   * @throws SolrException with {@code BAD_REQUEST} if exactly one of {@code saturationThreshold}
+   *     and {@code patience} is supplied
+   */
+  public EarlyTerminationParams getEarlyTerminationParams() {
+    Double saturationThreshold = null;
+    final String rawThreshold = localParams.get(SATURATION_THRESHOLD);
+    if (rawThreshold != null) {
+      saturationThreshold = Double.parseDouble(rawThreshold);
+    }
+
+    Integer patience = null;
+    final String rawPatience = localParams.get(PATIENCE);
+    if (rawPatience != null) {
+      patience = Integer.parseInt(rawPatience);
+    }
+
+    final boolean thresholdGiven = saturationThreshold != null;
+    final boolean patienceGiven = patience != null;
+    if (thresholdGiven != patienceGiven) {
+      throw new SolrException(
+          SolrException.ErrorCode.BAD_REQUEST,
+          "Parameters 'saturationThreshold' and 'patience' must both be provided, or neither.");
+    }
+
+    // From here on the two values are either both present or both absent.
+    final boolean useExplicitParams = thresholdGiven && patienceGiven;
+    final boolean flagValue = localParams.getBool(EARLY_TERMINATION, DEFAULT_EARLY_TERMINATION);
+    return new EarlyTerminationParams(
+        flagValue || useExplicitParams, saturationThreshold, patience);
+  }
+
@Override
public Query parse() throws SyntaxError {
final SchemaField schemaField =
req.getCore().getLatestSchema().getField(getFieldName());
@@ -41,6 +96,6 @@ public class KnnQParser extends AbstractVectorQParserBase {
final int topK = localParams.getInt(TOP_K, DEFAULT_TOP_K);
return denseVectorType.getKnnVectorQuery(
- schemaField.getName(), vectorToSearch, topK, getFilterQuery());
+ schemaField.getName(), vectorToSearch, topK, getFilterQuery(),
getEarlyTerminationParams());
}
}
diff --git
a/solr/core/src/test/org/apache/solr/search/neural/KnnQParserTest.java
b/solr/core/src/test/org/apache/solr/search/neural/KnnQParserTest.java
index f5d5668a7e5..fe417165197 100644
--- a/solr/core/src/test/org/apache/solr/search/neural/KnnQParserTest.java
+++ b/solr/core/src/test/org/apache/solr/search/neural/KnnQParserTest.java
@@ -22,6 +22,7 @@ import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
+import java.util.Locale;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrInputDocument;
@@ -967,4 +968,234 @@ public class KnnQParserTest extends SolrTestCaseJ4 {
"//result/doc[3]/str[@name='id'][.='3']",
"//result/doc[4]/str[@name='id'][.='9']");
}
+
+ @Test
+ public void
testKnnFloatWithoutExplicitlyEarlyTermination_returnsKnnFloatVectorQuery() {
+ // It verifies that when no early termination parameters are provided,
+ // the default behavior is applied (early termination is disabled), and no
special logic is
+ // triggered.
+ String vectorToSearch = "[1.0, 2.0, 3.0, 4.0]";
+
+ assertQ(
+ req(
+ CommonParams.Q,
+ "{!knn f=vector topK=5}" + vectorToSearch,
+ "fl",
+ "id",
+ "debugQuery",
+ "true"),
+ "//result[@numFound='5']",
+
"//str[@name='parsedquery'][.='KnnFloatVectorQuery(KnnFloatVectorQuery:vector[1.0,...][5])']");
+ }
+
+ @Test
+ public void testKnnFloatWithoutEarlyTermination_returnsKnnFloatVectorQuery()
{
+ // It verifies that when early termination is explicitly set to false, no
special logic is
+ // triggered.
+ String vectorToSearch = "[1.0, 2.0, 3.0, 4.0]";
+
+ assertQ(
+ req(
+ CommonParams.Q,
+ "{!knn f=vector topK=5 earlyTermination=false}" + vectorToSearch,
+ "fl",
+ "id",
+ "debugQuery",
+ "true"),
+ "//result[@numFound='5']",
+
"//str[@name='parsedquery'][.='KnnFloatVectorQuery(KnnFloatVectorQuery:vector[1.0,...][5])']");
+ }
+
+ @Test
+ public void
testKnnFloatWithEarlyTerminationDefaultParams_returnsPatienceKnnVectorQuery() {
+ // It verifies that when early termination is explicitly set to true but no other parameters
+ // are provided, the PatienceKnnVectorQuery is executed using the default values for
+ // saturationThreshold and patience.
+ String vectorToSearch = "[1.0, 2.0, 3.0, 4.0]";
+
+ // IMPORTANT: The default values for `saturationThreshold` and `patience`
are hardcoded here
+ // because they are currently private in Lucene's PatienceKnnVectorQuery
implementation.
+ // If Lucene changes these defaults in a future release, this test will
break and must be
+ // updated accordingly.
+ double defaultSaturationThreshold = 0.995;
+ int defaultPatience = 7;
+
+ String expectedParsedQuery =
+ String.format(
+ Locale.US,
+
"PatienceKnnVectorQuery(PatienceKnnVectorQuery{saturationThreshold=%.3f,
patience=%d, delegate=KnnFloatVectorQuery:vector[1.0,...][10]})",
+ defaultSaturationThreshold,
+ defaultPatience);
+
+ assertQ(
+ req(
+ CommonParams.Q,
+ "{!knn f=vector topK=10 earlyTermination=true}" + vectorToSearch,
+ "fl",
+ "id",
+ "debugQuery",
+ "true"),
+ "//result[@numFound='10']",
+ "//str[@name='parsedquery'][.='" + expectedParsedQuery + "']");
+ }
+
+ @Test
+ public void
+
testKnnFloatWithEarlyTerminationExplicitParams_returnsPatienceKnnVectorQueryExplicitParams()
{
+ // It verifies that when early termination is explicitly set to true and both the
+ // saturationThreshold and patience parameters are provided, the PatienceKnnVectorQuery is
+ // executed using the specified input values.
+ String vectorToSearch = "[1.0, 2.0 ,3.0, 4.0]";
+
+ double explicitSaturationThreshold = 0.989;
+ int explicitPatience = 10;
+
+ String query =
+ String.format(
+ Locale.US,
+ "{!knn f=vector topK=10 earlyTermination=true
saturationThreshold=%.3f patience=%d}"
+ + vectorToSearch,
+ explicitSaturationThreshold,
+ explicitPatience);
+
+ String expectedParsedQuery =
+ String.format(
+ Locale.US,
+
"PatienceKnnVectorQuery(PatienceKnnVectorQuery{saturationThreshold=%.3f,
patience=%d, delegate=KnnFloatVectorQuery:vector[1.0,...][10]})",
+ explicitSaturationThreshold,
+ explicitPatience);
+
+ assertQ(
+ req(CommonParams.Q, query, "fl", "id", "debugQuery", "true"),
+ "//result[@numFound='10']",
+ "//str[@name='parsedquery'][.='" + expectedParsedQuery + "']");
+ }
+
+ @Test
+ public void
+
testKnnByteWithEarlyTerminationExplicitParams_returnsPatienceKnnVectorQueryExplicitParams()
{
+ // It verifies that when early termination is explicitly set to true and both the
+ // saturationThreshold and patience parameters are provided, the PatienceKnnVectorQuery is
+ // executed using the specified input values.
+ String vectorToSearch = "[2, 2, 1, 3]";
+
+ double explicitSaturationThreshold = 0.989;
+ int explicitPatience = 10;
+
+ String query =
+ String.format(
+ Locale.US,
+ "{!knn f=vector_byte_encoding topK=5 earlyTermination=true
saturationThreshold=%.3f patience=%d}"
+ + vectorToSearch,
+ explicitSaturationThreshold,
+ explicitPatience);
+
+ String expectedParsedQuery =
+ String.format(
+ Locale.US,
+
"PatienceKnnVectorQuery(PatienceKnnVectorQuery{saturationThreshold=%.3f,
patience=%d, delegate=KnnByteVectorQuery:vector_byte_encoding[2,...][5]})",
+ explicitSaturationThreshold,
+ explicitPatience);
+
+ assertQ(
+ req(CommonParams.Q, query, "fl", "id", "debugQuery", "true"),
+ "//result[@numFound='5']",
+ "//str[@name='parsedquery'][.='" + expectedParsedQuery + "']");
+ }
+
+ @Test
+ public void
+
testKnnFloatWithEarlyTerminationOnlyExplicitParams_returnsPatienceKnnVectorQueryExplicitParams()
{
+ // It verifies that when early termination is NOT explicitly passed but both the
+ // saturationThreshold and patience parameters are provided, the PatienceKnnVectorQuery is
+ // executed using the specified input values.
+ String vectorToSearch = "[1.0, 2.0 ,3.0, 4.0]";
+
+ double explicitSaturationThreshold = 0.989;
+ int explicitPatience = 10;
+
+ String query =
+ String.format(
+ Locale.US,
+ "{!knn f=vector topK=10 saturationThreshold=%.3f patience=%d}" +
vectorToSearch,
+ explicitSaturationThreshold,
+ explicitPatience);
+
+ String expectedParsedQuery =
+ String.format(
+ Locale.US,
+
"PatienceKnnVectorQuery(PatienceKnnVectorQuery{saturationThreshold=%.3f,
patience=%d, delegate=KnnFloatVectorQuery:vector[1.0,...][10]})",
+ explicitSaturationThreshold,
+ explicitPatience);
+
+ assertQ(
+ req(CommonParams.Q, query, "fl", "id", "debugQuery", "true"),
+ "//result[@numFound='10']",
+ "//str[@name='parsedquery'][.='" + expectedParsedQuery + "']");
+ }
+
+ @Test(expected = NumberFormatException.class)
+ public void
incorrectSaturationThresholdValue_shouldThrowNumberFormatException()
+ throws Exception {
+ // It verifies that when an invalid saturationThreshold value, e.g. 95%,
is provided in the
+ // query, Solr throws an exception.
+ String vectorToSearch = "[1.0, 2.0, 3.0, 4.0]";
+
+ String saturationThreshold = "95%";
+ int patience = 10;
+
+ String query =
+ String.format(
+ Locale.ROOT,
+ "{!knn f=vector topK=10 saturationThreshold=%s patience=%d}%s",
+ saturationThreshold,
+ patience,
+ vectorToSearch);
+
+ h.query(req(CommonParams.Q, query));
+ }
+
+ @Test(expected = NumberFormatException.class)
+ public void incorrectPatienceValue_shouldThrowNumberFormatException() throws
Exception {
+ // It verifies that when a non-integer patience value is provided in the query, Solr throws
+ // an exception.
+ String vectorToSearch = "[1.0, 2.0, 3.0, 4.0]";
+
+ double saturationThreshold = 0.995;
+ double patience = 7.9; // double instead of int
+
+ String query =
+ String.format(
+ Locale.ROOT,
+ "{!knn f=vector topK=10 saturationThreshold=%.3f patience=%.3f}%s",
+ saturationThreshold,
+ patience,
+ vectorToSearch);
+
+ h.query(req(CommonParams.Q, query));
+ }
+
+ @Test
+ public void onlyOneInputParam_shouldThrowException() {
+ // It verifies that when only one input param is provided in the query,
+ // Solr throws a BAD_REQUEST exception with the expected message.
+ String vectorToSearch = "[1.0, 2.0, 3.0, 4.0]";
+
+ double saturationThreshold = 0.995;
+
+ assertQEx(
+ "Parameters 'saturationThreshold' and 'patience' must both be
provided, or neither.",
+ req(
+ CommonParams.Q,
+ String.format(
+ Locale.ROOT,
+ "{!knn f=vector topK=10 saturationThreshold=%.3f}%s",
+ saturationThreshold,
+ vectorToSearch)),
+ SolrException.ErrorCode.BAD_REQUEST);
+ }
}
diff --git
a/solr/modules/llm/src/test/org/apache/solr/llm/textvectorisation/search/TextToVectorQParserTest.java
b/solr/modules/llm/src/test/org/apache/solr/llm/textvectorisation/search/TextToVectorQParserTest.java
index 516d1b17e2f..6c23ae21d1c 100644
---
a/solr/modules/llm/src/test/org/apache/solr/llm/textvectorisation/search/TextToVectorQParserTest.java
+++
b/solr/modules/llm/src/test/org/apache/solr/llm/textvectorisation/search/TextToVectorQParserTest.java
@@ -17,6 +17,7 @@
package org.apache.solr.llm.textvectorisation.search;
import java.util.Arrays;
+import java.util.Locale;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.llm.TestLlmBase;
import org.junit.AfterClass;
@@ -389,4 +390,29 @@ public class TextToVectorQParserTest extends TestLlmBase {
"/response/docs/[2]/id=='3'",
"/response/docs/[3]/id=='9'");
}
+
+ @Test
+ public void earlyTerminationEnabled_returnsPatienceKnnVectorQuery() throws
Exception {
+ final String solrQuery =
+ "{!knn_text_to_vector model=dummy-1 f=vector topK=5
earlyTermination=true}hello world";
+ final SolrQuery query = new SolrQuery();
+ query.setQuery(solrQuery);
+ query.add("fl", "id");
+ query.add("debugQuery", "true");
+
+ double defaultSaturationThreshold = 0.995;
+ int defaultPatience = 7;
+
+ String expectedParsedQuery =
+ String.format(
+ Locale.US,
+
"PatienceKnnVectorQuery(PatienceKnnVectorQuery{saturationThreshold=%.3f,
patience=%d, delegate=KnnFloatVectorQuery:vector[1.0,...][5]})",
+ defaultSaturationThreshold,
+ defaultPatience);
+
+ assertJQ(
+ "/query" + query.toQueryString(),
+ "/response/numFound==5]",
+ "/debug/parsedquery=='" + expectedParsedQuery + "'");
+ }
}
diff --git
a/solr/solr-ref-guide/modules/query-guide/pages/dense-vector-search.adoc
b/solr/solr-ref-guide/modules/query-guide/pages/dense-vector-search.adoc
index e66501faae3..d596fbaf985 100644
--- a/solr/solr-ref-guide/modules/query-guide/pages/dense-vector-search.adoc
+++ b/solr/solr-ref-guide/modules/query-guide/pages/dense-vector-search.adoc
@@ -384,6 +384,53 @@ Here's an example of a simple `knn` search:
The search results retrieved are the k=10 nearest documents to the vector in
input `[1.0, 2.0, 3.0, 4.0]`, ranked by the `similarityFunction` configured at
indexing time.
+`earlyTermination`::
++
+[%autowidth,frame=none]
+|===
+|Optional |Default: `false`
+|===
++
+Early termination is an HNSW optimization. Solr relies on Lucene's implementation of early termination for kNN queries, based on https://cs.uwaterloo.ca/~jimmylin/publications/Teofili_Lin_ECIR2025.pdf[Patience in Proximity: A Simple Early Termination Strategy for HNSW Graph Traversal in Approximate k-Nearest Neighbor Search].
++
+When enabled (`true`), the search may exit early when the HNSW candidate queue remains saturated over a threshold (`saturationThreshold`) for more than a given number of iterations (`patience`). Refer to the two parameters below for more details.
++
+Enabling early termination typically reduces query latency and resource usage,
with a potential small trade-off in recall.
+
+`saturationThreshold`::
++
+[%autowidth,frame=none]
+|===
+|Optional |Default: `0.995`
+|===
++
+(advanced) The early exit saturation threshold.
++
+Our recommendation is to rely on the default value and change this parameter
only if you are confident about its impact. Using values that are too low can
cause the search to terminate prematurely, leading to poor recall.
++
+This parameter must be used together with `patience`; either specify both to
customize the behavior, or omit both to rely on the default values.
+
+`patience`::
++
+[%autowidth,frame=none]
+|===
+|Optional |Default: `max(7, topK * 0.3)`
+|===
++
+(advanced) The number of consecutive iterations the search will continue after the candidate queue is considered saturated. The default is not a fixed integer; it is computed from the `topK` parameter.
++
+Our recommendation is to rely on the default value and change this parameter
only if you are confident about its impact:
++
+* Using values that are too low can make the search stop too aggressively,
reducing recall.
+* Using values that are too high reduces the benefit of early termination,
since the search runs nearly as long as without it.
+
++
+This parameter must be used together with `saturationThreshold`; either
specify both to customize the behavior, or omit both to rely on the default
values.
+
+Here's an example of a `knn` search using early termination with explicit parameters:
+
+[source,text]
+?q={!knn f=vector topK=10 earlyTermination=true saturationThreshold=0.989
patience=10}[1.0, 2.0, 3.0, 4.0]
=== knn_text_to_vector Query Parser