This is an automated email from the ASF dual-hosted git repository. ishan pushed a commit to branch jira/solr-17927 in repository https://gitbox.apache.org/repos/asf/solr.git
commit 36b15223101b3a74be3090066226fdfd483f28df Author: punAhuja <[email protected]> AuthorDate: Thu Nov 6 14:47:54 2025 +0530 Exposing efSearchScaleFactor instead of efSearch, and using it to calculate efSearch internally -Set default efSearchScaleFactor as 1.0, which means default efSearch = topK --- .../java/org/apache/solr/search/vector/KnnQParser.java | 8 +++++--- .../org/apache/solr/schema/DenseVectorFieldTest.java | 12 ++++++------ .../org/apache/solr/search/vector/KnnQParserTest.java | 17 +++++++---------- .../search/TextToVectorQParserTest.java | 2 +- .../modules/query-guide/pages/dense-vector-search.adoc | 12 ++++++------ .../upgrade-notes/pages/major-changes-in-solr-10.adoc | 4 ++++ 6 files changed, 29 insertions(+), 26 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/search/vector/KnnQParser.java b/solr/core/src/java/org/apache/solr/search/vector/KnnQParser.java index 9f8d9f6de79..413eebe54f4 100644 --- a/solr/core/src/java/org/apache/solr/search/vector/KnnQParser.java +++ b/solr/core/src/java/org/apache/solr/search/vector/KnnQParser.java @@ -108,11 +108,13 @@ public class KnnQParser extends AbstractVectorQParserBase { final DenseVectorField denseVectorType = getCheckedFieldType(schemaField); final String vectorToSearch = getVectorToSearch(); final int topK = localParams.getInt(TOP_K, DEFAULT_TOP_K); - final int efSearch = localParams.getInt("efSearch", topK * 2); - if (efSearch < topK) { + + final double efSearchScaleFactor = localParams.getDouble("efSearchScaleFactor", 1.0); + if (efSearchScaleFactor < 1.0) { throw new IllegalArgumentException( - "efSearch (" + efSearch + ") must be >= topK (" + topK + ")"); + "efSearchScaleFactor (" + efSearchScaleFactor + ") must be >= 1.0"); } + final int efSearch = (int) Math.round(efSearchScaleFactor * topK); final Integer filteredSearchThreshold = localParams.getInt(FILTERED_SEARCH_THRESHOLD); diff --git a/solr/core/src/test/org/apache/solr/schema/DenseVectorFieldTest.java b/solr/core/src/test/org/apache/solr/schema/DenseVectorFieldTest.java index b8e426470f1..077871e3b6f 100644 --- a/solr/core/src/test/org/apache/solr/schema/DenseVectorFieldTest.java +++ b/solr/core/src/test/org/apache/solr/schema/DenseVectorFieldTest.java @@ -868,7 +868,7 @@ public class DenseVectorFieldTest extends AbstractBadConfigTestBase { DenseVectorField type = (DenseVectorField) vectorField.getType(); KnnFloatVectorQuery vectorQuery = (KnnFloatVectorQuery) - type.getKnnVectorQuery("vector", "[2, 1, 3, 4]", 3, null, null, null, null); + type.getKnnVectorQuery("vector", "[2, 1, 3, 4]", 3, 3, null, null, null, null); KnnSearchStrategy.Hnsw strategy = (KnnSearchStrategy.Hnsw) vectorQuery.getSearchStrategy(); Integer threshold = strategy.filteredSearchThreshold(); @@ -892,7 +892,7 @@ public class DenseVectorFieldTest extends AbstractBadConfigTestBase { KnnFloatVectorQuery vectorQuery = (KnnFloatVectorQuery) type.getKnnVectorQuery( - "vector", "[2, 1, 3, 4]", 3, null, null, null, expectedThreshold); + "vector", "[2, 1, 3, 4]", 3, 3, null, null, null, expectedThreshold); KnnSearchStrategy.Hnsw strategy = (KnnSearchStrategy.Hnsw) vectorQuery.getSearchStrategy(); Integer threshold = strategy.filteredSearchThreshold(); @@ -917,7 +917,7 @@ public class DenseVectorFieldTest extends AbstractBadConfigTestBase { SeededKnnVectorQuery vectorQuery = (SeededKnnVectorQuery) type.getKnnVectorQuery( - "vector", "[2, 1, 3, 4]", 3, null, seedQuery, null, expectedThreshold); + "vector", "[2, 1, 3, 4]", 3, 3, null, seedQuery, null, expectedThreshold); KnnSearchStrategy.Hnsw strategy = (KnnSearchStrategy.Hnsw) vectorQuery.getSearchStrategy(); Integer threshold = strategy.filteredSearchThreshold(); @@ -944,7 +944,7 @@ public class DenseVectorFieldTest extends AbstractBadConfigTestBase { PatienceKnnVectorQuery vectorQuery = (PatienceKnnVectorQuery) type.getKnnVectorQuery( - "vector", "[2, 1, 3, 4]", 3, null, null, earlyTermination, expectedThreshold); + "vector", "[2, 1, 3, 4]", 3, 3, null, null, earlyTermination, expectedThreshold); KnnSearchStrategy.Hnsw strategy = (KnnSearchStrategy.Hnsw) vectorQuery.getSearchStrategy(); Integer threshold = strategy.filteredSearchThreshold(); @@ -1002,7 +1002,7 @@ public class DenseVectorFieldTest extends AbstractBadConfigTestBase { KnnByteVectorQuery vectorQuery = (KnnByteVectorQuery) type.getKnnVectorQuery( - "vector_byte_encoding", "[2, 1, 3, 4]", 3, null, null, null, null); + "vector_byte_encoding", "[2, 1, 3, 4]", 3, 3, null, null, null, null); KnnSearchStrategy.Hnsw strategy = (KnnSearchStrategy.Hnsw) vectorQuery.getSearchStrategy(); Integer threshold = strategy.filteredSearchThreshold(); @@ -1026,7 +1026,7 @@ public class DenseVectorFieldTest extends AbstractBadConfigTestBase { KnnByteVectorQuery vectorQuery = (KnnByteVectorQuery) type.getKnnVectorQuery( - "vector_byte_encoding", "[2, 1, 3, 4]", 3, null, null, null, expectedThreshold); + "vector_byte_encoding", "[2, 1, 3, 4]", 3, 3, null, null, null, expectedThreshold); KnnSearchStrategy.Hnsw strategy = (KnnSearchStrategy.Hnsw) vectorQuery.getSearchStrategy(); Integer threshold = strategy.filteredSearchThreshold(); diff --git a/solr/core/src/test/org/apache/solr/search/vector/KnnQParserTest.java b/solr/core/src/test/org/apache/solr/search/vector/KnnQParserTest.java index ca13e9fe02e..5649fdae6e5 100644 --- a/solr/core/src/test/org/apache/solr/search/vector/KnnQParserTest.java +++ b/solr/core/src/test/org/apache/solr/search/vector/KnnQParserTest.java @@ -990,7 +990,7 @@ public class KnnQParserTest extends SolrTestCaseJ4 { "debugQuery", "true"), "//result[@numFound='5']", - "//str[@name='parsedquery'][.='SolrKnnFloatVectorQuery(SolrKnnFloatVectorQuery:vector[1.0,...][10])']"); + "//str[@name='parsedquery'][.='SolrKnnFloatVectorQuery(SolrKnnFloatVectorQuery:vector[1.0,...][5])']"); } @Test @@ -1008,7 +1008,7 @@ public class KnnQParserTest extends SolrTestCaseJ4 { "debugQuery", "true"), "//result[@numFound='5']", - "//str[@name='parsedquery'][.='SolrKnnFloatVectorQuery(SolrKnnFloatVectorQuery:vector[1.0,...][10])']"); + "//str[@name='parsedquery'][.='SolrKnnFloatVectorQuery(SolrKnnFloatVectorQuery:vector[1.0,...][5])']"); } @Test @@ -1029,7 +1029,7 @@ public class KnnQParserTest extends SolrTestCaseJ4 { String expectedParsedQuery = String.format( Locale.US, - "PatienceKnnVectorQuery(PatienceKnnVectorQuery{saturationThreshold=%.3f, patience=%d, delegate=SolrKnnFloatVectorQuery:vector[1.0,...][20]})", + "PatienceKnnVectorQuery(PatienceKnnVectorQuery{saturationThreshold=%.3f, patience=%d, delegate=SolrKnnFloatVectorQuery:vector[1.0,...][10]})", defaultSaturationThreshold, defaultPatience); @@ -1068,7 +1068,7 @@ public class KnnQParserTest extends SolrTestCaseJ4 { String expectedParsedQuery = String.format( Locale.US, - "PatienceKnnVectorQuery(PatienceKnnVectorQuery{saturationThreshold=%.3f, patience=%d, delegate=SolrKnnFloatVectorQuery:vector[1.0,...][20]})", + "PatienceKnnVectorQuery(PatienceKnnVectorQuery{saturationThreshold=%.3f, patience=%d, delegate=SolrKnnFloatVectorQuery:vector[1.0,...][10]})", explicitSaturationThreshold, explicitPatience); @@ -1101,7 +1101,7 @@ public class KnnQParserTest extends SolrTestCaseJ4 { String expectedParsedQuery = String.format( Locale.US, - "PatienceKnnVectorQuery(PatienceKnnVectorQuery{saturationThreshold=%.3f, patience=%d, delegate=SolrKnnByteVectorQuery:vector_byte_encoding[2,...][10]})", + "PatienceKnnVectorQuery(PatienceKnnVectorQuery{saturationThreshold=%.3f, patience=%d, delegate=SolrKnnByteVectorQuery:vector_byte_encoding[2,...][5]})", explicitSaturationThreshold, explicitPatience); @@ -1133,7 +1133,7 @@ public class KnnQParserTest extends SolrTestCaseJ4 { String expectedParsedQuery = String.format( Locale.US, - "PatienceKnnVectorQuery(PatienceKnnVectorQuery{saturationThreshold=%.3f, patience=%d, delegate=SolrKnnFloatVectorQuery:vector[1.0,...][20]})", + "PatienceKnnVectorQuery(PatienceKnnVectorQuery{saturationThreshold=%.3f, patience=%d, delegate=SolrKnnFloatVectorQuery:vector[1.0,...][10]})", explicitSaturationThreshold, explicitPatience); @@ -1219,7 +1219,7 @@ public class KnnQParserTest extends SolrTestCaseJ4 { "debugQuery", "true"), "//result[@numFound='4']", - "//str[@name='parsedquery'][.='SeededKnnVectorQuery(SeededKnnVectorQuery{seed=id:1 id:4 id:7 id:8 id:9, seedWeight=null, delegate=KnnFloatVectorQuery:vector[1.0,...][4]})']"); + "//str[@name='parsedquery'][.='SeededKnnVectorQuery(SeededKnnVectorQuery{seed=id:1 id:4 id:7 id:8 id:9, seedWeight=null, delegate=SolrKnnFloatVectorQuery:vector[1.0,...][4]})']"); } @Test @@ -1239,7 +1239,6 @@ public class KnnQParserTest extends SolrTestCaseJ4 { "debugQuery", "true"), "//result[@numFound='4']", - "//str[@name='parsedquery'][.='SeededKnnVectorQuery(SeededKnnVectorQuery{seed=id:1 id:4 id:7 id:8 id:9, seedWeight=null, delegate=KnnByteVectorQuery:vector_byte_encoding[2,...][4]})']"); } @Test @@ -1286,7 +1285,6 @@ public class KnnQParserTest extends SolrTestCaseJ4 { "debugQuery", "true"), "//result[@numFound='4']", - "//str[@name='parsedquery'][.='SeededKnnVectorQuery(SeededKnnVectorQuery{seed=KnnFloatVectorQuery:vector[0.1,...][4], seedWeight=null, delegate=KnnFloatVectorQuery:vector[1.0,...][4]})']"); } @Test @@ -1317,7 +1315,6 @@ public class KnnQParserTest extends SolrTestCaseJ4 { "//str[@name='parsedquery'][contains(.,'seedWeight=')]", // Verify that the final delegate is a KnnFloatVectorQuery with the expected vector and topK // value - "//str[@name='parsedquery'][contains(.,'delegate=KnnFloatVectorQuery:vector[1.0,...][4]')]"); } @Test diff --git a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/textvectorisation/search/TextToVectorQParserTest.java b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/textvectorisation/search/TextToVectorQParserTest.java index 1756554aa20..be47a160f64 100644 --- a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/textvectorisation/search/TextToVectorQParserTest.java +++ b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/textvectorisation/search/TextToVectorQParserTest.java @@ -406,7 +406,7 @@ public class TextToVectorQParserTest extends TestLanguageModelBase { String expectedParsedQuery = String.format( Locale.US, - "PatienceKnnVectorQuery(PatienceKnnVectorQuery{saturationThreshold=%.3f, patience=%d, delegate=SolrKnnFloatVectorQuery:vector[1.0,...][10]})", + "PatienceKnnVectorQuery(PatienceKnnVectorQuery{saturationThreshold=%.3f, patience=%d, delegate=SolrKnnFloatVectorQuery:vector[1.0,...][5]})", defaultSaturationThreshold, defaultPatience); diff --git a/solr/solr-ref-guide/modules/query-guide/pages/dense-vector-search.adoc b/solr/solr-ref-guide/modules/query-guide/pages/dense-vector-search.adoc index 9f9e3004aaa..db5d3dd5639 100644 --- a/solr/solr-ref-guide/modules/query-guide/pages/dense-vector-search.adoc +++ b/solr/solr-ref-guide/modules/query-guide/pages/dense-vector-search.adoc @@ -455,24 +455,24 @@ Our recommendation is to rely on the default value and change this parameter onl + This parameter must be used together with `saturationThreshold`; either specify both to customize the behavior, or omit both to rely on the default values. -`ef-search`:: +`efSearchScaleFactor`:: + [%autowidth,frame=none] |=== -|Optional | Default: `topK * 2` +|Optional | Default: `1.0` |=== + -(advanced) Controls how many candidates the HNSW algorithm examines during search. +(advanced) Multiplier factor for calculating how many candidates the HNSW algorithm examines during search. + -The algorithm fetches more results than the requested `topK` and then selects the best ones. Higher values fetch more candidates, improving recall but slowing down the search. Lower values fetch fewer candidates for faster performance but may miss some good matches. +The effective `efSearch` value is calculated internally as `efSearchScaleFactor * topK`. Lower values fetch fewer candidates for faster performance but may miss some good matches. Higher values fetch more candidates, improving recall but slowing down the search. + Accepted values: -Any positive integer. +Any float >= 1.0. Here's an example of a `knn` search using the early termination with input parameters: [source,text] -?q={!knn f=vector topK=10 earlyTermination=true saturationThreshold=0.989 patience=10 ef-search=30}[1.0, 2.0, 3.0, 4.0] +?q={!knn f=vector topK=10 earlyTermination=true saturationThreshold=0.989 patience=10 efSearchScaleFactor=3.0}[1.0, 2.0, 3.0, 4.0] `seedQuery`:: + diff --git a/solr/solr-ref-guide/modules/upgrade-notes/pages/major-changes-in-solr-10.adoc b/solr/solr-ref-guide/modules/upgrade-notes/pages/major-changes-in-solr-10.adoc index c6e61e70969..4a53180323e 100644 --- a/solr/solr-ref-guide/modules/upgrade-notes/pages/major-changes-in-solr-10.adoc +++ b/solr/solr-ref-guide/modules/upgrade-notes/pages/major-changes-in-solr-10.adoc @@ -137,6 +137,10 @@ Solr now lets you access models encoded in ONNX format, commonly sourced from Hu The DocumentCategorizerUpdateProcessorFactorythat lets you perform sentiment and other classification tasks on fields. It is available as part of the `analysis-extras` module. +=== Vector Search Enhancements + +* The `efSearchScaleFactor` parameter is now available for the KNN query parser (SOLR-17928). This parameter controls how many candidate vectors are explored during HNSW graph traversal, allowing users to independently tune search accuracy versus the number of results returned. Previously, improving accuracy required increasing `topK` (which returns more results), but `efSearchScaleFactor` enables exploring more candidates while still receiving exactly `topK` results. The `efSearch` valu [...] + === Deprecation removals * The `jaegertracer-configurator` module, which was deprecated in 9.2, is removed. Users should migrate to the `opentelemetry` module.
