This is an automated email from the ASF dual-hosted git repository.
abenedetti pushed a commit to branch branch_9x
in repository https://gitbox.apache.org/repos/asf/solr.git
The following commit(s) were added to refs/heads/branch_9x by this push:
new e767003fa11 SOLR-17018: add QueryLimits support to Learning To Rank
rescoring (#2348)
e767003fa11 is described below
commit e767003fa110cc21c52f83cb5c95c0dba1ad2e27
Author: Alessandro Benedetti <[email protected]>
AuthorDate: Fri Apr 5 11:53:38 2024 +0200
SOLR-17018: add QueryLimits support to Learning To Rank rescoring (#2348)
Co-authored-by: Christine Poerschke <[email protected]>
(cherry picked from commit 44211bc40a7cd93507772cf2746a07bcecdee9a0)
---
solr/CHANGES.txt | 3 +
.../solr/search/IncompleteRerankingException.java | 24 ++++++
.../org/apache/solr/search/ReRankCollector.java | 30 ++++---
.../src/java/org/apache/solr/ltr/LTRRescorer.java | 9 +++
.../test-files/featureExamples/features-slow.json | 7 ++
.../modelExamples/linear-slow-model.json | 14 ++++
.../org/apache/solr/ltr/TestLTRQParserPlugin.java | 92 ++++++++++++++++++++++
.../query-guide/pages/learning-to-rank.adoc | 11 +++
8 files changed, 178 insertions(+), 12 deletions(-)
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index db3318301a2..b45f346ec38 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -68,6 +68,9 @@ Bug Fixes
* SOLR-17198: AffinityPlacementFactory can fail if Shard leadership changes
occur while it is collecting metrics.
(Paul McArthur)
+
+* SOLR-17018: Add QueryLimits support to Learning To Rank rescoring.
+ (Alessandro Benedetti)
* SOLR-14892: Queries with shards.info and shards.tolerant can yield multiple
null keys in place of shard names
(Mathieu Marie, David Smiley)
diff --git a/solr/core/src/java/org/apache/solr/search/IncompleteRerankingException.java b/solr/core/src/java/org/apache/solr/search/IncompleteRerankingException.java
new file mode 100644
index 00000000000..c3f7190b3f2
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/search/IncompleteRerankingException.java
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.search;
+
+public class IncompleteRerankingException extends RuntimeException {
+
+ public IncompleteRerankingException() {
+ super();
+ }
+}
diff --git a/solr/core/src/java/org/apache/solr/search/ReRankCollector.java b/solr/core/src/java/org/apache/solr/search/ReRankCollector.java
index 17f206de646..bf4c19b4063 100644
--- a/solr/core/src/java/org/apache/solr/search/ReRankCollector.java
+++ b/solr/core/src/java/org/apache/solr/search/ReRankCollector.java
@@ -128,22 +128,26 @@ public class ReRankCollector extends TopDocsCollector<ScoreDoc> {
}
ScoreDoc[] mainScoreDocs = mainDocs.scoreDocs;
- ScoreDoc[] mainScoreDocsClone =
- (reRankScaler != null && reRankScaler.scaleScores())
- ? deepCloneAndZeroOut(mainScoreDocs)
- : null;
+ boolean zeroOutScores = reRankScaler != null &&
reRankScaler.scaleScores();
+ ScoreDoc[] mainScoreDocsClone = deepClone(mainScoreDocs, zeroOutScores);
ScoreDoc[] reRankScoreDocs = new ScoreDoc[Math.min(mainScoreDocs.length,
reRankDocs)];
System.arraycopy(mainScoreDocs, 0, reRankScoreDocs, 0,
reRankScoreDocs.length);
mainDocs.scoreDocs = reRankScoreDocs;
// If we're scaling scores use the replace rescorer because we just want
the re-rank score.
- TopDocs rescoredDocs =
- reRankScaler != null && reRankScaler.scaleScores()
- ? reRankScaler
- .getReplaceRescorer()
- .rescore(searcher, mainDocs, mainDocs.scoreDocs.length)
- : reRankQueryRescorer.rescore(searcher, mainDocs,
mainDocs.scoreDocs.length);
+ TopDocs rescoredDocs;
+ try {
+ rescoredDocs =
+ zeroOutScores // previously zero-ed out scores are to be replaced
+ ? reRankScaler
+ .getReplaceRescorer()
+ .rescore(searcher, mainDocs, mainDocs.scoreDocs.length)
+ : reRankQueryRescorer.rescore(searcher, mainDocs,
mainDocs.scoreDocs.length);
+ } catch (IncompleteRerankingException ex) {
+ mainDocs.scoreDocs = mainScoreDocsClone;
+ rescoredDocs = mainDocs;
+ }
// Lower howMany to return if we've collected fewer documents.
howMany = Math.min(howMany, mainScoreDocs.length);
@@ -208,13 +212,15 @@ public class ReRankCollector extends TopDocsCollector<ScoreDoc> {
}
}
- private ScoreDoc[] deepCloneAndZeroOut(ScoreDoc[] scoreDocs) {
+ private ScoreDoc[] deepClone(ScoreDoc[] scoreDocs, boolean zeroOut) {
ScoreDoc[] scoreDocs1 = new ScoreDoc[scoreDocs.length];
for (int i = 0; i < scoreDocs.length; i++) {
ScoreDoc scoreDoc = scoreDocs[i];
if (scoreDoc != null) {
scoreDocs1[i] = new ScoreDoc(scoreDoc.doc, scoreDoc.score);
- scoreDoc.score = 0f;
+ if (zeroOut) {
+ scoreDoc.score = 0f;
+ }
}
}
return scoreDocs1;
diff --git a/solr/modules/ltr/src/java/org/apache/solr/ltr/LTRRescorer.java b/solr/modules/ltr/src/java/org/apache/solr/ltr/LTRRescorer.java
index 19ac717bdda..a6b45342d9e 100644
--- a/solr/modules/ltr/src/java/org/apache/solr/ltr/LTRRescorer.java
+++ b/solr/modules/ltr/src/java/org/apache/solr/ltr/LTRRescorer.java
@@ -31,6 +31,8 @@ import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TotalHits;
import org.apache.lucene.search.Weight;
import org.apache.solr.ltr.interleaving.OriginalRankingLTRScoringQuery;
+import org.apache.solr.search.IncompleteRerankingException;
+import org.apache.solr.search.QueryLimits;
import org.apache.solr.search.SolrIndexSearcher;
/**
@@ -234,6 +236,13 @@ public class LTRRescorer extends Rescorer {
scorer.getDocInfo().setOriginalDocScore(hit.score);
hit.score = scorer.score();
+ if (QueryLimits.getCurrentLimits()
+ .maybeExitWithPartialResults(
+ "Learning To Rank rescoring -"
+ + " The full reranking didn't complete."
+ + " If partial results are tolerated the reranking got
reverted and all documents preserved their original score and ranking.")) {
+ throw new IncompleteRerankingException();
+ }
if (hitUpto < topN) {
reranked[hitUpto] = hit;
// if the heap is not full, maybe I want to log the features for this
diff --git a/solr/modules/ltr/src/test-files/featureExamples/features-slow.json b/solr/modules/ltr/src/test-files/featureExamples/features-slow.json
new file mode 100644
index 00000000000..a60c47db73b
--- /dev/null
+++ b/solr/modules/ltr/src/test-files/featureExamples/features-slow.json
@@ -0,0 +1,7 @@
+[
+ {
+ "name" : "slow",
+ "class" : "org.apache.solr.ltr.feature.SolrFeature",
+ "params" : { "q" : "{!func}sleep(1000,999)" }
+ }
+]
diff --git a/solr/modules/ltr/src/test-files/modelExamples/linear-slow-model.json b/solr/modules/ltr/src/test-files/modelExamples/linear-slow-model.json
new file mode 100644
index 00000000000..824b9c473e3
--- /dev/null
+++ b/solr/modules/ltr/src/test-files/modelExamples/linear-slow-model.json
@@ -0,0 +1,14 @@
+{
+ "class": "org.apache.solr.ltr.model.LinearModel",
+ "name": "slowModel",
+ "features": [
+ {
+ "name": "slow"
+ }
+ ],
+ "params": {
+ "weights": {
+ "slow": 1
+ }
+ }
+}
diff --git a/solr/modules/ltr/src/test/org/apache/solr/ltr/TestLTRQParserPlugin.java b/solr/modules/ltr/src/test/org/apache/solr/ltr/TestLTRQParserPlugin.java
index c2c47c2fa6a..a8924b2e1da 100644
--- a/solr/modules/ltr/src/test/org/apache/solr/ltr/TestLTRQParserPlugin.java
+++ b/solr/modules/ltr/src/test/org/apache/solr/ltr/TestLTRQParserPlugin.java
@@ -29,6 +29,9 @@ public class TestLTRQParserPlugin extends TestRerankBase {
loadFeatures("features-linear.json");
loadModels("linear-model.json");
+
+ loadFeatures("features-slow.json");
+ loadModels("linear-slow-model.json"); // just a linear model with one
feature
}
@AfterClass
@@ -137,4 +140,93 @@ public class TestLTRQParserPlugin extends TestRerankBase {
query.add("rq", "{!ltr reRankDocs=3 model=6029760550880411648}");
assertJQ("/query" + query.toQueryString(), "/response/numFound/==0");
}
+
+ @Test
+ public void
ltr_expensiveFeatureRescoring_shouldTimeOutAndReturnPartialResults()
+ throws Exception {
+ /* One SolrFeature is defined: {!func}sleep(1000,999)
+ * It simulates a slow feature extraction, sleeping for 1000ms and
returning 999 as a score when finished
+ * */
+
+ final String solrQuery = "_query_:{!edismax qf='id' v='8^=10 9^=5 7^=3
6^=1'}";
+ final SolrQuery query = new SolrQuery();
+ query.setQuery(solrQuery);
+ query.setFields("id", "score");
+ query.setRows(4);
+ query.setTimeAllowed(300);
+ query.add("fv", "true");
+ query.add("rq", "{!ltr model=slowModel reRankDocs=3}");
+
+ assertJQ(
+ "/query" + query.toQueryString(),
+ "/response/numFound/==4",
+ "/responseHeader/partialResults/==true",
+ "/responseHeader/partialResultsDetails/=='Limits exceeded! (Learning
To Rank rescoring - "
+ + "The full reranking didn\\'t complete. "
+ + "If partial results are tolerated the reranking got reverted and
"
+ + "all documents preserved their original score and ranking.)"
+ + ": Query limits: [TimeAllowedLimit:LIMIT EXCEEDED]'",
+ "/response/docs/[0]/id=='8'",
+ "/response/docs/[0]/score==10.0",
+ "/response/docs/[1]/id=='9'",
+ "/response/docs/[1]/score==5.0",
+ "/response/docs/[2]/id=='7'",
+ "/response/docs/[2]/score==3.0",
+ "/response/docs/[3]/id=='6'",
+ "/response/docs/[3]/score==1.0");
+ }
+
+ @Test
+ public void
ltr_expensiveFeatureRescoringAndPartialResultsNotTolerated_shouldRaiseException()
+ throws Exception {
+ /* One SolrFeature is defined: {!func}sleep(1000,999)
+ * It simulates a slow feature extraction, sleeping for 1000ms and
returning 999 as a score when finished
+ * */
+ final String solrQuery = "_query_:{!edismax qf='id' v='8^=10 9^=5 7^=3
6^=1'}";
+ final SolrQuery query = new SolrQuery();
+ query.setQuery(solrQuery);
+ query.setFields("id", "score");
+ query.setRows(4);
+ query.setTimeAllowed(300);
+ query.add("partialResults", "false");
+ query.add("fv", "true");
+ query.add("rq", "{!ltr model=slowModel reRankDocs=3}");
+
+ assertJQ(
+ "/query" + query.toQueryString(),
+ "/error/msg=='org.apache.solr.search.QueryLimitsExceededException:
Limits exceeded! (Learning To Rank rescoring - "
+ + "The full reranking didn\\'t complete. "
+ + "If partial results are tolerated the reranking got reverted and
all documents preserved their original score and ranking.)"
+ + ": Query limits: [TimeAllowedLimit:LIMIT EXCEEDED]'");
+ }
+
+ @Test
+ public void
ltr_expensiveFeatureRescoringWithinTimeAllowed_shouldReturnRerankedResults()
+ throws Exception {
+ /* One SolrFeature is defined: {!func}sleep(1000,999)
+ * It simulates a slow feature extraction, sleeping for 1000ms and
returning 999 as a score when finished
+ * */
+
+ final String solrQuery = "_query_:{!edismax qf='id' v='8^=10 9^=5 7^=3
6^=1'}";
+ final SolrQuery query = new SolrQuery();
+ query.setQuery(solrQuery);
+ query.setFields("id", "score");
+ query.setRows(4);
+ query.setTimeAllowed(5000);
+ query.add("fv", "true");
+ query.add("rq", "{!ltr model=slowModel reRankDocs=3}");
+
+ assertJQ(
+ "/query" + query.toQueryString(),
+ "/response/numFound/==4",
+ "/response/docs/[0]/id=='7'",
+ "/response/docs/[0]/score==999.0",
+ "/response/docs/[1]/id=='8'",
+ "/response/docs/[1]/score==999.0",
+ "/response/docs/[2]/id=='9'",
+ "/response/docs/[2]/score==999.0",
+ "/response/docs/[3]/id=='6'",
+ // original score for the 4th document due to reRankDocs=3 limit
+ "/response/docs/[3]/score==1.0");
+ }
}
diff --git a/solr/solr-ref-guide/modules/query-guide/pages/learning-to-rank.adoc b/solr/solr-ref-guide/modules/query-guide/pages/learning-to-rank.adoc
index 411b6dfb88f..ef4c519c34f 100644
--- a/solr/solr-ref-guide/modules/query-guide/pages/learning-to-rank.adoc
+++ b/solr/solr-ref-guide/modules/query-guide/pages/learning-to-rank.adoc
@@ -499,6 +499,17 @@ The output will include feature values as a comma-separated list, resembling the
}}
----
+=== Running a Rerank Query and Query Limits
+
+Apache Solr allows you to define Query Limits to interrupt particularly expensive queries (xref:query-guide:common-query-parameters.adoc#timeallowed-parameter[Time Allowed], xref:query-guide:common-query-parameters.adoc#cpuallowed-parameter[Cpu Allowed]).
+
+If a query limit is exceeded while reranking, the rescoring is aborted and fully reverted.
+
+The original ranked list is returned and the response is marked with the responseHeader 'partialResults'.
+The details of which limit was exceeded are returned in the responseHeader 'partialResultsDetails'.
+
+See xref:query-guide:common-query-parameters.adoc#partialresults-parameter[Partial Results Parameter] for more details on how to handle partial results.
+
=== Running a Rerank Query Interleaving Two Models
To rerank the results of a query, interleaving two models (myModelA, myModelB)
add the `rq` parameter to your search, passing two models in input, for example: