This is an automated email from the ASF dual-hosted git repository. houston pushed a commit to branch branch_9x in repository https://gitbox.apache.org/repos/asf/solr.git
The following commit(s) were added to refs/heads/branch_9x by this push: new fcf0caca7fa SOLR-17678, SOLR-17732: Add matchScore support for ReRank queries (#3222) fcf0caca7fa is described below commit fcf0caca7fac47d1bb5e0e558ed6e201fa2ee703 Author: Siju Varghese <varghese.s...@gmail.com> AuthorDate: Wed Apr 9 12:04:15 2025 -0700 SOLR-17678, SOLR-17732: Add matchScore support for ReRank queries (#3222) * Add support to return match score ( the query score ) in case of rerank query. * Update Transformer code to be easier to extend * Compute distributed score fields in first pass, then pass to return fields Co-authored-by: Houston Putman <hous...@apache.org> (cherry picked from commit b226f4726f22b699332f617f1393b5d1ccb45d74) --- solr/CHANGES.txt | 4 + .../solr/handler/component/QueryComponent.java | 90 ++++++++++---- .../apache/solr/handler/component/ShardDoc.java | 16 +++ .../org/apache/solr/response/DocsStreamer.java | 21 ++-- .../solr/response/transform/DocTransformer.java | 22 ++++ .../solr/response/transform/DocTransformers.java | 12 +- ...coreAugmenter.java => MatchScoreAugmenter.java} | 15 ++- .../solr/response/transform/ScoreAugmenter.java | 9 +- .../{DocIterator.java => DocIterationInfo.java} | 30 ++--- .../java/org/apache/solr/search/DocIterator.java | 10 +- .../src/java/org/apache/solr/search/DocSlice.java | 18 +-- .../org/apache/solr/search/ReRankCollector.java | 30 ++++- .../apache/solr/search/ReRankQParserPlugin.java | 5 +- .../java/org/apache/solr/search/ReturnFields.java | 26 ++++ .../org/apache/solr/search/SolrIndexSearcher.java | 81 ++++-------- .../org/apache/solr/search/SolrReturnFields.java | 19 ++- .../java/org/apache/solr/search/TopDocsSlice.java | 134 ++++++++++++++++++++ .../solr/cloud/TestCloudPseudoReturnFields.java | 22 +++- .../handler/component/MockResponseBuilder.java | 1 + .../solr/search/DistributedReRankExplainTest.java | 19 ++- .../apache/solr/search/TestPseudoReturnFields.java | 11 +- .../solr/search/TestReRankQParserPlugin.java | 
137 +++++++++++++++++++++ .../LTRFeatureLoggerTransformerFactory.java | 11 +- .../LTRInterleavingTransformerFactory.java | 5 - .../query-guide/pages/query-re-ranking.adoc | 8 ++ .../java/org/apache/solr/common/SolrDocument.java | 15 +++ 26 files changed, 603 insertions(+), 168 deletions(-) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index b10b9037172..b215f4ea0a6 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -21,6 +21,8 @@ New Features * SOLR-17714: Added a FuzzyQParser to enable all FuzzyQuery customizations. (Houston Putman, Siju Varghese) +* SOLR-17678: ReRank queries can now return the matchScore (original score) in addition to the re-ranked score. (Siju Varghese, Houston Putman) + Improvements --------------------- * SOLR-15751: The v2 API now has parity with the v1 "COLSTATUS" and "segments" APIs, which can be used to fetch detailed information about @@ -43,6 +45,8 @@ Improvements * SOLR-10998: v2 APIs now obey the "Accept" request header for content-negotiation if 'wt' is unspecified. JSON is still used as a default when neither 'Accept' or 'wt' are specified. (Jason Gerlowski) +* SOLR-17732: Score-based return fields other than "score" can now be returned in distributed queries. (Houston Putman) + Optimizations --------------------- * SOLR-17578: Remove ZkController internal core supplier, for slightly faster reconnection after Zookeeper session loss. 
(Pierre Salagnac) diff --git a/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java b/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java index 2a2363dab3b..fbd19cc99bc 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java +++ b/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java @@ -25,8 +25,8 @@ import java.io.StringWriter; import java.lang.invoke.MethodHandles; import java.lang.reflect.Array; import java.util.ArrayList; -import java.util.Arrays; import java.util.Collection; +import java.util.Collections; import java.util.HashMap; import java.util.Iterator; import java.util.LinkedHashMap; @@ -34,6 +34,9 @@ import java.util.List; import java.util.Locale; import java.util.Map; import java.util.Objects; +import java.util.Set; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.stream.Collectors; import org.apache.lucene.index.ExitableDirectoryReader; import org.apache.lucene.index.IndexReaderContext; import org.apache.lucene.index.LeafReaderContext; @@ -780,13 +783,17 @@ public class QueryComponent extends SearchComponent { // distrib.singlePass=true forces a one-pass query regardless of requested fields boolean distribSinglePass = rb.req.getParams().getBool(ShardParams.DISTRIB_SINGLE_PASS, false); - if (distribSinglePass - || (fields != null - && fields.wantsField(keyFieldName) - && fields.getRequestedFieldNames() != null - && (!fields.hasPatternMatching() - && Arrays.asList(keyFieldName, "score") - .containsAll(fields.getRequestedFieldNames())))) { + boolean requiresNonIdAndScoreFields = true; + if (!distribSinglePass) { + Set<String> nonScoreDependentFieldNames = fields.getNonScoreDependentReturnFieldNames(); + requiresNonIdAndScoreFields = + fields.hasPatternMatching() + || nonScoreDependentFieldNames == null + || nonScoreDependentFieldNames.size() > 1 + || (nonScoreDependentFieldNames.size() == 1 + && 
!nonScoreDependentFieldNames.contains(keyFieldName)); + } + if (distribSinglePass || !requiresNonIdAndScoreFields) { sreq.purpose |= ShardRequest.PURPOSE_GET_FIELDS; rb.onePassDistributedQuery = true; } @@ -825,7 +832,7 @@ public class QueryComponent extends SearchComponent { || rb.getSortSpec().includesScore(); StringBuilder additionalFL = new StringBuilder(); boolean additionalAdded = false; - if (distribSinglePass) { + if (rb.onePassDistributedQuery) { String[] fls = rb.req.getParams().getParams(CommonParams.FL); if (fls != null && fls.length > 0 && (fls.length != 1 || !fls[0].isEmpty())) { // If the outer request contains actual FL's use them... @@ -838,15 +845,27 @@ public class QueryComponent extends SearchComponent { // additional fields below sreq.params.set(CommonParams.FL, "*"); } - if (!fields.wantsScore() && shardQueryIncludeScore) { - additionalAdded = addFL(additionalFL, "score", additionalAdded); - } } else { // reset so that only unique key is requested in shard requests sreq.params.set(CommonParams.FL, rb.req.getSchema().getUniqueKeyField().getName()); - if (shardQueryIncludeScore) { - additionalAdded = addFL(additionalFL, "score", additionalAdded); - } + + final AtomicBoolean hasAdditionalAdded = new AtomicBoolean(additionalAdded); + fields + .getScoreDependentReturnFields() + .forEach( + (name, value) -> { + if (value.isEmpty()) { + addFL(additionalFL, name, hasAdditionalAdded.getAndSet(true)); + } else { + addFL(additionalFL, name + ":" + value, hasAdditionalAdded.getAndSet(true)); + } + }); + additionalAdded = hasAdditionalAdded.get(); + } + if ((fields.getExplicitlyRequestedFieldNames() == null + || !fields.getExplicitlyRequestedFieldNames().contains(SolrReturnFields.SCORE)) + && shardQueryIncludeScore) { + additionalAdded = addFL(additionalFL, SolrReturnFields.SCORE, additionalAdded); } // TODO: should this really sendGlobalDfs if just includeScore? 
@@ -894,6 +913,19 @@ public class QueryComponent extends SearchComponent { sortFields = new SortField[] {SortField.FIELD_SCORE}; } + // If the shard request was also used to get fields (along with the scores), there is no reason + // to copy over the score dependent fields, since those will already exist in the document with + // the return fields + Set<String> scoreDependentFields; + if ((sreq.purpose & ShardRequest.PURPOSE_GET_FIELDS) == 0) { + scoreDependentFields = + rb.rsp.getReturnFields().getScoreDependentReturnFields().keySet().stream() + .filter(field -> !field.equals(SolrReturnFields.SCORE)) + .collect(Collectors.toSet()); + } else { + scoreDependentFields = Collections.emptySet(); + } + IndexSchema schema = rb.req.getSchema(); SchemaField uniqueKeyField = schema.getUniqueKeyField(); @@ -1071,14 +1103,17 @@ public class QueryComponent extends SearchComponent { shardDoc.id = id; shardDoc.shard = srsp.getShard(); shardDoc.orderInShard = i; - Object scoreObj = doc.getFieldValue("score"); + Object scoreObj = doc.getFieldValue(SolrReturnFields.SCORE); if (scoreObj != null) { if (scoreObj instanceof String) { shardDoc.score = Float.parseFloat((String) scoreObj); } else { - shardDoc.score = (Float) scoreObj; + shardDoc.score = ((Number) scoreObj).floatValue(); } } + if (!scoreDependentFields.isEmpty()) { + shardDoc.scoreDependentFields = doc.getSubsetOfFields(scoreDependentFields); + } shardDoc.sortFieldValues = unmarshalledSortFieldValues; @@ -1302,11 +1337,15 @@ public class QueryComponent extends SearchComponent { // TODO: merge fsv to if requested if ((sreq.purpose & ShardRequest.PURPOSE_GET_FIELDS) != 0) { - boolean returnScores = (rb.getFieldFlags() & SolrIndexSearcher.GET_SCORES) != 0; - final String uniqueKey = rb.req.getSchema().getUniqueKeyField().getName(); String keyFieldName = uniqueKey; boolean removeKeyField = !rb.rsp.getReturnFields().wantsField(keyFieldName); + boolean returnRawScore = + 
rb.rsp.getReturnFields().getExplicitlyRequestedFieldNames() != null + && rb.rsp + .getReturnFields() + .getExplicitlyRequestedFieldNames() + .contains(SolrReturnFields.SCORE); if (rb.rsp.getReturnFields().getFieldRenames().get(keyFieldName) != null) { // if id was renamed we need to use the new name keyFieldName = rb.rsp.getReturnFields().getFieldRenames().get(keyFieldName); @@ -1361,13 +1400,16 @@ public class QueryComponent extends SearchComponent { final ShardDoc sdoc = rb.resultIds.get(lastKeyString); if (sdoc != null) { shardDocFoundInResults = Boolean.TRUE; - if (returnScores) { - doc.setField("score", sdoc.score); - } else { + // There is no need to add scores to a document if the documents were retrieved in + // one-pass (GET_FIELDS was done at the same time as GET_TOP_IDS), because the scores + // will already exist in the document + if (!rb.onePassDistributedQuery) { + sdoc.consumeScoreDependentFields(returnRawScore, doc::setField); + } else if (!returnRawScore) { // Score might have been added (in createMainQuery) to shard-requests (and therefore // in shard-response-docs) Remove score if the outer request did not ask for it - // returned - doc.remove("score"); + // returned. 
+ doc.remove(SolrReturnFields.SCORE); } if (removeKeyField) { doc.removeFields(keyFieldName); diff --git a/solr/core/src/java/org/apache/solr/handler/component/ShardDoc.java b/solr/core/src/java/org/apache/solr/handler/component/ShardDoc.java index ad204a0904b..dc7f0d63126 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/ShardDoc.java +++ b/solr/core/src/java/org/apache/solr/handler/component/ShardDoc.java @@ -16,10 +16,14 @@ */ package org.apache.solr.handler.component; +import java.util.Collections; import java.util.List; +import java.util.Map; import java.util.Objects; +import java.util.function.BiConsumer; import org.apache.lucene.search.FieldDoc; import org.apache.solr.common.util.NamedList; +import org.apache.solr.search.SolrReturnFields; public class ShardDoc extends FieldDoc { public String shard; @@ -46,6 +50,8 @@ public class ShardDoc extends FieldDoc { public int positionInResponse; + public Map<String, Object> scoreDependentFields = Collections.emptyMap(); + // the ordinal position in the merged response arraylist public ShardDoc(float score, Object[] fields, Object uniqueId, String shard) { @@ -58,6 +64,16 @@ public class ShardDoc extends FieldDoc { super(-1, Float.NaN); } + public void consumeScoreDependentFields( + boolean returnRawScore, BiConsumer<String, Object> consumer) { + if (returnRawScore) { + consumer.accept(SolrReturnFields.SCORE, score); + } + if (!scoreDependentFields.isEmpty()) { + scoreDependentFields.forEach(consumer); + } + } + @Override public boolean equals(Object o) { if (this == o) return true; diff --git a/solr/core/src/java/org/apache/solr/response/DocsStreamer.java b/solr/core/src/java/org/apache/solr/response/DocsStreamer.java index 485d9c9a1e8..7f34d0e4a80 100644 --- a/solr/core/src/java/org/apache/solr/response/DocsStreamer.java +++ b/solr/core/src/java/org/apache/solr/response/DocsStreamer.java @@ -60,6 +60,8 @@ public class DocsStreamer implements Iterator<SolrDocument> { private final 
org.apache.solr.response.ResultContext rctx; private final SolrDocumentFetcher docFetcher; // a collaborator of SolrIndexSearcher private final DocList docs; + private boolean doScore; + private boolean doMatchScore; private final DocTransformer transformer; private final DocIterator docIterator; @@ -76,7 +78,9 @@ public class DocsStreamer implements Iterator<SolrDocument> { docFetcher = rctx.getDocFetcher(); solrReturnFields = (SolrReturnFields) rctx.getReturnFields(); - if (transformer != null) transformer.setContext(rctx); + if (transformer != null) { + transformer.setContext(rctx); + } } public int currentIndex() { @@ -95,10 +99,9 @@ public class DocsStreamer implements Iterator<SolrDocument> { SolrDocument sdoc = docFetcher.solrDoc(id, solrReturnFields); if (transformer != null) { - boolean doScore = rctx.wantsScores(); try { - if (doScore) { - transformer.transform(sdoc, id, docIterator.score()); + if (docs.hasScores()) { + transformer.transform(sdoc, id, docIterator); } else { transformer.transform(sdoc, id); } @@ -114,8 +117,8 @@ public class DocsStreamer implements Iterator<SolrDocument> { * This method is less efficient then the 3 arg version because it may convert some fields that * are not needed * + * @see #convertLuceneDocToSolrDoc(Document, IndexSchema, ReturnFields) * @deprecated use the 3 arg version for better performance - * @see #convertLuceneDocToSolrDoc(Document,IndexSchema,ReturnFields) */ @Deprecated public static SolrDocument convertLuceneDocToSolrDoc(Document doc, final IndexSchema schema) { @@ -185,7 +188,9 @@ public class DocsStreamer implements Iterator<SolrDocument> { public static Object getValue(SchemaField sf, IndexableField f) { FieldType ft = null; - if (sf != null) ft = sf.getType(); + if (sf != null) { + ft = sf.getType(); + } if (ft == null) { // handle fields not in the schema BytesRef bytesRef = f.binaryValue(); @@ -197,7 +202,9 @@ public class DocsStreamer implements Iterator<SolrDocument> { System.arraycopy(bytesRef.bytes, 
bytesRef.offset, bytes, 0, bytesRef.length); return bytes; } - } else return f.stringValue(); + } else { + return f.stringValue(); + } } else { if (KNOWN_TYPES.contains(ft.getClass())) { return ft.toObject(f); diff --git a/solr/core/src/java/org/apache/solr/response/transform/DocTransformer.java b/solr/core/src/java/org/apache/solr/response/transform/DocTransformer.java index c93d49ee619..5ce18a4b3bc 100644 --- a/solr/core/src/java/org/apache/solr/response/transform/DocTransformer.java +++ b/solr/core/src/java/org/apache/solr/response/transform/DocTransformer.java @@ -22,6 +22,7 @@ import java.util.Collections; import org.apache.solr.common.SolrDocument; import org.apache.solr.response.QueryResponseWriter; import org.apache.solr.response.ResultContext; +import org.apache.solr.search.DocIterationInfo; import org.apache.solr.search.SolrIndexSearcher; /** @@ -101,11 +102,30 @@ public abstract class DocTransformer { * @param score the score for this document * @throws IOException If there is a low-level I/O error. * @see #needsSolrIndexSearcher + * @deprecated use {@link #transform(SolrDocument, int, DocIterationInfo)} instead */ + @Deprecated(forRemoval = true, since = "9.9.0") public void transform(SolrDocument doc, int docid, float score) throws IOException { transform(doc, docid); } + /** + * This is where implementations do the actual work. If implementations require a valid docId and + * index access, the {@link #needsSolrIndexSearcher} method must return true + * + * <p>Default implementation calls {@link #transform(SolrDocument, int)}. + * + * @param doc The document to alter + * @param docid The Lucene internal doc id, or -1 in cases where the <code>doc</code> did not come + * from the index + * @param docInfo the document information for this document, including the score + * @throws IOException If there is a low-level I/O error. 
+ * @see #needsSolrIndexSearcher + */ + public void transform(SolrDocument doc, int docid, DocIterationInfo docInfo) throws IOException { + transform(doc, docid, docInfo.score()); + } + /** * This is where implementations do the actual work. If implementations require a valid docId and * index access, the {@link #needsSolrIndexSearcher} method must return true @@ -115,7 +135,9 @@ public abstract class DocTransformer { * from the index * @throws IOException If there is a low-level I/O error. * @see #needsSolrIndexSearcher + * @deprecated use {@link #transform(SolrDocument, int, DocIterationInfo)} instead */ + @Deprecated(since = "9.9.0") public abstract void transform(SolrDocument doc, int docid) throws IOException; /** diff --git a/solr/core/src/java/org/apache/solr/response/transform/DocTransformers.java b/solr/core/src/java/org/apache/solr/response/transform/DocTransformers.java index 238d1357daa..62785c0a759 100644 --- a/solr/core/src/java/org/apache/solr/response/transform/DocTransformers.java +++ b/solr/core/src/java/org/apache/solr/response/transform/DocTransformers.java @@ -24,6 +24,7 @@ import java.util.List; import java.util.stream.Collectors; import org.apache.solr.common.SolrDocument; import org.apache.solr.response.ResultContext; +import org.apache.solr.search.DocIterationInfo; /** Transform a document before it gets sent out */ public class DocTransformers extends DocTransformer { @@ -72,9 +73,9 @@ public class DocTransformers extends DocTransformer { } @Override - public void transform(SolrDocument doc, int docid, float score) throws IOException { + public void transform(SolrDocument doc, int docid, DocIterationInfo docInfo) throws IOException { for (DocTransformer a : children) { - a.transform(doc, docid, score); + a.transform(doc, docid, docInfo); } } @@ -88,11 +89,6 @@ public class DocTransformers extends DocTransformer { /** Returns true if and only if at least 1 child transformer returns true */ @Override public boolean needsSolrIndexSearcher() { 
- for (DocTransformer kid : children) { - if (kid.needsSolrIndexSearcher()) { - return true; - } - } - return false; + return children.stream().anyMatch(DocTransformer::needsSolrIndexSearcher); } } diff --git a/solr/core/src/java/org/apache/solr/response/transform/ScoreAugmenter.java b/solr/core/src/java/org/apache/solr/response/transform/MatchScoreAugmenter.java similarity index 76% copy from solr/core/src/java/org/apache/solr/response/transform/ScoreAugmenter.java copy to solr/core/src/java/org/apache/solr/response/transform/MatchScoreAugmenter.java index 22af639e80a..441c18dd285 100644 --- a/solr/core/src/java/org/apache/solr/response/transform/ScoreAugmenter.java +++ b/solr/core/src/java/org/apache/solr/response/transform/MatchScoreAugmenter.java @@ -17,16 +17,17 @@ package org.apache.solr.response.transform; import org.apache.solr.common.SolrDocument; +import org.apache.solr.search.DocIterationInfo; /** - * Simple Augmenter that adds the score + * Simple Augmenter that adds the matchScore * * @since solr 4.0 */ -public class ScoreAugmenter extends DocTransformer { +public class MatchScoreAugmenter extends DocTransformer { final String name; - public ScoreAugmenter(String display) { + public MatchScoreAugmenter(String display) { this.name = display; } @@ -36,14 +37,12 @@ public class ScoreAugmenter extends DocTransformer { } @Override - public void transform(SolrDocument doc, int docid, float score) { - if (context != null && context.wantsScores()) { - doc.setField(name, score); - } + public void transform(SolrDocument doc, int docid, DocIterationInfo docInfo) { + doc.setField(name, docInfo.matchScore()); } @Override public void transform(SolrDocument doc, int docid) { - transform(doc, docid, 0.0f); + // No-op } } diff --git a/solr/core/src/java/org/apache/solr/response/transform/ScoreAugmenter.java b/solr/core/src/java/org/apache/solr/response/transform/ScoreAugmenter.java index 22af639e80a..049ed2a9bec 100644 --- 
a/solr/core/src/java/org/apache/solr/response/transform/ScoreAugmenter.java +++ b/solr/core/src/java/org/apache/solr/response/transform/ScoreAugmenter.java @@ -17,6 +17,7 @@ package org.apache.solr.response.transform; import org.apache.solr.common.SolrDocument; +import org.apache.solr.search.DocIterationInfo; /** * Simple Augmenter that adds the score @@ -36,14 +37,16 @@ public class ScoreAugmenter extends DocTransformer { } @Override - public void transform(SolrDocument doc, int docid, float score) { + public void transform(SolrDocument doc, int docid, DocIterationInfo docInfo) { if (context != null && context.wantsScores()) { - doc.setField(name, score); + doc.setField(name, docInfo.score()); } } @Override public void transform(SolrDocument doc, int docid) { - transform(doc, docid, 0.0f); + if (context != null && context.wantsScores()) { + doc.setField(name, 0.0f); + } } } diff --git a/solr/core/src/java/org/apache/solr/search/DocIterator.java b/solr/core/src/java/org/apache/solr/search/DocIterationInfo.java similarity index 64% copy from solr/core/src/java/org/apache/solr/search/DocIterator.java copy to solr/core/src/java/org/apache/solr/search/DocIterationInfo.java index f6e5833d352..d67165738c3 100644 --- a/solr/core/src/java/org/apache/solr/search/DocIterator.java +++ b/solr/core/src/java/org/apache/solr/search/DocIterationInfo.java @@ -16,25 +16,8 @@ */ package org.apache.solr.search; -import java.util.Iterator; - -/** - * Simple Iterator of document Ids which may include score information. - * - * <p>The order of the documents is determined by the context in which the DocIterator instance was - * retrieved. - */ -public interface DocIterator extends Iterator<Integer> { - // already declared in superclass, redeclaring prevents javadoc inheritance - // public boolean hasNext(); - - /** - * Returns the next document id if <code>hasNext()==true</code> This method is equivalent to - * <code>next()</code>, but avoids the creation of an Integer Object. 
- * - * @see #next() - */ - public int nextDoc(); +/** Information for the current document in the <code>DocIterator</code>. */ +public interface DocIterationInfo { /** * Returns the score for the document just returned by <code>nextDoc()</code> @@ -43,4 +26,13 @@ public interface DocIterator extends Iterator<Integer> { * instance was retrieved. */ public float score(); + + /** + * Returns the query match score in case of rerank queries + * + * @return the query match score in case of a rerank query, null otherwise. + */ + public default Float matchScore() { + return null; + } } diff --git a/solr/core/src/java/org/apache/solr/search/DocIterator.java b/solr/core/src/java/org/apache/solr/search/DocIterator.java index f6e5833d352..390c4150b43 100644 --- a/solr/core/src/java/org/apache/solr/search/DocIterator.java +++ b/solr/core/src/java/org/apache/solr/search/DocIterator.java @@ -24,7 +24,7 @@ import java.util.Iterator; * <p>The order of the documents is determined by the context in which the DocIterator instance was * retrieved. */ -public interface DocIterator extends Iterator<Integer> { +public interface DocIterator extends Iterator<Integer>, DocIterationInfo { // already declared in superclass, redeclaring prevents javadoc inheritance // public boolean hasNext(); @@ -35,12 +35,4 @@ public interface DocIterator extends Iterator<Integer> { * @see #next() */ public int nextDoc(); - - /** - * Returns the score for the document just returned by <code>nextDoc()</code> - * - * <p>The value returned may be meaningless depending on the context in which the DocIterator - * instance was retrieved. 
- */ - public float score(); } diff --git a/solr/core/src/java/org/apache/solr/search/DocSlice.java b/solr/core/src/java/org/apache/solr/search/DocSlice.java index 837d47cdd37..7c01493aac6 100644 --- a/solr/core/src/java/org/apache/solr/search/DocSlice.java +++ b/solr/core/src/java/org/apache/solr/search/DocSlice.java @@ -32,15 +32,16 @@ public class DocSlice implements DocList, Accountable { RamUsageEstimator.shallowSizeOfInstance(DocSlice.class) + RamUsageEstimator.NUM_BYTES_ARRAY_HEADER; - final int offset; // starting position of the docs (zero based) - final int len; // number of positions used in arrays - final int[] docs; // a slice of documents (docs 0-100 of the query) + protected final int offset; // starting position of the docs (zero based) + protected final int len; // number of positions used in arrays + protected final long matches; + protected final TotalHits.Relation matchesRelation; + protected final float maxScore; + protected int docLength; // number of documents in the result - final float[] scores; // optional score list - final long matches; - final TotalHits.Relation matchesRelation; - final float maxScore; - final long ramBytesUsed; // cached value + private final int[] docs; // a slice of documents (docs 0-100 of the query) + private final float[] scores; // optional score list + private final long ramBytesUsed; // cached value /** * Primary constructor for a DocSlice instance. @@ -73,6 +74,7 @@ public class DocSlice implements DocList, Accountable { ? 0 : ((long) scores.length << 2) + RamUsageEstimator.NUM_BYTES_ARRAY_HEADER); this.matchesRelation = matchesRelation; + this.docLength = docs == null ? 
0 : docs.length; } @Override diff --git a/solr/core/src/java/org/apache/solr/search/ReRankCollector.java b/solr/core/src/java/org/apache/solr/search/ReRankCollector.java index d5258ff7152..10abc972b3d 100644 --- a/solr/core/src/java/org/apache/solr/search/ReRankCollector.java +++ b/solr/core/src/java/org/apache/solr/search/ReRankCollector.java @@ -17,6 +17,7 @@ package org.apache.solr.search; import com.carrotsearch.hppc.IntFloatHashMap; +import com.carrotsearch.hppc.IntFloatMap; import com.carrotsearch.hppc.IntIntHashMap; import java.io.IOException; import java.util.Arrays; @@ -129,7 +130,8 @@ public class ReRankCollector extends TopDocsCollector<ScoreDoc> { ScoreDoc[] mainScoreDocs = mainDocs.scoreDocs; boolean zeroOutScores = reRankScaler != null && reRankScaler.scaleScores(); - ScoreDoc[] mainScoreDocsClone = deepClone(mainScoreDocs, zeroOutScores); + IntFloatMap docToOriginalScore = new IntFloatHashMap(); + ScoreDoc[] mainScoreDocsClone = deepClone(mainScoreDocs, docToOriginalScore, zeroOutScores); ScoreDoc[] reRankScoreDocs = new ScoreDoc[Math.min(mainScoreDocs.length, reRankDocs)]; System.arraycopy(mainScoreDocs, 0, reRankScoreDocs, 0, reRankScoreDocs.length); @@ -175,7 +177,6 @@ public class ReRankCollector extends TopDocsCollector<ScoreDoc> { reRankScaler.scaleScores( mainScoreDocsClone, rescoredDocs.scoreDocs, reRankScoreDocs.length); } - return rescoredDocs; // Just return the rescoredDocs } else if (howMany > rescoredDocs.scoreDocs.length) { // We need to return more then we've reRanked, so create the combined page. ScoreDoc[] scoreDocs = new ScoreDoc[howMany]; @@ -193,7 +194,6 @@ public class ReRankCollector extends TopDocsCollector<ScoreDoc> { reRankScaler.scaleScores( mainScoreDocsClone, rescoredDocs.scoreDocs, reRankScoreDocs.length); } - return rescoredDocs; } else { // We've rescored more then we need to return. 
@@ -205,18 +205,29 @@ public class ReRankCollector extends TopDocsCollector<ScoreDoc> { ScoreDoc[] scoreDocs = new ScoreDoc[howMany]; System.arraycopy(rescoredDocs.scoreDocs, 0, scoreDocs, 0, howMany); rescoredDocs.scoreDocs = scoreDocs; - return rescoredDocs; } + return toRescoredDocs(rescoredDocs, docToOriginalScore); } catch (Exception e) { throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e); } } - private ScoreDoc[] deepClone(ScoreDoc[] scoreDocs, boolean zeroOut) { + private TopDocs toRescoredDocs(TopDocs topDocs, IntFloatMap originalScores) { + ScoreDoc[] scoreDocs = topDocs.scoreDocs; + final RescoreDoc[] rescoredDocs = new RescoreDoc[scoreDocs.length]; + for (int i = 0; i < scoreDocs.length; i++) { + rescoredDocs[i] = new RescoreDoc(scoreDocs[i], originalScores.get(scoreDocs[i].doc)); + } + return new TopDocs(topDocs.totalHits, rescoredDocs); + } + + private ScoreDoc[] deepClone( + ScoreDoc[] scoreDocs, IntFloatMap originalScoreMap, boolean zeroOut) { ScoreDoc[] scoreDocs1 = new ScoreDoc[scoreDocs.length]; for (int i = 0; i < scoreDocs.length; i++) { ScoreDoc scoreDoc = scoreDocs[i]; if (scoreDoc != null) { + originalScoreMap.put(scoreDoc.doc, scoreDoc.score); scoreDocs1[i] = new ScoreDoc(scoreDoc.doc, scoreDoc.score); if (zeroOut) { scoreDoc.score = 0f; @@ -258,4 +269,13 @@ public class ReRankCollector extends TopDocsCollector<ScoreDoc> { return -Float.compare(score1, score2); } } + + static class RescoreDoc extends ScoreDoc { + public float matchScore; + + public RescoreDoc(ScoreDoc scoreDoc, float matchScore) { + super(scoreDoc.doc, scoreDoc.score, scoreDoc.shardIndex); + this.matchScore = matchScore; + } + } } diff --git a/solr/core/src/java/org/apache/solr/search/ReRankQParserPlugin.java b/solr/core/src/java/org/apache/solr/search/ReRankQParserPlugin.java index 5bf5ce4067d..53373c03de5 100644 --- a/solr/core/src/java/org/apache/solr/search/ReRankQParserPlugin.java +++ b/solr/core/src/java/org/apache/solr/search/ReRankQParserPlugin.java @@ 
-179,11 +179,10 @@ public class ReRankQParserPlugin extends QParserPlugin { @Override protected float combine( float firstPassScore, boolean secondPassMatches, float secondPassScore) { - float score = firstPassScore; if (secondPassMatches) { - return scoreCombiner.func(score, secondPassScore); + return scoreCombiner.func(firstPassScore, secondPassScore); } - return score; + return firstPassScore; } } diff --git a/solr/core/src/java/org/apache/solr/search/ReturnFields.java b/solr/core/src/java/org/apache/solr/search/ReturnFields.java index 44dcb12491e..e01db9f1f5c 100644 --- a/solr/core/src/java/org/apache/solr/search/ReturnFields.java +++ b/solr/core/src/java/org/apache/solr/search/ReturnFields.java @@ -18,6 +18,7 @@ package org.apache.solr.search; import java.util.Map; import java.util.Set; +import java.util.stream.Collectors; import org.apache.solr.response.transform.DocTransformer; /** @@ -71,6 +72,31 @@ public abstract class ReturnFields { */ public abstract Map<String, String> getFieldRenames(); + /** + * A mapping of return fields that depend on score and the names they are associated with. + * + * @return a mapping from return field name to the string representation of its definition + */ + public abstract Map<String, String> getScoreDependentReturnFields(); + + /** + * The requested field names (includes pseudo fields) that do not depend on a score + * + * @return Set of field names or <code>null</code> (all fields). 
+ */ + public Set<String> getNonScoreDependentReturnFieldNames() { + Set<String> allFieldNames = getRequestedFieldNames(); + Map<String, String> scoreDependentFields = getScoreDependentReturnFields(); + if (allFieldNames == null || scoreDependentFields == null) { + return allFieldNames; + } else { + Set<String> scoreDependentFieldNames = scoreDependentFields.keySet(); + return allFieldNames.stream() + .filter(fieldName -> !scoreDependentFieldNames.contains(fieldName)) + .collect(Collectors.toSet()); + } + } + /** * Returns <code>true</code> if the specified field should be returned <em>to the external * client</em> -- either using its own name, or via an alias. This method returns <code>false diff --git a/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java b/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java index 9172fbce7c7..707d1a1be53 100644 --- a/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java +++ b/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java @@ -32,7 +32,6 @@ import java.util.List; import java.util.Map; import java.util.Objects; import java.util.Set; -import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ExecutorService; import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.TimeUnit; @@ -183,7 +182,7 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI @SuppressWarnings({"rawtypes"}) private final SolrCache[] cacheList; - private DirectoryFactory directoryFactory; + private final DirectoryFactory directoryFactory; private final LeafReader leafReader; // only for addIndexes etc (no fieldcache) @@ -194,7 +193,6 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI private final StatsCache statsCache; - private Set<String> metricNames = ConcurrentHashMap.newKeySet(); private SolrMetricsContext solrMetricsContext; private static DirectoryReader getReader( @@ -1890,11 +1888,9 @@ public class 
SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI int last = len; if (last < 0 || last > maxDoc()) last = maxDoc(); final int lastDocRequested = last; - int nDocsReturned = 0; int totalHits; - float maxScore; - int[] ids; - float[] scores; + final float maxScore; + final DocList docList; final boolean needScores = (cmd.getFlags() & GET_SCORES) != 0; @@ -1949,13 +1945,13 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI buildAndRunCollectorChain(qr, query, collector, cmd, pf.postFilter); - ids = new int[nDocsReturned]; - scores = new float[nDocsReturned]; totalHits = numHits[0]; maxScore = totalHits > 0 ? topscore[0] : 0.0f; + docList = + new DocSlice( + 0, 0, new int[0], new float[0], totalHits, maxScore, TotalHits.Relation.EQUAL_TO); // no docs on this page, so cursor doesn't change qr.setNextCursorMark(cmd.getCursorMark()); - hitsRelation = Relation.EQUAL_TO; } else { if (log.isDebugEnabled()) { log.debug("calling from 2, query: {}", query.getClass()); @@ -1998,19 +1994,12 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI hitsRelation = populateScoresIfNeeded(cmd, needScores, topDocs, query, scoreModeUsed); populateNextCursorMarkFromTopDocs(qr, cmd, topDocs); - nDocsReturned = topDocs.scoreDocs.length; - ids = new int[nDocsReturned]; - scores = needScores ? 
new float[nDocsReturned] : null; - for (int i = 0; i < nDocsReturned; i++) { - ScoreDoc scoreDoc = topDocs.scoreDocs[i]; - ids[i] = scoreDoc.doc; - if (scores != null) scores[i] = scoreDoc.score; - } + int nDocsReturned = topDocs.scoreDocs.length; + int sliceLen = Math.min(lastDocRequested, nDocsReturned); + docList = + new TopDocsSlice(0, sliceLen, topDocs, totalHits, needScores, maxScore, hitsRelation); } - - int sliceLen = Math.min(lastDocRequested, nDocsReturned); - if (sliceLen < 0) sliceLen = 0; - qr.setDocList(new DocSlice(0, sliceLen, ids, scores, totalHits, maxScore, hitsRelation)); + qr.setDocList(docList); } // any DocSet returned is for the query only, without any filtering... that way it may @@ -2023,9 +2012,8 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI final int nDocsReturned; final int totalHits; final float maxScore; - final int[] ids; - final float[] scores; final DocSet set; + final DocList docList; final boolean needScores = (cmd.getFlags() & GET_SCORES) != 0; final int maxDoc = maxDoc(); @@ -2074,11 +2062,10 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI set = DocSetUtil.getDocSet(setCollector, this); - nDocsReturned = 0; - ids = new int[nDocsReturned]; - scores = new float[nDocsReturned]; - totalHits = set.size(); - maxScore = totalHits > 0 ? topscore[0] : 0.0f; + maxScore = set.size() > 0 ? 
topscore[0] : 0.0f; + docList = + new DocSlice( + 0, 0, new int[0], new float[0], set.size(), maxScore, TotalHits.Relation.EQUAL_TO); // no docs on this page, so cursor doesn't change qr.setNextCursorMark(cmd.getCursorMark()); } else { @@ -2114,47 +2101,25 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI } else { log.trace("MULTI-THREADED search, using CollectorManager in getDocListAndSetNC"); - boolean needMaxScore = needScores; MultiThreadedSearcher.SearchResult searchResult = new MultiThreadedSearcher(this) - .searchCollectorManagers(len, cmd, query, true, needMaxScore, true); + .searchCollectorManagers(len, cmd, query, true, needScores, true); MultiThreadedSearcher.TopDocsResult topDocsResult = searchResult.getTopDocsResult(); totalHits = topDocsResult.totalHits; topDocs = topDocsResult.topDocs; maxScore = searchResult.getMaxScore(totalHits); set = new BitDocSet(searchResult.getFixedBitSet()); - - // TODO: Is this correct? - // hitsRelation = populateScoresIfNeeded(cmd, needScores, topDocs, query, - // searchResult.scoreMode); - - // nDocsReturned = topDocs.scoreDocs.length; - // TODO: Is this correct? - // hitsRelation = topDocs.totalHits.relation; - // } else { - // hitsRelation = Relation.EQUAL_TO; - // } - + // TODO: Think about using ScoreMode from searchResult down below } - - populateScoresIfNeeded(cmd, needScores, topDocs, query, ScoreMode.COMPLETE); + final Relation relation = + populateScoresIfNeeded(cmd, needScores, topDocs, query, ScoreMode.COMPLETE); populateNextCursorMarkFromTopDocs(qr, cmd, topDocs); nDocsReturned = topDocs.scoreDocs.length; - - ids = new int[nDocsReturned]; - scores = needScores ? 
new float[nDocsReturned] : null; - for (int i = 0; i < nDocsReturned; i++) { - ScoreDoc scoreDoc = topDocs.scoreDocs[i]; - ids[i] = scoreDoc.doc; - if (scores != null) scores[i] = scoreDoc.score; - } + int sliceLen = Math.min(lastDocRequested, nDocsReturned); + docList = new TopDocsSlice(0, sliceLen, topDocs, totalHits, needScores, maxScore, relation); } - int sliceLen = Math.min(lastDocRequested, nDocsReturned); - if (sliceLen < 0) sliceLen = 0; - - qr.setDocList( - new DocSlice(0, sliceLen, ids, scores, totalHits, maxScore, TotalHits.Relation.EQUAL_TO)); + qr.setDocList(docList); // TODO: if we collect results before the filter, we just need to intersect with // that filter to generate the DocSet for qr.setDocSet() qr.setDocSet(set); diff --git a/solr/core/src/java/org/apache/solr/search/SolrReturnFields.java b/solr/core/src/java/org/apache/solr/search/SolrReturnFields.java index af35245af15..f0b9cbb1f42 100644 --- a/solr/core/src/java/org/apache/solr/search/SolrReturnFields.java +++ b/solr/core/src/java/org/apache/solr/search/SolrReturnFields.java @@ -40,6 +40,7 @@ import org.apache.solr.common.util.GlobPatternUtil; import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.response.transform.DocTransformer; import org.apache.solr.response.transform.DocTransformers; +import org.apache.solr.response.transform.MatchScoreAugmenter; import org.apache.solr.response.transform.RenameFieldTransformer; import org.apache.solr.response.transform.ScoreAugmenter; import org.apache.solr.response.transform.TransformerFactory; @@ -50,6 +51,7 @@ import org.apache.solr.search.SolrDocumentFetcher.RetrieveFieldsOptimizer; public class SolrReturnFields extends ReturnFields { // Special Field Keys public static final String SCORE = "score"; + public static final String MATCH_SCORE = "matchScore"; private final List<String> globs = new ArrayList<>(1); @@ -70,6 +72,8 @@ public class SolrReturnFields extends ReturnFields { protected boolean _wantsAllFields = false; 
protected Map<String, String> renameFields = Collections.emptyMap(); + private final Map<String, String> scoreDependentFields = new HashMap<>(); + // Only set currently with the SolrDocumentFetcher.solrDoc method. Primarily used // at this time for testing to ensure we get fields from the expected places. public enum FIELD_SOURCES { @@ -107,13 +111,14 @@ public class SolrReturnFields extends ReturnFields { if (fl == null) { parseFieldList((String[]) null, req); } else { - if (fl.trim().length() == 0) { + if (fl.trim().isEmpty()) { // legacy thing to support fl=' ' => fl=*,score! // maybe time to drop support for this? // See ConvertedLegacyTest _wantsScore = true; _wantsAllFields = true; transformer = new ScoreAugmenter(SCORE); + scoreDependentFields.put(SCORE, ""); } else { parseFieldList(new String[] {fl}, req); } @@ -536,6 +541,13 @@ public class SolrReturnFields extends ReturnFields { String disp = (key == null) ? field : key; augmenters.addTransformer(new ScoreAugmenter(disp)); + scoreDependentFields.put(disp, disp.equals(SCORE) ? "" : SCORE); + } else if (MATCH_SCORE.equals(field)) { + _wantsScore = true; + + String disp = (key == null) ? field : key; + augmenters.addTransformer(new MatchScoreAugmenter(disp)); + scoreDependentFields.put(disp, disp.equals(MATCH_SCORE) ? "" : MATCH_SCORE); } } @@ -595,6 +607,11 @@ public class SolrReturnFields extends ReturnFields { return _wantsScore; } + @Override + public Map<String, String> getScoreDependentReturnFields() { + return scoreDependentFields; + } + @Override public DocTransformer getTransformer() { return transformer; diff --git a/solr/core/src/java/org/apache/solr/search/TopDocsSlice.java b/solr/core/src/java/org/apache/solr/search/TopDocsSlice.java new file mode 100644 index 00000000000..d6141e156cd --- /dev/null +++ b/solr/core/src/java/org/apache/solr/search/TopDocsSlice.java @@ -0,0 +1,134 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.search; + +import org.apache.lucene.search.TopDocs; +import org.apache.lucene.search.TotalHits; + +/** + * <code>TopDocsSlice</code> implements DocList based off provided <code>TopDocs</code>. + * + * @since solr 9.9 + */ +public class TopDocsSlice extends DocSlice { + + private final TopDocs topDocs; + + private final boolean hasScores; + + /** + * Construct a slice off topDocs + * + * @param offset starting offset for this range of docs + * @param len length of results + * @param matches total number of matches for the query + */ + public TopDocsSlice( + int offset, + int len, + TopDocs topDocs, + long matches, + boolean hasScores, + float maxScore, + TotalHits.Relation matchesRelation) { + super(offset, len, null, null, matches, maxScore, matchesRelation); + this.topDocs = topDocs; + this.hasScores = hasScores; + super.docLength = topDocs.scoreDocs.length; + } + + @Override + public TopDocsSlice subset(int offset, int len) { + if (this.offset == offset && this.len == len) { + return this; + } + + // if we didn't store enough (and there was more to store) + // then we can't take a subset. 
+ int requestedEnd = offset + len; + if (requestedEnd > docLength && this.matches > docLength) { + return null; + } + int realEndDoc = Math.min(requestedEnd, docLength); + int realLen = Math.max(realEndDoc - offset, 0); + if (this.offset == offset && this.len == realLen) { + return this; + } + return new TopDocsSlice( + offset, realLen, topDocs, matches, hasScores, maxScore, matchesRelation); + } + + @Override + public boolean hasScores() { + return topDocs != null && hasScores; + } + + @Override + public DocIterator iterator() { + boolean hasMatchScore = + topDocs.scoreDocs.length > 0 && topDocs.scoreDocs[0] instanceof ReRankCollector.RescoreDoc; + if (hasMatchScore) { + return new ReRankedTopDocsIterator(); + } else { + return new TopDocsIterator(); + } + } + + class TopDocsIterator implements DocIterator { + int pos = offset; + final int end = offset + len; + + @Override + public boolean hasNext() { + return pos < end; + } + + @Override + public Integer next() { + return nextDoc(); + } + + /** The remove operation is not supported by this Iterator. 
*/ + @Override + public void remove() { + throw new UnsupportedOperationException( + "The remove operation is not supported by this Iterator."); + } + + @Override + public int nextDoc() { + return topDocs.scoreDocs[pos++].doc; + } + + @Override + public float score() { + return topDocs.scoreDocs[pos - 1].score; + } + } + + class ReRankedTopDocsIterator extends TopDocsIterator { + + @Override + public Float matchScore() { + try { + return ((ReRankCollector.RescoreDoc) topDocs.scoreDocs[pos - 1]).matchScore; + } catch (ClassCastException e) { + return null; + } + } + } +} diff --git a/solr/core/src/test/org/apache/solr/cloud/TestCloudPseudoReturnFields.java b/solr/core/src/test/org/apache/solr/cloud/TestCloudPseudoReturnFields.java index c40a986c024..c9c2e03a62c 100644 --- a/solr/core/src/test/org/apache/solr/cloud/TestCloudPseudoReturnFields.java +++ b/solr/core/src/test/org/apache/solr/cloud/TestCloudPseudoReturnFields.java @@ -912,7 +912,8 @@ public class TestCloudPseudoReturnFields extends SolrCloudTestCase { } public void testAugmentersAndScore() throws Exception { - SolrParams params = params("q", "*:*", "fl", "[docid],x_alias:[value v=10 t=int],score"); + SolrParams params = + params("q", "*:*", "fl", "[docid],x_alias:[value v=10 t=int],s_alias:score"); SolrDocumentList docs = assertSearch(params); assertEquals(params + " => " + docs, 5, docs.getNumFound()); // shouldn't matter what doc we pick... 
@@ -922,7 +923,8 @@ public class TestCloudPseudoReturnFields extends SolrCloudTestCase { assertTrue(msg, doc.getFieldValue("[docid]") instanceof Integer); assertTrue(msg, doc.getFieldValue("x_alias") instanceof Integer); assertEquals(msg, 10, doc.getFieldValue("x_alias")); - assertTrue(msg, doc.getFieldValue("score") instanceof Float); + assertTrue(msg, doc.getFieldValue("s_alias") instanceof Float); + assertTrue(msg, (Float) doc.getFieldValue("s_alias") > 0); } for (SolrParams p : Arrays.asList( @@ -961,6 +963,22 @@ public class TestCloudPseudoReturnFields extends SolrCloudTestCase { assertTrue(msg, doc.getFieldValue("score") instanceof Float); } } + params = params("q", "*:*", "fl", "[docid],x_alias:[value v=10 t=int],s_alias:score,score"); + docs = assertSearch(params); + assertEquals(params + " => " + docs, 5, docs.getNumFound()); + // shouldn't matter what doc we pick... + for (SolrDocument doc : docs) { + String msg = params + " => " + doc; + assertEquals(msg, 4, doc.size()); + assertTrue(msg, doc.getFieldValue("[docid]") instanceof Integer); + assertTrue(msg, doc.getFieldValue("x_alias") instanceof Integer); + assertEquals(msg, 10, doc.getFieldValue("x_alias")); + assertTrue(msg, doc.getFieldValue("s_alias") instanceof Float); + assertTrue(msg, (Float) doc.getFieldValue("s_alias") > 0); + assertTrue(msg, doc.getFieldValue("score") instanceof Float); + assertTrue(msg, (Float) doc.getFieldValue("score") > 0); + assertEquals(msg, doc.getFieldValue("score"), doc.getFieldValue("s_alias")); + } } public void testAugmentersAndScoreRTG() throws Exception { diff --git a/solr/core/src/test/org/apache/solr/handler/component/MockResponseBuilder.java b/solr/core/src/test/org/apache/solr/handler/component/MockResponseBuilder.java index 6ef06bcb6d3..241ff703fb5 100644 --- a/solr/core/src/test/org/apache/solr/handler/component/MockResponseBuilder.java +++ b/solr/core/src/test/org/apache/solr/handler/component/MockResponseBuilder.java @@ -56,6 +56,7 @@ public class 
MockResponseBuilder extends ResponseBuilder { Mockito.when(params.getBool(ShardParams.SHARDS_INFO)).thenReturn(false); Mockito.when(request.getParams()).thenReturn(params); Mockito.when(response.getResponseHeader()).thenReturn(responseHeader); + Mockito.when(response.getReturnFields()).thenCallRealMethod(); List<SearchComponent> components = new ArrayList<>(); return new MockResponseBuilder(request, response, components); diff --git a/solr/core/src/test/org/apache/solr/search/DistributedReRankExplainTest.java b/solr/core/src/test/org/apache/solr/search/DistributedReRankExplainTest.java index 63343d3a7ed..6597f1b2c85 100644 --- a/solr/core/src/test/org/apache/solr/search/DistributedReRankExplainTest.java +++ b/solr/core/src/test/org/apache/solr/search/DistributedReRankExplainTest.java @@ -29,6 +29,7 @@ import org.apache.solr.client.solrj.request.QueryRequest; import org.apache.solr.client.solrj.request.UpdateRequest; import org.apache.solr.client.solrj.response.QueryResponse; import org.apache.solr.cloud.SolrCloudTestCase; +import org.apache.solr.common.SolrDocument; import org.apache.solr.common.SolrInputDocument; import org.apache.solr.common.params.CommonParams; import org.apache.solr.common.params.ShardParams; @@ -155,10 +156,24 @@ public class DistributedReRankExplainTest extends SolrCloudTestCase { final QueryRequest queryRequest = new QueryRequest( SolrParams.wrapDefaults( - params, params(CommonParams.Q, "test_s:hello", "fl", "id,test_s,score"))); + params, + params( + CommonParams.Q, + "test_s:hello", + "fl", + "id,test_s,score,originalScore:matchScore,matchScore"))); final QueryResponse queryResponse = queryRequest.process(client, COLLECTIONORALIAS); - assertNotNull(queryResponse.getResults().get(0).getFieldValue("test_s")); + for (SolrDocument doc : queryResponse.getResults()) { + assertNotNull("test_s", doc.getFieldValue("test_s")); + assertNotNull("matchScore", doc.getFieldValue("matchScore")); + assertTrue(queryResponse.toString(), 
doc.getFieldValue("matchScore") instanceof Float); + assertNotNull("originalScore", doc.getFieldValue("originalScore")); + assertTrue( + doc.getFieldValue("originalScore").toString(), + doc.getFieldValue("originalScore") instanceof Float); + assertEquals(doc.getFieldValue("matchScore"), doc.getFieldValue("originalScore")); + } return queryResponse; } } diff --git a/solr/core/src/test/org/apache/solr/search/TestPseudoReturnFields.java b/solr/core/src/test/org/apache/solr/search/TestPseudoReturnFields.java index cd0836da9bc..1d3d3ca45bd 100644 --- a/solr/core/src/test/org/apache/solr/search/TestPseudoReturnFields.java +++ b/solr/core/src/test/org/apache/solr/search/TestPseudoReturnFields.java @@ -680,12 +680,19 @@ public class TestPseudoReturnFields extends SolrTestCaseJ4 { public void testAugmentersAndScore() { assertQ( - req("q", "*:*", "rows", "1", "fl", "[docid],x_alias:[value v=10 t=int],score"), + req( + "q", + "*:*", + "rows", + "1", + "fl", + "[docid],x_alias:[value v=10 t=int],score,s_alias:score"), "//result[@numFound='5']", "//result/doc/int[@name='[docid]']", "//result/doc/int[@name='x_alias'][.=10]", "//result/doc/float[@name='score']", - "//result/doc[count(*)=3]"); + "//result/doc/float[@name='s_alias']", + "//result/doc[count(*)=4]"); for (SolrParams p : Arrays.asList( params("fl", "[docid],x_alias:[value v=10 t=int],[explain],score"), diff --git a/solr/core/src/test/org/apache/solr/search/TestReRankQParserPlugin.java b/solr/core/src/test/org/apache/solr/search/TestReRankQParserPlugin.java index 0c53d2fcc70..e7d457b06be 100644 --- a/solr/core/src/test/org/apache/solr/search/TestReRankQParserPlugin.java +++ b/solr/core/src/test/org/apache/solr/search/TestReRankQParserPlugin.java @@ -64,6 +64,143 @@ public class TestReRankQParserPlugin extends SolrTestCaseJ4 { assertEquals(ReRankQParserPlugin.RERANK_OPERATOR, "reRankOperator"); } + @Test + public void testRerankReturnMatchScore() throws Exception { + + assertU(delQ("*:*")); + assertU(commit()); + + 
String[] doc = { + "id", "1", "term_s", "YYYY", "group_s", "group1", "test_ti", "5", "test_tl", "10", "test_tf", + "2000" + }; + assertU(adoc(doc)); + assertU(commit()); + String[] doc1 = { + "id", "2", "term_s", "YYYY", "group_s", "group1", "test_ti", "50", "test_tl", "100", + "test_tf", "200" + }; + assertU(adoc(doc1)); + + String[] doc2 = { + "id", "3", "term_s", "YYYY", "test_ti", "5000", "test_tl", "100", "test_tf", "200" + }; + assertU(adoc(doc2)); + assertU(commit()); + String[] doc3 = { + "id", "4", "term_s", "YYYY", "test_ti", "500", "test_tl", "1000", "test_tf", "2000" + }; + assertU(adoc(doc3)); + + String[] doc4 = { + "id", "5", "term_s", "YYYY", "group_s", "group2", "test_ti", "4", "test_tl", "10", "test_tf", + "2000" + }; + assertU(adoc(doc4)); + assertU(commit()); + String[] doc5 = { + "id", "6", "term_s", "YYYY", "group_s", "group2", "test_ti", "10", "test_tl", "100", + "test_tf", "200" + }; + assertU(adoc(doc5)); + assertU(commit()); + + ModifiableSolrParams params = new ModifiableSolrParams(); + params.add( + "rq", + "{!" 
+ + ReRankQParserPlugin.NAME + + " " + + ReRankQParserPlugin.RERANK_QUERY + + "=$rqq " + + ReRankQParserPlugin.RERANK_DOCS + + "=200}"); + params.add("q", "term_s:YYYY"); + params.add("rqq", "{!edismax bf=$bff}*:*"); + params.add("bff", "field(test_ti)"); + params.add("start", "0"); + params.add("rows", "6"); + params.add("df", "text"); + params.add("fl", "id,test_ti,score,matchScore"); + + assertQ( + req(params), + "*[count(//doc)=6]", + "//result/doc[1]/str[@name='id'][.='3']", + "//result/doc[1]/float[@name='score'][.>'10000.03']", + "//result/doc[1]/float[@name='matchScore'][.>'0.03']", + "//result/doc[2]/str[@name='id'][.='4']", + "//result/doc[2]/float[@name='score'][.>'1000.03']", + "//result/doc[2]/float[@name='matchScore'][.>'0.03']", + "//result/doc[3]/str[@name='id'][.='2']", + "//result/doc[4]/str[@name='id'][.='6']", + "//result/doc[5]/str[@name='id'][.='1']", + "//result/doc[6]/str[@name='id'][.='5']"); + } + + @Test + public void testRerankReturnMatchScoreNotRequested() throws Exception { + + assertU(delQ("*:*")); + assertU(commit()); + + String[] doc = { + "id", "1", "term_s", "YYYY", "group_s", "group1", "test_ti", "5", "test_tl", "10", "test_tf", + "2000" + }; + assertU(adoc(doc)); + assertU(commit()); + String[] doc1 = { + "id", "2", "term_s", "YYYY", "group_s", "group1", "test_ti", "50", "test_tl", "100", + "test_tf", "200" + }; + assertU(adoc(doc1)); + + String[] doc2 = { + "id", "3", "term_s", "YYYY", "test_ti", "5000", "test_tl", "100", "test_tf", "200" + }; + assertU(adoc(doc2)); + assertU(commit()); + String[] doc3 = { + "id", "4", "term_s", "YYYY", "test_ti", "500", "test_tl", "1000", "test_tf", "2000" + }; + assertU(adoc(doc3)); + + String[] doc4 = { + "id", "5", "term_s", "YYYY", "group_s", "group2", "test_ti", "4", "test_tl", "10", "test_tf", + "2000" + }; + assertU(adoc(doc4)); + assertU(commit()); + String[] doc5 = { + "id", "6", "term_s", "YYYY", "group_s", "group2", "test_ti", "10", "test_tl", "100", + "test_tf", "200" + }; + 
assertU(adoc(doc5)); + assertU(commit()); + + ModifiableSolrParams params = new ModifiableSolrParams(); + params.add( + "rq", + "{!" + + ReRankQParserPlugin.NAME + + " " + + ReRankQParserPlugin.RERANK_QUERY + + "=$rqq " + + ReRankQParserPlugin.RERANK_DOCS + + "=200}"); + params.add("q", "term_s:YYYY"); + params.add("rqq", "{!edismax bf=$bff}*:*"); + params.add("bff", "field(test_ti)"); + params.add("start", "0"); + params.add("rows", "6"); + params.add("df", "text"); + params.add("fl", "id,test_ti,score"); + + String response = JQ(req(params)); + assertFalse(response.contains("matchScore")); + } + @Test public void testReRankQueries() { diff --git a/solr/modules/ltr/src/java/org/apache/solr/ltr/response/transform/LTRFeatureLoggerTransformerFactory.java b/solr/modules/ltr/src/java/org/apache/solr/ltr/response/transform/LTRFeatureLoggerTransformerFactory.java index ebf8661bd70..8c2a92c9484 100644 --- a/solr/modules/ltr/src/java/org/apache/solr/ltr/response/transform/LTRFeatureLoggerTransformerFactory.java +++ b/solr/modules/ltr/src/java/org/apache/solr/ltr/response/transform/LTRFeatureLoggerTransformerFactory.java @@ -46,6 +46,7 @@ import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.response.ResultContext; import org.apache.solr.response.transform.DocTransformer; import org.apache.solr.response.transform.TransformerFactory; +import org.apache.solr.search.DocIterationInfo; import org.apache.solr.search.SolrIndexSearcher; import org.apache.solr.util.SolrPluginUtils; @@ -402,8 +403,9 @@ public class LTRFeatureLoggerTransformerFactory extends TransformerFactory { } @Override - public void transform(SolrDocument doc, int docid, float score) throws IOException { - implTransform(doc, docid, score); + public void transform(SolrDocument doc, int docid, DocIterationInfo docInfo) + throws IOException { + implTransform(doc, docid, docInfo); } @Override @@ -411,7 +413,8 @@ public class LTRFeatureLoggerTransformerFactory extends TransformerFactory { 
implTransform(doc, docid, null); } - private void implTransform(SolrDocument doc, int docid, Float score) throws IOException { + private void implTransform(SolrDocument doc, int docid, DocIterationInfo docInfo) + throws IOException { LTRScoringQuery rerankingQuery = rerankingQueries[0]; LTRScoringQuery.ModelWeight rerankingModelWeight = modelWeights[0]; for (int i = 1; i < rerankingQueries.length; i++) { @@ -430,7 +433,7 @@ public class LTRFeatureLoggerTransformerFactory extends TransformerFactory { LTRRescorer.extractFeaturesInfo( rerankingModelWeight, docid, - (!docsWereReranked ? score : null), + (!docsWereReranked && docInfo != null) ? docInfo.score() : null, leafContexts)); } doc.addField(name, featureVector); diff --git a/solr/modules/ltr/src/java/org/apache/solr/ltr/response/transform/LTRInterleavingTransformerFactory.java b/solr/modules/ltr/src/java/org/apache/solr/ltr/response/transform/LTRInterleavingTransformerFactory.java index e08b564fb22..8105777b857 100644 --- a/solr/modules/ltr/src/java/org/apache/solr/ltr/response/transform/LTRInterleavingTransformerFactory.java +++ b/solr/modules/ltr/src/java/org/apache/solr/ltr/response/transform/LTRInterleavingTransformerFactory.java @@ -87,11 +87,6 @@ public class LTRInterleavingTransformerFactory extends TransformerFactory { } } - @Override - public void transform(SolrDocument doc, int docid, float score) throws IOException { - implTransform(doc, docid); - } - @Override public void transform(SolrDocument doc, int docid) throws IOException { implTransform(doc, docid); diff --git a/solr/solr-ref-guide/modules/query-guide/pages/query-re-ranking.adoc b/solr/solr-ref-guide/modules/query-guide/pages/query-re-ranking.adoc index 0cf15be3ff6..e4533412f5e 100644 --- a/solr/solr-ref-guide/modules/query-guide/pages/query-re-ranking.adoc +++ b/solr/solr-ref-guide/modules/query-guide/pages/query-re-ranking.adoc @@ -107,6 +107,14 @@ q=greetings&rq={!rerank reRankQuery=$rqq reRankDocs=1000 reRankWeight=3}&rqq=(hi ---- If a 
document matches the original query, but does not match the re-ranking query, the document's original score will remain. +For reranked documents, requesting the additional `matchScore` field in `fl` returns the original score of each document, i.e. +the score the document had before re-ranking was applied. For documents that were not reranked, the `matchScore` and `score` fields +will have the same value. For the example above, you would use the following to return the match score: + +[source,text] +---- +q=greetings&rq={!rerank reRankQuery=$rqq reRankDocs=1000 reRankWeight=3}&rqq=(hi+hello+hey+hiya)&fl=id,matchScore +---- Setting `reRankOperator` to `multiply` will multiply the three numbers instead. This means that other multiplying operations such as xref:edismax-query-parser.adoc#extended-dismax-parameters[eDisMax `boost` functions] can be converted to Re-Rank operations. diff --git a/solr/solrj/src/java/org/apache/solr/common/SolrDocument.java b/solr/solrj/src/java/org/apache/solr/common/SolrDocument.java index f9fc0393058..f9fa042a979 100644 --- a/solr/solrj/src/java/org/apache/solr/common/SolrDocument.java +++ b/solr/solrj/src/java/org/apache/solr/common/SolrDocument.java @@ -20,12 +20,14 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; +import java.util.HashMap; import java.util.Iterator; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Set; import java.util.function.BiConsumer; +import org.apache.solr.common.util.CollectionUtil; import org.apache.solr.common.util.NamedList; /** @@ -196,6 +198,19 @@ public class SolrDocument extends SolrDocumentBase<Object, SolrDocument> return null; } + /** Get a map from each of the given field names to its value, omitting fields that have no value. 
*/ + public Map<String, Object> getSubsetOfFields(Set<String> fieldNames) { + final HashMap<String, Object> subset = CollectionUtil.newHashMap(fieldNames.size()); + fieldNames.forEach( + f -> { + Object v = getFieldValue(f); + if (v != null) { + subset.put(f, getFieldValue(f)); + } + }); + return subset; + } + @Override public String toString() { return "SolrDocument" + _fields;