This is an automated email from the ASF dual-hosted git repository.
magibney pushed a commit to branch branch_9x
in repository https://gitbox.apache.org/repos/asf/solr.git
The following commit(s) were added to refs/heads/branch_9x by this push:
new 521b19c SOLR-14595: Consistent overrequest across different facet
methods for `sort:index` JSON Facet field (#447)
521b19c is described below
commit 521b19c12784146923500505bf34f53f00ba3c68
Author: Michael Gibney <[email protected]>
AuthorDate: Wed Jan 26 21:05:32 2022 -0500
SOLR-14595: Consistent overrequest across different facet methods for
`sort:index` JSON Facet field (#447)
(cherry picked from commit cfacd182dadaff75e8376afd85fa8396a854175e)
---
solr/CHANGES.txt | 2 ++
.../solr/search/facet/FacetFieldProcessor.java | 39 ++++++++++++++++++----
.../FacetFieldProcessorByEnumTermsStream.java | 16 ++++++++-
.../search/facet/TestCloudJSONFacetSKGEquiv.java | 13 --------
.../solr/search/facet/TestJsonFacetRefinement.java | 13 +++++---
5 files changed, 58 insertions(+), 25 deletions(-)
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 416bd8c..6cf80cf 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -560,6 +560,8 @@ Bug Fixes
* SOLR-15944: The Tagger's JSON response format now always uses an object/map
to represent each tag instead of an
array, which didn't make sense. (David Smiley)
+* SOLR-14595: Consistent overrequest across different facet methods for
`sort:index` JSON Facet field (Michael Gibney, hossman)
+
================== 8.11.2 ==================
Bug Fixes
diff --git
a/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessor.java
b/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessor.java
index 330d72d..319d36a 100644
--- a/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessor.java
+++ b/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessor.java
@@ -292,6 +292,15 @@ abstract class FacetFieldProcessor extends
FacetProcessor<FacetField> {
}
}
+ private static long applyDefaultOverrequest(long offset, long limit) {
+ // NOTE: consider modifying the below heuristic; see SOLR-15760
+ // add over-request if this is a shard request and if we have a small
offset (large offsets will already be gathering many more buckets than needed)
+ if (offset < 10) {
+ return (long) (limit * 1.1 + 4); // default: add 10% plus 4 (to
overrequest for very small limits)
+ }
+ return limit;
+ }
+
/** Processes the collected data to finds the top slots, and composes it in
the response NamedList. */
SimpleOrderedMap<Object> findTopSlots(final int numSlots, final int
slotCardinality,
@SuppressWarnings("rawtypes")
IntFunction<Comparable> bucketValFromSlotNumFunc,
@@ -305,13 +314,31 @@ abstract class FacetFieldProcessor extends
FacetProcessor<FacetField> {
if (freq.limit >= 0) {
effectiveLimit = freq.limit;
if (fcontext.isShard()) {
- if (freq.overrequest == -1) {
- // add over-request if this is a shard request and if we have a
small offset (large offsets will already be gathering many more buckets than
needed)
- if (freq.offset < 10) {
- effectiveLimit = (long) (effectiveLimit * 1.1 + 4); // default:
add 10% plus 4 (to overrequest for very small limits)
- }
- } else {
+ if (freq.overrequest > 0) {
+ // NOTE: although _default_ distrib overrequest is disabled for the
"index sort" case (see
+ // below), we _do_ want to respect an _explicit_ `overrequest`
value, if present. Overrequest
+ // is always relevant (regardless of prelim sort) for the `resort`
case; but even in the case of
+ // "index sort, no resort", overrequest can be relevant in some edge
cases of the "shard" case,
+ // where it can affect the behavior of `isBucketComplete()` (see
SOLR-14595).
effectiveLimit += freq.overrequest;
+ } else {
+ switch (freq.overrequest) {
+ case 0:
+ // no-op (overrequest explicitly disabled)
+ break;
+ case -1:
+ // default
+ if (!"index".equals(this.sort.sortVariable)) {
+ // NOTE: even for distrib requests, `overrequest` is not
directly relevant for "index" sort, hence
+ // there is no default/implicit overrequest for "index sort"
(even if `resort` is also specified --
+ // overrequest that is exclusively for `resort` must be
explicit, even in a distrib context)
+ effectiveLimit = applyDefaultOverrequest(freq.offset,
effectiveLimit);
+ }
+ break;
+ default:
+ // other negative values are not supported
+ throw new IllegalArgumentException("Illegal `overrequest`
specified: " + freq.overrequest);
+ }
}
} else if (null != resort && 0 < freq.overrequest) {
// in non-shard situations, if we have a 'resort' we check for
explicit overrequest > 0
diff --git
a/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorByEnumTermsStream.java
b/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorByEnumTermsStream.java
index 99e9338..59d1082 100644
---
a/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorByEnumTermsStream.java
+++
b/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorByEnumTermsStream.java
@@ -48,6 +48,7 @@ import org.apache.solr.search.facet.SlotAcc.SlotContext;
* It's able to stream since no data needs to be accumulated so long as it's
index order.
*/
class FacetFieldProcessorByEnumTermsStream extends FacetFieldProcessor
implements Closeable {
+ long effectiveLimit;
long bucketsToSkip;
long bucketsReturned;
@@ -144,6 +145,19 @@ class FacetFieldProcessorByEnumTermsStream extends
FacetFieldProcessor implement
hasSubFacets = freq.subFacets.size() > 0;
bucketsToSkip = freq.offset;
+ effectiveLimit = freq.limit;
+ if (freq.overrequest < -1) {
+ // other negative values are not supported
+ throw new IllegalArgumentException("Illegal `overrequest` specified: " +
freq.overrequest);
+ } else if (freq.overrequest > 0 && (fcontext.isShard() || null != resort))
{
+ // NOTE: "index sort" _never_ applies a default overrequest. In both the
shard case and the resort case, the
+ // default overrequest is `0`. However, if `overrequest` is explicitly
specified, we respect it except for
+ // non-distrib, no-resort request. Overrequest is relevant for the
`resort` case; but it can also be relevant
+ // in some edge cases of the "shard" case, where it can affect the
behavior of `isBucketComplete()`
+ // (see SOLR-14595).
+ effectiveLimit += freq.overrequest;
+ }
+
createAccs(-1, 1);
// Minimum term docFreq in order to use the filterCache for that term.
@@ -324,7 +338,7 @@ class FacetFieldProcessorByEnumTermsStream extends
FacetFieldProcessor implement
continue;
}
- if (freq.limit >= 0 && ++bucketsReturned > freq.limit) {
+ if (effectiveLimit >= 0 && ++bucketsReturned > effectiveLimit) {
shardHasMoreBuckets.set(true);
return null;
}
diff --git
a/solr/core/src/test/org/apache/solr/search/facet/TestCloudJSONFacetSKGEquiv.java
b/solr/core/src/test/org/apache/solr/search/facet/TestCloudJSONFacetSKGEquiv.java
index b3ec3ea..607f9f6 100644
---
a/solr/core/src/test/org/apache/solr/search/facet/TestCloudJSONFacetSKGEquiv.java
+++
b/solr/core/src/test/org/apache/solr/search/facet/TestCloudJSONFacetSKGEquiv.java
@@ -1103,25 +1103,12 @@ public class TestCloudJSONFacetSKGEquiv extends
SolrCloudTestCase {
/**
* picks a random value for the "overrequest" param, biased in favor of
interesting test cases.
- * <p>
- * <b>NOTE:</b> due to variations in overrequest behavior betewen
<code>metod:enum<code> and other
- * processors (see <a
href="https://issues.apache.org/jira/browse/SOLR-14595">SOLR-14595</a>) this
- * method takes in the "sort" param and returns a constant value of
<code>0</code> if the sort is
- * <code>index asc</code> to ensure that the set of candidate buckets
considered during merging
- * (and refinement) is consistent regardless of what processor is used
(and/or what sort is used
- * on the parent facet)
- * </p>
*
* @return a number to specify in the request, or null to specify nothing
(trigger default behavior)
* @see #UNIQUE_FIELD_VALS
- * @see <a
href="https://issues.apache.org/jira/browse/SOLR-14595">SOLR-14595</a>
*/
public static Integer randomOverrequestParam(final Random r, final String
sort) {
- if ("index asc".equals(sort)) {
- return 0; // test work around for SOLR-14595
- }
-
switch(r.nextInt(10)) {
case 0:
case 1:
diff --git
a/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacetRefinement.java
b/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacetRefinement.java
index cbc4d3f..2253745 100644
---
a/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacetRefinement.java
+++
b/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacetRefinement.java
@@ -20,6 +20,7 @@ package org.apache.solr.search.facet;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
+import java.util.Locale;
import org.apache.lucene.util.TestUtil;
@@ -1470,7 +1471,6 @@ public class TestJsonFacetRefinement extends
SolrTestCaseHS {
} // end method loop
}
- @AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/SOLR-14595")
public void testIndexAscRefineConsistency() throws Exception {
initServers();
final Client client = servers.getClient(random().nextInt());
@@ -1495,8 +1495,7 @@ public class TestJsonFacetRefinement extends
SolrTestCaseHS {
client.commit();
- // TODO once SOLR-14595 is fixed, modify test to check full EnumSet, not
just these two...
- for (String m : Arrays.asList("smart", "enum")) {
+ for (FacetField.FacetMethod m : FacetField.FacetMethod.values()) {
client.testJQ(params("q", "*:*", "rows", "0", "json.facet", "{"
+ " cat : { type:terms, field:cat_s, limit:1,
refine:true,"
+ " overrequest:0, " // to trigger parent
refinement given small data set
@@ -1504,13 +1503,17 @@ public class TestJsonFacetRefinement extends
SolrTestCaseHS {
+ " facet: { sum : 'sum(price_i)', "
+ " child_"+m+" : { "
+ " type:terms, field:child_s,
limit:1, refine:true,"
- + " sort:'index asc', method:"
+ m + " } "
+ + " sort:'index asc', method:"
+ m.toString().toLowerCase(Locale.ROOT) + " } "
+ " }} }"
)
, "facets=={ count:5"
+ ", cat:{buckets:[ { val:X, count:3, sum:6.0, "
- + " child_"+m+":{buckets:[{val:A,
count:1}]}}]}}"
+ + " child_"+m+":{buckets:[{val:B,
count:1}]}}]}}" // * (see below)
);
}
+ // * NOTE: the intuitive value to return here would be "A"; but we're
testing for _consistency_
+ // here, and artificially setting `overrequest:0`. With default
overrequest, the intuitive
+ // "correct" behavior would indeed be achieved -- but we then wouldn't be
triggering the behavior
+ // that this test is designed to evaluate.
}
}