This is an automated email from the ASF dual-hosted git repository.

magibney pushed a commit to branch branch_9x
in repository https://gitbox.apache.org/repos/asf/solr.git


The following commit(s) were added to refs/heads/branch_9x by this push:
     new 521b19c  SOLR-14595: Consistent overrequest across different facet 
methods for `sort:index` JSON Facet field (#447)
521b19c is described below

commit 521b19c12784146923500505bf34f53f00ba3c68
Author: Michael Gibney <[email protected]>
AuthorDate: Wed Jan 26 21:05:32 2022 -0500

    SOLR-14595: Consistent overrequest across different facet methods for 
`sort:index` JSON Facet field (#447)
    
    (cherry picked from commit cfacd182dadaff75e8376afd85fa8396a854175e)
---
 solr/CHANGES.txt                                   |  2 ++
 .../solr/search/facet/FacetFieldProcessor.java     | 39 ++++++++++++++++++----
 .../FacetFieldProcessorByEnumTermsStream.java      | 16 ++++++++-
 .../search/facet/TestCloudJSONFacetSKGEquiv.java   | 13 --------
 .../solr/search/facet/TestJsonFacetRefinement.java | 13 +++++---
 5 files changed, 58 insertions(+), 25 deletions(-)

diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 416bd8c..6cf80cf 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -560,6 +560,8 @@ Bug Fixes
 * SOLR-15944: The Tagger's JSON response format now always uses an object/map 
to represent each tag instead of an
   array, which didn't make sense. (David Smiley)
 
+* SOLR-14595: Consistent overrequest across different facet methods for 
`sort:index` JSON Facet field (Michael Gibney, hossman)
+
 ==================  8.11.2 ==================
 
 Bug Fixes
diff --git 
a/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessor.java 
b/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessor.java
index 330d72d..319d36a 100644
--- a/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessor.java
+++ b/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessor.java
@@ -292,6 +292,15 @@ abstract class FacetFieldProcessor extends 
FacetProcessor<FacetField> {
     }
   }
 
+  private static long applyDefaultOverrequest(long offset, long limit) {
+    // NOTE: consider modifying the below heuristic; see SOLR-15760
+    // add over-request if this is a shard request and if we have a small 
offset (large offsets will already be gathering many more buckets than needed)
+    if (offset < 10) {
+      return (long) (limit * 1.1 + 4); // default: add 10% plus 4 (to 
overrequest for very small limits)
+    }
+    return limit;
+  }
+
   /** Processes the collected data to finds the top slots, and composes it in 
the response NamedList. */
   SimpleOrderedMap<Object> findTopSlots(final int numSlots, final int 
slotCardinality,
                                         @SuppressWarnings("rawtypes") 
IntFunction<Comparable> bucketValFromSlotNumFunc,
@@ -305,13 +314,31 @@ abstract class FacetFieldProcessor extends 
FacetProcessor<FacetField> {
     if (freq.limit >= 0) {
       effectiveLimit = freq.limit;
       if (fcontext.isShard()) {
-        if (freq.overrequest == -1) {
-          // add over-request if this is a shard request and if we have a 
small offset (large offsets will already be gathering many more buckets than 
needed)
-          if (freq.offset < 10) {
-            effectiveLimit = (long) (effectiveLimit * 1.1 + 4); // default: 
add 10% plus 4 (to overrequest for very small limits)
-          }
-        } else {
+        if (freq.overrequest > 0) {
+          // NOTE: although _default_ distrib overrequest is disabled for the 
"index sort" case (see
+          // below), we _do_ want to respect an _explicit_ `overrequest` 
value, if present. Overrequest
+          // is always relevant (regardless of prelim sort) for the `resort` 
case; but even in the case of
+          // "index sort, no resort", overrequest can be relevant in some edge 
cases of the "shard" case,
+          // where it can affect the behavior of `isBucketComplete()` (see 
SOLR-14595).
           effectiveLimit += freq.overrequest;
+        } else {
+          switch (freq.overrequest) {
+            case 0:
+              // no-op (overrequest explicitly disabled)
+              break;
+            case -1:
+              // default
+              if (!"index".equals(this.sort.sortVariable)) {
+                // NOTE: even for distrib requests, `overrequest` is not 
directly relevant for "index" sort, hence
+                // there is no default/implicit overrequest for "index sort" 
(even if `resort` is also specified --
+                // overrequest that is exclusively for `resort` must be 
explicit, even in a distrib context)
+                effectiveLimit = applyDefaultOverrequest(freq.offset, 
effectiveLimit);
+              }
+              break;
+            default:
+              // other negative values are not supported
+              throw new IllegalArgumentException("Illegal `overrequest` 
specified: " + freq.overrequest);
+          }
         }
       } else if (null != resort && 0 < freq.overrequest) {
         // in non-shard situations, if we have a 'resort' we check for 
explicit overrequest > 0
diff --git 
a/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorByEnumTermsStream.java
 
b/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorByEnumTermsStream.java
index 99e9338..59d1082 100644
--- 
a/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorByEnumTermsStream.java
+++ 
b/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorByEnumTermsStream.java
@@ -48,6 +48,7 @@ import org.apache.solr.search.facet.SlotAcc.SlotContext;
  * It's able to stream since no data needs to be accumulated so long as it's 
index order.
  */
 class FacetFieldProcessorByEnumTermsStream extends FacetFieldProcessor 
implements Closeable {
+  long effectiveLimit;
   long bucketsToSkip;
   long bucketsReturned;
 
@@ -144,6 +145,19 @@ class FacetFieldProcessorByEnumTermsStream extends 
FacetFieldProcessor implement
     hasSubFacets = freq.subFacets.size() > 0;
     bucketsToSkip = freq.offset;
 
+    effectiveLimit = freq.limit;
+    if (freq.overrequest < -1) {
+      // other negative values are not supported
+      throw new IllegalArgumentException("Illegal `overrequest` specified: " + 
freq.overrequest);
+    } else if (freq.overrequest > 0 && (fcontext.isShard() || null != resort)) 
{
+      // NOTE: "index sort" _never_ applies a default overrequest. In both the 
shard case and the resort case, the
+      // default overrequest is `0`. However, if `overrequest` is explicitly 
specified, we respect it except for
+      // non-distrib, no-resort request. Overrequest is relevant for the 
`resort` case; but it can also be relevant
+      // in some edge cases of the "shard" case, where it can affect the 
behavior of `isBucketComplete()`
+      // (see SOLR-14595).
+      effectiveLimit += freq.overrequest;
+    }
+
     createAccs(-1, 1);
 
     // Minimum term docFreq in order to use the filterCache for that term.
@@ -324,7 +338,7 @@ class FacetFieldProcessorByEnumTermsStream extends 
FacetFieldProcessor implement
           continue;
         }
 
-        if (freq.limit >= 0 && ++bucketsReturned > freq.limit) {
+        if (effectiveLimit >= 0 && ++bucketsReturned > effectiveLimit) {
           shardHasMoreBuckets.set(true);
           return null;
         }
diff --git 
a/solr/core/src/test/org/apache/solr/search/facet/TestCloudJSONFacetSKGEquiv.java
 
b/solr/core/src/test/org/apache/solr/search/facet/TestCloudJSONFacetSKGEquiv.java
index b3ec3ea..607f9f6 100644
--- 
a/solr/core/src/test/org/apache/solr/search/facet/TestCloudJSONFacetSKGEquiv.java
+++ 
b/solr/core/src/test/org/apache/solr/search/facet/TestCloudJSONFacetSKGEquiv.java
@@ -1103,25 +1103,12 @@ public class TestCloudJSONFacetSKGEquiv extends 
SolrCloudTestCase {
     
     /**
      * picks a random value for the "overrequest" param, biased in favor of 
interesting test cases.
-     * <p>
-     * <b>NOTE:</b> due to variations in overrequest behavior betewen 
<code>metod:enum<code> and other 
-     * processors (see <a 
href="https://issues.apache.org/jira/browse/SOLR-14595";>SOLR-14595</a>) this 
-     * method takes in the "sort" param and returns a constant value of 
<code>0</code> if the sort is 
-     * <code>index asc</code> to ensure that the set of candidate buckets 
considered during merging 
-     * (and refinement) is consistent regardless of what processor is used 
(and/or what sort is used 
-     * on the parent facet)
-     * </p>
      *
      * @return a number to specify in the request, or null to specify nothing 
(trigger default behavior)
      * @see #UNIQUE_FIELD_VALS
-     * @see <a 
href="https://issues.apache.org/jira/browse/SOLR-14595";>SOLR-14595</a>
      */
     public static Integer randomOverrequestParam(final Random r, final String 
sort) {
 
-      if ("index asc".equals(sort)) {
-        return 0; // test work around for SOLR-14595
-      }
-      
       switch(r.nextInt(10)) {
         case 0:
         case 1:
diff --git 
a/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacetRefinement.java 
b/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacetRefinement.java
index cbc4d3f..2253745 100644
--- 
a/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacetRefinement.java
+++ 
b/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacetRefinement.java
@@ -20,6 +20,7 @@ package org.apache.solr.search.facet;
 import java.io.IOException;
 import java.util.Arrays;
 import java.util.List;
+import java.util.Locale;
 
 import org.apache.lucene.util.TestUtil;
 
@@ -1470,7 +1471,6 @@ public class TestJsonFacetRefinement extends 
SolrTestCaseHS {
     } // end method loop
   }
 
-  @AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/SOLR-14595";)
   public void testIndexAscRefineConsistency() throws Exception {
     initServers();
     final Client client = servers.getClient(random().nextInt());
@@ -1495,8 +1495,7 @@ public class TestJsonFacetRefinement extends 
SolrTestCaseHS {
     
     client.commit();
 
-    // TODO once SOLR-14595 is fixed, modify test to check full EnumSet, not 
just these two...
-    for (String m : Arrays.asList("smart", "enum")) {
+    for (FacetField.FacetMethod m : FacetField.FacetMethod.values()) {
       client.testJQ(params("q", "*:*", "rows", "0", "json.facet", "{"
                            + " cat : { type:terms, field:cat_s, limit:1, 
refine:true,"
                            + "         overrequest:0, " // to trigger parent 
refinement given small data set
@@ -1504,13 +1503,17 @@ public class TestJsonFacetRefinement extends 
SolrTestCaseHS {
                            + "         facet: { sum : 'sum(price_i)', "
                            + "                  child_"+m+" : { "
                            + "                     type:terms, field:child_s, 
limit:1, refine:true,"
-                           + "                     sort:'index asc', method:" 
+ m + " } "
+                           + "                     sort:'index asc', method:" 
+ m.toString().toLowerCase(Locale.ROOT) + " } "
                            + "       }} }"
                            )
                     , "facets=={ count:5"
                     + ", cat:{buckets:[ { val:X, count:3, sum:6.0, "
-                    + "                   child_"+m+":{buckets:[{val:A, 
count:1}]}}]}}"
+                    + "                   child_"+m+":{buckets:[{val:B, 
count:1}]}}]}}" // * (see below)
                     );
     }
+    // * NOTE: the intuitive value to return here would be "A"; but we're 
testing for _consistency_
+    // here, and artificially setting `overrequest:0`. With default 
overrequest, the intuitive
+    // "correct" behavior would indeed be achieved -- but we then wouldn't be 
triggering the behavior
+    // that this test is designed to evaluate.
   }
 }

Reply via email to