[solr] 20/49: SOLR-14514: add extra checks for picking 'stream' method in JSON facet

dweiss Wed, 10 Mar 2021 02:07:50 -0800

This is an automated email from the ASF dual-hosted git repository.

dweiss pushed a commit to branch jira/solr-13105-toMerge
in repository https://gitbox.apache.org/repos/asf/solr.git


commit 808f44e603447d9415f027e726d2435c8e5b1564
Author: Munendra S N <[email protected]>
AuthorDate: Thu Nov 26 12:01:51 2020 +0530

    SOLR-14514: add extra checks for picking 'stream' method in JSON facet
    
    missing, allBuckets, and numBuckets is not supported with stream method.
    So, avoiding picking stream method when any one of them is enabled even if
    facet sort is 'index asc'
---
 solr/CHANGES.txt                                   |  3 +++
 .../org/apache/solr/search/facet/FacetField.java   |  5 +++-
 .../solr/search/facet/TestCloudJSONFacetSKG.java   | 29 ++++++++--------------
 .../search/facet/TestCloudJSONFacetSKGEquiv.java   | 23 ++++++-----------
 .../apache/solr/search/facet/TestJsonFacets.java   | 28 +++++++++++----------
 solr/solr-ref-guide/src/json-facet-api.adoc        |  2 +-
 6 files changed, 42 insertions(+), 48 deletions(-)

diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index b6e74bb..a8555a2 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -310,6 +310,9 @@ Bug Fixes
 * SOLR-12539: Handle parsing of values for 'other', 'include', and 
'excludeTags' in JSON facets when specified as
   comma-separated values with extra spaces (hossman, Munendra S N)
 
+* SOLR-14514: Avoid picking 'stream' method in JSON facet when any of 
'allBuckets', 'numBuckets', and 'missing' parameters are enabled
+  (hossman, Munendra S N)
+
 Other Changes
 ---------------------
 
diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetField.java 
b/solr/core/src/java/org/apache/solr/search/facet/FacetField.java
index 728cd6e..fa1c085 100644
--- a/solr/core/src/java/org/apache/solr/search/facet/FacetField.java
+++ b/solr/core/src/java/org/apache/solr/search/facet/FacetField.java
@@ -105,7 +105,10 @@ public class FacetField extends FacetRequestSorted {
       method = FacetMethod.STREAM;
     }
     if (method == FacetMethod.STREAM && sf.indexed() && !ft.isPointField() &&
-        // wether we can use stream processing depends on wether this is a 
shard request, wether
+        // streaming doesn't support allBuckets, numBuckets or missing
+        // so, don't use stream processor if anyone of them is enabled
+        !(allBuckets || numBuckets || missing) &&
+        // whether we can use stream processing depends on whether this is a 
shard request, whether
         // re-sorting has been requested, and if the effective sort during 
collection is "index asc"
         ( fcontext.isShard()
           // for a shard request, the effective per-shard sort must be index 
asc
diff --git 
a/solr/core/src/test/org/apache/solr/search/facet/TestCloudJSONFacetSKG.java 
b/solr/core/src/test/org/apache/solr/search/facet/TestCloudJSONFacetSKG.java
index 0992af8..f2b67d5 100644
--- a/solr/core/src/test/org/apache/solr/search/facet/TestCloudJSONFacetSKG.java
+++ b/solr/core/src/test/org/apache/solr/search/facet/TestCloudJSONFacetSKG.java
@@ -45,18 +45,17 @@ import org.apache.solr.cloud.SolrCloudTestCase;
 import org.apache.solr.common.SolrInputDocument;
 import org.apache.solr.common.params.SolrParams;
 import org.apache.solr.common.util.NamedList;
-import static org.apache.solr.search.facet.RelatednessAgg.computeRelatedness;
-import static org.apache.solr.search.facet.RelatednessAgg.roundTo5Digits;
-
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
 import org.noggit.JSONUtil;
 import org.noggit.JSONWriter;
 import org.noggit.JSONWriter.Writable;
-
-import org.junit.AfterClass;
-import org.junit.BeforeClass;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import static org.apache.solr.search.facet.RelatednessAgg.computeRelatedness;
+import static org.apache.solr.search.facet.RelatednessAgg.roundTo5Digits;
+
 /** 
  * <p>
  * A randomized test of nested facets using the <code>relatedness()</code> 
function, that asserts the 
@@ -65,13 +64,13 @@ import org.slf4j.LoggerFactory;
  * <p>
  * Note that unlike normal facet "count" verification, using a high limit + 
overrequest isn't a substitute 
  * for refinement in order to ensure accurate "skg" computation across shards. 
 For that reason, this 
- * tests forces <code>refine: true</code> (unlike {@link 
TestCloudJSONFacetJoinDomain}) and specifices a 
- * <code>domain: { 'query':'*:*' }</code> for every facet, in order to 
garuntee that all shards 
+ * tests forces <code>refine: true</code> (unlike {@link 
TestCloudJSONFacetJoinDomain}) and specifies a
+ * <code>domain: { 'query':'*:*' }</code> for every facet, in order to 
guarantee that all shards
  * participate in all facets, so that the popularity &amp; relatedness values 
returned can be proven 
  * with validation requests.
  * </p>
  * <p>
- * (Refinement alone is not enough. Using the '*:*' query as the facet domain 
is neccessary to 
+ * (Refinement alone is not enough. Using the '*:*' query as the facet domain 
is necessary to
  * prevent situations where a single shardX may return candidate bucket with 
no child-buckets due to 
  * the normal facet intersections, but when refined on other shardY(s), can 
produce "high scoring" 
  * SKG child-buckets, which would then be missing the foreground/background 
"size" contributions from 
@@ -156,7 +155,7 @@ public class TestCloudJSONFacetSKG extends 
SolrCloudTestCase {
       SolrInputDocument doc = sdoc("id", ""+id);
       for (int fieldNum = 0; fieldNum < MAX_FIELD_NUM; fieldNum++) {
         // NOTE: we ensure every doc has at least one value in each field
-        // that way, if a term is returned for a parent there there is 
garunteed to be at least one
+        // that way, if a term is returned for a parent there there is 
guaranteed to be at least one
         // one term in the child facet as well.
         //
         // otherwise, we'd face the risk of a single shardX returning 
parentTermX as a top term for
@@ -226,7 +225,7 @@ public class TestCloudJSONFacetSKG extends 
SolrCloudTestCase {
 
   /**
    * Given a (random) field number, returns a random (integer based) value for 
that field.
-   * NOTE: The number of unique values in each field is constant acording to 
{@link #UNIQUE_FIELD_VALS}
+   * NOTE: The number of unique values in each field is constant according to 
{@link #UNIQUE_FIELD_VALS}
    * but the precise <em>range</em> of values will vary for each unique field 
number, such that cross field joins 
    * will match fewer documents based on how far apart the field numbers are.
    *
@@ -279,7 +278,7 @@ public class TestCloudJSONFacetSKG extends 
SolrCloudTestCase {
       
       // NOTE that these two queries & facets *should* effectively identical 
given that the
       // very large limit value is big enough no shard will ever return that 
may terms,
-      // but the "limit=-1" case it actaully triggers slightly different code 
paths
+      // but the "limit=-1" case it actually triggers slightly different code 
paths
       // because it causes FacetField.returnsPartial() to be "true"
       for (int limit : new int[] { 999999999, -1 }) {
         Map<String,TermFacet> facets = new LinkedHashMap<>();
@@ -769,14 +768,8 @@ public class TestCloudJSONFacetSKG extends 
SolrCloudTestCase {
      *
      *
      * @return a Boolean, may be null
-     * @see <a 
href="https://issues.apache.org/jira/browse/SOLR-14514";>SOLR-14514: allBuckets 
ignored by method:stream</a>
      */
     public static Boolean randomAllBucketsParam(final Random r, final String 
sort) {
-
-      if ("index asc".equals(sort)) {
-        return null;
-      }
-      
       switch(r.nextInt(4)) {
         case 0: return true;
         case 1: return false;
diff --git 
a/solr/core/src/test/org/apache/solr/search/facet/TestCloudJSONFacetSKGEquiv.java
 
b/solr/core/src/test/org/apache/solr/search/facet/TestCloudJSONFacetSKGEquiv.java
index eb68662..8e09593 100644
--- 
a/solr/core/src/test/org/apache/solr/search/facet/TestCloudJSONFacetSKGEquiv.java
+++ 
b/solr/core/src/test/org/apache/solr/search/facet/TestCloudJSONFacetSKGEquiv.java
@@ -20,8 +20,8 @@ import java.io.IOException;
 import java.lang.invoke.MethodHandles;
 import java.nio.file.Path;
 import java.nio.file.Paths;
-import java.util.Arrays;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.Collections;
 import java.util.EnumSet;
 import java.util.LinkedHashMap;
@@ -47,22 +47,21 @@ import org.apache.solr.common.SolrInputDocument;
 import org.apache.solr.common.params.ModifiableSolrParams;
 import org.apache.solr.common.params.SolrParams;
 import org.apache.solr.common.util.NamedList;
-import static org.apache.solr.search.facet.FacetField.FacetMethod;
-import static 
org.apache.solr.search.facet.SlotAcc.SweepingCountSlotAcc.SWEEP_COLLECTION_DEBUG_KEY;
-
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
 import org.noggit.JSONUtil;
 import org.noggit.JSONWriter;
 import org.noggit.JSONWriter.Writable;
-  
-import org.junit.AfterClass;
-import org.junit.BeforeClass;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import static org.apache.solr.search.facet.FacetField.FacetMethod;
+import static 
org.apache.solr.search.facet.SlotAcc.SweepingCountSlotAcc.SWEEP_COLLECTION_DEBUG_KEY;
+
 /** 
  * <p>
  * A randomized test of nested facets using the <code>relatedness()</code> 
function, that asserts the 
- * results are consistent and equivilent regardless of what 
<code>method</code> (ie: FacetFieldProcessor) 
+ * results are consistent and equivalent regardless of what 
<code>method</code> (ie: FacetFieldProcessor)
  * and/or <code>{@value RelatednessAgg#SWEEP_COLLECTION}</code> option is 
requested.
  * </p>
  * <p>
@@ -71,7 +70,7 @@ import org.slf4j.LoggerFactory;
  * because this test does not attempt to prove the results with validation 
requests.
  * </p>
  * <p>
- * This test only concerns itself with the equivilency of results
+ * This test only concerns itself with the equivalency of results
  * </p>
  * 
  * @see TestCloudJSONFacetSKG
@@ -986,14 +985,8 @@ public class TestCloudJSONFacetSKGEquiv extends 
SolrCloudTestCase {
      * </p>
      *
      * @return a Boolean, may be null
-     * @see <a 
href="https://issues.apache.org/jira/browse/SOLR-14514";>SOLR-14514: allBuckets 
ignored by method:stream</a>
      */
     public static Boolean randomAllBucketsParam(final Random r, final String 
sort) {
-
-      if ("index asc".equals(sort)) {
-        return null;
-      }
-      
       switch(r.nextInt(4)) {
         case 0: return true;
         case 1: return false;
diff --git 
a/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java 
b/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java
index c5a7a1d..166968b 100644
--- a/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java
+++ b/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java
@@ -488,21 +488,17 @@ public class TestJsonFacets extends SolrTestCaseHS {
     }
 
     // relatedness shouldn't be computed for allBuckets, but it also shouldn't 
cause any problems
-    //
-    // NOTE: we can't test this with 'index asc' because STREAM processor
-    // (which test may randomize as default) doesn't support allBuckets
-    // see: https://issues.apache.org/jira/browse/SOLR-14514
-    //
     for (String sort : Arrays.asList("sort:'y desc'",
                                      "sort:'z desc'",
                                      "sort:'skg desc'",
+                                     "sort:'index asc'",
                                      "prelim_sort:'count desc', sort:'skg 
desc'")) {
-      // the relatedness score of each of our cat_s values is (conviniently) 
also alphabetical order,
+      // the relatedness score of each of our cat_s values is (conveniently) 
also alphabetical order,
       // (and the same order as 'sum(num_i) desc' & 'min(num_i) desc')
       //
       // So all of these re/sort options should produce identical output 
(since the num buckets is < limit)
       // - Testing "index" sort allows the randomized use of "stream" 
processor as default to be tested.
-      // - Testing (re)sorts on other stats sanity checks code paths where 
relatedness() is a "defered" Agg
+      // - Testing (re)sorts on other stats sanity checks code paths where 
relatedness() is a "deferred" Agg
       for (String limit : Arrays.asList(", ", ", limit:5, ", ", limit:-1, ")) {
         // results shouldn't change regardless of our limit param"
         assertJQ(req("q", "cat_s:[* TO *]", "rows", "0",
@@ -524,7 +520,7 @@ public class TestJsonFacets extends SolrTestCaseHS {
                  + "             background_popularity: 0.33333, },"
                  + "   }, "
                  + "   { val:'B', count:3, y:-3.0, z:-5, "
-                 + "     skg : { relatedness: 0.0, " // perfectly average and 
uncorrolated
+                 + "     skg : { relatedness: 0.0, " // perfectly average and 
uncorrelated
                  //+ "             foreground_count: 1, "
                  //+ "             foreground_size: 2, "
                  //+ "             background_count: 3, "
@@ -1003,11 +999,14 @@ public class TestJsonFacets extends SolrTestCaseHS {
     // test streaming
     assertJQ(req("q", "*:*", "rows", "0"
             , "json.facet", "{   cat:{terms:{field:'cat_s', method:stream }}" 
+ // won't stream; need sort:index asc
-                              ", cat2:{terms:{field:'cat_s', method:stream, 
sort:'index asc' }}" +
-                              ", cat3:{terms:{field:'cat_s', method:stream, 
sort:'index asc', mincount:3 }}" + // mincount
-                              ", cat4:{terms:{field:'cat_s', method:stream, 
sort:'index asc', prefix:B }}" + // prefix
-                              ", cat5:{terms:{field:'cat_s', method:stream, 
sort:'index asc', offset:1 }}" + // offset
-                " }"
+            ", cat2:{terms:{field:'cat_s', method:stream, sort:'index asc' }}" 
+
+            ", cat3:{terms:{field:'cat_s', method:stream, sort:'index asc', 
mincount:3 }}" + // mincount
+            ", cat4:{terms:{field:'cat_s', method:stream, sort:'index asc', 
prefix:B }}" + // prefix
+            ", cat5:{terms:{field:'cat_s', method:stream, sort:'index asc', 
offset:1 }}" + // offset
+            ", cat6:{terms:{field:'cat_s', method:stream, sort:'index asc', 
missing:true }}" + // missing
+            ", cat7:{terms:{field:'cat_s', method:stream, sort:'index asc', 
numBuckets:true }}" + // numBuckets
+            ", cat8:{terms:{field:'cat_s', method:stream, sort:'index asc', 
allBuckets:true }}" + // allBuckets
+            " }"
         )
         , "facets=={count:6 " +
             ", cat :{buckets:[{val:B, count:3},{val:A, count:2}]}" +
@@ -1015,6 +1014,9 @@ public class TestJsonFacets extends SolrTestCaseHS {
             ", cat3:{buckets:[{val:B, count:3}]}" +
             ", cat4:{buckets:[{val:B, count:3}]}" +
             ", cat5:{buckets:[{val:B, count:3}]}" +
+            ", cat6:{missing:{count:1}, buckets:[{val:A, count:2},{val:B, 
count:3}]}" +
+            ", cat7:{numBuckets:2, buckets:[{val:A, count:2},{val:B, 
count:3}]}" +
+            ", cat8:{allBuckets:{count:5}, buckets:[{val:A, count:2},{val:B, 
count:3}]}" +
             " }"
     );
 
diff --git a/solr/solr-ref-guide/src/json-facet-api.adoc 
b/solr/solr-ref-guide/src/json-facet-api.adoc
index bc336c3..5e83807 100644
--- a/solr/solr-ref-guide/src/json-facet-api.adoc
+++ b/solr/solr-ref-guide/src/json-facet-api.adoc
@@ -220,7 +220,7 @@ This parameter indicates the facet algorithm to use:
 * "uif" UnInvertedField, collect into ordinal array
 * "dvhash" DocValues, collect into hash - improves efficiency over high 
cardinality fields
 * "enum" TermsEnum then intersect DocSet (stream-able)
-* "stream" Presently equivalent to "enum"
+* "stream" Presently equivalent to "enum" - used for indexed, non-point fields 
with sort 'index asc' and allBuckets, numBuckets, missing disabled.
 * "smart" Pick the best method for the field type (this is the default)
 
 |prelim_sort |An optional parameter for specifying an approximation of the 
final `sort` to use during initial collection of top buckets when the 
<<json-facet-api.adoc#sorting-facets-by-nested-functions,`sort` parameter is 
very costly>>.

[solr] 20/49: SOLR-14514: add extra checks for picking 'stream' method in JSON facet

Reply via email to