kfaraz commented on a change in pull request #12046:
URL: https://github.com/apache/druid/pull/12046#discussion_r767594959



##########
File path: 
core/src/main/java/org/apache/druid/timeline/partition/DimensionRangeShardSpec.java
##########
@@ -153,53 +153,65 @@ private boolean 
isRangeSetSingletonWithVal(RangeSet<String> rangeSet, String val
       return false;
     }
     return rangeSet.asRanges().equals(
-        Collections.singleton(Range.closed(val, val))
+        Collections.singleton(Range.singleton(val))
     );
   }
 
+  /**
+   * Pruning scenarios at dimension i: (When the Effective domain is empty)
+   *  0) query domain left-aligns with start and right-aligns with end and 
then deviates outside to align with neither
+   *  1) The query domain left-aligns with start until dimension i and then 
deviates to be strictly less than start[i]
+   *  2) The query domain right-aligns with end until dimension i and then 
deviates to be strictly greater than end[i]
+   *
+   * Immediate acceptance criteria at dimension i:
+   *  Effective domain is non-empty and no longer aligns with any boundary
+   *
+   * @param domain The domain inferred from the query. Assumed to be non-emtpy
+   * @return boolean indicating if the segment needs to be considered or pruned
+   */
   @Override
   public boolean possibleInDomain(Map<String, RangeSet<String>> domain)
   {
-    // Trivial check for empty cartesian product
-    for (RangeSet<String> domainForDimension: domain.values()) {
-      if (domainForDimension.isEmpty()) {
-        return false;
-      }
-    }
-
     final StringTuple segmentStart = start == null ? new StringTuple(new 
String[dimensions.size()]) : start;
     final StringTuple segmentEnd = end == null ? new StringTuple(new 
String[dimensions.size()]) : end;
-    //Indicates if the start values of the previous dimensions marks the upper 
boundary of the query domain
-    boolean startIsGreatestInDomain = true;
-    //Indicates if the end values of the previous dimensions marks the lower 
boundary of the query domain
-    boolean endIsLeastInDomain = true;
+
+    // Indicates if the effective domain is equivalent to start till the 
previous dimension
+    boolean effectiveDomainIsStart = true;
+    // Indicates if the effective domain is equivalent to end till the 
previous dimension
+    boolean effectiveDomainIsEnd = true;
+
     for (int i = 0; i < dimensions.size(); i++) {
       String dimension = dimensions.get(i);
       RangeSet<String> queryDomainForDimension = domain.get(dimension);
       if (queryDomainForDimension == null) {
         queryDomainForDimension = TreeRangeSet.create();
         queryDomainForDimension.add(Range.all());
       }
-      // Range to compare the domain against
+
+      // Compute the segment's range based on its metadata and (outward) 
boundary alignment
       Range<String> rangeTillSegmentBoundary = Range.all();
-      if (startIsGreatestInDomain && segmentStart.get(i) != null) {
+      if (effectiveDomainIsStart && segmentStart.get(i) != null) {
         rangeTillSegmentBoundary = 
rangeTillSegmentBoundary.intersection(Range.atLeast(segmentStart.get(i)));
       }
-      if (endIsLeastInDomain && segmentEnd.get(i) != null) {
+      if (effectiveDomainIsEnd && segmentEnd.get(i) != null) {
         rangeTillSegmentBoundary = 
rangeTillSegmentBoundary.intersection(Range.atMost(segmentEnd.get(i)));
       }
+
+      // EffectiveDomain = QueryDomain INTERSECTION SegmentRange
       RangeSet<String> effectiveDomainForDimension = 
queryDomainForDimension.subRangeSet(rangeTillSegmentBoundary);
       // Prune segment because domain is out of range
       if (effectiveDomainForDimension.isEmpty()) {
         return false;
       }
-      // Update flags based on whether the effective domain lies on the 
boundary of this dimension
-      startIsGreatestInDomain = startIsGreatestInDomain
+
+      // Boundary aligment for current dimension
+      effectiveDomainIsStart = effectiveDomainIsStart
                                 && 
isRangeSetSingletonWithVal(effectiveDomainForDimension, segmentStart.get(i));
-      endIsLeastInDomain = endIsLeastInDomain
+      effectiveDomainIsEnd = effectiveDomainIsEnd
                            && 
isRangeSetSingletonWithVal(effectiveDomainForDimension, segmentEnd.get(i));
-      // If the query domain overflows either boundary of the segment, we 
cannot prune any further
-      if (!startIsGreatestInDomain && !endIsLeastInDomain) {
+
+      // Query domain segues into segment boundary

Review comment:
       "segue" could be ambiguous

##########
File path: 
core/src/test/java/org/apache/druid/timeline/partition/DimensionRangeShardSpecTest.java
##########
@@ -297,6 +301,98 @@ public void testIsInChunk_withMultiValues()
     ));
   }
 
+  @Test
+  public void testPossibleInDomain()
+  {
+    setDimensions("planet", "country", "city");
+
+    final StringTuple start = StringTuple.create("Earth", "France", "Paris");
+    final StringTuple end = StringTuple.create("Earth", "USA", "New York");
+
+    final RangeSet<String> emptySet = TreeRangeSet.create();
+    final RangeSet<String> universalSet = TreeRangeSet.create();
+    universalSet.add(Range.all());
+
+    ShardSpec shard = new DimensionRangeShardSpec(dimensions, start, end, 0, 
null);
+    Map<String, RangeSet<String>> domain = new HashMap<>();
+    RangeSet<String> planetSet;

Review comment:
       There don't seem to be any union cases, though.

##########
File path: 
core/src/test/java/org/apache/druid/timeline/partition/DimensionRangeShardSpecTest.java
##########
@@ -297,6 +300,96 @@ public void testIsInChunk_withMultiValues()
     ));
   }
 
+  @Test
+  public void testPossibleInDomain()
+  {
+    setDimensions("planet", "country", "city");
+
+    final StringTuple start = StringTuple.create("Earth", "France", "Paris");
+    final StringTuple end = StringTuple.create("Earth", "USA", "New York");
+
+    final RangeSet<String> universalSet = TreeRangeSet.create();
+    universalSet.add(Range.all());
+
+    ShardSpec shard = new DimensionRangeShardSpec(dimensions, start, end, 0, 
null);
+    Map<String, RangeSet<String>> domain = new HashMap<>();
+    RangeSet<String> planetSet;
+    RangeSet<String> countrySet;
+    RangeSet<String> citySet;
+
+    // null * null * null === (-INF, INF) * (-INF, INF) * (-INF, INF)
+    assertTrue(shard.possibleInDomain(domain));
+
+    // (-INF, INF) * (-INF, INF) * (-INF, INF)
+    populateDomain(domain, universalSet, universalSet, universalSet);
+    assertTrue(shard.possibleInDomain(domain));
+
+    // {Earth} * (-INF, INF) * (-INF, INF)
+    planetSet = getUnion(Range.singleton("Earth"));
+    populateDomain(domain, planetSet, universalSet, universalSet);
+    assertTrue(shard.possibleInDomain(domain));
+
+    // (-INF, Earth) * (-INF, INF) * (-INF, INF)
+    planetSet = getUnion(Range.lessThan("Earth"));
+    populateDomain(domain, planetSet, universalSet, universalSet);
+    assertFalse(shard.possibleInDomain(domain));
+
+    // (-INF, INF) * (-INF, "France") * (-INF, INF)
+    countrySet = getUnion(Range.lessThan("France"));
+    populateDomain(domain, universalSet, countrySet, universalSet);
+    assertFalse(shard.possibleInDomain(domain));
+
+    // (-INF, INF) * (-INF, "France"] * Any non-empty set
+    countrySet = getUnion(Range.atMost("USA"));
+    populateDomain(domain, universalSet, countrySet, universalSet);
+    assertTrue(shard.possibleInDomain(domain));
+
+    // (-INF, INF) * (-INF, "France] * (-INF, Paris)
+    countrySet = getUnion(Range.atMost("France"));
+    citySet = getUnion(Range.lessThan("Paris"));
+    populateDomain(domain, universalSet, countrySet, citySet);
+    assertFalse(shard.possibleInDomain(domain));
+
+    // {Earth} * {USA} * {New York}
+    planetSet = getUnion(Range.singleton("Earth"));
+    countrySet = getUnion(Range.singleton("USA"));
+    citySet = getUnion(Range.singleton("New York"));
+    populateDomain(domain, planetSet, countrySet, citySet);
+    assertTrue(shard.possibleInDomain(domain));
+
+    // {Earth} * {USA} * (New York, INF)
+    planetSet = getUnion(Range.singleton("Earth"));
+    countrySet = getUnion(Range.singleton("USA"));
+    citySet = getUnion(Range.greaterThan("New York"));
+    populateDomain(domain, planetSet, countrySet, citySet);
+    assertFalse(shard.possibleInDomain(domain));
+
+    // {Earth} * {India} * Any Non-empty set
+    planetSet = getUnion(Range.singleton("Earth"));
+    countrySet = getUnion(Range.singleton("India"));
+    citySet = getUnion(Range.greaterThan("New York"));
+    populateDomain(domain, planetSet, countrySet, citySet);
+    assertTrue(shard.possibleInDomain(domain));
+  }
+
+  private RangeSet<String> getUnion(Range<String>... ranges)
+  {
+    RangeSet<String> unionSet = TreeRangeSet.create();
+    for (Range<String> range : ranges) {
+      unionSet.add(range);
+    }
+    return unionSet;
+  }
+
+  private void populateDomain(Map<String, RangeSet<String>> domain,

Review comment:
       I guess it would be better if you just create a new domain map here and 
return it, rather than re-using the same one repeatedly. It would make the test 
cases cleaner.

##########
File path: 
core/src/main/java/org/apache/druid/timeline/partition/DimensionRangeShardSpec.java
##########
@@ -145,29 +141,81 @@ private static ShardSpecLookup createLookup(List<? 
extends ShardSpec> shardSpecs
     return Collections.unmodifiableList(dimensions);
   }
 
-  private Range<String> getFirstDimRange()
+  /**
+   * Check if a given domain of Strings is a singleton set containing the 
given value
+   * @param rangeSet Domain of Strings
+   * @param val Value of String
+   * @return rangeSet == {val}
+   */
+  private boolean isRangeSetSingletonWithVal(RangeSet<String> rangeSet, String 
val)
   {
-    Range<String> range;
-    if (firstDimStart == null && firstDimEnd == null) {
-      range = Range.all();
-    } else if (firstDimStart == null) {
-      range = Range.atMost(firstDimEnd);
-    } else if (firstDimEnd == null) {
-      range = Range.atLeast(firstDimStart);
-    } else {
-      range = Range.closed(firstDimStart, firstDimEnd);
+    if (val == null) {
+      return false;
     }
-    return range;
+    return rangeSet.asRanges().equals(
+        Collections.singleton(Range.singleton(val))
+    );
   }
 
+  /**
+   * Pruning scenarios at dimension i: (When the Effective domain is empty)
+   *  0) query domain left-aligns with start and right-aligns with end and 
then deviates outside to align with neither
+   *  1) The query domain left-aligns with start until dimension i and then 
deviates to be strictly less than start[i]
+   *  2) The query domain right-aligns with end until dimension i and then 
deviates to be strictly greater than end[i]
+   *
+   * Immediate acceptance criteria at dimension i:
+   *  Effective domain is non-empty and no longer aligns with any boundary
+   *
+   * @param domain The domain inferred from the query. Assumed to be non-emtpy
+   * @return boolean indicating if the segment needs to be considered or pruned

Review comment:
       ```suggestion
      * @return true if this shard possibly has rows for the requested domain, 
false otherwise
   ```

##########
File path: 
core/src/test/java/org/apache/druid/timeline/partition/DimensionRangeShardSpecTest.java
##########
@@ -297,6 +300,96 @@ public void testIsInChunk_withMultiValues()
     ));
   }
 
+  @Test
+  public void testPossibleInDomain()
+  {
+    setDimensions("planet", "country", "city");
+
+    final StringTuple start = StringTuple.create("Earth", "France", "Paris");
+    final StringTuple end = StringTuple.create("Earth", "USA", "New York");
+
+    final RangeSet<String> universalSet = TreeRangeSet.create();
+    universalSet.add(Range.all());
+
+    ShardSpec shard = new DimensionRangeShardSpec(dimensions, start, end, 0, 
null);
+    Map<String, RangeSet<String>> domain = new HashMap<>();
+    RangeSet<String> planetSet;
+    RangeSet<String> countrySet;
+    RangeSet<String> citySet;
+
+    // null * null * null === (-INF, INF) * (-INF, INF) * (-INF, INF)
+    assertTrue(shard.possibleInDomain(domain));
+
+    // (-INF, INF) * (-INF, INF) * (-INF, INF)
+    populateDomain(domain, universalSet, universalSet, universalSet);
+    assertTrue(shard.possibleInDomain(domain));
+
+    // {Earth} * (-INF, INF) * (-INF, INF)
+    planetSet = getUnion(Range.singleton("Earth"));
+    populateDomain(domain, planetSet, universalSet, universalSet);
+    assertTrue(shard.possibleInDomain(domain));
+
+    // (-INF, Earth) * (-INF, INF) * (-INF, INF)
+    planetSet = getUnion(Range.lessThan("Earth"));
+    populateDomain(domain, planetSet, universalSet, universalSet);
+    assertFalse(shard.possibleInDomain(domain));
+
+    // (-INF, INF) * (-INF, "France") * (-INF, INF)
+    countrySet = getUnion(Range.lessThan("France"));
+    populateDomain(domain, universalSet, countrySet, universalSet);
+    assertFalse(shard.possibleInDomain(domain));
+
+    // (-INF, INF) * (-INF, "France"] * Any non-empty set
+    countrySet = getUnion(Range.atMost("USA"));
+    populateDomain(domain, universalSet, countrySet, universalSet);
+    assertTrue(shard.possibleInDomain(domain));
+
+    // (-INF, INF) * (-INF, "France] * (-INF, Paris)
+    countrySet = getUnion(Range.atMost("France"));
+    citySet = getUnion(Range.lessThan("Paris"));
+    populateDomain(domain, universalSet, countrySet, citySet);
+    assertFalse(shard.possibleInDomain(domain));
+
+    // {Earth} * {USA} * {New York}
+    planetSet = getUnion(Range.singleton("Earth"));
+    countrySet = getUnion(Range.singleton("USA"));
+    citySet = getUnion(Range.singleton("New York"));
+    populateDomain(domain, planetSet, countrySet, citySet);
+    assertTrue(shard.possibleInDomain(domain));
+
+    // {Earth} * {USA} * (New York, INF)
+    planetSet = getUnion(Range.singleton("Earth"));
+    countrySet = getUnion(Range.singleton("USA"));
+    citySet = getUnion(Range.greaterThan("New York"));
+    populateDomain(domain, planetSet, countrySet, citySet);
+    assertFalse(shard.possibleInDomain(domain));
+
+    // {Earth} * {India} * Any Non-empty set
+    planetSet = getUnion(Range.singleton("Earth"));
+    countrySet = getUnion(Range.singleton("India"));
+    citySet = getUnion(Range.greaterThan("New York"));
+    populateDomain(domain, planetSet, countrySet, citySet);
+    assertTrue(shard.possibleInDomain(domain));
+  }
+
+  private RangeSet<String> getUnion(Range<String>... ranges)

Review comment:
       Please remove this method. I don't see this method ever being called 
with multiple ranges i.e. the union never seems to be happening.

##########
File path: 
core/src/main/java/org/apache/druid/timeline/partition/DimensionRangeShardSpec.java
##########
@@ -145,29 +141,81 @@ private static ShardSpecLookup createLookup(List<? 
extends ShardSpec> shardSpecs
     return Collections.unmodifiableList(dimensions);
   }
 
-  private Range<String> getFirstDimRange()
+  /**
+   * Check if a given domain of Strings is a singleton set containing the 
given value
+   * @param rangeSet Domain of Strings
+   * @param val Value of String
+   * @return rangeSet == {val}
+   */
+  private boolean isRangeSetSingletonWithVal(RangeSet<String> rangeSet, String 
val)
   {
-    Range<String> range;
-    if (firstDimStart == null && firstDimEnd == null) {
-      range = Range.all();
-    } else if (firstDimStart == null) {
-      range = Range.atMost(firstDimEnd);
-    } else if (firstDimEnd == null) {
-      range = Range.atLeast(firstDimStart);
-    } else {
-      range = Range.closed(firstDimStart, firstDimEnd);
+    if (val == null) {
+      return false;
     }
-    return range;
+    return rangeSet.asRanges().equals(
+        Collections.singleton(Range.singleton(val))
+    );
   }
 
+  /**

Review comment:
       I feel we could just skip this javadoc as it is an overridden method 
anyway.
   The class level javadoc would probably be a better place to talk about the 
pruning advantages and strategy.
   
   When describing the strategy, try to provide an overview of the idea first 
and then dig into the specific cases (only if necessary).

##########
File path: 
core/src/main/java/org/apache/druid/timeline/partition/DimensionRangeShardSpec.java
##########
@@ -145,29 +141,81 @@ private static ShardSpecLookup createLookup(List<? 
extends ShardSpec> shardSpecs
     return Collections.unmodifiableList(dimensions);
   }
 
-  private Range<String> getFirstDimRange()
+  /**
+   * Check if a given domain of Strings is a singleton set containing the 
given value
+   * @param rangeSet Domain of Strings
+   * @param val Value of String
+   * @return rangeSet == {val}
+   */
+  private boolean isRangeSetSingletonWithVal(RangeSet<String> rangeSet, String 
val)
   {
-    Range<String> range;
-    if (firstDimStart == null && firstDimEnd == null) {
-      range = Range.all();
-    } else if (firstDimStart == null) {
-      range = Range.atMost(firstDimEnd);
-    } else if (firstDimEnd == null) {
-      range = Range.atLeast(firstDimStart);
-    } else {
-      range = Range.closed(firstDimStart, firstDimEnd);
+    if (val == null) {
+      return false;
     }
-    return range;
+    return rangeSet.asRanges().equals(
+        Collections.singleton(Range.singleton(val))
+    );
   }
 
+  /**
+   * Pruning scenarios at dimension i: (When the Effective domain is empty)
+   *  0) query domain left-aligns with start and right-aligns with end and 
then deviates outside to align with neither
+   *  1) The query domain left-aligns with start until dimension i and then 
deviates to be strictly less than start[i]
+   *  2) The query domain right-aligns with end until dimension i and then 
deviates to be strictly greater than end[i]
+   *
+   * Immediate acceptance criteria at dimension i:
+   *  Effective domain is non-empty and no longer aligns with any boundary

Review comment:
       Maybe avoid using terms like `effective domain` in this javadoc as it is 
not a generally defined term and appears only in the code here. So, for someone 
only looking at the javadoc, it might not make a lot of sense.

##########
File path: 
core/src/test/java/org/apache/druid/timeline/partition/DimensionRangeShardSpecTest.java
##########
@@ -297,6 +300,96 @@ public void testIsInChunk_withMultiValues()
     ));
   }
 
+  @Test
+  public void testPossibleInDomain()
+  {
+    setDimensions("planet", "country", "city");
+
+    final StringTuple start = StringTuple.create("Earth", "France", "Paris");
+    final StringTuple end = StringTuple.create("Earth", "USA", "New York");
+
+    final RangeSet<String> universalSet = TreeRangeSet.create();
+    universalSet.add(Range.all());
+
+    ShardSpec shard = new DimensionRangeShardSpec(dimensions, start, end, 0, 
null);
+    Map<String, RangeSet<String>> domain = new HashMap<>();
+    RangeSet<String> planetSet;
+    RangeSet<String> countrySet;
+    RangeSet<String> citySet;
+
+    // null * null * null === (-INF, INF) * (-INF, INF) * (-INF, INF)
+    assertTrue(shard.possibleInDomain(domain));
+
+    // (-INF, INF) * (-INF, INF) * (-INF, INF)
+    populateDomain(domain, universalSet, universalSet, universalSet);
+    assertTrue(shard.possibleInDomain(domain));
+
+    // {Earth} * (-INF, INF) * (-INF, INF)
+    planetSet = getUnion(Range.singleton("Earth"));
+    populateDomain(domain, planetSet, universalSet, universalSet);
+    assertTrue(shard.possibleInDomain(domain));
+
+    // (-INF, Earth) * (-INF, INF) * (-INF, INF)
+    planetSet = getUnion(Range.lessThan("Earth"));
+    populateDomain(domain, planetSet, universalSet, universalSet);
+    assertFalse(shard.possibleInDomain(domain));
+
+    // (-INF, INF) * (-INF, "France") * (-INF, INF)
+    countrySet = getUnion(Range.lessThan("France"));
+    populateDomain(domain, universalSet, countrySet, universalSet);
+    assertFalse(shard.possibleInDomain(domain));
+
+    // (-INF, INF) * (-INF, "France"] * Any non-empty set
+    countrySet = getUnion(Range.atMost("USA"));
+    populateDomain(domain, universalSet, countrySet, universalSet);
+    assertTrue(shard.possibleInDomain(domain));
+
+    // (-INF, INF) * (-INF, "France] * (-INF, Paris)
+    countrySet = getUnion(Range.atMost("France"));
+    citySet = getUnion(Range.lessThan("Paris"));
+    populateDomain(domain, universalSet, countrySet, citySet);
+    assertFalse(shard.possibleInDomain(domain));
+
+    // {Earth} * {USA} * {New York}
+    planetSet = getUnion(Range.singleton("Earth"));
+    countrySet = getUnion(Range.singleton("USA"));
+    citySet = getUnion(Range.singleton("New York"));
+    populateDomain(domain, planetSet, countrySet, citySet);
+    assertTrue(shard.possibleInDomain(domain));
+
+    // {Earth} * {USA} * (New York, INF)
+    planetSet = getUnion(Range.singleton("Earth"));
+    countrySet = getUnion(Range.singleton("USA"));
+    citySet = getUnion(Range.greaterThan("New York"));
+    populateDomain(domain, planetSet, countrySet, citySet);
+    assertFalse(shard.possibleInDomain(domain));
+
+    // {Earth} * {India} * Any Non-empty set
+    planetSet = getUnion(Range.singleton("Earth"));
+    countrySet = getUnion(Range.singleton("India"));
+    citySet = getUnion(Range.greaterThan("New York"));
+    populateDomain(domain, planetSet, countrySet, citySet);

Review comment:
       This could be a little less verbose and perhaps more readable with the 
following:
   
   ```suggestion
       // {Earth} * {India} * Any Non-empty set
       populateDomain(
         domain,
         Range.singleton("Earth"),
         Range.singleton("India"),
         Range.singleton("New York")
       );
   ```




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to