kfaraz commented on a change in pull request #12046:
URL: https://github.com/apache/druid/pull/12046#discussion_r767594959
##########
File path:
core/src/main/java/org/apache/druid/timeline/partition/DimensionRangeShardSpec.java
##########
@@ -153,53 +153,65 @@ private boolean
isRangeSetSingletonWithVal(RangeSet<String> rangeSet, String val
return false;
}
return rangeSet.asRanges().equals(
- Collections.singleton(Range.closed(val, val))
+ Collections.singleton(Range.singleton(val))
);
}
+ /**
+ * Pruning scenarios at dimension i: (When the Effective domain is empty)
+ * 0) query domain left-aligns with start and right-aligns with end and
then deviates outside to align with neither
+ * 1) The query domain left-aligns with start until dimension i and then
deviates to be strictly less than start[i]
+ * 2) The query domain right-aligns with end until dimension i and then
deviates to be strictly greater than end[i]
+ *
+ * Immediate acceptance criteria at dimension i:
+ * Effective domain is non-empty and no longer aligns with any boundary
+ *
+ * @param domain The domain inferred from the query. Assumed to be non-emtpy
+ * @return boolean indicating if the segment needs to be considered or pruned
+ */
@Override
public boolean possibleInDomain(Map<String, RangeSet<String>> domain)
{
- // Trivial check for empty cartesian product
- for (RangeSet<String> domainForDimension: domain.values()) {
- if (domainForDimension.isEmpty()) {
- return false;
- }
- }
-
final StringTuple segmentStart = start == null ? new StringTuple(new
String[dimensions.size()]) : start;
final StringTuple segmentEnd = end == null ? new StringTuple(new
String[dimensions.size()]) : end;
- //Indicates if the start values of the previous dimensions marks the upper
boundary of the query domain
- boolean startIsGreatestInDomain = true;
- //Indicates if the end values of the previous dimensions marks the lower
boundary of the query domain
- boolean endIsLeastInDomain = true;
+
+ // Indicates if the effective domain is equivalent to start till the
previous dimension
+ boolean effectiveDomainIsStart = true;
+ // Indicates if the effective domain is equivalent to end till the
previous dimension
+ boolean effectiveDomainIsEnd = true;
+
for (int i = 0; i < dimensions.size(); i++) {
String dimension = dimensions.get(i);
RangeSet<String> queryDomainForDimension = domain.get(dimension);
if (queryDomainForDimension == null) {
queryDomainForDimension = TreeRangeSet.create();
queryDomainForDimension.add(Range.all());
}
- // Range to compare the domain against
+
+ // Compute the segment's range based on its metadata and (outward)
boundary alignment
Range<String> rangeTillSegmentBoundary = Range.all();
- if (startIsGreatestInDomain && segmentStart.get(i) != null) {
+ if (effectiveDomainIsStart && segmentStart.get(i) != null) {
rangeTillSegmentBoundary =
rangeTillSegmentBoundary.intersection(Range.atLeast(segmentStart.get(i)));
}
- if (endIsLeastInDomain && segmentEnd.get(i) != null) {
+ if (effectiveDomainIsEnd && segmentEnd.get(i) != null) {
rangeTillSegmentBoundary =
rangeTillSegmentBoundary.intersection(Range.atMost(segmentEnd.get(i)));
}
+
+ // EffectiveDomain = QueryDomain INTERSECTION SegmentRange
RangeSet<String> effectiveDomainForDimension =
queryDomainForDimension.subRangeSet(rangeTillSegmentBoundary);
// Prune segment because domain is out of range
if (effectiveDomainForDimension.isEmpty()) {
return false;
}
- // Update flags based on whether the effective domain lies on the
boundary of this dimension
- startIsGreatestInDomain = startIsGreatestInDomain
+
+ // Boundary aligment for current dimension
+ effectiveDomainIsStart = effectiveDomainIsStart
&&
isRangeSetSingletonWithVal(effectiveDomainForDimension, segmentStart.get(i));
- endIsLeastInDomain = endIsLeastInDomain
+ effectiveDomainIsEnd = effectiveDomainIsEnd
&&
isRangeSetSingletonWithVal(effectiveDomainForDimension, segmentEnd.get(i));
- // If the query domain overflows either boundary of the segment, we
cannot prune any further
- if (!startIsGreatestInDomain && !endIsLeastInDomain) {
+
+ // Query domain segues into segment boundary
Review comment:
"segue" could be ambiguous
##########
File path:
core/src/test/java/org/apache/druid/timeline/partition/DimensionRangeShardSpecTest.java
##########
@@ -297,6 +301,98 @@ public void testIsInChunk_withMultiValues()
));
}
+ @Test
+ public void testPossibleInDomain()
+ {
+ setDimensions("planet", "country", "city");
+
+ final StringTuple start = StringTuple.create("Earth", "France", "Paris");
+ final StringTuple end = StringTuple.create("Earth", "USA", "New York");
+
+ final RangeSet<String> emptySet = TreeRangeSet.create();
+ final RangeSet<String> universalSet = TreeRangeSet.create();
+ universalSet.add(Range.all());
+
+ ShardSpec shard = new DimensionRangeShardSpec(dimensions, start, end, 0,
null);
+ Map<String, RangeSet<String>> domain = new HashMap<>();
+ RangeSet<String> planetSet;
Review comment:
There don't seem to be any union cases, though.
##########
File path:
core/src/test/java/org/apache/druid/timeline/partition/DimensionRangeShardSpecTest.java
##########
@@ -297,6 +300,96 @@ public void testIsInChunk_withMultiValues()
));
}
+ @Test
+ public void testPossibleInDomain()
+ {
+ setDimensions("planet", "country", "city");
+
+ final StringTuple start = StringTuple.create("Earth", "France", "Paris");
+ final StringTuple end = StringTuple.create("Earth", "USA", "New York");
+
+ final RangeSet<String> universalSet = TreeRangeSet.create();
+ universalSet.add(Range.all());
+
+ ShardSpec shard = new DimensionRangeShardSpec(dimensions, start, end, 0,
null);
+ Map<String, RangeSet<String>> domain = new HashMap<>();
+ RangeSet<String> planetSet;
+ RangeSet<String> countrySet;
+ RangeSet<String> citySet;
+
+ // null * null * null === (-INF, INF) * (-INF, INF) * (-INF, INF)
+ assertTrue(shard.possibleInDomain(domain));
+
+ // (-INF, INF) * (-INF, INF) * (-INF, INF)
+ populateDomain(domain, universalSet, universalSet, universalSet);
+ assertTrue(shard.possibleInDomain(domain));
+
+ // {Earth} * (-INF, INF) * (-INF, INF)
+ planetSet = getUnion(Range.singleton("Earth"));
+ populateDomain(domain, planetSet, universalSet, universalSet);
+ assertTrue(shard.possibleInDomain(domain));
+
+ // (-INF, Earth) * (-INF, INF) * (-INF, INF)
+ planetSet = getUnion(Range.lessThan("Earth"));
+ populateDomain(domain, planetSet, universalSet, universalSet);
+ assertFalse(shard.possibleInDomain(domain));
+
+ // (-INF, INF) * (-INF, "France") * (-INF, INF)
+ countrySet = getUnion(Range.lessThan("France"));
+ populateDomain(domain, universalSet, countrySet, universalSet);
+ assertFalse(shard.possibleInDomain(domain));
+
+ // (-INF, INF) * (-INF, "France"] * Any non-empty set
+ countrySet = getUnion(Range.atMost("USA"));
+ populateDomain(domain, universalSet, countrySet, universalSet);
+ assertTrue(shard.possibleInDomain(domain));
+
+ // (-INF, INF) * (-INF, "France] * (-INF, Paris)
+ countrySet = getUnion(Range.atMost("France"));
+ citySet = getUnion(Range.lessThan("Paris"));
+ populateDomain(domain, universalSet, countrySet, citySet);
+ assertFalse(shard.possibleInDomain(domain));
+
+ // {Earth} * {USA} * {New York}
+ planetSet = getUnion(Range.singleton("Earth"));
+ countrySet = getUnion(Range.singleton("USA"));
+ citySet = getUnion(Range.singleton("New York"));
+ populateDomain(domain, planetSet, countrySet, citySet);
+ assertTrue(shard.possibleInDomain(domain));
+
+ // {Earth} * {USA} * (New York, INF)
+ planetSet = getUnion(Range.singleton("Earth"));
+ countrySet = getUnion(Range.singleton("USA"));
+ citySet = getUnion(Range.greaterThan("New York"));
+ populateDomain(domain, planetSet, countrySet, citySet);
+ assertFalse(shard.possibleInDomain(domain));
+
+ // {Earth} * {India} * Any Non-empty set
+ planetSet = getUnion(Range.singleton("Earth"));
+ countrySet = getUnion(Range.singleton("India"));
+ citySet = getUnion(Range.greaterThan("New York"));
+ populateDomain(domain, planetSet, countrySet, citySet);
+ assertTrue(shard.possibleInDomain(domain));
+ }
+
+ private RangeSet<String> getUnion(Range<String>... ranges)
+ {
+ RangeSet<String> unionSet = TreeRangeSet.create();
+ for (Range<String> range : ranges) {
+ unionSet.add(range);
+ }
+ return unionSet;
+ }
+
+ private void populateDomain(Map<String, RangeSet<String>> domain,
Review comment:
I guess it would be better if you just create a new domain map here and
return it, rather than re-using the same one repeatedly. It would make the test
cases cleaner.
##########
File path:
core/src/main/java/org/apache/druid/timeline/partition/DimensionRangeShardSpec.java
##########
@@ -145,29 +141,81 @@ private static ShardSpecLookup createLookup(List<?
extends ShardSpec> shardSpecs
return Collections.unmodifiableList(dimensions);
}
- private Range<String> getFirstDimRange()
+ /**
+ * Check if a given domain of Strings is a singleton set containing the
given value
+ * @param rangeSet Domain of Strings
+ * @param val Value of String
+ * @return rangeSet == {val}
+ */
+ private boolean isRangeSetSingletonWithVal(RangeSet<String> rangeSet, String
val)
{
- Range<String> range;
- if (firstDimStart == null && firstDimEnd == null) {
- range = Range.all();
- } else if (firstDimStart == null) {
- range = Range.atMost(firstDimEnd);
- } else if (firstDimEnd == null) {
- range = Range.atLeast(firstDimStart);
- } else {
- range = Range.closed(firstDimStart, firstDimEnd);
+ if (val == null) {
+ return false;
}
- return range;
+ return rangeSet.asRanges().equals(
+ Collections.singleton(Range.singleton(val))
+ );
}
+ /**
+ * Pruning scenarios at dimension i: (When the Effective domain is empty)
+ * 0) query domain left-aligns with start and right-aligns with end and
then deviates outside to align with neither
+ * 1) The query domain left-aligns with start until dimension i and then
deviates to be strictly less than start[i]
+ * 2) The query domain right-aligns with end until dimension i and then
deviates to be strictly greater than end[i]
+ *
+ * Immediate acceptance criteria at dimension i:
+ * Effective domain is non-empty and no longer aligns with any boundary
+ *
+ * @param domain The domain inferred from the query. Assumed to be non-emtpy
+ * @return boolean indicating if the segment needs to be considered or pruned
Review comment:
```suggestion
* @return true if this shard possibly has rows for the requested domain,
false otherwise
```
##########
File path:
core/src/test/java/org/apache/druid/timeline/partition/DimensionRangeShardSpecTest.java
##########
@@ -297,6 +300,96 @@ public void testIsInChunk_withMultiValues()
));
}
+ @Test
+ public void testPossibleInDomain()
+ {
+ setDimensions("planet", "country", "city");
+
+ final StringTuple start = StringTuple.create("Earth", "France", "Paris");
+ final StringTuple end = StringTuple.create("Earth", "USA", "New York");
+
+ final RangeSet<String> universalSet = TreeRangeSet.create();
+ universalSet.add(Range.all());
+
+ ShardSpec shard = new DimensionRangeShardSpec(dimensions, start, end, 0,
null);
+ Map<String, RangeSet<String>> domain = new HashMap<>();
+ RangeSet<String> planetSet;
+ RangeSet<String> countrySet;
+ RangeSet<String> citySet;
+
+ // null * null * null === (-INF, INF) * (-INF, INF) * (-INF, INF)
+ assertTrue(shard.possibleInDomain(domain));
+
+ // (-INF, INF) * (-INF, INF) * (-INF, INF)
+ populateDomain(domain, universalSet, universalSet, universalSet);
+ assertTrue(shard.possibleInDomain(domain));
+
+ // {Earth} * (-INF, INF) * (-INF, INF)
+ planetSet = getUnion(Range.singleton("Earth"));
+ populateDomain(domain, planetSet, universalSet, universalSet);
+ assertTrue(shard.possibleInDomain(domain));
+
+ // (-INF, Earth) * (-INF, INF) * (-INF, INF)
+ planetSet = getUnion(Range.lessThan("Earth"));
+ populateDomain(domain, planetSet, universalSet, universalSet);
+ assertFalse(shard.possibleInDomain(domain));
+
+ // (-INF, INF) * (-INF, "France") * (-INF, INF)
+ countrySet = getUnion(Range.lessThan("France"));
+ populateDomain(domain, universalSet, countrySet, universalSet);
+ assertFalse(shard.possibleInDomain(domain));
+
+ // (-INF, INF) * (-INF, "France"] * Any non-empty set
+ countrySet = getUnion(Range.atMost("USA"));
+ populateDomain(domain, universalSet, countrySet, universalSet);
+ assertTrue(shard.possibleInDomain(domain));
+
+ // (-INF, INF) * (-INF, "France] * (-INF, Paris)
+ countrySet = getUnion(Range.atMost("France"));
+ citySet = getUnion(Range.lessThan("Paris"));
+ populateDomain(domain, universalSet, countrySet, citySet);
+ assertFalse(shard.possibleInDomain(domain));
+
+ // {Earth} * {USA} * {New York}
+ planetSet = getUnion(Range.singleton("Earth"));
+ countrySet = getUnion(Range.singleton("USA"));
+ citySet = getUnion(Range.singleton("New York"));
+ populateDomain(domain, planetSet, countrySet, citySet);
+ assertTrue(shard.possibleInDomain(domain));
+
+ // {Earth} * {USA} * (New York, INF)
+ planetSet = getUnion(Range.singleton("Earth"));
+ countrySet = getUnion(Range.singleton("USA"));
+ citySet = getUnion(Range.greaterThan("New York"));
+ populateDomain(domain, planetSet, countrySet, citySet);
+ assertFalse(shard.possibleInDomain(domain));
+
+ // {Earth} * {India} * Any Non-empty set
+ planetSet = getUnion(Range.singleton("Earth"));
+ countrySet = getUnion(Range.singleton("India"));
+ citySet = getUnion(Range.greaterThan("New York"));
+ populateDomain(domain, planetSet, countrySet, citySet);
+ assertTrue(shard.possibleInDomain(domain));
+ }
+
+ private RangeSet<String> getUnion(Range<String>... ranges)
Review comment:
Please remove this method. I don't see this method ever being called
with multiple ranges i.e. the union never seems to be happening.
##########
File path:
core/src/main/java/org/apache/druid/timeline/partition/DimensionRangeShardSpec.java
##########
@@ -145,29 +141,81 @@ private static ShardSpecLookup createLookup(List<?
extends ShardSpec> shardSpecs
return Collections.unmodifiableList(dimensions);
}
- private Range<String> getFirstDimRange()
+ /**
+ * Check if a given domain of Strings is a singleton set containing the
given value
+ * @param rangeSet Domain of Strings
+ * @param val Value of String
+ * @return rangeSet == {val}
+ */
+ private boolean isRangeSetSingletonWithVal(RangeSet<String> rangeSet, String
val)
{
- Range<String> range;
- if (firstDimStart == null && firstDimEnd == null) {
- range = Range.all();
- } else if (firstDimStart == null) {
- range = Range.atMost(firstDimEnd);
- } else if (firstDimEnd == null) {
- range = Range.atLeast(firstDimStart);
- } else {
- range = Range.closed(firstDimStart, firstDimEnd);
+ if (val == null) {
+ return false;
}
- return range;
+ return rangeSet.asRanges().equals(
+ Collections.singleton(Range.singleton(val))
+ );
}
+ /**
Review comment:
I feel we could just skip this javadoc as it is an overridden method
anyway.
The class level javadoc would probably be a better place to talk about the
pruning advantages and strategy.
When describing the strategy, try to provide an overview of the idea first
and then dig into the specific cases (only if necessary).
##########
File path:
core/src/main/java/org/apache/druid/timeline/partition/DimensionRangeShardSpec.java
##########
@@ -145,29 +141,81 @@ private static ShardSpecLookup createLookup(List<?
extends ShardSpec> shardSpecs
return Collections.unmodifiableList(dimensions);
}
- private Range<String> getFirstDimRange()
+ /**
+ * Check if a given domain of Strings is a singleton set containing the
given value
+ * @param rangeSet Domain of Strings
+ * @param val Value of String
+ * @return rangeSet == {val}
+ */
+ private boolean isRangeSetSingletonWithVal(RangeSet<String> rangeSet, String
val)
{
- Range<String> range;
- if (firstDimStart == null && firstDimEnd == null) {
- range = Range.all();
- } else if (firstDimStart == null) {
- range = Range.atMost(firstDimEnd);
- } else if (firstDimEnd == null) {
- range = Range.atLeast(firstDimStart);
- } else {
- range = Range.closed(firstDimStart, firstDimEnd);
+ if (val == null) {
+ return false;
}
- return range;
+ return rangeSet.asRanges().equals(
+ Collections.singleton(Range.singleton(val))
+ );
}
+ /**
+ * Pruning scenarios at dimension i: (When the Effective domain is empty)
+ * 0) query domain left-aligns with start and right-aligns with end and
then deviates outside to align with neither
+ * 1) The query domain left-aligns with start until dimension i and then
deviates to be strictly less than start[i]
+ * 2) The query domain right-aligns with end until dimension i and then
deviates to be strictly greater than end[i]
+ *
+ * Immediate acceptance criteria at dimension i:
+ * Effective domain is non-empty and no longer aligns with any boundary
Review comment:
Maybe avoid using terms like `effective domain` in this javadoc as it is
not a generally defined term and appears only in the code here. So, for someone
only looking at the javadoc, it might not make a lot of sense.
##########
File path:
core/src/test/java/org/apache/druid/timeline/partition/DimensionRangeShardSpecTest.java
##########
@@ -297,6 +300,96 @@ public void testIsInChunk_withMultiValues()
));
}
+ @Test
+ public void testPossibleInDomain()
+ {
+ setDimensions("planet", "country", "city");
+
+ final StringTuple start = StringTuple.create("Earth", "France", "Paris");
+ final StringTuple end = StringTuple.create("Earth", "USA", "New York");
+
+ final RangeSet<String> universalSet = TreeRangeSet.create();
+ universalSet.add(Range.all());
+
+ ShardSpec shard = new DimensionRangeShardSpec(dimensions, start, end, 0,
null);
+ Map<String, RangeSet<String>> domain = new HashMap<>();
+ RangeSet<String> planetSet;
+ RangeSet<String> countrySet;
+ RangeSet<String> citySet;
+
+ // null * null * null === (-INF, INF) * (-INF, INF) * (-INF, INF)
+ assertTrue(shard.possibleInDomain(domain));
+
+ // (-INF, INF) * (-INF, INF) * (-INF, INF)
+ populateDomain(domain, universalSet, universalSet, universalSet);
+ assertTrue(shard.possibleInDomain(domain));
+
+ // {Earth} * (-INF, INF) * (-INF, INF)
+ planetSet = getUnion(Range.singleton("Earth"));
+ populateDomain(domain, planetSet, universalSet, universalSet);
+ assertTrue(shard.possibleInDomain(domain));
+
+ // (-INF, Earth) * (-INF, INF) * (-INF, INF)
+ planetSet = getUnion(Range.lessThan("Earth"));
+ populateDomain(domain, planetSet, universalSet, universalSet);
+ assertFalse(shard.possibleInDomain(domain));
+
+ // (-INF, INF) * (-INF, "France") * (-INF, INF)
+ countrySet = getUnion(Range.lessThan("France"));
+ populateDomain(domain, universalSet, countrySet, universalSet);
+ assertFalse(shard.possibleInDomain(domain));
+
+ // (-INF, INF) * (-INF, "France"] * Any non-empty set
+ countrySet = getUnion(Range.atMost("USA"));
+ populateDomain(domain, universalSet, countrySet, universalSet);
+ assertTrue(shard.possibleInDomain(domain));
+
+ // (-INF, INF) * (-INF, "France] * (-INF, Paris)
+ countrySet = getUnion(Range.atMost("France"));
+ citySet = getUnion(Range.lessThan("Paris"));
+ populateDomain(domain, universalSet, countrySet, citySet);
+ assertFalse(shard.possibleInDomain(domain));
+
+ // {Earth} * {USA} * {New York}
+ planetSet = getUnion(Range.singleton("Earth"));
+ countrySet = getUnion(Range.singleton("USA"));
+ citySet = getUnion(Range.singleton("New York"));
+ populateDomain(domain, planetSet, countrySet, citySet);
+ assertTrue(shard.possibleInDomain(domain));
+
+ // {Earth} * {USA} * (New York, INF)
+ planetSet = getUnion(Range.singleton("Earth"));
+ countrySet = getUnion(Range.singleton("USA"));
+ citySet = getUnion(Range.greaterThan("New York"));
+ populateDomain(domain, planetSet, countrySet, citySet);
+ assertFalse(shard.possibleInDomain(domain));
+
+ // {Earth} * {India} * Any Non-empty set
+ planetSet = getUnion(Range.singleton("Earth"));
+ countrySet = getUnion(Range.singleton("India"));
+ citySet = getUnion(Range.greaterThan("New York"));
+ populateDomain(domain, planetSet, countrySet, citySet);
Review comment:
This could be a little less verbose and perhaps more readable with the
following:
```suggestion
// {Earth} * {India} * Any Non-empty set
populateDomain(
domain,
Range.singleton("Earth"),
Range.singleton("India"),
Range.singleton("New York")
);
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]