frankgh commented on code in PR #34:
URL:
https://github.com/apache/cassandra-analytics/pull/34#discussion_r1467210673
##########
cassandra-analytics-core/src/main/java/org/apache/cassandra/spark/utils/RangeUtils.java:
##########
@@ -31,109 +31,137 @@
import com.google.common.collect.Range;
import org.apache.cassandra.bridge.TokenRange;
-import org.apache.cassandra.spark.data.partitioner.CassandraInstance;
+import org.apache.cassandra.spark.data.model.TokenOwner;
import org.apache.cassandra.spark.data.partitioner.Partitioner;
import org.jetbrains.annotations.NotNull;
/**
* Common Cassandra range operations on Guava ranges. Assumes ranges are not
wrapped around.
- * It's the responsibility of caller to unwrap ranges. For example, [100, 1]
should become
- * [100, MAX] and [MIN, 1]. MIN and MAX depend on {@link Partitioner}.
+ * It's the responsibility of caller to unwrap ranges. For example, {@code
(100, 1]} should become
+ * {@code (100, MAX]} and {@code (MIN, 1]}. MIN and MAX values depend on
{@link Partitioner}.
*/
public final class RangeUtils
{
private RangeUtils()
{
- throw new IllegalStateException(getClass() + " is static utility class
and shall not be instantiated");
}
- @NotNull
- private static BigInteger sizeOf(@NotNull Range<BigInteger> range)
+ public static BigInteger sizeOf(Range<BigInteger> range)
{
Preconditions.checkArgument(range.lowerEndpoint().compareTo(range.upperEndpoint())
<= 0,
"RangeUtils assume ranges are not
wrap-around");
+ Preconditions.checkArgument(range.lowerBoundType() == BoundType.OPEN
+ && range.upperBoundType() ==
BoundType.CLOSED,
+ "Input must be an open-closed range");
if (range.isEmpty())
{
return BigInteger.ZERO;
}
- BigInteger size =
range.upperEndpoint().subtract(range.lowerEndpoint()).add(BigInteger.ONE);
-
- if (range.lowerBoundType() == BoundType.OPEN)
- {
- size = size.subtract(BigInteger.ONE);
- }
-
- if (range.upperBoundType() == BoundType.OPEN)
- {
- size = size.subtract(BigInteger.ONE);
- }
-
- return size;
+ return range.upperEndpoint().subtract(range.lowerEndpoint());
}
/**
- * Splits the given range into equal sized small ranges. Number of splits
can be controlled by
- * numberOfSplits. If numberOfSplits are smaller than size of the range,
split size would be set to 1.
+ * Splits the given range into equal-sized small ranges. Number of splits
can be controlled by
+ * nrSplits. If nrSplits are smaller than size of the range, split size
would be set to 1, which is
+ * the minimum allowed. For example, if the input range is {@code (0, 1]}
and nrSplits is 10, the split
+ * process yields a single range. Because {@code (0, 1]} cannot be split
further.
*
- * This is best effort scheme, numberOfSplits not necessarily as promised
and not all splits may not be
- * exact same size.
+ * This is a best-effort scheme. The nrSplits is not necessarily as
promised, and not all splits may be
+ * the exact same size.
*
- * @param range the range to split
- * @param numberOfSplits the desired number of splits
- * @return the list of split ranges
+ * @param range the range to split
+ * @param nrSplits the number of sub-ranges into which the range should be
divided
+ * @return a list of sub-ranges
*/
- @NotNull
- public static List<Range<BigInteger>> split(@NotNull Range<BigInteger>
range, int numberOfSplits)
+ public static List<Range<BigInteger>> split(Range<BigInteger> range, int
nrSplits)
{
Preconditions.checkArgument(range.lowerEndpoint().compareTo(range.upperEndpoint())
<= 0,
"RangeUtils assume ranges are not
wrap-around");
+ Preconditions.checkArgument(range.lowerBoundType() == BoundType.OPEN
+ && range.upperBoundType() ==
BoundType.CLOSED,
+ "Input must be an open-closed range");
if (range.isEmpty())
{
return Collections.emptyList();
}
- // Make sure split size is not 0
- BigInteger splitSize =
sizeOf(range).divide(BigInteger.valueOf(numberOfSplits)).max(BigInteger.ONE);
+ if (nrSplits == 1 || sizeOf(range).equals(BigInteger.ONE))
+ {
+ // no split required; exit early
+ return Collections.singletonList(range);
+ }
- // Start from range lower endpoint and spit ranges of size splitSize,
until we cross the range
- BigInteger nextLowerEndpoint = range.lowerBoundType() ==
BoundType.CLOSED ? range.lowerEndpoint() :
range.lowerEndpoint().add(BigInteger.ONE);
+ Preconditions.checkArgument(nrSplits >= 1, "nrSplits must be greater
than or equal to 1");
+
+ // The following algorithm tries to get a more evenly-split ranges.
+ // For example, if we split the range (0, 11] into 4 sub-ranges, the
naive split yields the following:
+ // (0, 2], (2, 4], (4, 6], (6, 11]
Review Comment:
nice! can we add unit tests that capture this behavior?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]