smiklosovic commented on code in PR #2836:
URL: https://github.com/apache/cassandra/pull/2836#discussion_r1381893798
##########
src/java/org/apache/cassandra/db/compaction/unified/Controller.java:
##########
@@ -200,38 +246,112 @@ public int getThreshold(int index) {
}
/**
- * Calculate the number of shards to split the local token space in for
the given sstable density.
- * This is calculated as a power-of-two multiple of baseShardCount, so
that the expected size of resulting sstables
- * is between sqrt(0.5) * targetSSTableSize and sqrt(2) *
targetSSTableSize, with a minimum of baseShardCount shards
- * for smaller sstables.
- *
- * Note that to get the sstables resulting from this splitting within the
bounds, the density argument must be
+ * Calculate the number of shards to split the local token space in for
the given SSTable density.
+ * This is calculated as a power-of-two multiple of baseShardCount, so
that the expected size of resulting SSTables
+ * is between sqrt(0.5) and sqrt(2) times the target size, which is
calculated from targetSSTableSize to grow
+ * at the given sstableGrowthModifier of the exponential growth of the
density.
+ * <p>
+ * Additionally, if a minimum SSTable size is set, we can go below the
baseShardCount when that would result in
+ * SSTables smaller than that minimum. Note that in the case of a
non-power-of-two base count, we will only
+ * split to divisors of baseShardCount.
+ * <p>
+ * Note that to get the SSTables resulting from this splitting within the
bounds, the density argument must be
* normalized to the span that is being split. In other words, if no disks
are defined, the density should be
* scaled by the token coverage of the locally-owned ranges. If multiple
data directories are defined, the density
- * should be scaled by the token coverage of the respective data
directory. That is localDensity = size / span,
+ * should be scaled by the token coverage of the respective data
directory. That is, localDensity = size / span,
* where the span is normalized so that span = 1 when the data covers the
range that is being split.
*/
public int getNumShards(double localDensity)
{
- // How many we would have to aim for the target size. Divided by the
base shard count, so that we can ensure
- // the result is a multiple of it by multiplying back below.
- double count = localDensity / (targetSSTableSize * INVERSE_SQRT_2 *
baseShardCount);
- if (count > MAX_SHARD_SPLIT)
- count = MAX_SHARD_SPLIT;
- assert !(count < 0); // Must be positive, 0 or NaN, which should
translate to baseShardCount
-
- // Make it a power of two multiple of the base count so that split
points for lower levels remain split points
- // for higher.
- // The conversion to int and highestOneBit round down, for which we
compensate by using the sqrt(0.5) multiplier
- // applied above.
- // Setting the bottom bit to 1 ensures the result is at least
baseShardCount.
- int shards = baseShardCount * Integer.highestOneBit((int) count | 1);
- logger.debug("Shard count {} for density {}, {} times target {}",
- shards,
- FBUtilities.prettyPrintBinary(localDensity, "B", " "),
- localDensity / targetSSTableSize,
- FBUtilities.prettyPrintBinary(targetSSTableSize, "B", "
"));
- return shards;
+ int shards;
+ // Check the minimum size first.
+ if (minSSTableSize > 0)
+ {
+ double count = localDensity / minSSTableSize;
+ // Minimum size only applies if it is smaller than the base count.
+ // Note: the minimum size cannot be larger than the target size's
minimum.
+ if (count < baseShardCount)
+ {
+ // Make it a power of two, rounding down so that sstables are
greater in size than the min.
+ // Setting the bottom bit to 1 ensures the result is at least
1.
+ // If baseShardCount is not a power of 2, split only to powers
of two that are divisors of baseShardCount so boundaries match higher levels
+ shards = Math.min(Integer.highestOneBit((int) count | 1),
baseShardCount & -baseShardCount);
+ if (logger.isDebugEnabled())
+ logger.debug("Shard count {} for density {}, {} times min
size {}",
+ shards,
+ FBUtilities.prettyPrintBinary(localDensity,
"B", " "),
+ localDensity / minSSTableSize,
+ FBUtilities.prettyPrintBinary(minSSTableSize,
"B", " "));
+
+ return shards;
+ }
+ }
+
+ if (sstableGrowthModifier == 1)
+ {
+ shards = baseShardCount;
+ logger.debug("Shard count {} for density {} in fixed shards mode",
+ shards,
+ FBUtilities.prettyPrintBinary(localDensity, "B", "
"));
+ return shards;
+ }
+ else if (sstableGrowthModifier == 0)
+ {
+ // How many we would have to aim for the target size. Divided by
the base shard count, so that we can ensure
+ // the result is a multiple of it by multiplying back below.
Adjusted by sqrt(0.5) to calculate the split
+ // points needed for the minimum size.
+ double count = localDensity / (targetSSTableSize * INVERSE_SQRT_2
* baseShardCount);
+ if (count > MAX_SHARD_SPLIT)
+ count = MAX_SHARD_SPLIT;
+ assert !(count < 0); // Must be positive, 0 or NaN, which
should translate to baseShardCount
Review Comment:
@ethan-brown2022 `count >= 0` is more natural to me
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]