switch to Map<Long, List> to avoid re-hashing the list with every addition patch by jbellis; reviewed by thobbs for CASSANDRA-4287
Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/27176103 Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/27176103 Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/27176103 Branch: refs/heads/trunk Commit: 271761038ce1b77eea05b96f1226016b45f5f3d0 Parents: 6ab02c5 Author: Jonathan Ellis <jbel...@apache.org> Authored: Fri May 25 15:16:52 2012 -0500 Committer: Jonathan Ellis <jbel...@apache.org> Committed: Fri May 25 16:15:24 2012 -0500 ---------------------------------------------------------------------- CHANGES.txt | 2 + .../compaction/SizeTieredCompactionStrategy.java | 26 +++++++------- 2 files changed, 15 insertions(+), 13 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/cassandra/blob/27176103/CHANGES.txt ---------------------------------------------------------------------- diff --git a/CHANGES.txt b/CHANGES.txt index 404a744..b566c6f 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -3,6 +3,8 @@ * ensure unique streaming session id's (CASSANDRA-4223) * kick off background compaction when min/max thresholds change (CASSANDRA-4279) + * improve ability of STCS.getBuckets to deal with 100s of 1000s of + sstables, such as when convertinb back from LCS (CASSANDRA-4287) 1.0.10 http://git-wip-us.apache.org/repos/asf/cassandra/blob/27176103/src/java/org/apache/cassandra/db/compaction/SizeTieredCompactionStrategy.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/db/compaction/SizeTieredCompactionStrategy.java b/src/java/org/apache/cassandra/db/compaction/SizeTieredCompactionStrategy.java index 380982f..747edcc 100644 --- a/src/java/org/apache/cassandra/db/compaction/SizeTieredCompactionStrategy.java +++ b/src/java/org/apache/cassandra/db/compaction/SizeTieredCompactionStrategy.java @@ -122,7 +122,7 @@ public class SizeTieredCompactionStrategy extends AbstractCompactionStrategy } }); - Map<List<T>, Long> buckets = new HashMap<List<T>, Long>(); + Map<Long, List<T>> buckets = new HashMap<Long, List<T>>(); for (Pair<T, Long> pair: sortedFiles) { @@ -132,19 +132,19 @@ public class SizeTieredCompactionStrategy extends AbstractCompactionStrategy // look for a bucket containing similar-sized files: // group in the same bucket if it's w/in 50% of the average for this bucket, // or this file and the bucket are all considered "small" (less than `minSSTableSize`) - for (Entry<List<T>, Long> entry : buckets.entrySet()) + for (Entry<Long, List<T>> entry : buckets.entrySet()) { - List<T> bucket = entry.getKey(); - long averageSize = entry.getValue(); - if ((size > (averageSize / 2) && size < (3 * averageSize) / 2) - || (size < minSSTableSize && averageSize < minSSTableSize)) + List<T> bucket = entry.getValue(); + long oldAverageSize = entry.getKey(); + if ((size > (oldAverageSize / 2) && size < (3 * oldAverageSize) / 2) + || (size < minSSTableSize && oldAverageSize < minSSTableSize)) { - // remove and re-add because adding changes the hash - buckets.remove(bucket); - long totalSize = bucket.size() * averageSize; - averageSize = (totalSize + size) / (bucket.size() + 1); + // remove and re-add under new new average size + buckets.remove(oldAverageSize); + long totalSize = bucket.size() * oldAverageSize; + long newAverageSize = (totalSize + size) / (bucket.size() + 1); bucket.add(pair.left); - buckets.put(bucket, averageSize); + buckets.put(newAverageSize, bucket); bFound = true; break; } @@ -154,11 +154,11 @@ public class SizeTieredCompactionStrategy extends AbstractCompactionStrategy { ArrayList<T> bucket = new ArrayList<T>(); bucket.add(pair.left); - buckets.put(bucket, size); + buckets.put(size, bucket); } } - return new ArrayList<List<T>>(buckets.keySet()); + return new ArrayList<List<T>>(buckets.values()); } private void updateEstimatedCompactionsByTasks(List<AbstractCompactionTask> tasks)