Fix overlapping sstables computation in leveled compaction patch by slebresne; reviewed by jbellis for CASSANDRA-4321
Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/88b48df2 Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/88b48df2 Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/88b48df2 Branch: refs/heads/cassandra-1.1 Commit: 88b48df21d0bbfa7412a66f6ecf85ea0a0932368 Parents: f04e39d Author: Sylvain Lebresne <[email protected]> Authored: Fri Jun 29 10:24:34 2012 +0200 Committer: Sylvain Lebresne <[email protected]> Committed: Fri Jun 29 10:34:22 2012 +0200 ---------------------------------------------------------------------- CHANGES.txt | 1 + .../cassandra/db/compaction/LeveledManifest.java | 79 +++++++++------ src/java/org/apache/cassandra/dht/Bounds.java | 6 + .../apache/cassandra/io/sstable/SSTableWriter.java | 4 +- 4 files changed, 56 insertions(+), 34 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/cassandra/blob/88b48df2/CHANGES.txt ---------------------------------------------------------------------- diff --git a/CHANGES.txt b/CHANGES.txt index 3a1b4c2..e9f34ed 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -23,6 +23,7 @@ * (cql3) fix range queries containing unqueried results (CASSANDRA-4372) * (cql3) allow updating column_alias types (CASSANDRA-4041) * (cql3) Fix deletion bug (CASSANDRA-4193) + * Fix computation of overlapping sstable for leveled compaction (CASSANDRA-4321) Merged from 1.0: * Set gc_grace on index CF to 0 (CASSANDRA-4314) http://git-wip-us.apache.org/repos/asf/cassandra/blob/88b48df2/src/java/org/apache/cassandra/db/compaction/LeveledManifest.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/db/compaction/LeveledManifest.java b/src/java/org/apache/cassandra/db/compaction/LeveledManifest.java index dca8b7d..beb7d74 100644 --- a/src/java/org/apache/cassandra/db/compaction/LeveledManifest.java +++ b/src/java/org/apache/cassandra/db/compaction/LeveledManifest.java @@ -27,13 +27,14 @@ import java.io.IOException; import java.util.*; import com.google.common.collect.Iterables; +import com.google.common.collect.Sets; import com.google.common.primitives.Ints; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.cassandra.db.ColumnFamilyStore; import org.apache.cassandra.db.RowPosition; -import org.apache.cassandra.dht.Range; +import org.apache.cassandra.dht.Bounds; import org.apache.cassandra.dht.Token; import org.apache.cassandra.io.sstable.SSTable; import org.apache.cassandra.io.sstable.SSTableReader; @@ -287,24 +288,6 @@ public class LeveledManifest return Collections.emptyList(); } - /** - * Go through candidates collection and check if any of the SSTables are marked as suspected. - * - * @param candidates The SSTable collection to examine. - * - * @return true if collection has at least one SSTable marked as suspected, false otherwise. - */ - private boolean hasSuspectSSTables(Collection<SSTableReader> candidates) - { - for (SSTableReader candidate : candidates) - { - if (candidate.isMarkedSuspect()) - return true; - } - - return false; - } - public int getLevelSize(int i) { return generations.length > i ? generations[i].size() : 0; @@ -350,16 +333,50 @@ public class LeveledManifest sstableGenerations.put(sstable, Integer.valueOf(level)); } - private static Set<SSTableReader> overlapping(SSTableReader sstable, Iterable<SSTableReader> candidates) + private static Set<SSTableReader> overlapping(Collection<SSTableReader> candidates, Iterable<SSTableReader> others) { - Set<SSTableReader> overlapped = new HashSet<SSTableReader>(); - overlapped.add(sstable); + assert !candidates.isEmpty(); + /* + * Picking each sstable from others that overlap one of the sstable of candidates is not enough + * because you could have the following situation: + * candidates = [ s1(a, c), s2(m, z) ] + * others = [ s3(e, g) ] + * In that case, s2 overlaps none of s1 or s2, but if we compact s1 with s2, the resulting sstable will + * overlap s3, so we must return s3. + * + * Thus, the correct approach is to pick sstables overlapping anything between the first key in all + * the candidate sstables, and the last. + */ + Iterator<SSTableReader> iter = candidates.iterator(); + SSTableReader sstable = iter.next(); + Token first = sstable.first.token; + Token last = sstable.last.token; + while (iter.hasNext()) + { + sstable = iter.next(); + first = first.compareTo(sstable.first.token) <= 0 ? first : sstable.first.token; + last = last.compareTo(sstable.last.token) >= 0 ? last : sstable.last.token; + } + return overlapping(first, last, others); + } + + private static Set<SSTableReader> overlapping(SSTableReader sstable, Iterable<SSTableReader> others) + { + return overlapping(sstable.first.token, sstable.last.token, others); + } - Range<Token> promotedRange = new Range<Token>(sstable.first.token, sstable.last.token); - for (SSTableReader candidate : candidates) + /** + * @return sstables from @param sstables that contain keys between @param start and @param end, inclusive. + */ + private static Set<SSTableReader> overlapping(Token start, Token end, Iterable<SSTableReader> sstables) + { + assert start.compareTo(end) <= 0; + Set<SSTableReader> overlapped = new HashSet<SSTableReader>(); + Bounds<Token> promotedBounds = new Bounds<Token>(start, end); + for (SSTableReader candidate : sstables) { - Range<Token> candidateRange = new Range<Token>(candidate.first.token, candidate.last.token); - if (candidateRange.intersects(promotedRange)) + Bounds<Token> candidateBounds = new Bounds<Token>(candidate.first.token, candidate.last.token); + if (candidateBounds.intersects(promotedBounds)) overlapped.add(candidate); } return overlapped; @@ -394,7 +411,7 @@ public class LeveledManifest if (candidates.contains(sstable)) continue; - for (SSTableReader newCandidate : overlapping(sstable, remaining)) + for (SSTableReader newCandidate : Sets.union(Collections.singleton(sstable), overlapping(sstable, remaining))) { if (!newCandidate.isMarkedSuspect()) { @@ -412,8 +429,7 @@ public class LeveledManifest if (SSTable.getTotalBytes(candidates) > maxSSTableSizeInBytes) { // add sstables from L1 that overlap candidates - for (SSTableReader candidate : new ArrayList<SSTableReader>(candidates)) - candidates.addAll(overlapping(candidate, generations[1])); + candidates.addAll(overlapping(candidates, generations[1])); } return candidates; } @@ -421,8 +437,7 @@ public class LeveledManifest if (SSTable.getTotalBytes(candidates) > maxSSTableSizeInBytes) { // add sstables from L1 that overlap candidates - for (SSTableReader candidate : new ArrayList<SSTableReader>(candidates)) - candidates.addAll(overlapping(candidate, generations[1])); + candidates.addAll(overlapping(candidates, generations[1])); break; } } @@ -450,7 +465,7 @@ public class LeveledManifest while (true) { SSTableReader sstable = generations[level].get(i); - Set<SSTableReader> candidates = overlapping(sstable, generations[(level + 1)]); + Set<SSTableReader> candidates = Sets.union(Collections.singleton(sstable), overlapping(sstable, generations[(level + 1)])); for (SSTableReader candidate : candidates) { if (candidate.isMarkedSuspect()) http://git-wip-us.apache.org/repos/asf/cassandra/blob/88b48df2/src/java/org/apache/cassandra/dht/Bounds.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/dht/Bounds.java b/src/java/org/apache/cassandra/dht/Bounds.java index 9ff830e..0e4dbdf 100644 --- a/src/java/org/apache/cassandra/dht/Bounds.java +++ b/src/java/org/apache/cassandra/dht/Bounds.java @@ -62,6 +62,12 @@ public class Bounds<T extends RingPosition> extends AbstractBounds<T> return new Pair<AbstractBounds<T>, AbstractBounds<T>>(lb, rb); } + public boolean intersects(Bounds<T> that) + { + // We either contains one of the that bounds, or we are fully contained into that. + return contains(that.left) || contains(that.right) || that.contains(left); + } + public List<? extends AbstractBounds<T>> unwrap() { // Bounds objects never wrap http://git-wip-us.apache.org/repos/asf/cassandra/blob/88b48df2/src/java/org/apache/cassandra/io/sstable/SSTableWriter.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/io/sstable/SSTableWriter.java b/src/java/org/apache/cassandra/io/sstable/SSTableWriter.java index 3e7e7a0..5619951 100644 --- a/src/java/org/apache/cassandra/io/sstable/SSTableWriter.java +++ b/src/java/org/apache/cassandra/io/sstable/SSTableWriter.java @@ -130,8 +130,8 @@ public class SSTableWriter extends SSTable private long beforeAppend(DecoratedKey<?> decoratedKey) throws IOException { assert decoratedKey != null : "Keys must not be null"; - assert lastWrittenKey == null || lastWrittenKey.compareTo(decoratedKey) < 0 - : "Last written key " + lastWrittenKey + " >= current key " + decoratedKey + " writing into " + getFilename(); + if (lastWrittenKey != null && lastWrittenKey.compareTo(decoratedKey) >= 0) + throw new RuntimeException("Last written key " + lastWrittenKey + " >= current key " + decoratedKey + " writing into " + getFilename()); return (lastWrittenKey == null) ? 0 : dataFile.getFilePointer(); }
