Repository: cassandra Updated Branches: refs/heads/cassandra-2.0 c5ccdb766 -> 2424dd133
Fix potentially returning deleted row with range tombstones patch by slebresne; reviewed by thobbs for CASSANDRA-8558 Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/2424dd13 Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/2424dd13 Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/2424dd13 Branch: refs/heads/cassandra-2.0 Commit: 2424dd133bd971de4b19e5030619d7be79e261e0 Parents: c5ccdb7 Author: Sylvain Lebresne <[email protected]> Authored: Wed Jan 14 11:07:22 2015 +0100 Committer: Sylvain Lebresne <[email protected]> Committed: Wed Jan 14 11:11:24 2015 +0100 ---------------------------------------------------------------------- CHANGES.txt | 1 + .../db/columniterator/IndexedSliceReader.java | 36 ++++++++++++++++++-- 2 files changed, 34 insertions(+), 3 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/cassandra/blob/2424dd13/CHANGES.txt ---------------------------------------------------------------------- diff --git a/CHANGES.txt b/CHANGES.txt index a711790..92bf422 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,5 @@ 2.0.12: + * Fix potentially returning deleted rows with range tombstone (CASSANDRA-8558) * Make sure we unmark compacting after scrub/cleanup etc (CASSANDRA-8548) * Check for available disk space before starting a compaction (CASSANDRA-8562) * Fix DISTINCT queries with LIMITs or paging when some partitions http://git-wip-us.apache.org/repos/asf/cassandra/blob/2424dd13/src/java/org/apache/cassandra/db/columniterator/IndexedSliceReader.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/db/columniterator/IndexedSliceReader.java b/src/java/org/apache/cassandra/db/columniterator/IndexedSliceReader.java index b6aa085..ac48fc0 100644 --- a/src/java/org/apache/cassandra/db/columniterator/IndexedSliceReader.java 
+++ b/src/java/org/apache/cassandra/db/columniterator/IndexedSliceReader.java @@ -327,6 +327,10 @@ class IndexedSliceReader extends AbstractIterator<OnDiskAtom> implements OnDiskA // If we read blocks in reversed disk order, we may have columns from the previous block to handle. // Note that prefetched keeps columns in reversed disk order. + // Also note that Range Tombstone handling is a bit tricky, because we may run into range tombstones + // that cover a slice *after* we've moved to the previous slice. To keep it simple, we simply include + // every RT in prefetched: it's only slightly inefficient to do so and there are only so many RTs that + // can be mistakenly added this way. if (reversed && !prefetched.isEmpty()) { boolean gotSome = false; @@ -340,8 +344,22 @@ class IndexedSliceReader extends AbstractIterator<OnDiskAtom> implements OnDiskA if (isColumnBeforeSliceStart(prefetchedCol)) { inSlice = false; - if (!setNextSlice()) - return false; + + // As explained above, we add RT unconditionally + if (prefetchedCol instanceof RangeTombstone) + { + blockColumns.addLast(prefetched.poll()); + gotSome = true; + continue; + } + + // Otherwise, we either move to the next slice or, if we have none (which can happen + // because we unwind prefetched no matter what due to RT), we skip the cell + if (hasMoreSlice()) + setNextSlice(); + else + prefetched.poll(); + } // col is within slice, all columns // (we go in reverse, so as soon as we are in a slice, no need to check @@ -416,12 +434,19 @@ class IndexedSliceReader extends AbstractIterator<OnDiskAtom> implements OnDiskA // (If in slice, don't bother checking that until we change slice) if (!inSlice && isColumnBeforeSliceStart(column)) { - if (reversed) + // If it's a rangeTombstone, then we need to read it and include it unless its end + // stops before our slice start. 
+ if (column instanceof RangeTombstone && !isBeforeSliceStart(((RangeTombstone)column).max)) + { + addColumn(column); + } + else if (reversed) { // the next slice selects columns that are before the current one, so it may // match this column, so keep it around. prefetched.addFirst(column); } + column = null; } // col is within slice @@ -492,6 +517,11 @@ class IndexedSliceReader extends AbstractIterator<OnDiskAtom> implements OnDiskA // (If in slice, don't bother checking that until we change slice) if (!inSlice && isColumnBeforeSliceStart(column)) { + // If it's a rangeTombstone, then we need to read it and include it unless its end + // stops before our slice start. + if (column instanceof RangeTombstone && !isBeforeSliceStart(((RangeTombstone)column).max)) + addColumn(column); + column = null; continue; }
