RangeTombstoneList doesn't properly clean up mergeable or superseded rts in 
some cases

Patch by Blake Eggleston; Reviewed by Sam Tinnicliffe for CASSANDRA-14894


Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/f7630e4c
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/f7630e4c
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/f7630e4c

Branch: refs/heads/trunk
Commit: f7630e4c3af3bbcf933f0708afaac7e3e7ef6101
Parents: 0a7fbee
Author: Blake Eggleston <bdeggles...@gmail.com>
Authored: Mon Nov 26 13:29:03 2018 -0800
Committer: Blake Eggleston <bdeggles...@gmail.com>
Committed: Wed Nov 28 09:06:18 2018 -0800

----------------------------------------------------------------------
 CHANGES.txt                                     |  1 +
 .../db/rows/RowAndDeletionMergeIterator.java    | 40 +++++++++++++++-
 .../rows/RowAndDeletionMergeIteratorTest.java   | 49 ++++++++++++++++----
 3 files changed, 81 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/cassandra/blob/f7630e4c/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 0d33e3c..6e18de1 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,4 +1,5 @@
 3.0.18
+ * RangeTombstoneList doesn't properly clean up mergeable or superseded rts in 
some cases (CASSANDRA-14894)
  * Fix handling of collection tombstones for dropped columns from legacy 
sstables (CASSANDRA-14912)
  * Fix missing rows when reading 2.1 SSTables with static columns in 3.0 
(CASSANDRA-14873)
  * Move TWCS message 'No compaction necessary for bucket size' to Trace level 
(CASSANDRA-14884)

http://git-wip-us.apache.org/repos/asf/cassandra/blob/f7630e4c/src/java/org/apache/cassandra/db/rows/RowAndDeletionMergeIterator.java
----------------------------------------------------------------------
diff --git 
a/src/java/org/apache/cassandra/db/rows/RowAndDeletionMergeIterator.java 
b/src/java/org/apache/cassandra/db/rows/RowAndDeletionMergeIterator.java
index 389fe45..97b13e7 100644
--- a/src/java/org/apache/cassandra/db/rows/RowAndDeletionMergeIterator.java
+++ b/src/java/org/apache/cassandra/db/rows/RowAndDeletionMergeIterator.java
@@ -70,7 +70,7 @@ public class RowAndDeletionMergeIterator extends 
AbstractUnfilteredRowIterator
         this.ranges = ranges;
     }
 
-    protected Unfiltered computeNext()
+    private Unfiltered computeNextInternal()
     {
         while (true)
         {
@@ -112,6 +112,44 @@ public class RowAndDeletionMergeIterator extends 
AbstractUnfilteredRowIterator
         }
     }
 
+    /**
+     * RangeTombstoneList doesn't correctly merge multiple superseded rts, or 
overlapping rts with the
+     * same ts. This causes it to emit noop boundary markers which can cause 
unneeded read repairs and
+     * repair over streaming. This should technically be fixed in 
RangeTombstoneList. However, fixing
+     * it isn't trivial and that class is already so complicated that the fix 
would have a good chance
+     * of adding a worse bug. So we just swallow the noop boundary markers 
here. See CASSANDRA-14894
+     */
+    private static boolean shouldSkip(Unfiltered unfiltered)
+    {
+        if (unfiltered == null || !unfiltered.isRangeTombstoneMarker())
+            return false;
+
+        RangeTombstoneMarker marker = (RangeTombstoneMarker) unfiltered;
+
+        if (!marker.isBoundary())
+            return false;
+
+        DeletionTime open = marker.openDeletionTime(false);
+        DeletionTime close = marker.closeDeletionTime(false);
+
+        return open.equals(close);
+
+    }
+
+    @Override
+    protected Unfiltered computeNext()
+    {
+        while (true)
+        {
+            Unfiltered next = computeNextInternal();
+
+            if (shouldSkip(next))
+                continue;
+
+            return next;
+        }
+    }
+
     private void updateNextRow()
     {
         if (nextRow == null && rows.hasNext())

http://git-wip-us.apache.org/repos/asf/cassandra/blob/f7630e4c/test/unit/org/apache/cassandra/db/rows/RowAndDeletionMergeIteratorTest.java
----------------------------------------------------------------------
diff --git 
a/test/unit/org/apache/cassandra/db/rows/RowAndDeletionMergeIteratorTest.java 
b/test/unit/org/apache/cassandra/db/rows/RowAndDeletionMergeIteratorTest.java
index 400d65a..dd88704 100644
--- 
a/test/unit/org/apache/cassandra/db/rows/RowAndDeletionMergeIteratorTest.java
+++ 
b/test/unit/org/apache/cassandra/db/rows/RowAndDeletionMergeIteratorTest.java
@@ -24,6 +24,7 @@ import java.nio.ByteBuffer;
 import java.util.Collections;
 import java.util.Iterator;
 
+import org.junit.Assert;
 import org.junit.Before;
 import org.junit.BeforeClass;
 import org.junit.Test;
@@ -244,11 +245,13 @@ public class RowAndDeletionMergeIteratorTest
     {
         Iterator<Row> rowIterator = createRowIterator();
 
-        int delTime = nowInSeconds + 1;
-        long timestamp = toMillis(delTime);
+        int delTime1 = nowInSeconds + 1;
+        long timestamp1 = toMillis(delTime1);
+        int delTime2 = delTime1 + 1;
+        long timestamp2 = toMillis(delTime2);
 
-        Iterator<RangeTombstone> rangeTombstoneIterator = 
createRangeTombstoneIterator(atMost(2, timestamp, delTime),
-                                                                               
        greaterThan(2, timestamp, delTime));
+        Iterator<RangeTombstone> rangeTombstoneIterator = 
createRangeTombstoneIterator(atMost(2, timestamp1, delTime1),
+                                                                               
        greaterThan(2, timestamp2, delTime2));
 
         UnfilteredRowIterator iterator = createMergeIterator(rowIterator, 
rangeTombstoneIterator, false);
 
@@ -269,11 +272,13 @@ public class RowAndDeletionMergeIteratorTest
     {
         Iterator<Row> rowIterator = createRowIterator();
 
-        int delTime = nowInSeconds + 1;
-        long timestamp = toMillis(delTime);
+        int delTime1 = nowInSeconds + 1;
+        long timestamp1 = toMillis(delTime1);
+        int delTime2 = delTime1 + 1;
+        long timestamp2 = toMillis(delTime2);
 
-        Iterator<RangeTombstone> rangeTombstoneIterator = 
createRangeTombstoneIterator(lessThan(2, timestamp, delTime),
-                                                                               
        atLeast(2, timestamp, delTime));
+        Iterator<RangeTombstone> rangeTombstoneIterator = 
createRangeTombstoneIterator(lessThan(2, timestamp1, delTime1),
+                                                                               
        atLeast(2, timestamp2, delTime2));
 
         UnfilteredRowIterator iterator = createMergeIterator(rowIterator, 
rangeTombstoneIterator, false);
 
@@ -345,6 +350,29 @@ public class RowAndDeletionMergeIteratorTest
     }
 
 
+    /**
+     * RTL doesn't correctly merge range tombstones in some situations (see 
CASSANDRA-14894)
+     */
+    @Test
+    public void testWithNoopBoundaryMarkers()
+    {
+        PartitionUpdate update = new PartitionUpdate(cfm, dk, 
cfm.partitionColumns(), 1);
+        RangeTombstoneList rtl = new RangeTombstoneList(cfm.comparator, 10);
+        rtl.add(rt(1, 2, 5, 5));
+        rtl.add(rt(3, 4, 5, 5));
+        rtl.add(rt(5, 6, 5, 5));
+        rtl.add(rt(0, 8, 6, 6)); // <- supersedes all other tombstones
+
+        Assert.assertEquals(3, rtl.size());
+
+        try (UnfilteredRowIterator partition = 
createMergeIterator(update.iterator(), rtl.iterator(), false))
+        {
+            assertRtMarker(partition.next(), 
ClusteringPrefix.Kind.INCL_START_BOUND, 0);
+            assertRtMarker(partition.next(), 
ClusteringPrefix.Kind.INCL_END_BOUND, 8);
+            assertFalse(partition.hasNext());
+        }
+    }
+
     private void assertRtMarker(Unfiltered unfiltered, Bound bound)
     {
         assertEquals(Unfiltered.Kind.RANGE_TOMBSTONE_MARKER, 
unfiltered.kind());
@@ -436,6 +464,11 @@ public class RowAndDeletionMergeIteratorTest
         return new RangeTombstone(Slice.make(startBound, endBound), new 
DeletionTime(tstamp, delTime));
     }
 
+    private static RangeTombstone rt(int start, int end, long tstamp, int 
delTime)
+    {
+        return rt(start, true, end, true, tstamp, delTime);
+    }
+
     private static ByteBuffer bb(int i)
     {
         return ByteBufferUtil.bytes(i);


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@cassandra.apache.org
For additional commands, e-mail: commits-h...@cassandra.apache.org

Reply via email to