Possible AssertionError in UnfilteredRowIteratorWithLowerBound

patch by Sylvain Lebresne; reviewed by Stefania for CASSANDRA-13366


Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/f55cb88a
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/f55cb88a
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/f55cb88a

Branch: refs/heads/trunk
Commit: f55cb88ab595ccb941ebb4a088ab90f860f463d5
Parents: ec9ce3d
Author: Sylvain Lebresne <sylv...@datastax.com>
Authored: Wed Mar 22 15:41:49 2017 +0100
Committer: Sylvain Lebresne <sylv...@datastax.com>
Committed: Thu Mar 23 10:26:37 2017 +0100

----------------------------------------------------------------------
 CHANGES.txt                                     |  1 +
 .../db/SinglePartitionReadCommand.java          |  4 +--
 .../db/compaction/CompactionController.java     |  2 +-
 .../UnfilteredRowIteratorWithLowerBound.java    | 31 ++++++++++++++---
 .../io/sstable/format/SSTableReader.java        | 35 ++++++++------------
 5 files changed, 44 insertions(+), 29 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/cassandra/blob/f55cb88a/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 8386c20..f4e48ff 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,4 +1,5 @@
 3.11.0
+ * Possible AssertionError in UnfilteredRowIteratorWithLowerBound 
(CASSANDRA-13366)
  * Support unaligned memory access for AArch64 (CASSANDRA-13326)
  * Improve SASI range iterator efficiency on intersection with an empty range 
(CASSANDRA-12915).
  * Fix equality comparisons of columns using the duration type 
(CASSANDRA-13174)

http://git-wip-us.apache.org/repos/asf/cassandra/blob/f55cb88a/src/java/org/apache/cassandra/db/SinglePartitionReadCommand.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/SinglePartitionReadCommand.java 
b/src/java/org/apache/cassandra/db/SinglePartitionReadCommand.java
index f6d10f5..724f59e 100644
--- a/src/java/org/apache/cassandra/db/SinglePartitionReadCommand.java
+++ b/src/java/org/apache/cassandra/db/SinglePartitionReadCommand.java
@@ -584,7 +584,7 @@ public class SinglePartitionReadCommand extends ReadCommand
                 if (!shouldInclude(sstable))
                 {
                     nonIntersectingSSTables++;
-                    if (sstable.hasTombstones())
+                    if (sstable.mayHaveTombstones())
                     { // if sstable has tombstones we need to check after one 
pass if it can be safely skipped
                         if (skippedSSTablesWithTombstones == null)
                             skippedSSTablesWithTombstones = new ArrayList<>();
@@ -773,7 +773,7 @@ public class SinglePartitionReadCommand extends ReadCommand
                 // however: if it is set, it impacts everything and must be 
included. Getting that top-level partition deletion costs us
                 // some seek in general however (unless the partition is 
indexed and is in the key cache), so we first check if the sstable
                 // has any tombstone at all as a shortcut.
-                if (!sstable.hasTombstones())
+                if (!sstable.mayHaveTombstones())
                     continue; // no tombstone at all, we can skip that sstable
 
                 // We need to get the partition deletion and include it if 
it's live. In any case though, we're done with that sstable.

http://git-wip-us.apache.org/repos/asf/cassandra/blob/f55cb88a/src/java/org/apache/cassandra/db/compaction/CompactionController.java
----------------------------------------------------------------------
diff --git 
a/src/java/org/apache/cassandra/db/compaction/CompactionController.java 
b/src/java/org/apache/cassandra/db/compaction/CompactionController.java
index 64c35d9..bf3647a 100644
--- a/src/java/org/apache/cassandra/db/compaction/CompactionController.java
+++ b/src/java/org/apache/cassandra/db/compaction/CompactionController.java
@@ -297,7 +297,7 @@ public class CompactionController implements AutoCloseable
     {
         if (reader.isMarkedSuspect() ||
             reader.getMaxTimestamp() <= minTimestamp ||
-            tombstoneOnly && !reader.hasTombstones())
+            tombstoneOnly && !reader.mayHaveTombstones())
             return null;
         RowIndexEntry<?> position = reader.getPosition(key, 
SSTableReader.Operator.EQ);
         if (position == null)

http://git-wip-us.apache.org/repos/asf/cassandra/blob/f55cb88a/src/java/org/apache/cassandra/db/rows/UnfilteredRowIteratorWithLowerBound.java
----------------------------------------------------------------------
diff --git 
a/src/java/org/apache/cassandra/db/rows/UnfilteredRowIteratorWithLowerBound.java
 
b/src/java/org/apache/cassandra/db/rows/UnfilteredRowIteratorWithLowerBound.java
index 14730ac..4536036 100644
--- 
a/src/java/org/apache/cassandra/db/rows/UnfilteredRowIteratorWithLowerBound.java
+++ 
b/src/java/org/apache/cassandra/db/rows/UnfilteredRowIteratorWithLowerBound.java
@@ -159,7 +159,7 @@ public class UnfilteredRowIteratorWithLowerBound extends 
LazilyInitializedUnfilt
     @Override
     public DeletionTime partitionLevelDeletion()
     {
-        if (!sstable.hasTombstones())
+        if (!sstable.mayHaveTombstones())
             return DeletionTime.LIVE;
 
         return super.partitionLevelDeletion();
@@ -210,13 +210,34 @@ public class UnfilteredRowIteratorWithLowerBound extends 
LazilyInitializedUnfilt
     }
 
     /**
-     * @return true if we can use the clustering values in the stats of the 
sstable:
-     * - we need the latest stats file format (or else the clustering values 
create clusterings with the wrong size)
-     * - we cannot create tombstone bounds from these values only and so we 
rule out sstables with tombstones
+     * Whether we can use the clustering values in the stats of the sstable to 
build the lower bound.
+     * <p>
+     * Currently, the clustering values of the stats file records for each 
clustering component the min and max
+     * value seen, null excluded. In other words, having a non-null value for 
a component in those min/max clustering
+     * values does _not_ guarantee that there isn't an unfiltered in the 
sstable whose clustering has either no value for
+     * that component (it's a prefix) or a null value.
+     * <p>
+     * This is problematic as this means we can't in general build a lower 
bound from those values since the "min"
+     * values doesn't actually guarantee minimality.
+     * <p>
+     * However, we can use those values if we can guarantee that no clustering 
in the sstable 1) is a true prefix and
+     * 2) uses null values. Nat having true prefixes means having no range 
tombstone markers since rows use
+     * {@link Clustering} which is always "full" (all components are always 
present). As for null values, we happen to
+     * only allow those in compact tables (for backward compatibility), so we 
can simply exclude those tables.
+     * <p>
+     * Note that the information we currently have at our disposal make this 
condition less precise that it could be.
+     * In particular, {@link SSTableReader#mayHaveTombstones} could return 
{@code true} (making us not use the stats)
+     * because of cell tombstone or even expiring cells even if the sstable 
has no range tombstone markers, even though
+     * it's really only markers we want to exclude here (more precisely, as 
said above, we want to exclude anything
+     * whose clustering is not "full", but that's only markers). It wouldn't 
be very hard to collect whether a sstable
+     * has any range tombstone marker however so it's a possible improvement.
      */
     private boolean canUseMetadataLowerBound()
     {
-        return !sstable.hasTombstones() && 
sstable.descriptor.version.hasNewStatsFile();
+        // Side-note: pre-2.1 sstable stat file had clustering value arrays 
whose size may not match the comparator size
+        // and that would break getMetadataLowerBound. We don't support 
upgrade from 2.0 to 3.0 directly however so it's
+        // not a true concern. Besides, !sstable.mayHaveTombstones already 
ensure this is a 3.0 sstable anyway.
+        return !sstable.mayHaveTombstones() && 
!sstable.metadata.isCompactTable();
     }
 
     /**

http://git-wip-us.apache.org/repos/asf/cassandra/blob/f55cb88a/src/java/org/apache/cassandra/io/sstable/format/SSTableReader.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/io/sstable/format/SSTableReader.java 
b/src/java/org/apache/cassandra/io/sstable/format/SSTableReader.java
index 56609b3..321abc7 100644
--- a/src/java/org/apache/cassandra/io/sstable/format/SSTableReader.java
+++ b/src/java/org/apache/cassandra/io/sstable/format/SSTableReader.java
@@ -51,6 +51,7 @@ import org.apache.cassandra.config.Schema;
 import org.apache.cassandra.config.SchemaConstants;
 import org.apache.cassandra.db.*;
 import org.apache.cassandra.db.filter.ColumnFilter;
+import org.apache.cassandra.db.rows.Cell;
 import org.apache.cassandra.db.rows.EncodingStats;
 import org.apache.cassandra.db.rows.UnfilteredRowIterator;
 import org.apache.cassandra.dht.AbstractBounds;
@@ -1881,12 +1882,20 @@ public abstract class SSTableReader extends SSTable 
implements SelfRefCounted<SS
         return sstableMetadata.maxLocalDeletionTime;
     }
 
-    /** sstable contains no tombstones if minLocalDeletionTime == 
Integer.MAX_VALUE */
-    public boolean hasTombstones()
+    /**
+     * Whether the sstable may contain tombstones or if it is guaranteed to 
not contain any.
+     * <p>
+     * Note that having that method return {@code false} guarantees the 
sstable has no tombstones whatsoever (so no
+     * cell tombstone, no range tombstone maker and no expiring columns), but 
having it return {@code true} doesn't
+     * guarantee it contains any as 1) it may simply have non-expired cells 
and 2) old-format sstables didn't contain
+     * enough information to decide this and so always return {@code true}.
+     */
+    public boolean mayHaveTombstones()
     {
-        // sstable contains no tombstone if minLocalDeletionTime is still set 
to  the default value Integer.MAX_VALUE
-        // which is bigger than any valid deletion times
-        return getMinLocalDeletionTime() != Integer.MAX_VALUE;
+        // A sstable is guaranteed to have no tombstones if it properly 
tracked the minLocalDeletionTime (which we only
+        // do since 3.0 - see CASSANDRA-13366) and that value is still set to 
its default, Cell.NO_DELETION_TIME, which
+        // is bigger than any valid deletion times.
+        return !descriptor.version.storeRows() || getMinLocalDeletionTime() != 
Cell.NO_DELETION_TIME;
     }
 
     public int getMinTTL()
@@ -2008,22 +2017,6 @@ public abstract class SSTableReader extends SSTable 
implements SelfRefCounted<SS
             readMeter.mark();
     }
 
-    /**
-     * Checks if this sstable can overlap with another one based on the 
min/man clustering values.
-     * If this methods return false, we're guarantee that {@code this} and 
{@code other} have no overlapping
-     * data, i.e. no cells to reconcile.
-     */
-    public boolean mayOverlapsWith(SSTableReader other)
-    {
-        StatsMetadata m1 = getSSTableMetadata();
-        StatsMetadata m2 = other.getSSTableMetadata();
-
-        if (m1.minClusteringValues.isEmpty() || 
m1.maxClusteringValues.isEmpty() || m2.minClusteringValues.isEmpty() || 
m2.maxClusteringValues.isEmpty())
-            return true;
-
-        return !(compare(m1.maxClusteringValues, m2.minClusteringValues) < 0 
|| compare(m1.minClusteringValues, m2.maxClusteringValues) > 0);
-    }
-
     private int compare(List<ByteBuffer> values1, List<ByteBuffer> values2)
     {
         ClusteringComparator comparator = metadata.comparator;

Reply via email to