This is an automated email from the ASF dual-hosted git repository.

mck pushed a commit to branch cassandra-3.0
in repository https://gitbox.apache.org/repos/asf/cassandra.git


The following commit(s) were added to refs/heads/cassandra-3.0 by this push:
     new f8500ee  Fix skipping on pre-3.0 created compact storage sstables due to missing primary key liveness
f8500ee is described below

commit f8500ee911343eb8826f9c44bb6db2ab780f6327
Author: Caleb Rackliffe <[email protected]>
AuthorDate: Fri Oct 23 15:38:57 2020 -0500

    Fix skipping on pre-3.0 created compact storage sstables due to missing primary key liveness

    Check primary key liveness information only if it exists, and fall back to checking cell contents, which makes skipping possible for COMPACT STORAGE tables after an upgrade to 3.0+

     patch by Caleb Rackliffe; reviewed by Alex Petrov, Mick Semb Wever for CASSANDRA-16226
---
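The heart of the change is the rewritten completeness check in SinglePartitionReadCommand#isRowComplete (second hunk below). What follows is a rough, standalone sketch of that decision; the plain parameters and the class name IsRowCompleteSketch are illustrative stand-ins, not Cassandra's Row/Cell/Columns API.

// Rough, standalone sketch of the new completeness check; parameter shapes and the class name
// are illustrative stand-ins, not Cassandra's Row/Cell/Columns API. rowTimestamp is null when
// the row has no primary key liveness info, which is always the case for COMPACT STORAGE tables.
final class IsRowCompleteSketch
{
    static boolean isRowComplete(Long rowTimestamp,
                                 long[] cellTimestamps,
                                 boolean[] cellIsTombstone,
                                 long nextSSTableMaxTimestamp,
                                 boolean isCompactTable)
    {
        // Before the fix, an empty liveness info alone meant "not complete", so compact storage
        // rows always forced a read of the next sstable; now the timestamp is only checked if present.
        if (rowTimestamp != null && rowTimestamp <= nextSSTableMaxTimestamp)
            return false;

        boolean hasLiveCell = false;
        for (int i = 0; i < cellTimestamps.length; i++)
        {
            // Any requested cell that an older sstable might still shadow (or a missing cell,
            // not modelled here) means reading has to continue.
            if (cellTimestamps[i] <= nextSSTableMaxTimestamp)
                return false;

            if (!cellIsTombstone[i])
                hasLiveCell = true;
        }

        // A CQL table needs at least one live cell to prove the row exists; a compact table has
        // no row-level liveness at all, so an all-tombstone row still counts as complete.
        return hasLiveCell || isCompactTable;
    }

    public static void main(String[] args)
    {
        // A compact storage row whose only requested cell is a tombstone newer than the next
        // sstable's max timestamp: complete, so older sstables can be skipped.
        System.out.println(isRowComplete(null, new long[]{ 2000L }, new boolean[]{ true }, 1000L, true));  // true
        // The same data in a regular CQL table: not complete, the read keeps going.
        System.out.println(isRowComplete(null, new long[]{ 2000L }, new boolean[]{ true }, 1000L, false)); // false
    }
}

The final return line is the behavioural change that matters for CASSANDRA-16226: a compact storage row, which never carries primary key liveness info, can now be judged complete from its cells alone.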
 CHANGES.txt                                        |   1 +
 .../cassandra/db/SinglePartitionReadCommand.java   |  37 +++-
 .../upgrade/CompactStorage2to3UpgradeTest.java     |   6 +-
 .../miscellaneous/SSTablesIteratedTest.java        | 238 ++++++++++++++++++++-
 .../cql3/validation/operations/UpdateTest.java     |   1 -
 5 files changed, 266 insertions(+), 17 deletions(-)

diff --git a/CHANGES.txt b/CHANGES.txt
index 53883bc..54456c1 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,4 +1,5 @@
 3.0.24:
+ * Fix skipping on pre-3.0 created compact storage sstables due to missing primary key liveness (CASSANDRA-16226)
  * Fix DecimalDeserializer#toString OOM (CASSANDRA-14925)
  * Extend the exclusion of replica filtering protection to other indices instead of just SASI (CASSANDRA-16311)
  * Synchronize transaction logs for JBOD (CASSANDRA-16225)
diff --git a/src/java/org/apache/cassandra/db/SinglePartitionReadCommand.java b/src/java/org/apache/cassandra/db/SinglePartitionReadCommand.java
index ca4e8e3..1e1953b 100644
--- a/src/java/org/apache/cassandra/db/SinglePartitionReadCommand.java
+++ b/src/java/org/apache/cassandra/db/SinglePartitionReadCommand.java
@@ -27,8 +27,6 @@ import com.google.common.collect.Sets;
 import org.apache.cassandra.cache.IRowCacheEntry;
 import org.apache.cassandra.cache.RowCacheKey;
 import org.apache.cassandra.cache.RowCacheSentinel;
-import org.apache.cassandra.concurrent.Stage;
-import org.apache.cassandra.concurrent.StageManager;
 import org.apache.cassandra.config.CFMetaData;
 import org.apache.cassandra.config.ColumnDefinition;
 import org.apache.cassandra.config.DatabaseDescriptor;
@@ -1004,14 +1002,14 @@ public class SinglePartitionReadCommand extends ReadCommand
         if (!columns.statics.isEmpty())
         {
             Row staticRow = searchIter.next(Clustering.STATIC_CLUSTERING);
-            removeStatic = staticRow != null && canRemoveRow(staticRow, columns.statics, sstableTimestamp);
+            removeStatic = staticRow != null && isRowComplete(staticRow, columns.statics, sstableTimestamp);
         }
 
         NavigableSet<Clustering> toRemove = null;
         for (Clustering clustering : clusterings)
         {
             Row row = searchIter.next(clustering);
-            if (row == null || !canRemoveRow(row, columns.regulars, sstableTimestamp))
+            if (row == null || !isRowComplete(row, columns.regulars, sstableTimestamp))
                 continue;
 
             if (toRemove == null)
@@ -1037,21 +1035,40 @@ public class SinglePartitionReadCommand extends ReadCommand
         return new ClusteringIndexNamesFilter(clusterings, filter.isReversed());
     }
 
-    private boolean canRemoveRow(Row row, Columns requestedColumns, long sstableTimestamp)
+    /**
+     * We can stop reading row data from disk if what we've already read is more recent than the max timestamp
+     * of the next newest SSTable that might have data for the query. We care about 1.) the row timestamp (since
+     * every query cares if the row exists or not), 2.) the timestamps of the requested cells, and 3.) whether or
+     * not any of the cells we've read have actual data.
+     *
+     * @param row a potentially incomplete {@link Row}
+     * @param requestedColumns the columns requested by the query
+     * @param sstableTimestamp the max timestamp of the next newest SSTable to read
+     *
+     * @return true if the supplied {@link Row} is complete and its data more recent than the supplied timestamp
+     */
+    private boolean isRowComplete(Row row, Columns requestedColumns, long sstableTimestamp)
     {
-        // We can remove a row if it has data that is more recent that the next sstable to consider for the data that the query
-        // cares about. And the data we care about is 1) the row timestamp (since every query cares if the row exists or not)
-        // and 2) the requested columns.
-        if (row.primaryKeyLivenessInfo().isEmpty() || row.primaryKeyLivenessInfo().timestamp() <= sstableTimestamp)
+        // Note that compact tables will always have an empty primary key liveness info.
+        if (!row.primaryKeyLivenessInfo().isEmpty() && row.primaryKeyLivenessInfo().timestamp() <= sstableTimestamp)
             return false;
 
+        boolean hasLiveCell = false;
+
         for (ColumnDefinition column : requestedColumns)
         {
             Cell cell = row.getCell(column);
+
             if (cell == null || cell.timestamp() <= sstableTimestamp)
                 return false;
+
+            if (!cell.isTombstone())
+                hasLiveCell = true;
         }
-        return true;
+
+        // If we've gotten here w/ a compact table or at least one non-tombstone cell, the row is considered
+        // complete and we can avoid any further searching of older SSTables.
+        return hasLiveCell || !metadata().isCQLTable();
     }
 
     @Override
diff --git a/test/distributed/org/apache/cassandra/distributed/upgrade/CompactStorage2to3UpgradeTest.java b/test/distributed/org/apache/cassandra/distributed/upgrade/CompactStorage2to3UpgradeTest.java
index c6235d3..4f5d1bc 100644
--- a/test/distributed/org/apache/cassandra/distributed/upgrade/CompactStorage2to3UpgradeTest.java
+++ b/test/distributed/org/apache/cassandra/distributed/upgrade/CompactStorage2to3UpgradeTest.java
@@ -29,7 +29,10 @@ import org.apache.cassandra.distributed.api.ICoordinator;
 import org.apache.cassandra.distributed.api.IMessageFilters;
 import org.apache.cassandra.distributed.api.NodeToolResult;
 import org.apache.cassandra.distributed.shared.Versions;
-import static org.apache.cassandra.distributed.shared.AssertUtils.*;
+
+import static org.apache.cassandra.distributed.shared.AssertUtils.assertRows;
+import static org.apache.cassandra.distributed.shared.AssertUtils.row;
+import static org.junit.Assert.assertEquals;
 
 public class CompactStorage2to3UpgradeTest extends UpgradeTestBase
 {
@@ -314,7 +317,6 @@ public class CompactStorage2to3UpgradeTest extends UpgradeTestBase
                 }).run();
     }
 
-
     private void runQueries(ICoordinator coordinator, ResultsRecorder helper, String[] queries)
     {
         for (String query : queries)
diff --git a/test/unit/org/apache/cassandra/cql3/validation/miscellaneous/SSTablesIteratedTest.java b/test/unit/org/apache/cassandra/cql3/validation/miscellaneous/SSTablesIteratedTest.java
index 2cf518a..599711e 100644
--- a/test/unit/org/apache/cassandra/cql3/validation/miscellaneous/SSTablesIteratedTest.java
+++ b/test/unit/org/apache/cassandra/cql3/validation/miscellaneous/SSTablesIteratedTest.java
@@ -22,12 +22,13 @@ package org.apache.cassandra.cql3.validation.miscellaneous;
 
 import org.junit.Test;
 
-import static org.junit.Assert.assertEquals;
 import org.apache.cassandra.cql3.CQLTester;
 import org.apache.cassandra.cql3.UntypedResultSet;
 import org.apache.cassandra.db.ColumnFamilyStore;
 import org.apache.cassandra.metrics.ClearableHistogram;
 
+import static org.junit.Assert.assertEquals;
+
 /**
  * Tests for checking how many sstables we access during cql queries.
  */
@@ -108,10 +109,10 @@ public class SSTablesIteratedTest extends CQLTester
         executeAndCheck("SELECT * FROM %s WHERE pk = 2 AND c > 20 ORDER BY c 
DESC", 2,
                         row(2, 40, "42"));
 
-        // Test with only 2 of the 3 SSTables being merged and a Name filter
+        // Test with only 1 of the 3 SSTables being merged and a Name filter
         // This test checks the SinglePartitionReadCommand::queryMemtableAndSSTablesInTimestampOrder which is only
         // used for ClusteringIndexNamesFilter when there are no multi-cell columns
-        executeAndCheck("SELECT * FROM %s WHERE pk = 2 AND c = 10", 2,
+        executeAndCheck("SELECT * FROM %s WHERE pk = 2 AND c = 10", 1,
                         row(2, 10, "12"));
 
         // For partition range queries the metric must not be updated. The reason being that range queries simply
@@ -133,4 +134,233 @@ public class SSTablesIteratedTest extends CQLTester
         assertInvalidMessage("ORDER BY is only supported when the partition 
key is restricted by an EQ or an IN",
                              "SELECT * FROM %s WHERE token(pk) = token(1) 
ORDER BY C DESC");
     }
-}
\ No newline at end of file
+
+    @Test
+    public void testNonCompactTableRowDeletion() throws Throwable
+    {
+        createTable("CREATE TABLE %s (pk int, ck int, v text, PRIMARY KEY (pk, 
ck))");
+
+        execute("INSERT INTO %s (pk, ck, v) VALUES (1, 1, '1')");
+        flush();
+
+        execute("DELETE FROM %s WHERE pk = 1 AND ck = 1");
+        flush();
+
+        executeAndCheck("SELECT * FROM %s WHERE pk = 1 AND ck = 1", 2);
+    }
+
+    @Test
+    public void testNonCompactTableRangeDeletion() throws Throwable
+    {
+        createTable("CREATE TABLE %s (a int, b int, c int, d int, PRIMARY KEY 
(a, b, c))");
+
+        execute("INSERT INTO %s (a, b, c, d) VALUES (?, ?, ?, ?)", 1, 1, 1, 1);
+        flush();
+
+        execute("DELETE FROM %s WHERE a=? AND b=?", 1, 1);
+        flush();
+
+        executeAndCheck("SELECT * FROM %s WHERE a=1 AND b=1 AND c=1", 2);
+    }
+
+    @Test
+    public void testNonCompactTableCellsDeletion() throws Throwable
+    {
+        createTable("CREATE TABLE %s (pk int, ck int, v1 text, v2 text, 
PRIMARY KEY (pk, ck))");
+
+        execute("INSERT INTO %s (pk, ck, v1, v2) VALUES (1, 1, '1', '1')");
+        flush();
+
+        execute("DELETE v1 FROM %s WHERE pk = 1 AND ck = 1");
+        execute("DELETE v2 FROM %s WHERE pk = 1 AND ck = 1");
+        flush();
+
+        executeAndCheck("SELECT * FROM %s WHERE pk = 1 AND ck = 1", 2, row(1, 
1, null, null));
+    }
+
+    @Test
+    public void testCompactTableSkipping() throws Throwable
+    {
+        createTable("CREATE TABLE %s (pk int, ck int, v text, PRIMARY KEY (pk, 
ck)) WITH COMPACT STORAGE");
+
+        execute("INSERT INTO %s (pk, ck, v) VALUES (1, 1, '1') USING TIMESTAMP 
1000000");
+        execute("INSERT INTO %s (pk, ck, v) VALUES (1, 50, '2') USING 
TIMESTAMP 1000001");
+        execute("INSERT INTO %s (pk, ck, v) VALUES (1, 100, '3') USING 
TIMESTAMP 1000002");
+        flush();
+
+        execute("INSERT INTO %s (pk, ck, v) VALUES (1, 2, '4') USING TIMESTAMP 
2000000");
+        execute("INSERT INTO %s (pk, ck, v) VALUES (1, 51, '5') USING 
TIMESTAMP 2000001");
+        execute("INSERT INTO %s (pk, ck, v) VALUES (1, 101, '6') USING 
TIMESTAMP 2000002");
+        flush();
+
+        executeAndCheck("SELECT * FROM %s WHERE pk = 1 AND ck = 51", 1, row(1, 
51, "5"));
+
+        execute("ALTER TABLE %s DROP COMPACT STORAGE");
+        executeAndCheck("SELECT * FROM %s WHERE pk = 1 AND ck = 51", 1, row(1, 
51, "5"));
+    }
+
+    @Test
+    public void testCompactTableSkippingPkOnly() throws Throwable
+    {
+        createTable("CREATE TABLE %s (pk int, ck int, PRIMARY KEY (pk, ck)) 
WITH COMPACT STORAGE");
+
+        execute("INSERT INTO %s (pk, ck) VALUES (1, 1) USING TIMESTAMP 
1000000");
+        execute("INSERT INTO %s (pk, ck) VALUES (1, 50) USING TIMESTAMP 
1000001");
+        execute("INSERT INTO %s (pk, ck) VALUES (1, 100) USING TIMESTAMP 
1000002");
+        flush();
+
+        execute("INSERT INTO %s (pk, ck) VALUES (1, 2) USING TIMESTAMP 
2000000");
+        execute("INSERT INTO %s (pk, ck) VALUES (1, 51) USING TIMESTAMP 
2000001");
+        execute("INSERT INTO %s (pk, ck) VALUES (1, 101) USING TIMESTAMP 
2000002");
+        flush();
+
+        executeAndCheck("SELECT * FROM %s WHERE pk = 1 AND ck = 51", 1, row(1, 
51));
+
+        execute("ALTER TABLE %s DROP COMPACT STORAGE");
+        executeAndCheck("SELECT * FROM %s WHERE pk = 1 AND ck = 51", 1, row(1, 
51));
+    }
+
+    @Test
+    public void testCompactTableCellDeletion() throws Throwable
+    {
+        createTable("CREATE TABLE %s (pk int, ck int, v text, PRIMARY KEY (pk, 
ck)) WITH COMPACT STORAGE");
+
+        execute("INSERT INTO %s (pk, ck, v) VALUES (1, 1, '1')");
+        flush();
+
+        execute("DELETE v FROM %s WHERE pk = 1 AND ck = 1");
+        flush();
+
+        executeAndCheck("SELECT * FROM %s WHERE pk = 1 AND ck = 1", 1);
+
+        // Dropping compact storage forces us to hit an extra SSTable, since we can't rely on the isDense flag
+        // to determine that a row with a complete set of column deletes is complete.
+        execute("ALTER TABLE %s DROP COMPACT STORAGE");
+        executeAndCheck("SELECT * FROM %s WHERE pk = 1 AND ck = 1", 2);
+    }
+
+    @Test
+    public void testCompactTableRowDeletion() throws Throwable
+    {
+        createTable("CREATE TABLE %s (pk int, ck int, v text, PRIMARY KEY (pk, 
ck)) WITH COMPACT STORAGE");
+
+        execute("INSERT INTO %s (pk, ck, v) VALUES (1, 1, '1')");
+        flush();
+
+        execute("DELETE FROM %s WHERE pk = 1 AND ck = 1");
+        flush();
+
+        executeAndCheck("SELECT * FROM %s WHERE pk = 1 AND ck = 1", 1);
+
+        // Dropping compact storage forces us to hit an extra SSTable, since we can't rely on the isDense flag
+        // to determine that a row with a complete set of column deletes is complete.
+        execute("ALTER TABLE %s DROP COMPACT STORAGE");
+        executeAndCheck("SELECT * FROM %s WHERE pk = 1 AND ck = 1", 2);
+    }
+
+    @Test
+    public void testCompactTableRangeDeletion() throws Throwable
+    {
+        createTable("CREATE TABLE %s (a int, b int, c int, d int, PRIMARY KEY 
(a, b, c)) WITH COMPACT STORAGE");
+
+        execute("INSERT INTO %s (a, b, c, d) VALUES (?, ?, ?, ?)", 1, 1, 1, 1);
+        execute("INSERT INTO %s (a, b, c, d) VALUES (?, ?, ?, ?)", 1, 1, 2, 1);
+        execute("INSERT INTO %s (a, b, c, d) VALUES (?, ?, ?, ?)", 1, 2, 1, 1);
+        flush();
+
+        execute("DELETE FROM %s WHERE a=? AND b=?", 1, 1);
+        flush();
+
+        // Even with a compact table, we can't short-circuit for a range deletion rather than a cell tombstone.
+        executeAndCheck("SELECT * FROM %s WHERE a=1 AND b=1 AND c=1", 2);
+
+        execute("ALTER TABLE %s DROP COMPACT STORAGE");
+        executeAndCheck("SELECT * FROM %s WHERE a=1 AND b=1 AND c=1", 2);
+    }
+
+    @Test
+    public void testCompactTableRangeOverRowDeletion() throws Throwable
+    {
+        createTable("CREATE TABLE %s (a int, b int, c int, d int, PRIMARY KEY 
(a, b, c)) WITH COMPACT STORAGE");
+
+        execute("INSERT INTO %s (a, b, c, d) VALUES (?, ?, ?, ?)", 1, 1, 1, 1);
+        execute("INSERT INTO %s (a, b, c, d) VALUES (?, ?, ?, ?)", 1, 1, 2, 1);
+        execute("INSERT INTO %s (a, b, c, d) VALUES (?, ?, ?, ?)", 1, 2, 1, 1);
+        flush();
+
+        execute("DELETE FROM %s WHERE a=? AND b=? AND c=?", 1, 1, 1);
+        flush();
+
+        execute("DELETE FROM %s WHERE a=? AND b=?", 1, 1);
+        flush();
+
+        // The range delete will subsume the row delete, and the latter will not factor into skipping decisions.
+        executeAndCheck("SELECT * FROM %s WHERE a=1 AND b=1 AND c=1", 3);
+
+        execute("ALTER TABLE %s DROP COMPACT STORAGE");
+        executeAndCheck("SELECT * FROM %s WHERE a=1 AND b=1 AND c=1", 3);
+    }
+
+    @Test
+    public void testCompactTableRowOverRangeDeletion() throws Throwable
+    {
+        createTable("CREATE TABLE %s (a int, b int, c int, d int, PRIMARY KEY 
(a, b, c)) WITH COMPACT STORAGE");
+
+        execute("INSERT INTO %s (a, b, c, d) VALUES (?, ?, ?, ?)", 1, 1, 1, 1);
+        execute("INSERT INTO %s (a, b, c, d) VALUES (?, ?, ?, ?)", 1, 1, 2, 1);
+        execute("INSERT INTO %s (a, b, c, d) VALUES (?, ?, ?, ?)", 1, 2, 1, 1);
+        flush();
+
+        execute("DELETE FROM %s WHERE a=? AND b=?", 1, 1);
+        flush();
+
+        execute("DELETE FROM %s WHERE a=? AND b=? AND c=?", 1, 1, 1);
+        flush();
+
+        // The row delete provides a tombstone, which is enough information to short-circuit after the first SSTable.
+        executeAndCheck("SELECT * FROM %s WHERE a=1 AND b=1 AND c=1", 1);
+
+        execute("ALTER TABLE %s DROP COMPACT STORAGE");
+        executeAndCheck("SELECT * FROM %s WHERE a=1 AND b=1 AND c=1", 3);
+    }
+
+    @Test
+    public void testCompactTableCellUpdate() throws Throwable
+    {
+        createTable("CREATE TABLE %s (pk int, ck int, v text, PRIMARY KEY (pk, 
ck)) WITH COMPACT STORAGE");
+
+        execute("INSERT INTO %s (pk, ck, v) VALUES (1, 1, '1')");
+        flush();
+
+        execute("UPDATE %s SET v = '2' WHERE pk = 1 AND ck = 1");
+        flush();
+
+        executeAndCheck("SELECT * FROM %s WHERE pk = 1 AND ck = 1", 1, row(1, 
1, "2"));
+
+        execute("ALTER TABLE %s DROP COMPACT STORAGE");
+        executeAndCheck("SELECT * FROM %s WHERE pk = 1 AND ck = 1", 1, row(1, 
1, "2"));
+    }
+
+    @Test
+    public void testCompactTableDeleteOverlappingSSTables() throws Throwable
+    {
+        createTable("CREATE TABLE %s (pk int, ck int, PRIMARY KEY (pk, ck)) 
WITH COMPACT STORAGE");
+
+        execute("INSERT INTO %s (pk, ck) VALUES (1, 51) USING TIMESTAMP 
1000002");
+        flush();
+        execute("DELETE FROM %s WHERE pk = 1 AND ck = 51");
+        flush();
+
+        execute("INSERT INTO %s (pk, ck) VALUES (1, 51) USING TIMESTAMP 
1000001");
+        execute("INSERT INTO %s (pk, ck) VALUES (2, 51)");
+        flush();
+
+        // If it weren't for the write to pk = 2, ck = 51, we could skip the 
third SSTable too and hit only one here.
+        executeAndCheck("SELECT * FROM %s WHERE pk = 1 AND ck = 51", 2);
+
+        // Dropping compact storage forces us to hit an extra SSTable, since 
we can't rely on the isDense flag
+        // to determine that a row with a complete set of column deletes is 
complete.
+        execute("ALTER TABLE %s DROP COMPACT STORAGE");
+        executeAndCheck("SELECT * FROM %s WHERE pk = 1 AND ck = 51", 3);
+    }
+}
diff --git a/test/unit/org/apache/cassandra/cql3/validation/operations/UpdateTest.java b/test/unit/org/apache/cassandra/cql3/validation/operations/UpdateTest.java
index 8a9be19..42a8560 100644
--- a/test/unit/org/apache/cassandra/cql3/validation/operations/UpdateTest.java
+++ b/test/unit/org/apache/cassandra/cql3/validation/operations/UpdateTest.java
@@ -20,7 +20,6 @@ package org.apache.cassandra.cql3.validation.operations;
 
 import java.util.Arrays;
 
-import org.junit.Assert;
 import org.junit.Test;
 
 import static org.apache.commons.lang3.StringUtils.isEmpty;
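
For context on the sstable counts asserted in the new SSTablesIteratedTest cases above: the names-filter read path walks sstables from newest to oldest max timestamp and drops clusterings whose rows are already complete before opening the next sstable. The sketch below simulates that loop very loosely; FakeSSTable, the long[]{timestamp, tombstoneFlag} cell encoding, and the rest are made-up stand-ins, not Cassandra's SSTableReader or ClusteringIndexNamesFilter.

import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

// Loose stand-ins for illustration only; not Cassandra's sstable or filter classes.
final class TimestampOrderReadSketch
{
    static final class FakeSSTable
    {
        final long maxTimestamp;
        // clustering -> { cell timestamp, 1 if the cell is a tombstone } for the one requested column
        final Map<Integer, long[]> rows;

        FakeSSTable(long maxTimestamp, Map<Integer, long[]> rows)
        {
            this.maxTimestamp = maxTimestamp;
            this.rows = rows;
        }
    }

    // Returns how many sstables a names query for the given clusterings has to touch.
    static int sstablesRead(List<FakeSSTable> sstables, Set<Integer> clusterings, boolean isCompactTable)
    {
        sstables.sort(Comparator.comparingLong((FakeSSTable s) -> s.maxTimestamp).reversed());
        Map<Integer, long[]> merged = new HashMap<>();
        int read = 0;

        for (FakeSSTable sstable : sstables)
        {
            // Before descending into this (older) sstable, drop clusterings whose data is already
            // complete and newer than anything it could contain -- the role isRowComplete plays.
            clusterings.removeIf(ck -> {
                long[] cell = merged.get(ck);
                if (cell == null || cell[0] <= sstable.maxTimestamp)
                    return false;
                boolean hasLiveCell = cell[1] == 0;
                return hasLiveCell || isCompactTable; // compact tables: a cell tombstone is enough
            });

            if (clusterings.isEmpty())
                break;

            read++;
            for (int ck : clusterings)
            {
                long[] candidate = sstable.rows.get(ck);
                long[] current = merged.get(ck);
                if (candidate != null && (current == null || candidate[0] > current[0]))
                    merged.put(ck, candidate);
            }
        }
        return read;
    }

    public static void main(String[] args)
    {
        // Mirrors testCompactTableCellDeletion: the newer sstable holds a cell tombstone for ck=1,
        // the older one the original value.
        List<FakeSSTable> sstables = new ArrayList<>(List.of(
                new FakeSSTable(1000L, Map.of(1, new long[]{ 1000L, 0L })),
                new FakeSSTable(2000L, Map.of(1, new long[]{ 2000L, 1L }))));

        System.out.println(sstablesRead(sstables, new HashSet<>(Set.of(1)), true));  // 1 with COMPACT STORAGE
        System.out.println(sstablesRead(sstables, new HashSet<>(Set.of(1)), false)); // 2 after dropping it
    }
}

Run as-is it prints 1 and then 2, matching the counts testCompactTableCellDeletion expects before and after DROP COMPACT STORAGE.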


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]
