This is an automated email from the ASF dual-hosted git repository.

ifesdjeen pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/cassandra.git


The following commit(s) were added to refs/heads/trunk by this push:
     new 815c397  failingReadRepairTest was not triggering a repair on the 
third node as expected, so fixed the test to make the expected semantics
815c397 is described below

commit 815c397f4876aae9ed2ae5a9578c5ec7087643ab
Author: David Capwell <[email protected]>
AuthorDate: Wed Jan 15 15:03:40 2020 -0800

    failingReadRepairTest was not triggering a repair on the third node as 
expected, so fixed the test to make the expected semantics
    
    Patch by David Capwell, reviewed by Alex Petrov for CASSANDRA-15507.
---
 .../test/DistributedReadWritePathTest.java         | 49 ++++++++++++++++++----
 .../distributed/test/DistributedTestBase.java      |  2 +-
 2 files changed, 41 insertions(+), 10 deletions(-)

diff --git 
a/test/distributed/org/apache/cassandra/distributed/test/DistributedReadWritePathTest.java
 
b/test/distributed/org/apache/cassandra/distributed/test/DistributedReadWritePathTest.java
index 0870ab3..679a90f 100644
--- 
a/test/distributed/org/apache/cassandra/distributed/test/DistributedReadWritePathTest.java
+++ 
b/test/distributed/org/apache/cassandra/distributed/test/DistributedReadWritePathTest.java
@@ -29,6 +29,7 @@ import org.apache.cassandra.db.ConsistencyLevel;
 import org.apache.cassandra.db.Keyspace;
 import org.apache.cassandra.distributed.Cluster;
 import org.apache.cassandra.distributed.impl.IInvokableInstance;
+import org.apache.cassandra.metrics.ReadRepairMetrics;
 
 import static org.apache.cassandra.distributed.api.Feature.NETWORK;
 import static org.apache.cassandra.net.Verb.READ_REPAIR_RSP;
@@ -166,22 +167,52 @@ public class DistributedReadWritePathTest extends 
DistributedTestBase
     @Test
     public void failingReadRepairTest() throws Throwable
     {
+        // This test makes an explicit assumption about which nodes are read 
from; that 1 and 2 will be "contacts", and that 3 will be ignored.
+        // This is an implementation detail of 
org.apache.cassandra.locator.ReplicaPlans#contactForRead and
+        // 
org.apache.cassandra.locator.AbstractReplicationStrategy.getNaturalReplicasForToken
 that may change
+        // in a future release; when that happens this test could start to 
fail but should only fail with the explicit
+        // check that detects this behavior has changed.
+        // see CASSANDRA-15507
         try (Cluster cluster = init(Cluster.create(3)))
         {
             cluster.schemaChange("CREATE TABLE " + KEYSPACE + ".tbl (pk int, 
ck int, v int, PRIMARY KEY (pk, ck)) WITH read_repair='blocking'");
 
-            cluster.get(1).executeInternal("INSERT INTO " + KEYSPACE + ".tbl 
(pk, ck, v) VALUES (1, 1, 1)");
-            cluster.get(2).executeInternal("INSERT INTO " + KEYSPACE + ".tbl 
(pk, ck, v) VALUES (1, 1, 1)");
+            // nodes 1 and 3 are identical
+            cluster.get(1).executeInternal("INSERT INTO " + KEYSPACE + ".tbl 
(pk, ck, v) VALUES (1, 1, 1) USING TIMESTAMP 43");
+            cluster.get(3).executeInternal("INSERT INTO " + KEYSPACE + ".tbl 
(pk, ck, v) VALUES (1, 1, 1) USING TIMESTAMP 43");
 
-            assertRows(cluster.get(3).executeInternal("SELECT * FROM " + 
KEYSPACE + ".tbl WHERE pk = 1"));
+            // node 2 is different because of the timestamp; a repair is needed
+            cluster.get(2).executeInternal("INSERT INTO " + KEYSPACE + ".tbl 
(pk, ck, v) VALUES (1, 1, 1) USING TIMESTAMP 42");
 
-            cluster.verbs(READ_REPAIR_REQ).to(3).drop();
-            assertRows(cluster.coordinator(1).execute("SELECT * FROM " + 
KEYSPACE + ".tbl WHERE pk = 1",
-                                                     ConsistencyLevel.QUORUM),
-                       row(1, 1, 1));
+            // 2 is out of date so needs to be repaired.  This will make sure 
that the repair does not happen on the node
+            // which will trigger the coordinator to write to node 3
+            cluster.verbs(READ_REPAIR_REQ).to(2).drop();
 
-            // Data was not repaired
-            assertRows(cluster.get(3).executeInternal("SELECT * FROM " + 
KEYSPACE + ".tbl WHERE pk = 1"));
+            // save the state of the counters so it's known if the contacts 
list changed
+            long readRepairRequestsBefore = cluster.get(1).callOnInstance(() 
-> 
Keyspace.open(KEYSPACE).getColumnFamilyStore("tbl").metric.readRepairRequests.getCount());
+            long speculatedWriteBefore = cluster.get(1).callOnInstance(() -> 
ReadRepairMetrics.speculatedWrite.getCount());
+
+            Object[][] rows = cluster.coordinator(1)
+                       .execute("SELECT pk, ck, v, WRITETIME(v) FROM " + 
KEYSPACE + ".tbl WHERE pk = 1", ConsistencyLevel.QUORUM);
+
+            // make sure to check counters first, so we can detect if read repair 
executed as expected
+            long readRepairRequestsAfter = cluster.get(1).callOnInstance(() -> 
Keyspace.open(KEYSPACE).getColumnFamilyStore("tbl").metric.readRepairRequests.getCount());
+            long speculatedWriteAfter = cluster.get(1).callOnInstance(() -> 
ReadRepairMetrics.speculatedWrite.getCount());
+
+            // defensive checks to make sure the nodes selected are the ones 
expected
+            Assert.assertEquals("number of read repairs after query does not 
match expected; its possible the nodes involved with the query did not match 
expected", readRepairRequestsBefore + 1, readRepairRequestsAfter);
+            Assert.assertEquals("number of read repairs speculated writes 
after query does not match expected; its possible the nodes involved with the 
query did not match expected", speculatedWriteBefore + 1, speculatedWriteAfter);
+
+            // 1 has newer data than 2 so its write timestamp will be used for 
the result
+            assertRows(rows, row(1, 1, 1, 43L));
+
+            // check each node's local state
+            // 1 and 3 should match quorum response
+            assertRows(cluster.get(1).executeInternal("SELECT pk, ck, v, 
WRITETIME(v) FROM " + KEYSPACE + ".tbl WHERE pk = 1"), row(1, 1, 1, 43L));
+            assertRows(cluster.get(3).executeInternal("SELECT pk, ck, v, 
WRITETIME(v) FROM " + KEYSPACE + ".tbl WHERE pk = 1"), row(1, 1, 1, 43L));
+
+            // 2 was not repaired (message was dropped), so still has old data
+            assertRows(cluster.get(2).executeInternal("SELECT pk, ck, v, 
WRITETIME(v) FROM " + KEYSPACE + ".tbl WHERE pk = 1"), row(1, 1, 1, 42L));
         }
     }
 
diff --git 
a/test/distributed/org/apache/cassandra/distributed/test/DistributedTestBase.java
 
b/test/distributed/org/apache/cassandra/distributed/test/DistributedTestBase.java
index d048079..ece369a 100644
--- 
a/test/distributed/org/apache/cassandra/distributed/test/DistributedTestBase.java
+++ 
b/test/distributed/org/apache/cassandra/distributed/test/DistributedTestBase.java
@@ -94,7 +94,7 @@ public class DistributedTestBase
         {
             Object[] expectedRow = expected[i];
             Object[] actualRow = actual[i];
-            Assert.assertTrue(rowsNotEqualErrorMessage(expected, actual),
+            Assert.assertTrue(rowsNotEqualErrorMessage(actual, expected),
                               Arrays.equals(expectedRow, actualRow));
         }
     }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to