Repository: cassandra
Updated Branches:
  refs/heads/trunk e645b9172 -> daa3619ae


Transient->Full movements mishandle consistency level upgrade

patch by Benedict; reviewed by Alex Petrov and Ariel Weisberg for 
CASSANDRA-14759


Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/daa3619a
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/daa3619a
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/daa3619a

Branch: refs/heads/trunk
Commit: daa3619ae63bb8b06d532890e51d288c189c787c
Parents: e645b91
Author: Benedict Elliott Smith <[email protected]>
Authored: Sun Sep 9 23:53:07 2018 +0100
Committer: Benedict Elliott Smith <[email protected]>
Committed: Wed Oct 3 14:48:15 2018 +0100

----------------------------------------------------------------------
 CHANGES.txt                                     |  1 +
 .../org/apache/cassandra/locator/Endpoints.java |  6 --
 .../org/apache/cassandra/locator/Replica.java   |  2 +-
 .../apache/cassandra/locator/ReplicaLayout.java | 28 +++++++-
 .../cassandra/locator/ReplicaLayoutTest.java    | 73 ++++++++++++++++++++
 5 files changed, 100 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/cassandra/blob/daa3619a/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 25c2728..e1fbb90 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,4 +1,5 @@
 4.0
+ * Transient->Full range movements mishandle consistency level upgrade 
(CASSANDRA-14759)
  * ReplicaCollection follow-up (CASSANDRA-14726)
  * Transient node receives full data requests (CASSANDRA-14762)
  * Enable snapshot artifacts publish (CASSANDRA-12704)

http://git-wip-us.apache.org/repos/asf/cassandra/blob/daa3619a/src/java/org/apache/cassandra/locator/Endpoints.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/locator/Endpoints.java 
b/src/java/org/apache/cassandra/locator/Endpoints.java
index ee42e36..a2bad6c 100644
--- a/src/java/org/apache/cassandra/locator/Endpoints.java
+++ b/src/java/org/apache/cassandra/locator/Endpoints.java
@@ -60,12 +60,6 @@ public abstract class Endpoints<E extends Endpoints<E>> 
extends AbstractReplicaC
         return map;
     }
 
-    public boolean contains(InetAddressAndPort endpoint, boolean isFull)
-    {
-        Replica replica = byEndpoint().get(endpoint);
-        return replica != null && replica.isFull() == isFull;
-    }
-
     @Override
     public boolean contains(Replica replica)
     {

http://git-wip-us.apache.org/repos/asf/cassandra/blob/daa3619a/src/java/org/apache/cassandra/locator/Replica.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/locator/Replica.java 
b/src/java/org/apache/cassandra/locator/Replica.java
index c884f13..4c5f7c6 100644
--- a/src/java/org/apache/cassandra/locator/Replica.java
+++ b/src/java/org/apache/cassandra/locator/Replica.java
@@ -110,7 +110,7 @@ public final class Replica implements Comparable<Replica>
         return range;
     }
 
-    public boolean isFull()
+    public final boolean isFull()
     {
         return full;
     }

http://git-wip-us.apache.org/repos/asf/cassandra/blob/daa3619a/src/java/org/apache/cassandra/locator/ReplicaLayout.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/locator/ReplicaLayout.java 
b/src/java/org/apache/cassandra/locator/ReplicaLayout.java
index cba4f68..54b82f9 100644
--- a/src/java/org/apache/cassandra/locator/ReplicaLayout.java
+++ b/src/java/org/apache/cassandra/locator/ReplicaLayout.java
@@ -18,6 +18,7 @@
 
 package org.apache.cassandra.locator;
 
+import com.google.common.annotations.VisibleForTesting;
 import org.apache.cassandra.config.DatabaseDescriptor;
 import org.apache.cassandra.db.Keyspace;
 import org.apache.cassandra.db.PartitionPosition;
@@ -274,9 +275,29 @@ public abstract class ReplicaLayout<E extends Endpoints<E>>
      * See {@link ReplicaLayout#haveWriteConflicts}
      * @return a 'natural' replica collection, that has had its conflicts with 
pending repaired
      */
-    private static <E extends Endpoints<E>> E resolveWriteConflictsInNatural(E 
natural, E pending)
+    @VisibleForTesting
+    static EndpointsForToken resolveWriteConflictsInNatural(EndpointsForToken 
natural, EndpointsForToken pending)
     {
-        return natural.filter(r -> !r.isTransient() || 
!pending.contains(r.endpoint(), true));
+        EndpointsForToken.Mutable resolved = 
natural.newMutable(natural.size());
+        for (Replica replica : natural)
+        {
+            // always prefer the full natural replica, if there is a conflict
+            if (replica.isTransient())
+            {
+                Replica conflict = 
pending.byEndpoint().get(replica.endpoint());
+                if (conflict != null)
+                {
+                    // it should not be possible to have conflicts of the same 
replication type for the same range
+                    assert conflict.isFull();
+                    // If we have any pending transient->full movement, we 
need to move the full replica to our 'natural' bucket
+                    // to avoid corrupting our count
+                    resolved.add(conflict);
+                    continue;
+                }
+            }
+            resolved.add(replica);
+        }
+        return resolved.asSnapshot();
     }
 
     /**
@@ -284,7 +305,8 @@ public abstract class ReplicaLayout<E extends Endpoints<E>>
      * See {@link ReplicaLayout#haveWriteConflicts}
      * @return a 'pending' replica collection, that has had its conflicts with 
natural repaired
      */
-    private static <E extends Endpoints<E>> E resolveWriteConflictsInPending(E 
natural, E pending)
+    @VisibleForTesting
+    static EndpointsForToken resolveWriteConflictsInPending(EndpointsForToken 
natural, EndpointsForToken pending)
     {
         return pending.without(natural.endpoints());
     }

http://git-wip-us.apache.org/repos/asf/cassandra/blob/daa3619a/test/unit/org/apache/cassandra/locator/ReplicaLayoutTest.java
----------------------------------------------------------------------
diff --git a/test/unit/org/apache/cassandra/locator/ReplicaLayoutTest.java 
b/test/unit/org/apache/cassandra/locator/ReplicaLayoutTest.java
new file mode 100644
index 0000000..9f2ac58
--- /dev/null
+++ b/test/unit/org/apache/cassandra/locator/ReplicaLayoutTest.java
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.cassandra.locator;
+
+import org.apache.cassandra.dht.Murmur3Partitioner;
+import org.apache.cassandra.dht.Token;
+import org.junit.Assert;
+import org.junit.Test;
+
+import static org.apache.cassandra.locator.ReplicaCollectionTest.*;
+
+public class ReplicaLayoutTest
+{
+    @Test
+    public void testConflictResolution()
+    {
+        final Token token = new Murmur3Partitioner.LongToken(1L);
+        final Replica f1 = Replica.fullReplica(EP1, R1);
+        final Replica f2 = Replica.fullReplica(EP2, R1);
+        final Replica t2 = Replica.transientReplica(EP2, R1);
+        final Replica f3 = Replica.fullReplica(EP3, R1);
+        final Replica t4 = Replica.transientReplica(EP4, R1);
+
+        {
+            // test no conflict
+            EndpointsForToken natural = EndpointsForToken.of(token, f1, f3);
+            EndpointsForToken pending = EndpointsForToken.of(token, t2, t4);
+            Assert.assertFalse(ReplicaLayout.haveWriteConflicts(natural, 
pending));
+        }
+        {
+            // test full in natural, transient in pending
+            EndpointsForToken natural = EndpointsForToken.of(token, f1, f2, 
f3);
+            EndpointsForToken pending = EndpointsForToken.of(token, t2, t4);
+            EndpointsForToken expectNatural = natural;
+            EndpointsForToken expectPending = EndpointsForToken.of(token, t4);
+            Assert.assertTrue(ReplicaLayout.haveWriteConflicts(natural, 
pending));
+            assertEquals(expectNatural, 
ReplicaLayout.resolveWriteConflictsInNatural(natural, pending));
+            assertEquals(expectPending, 
ReplicaLayout.resolveWriteConflictsInPending(natural, pending));
+        }
+        {
+            // test transient in natural, full in pending
+            EndpointsForToken natural = EndpointsForToken.of(token, f1, t2, 
f3);
+            EndpointsForToken pending = EndpointsForToken.of(token, f2, t4);
+            EndpointsForToken expectNatural = EndpointsForToken.of(token, f1, 
f2, f3);
+            EndpointsForToken expectPending = EndpointsForToken.of(token, t4);
+            Assert.assertTrue(ReplicaLayout.haveWriteConflicts(natural, 
pending));
+            assertEquals(expectNatural, 
ReplicaLayout.resolveWriteConflictsInNatural(natural, pending));
+            assertEquals(expectPending, 
ReplicaLayout.resolveWriteConflictsInPending(natural, pending));
+        }
+    }
+
+    private static void assertEquals(AbstractReplicaCollection<?> a, 
AbstractReplicaCollection<?> b)
+    {
+        Assert.assertEquals(a.list, b.list);
+    }
+
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to