This is an automated email from the ASF dual-hosted git repository.

dcapwell pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/cassandra.git


The following commit(s) were added to refs/heads/trunk by this push:
     new 740cec41d2 When a node is bootstrapping it gets the whole gossip state 
but applies in random order causing some cases where StorageService will fail 
causing an instance to not show up in TokenMetadata
740cec41d2 is described below

commit 740cec41d2d67783a463bd18f70221de331928df
Author: David Capwell <[email protected]>
AuthorDate: Wed Jun 1 08:49:44 2022 -0700

    When a node is bootstrapping it gets the whole gossip state but applies in 
random order causing some cases where StorageService will fail causing an 
instance to not show up in TokenMetadata
    
    patch by David Capwell; reviewed by Blake Eggleston for CASSANDRA-17676
---
 CHANGES.txt                                        |  1 +
 src/java/org/apache/cassandra/gms/Gossiper.java    | 46 +++++++++++++++++++++-
 .../org/apache/cassandra/gms/VersionedValue.java   |  3 ++
 3 files changed, 49 insertions(+), 1 deletion(-)

diff --git a/CHANGES.txt b/CHANGES.txt
index fb75bc510d..30dfdf763c 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,4 +1,5 @@
 4.2
+ * When a node is bootstrapping it gets the whole gossip state but applies in 
random order causing some cases where StorageService will fail causing an 
instance to not show up in TokenMetadata (CASSANDRA-17676)
  * Add CQLSH command SHOW REPLICAS (CASSANDRA-17577)
  * Add guardrail to allow disabling of SimpleStrategy (CASSANDRA-17647)
  * Change default directory permission to 750 in packaging (CASSANDRA-17470)
diff --git a/src/java/org/apache/cassandra/gms/Gossiper.java 
b/src/java/org/apache/cassandra/gms/Gossiper.java
index 4c72166a9d..8041eed034 100644
--- a/src/java/org/apache/cassandra/gms/Gossiper.java
+++ b/src/java/org/apache/cassandra/gms/Gossiper.java
@@ -85,6 +85,7 @@ import static org.apache.cassandra.net.Verb.GOSSIP_DIGEST_SYN;
 import static 
org.apache.cassandra.utils.FBUtilities.getBroadcastAddressAndPort;
 import static org.apache.cassandra.utils.Clock.Global.currentTimeMillis;
 import static org.apache.cassandra.utils.Clock.Global.nanoTime;
+import static org.apache.cassandra.gms.VersionedValue.BOOTSTRAPPING_STATUS;
 
 /**
  * This module is responsible for Gossiping information for the local 
endpoint. This abstraction
@@ -1500,11 +1501,54 @@ public class Gossiper implements 
IFailureDetectionEventListener, GossiperMBean
         return pieces[0];
     }
 
+    /**
+     * Gossip offers no happens-before relationship, but downstream 
subscribers assume a happens-before relationship
+     * before being notified!  To attempt to be nicer to subscribers, this 
{@link Comparator} attempts to order EndpointState
+     * within a map based off a few heuristics:
+     * <ol>
+     *     <li>STATUS - some STATUS depends on other instance STATUS, so make 
sure they are last; eg. BOOT, and BOOT_REPLACE</li>
+     *     <li>generation - normally defined as system clock millis, this can 
be skewed and is a best effort</li>
+     *     <li>address - tie breaker to make sure order is consistent</li>
+     * </ol>
+     * <p>
+     * Problems:
+     * Generation is normally defined as system clock millis, which can be 
skewed and in-consistent cross nodes
+     * (generations do not have a happens-before relationship, so ordering is 
sketchy at best).
+     * <p>
+     * Motivations:
+     * {@link Map#entrySet()} returns data in effectivlly random order, so can 
get into a situation such as the following example.
+     * {@code
+     * 3 node cluster: n1, n2, and n3
+     * n2 goes down and n4 does host replacement and fails before completion
+     * h5 tries to do a host replacement against n4 (ignore the fact this 
doesn't make sense)
+     * }
+     * In that case above, the {@link Map#entrySet()} ordering can be random, 
causing h4 to apply before h2, which will
+     * be rejected by subscripers (only after updating gossip causing zero 
retries).
+     */
+    private static Comparator<Entry<InetAddressAndPort, EndpointState>> 
STATE_MAP_ORDERING =
+    ((Comparator<Entry<InetAddressAndPort, EndpointState>>) (e1, e2) -> {
+        // check status first, make sure bootstrap status happens-after all 
others
+        if (BOOTSTRAPPING_STATUS.contains(getGossipStatus(e1.getValue())))
+            return 1;
+        if (BOOTSTRAPPING_STATUS.contains(getGossipStatus(e2.getValue())))
+            return -1;
+        return 0;
+    })
+    .thenComparingInt((Entry<InetAddressAndPort, EndpointState> e) -> 
e.getValue().getHeartBeatState().getGeneration())
+    .thenComparing(Entry::getKey);
+
+    private static Iterable<Entry<InetAddressAndPort, EndpointState>> 
order(Map<InetAddressAndPort, EndpointState> epStateMap)
+    {
+        List<Entry<InetAddressAndPort, EndpointState>> list = new 
ArrayList<>(epStateMap.entrySet());
+        Collections.sort(list, STATE_MAP_ORDERING);
+        return list;
+    }
+
     @VisibleForTesting
     public void applyStateLocally(Map<InetAddressAndPort, EndpointState> 
epStateMap)
     {
         checkProperThreadForStateMutation();
-        for (Entry<InetAddressAndPort, EndpointState> entry : 
epStateMap.entrySet())
+        for (Entry<InetAddressAndPort, EndpointState> entry : 
order(epStateMap))
         {
             InetAddressAndPort ep = entry.getKey();
             if (ep.equals(getBroadcastAddressAndPort()) && !isInShadowRound())
diff --git a/src/java/org/apache/cassandra/gms/VersionedValue.java 
b/src/java/org/apache/cassandra/gms/VersionedValue.java
index 26644e17cc..519fffa9b8 100644
--- a/src/java/org/apache/cassandra/gms/VersionedValue.java
+++ b/src/java/org/apache/cassandra/gms/VersionedValue.java
@@ -27,6 +27,7 @@ import java.util.stream.Collectors;
 import static java.nio.charset.StandardCharsets.ISO_8859_1;
 
 import com.google.common.annotations.VisibleForTesting;
+import com.google.common.collect.ImmutableSet;
 import com.google.common.collect.Iterables;
 
 import org.apache.cassandra.db.TypeSizes;
@@ -84,6 +85,8 @@ public class VersionedValue implements 
Comparable<VersionedValue>
     // values for ApplicationState.REMOVAL_COORDINATOR
     public final static String REMOVAL_COORDINATOR = "REMOVER";
 
+    public static Set<String> BOOTSTRAPPING_STATUS = 
ImmutableSet.of(STATUS_BOOTSTRAPPING, STATUS_BOOTSTRAPPING_REPLACE);
+
     public final int version;
     public final String value;
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to