belliottsmith commented on a change in pull request #1:
URL: https://github.com/apache/cassandra-accord/pull/1#discussion_r799263064



##########
File path: accord-core/src/main/java/accord/topology/TopologyManager.java
##########
@@ -0,0 +1,325 @@
+package accord.topology;
+
+import accord.api.ConfigurationService;
+import accord.coordinate.tracking.QuorumTracker;
+import accord.local.Node;
+import accord.messages.Request;
+import accord.messages.TxnRequest;
+import accord.messages.TxnRequestScope;
+import accord.txn.Keys;
+import accord.txn.Txn;
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Preconditions;
+
+import java.util.*;
+import java.util.function.LongConsumer;
+
+/**
+ * Manages topology state changes and update bookkeeping
+ *
+ * Each time the topology changes we need to:
+ * * confirm previous owners of ranges we replicate are aware of the new config
+ * * learn of any outstanding operations for ranges we replicate
+ * * clean up obsolete data
+ *
+ * Assumes a topology service that won't report epoch n unless epochs n-1, n-2, etc. are also available
+ */
+public class TopologyManager implements ConfigurationService.Listener
+{
+    /**
+     * Sync bookkeeping for a single epoch.
+     *
+     * Holds the global topology for the epoch, the view of it returned by
+     * {@code global.forNode(node)}, and a {@link QuorumTracker} that records which nodes
+     * have reported sync completion. An epoch only counts as fully synced once its own
+     * tracker has reached quorum AND the preceding epoch has been reported as synced.
+     */
+    class EpochState
+    {
+        private final Topology global;
+        private final Topology local;
+        private final QuorumTracker syncTracker;
+        // latched: once the tracker has reached quorum this is never reset
+        private boolean syncComplete = false;
+        // whether the preceding epoch was synced, as last reported by the caller
+        private boolean prevSynced;
+
+        /**
+         * Records the latest sync status of the preceding epoch and, if this epoch has not
+         * yet been marked complete, re-reads the tracker.
+         *
+         * @param prevSynced whether the preceding epoch is synced
+         * @return true only if this call moved {@code syncComplete} from false to true
+         */
+        private boolean updateState(boolean prevSynced)
+        {
+            this.prevSynced = prevSynced;
+            if (!syncComplete)
+            {
+                syncComplete = syncTracker.hasReachedQuorum();
+                return syncComplete;
+            }
+            return false;
+        }
+
+        /**
+         * @param global the full topology for the epoch; must not be a subset
+         * @param prevSynced whether the preceding epoch is already synced
+         * @throws IllegalArgumentException if {@code global.isSubset()} is true
+         */
+        EpochState(Topology global, boolean prevSynced)
+        {
+            Preconditions.checkArgument(!global.isSubset());
+            this.global = global;
+            // NOTE(review): `node` is not declared in this class; presumably a field of the enclosing TopologyManager
+            this.local = global.forNode(node);
+            this.syncTracker = new QuorumTracker(new Topologies.Singleton(global, false));
+            this.prevSynced = prevSynced;
+            updateState(prevSynced);
+        }
+
+        /**
+         * Records that {@code node} has completed sync for this epoch.
+         *
+         * @param node the node reporting completion
+         * @param prevSynced whether the preceding epoch is synced
+         * @return true only if this call moved the epoch's own sync flag from false to true
+         */
+        public boolean recordSyncComplete(Node.Id node, boolean prevSynced)
+        {
+            syncTracker.recordSuccess(node);
+            return updateState(prevSynced);
+        }
+
+        /** @return the epoch of the global topology held by this state */
+        long epoch()
+        {
+            return global.epoch;
+        }
+
+        /** @return true if both the preceding epoch and this epoch are synced */
+        boolean syncComplete()
+        {
+            return prevSynced && syncComplete;
+        }
+
+        /**
+         * determine if sync has completed for all shards intersecting with the given keys
+         *
+         * @return false if the preceding epoch is not synced; true if this whole epoch is
+         *         synced; otherwise true only if every visited shard's tracker has reached quorum
+         */
+        boolean syncCompleteFor(Keys keys)
+        {
+            if (!prevSynced)
+                return false;
+            if (syncComplete)
+                return true;
+            Boolean result = global.accumulateForKeys(keys, (i, shard, acc) -> {
+                // compare by value: reference equality on boxed Booleans only works for the cached instances
+                if (Boolean.FALSE.equals(acc))
+                    return acc;
+                return Boolean.valueOf(syncTracker.unsafeGet(i).hasReachedQuorum());
+            }, Boolean.TRUE);
+            return Boolean.TRUE.equals(result);
+        }
+
+        /**
+         * @param idx index of the shard within the sync tracker
+         * @param shard unused by this implementation
+         * @return true if the tracker for shard {@code idx} has not reached quorum
+         */
+        boolean shardIsUnsynced(int idx, Shard shard)
+        {
+            return !syncTracker.unsafeGet(idx).hasReachedQuorum();
+        }
+    }
+
+    private class Epochs
+    {
+        private final long maxEpoch;
+        private final long minEpoch;
+        private final EpochState[] epochs;
+        private final List<Set<Node.Id>> pendingSyncComplete;
+
+        /**
+         * @param epochs epoch states ordered newest first; each entry must be exactly one
+         *               epoch older than the entry before it
+         * @param pendingSyncComplete sync notifications not yet applied to an epoch state
+         * @throws IllegalArgumentException if the epochs are not contiguous and descending
+         */
+        private Epochs(EpochState[] epochs, List<Set<Node.Id>> pendingSyncComplete)
+        {
+            for (int idx = 1; idx < epochs.length; idx++)
+            {
+                long expected = epochs[idx - 1].epoch() - 1;
+                Preconditions.checkArgument(epochs[idx].epoch() == expected);
+            }
+
+            // with no epochs at all, both bounds default to 0
+            boolean empty = epochs.length == 0;
+            this.epochs = epochs;
+            this.pendingSyncComplete = pendingSyncComplete;
+            this.maxEpoch = empty ? 0 : epochs[0].epoch();
+            this.minEpoch = empty ? 0 : epochs[epochs.length - 1].epoch();
+        }
+
+        /**
+         * Convenience constructor that starts with an empty, mutable list of pending
+         * sync notifications.
+         */
+        private Epochs(EpochState[] epochs)
+        {
+            this(epochs, new ArrayList<>());
+        }
+
+        public long nextEpoch()
+        {
+            return current().epoch + 1;
+        }
+
+        /** @return the global topology of the newest known epoch, or {@code Topology.EMPTY} if none is known */
+        public Topology current()
+        {
+            if (epochs.length == 0)
+                return Topology.EMPTY;
+
+            return epochs[0].global;
+        }
+
+        /**
+         * Builds a new Epochs with {@code topology} as the newest epoch.
+         *
+         * Before the new epoch is added, the first entry of the pending sync notifications
+         * (slot 0, which syncComplete() uses for epoch {@code maxEpoch}) is removed and applied
+         * to the current newest epoch state. The pending list is shared with, and mutated on
+         * behalf of, the returned instance.
+         *
+         * @param topology the topology for the next epoch
+         * @return a new Epochs containing the existing epoch states plus the new one
+         * @throws IllegalArgumentException if {@code topology.epoch != nextEpoch()}
+         */
+        public Epochs add(Topology topology)
+        {
+            Preconditions.checkArgument(topology.epoch == nextEpoch());
+            List<Set<Node.Id>> pendingSync = pendingSyncComplete;
+            if (!pendingSync.isEmpty())
+            {
+                // always drop slot 0 so the remaining slots line up with the new maxEpoch
+                Set<Node.Id> pendingForCurrent = pendingSync.remove(0);
+                // with no epochs yet there is no state to apply to; slot 0 is then only a
+                // placeholder, because syncComplete() requires epoch > 0 while maxEpoch is 0
+                if (epochs.length > 0)
+                {
+                    boolean currentPrevSynced = epochs.length <= 1 || epochs[1].syncComplete();
+                    EpochState currentEpoch = epochs[0];
+                    pendingForCurrent.forEach(id -> currentEpoch.recordSyncComplete(id, currentPrevSynced));
+                }
+            }
+
+            // shift existing states down by one to make room for the new epoch at index 0
+            EpochState[] nextEpochs = new EpochState[epochs.length + 1];
+            System.arraycopy(epochs, 0, nextEpochs, 1, epochs.length);
+
+            boolean prevSynced = epochs.length == 0 || epochs[0].syncComplete();
+            nextEpochs[0] = new EpochState(topology, prevSynced);
+            return new Epochs(nextEpochs, pendingSync);
+        }
+
+        public void syncComplete(Node.Id node, long epoch)
+        {
+            Preconditions.checkArgument(epoch > 0);
+            if (epoch > maxEpoch - 1)
+            {
+                int idx = (int) (epoch - maxEpoch);
+                for (int i=pendingSyncComplete.size(); i<=idx; i++)
+                    pendingSyncComplete.add(new HashSet<>());
+
+                pendingSyncComplete.get(idx).add(node);
+            }
+            else
+            {
+                boolean prevSynced = epoch == minEpoch || get(epoch - 
1).syncComplete();
+                boolean hasChanged = get(epoch).recordSyncComplete(node, 
prevSynced);
+                for (epoch++ ;hasChanged && epoch <= maxEpoch; epoch++)
+                {
+                    prevSynced = get(epoch - 1).syncComplete();
+                    hasChanged = get(epoch).updateState(prevSynced);
+                }
+            }
+        }
+
+        /**
+         * Looks up the state for {@code epoch}.
+         *
+         * The array is ordered newest first, so index 0 holds {@code maxEpoch} and the last
+         * index holds {@code maxEpoch - epochs.length + 1}.
+         *
+         * @return the state for {@code epoch}, or null if it lies outside the tracked range
+         */
+        private EpochState get(long epoch)
+        {
+            // the lower bound must be inclusive: epoch == maxEpoch - epochs.length would
+            // otherwise map to index epochs.length, one past the end of the array
+            if (epoch > maxEpoch || epoch <= maxEpoch - epochs.length)
+                return null;
+
+            return epochs[(int) (maxEpoch - epoch)];
+        }
+
+        long canProcess(TxnRequestScope scope)
+        {
+            EpochState lastState = null;
+            for (int i=0, mi=scope.size(); i<mi; i++)
+            {
+                TxnRequestScope.EpochRanges requestRanges = scope.get(i);
+                EpochState epochState = get(requestRanges.epoch);
+
+                if (epochState != null)
+                {
+                    lastState = epochState;
+                }
+                else if (lastState != null && 
lastState.local.ranges().intersects(requestRanges.keys))
+                {
+                    // we don't have the most recent epoch, but still 
replicate the requested ranges
+                    continue;
+                }
+                else
+                {
+                    // we don't have the most recent epoch, and we don't 
replicate the requested ranges
+                    return scope.maxEpoch();
+                }
+
+                // validate requested ranges
+                KeyRanges localRanges = epochState.local.ranges();
+                if (!localRanges.intersects(requestRanges.keys))
+                    throw new RuntimeException("Received request for ranges 
not replicated by this node");
+            }
+            if (scope.maxEpoch() > 0)
+                missingEpochNotify.accept(scope.maxEpoch());

Review comment:
       Thanks. The confusion for me was that we also seemed to invoke this in 
`Node.receive` if we _didn't_ know an epoch. I can see now that this method is 
intended to be used differently by implementations, but it seems to also 
perform the same fetching by default. Are we relying on this behaviour? If not, 
might it be better (particularly from a test perspective) to have this no-op by 
default?




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to