rpuch commented on code in PR #1653:
URL: https://github.com/apache/ignite-3/pull/1653#discussion_r1101089284
##########
modules/cluster-management/src/main/java/org/apache/ignite/internal/cluster/management/raft/ValidationManager.java:
##########
@@ -146,46 +120,33 @@ ValidationResult validateNode(
}
boolean isNodeValidated(ClusterNode node) {
- return storage.isNodeValidated(node.id()) ||
logicalTopology.isNodeInLogicalTopology(node);
- }
-
- /**
- * Checks and removes the node from the list of validated nodes thus
completing the validation procedure.
- *
- * @param node Node that wishes to join the logical topology.
- */
- void completeValidation(ClusterNode node) {
- Future<?> cleanupFuture = cleanupFutures.remove(node.id());
-
- if (cleanupFuture != null) {
- cleanupFuture.cancel(false);
- }
-
- storage.removeValidatedNode(node.id());
+ return storage.isNodeValidated(node) ||
logicalTopology.isNodeInLogicalTopology(node);
}
- private void putValidatedNode(ClusterNode node) {
- storage.putValidatedNode(node.id());
+ void putValidatedNode(ClusterNode node) {
+ storage.putValidatedNode(node);
- scheduleValidatedNodeRemoval(node.id());
+ logicalTopology.onNodeValidated(node);
}
- private void scheduleValidatedNodeRemoval(String nodeId) {
- Future<?> future = executor.schedule(() -> {
- LOG.info("Removing node from the list of validated nodes since no
JoinReady requests have been received [node={}]", nodeId);
-
- cleanupFutures.remove(nodeId);
+ void removeValidatedNodes(Collection<ClusterNode> nodes) {
+ Set<ClusterNode> validatedNodes = storage.getValidatedNodes();
- storage.removeValidatedNode(nodeId);
- }, configuration.incompleteJoinTimeout().value(),
TimeUnit.MILLISECONDS);
+ nodes.forEach(node -> {
+ if (validatedNodes.contains(node)) {
+ storage.removeValidatedNode(node);
- cleanupFutures.put(nodeId, future);
+ logicalTopology.onNodeInvalidated(node);
+ }
+ });
}
- @Override
- public void close() {
- IgniteUtils.shutdownAndAwaitTermination(executor, 10,
TimeUnit.SECONDS);
-
- cleanupFutures.clear();
+ /**
+ * Checks and removes the node from the list of validated nodes thus
completing the validation procedure.
Review Comment:
Does it still check anything?
##########
modules/runner/src/integrationTest/java/org/apache/ignite/internal/compute/ItLogicalTopologyTest.java:
##########
@@ -115,21 +131,38 @@ void receivesLogicalTopologyEventsCausedByNodeRestart()
throws Exception {
restartNode(1);
- waitForCondition(() -> events.size() >= 2, 10_000);
+ // There's a race between a new version of the node being validated,
while the old version has not yet been removed from the logical
+ // topology. So LEFT (for old node version) and VALIDATED (for new
node version) events can come in arbitrary order.
+ Event firstEvent = events.poll(10, TimeUnit.SECONDS);
+ Event secondEvent = events.poll(10, TimeUnit.SECONDS);
+
+ assertThat(firstEvent, is(notNullValue()));
+ assertThat(firstEvent.eventType, is(oneOf(EventType.LEFT,
EventType.VALIDATED)));
- assertThat(events, hasSize(2));
+ assertThat(secondEvent, is(notNullValue()));
+ assertThat(secondEvent.eventType, is(oneOf(EventType.LEFT,
EventType.VALIDATED)));
- Event leaveEvent = events.get(0);
+ Event leftEvent = firstEvent.eventType == EventType.LEFT ? firstEvent
: secondEvent;
+ Event validatedEvent = firstEvent.eventType == EventType.VALIDATED ?
firstEvent : secondEvent;
- assertFalse(leaveEvent.appeared);
- assertThat(leaveEvent.node.name(), is(secondIgnite.name()));
- assertThat(leaveEvent.topologyVersion, is(3L));
+ assertThat(leftEvent, is(notNullValue()));
+ assertThat(validatedEvent, is(notNullValue()));
Review Comment:
Non-nullability seems already have been asserted (for first and second event)
##########
modules/cluster-management/src/main/java/org/apache/ignite/internal/cluster/management/topology/LogicalTopologyImpl.java:
##########
@@ -146,38 +157,32 @@ public boolean isNodeInLogicalTopology(ClusterNode
needle) {
}
private void fireAppeared(ClusterNode appearedNode,
LogicalTopologySnapshot snapshot) {
- for (LogicalTopologyEventListener listener : listeners) {
- try {
- listener.onAppeared(appearedNode, snapshot);
- } catch (Throwable e) {
- logAndRethrowIfError(e, "Failure while notifying onAppear()
listener {}", listener);
- }
- }
+ notifyListeners(listener -> listener.onNodeJoined(appearedNode,
snapshot), "onNodeJoined");
}
private void fireDisappeared(ClusterNode oldNode, LogicalTopologySnapshot
snapshot) {
Review Comment:
Let's rename it too
##########
modules/cluster-management/src/main/java/org/apache/ignite/internal/cluster/management/raft/ValidationManager.java:
##########
@@ -146,46 +120,33 @@ ValidationResult validateNode(
}
boolean isNodeValidated(ClusterNode node) {
- return storage.isNodeValidated(node.id()) ||
logicalTopology.isNodeInLogicalTopology(node);
- }
-
- /**
- * Checks and removes the node from the list of validated nodes thus
completing the validation procedure.
- *
- * @param node Node that wishes to join the logical topology.
- */
- void completeValidation(ClusterNode node) {
- Future<?> cleanupFuture = cleanupFutures.remove(node.id());
-
- if (cleanupFuture != null) {
- cleanupFuture.cancel(false);
- }
-
- storage.removeValidatedNode(node.id());
+ return storage.isNodeValidated(node) ||
logicalTopology.isNodeInLogicalTopology(node);
}
- private void putValidatedNode(ClusterNode node) {
- storage.putValidatedNode(node.id());
+ void putValidatedNode(ClusterNode node) {
+ storage.putValidatedNode(node);
- scheduleValidatedNodeRemoval(node.id());
+ logicalTopology.onNodeValidated(node);
}
- private void scheduleValidatedNodeRemoval(String nodeId) {
- Future<?> future = executor.schedule(() -> {
- LOG.info("Removing node from the list of validated nodes since no
JoinReady requests have been received [node={}]", nodeId);
-
- cleanupFutures.remove(nodeId);
+ void removeValidatedNodes(Collection<ClusterNode> nodes) {
+ Set<ClusterNode> validatedNodes = storage.getValidatedNodes();
- storage.removeValidatedNode(nodeId);
- }, configuration.incompleteJoinTimeout().value(),
TimeUnit.MILLISECONDS);
+ nodes.forEach(node -> {
+ if (validatedNodes.contains(node)) {
Review Comment:
`ClusterNode.equals()` uses `id` and address for comparison. Should we rely
on it? It seems safer to directly compare `id`s.
##########
modules/runner/src/integrationTest/java/org/apache/ignite/internal/compute/ItLogicalTopologyTest.java:
##########
@@ -115,21 +131,38 @@ void receivesLogicalTopologyEventsCausedByNodeRestart()
throws Exception {
restartNode(1);
- waitForCondition(() -> events.size() >= 2, 10_000);
+ // There's a race between a new version of the node being validated,
while the old version has not yet been removed from the logical
Review Comment:
Why is there such a race? Can't we always remove previous node version
before making its new version as validated?
##########
modules/cluster-management/src/main/java/org/apache/ignite/internal/cluster/management/topology/LogicalTopologyImpl.java:
##########
@@ -146,38 +157,32 @@ public boolean isNodeInLogicalTopology(ClusterNode
needle) {
}
private void fireAppeared(ClusterNode appearedNode,
LogicalTopologySnapshot snapshot) {
Review Comment:
Let's rename it to `fireNodeJoined`
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]