sashapolo commented on code in PR #784:
URL: https://github.com/apache/ignite-3/pull/784#discussion_r857365488


##########
modules/runner/src/main/java/org/apache/ignite/internal/app/IgniteImpl.java:
##########
@@ -396,90 +390,83 @@ public CompletableFuture<Ignite> start(@Language("HOCON") 
@Nullable String cfg)
                 nodeCfgMgr.configurationRegistry().initializeDefaults();
             }
 
-            waitForJoinPermission();
-
-            // Start the remaining components.
-            List<IgniteComponent> otherComponents = List.of(
+            // Start the components that are required to join the cluster.
+            lifecycleManager.startComponents(
                     nettyBootstrapFactory,
                     clusterSvc,
                     restComponent,
-                    computeComponent,
                     raftMgr,
-                    txManager,
-                    clusterCfgMgr,
-                    cmgMgr,
-                    metaStorageMgr,
-                    baselineMgr,
-                    dataStorageMgr,
-                    distributedTblMgr,
-                    qryEngine,
-                    clientHandlerModule
-            );
-
-            for (IgniteComponent component : otherComponents) {
-                doStartComponent(name, startedComponents, component);
-            }
-
-            CompletableFuture<Void> configurationCatchUpFuture = 
RecoveryCompletionFutureFactory.create(
-                    metaStorageMgr,
-                    clusterCfgMgr,
-                    fut -> new ConfigurationCatchUpListener(cfgStorage, fut, 
LOG)
+                    cmgMgr
             );
 
-            notifyConfigurationListeners();
-
-            // Deploy all registered watches because all components are ready 
and have registered their listeners.
-            metaStorageMgr.deployWatches();
-
-            if (!status.compareAndSet(Status.STARTING, Status.STARTED)) {
-                throw new NodeStoppingException();
-            }
-
-            return configurationCatchUpFuture.thenApply(v -> this);
+            return cmgMgr.joinFuture()
+                    .thenAccept(v -> {
+                        // Start all other components after the join request 
has completed and the node has been validated.
+                        try {
+                            lifecycleManager.startComponents(
+                                    metaStorageMgr,
+                                    clusterCfgMgr,
+                                    computeComponent,
+                                    txManager,
+                                    baselineMgr,
+                                    dataStorageMgr,
+                                    distributedTblMgr,
+                                    qryEngine,
+                                    clientHandlerModule
+                            );
+
+                            // Deploy all registered watches because all 
components are ready and have registered their listeners.
+                            metaStorageMgr.deployWatches();
+                        } catch (NodeStoppingException e) {
+                            throw new CompletionException(e);
+                        }
+                    })
+                    .thenCompose(v -> {
+                        // Recovery future must be created before 
configuration listeners are triggered.
+                        CompletableFuture<Void> recoveryFuture = 
RecoveryCompletionFutureFactory.create(
+                                clusterCfgMgr,
+                                fut -> new 
ConfigurationCatchUpListener(cfgStorage, fut, LOG)
+                        );
+
+                        return 
CompletableFuture.allOf(notifyConfigurationListeners(), recoveryFuture);
+                    })
+                    // Signal that local recovery is complete and the node is 
ready to join the cluster.
+                    .thenCompose(v -> cmgMgr.onJoinReady())
+                    .thenAccept(v -> {
+                        try {
+                            // Transfer the node to the STARTED state.
+                            lifecycleManager.onStartComplete();
+                        } catch (NodeStoppingException e) {
+                            throw new CompletionException(e);
+                        }
+                    })
+                    .handle((v, e) -> {
+                        if (e != null) {
+                            throw handleStartException(e);
+                        }
+
+                        return this;
+                    });
         } catch (Exception e) {
-            String errMsg = "Unable to start node=[" + name + "].";
+            throw handleStartException(e);
+        }
+    }
 
-            LOG.error(errMsg, e);
+    private RuntimeException handleStartException(Throwable e) {
+        String errMsg = "Unable to start node=[" + name + "].";
 
-            doStopNode(startedComponents);
+        LOG.error(errMsg, e);
 
-            throw new IgniteException(errMsg, e);
-        }
-    }
+        lifecycleManager.stopAllComponents();
 
-    /**
-     * Awaits for a permission to join the cluster, i.e. node join response 
from Cluster Management group.
-     * After the completion of this method, the node is considered as 
validated.
-     */
-    private void waitForJoinPermission() {
-        // TODO: implement, see 
https://issues.apache.org/jira/browse/IGNITE-16472
+        return new IgniteException(errMsg, e);
     }
 
     /**
      * Stops ignite node.
      */
     public void stop() {
-        if (status.getAndSet(Status.STOPPING) == Status.STARTED) {
-            doStopNode(List.of(
-                    longJvmPauseDetector,
-                    vaultMgr,
-                    nodeCfgMgr,
-                    clusterSvc,
-                    computeComponent,
-                    raftMgr,
-                    txManager,
-                    metaStorageMgr,
-                    clusterCfgMgr,
-                    cmgMgr,
-                    baselineMgr,
-                    dataStorageMgr,
-                    distributedTblMgr,
-                    qryEngine,
-                    restComponent,
-                    clientHandlerModule,
-                    nettyBootstrapFactory
-            ));
-        }
+        lifecycleManager.stopNode();

Review Comment:
   I've fixed this comment, but then I remembered that I actually did this on 
purpose: it is possible that a node may hang during join in which case it will 
never acquire the STARTED state. This will make it impossible to stop it in 
"afterEach" in tests, for example 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to