arjunmohnot commented on code in PR #7049:
URL: https://github.com/apache/hadoop/pull/7049#discussion_r1762881987


##########
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/NodesListManager.java:
##########
@@ -387,6 +388,115 @@ private void handleExcludeNodeList(boolean graceful, int 
timeout) {
     updateInactiveNodes();
   }
 
+  /**
+   * Marks the unregistered nodes as LOST
+   * if the feature is enabled via a configuration flag.
+   *
+   * This method finds nodes that are present in the include list but are not
+   * registered with the ResourceManager. Such nodes are then marked as LOST.
+   *
+   * The steps are as follows:
+   * 1. Retrieve all hostnames of registered nodes from RM.
+   * 2. Identify the nodes present in the include list but are not registered
+   * 3. Remove nodes from the exclude list
+   * 4. Dispatch LOST events for filtered nodes to mark them as LOST.
+   *
+   * @param yarnConf Configuration object that holds the YARN configurations.
+   */
+  private void markUnregisteredNodesAsLost(Configuration yarnConf) {
+    // Check if tracking unregistered nodes is enabled in the configuration
+    if 
(!yarnConf.getBoolean(YarnConfiguration.ENABLE_TRACKING_FOR_UNREGISTERED_NODES,
+        YarnConfiguration.DEFAULT_ENABLE_TRACKING_FOR_UNREGISTERED_NODES)) {
+      LOG.debug("Unregistered node tracking is disabled. " +
+          "Skipping marking unregistered nodes as LOST.");
+      return;
+    }
+
+    // Set to store all registered hostnames from both active and inactive 
lists
+    Set<String> registeredHostNames = gatherRegisteredHostNames();
+    // Event handler to dispatch LOST events
+    EventHandler eventHandler = 
this.rmContext.getDispatcher().getEventHandler();
+
+    // Identify nodes that are in the include list but are not registered
+    // and are not in the exclude list
+    List<String> nodesToMarkLost = new ArrayList<>();
+    HostDetails hostDetails = hostsReader.getHostDetails();
+    Set<String> includes = hostDetails.getIncludedHosts();
+    Set<String> excludes = hostDetails.getExcludedHosts();
+
+    for (String includedNode : includes) {
+      if (!registeredHostNames.contains(includedNode) && 
!excludes.contains(includedNode)) {
+        LOG.info("Lost node: " + includedNode);
+        nodesToMarkLost.add(includedNode);
+      }
+    }
+
+    // Dispatch LOST events for the identified lost nodes
+    for (String lostNode : nodesToMarkLost) {
+      dispatchLostEvent(eventHandler, lostNode);
+    }
+
+    // Log successful completion of marking unregistered nodes as LOST
+    LOG.info("Successfully marked unregistered nodes as LOST");
+  }
+
+  /**
+   * Gathers all registered hostnames from both active and inactive RMNodes.
+   *
+   * @return A set of registered hostnames.
+   */
+  private Set<String> gatherRegisteredHostNames() {
+    Set<String> registeredHostNames = new HashSet<>();
+    LOG.info("Getting all the registered hostnames");
+
+    // Gather all registered nodes (active) from RM into the set
+    for (RMNode node : this.rmContext.getRMNodes().values()) {
+      registeredHostNames.add(node.getHostName());
+    }
+
+    // Gather all inactive nodes from RM into the set
+    for (RMNode node : this.rmContext.getInactiveRMNodes().values()) {
+      registeredHostNames.add(node.getHostName());
+    }
+
+    return registeredHostNames;
+  }
+
+  /**
+   * Dispatches a LOST event for a specified lost node.
+   *
+   * @param eventHandler The EventHandler used to dispatch the LOST event.
+   * @param lostNode     The hostname of the lost node for which the event is
+   *                     being dispatched.
+   */
+  private void dispatchLostEvent(EventHandler eventHandler, String lostNode) {
+    // Generate a NodeId for the lost node with a special port -2
+    NodeId nodeId = createLostNodeId(lostNode);
+    RMNodeEvent lostEvent = new RMNodeEvent(nodeId, RMNodeEventType.EXPIRE);
+    RMNodeImpl rmNode = new RMNodeImpl(nodeId, this.rmContext, lostNode, -2, 
-2,
+        new UnknownNode(lostNode), Resource.newInstance(0, 0), "unknown");
+
+    try {
+      // Dispatch the LOST event to signal the node is no longer active
+      eventHandler.handle(lostEvent);
+
+      // After successful dispatch, update the node status in RMContext
+      // Set the node's timestamp for when it became untracked
+      rmNode.setUntrackedTimeStamp(Time.monotonicNow());
+
+      // Add the node to the active and inactive node maps in RMContext
+      this.rmContext.getRMNodes().put(nodeId, rmNode);
+      this.rmContext.getInactiveRMNodes().put(nodeId, rmNode);
+
+      LOG.info("Successfully dispatched LOST event and deactivated node: "

Review Comment:
   I've updated the logging structure in the recent commit. Thank you!



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to