Author: degenaro Date: Thu Jan 3 13:51:01 2019 New Revision: 1850235 URL: http://svn.apache.org/viewvc?rev=1850235&view=rev Log: UIMA-5928 DUCC Agent quiesce should wait (forever) for non-fairshare dispatchables to complete before shutting down
RM should honor NodeMetrics report of NodeStatus "UnAvailable" as indication of node "quiesced" WS should display Machines page Quiesce status: T/F Modified: uima/uima-ducc/trunk/uima-ducc-common/src/main/java/org/apache/uima/ducc/common/persistence/rm/IRmPersistence.java uima/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/event/ResourceManagerEventListener.java uima/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/NodePool.java uima/uima-ducc/trunk/uima-ducc-web/src/main/java/org/apache/uima/ducc/ws/server/DuccHandlerClassic.java uima/uima-ducc/trunk/uima-ducc-web/src/main/java/org/apache/uima/ducc/ws/server/DuccHandlerJsonFormat.java uima/uima-ducc/trunk/uima-ducc-web/src/main/java/org/apache/uima/ducc/ws/state/monitoring/INodeState.java uima/uima-ducc/trunk/uima-ducc-web/src/main/java/org/apache/uima/ducc/ws/state/monitoring/NodeState.java uima/uima-ducc/trunk/uima-ducc-web/src/main/webapp/root/system.machines.jsp Modified: uima/uima-ducc/trunk/uima-ducc-common/src/main/java/org/apache/uima/ducc/common/persistence/rm/IRmPersistence.java URL: http://svn.apache.org/viewvc/uima/uima-ducc/trunk/uima-ducc-common/src/main/java/org/apache/uima/ducc/common/persistence/rm/IRmPersistence.java?rev=1850235&r1=1850234&r2=1850235&view=diff ============================================================================== --- uima/uima-ducc/trunk/uima-ducc-common/src/main/java/org/apache/uima/ducc/common/persistence/rm/IRmPersistence.java (original) +++ uima/uima-ducc/trunk/uima-ducc-common/src/main/java/org/apache/uima/ducc/common/persistence/rm/IRmPersistence.java Thu Jan 3 13:51:01 2019 @@ -259,6 +259,11 @@ public interface IRmPersistence public Type type() { return Type.Boolean; } public boolean isIndex() { return true; } }, + Quiesced{ + public String pname() { return "quiesced"; } + public Type type() { return Type.Boolean; } + public boolean isIndex() { return true; } + }, Reservable{ public String pname() { return 
"reservable"; } public Type type() { return Type.Boolean; } Modified: uima/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/event/ResourceManagerEventListener.java URL: http://svn.apache.org/viewvc/uima/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/event/ResourceManagerEventListener.java?rev=1850235&r1=1850234&r2=1850235&view=diff ============================================================================== --- uima/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/event/ResourceManagerEventListener.java (original) +++ uima/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/event/ResourceManagerEventListener.java Thu Jan 3 13:51:01 2019 @@ -20,7 +20,10 @@ package org.apache.uima.ducc.rm.event; import org.apache.camel.Body; import org.apache.uima.ducc.common.ANodeStability; +import org.apache.uima.ducc.common.Node; import org.apache.uima.ducc.common.NodeIdentity; +import org.apache.uima.ducc.common.node.metrics.NodeMetrics; +import org.apache.uima.ducc.common.node.metrics.NodeMetrics.NodeStatus; import org.apache.uima.ducc.common.utils.DuccLogger; import org.apache.uima.ducc.common.utils.id.DuccId; import org.apache.uima.ducc.rm.ResourceManager; @@ -90,8 +93,24 @@ public class ResourceManagerEventListene */ public void onNodeMetricsEvent(@Body NodeMetricsUpdateDuccEvent duccEvent) throws Exception { + String location = "onNodeMetricsEvent"; + DuccId jobid = null; //rm.nodeArrives(duccEvent.getNode()); - nodeStability.nodeArrives(duccEvent.getNode()); + Node node = duccEvent.getNode(); + nodeStability.nodeArrives(node); + if(node != null) { + NodeMetrics nodeMetrics = node.getNodeMetrics(); + if(nodeMetrics != null) { + NodeStatus nodeStatus = nodeMetrics.getNodeStatus(); + String name = null; + NodeIdentity nodeIdentity = node.getNodeIdentity(); + if(nodeIdentity != null) { + name = nodeIdentity.getShortName(); + } + logger.debug(location, jobid, name, nodeStatus.name()); + } + + } } public 
void onNodeInventoryUpdateEvent(@Body NodeInventoryUpdateDuccEvent duccEvent) throws Exception { Modified: uima/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/NodePool.java URL: http://svn.apache.org/viewvc/uima/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/NodePool.java?rev=1850235&r1=1850234&r2=1850235&view=diff ============================================================================== --- uima/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/NodePool.java (original) +++ uima/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/NodePool.java Thu Jan 3 13:51:01 2019 @@ -30,6 +30,8 @@ import java.util.Map; import org.apache.uima.ducc.common.Node; import org.apache.uima.ducc.common.NodeIdentity; +import org.apache.uima.ducc.common.node.metrics.NodeMetrics; +import org.apache.uima.ducc.common.node.metrics.NodeMetrics.NodeStatus; import org.apache.uima.ducc.common.persistence.rm.IRmPersistence.RmNodes; import org.apache.uima.ducc.common.utils.DuccLogger; import org.apache.uima.ducc.common.utils.SystemPropertyResolver; @@ -61,7 +63,8 @@ class NodePool HashMap<Integer, HashMap<Node, Machine>> machinesByOrder = new HashMap<Integer, HashMap<Node, Machine>>(); // All schedulable machines, not necessarily free HashMap<String, Machine> machinesByName = new HashMap<String, Machine>(); // by name, for nodepool support HashMap<String, Machine> deadByName = new HashMap<String, Machine>(); // anything we move to offline or unresponsive, - // but with the same name we used, because + + List<Node> quiesceMachines = new ArrayList<Node>(); // sometimes stupid domain gets in the way HashMap<String, Machine> machinesByIp = new HashMap<String, Machine>(); // by IP, for nodepool support @@ -1067,6 +1070,76 @@ class NodePool } } + // determine if Node has been quiesced + boolean isQuiesce(Node node) { + String methodName = "isQuiesce"; + boolean retVal = false; + 
NodeMetrics nm = node.getNodeMetrics(); + if(nm != null) { + NodeStatus ns = nm.getNodeStatus(); + if(ns != null) { + switch(ns) { + case UnAvailable: + logger.info(methodName, null, node.getNodeIdentity().getShortName(),"node status = "+ns.name()); + retVal = true; + break; + default: + logger.debug(methodName, null, node.getNodeIdentity().getShortName(),"node status = "+ns.name()); + break; + } + } + else { + logger.warn(methodName, null, node.getNodeIdentity().getShortName(),"node status missing"); + } + } + else { + logger.warn(methodName, null, node.getNodeIdentity().getShortName(),"node metrics missing"); + } + return retVal; + } + + // process Node that is quiesced + private void handle_quiesced(Node node) { + String methodName = "handle_quiesced"; + String name = node.getNodeIdentity().getCanonicalName(); + if(quiesceMachines.contains(node)) { + logger.trace(methodName, null, "Node ", name, " is already quiesced."); + } + else { + quiesceMachines.add(node); + logger.info(methodName, null, "Node ", name, " is quiesced."); + if(allMachines.containsKey(node)) { + Machine machine = allMachines.get(node); + signalDb(machine, RmNodes.Quiesced, true); + logger.info(methodName, null, "Node ", name, " db marked quiesced."); + } + else { + logger.info(methodName, null, "Node ", name, " is new."); + } + } + } + + // process Node that is not quiesced + private void handle_not_quiesced(Node node) { + String methodName = "handle_not_quiesced"; + String name = node.getNodeIdentity().getCanonicalName(); + if(!quiesceMachines.contains(node)) { + logger.trace(methodName, null, "Node ", name, " is already not quiesced."); + } + else { + quiesceMachines.remove(node); + logger.info(methodName, null, "Node ", name, " is not quiesced."); + if(allMachines.containsKey(node)) { + Machine machine = allMachines.get(node); + signalDb(machine, RmNodes.Quiesced, false); + logger.info(methodName, null, "Node ", name, " db marked not quiesced."); + } + else { + logger.info(methodName, 
null, "Node ", name, " is new."); + } + } + } + /** * Handle a new node update. */ @@ -1080,6 +1153,14 @@ class NodePool String n = node.getNodeIdentity().getCanonicalName(); + boolean node_quiesced = isQuiesce(node); + if(node_quiesced) { + handle_quiesced(node); + } + else { + handle_not_quiesced(node); + } + // if it's offline it can't be restored like this. if ( offlineMachines.containsKey(node) ) { Machine m = offlineMachines.get(node); @@ -1142,6 +1223,7 @@ class NodePool updated++; Map<RmNodes, Object> props = initDbProperties(allMachines.get(key)); + props.put(RmNodes.Quiesced, node_quiesced); props.put(RmNodes.Responsive, true); props.put(RmNodes.Online, true); try { @@ -1303,8 +1385,11 @@ class NodePool boolean isSchedulable(Machine m) { if ( m.isBlacklisted() ) return false; + if ( unresponsiveMachines.containsKey(m.key()) ) return false; if ( offlineMachines.containsKey(m.key()) ) return false; + + if ( quiesceMachines.contains(m.key()) ) return false; return true; } Modified: uima/uima-ducc/trunk/uima-ducc-web/src/main/java/org/apache/uima/ducc/ws/server/DuccHandlerClassic.java URL: http://svn.apache.org/viewvc/uima/uima-ducc/trunk/uima-ducc-web/src/main/java/org/apache/uima/ducc/ws/server/DuccHandlerClassic.java?rev=1850235&r1=1850234&r2=1850235&view=diff ============================================================================== --- uima/uima-ducc/trunk/uima-ducc-web/src/main/java/org/apache/uima/ducc/ws/server/DuccHandlerClassic.java (original) +++ uima/uima-ducc/trunk/uima-ducc-web/src/main/java/org/apache/uima/ducc/ws/server/DuccHandlerClassic.java Thu Jan 3 13:51:01 2019 @@ -1818,6 +1818,10 @@ public class DuccHandlerClassic extends row.append("<td>"); row.append(nodeState.getOnline(machineInfo.getName(), "-")); row.append("</td>"); + // Quiesced + row.append("<td>"); + row.append(nodeState.getQuiesced(machineInfo.getName(), "-")); + row.append("</td>"); // IP row.append("<td>"); row.append(machineInfo.getIp()); @@ -2032,6 +2036,10 @@ public 
class DuccHandlerClassic extends row.append("<td>"); row.append(""); row.append("</td>"); + // Quiesced + row.append("<td>"); + row.append(""); + row.append("</td>"); // IP row.append("<td>"); row.append(""); Modified: uima/uima-ducc/trunk/uima-ducc-web/src/main/java/org/apache/uima/ducc/ws/server/DuccHandlerJsonFormat.java URL: http://svn.apache.org/viewvc/uima/uima-ducc/trunk/uima-ducc-web/src/main/java/org/apache/uima/ducc/ws/server/DuccHandlerJsonFormat.java?rev=1850235&r1=1850234&r2=1850235&view=diff ============================================================================== --- uima/uima-ducc/trunk/uima-ducc-web/src/main/java/org/apache/uima/ducc/ws/server/DuccHandlerJsonFormat.java (original) +++ uima/uima-ducc/trunk/uima-ducc-web/src/main/java/org/apache/uima/ducc/ws/server/DuccHandlerJsonFormat.java Thu Jan 3 13:51:01 2019 @@ -1801,6 +1801,8 @@ public class DuccHandlerJsonFormat exten row.add(new JsonPrimitive(sb.toString())); // Online row.add(new JsonPrimitive(nodeState.getOnline(machineInfo.getName(), "-"))); + // Quiesced + row.add(new JsonPrimitive(nodeState.getQuiesced(machineInfo.getName(), "-"))); // IP row.add(new JsonPrimitive(machineInfo.getIp())); // Name @@ -2025,6 +2027,8 @@ public class DuccHandlerJsonFormat exten row.add(new JsonPrimitive("Total")); // Online row.add(new JsonPrimitive("")); + // Quiesced + row.add(new JsonPrimitive("")); // IP row.add(new JsonPrimitive("")); // Name Modified: uima/uima-ducc/trunk/uima-ducc-web/src/main/java/org/apache/uima/ducc/ws/state/monitoring/INodeState.java URL: http://svn.apache.org/viewvc/uima/uima-ducc/trunk/uima-ducc-web/src/main/java/org/apache/uima/ducc/ws/state/monitoring/INodeState.java?rev=1850235&r1=1850234&r2=1850235&view=diff ============================================================================== --- uima/uima-ducc/trunk/uima-ducc-web/src/main/java/org/apache/uima/ducc/ws/state/monitoring/INodeState.java (original) +++ 
uima/uima-ducc/trunk/uima-ducc-web/src/main/java/org/apache/uima/ducc/ws/state/monitoring/INodeState.java Thu Jan 3 13:51:01 2019 @@ -20,6 +20,7 @@ package org.apache.uima.ducc.ws.state.mo public interface INodeState { public String getOnline(String node, String otherwise); + public String getQuiesced(String node, String otherwise); // admin public void start(); public void stop(); Modified: uima/uima-ducc/trunk/uima-ducc-web/src/main/java/org/apache/uima/ducc/ws/state/monitoring/NodeState.java URL: http://svn.apache.org/viewvc/uima/uima-ducc/trunk/uima-ducc-web/src/main/java/org/apache/uima/ducc/ws/state/monitoring/NodeState.java?rev=1850235&r1=1850234&r2=1850235&view=diff ============================================================================== --- uima/uima-ducc/trunk/uima-ducc-web/src/main/java/org/apache/uima/ducc/ws/state/monitoring/NodeState.java (original) +++ uima/uima-ducc/trunk/uima-ducc-web/src/main/java/org/apache/uima/ducc/ws/state/monitoring/NodeState.java Thu Jan 3 13:51:01 2019 @@ -50,6 +50,7 @@ public class NodeState implements INodeS private Monitor monitor = null; private String key_online = "online"; + private String key_quiesced = "quiesced"; private NodeState() { start(); @@ -101,6 +102,38 @@ public class NodeState implements INodeS logger.debug(location, jobid, node+"=="+retVal); break; } + else { + logger.debug(location, jobid, key+"!="+node); + } + } + } + else { + logger.warn(location, jobid, "size:"+0); + } + } + else { + logger.error(location, jobid, "node:"+node); + } + return retVal; + } + + // general function to get quiesced status for node + @Override + public String getQuiesced(String node, String otherwise) { + String location = "getQuiesced"; + String retVal = otherwise; + if(node != null) { + if(map.size() > 0) { + for(Entry<String, Map<String, Object>> entry : map.entrySet()) { + String key = entry.getKey(); + if(key.equals(node)) { + logger.debug(location, jobid, key+"=="+node); + Map<String, Object> value = 
entry.getValue(); + Boolean value_quiesced = (Boolean) value.get(key_quiesced); + retVal = ""+value_quiesced; + logger.debug(location, jobid, node+"=="+retVal); + break; + } else { logger.debug(location, jobid, key+"!="+node); } Modified: uima/uima-ducc/trunk/uima-ducc-web/src/main/webapp/root/system.machines.jsp URL: http://svn.apache.org/viewvc/uima/uima-ducc/trunk/uima-ducc-web/src/main/webapp/root/system.machines.jsp?rev=1850235&r1=1850234&r2=1850235&view=diff ============================================================================== --- uima/uima-ducc/trunk/uima-ducc-web/src/main/webapp/root/system.machines.jsp (original) +++ uima/uima-ducc/trunk/uima-ducc-web/src/main/webapp/root/system.machines.jsp Thu Jan 3 13:51:01 2019 @@ -39,7 +39,6 @@ under the License. "sAjaxSource": "ducc-servlet/json-format-aaData-machines", aaSorting: [], "fnRowCallback" : function(nRow,aData,iDisplayIndex) { - $('td:eq(5)' , nRow).css( "text-align", "right" ); $('td:eq(6)' , nRow).css( "text-align", "right" ); $('td:eq(7)' , nRow).css( "text-align", "right" ); $('td:eq(8)' , nRow).css( "text-align", "right" ); @@ -47,6 +46,7 @@ under the License. 
$('td:eq(10)' , nRow).css( "text-align", "right" ); $('td:eq(11)' , nRow).css( "text-align", "right" ); $('td:eq(12)' , nRow).css( "text-align", "right" ); + $('td:eq(13)' , nRow).css( "text-align", "right" ); return nRow; }, } ); @@ -108,6 +108,7 @@ if (table_style.equals("scroll")) { <tr class="ducc-header"> <th align="left" title="The heartbeat status, as reported to ducc-mon">Status</th> <th align="left" title="The online status, as determined by resource manager">Online</th> + <th align="left" title="The quiesce status, as determined by resource manager">Quiesce</th> <th align="left" title="The host IP">IP</th> <th align="left" title="The host name">Name</th> <th align="left" title="The host node pool">Nodepool</th> @@ -139,6 +140,7 @@ if (table_style.equals("classic")) { <tr class="ducc-head"> <th align="left" class="none" title="The heartbeat status, as reported to ducc-mon">Status</th> <th align="left" class="none" title="The online status, as determined by resource manager">Online</th> + <th align="left" title="The quiesce status, as determined by resource manager">Quiesce</th> <th align="left" class="none" title="The host IP">IP</th> <th align="left" class="none" title="The host name">Name</th> <th align="left" class="none" title="The host node pool">Nodepool</th>