Author: kihwal Date: Tue Aug 27 19:25:29 2013 New Revision: 1517942 URL: http://svn.apache.org/r1517942 Log: HDFS-3245. Add metrics and web UI for cluster version summary. Contributed by Ravi Prakash.
Modified: hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/DatanodeInfo.java hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ClusterJspHelper.java hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeMXBean.java hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeJspHelper.java hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/dfshealth.jsp Modified: hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt?rev=1517942&r1=1517941&r2=1517942&view=diff ============================================================================== --- hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt (original) +++ hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt Tue Aug 27 19:25:29 2013 @@ -107,6 +107,9 @@ Release 2.1.1-beta - UNRELEASED HDFS-5045. Add more unit tests for retry cache to cover all AtMostOnce methods. (jing9) + HDFS-3245. Add metrics and web UI for cluster version summary. (Ravi + Prakash via kihwal) + OPTIMIZATIONS BUG FIXES Modified: hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/DatanodeInfo.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/DatanodeInfo.java?rev=1517942&r1=1517941&r2=1517942&view=diff ============================================================================== --- hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/DatanodeInfo.java (original) +++ hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/DatanodeInfo.java Tue Aug 27 19:25:29 2013 @@ -47,6 +47,7 @@ public class DatanodeInfo extends Datano private long lastUpdate; private int xceiverCount; private String location = NetworkTopology.DEFAULT_RACK; + private String softwareVersion; // Datanode administrative states public enum AdminStates { @@ -381,4 +382,12 @@ public class DatanodeInfo extends Datano // by DatanodeID return (this == obj) || super.equals(obj); } + + public String getSoftwareVersion() { + return softwareVersion; + } + + public void setSoftwareVersion(String softwareVersion) { + this.softwareVersion = softwareVersion; + } } Modified: hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java?rev=1517942&r1=1517941&r2=1517942&view=diff ============================================================================== --- hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java (original) +++ hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java Tue Aug 27 19:25:29 2013 @@ -26,6 +26,7 @@ import java.net.UnknownHostException; import java.util.ArrayList; import java.util.Arrays; import java.util.Comparator; +import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.NavigableMap; @@ -55,13 +56,6 @@ import org.apache.hadoop.hdfs.server.nam import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.hdfs.server.namenode.Namesystem; import org.apache.hadoop.hdfs.server.protocol.BalancerBandwidthCommand; -import org.apache.hadoop.hdfs.server.namenode.HostFileManager; -import org.apache.hadoop.hdfs.server.namenode.HostFileManager.Entry; -import org.apache.hadoop.hdfs.server.namenode.HostFileManager.EntrySet; -import org.apache.hadoop.hdfs.server.namenode.HostFileManager.MutableEntrySet; -import org.apache.hadoop.hdfs.server.namenode.NameNode; -import org.apache.hadoop.hdfs.server.namenode.Namesystem; -import org.apache.hadoop.hdfs.server.protocol.BalancerBandwidthCommand; import org.apache.hadoop.hdfs.server.protocol.BlockCommand; import org.apache.hadoop.hdfs.server.protocol.BlockRecoveryCommand; import org.apache.hadoop.hdfs.server.protocol.BlockRecoveryCommand.RecoveringBlock; @@ -71,7 +65,6 @@ import org.apache.hadoop.hdfs.server.pro import org.apache.hadoop.hdfs.server.protocol.DisallowedDatanodeException; import org.apache.hadoop.hdfs.server.protocol.RegisterCommand; import org.apache.hadoop.hdfs.util.CyclicIteration; -import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.ipc.Server; import org.apache.hadoop.net.CachedDNSToSwitchMapping; import org.apache.hadoop.net.DNSToSwitchMapping; @@ -172,6 +165,14 @@ public class DatanodeManager { * according to the NetworkTopology. */ private boolean hasClusterEverBeenMultiRack = false; + + /** + * The number of datanodes for each software version. This list should change + * during rolling upgrades. + * Software version -> Number of datanodes with this version + */ + private HashMap<String, Integer> datanodesSoftwareVersions = + new HashMap<String, Integer>(4, 0.75f); DatanodeManager(final BlockManager blockManager, final Namesystem namesystem, final Configuration conf) throws IOException { @@ -463,6 +464,7 @@ public class DatanodeManager { heartbeatManager.removeDatanode(nodeInfo); blockManager.removeBlocksAssociatedTo(nodeInfo); networktopology.remove(nodeInfo); + decrementVersionCount(nodeInfo.getSoftwareVersion()); if (LOG.isDebugEnabled()) { LOG.debug("remove datanode " + nodeInfo); @@ -545,6 +547,61 @@ public class DatanodeManager { } } + private void incrementVersionCount(String version) { + if (version == null) { + return; + } + synchronized(datanodeMap) { + Integer count = this.datanodesSoftwareVersions.get(version); + count = count == null ? 1 : count + 1; + this.datanodesSoftwareVersions.put(version, count); + } + } + + private void decrementVersionCount(String version) { + if (version == null) { + return; + } + synchronized(datanodeMap) { + Integer count = this.datanodesSoftwareVersions.get(version); + if(count != null) { + if(count > 1) { + this.datanodesSoftwareVersions.put(version, count-1); + } else { + this.datanodesSoftwareVersions.remove(version); + } + } + } + } + + private boolean shouldCountVersion(DatanodeDescriptor node) { + return node.getSoftwareVersion() != null && node.isAlive && + !isDatanodeDead(node); + } + + private void countSoftwareVersions() { + synchronized(datanodeMap) { + HashMap<String, Integer> versionCount = new HashMap<String, Integer>(); + for(DatanodeDescriptor dn: datanodeMap.values()) { + // Check isAlive too because right after removeDatanode(), + // isDatanodeDead() is still true + if(shouldCountVersion(dn)) + { + Integer num = versionCount.get(dn.getSoftwareVersion()); + num = num == null ? 1 : num+1; + versionCount.put(dn.getSoftwareVersion(), num); + } + } + this.datanodesSoftwareVersions = versionCount; + } + } + + public HashMap<String, Integer> getDatanodesSoftwareVersions() { + synchronized(datanodeMap) { + return new HashMap<String, Integer> (this.datanodesSoftwareVersions); + } + } + /* Resolve a node's network location */ private String resolveNetworkLocation (DatanodeID node) { List<String> names = new ArrayList<String>(1); @@ -761,21 +818,28 @@ public class DatanodeManager { try { // update cluster map getNetworkTopology().remove(nodeS); + if(shouldCountVersion(nodeS)) { + decrementVersionCount(nodeS.getSoftwareVersion()); + } nodeS.updateRegInfo(nodeReg); + + nodeS.setSoftwareVersion(nodeReg.getSoftwareVersion()); nodeS.setDisallowed(false); // Node is in the include list - + // resolve network location nodeS.setNetworkLocation(resolveNetworkLocation(nodeS)); getNetworkTopology().add(nodeS); // also treat the registration message as a heartbeat heartbeatManager.register(nodeS); + incrementVersionCount(nodeS.getSoftwareVersion()); checkDecommissioning(nodeS); success = true; } finally { if (!success) { removeDatanode(nodeS); wipeDatanode(nodeS); + countSoftwareVersions(); } } return; @@ -799,6 +863,7 @@ public class DatanodeManager { try { nodeDescr.setNetworkLocation(resolveNetworkLocation(nodeDescr)); networktopology.add(nodeDescr); + nodeDescr.setSoftwareVersion(nodeReg.getSoftwareVersion()); // register new datanode addDatanode(nodeDescr); @@ -809,10 +874,12 @@ public class DatanodeManager { // because its is done when the descriptor is created heartbeatManager.addDatanode(nodeDescr); success = true; + incrementVersionCount(nodeReg.getSoftwareVersion()); } finally { if (!success) { removeDatanode(nodeDescr); wipeDatanode(nodeDescr); + countSoftwareVersions(); } } } catch (InvalidTopologyException e) { @@ -834,6 +901,7 @@ public class DatanodeManager { namesystem.writeLock(); try { refreshDatanodes(); + countSoftwareVersions(); } finally { namesystem.writeUnlock(); } Modified: hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ClusterJspHelper.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ClusterJspHelper.java?rev=1517942&r1=1517941&r2=1517942&view=diff ============================================================================== --- hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ClusterJspHelper.java (original) +++ hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ClusterJspHelper.java Tue Aug 27 19:25:29 2013 @@ -359,6 +359,7 @@ class ClusterJspHelper { nn.httpAddress = httpAddress; getLiveNodeCount(getProperty(props, "LiveNodes").getValueAsText(), nn); getDeadNodeCount(getProperty(props, "DeadNodes").getValueAsText(), nn); + nn.softwareVersion = getProperty(props, "SoftwareVersion").getTextValue(); return nn; } @@ -596,6 +597,7 @@ class ClusterJspHelper { toXmlItemBlockWithLink(doc, nn.deadDatanodeCount + " (" + nn.deadDecomCount + ")", nn.httpAddress+"/dfsnodelist.jsp?whatNodes=DEAD" , "Dead Datanode (Decommissioned)"); + toXmlItemBlock(doc, "Software Version", nn.softwareVersion); doc.endTag(); // node } doc.endTag(); // namenodes @@ -624,6 +626,7 @@ class ClusterJspHelper { int deadDatanodeCount = 0; int deadDecomCount = 0; String httpAddress = null; + String softwareVersion = ""; } /** Modified: hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java?rev=1517942&r1=1517941&r2=1517942&view=diff ============================================================================== --- hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java (original) +++ hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java Tue Aug 27 19:25:29 2013 @@ -6208,6 +6208,7 @@ public class FSNamesystem implements Nam innerinfo.put("nonDfsUsedSpace", node.getNonDfsUsed()); innerinfo.put("capacity", node.getCapacity()); innerinfo.put("numBlocks", node.numBlocks()); + innerinfo.put("version", node.getSoftwareVersion()); info.put(node.getHostName(), innerinfo); } return JSON.toString(info); @@ -6419,6 +6420,22 @@ public class FSNamesystem implements Nam return JSON.toString(list); } + @Override //NameNodeMXBean + public int getDistinctVersionCount() { + return blockManager.getDatanodeManager().getDatanodesSoftwareVersions() + .size(); + } + + @Override //NameNodeMXBean + public Map<String, Integer> getDistinctVersions() { + return blockManager.getDatanodeManager().getDatanodesSoftwareVersions(); + } + + @Override //NameNodeMXBean + public String getSoftwareVersion() { + return VersionInfo.getVersion(); + } + /** * Verifies that the given identifier and password are valid and match. * @param identifier Token identifier. Modified: hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeMXBean.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeMXBean.java?rev=1517942&r1=1517941&r2=1517942&view=diff ============================================================================== --- hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeMXBean.java (original) +++ hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeMXBean.java Tue Aug 27 19:25:29 2013 @@ -17,6 +17,8 @@ */ package org.apache.hadoop.hdfs.server.namenode; +import java.util.Map; + import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; @@ -33,7 +35,13 @@ public interface NameNodeMXBean { * @return the version */ public String getVersion(); - + + /** + * Get the version of software running on the Namenode + * @return a string representing the version + */ + public String getSoftwareVersion(); + /** * Gets the used space by data nodes. * @@ -215,4 +223,19 @@ public interface NameNodeMXBean { * @return the list of corrupt files, as a JSON string. */ public String getCorruptFiles(); + + /** + * Get the number of distinct versions of live datanodes + * + * @return the number of distinct versions of live datanodes + */ + public int getDistinctVersionCount(); + + /** + * Get the number of live datanodes for each distinct versions + * + * @return the number of live datanodes for each distinct versions + */ + public Map<String, Integer> getDistinctVersions(); + } Modified: hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeJspHelper.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeJspHelper.java?rev=1517942&r1=1517941&r2=1517942&view=diff ============================================================================== --- hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeJspHelper.java (original) +++ hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeJspHelper.java Tue Aug 27 19:25:29 2013 @@ -32,6 +32,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Iterator; import java.util.List; +import java.util.Map; import javax.servlet.ServletContext; import javax.servlet.http.HttpServletRequest; @@ -99,6 +100,20 @@ class NamenodeJspHelper { } } + static String getRollingUpgradeText(FSNamesystem fsn) { + DatanodeManager dm = fsn.getBlockManager().getDatanodeManager(); + Map<String, Integer> list = dm.getDatanodesSoftwareVersions(); + if(list.size() > 1) { + StringBuffer status = new StringBuffer("Rolling upgrades in progress. " + + "There are " + list.size() + " versions of datanodes currently live: "); + for(Map.Entry<String, Integer> ver: list.entrySet()) { + status.append(ver.getKey() + "(" + ver.getValue() + "), "); + } + return status.substring(0, status.length()-2); + } + return ""; + } + static String getInodeLimitText(FSNamesystem fsn) { if (fsn == null) { return ""; @@ -802,7 +817,9 @@ class NamenodeJspHelper { + "<td align=\"right\" class=\"pcbpused\">" + percentBpUsed + "<td align=\"right\" class=\"volfails\">" - + d.getVolumeFailures() + "\n"); + + d.getVolumeFailures() + + "<td align=\"right\" class=\"version\">" + + d.getSoftwareVersion() + "\n"); } void generateNodesList(ServletContext context, JspWriter out, @@ -900,7 +917,9 @@ class NamenodeJspHelper { + nodeHeaderStr("pcbpused") + "> Block Pool<br>Used (%)" + " <th " + nodeHeaderStr("volfails") - +"> Failed Volumes\n"); + +"> Failed Volumes <th " + + nodeHeaderStr("versionString") + +"> Version\n"); JspHelper.sortNodeList(live, sorterField, sorterOrder); for (int i = 0; i < live.size(); i++) { Modified: hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/dfshealth.jsp URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/dfshealth.jsp?rev=1517942&r1=1517941&r2=1517942&view=diff ============================================================================== --- hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/dfshealth.jsp (original) +++ hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/dfshealth.jsp Tue Aug 27 19:25:29 2013 @@ -65,6 +65,7 @@ <h3>Cluster Summary</h3> <b> <%= NamenodeJspHelper.getSecurityModeText()%> </b> <b> <%= NamenodeJspHelper.getSafeModeText(fsn)%> </b> +<b> <%= NamenodeJspHelper.getRollingUpgradeText(fsn)%> </b> <b> <%= NamenodeJspHelper.getInodeLimitText(fsn)%> </b> <%= NamenodeJspHelper.getCorruptFilesWarning(fsn)%>