HDFS-9835. OIV: add ReverseXML processor which reconstructs an fsimage from an XML file (cmccabe)
(cherry picked from commit 700b0e4019cf483f7532609711812150b8c44742) Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/5998b20d Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/5998b20d Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/5998b20d Branch: refs/heads/branch-2 Commit: 5998b20d63fae09b08bd8bad1aca98551c0ac65a Parents: b97ac02 Author: Colin Patrick Mccabe <[email protected]> Authored: Thu Feb 25 16:43:54 2016 -0800 Committer: Colin Patrick Mccabe <[email protected]> Committed: Wed Mar 2 18:18:45 2016 -0800 ---------------------------------------------------------------------- .../apache/hadoop/test/GenericTestUtils.java | 56 + hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 2 + .../server/namenode/FSImageFormatPBINode.java | 12 +- .../OfflineImageReconstructor.java | 1643 ++++++++++++++++++ .../OfflineImageViewerPB.java | 22 +- .../offlineImageViewer/PBImageXmlWriter.java | 247 ++- .../TestOfflineImageViewer.java | 56 +- 7 files changed, 1985 insertions(+), 53 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/5998b20d/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/GenericTestUtils.java ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/GenericTestUtils.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/GenericTestUtils.java index 7e494a8..1907094 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/GenericTestUtils.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/GenericTestUtils.java @@ -17,8 +17,12 @@ */ package org.apache.hadoop.test; +import java.io.BufferedReader; import java.io.File; +import java.io.FileInputStream; +import java.io.FileReader; import 
java.io.IOException; +import java.io.InputStreamReader; import java.io.StringWriter; import java.lang.management.ManagementFactory; import java.lang.management.ThreadInfo; @@ -32,6 +36,7 @@ import java.util.concurrent.TimeoutException; import java.util.concurrent.atomic.AtomicInteger; import java.util.regex.Pattern; +import org.apache.commons.io.IOUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.impl.Log4JLogger; import org.apache.hadoop.fs.FileUtil;
@@ -481,4 +486,55 @@ public abstract class GenericTestUtils {
     Assume.assumeTrue(
         Boolean.parseBoolean(System.getProperty("runningWithNative", "false")));
   }
+
+  /**
+   * Get the diff between two files.
+   *
+   * The files are compared line by line, pairing lines by position; no
+   * attempt is made to realign after an insertion or deletion.  A pair that
+   * differs is reported as " - (line of a)" followed by " + (line of b)".
+   * If one file is longer, its extra lines are reported with " - " (extra
+   * lines of a) or " + " (extra lines of b).
+   *
+   * @param a The first file.
+   * @param b The second file.
+   * @return The empty string if there is no diff; the diff, otherwise.
+   *
+   * @throws IOException If there is an error reading either file.
+   */
+  public static String getFilesDiff(File a, File b) throws IOException {
+    StringBuilder bld = new StringBuilder();
+    BufferedReader ra = null, rb = null;
+    try {
+      ra = new BufferedReader(
+          new InputStreamReader(new FileInputStream(a)));
+      rb = new BufferedReader(
+          new InputStreamReader(new FileInputStream(b)));
+      while (true) {
+        String la = ra.readLine();
+        String lb = rb.readLine();
+        if (la == null) {
+          if (lb != null) {
+            // a ended first.  lb was already consumed from rb, so emit it
+            // here and then drain the rest of b (not a, which is at EOF).
+            bld.append(" + ").append(lb).append("\n");
+            addPlusses(bld, rb);
+          }
+          break;
+        } else if (lb == null) {
+          // b ended first.  la was already consumed from ra, so emit it
+          // here and then drain the rest of a (not b, which is at EOF).
+          bld.append(" - ").append(la).append("\n");
+          addRemaining(bld, ra, " - ");
+          break;
+        }
+        if (!la.equals(lb)) {
+          bld.append(" - ").append(la).append("\n");
+          bld.append(" + ").append(lb).append("\n");
+        }
+      }
+    } finally {
+      IOUtils.closeQuietly(ra);
+      IOUtils.closeQuietly(rb);
+    }
+    return bld.toString();
+  }
+
+  /**
+   * Append every remaining line of r to bld with the " + " prefix.
+   */
+  private static void addPlusses(StringBuilder bld, BufferedReader r)
+      throws IOException {
+    addRemaining(bld, r, " + ");
+  }
+
+  /**
+   * Append every remaining line of r to bld, each preceded by prefix and
+   * followed by a newline.
+   */
+  private static void addRemaining(StringBuilder bld, BufferedReader r,
+      String prefix) throws IOException {
+    String l;
+    while ((l = r.readLine()) != null) {
+      bld.append(prefix).append(l).append("\n");
+    }
+  }
 }
http://git-wip-us.apache.org/repos/asf/hadoop/blob/5998b20d/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 23ae7b5..6898629 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -7,6 +7,8 @@ Release 2.9.0 - UNRELEASED HDFS-9047. Retire libwebhdfs. (wheat9) NEW FEATURES + HDFS-9835. OIV: add ReverseXML processor which reconstructs an fsimage from + an XML file (cmccabe) IMPROVEMENTS http://git-wip-us.apache.org/repos/asf/hadoop/blob/5998b20d/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatPBINode.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatPBINode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatPBINode.java index e353c88..4e4a5e9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatPBINode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatPBINode.java @@ -89,14 +89,14 @@ public final class FSImageFormatPBINode { private static final AclEntryType[] ACL_ENTRY_TYPE_VALUES = AclEntryType .values(); - private static final int XATTR_NAMESPACE_MASK = 3; - private static final int XATTR_NAMESPACE_OFFSET = 30; - private static final int XATTR_NAME_MASK = (1 << 24) - 1; - private static final int XATTR_NAME_OFFSET = 6; + public static final int XATTR_NAMESPACE_MASK = 3; + public static final int XATTR_NAMESPACE_OFFSET = 30; + public static final int XATTR_NAME_MASK = (1 << 24) - 1; + public static final int XATTR_NAME_OFFSET = 6; /* See the comments in fsimage.proto for an explanation of the following. 
*/ - private static final int XATTR_NAMESPACE_EXT_OFFSET = 5; - private static final int XATTR_NAMESPACE_EXT_MASK = 1; + public static final int XATTR_NAMESPACE_EXT_OFFSET = 5; + public static final int XATTR_NAMESPACE_EXT_MASK = 1; private static final XAttr.NameSpace[] XATTR_NAMESPACE_VALUES = XAttr.NameSpace.values(); http://git-wip-us.apache.org/repos/asf/hadoop/blob/5998b20d/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/OfflineImageReconstructor.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/OfflineImageReconstructor.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/OfflineImageReconstructor.java new file mode 100644 index 0000000..d2ebeb4 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/OfflineImageReconstructor.java @@ -0,0 +1,1643 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hdfs.tools.offlineImageViewer; + +import static org.apache.hadoop.hdfs.server.namenode.FSImageFormatPBINode.XATTR_NAMESPACE_MASK; +import static org.apache.hadoop.hdfs.server.namenode.FSImageFormatPBINode.XATTR_NAME_OFFSET; +import static org.apache.hadoop.hdfs.server.namenode.FSImageFormatPBINode.XATTR_NAMESPACE_OFFSET; +import static org.apache.hadoop.hdfs.server.namenode.FSImageFormatPBINode.XATTR_NAMESPACE_EXT_OFFSET; +import static org.apache.hadoop.hdfs.server.namenode.FSImageFormatPBINode.XATTR_NAMESPACE_EXT_MASK; + +import java.io.BufferedOutputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStreamReader; +import java.io.OutputStream; +import java.nio.ByteBuffer; +import java.nio.charset.Charset; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.security.DigestOutputStream; +import java.security.MessageDigest; +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.util.Date; +import java.util.HashMap; +import java.util.HashSet; +import java.util.LinkedList; +import java.util.Map; + +import com.google.common.io.CountingOutputStream; +import com.google.common.primitives.Ints; +import com.google.protobuf.ByteString; +import com.google.protobuf.TextFormat; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos; +import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.CacheDirectiveInfoExpirationProto; +import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.CacheDirectiveInfoProto; +import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.CachePoolInfoProto; +import 
org.apache.hadoop.hdfs.protocol.proto.XAttrProtos; +import org.apache.hadoop.hdfs.server.namenode.FSImageFormatProtobuf.SectionName; +import org.apache.hadoop.hdfs.server.namenode.FSImageUtil; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.CacheManagerSection; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.FileSummary; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.FilesUnderConstructionSection.FileUnderConstructionEntry; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.NameSystemSection; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.SecretManagerSection; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.SnapshotDiffSection.DiffEntry; +import org.apache.hadoop.hdfs.util.MD5FileUtils; +import org.apache.hadoop.hdfs.util.XMLUtils; +import org.apache.hadoop.io.IOUtils; +import org.apache.hadoop.io.MD5Hash; +import org.apache.hadoop.util.StringUtils; + +import javax.xml.bind.annotation.adapters.HexBinaryAdapter; +import javax.xml.stream.XMLEventReader; +import javax.xml.stream.XMLInputFactory; +import javax.xml.stream.XMLStreamConstants; +import javax.xml.stream.XMLStreamException; +import javax.xml.stream.events.XMLEvent; + +@InterfaceAudience.Private +@InterfaceStability.Unstable +class OfflineImageReconstructor { + public static final Log LOG = + LogFactory.getLog(OfflineImageReconstructor.class); + + /** + * The output stream. + */ + private final CountingOutputStream out; + + /** + * A source of XML events based on the input file. + */ + private final XMLEventReader events; + + /** + * A map of section names to section handler objects. + */ + private final HashMap<String, SectionProcessor> sections; + + /** + * The offset of the start of the current section. 
+ */ + private long sectionStartOffset; + + /** + * The FileSummary builder, where we gather information about each section + * we wrote. + */ + private final FileSummary.Builder fileSummaryBld = + FileSummary.newBuilder(); + + /** + * The string table. See registerStringId for details. + */ + private final HashMap<String, Integer> stringTable = new HashMap<>(); + + /** + * The date formatter to use with fsimage XML files. + */ + private final SimpleDateFormat isoDateFormat; + + /** + * The latest string ID. See registerStringId for details. + */ + private int latestStringId = 0; + + private OfflineImageReconstructor(CountingOutputStream out, + InputStreamReader reader) throws XMLStreamException { + this.out = out; + XMLInputFactory factory = XMLInputFactory.newInstance(); + this.events = factory.createXMLEventReader(reader); + this.sections = new HashMap<>(); + this.sections.put(NameSectionProcessor.NAME, new NameSectionProcessor()); + this.sections.put(INodeSectionProcessor.NAME, new INodeSectionProcessor()); + this.sections.put(SecretManagerSectionProcessor.NAME, + new SecretManagerSectionProcessor()); + this.sections.put(CacheManagerSectionProcessor.NAME, + new CacheManagerSectionProcessor()); + this.sections.put(SnapshotDiffSectionProcessor.NAME, + new SnapshotDiffSectionProcessor()); + this.sections.put(INodeReferenceSectionProcessor.NAME, + new INodeReferenceSectionProcessor()); + this.sections.put(INodeDirectorySectionProcessor.NAME, + new INodeDirectorySectionProcessor()); + this.sections.put(FilesUnderConstructionSectionProcessor.NAME, + new FilesUnderConstructionSectionProcessor()); + this.sections.put(SnapshotSectionProcessor.NAME, + new SnapshotSectionProcessor()); + this.isoDateFormat = PBImageXmlWriter.createSimpleDateFormat(); + } + + /** + * Read the next tag start or end event. + * + * @param expected The name of the next tag we expect. + * We will validate that the tag has this name, + * unless this string is enclosed in braces. 
+ * @param allowEnd If true, we will also end events. + * If false, end events cause an exception. + * + * @return The next tag start or end event. + */ + private XMLEvent expectTag(String expected, boolean allowEnd) + throws IOException { + XMLEvent ev = null; + while (true) { + try { + ev = events.nextEvent(); + } catch (XMLStreamException e) { + throw new IOException("Expecting " + expected + + ", but got XMLStreamException", e); + } + switch (ev.getEventType()) { + case XMLEvent.ATTRIBUTE: + throw new IOException("Got unexpected attribute: " + ev); + case XMLEvent.CHARACTERS: + if (!ev.asCharacters().isWhiteSpace()) { + throw new IOException("Got unxpected characters while " + + "looking for " + expected + ": " + + ev.asCharacters().getData()); + } + break; + case XMLEvent.END_ELEMENT: + if (!allowEnd) { + throw new IOException("Got unexpected end event " + + "while looking for " + expected); + } + return ev; + case XMLEvent.START_ELEMENT: + if (!expected.startsWith("[")) { + if (!ev.asStartElement().getName().getLocalPart(). + equals(expected)) { + throw new IOException("Failed to find <" + expected + ">; " + + "got " + ev.asStartElement().getName().getLocalPart() + + " instead."); + } + } + return ev; + default: + // Ignore other event types like comment, etc. 
+ if (LOG.isTraceEnabled()) { + LOG.trace("Skipping XMLEvent of type " + + ev.getEventType() + "(" + ev + ")"); + } + break; + } + } + } + + private void expectTagEnd(String expected) throws IOException { + XMLEvent ev = expectTag(expected, true); + if (ev.getEventType() != XMLStreamConstants.END_ELEMENT) { + throw new IOException("Expected tag end event for " + expected + + ", but got: " + ev); + } + if (!expected.startsWith("[")) { + String tag = ev.asEndElement().getName().getLocalPart(); + if (!tag.equals(expected)) { + throw new IOException("Expected tag end event for " + expected + + ", but got tag end event for " + tag); + } + } + } + + private static class Node { + private static final String EMPTY = ""; + HashMap<String, LinkedList<Node>> children; + String val = EMPTY; + + void addChild(String key, Node node) { + if (children == null) { + children = new HashMap<>(); + } + LinkedList<Node> cur = children.get(key); + if (cur == null) { + cur = new LinkedList<>(); + children.put(key, cur); + } + cur.add(node); + } + + Node removeChild(String key) { + if (children == null) { + return null; + } + LinkedList<Node> cur = children.get(key); + if (cur == null) { + return null; + } + Node node = cur.remove(); + if ((node == null) || cur.isEmpty()) { + children.remove(key); + } + return node; + } + + String removeChildStr(String key) { + Node child = removeChild(key); + if (child == null) { + return null; + } + if ((child.children != null) && (!child.children.isEmpty())) { + throw new RuntimeException("Node " + key + " contains children " + + "of its own."); + } + return child.getVal(); + } + + Integer removeChildInt(String key) throws IOException { + String str = removeChildStr(key); + if (str == null) { + return null; + } + return Integer.valueOf(str); + } + + Long removeChildLong(String key) throws IOException { + String str = removeChildStr(key); + if (str == null) { + return null; + } + return Long.valueOf(str); + } + + boolean removeChildBool(String key) 
throws IOException { + String str = removeChildStr(key); + if (str == null) { + return false; + } + return true; + } + + String getRemainingKeyNames() { + if (children == null) { + return ""; + } + return StringUtils.join(", ", children.keySet()); + } + + void verifyNoRemainingKeys(String sectionName) throws IOException { + String remainingKeyNames = getRemainingKeyNames(); + if (!remainingKeyNames.isEmpty()) { + throw new IOException("Found unknown XML keys in " + + sectionName + ": " + remainingKeyNames); + } + } + + void setVal(String val) { + this.val = val; + } + + String getVal() { + return val; + } + + String dump() { + StringBuilder bld = new StringBuilder(); + if ((children != null) && (!children.isEmpty())) { + bld.append("{"); + } + if (val != null) { + bld.append("[").append(val).append("]"); + } + if ((children != null) && (!children.isEmpty())) { + String prefix = ""; + for (Map.Entry<String, LinkedList<Node>> entry : children.entrySet()) { + for (Node n : entry.getValue()) { + bld.append(prefix); + bld.append(entry.getKey()).append(": "); + bld.append(n.dump()); + prefix = ", "; + } + } + bld.append("}"); + } + return bld.toString(); + } + } + + private void loadNodeChildrenHelper(Node parent, String expected, + String terminators[]) throws IOException { + XMLEvent ev = null; + while (true) { + try { + ev = events.peek(); + switch (ev.getEventType()) { + case XMLEvent.END_ELEMENT: + if (terminators.length != 0) { + return; + } + events.nextEvent(); + return; + case XMLEvent.START_ELEMENT: + String key = ev.asStartElement().getName().getLocalPart(); + for (String terminator : terminators) { + if (terminator.equals(key)) { + return; + } + } + events.nextEvent(); + Node node = new Node(); + parent.addChild(key, node); + loadNodeChildrenHelper(node, expected, new String[0]); + break; + case XMLEvent.CHARACTERS: + String val = XMLUtils. 
+ unmangleXmlString(ev.asCharacters().getData(), true); + parent.setVal(val); + events.nextEvent(); + break; + case XMLEvent.ATTRIBUTE: + throw new IOException("Unexpected XML event " + ev); + default: + // Ignore other event types like comment, etc. + if (LOG.isTraceEnabled()) { + LOG.trace("Skipping XMLEvent " + ev); + } + events.nextEvent(); + break; + } + } catch (XMLStreamException e) { + throw new IOException("Expecting " + expected + + ", but got XMLStreamException", e); + } + } + } + + /** + * Load a subtree of the XML into a Node structure. + * We will keep consuming XML events until we exit the current subtree. + * If there are any terminators specified, we will always leave the + * terminating end tag event in the stream. + * + * @param parent The node to fill in. + * @param expected A string to display in exceptions. + * @param terminators Entering any one of these XML tags terminates our + * traversal. + * @throws IOException + */ + private void loadNodeChildren(Node parent, String expected, + String... terminators) throws IOException { + loadNodeChildrenHelper(parent, expected, terminators); + if (LOG.isTraceEnabled()) { + LOG.trace("loadNodeChildren(expected=" + expected + + ", terminators=[" + StringUtils.join(",", terminators) + + "]):" + parent.dump()); + } + } + + /** + * A processor for an FSImage XML section. + */ + private interface SectionProcessor { + /** + * Process this section. + */ + void process() throws IOException; + } + + /** + * Processes the NameSection containing last allocated block ID, etc. 
+ */ + private class NameSectionProcessor implements SectionProcessor { + static final String NAME = "NameSection"; + + @Override + public void process() throws IOException { + Node node = new Node(); + loadNodeChildren(node, "NameSection fields"); + NameSystemSection.Builder b = NameSystemSection.newBuilder(); + Integer namespaceId = node.removeChildInt("namespaceId"); + if (namespaceId == null) { + throw new IOException("<NameSection> is missing <namespaceId>"); + } + b.setNamespaceId(namespaceId); + Long lval = node.removeChildLong("genstampV1"); + if (lval != null) { + b.setGenstampV1(lval); + } + lval = node.removeChildLong("genstampV2"); + if (lval != null) { + b.setGenstampV2(lval); + } + lval = node.removeChildLong("genstampV1Limit"); + if (lval != null) { + b.setGenstampV1Limit(lval); + } + lval = node.removeChildLong("lastAllocatedBlockId"); + if (lval != null) { + b.setLastAllocatedBlockId(lval); + } + lval = node.removeChildLong("txid"); + if (lval != null) { + b.setTransactionId(lval); + } + lval = node.removeChildLong("rollingUpgradeStartTime"); + if (lval != null) { + b.setRollingUpgradeStartTime(lval); + } + lval = node.removeChildLong("lastAllocatedStripedBlockId"); + if (lval != null) { + throw new IOException("can't handle lastAllocatedStripedBlockId " + + "in NameSection; XML file is too new.\n"); + } + node.verifyNoRemainingKeys("NameSection"); + NameSystemSection s = b.build(); + if (LOG.isDebugEnabled()) { + LOG.debug(SectionName.NS_INFO.name() + " writing header: {" + + TextFormat.printToString(s) + "}"); + } + s.writeDelimitedTo(out); + recordSectionLength(SectionName.NS_INFO.name()); + } + } + + private class INodeSectionProcessor implements SectionProcessor { + static final String NAME = "INodeSection"; + + @Override + public void process() throws IOException { + Node headerNode = new Node(); + loadNodeChildren(headerNode, "INodeSection fields", "inode"); + INodeSection.Builder b = INodeSection.newBuilder(); + Long lval = 
headerNode.removeChildLong("lastInodeId"); + if (lval != null) { + b.setLastInodeId(lval); + } + Integer expectedNumINodes = headerNode.removeChildInt("numInodes"); + if (expectedNumINodes == null) { + throw new IOException("Failed to find <numInodes> in INodeSection."); + } + b.setNumInodes(expectedNumINodes); + INodeSection s = b.build(); + s.writeDelimitedTo(out); + headerNode.verifyNoRemainingKeys("INodeSection"); + int actualNumINodes = 0; + while (actualNumINodes < expectedNumINodes) { + try { + expectTag("inode", false); + } catch (IOException e) { + throw new IOException("Only found " + actualNumINodes + + " <inode> entries out of " + expectedNumINodes, e); + } + actualNumINodes++; + Node inode = new Node(); + loadNodeChildren(inode, "INode fields"); + INodeSection.INode.Builder inodeBld = processINodeXml(inode); + inodeBld.build().writeDelimitedTo(out); + } + expectTagEnd("INodeSection"); + recordSectionLength(SectionName.INODE.name()); + } + } + + private INodeSection.INode.Builder processINodeXml(Node node) + throws IOException { + String type = node.removeChildStr("type"); + if (type == null) { + throw new IOException("INode XML found with no <type> tag."); + } + INodeSection.INode.Builder inodeBld = INodeSection.INode.newBuilder(); + Long id = node.removeChildLong("id"); + if (id == null) { + throw new IOException("<inode> found without <id>"); + } + inodeBld.setId(id); + String name = node.removeChildStr("name"); + if (name != null) { + inodeBld.setName(ByteString.copyFrom(name, "UTF8")); + } + switch (type) { + case "FILE": + processFileXml(node, inodeBld ); + break; + case "DIRECTORY": + processDirectoryXml(node, inodeBld); + break; + case "SYMLINK": + processSymlinkXml(node, inodeBld); + break; + default: + throw new IOException("INode XML found with unknown <type> " + + "tag " + type); + } + node.verifyNoRemainingKeys("inode"); + return inodeBld; + } + + private void processFileXml(Node node, INodeSection.INode.Builder inodeBld) + throws 
IOException { + inodeBld.setType(INodeSection.INode.Type.FILE); + INodeSection.INodeFile.Builder bld = INodeSection.INodeFile.newBuilder(); + Integer ival = node.removeChildInt("replication"); + if (ival != null) { + bld.setReplication(ival); + } + Long lval = node.removeChildLong("mtime"); + if (lval != null) { + bld.setModificationTime(lval); + } + lval = node.removeChildLong("atime"); + if (lval != null) { + bld.setAccessTime(lval); + } + lval = node.removeChildLong("preferredBlockSize"); + if (lval != null) { + bld.setPreferredBlockSize(lval); + } + String perm = node.removeChildStr("permission"); + if (perm != null) { + bld.setPermission(permissionXmlToU64(perm)); + } + Node blocks = node.removeChild("blocks"); + if (blocks != null) { + while (true) { + Node block = blocks.removeChild("block"); + if (block == null) { + break; + } + HdfsProtos.BlockProto.Builder blockBld = + HdfsProtos.BlockProto.newBuilder(); + Long id = block.removeChildLong("id"); + if (id == null) { + throw new IOException("<block> found without <id>"); + } + blockBld.setBlockId(id); + Long genstamp = block.removeChildLong("genstamp"); + if (genstamp == null) { + throw new IOException("<block> found without <genstamp>"); + } + blockBld.setGenStamp(genstamp); + Long numBytes = block.removeChildLong("numBytes"); + if (numBytes == null) { + throw new IOException("<block> found without <numBytes>"); + } + blockBld.setNumBytes(numBytes); + bld.addBlocks(blockBld); + } + } + Node fileUnderConstruction = node.removeChild("file-under-construction"); + if (fileUnderConstruction != null) { + INodeSection.FileUnderConstructionFeature.Builder fb = + INodeSection.FileUnderConstructionFeature.newBuilder(); + String clientName = + fileUnderConstruction.removeChildStr("clientName"); + if (clientName == null) { + throw new IOException("<file-under-construction> found without " + + "<clientName>"); + } + fb.setClientName(clientName); + String clientMachine = + 
fileUnderConstruction.removeChildStr("clientMachine"); + if (clientMachine == null) { + throw new IOException("<file-under-construction> found without " + + "<clientMachine>"); + } + fb.setClientMachine(clientMachine); + bld.setFileUC(fb); + } + Node acls = node.removeChild("acls"); + if (acls != null) { + bld.setAcl(aclXmlToProto(acls)); + } + Node xattrs = node.removeChild("xattrs"); + if (xattrs != null) { + bld.setXAttrs(xattrsXmlToProto(xattrs)); + } + ival = node.removeChildInt("storagePolicyId"); + if (ival != null) { + bld.setStoragePolicyID(ival); + } + Boolean bval = node.removeChildBool("isStriped"); + if (bval) { + throw new IOException("Can't handle <isStriped/> in file XML; " + + "XML file is too new."); + } + inodeBld.setFile(bld); + // Will check remaining keys and serialize in processINodeXml + } + + private void processDirectoryXml(Node node, + INodeSection.INode.Builder inodeBld) throws IOException { + inodeBld.setType(INodeSection.INode.Type.DIRECTORY); + INodeSection.INodeDirectory.Builder bld = + INodeSection.INodeDirectory.newBuilder(); + Long lval = node.removeChildLong("mtime"); + if (lval != null) { + bld.setModificationTime(lval); + } + lval = node.removeChildLong("nsquota"); + if (lval != null) { + bld.setNsQuota(lval); + } + lval = node.removeChildLong("dsquota"); + if (lval != null) { + bld.setDsQuota(lval); + } + String perm = node.removeChildStr("permission"); + if (perm != null) { + bld.setPermission(permissionXmlToU64(perm)); + } + Node acls = node.removeChild("acls"); + if (acls != null) { + bld.setAcl(aclXmlToProto(acls)); + } + Node xattrs = node.removeChild("xattrs"); + if (xattrs != null) { + bld.setXAttrs(xattrsXmlToProto(xattrs)); + } + INodeSection.QuotaByStorageTypeFeatureProto.Builder qf = + INodeSection.QuotaByStorageTypeFeatureProto.newBuilder(); + while (true) { + Node typeQuota = node.removeChild("typeQuota"); + if (typeQuota == null) { + break; + } + INodeSection.QuotaByStorageTypeEntryProto.Builder qbld = + 
INodeSection.QuotaByStorageTypeEntryProto.newBuilder(); + String type = typeQuota.removeChildStr("type"); + if (type == null) { + throw new IOException("<typeQuota> was missing <type>"); + } + HdfsProtos.StorageTypeProto storageType = + HdfsProtos.StorageTypeProto.valueOf(type); + if (storageType == null) { + throw new IOException("<typeQuota> had unknown <type> " + type); + } + qbld.setStorageType(storageType); + Long quota = typeQuota.removeChildLong("quota"); + if (quota == null) { + throw new IOException("<typeQuota> was missing <quota>"); + } + qbld.setQuota(quota); + qf.addQuotas(qbld); + } + bld.setTypeQuotas(qf); + inodeBld.setDirectory(bld); + // Will check remaining keys and serialize in processINodeXml + } + + private void processSymlinkXml(Node node, + INodeSection.INode.Builder inodeBld) throws IOException { + inodeBld.setType(INodeSection.INode.Type.SYMLINK); + INodeSection.INodeSymlink.Builder bld = + INodeSection.INodeSymlink.newBuilder(); + String perm = node.removeChildStr("permission"); + if (perm != null) { + bld.setPermission(permissionXmlToU64(perm)); + } + String target = node.removeChildStr("target"); + if (target != null) { + bld.setTarget(ByteString.copyFrom(target, "UTF8")); + } + Long lval = node.removeChildLong("mtime"); + if (lval != null) { + bld.setModificationTime(lval); + } + lval = node.removeChildLong("atime"); + if (lval != null) { + bld.setAccessTime(lval); + } + inodeBld.setSymlink(bld); + // Will check remaining keys and serialize in processINodeXml + } + + private INodeSection.AclFeatureProto.Builder aclXmlToProto(Node acl) + throws IOException { + // TODO: support ACLs + throw new IOException("ACLs are not supported yet."); + } + + private INodeSection.XAttrFeatureProto.Builder xattrsXmlToProto(Node xattrs) + throws IOException { + INodeSection.XAttrFeatureProto.Builder bld = + INodeSection.XAttrFeatureProto.newBuilder(); + while (true) { + Node xattr = xattrs.removeChild("xattr"); + if (xattr == null) { + break; + } + 
INodeSection.XAttrCompactProto.Builder b = + INodeSection.XAttrCompactProto.newBuilder(); + String ns = xattr.removeChildStr("ns"); + if (ns == null) { + throw new IOException("<xattr> had no <ns> entry."); + } + int nsIdx = XAttrProtos.XAttrProto. + XAttrNamespaceProto.valueOf(ns).ordinal(); + String name = xattr.removeChildStr("name"); + String valStr = xattr.removeChildStr("val"); + byte[] val = null; + if (valStr == null) { + String valHex = xattr.removeChildStr("valHex"); + if (valHex == null) { + throw new IOException("<xattr> had no <val> or <valHex> entry."); + } + val = new HexBinaryAdapter().unmarshal(valHex); + } else { + val = valStr.getBytes("UTF8"); + } + b.setValue(ByteString.copyFrom(val)); + + // The XAttrCompactProto name field uses a fairly complex format + // to encode both the string table ID of the xattr name and the + // namespace ID. See the protobuf file for details. + int nameId = registerStringId(name); + int encodedName = (nameId << XATTR_NAME_OFFSET) | + ((nsIdx & XATTR_NAMESPACE_MASK) << XATTR_NAMESPACE_OFFSET) | + (((nsIdx >> 2) & XATTR_NAMESPACE_EXT_MASK) + << XATTR_NAMESPACE_EXT_OFFSET); + b.setName(encodedName); + xattr.verifyNoRemainingKeys("xattr"); + bld.addXAttrs(b); + } + xattrs.verifyNoRemainingKeys("xattrs"); + return bld; + } + + private class SecretManagerSectionProcessor implements SectionProcessor { + static final String NAME = "SecretManagerSection"; + + @Override + public void process() throws IOException { + Node secretHeader = new Node(); + loadNodeChildren(secretHeader, "SecretManager fields", + "delegationKey", "token"); + SecretManagerSection.Builder b = SecretManagerSection.newBuilder(); + Integer currentId = secretHeader.removeChildInt("currentId"); + if (currentId == null) { + throw new IOException("SecretManager section had no <currentId>"); + } + b.setCurrentId(currentId); + Integer tokenSequenceNumber = secretHeader.removeChildInt("tokenSequenceNumber"); + if (tokenSequenceNumber == null) { + throw new 
IOException("SecretManager section had no " + + "<tokenSequenceNumber>"); + } + b.setTokenSequenceNumber(tokenSequenceNumber); + Integer expectedNumKeys = secretHeader.removeChildInt("numDelegationKeys"); + if (expectedNumKeys == null) { + throw new IOException("SecretManager section had no " + + "<numDelegationKeys>"); + } + b.setNumKeys(expectedNumKeys); + Integer expectedNumTokens = secretHeader.removeChildInt("numTokens"); + if (expectedNumTokens == null) { + throw new IOException("SecretManager section had no " + + "<numTokens>"); + } + b.setNumTokens(expectedNumTokens); + secretHeader.verifyNoRemainingKeys("SecretManager"); + b.build().writeDelimitedTo(out); + for (int actualNumKeys = 0; actualNumKeys < expectedNumKeys; + actualNumKeys++) { + try { + expectTag("delegationKey", false); + } catch (IOException e) { + throw new IOException("Only read " + actualNumKeys + + " delegation keys out of " + expectedNumKeys, e); + } + SecretManagerSection.DelegationKey.Builder dbld = + SecretManagerSection.DelegationKey.newBuilder(); + Node dkey = new Node(); + loadNodeChildren(dkey, "Delegation key fields"); + Integer id = dkey.removeChildInt("id"); + if (id == null) { + throw new IOException("Delegation key stanza <delegationKey> " + + "lacked an <id> field."); + } + dbld.setId(id); + String expiry = dkey.removeChildStr("expiry"); + if (expiry == null) { + throw new IOException("Delegation key stanza <delegationKey> " + + "lacked an <expiry> field."); + } + dbld.setExpiryDate(dateStrToLong(expiry)); + String keyHex = dkey.removeChildStr("key"); + if (keyHex == null) { + throw new IOException("Delegation key stanza <delegationKey> " + + "lacked a <key> field."); + } + byte[] key = new HexBinaryAdapter().unmarshal(keyHex); + dkey.verifyNoRemainingKeys("delegationKey"); + dbld.setKey(ByteString.copyFrom(key)); + dbld.build().writeDelimitedTo(out); + } + for (int actualNumTokens = 0; actualNumTokens < expectedNumTokens; + actualNumTokens++) { + try { + expectTag("token", 
false); + } catch (IOException e) { + throw new IOException("Only read " + actualNumTokens + + " tokens out of " + expectedNumTokens, e); + } + SecretManagerSection.PersistToken.Builder tbld = + SecretManagerSection.PersistToken.newBuilder(); + Node token = new Node(); + loadNodeChildren(token, "PersistToken key fields"); + Integer version = token.removeChildInt("version"); + if (version != null) { + tbld.setVersion(version); + } + String owner = token.removeChildStr("owner"); + if (owner != null) { + tbld.setOwner(owner); + } + String renewer = token.removeChildStr("renewer"); + if (renewer != null) { + tbld.setRenewer(renewer); + } + String realUser = token.removeChildStr("realUser"); + if (realUser != null) { + tbld.setRealUser(realUser); + } + String issueDateStr = token.removeChildStr("issueDate"); + if (issueDateStr != null) { + tbld.setIssueDate(dateStrToLong(issueDateStr)); + } + String maxDateStr = token.removeChildStr("maxDate"); + if (maxDateStr != null) { + tbld.setMaxDate(dateStrToLong(maxDateStr)); + } + Integer seqNo = token.removeChildInt("sequenceNumber"); + if (seqNo != null) { + tbld.setSequenceNumber(seqNo); + } + Integer masterKeyId = token.removeChildInt("masterKeyId"); + if (masterKeyId != null) { + tbld.setMasterKeyId(masterKeyId); + } + String expiryDateStr = token.removeChildStr("expiryDate"); + if (expiryDateStr != null) { + tbld.setExpiryDate(dateStrToLong(expiryDateStr)); + } + token.verifyNoRemainingKeys("token"); + tbld.build().writeDelimitedTo(out); + } + expectTagEnd("SecretManagerSection"); + recordSectionLength(SectionName.SECRET_MANAGER.name()); + } + + private long dateStrToLong(String dateStr) throws IOException { + try { + Date date = isoDateFormat.parse(dateStr); + return date.getTime(); + } catch (ParseException e) { + throw new IOException("Failed to parse ISO date string " + dateStr, e); + } + } + } + + /** + * Rebuilds the CacheManager section: a header carrying the pool and + * directive counts, followed by that many <pool> and <directive> stanzas. + */ + private class CacheManagerSectionProcessor implements SectionProcessor { + static final String NAME = 
"CacheManagerSection"; + + /** + * Writes the CacheManagerSection header, then one record per <pool> and + * per <directive> stanza. + * + * @throws IOException if a header field is missing or fewer stanzas are + * present than the header announced. + */ + @Override + public void process() throws IOException { + Node node = new Node(); + loadNodeChildren(node, "CacheManager fields", "pool", "directive"); + CacheManagerSection.Builder b = CacheManagerSection.newBuilder(); + Long nextDirectiveId = node.removeChildLong("nextDirectiveId"); + if (nextDirectiveId == null) { + throw new IOException("CacheManager section had no <nextDirectiveId>"); + } + b.setNextDirectiveId(nextDirectiveId); + Integer expectedNumPools = node.removeChildInt("numPools"); + if (expectedNumPools == null) { + throw new IOException("CacheManager section had no <numPools>"); + } + b.setNumPools(expectedNumPools); + Integer expectedNumDirectives = node.removeChildInt("numDirectives"); + if (expectedNumDirectives == null) { + throw new IOException("CacheManager section had no <numDirectives>"); + } + b.setNumDirectives(expectedNumDirectives); + b.build().writeDelimitedTo(out); + long actualNumPools = 0; + while (actualNumPools < expectedNumPools) { + try { + expectTag("pool", false); + } catch (IOException e) { + throw new IOException("Only read " + actualNumPools + + " cache pools out of " + expectedNumPools, e); + } + actualNumPools++; + Node pool = new Node(); + loadNodeChildren(pool, "pool fields", ""); + // Hand over the stanza just loaded, not the section header, whose + // fields have already been consumed above. + processPoolXml(pool); + } + long actualNumDirectives = 0; + while (actualNumDirectives < expectedNumDirectives) { + try { + expectTag("directive", false); + } catch (IOException e) { + throw new IOException("Only read " + actualNumDirectives + + " cache directives out of " + expectedNumDirectives, e); + } + actualNumDirectives++; + Node directive = new Node(); + loadNodeChildren(directive, "directive fields", ""); + // As above: serialize the <directive> stanza, not the header. + processDirectiveXml(directive); + } + expectTagEnd("CacheManagerSection"); + recordSectionLength(SectionName.CACHE_MANAGER.name()); + } + + private void processPoolXml(Node pool) throws IOException { + CachePoolInfoProto.Builder bld = CachePoolInfoProto.newBuilder(); + String poolName = pool.removeChildStr("poolName"); + if 
(poolName == null) { + throw new IOException("<pool> found without <poolName>"); + } + bld.setPoolName(poolName); + String ownerName = pool.removeChildStr("ownerName"); + if (ownerName == null) { + throw new IOException("<pool> found without <ownerName>"); + } + bld.setOwnerName(ownerName); + String groupName = pool.removeChildStr("groupName"); + if (groupName == null) { + throw new IOException("<pool> found without <groupName>"); + } + bld.setGroupName(groupName); + Integer mode = pool.removeChildInt("mode"); + if (mode == null) { + throw new IOException("<pool> found without <mode>"); + } + bld.setMode(mode); + Long limit = pool.removeChildLong("limit"); + if (limit == null) { + throw new IOException("<pool> found without <limit>"); + } + bld.setLimit(limit); + Long maxRelativeExpiry = pool.removeChildLong("maxRelativeExpiry"); + if (maxRelativeExpiry == null) { + throw new IOException("<pool> found without <maxRelativeExpiry>"); + } + bld.setMaxRelativeExpiry(maxRelativeExpiry); + pool.verifyNoRemainingKeys("pool"); + bld.build().writeDelimitedTo(out); + } + + private void processDirectiveXml(Node directive) throws IOException { + CacheDirectiveInfoProto.Builder bld = + CacheDirectiveInfoProto.newBuilder(); + Long id = directive.removeChildLong("id"); + if (id == null) { + throw new IOException("<directive> found without <id>"); + } + bld.setId(id); + String path = directive.removeChildStr("path"); + if (path == null) { + throw new IOException("<directive> found without <path>"); + } + bld.setPath(path); + Integer replication = directive.removeChildInt("replication"); + if (replication == null) { + throw new IOException("<directive> found without <replication>"); + } + bld.setReplication(replication); + String pool = directive.removeChildStr("pool"); + if (path == null) { + throw new IOException("<directive> found without <pool>"); + } + bld.setPool(pool); + Node expiration = directive.removeChild("expiration"); + if (expiration != null) { + 
CacheDirectiveInfoExpirationProto.Builder ebld = + CacheDirectiveInfoExpirationProto.newBuilder(); + Long millis = expiration.removeChildLong("millis"); + if (millis == null) { + throw new IOException("cache directive <expiration> found " + + "without <millis>"); + } + ebld.setMillis(millis); + if (expiration.removeChildBool("relative")) { + ebld.setIsRelative(true); + } else { + ebld.setIsRelative(false); + } + bld.setExpiration(ebld); + } + directive.verifyNoRemainingKeys("directive"); + bld.build().writeDelimitedTo(out); + } + } + + private class INodeReferenceSectionProcessor implements SectionProcessor { + static final String NAME = "INodeReferenceSection"; + + @Override + public void process() throws IOException { + // There is no header for this section. + // We process the repeated <ref> elements. + while (true) { + XMLEvent ev = expectTag("ref", true); + if (ev.isEndElement()) { + break; + } + Node inodeRef = new Node(); + FsImageProto.INodeReferenceSection.INodeReference.Builder bld = + FsImageProto.INodeReferenceSection.INodeReference.newBuilder(); + loadNodeChildren(inodeRef, "INodeReference"); + Long referredId = inodeRef.removeChildLong("referredId"); + if (referredId != null) { + bld.setReferredId(referredId); + } + String name = inodeRef.removeChildStr("name"); + if (name != null) { + bld.setName(ByteString.copyFrom(name, "UTF8")); + } + Integer dstSnapshotId = inodeRef.removeChildInt("dstSnapshotId"); + if (dstSnapshotId != null) { + bld.setDstSnapshotId(dstSnapshotId); + } + Integer lastSnapshotId = inodeRef.removeChildInt("lastSnapshotId"); + if (lastSnapshotId != null) { + bld.setLastSnapshotId(lastSnapshotId); + } + inodeRef.verifyNoRemainingKeys("ref"); + bld.build().writeDelimitedTo(out); + } + recordSectionLength(SectionName.INODE_REFERENCE.name()); + } + } + + private class INodeDirectorySectionProcessor implements SectionProcessor { + static final String NAME = "INodeDirectorySection"; + + @Override + public void process() throws 
IOException { + // No header for this section + // Process the repeated <directory> elements. + while (true) { + XMLEvent ev = expectTag("directory", true); + if (ev.isEndElement()) { + break; + } + Node directory = new Node(); + FsImageProto.INodeDirectorySection.DirEntry.Builder bld = + FsImageProto.INodeDirectorySection.DirEntry.newBuilder(); + loadNodeChildren(directory, "directory"); + Long parent = directory.removeChildLong("parent"); + if (parent != null) { + bld.setParent(parent); + } + while (true) { + Node child = directory.removeChild("child"); + if (child == null) { + break; + } + bld.addChildren(Long.parseLong(child.getVal())); + } + while (true) { + Node refChild = directory.removeChild("refChild"); + if (refChild == null) { + break; + } + bld.addRefChildren(Integer.parseInt(refChild.getVal())); + } + directory.verifyNoRemainingKeys("directory"); + bld.build().writeDelimitedTo(out); + } + recordSectionLength(SectionName.INODE_DIR.name()); + } + } + + private class FilesUnderConstructionSectionProcessor + implements SectionProcessor { + static final String NAME = "FileUnderConstructionSection"; + + @Override + public void process() throws IOException { + // No header for this section type. + // Process the repeated files under construction elements. 
+ while (true) { + XMLEvent ev = expectTag("inode", true); + if (ev.isEndElement()) { + break; + } + Node fileUnderConstruction = new Node(); + loadNodeChildren(fileUnderConstruction, "file under construction"); + FileUnderConstructionEntry.Builder bld = + FileUnderConstructionEntry.newBuilder(); + Long id = fileUnderConstruction.removeChildLong("id"); + if (id != null) { + bld.setInodeId(id); + } + String fullpath = fileUnderConstruction.removeChildStr("path"); + if (fullpath != null) { + bld.setFullPath(fullpath); + } + fileUnderConstruction.verifyNoRemainingKeys("inode"); + bld.build().writeDelimitedTo(out); + } + recordSectionLength(SectionName.FILES_UNDERCONSTRUCTION.name()); + } + } + + private class SnapshotSectionProcessor implements SectionProcessor { + static final String NAME = "SnapshotSection"; + + @Override + public void process() throws IOException { + FsImageProto.SnapshotSection.Builder bld = + FsImageProto.SnapshotSection.newBuilder(); + Node header = new Node(); + loadNodeChildren(header, "SnapshotSection fields", "snapshot"); + Integer snapshotCounter = header.removeChildInt("snapshotCounter"); + if (snapshotCounter == null) { + throw new IOException("No <snapshotCounter> entry found in " + + "SnapshotSection header"); + } + bld.setSnapshotCounter(snapshotCounter); + Integer expectedNumSnapshots = header.removeChildInt("numSnapshots"); + if (expectedNumSnapshots == null) { + throw new IOException("No <numSnapshots> entry found in " + + "SnapshotSection header"); + } + bld.setNumSnapshots(expectedNumSnapshots); + while (true) { + Node sd = header.removeChild("snapshottableDir"); + if (sd == null) { + break; + } + Long dir = sd.removeChildLong("dir"); + sd.verifyNoRemainingKeys("<dir>"); + bld.addSnapshottableDir(dir); + } + header.verifyNoRemainingKeys("SnapshotSection"); + bld.build().writeDelimitedTo(out); + int actualNumSnapshots = 0; + while (actualNumSnapshots < expectedNumSnapshots) { + try { + expectTag("snapshot", false); + } catch 
(IOException e) { + throw new IOException("Only read " + actualNumSnapshots + + " <snapshot> entries out of " + expectedNumSnapshots, e); + } + actualNumSnapshots++; + Node snapshot = new Node(); + loadNodeChildren(snapshot, "snapshot fields"); + FsImageProto.SnapshotSection.Snapshot.Builder s = + FsImageProto.SnapshotSection.Snapshot.newBuilder(); + Integer snapshotId = snapshot.removeChildInt("id"); + if (snapshotId == null) { + throw new IOException("<snapshot> section was missing <id>"); + } + s.setSnapshotId(snapshotId); + Node snapshotRoot = snapshot.removeChild("root"); + INodeSection.INode.Builder inodeBld = processINodeXml(snapshotRoot); + s.setRoot(inodeBld); + s.build().writeDelimitedTo(out); + } + expectTagEnd("SnapshotSection"); + recordSectionLength(SectionName.SNAPSHOT.name()); + } + } + + private class SnapshotDiffSectionProcessor implements SectionProcessor { + static final String NAME = "SnapshotDiffSection"; + + @Override + public void process() throws IOException { + // No header for this section type. 
+ LOG.debug("Processing SnapshotDiffSection"); + while (true) { + XMLEvent ev = expectTag("[diff start tag]", true); + if (ev.isEndElement()) { + String name = ev.asEndElement().getName().getLocalPart(); + if (name.equals("SnapshotDiffSection")) { + break; + } + throw new IOException("Got unexpected end tag for " + name); + } + String tagName = ev.asStartElement().getName().getLocalPart(); + if (tagName.equals("dirDiffEntry")) { + processDirDiffEntry(); + } else if (tagName.equals("fileDiffEntry")) { + processFileDiffEntry(); + } else { + throw new IOException("SnapshotDiffSection contained unexpected " + + "tag " + tagName); + } + } + recordSectionLength(SectionName.SNAPSHOT_DIFF.name()); + } + + /** + * Processes one <dirDiffEntry>: writes the DiffEntry header, then each + * <dirDiff> it announces. + * + * @throws IOException if a required field is missing or fewer <dirDiff> + * stanzas are present than <count> announced. + */ + private void processDirDiffEntry() throws IOException { + LOG.debug("Processing dirDiffEntry"); + DiffEntry.Builder headerBld = DiffEntry.newBuilder(); + headerBld.setType(DiffEntry.Type.DIRECTORYDIFF); + Node dirDiffHeader = new Node(); + loadNodeChildren(dirDiffHeader, "dirDiffEntry fields", "dirDiff"); + Long inodeId = dirDiffHeader.removeChildLong("inodeId"); + if (inodeId == null) { + throw new IOException("<dirDiffEntry> contained no <inodeId> entry."); + } + headerBld.setInodeId(inodeId); + Integer expectedDiffs = dirDiffHeader.removeChildInt("count"); + if (expectedDiffs == null) { + throw new IOException("<dirDiffEntry> contained no <count> entry."); + } + headerBld.setNumOfDiff(expectedDiffs); + dirDiffHeader.verifyNoRemainingKeys("dirDiffEntry"); + headerBld.build().writeDelimitedTo(out); + for (int actualDiffs = 0; actualDiffs < expectedDiffs; actualDiffs++) { + try { + expectTag("dirDiff", false); + } catch (IOException e) { + // actualDiffs is the number of diffs read before this one. + throw new IOException("Only read " + actualDiffs + + " diffs out of " + expectedDiffs, e); + } + Node dirDiff = new Node(); + loadNodeChildren(dirDiff, "dirDiff fields"); + FsImageProto.SnapshotDiffSection.DirectoryDiff.Builder bld = + FsImageProto.SnapshotDiffSection.DirectoryDiff.newBuilder(); + Integer snapshotId = 
dirDiff.removeChildInt("snapshotId"); + if (snapshotId != null) { + bld.setSnapshotId(snapshotId); + } + Integer childrenSize = dirDiff.removeChildInt("childrenSize"); + if (childrenSize == null) { + throw new IOException("Expected to find <childrenSize> in " + + "<dirDiff> section."); + } + bld.setIsSnapshotRoot(dirDiff.removeChildBool("isSnapshotRoot")); + bld.setChildrenSize(childrenSize); + String name = dirDiff.removeChildStr("name"); + if (name != null) { + bld.setName(ByteString.copyFrom(name, "UTF8")); + } + // TODO: add missing snapshotCopy field to XML + Integer expectedCreatedListSize = + dirDiff.removeChildInt("createdListSize"); + if (expectedCreatedListSize == null) { + throw new IOException("Expected to find <createdListSize> in " + + "<dirDiff> section."); + } + bld.setCreatedListSize(expectedCreatedListSize); + while (true) { + Node deleted = dirDiff.removeChild("deletedInode"); + if (deleted == null){ + break; + } + bld.addDeletedINode(Long.parseLong(deleted.getVal())); + } + while (true) { + Node deleted = dirDiff.removeChild("deletedInoderef"); + if (deleted == null){ + break; + } + bld.addDeletedINodeRef(Integer.parseInt(deleted.getVal())); + } + bld.build().writeDelimitedTo(out); + // After the DirectoryDiff header comes a list of CreatedListEntry PBs. + int actualCreatedListSize = 0; + while (true) { + Node created = dirDiff.removeChild("created"); + if (created == null){ + break; + } + String cleName = created.removeChildStr("name"); + if (cleName == null) { + throw new IOException("Expected <created> entry to have " + + "a <name> field"); + } + created.verifyNoRemainingKeys("created"); + FsImageProto.SnapshotDiffSection.CreatedListEntry.newBuilder(). + setName(ByteString.copyFrom(cleName, "UTF8")). 
+ build().writeDelimitedTo(out); + actualCreatedListSize++; + } + if (actualCreatedListSize != expectedCreatedListSize) { + throw new IOException("<createdListSize> was " + + expectedCreatedListSize +", but there were " + + actualCreatedListSize + " <created> entries."); + } + dirDiff.verifyNoRemainingKeys("dirDiff"); + } + expectTagEnd("dirDiffEntry"); + } + + /** + * Processes one <fileDiffEntry>: writes the DiffEntry header, then each + * <fileDiff> it announces. + * + * @throws IOException if a required field is missing or fewer <fileDiff> + * stanzas are present than <count> announced. + */ + private void processFileDiffEntry() throws IOException { + LOG.debug("Processing fileDiffEntry"); + DiffEntry.Builder headerBld = DiffEntry.newBuilder(); + headerBld.setType(DiffEntry.Type.FILEDIFF); + Node fileDiffHeader = new Node(); + loadNodeChildren(fileDiffHeader, "fileDiffEntry fields", "fileDiff"); + Long inodeId = fileDiffHeader.removeChildLong("inodeid"); + if (inodeId == null) { + throw new IOException("<fileDiffEntry> contained no <inodeid> entry."); + } + headerBld.setInodeId(inodeId); + Integer expectedDiffs = fileDiffHeader.removeChildInt("count"); + if (expectedDiffs == null) { + throw new IOException("<fileDiffEntry> contained no <count> entry."); + } + headerBld.setNumOfDiff(expectedDiffs); + fileDiffHeader.verifyNoRemainingKeys("fileDiffEntry"); + headerBld.build().writeDelimitedTo(out); + for (int actualDiffs = 0; actualDiffs < expectedDiffs; actualDiffs++) { + try { + expectTag("fileDiff", false); + } catch (IOException e) { + // actualDiffs is the number of diffs read before this one. + throw new IOException("Only read " + actualDiffs + + " diffs out of " + expectedDiffs, e); + } + Node fileDiff = new Node(); + loadNodeChildren(fileDiff, "fileDiff fields"); + FsImageProto.SnapshotDiffSection.FileDiff.Builder bld = + FsImageProto.SnapshotDiffSection.FileDiff.newBuilder(); + Integer snapshotId = fileDiff.removeChildInt("snapshotId"); + if (snapshotId != null) { + bld.setSnapshotId(snapshotId); + } + Long size = fileDiff.removeChildLong("size"); + if (size != null) { + bld.setFileSize(size); + } + String name = fileDiff.removeChildStr("name"); + if (name != null) { + bld.setName(ByteString.copyFrom(name, "UTF8")); + } + // TODO: missing 
snapshotCopy + // TODO: missing blocks + fileDiff.verifyNoRemainingKeys("fileDiff"); + bld.build().writeDelimitedTo(out); + } + expectTagEnd("fileDiffEntry"); + } + } + + /** + * Permission is serialized as a 64-bit long. [0:24):[25:48):[48:64) + * (in Big Endian). The first and the second parts are the string ids + * of the user and group name, and the last 16 bits are the permission bits. + * + * @param perm The permission string from the XML. + * @return The 64-bit value to use in the fsimage for permission. + * @throws IOException If we run out of string IDs in the string table. + */ + private long permissionXmlToU64(String perm) throws IOException { + String components[] = perm.split(":"); + if (components.length != 3) { + throw new IOException("Unable to parse permission string " + perm + + ": expected 3 components, but only had " + components.length); + } + String userName = components[0]; + String groupName = components[1]; + String modeString = components[2]; + long userNameId = registerStringId(userName); + long groupNameId = registerStringId(groupName); + long mode = new FsPermission(modeString).toShort(); + return (userNameId << 40) | (groupNameId << 16) | mode; + } + + /** + * The FSImage contains a string table which maps strings to IDs. + * This is a simple form of compression which takes advantage of the fact + * that the same strings tend to occur over and over again. + * This function will return an ID which we can use to represent the given + * string. If the string already exists in the string table, we will use + * that ID; otherwise, we will allocate a new one. + * + * @param str The string. + * @return The ID in the string table. + * @throws IOException If we run out of bits in the string table. We only + * have 25 bits. 
+ */ + int registerStringId(String str) throws IOException { + Integer id = stringTable.get(str); + if (id != null) { + return id; + } + int latestId = latestStringId; + if (latestId >= 0x1ffffff) { + throw new IOException("Cannot have more than 2**25 " + + "strings in the fsimage, because of the limitation on " + + "the size of string table IDs."); + } + stringTable.put(str, latestId); + latestStringId++; + return latestId; + } + + /** + * Record the length of a section of the FSImage in our FileSummary object. + * The FileSummary appears at the end of the FSImage and acts as a table of + * contents for the file. + * + * @param sectionNamePb The name of the section as it should appear in + * the fsimage. (This is different than the XML + * name.) + * @throws IOException + */ + void recordSectionLength(String sectionNamePb) throws IOException { + long curSectionStartOffset = sectionStartOffset; + long curPos = out.getCount(); + //if (sectionNamePb.equals(SectionName.STRING_TABLE.name())) { + fileSummaryBld.addSections(FileSummary.Section.newBuilder(). + setName(sectionNamePb). + setLength(curPos - curSectionStartOffset). + setOffset(curSectionStartOffset)); + //} + sectionStartOffset = curPos; + } + + /** + * Read the version tag which starts the XML file. + */ + private void readVersion() throws IOException { + try { + expectTag("version", false); + } catch (IOException e) { + // Handle the case where <version> does not exist. + // Note: fsimage XML files which are missing <version> are also missing + // many other fields that ovi needs to accurately reconstruct the + // fsimage. + throw new IOException("No <version> section found at the top of " + + "the fsimage XML. 
This XML file is too old to be processed " + + "by ovi.", e); + } + Node version = new Node(); + loadNodeChildren(version, "version fields"); + Integer onDiskVersion = version.removeChildInt("onDiskVersion"); + if (onDiskVersion == null) { + throw new IOException("The <version> section doesn't contain " + + "the onDiskVersion."); + } + Integer layoutVersion = version.removeChildInt("layoutVersion"); + if (layoutVersion == null) { + throw new IOException("The <version> section doesn't contain " + + "the layoutVersion."); + } + fileSummaryBld.setOndiskVersion(onDiskVersion); + fileSummaryBld.setLayoutVersion(layoutVersion); + if (LOG.isDebugEnabled()) { + LOG.debug("Loaded <version> with onDiskVersion=" + onDiskVersion + + ", layoutVersion=" + layoutVersion + "."); + } + } + + /** + * Write the string table to the fsimage. + * @throws IOException + */ + private void writeStringTableSection() throws IOException { + FsImageProto.StringTableSection sectionHeader = + FsImageProto.StringTableSection.newBuilder(). + setNumEntry(stringTable.size()).build(); + if (LOG.isDebugEnabled()) { + LOG.debug(SectionName.STRING_TABLE.name() + " writing header: {" + + TextFormat.printToString(sectionHeader) + "}"); + } + sectionHeader.writeDelimitedTo(out); + + // The entries don't have to be in any particular order, so iterating + // over the hash table is fine. + for (Map.Entry<String, Integer> entry : stringTable.entrySet()) { + FsImageProto.StringTableSection.Entry stEntry = + FsImageProto.StringTableSection.Entry.newBuilder(). + setStr(entry.getKey()). + setId(entry.getValue()). + build(); + if (LOG.isTraceEnabled()) { + LOG.trace("Writing string table entry: {" + + TextFormat.printToString(stEntry) + "}"); + } + stEntry.writeDelimitedTo(out); + } + recordSectionLength(SectionName.STRING_TABLE.name()); + } + + /** + * Processes the XML file back into an fsimage. 
+ */ + private void processXml() throws Exception { + LOG.debug("Loading <fsimage>."); + expectTag("fsimage", false); + // Read the <version> tag. + readVersion(); + // Write the HDFSIMG1 magic number which begins the fsimage file. + out.write(FSImageUtil.MAGIC_HEADER); + // Write a series of fsimage sections. + sectionStartOffset = FSImageUtil.MAGIC_HEADER.length; + final HashSet<String> unprocessedSections = + new HashSet<>(sections.keySet()); + while (!unprocessedSections.isEmpty()) { + XMLEvent ev = expectTag("[section header]", true); + if (ev.getEventType() == XMLStreamConstants.END_ELEMENT) { + if (ev.asEndElement().getName().getLocalPart().equals("fsimage")) { + throw new IOException("FSImage XML ended prematurely, without " + + "including section(s) " + StringUtils.join(", ", + unprocessedSections)); + } + throw new IOException("Got unexpected tag end event for " + + ev.asEndElement().getName().getLocalPart() + " while looking " + + "for section header tag."); + } else if (ev.getEventType() != XMLStreamConstants.START_ELEMENT) { + throw new IOException("Expected section header START_ELEMENT; " + + "got event of type " + ev.getEventType()); + } + String sectionName = ev.asStartElement().getName().getLocalPart(); + if (!unprocessedSections.contains(sectionName)) { + throw new IOException("Unknown or duplicate section found for " + + sectionName); + } + SectionProcessor sectionProcessor = sections.get(sectionName); + if (sectionProcessor == null) { + throw new IOException("Unknown FSImage section " + sectionName + + ". Valid section names are [" + + StringUtils.join(", ", sections.keySet()) + "]"); + } + unprocessedSections.remove(sectionName); + sectionProcessor.process(); + } + + // Write the StringTable section to disk. + // This has to be done after the other sections, since some of them + // add entries to the string table. + writeStringTableSection(); + + // Write the FileSummary section to disk. + // This section is always last. 
+ long prevOffset = out.getCount(); + FileSummary fileSummary = fileSummaryBld.build(); + if (LOG.isDebugEnabled()) { + LOG.debug("Writing FileSummary: {" + + TextFormat.printToString(fileSummary) + "}"); + } + // Even though the last 4 bytes of the file gives the FileSummary length, + // we still write a varint first that also contains the length. + fileSummary.writeDelimitedTo(out); + + // Write the length of the FileSummary section as a fixed-size big + // endian 4-byte quantity. + int summaryLen = Ints.checkedCast(out.getCount() - prevOffset); + byte[] summaryLenBytes = new byte[4]; + ByteBuffer.wrap(summaryLenBytes).asIntBuffer().put(summaryLen); + out.write(summaryLenBytes); + } + + /** + * Run the OfflineImageReconstructor. + * + * @param inputPath The input path to use. + * @param outputPath The output path to use. + * + * @throws Exception On error. + */ + public static void run(String inputPath, String outputPath) + throws Exception { + MessageDigest digester = MD5Hash.getDigester(); + FileOutputStream fout = null; + File foutHash = new File(outputPath + ".md5"); + Files.deleteIfExists(foutHash.toPath()); // delete any .md5 file that exists + CountingOutputStream out = null; + FileInputStream fis = null; + InputStreamReader reader = null; + try { + Files.deleteIfExists(Paths.get(outputPath)); + fout = new FileOutputStream(outputPath); + fis = new FileInputStream(inputPath); + reader = new InputStreamReader(fis, Charset.forName("UTF-8")); + out = new CountingOutputStream( + new DigestOutputStream( + new BufferedOutputStream(fout), digester)); + OfflineImageReconstructor oir = + new OfflineImageReconstructor(out, reader); + oir.processXml(); + } finally { + IOUtils.cleanup(LOG, reader, fis, out, fout); + } + // Write the md5 file + MD5FileUtils.saveMD5File(new File(outputPath), + new MD5Hash(digester.digest())); + } +} 
http://git-wip-us.apache.org/repos/asf/hadoop/blob/5998b20d/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/OfflineImageViewerPB.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/OfflineImageViewerPB.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/OfflineImageViewerPB.java index f27abb7..f2b308f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/OfflineImageViewerPB.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/OfflineImageViewerPB.java @@ -60,6 +60,8 @@ public class OfflineImageViewerPB { + " * XML: This processor creates an XML document with all elements of\n" + " the fsimage enumerated, suitable for further analysis by XML\n" + " tools.\n" + + " * reverseXML: This processor takes an XML file and creates a\n" + + " binary fsimage containing the same elements.\n" + " * FileDistribution: This processor analyzes the file size\n" + " distribution in the image.\n" + " -maxSize specifies the range [0, maxSize] of file sizes to be\n" @@ -73,15 +75,18 @@ public class OfflineImageViewerPB { + " changed via the -delimiter argument.\n" + "\n" + "Required command line arguments:\n" - + "-i,--inputFile <arg> FSImage file to process.\n" + + "-i,--inputFile <arg> FSImage or XML file to process.\n" + "\n" + "Optional command line arguments:\n" + "-o,--outputFile <arg> Name of output file. If the specified\n" + " file exists, it will be overwritten.\n" + " (output to stdout by default)\n" + + " If the input file was an XML file, we\n" + + " will also create an <outputFile>.md5 file.\n" + "-p,--processor <arg> Select which type of processor to apply\n" - + " against image file. 
(XML|FileDistribution|Web|Delimited)\n" - + " (Web by default)\n" + + " against image file. (XML|FileDistribution|\n" + + " ReverseXML|Web|Delimited)\n" + + " The default is Web.\n" + "-delimiter <arg> Delimiting string to use with Delimited processor. \n" + "-t,--temp <arg> Use temporary dir to cache intermediate result to generate\n" + " Delimited outputs. If not set, Delimited processor constructs\n" @@ -177,6 +182,16 @@ public class OfflineImageViewerPB { new PBImageXmlWriter(conf, out).visit( new RandomAccessFile(inputFile, "r")); break; + case "ReverseXML": + try { + OfflineImageReconstructor.run(inputFile, outputFile); + } catch (Exception e) { + System.err.println("OfflineImageReconstructor failed: " + + e.getMessage()); + e.printStackTrace(System.err); + System.exit(1); + } + break; case "Web": String addr = cmd.getOptionValue("addr", "localhost:5978"); try (WebImageViewer viewer = new WebImageViewer( @@ -200,6 +215,7 @@ public class OfflineImageViewerPB { System.err.println("Input file ended unexpectedly. Exiting"); } catch (IOException e) { System.err.println("Encountered exception. Exiting: " + e.getMessage()); + e.printStackTrace(System.err); } return -1; }
