HDFS-10797. Disk usage summary of snapshots causes renamed blocks to get counted twice. Contributed by Sean Mackrory.


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/6a38d118
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/6a38d118
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/6a38d118

Branch: refs/heads/HDFS-7240
Commit: 6a38d118d86b7907009bcec34f1b788d076f1d1c
Parents: e57fa81
Author: Xiao Chen <x...@apache.org>
Authored: Fri Oct 7 17:30:30 2016 -0700
Committer: Xiao Chen <x...@apache.org>
Committed: Fri Oct 7 17:37:15 2016 -0700

----------------------------------------------------------------------
 .../ContentSummaryComputationContext.java       |  94 ++++++++-
 .../hadoop/hdfs/server/namenode/INode.java      |   1 +
 .../hdfs/server/namenode/INodeDirectory.java    |  11 +-
 .../hadoop/hdfs/server/namenode/INodeFile.java  |   1 +
 .../hdfs/server/namenode/INodeReference.java    |   2 +
 .../hdfs/server/namenode/INodeSymlink.java      |   1 +
 .../snapshot/DirectorySnapshottableFeature.java |   9 +-
 .../snapshot/DirectoryWithSnapshotFeature.java  |  14 +-
 .../hdfs/server/namenode/snapshot/Snapshot.java |   1 +
 .../snapshot/TestRenameWithSnapshots.java       | 199 +++++++++++++++++++
 10 files changed, 307 insertions(+), 26 deletions(-)
----------------------------------------------------------------------
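
The bug shows up when a file inside a snapshotted directory is renamed: its blocks were counted once as a live child and once again via the snapshot diff's DELETED entry for the old name, so du of the directory roughly doubled. A minimal reproduction sketch, not part of the patch (the class name, paths and sizes below are illustrative, and it assumes the hadoop-hdfs test artifacts that provide MiniDFSCluster and DFSTestUtil are on the classpath):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DFSTestUtil;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.MiniDFSCluster;

/**
 * Illustrative reproduction sketch only, not part of the patch.
 */
public class Hdfs10797Repro {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
    try {
      cluster.waitActive();
      DistributedFileSystem fs = cluster.getFileSystem();
      Path root = new Path("/repro");
      fs.mkdirs(root);
      // Write one small file, snapshot the directory, then rename the file.
      DFSTestUtil.createFile(fs, new Path(root, "original"), 1024L, (short) 1, 0L);
      fs.allowSnapshot(root);
      fs.createSnapshot(root, "s0");
      fs.rename(new Path(root, "original"), new Path(root, "renamed"));
      // Before this fix the renamed blocks were tallied twice: once as a live
      // child and once via the snapshot diff's DELETED entry for the old name.
      System.out.println("spaceConsumed = "
          + fs.getContentSummary(root).getSpaceConsumed());
    } finally {
      cluster.shutdown();
    }
  }
}

With the patch applied, spaceConsumed stays at a single file's worth of blocks across the rename, which is what the new testDu case in TestRenameWithSnapshots asserts.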


http://git-wip-us.apache.org/repos/asf/hadoop/blob/6a38d118/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ContentSummaryComputationContext.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ContentSummaryComputationContext.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ContentSummaryComputationContext.java
index 6df9e75..4208b53 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ContentSummaryComputationContext.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ContentSummaryComputationContext.java
@@ -21,6 +21,10 @@ import com.google.common.base.Preconditions;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.hdfs.server.blockmanagement.BlockStoragePolicySuite;
+import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot;
+
+import java.util.HashSet;
+import java.util.Set;
 
 @InterfaceAudience.Private
 @InterfaceStability.Unstable
@@ -35,6 +39,8 @@ public class ContentSummaryComputationContext {
   private long yieldCount = 0;
   private long sleepMilliSec = 0;
   private int sleepNanoSec = 0;
+  private Set<INode> includedNodes = new HashSet<>();
+  private Set<INode> deletedSnapshottedNodes = new HashSet<>();
 
   /**
    * Constructor
@@ -51,8 +57,8 @@ public class ContentSummaryComputationContext {
     this.fsn = fsn;
     this.limitPerRun = limitPerRun;
     this.nextCountLimit = limitPerRun;
-    this.counts = new ContentCounts.Builder().build();
-    this.snapshotCounts = new ContentCounts.Builder().build();
+    setCounts(new ContentCounts.Builder().build());
+    setSnapshotCounts(new ContentCounts.Builder().build());
     this.sleepMilliSec = sleepMicroSec/1000;
     this.sleepNanoSec = (int)((sleepMicroSec%1000)*1000);
   }
@@ -82,6 +88,7 @@ public class ContentSummaryComputationContext {
     }
 
     // Have we reached the limit?
+    ContentCounts counts = getCounts();
     long currentCount = counts.getFileCount() +
         counts.getSymlinkCount() +
         counts.getDirectoryCount() +
@@ -123,14 +130,22 @@ public class ContentSummaryComputationContext {
   }
 
   /** Get the content counts */
-  public ContentCounts getCounts() {
+  public synchronized ContentCounts getCounts() {
     return counts;
   }
 
+  private synchronized void setCounts(ContentCounts counts) {
+    this.counts = counts;
+  }
+
   public ContentCounts getSnapshotCounts() {
     return snapshotCounts;
   }
 
+  private void setSnapshotCounts(ContentCounts snapshotCounts) {
+    this.snapshotCounts = snapshotCounts;
+  }
+
   public BlockStoragePolicySuite getBlockStoragePolicySuite() {
     Preconditions.checkState((bsps != null || fsn != null),
         "BlockStoragePolicySuite must be either initialized or available via" +
@@ -138,4 +153,77 @@ public class ContentSummaryComputationContext {
     return (bsps != null) ? bsps:
         fsn.getBlockManager().getStoragePolicySuite();
   }
+
+  /**
+   * If the node is an INodeReference, resolves it to the actual inode.
+   * Snapshot diffs represent renamed / moved files as different
+   * INodeReferences, but the underlying INode they refer to is consistent.
+   *
+   * @param node
+   * @return The referred INode if there is one, else returns the input
+   * unmodified.
+   */
+  private INode resolveINodeReference(INode node) {
+    if (node.isReference() && node instanceof INodeReference) {
+      return ((INodeReference)node).getReferredINode();
+    }
+    return node;
+  }
+
+  /**
+   * Reports that a node is about to be included in this summary. Can be used
+   * either to simply report that a node has been included, or to check
+   * whether a node has already been included.
+   *
+   * @param node
+   * @return true if node has already been included
+   */
+  public boolean nodeIncluded(INode node) {
+    INode resolvedNode = resolveINodeReference(node);
+    synchronized (includedNodes) {
+      if (!includedNodes.contains(resolvedNode)) {
+        includedNodes.add(resolvedNode);
+        return false;
+      }
+    }
+    return true;
+  }
+
+  /**
+   * Schedules a node that is listed as DELETED in a snapshot's diff to be
+   * included in the summary at the end of computation. See
+   * {@link #tallyDeletedSnapshottedINodes()} for more context.
+   *
+   * @param node
+   */
+  public void reportDeletedSnapshottedNode(INode node) {
+    deletedSnapshottedNodes.add(node);
+  }
+
+  /**
+   * Finalizes the computation by including all nodes that were reported as
+   * deleted by a snapshot but have not already been included due to other
+   * references.
+   * <p>
+   * Nodes that get renamed are listed in the snapshot's diff as both DELETED
+   * under the old name and CREATED under the new name. The computation
+   * relies on nodes to report themselves as being included (via
+   * {@link #nodeIncluded(INode)}) as the only reliable way to determine which
+   * nodes were renamed within the tree being summarized and which were
+   * removed (either by deletion or being renamed outside of the tree).
+   */
+  public synchronized void tallyDeletedSnapshottedINodes() {
+    /* Temporarily create a new counts object so these results can then be
+    added to both counts and snapshotCounts */
+    ContentCounts originalCounts = getCounts();
+    setCounts(new ContentCounts.Builder().build());
+    for (INode node : deletedSnapshottedNodes) {
+      if (!nodeIncluded(node)) {
+        node.computeContentSummary(Snapshot.CURRENT_STATE_ID, this);
+      }
+    }
+    originalCounts.addContents(getCounts());
+    snapshotCounts.addContents(getCounts());
+    setCounts(originalCounts);
+  }
 }
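
The new methods above boil down to a simple idea: resolve a reference to its underlying inode, remember what has already been counted, and only tally a snapshot's DELETED entries that were never reached under a live (renamed) name. A standalone toy sketch of that idea follows; the names are made up and this is not HDFS code:

import java.util.HashSet;
import java.util.Set;

/**
 * Standalone toy sketch, not HDFS code: Node, FileNode and Ref stand in for
 * INode, INodeFile and INodeReference to show the de-duplication idea behind
 * resolveINodeReference()/nodeIncluded() above.
 */
public class DedupSketch {
  interface Node { long length(); }

  static final class FileNode implements Node {
    private final long len;
    FileNode(long len) { this.len = len; }
    @Override public long length() { return len; }
  }

  /** Analogous to INodeReference: a distinct object pointing at the same node. */
  static final class Ref implements Node {
    private final Node referred;
    Ref(Node referred) { this.referred = referred; }
    @Override public long length() { return referred.length(); }
  }

  private final Set<Node> included = new HashSet<>();
  private long total;

  /** Resolve references so two Refs to the same node de-duplicate correctly. */
  private Node resolve(Node n) {
    return (n instanceof Ref) ? ((Ref) n).referred : n;
  }

  /** Mirrors nodeIncluded(): true if the underlying node was already counted. */
  private boolean nodeIncluded(Node n) {
    return !included.add(resolve(n));
  }

  private void count(Node n) {
    if (!nodeIncluded(n)) {
      total += n.length();
    }
  }

  public static void main(String[] args) {
    DedupSketch du = new DedupSketch();
    FileNode file = new FileNode(1024);      // the actual file data
    Ref liveName = new Ref(file);            // reached under the new (renamed) name
    Ref snapshotDeleted = new Ref(file);     // reached via the snapshot's DELETED list
    du.count(liveName);
    du.count(snapshotDeleted);               // skipped: same underlying node
    System.out.println(du.total);            // prints 1024, not 2048
  }
}

In the patch itself the set is keyed by the INode returned from resolveINodeReference, so the different INodeReference objects a rename leaves behind collapse to a single entry, and tallyDeletedSnapshottedINodes only counts DELETED entries that were never reached under a live name.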

http://git-wip-us.apache.org/repos/asf/hadoop/blob/6a38d118/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INode.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INode.java
index c6258a1..e1db990 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INode.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INode.java
@@ -429,6 +429,7 @@ public abstract class INode implements INodeAttributes, Diff.Element<byte[]> {
   public final ContentSummary computeAndConvertContentSummary(int snapshotId,
       ContentSummaryComputationContext summary) {
     computeContentSummary(snapshotId, summary);
+    summary.tallyDeletedSnapshottedINodes();
     final ContentCounts counts = summary.getCounts();
     final ContentCounts snapshotCounts = summary.getSnapshotCounts();
     final QuotaCounts q = getQuotaCounts();

http://git-wip-us.apache.org/repos/asf/hadoop/blob/6a38d118/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeDirectory.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeDirectory.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeDirectory.java
index 24c8815..b6e2713 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeDirectory.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeDirectory.java
@@ -628,17 +628,10 @@ public class INodeDirectory extends INodeWithAdditionalFields
   @Override
   public ContentSummaryComputationContext computeContentSummary(int snapshotId,
       ContentSummaryComputationContext summary) {
+    summary.nodeIncluded(this);
     final DirectoryWithSnapshotFeature sf = getDirectoryWithSnapshotFeature();
     if (sf != null && snapshotId == Snapshot.CURRENT_STATE_ID) {
-      final ContentCounts counts = new ContentCounts.Builder().build();
-      // if the getContentSummary call is against a non-snapshot path, the
-      // computation should include all the deleted files/directories
-      sf.computeContentSummary4Snapshot(summary.getBlockStoragePolicySuite(),
-          counts);
-      summary.getCounts().addContents(counts);
-      // Also add ContentSummary to snapshotCounts (So we can extract it
-      // later from the ContentSummary of all).
-      summary.getSnapshotCounts().addContents(counts);
+      sf.computeContentSummary4Snapshot(summary);
     }
     final DirectoryWithQuotaFeature q = getDirectoryWithQuotaFeature();
     if (q != null && snapshotId == Snapshot.CURRENT_STATE_ID) {

http://git-wip-us.apache.org/repos/asf/hadoop/blob/6a38d118/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeFile.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeFile.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeFile.java
index 12ead7f..37f97db 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeFile.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeFile.java
@@ -770,6 +770,7 @@ public class INodeFile extends INodeWithAdditionalFields
   @Override
   public final ContentSummaryComputationContext computeContentSummary(
       int snapshotId, final ContentSummaryComputationContext summary) {
+    summary.nodeIncluded(this);
     final ContentCounts counts = summary.getCounts();
     counts.addContent(Content.FILE, 1);
     final long fileLen = computeFileSize(snapshotId);

http://git-wip-us.apache.org/repos/asf/hadoop/blob/6a38d118/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeReference.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeReference.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeReference.java
index 1b85237..56aaf8c 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeReference.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeReference.java
@@ -315,6 +315,7 @@ public abstract class INodeReference extends INode {
   @Override
   public ContentSummaryComputationContext computeContentSummary(int snapshotId,
       ContentSummaryComputationContext summary) {
+    summary.nodeIncluded(this);
     return referred.computeContentSummary(snapshotId, summary);
   }
 
@@ -504,6 +505,7 @@ public abstract class INodeReference extends INode {
     @Override
     public final ContentSummaryComputationContext computeContentSummary(
         int snapshotId, ContentSummaryComputationContext summary) {
+      summary.nodeIncluded(this);
       final int s = snapshotId < lastSnapshotId ? snapshotId : lastSnapshotId;
       // only count storagespace for WithName
       final QuotaCounts q = computeQuotaUsage(

http://git-wip-us.apache.org/repos/asf/hadoop/blob/6a38d118/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeSymlink.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeSymlink.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeSymlink.java
index c76bea0..1223f4e 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeSymlink.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeSymlink.java
@@ -96,6 +96,7 @@ public class INodeSymlink extends INodeWithAdditionalFields {
   @Override
   public ContentSummaryComputationContext computeContentSummary(int snapshotId,
       final ContentSummaryComputationContext summary) {
+    summary.nodeIncluded(this);
     summary.getCounts().addContent(Content.SYMLINK, 1);
     return summary;
   }

http://git-wip-us.apache.org/repos/asf/hadoop/blob/6a38d118/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/DirectorySnapshottableFeature.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/DirectorySnapshottableFeature.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/DirectorySnapshottableFeature.java
index 39db979..fa7bace 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/DirectorySnapshottableFeature.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/DirectorySnapshottableFeature.java
@@ -29,9 +29,9 @@ import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.hdfs.DFSUtil;
 import org.apache.hadoop.hdfs.protocol.QuotaExceededException;
 import org.apache.hadoop.hdfs.protocol.SnapshotException;
-import org.apache.hadoop.hdfs.server.blockmanagement.BlockStoragePolicySuite;
 import org.apache.hadoop.hdfs.server.namenode.Content;
 import org.apache.hadoop.hdfs.server.namenode.ContentCounts;
+import org.apache.hadoop.hdfs.server.namenode.ContentSummaryComputationContext;
 import org.apache.hadoop.hdfs.server.namenode.INode;
 import org.apache.hadoop.hdfs.server.namenode.INodeDirectory;
 import org.apache.hadoop.hdfs.server.namenode.INodeDirectory.SnapshotAndINode;
@@ -220,11 +220,12 @@ public class DirectorySnapshottableFeature extends DirectoryWithSnapshotFeature
   }
 
   @Override
-  public void computeContentSummary4Snapshot(final BlockStoragePolicySuite bsps,
-      final ContentCounts counts) {
+  public void computeContentSummary4Snapshot(ContentSummaryComputationContext
+                                                   context) {
+    ContentCounts counts = context.getCounts();
     counts.addContent(Content.SNAPSHOT, snapshotsByNames.size());
     counts.addContent(Content.SNAPSHOTTABLE_DIRECTORY, 1);
-    super.computeContentSummary4Snapshot(bsps, counts);
+    super.computeContentSummary4Snapshot(context);
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/hadoop/blob/6a38d118/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/DirectoryWithSnapshotFeature.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/DirectoryWithSnapshotFeature.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/DirectoryWithSnapshotFeature.java
index 0111b3b..9addbfa 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/DirectoryWithSnapshotFeature.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/DirectoryWithSnapshotFeature.java
@@ -30,7 +30,6 @@ import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.hdfs.protocol.QuotaExceededException;
 import org.apache.hadoop.hdfs.server.blockmanagement.BlockStoragePolicySuite;
 import org.apache.hadoop.hdfs.server.namenode.AclStorage;
-import org.apache.hadoop.hdfs.server.namenode.ContentCounts;
 import org.apache.hadoop.hdfs.server.namenode.ContentSummaryComputationContext;
 import org.apache.hadoop.hdfs.server.namenode.FSImageSerialization;
 import org.apache.hadoop.hdfs.server.namenode.INode;
@@ -629,18 +628,13 @@ public class DirectoryWithSnapshotFeature implements INode.Feature {
     return counts;
   }
 
-  public void computeContentSummary4Snapshot(final BlockStoragePolicySuite bsps,
-      final ContentCounts counts) {
-    // Create a new blank summary context for blocking processing of subtree.
-    ContentSummaryComputationContext summary = 
-        new ContentSummaryComputationContext(bsps);
+  public void computeContentSummary4Snapshot(
+      ContentSummaryComputationContext context) {
     for(DirectoryDiff d : diffs) {
-      for(INode deleted : d.getChildrenDiff().getList(ListType.DELETED)) {
-        deleted.computeContentSummary(Snapshot.CURRENT_STATE_ID, summary);
+      for(INode deletedNode : d.getChildrenDiff().getList(ListType.DELETED)) {
+        context.reportDeletedSnapshottedNode(deletedNode);
       }
     }
-    // Add the counts from deleted trees.
-    counts.addContents(summary.getCounts());
   }
   
   /**

http://git-wip-us.apache.org/repos/asf/hadoop/blob/6a38d118/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/Snapshot.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/Snapshot.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/Snapshot.java
index e98e766..832a339 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/Snapshot.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/Snapshot.java
@@ -177,6 +177,7 @@ public class Snapshot implements Comparable<byte[]> {
     @Override
     public ContentSummaryComputationContext computeContentSummary(
         int snapshotId, ContentSummaryComputationContext summary) {
+      summary.nodeIncluded(this);
       return computeDirectoryContentSummary(summary, snapshotId);
     }
 

http://git-wip-us.apache.org/repos/asf/hadoop/blob/6a38d118/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestRenameWithSnapshots.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestRenameWithSnapshots.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestRenameWithSnapshots.java
index 91eec78..d1b3aa6 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestRenameWithSnapshots.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestRenameWithSnapshots.java
@@ -36,8 +36,10 @@ import java.util.Random;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.ContentSummary;
 import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Options.Rename;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.permission.FsPermission;
@@ -2411,4 +2413,201 @@ public class TestRenameWithSnapshots {
     assertTrue(existsInDiffReport(entries, DiffType.RENAME, "foo/file2", "newDir/file2"));
     assertTrue(existsInDiffReport(entries, DiffType.RENAME, "foo/file3", "newDir/file1"));
   }
+
+  private void checkSpaceConsumed(String message, Path directory,
+                                  long expectedSpace) throws Exception {
+    ContentSummary summary = hdfs.getContentSummary(directory);
+    assertEquals(message, expectedSpace, summary.getSpaceConsumed());
+  }
+
+  /**
+   * Runs through various combinations of renames, deletes, appends and other
+   * operations in a snapshotted directory and ensures disk usage summaries
+   * (e.g. du -s) are computed correctly.
+   *
+   * @throws Exception
+   */
+  @Test (timeout=300000)
+  public void testDu() throws Exception {
+    File tempFile = File.createTempFile("testDu-", ".tmp");
+    tempFile.deleteOnExit();
+
+    final FileSystem localfs = FileSystem.getLocal(conf);
+    final Path localOriginal = new Path(tempFile.getPath());
+    final Path dfsRoot = new Path("/testDu");
+    final Path dfsOriginal = new Path(dfsRoot, "original");
+    final Path dfsRenamed1 = new Path(dfsRoot, "renamed1");
+    final Path dfsRenamed2 = new Path(dfsRoot, "renamed2");
+    final Path dfsAppended = new Path(dfsRoot, "appended");
+
+    /* We will test with a single block worth of data. If we don't at least use
+    a multiple of BLOCKSIZE, append operations will modify snapshotted blocks
+    and other factors will come into play here that we'll have to account for */
+    final long spaceIncrement = BLOCKSIZE * REPL;
+    final byte[] appendData = new byte[(int) BLOCKSIZE];
+    DFSTestUtil.createFile(localfs, localOriginal, BLOCKSIZE, REPL, SEED);
+
+    FSDataOutputStream out = null;
+    long expectedSpace = 0;
+
+    hdfs.mkdirs(dfsRoot);
+    checkSpaceConsumed("Du is wrong immediately",
+        dfsRoot, 0L);
+
+    hdfs.copyFromLocalFile(localOriginal, dfsOriginal);
+    expectedSpace += spaceIncrement;
+    checkSpaceConsumed("Du is wrong after creating / copying file",
+        dfsRoot, expectedSpace);
+
+    SnapshotTestHelper.createSnapshot(hdfs, dfsRoot, "s0");
+    checkSpaceConsumed("Du is wrong after snapshotting",
+        dfsRoot, expectedSpace);
+
+    hdfs.rename(dfsOriginal, dfsRenamed1);
+    checkSpaceConsumed("Du is wrong after 1 rename",
+        dfsRoot, expectedSpace);
+
+    hdfs.rename(dfsRenamed1, dfsRenamed2);
+    checkSpaceConsumed("Du is wrong after 2 renames",
+        dfsRoot, expectedSpace);
+
+    hdfs.delete(dfsRenamed2, false);
+    checkSpaceConsumed("Du is wrong after deletion",
+        dfsRoot, expectedSpace);
+
+    hdfs.copyFromLocalFile(localOriginal, dfsOriginal);
+    expectedSpace += spaceIncrement;
+    checkSpaceConsumed("Du is wrong after replacing a renamed file",
+        dfsRoot, expectedSpace);
+
+    hdfs.copyFromLocalFile(localOriginal, dfsAppended);
+    expectedSpace += spaceIncrement;
+    SnapshotTestHelper.createSnapshot(hdfs, dfsRoot, "s1");
+
+    out = hdfs.append(dfsAppended);
+    out.write(appendData);
+    out.close();
+    expectedSpace += spaceIncrement;
+    checkSpaceConsumed("Du is wrong after 1 snapshot + append",
+        dfsRoot, expectedSpace);
+
+    SnapshotTestHelper.createSnapshot(hdfs, dfsRoot, "s2");
+    out = hdfs.append(dfsAppended);
+    out.write(appendData);
+    out.close();
+    expectedSpace += spaceIncrement;
+    checkSpaceConsumed("Du is wrong after 2 snapshot + appends",
+        dfsRoot, expectedSpace);
+
+    SnapshotTestHelper.createSnapshot(hdfs, dfsRoot, "s3");
+    out = hdfs.append(dfsAppended);
+    out.write(appendData);
+    out.close();
+    expectedSpace += spaceIncrement;
+    hdfs.rename(dfsAppended, dfsRenamed1);
+    checkSpaceConsumed("Du is wrong after snapshot, append, & rename",
+        dfsRoot, expectedSpace);
+    hdfs.delete(dfsRenamed1, false);
+    // everything but the last append is snapshotted
+    expectedSpace -= spaceIncrement;
+    checkSpaceConsumed("Du is wrong after snapshot, append, delete & rename",
+        dfsRoot, expectedSpace);
+
+    hdfs.delete(dfsOriginal, false);
+    hdfs.deleteSnapshot(dfsRoot, "s0");
+    hdfs.deleteSnapshot(dfsRoot, "s1");
+    hdfs.deleteSnapshot(dfsRoot, "s2");
+    hdfs.deleteSnapshot(dfsRoot, "s3");
+    expectedSpace = 0;
+    checkSpaceConsumed("Du is wrong after deleting all files and snapshots",
+        dfsRoot, expectedSpace);
+  }
+
+  /**
+   * Runs through various combinations of renames, deletes, appends and other
+   * operations between two snapshotted directories and ensures disk usage
+   * summaries (e.g. du -s) are computed correctly.
+   *
+   * This test currently assumes some incorrect behavior when files have been
+   * moved between subdirectories of the one being queried. In the cases
+   * below, only 1 block worth of data should ever actually be used. However,
+   * if there are 2-3 subdirectories that contain, or have contained in a
+   * snapshot, the same file, that file will be counted 2-3 times, respectively,
+   * since each directory is computed independently and recursively.
+   *
+   * @throws Exception
+   */
+  @Test (timeout=300000)
+  public void testDuMultipleDirs() throws Exception {
+    File tempFile = File.createTempFile("testDuMultipleDirs-", "" + ".tmp");
+    tempFile.deleteOnExit();
+
+    final FileSystem localfs = FileSystem.getLocal(conf);
+    final Path localOriginal = new Path(tempFile.getPath());
+    final Path dfsRoot = new Path("/testDuMultipleDirs");
+    final Path snapshottable1 = new Path(dfsRoot, "snapshottable1");
+    final Path snapshottable2 = new Path(dfsRoot, "snapshottable2");
+    final Path nonsnapshottable = new Path(dfsRoot, "nonsnapshottable");
+    final Path subdirectory = new Path(snapshottable1, "subdirectory");
+    final Path dfsOriginal = new Path(snapshottable1, "file");
+    final Path renamedNonsnapshottable = new Path(nonsnapshottable, "file");
+    final Path renamedSnapshottable = new Path(snapshottable2, "file");
+    final Path renamedSubdirectory = new Path(subdirectory, "file");
+
+    /* We will test with a single block worth of data. If we don't at least use
+    a multiple of BLOCKSIZE, append operations will modify snapshotted blocks
+    and other factors will come into play here that we'll have to account for */
+    final long spaceConsumed = BLOCKSIZE * REPL;
+    DFSTestUtil.createFile(localfs, localOriginal, BLOCKSIZE, REPL, SEED);
+
+    hdfs.mkdirs(snapshottable1);
+    hdfs.mkdirs(snapshottable2);
+    hdfs.mkdirs(nonsnapshottable);
+    hdfs.mkdirs(subdirectory);
+    checkSpaceConsumed("Du is wrong immediately",
+        dfsRoot, 0L);
+
+    hdfs.copyFromLocalFile(localOriginal, dfsOriginal);
+    checkSpaceConsumed("Du is wrong after creating / copying file",
+        snapshottable1, spaceConsumed);
+
+    SnapshotTestHelper.createSnapshot(hdfs, snapshottable1, "s1");
+    checkSpaceConsumed("Du is wrong in original dir after 1st snapshot",
+        snapshottable1, spaceConsumed);
+
+    hdfs.rename(dfsOriginal, renamedNonsnapshottable);
+    checkSpaceConsumed("Du is wrong in original dir after 1st rename",
+        snapshottable1, spaceConsumed);
+    checkSpaceConsumed("Du is wrong in non-snapshottable dir after 1st rename",
+        nonsnapshottable, spaceConsumed);
+    checkSpaceConsumed("Du is wrong in root dir after 1st rename",
+        dfsRoot, spaceConsumed);
+
+    hdfs.rename(renamedNonsnapshottable, renamedSnapshottable);
+    checkSpaceConsumed("Du is wrong in original dir after 2nd rename",
+        snapshottable1, spaceConsumed);
+    checkSpaceConsumed("Du is wrong in non-snapshottable dir after 2nd rename",
+        nonsnapshottable, 0);
+    checkSpaceConsumed("Du is wrong in snapshottable dir after 2nd rename",
+        snapshottable2, spaceConsumed);
+    checkSpaceConsumed("Du is wrong in root dir after 2nd rename",
+        dfsRoot, spaceConsumed);
+
+    SnapshotTestHelper.createSnapshot(hdfs, snapshottable2, "s2");
+    hdfs.rename(renamedSnapshottable, renamedSubdirectory);
+    checkSpaceConsumed("Du is wrong in original dir after 3rd rename",
+        snapshottable1, spaceConsumed);
+    checkSpaceConsumed("Du is wrong in snapshottable dir after 3rd rename",
+        snapshottable2, spaceConsumed);
+    checkSpaceConsumed("Du is wrong in original subdirectory after 3rd rename",
+        subdirectory, spaceConsumed);
+    checkSpaceConsumed("Du is wrong in root dir after 3rd rename",
+        dfsRoot, spaceConsumed);
+
+    hdfs.delete(renamedSubdirectory, false);
+    hdfs.deleteSnapshot(snapshottable1, "s1");
+    hdfs.deleteSnapshot(snapshottable2, "s2");
+    checkSpaceConsumed("Du is wrong after deleting all files and snapshots",
+        dfsRoot, 0);
+  }
 }

