Repository: hadoop
Updated Branches:
  refs/heads/branch-3.1 164ad48fa -> 3141e17d8


HDFS-12998. SnapshotDiff - Provide an iterator-based listing API for 
calculating snapshotDiff. Contributed by Shashikant Banerjee


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/7abcea03
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/7abcea03
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/7abcea03

Branch: refs/heads/branch-3.1
Commit: 7abcea037810f9e771e4e9050a22a759e00aab9b
Parents: 164ad48
Author: Tsz-Wo Nicholas Sze <szets...@hortonworks.com>
Authored: Mon Feb 19 11:42:10 2018 +0800
Committer: Tsz-Wo Nicholas Sze <szets...@hortonworks.com>
Committed: Tue Feb 27 15:37:48 2018 -0800

----------------------------------------------------------------------
 .../hadoop/hdfs/DistributedFileSystem.java      |  87 +++++++++++++
 .../snapshot/TestSnapshotDiffReport.java        | 130 +++++++++++++++++++
 2 files changed, 217 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/7abcea03/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java
index 3883f2f..35b6417 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java
+++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java
@@ -1994,6 +1994,93 @@ public class DistributedFileSystem extends FileSystem
     }.resolve(this, absF);
   }
 
+  /**
+   * Returns a remote iterator so that follow-up RPC calls are made on
+   * demand while the SnapshotDiffReportListing entries are consumed.
+   * This reduces the memory overhead when the snapshot diff report is
+   * huge.
+   *
+   * @param snapshotDir
+   *          full path of the directory where snapshots are taken
+   * @param fromSnapshot
+   *          snapshot name of the from point. Null indicates the current
+   *          tree
+   * @param toSnapshot
+   *          snapshot name of the to point. Null indicates the current
+   *          tree.
+   * @return Remote iterator
+   */
+  public RemoteIterator
+      <SnapshotDiffReportListing> snapshotDiffReportListingRemoteIterator(
+      final Path snapshotDir, final String fromSnapshot,
+      final String toSnapshot) throws IOException {
+    Path absF = fixRelativePart(snapshotDir);
+    return new FileSystemLinkResolver
+        <RemoteIterator<SnapshotDiffReportListing>>() {
+      @Override
+      public RemoteIterator<SnapshotDiffReportListing> doCall(final Path p)
+          throws IOException {
+        return new SnapshotDiffReportListingIterator(
+            getPathName(p), fromSnapshot, toSnapshot);
+      }
+
+      @Override
+      public RemoteIterator<SnapshotDiffReportListing> next(final FileSystem fs,
+          final Path p) throws IOException {
+        return ((DistributedFileSystem) fs)
+            .snapshotDiffReportListingRemoteIterator(p, fromSnapshot,
+                toSnapshot);
+      }
+    }.resolve(this, absF);
+
+  }
+
+  /**
+   * This class defines an iterator that returns
+   * the SnapshotDiffReportListing for a snapshottable directory
+   * between two given snapshots.
+   */
+  private final class SnapshotDiffReportListingIterator implements
+      RemoteIterator<SnapshotDiffReportListing> {
+    private final String snapshotDir;
+    private final String fromSnapshot;
+    private final String toSnapshot;
+
+    private byte[] startPath;
+    private int index;
+    private boolean hasNext = true;
+
+    private SnapshotDiffReportListingIterator(String snapshotDir,
+        String fromSnapshot, String toSnapshot) {
+      this.snapshotDir = snapshotDir;
+      this.fromSnapshot = fromSnapshot;
+      this.toSnapshot = toSnapshot;
+      this.startPath = DFSUtilClient.EMPTY_BYTES;
+      this.index = -1;
+    }
+
+    @Override
+    public boolean hasNext() {
+      return hasNext;
+    }
+
+    @Override
+    public SnapshotDiffReportListing next() throws IOException {
+      if (!hasNext) {
+        throw new java.util.NoSuchElementException(
+            "No more entry in SnapshotDiffReport for " + snapshotDir);
+      }
+      final SnapshotDiffReportListing part =
+          dfs.getSnapshotDiffReportListing(snapshotDir, fromSnapshot,
+              toSnapshot, startPath, index);
+      startPath = part.getLastPath();
+      index = part.getLastIndex();
+      hasNext =
+          !(Arrays.equals(startPath, DFSUtilClient.EMPTY_BYTES) && index == -1);
+      return part;
+    }
+  }
+
   private SnapshotDiffReport getSnapshotDiffReportInternal(
       final String snapshotDir, final String fromSnapshot,
       final String toSnapshot) throws IOException {

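For reference, here is a minimal client-side sketch (not part of the patch) of how the new iterator might be consumed, mirroring the test added below. The class name, the helper method fetchSnapshotDiff, and its parameters are hypothetical placeholders; only the HDFS types and calls that appear in this commit are assumed.

import java.io.IOException;
import java.util.List;

import org.apache.commons.collections.list.TreeList;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.client.impl.SnapshotDiffReportGenerator;
import org.apache.hadoop.hdfs.protocol.SnapshotDiffReport;
import org.apache.hadoop.hdfs.protocol.SnapshotDiffReportListing;
import org.apache.hadoop.util.ChunkedArrayList;

public class SnapshotDiffIteratorExample {
  /**
   * Fetch the diff between two snapshots chunk by chunk and merge the
   * chunks into a single SnapshotDiffReport on the client side.
   * The file system, snapshot root and snapshot names are caller-supplied.
   */
  static SnapshotDiffReport fetchSnapshotDiff(DistributedFileSystem dfs,
      Path snapshotRoot, String from, String to) throws IOException {
    RemoteIterator<SnapshotDiffReportListing> it =
        dfs.snapshotDiffReportListingRemoteIterator(snapshotRoot, from, to);
    // Same list types as the test below uses for the modified, created and
    // deleted entries.
    List<SnapshotDiffReportListing.DiffReportListingEntry> modified =
        new TreeList();
    List<SnapshotDiffReportListing.DiffReportListingEntry> created =
        new ChunkedArrayList<>();
    List<SnapshotDiffReportListing.DiffReportListingEntry> deleted =
        new ChunkedArrayList<>();
    boolean isFromEarlier = true;
    // Each next() call fetches one partial listing on demand, so only a
    // bounded slice of the full diff report is held in memory at a time.
    while (it.hasNext()) {
      SnapshotDiffReportListing chunk = it.next();
      isFromEarlier = chunk.getIsFromEarlier();
      modified.addAll(chunk.getModifyList());
      created.addAll(chunk.getCreateList());
      deleted.addAll(chunk.getDeleteList());
    }
    // Combine the partial listings into a single report, as the test does.
    return new SnapshotDiffReportGenerator(snapshotRoot.toString(), from, to,
        isFromEarlier, modified, created, deleted).generateReport();
  }
}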
http://git-wip-us.apache.org/repos/asf/hadoop/blob/7abcea03/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestSnapshotDiffReport.java
----------------------------------------------------------------------
diff --git 
a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestSnapshotDiffReport.java
 
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestSnapshotDiffReport.java
index a4fb8ab..3bfcfbf 100644
--- 
a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestSnapshotDiffReport.java
+++ 
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestSnapshotDiffReport.java
@@ -28,11 +28,15 @@ import java.util.Date;
 import java.util.EnumSet;
 import java.util.HashMap;
 import java.util.Random;
+import java.util.List;
+import java.util.ArrayList;
 
+import org.apache.commons.collections.list.TreeList;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.Options.Rename;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
 import org.apache.hadoop.hdfs.DFSConfigKeys;
 import org.apache.hadoop.hdfs.DFSTestUtil;
 import org.apache.hadoop.hdfs.DFSUtil;
@@ -40,14 +44,17 @@ import org.apache.hadoop.hdfs.DistributedFileSystem;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.apache.hadoop.hdfs.client.HdfsDataOutputStream;
 import org.apache.hadoop.hdfs.client.HdfsDataOutputStream.SyncFlag;
+import org.apache.hadoop.hdfs.client.impl.SnapshotDiffReportGenerator;
 import org.apache.hadoop.hdfs.protocol.SnapshotDiffReport;
 import org.apache.hadoop.hdfs.protocol.SnapshotDiffReport.DiffReportEntry;
 import org.apache.hadoop.hdfs.protocol.SnapshotDiffReport.DiffType;
+import org.apache.hadoop.hdfs.protocol.SnapshotDiffReportListing;
 import org.apache.hadoop.hdfs.protocol.SnapshotException;
 import org.apache.hadoop.hdfs.server.namenode.INodeDirectory;
 import org.apache.hadoop.hdfs.server.namenode.NameNode;
 import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
 import org.apache.hadoop.test.GenericTestUtils;
+import org.apache.hadoop.util.ChunkedArrayList;
 import org.apache.hadoop.util.Time;
 import org.junit.After;
 import org.junit.Assert;
@@ -1409,4 +1416,127 @@ public class TestSnapshotDiffReport {
         new DiffReportEntry(DiffType.DELETE,
             DFSUtil.string2Bytes("dir3/file3")));
   }
+
+  private void verifyDiffReportForGivenReport(Path dirPath, String from,
+      String to, SnapshotDiffReport report, DiffReportEntry... entries)
+      throws IOException {
+    // reverse the order of from and to
+    SnapshotDiffReport inverseReport =
+        hdfs.getSnapshotDiffReport(dirPath, to, from);
+    LOG.info(report.toString());
+    LOG.info(inverseReport.toString() + "\n");
+
+    assertEquals(entries.length, report.getDiffList().size());
+    assertEquals(entries.length, inverseReport.getDiffList().size());
+
+    for (DiffReportEntry entry : entries) {
+      if (entry.getType() == DiffType.MODIFY) {
+        assertTrue(report.getDiffList().contains(entry));
+        assertTrue(inverseReport.getDiffList().contains(entry));
+      } else if (entry.getType() == DiffType.DELETE) {
+        assertTrue(report.getDiffList().contains(entry));
+        assertTrue(inverseReport.getDiffList().contains(
+            new DiffReportEntry(DiffType.CREATE, entry.getSourcePath())));
+      } else if (entry.getType() == DiffType.CREATE) {
+        assertTrue(report.getDiffList().contains(entry));
+        assertTrue(inverseReport.getDiffList().contains(
+            new DiffReportEntry(DiffType.DELETE, entry.getSourcePath())));
+      }
+    }
+  }
+
+  @Test
+  public void testSnapshotDiffReportRemoteIterator() throws Exception {
+    final Path root = new Path("/");
+    hdfs.mkdirs(root);
+    for (int i = 1; i <= 3; i++) {
+      final Path path = new Path(root, "dir" + i);
+      hdfs.mkdirs(path);
+    }
+    for (int i = 1; i <= 3; i++) {
+      final Path path = new Path(root, "dir" + i);
+      for (int j = 1; j < 4; j++) {
+        final Path file = new Path(path, "file" + j);
+        DFSTestUtil.createFile(hdfs, file, BLOCKSIZE, REPLICATION, SEED);
+      }
+    }
+    SnapshotTestHelper.createSnapshot(hdfs, root, "s0");
+    Path targetDir = new Path(root, "dir4");
+    //create directory dir4
+    hdfs.mkdirs(targetDir);
+    //moves files from dir1 to dir4
+    Path path = new Path(root, "dir1");
+    for (int j = 1; j < 4; j++) {
+      final Path srcPath = new Path(path, "file" + j);
+      final Path targetPath = new Path(targetDir, "file" + j);
+      hdfs.rename(srcPath, targetPath);
+    }
+    targetDir = new Path(root, "dir3");
+    //overwrite existing files in dir3 from files in dir1
+    path = new Path(root, "dir2");
+    for (int j = 1; j < 4; j++) {
+      final Path srcPath = new Path(path, "file" + j);
+      final Path targetPath = new Path(targetDir, "file" + j);
+      hdfs.rename(srcPath, targetPath, Rename.OVERWRITE);
+    }
+    final Path pathToRename = new Path(root, "dir2");
+    //move dir2 inside dir3
+    hdfs.rename(pathToRename, targetDir);
+    SnapshotTestHelper.createSnapshot(hdfs, root, "s1");
+    RemoteIterator<SnapshotDiffReportListing> iterator =
+        hdfs.snapshotDiffReportListingRemoteIterator(root, "s0", "s1");
+    SnapshotDiffReportGenerator snapshotDiffReport;
+    List<SnapshotDiffReportListing.DiffReportListingEntry> modifiedList =
+        new TreeList();
+    List<SnapshotDiffReportListing.DiffReportListingEntry> createdList =
+        new ChunkedArrayList<>();
+    List<SnapshotDiffReportListing.DiffReportListingEntry> deletedList =
+        new ChunkedArrayList<>();
+    SnapshotDiffReportListing report = null;
+    List<SnapshotDiffReportListing> reportList = new ArrayList<>();
+    while (iterator.hasNext()) {
+      report = iterator.next();
+      reportList.add(report);
+      modifiedList.addAll(report.getModifyList());
+      createdList.addAll(report.getCreateList());
+      deletedList.addAll(report.getDeleteList());
+    }
+    try {
+      iterator.next();
+    } catch (Exception e) {
+      Assert.assertTrue(
+          e.getMessage().contains("No more entry in SnapshotDiffReport for /"));
+    }
+    Assert.assertNotEquals(0, reportList.size());
+    // generate the snapshotDiffReport and Verify
+    snapshotDiffReport = new SnapshotDiffReportGenerator("/", "s0", "s1",
+        report.getIsFromEarlier(), modifiedList, createdList, deletedList);
+    verifyDiffReportForGivenReport(root, "s0", "s1",
+        snapshotDiffReport.generateReport(),
+        new DiffReportEntry(DiffType.MODIFY, DFSUtil.string2Bytes("")),
+        new DiffReportEntry(DiffType.CREATE, DFSUtil.string2Bytes("dir4")),
+        new DiffReportEntry(DiffType.RENAME, DFSUtil.string2Bytes("dir2"),
+            DFSUtil.string2Bytes("dir3/dir2")),
+        new DiffReportEntry(DiffType.MODIFY, DFSUtil.string2Bytes("dir1")),
+        new DiffReportEntry(DiffType.RENAME, DFSUtil.string2Bytes("dir1/file1"),
+            DFSUtil.string2Bytes("dir4/file1")),
+        new DiffReportEntry(DiffType.RENAME, DFSUtil.string2Bytes("dir1/file2"),
+            DFSUtil.string2Bytes("dir4/file2")),
+        new DiffReportEntry(DiffType.RENAME, DFSUtil.string2Bytes("dir1/file3"),
+            DFSUtil.string2Bytes("dir4/file3")),
+        new DiffReportEntry(DiffType.MODIFY, DFSUtil.string2Bytes("dir2")),
+        new DiffReportEntry(DiffType.RENAME, DFSUtil.string2Bytes("dir2/file1"),
+            DFSUtil.string2Bytes("dir3/file1")),
+        new DiffReportEntry(DiffType.RENAME, DFSUtil.string2Bytes("dir2/file2"),
+            DFSUtil.string2Bytes("dir3/file2")),
+        new DiffReportEntry(DiffType.RENAME, DFSUtil.string2Bytes("dir2/file3"),
+            DFSUtil.string2Bytes("dir3/file3")),
+        new DiffReportEntry(DiffType.MODIFY, DFSUtil.string2Bytes("dir3")),
+        new DiffReportEntry(DiffType.DELETE,
+            DFSUtil.string2Bytes("dir3/file1")),
+        new DiffReportEntry(DiffType.DELETE,
+            DFSUtil.string2Bytes("dir3/file1")),
+        new DiffReportEntry(DiffType.DELETE,
+            DFSUtil.string2Bytes("dir3/file3")));
+  }
 }

