Repository: incubator-gobblin
Updated Branches:
  refs/heads/master 383568685 -> 8949aa301


[GOBBLIN-509] Ensure that tar data writer untars within output directory

Closes #2379 from abti/master


Project: http://git-wip-us.apache.org/repos/asf/incubator-gobblin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-gobblin/commit/8949aa30
Tree: http://git-wip-us.apache.org/repos/asf/incubator-gobblin/tree/8949aa30
Diff: http://git-wip-us.apache.org/repos/asf/incubator-gobblin/diff/8949aa30

Branch: refs/heads/master
Commit: 8949aa30117050158154fba5a6cf2530ac4f60ef
Parents: 3835686
Author: Abhishek Tiwari <[email protected]>
Authored: Mon Jun 4 19:37:14 2018 -0700
Committer: Abhishek Tiwari <[email protected]>
Committed: Mon Jun 4 19:37:48 2018 -0700

----------------------------------------------------------------------
 .../writer/TarArchiveInputStreamDataWriter.java     |  5 +++++
 .../java/org/apache/gobblin/util/FileUtils.java     | 14 ++++++++++++++
 .../java/org/apache/gobblin/util/FileUtilsTest.java | 16 +++++++++++++++-
 3 files changed, 34 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/8949aa30/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/writer/TarArchiveInputStreamDataWriter.java
----------------------------------------------------------------------
diff --git a/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/writer/TarArchiveInputStreamDataWriter.java b/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/writer/TarArchiveInputStreamDataWriter.java
index 93a2278..5e1164d 100644
--- a/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/writer/TarArchiveInputStreamDataWriter.java
+++ b/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/writer/TarArchiveInputStreamDataWriter.java
@@ -20,6 +20,7 @@ package org.apache.gobblin.data.management.copy.writer;
 import org.apache.gobblin.configuration.State;
 import org.apache.gobblin.data.management.copy.CopyableFile;
 import org.apache.gobblin.data.management.copy.FileAwareInputStream;
+import org.apache.gobblin.util.FileUtils;
 import org.apache.gobblin.util.io.StreamCopier;
 import org.apache.gobblin.util.io.StreamUtils;
 
@@ -80,6 +81,10 @@ public class TarArchiveInputStreamDataWriter extends FileAwareInputStreamDataWri
         // the API tarEntry.getName() is misleading, it is actually the path of the tarEntry in the tar file
         String newTarEntryPath = tarEntry.getName().replace(tarEntryRootName, writeAt.getName());
         Path tarEntryStagingPath = new Path(writeAt.getParent(), newTarEntryPath);
+        if (!FileUtils.isSubPath(writeAt.getParent(), tarEntryStagingPath)) {
+          throw new IOException(String.format("Extracted file: %s is trying to write outside of output directory: %s",
+              tarEntryStagingPath, writeAt.getParent()));
+        }
 
         if (tarEntry.isDirectory() && !this.fs.exists(tarEntryStagingPath)) {
           this.fs.mkdirs(tarEntryStagingPath);

http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/8949aa30/gobblin-utility/src/main/java/org/apache/gobblin/util/FileUtils.java
----------------------------------------------------------------------
diff --git a/gobblin-utility/src/main/java/org/apache/gobblin/util/FileUtils.java b/gobblin-utility/src/main/java/org/apache/gobblin/util/FileUtils.java
index 6f314d2..3461045 100644
--- a/gobblin-utility/src/main/java/org/apache/gobblin/util/FileUtils.java
+++ b/gobblin-utility/src/main/java/org/apache/gobblin/util/FileUtils.java
@@ -48,4 +48,18 @@ public class FileUtils {
 
     return childStr.startsWith(parentStr);
   }
+
+  /***
+   * Check if child path is child of parent path.
+   * @param parent Expected parent path.
+   * @param child Expected child path.
+   * @return If child path is child of parent path.
+   * @throws IOException
+   */
+  public static boolean isSubPath(org.apache.hadoop.fs.Path parent, org.apache.hadoop.fs.Path child) throws IOException {
+    String childStr = child.toString();
+    String parentStr = parent.toString();
+
+    return childStr.startsWith(parentStr);
+  }
 }

http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/8949aa30/gobblin-utility/src/test/java/org/apache/gobblin/util/FileUtilsTest.java
----------------------------------------------------------------------
diff --git a/gobblin-utility/src/test/java/org/apache/gobblin/util/FileUtilsTest.java b/gobblin-utility/src/test/java/org/apache/gobblin/util/FileUtilsTest.java
index a1f5bc0..048a648 100644
--- a/gobblin-utility/src/test/java/org/apache/gobblin/util/FileUtilsTest.java
+++ b/gobblin-utility/src/test/java/org/apache/gobblin/util/FileUtilsTest.java
@@ -43,7 +43,7 @@ public class FileUtilsTest {
   }
 
   @Test
-  public void testIsSubPath() throws IOException {
+  public void testIsSubFile() throws IOException {
     File parentPath = new File("/tmp/foo/bar");
 
     File childPath = new File("/tmp/foo/../tar/file.txt");
@@ -55,4 +55,18 @@ public class FileUtilsTest {
     childPath = new File("/tmp/foo/bar/car/file.txt");
     assertThat(true).isEqualTo(FileUtils.isSubPath(parentPath, childPath));
   }
+
+  @Test
+  public void testIsSubPath() throws IOException {
+    org.apache.hadoop.fs.Path parentPath = new org.apache.hadoop.fs.Path("/tmp/foo/bar");
+
+    org.apache.hadoop.fs.Path childPath = new org.apache.hadoop.fs.Path("/tmp/foo/../tar/file.txt");
+    assertThat(false).isEqualTo(FileUtils.isSubPath(parentPath, childPath));
+
+    childPath = new org.apache.hadoop.fs.Path("/tmp/foo/tar/../bar/file.txt");
+    assertThat(true).isEqualTo(FileUtils.isSubPath(parentPath, childPath));
+
+    childPath = new org.apache.hadoop.fs.Path("/tmp/foo/bar/car/file.txt");
+    assertThat(true).isEqualTo(FileUtils.isSubPath(parentPath, childPath));
+  }
 }

Reply via email to