Repository: incubator-gobblin
Updated Branches:
  refs/heads/master 383568685 -> 8949aa301
[GOBBLIN-509] Ensure that tar data writer untars within output directory Closes #2379 from abti/master Project: http://git-wip-us.apache.org/repos/asf/incubator-gobblin/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-gobblin/commit/8949aa30 Tree: http://git-wip-us.apache.org/repos/asf/incubator-gobblin/tree/8949aa30 Diff: http://git-wip-us.apache.org/repos/asf/incubator-gobblin/diff/8949aa30 Branch: refs/heads/master Commit: 8949aa30117050158154fba5a6cf2530ac4f60ef Parents: 3835686 Author: Abhishek Tiwari <[email protected]> Authored: Mon Jun 4 19:37:14 2018 -0700 Committer: Abhishek Tiwari <[email protected]> Committed: Mon Jun 4 19:37:48 2018 -0700 ---------------------------------------------------------------------- .../writer/TarArchiveInputStreamDataWriter.java | 5 +++++ .../java/org/apache/gobblin/util/FileUtils.java | 14 ++++++++++++++ .../java/org/apache/gobblin/util/FileUtilsTest.java | 16 +++++++++++++++- 3 files changed, 34 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/8949aa30/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/writer/TarArchiveInputStreamDataWriter.java ---------------------------------------------------------------------- diff --git a/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/writer/TarArchiveInputStreamDataWriter.java b/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/writer/TarArchiveInputStreamDataWriter.java index 93a2278..5e1164d 100644 --- a/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/writer/TarArchiveInputStreamDataWriter.java +++ b/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/writer/TarArchiveInputStreamDataWriter.java @@ -20,6 +20,7 @@ package org.apache.gobblin.data.management.copy.writer; import org.apache.gobblin.configuration.State; 
import org.apache.gobblin.data.management.copy.CopyableFile; import org.apache.gobblin.data.management.copy.FileAwareInputStream; +import org.apache.gobblin.util.FileUtils; import org.apache.gobblin.util.io.StreamCopier; import org.apache.gobblin.util.io.StreamUtils; @@ -80,6 +81,10 @@ public class TarArchiveInputStreamDataWriter extends FileAwareInputStreamDataWri // the API tarEntry.getName() is misleading, it is actually the path of the tarEntry in the tar file String newTarEntryPath = tarEntry.getName().replace(tarEntryRootName, writeAt.getName()); Path tarEntryStagingPath = new Path(writeAt.getParent(), newTarEntryPath); + if (!FileUtils.isSubPath(writeAt.getParent(), tarEntryStagingPath)) { + throw new IOException(String.format("Extracted file: %s is trying to write outside of output directory: %s", + tarEntryStagingPath, writeAt.getParent())); + } if (tarEntry.isDirectory() && !this.fs.exists(tarEntryStagingPath)) { this.fs.mkdirs(tarEntryStagingPath); http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/8949aa30/gobblin-utility/src/main/java/org/apache/gobblin/util/FileUtils.java ---------------------------------------------------------------------- diff --git a/gobblin-utility/src/main/java/org/apache/gobblin/util/FileUtils.java b/gobblin-utility/src/main/java/org/apache/gobblin/util/FileUtils.java index 6f314d2..3461045 100644 --- a/gobblin-utility/src/main/java/org/apache/gobblin/util/FileUtils.java +++ b/gobblin-utility/src/main/java/org/apache/gobblin/util/FileUtils.java @@ -48,4 +48,18 @@ public class FileUtils { return childStr.startsWith(parentStr); } + + /*** + * Check if child path is child of parent path. + * @param parent Expected parent path. + * @param child Expected child path. + * @return If child path is child of parent path. 
+ * @throws IOException + */ + public static boolean isSubPath(org.apache.hadoop.fs.Path parent, org.apache.hadoop.fs.Path child) throws IOException { + String childStr = child.toString(); + String parentStr = parent.toString(); + + return childStr.startsWith(parentStr); + } } http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/8949aa30/gobblin-utility/src/test/java/org/apache/gobblin/util/FileUtilsTest.java ---------------------------------------------------------------------- diff --git a/gobblin-utility/src/test/java/org/apache/gobblin/util/FileUtilsTest.java b/gobblin-utility/src/test/java/org/apache/gobblin/util/FileUtilsTest.java index a1f5bc0..048a648 100644 --- a/gobblin-utility/src/test/java/org/apache/gobblin/util/FileUtilsTest.java +++ b/gobblin-utility/src/test/java/org/apache/gobblin/util/FileUtilsTest.java @@ -43,7 +43,7 @@ public class FileUtilsTest { } @Test - public void testIsSubPath() throws IOException { + public void testIsSubFile() throws IOException { File parentPath = new File("/tmp/foo/bar"); File childPath = new File("/tmp/foo/../tar/file.txt"); @@ -55,4 +55,18 @@ public class FileUtilsTest { childPath = new File("/tmp/foo/bar/car/file.txt"); assertThat(true).isEqualTo(FileUtils.isSubPath(parentPath, childPath)); } + + @Test + public void testIsSubPath() throws IOException { + org.apache.hadoop.fs.Path parentPath = new org.apache.hadoop.fs.Path("/tmp/foo/bar"); + + org.apache.hadoop.fs.Path childPath = new org.apache.hadoop.fs.Path("/tmp/foo/../tar/file.txt"); + assertThat(false).isEqualTo(FileUtils.isSubPath(parentPath, childPath)); + + childPath = new org.apache.hadoop.fs.Path("/tmp/foo/tar/../bar/file.txt"); + assertThat(true).isEqualTo(FileUtils.isSubPath(parentPath, childPath)); + + childPath = new org.apache.hadoop.fs.Path("/tmp/foo/bar/car/file.txt"); + assertThat(true).isEqualTo(FileUtils.isSubPath(parentPath, childPath)); + } }
