Repository: incubator-gobblin
Updated Branches:
  refs/heads/master dddc0b3ea -> eae5e6d26
[GOBBLIN-214] Fix the filtering issue in listFilesRecursively

Closes #2067 from yukuai518/pathfilter

Project: http://git-wip-us.apache.org/repos/asf/incubator-gobblin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-gobblin/commit/eae5e6d2
Tree: http://git-wip-us.apache.org/repos/asf/incubator-gobblin/tree/eae5e6d2
Diff: http://git-wip-us.apache.org/repos/asf/incubator-gobblin/diff/eae5e6d2

Branch: refs/heads/master
Commit: eae5e6d268964a6f3dfa0ae98c3639333f3b0854
Parents: dddc0b3
Author: Kuai Yu <[email protected]>
Authored: Fri Aug 18 16:42:34 2017 -0700
Committer: Hung Tran <[email protected]>
Committed: Fri Aug 18 16:42:34 2017 -0700

----------------------------------------------------------------------
 .../org/apache/gobblin/util/FileListUtils.java  |  2 +-
 .../apache/gobblin/util/FileListUtilsTest.java  | 57 ++++++++++++++++++++
 2 files changed, 58 insertions(+), 1 deletion(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/eae5e6d2/gobblin-utility/src/main/java/org/apache/gobblin/util/FileListUtils.java
----------------------------------------------------------------------
diff --git a/gobblin-utility/src/main/java/org/apache/gobblin/util/FileListUtils.java b/gobblin-utility/src/main/java/org/apache/gobblin/util/FileListUtils.java
index 02920c2..51bf66d 100644
--- a/gobblin-utility/src/main/java/org/apache/gobblin/util/FileListUtils.java
+++ b/gobblin-utility/src/main/java/org/apache/gobblin/util/FileListUtils.java
@@ -132,7 +132,7 @@ public class FileListUtils {
             files.add(status);
           }
         } else {
-          files.add(status);
+          listFilesRecursivelyHelper(fs, files, status, fileFilter, applyFilterToDirectories, includeEmptyDirectories);
         }
       }
     } else if (fileFilter.accept(fileStatus.getPath())) {

http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/eae5e6d2/gobblin-utility/src/test/java/org/apache/gobblin/util/FileListUtilsTest.java
----------------------------------------------------------------------
diff --git a/gobblin-utility/src/test/java/org/apache/gobblin/util/FileListUtilsTest.java b/gobblin-utility/src/test/java/org/apache/gobblin/util/FileListUtilsTest.java
index 388e311..e739e00 100644
--- a/gobblin-utility/src/test/java/org/apache/gobblin/util/FileListUtilsTest.java
+++ b/gobblin-utility/src/test/java/org/apache/gobblin/util/FileListUtilsTest.java
@@ -146,6 +146,63 @@ public class FileListUtilsTest {
     }
   }
 
+  public void testListAllFiles () throws IOException {
+    FileSystem localFs = FileSystem.getLocal(new Configuration());
+    Path baseDir = new Path(FILE_UTILS_TEST_DIR, "listAllFiles");
+    System.out.println (baseDir);
+    try {
+      if (localFs.exists(baseDir)) {
+        localFs.delete(baseDir, true);
+      }
+      localFs.mkdirs(baseDir);
+
+      // Empty root directory
+      List<FileStatus> testFiles = FileListUtils.listFilesRecursively(localFs, baseDir, FileListUtils.NO_OP_PATH_FILTER);
+      Assert.assertTrue(testFiles.size() == 0);
+
+      // With two avro files (1.avro, 2.avro)
+      Path file1 = new Path(baseDir, "1.avro");
+      localFs.create(file1);
+      Path file2 = new Path(baseDir, "2.avro");
+      localFs.create(file2);
+      testFiles = FileListUtils.listFilesRecursively(localFs, baseDir, FileListUtils.NO_OP_PATH_FILTER);
+      Assert.assertTrue(testFiles.size() == 2);
+
+      // With an avro schema file (part.avsc)
+      Path avsc = new Path(baseDir, "part.avsc");
+      localFs.create(avsc);
+      testFiles = FileListUtils.listFilesRecursively(localFs, baseDir, FileListUtils.NO_OP_PATH_FILTER);
+      Assert.assertTrue(testFiles.size() == 3);
+      testFiles = FileListUtils.listFilesRecursively(localFs, baseDir, (path)->path.getName().endsWith(".avro"));
+      Assert.assertTrue(testFiles.size() == 2);
+
+      // A complicated hierarchy
+      // baseDir ____ 1.avro
+      //        |____ 2.avro
+      //        |____ part.avsc
+      //        |____ subDir ____ 3.avro
+      //                    |____ subDir2 ____ 4.avro
+      //                                 |____ part2.avsc
+      Path subDir = new Path(baseDir, "subDir");
+      localFs.mkdirs(subDir);
+      Path file3 = new Path(subDir, "3.avro");
+      localFs.create(file3);
+      Path subDir2 = new Path(subDir, "subDir2");
+      localFs.mkdirs(subDir2);
+      Path file4 = new Path(subDir2, "4.avro");
+      localFs.create(file4);
+      Path avsc2 = new Path(subDir2, "part2.avsc");
+      localFs.create(avsc2);
+
+      testFiles = FileListUtils.listFilesRecursively(localFs, baseDir, (path)->path.getName().endsWith(".avro"));
+      Assert.assertTrue(testFiles.size() == 4);
+      testFiles = FileListUtils.listFilesRecursively(localFs, baseDir, FileListUtils.NO_OP_PATH_FILTER);
+      Assert.assertTrue(testFiles.size() == 6);
+    } finally {
+      localFs.delete(baseDir, true);
+    }
+  }
+
   public void testListFilesToCopyAtPath() throws IOException {
     FileSystem localFs = FileSystem.getLocal(new Configuration());
     Path baseDir = new Path(FILE_UTILS_TEST_DIR, "fileListTestDir4");
