This is an automated email from the ASF dual-hosted git repository.
mkwhit pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/crunch.git
The following commit(s) were added to refs/heads/master by this push:
new 8711b2f CRUNCH-683 avoid unnecessary listStatus() calls from getPathSize() (#26)
8711b2f is described below
commit 8711b2fec4bb3a2b56e39ebaccc316dfa0a0d4eb
Author: Ben Roling <[email protected]>
AuthorDate: Fri Jul 12 16:30:24 2019 -0500
CRUNCH-683 avoid unnecessary listStatus() calls from getPathSize() (#26)
---
.../org/apache/crunch/io/SourceTargetHelper.java | 6 ++-
.../apache/crunch/io/SourceTargetHelperTest.java | 50 +++++++++++++++++++---
2 files changed, 49 insertions(+), 7 deletions(-)
diff --git a/crunch-core/src/main/java/org/apache/crunch/io/SourceTargetHelper.java b/crunch-core/src/main/java/org/apache/crunch/io/SourceTargetHelper.java
index 8fb7065..94b6b87 100644
--- a/crunch-core/src/main/java/org/apache/crunch/io/SourceTargetHelper.java
+++ b/crunch-core/src/main/java/org/apache/crunch/io/SourceTargetHelper.java
@@ -22,7 +22,9 @@ import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
/**
* Functions for configuring the inputs/outputs of MapReduce jobs.
@@ -42,8 +44,8 @@ public class SourceTargetHelper {
long size = 0;
for (FileStatus status : stati) {
if (status.isDir()) {
- for (FileStatus st : fs.listStatus(status.getPath())) {
- size += getPathSize(fs, st.getPath());
+ for (RemoteIterator<LocatedFileStatus> iterator =
fs.listFiles(status.getPath(), true); iterator.hasNext();) {
+ size += iterator.next().getLen();
}
} else {
size += status.getLen();
diff --git a/crunch-core/src/test/java/org/apache/crunch/io/SourceTargetHelperTest.java b/crunch-core/src/test/java/org/apache/crunch/io/SourceTargetHelperTest.java
index 434fd10..8c48af6 100644
--- a/crunch-core/src/test/java/org/apache/crunch/io/SourceTargetHelperTest.java
+++ b/crunch-core/src/test/java/org/apache/crunch/io/SourceTargetHelperTest.java
@@ -19,25 +19,30 @@ package org.apache.crunch.io;
import static org.junit.Assert.assertEquals;
-import java.io.File;
import java.io.IOException;
+import org.apache.crunch.test.TemporaryPath;
+import org.apache.crunch.test.TemporaryPaths;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocalFileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RawLocalFileSystem;
+
+import org.junit.Rule;
import org.junit.Test;
public class SourceTargetHelperTest {
+ @Rule
+ public TemporaryPath tmpDir = TemporaryPaths.create();
/**
 * Checks that {@code SourceTargetHelper.getPathSize} yields -1 when asked for the size of the
 * temporary root path after that path has been deleted.
 */
@Test
public void testGetNonexistentPathSize() throws Exception {
- File tmp = File.createTempFile("pathsize", "");
- Path tmpPath = new Path(tmp.getAbsolutePath());
- tmp.delete();
- FileSystem fs = FileSystem.getLocal(new Configuration(false));
+ Path tmpPath = tmpDir.getRootPath();
+ tmpDir.delete(); // presumably removes the temporary root so tmpPath no longer exists -- TemporaryPath is defined elsewhere, confirm
+ FileSystem fs = FileSystem.getLocal(tmpDir.getDefaultConfiguration());
assertEquals(-1L, SourceTargetHelper.getPathSize(fs, tmpPath));
}
@@ -48,6 +53,41 @@ public class SourceTargetHelperTest {
}
/**
+ * Tests for proper recursive size calculation on a path containing a glob pattern.
+ */
+  @Test
+  public void testGetPathSizeGlobPathRecursive() throws Exception {
+    FileSystem localFs = FileSystem.getLocal(tmpDir.getDefaultConfiguration());
+
+    // Layout built under the temporary root (file sizes in bytes):
+    //   foo1/file1         (3)
+    //   foo1/subdir/file2  (5)
+    //   foo2/file3         (11)
+    Path firstDir = tmpDir.getPath("foo1");
+    localFs.mkdirs(firstDir);
+    createFile(localFs, new Path(firstDir, "file1"), 3);
+
+    Path nestedDir = tmpDir.getPath("foo1/subdir");
+    localFs.mkdirs(nestedDir);
+    createFile(localFs, new Path(nestedDir, "file2"), 5);
+
+    Path secondDir = tmpDir.getPath("foo2");
+    localFs.mkdirs(secondDir);
+    createFile(localFs, new Path(secondDir, "file3"), 11);
+
+    // Sizing the glob "foo*" must count all three files, including the nested one: 3 + 5 + 11 = 19.
+    Path globPath = tmpDir.getPath("foo*");
+    long totalSize = SourceTargetHelper.getPathSize(localFs, globPath);
+    assertEquals(19, totalSize);
+  }
+
+  /**
+   * Creates a file at {@code path} on {@code fs} containing exactly {@code size} zero bytes.
+   *
+   * @param fs the file system on which the file is created
+   * @param path the location of the file to create
+   * @param size the number of zero bytes to write; a non-positive value writes nothing
+   * @throws IOException if the file cannot be created, written, or closed
+   */
+  private static void createFile(FileSystem fs, Path path, int size) throws IOException {
+    // try-with-resources closes the stream even when a write fails part-way through,
+    // so a failing test does not leak an open file handle.
+    try (FSDataOutputStream outputStream = fs.create(path)) {
+      for (int i = 0; i < size; i++) {
+        outputStream.write(0);
+      }
+    }
+  }
+
+ /**
* Mock FileSystem that returns null for {@link FileSystem#listStatus(Path)}.
*/
private static class MockFileSystem extends LocalFileSystem {