Repository: crunch
Updated Branches:
  refs/heads/master 87318ca7e -> 504a6194e
CRUNCH-513 recursively browsing children directories to determine the path size Project: http://git-wip-us.apache.org/repos/asf/crunch/repo Commit: http://git-wip-us.apache.org/repos/asf/crunch/commit/504a6194 Tree: http://git-wip-us.apache.org/repos/asf/crunch/tree/504a6194 Diff: http://git-wip-us.apache.org/repos/asf/crunch/diff/504a6194 Branch: refs/heads/master Commit: 504a6194edb5e066e248e78f83e4519bb6e82c58 Parents: 87318ca Author: Andy Nelson <[email protected]> Authored: Tue Apr 28 15:49:32 2015 -0500 Committer: Micah Whitacre <[email protected]> Committed: Wed May 6 20:58:04 2015 -0500 ---------------------------------------------------------------------- .../apache/crunch/io/SourceTargetHelper.java | 2 +- .../apache/crunch/io/hbase/HFileSourceIT.java | 22 ++++++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/crunch/blob/504a6194/crunch-core/src/main/java/org/apache/crunch/io/SourceTargetHelper.java ---------------------------------------------------------------------- diff --git a/crunch-core/src/main/java/org/apache/crunch/io/SourceTargetHelper.java b/crunch-core/src/main/java/org/apache/crunch/io/SourceTargetHelper.java index 7b9bea0..8fb7065 100644 --- a/crunch-core/src/main/java/org/apache/crunch/io/SourceTargetHelper.java +++ b/crunch-core/src/main/java/org/apache/crunch/io/SourceTargetHelper.java @@ -43,7 +43,7 @@ public class SourceTargetHelper { for (FileStatus status : stati) { if (status.isDir()) { for (FileStatus st : fs.listStatus(status.getPath())) { - size += st.getLen(); + size += getPathSize(fs, st.getPath()); } } else { size += status.getLen(); http://git-wip-us.apache.org/repos/asf/crunch/blob/504a6194/crunch-hbase/src/it/java/org/apache/crunch/io/hbase/HFileSourceIT.java ---------------------------------------------------------------------- diff --git 
a/crunch-hbase/src/it/java/org/apache/crunch/io/hbase/HFileSourceIT.java b/crunch-hbase/src/it/java/org/apache/crunch/io/hbase/HFileSourceIT.java index e82102b..6f418a5 100644 --- a/crunch-hbase/src/it/java/org/apache/crunch/io/hbase/HFileSourceIT.java +++ b/crunch-hbase/src/it/java/org/apache/crunch/io/hbase/HFileSourceIT.java @@ -26,11 +26,13 @@ import org.apache.crunch.MapFn; import org.apache.crunch.PCollection; import org.apache.crunch.Pipeline; import org.apache.crunch.PipelineResult; +import org.apache.crunch.Source; import org.apache.crunch.impl.mr.MRPipeline; import org.apache.crunch.io.To; import org.apache.crunch.test.TemporaryPath; import org.apache.crunch.test.TemporaryPaths; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.KeyValue; @@ -266,6 +268,26 @@ public class HFileSourceIT implements Serializable { assertArrayEquals(VALUE3, results.get(0).getValue(FAMILY1, QUALIFIER3)); } + @Test + public void testHFileSize() throws IOException { + Path inputPath = tmpDir.getPath("in"); + List<KeyValue> kvs = ImmutableList.of( + new KeyValue(ROW1, FAMILY1, QUALIFIER1, 1, VALUE1), + new KeyValue(ROW1, FAMILY1, QUALIFIER2, 2, VALUE2), + new KeyValue(ROW1, FAMILY1, QUALIFIER2, 3, VALUE3)); + writeKeyValuesToHFile(inputPath, kvs); + + FileSystem fs = FileSystem.get(conf); + FileStatus[] fileStatuses = fs.listStatus(inputPath.getParent()); + long size = 0; + for(FileStatus s: fileStatuses){ + size += s.getLen(); + } + + Source<KeyValue> hfile = FromHBase.hfile(inputPath); + assertTrue(hfile.getSize(conf) >= size); + } + private List<Result> doTestScanHFiles(List<KeyValue> kvs, Scan scan) throws IOException { Path inputPath = tmpDir.getPath("in"); writeKeyValuesToHFile(inputPath, kvs);
