Repository: crunch
Updated Branches:
  refs/heads/apache-crunch-0.10 [created] 4c364d0df
CRUNCH-408: Fix the HFileSource globStatus check to work on Hadoop2

Project: http://git-wip-us.apache.org/repos/asf/crunch/repo
Commit: http://git-wip-us.apache.org/repos/asf/crunch/commit/2e284058
Tree: http://git-wip-us.apache.org/repos/asf/crunch/tree/2e284058
Diff: http://git-wip-us.apache.org/repos/asf/crunch/diff/2e284058

Branch: refs/heads/apache-crunch-0.10
Commit: 2e284058a915482c922e6e4328f08674ac4724f1
Parents: 35d404c
Author: Josh Wills <[email protected]>
Authored: Sun Jun 1 20:41:01 2014 -0700
Committer: Josh Wills <[email protected]>
Committed: Sun Jun 1 20:42:32 2014 -0700

----------------------------------------------------------------------
 .../org/apache/crunch/io/hbase/HFileSource.java | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/crunch/blob/2e284058/crunch-hbase/src/main/java/org/apache/crunch/io/hbase/HFileSource.java
----------------------------------------------------------------------
diff --git a/crunch-hbase/src/main/java/org/apache/crunch/io/hbase/HFileSource.java b/crunch-hbase/src/main/java/org/apache/crunch/io/hbase/HFileSource.java
index c21cc47..47abe9a 100644
--- a/crunch-hbase/src/main/java/org/apache/crunch/io/hbase/HFileSource.java
+++ b/crunch-hbase/src/main/java/org/apache/crunch/io/hbase/HFileSource.java
@@ -120,10 +120,28 @@ public class HFileSource extends FileSourceImpl<KeyValue> implements ReadableSou
     long sum = 0;
     for (Path path : getPaths()) {
       try {
-        sum += SourceTargetHelper.getPathSize(conf, new Path(path, "*"));
+        sum += getSizeInternal(conf, path);
       } catch (IOException e) {
         LOG.warn("Failed to estimate size of " + path);
       }
+      System.out.println("Size after read of path = " + path.toString() + " = " + sum);
+    }
+    return sum;
+  }
+
+  private long getSizeInternal(Configuration conf, Path path) throws IOException {
+    FileSystem fs = path.getFileSystem(conf);
+    FileStatus[] statuses = fs.globStatus(path, HFileInputFormat.HIDDEN_FILE_FILTER);
+    if (statuses == null) {
+      return 0;
+    }
+    long sum = 0;
+    for (FileStatus status : statuses) {
+      if (status.isDir()) {
+        sum += SourceTargetHelper.getPathSize(fs, status.getPath());
+      } else {
+        sum += status.getLen();
+      }
     }
     return sum;
   }
