Repository: crunch
Updated Branches:
  refs/heads/apache-crunch-0.8 26c068e14 -> 3536bba9f
CRUNCH-408: Make HFileSource correctly estimate file sizes when there are wildcards in the path. Contributed by Chao Shi.

Project: http://git-wip-us.apache.org/repos/asf/crunch/repo
Commit: http://git-wip-us.apache.org/repos/asf/crunch/commit/3536bba9
Tree: http://git-wip-us.apache.org/repos/asf/crunch/tree/3536bba9
Diff: http://git-wip-us.apache.org/repos/asf/crunch/diff/3536bba9

Branch: refs/heads/apache-crunch-0.8
Commit: 3536bba9f1afc8748576d7f109bc5561b24c60b8
Parents: 26c068e
Author: Josh Wills <[email protected]>
Authored: Sun Jun 1 13:29:46 2014 -0700
Committer: Josh Wills <[email protected]>
Committed: Sun Jun 1 18:49:59 2014 -0700

----------------------------------------------------------------------
 .../org/apache/crunch/io/hbase/HFileSource.java | 19 +------------------
 1 file changed, 1 insertion(+), 18 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/crunch/blob/3536bba9/crunch-hbase/src/main/java/org/apache/crunch/io/hbase/HFileSource.java
----------------------------------------------------------------------
diff --git a/crunch-hbase/src/main/java/org/apache/crunch/io/hbase/HFileSource.java b/crunch-hbase/src/main/java/org/apache/crunch/io/hbase/HFileSource.java
index fff2525..8b5dab5 100644
--- a/crunch-hbase/src/main/java/org/apache/crunch/io/hbase/HFileSource.java
+++ b/crunch-hbase/src/main/java/org/apache/crunch/io/hbase/HFileSource.java
@@ -102,28 +102,11 @@ public class HFileSource extends FileSourceImpl<KeyValue> implements ReadableSou
     long sum = 0;
     for (Path path : getPaths()) {
       try {
-        sum += getSizeInternal(conf, path);
+        sum += SourceTargetHelper.getPathSize(conf, new Path(path, "*"));
       } catch (IOException e) {
         LOG.warn("Failed to estimate size of " + path);
       }
     }
     return sum;
   }
-
-  private long getSizeInternal(Configuration conf, Path path) throws IOException {
-    FileSystem fs = path.getFileSystem(conf);
-    FileStatus[] statuses = fs.listStatus(path, HFileInputFormat.HIDDEN_FILE_FILTER);
-    if (statuses == null) {
-      return 0;
-    }
-    long sum = 0;
-    for (FileStatus status : statuses) {
-      if (status.isDir()) {
-        sum += SourceTargetHelper.getPathSize(fs, status.getPath());
-      } else {
-        sum += status.getLen();
-      }
-    }
-    return sum;
-  }
 }
