Repository: hive Updated Branches: refs/heads/master 6a01be889 -> 411c356bd
HIVE-13839 : Refactor : remove SHIMS.getListLocatedStatus (Ashutosh Chauhan via Sergey Shelukhin) Signed-off-by: Ashutosh Chauhan <[email protected]> Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/411c356b Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/411c356b Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/411c356b Branch: refs/heads/master Commit: 411c356bd975231e4acf97d4ac312e4899098e30 Parents: 6a01be8 Author: Ashutosh Chauhan <[email protected]> Authored: Tue May 24 16:33:34 2016 -0700 Committer: Ashutosh Chauhan <[email protected]> Committed: Tue May 31 14:09:18 2016 -0700 ---------------------------------------------------------------------- .../hcatalog/templeton/tool/TempletonUtils.java | 17 ++++++++------- .../hive/ql/hooks/PostExecOrcFileDump.java | 7 +++--- .../org/apache/hadoop/hive/ql/io/AcidUtils.java | 4 ++-- .../org/apache/hadoop/hive/ql/io/HdfsUtils.java | 23 +++++++++++++++++--- .../hadoop/hive/ql/io/orc/OrcInputFormat.java | 4 ++-- .../apache/hadoop/hive/shims/Hadoop23Shims.java | 17 --------------- .../apache/hadoop/hive/shims/HadoopShims.java | 12 ---------- 7 files changed, 37 insertions(+), 47 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/411c356b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TempletonUtils.java ---------------------------------------------------------------------- diff --git a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TempletonUtils.java b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TempletonUtils.java index 83584d3..201e647 100644 --- a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TempletonUtils.java +++ b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TempletonUtils.java @@ -104,14 +104,14 @@ public class TempletonUtils { public static final Pattern HIVE_COMPLETE = Pattern.compile(" map = (\\d+%),\\s+reduce = (\\d+%).*$"); /** * Hive on Tez produces progress report that looks like this - * Map 1: -/- Reducer 2: 0/1 - * Map 1: -/- Reducer 2: 0(+1)/1 + * Map 1: -/- Reducer 2: 0/1 + * Map 1: -/- Reducer 2: 0(+1)/1 * Map 1: -/- Reducer 2: 1/1 - * + * * -/- means there are no tasks (yet) * 0/1 means 1 total tasks, 0 completed * 1(+2)/3 means 3 total, 1 completed and 2 running - * + * * HIVE-8495, in particular https://issues.apache.org/jira/secure/attachment/12675504/Screen%20Shot%202014-10-16%20at%209.35.26%20PM.png * has more examples. * To report progress, we'll assume all tasks are equal size and compute "completed" as percent of "total" @@ -132,7 +132,7 @@ public class TempletonUtils { Matcher pig = PIG_COMPLETE.matcher(line); if (pig.find()) return pig.group().trim(); - + Matcher hive = HIVE_COMPLETE.matcher(line); if(hive.find()) { return "map " + hive.group(1) + " reduce " + hive.group(2); @@ -274,7 +274,7 @@ public class TempletonUtils { if(!fs.exists(p)) { return Collections.emptyList(); } - List<FileStatus> children = ShimLoader.getHadoopShims().listLocatedStatus(fs, p, null); + FileStatus[] children = fs.listStatus(p); if(!isset(children)) { return Collections.emptyList(); } @@ -327,9 +327,10 @@ public class TempletonUtils { } final String finalFName = new String(fname); - final FileSystem defaultFs = + final FileSystem defaultFs = ugi.doAs(new PrivilegedExceptionAction<FileSystem>() { - public FileSystem run() + @Override + public FileSystem run() throws URISyntaxException, IOException, InterruptedException { return FileSystem.get(new URI(finalFName), conf); } http://git-wip-us.apache.org/repos/asf/hive/blob/411c356b/ql/src/java/org/apache/hadoop/hive/ql/hooks/PostExecOrcFileDump.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/hooks/PostExecOrcFileDump.java b/ql/src/java/org/apache/hadoop/hive/ql/hooks/PostExecOrcFileDump.java index b1595ce..f1eb5cd 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/hooks/PostExecOrcFileDump.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/hooks/PostExecOrcFileDump.java @@ -30,12 +30,12 @@ import org.apache.hadoop.fs.PathFilter; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.QueryPlan; import org.apache.hadoop.hive.ql.exec.FetchTask; -import org.apache.orc.FileFormatException; +import org.apache.hadoop.hive.ql.io.HdfsUtils; import org.apache.orc.tools.FileDump; +import org.apache.orc.FileFormatException; import org.apache.hadoop.hive.ql.io.orc.OrcFile; import org.apache.hadoop.hive.ql.plan.FetchWork; import org.apache.hadoop.hive.ql.session.SessionState; -import org.apache.hadoop.hive.shims.ShimLoader; import com.google.common.collect.Lists; @@ -48,6 +48,7 @@ public class PostExecOrcFileDump implements ExecuteWithHookContext { private static final Logger LOG = LoggerFactory.getLogger(PostExecOrcFileDump.class.getName()); private static final PathFilter hiddenFileFilter = new PathFilter() { + @Override public boolean accept(Path p) { String name = p.getName(); return !name.startsWith("_") && !name.startsWith("."); @@ -88,7 +89,7 @@ public class PostExecOrcFileDump implements ExecuteWithHookContext { for (Path dir : directories) { FileSystem fs = dir.getFileSystem(conf); - List<FileStatus> fileList = ShimLoader.getHadoopShims().listLocatedStatus(fs, dir, + List<FileStatus> fileList = HdfsUtils.listLocatedStatus(fs, dir, hiddenFileFilter); for (FileStatus fileStatus : fileList) { http://git-wip-us.apache.org/repos/asf/hive/blob/411c356b/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java index bac38ce..496bd0f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java @@ -507,7 +507,7 @@ public class AcidUtils { originalDirectories, original, obsolete, bestBase, ignoreEmptyFiles); } } else { - List<FileStatus> children = SHIMS.listLocatedStatus(fs, directory, hiddenFileFilter); + List<FileStatus> children = HdfsUtils.listLocatedStatus(fs, directory, hiddenFileFilter); for (FileStatus child : children) { getChildState( child, null, txnList, working, originalDirectories, original, obsolete, bestBase, ignoreEmptyFiles); @@ -675,7 +675,7 @@ public class AcidUtils { } } } else { - List<FileStatus> children = SHIMS.listLocatedStatus(fs, stat.getPath(), hiddenFileFilter); + List<FileStatus> children = HdfsUtils.listLocatedStatus(fs, stat.getPath(), hiddenFileFilter); for (FileStatus child : children) { if (child.isDir()) { findOriginals(fs, child, original, useFileIds); http://git-wip-us.apache.org/repos/asf/hive/blob/411c356b/ql/src/java/org/apache/hadoop/hive/ql/io/HdfsUtils.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/HdfsUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/io/HdfsUtils.java index b71ca09..9b8b761 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/HdfsUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/HdfsUtils.java @@ -19,12 +19,17 @@ package org.apache.hadoop.hive.ql.io; import java.io.IOException; +import java.util.ArrayList; +import java.util.List; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.LocatedFileStatus; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.PathFilter; +import org.apache.hadoop.fs.RemoteIterator; import org.apache.hadoop.hdfs.DistributedFileSystem; import org.apache.hadoop.hive.shims.HadoopShims; import org.apache.hadoop.hive.shims.ShimLoader; @@ -53,7 +58,7 @@ public class HdfsUtils { int fileSizeHash = (int)(fileSize ^ (fileSize >>> 32)), modTimeHash = (int)(modTime ^ (modTime >>> 32)), combinedHash = modTimeHash ^ fileSizeHash; - long id = (((long)nameHash & 0xffffffffL) << 32) | ((long)combinedHash & 0xffffffffL); + long id = ((nameHash & 0xffffffffL) << 32) | (combinedHash & 0xffffffffL); if (doLog) { LOG.warn("Cannot get unique file ID from " + fsName + "; using " + id + " (" + pathStr + "," + nameHash + "," + fileSize + ")"); @@ -61,8 +66,20 @@ public class HdfsUtils { return id; } - - + public static List<FileStatus> listLocatedStatus(final FileSystem fs, + final Path path, + final PathFilter filter + ) throws IOException { + RemoteIterator<LocatedFileStatus> itr = fs.listLocatedStatus(path); + List<FileStatus> result = new ArrayList<FileStatus>(); + while(itr.hasNext()) { + FileStatus stat = itr.next(); + if (filter == null || filter.accept(stat.getPath())) { + result.add(stat); + } + } + return result; + } // TODO: this relies on HDFS not changing the format; we assume if we could get inode ID, this // is still going to work. Otherwise, file IDs can be turned off. Later, we should use http://git-wip-us.apache.org/repos/asf/hive/blob/411c356b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java index 185852c..d7a8c2f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java @@ -20,7 +20,6 @@ package org.apache.hadoop.hive.ql.io.orc; import org.apache.orc.impl.InStream; - import java.io.IOException; import java.nio.ByteBuffer; import java.security.PrivilegedExceptionAction; @@ -80,6 +79,7 @@ import org.apache.hadoop.hive.ql.io.AcidOutputFormat; import org.apache.hadoop.hive.ql.io.AcidUtils; import org.apache.hadoop.hive.ql.io.AcidUtils.Directory; import org.apache.hadoop.hive.ql.io.CombineHiveInputFormat; +import org.apache.hadoop.hive.ql.io.HdfsUtils; import org.apache.hadoop.hive.ql.io.HiveInputFormat; import org.apache.hadoop.hive.ql.io.InputFormatChecker; import org.apache.hadoop.hive.ql.io.LlapWrappableInputFormatInterface; @@ -1034,7 +1034,7 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>, } // Fall back to regular API and create states without ID. - List<FileStatus> children = SHIMS.listLocatedStatus(fs, base, AcidUtils.hiddenFileFilter); + List<FileStatus> children = HdfsUtils.listLocatedStatus(fs, base, AcidUtils.hiddenFileFilter); List<HdfsFileStatusWithId> result = new ArrayList<>(children.size()); for (FileStatus child : children) { result.add(AcidUtils.createOriginalObj(null, child)); http://git-wip-us.apache.org/repos/asf/hive/blob/411c356b/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java ---------------------------------------------------------------------- diff --git a/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java b/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java index ef2b7f7..273099e 100644 --- a/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java +++ b/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java @@ -52,7 +52,6 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.PathFilter; import org.apache.hadoop.fs.ProxyFileSystem; import org.apache.hadoop.fs.RemoteIterator; -import org.apache.hadoop.fs.Trash; import org.apache.hadoop.fs.TrashPolicy; import org.apache.hadoop.fs.permission.FsAction; import org.apache.hadoop.fs.permission.FsPermission; @@ -664,22 +663,6 @@ public class Hadoop23Shims extends HadoopShimsSecure { return new WebHCatJTShim23(conf, ugi);//this has state, so can't be cached } - @Override - public List<FileStatus> listLocatedStatus(final FileSystem fs, - final Path path, - final PathFilter filter - ) throws IOException { - RemoteIterator<LocatedFileStatus> itr = fs.listLocatedStatus(path); - List<FileStatus> result = new ArrayList<FileStatus>(); - while(itr.hasNext()) { - FileStatus stat = itr.next(); - if (filter == null || filter.accept(stat.getPath())) { - result.add(stat); - } - } - return result; - } - private static final class HdfsFileStatusWithIdImpl implements HdfsFileStatusWithId { private final LocatedFileStatus lfs; private final long fileId; http://git-wip-us.apache.org/repos/asf/hive/blob/411c356b/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShims.java ---------------------------------------------------------------------- diff --git a/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShims.java b/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShims.java index 4a96355..3e30758 100644 --- a/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShims.java +++ b/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShims.java @@ -234,18 +234,6 @@ public interface HadoopShims { Class<RecordReader<K, V>> rrClass) throws IOException; } - /** - * Get the block locations for the given directory. - * @param fs the file system - * @param path the directory name to get the status and block locations - * @param filter a filter that needs to accept the file (or null) - * @return an list for the located file status objects - * @throws IOException - */ - List<FileStatus> listLocatedStatus(FileSystem fs, Path path, - PathFilter filter) throws IOException; - - List<HdfsFileStatusWithId> listLocatedHdfsStatus( FileSystem fs, Path path, PathFilter filter) throws IOException;
