Repository: hive
Updated Branches:
  refs/heads/master 6a01be889 -> 411c356bd


HIVE-13839 : Refactor : remove SHIMS.getListLocatedStatus (Ashutosh Chauhan via Sergey Shelukhin)

Signed-off-by: Ashutosh Chauhan <[email protected]>


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/411c356b
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/411c356b
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/411c356b

Branch: refs/heads/master
Commit: 411c356bd975231e4acf97d4ac312e4899098e30
Parents: 6a01be8
Author: Ashutosh Chauhan <[email protected]>
Authored: Tue May 24 16:33:34 2016 -0700
Committer: Ashutosh Chauhan <[email protected]>
Committed: Tue May 31 14:09:18 2016 -0700

----------------------------------------------------------------------
 .../hcatalog/templeton/tool/TempletonUtils.java | 17 ++++++++-------
 .../hive/ql/hooks/PostExecOrcFileDump.java      |  7 +++---
 .../org/apache/hadoop/hive/ql/io/AcidUtils.java |  4 ++--
 .../org/apache/hadoop/hive/ql/io/HdfsUtils.java | 23 +++++++++++++++++---
 .../hadoop/hive/ql/io/orc/OrcInputFormat.java   |  4 ++--
 .../apache/hadoop/hive/shims/Hadoop23Shims.java | 17 ---------------
 .../apache/hadoop/hive/shims/HadoopShims.java   | 12 ----------
 7 files changed, 37 insertions(+), 47 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/411c356b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TempletonUtils.java
----------------------------------------------------------------------
diff --git a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TempletonUtils.java b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TempletonUtils.java
index 83584d3..201e647 100644
--- a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TempletonUtils.java
+++ b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TempletonUtils.java
@@ -104,14 +104,14 @@ public class TempletonUtils {
   public static final Pattern HIVE_COMPLETE = Pattern.compile(" map = 
(\\d+%),\\s+reduce = (\\d+%).*$");
   /**
    * Hive on Tez produces progress report that looks like this
-   * Map 1: -/-        Reducer 2: 0/1  
-   * Map 1: -/-        Reducer 2: 0(+1)/1      
+   * Map 1: -/-        Reducer 2: 0/1
+   * Map 1: -/-        Reducer 2: 0(+1)/1
    * Map 1: -/-        Reducer 2: 1/1
-   * 
+   *
    * -/- means there are no tasks (yet)
    * 0/1 means 1 total tasks, 0 completed
    * 1(+2)/3 means 3 total, 1 completed and 2 running
-   * 
+   *
    * HIVE-8495, in particular 
https://issues.apache.org/jira/secure/attachment/12675504/Screen%20Shot%202014-10-16%20at%209.35.26%20PM.png
    * has more examples.
    * To report progress, we'll assume all tasks are equal size and compute 
"completed" as percent of "total"
@@ -132,7 +132,7 @@ public class TempletonUtils {
     Matcher pig = PIG_COMPLETE.matcher(line);
     if (pig.find())
       return pig.group().trim();
-    
+
     Matcher hive = HIVE_COMPLETE.matcher(line);
     if(hive.find()) {
       return "map " + hive.group(1) + " reduce " + hive.group(2);
@@ -274,7 +274,7 @@ public class TempletonUtils {
     if(!fs.exists(p)) {
       return Collections.emptyList();
     }
-    List<FileStatus> children = 
ShimLoader.getHadoopShims().listLocatedStatus(fs, p, null);
+    FileStatus[] children = fs.listStatus(p);
     if(!isset(children)) {
       return Collections.emptyList();
     }
@@ -327,9 +327,10 @@ public class TempletonUtils {
     }
     final String finalFName = new String(fname);
 
-    final FileSystem defaultFs = 
+    final FileSystem defaultFs =
         ugi.doAs(new PrivilegedExceptionAction<FileSystem>() {
-          public FileSystem run() 
+          @Override
+          public FileSystem run()
             throws URISyntaxException, IOException, InterruptedException {
             return FileSystem.get(new URI(finalFName), conf);
           }

http://git-wip-us.apache.org/repos/asf/hive/blob/411c356b/ql/src/java/org/apache/hadoop/hive/ql/hooks/PostExecOrcFileDump.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/hooks/PostExecOrcFileDump.java b/ql/src/java/org/apache/hadoop/hive/ql/hooks/PostExecOrcFileDump.java
index b1595ce..f1eb5cd 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/hooks/PostExecOrcFileDump.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/hooks/PostExecOrcFileDump.java
@@ -30,12 +30,12 @@ import org.apache.hadoop.fs.PathFilter;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.QueryPlan;
 import org.apache.hadoop.hive.ql.exec.FetchTask;
-import org.apache.orc.FileFormatException;
+import org.apache.hadoop.hive.ql.io.HdfsUtils;
 import org.apache.orc.tools.FileDump;
+import org.apache.orc.FileFormatException;
 import org.apache.hadoop.hive.ql.io.orc.OrcFile;
 import org.apache.hadoop.hive.ql.plan.FetchWork;
 import org.apache.hadoop.hive.ql.session.SessionState;
-import org.apache.hadoop.hive.shims.ShimLoader;
 
 import com.google.common.collect.Lists;
 
@@ -48,6 +48,7 @@ public class PostExecOrcFileDump implements 
ExecuteWithHookContext {
   private static final Logger LOG = 
LoggerFactory.getLogger(PostExecOrcFileDump.class.getName());
 
   private static final PathFilter hiddenFileFilter = new PathFilter() {
+    @Override
     public boolean accept(Path p) {
       String name = p.getName();
       return !name.startsWith("_") && !name.startsWith(".");
@@ -88,7 +89,7 @@ public class PostExecOrcFileDump implements 
ExecuteWithHookContext {
 
       for (Path dir : directories) {
         FileSystem fs = dir.getFileSystem(conf);
-        List<FileStatus> fileList = 
ShimLoader.getHadoopShims().listLocatedStatus(fs, dir,
+        List<FileStatus> fileList = HdfsUtils.listLocatedStatus(fs, dir,
             hiddenFileFilter);
 
         for (FileStatus fileStatus : fileList) {

http://git-wip-us.apache.org/repos/asf/hive/blob/411c356b/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java
index bac38ce..496bd0f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java
@@ -507,7 +507,7 @@ public class AcidUtils {
             originalDirectories, original, obsolete, bestBase, 
ignoreEmptyFiles);
       }
     } else {
-      List<FileStatus> children = SHIMS.listLocatedStatus(fs, directory, 
hiddenFileFilter);
+      List<FileStatus> children = HdfsUtils.listLocatedStatus(fs, directory, 
hiddenFileFilter);
       for (FileStatus child : children) {
         getChildState(
             child, null, txnList, working, originalDirectories, original, 
obsolete, bestBase, ignoreEmptyFiles);
@@ -675,7 +675,7 @@ public class AcidUtils {
         }
       }
     } else {
-      List<FileStatus> children = SHIMS.listLocatedStatus(fs, stat.getPath(), 
hiddenFileFilter);
+      List<FileStatus> children = HdfsUtils.listLocatedStatus(fs, 
stat.getPath(), hiddenFileFilter);
       for (FileStatus child : children) {
         if (child.isDir()) {
           findOriginals(fs, child, original, useFileIds);

http://git-wip-us.apache.org/repos/asf/hive/blob/411c356b/ql/src/java/org/apache/hadoop/hive/ql/io/HdfsUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/HdfsUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/io/HdfsUtils.java
index b71ca09..9b8b761 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/HdfsUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/HdfsUtils.java
@@ -19,12 +19,17 @@
 package org.apache.hadoop.hive.ql.io;
 
 import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
 
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocatedFileStatus;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.PathFilter;
+import org.apache.hadoop.fs.RemoteIterator;
 import org.apache.hadoop.hdfs.DistributedFileSystem;
 import org.apache.hadoop.hive.shims.HadoopShims;
 import org.apache.hadoop.hive.shims.ShimLoader;
@@ -53,7 +58,7 @@ public class HdfsUtils {
     int fileSizeHash = (int)(fileSize ^ (fileSize >>> 32)),
         modTimeHash = (int)(modTime ^ (modTime >>> 32)),
         combinedHash = modTimeHash ^ fileSizeHash;
-    long id = (((long)nameHash & 0xffffffffL) << 32) | ((long)combinedHash & 
0xffffffffL);
+    long id = ((nameHash & 0xffffffffL) << 32) | (combinedHash & 0xffffffffL);
     if (doLog) {
       LOG.warn("Cannot get unique file ID from " + fsName + "; using " + id
           + " (" + pathStr + "," + nameHash + "," + fileSize + ")");
@@ -61,8 +66,20 @@ public class HdfsUtils {
     return id;
   }
 
-
-
+  public static List<FileStatus> listLocatedStatus(final FileSystem fs,
+      final Path path,
+      final PathFilter filter
+      ) throws IOException {
+    RemoteIterator<LocatedFileStatus> itr = fs.listLocatedStatus(path);
+    List<FileStatus> result = new ArrayList<FileStatus>();
+    while(itr.hasNext()) {
+      FileStatus stat = itr.next();
+      if (filter == null || filter.accept(stat.getPath())) {
+        result.add(stat);
+      }
+    }
+    return result;
+  }
 
   // TODO: this relies on HDFS not changing the format; we assume if we could 
get inode ID, this
   //       is still going to work. Otherwise, file IDs can be turned off. 
Later, we should use

http://git-wip-us.apache.org/repos/asf/hive/blob/411c356b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
index 185852c..d7a8c2f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
@@ -20,7 +20,6 @@ package org.apache.hadoop.hive.ql.io.orc;
 
 import org.apache.orc.impl.InStream;
 
-  
 import java.io.IOException;
 import java.nio.ByteBuffer;
 import java.security.PrivilegedExceptionAction;
@@ -80,6 +79,7 @@ import org.apache.hadoop.hive.ql.io.AcidOutputFormat;
 import org.apache.hadoop.hive.ql.io.AcidUtils;
 import org.apache.hadoop.hive.ql.io.AcidUtils.Directory;
 import org.apache.hadoop.hive.ql.io.CombineHiveInputFormat;
+import org.apache.hadoop.hive.ql.io.HdfsUtils;
 import org.apache.hadoop.hive.ql.io.HiveInputFormat;
 import org.apache.hadoop.hive.ql.io.InputFormatChecker;
 import org.apache.hadoop.hive.ql.io.LlapWrappableInputFormatInterface;
@@ -1034,7 +1034,7 @@ public class OrcInputFormat implements 
InputFormat<NullWritable, OrcStruct>,
       }
 
       // Fall back to regular API and create states without ID.
-      List<FileStatus> children = SHIMS.listLocatedStatus(fs, base, 
AcidUtils.hiddenFileFilter);
+      List<FileStatus> children = HdfsUtils.listLocatedStatus(fs, base, 
AcidUtils.hiddenFileFilter);
       List<HdfsFileStatusWithId> result = new ArrayList<>(children.size());
       for (FileStatus child : children) {
         result.add(AcidUtils.createOriginalObj(null, child));

http://git-wip-us.apache.org/repos/asf/hive/blob/411c356b/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
----------------------------------------------------------------------
diff --git a/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java b/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
index ef2b7f7..273099e 100644
--- a/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
+++ b/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
@@ -52,7 +52,6 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.PathFilter;
 import org.apache.hadoop.fs.ProxyFileSystem;
 import org.apache.hadoop.fs.RemoteIterator;
-import org.apache.hadoop.fs.Trash;
 import org.apache.hadoop.fs.TrashPolicy;
 import org.apache.hadoop.fs.permission.FsAction;
 import org.apache.hadoop.fs.permission.FsPermission;
@@ -664,22 +663,6 @@ public class Hadoop23Shims extends HadoopShimsSecure {
     return new WebHCatJTShim23(conf, ugi);//this has state, so can't be cached
   }
 
-  @Override
-  public List<FileStatus> listLocatedStatus(final FileSystem fs,
-                                            final Path path,
-                                            final PathFilter filter
-                                           ) throws IOException {
-    RemoteIterator<LocatedFileStatus> itr = fs.listLocatedStatus(path);
-    List<FileStatus> result = new ArrayList<FileStatus>();
-    while(itr.hasNext()) {
-      FileStatus stat = itr.next();
-      if (filter == null || filter.accept(stat.getPath())) {
-        result.add(stat);
-      }
-    }
-    return result;
-  }
-
   private static final class HdfsFileStatusWithIdImpl implements 
HdfsFileStatusWithId {
     private final LocatedFileStatus lfs;
     private final long fileId;

http://git-wip-us.apache.org/repos/asf/hive/blob/411c356b/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShims.java
----------------------------------------------------------------------
diff --git a/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShims.java b/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShims.java
index 4a96355..3e30758 100644
--- a/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShims.java
+++ b/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShims.java
@@ -234,18 +234,6 @@ public interface HadoopShims {
         Class<RecordReader<K, V>> rrClass) throws IOException;
   }
 
-  /**
-   * Get the block locations for the given directory.
-   * @param fs the file system
-   * @param path the directory name to get the status and block locations
-   * @param filter a filter that needs to accept the file (or null)
-   * @return an list for the located file status objects
-   * @throws IOException
-   */
-  List<FileStatus> listLocatedStatus(FileSystem fs, Path path,
-                                     PathFilter filter) throws IOException;
-
-
   List<HdfsFileStatusWithId> listLocatedHdfsStatus(
       FileSystem fs, Path path, PathFilter filter) throws IOException;
 

Reply via email to