Author: hashutosh
Date: Wed Jul 30 00:09:46 2014
New Revision: 1614526

URL: http://svn.apache.org/r1614526
Log:
HIVE-7529 : load data query fails on hdfs federation + viewfs (Navis via Ashutosh Chauhan)

Modified:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/Context.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecDriver.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/Context.java
URL: 
http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/Context.java?rev=1614526&r1=1614525&r2=1614526&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/Context.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/Context.java Wed Jul 30 
00:09:46 2014
@@ -39,7 +39,6 @@ import org.apache.hadoop.hive.ql.lockmgr
 import org.apache.hadoop.hive.ql.plan.LoadTableDesc;
 import org.apache.hadoop.hive.ql.session.SessionState;
 import org.apache.hadoop.hive.shims.ShimLoader;
-import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.util.StringUtils;
 
 import java.io.DataInput;
@@ -54,8 +53,6 @@ import java.util.Map;
 import java.util.Random;
 import java.util.concurrent.ConcurrentHashMap;
 
-import javax.security.auth.login.LoginException;
-
 /**
  * Context for Semantic Analyzers. Usage: not reusable - construct a new one 
for
  * each query should call clear() at end of use to remove temporary folders
@@ -337,7 +334,14 @@ public class Context {
    *          external URI to which the tmp data has to be eventually moved
    * @return next available tmp path on the file system corresponding extURI
    */
-  public Path getExternalTmpPath(URI extURI) {
+  public Path getExternalTmpPath(Path path) {
+    URI extURI = path.toUri();
+    if (extURI.getScheme().equals("viewfs")) {
+      // if we are on viewfs we don't want to use /tmp as tmp dir since rename from /tmp/..
+      // to final /user/hive/warehouse/ will fail later, so instead pick tmp dir
+      // on same namespace as tbl dir.
+      return getExtTmpPathRelTo(path.getParent());
+    }
     return new Path(getExternalScratchDir(extURI), EXT_PREFIX +
       nextPathId());
   }
@@ -347,7 +351,8 @@ public class Context {
    * within passed in uri, whereas getExternalTmpPath() ignores passed in path 
and returns temp
    * path within /tmp
    */
-  public Path getExtTmpPathRelTo(URI uri) {
+  public Path getExtTmpPathRelTo(Path path) {
+    URI uri = path.toUri();
     return new Path (getScratchDir(uri.getScheme(), uri.getAuthority(), 
!explain, 
     uri.getPath() + Path.SEPARATOR + "_" + this.executionId), EXT_PREFIX + 
nextPathId());
   }

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
URL: 
http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java?rev=1614526&r1=1614525&r2=1614526&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java 
(original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java Wed Jul 
30 00:09:46 2014
@@ -1283,7 +1283,7 @@ public class DDLTask extends Task<DDLWor
       // First create the archive in a tmp dir so that if the job fails, the
       // bad files don't pollute the filesystem
       Path tmpPath = new Path(driverContext.getCtx()
-          .getExternalTmpPath(originalDir.toUri()), "partlevel");
+          .getExternalTmpPath(originalDir), "partlevel");
 
       console.printInfo("Creating " + archiveName +
           " for " + originalDir.toString());
@@ -1478,7 +1478,7 @@ public class DDLTask extends Task<DDLWor
       throw new HiveException("Haven't found any archive where it should be");
     }
 
-    Path tmpPath = 
driverContext.getCtx().getExternalTmpPath(originalDir.toUri());
+    Path tmpPath = driverContext.getCtx().getExternalTmpPath(originalDir);
 
     try {
       fs = tmpPath.getFileSystem(conf);

Modified: 
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecDriver.java
URL: 
http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecDriver.java?rev=1614526&r1=1614525&r2=1614526&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecDriver.java 
(original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecDriver.java 
Wed Jul 30 00:09:46 2014
@@ -499,7 +499,7 @@ public class ExecDriver extends Task<Map
       inputPaths.add(new Path(path));
     }
 
-    Path tmpPath = 
context.getCtx().getExternalTmpPath(inputPaths.get(0).toUri());
+    Path tmpPath = context.getCtx().getExternalTmpPath(inputPaths.get(0));
     Path partitionFile = new Path(tmpPath, ".partitions");
     ShimLoader.getHadoopShims().setTotalOrderPartitionFile(job, partitionFile);
     PartitionKeySampler sampler = new PartitionKeySampler();

Modified: 
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
URL: 
http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java?rev=1614526&r1=1614525&r2=1614526&view=diff
==============================================================================
--- 
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java 
(original)
+++ 
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java 
Wed Jul 30 00:09:46 2014
@@ -1697,12 +1697,8 @@ public final class GenMapRedUtils {
       // generate the temporary file
       // it must be on the same file system as the current destination
       Context baseCtx = parseCtx.getContext();
-         // if we are on viewfs we don't want to use /tmp as tmp dir since 
rename from /tmp/..
-      // to final location /user/hive/warehouse/ will fail later, so instead 
pick tmp dir
-      // on same namespace as tbl dir.
-      Path tmpDir = dest.toUri().getScheme().equals("viewfs") ?
-        baseCtx.getExtTmpPathRelTo(dest.toUri()) :
-        baseCtx.getExternalTmpPath(dest.toUri());
+
+      Path tmpDir = baseCtx.getExternalTmpPath(dest);
 
       FileSinkDesc fileSinkDesc = fsOp.getConf();
       // Change all the linked file sink descriptors

Modified: 
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
URL: 
http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java?rev=1614526&r1=1614525&r2=1614526&view=diff
==============================================================================
--- 
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java 
(original)
+++ 
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java 
Wed Jul 30 00:09:46 2014
@@ -969,7 +969,7 @@ public class DDLSemanticAnalyzer extends
         TableDesc tblDesc = Utilities.getTableDesc(table);
         // Write the output to temporary directory and move it to the final 
location at the end
         // so the operation is atomic.
-        Path queryTmpdir = ctx.getExternalTmpPath(newTblPartLoc.toUri());
+        Path queryTmpdir = ctx.getExternalTmpPath(newTblPartLoc);
         truncateTblDesc.setOutputDir(queryTmpdir);
         LoadTableDesc ltd = new LoadTableDesc(queryTmpdir, tblDesc,
             partSpec == null ? new HashMap<String, String>() : partSpec);
@@ -1549,7 +1549,7 @@ public class DDLSemanticAnalyzer extends
       ddlWork.setNeedLock(true);
       Task<? extends Serializable> mergeTask = TaskFactory.get(ddlWork, conf);
       TableDesc tblDesc = Utilities.getTableDesc(tblObj);
-      Path queryTmpdir = ctx.getExternalTmpPath(newTblPartLoc.toUri());
+      Path queryTmpdir = ctx.getExternalTmpPath(newTblPartLoc);
       mergeDesc.setOutputDir(queryTmpdir);
       LoadTableDesc ltd = new LoadTableDesc(queryTmpdir, tblDesc,
           partSpec == null ? new HashMap<String, String>() : partSpec);

Modified: 
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java
URL: 
http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java?rev=1614526&r1=1614525&r2=1614526&view=diff
==============================================================================
--- 
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java
 (original)
+++ 
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java
 Wed Jul 30 00:09:46 2014
@@ -276,7 +276,7 @@ public class ImportSemanticAnalyzer exte
 
   private Task<?> loadTable(URI fromURI, Table table) {
     Path dataPath = new Path(fromURI.toString(), "data");
-    Path tmpPath = ctx.getExternalTmpPath(fromURI);
+    Path tmpPath = ctx.getExternalTmpPath(new Path(fromURI));
     Task<?> copyTask = TaskFactory.get(new CopyWork(dataPath,
        tmpPath, false), conf);
     LoadTableDesc loadTableWork = new LoadTableDesc(tmpPath,
@@ -321,7 +321,7 @@ public class ImportSemanticAnalyzer exte
       LOG.debug("adding dependent CopyWork/AddPart/MoveWork for partition "
           + partSpecToString(partSpec.getPartSpec())
           + " with source location: " + srcLocation);
-      Path tmpPath = ctx.getExternalTmpPath(fromURI);
+      Path tmpPath = ctx.getExternalTmpPath(new Path(fromURI));
       Task<?> copyTask = TaskFactory.get(new CopyWork(new Path(srcLocation),
           tmpPath, false), conf);
       Task<?> addPartTask = TaskFactory.get(new DDLWork(getInputs(),

Modified: 
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
URL: 
http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java?rev=1614526&r1=1614525&r2=1614526&view=diff
==============================================================================
--- 
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java 
(original)
+++ 
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java 
Wed Jul 30 00:09:46 2014
@@ -1426,7 +1426,7 @@ public class SemanticAnalyzer extends Ba
               }
               try {
                 fname = ctx.getExternalTmpPath(
-                    FileUtils.makeQualified(location, 
conf).toUri()).toString();
+                    FileUtils.makeQualified(location, conf)).toString();
               } catch (Exception e) {
                 throw new SemanticException(generateErrorMessage(ast,
                     "Error creating temporary folder on: " + 
location.toString()), e);
@@ -5662,12 +5662,7 @@ public class SemanticAnalyzer extends Ba
       if (isNonNativeTable) {
         queryTmpdir = dest_path;
       } else {
-       // if we are on viewfs we don't want to use /tmp as tmp dir since 
rename from /tmp/..
-        // to final /user/hive/warehouse/ will fail later, so instead pick tmp 
dir
-        // on same namespace as tbl dir.
-        queryTmpdir = dest_path.toUri().getScheme().equals("viewfs") ?
-          ctx.getExtTmpPathRelTo(dest_path.getParent().toUri()) :
-          ctx.getExternalTmpPath(dest_path.toUri());
+        queryTmpdir = ctx.getExternalTmpPath(dest_path);
       }
       if (dpCtx != null) {
         // set the root of the temporary path where dynamic partition columns 
will populate
@@ -5780,12 +5775,7 @@ public class SemanticAnalyzer extends Ba
       dest_path = new Path(tabPath.toUri().getScheme(), tabPath.toUri()
           .getAuthority(), partPath.toUri().getPath());
 
-      // if we are on viewfs we don't want to use /tmp as tmp dir since rename 
from /tmp/..
-      // to final /user/hive/warehouse/ will fail later, so instead pick tmp 
dir
-      // on same namespace as tbl dir.
-      queryTmpdir = dest_path.toUri().getScheme().equals("viewfs") ?
-        ctx.getExtTmpPathRelTo(dest_path.getParent().toUri()) :
-        ctx.getExternalTmpPath(dest_path.toUri());
+      queryTmpdir = ctx.getExternalTmpPath(dest_path);
       table_desc = Utilities.getTableDesc(dest_tab);
 
       // Add sorting/bucketing if needed
@@ -5842,7 +5832,7 @@ public class SemanticAnalyzer extends Ba
 
         try {
           Path qPath = FileUtils.makeQualified(dest_path, conf);
-          queryTmpdir = ctx.getExternalTmpPath(qPath.toUri());
+          queryTmpdir = ctx.getExternalTmpPath(qPath);
         } catch (Exception e) {
           throw new SemanticException("Error creating temporary folder on: "
               + dest_path, e);
@@ -6003,7 +5993,7 @@ public class SemanticAnalyzer extends Ba
     // it should be the same as the MoveWork's sourceDir.
     fileSinkDesc.setStatsAggPrefix(fileSinkDesc.getDirName().toString());
     if (HiveConf.getVar(conf, 
HIVESTATSDBCLASS).equalsIgnoreCase(StatDB.fs.name())) {
-      String statsTmpLoc = 
ctx.getExternalTmpPath(queryTmpdir.toUri()).toString();
+      String statsTmpLoc = ctx.getExternalTmpPath(queryTmpdir).toString();
       LOG.info("Set stats collection dir : " + statsTmpLoc);
       conf.set(StatsSetupConst.STATS_TMP_LOC, statsTmpLoc);
     }
@@ -9014,7 +9004,7 @@ public class SemanticAnalyzer extends Ba
       tsDesc.setGatherStats(false);
     } else {
       if (HiveConf.getVar(conf, 
HIVESTATSDBCLASS).equalsIgnoreCase(StatDB.fs.name())) {
-        String statsTmpLoc = 
ctx.getExternalTmpPath(tab.getPath().toUri()).toString();
+        String statsTmpLoc = ctx.getExternalTmpPath(tab.getPath()).toString();
         LOG.info("Set stats collection dir : " + statsTmpLoc);
         conf.set(StatsSetupConst.STATS_TMP_LOC, statsTmpLoc);
       }


Reply via email to