Author: hashutosh
Date: Wed Jul 30 00:09:46 2014
New Revision: 1614526
URL: http://svn.apache.org/r1614526
Log:
HIVE-7529 : load data query fails on hdfs federation + viewfs (Navis via Ashutosh Chauhan)
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/Context.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecDriver.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/Context.java
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/Context.java?rev=1614526&r1=1614525&r2=1614526&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/Context.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/Context.java Wed Jul 30
00:09:46 2014
@@ -39,7 +39,6 @@ import org.apache.hadoop.hive.ql.lockmgr
import org.apache.hadoop.hive.ql.plan.LoadTableDesc;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.hive.shims.ShimLoader;
-import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.util.StringUtils;
import java.io.DataInput;
@@ -54,8 +53,6 @@ import java.util.Map;
import java.util.Random;
import java.util.concurrent.ConcurrentHashMap;
-import javax.security.auth.login.LoginException;
-
/**
* Context for Semantic Analyzers. Usage: not reusable - construct a new one for
* each query should call clear() at end of use to remove temporary folders
@@ -337,7 +334,14 @@ public class Context {
* external URI to which the tmp data has to be eventually moved
* @return next available tmp path on the file system corresponding extURI
*/
- public Path getExternalTmpPath(URI extURI) {
+ public Path getExternalTmpPath(Path path) {
+ URI extURI = path.toUri();
+ if (extURI.getScheme().equals("viewfs")) {
+ // if we are on viewfs we don't want to use /tmp as tmp dir since rename from /tmp/..
+ // to final /user/hive/warehouse/ will fail later, so instead pick tmp dir
+ // on same namespace as tbl dir.
+ return getExtTmpPathRelTo(path.getParent());
+ }
return new Path(getExternalScratchDir(extURI), EXT_PREFIX +
nextPathId());
}
@@ -347,7 +351,8 @@ public class Context {
* within passed in uri, whereas getExternalTmpPath() ignores passed in path and returns temp
* path within /tmp
*/
- public Path getExtTmpPathRelTo(URI uri) {
+ public Path getExtTmpPathRelTo(Path path) {
+ URI uri = path.toUri();
return new Path (getScratchDir(uri.getScheme(), uri.getAuthority(),
!explain,
uri.getPath() + Path.SEPARATOR + "_" + this.executionId), EXT_PREFIX +
nextPathId());
}
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java?rev=1614526&r1=1614525&r2=1614526&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
(original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java Wed Jul
30 00:09:46 2014
@@ -1283,7 +1283,7 @@ public class DDLTask extends Task<DDLWor
// First create the archive in a tmp dir so that if the job fails, the
// bad files don't pollute the filesystem
Path tmpPath = new Path(driverContext.getCtx()
- .getExternalTmpPath(originalDir.toUri()), "partlevel");
+ .getExternalTmpPath(originalDir), "partlevel");
console.printInfo("Creating " + archiveName +
" for " + originalDir.toString());
@@ -1478,7 +1478,7 @@ public class DDLTask extends Task<DDLWor
throw new HiveException("Haven't found any archive where it should be");
}
- Path tmpPath =
driverContext.getCtx().getExternalTmpPath(originalDir.toUri());
+ Path tmpPath = driverContext.getCtx().getExternalTmpPath(originalDir);
try {
fs = tmpPath.getFileSystem(conf);
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecDriver.java
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecDriver.java?rev=1614526&r1=1614525&r2=1614526&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecDriver.java
(original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecDriver.java
Wed Jul 30 00:09:46 2014
@@ -499,7 +499,7 @@ public class ExecDriver extends Task<Map
inputPaths.add(new Path(path));
}
- Path tmpPath =
context.getCtx().getExternalTmpPath(inputPaths.get(0).toUri());
+ Path tmpPath = context.getCtx().getExternalTmpPath(inputPaths.get(0));
Path partitionFile = new Path(tmpPath, ".partitions");
ShimLoader.getHadoopShims().setTotalOrderPartitionFile(job, partitionFile);
PartitionKeySampler sampler = new PartitionKeySampler();
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java?rev=1614526&r1=1614525&r2=1614526&view=diff
==============================================================================
---
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
(original)
+++
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
Wed Jul 30 00:09:46 2014
@@ -1697,12 +1697,8 @@ public final class GenMapRedUtils {
// generate the temporary file
// it must be on the same file system as the current destination
Context baseCtx = parseCtx.getContext();
- // if we are on viewfs we don't want to use /tmp as tmp dir since rename from /tmp/..
- // to final location /user/hive/warehouse/ will fail later, so instead pick tmp dir
- // on same namespace as tbl dir.
- Path tmpDir = dest.toUri().getScheme().equals("viewfs") ?
- baseCtx.getExtTmpPathRelTo(dest.toUri()) :
- baseCtx.getExternalTmpPath(dest.toUri());
+
+ Path tmpDir = baseCtx.getExternalTmpPath(dest);
FileSinkDesc fileSinkDesc = fsOp.getConf();
// Change all the linked file sink descriptors
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java?rev=1614526&r1=1614525&r2=1614526&view=diff
==============================================================================
---
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
(original)
+++
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
Wed Jul 30 00:09:46 2014
@@ -969,7 +969,7 @@ public class DDLSemanticAnalyzer extends
TableDesc tblDesc = Utilities.getTableDesc(table);
// Write the output to temporary directory and move it to the final location at the end
// so the operation is atomic.
- Path queryTmpdir = ctx.getExternalTmpPath(newTblPartLoc.toUri());
+ Path queryTmpdir = ctx.getExternalTmpPath(newTblPartLoc);
truncateTblDesc.setOutputDir(queryTmpdir);
LoadTableDesc ltd = new LoadTableDesc(queryTmpdir, tblDesc,
partSpec == null ? new HashMap<String, String>() : partSpec);
@@ -1549,7 +1549,7 @@ public class DDLSemanticAnalyzer extends
ddlWork.setNeedLock(true);
Task<? extends Serializable> mergeTask = TaskFactory.get(ddlWork, conf);
TableDesc tblDesc = Utilities.getTableDesc(tblObj);
- Path queryTmpdir = ctx.getExternalTmpPath(newTblPartLoc.toUri());
+ Path queryTmpdir = ctx.getExternalTmpPath(newTblPartLoc);
mergeDesc.setOutputDir(queryTmpdir);
LoadTableDesc ltd = new LoadTableDesc(queryTmpdir, tblDesc,
partSpec == null ? new HashMap<String, String>() : partSpec);
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java?rev=1614526&r1=1614525&r2=1614526&view=diff
==============================================================================
---
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java
(original)
+++
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java
Wed Jul 30 00:09:46 2014
@@ -276,7 +276,7 @@ public class ImportSemanticAnalyzer exte
private Task<?> loadTable(URI fromURI, Table table) {
Path dataPath = new Path(fromURI.toString(), "data");
- Path tmpPath = ctx.getExternalTmpPath(fromURI);
+ Path tmpPath = ctx.getExternalTmpPath(new Path(fromURI));
Task<?> copyTask = TaskFactory.get(new CopyWork(dataPath,
tmpPath, false), conf);
LoadTableDesc loadTableWork = new LoadTableDesc(tmpPath,
@@ -321,7 +321,7 @@ public class ImportSemanticAnalyzer exte
LOG.debug("adding dependent CopyWork/AddPart/MoveWork for partition "
+ partSpecToString(partSpec.getPartSpec())
+ " with source location: " + srcLocation);
- Path tmpPath = ctx.getExternalTmpPath(fromURI);
+ Path tmpPath = ctx.getExternalTmpPath(new Path(fromURI));
Task<?> copyTask = TaskFactory.get(new CopyWork(new Path(srcLocation),
tmpPath, false), conf);
Task<?> addPartTask = TaskFactory.get(new DDLWork(getInputs(),
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java?rev=1614526&r1=1614525&r2=1614526&view=diff
==============================================================================
---
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
(original)
+++
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
Wed Jul 30 00:09:46 2014
@@ -1426,7 +1426,7 @@ public class SemanticAnalyzer extends Ba
}
try {
fname = ctx.getExternalTmpPath(
- FileUtils.makeQualified(location,
conf).toUri()).toString();
+ FileUtils.makeQualified(location, conf)).toString();
} catch (Exception e) {
throw new SemanticException(generateErrorMessage(ast,
"Error creating temporary folder on: " +
location.toString()), e);
@@ -5662,12 +5662,7 @@ public class SemanticAnalyzer extends Ba
if (isNonNativeTable) {
queryTmpdir = dest_path;
} else {
- // if we are on viewfs we don't want to use /tmp as tmp dir since rename from /tmp/..
- // to final /user/hive/warehouse/ will fail later, so instead pick tmp dir
- // on same namespace as tbl dir.
- queryTmpdir = dest_path.toUri().getScheme().equals("viewfs") ?
- ctx.getExtTmpPathRelTo(dest_path.getParent().toUri()) :
- ctx.getExternalTmpPath(dest_path.toUri());
+ queryTmpdir = ctx.getExternalTmpPath(dest_path);
}
if (dpCtx != null) {
// set the root of the temporary path where dynamic partition columns
will populate
@@ -5780,12 +5775,7 @@ public class SemanticAnalyzer extends Ba
dest_path = new Path(tabPath.toUri().getScheme(), tabPath.toUri()
.getAuthority(), partPath.toUri().getPath());
- // if we are on viewfs we don't want to use /tmp as tmp dir since rename from /tmp/..
- // to final /user/hive/warehouse/ will fail later, so instead pick tmp dir
- // on same namespace as tbl dir.
- queryTmpdir = dest_path.toUri().getScheme().equals("viewfs") ?
- ctx.getExtTmpPathRelTo(dest_path.getParent().toUri()) :
- ctx.getExternalTmpPath(dest_path.toUri());
+ queryTmpdir = ctx.getExternalTmpPath(dest_path);
table_desc = Utilities.getTableDesc(dest_tab);
// Add sorting/bucketing if needed
@@ -5842,7 +5832,7 @@ public class SemanticAnalyzer extends Ba
try {
Path qPath = FileUtils.makeQualified(dest_path, conf);
- queryTmpdir = ctx.getExternalTmpPath(qPath.toUri());
+ queryTmpdir = ctx.getExternalTmpPath(qPath);
} catch (Exception e) {
throw new SemanticException("Error creating temporary folder on: "
+ dest_path, e);
@@ -6003,7 +5993,7 @@ public class SemanticAnalyzer extends Ba
// it should be the same as the MoveWork's sourceDir.
fileSinkDesc.setStatsAggPrefix(fileSinkDesc.getDirName().toString());
if (HiveConf.getVar(conf,
HIVESTATSDBCLASS).equalsIgnoreCase(StatDB.fs.name())) {
- String statsTmpLoc =
ctx.getExternalTmpPath(queryTmpdir.toUri()).toString();
+ String statsTmpLoc = ctx.getExternalTmpPath(queryTmpdir).toString();
LOG.info("Set stats collection dir : " + statsTmpLoc);
conf.set(StatsSetupConst.STATS_TMP_LOC, statsTmpLoc);
}
@@ -9014,7 +9004,7 @@ public class SemanticAnalyzer extends Ba
tsDesc.setGatherStats(false);
} else {
if (HiveConf.getVar(conf,
HIVESTATSDBCLASS).equalsIgnoreCase(StatDB.fs.name())) {
- String statsTmpLoc =
ctx.getExternalTmpPath(tab.getPath().toUri()).toString();
+ String statsTmpLoc = ctx.getExternalTmpPath(tab.getPath()).toString();
LOG.info("Set stats collection dir : " + statsTmpLoc);
conf.set(StatsSetupConst.STATS_TMP_LOC, statsTmpLoc);
}