Author: xuefu
Date: Thu Dec 18 18:54:04 2014
New Revision: 1646510
URL: http://svn.apache.org/r1646510
Log:
HIVE-9127: Improve CombineHiveInputFormat.getSplit performance (Brock via Xuefu)
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java?rev=1646510&r1=1646509&r2=1646510&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java Thu Dec 18 18:54:04 2014
@@ -419,8 +419,9 @@ public final class Utilities {
LOG.info("No plan file found: "+path);
return null;
} catch (Exception e) {
- LOG.error("Failed to load plan: "+path, e);
- throw new RuntimeException(e);
+ String msg = "Failed to load plan: " + path + ": " + e;
+ LOG.error(msg, e);
+ throw new RuntimeException(msg, e);
} finally {
if (in != null) {
try {
@@ -702,11 +703,11 @@ public final class Utilities {
// Cache the plan in this process
gWorkMap.put(planPath, w);
-
return planPath;
} catch (Exception e) {
- e.printStackTrace();
- throw new RuntimeException(e);
+ String msg = "Error caching " + name + ": " + e;
+ LOG.error(msg, e);
+ throw new RuntimeException(msg, e);
}
}
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java?rev=1646510&r1=1646509&r2=1646510&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java Thu Dec 18 18:54:04 2014
@@ -82,8 +82,9 @@ public class CombineHiveInputFormat<K ex
*/
public static class CombineHiveInputSplit extends InputSplitShim {
- String inputFormatClassName;
- CombineFileSplit inputSplitShim;
+ private String inputFormatClassName;
+ private CombineFileSplit inputSplitShim;
+ private Map<String, PartitionDesc> pathToPartitionInfo;
public CombineHiveInputSplit() throws IOException {
this(ShimLoader.getHadoopShims().getCombineFileInputFormat()
@@ -93,20 +94,25 @@ public class CombineHiveInputFormat<K ex
public CombineHiveInputSplit(CombineFileSplit inputSplitShim) throws
IOException {
this(inputSplitShim.getJob(), inputSplitShim);
}
-
public CombineHiveInputSplit(JobConf job, CombineFileSplit inputSplitShim)
throws IOException {
+ this(job, inputSplitShim, null);
+ }
+ public CombineHiveInputSplit(JobConf job, CombineFileSplit inputSplitShim,
+ Map<String, PartitionDesc> pathToPartitionInfo) throws IOException {
this.inputSplitShim = inputSplitShim;
+ this.pathToPartitionInfo = pathToPartitionInfo;
if (job != null) {
- Map<String, PartitionDesc> pathToPartitionInfo = Utilities
- .getMapWork(job).getPathToPartitionInfo();
+ if (this.pathToPartitionInfo == null) {
+ this.pathToPartitionInfo =
Utilities.getMapWork(job).getPathToPartitionInfo();
+ }
// extract all the inputFormatClass names for each chunk in the
// CombinedSplit.
Path[] ipaths = inputSplitShim.getPaths();
if (ipaths.length > 0) {
PartitionDesc part = HiveFileFormatUtils
- .getPartitionDescFromPathRecursively(pathToPartitionInfo,
+ .getPartitionDescFromPathRecursively(this.pathToPartitionInfo,
ipaths[0], IOPrepareCache.get().getPartitionDescMap());
inputFormatClassName = part.getInputFileFormatClass().getName();
}
@@ -215,8 +221,9 @@ public class CombineHiveInputFormat<K ex
inputSplitShim.write(out);
if (inputFormatClassName == null) {
- Map<String, PartitionDesc> pathToPartitionInfo = Utilities
- .getMapWork(getJob()).getPathToPartitionInfo();
+ if (pathToPartitionInfo == null) {
+ pathToPartitionInfo =
Utilities.getMapWork(getJob()).getPathToPartitionInfo();
+ }
// extract all the inputFormatClass names for each chunk in the
// CombinedSplit.
@@ -268,8 +275,8 @@ public class CombineHiveInputFormat<K ex
/**
* Create Hive splits based on CombineFileSplit.
*/
- private InputSplit[] getCombineSplits(JobConf job,
- int numSplits) throws IOException {
+ private InputSplit[] getCombineSplits(JobConf job, int numSplits,
Map<String, PartitionDesc> pathToPartitionInfo)
+ throws IOException {
PerfLogger perfLogger = PerfLogger.getPerfLogger();
perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.GET_SPLITS);
init(job);
@@ -438,7 +445,7 @@ public class CombineHiveInputFormat<K ex
}
for (CombineFileSplit is : iss) {
- CombineHiveInputSplit csplit = new CombineHiveInputSplit(job, is);
+ CombineHiveInputSplit csplit = new CombineHiveInputSplit(job, is,
pathToPartitionInfo);
result.add(csplit);
}
@@ -505,7 +512,8 @@ public class CombineHiveInputFormat<K ex
if (combinablePaths.size() > 0) {
FileInputFormat.setInputPaths(job, combinablePaths.toArray
(new Path[combinablePaths.size()]));
- InputSplit[] splits = getCombineSplits(job, numSplits);
+ Map<String, PartitionDesc> pathToPartitionInfo =
Utilities.getMapWork(job).getPathToPartitionInfo();
+ InputSplit[] splits = getCombineSplits(job, numSplits,
pathToPartitionInfo);
for (InputSplit split : splits) {
result.add(split);
}