Author: hashutosh
Date: Wed Mar 12 22:54:18 2014
New Revision: 1576978
URL: http://svn.apache.org/r1576978
Log:
HIVE-6572 : Use shimmed version of hadoop conf names for
mapred.{min,max}.split.size{.*} (Sushanth Sowmyan via Ashutosh Chauhan)
Modified:
hive/branches/branch-0.13/hcatalog/core/src/main/java/org/apache/hive/hcatalog/common/HCatConstants.java
hive/branches/branch-0.13/hcatalog/core/src/main/java/org/apache/hive/hcatalog/rcfile/RCFileMapReduceInputFormat.java
hive/branches/branch-0.13/hcatalog/core/src/test/java/org/apache/hive/hcatalog/rcfile/TestRCFileMapReduceInputFormat.java
hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
hive/branches/branch-0.13/ql/src/test/org/apache/hadoop/hive/ql/io/TestRCFile.java
hive/branches/branch-0.13/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcSplitElimination.java
hive/branches/branch-0.13/shims/0.20/src/main/java/org/apache/hadoop/hive/shims/Hadoop20Shims.java
hive/branches/branch-0.13/shims/0.20S/src/main/java/org/apache/hadoop/hive/shims/Hadoop20SShims.java
hive/branches/branch-0.13/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
Modified:
hive/branches/branch-0.13/hcatalog/core/src/main/java/org/apache/hive/hcatalog/common/HCatConstants.java
URL:
http://svn.apache.org/viewvc/hive/branches/branch-0.13/hcatalog/core/src/main/java/org/apache/hive/hcatalog/common/HCatConstants.java?rev=1576978&r1=1576977&r2=1576978&view=diff
==============================================================================
---
hive/branches/branch-0.13/hcatalog/core/src/main/java/org/apache/hive/hcatalog/common/HCatConstants.java
(original)
+++
hive/branches/branch-0.13/hcatalog/core/src/main/java/org/apache/hive/hcatalog/common/HCatConstants.java
Wed Mar 12 22:54:18 2014
@@ -88,7 +88,9 @@ public final class HCatConstants {
* The desired number of input splits produced for each partition. When the
* input files are large and few, we want to split them into many splits,
* so as to increase the parallelizm of loading the splits. Try also two
- * other parameters, mapred.min.split.size and mapred.max.split.size, to
+ * other parameters, mapred.min.split.size and mapred.max.split.size for
+ * hadoop 1.x, or mapreduce.input.fileinputformat.split.minsize and
+ * mapreduce.input.fileinputformat.split.maxsize in hadoop 2.x to
* control the number of input splits.
*/
public static final String HCAT_DESIRED_PARTITION_NUM_SPLITS =
Modified:
hive/branches/branch-0.13/hcatalog/core/src/main/java/org/apache/hive/hcatalog/rcfile/RCFileMapReduceInputFormat.java
URL:
http://svn.apache.org/viewvc/hive/branches/branch-0.13/hcatalog/core/src/main/java/org/apache/hive/hcatalog/rcfile/RCFileMapReduceInputFormat.java?rev=1576978&r1=1576977&r2=1576978&view=diff
==============================================================================
---
hive/branches/branch-0.13/hcatalog/core/src/main/java/org/apache/hive/hcatalog/rcfile/RCFileMapReduceInputFormat.java
(original)
+++
hive/branches/branch-0.13/hcatalog/core/src/main/java/org/apache/hive/hcatalog/rcfile/RCFileMapReduceInputFormat.java
Wed Mar 12 22:54:18 2014
@@ -22,6 +22,7 @@ import java.io.IOException;
import java.util.List;
import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable;
+import org.apache.hadoop.hive.shims.ShimLoader;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.mapreduce.InputSplit;
@@ -44,7 +45,9 @@ public class RCFileMapReduceInputFormat<
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
- job.getConfiguration().setLong("mapred.min.split.size", SequenceFile.SYNC_INTERVAL);
+ job.getConfiguration().setLong(
+ ShimLoader.getHadoopShims().getHadoopConfNames().get("MAPREDMINSPLITSIZE"),
+ SequenceFile.SYNC_INTERVAL);
return super.getSplits(job);
}
}
Modified:
hive/branches/branch-0.13/hcatalog/core/src/test/java/org/apache/hive/hcatalog/rcfile/TestRCFileMapReduceInputFormat.java
URL:
http://svn.apache.org/viewvc/hive/branches/branch-0.13/hcatalog/core/src/test/java/org/apache/hive/hcatalog/rcfile/TestRCFileMapReduceInputFormat.java?rev=1576978&r1=1576977&r2=1576978&view=diff
==============================================================================
---
hive/branches/branch-0.13/hcatalog/core/src/test/java/org/apache/hive/hcatalog/rcfile/TestRCFileMapReduceInputFormat.java
(original)
+++
hive/branches/branch-0.13/hcatalog/core/src/test/java/org/apache/hive/hcatalog/rcfile/TestRCFileMapReduceInputFormat.java
Wed Mar 12 22:54:18 2014
@@ -228,7 +228,8 @@ public class TestRCFileMapReduceInputFor
Configuration jonconf = new Configuration(cloneConf);
jonconf.set("mapred.input.dir", testDir.toString());
JobContext context = new Job(jonconf);
- context.getConfiguration().setLong("mapred.max.split.size", maxSplitSize);
+ context.getConfiguration().setLong(
+ ShimLoader.getHadoopShims().getHadoopConfNames().get("MAPREDMAXSPLITSIZE"), maxSplitSize);
List<InputSplit> splits = inputFormat.getSplits(context);
assertEquals("splits length should be " + splitNumber, splits.size(),
splitNumber);
int readCount = 0;
Modified:
hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
URL:
http://svn.apache.org/viewvc/hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java?rev=1576978&r1=1576977&r2=1576978&view=diff
==============================================================================
---
hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
(original)
+++
hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
Wed Mar 12 22:54:18 2014
@@ -284,7 +284,9 @@ public class HiveInputFormat<K extends W
if (headerCount != 0 || footerCount != 0) {
// Input file has header or footer, cannot be splitted.
- conf.setLong("mapred.min.split.size", Long.MAX_VALUE);
+ conf.setLong(
+ ShimLoader.getHadoopShims().getHadoopConfNames().get("MAPREDMINSPLITSIZE"),
+ Long.MAX_VALUE);
}
}
Modified:
hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
URL:
http://svn.apache.org/viewvc/hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java?rev=1576978&r1=1576977&r2=1576978&view=diff
==============================================================================
---
hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
(original)
+++
hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
Wed Mar 12 22:54:18 2014
@@ -77,8 +77,10 @@ public class OrcInputFormat implements
VectorizedOrcInputFormat voif = new VectorizedOrcInputFormat();
private static final Log LOG = LogFactory.getLog(OrcInputFormat.class);
- static final String MIN_SPLIT_SIZE = "mapred.min.split.size";
- static final String MAX_SPLIT_SIZE = "mapred.max.split.size";
+ static final String MIN_SPLIT_SIZE =
+ ShimLoader.getHadoopShims().getHadoopConfNames().get("MAPREDMINSPLITSIZE");
+ static final String MAX_SPLIT_SIZE =
+ ShimLoader.getHadoopShims().getHadoopConfNames().get("MAPREDMAXSPLITSIZE");
private static final long DEFAULT_MIN_SPLIT_SIZE = 16 * 1024 * 1024;
private static final long DEFAULT_MAX_SPLIT_SIZE = 256 * 1024 * 1024;
Modified:
hive/branches/branch-0.13/ql/src/test/org/apache/hadoop/hive/ql/io/TestRCFile.java
URL:
http://svn.apache.org/viewvc/hive/branches/branch-0.13/ql/src/test/org/apache/hadoop/hive/ql/io/TestRCFile.java?rev=1576978&r1=1576977&r2=1576978&view=diff
==============================================================================
---
hive/branches/branch-0.13/ql/src/test/org/apache/hadoop/hive/ql/io/TestRCFile.java
(original)
+++
hive/branches/branch-0.13/ql/src/test/org/apache/hadoop/hive/ql/io/TestRCFile.java
Wed Mar 12 22:54:18 2014
@@ -58,6 +58,7 @@ import org.apache.hadoop.hive.serde2.obj
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.shims.ShimLoader;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
@@ -640,7 +641,9 @@ public class TestRCFile {
RCFileInputFormat inputFormat = new RCFileInputFormat();
JobConf jobconf = new JobConf(cloneConf);
jobconf.set("mapred.input.dir", testDir.toString());
- jobconf.setLong("mapred.min.split.size", fileLen);
+ jobconf.setLong(
+ ShimLoader.getHadoopShims().getHadoopConfNames().get("MAPREDMINSPLITSIZE"),
+ fileLen);
InputSplit[] splits = inputFormat.getSplits(jobconf, 1);
RCFileRecordReader rr = new RCFileRecordReader(jobconf,
(FileSplit)splits[0]);
long lastSync = 0;
@@ -707,7 +710,9 @@ public class TestRCFile {
RCFileInputFormat inputFormat = new RCFileInputFormat();
JobConf jonconf = new JobConf(cloneConf);
jonconf.set("mapred.input.dir", testDir.toString());
- jonconf.setLong("mapred.min.split.size", minSplitSize);
+ jonconf.setLong(
+ ShimLoader.getHadoopShims().getHadoopConfNames().get("MAPREDMINSPLITSIZE"),
+ minSplitSize);
InputSplit[] splits = inputFormat.getSplits(jonconf, splitNumber);
assertEquals("splits length should be " + splitNumber, splits.length,
splitNumber);
int readCount = 0;
Modified:
hive/branches/branch-0.13/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcSplitElimination.java
URL:
http://svn.apache.org/viewvc/hive/branches/branch-0.13/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcSplitElimination.java?rev=1576978&r1=1576977&r2=1576978&view=diff
==============================================================================
---
hive/branches/branch-0.13/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcSplitElimination.java
(original)
+++
hive/branches/branch-0.13/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcSplitElimination.java
Wed Mar 12 22:54:18 2014
@@ -40,6 +40,7 @@ import org.apache.hadoop.hive.ql.udf.gen
import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.shims.ShimLoader;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.InputFormat;
@@ -106,8 +107,8 @@ public class TestOrcSplitElimination {
100000, CompressionKind.NONE, 10000, 10000);
writeData(writer);
writer.close();
- conf.set("mapred.min.split.size", "1000");
- conf.set("mapred.max.split.size", "5000");
+ conf.set(ShimLoader.getHadoopShims().getHadoopConfNames().get("MAPREDMINSPLITSIZE"), "1000");
+ conf.set(ShimLoader.getHadoopShims().getHadoopConfNames().get("MAPREDMAXSPLITSIZE"), "5000");
InputFormat<?, ?> in = new OrcInputFormat();
FileInputFormat.setInputPaths(conf, testFilePath.toString());
@@ -184,8 +185,8 @@ public class TestOrcSplitElimination {
100000, CompressionKind.NONE, 10000, 10000);
writeData(writer);
writer.close();
- conf.set("mapred.min.split.size", "1000");
- conf.set("mapred.max.split.size", "150000");
+ conf.set(ShimLoader.getHadoopShims().getHadoopConfNames().get("MAPREDMINSPLITSIZE"), "1000");
+ conf.set(ShimLoader.getHadoopShims().getHadoopConfNames().get("MAPREDMAXSPLITSIZE"), "150000");
InputFormat<?, ?> in = new OrcInputFormat();
FileInputFormat.setInputPaths(conf, testFilePath.toString());
@@ -273,8 +274,8 @@ public class TestOrcSplitElimination {
100000, CompressionKind.NONE, 10000, 10000);
writeData(writer);
writer.close();
- conf.set("mapred.min.split.size", "1000");
- conf.set("mapred.max.split.size", "150000");
+ conf.set(ShimLoader.getHadoopShims().getHadoopConfNames().get("MAPREDMINSPLITSIZE"), "1000");
+ conf.set(ShimLoader.getHadoopShims().getHadoopConfNames().get("MAPREDMAXSPLITSIZE"), "150000");
InputFormat<?, ?> in = new OrcInputFormat();
FileInputFormat.setInputPaths(conf, testFilePath.toString());
Modified:
hive/branches/branch-0.13/shims/0.20/src/main/java/org/apache/hadoop/hive/shims/Hadoop20Shims.java
URL:
http://svn.apache.org/viewvc/hive/branches/branch-0.13/shims/0.20/src/main/java/org/apache/hadoop/hive/shims/Hadoop20Shims.java?rev=1576978&r1=1576977&r2=1576978&view=diff
==============================================================================
---
hive/branches/branch-0.13/shims/0.20/src/main/java/org/apache/hadoop/hive/shims/Hadoop20Shims.java
(original)
+++
hive/branches/branch-0.13/shims/0.20/src/main/java/org/apache/hadoop/hive/shims/Hadoop20Shims.java
Wed Mar 12 22:54:18 2014
@@ -778,8 +778,8 @@ public class Hadoop20Shims implements Ha
ret.put("HADOOPMAPREDINPUTDIRRECURSIVE", "mapred.input.dir.recursive");
ret.put("MAPREDMAXSPLITSIZE", "mapred.max.split.size");
ret.put("MAPREDMINSPLITSIZE", "mapred.min.split.size");
- ret.put("MAPREDMINSPLITSIZEPERNODE", "mapred.min.split.size.per.rack");
- ret.put("MAPREDMINSPLITSIZEPERRACK", "mapred.min.split.size.per.node");
+ ret.put("MAPREDMINSPLITSIZEPERRACK", "mapred.min.split.size.per.rack");
+ ret.put("MAPREDMINSPLITSIZEPERNODE", "mapred.min.split.size.per.node");
ret.put("HADOOPNUMREDUCERS", "mapred.reduce.tasks");
ret.put("HADOOPJOBNAME", "mapred.job.name");
ret.put("HADOOPSPECULATIVEEXECREDUCERS",
"mapred.reduce.tasks.speculative.execution");
Modified:
hive/branches/branch-0.13/shims/0.20S/src/main/java/org/apache/hadoop/hive/shims/Hadoop20SShims.java
URL:
http://svn.apache.org/viewvc/hive/branches/branch-0.13/shims/0.20S/src/main/java/org/apache/hadoop/hive/shims/Hadoop20SShims.java?rev=1576978&r1=1576977&r2=1576978&view=diff
==============================================================================
---
hive/branches/branch-0.13/shims/0.20S/src/main/java/org/apache/hadoop/hive/shims/Hadoop20SShims.java
(original)
+++
hive/branches/branch-0.13/shims/0.20S/src/main/java/org/apache/hadoop/hive/shims/Hadoop20SShims.java
Wed Mar 12 22:54:18 2014
@@ -404,8 +404,8 @@ public class Hadoop20SShims extends Hado
ret.put("HADOOPMAPREDINPUTDIRRECURSIVE", "mapred.input.dir.recursive");
ret.put("MAPREDMAXSPLITSIZE", "mapred.max.split.size");
ret.put("MAPREDMINSPLITSIZE", "mapred.min.split.size");
- ret.put("MAPREDMINSPLITSIZEPERNODE", "mapred.min.split.size.per.rack");
- ret.put("MAPREDMINSPLITSIZEPERRACK", "mapred.min.split.size.per.node");
+ ret.put("MAPREDMINSPLITSIZEPERNODE", "mapred.min.split.size.per.node");
+ ret.put("MAPREDMINSPLITSIZEPERRACK", "mapred.min.split.size.per.rack");
ret.put("HADOOPNUMREDUCERS", "mapred.reduce.tasks");
ret.put("HADOOPJOBNAME", "mapred.job.name");
ret.put("HADOOPSPECULATIVEEXECREDUCERS",
"mapred.reduce.tasks.speculative.execution");
Modified:
hive/branches/branch-0.13/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
URL:
http://svn.apache.org/viewvc/hive/branches/branch-0.13/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java?rev=1576978&r1=1576977&r2=1576978&view=diff
==============================================================================
---
hive/branches/branch-0.13/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
(original)
+++
hive/branches/branch-0.13/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
Wed Mar 12 22:54:18 2014
@@ -568,8 +568,8 @@ public class Hadoop23Shims extends Hadoo
ret.put("HADOOPMAPREDINPUTDIRRECURSIVE", "mapreduce.input.fileinputformat.input.dir.recursive");
ret.put("MAPREDMAXSPLITSIZE", "mapreduce.input.fileinputformat.split.maxsize");
ret.put("MAPREDMINSPLITSIZE", "mapreduce.input.fileinputformat.split.minsize");
- ret.put("MAPREDMINSPLITSIZEPERNODE", "mapreduce.input.fileinputformat.split.minsize.per.rack");
- ret.put("MAPREDMINSPLITSIZEPERRACK", "mapreduce.input.fileinputformat.split.minsize.per.node");
+ ret.put("MAPREDMINSPLITSIZEPERNODE", "mapreduce.input.fileinputformat.split.minsize.per.node");
+ ret.put("MAPREDMINSPLITSIZEPERRACK", "mapreduce.input.fileinputformat.split.minsize.per.rack");
ret.put("HADOOPNUMREDUCERS", "mapreduce.job.reduces");
ret.put("HADOOPJOBNAME", "mapreduce.job.name");
ret.put("HADOOPSPECULATIVEEXECREDUCERS", "mapreduce.reduce.speculative");