Author: heyongqiang
Date: Tue Nov 15 20:12:54 2011
New Revision: 1202405
URL: http://svn.apache.org/viewvc?rev=1202405&view=rev
Log:
HIVE-2566: reduce the number of map-reduce jobs for union all (namit via He
Yongqiang)
Added:
hive/trunk/ql/src/test/queries/clientpositive/union24.q
hive/trunk/ql/src/test/results/clientpositive/union24.q.out
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRProcContext.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRUnion1.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
hive/trunk/ql/src/test/results/clientpositive/auto_join27.q.out
hive/trunk/ql/src/test/results/clientpositive/create_view.q.out
hive/trunk/ql/src/test/results/clientpositive/input25.q.out
hive/trunk/ql/src/test/results/clientpositive/input26.q.out
hive/trunk/ql/src/test/results/clientpositive/join35.q.out
hive/trunk/ql/src/test/results/clientpositive/lineage1.q.out
hive/trunk/ql/src/test/results/clientpositive/load_dyn_part14.q.out
hive/trunk/ql/src/test/results/clientpositive/merge4.q.out
hive/trunk/ql/src/test/results/clientpositive/ppd_union_view.q.out
hive/trunk/ql/src/test/results/clientpositive/ql_rewrite_gbtoidx.q.out
hive/trunk/ql/src/test/results/clientpositive/stats1.q.out
hive/trunk/ql/src/test/results/clientpositive/union10.q.out
hive/trunk/ql/src/test/results/clientpositive/union11.q.out
hive/trunk/ql/src/test/results/clientpositive/union12.q.out
hive/trunk/ql/src/test/results/clientpositive/union14.q.out
hive/trunk/ql/src/test/results/clientpositive/union15.q.out
hive/trunk/ql/src/test/results/clientpositive/union17.q.out
hive/trunk/ql/src/test/results/clientpositive/union18.q.out
hive/trunk/ql/src/test/results/clientpositive/union19.q.out
hive/trunk/ql/src/test/results/clientpositive/union20.q.out
hive/trunk/ql/src/test/results/clientpositive/union3.q.out
hive/trunk/ql/src/test/results/clientpositive/union4.q.out
hive/trunk/ql/src/test/results/clientpositive/union5.q.out
hive/trunk/ql/src/test/results/clientpositive/union6.q.out
hive/trunk/ql/src/test/results/clientpositive/union7.q.out
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java?rev=1202405&r1=1202404&r2=1202405&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java
(original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java Tue Nov
15 20:12:54 2011
@@ -680,16 +680,19 @@ public abstract class Operator<T extends
assert parentIndex != -1;
if (parentOperators.size() == 1) {
parentOperators = null;
- } else {
+ }
+ else {
parentOperators.remove(parentIndex);
}
int childIndex = parent.getChildOperators().indexOf(this);
- assert childIndex != -1;
- if (parent.getChildOperators().size() == 1) {
- parent.setChildOperators(null);
- } else {
- parent.getChildOperators().remove(childIndex);
+ if (childIndex >= 0) {
+ if (parent.getChildOperators().size() == 1) {
+ parent.setChildOperators(null);
+ }
+ else {
+ parent.getChildOperators().remove(childIndex);
+ }
}
}
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRProcContext.java
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRProcContext.java?rev=1202405&r1=1202404&r2=1202405&view=diff
==============================================================================
---
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRProcContext.java
(original)
+++
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRProcContext.java
Tue Nov 15 20:12:54 2011
@@ -102,11 +102,13 @@ public class GenMRProcContext implements
Task<? extends Serializable> uTask;
List<String> taskTmpDir;
List<TableDesc> tt_desc;
+ List<Operator<? extends Serializable>> listTopOperators;
public GenMRUnionCtx() {
uTask = null;
taskTmpDir = new ArrayList<String>();
tt_desc = new ArrayList<TableDesc>();
+ listTopOperators = new ArrayList<Operator<? extends Serializable>>();
}
public Task<? extends Serializable> getUTask() {
@@ -132,6 +134,19 @@ public class GenMRProcContext implements
public List<TableDesc> getTTDesc() {
return tt_desc;
}
+
+ public List<Operator<? extends Serializable>> getListTopOperators() {
+ return listTopOperators;
+ }
+
+ public void setListTopOperators(
+ List<Operator<? extends Serializable>> listTopOperators) {
+ this.listTopOperators = listTopOperators;
+ }
+
+ public void addListTopOperators(Operator<? extends Serializable>
topOperator) {
+ listTopOperators.add(topOperator);
+ }
}
/**
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRUnion1.java
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRUnion1.java?rev=1202405&r1=1202404&r2=1202405&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRUnion1.java
(original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRUnion1.java
Tue Nov 15 20:12:54 2011
@@ -50,6 +50,7 @@ import org.apache.hadoop.hive.ql.plan.Ma
import org.apache.hadoop.hive.ql.plan.PartitionDesc;
import org.apache.hadoop.hive.ql.plan.PlanUtils;
import org.apache.hadoop.hive.ql.plan.TableDesc;
+import org.apache.hadoop.hive.ql.plan.TableScanDesc;
/**
* Processor for the rule - TableScan followed by Union.
@@ -60,13 +61,145 @@ public class GenMRUnion1 implements Node
}
/**
+ * Process the union if all sub-queries are map-only
+ *
+ * @return
+ * @throws SemanticException
+ */
+ private Object processMapOnlyUnion(UnionOperator union, Stack<Node> stack,
+ GenMRProcContext ctx, UnionProcContext uCtx) throws SemanticException {
+ // merge currTask from multiple topOps
+ HashMap<Operator<? extends Serializable>, Task<? extends Serializable>>
opTaskMap = ctx.getOpTaskMap();
+ if (opTaskMap != null && opTaskMap.size() > 0) {
+ Task<? extends Serializable> tsk = opTaskMap.get(null);
+ if (tsk != null) {
+ ctx.setCurrTask(tsk);
+ }
+ }
+
+ UnionParseContext uPrsCtx = uCtx.getUnionParseContext(union);
+ if ((uPrsCtx != null) && (uPrsCtx.getMapJoinQuery())) {
+ GenMapRedUtils.mergeMapJoinUnion(union, ctx,
+ UnionProcFactory.getPositionParent(union, stack));
+ }
+ else {
+ ctx.getMapCurrCtx().put(
+ (Operator<? extends Serializable>) union,
+ new GenMapRedCtx(ctx.getCurrTask(), ctx.getCurrTopOp(),
+ ctx.getCurrAliasId()));
+ }
+ return null;
+ }
+
+ /**
+ * Process the union when the parent is a map-reduce job. Create a temporary
+ * output, and let the union task read from the temporary output.
+ *
+ * The files created for all the inputs are in the union context and later
+ * used to initialize the union plan
+ *
+ * @param parent
+ * @param child
+ * @param uTask
+ * @param ctx
+ * @param uCtxTask
+ */
+ private void processSubQueryUnionCreateIntermediate(
+ Operator<? extends Serializable> parent,
+ Operator<? extends Serializable> child,
+ Task<? extends Serializable> uTask, GenMRProcContext ctx,
+ GenMRUnionCtx uCtxTask) {
+ ParseContext parseCtx = ctx.getParseCtx();
+
+ TableDesc tt_desc =
PlanUtils.getIntermediateFileTableDesc(PlanUtils.getFieldSchemasFromRowSchema(
+ parent.getSchema(), "temporarycol"));
+
+ // generate the temporary file
+ Context baseCtx = parseCtx.getContext();
+ String taskTmpDir = baseCtx.getMRTmpFileURI();
+
+ // Create a file sink operator for this file name
+ Operator<? extends Serializable> fs_op = OperatorFactory.get(
+ new FileSinkDesc(taskTmpDir, tt_desc, parseCtx.getConf().getBoolVar(
+ HiveConf.ConfVars.COMPRESSINTERMEDIATE)), parent.getSchema());
+
+ assert parent.getChildOperators().size() == 1;
+ parent.getChildOperators().set(0, fs_op);
+
+ List<Operator<? extends Serializable>> parentOpList = new
ArrayList<Operator<? extends Serializable>>();
+ parentOpList.add(parent);
+ fs_op.setParentOperators(parentOpList);
+
+ // Create a dummy table scan operator
+ Operator<? extends Serializable> ts_op = OperatorFactory.get(
+ new TableScanDesc(), parent.getSchema());
+ List<Operator<? extends Serializable>> childOpList = new
ArrayList<Operator<? extends Serializable>>();
+ childOpList.add(child);
+ ts_op.setChildOperators(childOpList);
+ child.replaceParent(parent, ts_op);
+
+ // Add the path to alias mapping
+
+ uCtxTask.addTaskTmpDir(taskTmpDir);
+ uCtxTask.addTTDesc(tt_desc);
+ uCtxTask.addListTopOperators(ts_op);
+
+ // The union task is empty. The files created for all the inputs are
+ // assembled in the union context and later used to initialize the union
+ // plan
+
+ ctx.getCurrTask().addDependentTask(uTask);
+ }
+
+ /**
+ * Union Operator encountered. A map-only query is encountered at the given
+ * position. However, at least one of the sub-queries is a map-reduce job.
+ * Copy the information from the current top operator to the union context.
+ *
+ * @param ctx
+ * @param uCtxTask
+ * @param union
+ * @param stack
+ * @throws SemanticException
+ */
+ private void processSubQueryUnionMerge(GenMRProcContext ctx,
+ GenMRUnionCtx uCtxTask, UnionOperator union, Stack<Node> stack)
+ throws SemanticException {
+ // The current plan can be thrown away after being merged with the union
+ // plan
+ Task<? extends Serializable> uTask = uCtxTask.getUTask();
+ MapredWork plan = (MapredWork) uTask.getWork();
+ Operator<? extends Serializable> currTopOp = ctx.getCurrTopOp();
+ String currAliasId = ctx.getCurrAliasId();
+ GenMapRedUtils.setTaskPlan(ctx.getCurrAliasId(), ctx.getCurrTopOp(), plan,
+ false, ctx);
+ }
+
+ private void processSubQueryUnionMapJoin(GenMRProcContext ctx) {
+ AbstractMapJoinOperator<? extends MapJoinDesc> mjOp =
ctx.getCurrMapJoinOp();
+ assert mjOp != null;
+ GenMRMapJoinCtx mjCtx = ctx.getMapJoinCtx(mjOp);
+ assert mjCtx != null;
+ MapredWork plan = (MapredWork) ctx.getCurrTask().getWork();
+
+ String taskTmpDir = mjCtx.getTaskTmpDir();
+ TableDesc tt_desc = mjCtx.getTTDesc();
+ assert plan.getPathToAliases().get(taskTmpDir) == null;
+ plan.getPathToAliases().put(taskTmpDir, new ArrayList<String>());
+ plan.getPathToAliases().get(taskTmpDir).add(taskTmpDir);
+ plan.getPathToPartitionInfo().put(taskTmpDir,
+ new PartitionDesc(tt_desc, null));
+ plan.getAliasToWork().put(taskTmpDir, mjCtx.getRootMapJoinOp());
+ }
+
+ /**
* Union Operator encountered . Currently, the algorithm is pretty simple: If
- * all the sub-queries are map-only, dont do anything. However, if there is a
+ * all the sub-queries are map-only, don't do anything. However, if there is
a
* mapjoin followed by the union, merge at the union Otherwise, insert a
* FileSink on top of all the sub-queries.
- *
+ *
* This can be optimized later on.
- *
+ *
* @param nd
* the file sink operator encountered
* @param opProcCtx
@@ -81,31 +214,12 @@ public class GenMRUnion1 implements Node
// Map-only subqueries can be optimized in future to not write to a file in
// future
- Map<Operator<? extends Serializable>, GenMapRedCtx> mapCurrCtx = ctx
- .getMapCurrCtx();
+ Map<Operator<? extends Serializable>, GenMapRedCtx> mapCurrCtx =
ctx.getMapCurrCtx();
// The plan needs to be broken only if one of the sub-queries involve a
// map-reduce job
if (uCtx.isMapOnlySubq()) {
- // merge currTask from multiple topOps
- HashMap<Operator<? extends Serializable>, Task<? extends Serializable>>
opTaskMap = ctx
- .getOpTaskMap();
- if (opTaskMap != null && opTaskMap.size() > 0) {
- Task<? extends Serializable> tsk = opTaskMap.get(null);
- if (tsk != null) {
- ctx.setCurrTask(tsk);
- }
- }
-
- UnionParseContext uPrsCtx = uCtx.getUnionParseContext(union);
- if ((uPrsCtx != null) && (uPrsCtx.getMapJoinQuery())) {
- GenMapRedUtils.mergeMapJoinUnion(union, ctx, UnionProcFactory
- .getPositionParent(union, stack));
- } else {
- mapCurrCtx.put((Operator<? extends Serializable>) nd, new GenMapRedCtx(
- ctx.getCurrTask(), ctx.getCurrTopOp(), ctx.getCurrAliasId()));
- }
- return null;
+ return processMapOnlyUnion(union, stack, ctx, uCtx);
}
ctx.setCurrUnionOp(union);
@@ -116,16 +230,8 @@ public class GenMRUnion1 implements Node
Task<? extends Serializable> currTask = ctx.getCurrTask();
int pos = UnionProcFactory.getPositionParent(union, stack);
- // is the current task a root task
- if (uPrsCtx.getRootTask(pos) && (!ctx.getRootTasks().contains(currTask))) {
- ctx.getRootTasks().add(currTask);
- }
-
GenMRUnionCtx uCtxTask = ctx.getUnionTask(union);
Task<? extends Serializable> uTask = null;
-
- Operator<? extends Serializable> parent = union.getParentOperators().get(
- pos);
MapredWork uPlan = null;
// union is encountered for the first time
@@ -135,70 +241,36 @@ public class GenMRUnion1 implements Node
uTask = TaskFactory.get(uPlan, parseCtx.getConf());
uCtxTask.setUTask(uTask);
ctx.setUnionTask(union, uCtxTask);
- } else {
+ }
+ else {
uTask = uCtxTask.getUTask();
}
- // If there is a mapjoin at position 'pos'
- if (uPrsCtx.getMapJoinSubq(pos)) {
- AbstractMapJoinOperator<? extends MapJoinDesc> mjOp =
ctx.getCurrMapJoinOp();
- assert mjOp != null;
- GenMRMapJoinCtx mjCtx = ctx.getMapJoinCtx(mjOp);
- assert mjCtx != null;
- MapredWork plan = (MapredWork) currTask.getWork();
-
- String taskTmpDir = mjCtx.getTaskTmpDir();
- TableDesc tt_desc = mjCtx.getTTDesc();
- assert plan.getPathToAliases().get(taskTmpDir) == null;
- plan.getPathToAliases().put(taskTmpDir, new ArrayList<String>());
- plan.getPathToAliases().get(taskTmpDir).add(taskTmpDir);
- plan.getPathToPartitionInfo().put(taskTmpDir,
- new PartitionDesc(tt_desc, null));
- plan.getAliasToWork().put(taskTmpDir, mjCtx.getRootMapJoinOp());
+ // Copy into the current union task plan if the sub-query at 'pos' is a map-only root task that is not a mapjoin
+ if (uPrsCtx.getMapOnlySubq(pos) && uPrsCtx.getRootTask(pos)
+ && !uPrsCtx.getMapJoinSubq(pos)) {
+ processSubQueryUnionMerge(ctx, uCtxTask, union, stack);
}
+ // If it is a map-reduce job, create a temporary file
+ else {
+ // is the current task a root task
+ if (uPrsCtx.getRootTask(pos) &&
(!ctx.getRootTasks().contains(currTask))) {
+ ctx.getRootTasks().add(currTask);
+ }
+ // If there is a mapjoin at position 'pos'
+ if (uPrsCtx.getMapJoinSubq(pos)) {
+ processSubQueryUnionMapJoin(ctx);
+ }
- TableDesc tt_desc = PlanUtils.getIntermediateFileTableDesc(PlanUtils
- .getFieldSchemasFromRowSchema(parent.getSchema(), "temporarycol"));
-
- // generate the temporary file
- Context baseCtx = parseCtx.getContext();
- String taskTmpDir = baseCtx.getMRTmpFileURI();
-
- // Add the path to alias mapping
- uCtxTask.addTaskTmpDir(taskTmpDir);
- uCtxTask.addTTDesc(tt_desc);
-
- // The union task is empty. The files created for all the inputs are
- // assembled in the
- // union context and later used to initialize the union plan
-
- // Create a file sink operator for this file name
- Operator<? extends Serializable> fs_op = OperatorFactory.get(
- new FileSinkDesc(taskTmpDir, tt_desc, parseCtx.getConf().getBoolVar(
- HiveConf.ConfVars.COMPRESSINTERMEDIATE)), parent.getSchema());
-
- assert parent.getChildOperators().size() == 1;
- parent.getChildOperators().set(0, fs_op);
-
- List<Operator<? extends Serializable>> parentOpList =
- new ArrayList<Operator<? extends Serializable>>();
- parentOpList.add(parent);
- fs_op.setParentOperators(parentOpList);
-
- currTask.addDependentTask(uTask);
-
- // If it is map-only task, add the files to be processed
- if (uPrsCtx.getMapOnlySubq(pos) && uPrsCtx.getRootTask(pos)) {
- GenMapRedUtils.setTaskPlan(ctx.getCurrAliasId(), ctx.getCurrTopOp(),
- (MapredWork) currTask.getWork(), false, ctx);
+
processSubQueryUnionCreateIntermediate(union.getParentOperators().get(pos),
union, uTask, ctx, uCtxTask);
}
ctx.setCurrTask(uTask);
ctx.setCurrAliasId(null);
ctx.setCurrTopOp(null);
- mapCurrCtx.put((Operator<? extends Serializable>) nd, new GenMapRedCtx(ctx
- .getCurrTask(), null, null));
+ mapCurrCtx.put((Operator<? extends Serializable>) nd,
+ new GenMapRedCtx(ctx.getCurrTask(), null, null));
return null;
}
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java?rev=1202405&r1=1202404&r2=1202405&view=diff
==============================================================================
---
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
(original)
+++
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
Tue Nov 15 20:12:54 2011
@@ -326,12 +326,18 @@ public final class GenMapRedUtils {
assert uCtx != null;
List<String> taskTmpDirLst = uCtx.getTaskTmpDir();
+ if ((taskTmpDirLst == null) || (taskTmpDirLst.isEmpty())) {
+ return;
+ }
+
List<TableDesc> tt_descLst = uCtx.getTTDesc();
assert !taskTmpDirLst.isEmpty() && !tt_descLst.isEmpty();
assert taskTmpDirLst.size() == tt_descLst.size();
int size = taskTmpDirLst.size();
assert local == false;
+ List<Operator<? extends Serializable>> topOperators =
uCtx.getListTopOperators();
+
for (int pos = 0; pos < size; pos++) {
String taskTmpDir = taskTmpDirLst.get(pos);
TableDesc tt_desc = tt_descLst.get(pos);
@@ -340,7 +346,7 @@ public final class GenMapRedUtils {
plan.getPathToAliases().get(taskTmpDir).add(taskTmpDir);
plan.getPathToPartitionInfo().put(taskTmpDir,
new PartitionDesc(tt_desc, null));
- plan.getAliasToWork().put(taskTmpDir, currUnionOp);
+ plan.getAliasToWork().put(taskTmpDir, topOperators.get(pos));
}
}
}
Added: hive/trunk/ql/src/test/queries/clientpositive/union24.q
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/union24.q?rev=1202405&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/union24.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/union24.q Tue Nov 15 20:12:54
2011
@@ -0,0 +1,70 @@
+create table src2 as select key, count(1) as count from src group by key;
+create table src3 as select * from src2;
+create table src4 as select * from src2;
+create table src5 as select * from src2;
+
+
+set hive.merge.mapfiles=false;
+set hive.merge.mapredfiles=false;
+
+
+explain extended
+select s.key, s.count from (
+ select key, count from src2 where key < 10
+ union all
+ select key, count from src3 where key < 10
+ union all
+ select key, count from src4 where key < 10
+ union all
+ select key, count(1) as count from src5 where key < 10 group by key
+)s
+order by s.key;
+
+select s.key, s.count from (
+ select key, count from src2 where key < 10
+ union all
+ select key, count from src3 where key < 10
+ union all
+ select key, count from src4 where key < 10
+ union all
+ select key, count(1) as count from src5 where key < 10 group by key
+)s
+order by s.key;
+
+explain extended
+select s.key, s.count from (
+ select key, count from src2 where key < 10
+ union all
+ select key, count from src3 where key < 10
+ union all
+ select a.key as key, b.count as count from src4 a join src5 b on a.key=b.key
where a.key < 10
+)s
+order by s.key;
+
+select s.key, s.count from (
+ select key, count from src2 where key < 10
+ union all
+ select key, count from src3 where key < 10
+ union all
+ select a.key as key, b.count as count from src4 a join src5 b on a.key=b.key
where a.key < 10
+)s
+order by s.key;
+
+explain extended
+select s.key, s.count from (
+ select key, count from src2 where key < 10
+ union all
+ select key, count from src3 where key < 10
+ union all
+ select a.key as key, count(1) as count from src4 a join src5 b on
a.key=b.key where a.key < 10 group by a.key
+)s
+order by s.key;
+
+select s.key, s.count from (
+ select key, count from src2 where key < 10
+ union all
+ select key, count from src3 where key < 10
+ union all
+ select a.key as key, count(1) as count from src4 a join src5 b on
a.key=b.key where a.key < 10 group by a.key
+)s
+order by s.key;
Modified: hive/trunk/ql/src/test/results/clientpositive/auto_join27.q.out
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/auto_join27.q.out?rev=1202405&r1=1202404&r2=1202405&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/auto_join27.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/auto_join27.q.out Tue Nov 15
20:12:54 2011
@@ -27,14 +27,13 @@ ABSTRACT SYNTAX TREE:
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-8 depends on stages: Stage-1, Stage-4 , consists of Stage-9, Stage-10,
Stage-2
+ Stage-8 depends on stages: Stage-1 , consists of Stage-9, Stage-10, Stage-2
Stage-9 has a backup stage: Stage-2
Stage-6 depends on stages: Stage-9
Stage-3 depends on stages: Stage-2, Stage-6, Stage-7
Stage-10 has a backup stage: Stage-2
Stage-7 depends on stages: Stage-10
Stage-2
- Stage-4 is a root stage
Stage-0 is a root stage
STAGE PLANS:
@@ -132,65 +131,75 @@ STAGE PLANS:
Stage: Stage-6
Map Reduce
Alias -> Map Operator Tree:
-
file:/tmp/njain/hive_2011-03-23_22-02-43_416_1632994873933518743/-mr-10002
- Union
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- condition expressions:
- 0
- 1
- handleSkewJoin: false
- keys:
- 0 [Column[_col0]]
- 1 [Column[_col0]]
- Position of Big Table: 0
- Select Operator
- Group By Operator
- aggregations:
- expr: count(1)
- bucketGroup: false
- mode: hash
- outputColumnNames: _col0
- File Output Operator
- compressed: false
- GlobalTableId: 0
- table:
- input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
- output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-
file:/tmp/njain/hive_2011-03-23_22-02-43_416_1632994873933518743/-mr-10004
- Union
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- condition expressions:
- 0
- 1
- handleSkewJoin: false
- keys:
- 0 [Column[_col0]]
- 1 [Column[_col0]]
- Position of Big Table: 0
- Select Operator
- Group By Operator
- aggregations:
- expr: count(1)
- bucketGroup: false
- mode: hash
- outputColumnNames: _col0
- File Output Operator
- compressed: false
- GlobalTableId: 0
- table:
- input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
- output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+
file:/var/folders/bZ/bZe+iKfoFTuPoShRd6dy6-tOU9Y/-Tmp-/njain/hive_2011-11-11_17-36-43_167_4198125662621366018/-mr-10002
+ TableScan
+ Union
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0
+ 1
+ handleSkewJoin: false
+ keys:
+ 0 [Column[_col0]]
+ 1 [Column[_col0]]
+ Position of Big Table: 0
+ Select Operator
+ Group By Operator
+ aggregations:
+ expr: count(1)
+ bucketGroup: false
+ mode: hash
+ outputColumnNames: _col0
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ null-subquery1:src_12-subquery1:src
+ TableScan
+ alias: src
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ expr: value
+ type: string
+ outputColumnNames: _col0, _col1
+ Union
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0
+ 1
+ handleSkewJoin: false
+ keys:
+ 0 [Column[_col0]]
+ 1 [Column[_col0]]
+ Position of Big Table: 0
+ Select Operator
+ Group By Operator
+ aggregations:
+ expr: count(1)
+ bucketGroup: false
+ mode: hash
+ outputColumnNames: _col0
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
Local Work:
Map Reduce Local Work
Stage: Stage-3
Map Reduce
Alias -> Map Operator Tree:
-
file:/tmp/njain/hive_2011-03-23_22-02-43_416_1632994873933518743/-mr-10003
+
file:/var/folders/bZ/bZe+iKfoFTuPoShRd6dy6-tOU9Y/-Tmp-/njain/hive_2011-11-11_17-36-43_167_4198125662621366018/-mr-10003
Reduce Output Operator
sort order:
tag: -1
@@ -219,35 +228,45 @@ STAGE PLANS:
Stage: Stage-10
Map Reduce Local Work
Alias -> Map Local Tables:
-
file:/tmp/njain/hive_2011-03-23_22-02-43_416_1632994873933518743/-mr-10002
+
file:/var/folders/bZ/bZe+iKfoFTuPoShRd6dy6-tOU9Y/-Tmp-/njain/hive_2011-11-11_17-36-43_167_4198125662621366018/-mr-10002
Fetch Operator
limit: -1
-
file:/tmp/njain/hive_2011-03-23_22-02-43_416_1632994873933518743/-mr-10004
+ null-subquery1:src_12-subquery1:src
Fetch Operator
limit: -1
Alias -> Map Local Operator Tree:
-
file:/tmp/njain/hive_2011-03-23_22-02-43_416_1632994873933518743/-mr-10002
- Union
- HashTable Sink Operator
- condition expressions:
- 0
- 1
- handleSkewJoin: false
- keys:
- 0 [Column[_col0]]
- 1 [Column[_col0]]
- Position of Big Table: 1
-
file:/tmp/njain/hive_2011-03-23_22-02-43_416_1632994873933518743/-mr-10004
- Union
- HashTable Sink Operator
- condition expressions:
- 0
- 1
- handleSkewJoin: false
- keys:
- 0 [Column[_col0]]
- 1 [Column[_col0]]
- Position of Big Table: 1
+
file:/var/folders/bZ/bZe+iKfoFTuPoShRd6dy6-tOU9Y/-Tmp-/njain/hive_2011-11-11_17-36-43_167_4198125662621366018/-mr-10002
+ TableScan
+ Union
+ HashTable Sink Operator
+ condition expressions:
+ 0
+ 1
+ handleSkewJoin: false
+ keys:
+ 0 [Column[_col0]]
+ 1 [Column[_col0]]
+ Position of Big Table: 1
+ null-subquery1:src_12-subquery1:src
+ TableScan
+ alias: src
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ expr: value
+ type: string
+ outputColumnNames: _col0, _col1
+ Union
+ HashTable Sink Operator
+ condition expressions:
+ 0
+ 1
+ handleSkewJoin: false
+ keys:
+ 0 [Column[_col0]]
+ 1 [Column[_col0]]
+ Position of Big Table: 1
Stage: Stage-7
Map Reduce
@@ -294,28 +313,38 @@ STAGE PLANS:
Stage: Stage-2
Map Reduce
Alias -> Map Operator Tree:
-
file:/tmp/njain/hive_2011-03-23_22-02-43_416_1632994873933518743/-mr-10002
- Union
- Reduce Output Operator
- key expressions:
- expr: _col0
- type: string
- sort order: +
- Map-reduce partition columns:
- expr: _col0
- type: string
- tag: 0
-
file:/tmp/njain/hive_2011-03-23_22-02-43_416_1632994873933518743/-mr-10004
- Union
- Reduce Output Operator
- key expressions:
- expr: _col0
+
file:/var/folders/bZ/bZe+iKfoFTuPoShRd6dy6-tOU9Y/-Tmp-/njain/hive_2011-11-11_17-36-43_167_4198125662621366018/-mr-10002
+ TableScan
+ Union
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ tag: 0
+ null-subquery1:src_12-subquery1:src
+ TableScan
+ alias: src
+ Select Operator
+ expressions:
+ expr: key
type: string
- sort order: +
- Map-reduce partition columns:
- expr: _col0
+ expr: value
type: string
- tag: 0
+ outputColumnNames: _col0, _col1
+ Union
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ tag: 0
src3:src
TableScan
alias: src
@@ -359,26 +388,6 @@ STAGE PLANS:
input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- Stage: Stage-4
- Map Reduce
- Alias -> Map Operator Tree:
- null-subquery1:src_12-subquery1:src
- TableScan
- alias: src
- Select Operator
- expressions:
- expr: key
- type: string
- expr: value
- type: string
- outputColumnNames: _col0, _col1
- File Output Operator
- compressed: false
- GlobalTableId: 0
- table:
- input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
- output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-
Stage: Stage-0
Fetch Operator
limit: -1
@@ -396,7 +405,7 @@ JOIN
ON src_12.key = src3.k AND src3.k < 200
PREHOOK: type: QUERY
PREHOOK: Input: default@src
-PREHOOK: Output:
file:/tmp/njain/hive_2011-03-23_22-02-44_469_912794075622273367/-mr-10000
+PREHOOK: Output:
file:/var/folders/bZ/bZe+iKfoFTuPoShRd6dy6-tOU9Y/-Tmp-/njain/hive_2011-11-11_17-36-44_708_3827922999145042039/-mr-10000
POSTHOOK: query: SELECT count(1)
FROM
(
@@ -409,5 +418,5 @@ JOIN
ON src_12.key = src3.k AND src3.k < 200
POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
-POSTHOOK: Output:
file:/tmp/njain/hive_2011-03-23_22-02-44_469_912794075622273367/-mr-10000
+POSTHOOK: Output:
file:/var/folders/bZ/bZe+iKfoFTuPoShRd6dy6-tOU9Y/-Tmp-/njain/hive_2011-11-11_17-36-44_708_3827922999145042039/-mr-10000
548