Author: heyongqiang
Date: Tue Nov 15 20:12:54 2011
New Revision: 1202405

URL: http://svn.apache.org/viewvc?rev=1202405&view=rev
Log:
HIVE-2566: reduce the number of map-reduce jobs for union all (namit via He 
Yongqiang)

Added:
    hive/trunk/ql/src/test/queries/clientpositive/union24.q
    hive/trunk/ql/src/test/results/clientpositive/union24.q.out
Modified:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java
    
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRProcContext.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRUnion1.java
    
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
    hive/trunk/ql/src/test/results/clientpositive/auto_join27.q.out
    hive/trunk/ql/src/test/results/clientpositive/create_view.q.out
    hive/trunk/ql/src/test/results/clientpositive/input25.q.out
    hive/trunk/ql/src/test/results/clientpositive/input26.q.out
    hive/trunk/ql/src/test/results/clientpositive/join35.q.out
    hive/trunk/ql/src/test/results/clientpositive/lineage1.q.out
    hive/trunk/ql/src/test/results/clientpositive/load_dyn_part14.q.out
    hive/trunk/ql/src/test/results/clientpositive/merge4.q.out
    hive/trunk/ql/src/test/results/clientpositive/ppd_union_view.q.out
    hive/trunk/ql/src/test/results/clientpositive/ql_rewrite_gbtoidx.q.out
    hive/trunk/ql/src/test/results/clientpositive/stats1.q.out
    hive/trunk/ql/src/test/results/clientpositive/union10.q.out
    hive/trunk/ql/src/test/results/clientpositive/union11.q.out
    hive/trunk/ql/src/test/results/clientpositive/union12.q.out
    hive/trunk/ql/src/test/results/clientpositive/union14.q.out
    hive/trunk/ql/src/test/results/clientpositive/union15.q.out
    hive/trunk/ql/src/test/results/clientpositive/union17.q.out
    hive/trunk/ql/src/test/results/clientpositive/union18.q.out
    hive/trunk/ql/src/test/results/clientpositive/union19.q.out
    hive/trunk/ql/src/test/results/clientpositive/union20.q.out
    hive/trunk/ql/src/test/results/clientpositive/union3.q.out
    hive/trunk/ql/src/test/results/clientpositive/union4.q.out
    hive/trunk/ql/src/test/results/clientpositive/union5.q.out
    hive/trunk/ql/src/test/results/clientpositive/union6.q.out
    hive/trunk/ql/src/test/results/clientpositive/union7.q.out

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java
URL: 
http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java?rev=1202405&r1=1202404&r2=1202405&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java 
(original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java Tue Nov 
15 20:12:54 2011
@@ -680,16 +680,19 @@ public abstract class Operator<T extends
     assert parentIndex != -1;
     if (parentOperators.size() == 1) {
       parentOperators = null;
-    } else {
+    }
+    else {
       parentOperators.remove(parentIndex);
     }
 
     int childIndex = parent.getChildOperators().indexOf(this);
-    assert childIndex != -1;
-    if (parent.getChildOperators().size() == 1) {
-      parent.setChildOperators(null);
-    } else {
-      parent.getChildOperators().remove(childIndex);
+    if (childIndex >= 0) {
+      if (parent.getChildOperators().size() == 1) {
+        parent.setChildOperators(null);
+      }
+      else {
+        parent.getChildOperators().remove(childIndex);
+      }
     }
   }
 

Modified: 
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRProcContext.java
URL: 
http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRProcContext.java?rev=1202405&r1=1202404&r2=1202405&view=diff
==============================================================================
--- 
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRProcContext.java
 (original)
+++ 
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRProcContext.java
 Tue Nov 15 20:12:54 2011
@@ -102,11 +102,13 @@ public class GenMRProcContext implements
     Task<? extends Serializable> uTask;
     List<String> taskTmpDir;
     List<TableDesc> tt_desc;
+    List<Operator<? extends Serializable>> listTopOperators;
 
     public GenMRUnionCtx() {
       uTask = null;
       taskTmpDir = new ArrayList<String>();
       tt_desc = new ArrayList<TableDesc>();
+      listTopOperators = new ArrayList<Operator<? extends Serializable>>();
     }
 
     public Task<? extends Serializable> getUTask() {
@@ -132,6 +134,19 @@ public class GenMRProcContext implements
     public List<TableDesc> getTTDesc() {
       return tt_desc;
     }
+
+    public List<Operator<? extends Serializable>> getListTopOperators() {
+      return listTopOperators;
+    }
+
+    public void setListTopOperators(
+        List<Operator<? extends Serializable>> listTopOperators) {
+      this.listTopOperators = listTopOperators;
+    }
+    
+    public void addListTopOperators(Operator<? extends Serializable> 
topOperator) {
+      listTopOperators.add(topOperator);
+    }
   }
 
   /**

Modified: 
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRUnion1.java
URL: 
http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRUnion1.java?rev=1202405&r1=1202404&r2=1202405&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRUnion1.java 
(original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRUnion1.java 
Tue Nov 15 20:12:54 2011
@@ -50,6 +50,7 @@ import org.apache.hadoop.hive.ql.plan.Ma
 import org.apache.hadoop.hive.ql.plan.PartitionDesc;
 import org.apache.hadoop.hive.ql.plan.PlanUtils;
 import org.apache.hadoop.hive.ql.plan.TableDesc;
+import org.apache.hadoop.hive.ql.plan.TableScanDesc;
 
 /**
  * Processor for the rule - TableScan followed by Union.
@@ -60,13 +61,145 @@ public class GenMRUnion1 implements Node
   }
 
   /**
+   * Process the union if all sub-queries are map-only
+   * 
+   * @return
+   * @throws SemanticException
+   */
+  private Object processMapOnlyUnion(UnionOperator union, Stack<Node> stack,
+      GenMRProcContext ctx, UnionProcContext uCtx) throws SemanticException {
+    // merge currTask from multiple topOps
+    HashMap<Operator<? extends Serializable>, Task<? extends Serializable>> 
opTaskMap = ctx.getOpTaskMap();
+    if (opTaskMap != null && opTaskMap.size() > 0) {
+      Task<? extends Serializable> tsk = opTaskMap.get(null);
+      if (tsk != null) {
+        ctx.setCurrTask(tsk);
+      }
+    }
+
+    UnionParseContext uPrsCtx = uCtx.getUnionParseContext(union);
+    if ((uPrsCtx != null) && (uPrsCtx.getMapJoinQuery())) {
+      GenMapRedUtils.mergeMapJoinUnion(union, ctx,
+          UnionProcFactory.getPositionParent(union, stack));
+    }
+    else {
+      ctx.getMapCurrCtx().put(
+          (Operator<? extends Serializable>) union,
+          new GenMapRedCtx(ctx.getCurrTask(), ctx.getCurrTopOp(),
+              ctx.getCurrAliasId()));
+    }
+    return null;
+  }
+
+  /**
+   * Process the union when the parent is a map-reduce job. Create a temporary
+   * output, and let the union task read from the temporary output.
+   * 
+   * The files created for all the inputs are in the union context and later
+   * used to initialize the union plan
+   * 
+   * @param parent
+   * @param child
+   * @param uTask
+   * @param ctx
+   * @param uCtxTask
+   */
+  private void processSubQueryUnionCreateIntermediate(
+      Operator<? extends Serializable> parent,
+      Operator<? extends Serializable> child,
+      Task<? extends Serializable> uTask, GenMRProcContext ctx,
+      GenMRUnionCtx uCtxTask) {
+    ParseContext parseCtx = ctx.getParseCtx();
+
+    TableDesc tt_desc = 
PlanUtils.getIntermediateFileTableDesc(PlanUtils.getFieldSchemasFromRowSchema(
+        parent.getSchema(), "temporarycol"));
+
+    // generate the temporary file
+    Context baseCtx = parseCtx.getContext();
+    String taskTmpDir = baseCtx.getMRTmpFileURI();
+
+    // Create a file sink operator for this file name
+    Operator<? extends Serializable> fs_op = OperatorFactory.get(
+        new FileSinkDesc(taskTmpDir, tt_desc, parseCtx.getConf().getBoolVar(
+            HiveConf.ConfVars.COMPRESSINTERMEDIATE)), parent.getSchema());
+
+    assert parent.getChildOperators().size() == 1;
+    parent.getChildOperators().set(0, fs_op);
+
+    List<Operator<? extends Serializable>> parentOpList = new 
ArrayList<Operator<? extends Serializable>>();
+    parentOpList.add(parent);
+    fs_op.setParentOperators(parentOpList);
+
+    // Create a dummy table scan operator
+    Operator<? extends Serializable> ts_op = OperatorFactory.get(
+        new TableScanDesc(), parent.getSchema());
+    List<Operator<? extends Serializable>> childOpList = new 
ArrayList<Operator<? extends Serializable>>();
+    childOpList.add(child);
+    ts_op.setChildOperators(childOpList);
+    child.replaceParent(parent, ts_op);
+
+    // Add the path to alias mapping
+
+    uCtxTask.addTaskTmpDir(taskTmpDir);
+    uCtxTask.addTTDesc(tt_desc);
+    uCtxTask.addListTopOperators(ts_op);
+
+    // The union task is empty. The files created for all the inputs are
+    // assembled in the union context and later used to initialize the union
+    // plan
+
+    ctx.getCurrTask().addDependentTask(uTask);
+  }
+
+  /**
+   * Union Operator encountered. A map-only query is encountered at the given
+   * position. However, at least one of the sub-queries is a map-reduce job. Copy the
+   * information from the current top operator to the union context.
+   * 
+   * @param ctx
+   * @param uCtxTask
+   * @param union
+   * @param stack
+   * @throws SemanticException
+   */
+  private void processSubQueryUnionMerge(GenMRProcContext ctx,
+      GenMRUnionCtx uCtxTask, UnionOperator union, Stack<Node> stack)
+      throws SemanticException {
+    // The current plan can be thrown away after being merged with the union
+    // plan
+    Task<? extends Serializable> uTask = uCtxTask.getUTask();
+    MapredWork plan = (MapredWork) uTask.getWork();
+    Operator<? extends Serializable> currTopOp = ctx.getCurrTopOp();
+    String currAliasId = ctx.getCurrAliasId();
+    GenMapRedUtils.setTaskPlan(ctx.getCurrAliasId(), ctx.getCurrTopOp(), plan,
+        false, ctx);
+  }
+
+  private void processSubQueryUnionMapJoin(GenMRProcContext ctx) {
+    AbstractMapJoinOperator<? extends MapJoinDesc> mjOp = 
ctx.getCurrMapJoinOp();
+    assert mjOp != null;
+    GenMRMapJoinCtx mjCtx = ctx.getMapJoinCtx(mjOp);
+    assert mjCtx != null;
+    MapredWork plan = (MapredWork) ctx.getCurrTask().getWork();
+
+    String taskTmpDir = mjCtx.getTaskTmpDir();
+    TableDesc tt_desc = mjCtx.getTTDesc();
+    assert plan.getPathToAliases().get(taskTmpDir) == null;
+    plan.getPathToAliases().put(taskTmpDir, new ArrayList<String>());
+    plan.getPathToAliases().get(taskTmpDir).add(taskTmpDir);
+    plan.getPathToPartitionInfo().put(taskTmpDir,
+        new PartitionDesc(tt_desc, null));
+    plan.getAliasToWork().put(taskTmpDir, mjCtx.getRootMapJoinOp());
+  }
+
+  /**
    * Union Operator encountered . Currently, the algorithm is pretty simple: If
-   * all the sub-queries are map-only, dont do anything. However, if there is a
+   * all the sub-queries are map-only, don't do anything. However, if there is 
a
    * mapjoin followed by the union, merge at the union Otherwise, insert a
    * FileSink on top of all the sub-queries.
-   *
+   * 
    * This can be optimized later on.
-   *
+   * 
    * @param nd
    *          the file sink operator encountered
    * @param opProcCtx
@@ -81,31 +214,12 @@ public class GenMRUnion1 implements Node
 
     // Map-only subqueries can be optimized in future to not write to a file in
     // future
-    Map<Operator<? extends Serializable>, GenMapRedCtx> mapCurrCtx = ctx
-        .getMapCurrCtx();
+    Map<Operator<? extends Serializable>, GenMapRedCtx> mapCurrCtx = 
ctx.getMapCurrCtx();
 
     // The plan needs to be broken only if one of the sub-queries involve a
     // map-reduce job
     if (uCtx.isMapOnlySubq()) {
-      // merge currTask from multiple topOps
-      HashMap<Operator<? extends Serializable>, Task<? extends Serializable>> 
opTaskMap = ctx
-          .getOpTaskMap();
-      if (opTaskMap != null && opTaskMap.size() > 0) {
-        Task<? extends Serializable> tsk = opTaskMap.get(null);
-        if (tsk != null) {
-          ctx.setCurrTask(tsk);
-        }
-      }
-
-      UnionParseContext uPrsCtx = uCtx.getUnionParseContext(union);
-      if ((uPrsCtx != null) && (uPrsCtx.getMapJoinQuery())) {
-        GenMapRedUtils.mergeMapJoinUnion(union, ctx, UnionProcFactory
-            .getPositionParent(union, stack));
-      } else {
-        mapCurrCtx.put((Operator<? extends Serializable>) nd, new GenMapRedCtx(
-            ctx.getCurrTask(), ctx.getCurrTopOp(), ctx.getCurrAliasId()));
-      }
-      return null;
+      return processMapOnlyUnion(union, stack, ctx, uCtx);
     }
 
     ctx.setCurrUnionOp(union);
@@ -116,16 +230,8 @@ public class GenMRUnion1 implements Node
     Task<? extends Serializable> currTask = ctx.getCurrTask();
     int pos = UnionProcFactory.getPositionParent(union, stack);
 
-    // is the current task a root task
-    if (uPrsCtx.getRootTask(pos) && (!ctx.getRootTasks().contains(currTask))) {
-      ctx.getRootTasks().add(currTask);
-    }
-
     GenMRUnionCtx uCtxTask = ctx.getUnionTask(union);
     Task<? extends Serializable> uTask = null;
-
-    Operator<? extends Serializable> parent = union.getParentOperators().get(
-        pos);
     MapredWork uPlan = null;
 
     // union is encountered for the first time
@@ -135,70 +241,36 @@ public class GenMRUnion1 implements Node
       uTask = TaskFactory.get(uPlan, parseCtx.getConf());
       uCtxTask.setUTask(uTask);
       ctx.setUnionTask(union, uCtxTask);
-    } else {
+    }
+    else {
       uTask = uCtxTask.getUTask();
     }
 
-    // If there is a mapjoin at position 'pos'
-    if (uPrsCtx.getMapJoinSubq(pos)) {
-      AbstractMapJoinOperator<? extends MapJoinDesc> mjOp = 
ctx.getCurrMapJoinOp();
-      assert mjOp != null;
-      GenMRMapJoinCtx mjCtx = ctx.getMapJoinCtx(mjOp);
-      assert mjCtx != null;
-      MapredWork plan = (MapredWork) currTask.getWork();
-
-      String taskTmpDir = mjCtx.getTaskTmpDir();
-      TableDesc tt_desc = mjCtx.getTTDesc();
-      assert plan.getPathToAliases().get(taskTmpDir) == null;
-      plan.getPathToAliases().put(taskTmpDir, new ArrayList<String>());
-      plan.getPathToAliases().get(taskTmpDir).add(taskTmpDir);
-      plan.getPathToPartitionInfo().put(taskTmpDir,
-          new PartitionDesc(tt_desc, null));
-      plan.getAliasToWork().put(taskTmpDir, mjCtx.getRootMapJoinOp());
+    // Copy into the current union task plan if the sub-query is a map-only root task without a mapjoin
+    if (uPrsCtx.getMapOnlySubq(pos) && uPrsCtx.getRootTask(pos)
+        && !uPrsCtx.getMapJoinSubq(pos)) {
+      processSubQueryUnionMerge(ctx, uCtxTask, union, stack);
     }
+    // If it is a map-reduce job, create a temporary file
+    else {
+      // is the current task a root task
+      if (uPrsCtx.getRootTask(pos) && 
(!ctx.getRootTasks().contains(currTask))) {
+        ctx.getRootTasks().add(currTask);
+      }
+      // If there is a mapjoin at position 'pos'
+      if (uPrsCtx.getMapJoinSubq(pos)) {
+        processSubQueryUnionMapJoin(ctx);
+      }
 
-    TableDesc tt_desc = PlanUtils.getIntermediateFileTableDesc(PlanUtils
-        .getFieldSchemasFromRowSchema(parent.getSchema(), "temporarycol"));
-
-    // generate the temporary file
-    Context baseCtx = parseCtx.getContext();
-    String taskTmpDir = baseCtx.getMRTmpFileURI();
-
-    // Add the path to alias mapping
-    uCtxTask.addTaskTmpDir(taskTmpDir);
-    uCtxTask.addTTDesc(tt_desc);
-
-    // The union task is empty. The files created for all the inputs are
-    // assembled in the
-    // union context and later used to initialize the union plan
-
-    // Create a file sink operator for this file name
-    Operator<? extends Serializable> fs_op = OperatorFactory.get(
-        new FileSinkDesc(taskTmpDir, tt_desc, parseCtx.getConf().getBoolVar(
-        HiveConf.ConfVars.COMPRESSINTERMEDIATE)), parent.getSchema());
-
-    assert parent.getChildOperators().size() == 1;
-    parent.getChildOperators().set(0, fs_op);
-
-    List<Operator<? extends Serializable>> parentOpList =
-      new ArrayList<Operator<? extends Serializable>>();
-    parentOpList.add(parent);
-    fs_op.setParentOperators(parentOpList);
-
-    currTask.addDependentTask(uTask);
-
-    // If it is map-only task, add the files to be processed
-    if (uPrsCtx.getMapOnlySubq(pos) && uPrsCtx.getRootTask(pos)) {
-      GenMapRedUtils.setTaskPlan(ctx.getCurrAliasId(), ctx.getCurrTopOp(),
-          (MapredWork) currTask.getWork(), false, ctx);
+      
processSubQueryUnionCreateIntermediate(union.getParentOperators().get(pos), 
union, uTask, ctx, uCtxTask);
     }
 
     ctx.setCurrTask(uTask);
     ctx.setCurrAliasId(null);
     ctx.setCurrTopOp(null);
 
-    mapCurrCtx.put((Operator<? extends Serializable>) nd, new GenMapRedCtx(ctx
-        .getCurrTask(), null, null));
+    mapCurrCtx.put((Operator<? extends Serializable>) nd,
+        new GenMapRedCtx(ctx.getCurrTask(), null, null));
 
     return null;
   }

Modified: 
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
URL: 
http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java?rev=1202405&r1=1202404&r2=1202405&view=diff
==============================================================================
--- 
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java 
(original)
+++ 
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java 
Tue Nov 15 20:12:54 2011
@@ -326,12 +326,18 @@ public final class GenMapRedUtils {
     assert uCtx != null;
 
     List<String> taskTmpDirLst = uCtx.getTaskTmpDir();
+    if ((taskTmpDirLst == null) || (taskTmpDirLst.isEmpty())) {
+      return;
+    }
+    
     List<TableDesc> tt_descLst = uCtx.getTTDesc();
     assert !taskTmpDirLst.isEmpty() && !tt_descLst.isEmpty();
     assert taskTmpDirLst.size() == tt_descLst.size();
     int size = taskTmpDirLst.size();
     assert local == false;
 
+    List<Operator<? extends Serializable>> topOperators = 
uCtx.getListTopOperators();
+    
     for (int pos = 0; pos < size; pos++) {
       String taskTmpDir = taskTmpDirLst.get(pos);
       TableDesc tt_desc = tt_descLst.get(pos);
@@ -340,7 +346,7 @@ public final class GenMapRedUtils {
         plan.getPathToAliases().get(taskTmpDir).add(taskTmpDir);
         plan.getPathToPartitionInfo().put(taskTmpDir,
             new PartitionDesc(tt_desc, null));
-        plan.getAliasToWork().put(taskTmpDir, currUnionOp);
+        plan.getAliasToWork().put(taskTmpDir, topOperators.get(pos));
       }
     }
   }

Added: hive/trunk/ql/src/test/queries/clientpositive/union24.q
URL: 
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/union24.q?rev=1202405&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/union24.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/union24.q Tue Nov 15 20:12:54 
2011
@@ -0,0 +1,70 @@
+create table src2 as select key, count(1) as count from src group by key;
+create table src3 as select * from src2;
+create table src4 as select * from src2;
+create table src5 as select * from src2;
+
+
+set hive.merge.mapfiles=false;
+set hive.merge.mapredfiles=false;
+
+
+explain extended
+select s.key, s.count from (
+  select key, count from src2  where key < 10
+  union all
+  select key, count from src3  where key < 10
+  union all
+  select key, count from src4  where key < 10
+  union all
+  select key, count(1) as count from src5 where key < 10 group by key
+)s
+order by s.key;
+
+select s.key, s.count from (
+  select key, count from src2  where key < 10
+  union all
+  select key, count from src3  where key < 10
+  union all
+  select key, count from src4  where key < 10
+  union all
+  select key, count(1) as count from src5 where key < 10 group by key
+)s
+order by s.key;
+
+explain extended
+select s.key, s.count from (
+  select key, count from src2  where key < 10
+  union all
+  select key, count from src3  where key < 10
+  union all
+  select a.key as key, b.count as count from src4 a join src5 b on a.key=b.key 
where a.key < 10
+)s
+order by s.key;
+
+select s.key, s.count from (
+  select key, count from src2  where key < 10
+  union all
+  select key, count from src3  where key < 10
+  union all
+  select a.key as key, b.count as count from src4 a join src5 b on a.key=b.key 
where a.key < 10
+)s
+order by s.key;
+
+explain extended
+select s.key, s.count from (
+  select key, count from src2  where key < 10
+  union all
+  select key, count from src3  where key < 10
+  union all
+  select a.key as key, count(1) as count from src4 a join src5 b on 
a.key=b.key where a.key < 10 group by a.key
+)s
+order by s.key;
+
+select s.key, s.count from (
+  select key, count from src2  where key < 10
+  union all
+  select key, count from src3  where key < 10
+  union all
+  select a.key as key, count(1) as count from src4 a join src5 b on 
a.key=b.key where a.key < 10 group by a.key
+)s
+order by s.key;

Modified: hive/trunk/ql/src/test/results/clientpositive/auto_join27.q.out
URL: 
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/auto_join27.q.out?rev=1202405&r1=1202404&r2=1202405&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/auto_join27.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/auto_join27.q.out Tue Nov 15 
20:12:54 2011
@@ -27,14 +27,13 @@ ABSTRACT SYNTAX TREE:
 
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
-  Stage-8 depends on stages: Stage-1, Stage-4 , consists of Stage-9, Stage-10, 
Stage-2
+  Stage-8 depends on stages: Stage-1 , consists of Stage-9, Stage-10, Stage-2
   Stage-9 has a backup stage: Stage-2
   Stage-6 depends on stages: Stage-9
   Stage-3 depends on stages: Stage-2, Stage-6, Stage-7
   Stage-10 has a backup stage: Stage-2
   Stage-7 depends on stages: Stage-10
   Stage-2
-  Stage-4 is a root stage
   Stage-0 is a root stage
 
 STAGE PLANS:
@@ -132,65 +131,75 @@ STAGE PLANS:
   Stage: Stage-6
     Map Reduce
       Alias -> Map Operator Tree:
-        
file:/tmp/njain/hive_2011-03-23_22-02-43_416_1632994873933518743/-mr-10002 
-          Union
-            Map Join Operator
-              condition map:
-                   Inner Join 0 to 1
-              condition expressions:
-                0 
-                1 
-              handleSkewJoin: false
-              keys:
-                0 [Column[_col0]]
-                1 [Column[_col0]]
-              Position of Big Table: 0
-              Select Operator
-                Group By Operator
-                  aggregations:
-                        expr: count(1)
-                  bucketGroup: false
-                  mode: hash
-                  outputColumnNames: _col0
-                  File Output Operator
-                    compressed: false
-                    GlobalTableId: 0
-                    table:
-                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
-                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-        
file:/tmp/njain/hive_2011-03-23_22-02-43_416_1632994873933518743/-mr-10004 
-          Union
-            Map Join Operator
-              condition map:
-                   Inner Join 0 to 1
-              condition expressions:
-                0 
-                1 
-              handleSkewJoin: false
-              keys:
-                0 [Column[_col0]]
-                1 [Column[_col0]]
-              Position of Big Table: 0
-              Select Operator
-                Group By Operator
-                  aggregations:
-                        expr: count(1)
-                  bucketGroup: false
-                  mode: hash
-                  outputColumnNames: _col0
-                  File Output Operator
-                    compressed: false
-                    GlobalTableId: 0
-                    table:
-                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
-                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+        
file:/var/folders/bZ/bZe+iKfoFTuPoShRd6dy6-tOU9Y/-Tmp-/njain/hive_2011-11-11_17-36-43_167_4198125662621366018/-mr-10002
 
+          TableScan
+            Union
+              Map Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                condition expressions:
+                  0 
+                  1 
+                handleSkewJoin: false
+                keys:
+                  0 [Column[_col0]]
+                  1 [Column[_col0]]
+                Position of Big Table: 0
+                Select Operator
+                  Group By Operator
+                    aggregations:
+                          expr: count(1)
+                    bucketGroup: false
+                    mode: hash
+                    outputColumnNames: _col0
+                    File Output Operator
+                      compressed: false
+                      GlobalTableId: 0
+                      table:
+                          input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+        null-subquery1:src_12-subquery1:src 
+          TableScan
+            alias: src
+            Select Operator
+              expressions:
+                    expr: key
+                    type: string
+                    expr: value
+                    type: string
+              outputColumnNames: _col0, _col1
+              Union
+                Map Join Operator
+                  condition map:
+                       Inner Join 0 to 1
+                  condition expressions:
+                    0 
+                    1 
+                  handleSkewJoin: false
+                  keys:
+                    0 [Column[_col0]]
+                    1 [Column[_col0]]
+                  Position of Big Table: 0
+                  Select Operator
+                    Group By Operator
+                      aggregations:
+                            expr: count(1)
+                      bucketGroup: false
+                      mode: hash
+                      outputColumnNames: _col0
+                      File Output Operator
+                        compressed: false
+                        GlobalTableId: 0
+                        table:
+                            input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                            output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
       Local Work:
         Map Reduce Local Work
 
   Stage: Stage-3
     Map Reduce
       Alias -> Map Operator Tree:
-        
file:/tmp/njain/hive_2011-03-23_22-02-43_416_1632994873933518743/-mr-10003 
+        
file:/var/folders/bZ/bZe+iKfoFTuPoShRd6dy6-tOU9Y/-Tmp-/njain/hive_2011-11-11_17-36-43_167_4198125662621366018/-mr-10003
 
             Reduce Output Operator
               sort order: 
               tag: -1
@@ -219,35 +228,45 @@ STAGE PLANS:
   Stage: Stage-10
     Map Reduce Local Work
       Alias -> Map Local Tables:
-        
file:/tmp/njain/hive_2011-03-23_22-02-43_416_1632994873933518743/-mr-10002 
+        
file:/var/folders/bZ/bZe+iKfoFTuPoShRd6dy6-tOU9Y/-Tmp-/njain/hive_2011-11-11_17-36-43_167_4198125662621366018/-mr-10002
 
           Fetch Operator
             limit: -1
-        
file:/tmp/njain/hive_2011-03-23_22-02-43_416_1632994873933518743/-mr-10004 
+        null-subquery1:src_12-subquery1:src 
           Fetch Operator
             limit: -1
       Alias -> Map Local Operator Tree:
-        
file:/tmp/njain/hive_2011-03-23_22-02-43_416_1632994873933518743/-mr-10002 
-          Union
-            HashTable Sink Operator
-              condition expressions:
-                0 
-                1 
-              handleSkewJoin: false
-              keys:
-                0 [Column[_col0]]
-                1 [Column[_col0]]
-              Position of Big Table: 1
-        
file:/tmp/njain/hive_2011-03-23_22-02-43_416_1632994873933518743/-mr-10004 
-          Union
-            HashTable Sink Operator
-              condition expressions:
-                0 
-                1 
-              handleSkewJoin: false
-              keys:
-                0 [Column[_col0]]
-                1 [Column[_col0]]
-              Position of Big Table: 1
+        
file:/var/folders/bZ/bZe+iKfoFTuPoShRd6dy6-tOU9Y/-Tmp-/njain/hive_2011-11-11_17-36-43_167_4198125662621366018/-mr-10002
 
+          TableScan
+            Union
+              HashTable Sink Operator
+                condition expressions:
+                  0 
+                  1 
+                handleSkewJoin: false
+                keys:
+                  0 [Column[_col0]]
+                  1 [Column[_col0]]
+                Position of Big Table: 1
+        null-subquery1:src_12-subquery1:src 
+          TableScan
+            alias: src
+            Select Operator
+              expressions:
+                    expr: key
+                    type: string
+                    expr: value
+                    type: string
+              outputColumnNames: _col0, _col1
+              Union
+                HashTable Sink Operator
+                  condition expressions:
+                    0 
+                    1 
+                  handleSkewJoin: false
+                  keys:
+                    0 [Column[_col0]]
+                    1 [Column[_col0]]
+                  Position of Big Table: 1
 
   Stage: Stage-7
     Map Reduce
@@ -294,28 +313,38 @@ STAGE PLANS:
   Stage: Stage-2
     Map Reduce
       Alias -> Map Operator Tree:
-        
file:/tmp/njain/hive_2011-03-23_22-02-43_416_1632994873933518743/-mr-10002 
-          Union
-            Reduce Output Operator
-              key expressions:
-                    expr: _col0
-                    type: string
-              sort order: +
-              Map-reduce partition columns:
-                    expr: _col0
-                    type: string
-              tag: 0
-        
file:/tmp/njain/hive_2011-03-23_22-02-43_416_1632994873933518743/-mr-10004 
-          Union
-            Reduce Output Operator
-              key expressions:
-                    expr: _col0
+        
file:/var/folders/bZ/bZe+iKfoFTuPoShRd6dy6-tOU9Y/-Tmp-/njain/hive_2011-11-11_17-36-43_167_4198125662621366018/-mr-10002
 
+          TableScan
+            Union
+              Reduce Output Operator
+                key expressions:
+                      expr: _col0
+                      type: string
+                sort order: +
+                Map-reduce partition columns:
+                      expr: _col0
+                      type: string
+                tag: 0
+        null-subquery1:src_12-subquery1:src 
+          TableScan
+            alias: src
+            Select Operator
+              expressions:
+                    expr: key
                     type: string
-              sort order: +
-              Map-reduce partition columns:
-                    expr: _col0
+                    expr: value
                     type: string
-              tag: 0
+              outputColumnNames: _col0, _col1
+              Union
+                Reduce Output Operator
+                  key expressions:
+                        expr: _col0
+                        type: string
+                  sort order: +
+                  Map-reduce partition columns:
+                        expr: _col0
+                        type: string
+                  tag: 0
         src3:src 
           TableScan
             alias: src
@@ -359,26 +388,6 @@ STAGE PLANS:
                     input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
                     output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
 
-  Stage: Stage-4
-    Map Reduce
-      Alias -> Map Operator Tree:
-        null-subquery1:src_12-subquery1:src 
-          TableScan
-            alias: src
-            Select Operator
-              expressions:
-                    expr: key
-                    type: string
-                    expr: value
-                    type: string
-              outputColumnNames: _col0, _col1
-              File Output Operator
-                compressed: false
-                GlobalTableId: 0
-                table:
-                    input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
-                    output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-
   Stage: Stage-0
     Fetch Operator
       limit: -1
@@ -396,7 +405,7 @@ JOIN
 ON src_12.key = src3.k AND src3.k < 200
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src
-PREHOOK: Output: 
file:/tmp/njain/hive_2011-03-23_22-02-44_469_912794075622273367/-mr-10000
+PREHOOK: Output: 
file:/var/folders/bZ/bZe+iKfoFTuPoShRd6dy6-tOU9Y/-Tmp-/njain/hive_2011-11-11_17-36-44_708_3827922999145042039/-mr-10000
 POSTHOOK: query: SELECT count(1)
 FROM
 (
@@ -409,5 +418,5 @@ JOIN
 ON src_12.key = src3.k AND src3.k < 200
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
-POSTHOOK: Output: 
file:/tmp/njain/hive_2011-03-23_22-02-44_469_912794075622273367/-mr-10000
+POSTHOOK: Output: 
file:/var/folders/bZ/bZe+iKfoFTuPoShRd6dy6-tOU9Y/-Tmp-/njain/hive_2011-11-11_17-36-44_708_3827922999145042039/-mr-10000
 548


Reply via email to