Author: hashutosh
Date: Wed Nov 27 16:49:32 2013
New Revision: 1546104

URL: http://svn.apache.org/r1546104
Log:
HIVE-5817 : column name to index mapping in VectorizationContext is broken 
(Remus Rusanu, Sergey Shelukhin via Ashutosh Chauhan)

Added:
    
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContextRegion.java
    hive/trunk/ql/src/test/queries/clientpositive/vectorized_context.q
    hive/trunk/ql/src/test/results/clientpositive/vectorized_context.q.out
Modified:
    
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java
    
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java
    
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
    
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java

Modified: 
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java
URL: 
http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java?rev=1546104&r1=1546103&r2=1546104&view=diff
==============================================================================
--- 
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java
 (original)
+++ 
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java
 Wed Nov 27 16:49:32 2013
@@ -28,6 +28,7 @@ import org.apache.commons.logging.LogFac
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator;
 import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
+import org.apache.hadoop.hive.ql.exec.TableScanOperator;
 import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.exec.persistence.MapJoinKey;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
@@ -39,11 +40,12 @@ import org.apache.hadoop.hive.ql.plan.Ma
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
 import org.apache.hadoop.hive.ql.plan.api.OperatorType;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
 
 /**
  * The vectorized version of the MapJoinOperator.
  */
-public class VectorMapJoinOperator extends MapJoinOperator {
+public class VectorMapJoinOperator extends MapJoinOperator implements 
VectorizationContextRegion {
 
   private static final Log LOG = LogFactory.getLog(
       VectorMapJoinOperator.class.getName());
@@ -77,7 +79,10 @@ public class VectorMapJoinOperator exten
   //
   private transient int batchIndex;
   private transient VectorHashKeyWrapper[] keyValues;
-
+  
+  private transient VectorizationContext vOutContext = null;
+  private transient VectorizedRowBatchCtx vrbCtx = null;
+  
   public VectorMapJoinOperator() {
     super();
   }
@@ -113,36 +118,28 @@ public class VectorMapJoinOperator exten
     bigTableValueExpressions = 
vContext.getVectorExpressions(exprs.get(posBigTable));
 
     List<String> outColNames = desc.getOutputColumnNames();
-    int outputColumnIndex = 0;
-
-    Map<String, Integer> cMap = vContext.getColumnMap();
-    for(byte alias:order) {
-      for(ExprNodeDesc expr: exprs.get(alias)) {
-        String columnName = outColNames.get(outputColumnIndex);
-        if (!cMap.containsKey(columnName)) {
-          vContext.addOutputColumn(columnName, expr.getTypeString());
-        }
-        ++outputColumnIndex;
-      }
+    
+    Map<String, Integer> mapOutCols = new HashMap<String, 
Integer>(outColNames.size());
+    
+    int outColIndex = 0;
+    for(String outCol: outColNames) {
+      mapOutCols.put(outCol,  outColIndex++);
     }
-
-    this.fileKey = vContext.getFileKey();
+    
+    vOutContext = new VectorizationContext(mapOutCols, outColIndex);
+    vOutContext.setFileKey(vContext.getFileKey() + "/MAP_JOIN_" + 
desc.getBigTableAlias());
+    this.fileKey = vOutContext.getFileKey();
   }
 
   @Override
   public void initializeOp(Configuration hconf) throws HiveException {
     super.initializeOp(hconf);
+    
 
-    Map<String, Map<Integer, String>> allTypeMaps = Utilities.
-        getMapRedWork(hconf).getMapWork().getScratchColumnVectorTypes();
-    Map<Integer, String> typeMap = allTypeMaps.get(fileKey);
-
-    Map<String, Map<String, Integer>> allColumnMaps = Utilities.
-        getMapRedWork(hconf).getMapWork().getScratchColumnMap();
-
-    Map<String, Integer> columnMap = allColumnMaps.get(fileKey);
-
-    outputBatch = VectorizedRowBatch.buildBatch(typeMap, columnMap);
+    vrbCtx = new VectorizedRowBatchCtx();
+    vrbCtx.init(hconf, this.fileKey, (StructObjectInspector) 
this.outputObjInspector);
+    
+    outputBatch = vrbCtx.createVectorizedRowBatch();
 
     keyWrapperBatch 
=VectorHashKeyWrapperBatch.compileKeyWrapperBatch(keyExpressions);
 
@@ -298,4 +295,9 @@ public class VectorMapJoinOperator exten
     batchIndex = -1;
     keyValues = null;
   }
+
+  @Override
+  public VectorizationContext getOuputVectorizationContext() {
+    return vOutContext;
+  }
 }

Added: 
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContextRegion.java
URL: 
http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContextRegion.java?rev=1546104&view=auto
==============================================================================
--- 
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContextRegion.java
 (added)
+++ 
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContextRegion.java
 Wed Nov 27 16:49:32 2013
@@ -0,0 +1,29 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector;
+
+/**
+ * VectorizationContextRegion is an optional interface implemented by vectorized
+ * operators that change the vectorization context (region boundary operators).
+ */
+public interface VectorizationContextRegion {
+
+  VectorizationContext getOuputVectorizationContext();
+
+}

Modified: 
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java
URL: 
http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java?rev=1546104&r1=1546103&r2=1546104&view=diff
==============================================================================
--- 
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java
 (original)
+++ 
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java
 Wed Nov 27 16:49:32 2013
@@ -177,51 +177,6 @@ public class VectorizedRowBatch implemen
     this.valueWriters = valueWriters;
   }
 
-  public static VectorizedRowBatch buildBatch(Map<Integer, String> typeMap,
-      Map<String, Integer> columnMap) throws HiveException {
-
-    Map<Integer, ColumnVector> mapVectorColumn = new HashMap<Integer, 
ColumnVector>(typeMap.size());
-    int maxIndex = 0;
-
-    Iterator<Entry<Integer, String>> typeMapIt = typeMap.entrySet().iterator();
-    while(typeMapIt.hasNext()) {
-      Entry<Integer, String> type = typeMapIt.next();
-      ColumnVector cv = 
VectorizationContext.allocateColumnVector(type.getValue(),
-          VectorizedRowBatch.DEFAULT_SIZE);
-      mapVectorColumn.put(type.getKey(), cv);
-      if (maxIndex < type.getKey()) {
-        maxIndex = type.getKey();
-      }
-    }
-
-    VectorizedRowBatch batch = new VectorizedRowBatch(maxIndex+1);
-    for(int i=0; i <= maxIndex; ++i) {
-      ColumnVector cv = mapVectorColumn.get(i);
-      if (cv == null) {
-        // allocate a default type for the unused column.
-        // there are APIs that expect all cols[i] to be non NULL
-        cv = VectorizationContext.allocateColumnVector("long",
-            VectorizedRowBatch.DEFAULT_SIZE);
-      }
-      batch.cols[i] = cv;
-    }
-
-    // Validate that every column in the column map exists
-    Iterator<Entry<String, Integer>> columnMapIt = 
columnMap.entrySet().iterator();
-    while(columnMapIt.hasNext()) {
-      Entry<String, Integer> cm = columnMapIt.next();
-      if (batch.cols.length <= cm.getValue() || batch.cols[cm.getValue()] == 
null) {
-        throw new HiveException(String.format(
-            "Internal error: The type map has no entry for column %d %s",
-            cm.getValue(), cm.getKey()));
-      }
-    }
-
-    batch.reset();
-
-    return batch;
-  }
-
   /**
    * Resets the row batch to default state
    *  - sets selectedInUse to false

Modified: 
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
URL: 
http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java?rev=1546104&r1=1546103&r2=1546104&view=diff
==============================================================================
--- 
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
 (original)
+++ 
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
 Wed Nov 27 16:49:32 2013
@@ -96,6 +96,25 @@ public class VectorizedRowBatchCtx {
   public VectorizedRowBatchCtx() {
 
   }
+  
+  /**
+   * Initializes the VectorizedRowBatch context based on an arbitrary object 
inspector
+   * Used by non-tablescan operators when they change the vectorization 
context 
+   * @param hiveConf
+   * @param fileKey 
+   *          The key on which to retrieve the extra column mapping from the 
map scratch
+   * @param rowOI
+   *          Object inspector that shapes the column types
+   */
+  public void init(Configuration hiveConf, String fileKey,
+      StructObjectInspector rowOI) {
+    columnTypeMap = Utilities
+        .getMapRedWork(hiveConf).getMapWork().getScratchColumnVectorTypes()
+        .get(fileKey);
+    this.rowOI= rowOI;
+    this.rawRowOI = rowOI;
+  }
+  
 
   /**
    * Initializes VectorizedRowBatch context based on the
@@ -251,6 +270,7 @@ public class VectorizedRowBatchCtx {
     }
     result.numCols = fieldRefs.size();
     this.addScratchColumnsToBatch(result);
+    result.reset();
     return result;
   }
 
@@ -351,4 +371,5 @@ public class VectorizedRowBatchCtx {
       return new LongColumnVector(defaultSize);
     }
   }
+
 }

Modified: 
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
URL: 
http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java?rev=1546104&r1=1546103&r2=1546104&view=diff
==============================================================================
--- 
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
 (original)
+++ 
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
 Wed Nov 27 16:49:32 2013
@@ -23,9 +23,11 @@ import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.HashMap;
 import java.util.HashSet;
+import java.util.Iterator;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.Map.Entry;
 import java.util.Set;
 import java.util.Stack;
 
@@ -48,6 +50,7 @@ import org.apache.hadoop.hive.ql.exec.UD
 import org.apache.hadoop.hive.ql.exec.mr.MapRedTask;
 import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizationContextRegion;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedInputFormatInterface;
 import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
 import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
@@ -342,8 +345,17 @@ public class Vectorizer implements Physi
       topNodes.addAll(mapWork.getAliasToWork().values());
       HashMap<Node, Object> nodeOutput = new HashMap<Node, Object>();
       ogw.startWalking(topNodes, nodeOutput);
-      mapWork.setScratchColumnVectorTypes(vnp.getScratchColumnVectorTypes());
-      mapWork.setScratchColumnMap(vnp.getScratchColumnMap());
+      
+      Map<String, Map<Integer, String>> columnVectorTypes = 
vnp.getScratchColumnVectorTypes();
+      mapWork.setScratchColumnVectorTypes(columnVectorTypes);
+      Map<String, Map<String, Integer>> columnMap = vnp.getScratchColumnMap();
+      mapWork.setScratchColumnMap(columnMap);
+      
+      if (LOG.isDebugEnabled()) {
+        LOG.debug(String.format("vectorTypes: %s", 
columnVectorTypes.toString()));
+        LOG.debug(String.format("columnMap: %s", columnMap.toString()));
+      }
+      
       return;
     }
   }
@@ -411,33 +423,42 @@ public class Vectorizer implements Physi
     public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
         Object... nodeOutputs) throws SemanticException {
 
-      Node firstOp = stack.firstElement();
-      TableScanOperator tsOp = null;
-
-      tsOp = (TableScanOperator) firstOp;
-
-      VectorizationContext vContext = vContextsByTSOp.get(tsOp);
-      if (vContext == null) {
-        String fileKey = "";
+      Operator<? extends OperatorDesc> op = (Operator<? extends OperatorDesc>) 
nd;
+      
+      VectorizationContext vContext = null;      
+      
+      if (op instanceof TableScanOperator) {
+        vContext = getVectorizationContext(op, physicalContext);
         for (String onefile : mWork.getPathToAliases().keySet()) {
           List<String> aliases = mWork.getPathToAliases().get(onefile);
           for (String alias : aliases) {
-            Operator<? extends OperatorDesc> op = 
mWork.getAliasToWork().get(alias);
-            if (op == tsOp) {
-              fileKey = onefile;
-              if (vContext == null) {
-                vContext = getVectorizationContext(tsOp, physicalContext);
-              }
-              vContext.setFileKey(fileKey);
-              vectorizationContexts.put(fileKey, vContext);
+            Operator<? extends OperatorDesc> opRoot = 
mWork.getAliasToWork().get(alias);
+            if (op == opRoot) {
+              // The same vectorization context is copied multiple times into
+              // the MapWork scratch columnMap
+              // Each partition gets a copy
+              //
+              vContext.setFileKey(onefile);
+              vectorizationContexts.put(onefile, vContext);
               break;
             }
           }
         }
-        vContextsByTSOp.put(tsOp, vContext);
+        vContextsByTSOp.put(op, vContext);
+      } else {
+        assert stack.size() > 1;
+        // Walk down the stack of operators until we find one willing to give us a context.
+        // At the bottom will be the TS operator, guaranteed to have a context
+        int i= stack.size()-2;
+        while (vContext == null) {
+          Operator<? extends OperatorDesc> opParent = (Operator<? extends 
OperatorDesc>) stack.get(i);
+          vContext = vContextsByTSOp.get(opParent);
+          --i;
+        }
       }
-
-      Operator<? extends OperatorDesc> op = (Operator<? extends OperatorDesc>) 
nd;
+      
+      assert vContext != null;
+      
       if (op.getType().equals(OperatorType.REDUCESINK) &&
           
op.getParentOperators().get(0).getType().equals(OperatorType.GROUPBY)) {
         // No need to vectorize
@@ -453,6 +474,12 @@ public class Vectorizer implements Physi
             if (vectorOp != op) {
               opsDone.add(vectorOp);
             }
+            if (vectorOp instanceof VectorizationContextRegion) {
+              VectorizationContextRegion vcRegion = 
(VectorizationContextRegion) vectorOp;
+              VectorizationContext vOutContext = 
vcRegion.getOuputVectorizationContext();
+              vContextsByTSOp.put(op, vOutContext);
+              vectorizationContexts.put(vOutContext.getFileKey(), vOutContext);
+            }
           }
         } catch (HiveException e) {
           throw new SemanticException(e);
@@ -678,7 +705,7 @@ public class Vectorizer implements Physi
     return supportedDataTypes.contains(type.toLowerCase());
   }
 
-  private VectorizationContext getVectorizationContext(TableScanOperator op,
+  private VectorizationContext getVectorizationContext(Operator<? extends 
OperatorDesc> op,
       PhysicalContext pctx) {
     RowResolver rr = 
pctx.getParseContext().getOpParseCtx().get(op).getRowResolver();
 

Added: hive/trunk/ql/src/test/queries/clientpositive/vectorized_context.q
URL: 
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/vectorized_context.q?rev=1546104&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/vectorized_context.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/vectorized_context.q Wed Nov 
27 16:49:32 2013
@@ -0,0 +1,47 @@
+create table store(s_store_sk int, s_city string)
+stored as orc;
+insert overwrite table store
+select cint, cstring1
+from alltypesorc
+where cint not in (
+-3728, -563, 762, 6981, 253665376, 528534767, 626923679);
+create table store_sales(ss_store_sk int, ss_hdemo_sk int, ss_net_profit 
double)
+stored as orc;
+insert overwrite table store_sales
+select cint, cint, cdouble
+from alltypesorc
+where cint not in (
+-3728, -563, 762, 6981, 253665376, 528534767, 626923679);
+create table household_demographics(hd_demo_sk int)
+stored as orc;
+insert overwrite table household_demographics
+select cint
+from alltypesorc
+where cint not in (
+-3728, -563, 762, 6981, 253665376, 528534767, 626923679);
+set hive.auto.convert.join=true;
+set hive.vectorized.execution.enabled=true;
+
+
+explain 
+select store.s_city, ss_net_profit
+from store_sales
+JOIN store ON store_sales.ss_store_sk = store.s_store_sk
+JOIN household_demographics ON store_sales.ss_hdemo_sk = 
household_demographics.hd_demo_sk
+limit 100
+;
+
+select store.s_city, ss_net_profit
+from store_sales
+JOIN store ON store_sales.ss_store_sk = store.s_store_sk
+JOIN household_demographics ON store_sales.ss_hdemo_sk = 
household_demographics.hd_demo_sk
+limit 100
+;
+
+set hive.auto.convert.join=false;
+set hive.vectorized.execution.enabled=false;
+
+drop table store;
+drop table store_sales;
+drop table household_demographics;
+

Added: hive/trunk/ql/src/test/results/clientpositive/vectorized_context.q.out
URL: 
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/vectorized_context.q.out?rev=1546104&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/vectorized_context.q.out 
(added)
+++ hive/trunk/ql/src/test/results/clientpositive/vectorized_context.q.out Wed 
Nov 27 16:49:32 2013
@@ -0,0 +1,379 @@
+PREHOOK: query: create table store(s_store_sk int, s_city string)
+stored as orc
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table store(s_store_sk int, s_city string)
+stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@store
+PREHOOK: query: insert overwrite table store
+select cint, cstring1
+from alltypesorc
+where cint not in (
+-3728, -563, 762, 6981, 253665376, 528534767, 626923679)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: default@store
+POSTHOOK: query: insert overwrite table store
+select cint, cstring1
+from alltypesorc
+where cint not in (
+-3728, -563, 762, 6981, 253665376, 528534767, 626923679)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: default@store
+POSTHOOK: Lineage: store.s_city SIMPLE 
[(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from 
deserializer), ]
+POSTHOOK: Lineage: store.s_store_sk SIMPLE 
[(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from 
deserializer), ]
+PREHOOK: query: create table store_sales(ss_store_sk int, ss_hdemo_sk int, 
ss_net_profit double)
+stored as orc
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table store_sales(ss_store_sk int, ss_hdemo_sk int, 
ss_net_profit double)
+stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@store_sales
+POSTHOOK: Lineage: store.s_city SIMPLE 
[(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from 
deserializer), ]
+POSTHOOK: Lineage: store.s_store_sk SIMPLE 
[(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from 
deserializer), ]
+PREHOOK: query: insert overwrite table store_sales
+select cint, cint, cdouble
+from alltypesorc
+where cint not in (
+-3728, -563, 762, 6981, 253665376, 528534767, 626923679)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: default@store_sales
+POSTHOOK: query: insert overwrite table store_sales
+select cint, cint, cdouble
+from alltypesorc
+where cint not in (
+-3728, -563, 762, 6981, 253665376, 528534767, 626923679)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: default@store_sales
+POSTHOOK: Lineage: store.s_city SIMPLE 
[(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from 
deserializer), ]
+POSTHOOK: Lineage: store.s_store_sk SIMPLE 
[(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from 
deserializer), ]
+POSTHOOK: Lineage: store_sales.ss_hdemo_sk SIMPLE 
[(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from 
deserializer), ]
+POSTHOOK: Lineage: store_sales.ss_net_profit SIMPLE 
[(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:from 
deserializer), ]
+POSTHOOK: Lineage: store_sales.ss_store_sk SIMPLE 
[(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from 
deserializer), ]
+PREHOOK: query: create table household_demographics(hd_demo_sk int)
+stored as orc
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table household_demographics(hd_demo_sk int)
+stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@household_demographics
+POSTHOOK: Lineage: store.s_city SIMPLE 
[(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from 
deserializer), ]
+POSTHOOK: Lineage: store.s_store_sk SIMPLE 
[(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from 
deserializer), ]
+POSTHOOK: Lineage: store_sales.ss_hdemo_sk SIMPLE 
[(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from 
deserializer), ]
+POSTHOOK: Lineage: store_sales.ss_net_profit SIMPLE 
[(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:from 
deserializer), ]
+POSTHOOK: Lineage: store_sales.ss_store_sk SIMPLE 
[(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from 
deserializer), ]
+PREHOOK: query: insert overwrite table household_demographics
+select cint
+from alltypesorc
+where cint not in (
+-3728, -563, 762, 6981, 253665376, 528534767, 626923679)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: default@household_demographics
+POSTHOOK: query: insert overwrite table household_demographics
+select cint
+from alltypesorc
+where cint not in (
+-3728, -563, 762, 6981, 253665376, 528534767, 626923679)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: default@household_demographics
+POSTHOOK: Lineage: household_demographics.hd_demo_sk SIMPLE 
[(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from 
deserializer), ]
+POSTHOOK: Lineage: store.s_city SIMPLE 
[(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from 
deserializer), ]
+POSTHOOK: Lineage: store.s_store_sk SIMPLE 
[(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from 
deserializer), ]
+POSTHOOK: Lineage: store_sales.ss_hdemo_sk SIMPLE 
[(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from 
deserializer), ]
+POSTHOOK: Lineage: store_sales.ss_net_profit SIMPLE 
[(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:from 
deserializer), ]
+POSTHOOK: Lineage: store_sales.ss_store_sk SIMPLE 
[(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from 
deserializer), ]
+PREHOOK: query: explain 
+select store.s_city, ss_net_profit
+from store_sales
+JOIN store ON store_sales.ss_store_sk = store.s_store_sk
+JOIN household_demographics ON store_sales.ss_hdemo_sk = 
household_demographics.hd_demo_sk
+limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain 
+select store.s_city, ss_net_profit
+from store_sales
+JOIN store ON store_sales.ss_store_sk = store.s_store_sk
+JOIN household_demographics ON store_sales.ss_hdemo_sk = 
household_demographics.hd_demo_sk
+limit 100
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: household_demographics.hd_demo_sk SIMPLE 
[(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from 
deserializer), ]
+POSTHOOK: Lineage: store.s_city SIMPLE 
[(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from 
deserializer), ]
+POSTHOOK: Lineage: store.s_store_sk SIMPLE 
[(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from 
deserializer), ]
+POSTHOOK: Lineage: store_sales.ss_hdemo_sk SIMPLE 
[(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from 
deserializer), ]
+POSTHOOK: Lineage: store_sales.ss_net_profit SIMPLE 
[(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:from 
deserializer), ]
+POSTHOOK: Lineage: store_sales.ss_store_sk SIMPLE 
[(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from 
deserializer), ]
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME 
store_sales)) (TOK_TABREF (TOK_TABNAME store)) (= (. (TOK_TABLE_OR_COL 
store_sales) ss_store_sk) (. (TOK_TABLE_OR_COL store) s_store_sk))) (TOK_TABREF 
(TOK_TABNAME household_demographics)) (= (. (TOK_TABLE_OR_COL store_sales) 
ss_hdemo_sk) (. (TOK_TABLE_OR_COL household_demographics) hd_demo_sk)))) 
(TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR 
(. (TOK_TABLE_OR_COL store) s_city)) (TOK_SELEXPR (TOK_TABLE_OR_COL 
ss_net_profit))) (TOK_LIMIT 100)))
+
+STAGE DEPENDENCIES:
+  Stage-6 is a root stage
+  Stage-4 depends on stages: Stage-6
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-6
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        household_demographics 
+          Fetch Operator
+            limit: -1
+        store_sales 
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        household_demographics 
+          TableScan
+            alias: household_demographics
+            HashTable Sink Operator
+              condition expressions:
+                0 {_col6} {_col2}
+                1 
+              handleSkewJoin: false
+              keys:
+                0 [Column[_col1]]
+                1 [Column[hd_demo_sk]]
+              Position of Big Table: 0
+        store_sales 
+          TableScan
+            alias: store_sales
+            HashTable Sink Operator
+              condition expressions:
+                0 {ss_hdemo_sk} {ss_net_profit}
+                1 {s_city}
+              handleSkewJoin: false
+              keys:
+                0 [Column[ss_store_sk]]
+                1 [Column[s_store_sk]]
+              Position of Big Table: 1
+
+  Stage: Stage-4
+    Map Reduce
+      Alias -> Map Operator Tree:
+        store 
+          TableScan
+            alias: store
+            Map Join Operator
+              condition map:
+                   Inner Join 0 to 1
+              condition expressions:
+                0 {ss_hdemo_sk} {ss_net_profit}
+                1 {s_city}
+              handleSkewJoin: false
+              keys:
+                0 [Column[ss_store_sk]]
+                1 [Column[s_store_sk]]
+              outputColumnNames: _col1, _col2, _col6
+              Position of Big Table: 1
+              Vectorized execution: true
+              Map Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                condition expressions:
+                  0 {_col6} {_col2}
+                  1 
+                handleSkewJoin: false
+                keys:
+                  0 [Column[_col1]]
+                  1 [Column[hd_demo_sk]]
+                outputColumnNames: _col1, _col6
+                Position of Big Table: 0
+                Vectorized execution: true
+                Select Operator
+                  expressions:
+                        expr: _col1
+                        type: string
+                        expr: _col6
+                        type: double
+                  outputColumnNames: _col0, _col1
+                  Vectorized execution: true
+                  Limit
+                    Vectorized execution: true
+                    File Output Operator
+                      compressed: false
+                      GlobalTableId: 0
+                      table:
+                          input format: 
org.apache.hadoop.mapred.TextInputFormat
+                          output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                          serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      Vectorized execution: true
+      Local Work:
+        Map Reduce Local Work
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 100
+
+
+PREHOOK: query: select store.s_city, ss_net_profit
+from store_sales
+JOIN store ON store_sales.ss_store_sk = store.s_store_sk
+JOIN household_demographics ON store_sales.ss_hdemo_sk = 
household_demographics.hd_demo_sk
+limit 100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@household_demographics
+PREHOOK: Input: default@store
+PREHOOK: Input: default@store_sales
+#### A masked pattern was here ####
+POSTHOOK: query: select store.s_city, ss_net_profit
+from store_sales
+JOIN store ON store_sales.ss_store_sk = store.s_store_sk
+JOIN household_demographics ON store_sales.ss_hdemo_sk = 
household_demographics.hd_demo_sk
+limit 100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@household_demographics
+POSTHOOK: Input: default@store
+POSTHOOK: Input: default@store_sales
+#### A masked pattern was here ####
+POSTHOOK: Lineage: household_demographics.hd_demo_sk SIMPLE 
[(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from 
deserializer), ]
+POSTHOOK: Lineage: store.s_city SIMPLE 
[(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from 
deserializer), ]
+POSTHOOK: Lineage: store.s_store_sk SIMPLE 
[(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from 
deserializer), ]
+POSTHOOK: Lineage: store_sales.ss_hdemo_sk SIMPLE 
[(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from 
deserializer), ]
+POSTHOOK: Lineage: store_sales.ss_net_profit SIMPLE 
[(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:from 
deserializer), ]
+POSTHOOK: Lineage: store_sales.ss_store_sk SIMPLE 
[(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from 
deserializer), ]
+LFgU5WT87C2yJ4W4YU0r8Pp        NULL
+v3p153e2bSkGS70v04G    NULL
+0pOH7A4O8aQ37NuBqn     NULL
+8ShAFcD734S8Q26WjMwpq0Q        NULL
+nOF31ehjY7ULCHMf       NULL
+t32s57Cjt4a250qQgVNAB5T        NULL
+nvO822k30OaH37Il       NULL
+M152O  NULL
+FgJ7Hft6845s1766oyt82q NULL
+0ovL2T NULL
+3e27C1jTdTQPdvCWi4if   NULL
+XWIExC7NI3bqu6VhR14g2  NULL
+6g482F6IEbD2mKeLE153e0w        NULL
+2diFRgr78diK6rSl0J     NULL
+21UE6fJyy      NULL
+H3bTj310QaL012cPe      NULL
+7342q5oFQL8QIl7cO      NULL
+VkXY4IOSO      NULL
+4K1nnlkt7786Sq8x0ARXtr NULL
+m4eSLx4qihVg1e32       NULL
+OSBq0b NULL
+aKbAu2WJV8HWHU6K1Ukq   NULL
+LcfhOxSVg68ACRvw1xC7LU NULL
+AwVW3sV2gsM    NULL
+Tqar00A        NULL
+mC4mr  NULL
+YHVB0  NULL
+2vtmB0qNlHlGV15P1p     NULL
+2wbgE0Yo1RX82H2sp4f1l5 NULL
+BSmA3fAai62QpNjmL66y8d NULL
+314nQ6nVj      NULL
+H8mh48T7       NULL
+U616In80F54RI  NULL
+BuSLb058f2     NULL
+OSc0r  NULL
+75KN62a2iAf0j5Jol77wH7 NULL
+66Mx4v NULL
+7SchQY2j74BW7dQNy5G5   NULL
+FEefA  NULL
+P2DNeo00PA7DJF0        NULL
+SMXqH  NULL
+6fB40r75kxeX3k10       NULL
+AmYxfSOBdJv8B48l0VAeeI NULL
+S87OO  NULL
+0EIL81O        NULL
+dG8B5PQ3b85U362G6huu   NULL
+XOypj8 NULL
+61eT82N24      NULL
+lVfv3fD1jn532h3K67H    NULL
+J1an665U       NULL
+Y6P8Ji868U7u8W3X2GHNiOLh       NULL
+wXbLC0LS2bFf12f1ljC    NULL
+j0L50J2e82     NULL
+8EPG0Xi307qd   NULL
+04Y1mA17       NULL
+lTLWdPg0yM0IgY76s70    NULL
+KDr0tMRnCJJIBA84       NULL
+71KN0p4NhE4xm4ixm      NULL
+u6HT8fTw6IgPf2 NULL
+7WYO11kWn6fT2pOlh5sTDIwG       NULL
+Yc6gaH2OFF7cymt8q23Fr  NULL
+RQbQ5  NULL
+75Y6J  NULL
+eUx01FREb2LD4kle4dpS   NULL
+T0Y8Vi41EYW4CpQ6Hg1Xg30w       NULL
+Egf7KV7TeT     NULL
+LIJuG07tfqoLu8K        NULL
+uUTO41xk6VyqYPh        NULL
+aEvOE7hUNO0d67AM3V7BwUCK       NULL
+8AqHq  NULL
+gl03UrAU4bWrOvqwwf     NULL
+NULL   NULL
+LX6QHG6sEmBAIbA6e6Am24 NULL
+i330V4Y0Lm4ajyKqM1X2Y  NULL
+64K51WMTs      NULL
+iW12567av      NULL
+v3U315C36UQ4oEW        NULL
+niiH6MSNaSk4fRRb74o1y28c       NULL
+p4WmTkrM       NULL
+L1Q62u2        NULL
+hnrm68NiEQCL4  NULL
+fju0XS06MyUS7Nqk8P8    NULL
+0VWukLt        NULL
+642LsMiNArr0ufitL3l7RCU7       NULL
+DWNvg304j4KTMEs2174Cy1 NULL
+DU1m68i1Q7W3   NULL
+44vcS2S5wu684R05fq01fu NULL
+eu3X5Qfp4sHv5H NULL
+QbdFB1d7vfaM7  NULL
+s43i4lU        NULL
+0pOTqi3O44rEnGQ        NULL
+32cB3f NULL
+c300w5 NULL
+w66f63n        NULL
+iR76SEs2C4V    NULL
+ss2PoJAipj6B1tn75O     NULL
+n3ner11ab4     NULL
+r17jGvc7gR     NULL
+5G1Xp277YJRklEO5kHx    NULL
+B78T0SnxlCe5AQ522GBUf6c6       NULL
+PREHOOK: query: drop table store
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@store
+PREHOOK: Output: default@store
+POSTHOOK: query: drop table store
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@store
+POSTHOOK: Output: default@store
+POSTHOOK: Lineage: household_demographics.hd_demo_sk SIMPLE 
[(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from 
deserializer), ]
+POSTHOOK: Lineage: store.s_city SIMPLE 
[(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from 
deserializer), ]
+POSTHOOK: Lineage: store.s_store_sk SIMPLE 
[(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from 
deserializer), ]
+POSTHOOK: Lineage: store_sales.ss_hdemo_sk SIMPLE 
[(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from 
deserializer), ]
+POSTHOOK: Lineage: store_sales.ss_net_profit SIMPLE 
[(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:from 
deserializer), ]
+POSTHOOK: Lineage: store_sales.ss_store_sk SIMPLE 
[(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from 
deserializer), ]
+PREHOOK: query: drop table store_sales
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@store_sales
+PREHOOK: Output: default@store_sales
+POSTHOOK: query: drop table store_sales
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@store_sales
+POSTHOOK: Output: default@store_sales
+POSTHOOK: Lineage: household_demographics.hd_demo_sk SIMPLE 
[(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from 
deserializer), ]
+POSTHOOK: Lineage: store.s_city SIMPLE 
[(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from 
deserializer), ]
+POSTHOOK: Lineage: store.s_store_sk SIMPLE 
[(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from 
deserializer), ]
+POSTHOOK: Lineage: store_sales.ss_hdemo_sk SIMPLE 
[(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from 
deserializer), ]
+POSTHOOK: Lineage: store_sales.ss_net_profit SIMPLE 
[(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:from 
deserializer), ]
+POSTHOOK: Lineage: store_sales.ss_store_sk SIMPLE 
[(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from 
deserializer), ]
+PREHOOK: query: drop table household_demographics
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@household_demographics
+PREHOOK: Output: default@household_demographics
+POSTHOOK: query: drop table household_demographics
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@household_demographics
+POSTHOOK: Output: default@household_demographics
+POSTHOOK: Lineage: household_demographics.hd_demo_sk SIMPLE 
[(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from 
deserializer), ]
+POSTHOOK: Lineage: store.s_city SIMPLE 
[(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from 
deserializer), ]
+POSTHOOK: Lineage: store.s_store_sk SIMPLE 
[(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from 
deserializer), ]
+POSTHOOK: Lineage: store_sales.ss_hdemo_sk SIMPLE 
[(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from 
deserializer), ]
+POSTHOOK: Lineage: store_sales.ss_net_profit SIMPLE 
[(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:from 
deserializer), ]
+POSTHOOK: Lineage: store_sales.ss_store_sk SIMPLE 
[(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from 
deserializer), ]


Reply via email to