Author: hashutosh
Date: Wed Nov 27 16:49:32 2013
New Revision: 1546104
URL: http://svn.apache.org/r1546104
Log:
HIVE-5817 : column name to index mapping in VectorizationContext is broken
(Remus Rusanu, Sergey Shelukhin via Ashutosh Chauhan)
Added:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContextRegion.java
hive/trunk/ql/src/test/queries/clientpositive/vectorized_context.q
hive/trunk/ql/src/test/results/clientpositive/vectorized_context.q.out
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java?rev=1546104&r1=1546103&r2=1546104&view=diff
==============================================================================
---
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java
(original)
+++
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java
Wed Nov 27 16:49:32 2013
@@ -28,6 +28,7 @@ import org.apache.commons.logging.LogFac
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator;
import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
+import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.exec.persistence.MapJoinKey;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
@@ -39,11 +40,12 @@ import org.apache.hadoop.hive.ql.plan.Ma
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.ql.plan.api.OperatorType;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
/**
* The vectorized version of the MapJoinOperator.
*/
-public class VectorMapJoinOperator extends MapJoinOperator {
+public class VectorMapJoinOperator extends MapJoinOperator implements
VectorizationContextRegion {
private static final Log LOG = LogFactory.getLog(
VectorMapJoinOperator.class.getName());
@@ -77,7 +79,10 @@ public class VectorMapJoinOperator exten
//
private transient int batchIndex;
private transient VectorHashKeyWrapper[] keyValues;
-
+
+ private transient VectorizationContext vOutContext = null;
+ private transient VectorizedRowBatchCtx vrbCtx = null;
+
public VectorMapJoinOperator() {
super();
}
@@ -113,36 +118,28 @@ public class VectorMapJoinOperator exten
bigTableValueExpressions =
vContext.getVectorExpressions(exprs.get(posBigTable));
List<String> outColNames = desc.getOutputColumnNames();
- int outputColumnIndex = 0;
-
- Map<String, Integer> cMap = vContext.getColumnMap();
- for(byte alias:order) {
- for(ExprNodeDesc expr: exprs.get(alias)) {
- String columnName = outColNames.get(outputColumnIndex);
- if (!cMap.containsKey(columnName)) {
- vContext.addOutputColumn(columnName, expr.getTypeString());
- }
- ++outputColumnIndex;
- }
+
+ Map<String, Integer> mapOutCols = new HashMap<String,
Integer>(outColNames.size());
+
+ int outColIndex = 0;
+ for(String outCol: outColNames) {
+ mapOutCols.put(outCol, outColIndex++);
}
-
- this.fileKey = vContext.getFileKey();
+
+ vOutContext = new VectorizationContext(mapOutCols, outColIndex);
+ vOutContext.setFileKey(vContext.getFileKey() + "/MAP_JOIN_" +
desc.getBigTableAlias());
+ this.fileKey = vOutContext.getFileKey();
}
@Override
public void initializeOp(Configuration hconf) throws HiveException {
super.initializeOp(hconf);
+
- Map<String, Map<Integer, String>> allTypeMaps = Utilities.
- getMapRedWork(hconf).getMapWork().getScratchColumnVectorTypes();
- Map<Integer, String> typeMap = allTypeMaps.get(fileKey);
-
- Map<String, Map<String, Integer>> allColumnMaps = Utilities.
- getMapRedWork(hconf).getMapWork().getScratchColumnMap();
-
- Map<String, Integer> columnMap = allColumnMaps.get(fileKey);
-
- outputBatch = VectorizedRowBatch.buildBatch(typeMap, columnMap);
+ vrbCtx = new VectorizedRowBatchCtx();
+ vrbCtx.init(hconf, this.fileKey, (StructObjectInspector)
this.outputObjInspector);
+
+ outputBatch = vrbCtx.createVectorizedRowBatch();
keyWrapperBatch
=VectorHashKeyWrapperBatch.compileKeyWrapperBatch(keyExpressions);
@@ -298,4 +295,9 @@ public class VectorMapJoinOperator exten
batchIndex = -1;
keyValues = null;
}
+
+ @Override
+ public VectorizationContext getOuputVectorizationContext() {
+ return vOutContext;
+ }
}
Added:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContextRegion.java
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContextRegion.java?rev=1546104&view=auto
==============================================================================
---
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContextRegion.java
(added)
+++
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContextRegion.java
Wed Nov 27 16:49:32 2013
@@ -0,0 +1,29 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector;
+
+/**
+ * VectorizationContextRegion optional interface implemented by vectorized
operators
+ * that are changing the vectorization context (region boundary operators)
+ */
+public interface VectorizationContextRegion {
+
+ VectorizationContext getOuputVectorizationContext();
+
+}
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java?rev=1546104&r1=1546103&r2=1546104&view=diff
==============================================================================
---
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java
(original)
+++
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java
Wed Nov 27 16:49:32 2013
@@ -177,51 +177,6 @@ public class VectorizedRowBatch implemen
this.valueWriters = valueWriters;
}
- public static VectorizedRowBatch buildBatch(Map<Integer, String> typeMap,
- Map<String, Integer> columnMap) throws HiveException {
-
- Map<Integer, ColumnVector> mapVectorColumn = new HashMap<Integer,
ColumnVector>(typeMap.size());
- int maxIndex = 0;
-
- Iterator<Entry<Integer, String>> typeMapIt = typeMap.entrySet().iterator();
- while(typeMapIt.hasNext()) {
- Entry<Integer, String> type = typeMapIt.next();
- ColumnVector cv =
VectorizationContext.allocateColumnVector(type.getValue(),
- VectorizedRowBatch.DEFAULT_SIZE);
- mapVectorColumn.put(type.getKey(), cv);
- if (maxIndex < type.getKey()) {
- maxIndex = type.getKey();
- }
- }
-
- VectorizedRowBatch batch = new VectorizedRowBatch(maxIndex+1);
- for(int i=0; i <= maxIndex; ++i) {
- ColumnVector cv = mapVectorColumn.get(i);
- if (cv == null) {
- // allocate a default type for the unused column.
- // there are APIs that expect all cols[i] to be non NULL
- cv = VectorizationContext.allocateColumnVector("long",
- VectorizedRowBatch.DEFAULT_SIZE);
- }
- batch.cols[i] = cv;
- }
-
- // Validate that every column in the column map exists
- Iterator<Entry<String, Integer>> columnMapIt =
columnMap.entrySet().iterator();
- while(columnMapIt.hasNext()) {
- Entry<String, Integer> cm = columnMapIt.next();
- if (batch.cols.length <= cm.getValue() || batch.cols[cm.getValue()] ==
null) {
- throw new HiveException(String.format(
- "Internal error: The type map has no entry for column %d %s",
- cm.getValue(), cm.getKey()));
- }
- }
-
- batch.reset();
-
- return batch;
- }
-
/**
* Resets the row batch to default state
* - sets selectedInUse to false
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java?rev=1546104&r1=1546103&r2=1546104&view=diff
==============================================================================
---
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
(original)
+++
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
Wed Nov 27 16:49:32 2013
@@ -96,6 +96,25 @@ public class VectorizedRowBatchCtx {
public VectorizedRowBatchCtx() {
}
+
+ /**
+ * Initializes the VectorizedRowBatch context based on an arbitrary object
inspector
+ * Used by non-tablescan operators when they change the vectorization
context
+ * @param hiveConf
+ * @param fileKey
+ * The key on which to retrieve the extra column mapping from the
map scratch
+ * @param rowOI
+ * Object inspector that shapes the column types
+ */
+ public void init(Configuration hiveConf, String fileKey,
+ StructObjectInspector rowOI) {
+ columnTypeMap = Utilities
+ .getMapRedWork(hiveConf).getMapWork().getScratchColumnVectorTypes()
+ .get(fileKey);
+ this.rowOI= rowOI;
+ this.rawRowOI = rowOI;
+ }
+
/**
* Initializes VectorizedRowBatch context based on the
@@ -251,6 +270,7 @@ public class VectorizedRowBatchCtx {
}
result.numCols = fieldRefs.size();
this.addScratchColumnsToBatch(result);
+ result.reset();
return result;
}
@@ -351,4 +371,5 @@ public class VectorizedRowBatchCtx {
return new LongColumnVector(defaultSize);
}
}
+
}
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java?rev=1546104&r1=1546103&r2=1546104&view=diff
==============================================================================
---
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
(original)
+++
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
Wed Nov 27 16:49:32 2013
@@ -23,9 +23,11 @@ import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
+import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
+import java.util.Map.Entry;
import java.util.Set;
import java.util.Stack;
@@ -48,6 +50,7 @@ import org.apache.hadoop.hive.ql.exec.UD
import org.apache.hadoop.hive.ql.exec.mr.MapRedTask;
import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizationContextRegion;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedInputFormatInterface;
import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
@@ -342,8 +345,17 @@ public class Vectorizer implements Physi
topNodes.addAll(mapWork.getAliasToWork().values());
HashMap<Node, Object> nodeOutput = new HashMap<Node, Object>();
ogw.startWalking(topNodes, nodeOutput);
- mapWork.setScratchColumnVectorTypes(vnp.getScratchColumnVectorTypes());
- mapWork.setScratchColumnMap(vnp.getScratchColumnMap());
+
+ Map<String, Map<Integer, String>> columnVectorTypes =
vnp.getScratchColumnVectorTypes();
+ mapWork.setScratchColumnVectorTypes(columnVectorTypes);
+ Map<String, Map<String, Integer>> columnMap = vnp.getScratchColumnMap();
+ mapWork.setScratchColumnMap(columnMap);
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug(String.format("vectorTypes: %s",
columnVectorTypes.toString()));
+ LOG.debug(String.format("columnMap: %s", columnMap.toString()));
+ }
+
return;
}
}
@@ -411,33 +423,42 @@ public class Vectorizer implements Physi
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
Object... nodeOutputs) throws SemanticException {
- Node firstOp = stack.firstElement();
- TableScanOperator tsOp = null;
-
- tsOp = (TableScanOperator) firstOp;
-
- VectorizationContext vContext = vContextsByTSOp.get(tsOp);
- if (vContext == null) {
- String fileKey = "";
+ Operator<? extends OperatorDesc> op = (Operator<? extends OperatorDesc>)
nd;
+
+ VectorizationContext vContext = null;
+
+ if (op instanceof TableScanOperator) {
+ vContext = getVectorizationContext(op, physicalContext);
for (String onefile : mWork.getPathToAliases().keySet()) {
List<String> aliases = mWork.getPathToAliases().get(onefile);
for (String alias : aliases) {
- Operator<? extends OperatorDesc> op =
mWork.getAliasToWork().get(alias);
- if (op == tsOp) {
- fileKey = onefile;
- if (vContext == null) {
- vContext = getVectorizationContext(tsOp, physicalContext);
- }
- vContext.setFileKey(fileKey);
- vectorizationContexts.put(fileKey, vContext);
+ Operator<? extends OperatorDesc> opRoot =
mWork.getAliasToWork().get(alias);
+ if (op == opRoot) {
+ // The same vectorization context is copied multiple times into
+ // the MapWork scratch columnMap
+ // Each partition gets a copy
+ //
+ vContext.setFileKey(onefile);
+ vectorizationContexts.put(onefile, vContext);
break;
}
}
}
- vContextsByTSOp.put(tsOp, vContext);
+ vContextsByTSOp.put(op, vContext);
+ } else {
+ assert stack.size() > 1;
+      // Walk down the stack of operators until we find one willing to give
us a context.
+ // At the bottom will be the TS operator, guaranteed to have a context
+ int i= stack.size()-2;
+ while (vContext == null) {
+ Operator<? extends OperatorDesc> opParent = (Operator<? extends
OperatorDesc>) stack.get(i);
+ vContext = vContextsByTSOp.get(opParent);
+ --i;
+ }
}
-
- Operator<? extends OperatorDesc> op = (Operator<? extends OperatorDesc>)
nd;
+
+ assert vContext != null;
+
if (op.getType().equals(OperatorType.REDUCESINK) &&
op.getParentOperators().get(0).getType().equals(OperatorType.GROUPBY)) {
// No need to vectorize
@@ -453,6 +474,12 @@ public class Vectorizer implements Physi
if (vectorOp != op) {
opsDone.add(vectorOp);
}
+ if (vectorOp instanceof VectorizationContextRegion) {
+ VectorizationContextRegion vcRegion =
(VectorizationContextRegion) vectorOp;
+ VectorizationContext vOutContext =
vcRegion.getOuputVectorizationContext();
+ vContextsByTSOp.put(op, vOutContext);
+ vectorizationContexts.put(vOutContext.getFileKey(), vOutContext);
+ }
}
} catch (HiveException e) {
throw new SemanticException(e);
@@ -678,7 +705,7 @@ public class Vectorizer implements Physi
return supportedDataTypes.contains(type.toLowerCase());
}
- private VectorizationContext getVectorizationContext(TableScanOperator op,
+ private VectorizationContext getVectorizationContext(Operator<? extends
OperatorDesc> op,
PhysicalContext pctx) {
RowResolver rr =
pctx.getParseContext().getOpParseCtx().get(op).getRowResolver();
Added: hive/trunk/ql/src/test/queries/clientpositive/vectorized_context.q
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/vectorized_context.q?rev=1546104&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/vectorized_context.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/vectorized_context.q Wed Nov
27 16:49:32 2013
@@ -0,0 +1,47 @@
+create table store(s_store_sk int, s_city string)
+stored as orc;
+insert overwrite table store
+select cint, cstring1
+from alltypesorc
+where cint not in (
+-3728, -563, 762, 6981, 253665376, 528534767, 626923679);
+create table store_sales(ss_store_sk int, ss_hdemo_sk int, ss_net_profit
double)
+stored as orc;
+insert overwrite table store_sales
+select cint, cint, cdouble
+from alltypesorc
+where cint not in (
+-3728, -563, 762, 6981, 253665376, 528534767, 626923679);
+create table household_demographics(hd_demo_sk int)
+stored as orc;
+insert overwrite table household_demographics
+select cint
+from alltypesorc
+where cint not in (
+-3728, -563, 762, 6981, 253665376, 528534767, 626923679);
+set hive.auto.convert.join=true;
+set hive.vectorized.execution.enabled=true;
+
+
+explain
+select store.s_city, ss_net_profit
+from store_sales
+JOIN store ON store_sales.ss_store_sk = store.s_store_sk
+JOIN household_demographics ON store_sales.ss_hdemo_sk =
household_demographics.hd_demo_sk
+limit 100
+;
+
+select store.s_city, ss_net_profit
+from store_sales
+JOIN store ON store_sales.ss_store_sk = store.s_store_sk
+JOIN household_demographics ON store_sales.ss_hdemo_sk =
household_demographics.hd_demo_sk
+limit 100
+;
+
+set hive.auto.convert.join=false;
+set hive.vectorized.execution.enabled=false;
+
+drop table store;
+drop table store_sales;
+drop table household_demographics;
+
Added: hive/trunk/ql/src/test/results/clientpositive/vectorized_context.q.out
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/vectorized_context.q.out?rev=1546104&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/vectorized_context.q.out
(added)
+++ hive/trunk/ql/src/test/results/clientpositive/vectorized_context.q.out Wed
Nov 27 16:49:32 2013
@@ -0,0 +1,379 @@
+PREHOOK: query: create table store(s_store_sk int, s_city string)
+stored as orc
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table store(s_store_sk int, s_city string)
+stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@store
+PREHOOK: query: insert overwrite table store
+select cint, cstring1
+from alltypesorc
+where cint not in (
+-3728, -563, 762, 6981, 253665376, 528534767, 626923679)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: default@store
+POSTHOOK: query: insert overwrite table store
+select cint, cstring1
+from alltypesorc
+where cint not in (
+-3728, -563, 762, 6981, 253665376, 528534767, 626923679)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: default@store
+POSTHOOK: Lineage: store.s_city SIMPLE
[(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from
deserializer), ]
+POSTHOOK: Lineage: store.s_store_sk SIMPLE
[(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from
deserializer), ]
+PREHOOK: query: create table store_sales(ss_store_sk int, ss_hdemo_sk int,
ss_net_profit double)
+stored as orc
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table store_sales(ss_store_sk int, ss_hdemo_sk int,
ss_net_profit double)
+stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@store_sales
+POSTHOOK: Lineage: store.s_city SIMPLE
[(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from
deserializer), ]
+POSTHOOK: Lineage: store.s_store_sk SIMPLE
[(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from
deserializer), ]
+PREHOOK: query: insert overwrite table store_sales
+select cint, cint, cdouble
+from alltypesorc
+where cint not in (
+-3728, -563, 762, 6981, 253665376, 528534767, 626923679)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: default@store_sales
+POSTHOOK: query: insert overwrite table store_sales
+select cint, cint, cdouble
+from alltypesorc
+where cint not in (
+-3728, -563, 762, 6981, 253665376, 528534767, 626923679)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: default@store_sales
+POSTHOOK: Lineage: store.s_city SIMPLE
[(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from
deserializer), ]
+POSTHOOK: Lineage: store.s_store_sk SIMPLE
[(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from
deserializer), ]
+POSTHOOK: Lineage: store_sales.ss_hdemo_sk SIMPLE
[(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from
deserializer), ]
+POSTHOOK: Lineage: store_sales.ss_net_profit SIMPLE
[(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:from
deserializer), ]
+POSTHOOK: Lineage: store_sales.ss_store_sk SIMPLE
[(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from
deserializer), ]
+PREHOOK: query: create table household_demographics(hd_demo_sk int)
+stored as orc
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table household_demographics(hd_demo_sk int)
+stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@household_demographics
+POSTHOOK: Lineage: store.s_city SIMPLE
[(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from
deserializer), ]
+POSTHOOK: Lineage: store.s_store_sk SIMPLE
[(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from
deserializer), ]
+POSTHOOK: Lineage: store_sales.ss_hdemo_sk SIMPLE
[(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from
deserializer), ]
+POSTHOOK: Lineage: store_sales.ss_net_profit SIMPLE
[(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:from
deserializer), ]
+POSTHOOK: Lineage: store_sales.ss_store_sk SIMPLE
[(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from
deserializer), ]
+PREHOOK: query: insert overwrite table household_demographics
+select cint
+from alltypesorc
+where cint not in (
+-3728, -563, 762, 6981, 253665376, 528534767, 626923679)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: default@household_demographics
+POSTHOOK: query: insert overwrite table household_demographics
+select cint
+from alltypesorc
+where cint not in (
+-3728, -563, 762, 6981, 253665376, 528534767, 626923679)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: default@household_demographics
+POSTHOOK: Lineage: household_demographics.hd_demo_sk SIMPLE
[(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from
deserializer), ]
+POSTHOOK: Lineage: store.s_city SIMPLE
[(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from
deserializer), ]
+POSTHOOK: Lineage: store.s_store_sk SIMPLE
[(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from
deserializer), ]
+POSTHOOK: Lineage: store_sales.ss_hdemo_sk SIMPLE
[(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from
deserializer), ]
+POSTHOOK: Lineage: store_sales.ss_net_profit SIMPLE
[(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:from
deserializer), ]
+POSTHOOK: Lineage: store_sales.ss_store_sk SIMPLE
[(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from
deserializer), ]
+PREHOOK: query: explain
+select store.s_city, ss_net_profit
+from store_sales
+JOIN store ON store_sales.ss_store_sk = store.s_store_sk
+JOIN household_demographics ON store_sales.ss_hdemo_sk =
household_demographics.hd_demo_sk
+limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select store.s_city, ss_net_profit
+from store_sales
+JOIN store ON store_sales.ss_store_sk = store.s_store_sk
+JOIN household_demographics ON store_sales.ss_hdemo_sk =
household_demographics.hd_demo_sk
+limit 100
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: household_demographics.hd_demo_sk SIMPLE
[(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from
deserializer), ]
+POSTHOOK: Lineage: store.s_city SIMPLE
[(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from
deserializer), ]
+POSTHOOK: Lineage: store.s_store_sk SIMPLE
[(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from
deserializer), ]
+POSTHOOK: Lineage: store_sales.ss_hdemo_sk SIMPLE
[(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from
deserializer), ]
+POSTHOOK: Lineage: store_sales.ss_net_profit SIMPLE
[(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:from
deserializer), ]
+POSTHOOK: Lineage: store_sales.ss_store_sk SIMPLE
[(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from
deserializer), ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME
store_sales)) (TOK_TABREF (TOK_TABNAME store)) (= (. (TOK_TABLE_OR_COL
store_sales) ss_store_sk) (. (TOK_TABLE_OR_COL store) s_store_sk))) (TOK_TABREF
(TOK_TABNAME household_demographics)) (= (. (TOK_TABLE_OR_COL store_sales)
ss_hdemo_sk) (. (TOK_TABLE_OR_COL household_demographics) hd_demo_sk))))
(TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR
(. (TOK_TABLE_OR_COL store) s_city)) (TOK_SELEXPR (TOK_TABLE_OR_COL
ss_net_profit))) (TOK_LIMIT 100)))
+
+STAGE DEPENDENCIES:
+ Stage-6 is a root stage
+ Stage-4 depends on stages: Stage-6
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-6
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ household_demographics
+ Fetch Operator
+ limit: -1
+ store_sales
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ household_demographics
+ TableScan
+ alias: household_demographics
+ HashTable Sink Operator
+ condition expressions:
+ 0 {_col6} {_col2}
+ 1
+ handleSkewJoin: false
+ keys:
+ 0 [Column[_col1]]
+ 1 [Column[hd_demo_sk]]
+ Position of Big Table: 0
+ store_sales
+ TableScan
+ alias: store_sales
+ HashTable Sink Operator
+ condition expressions:
+ 0 {ss_hdemo_sk} {ss_net_profit}
+ 1 {s_city}
+ handleSkewJoin: false
+ keys:
+ 0 [Column[ss_store_sk]]
+ 1 [Column[s_store_sk]]
+ Position of Big Table: 1
+
+ Stage: Stage-4
+ Map Reduce
+ Alias -> Map Operator Tree:
+ store
+ TableScan
+ alias: store
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {ss_hdemo_sk} {ss_net_profit}
+ 1 {s_city}
+ handleSkewJoin: false
+ keys:
+ 0 [Column[ss_store_sk]]
+ 1 [Column[s_store_sk]]
+ outputColumnNames: _col1, _col2, _col6
+ Position of Big Table: 1
+ Vectorized execution: true
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {_col6} {_col2}
+ 1
+ handleSkewJoin: false
+ keys:
+ 0 [Column[_col1]]
+ 1 [Column[hd_demo_sk]]
+ outputColumnNames: _col1, _col6
+ Position of Big Table: 0
+ Vectorized execution: true
+ Select Operator
+ expressions:
+ expr: _col1
+ type: string
+ expr: _col6
+ type: double
+ outputColumnNames: _col0, _col1
+ Vectorized execution: true
+ Limit
+ Vectorized execution: true
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format:
org.apache.hadoop.mapred.TextInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde:
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Vectorized execution: true
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 100
+
+
+PREHOOK: query: select store.s_city, ss_net_profit
+from store_sales
+JOIN store ON store_sales.ss_store_sk = store.s_store_sk
+JOIN household_demographics ON store_sales.ss_hdemo_sk =
household_demographics.hd_demo_sk
+limit 100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@household_demographics
+PREHOOK: Input: default@store
+PREHOOK: Input: default@store_sales
+#### A masked pattern was here ####
+POSTHOOK: query: select store.s_city, ss_net_profit
+from store_sales
+JOIN store ON store_sales.ss_store_sk = store.s_store_sk
+JOIN household_demographics ON store_sales.ss_hdemo_sk =
household_demographics.hd_demo_sk
+limit 100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@household_demographics
+POSTHOOK: Input: default@store
+POSTHOOK: Input: default@store_sales
+#### A masked pattern was here ####
+POSTHOOK: Lineage: household_demographics.hd_demo_sk SIMPLE
[(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from
deserializer), ]
+POSTHOOK: Lineage: store.s_city SIMPLE
[(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from
deserializer), ]
+POSTHOOK: Lineage: store.s_store_sk SIMPLE
[(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from
deserializer), ]
+POSTHOOK: Lineage: store_sales.ss_hdemo_sk SIMPLE
[(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from
deserializer), ]
+POSTHOOK: Lineage: store_sales.ss_net_profit SIMPLE
[(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:from
deserializer), ]
+POSTHOOK: Lineage: store_sales.ss_store_sk SIMPLE
[(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from
deserializer), ]
+LFgU5WT87C2yJ4W4YU0r8Pp NULL
+v3p153e2bSkGS70v04G NULL
+0pOH7A4O8aQ37NuBqn NULL
+8ShAFcD734S8Q26WjMwpq0Q NULL
+nOF31ehjY7ULCHMf NULL
+t32s57Cjt4a250qQgVNAB5T NULL
+nvO822k30OaH37Il NULL
+M152O NULL
+FgJ7Hft6845s1766oyt82q NULL
+0ovL2T NULL
+3e27C1jTdTQPdvCWi4if NULL
+XWIExC7NI3bqu6VhR14g2 NULL
+6g482F6IEbD2mKeLE153e0w NULL
+2diFRgr78diK6rSl0J NULL
+21UE6fJyy NULL
+H3bTj310QaL012cPe NULL
+7342q5oFQL8QIl7cO NULL
+VkXY4IOSO NULL
+4K1nnlkt7786Sq8x0ARXtr NULL
+m4eSLx4qihVg1e32 NULL
+OSBq0b NULL
+aKbAu2WJV8HWHU6K1Ukq NULL
+LcfhOxSVg68ACRvw1xC7LU NULL
+AwVW3sV2gsM NULL
+Tqar00A NULL
+mC4mr NULL
+YHVB0 NULL
+2vtmB0qNlHlGV15P1p NULL
+2wbgE0Yo1RX82H2sp4f1l5 NULL
+BSmA3fAai62QpNjmL66y8d NULL
+314nQ6nVj NULL
+H8mh48T7 NULL
+U616In80F54RI NULL
+BuSLb058f2 NULL
+OSc0r NULL
+75KN62a2iAf0j5Jol77wH7 NULL
+66Mx4v NULL
+7SchQY2j74BW7dQNy5G5 NULL
+FEefA NULL
+P2DNeo00PA7DJF0 NULL
+SMXqH NULL
+6fB40r75kxeX3k10 NULL
+AmYxfSOBdJv8B48l0VAeeI NULL
+S87OO NULL
+0EIL81O NULL
+dG8B5PQ3b85U362G6huu NULL
+XOypj8 NULL
+61eT82N24 NULL
+lVfv3fD1jn532h3K67H NULL
+J1an665U NULL
+Y6P8Ji868U7u8W3X2GHNiOLh NULL
+wXbLC0LS2bFf12f1ljC NULL
+j0L50J2e82 NULL
+8EPG0Xi307qd NULL
+04Y1mA17 NULL
+lTLWdPg0yM0IgY76s70 NULL
+KDr0tMRnCJJIBA84 NULL
+71KN0p4NhE4xm4ixm NULL
+u6HT8fTw6IgPf2 NULL
+7WYO11kWn6fT2pOlh5sTDIwG NULL
+Yc6gaH2OFF7cymt8q23Fr NULL
+RQbQ5 NULL
+75Y6J NULL
+eUx01FREb2LD4kle4dpS NULL
+T0Y8Vi41EYW4CpQ6Hg1Xg30w NULL
+Egf7KV7TeT NULL
+LIJuG07tfqoLu8K NULL
+uUTO41xk6VyqYPh NULL
+aEvOE7hUNO0d67AM3V7BwUCK NULL
+8AqHq NULL
+gl03UrAU4bWrOvqwwf NULL
+NULL NULL
+LX6QHG6sEmBAIbA6e6Am24 NULL
+i330V4Y0Lm4ajyKqM1X2Y NULL
+64K51WMTs NULL
+iW12567av NULL
+v3U315C36UQ4oEW NULL
+niiH6MSNaSk4fRRb74o1y28c NULL
+p4WmTkrM NULL
+L1Q62u2 NULL
+hnrm68NiEQCL4 NULL
+fju0XS06MyUS7Nqk8P8 NULL
+0VWukLt NULL
+642LsMiNArr0ufitL3l7RCU7 NULL
+DWNvg304j4KTMEs2174Cy1 NULL
+DU1m68i1Q7W3 NULL
+44vcS2S5wu684R05fq01fu NULL
+eu3X5Qfp4sHv5H NULL
+QbdFB1d7vfaM7 NULL
+s43i4lU NULL
+0pOTqi3O44rEnGQ NULL
+32cB3f NULL
+c300w5 NULL
+w66f63n NULL
+iR76SEs2C4V NULL
+ss2PoJAipj6B1tn75O NULL
+n3ner11ab4 NULL
+r17jGvc7gR NULL
+5G1Xp277YJRklEO5kHx NULL
+B78T0SnxlCe5AQ522GBUf6c6 NULL
+PREHOOK: query: drop table store
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@store
+PREHOOK: Output: default@store
+POSTHOOK: query: drop table store
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@store
+POSTHOOK: Output: default@store
+POSTHOOK: Lineage: household_demographics.hd_demo_sk SIMPLE
[(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from
deserializer), ]
+POSTHOOK: Lineage: store.s_city SIMPLE
[(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from
deserializer), ]
+POSTHOOK: Lineage: store.s_store_sk SIMPLE
[(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from
deserializer), ]
+POSTHOOK: Lineage: store_sales.ss_hdemo_sk SIMPLE
[(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from
deserializer), ]
+POSTHOOK: Lineage: store_sales.ss_net_profit SIMPLE
[(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:from
deserializer), ]
+POSTHOOK: Lineage: store_sales.ss_store_sk SIMPLE
[(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from
deserializer), ]
+PREHOOK: query: drop table store_sales
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@store_sales
+PREHOOK: Output: default@store_sales
+POSTHOOK: query: drop table store_sales
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@store_sales
+POSTHOOK: Output: default@store_sales
+POSTHOOK: Lineage: household_demographics.hd_demo_sk SIMPLE
[(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from
deserializer), ]
+POSTHOOK: Lineage: store.s_city SIMPLE
[(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from
deserializer), ]
+POSTHOOK: Lineage: store.s_store_sk SIMPLE
[(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from
deserializer), ]
+POSTHOOK: Lineage: store_sales.ss_hdemo_sk SIMPLE
[(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from
deserializer), ]
+POSTHOOK: Lineage: store_sales.ss_net_profit SIMPLE
[(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:from
deserializer), ]
+POSTHOOK: Lineage: store_sales.ss_store_sk SIMPLE
[(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from
deserializer), ]
+PREHOOK: query: drop table household_demographics
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@household_demographics
+PREHOOK: Output: default@household_demographics
+POSTHOOK: query: drop table household_demographics
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@household_demographics
+POSTHOOK: Output: default@household_demographics
+POSTHOOK: Lineage: household_demographics.hd_demo_sk SIMPLE
[(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from
deserializer), ]
+POSTHOOK: Lineage: store.s_city SIMPLE
[(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from
deserializer), ]
+POSTHOOK: Lineage: store.s_store_sk SIMPLE
[(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from
deserializer), ]
+POSTHOOK: Lineage: store_sales.ss_hdemo_sk SIMPLE
[(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from
deserializer), ]
+POSTHOOK: Lineage: store_sales.ss_net_profit SIMPLE
[(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:from
deserializer), ]
+POSTHOOK: Lineage: store_sales.ss_store_sk SIMPLE
[(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from
deserializer), ]