This is an automated email from the ASF dual-hosted git repository.
kgyrtkirk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
     new 60b656d  HIVE-19647: use bitvectors in IN operators (#2598) (Soumyakanti Das reviewed by Zoltan Haindrich)
60b656d is described below
commit 60b656da37c04ad6db7a12324e0b9ee079a80f84
Author: Soumyakanti Das <[email protected]>
AuthorDate: Tue Sep 21 03:23:14 2021 -0700
    HIVE-19647: use bitvectors in IN operators (#2598) (Soumyakanti Das reviewed by Zoltan Haindrich)
---
.../java/org/apache/hadoop/hive/conf/HiveConf.java | 2 +
.../stats/annotation/HiveMurmur3Adapter.java | 85 +++++++
.../stats/annotation/StatsRulesProcFactory.java | 50 +++-
.../apache/hadoop/hive/ql/plan/ColStatistics.java | 9 +
.../apache/hadoop/hive/ql/stats/StatsUtils.java | 7 +
.../hive/ql/plan/mapping/TestStatEstimations.java | 39 ++-
.../queries/clientpositive/in_bitvector_filter.q | 22 ++
.../clientpositive/llap/in_bitvector_filter.q.out | 274 +++++++++++++++++++++
8 files changed, 485 insertions(+), 3 deletions(-)
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index f61b903..903a803 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -2914,6 +2914,8 @@ public class HiveConf extends Configuration {
"UDTFs change the number of rows of the output. A common UDTF is the
explode() method that creates\n" +
"multiple rows for each element in the input array. This factor is
applied to the number of\n" +
"output rows and output size."),
+    HIVE_STATS_USE_BITVECTORS("hive.stats.use.bitvectors", false,
+        "Enables the use of bitvectors for estimating selectivity."),
     HIVE_STATS_MAX_NUM_STATS("hive.stats.max.num.stats", (long) 10000,
         "When the number of stats to be updated is huge, this value is used to control the number of \n" +
         " stats to be sent to HMS for update."),
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/HiveMurmur3Adapter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/HiveMurmur3Adapter.java
new file mode 100644
index 0000000..0baaa62
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/HiveMurmur3Adapter.java
@@ -0,0 +1,85 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.optimizer.stats.annotation;
+
+import java.nio.ByteBuffer;
+import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.serde2.io.DateWritable;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
+import org.apache.hive.common.util.Murmur3;
+
+/**
+ * Maps Hive primitive values to Murmur3 hash values.
+ */
+public class HiveMurmur3Adapter {
+
+ private PrimitiveCategory type;
+ private PrimitiveObjectInspector inputOI;
+
+ public HiveMurmur3Adapter(PrimitiveObjectInspector oi) throws HiveException {
+ this.inputOI = oi;
+ type = oi.getTypeInfo().getPrimitiveCategory();
+ }
+
+ private final ByteBuffer LONG_BUFFER = ByteBuffer.allocate(Long.BYTES);
+
+ public long murmur3(Object objVal) throws HiveException {
+ Object p = objVal;
+ switch (type) {
+ case BYTE:
+ case SHORT:
+ case INT:
+ case LONG:
+ case TIMESTAMP: {
+ long val = PrimitiveObjectInspectorUtils.getLong(objVal, inputOI);
+ LONG_BUFFER.putLong(0, val);
+ return Murmur3.hash64(LONG_BUFFER.array());
+ }
+ case FLOAT:
+ case DOUBLE: {
+ double val = PrimitiveObjectInspectorUtils.getDouble(objVal, inputOI);
+ LONG_BUFFER.putDouble(0, val);
+ return Murmur3.hash64(LONG_BUFFER.array());
+ }
+ case STRING:
+ case CHAR:
+ case VARCHAR: {
+ String val = PrimitiveObjectInspectorUtils.getString(objVal, inputOI);
+ return Murmur3.hash64(val.getBytes());
+ }
+ case DECIMAL: {
+      HiveDecimal decimal = PrimitiveObjectInspectorUtils.getHiveDecimal(p, inputOI);
+ LONG_BUFFER.putDouble(0, decimal.doubleValue());
+ return Murmur3.hash64(LONG_BUFFER.array());
+ }
+ case DATE:
+      DateWritable v = new DateWritable((DateWritable) inputOI.getPrimitiveWritableObject(p));
+ int days = v.getDays();
+ LONG_BUFFER.putLong(0, days);
+ return Murmur3.hash64(LONG_BUFFER.array());
+ case BOOLEAN:
+ case BINARY:
+ default:
+ throw new HiveException("type: " + type + " is not supported");
+ }
+ }
+}
\ No newline at end of file
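
For reference, a minimal standalone sketch of how the adapter can be exercised,
assuming the standard writable string object inspector from serde2 (the demo
class name is illustrative, not part of the patch):

    import org.apache.hadoop.hive.ql.metadata.HiveException;
    import org.apache.hadoop.hive.ql.optimizer.stats.annotation.HiveMurmur3Adapter;
    import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
    import org.apache.hadoop.io.Text;

    public class Murmur3AdapterDemo {
      public static void main(String[] args) throws HiveException {
        // Adapter over the standard writable string object inspector.
        HiveMurmur3Adapter adapter =
            new HiveMurmur3Adapter(PrimitiveObjectInspectorFactory.writableStringObjectInspector);
        // Equal values hash equally, which is what lets IN-list constants be
        // compared against the column's bitvector sketch in the same hash domain.
        System.out.println(adapter.murmur3(new Text("A3")));
        System.out.println(adapter.murmur3(new Text("A3"))); // same hash as above
        System.out.println(adapter.murmur3(new Text("B5"))); // almost surely different
      }
    }
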
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
index 0a7c5e2..dc83827 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
@@ -36,6 +36,9 @@ import java.util.Stack;
import com.google.common.base.Preconditions;
import org.apache.calcite.rel.metadata.RelMdUtil;
+import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator;
+import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory;
+import org.apache.hadoop.hive.common.ndv.hll.HyperLogLog;
import org.apache.hadoop.hive.common.type.Timestamp;
import org.apache.hadoop.hive.conf.Constants;
import org.apache.hadoop.hive.conf.HiveConf;
@@ -118,7 +121,9 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDFStruct;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.io.DateWritable;
import org.apache.hadoop.hive.serde2.io.TimestampWritableV2;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
@@ -537,10 +542,11 @@ public class StatsRulesProcFactory {
}
for (int i = 0; i < columnStats.size(); i++) {
         long dvs = columnStats.get(i) == null ? 0 : columnStats.get(i).getCountDistint();
+        long intersectionSize = estimateIntersectionSize(aspCtx.getConf(), columnStats.get(i), values.get(i));
         // (num of distinct vals for col in IN clause / num of distinct vals for col )
         double columnFactor = dvs == 0 ? 0.5d : (1.0d / dvs);
         if (!multiColumn) {
-          columnFactor *=values.get(0).size();
+          columnFactor *= intersectionSize;
         }
         // max can be 1, even when ndv is larger in IN clause than in column stats
         factor *= columnFactor > 1d ? 1d : columnFactor;
@@ -555,6 +561,48 @@ public class StatsRulesProcFactory {
return Math.round(numRows * factor * inFactor);
}
+  private long estimateIntersectionSize(HiveConf conf, ColStatistics colStatistics, Set<ExprNodeDescEqualityWrapper> values) {
+    try {
+      boolean useBitVectors = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_STATS_USE_BITVECTORS);
+      if (!useBitVectors) {
+        return values.size();
+      }
+      if (colStatistics == null) {
+        return values.size();
+      }
+      byte[] bitVector = colStatistics.getBitVectors();
+      if (bitVector == null) {
+        return values.size();
+      }
+      NumDistinctValueEstimator sketch = NumDistinctValueEstimatorFactory.getNumDistinctValueEstimator(bitVector);
+      if (!(sketch instanceof HyperLogLog)) {
+        return values.size();
+      }
+      HyperLogLog hllCol = (HyperLogLog) sketch;
+      HyperLogLog hllVals = new HyperLogLog.HyperLogLogBuilder().build();
+
+      for (ExprNodeDescEqualityWrapper b : values) {
+        ObjectInspector oi = b.getExprNodeDesc().getWritableObjectInspector();
+        HiveMurmur3Adapter hma = new HiveMurmur3Adapter((PrimitiveObjectInspector) oi);
+        ExprNodeConstantDesc c = (ExprNodeConstantDesc) b.getExprNodeDesc();
+        hllVals.add(hma.murmur3(c.getWritableObjectInspector().getWritableConstantValue()));
+      }
+
+      long cntA = hllCol.count();
+      long cntB = hllVals.count();
+      hllCol.merge(hllVals);
+      long cntU = hllCol.count();
+
+      long cntI = cntA + cntB - cntU;
+      if (cntI < 0) {
+        return 0;
+      }
+      return cntI;
+    } catch (HiveException e) {
+      throw new RuntimeException("Failed to estimate intersection size for IN clause using bitvectors", e);
+    }
+  }
+
static class RangeOps {
private String colType;
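
The estimate above is inclusion-exclusion over two HyperLogLog sketches,
|A ∩ B| = |A| + |B| - |A ∪ B|, clamped at zero because HLL counts are
approximate. A self-contained sketch of the same arithmetic, using only classes
already referenced by this patch (the demo class and hash helper are
illustrative):

    import java.nio.ByteBuffer;
    import org.apache.hadoop.hive.common.ndv.hll.HyperLogLog;
    import org.apache.hive.common.util.Murmur3;

    public class IntersectionEstimateDemo {
      // Hash a long the same way HiveMurmur3Adapter does for integral types.
      private static long hash(long v) {
        ByteBuffer buf = ByteBuffer.allocate(Long.BYTES);
        buf.putLong(0, v);
        return Murmur3.hash64(buf.array());
      }

      public static void main(String[] args) {
        HyperLogLog col = new HyperLogLog.HyperLogLogBuilder().build();
        HyperLogLog vals = new HyperLogLog.HyperLogLogBuilder().build();
        for (long v = 1; v <= 10; v++) {
          col.add(hash(v));                    // column values 1..10
        }
        for (long v : new long[] {1, 2, 20}) {
          vals.add(hash(v));                   // IN-list constants
        }
        long a = col.count();                  // ~10
        long b = vals.count();                 // ~3
        col.merge(vals);                       // col now approximates the union
        long union = col.count();              // ~11
        System.out.println(Math.max(0, a + b - union)); // ~2: values 1 and 2 intersect
      }
    }
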
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java
index 7e7b9a4..7142a05 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java
@@ -31,6 +31,7 @@ public class ColStatistics {
private boolean isPrimaryKey;
private boolean isEstimated;
private boolean isFilteredColumn;
+ private byte[] bitVectors;
public ColStatistics(String colName, String colType) {
this.setColumnName(colName);
@@ -111,6 +112,14 @@ public class ColStatistics {
this.range = r;
}
+ public byte[] getBitVectors() {
+ return bitVectors;
+ }
+
+ public void setBitVectors(byte[] bitVectors) {
+ this.bitVectors = bitVectors;
+ }
+
@Override
public String toString() {
StringBuilder sb = new StringBuilder();
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
index b2c6fc2..56b3843 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
@@ -845,27 +845,32 @@ public class StatsUtils {
cs.setNumNulls(csd.getLongStats().getNumNulls());
cs.setAvgColLen(JavaDataModel.get().primitive1());
       cs.setRange(csd.getLongStats().getLowValue(), csd.getLongStats().getHighValue());
+ cs.setBitVectors(csd.getLongStats().getBitVectors());
} else if (colTypeLowerCase.equals(serdeConstants.BIGINT_TYPE_NAME)) {
cs.setCountDistint(csd.getLongStats().getNumDVs());
cs.setNumNulls(csd.getLongStats().getNumNulls());
cs.setAvgColLen(JavaDataModel.get().primitive2());
       cs.setRange(csd.getLongStats().getLowValue(), csd.getLongStats().getHighValue());
+ cs.setBitVectors(csd.getLongStats().getBitVectors());
} else if (colTypeLowerCase.equals(serdeConstants.FLOAT_TYPE_NAME)) {
cs.setCountDistint(csd.getDoubleStats().getNumDVs());
cs.setNumNulls(csd.getDoubleStats().getNumNulls());
cs.setAvgColLen(JavaDataModel.get().primitive1());
       cs.setRange(csd.getDoubleStats().getLowValue(), csd.getDoubleStats().getHighValue());
+ cs.setBitVectors(csd.getDoubleStats().getBitVectors());
} else if (colTypeLowerCase.equals(serdeConstants.DOUBLE_TYPE_NAME)) {
cs.setCountDistint(csd.getDoubleStats().getNumDVs());
cs.setNumNulls(csd.getDoubleStats().getNumNulls());
cs.setAvgColLen(JavaDataModel.get().primitive2());
       cs.setRange(csd.getDoubleStats().getLowValue(), csd.getDoubleStats().getHighValue());
+ cs.setBitVectors(csd.getDoubleStats().getBitVectors());
} else if (colTypeLowerCase.equals(serdeConstants.STRING_TYPE_NAME)
|| colTypeLowerCase.startsWith(serdeConstants.CHAR_TYPE_NAME)
|| colTypeLowerCase.startsWith(serdeConstants.VARCHAR_TYPE_NAME)) {
cs.setCountDistint(csd.getStringStats().getNumDVs());
cs.setNumNulls(csd.getStringStats().getNumNulls());
cs.setAvgColLen(csd.getStringStats().getAvgColLen());
+ cs.setBitVectors(csd.getStringStats().getBitVectors());
} else if (colTypeLowerCase.equals(serdeConstants.BOOLEAN_TYPE_NAME)) {
       if (csd.getBooleanStats().getNumFalses() > 0 && csd.getBooleanStats().getNumTrues() > 0) {
cs.setCountDistint(2);
@@ -906,6 +911,7 @@ public class StatsUtils {
cs.setRange(minVal, maxVal);
}
}
+ cs.setBitVectors(csd.getDecimalStats().getBitVectors());
} else if (colTypeLowerCase.equals(serdeConstants.DATE_TYPE_NAME)) {
cs.setAvgColLen(JavaDataModel.get().lengthOfDate());
cs.setNumNulls(csd.getDateStats().getNumNulls());
@@ -914,6 +920,7 @@ public class StatsUtils {
       Long highVal = (csd.getDateStats().getHighValue() != null) ? csd.getDateStats().getHighValue().getDaysSinceEpoch() : null;
cs.setRange(lowVal, highVal);
+ cs.setBitVectors(csd.getDateStats().getBitVectors());
} else {
// Columns statistics for complex datatypes are not supported yet
return null;
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/plan/mapping/TestStatEstimations.java b/ql/src/test/org/apache/hadoop/hive/ql/plan/mapping/TestStatEstimations.java
index 4a2d568..c9cfb2d 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/plan/mapping/TestStatEstimations.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/plan/mapping/TestStatEstimations.java
@@ -35,6 +35,7 @@ import org.hamcrest.Matchers;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.ClassRule;
+import org.junit.Ignore;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TestRule;
@@ -54,8 +55,8 @@ public class TestStatEstimations {
String cmds[] = {
// @formatter:off
"create table t2(a integer, b string) STORED AS ORC",
-      "insert into t2 values(1, 'AAA'),(2, 'AAA'),(3, 'AAA'),(4, 'AAA'),(5, 'AAA')," +
-          "(6, 'BBB'),(7, 'BBB'),(8, 'BBB'),(9, 'BBB'),(10, 'BBB')",
+      "insert into t2 values (1, 'A1'),(2, 'A2'),(3, 'A3'),(4, 'A4'),(5, 'A5')," +
+          "(6, 'B1'),(7, 'B2'),(8, 'B3'),(9, 'B4'),(10, 'B5')",
"analyze table t2 compute statistics for columns"
// @formatter:on
};
@@ -84,6 +85,39 @@ public class TestStatEstimations {
}
@Test
+  public void testFilterStringIn() throws ParseException, CommandProcessorException {
+    IDriver driver = createDriver();
+    String query = "explain select a from t2 where b IN ('A3', 'ABC', 'AXZ') order by a";
+
+ PlanMapper pm = getMapperForQuery(driver, query);
+ List<FilterOperator> fos = pm.getAll(FilterOperator.class);
+ // the same operator is present 2 times
+ fos.sort(TestCounterMapping.OPERATOR_ID_COMPARATOR.reversed());
+ FilterOperator fop = fos.get(0);
+
+ // any estimation near 1 is ok...currently 1
+ assertEquals(1, fop.getStatistics().getNumRows());
+ }
+
+  // FIXME: right now NOT IN is transformed into AND( NE(...) , NE(...) )
+ @Ignore
+ @Test
+ public void testFilterStringNotIn() throws CommandProcessorException {
+ IDriver driver = createDriver();
+    String query = "explain select a from t2 where b NOT IN ('XXX', 'UUU') order by a";
+
+ PlanMapper pm = getMapperForQuery(driver, query);
+ List<FilterOperator> fos = pm.getAll(FilterOperator.class);
+ // the same operator is present 2 times
+ fos.sort(TestCounterMapping.OPERATOR_ID_COMPARATOR.reversed());
+ assertEquals(1, fos.size());
+ FilterOperator fop = fos.get(0);
+
+ // any estimation near 10 is ok...currently 10
+ assertEquals(10, fop.getStatistics().getNumRows());
+ }
+
+ @Test
   public void testFilterIntIn() throws ParseException, CommandProcessorException {
     IDriver driver = createDriver();
     String query = "explain select a from t2 where a IN (-1,0,1,2,10,20,30,40) order by a";
@@ -103,6 +137,7 @@ public class TestStatEstimations {
private static IDriver createDriver() {
HiveConf conf = env_setup.getTestCtx().hiveConf;
+ conf.setBoolVar(ConfVars.HIVE_STATS_USE_BITVECTORS, true);
conf.setBoolVar(ConfVars.HIVE_VECTORIZATION_ENABLED, false);
conf.setVar(HiveConf.ConfVars.HIVE_AUTHORIZATION_MANAGER,
"org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactory");
diff --git a/ql/src/test/queries/clientpositive/in_bitvector_filter.q b/ql/src/test/queries/clientpositive/in_bitvector_filter.q
new file mode 100644
index 0000000..177d36f
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/in_bitvector_filter.q
@@ -0,0 +1,22 @@
+create table t2(a integer, b string) STORED AS ORC;
+insert into t2 values (1, 'A1'),(2, 'A2'),(3, 'A3'),(4, 'A4'),(5, 'A5'),
+ (6, 'B1'),(7, 'B2'),(8, 'B3'),(9, 'B4'),(20, 'B5');
+analyze table t2 compute statistics for columns;
+
+set hive.stats.fetch.column.stats=true;
+
+set hive.stats.use.bitvectors=false;
+
+-- 1,2,10,11,12,13,14,15,20 => 9
+explain select a from t2 where a IN (-1,0,1,2,10,11,12,13,14,15,20,30,40) order by a;
+
+set hive.stats.use.bitvectors=true;
+
+-- 1,2,20 => 3
+explain select a from t2 where a IN (-1,0,1,2,10,11,12,13,14,15,20,30,40) order by a;
+
+-- A3 only => 1
+explain select a from t2 where b IN ('A3', 'ABC', 'AXZ') order by a;
+
+-- A3,B1,B5 => 3
+explain select a from t2 where b IN ('A3', 'B1', 'B5') order by a;
\ No newline at end of file
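
Worked through for the integer query above, assuming the computed stats give
ndv(a) = 10: with bitvectors on, the sketch intersection of the 13 constants
with the column is ~3 (values 1, 2, 20), so the selectivity factor is
3 * (1/10) = 0.3 and the estimate becomes 10 * 0.3 = 3 rows. With bitvectors
off, the 9 constants listed in the comment above fall inside the column's
[1, 20] range, which appears to drive the 9-row estimate in the q.out below.
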
diff --git a/ql/src/test/results/clientpositive/llap/in_bitvector_filter.q.out b/ql/src/test/results/clientpositive/llap/in_bitvector_filter.q.out
new file mode 100644
index 0000000..7994fdd
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/in_bitvector_filter.q.out
@@ -0,0 +1,274 @@
+PREHOOK: query: create table t2(a integer, b string) STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t2
+POSTHOOK: query: create table t2(a integer, b string) STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t2
+PREHOOK: query: insert into t2 values (1, 'A1'),(2, 'A2'),(3, 'A3'),(4, 'A4'),(5, 'A5'),
+ (6, 'B1'),(7, 'B2'),(8, 'B3'),(9, 'B4'),(20, 'B5')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t2
+POSTHOOK: query: insert into t2 values (1, 'A1'),(2, 'A2'),(3, 'A3'),(4, 'A4'),(5, 'A5'),
+ (6, 'B1'),(7, 'B2'),(8, 'B3'),(9, 'B4'),(20, 'B5')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t2
+POSTHOOK: Lineage: t2.a SCRIPT []
+POSTHOOK: Lineage: t2.b SCRIPT []
+PREHOOK: query: analyze table t2 compute statistics for columns
+PREHOOK: type: ANALYZE_TABLE
+PREHOOK: Input: default@t2
+PREHOOK: Output: default@t2
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table t2 compute statistics for columns
+POSTHOOK: type: ANALYZE_TABLE
+POSTHOOK: Input: default@t2
+POSTHOOK: Output: default@t2
+#### A masked pattern was here ####
+PREHOOK: query: explain select a from t2 where a IN (-1,0,1,2,10,11,12,13,14,15,20,30,40) order by a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t2
+#### A masked pattern was here ####
+POSTHOOK: query: explain select a from t2 where a IN (-1,0,1,2,10,11,12,13,14,15,20,30,40) order by a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t2
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: t2
+                  filterExpr: (a) IN (-1, 0, 1, 2, 10, 11, 12, 13, 14, 15, 20, 30, 40) (type: boolean)
+                  Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+                  Filter Operator
+                    predicate: (a) IN (-1, 0, 1, 2, 10, 11, 12, 13, 14, 15, 20, 30, 40) (type: boolean)
+                    Statistics: Num rows: 9 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: a (type: int)
+ outputColumnNames: _col0
+                      Statistics: Num rows: 9 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ null sort order: z
+ sort order: +
+                        Statistics: Num rows: 9 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: int)
+ outputColumnNames: _col0
+                Statistics: Num rows: 9 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 9 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: explain select a from t2 where a IN (-1,0,1,2,10,11,12,13,14,15,20,30,40) order by a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t2
+#### A masked pattern was here ####
+POSTHOOK: query: explain select a from t2 where a IN (-1,0,1,2,10,11,12,13,14,15,20,30,40) order by a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t2
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: t2
+                  filterExpr: (a) IN (-1, 0, 1, 2, 10, 11, 12, 13, 14, 15, 20, 30, 40) (type: boolean)
+                  Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+                  Filter Operator
+                    predicate: (a) IN (-1, 0, 1, 2, 10, 11, 12, 13, 14, 15, 20, 30, 40) (type: boolean)
+                    Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: a (type: int)
+ outputColumnNames: _col0
+                      Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ null sort order: z
+ sort order: +
+                        Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: int)
+ outputColumnNames: _col0
+                Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: explain select a from t2 where b IN ('A3', 'ABC', 'AXZ') order by a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t2
+#### A masked pattern was here ####
+POSTHOOK: query: explain select a from t2 where b IN ('A3', 'ABC', 'AXZ') order by a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t2
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: t2
+ filterExpr: (b) IN ('A3', 'ABC', 'AXZ') (type: boolean)
+                  Statistics: Num rows: 10 Data size: 900 Basic stats: COMPLETE Column stats: COMPLETE
+                  Filter Operator
+                    predicate: (b) IN ('A3', 'ABC', 'AXZ') (type: boolean)
+                    Statistics: Num rows: 1 Data size: 90 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: a (type: int)
+ outputColumnNames: _col0
+                      Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ null sort order: z
+ sort order: +
+                        Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: int)
+ outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: explain select a from t2 where b IN ('A3', 'B1', 'B5') order by a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t2
+#### A masked pattern was here ####
+POSTHOOK: query: explain select a from t2 where b IN ('A3', 'B1', 'B5') order by a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t2
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: t2
+ filterExpr: (b) IN ('A3', 'B1', 'B5') (type: boolean)
+                  Statistics: Num rows: 10 Data size: 900 Basic stats: COMPLETE Column stats: COMPLETE
+                  Filter Operator
+                    predicate: (b) IN ('A3', 'B1', 'B5') (type: boolean)
+                    Statistics: Num rows: 3 Data size: 270 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: a (type: int)
+ outputColumnNames: _col0
+                      Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ null sort order: z
+ sort order: +
+                        Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: int)
+ outputColumnNames: _col0
+                Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+