This is an automated email from the ASF dual-hosted git repository.
gopalv pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 8215cf6 HIVE-24666: Vectorized UDFToBoolean may be unable to filter rows
if input is string (Zhihua Deng, reviewed by Gopal V)
8215cf6 is described below
commit 8215cf6b0ccf96234837292d46d71030c90090a3
Author: dengzhhu653 <[email protected]>
AuthorDate: Tue Feb 16 13:28:21 2021 -0800
HIVE-24666: Vectorized UDFToBoolean may be unable to filter rows if input is
string (Zhihua Deng, reviewed by Gopal V)
Signed-off-by: Gopal V <[email protected]>
---
.../hive/ql/exec/vector/VectorizationContext.java | 64 ++--
.../exec/vector/expressions/FuncStringToLong.java | 4 +-
.../hadoop/hive/ql/parse/SemanticAnalyzer.java | 15 +
.../hive/ql/parse/type/TypeCheckProcFactory.java | 23 ++
.../expressions/TestVectorFilterExpressions.java | 55 +++
.../queries/clientpositive/vector_cast_filter.q | 34 ++
.../llap/annotate_stats_filter.q.out | 10 +-
..._empty_where.q.out => vector_cast_filter.q.out} | 426 ++++++++++++++-------
.../clientpositive/llap/vector_empty_where.q.out | 20 +-
9 files changed, 462 insertions(+), 189 deletions(-)
diff --git
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
index 9958f66..4af7325 100644
---
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
+++
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
@@ -871,28 +871,11 @@ import com.google.common.annotations.VisibleForTesting;
} else {
// Ok, we need to convert.
List<ExprNodeDesc> exprAsList = Collections.singletonList(exprDesc);
-
- // First try our cast method that will handle a few special cases.
- VectorExpression castToBooleanExpr = getCastToBoolean(exprAsList);
- if (castToBooleanExpr == null) {
-
- // Ok, try the UDF.
- castToBooleanExpr = getVectorExpressionForUdf(null,
UDFToBoolean.class, exprAsList,
- VectorExpressionDescriptor.Mode.PROJECTION,
TypeInfoFactory.booleanTypeInfo);
- if (castToBooleanExpr == null) {
- throw new HiveException("Cannot vectorize converting expression " +
- exprDesc.getExprString() + " to boolean");
- }
+ expr = getCastToBooleanExpression(exprAsList,
VectorExpressionDescriptor.Mode.FILTER);
+ if (expr == null) {
+ throw new HiveException("Cannot vectorize converting expression " +
+ exprDesc.getExprString() + " to boolean");
}
-
- final int outputColumnNum = castToBooleanExpr.getOutputColumnNum();
-
- expr = new SelectColumnIsTrue(outputColumnNum);
-
- expr.setChildExpressions(new VectorExpression[] {castToBooleanExpr});
-
- expr.setInputTypeInfos(castToBooleanExpr.getOutputTypeInfo());
- expr.setInputDataTypePhysicalVariations(DataTypePhysicalVariation.NONE);
}
return expr;
}
@@ -1483,6 +1466,10 @@ import com.google.common.annotations.VisibleForTesting;
// Boolean is purposely excluded.
}
+ public static boolean isCastToBoolean(Class<? extends UDF> udfClass) {
+ return udfClass.equals(UDFToBoolean.class);
+ }
+
public static boolean isCastToFloatFamily(Class<? extends UDF> udfClass) {
return udfClass.equals(UDFToDouble.class)
|| udfClass.equals(UDFToFloat.class);
@@ -1598,7 +1585,11 @@ import com.google.common.annotations.VisibleForTesting;
if (typeInfo.getCategory() != Category.PRIMITIVE) {
throw new HiveException("Complex type constants (" +
typeInfo.getCategory() + ") not supported for type name " + typeName);
}
- return new ConstantVectorExpression(outCol, typeInfo, true);
+ if (mode == VectorExpressionDescriptor.Mode.FILTER) {
+ return new FilterConstantBooleanVectorExpression(0);
+ } else {
+ return new ConstantVectorExpression(outCol, typeInfo, true);
+ }
}
// Boolean is special case.
@@ -2992,8 +2983,8 @@ import com.google.common.annotations.VisibleForTesting;
PrimitiveCategory integerPrimitiveCategory =
getAnyIntegerPrimitiveCategoryFromUdfClass(cl);
ve = getCastToLongExpression(childExpr, integerPrimitiveCategory);
- } else if (cl.equals(UDFToBoolean.class)) {
- ve = getCastToBoolean(childExpr);
+ } else if (isCastToBoolean(cl)) {
+ ve = getCastToBooleanExpression(childExpr, mode);
} else if (isCastToFloatFamily(cl)) {
ve = getCastToDoubleExpression(cl, childExpr, returnType);
}
@@ -3470,26 +3461,41 @@ import com.google.common.annotations.VisibleForTesting;
return null;
}
- private VectorExpression getCastToBoolean(List<ExprNodeDesc> childExpr)
+ private VectorExpression getCastToBooleanExpression(List<ExprNodeDesc>
childExpr, VectorExpressionDescriptor.Mode mode)
throws HiveException {
ExprNodeDesc child = childExpr.get(0);
TypeInfo inputTypeInfo = child.getTypeInfo();
String inputType = inputTypeInfo.toString();
if (child instanceof ExprNodeConstantDesc) {
if (null == ((ExprNodeConstantDesc)child).getValue()) {
- return getConstantVectorExpression(null,
TypeInfoFactory.booleanTypeInfo, VectorExpressionDescriptor.Mode.PROJECTION);
+ return getConstantVectorExpression(null,
TypeInfoFactory.booleanTypeInfo, mode);
}
// Don't do constant folding here. Wait until the optimizer is changed
to do it.
// Family of related JIRAs: HIVE-7421, HIVE-7422, and HIVE-7424.
return null;
}
+
+ VectorExpression ve;
// Long and double are handled using descriptors, string needs to be
specially handled.
if (isStringFamily(inputType)) {
-
- return createVectorExpression(CastStringToBoolean.class, childExpr,
+ ve = createVectorExpression(CastStringToBoolean.class, childExpr,
VectorExpressionDescriptor.Mode.PROJECTION,
TypeInfoFactory.booleanTypeInfo, DataTypePhysicalVariation.NONE);
+ } else {
+ // Ok, try the UDF.
+ ve = getVectorExpressionForUdf(null, UDFToBoolean.class, childExpr,
+ VectorExpressionDescriptor.Mode.PROJECTION,
TypeInfoFactory.booleanTypeInfo);
}
- return null;
+
+ if (ve == null || mode == VectorExpressionDescriptor.Mode.PROJECTION) {
+ return ve;
+ }
+
+ int outputColumnNum = ve.getOutputColumnNum();
+ SelectColumnIsTrue filterVectorExpr = new
SelectColumnIsTrue(outputColumnNum);
+ filterVectorExpr.setChildExpressions(new VectorExpression[] { ve });
+ filterVectorExpr.setInputTypeInfos(ve.getOutputTypeInfo());
+
filterVectorExpr.setInputDataTypePhysicalVariations(DataTypePhysicalVariation.NONE);
+ return filterVectorExpr;
}
private VectorExpression getCastToLongExpression(List<ExprNodeDesc>
childExpr, PrimitiveCategory integerPrimitiveCategory)
diff --git
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncStringToLong.java
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncStringToLong.java
index fffb36d..007cbd2 100644
---
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncStringToLong.java
+++
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncStringToLong.java
@@ -27,8 +27,8 @@ import
org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.metadata.HiveException;
/**
- * Superclass to support vectorized functions that take a long
- * and return a string, optionally with additional configuration arguments.
+ * Superclass to support vectorized functions that take a string
+ * and return a long, optionally with additional configuration arguments.
* Used for cast(string), length(string), etc
*/
public abstract class FuncStringToLong extends VectorExpression {
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index d15e46a..7c73065 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -3583,6 +3583,21 @@ public class SemanticAnalyzer extends
BaseSemanticAnalyzer {
}
}
+ if (!filterCond.getTypeInfo().accept(TypeInfoFactory.booleanTypeInfo)) {
+ // If the return type of the filter condition is not boolean, try to
implicitly
+ // convert the result of the condition to a boolean value.
+ if (filterCond.getTypeInfo().getCategory() ==
ObjectInspector.Category.PRIMITIVE) {
+ // For primitive types like string/double/timestamp, try to cast the
result of
+ // the child expression to a boolean.
+ filterCond = ExprNodeTypeCheck.getExprNodeDefaultExprProcessor()
+ .createConversionCast(filterCond, TypeInfoFactory.booleanTypeInfo);
+ } else {
+ // For complex types like map/list/struct, create an isnotnull function
on the child expression.
+ filterCond = ExprNodeTypeCheck.getExprNodeDefaultExprProcessor()
+ .getFuncExprNodeDesc("isnotnull", filterCond);
+ }
+ }
+
Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(
new FilterDesc(filterCond, false), new RowSchema(
inputRR.getColumnInfos()), input), inputRR);
diff --git
a/ql/src/java/org/apache/hadoop/hive/ql/parse/type/TypeCheckProcFactory.java
b/ql/src/java/org/apache/hadoop/hive/ql/parse/type/TypeCheckProcFactory.java
index aa130c8..488dc50 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/type/TypeCheckProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/type/TypeCheckProcFactory.java
@@ -838,6 +838,29 @@ public class TypeCheckProcFactory<T> {
children.set(0, newColumn);
}
}
+
+ if (funcText.equalsIgnoreCase("and") || funcText.equalsIgnoreCase("or")
+ || funcText.equalsIgnoreCase("not") ||
funcText.equalsIgnoreCase("!")) {
+ // If the current function is a logical operator (and/or/not), the return
types of
the children should be booleans.
+ // Iterate on the children, if the result of a child expression is not
a boolean, try to implicitly
+ // convert the result of such a child to a boolean value.
+ for (int i = 0; i < children.size(); i++) {
+ T child = children.get(i);
+ TypeInfo typeInfo = exprFactory.getTypeInfo(child);
+ if (!TypeInfoFactory.booleanTypeInfo.accept(typeInfo)) {
+ if (typeInfo.getCategory() == ObjectInspector.Category.PRIMITIVE) {
+ // For primitive types like string/double/timestamp, try to cast
the result of
+ // the child expression to a boolean.
+ children.set(i, createConversionCast(child,
TypeInfoFactory.booleanTypeInfo));
+ } else {
+ // For complex types like map/list/struct, create an isnotnull
function on the child expression.
+ child =
exprFactory.createFuncCallExpr(TypeInfoFactory.booleanTypeInfo,
+ exprFactory.getFunctionInfo("isnotnull"),"isnotnull",
Arrays.asList(child));
+ children.set(i, child);
+ }
+ }
+ }
+ }
}
protected T getXpathOrFuncExprNodeDesc(ASTNode node,
diff --git
a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorFilterExpressions.java
b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorFilterExpressions.java
index c8c75e0..d9d56a0 100644
---
a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorFilterExpressions.java
+++
b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorFilterExpressions.java
@@ -33,6 +33,7 @@ import
org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import
org.apache.hadoop.hive.ql.exec.vector.expressions.gen.CastLongToBooleanViaLongToLong;
import
org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterDecimalColGreaterEqualDecimalColumn;
import
org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterDecimalColLessDecimalScalar;
import
org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterDecimalScalarGreaterDecimalColumn;
@@ -1057,4 +1058,58 @@ public class TestVectorFilterExpressions {
b.size = 3;
return b;
}
+
+ // Test that the cast filter should be wrapped in SelectColumnIsTrue
+ @Test
+ public void testCastFilter() throws HiveException {
+ int seed = 0;
+ VectorizedRowBatch vrb = VectorizedRowGroupGenUtil.getVectorizedRowBatch(
+ 3, 4, seed);
+ vrb.cols[0] = new BytesColumnVector();
+ BytesColumnVector bcv = (BytesColumnVector) vrb.cols[0];
+ bcv.initBuffer();
+ byte[] n = "no".getBytes();
+ byte[] f = "false".getBytes();
+ bcv.setVal(0, n, 0, n.length);
+ bcv.setVal(1, f, 0, f.length);
+ bcv.setVal(2, c, 0, 1);
+
+ VectorExpression ve1 = new CastStringToBoolean(0,2);
+ VectorExpression ve2 = new CastLongToBooleanViaLongToLong(1, 3);
+ VectorExpression orExpr = new FilterExprOrExpr();
+ orExpr.setChildExpressions(new VectorExpression[] {ve1, ve2});
+ orExpr.evaluate(vrb);
+
+ // Only one row should be filtered out, but both filters fail to take
effect
+ assertFalse(vrb.selectedInUse);
+ assertEquals(0, vrb.selected[0]);
+ assertEquals(1, vrb.selected[1]);
+ assertEquals(2, vrb.selected[2]);
+ assertEquals(3, vrb.size);
+
+ SelectColumnIsTrue filter1 = new SelectColumnIsTrue(2);
+ filter1.setChildExpressions(new VectorExpression[]{ ve1 });
+ VectorExpression andExpr = new FilterExprAndExpr();
+ // SelectColumnIsTrue(cast string) and CastLongToBooleanViaLongToLong
+ andExpr.setChildExpressions(new VectorExpression[]{filter1, ve2});
+ andExpr.evaluate(vrb);
+
+ // All should be filtered out, but CastLongToBooleanViaLongToLong fails to
take effect
+ assertTrue(vrb.selectedInUse);
+ assertEquals(2, vrb.selected[0]);
+ assertEquals(1, vrb.size);
+
+ // restore
+ vrb.selectedInUse = false;
+ vrb.size = 3;
+
+ SelectColumnIsTrue filter2 = new SelectColumnIsTrue(3);
+ filter2.setChildExpressions(new VectorExpression[]{ ve2 });
+ andExpr.setChildExpressions(new VectorExpression[]{filter1, filter2});
+ andExpr.evaluate(vrb);
+
+ assertTrue(vrb.selectedInUse);
+ assertEquals(0, vrb.size);
+ }
+
}
diff --git a/ql/src/test/queries/clientpositive/vector_cast_filter.q
b/ql/src/test/queries/clientpositive/vector_cast_filter.q
new file mode 100644
index 0000000..df60419
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/vector_cast_filter.q
@@ -0,0 +1,34 @@
+--! qt:dataset:alltypesorc
+SET hive.vectorized.execution.enabled=false;
+
+explain
+select count (distinct cint) from alltypesorc where !cstring1;
+
+select count (distinct cint) from alltypesorc where !cstring1;
+
+explain
+select count (distinct cint) from alltypesorc where cint and cstring1;
+
+select count (distinct cint) from alltypesorc where cint and cstring1;
+
+explain
+select count (distinct cint) from alltypesorc where cfloat or cint;
+
+select count (distinct cint) from alltypesorc where cfloat or cint;
+
+SET hive.vectorized.execution.enabled=true;
+
+explain vectorization expression
+select count (distinct cint) from alltypesorc where !cstring1;
+
+select count (distinct cint) from alltypesorc where !cstring1;
+
+explain vectorization expression
+select count (distinct cint) from alltypesorc where cint and cstring1;
+
+select count (distinct cint) from alltypesorc where cint and cstring1;
+
+explain vectorization expression
+select count (distinct cint) from alltypesorc where cfloat or cint;
+
+select count (distinct cint) from alltypesorc where cfloat or cint;
diff --git
a/ql/src/test/results/clientpositive/llap/annotate_stats_filter.q.out
b/ql/src/test/results/clientpositive/llap/annotate_stats_filter.q.out
index fc0cad2..8e1e988 100644
--- a/ql/src/test/results/clientpositive/llap/annotate_stats_filter.q.out
+++ b/ql/src/test/results/clientpositive/llap/annotate_stats_filter.q.out
@@ -375,12 +375,10 @@ STAGE PLANS:
Processor Tree:
TableScan
alias: loc_orc
- Filter Operator
- predicate: 'foo' (type: string)
- Select Operator
- expressions: state (type: string), locid (type: int), zip (type:
bigint), year (type: int)
- outputColumnNames: _col0, _col1, _col2, _col3
- ListSink
+ Select Operator
+ expressions: state (type: string), locid (type: int), zip (type:
bigint), year (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ ListSink
PREHOOK: query: explain select * from loc_orc where true = true
PREHOOK: type: QUERY
diff --git a/ql/src/test/results/clientpositive/llap/vector_empty_where.q.out
b/ql/src/test/results/clientpositive/llap/vector_cast_filter.q.out
similarity index 67%
copy from ql/src/test/results/clientpositive/llap/vector_empty_where.q.out
copy to ql/src/test/results/clientpositive/llap/vector_cast_filter.q.out
index adac463..7d73640 100644
--- a/ql/src/test/results/clientpositive/llap/vector_empty_where.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_cast_filter.q.out
@@ -1,17 +1,13 @@
-PREHOOK: query: explain vectorization expression
-select count (distinct cint) from alltypesorc where cstring1
+PREHOOK: query: explain
+select count (distinct cint) from alltypesorc where !cstring1
PREHOOK: type: QUERY
PREHOOK: Input: default@alltypesorc
#### A masked pattern was here ####
-POSTHOOK: query: explain vectorization expression
-select count (distinct cint) from alltypesorc where cstring1
+POSTHOOK: query: explain
+select count (distinct cint) from alltypesorc where !cstring1
POSTHOOK: type: QUERY
POSTHOOK: Input: default@alltypesorc
#### A masked pattern was here ####
-PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -29,32 +25,16 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: alltypesorc
+ filterExpr: (not UDFToBoolean(cstring1)) (type: boolean)
Statistics: Num rows: 12288 Data size: 899146 Basic stats:
COMPLETE Column stats: COMPLETE
- TableScan Vectorization:
- native: true
Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
- native: true
- predicateExpression: SelectColumnIsTrue(col
13:boolean)(children: CastStringToBoolean(col 6) -> 13:boolean)
- predicate: cstring1 (type: string)
+ predicate: (not UDFToBoolean(cstring1)) (type: boolean)
Statistics: Num rows: 6144 Data size: 449620 Basic stats:
COMPLETE Column stats: COMPLETE
Select Operator
expressions: cint (type: int)
outputColumnNames: cint
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [2]
Statistics: Num rows: 6144 Data size: 449620 Basic
stats: COMPLETE Column stats: COMPLETE
Group By Operator
- Group By Vectorization:
- className: VectorGroupByOperator
- groupByMode: HASH
- keyExpressions: col 2:int
- native: false
- vectorProcessingMode: HASH
- projectedOutputColumnNums: []
keys: cint (type: int)
minReductionHashAggr: 0.4
mode: hash
@@ -65,52 +45,19 @@ STAGE PLANS:
null sort order: z
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Reduce Sink Vectorization:
- className: VectorReduceSinkLongOperator
- native: true
- nativeConditionsMet:
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true,
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 6104 Data size: 18228 Basic
stats: COMPLETE Column stats: COMPLETE
- Execution mode: vectorized, llap
+ Execution mode: llap
LLAP IO: all inputs
- Map Vectorization:
- enabled: true
- enabledConditionsMet:
hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats:
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
Reducer 2
- Execution mode: vectorized, llap
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled
IS true, hive.execution.engine tez IN [tez, spark] IS true
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
+ Execution mode: llap
Reduce Operator Tree:
Group By Operator
- Group By Vectorization:
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- keyExpressions: col 0:int
- native: false
- vectorProcessingMode: MERGE_PARTIAL
- projectedOutputColumnNums: []
keys: KEY._col0 (type: int)
mode: mergepartial
outputColumnNames: _col0
Statistics: Num rows: 6104 Data size: 18228 Basic stats:
COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count(_col0)
- Group By Vectorization:
- aggregators: VectorUDAFCount(col 0:int) -> bigint
- className: VectorGroupByOperator
- groupByMode: HASH
- native: false
- vectorProcessingMode: HASH
- projectedOutputColumnNums: [0]
minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0
@@ -118,38 +65,18 @@ STAGE PLANS:
Reduce Output Operator
null sort order:
sort order:
- Reduce Sink Vectorization:
- className: VectorReduceSinkEmptyKeyOperator
- native: true
- nativeConditionsMet:
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true,
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: COMPLETE
value expressions: _col0 (type: bigint)
Reducer 3
- Execution mode: vectorized, llap
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled
IS true, hive.execution.engine tez IN [tez, spark] IS true
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
+ Execution mode: llap
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
- Group By Vectorization:
- aggregators: VectorUDAFCountMerge(col 0:bigint) -> bigint
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- native: false
- vectorProcessingMode: GLOBAL
- projectedOutputColumnNums: [0]
mode: mergepartial
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: COMPLETE
File Output Operator
compressed: false
- File Sink Vectorization:
- className: VectorFileSinkOperator
- native: false
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: COMPLETE
table:
input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -162,22 +89,222 @@ STAGE PLANS:
Processor Tree:
ListSink
-PREHOOK: query: select count (distinct cint) from alltypesorc where cstring1
+PREHOOK: query: select count (distinct cint) from alltypesorc where !cstring1
PREHOOK: type: QUERY
PREHOOK: Input: default@alltypesorc
#### A masked pattern was here ####
-POSTHOOK: query: select count (distinct cint) from alltypesorc where cstring1
+POSTHOOK: query: select count (distinct cint) from alltypesorc where !cstring1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+0
+PREHOOK: query: explain
+select count (distinct cint) from alltypesorc where cint and cstring1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: explain
+select count (distinct cint) from alltypesorc where cint and cstring1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: alltypesorc
+ filterExpr: (UDFToBoolean(cint) and UDFToBoolean(cstring1))
(type: boolean)
+ Statistics: Num rows: 12288 Data size: 899146 Basic stats:
COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (UDFToBoolean(cint) and UDFToBoolean(cstring1))
(type: boolean)
+ Statistics: Num rows: 3072 Data size: 224812 Basic stats:
COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: cint (type: int)
+ outputColumnNames: cint
+ Statistics: Num rows: 3072 Data size: 224812 Basic
stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: cint (type: int)
+ minReductionHashAggr: 0.4
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 3072 Data size: 9176 Basic
stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 3072 Data size: 9176 Basic
stats: COMPLETE Column stats: COMPLETE
+ Execution mode: llap
+ LLAP IO: all inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 3072 Data size: 9176 Basic stats:
COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count(_col0)
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: COMPLETE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: COMPLETE
+ value expressions: _col0 (type: bigint)
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: COMPLETE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count (distinct cint) from alltypesorc where cint and
cstring1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select count (distinct cint) from alltypesorc where cint and
cstring1
POSTHOOK: type: QUERY
POSTHOOK: Input: default@alltypesorc
#### A masked pattern was here ####
6041
+PREHOOK: query: explain
+select count (distinct cint) from alltypesorc where cfloat or cint
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: explain
+select count (distinct cint) from alltypesorc where cfloat or cint
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: alltypesorc
+ filterExpr: (UDFToBoolean(cfloat) or UDFToBoolean(cint))
(type: boolean)
+ Statistics: Num rows: 12288 Data size: 73392 Basic stats:
COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (UDFToBoolean(cfloat) or UDFToBoolean(cint))
(type: boolean)
+ Statistics: Num rows: 12288 Data size: 73392 Basic stats:
COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: cint (type: int)
+ outputColumnNames: cint
+ Statistics: Num rows: 12288 Data size: 73392 Basic
stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: cint (type: int)
+ minReductionHashAggr: 0.5031738
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 6105 Data size: 18232 Basic
stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 6105 Data size: 18232 Basic
stats: COMPLETE Column stats: COMPLETE
+ Execution mode: llap
+ LLAP IO: all inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 6105 Data size: 18232 Basic stats:
COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count(_col0)
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: COMPLETE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: COMPLETE
+ value expressions: _col0 (type: bigint)
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: COMPLETE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count (distinct cint) from alltypesorc where cfloat or
cint
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select count (distinct cint) from alltypesorc where cfloat or
cint
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+6082
PREHOOK: query: explain vectorization expression
-select count (distinct cint) from alltypesorc where cint
+select count (distinct cint) from alltypesorc where !cstring1
PREHOOK: type: QUERY
PREHOOK: Input: default@alltypesorc
#### A masked pattern was here ####
POSTHOOK: query: explain vectorization expression
-select count (distinct cint) from alltypesorc where cint
+select count (distinct cint) from alltypesorc where !cstring1
POSTHOOK: type: QUERY
POSTHOOK: Input: default@alltypesorc
#### A masked pattern was here ####
@@ -202,39 +329,48 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: alltypesorc
- Statistics: Num rows: 12288 Data size: 36696 Basic stats:
COMPLETE Column stats: COMPLETE
+ filterExpr: (not UDFToBoolean(cstring1)) (type: boolean)
+ Statistics: Num rows: 12288 Data size: 899146 Basic stats:
COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
Filter Operator
Filter Vectorization:
className: VectorFilterOperator
native: true
- predicateExpression: SelectColumnIsTrue(col
13:boolean)(children: CastLongToBooleanViaLongToLong(col 2:int) -> 13:boolean)
- predicate: cint (type: int)
- Statistics: Num rows: 6144 Data size: 18348 Basic stats:
COMPLETE Column stats: COMPLETE
- Group By Operator
- Group By Vectorization:
- className: VectorGroupByOperator
- groupByMode: HASH
- keyExpressions: col 2:int
- native: false
- vectorProcessingMode: HASH
- projectedOutputColumnNums: []
- keys: cint (type: int)
- minReductionHashAggr: 0.5030924
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 3053 Data size: 9120 Basic stats:
COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- null sort order: z
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Reduce Sink Vectorization:
- className: VectorReduceSinkLongOperator
- native: true
- nativeConditionsMet:
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true,
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 3053 Data size: 9120 Basic
stats: COMPLETE Column stats: COMPLETE
+ predicateExpression: SelectColumnIsFalse(col
13:boolean)(children: CastStringToBoolean(col 6) -> 13:boolean)
+ predicate: (not UDFToBoolean(cstring1)) (type: boolean)
+ Statistics: Num rows: 6144 Data size: 449620 Basic stats:
COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: cint (type: int)
+ outputColumnNames: cint
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [2]
+ Statistics: Num rows: 6144 Data size: 449620 Basic
stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ Group By Vectorization:
+ className: VectorGroupByOperator
+ groupByMode: HASH
+ keyExpressions: col 2:int
+ native: false
+ vectorProcessingMode: HASH
+ projectedOutputColumnNums: []
+ keys: cint (type: int)
+ minReductionHashAggr: 0.4
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 6104 Data size: 18228 Basic
stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkLongOperator
+ native: true
+ nativeConditionsMet:
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true,
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Statistics: Num rows: 6104 Data size: 18228 Basic
stats: COMPLETE Column stats: COMPLETE
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization:
@@ -266,7 +402,7 @@ STAGE PLANS:
keys: KEY._col0 (type: int)
mode: mergepartial
outputColumnNames: _col0
- Statistics: Num rows: 3053 Data size: 9120 Basic stats:
COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 6104 Data size: 18228 Basic stats:
COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count(_col0)
Group By Vectorization:
@@ -327,22 +463,22 @@ STAGE PLANS:
Processor Tree:
ListSink
-PREHOOK: query: select count (distinct cint) from alltypesorc where cint
+PREHOOK: query: select count (distinct cint) from alltypesorc where !cstring1
PREHOOK: type: QUERY
PREHOOK: Input: default@alltypesorc
#### A masked pattern was here ####
-POSTHOOK: query: select count (distinct cint) from alltypesorc where cint
+POSTHOOK: query: select count (distinct cint) from alltypesorc where !cstring1
POSTHOOK: type: QUERY
POSTHOOK: Input: default@alltypesorc
#### A masked pattern was here ####
-6082
+0
PREHOOK: query: explain vectorization expression
-select count (distinct cint) from alltypesorc where cfloat
+select count (distinct cint) from alltypesorc where cint and cstring1
PREHOOK: type: QUERY
PREHOOK: Input: default@alltypesorc
#### A masked pattern was here ####
POSTHOOK: query: explain vectorization expression
-select count (distinct cint) from alltypesorc where cfloat
+select count (distinct cint) from alltypesorc where cint and cstring1
POSTHOOK: type: QUERY
POSTHOOK: Input: default@alltypesorc
#### A masked pattern was here ####
@@ -367,16 +503,17 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: alltypesorc
- Statistics: Num rows: 12288 Data size: 73392 Basic stats:
COMPLETE Column stats: COMPLETE
+ filterExpr: (UDFToBoolean(cint) and UDFToBoolean(cstring1))
(type: boolean)
+ Statistics: Num rows: 12288 Data size: 899146 Basic stats:
COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
Filter Operator
Filter Vectorization:
className: VectorFilterOperator
native: true
- predicateExpression: SelectColumnIsTrue(col
13:boolean)(children: CastDoubleToBooleanViaDoubleToLong(col 4:float) ->
13:boolean)
- predicate: cfloat (type: float)
- Statistics: Num rows: 6144 Data size: 36696 Basic stats:
COMPLETE Column stats: COMPLETE
+ predicateExpression: FilterExprAndExpr(children:
SelectColumnIsTrue(col 13:boolean)(children: CastLongToBooleanViaLongToLong(col
2:int) -> 13:boolean), SelectColumnIsTrue(col 14:boolean)(children:
CastStringToBoolean(col 6) -> 14:boolean))
+ predicate: (UDFToBoolean(cint) and UDFToBoolean(cstring1))
(type: boolean)
+ Statistics: Num rows: 3072 Data size: 224812 Basic stats:
COMPLETE Column stats: COMPLETE
Select Operator
expressions: cint (type: int)
outputColumnNames: cint
@@ -384,7 +521,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [2]
- Statistics: Num rows: 6144 Data size: 36696 Basic stats:
COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3072 Data size: 224812 Basic
stats: COMPLETE Column stats: COMPLETE
Group By Operator
Group By Vectorization:
className: VectorGroupByOperator
@@ -397,7 +534,7 @@ STAGE PLANS:
minReductionHashAggr: 0.4
mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 6104 Data size: 18228 Basic
stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3072 Data size: 9176 Basic
stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
null sort order: z
@@ -407,7 +544,7 @@ STAGE PLANS:
className: VectorReduceSinkLongOperator
native: true
nativeConditionsMet:
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true,
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 6104 Data size: 18228 Basic
stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3072 Data size: 9176 Basic
stats: COMPLETE Column stats: COMPLETE
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization:
@@ -439,7 +576,7 @@ STAGE PLANS:
keys: KEY._col0 (type: int)
mode: mergepartial
outputColumnNames: _col0
- Statistics: Num rows: 6104 Data size: 18228 Basic stats:
COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3072 Data size: 9176 Basic stats:
COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count(_col0)
Group By Vectorization:
@@ -500,22 +637,22 @@ STAGE PLANS:
Processor Tree:
ListSink
-PREHOOK: query: select count (distinct cint) from alltypesorc where cfloat
+PREHOOK: query: select count (distinct cint) from alltypesorc where cint and
cstring1
PREHOOK: type: QUERY
PREHOOK: Input: default@alltypesorc
#### A masked pattern was here ####
-POSTHOOK: query: select count (distinct cint) from alltypesorc where cfloat
+POSTHOOK: query: select count (distinct cint) from alltypesorc where cint and
cstring1
POSTHOOK: type: QUERY
POSTHOOK: Input: default@alltypesorc
#### A masked pattern was here ####
-3022
+6041
PREHOOK: query: explain vectorization expression
-select count (distinct cint) from alltypesorc where ctimestamp1
+select count (distinct cint) from alltypesorc where cfloat or cint
PREHOOK: type: QUERY
PREHOOK: Input: default@alltypesorc
#### A masked pattern was here ####
POSTHOOK: query: explain vectorization expression
-select count (distinct cint) from alltypesorc where ctimestamp1
+select count (distinct cint) from alltypesorc where cfloat or cint
POSTHOOK: type: QUERY
POSTHOOK: Input: default@alltypesorc
#### A masked pattern was here ####
@@ -540,16 +677,17 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: alltypesorc
- Statistics: Num rows: 12288 Data size: 403656 Basic stats:
COMPLETE Column stats: COMPLETE
+ filterExpr: (UDFToBoolean(cfloat) or UDFToBoolean(cint))
(type: boolean)
+ Statistics: Num rows: 12288 Data size: 73392 Basic stats:
COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
Filter Operator
Filter Vectorization:
className: VectorFilterOperator
native: true
- predicateExpression: SelectColumnIsTrue(col
13:boolean)(children: CastTimestampToBoolean(col 8:timestamp) -> 13:boolean)
- predicate: ctimestamp1 (type: timestamp)
- Statistics: Num rows: 6144 Data size: 201828 Basic stats:
COMPLETE Column stats: COMPLETE
+ predicateExpression: FilterExprOrExpr(children:
SelectColumnIsTrue(col 13:boolean)(children:
CastDoubleToBooleanViaDoubleToLong(col 4:float) -> 13:boolean),
SelectColumnIsTrue(col 14:boolean)(children: CastLongToBooleanViaLongToLong(col
2:int) -> 14:boolean))
+ predicate: (UDFToBoolean(cfloat) or UDFToBoolean(cint))
(type: boolean)
+ Statistics: Num rows: 12288 Data size: 73392 Basic stats:
COMPLETE Column stats: COMPLETE
Select Operator
expressions: cint (type: int)
outputColumnNames: cint
@@ -557,7 +695,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [2]
- Statistics: Num rows: 6144 Data size: 201828 Basic
stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 12288 Data size: 73392 Basic
stats: COMPLETE Column stats: COMPLETE
Group By Operator
Group By Vectorization:
className: VectorGroupByOperator
@@ -567,10 +705,10 @@ STAGE PLANS:
vectorProcessingMode: HASH
projectedOutputColumnNums: []
keys: cint (type: int)
- minReductionHashAggr: 0.4
+ minReductionHashAggr: 0.5031738
mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 6104 Data size: 18228 Basic
stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 6105 Data size: 18232 Basic
stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
null sort order: z
@@ -580,7 +718,7 @@ STAGE PLANS:
className: VectorReduceSinkLongOperator
native: true
nativeConditionsMet:
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true,
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 6104 Data size: 18228 Basic
stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 6105 Data size: 18232 Basic
stats: COMPLETE Column stats: COMPLETE
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization:
@@ -612,7 +750,7 @@ STAGE PLANS:
keys: KEY._col0 (type: int)
mode: mergepartial
outputColumnNames: _col0
- Statistics: Num rows: 6104 Data size: 18228 Basic stats:
COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 6105 Data size: 18232 Basic stats:
COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count(_col0)
Group By Vectorization:
@@ -673,12 +811,12 @@ STAGE PLANS:
Processor Tree:
ListSink
-PREHOOK: query: select count (distinct cint) from alltypesorc where ctimestamp1
+PREHOOK: query: select count (distinct cint) from alltypesorc where cfloat or
cint
PREHOOK: type: QUERY
PREHOOK: Input: default@alltypesorc
#### A masked pattern was here ####
-POSTHOOK: query: select count (distinct cint) from alltypesorc where
ctimestamp1
+POSTHOOK: query: select count (distinct cint) from alltypesorc where cfloat or
cint
POSTHOOK: type: QUERY
POSTHOOK: Input: default@alltypesorc
#### A masked pattern was here ####
-3022
+6082
diff --git a/ql/src/test/results/clientpositive/llap/vector_empty_where.q.out
b/ql/src/test/results/clientpositive/llap/vector_empty_where.q.out
index adac463..218fc41 100644
--- a/ql/src/test/results/clientpositive/llap/vector_empty_where.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_empty_where.q.out
@@ -29,6 +29,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: alltypesorc
+ filterExpr: UDFToBoolean(cstring1) (type: boolean)
Statistics: Num rows: 12288 Data size: 899146 Basic stats:
COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
@@ -37,7 +38,7 @@ STAGE PLANS:
className: VectorFilterOperator
native: true
predicateExpression: SelectColumnIsTrue(col
13:boolean)(children: CastStringToBoolean(col 6) -> 13:boolean)
- predicate: cstring1 (type: string)
+ predicate: UDFToBoolean(cstring1) (type: boolean)
Statistics: Num rows: 6144 Data size: 449620 Basic stats:
COMPLETE Column stats: COMPLETE
Select Operator
expressions: cint (type: int)
@@ -202,6 +203,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: alltypesorc
+ filterExpr: UDFToBoolean(cint) (type: boolean)
Statistics: Num rows: 12288 Data size: 36696 Basic stats:
COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
@@ -210,7 +212,7 @@ STAGE PLANS:
className: VectorFilterOperator
native: true
predicateExpression: SelectColumnIsTrue(col
13:boolean)(children: CastLongToBooleanViaLongToLong(col 2:int) -> 13:boolean)
- predicate: cint (type: int)
+ predicate: UDFToBoolean(cint) (type: boolean)
Statistics: Num rows: 6144 Data size: 18348 Basic stats:
COMPLETE Column stats: COMPLETE
Group By Operator
Group By Vectorization:
@@ -221,10 +223,10 @@ STAGE PLANS:
vectorProcessingMode: HASH
projectedOutputColumnNums: []
keys: cint (type: int)
- minReductionHashAggr: 0.5030924
+ minReductionHashAggr: 0.4
mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 3053 Data size: 9120 Basic stats:
COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 6104 Data size: 18228 Basic stats:
COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
null sort order: z
@@ -234,7 +236,7 @@ STAGE PLANS:
className: VectorReduceSinkLongOperator
native: true
nativeConditionsMet:
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true,
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 3053 Data size: 9120 Basic
stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 6104 Data size: 18228 Basic
stats: COMPLETE Column stats: COMPLETE
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization:
@@ -266,7 +268,7 @@ STAGE PLANS:
keys: KEY._col0 (type: int)
mode: mergepartial
outputColumnNames: _col0
- Statistics: Num rows: 3053 Data size: 9120 Basic stats:
COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 6104 Data size: 18228 Basic stats:
COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count(_col0)
Group By Vectorization:
@@ -367,6 +369,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: alltypesorc
+ filterExpr: UDFToBoolean(cfloat) (type: boolean)
Statistics: Num rows: 12288 Data size: 73392 Basic stats:
COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
@@ -375,7 +378,7 @@ STAGE PLANS:
className: VectorFilterOperator
native: true
predicateExpression: SelectColumnIsTrue(col
13:boolean)(children: CastDoubleToBooleanViaDoubleToLong(col 4:float) ->
13:boolean)
- predicate: cfloat (type: float)
+ predicate: UDFToBoolean(cfloat) (type: boolean)
Statistics: Num rows: 6144 Data size: 36696 Basic stats:
COMPLETE Column stats: COMPLETE
Select Operator
expressions: cint (type: int)
@@ -540,6 +543,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: alltypesorc
+ filterExpr: UDFToBoolean(ctimestamp1) (type: boolean)
Statistics: Num rows: 12288 Data size: 403656 Basic stats:
COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
@@ -548,7 +552,7 @@ STAGE PLANS:
className: VectorFilterOperator
native: true
predicateExpression: SelectColumnIsTrue(col
13:boolean)(children: CastTimestampToBoolean(col 8:timestamp) -> 13:boolean)
- predicate: ctimestamp1 (type: timestamp)
+ predicate: UDFToBoolean(ctimestamp1) (type: boolean)
Statistics: Num rows: 6144 Data size: 201828 Basic stats:
COMPLETE Column stats: COMPLETE
Select Operator
expressions: cint (type: int)