[hive] branch master updated: HIVE-21863: Improve Vectorizer type casting for WHEN expression (Vineet Garg, reviewed by Gopal V)

vgarg Tue, 16 Jul 2019 15:02:20 -0700

This is an automated email from the ASF dual-hosted git repository.

vgarg pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git



The following commit(s) were added to refs/heads/master by this push:
     new ee0a10d  HIVE-21863: Improve Vectorizer type casting for WHEN expression (Vineet Garg, reviewed by Gopal V)
ee0a10d is described below

commit ee0a10d904bd39ca22c40cd5768bc12eef7ba71d
Author: Vineet Garg <[email protected]>
AuthorDate: Tue Jul 16 15:01:32 2019 -0700

    HIVE-21863: Improve Vectorizer type casting for WHEN expression (Vineet Garg, reviewed by Gopal V)
---
 .../hive/ql/exec/vector/VectorizationContext.java  |  18 ++
 .../queries/clientpositive/vector_case_when_2.q    |  12 +
 .../clientpositive/llap/vector_case_when_2.q.out   | 268 +++++++++++++++++++++
 .../clientpositive/vector_case_when_2.q.out        | 210 ++++++++++++++++
 4 files changed, 508 insertions(+)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
index 51d92a3..d5257c7 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
@@ -618,6 +618,7 @@ import com.google.common.annotations.VisibleForTesting;
     udfsNeedingImplicitDecimalCast.add(GenericUDFOPGreaterThan.class);
     udfsNeedingImplicitDecimalCast.add(GenericUDFOPEqualOrGreaterThan.class);
     udfsNeedingImplicitDecimalCast.add(GenericUDFBetween.class);
+    udfsNeedingImplicitDecimalCast.add(GenericUDFWhen.class);
     udfsNeedingImplicitDecimalCast.add(UDFSqrt.class);
     udfsNeedingImplicitDecimalCast.add(UDFRand.class);
     udfsNeedingImplicitDecimalCast.add(UDFLn.class);
@@ -1194,6 +1195,23 @@ import com.google.common.annotations.VisibleForTesting;
           childrenWithCasts.add(child);
         }
       }
+    } else if(genericUDF instanceof GenericUDFWhen) {
+      boolean hasElseClause = children.size() % 2 == 1 ;
+      for (int i=0; i<children.size(); i++) {
+        ExprNodeDesc castExpression = null;
+        if (i % 2 == 1) {
+          castExpression = getImplicitCastExpression(genericUDF, 
children.get(i), commonType);
+        }
+        if(hasElseClause && i == children.size()-1) {
+          castExpression = getImplicitCastExpression(genericUDF, 
children.get(i), commonType);
+        }
+        if (castExpression != null) {
+          atleastOneCastNeeded = true;
+          childrenWithCasts.add(castExpression);
+        } else {
+          childrenWithCasts.add(children.get(i));
+        }
+      }
     } else {
       for (ExprNodeDesc child : children) {
         ExprNodeDesc castExpression = getImplicitCastExpression(genericUDF, 
child, commonType);
diff --git a/ql/src/test/queries/clientpositive/vector_case_when_2.q b/ql/src/test/queries/clientpositive/vector_case_when_2.q
index 4d0a340..db13c0c 100644
--- a/ql/src/test/queries/clientpositive/vector_case_when_2.q
+++ b/ql/src/test/queries/clientpositive/vector_case_when_2.q
@@ -223,3 +223,15 @@ explain vectorization detail select q548284, CASE WHEN  
((q548284 = 4)) THEN (0.
 select q548284, CASE WHEN  ((q548284 = 4)) THEN (0.8) WHEN ((q548284 = 5)) 
THEN (1) ELSE (8) END
     from foo order by q548284 limit 1;
 
+set hive.cbo.enable=false;
+explain vectorization detail select q548284, CASE WHEN ((q548284 = 1)) THEN 
(0.2)
+    WHEN ((q548284 = 2)) THEN (0.4) WHEN ((q548284 = 3)) THEN (0.6) WHEN 
((q548284 = 4))
+    THEN (0.8) WHEN ((q548284 = 5)) THEN (1) ELSE (null) END from foo order by 
q548284 limit 1;
+select q548284, CASE WHEN ((q548284 = 1)) THEN (0.2)
+    WHEN ((q548284 = 2)) THEN (0.4) WHEN ((q548284 = 3)) THEN (0.6) WHEN 
((q548284 = 4))
+    THEN (0.8) WHEN ((q548284 = 5)) THEN (1) ELSE (null) END from foo order by 
q548284 limit 1;
+
+explain vectorization detail select q548284, CASE WHEN  ((q548284 = 4)) THEN 
(0.8)
+    WHEN ((q548284 = 5)) THEN (1) ELSE (8) END from foo order by q548284 limit 
1;
+select q548284, CASE WHEN  ((q548284 = 4)) THEN (0.8) WHEN ((q548284 = 5)) 
THEN (1) ELSE (8) END
+    from foo order by q548284 limit 1;
diff --git a/ql/src/test/results/clientpositive/llap/vector_case_when_2.q.out b/ql/src/test/results/clientpositive/llap/vector_case_when_2.q.out
index 38ba5f9..2f9133d 100644
--- a/ql/src/test/results/clientpositive/llap/vector_case_when_2.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_case_when_2.q.out
@@ -1203,3 +1203,271 @@ POSTHOOK: Input: default@foo
 #### A masked pattern was here ####
 q548284        _c1
 1      8.0
+PREHOOK: query: explain vectorization detail select q548284, CASE WHEN 
((q548284 = 1)) THEN (0.2)
+    WHEN ((q548284 = 2)) THEN (0.4) WHEN ((q548284 = 3)) THEN (0.6) WHEN 
((q548284 = 4))
+    THEN (0.8) WHEN ((q548284 = 5)) THEN (1) ELSE (null) END from foo order by 
q548284 limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@foo
+#### A masked pattern was here ####
+POSTHOOK: query: explain vectorization detail select q548284, CASE WHEN 
((q548284 = 1)) THEN (0.2)
+    WHEN ((q548284 = 2)) THEN (0.4) WHEN ((q548284 = 3)) THEN (0.6) WHEN 
((q548284 = 4))
+    THEN (0.8) WHEN ((q548284 = 5)) THEN (1) ELSE (null) END from foo order by 
q548284 limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@foo
+#### A masked pattern was here ####
+Explain
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: foo
+                  Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE 
Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      vectorizationSchemaColumns: [0:q548284:int, 
1:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
+                  Select Operator
+                    expressions: q548284 (type: int), CASE WHEN ((q548284 = 
1)) THEN (0.2) WHEN ((q548284 = 2)) THEN (0.4) WHEN ((q548284 = 3)) THEN (0.6) 
WHEN ((q548284 = 4)) THEN (0.8) WHEN ((q548284 = 5)) THEN (1) ELSE (null) END 
(type: decimal(11,1))
+                    outputColumnNames: _col0, _col1
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumnNums: [0, 16]
+                        selectExpressions: IfExprCondExprCondExpr(col 
2:boolean, col 3:decimal(11,1)col 15:decimal(11,1))(children: 
LongColEqualLongScalar(col 0:int, val 1) -> 2:boolean, 
ConstantVectorExpression(val 0.2) -> 3:decimal(11,1), 
IfExprCondExprCondExpr(col 4:boolean, col 5:decimal(11,1)col 
14:decimal(11,1))(children: LongColEqualLongScalar(col 0:int, val 2) -> 
4:boolean, ConstantVectorExpression(val 0.4) -> 5:decimal(11,1), 
IfExprCondExprCondExpr(col 6:boolean, col 7:decimal(1 [...]
+                    Statistics: Num rows: 6 Data size: 24 Basic stats: 
COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: int)
+                      sort order: +
+                      Reduce Sink Vectorization:
+                          className: VectorReduceSinkObjectHashOperator
+                          keyColumns: 0:int
+                          native: true
+                          nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                          valueColumns: 16:decimal(11,1)
+                      Statistics: Num rows: 6 Data size: 24 Basic stats: 
COMPLETE Column stats: NONE
+                      TopN Hash Memory Usage: 0.1
+                      value expressions: _col1 (type: decimal(11,1))
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: 
hive.vectorized.use.vector.serde.deserialize IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                featureSupportInUse: [DECIMAL_64]
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 1
+                    includeColumns: [0]
+                    dataColumns: q548284:int
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: [bigint, decimal(11,1), bigint, 
decimal(11,1), bigint, decimal(11,1), bigint, decimal(11,1), bigint, 
decimal(11,1), decimal(11,1), decimal(11,1), decimal(11,1), decimal(11,1), 
decimal(11,1)]
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: z
+                reduceColumnSortOrder: +
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    dataColumns: KEY.reducesinkkey0:int, 
VALUE._col0:decimal(11,1)
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 
(type: decimal(11,1))
+                outputColumnNames: _col0, _col1
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumnNums: [0, 1]
+                Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE 
Column stats: NONE
+                Limit
+                  Number of rows: 1
+                  Limit Vectorization:
+                      className: VectorLimitOperator
+                      native: true
+                  Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE 
Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    File Sink Vectorization:
+                        className: VectorFileSinkOperator
+                        native: false
+                    Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE 
Column stats: NONE
+                    table:
+                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select q548284, CASE WHEN ((q548284 = 1)) THEN (0.2)
+    WHEN ((q548284 = 2)) THEN (0.4) WHEN ((q548284 = 3)) THEN (0.6) WHEN 
((q548284 = 4))
+    THEN (0.8) WHEN ((q548284 = 5)) THEN (1) ELSE (null) END from foo order by 
q548284 limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@foo
+#### A masked pattern was here ####
+POSTHOOK: query: select q548284, CASE WHEN ((q548284 = 1)) THEN (0.2)
+    WHEN ((q548284 = 2)) THEN (0.4) WHEN ((q548284 = 3)) THEN (0.6) WHEN 
((q548284 = 4))
+    THEN (0.8) WHEN ((q548284 = 5)) THEN (1) ELSE (null) END from foo order by 
q548284 limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@foo
+#### A masked pattern was here ####
+q548284        _c1
+1      0.2
+PREHOOK: query: explain vectorization detail select q548284, CASE WHEN  
((q548284 = 4)) THEN (0.8)
+    WHEN ((q548284 = 5)) THEN (1) ELSE (8) END from foo order by q548284 limit 
1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@foo
+#### A masked pattern was here ####
+POSTHOOK: query: explain vectorization detail select q548284, CASE WHEN  
((q548284 = 4)) THEN (0.8)
+    WHEN ((q548284 = 5)) THEN (1) ELSE (8) END from foo order by q548284 limit 
1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@foo
+#### A masked pattern was here ####
+Explain
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: foo
+                  Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE 
Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      vectorizationSchemaColumns: [0:q548284:int, 
1:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
+                  Select Operator
+                    expressions: q548284 (type: int), CASE WHEN ((q548284 = 
4)) THEN (0.8) WHEN ((q548284 = 5)) THEN (1) ELSE (8) END (type: decimal(11,1))
+                    outputColumnNames: _col0, _col1
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumnNums: [0, 8]
+                        selectExpressions: IfExprCondExprCondExpr(col 
2:boolean, col 3:decimal(11,1)col 7:decimal(11,1))(children: 
LongColEqualLongScalar(col 0:int, val 4) -> 2:boolean, 
ConstantVectorExpression(val 0.8) -> 3:decimal(11,1), 
IfExprCondExprCondExpr(col 4:boolean, col 5:decimal(11,1)col 
6:decimal(11,1))(children: LongColEqualLongScalar(col 0:int, val 5) -> 
4:boolean, ConstantVectorExpression(val 1) -> 5:decimal(11,1), 
ConstantVectorExpression(val 8) -> 6:decimal(11,1)) -> 7: [...]
+                    Statistics: Num rows: 6 Data size: 24 Basic stats: 
COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: int)
+                      sort order: +
+                      Reduce Sink Vectorization:
+                          className: VectorReduceSinkObjectHashOperator
+                          keyColumns: 0:int
+                          native: true
+                          nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                          valueColumns: 8:decimal(11,1)
+                      Statistics: Num rows: 6 Data size: 24 Basic stats: 
COMPLETE Column stats: NONE
+                      TopN Hash Memory Usage: 0.1
+                      value expressions: _col1 (type: decimal(11,1))
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: 
hive.vectorized.use.vector.serde.deserialize IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                featureSupportInUse: [DECIMAL_64]
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 1
+                    includeColumns: [0]
+                    dataColumns: q548284:int
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: [bigint, decimal(11,1), bigint, 
decimal(11,1), decimal(11,1), decimal(11,1), decimal(11,1)]
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: z
+                reduceColumnSortOrder: +
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    dataColumns: KEY.reducesinkkey0:int, 
VALUE._col0:decimal(11,1)
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 
(type: decimal(11,1))
+                outputColumnNames: _col0, _col1
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumnNums: [0, 1]
+                Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE 
Column stats: NONE
+                Limit
+                  Number of rows: 1
+                  Limit Vectorization:
+                      className: VectorLimitOperator
+                      native: true
+                  Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE 
Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    File Sink Vectorization:
+                        className: VectorFileSinkOperator
+                        native: false
+                    Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE 
Column stats: NONE
+                    table:
+                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select q548284, CASE WHEN  ((q548284 = 4)) THEN (0.8) WHEN 
((q548284 = 5)) THEN (1) ELSE (8) END
+    from foo order by q548284 limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@foo
+#### A masked pattern was here ####
+POSTHOOK: query: select q548284, CASE WHEN  ((q548284 = 4)) THEN (0.8) WHEN 
((q548284 = 5)) THEN (1) ELSE (8) END
+    from foo order by q548284 limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@foo
+#### A masked pattern was here ####
+q548284        _c1
+1      8.0
diff --git a/ql/src/test/results/clientpositive/vector_case_when_2.q.out b/ql/src/test/results/clientpositive/vector_case_when_2.q.out
index 2d4dc5b..da7b675 100644
--- a/ql/src/test/results/clientpositive/vector_case_when_2.q.out
+++ b/ql/src/test/results/clientpositive/vector_case_when_2.q.out
@@ -1067,3 +1067,213 @@ POSTHOOK: Input: default@foo
 #### A masked pattern was here ####
 q548284        _c1
 1      8.0
+PREHOOK: query: explain vectorization detail select q548284, CASE WHEN 
((q548284 = 1)) THEN (0.2)
+    WHEN ((q548284 = 2)) THEN (0.4) WHEN ((q548284 = 3)) THEN (0.6) WHEN 
((q548284 = 4))
+    THEN (0.8) WHEN ((q548284 = 5)) THEN (1) ELSE (null) END from foo order by 
q548284 limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@foo
+#### A masked pattern was here ####
+POSTHOOK: query: explain vectorization detail select q548284, CASE WHEN 
((q548284 = 1)) THEN (0.2)
+    WHEN ((q548284 = 2)) THEN (0.4) WHEN ((q548284 = 3)) THEN (0.6) WHEN 
((q548284 = 4))
+    THEN (0.8) WHEN ((q548284 = 5)) THEN (1) ELSE (null) END from foo order by 
q548284 limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@foo
+#### A masked pattern was here ####
+Explain
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: foo
+            Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column 
stats: NONE
+            TableScan Vectorization:
+                native: true
+                vectorizationSchemaColumns: [0:q548284:int, 
1:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
+            Select Operator
+              expressions: q548284 (type: int), CASE WHEN ((q548284 = 1)) THEN 
(0.2) WHEN ((q548284 = 2)) THEN (0.4) WHEN ((q548284 = 3)) THEN (0.6) WHEN 
((q548284 = 4)) THEN (0.8) WHEN ((q548284 = 5)) THEN (1) ELSE (null) END (type: 
decimal(11,1))
+              outputColumnNames: _col0, _col1
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  projectedOutputColumnNums: [0, 16]
+                  selectExpressions: IfExprCondExprCondExpr(col 2:boolean, col 
3:decimal(11,1)col 15:decimal(11,1))(children: LongColEqualLongScalar(col 
0:int, val 1) -> 2:boolean, ConstantVectorExpression(val 0.2) -> 
3:decimal(11,1), IfExprCondExprCondExpr(col 4:boolean, col 5:decimal(11,1)col 
14:decimal(11,1))(children: LongColEqualLongScalar(col 0:int, val 2) -> 
4:boolean, ConstantVectorExpression(val 0.4) -> 5:decimal(11,1), 
IfExprCondExprCondExpr(col 6:boolean, col 7:decimal(11,1)co [...]
+              Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE 
Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col0 (type: int)
+                sort order: +
+                Reduce Sink Vectorization:
+                    className: VectorReduceSinkOperator
+                    native: false
+                    nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, 
No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, 
LazyBinarySerDe for values IS true
+                    nativeConditionsNotMet: hive.execution.engine mr IN [tez, 
spark] IS false
+                Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE 
Column stats: NONE
+                TopN Hash Memory Usage: 0.1
+                value expressions: _col1 (type: decimal(11,1))
+      Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize 
IS true
+          inputFormatFeatureSupport: [DECIMAL_64]
+          featureSupportInUse: [DECIMAL_64]
+          inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+          rowBatchContext:
+              dataColumnCount: 1
+              includeColumns: [0]
+              dataColumns: q548284:int
+              partitionColumnCount: 0
+              scratchColumnTypeNames: [bigint, decimal(11,1), bigint, 
decimal(11,1), bigint, decimal(11,1), bigint, decimal(11,1), bigint, 
decimal(11,1), decimal(11,1), decimal(11,1), decimal(11,1), decimal(11,1), 
decimal(11,1)]
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS 
false
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: 
decimal(11,1))
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column 
stats: NONE
+          Limit
+            Number of rows: 1
+            Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column 
stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE 
Column stats: NONE
+              table:
+                  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select q548284, CASE WHEN ((q548284 = 1)) THEN (0.2)
+    WHEN ((q548284 = 2)) THEN (0.4) WHEN ((q548284 = 3)) THEN (0.6) WHEN 
((q548284 = 4))
+    THEN (0.8) WHEN ((q548284 = 5)) THEN (1) ELSE (null) END from foo order by 
q548284 limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@foo
+#### A masked pattern was here ####
+POSTHOOK: query: select q548284, CASE WHEN ((q548284 = 1)) THEN (0.2)
+    WHEN ((q548284 = 2)) THEN (0.4) WHEN ((q548284 = 3)) THEN (0.6) WHEN 
((q548284 = 4))
+    THEN (0.8) WHEN ((q548284 = 5)) THEN (1) ELSE (null) END from foo order by 
q548284 limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@foo
+#### A masked pattern was here ####
+q548284        _c1
+1      0.2
+PREHOOK: query: explain vectorization detail select q548284, CASE WHEN  
((q548284 = 4)) THEN (0.8)
+    WHEN ((q548284 = 5)) THEN (1) ELSE (8) END from foo order by q548284 limit 
1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@foo
+#### A masked pattern was here ####
+POSTHOOK: query: explain vectorization detail select q548284, CASE WHEN  
((q548284 = 4)) THEN (0.8)
+    WHEN ((q548284 = 5)) THEN (1) ELSE (8) END from foo order by q548284 limit 
1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@foo
+#### A masked pattern was here ####
+Explain
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: foo
+            Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column 
stats: NONE
+            TableScan Vectorization:
+                native: true
+                vectorizationSchemaColumns: [0:q548284:int, 
1:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
+            Select Operator
+              expressions: q548284 (type: int), CASE WHEN ((q548284 = 4)) THEN 
(0.8) WHEN ((q548284 = 5)) THEN (1) ELSE (8) END (type: decimal(11,1))
+              outputColumnNames: _col0, _col1
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  projectedOutputColumnNums: [0, 8]
+                  selectExpressions: IfExprCondExprCondExpr(col 2:boolean, col 
3:decimal(11,1)col 7:decimal(11,1))(children: LongColEqualLongScalar(col 0:int, 
val 4) -> 2:boolean, ConstantVectorExpression(val 0.8) -> 3:decimal(11,1), 
IfExprCondExprCondExpr(col 4:boolean, col 5:decimal(11,1)col 
6:decimal(11,1))(children: LongColEqualLongScalar(col 0:int, val 5) -> 
4:boolean, ConstantVectorExpression(val 1) -> 5:decimal(11,1), 
ConstantVectorExpression(val 8) -> 6:decimal(11,1)) -> 7:decima [...]
+              Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE 
Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col0 (type: int)
+                sort order: +
+                Reduce Sink Vectorization:
+                    className: VectorReduceSinkOperator
+                    native: false
+                    nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, 
No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, 
LazyBinarySerDe for values IS true
+                    nativeConditionsNotMet: hive.execution.engine mr IN [tez, 
spark] IS false
+                Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE 
Column stats: NONE
+                TopN Hash Memory Usage: 0.1
+                value expressions: _col1 (type: decimal(11,1))
+      Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize 
IS true
+          inputFormatFeatureSupport: [DECIMAL_64]
+          featureSupportInUse: [DECIMAL_64]
+          inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+          rowBatchContext:
+              dataColumnCount: 1
+              includeColumns: [0]
+              dataColumns: q548284:int
+              partitionColumnCount: 0
+              scratchColumnTypeNames: [bigint, decimal(11,1), bigint, 
decimal(11,1), decimal(11,1), decimal(11,1), decimal(11,1)]
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS 
false
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: 
decimal(11,1))
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column 
stats: NONE
+          Limit
+            Number of rows: 1
+            Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column 
stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE 
Column stats: NONE
+              table:
+                  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select q548284, CASE WHEN  ((q548284 = 4)) THEN (0.8) WHEN 
((q548284 = 5)) THEN (1) ELSE (8) END
+    from foo order by q548284 limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@foo
+#### A masked pattern was here ####
+POSTHOOK: query: select q548284, CASE WHEN  ((q548284 = 4)) THEN (0.8) WHEN 
((q548284 = 5)) THEN (1) ELSE (8) END
+    from foo order by q548284 limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@foo
+#### A masked pattern was here ####
+q548284        _c1
+1      8.0

[hive] branch master updated: HIVE-21863: Improve Vectorizer type casting for WHEN expression (Vineet Garg, reviewed by Gopal V)

Reply via email to