Repository: hive
Updated Branches:
  refs/heads/master 08ca7b2de -> 016afe0d6


HIVE-15972: Runtime filtering not vectorizing for decimal/timestamp/char/varchar (Jason Dere, reviewed by Matt McCline)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/016afe0d
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/016afe0d
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/016afe0d

Branch: refs/heads/master
Commit: 016afe0d69f3a90290e3a127149430ad6d4c603f
Parents: 08ca7b2
Author: Jason Dere <[email protected]>
Authored: Mon Feb 20 13:03:06 2017 -0800
Committer: Jason Dere <[email protected]>
Committed: Mon Feb 20 13:03:06 2017 -0800

----------------------------------------------------------------------
 .../FilterColumnBetweenDynamicValue.txt         |   2 +
 .../vectorized_dynamic_semijoin_reduction2.q    |  17 +-
 ...vectorized_dynamic_semijoin_reduction2.q.out | 428 ++++++++++++++++++-
 .../apache/hadoop/hive/tools/GenVectorCode.java |   8 +-
 4 files changed, 446 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/016afe0d/ql/src/gen/vectorization/ExpressionTemplates/FilterColumnBetweenDynamicValue.txt
----------------------------------------------------------------------
diff --git a/ql/src/gen/vectorization/ExpressionTemplates/FilterColumnBetweenDynamicValue.txt b/ql/src/gen/vectorization/ExpressionTemplates/FilterColumnBetweenDynamicValue.txt
index 1aee9b3..9d5432f 100644
--- a/ql/src/gen/vectorization/ExpressionTemplates/FilterColumnBetweenDynamicValue.txt
+++ b/ql/src/gen/vectorization/ExpressionTemplates/FilterColumnBetweenDynamicValue.txt
@@ -27,7 +27,9 @@ import 
org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectIn
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import java.sql.Timestamp;
+import org.apache.hadoop.hive.common.type.HiveChar;
 import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.hive.common.type.HiveVarchar;
 
 public class <ClassName> extends Filter<TypeName>ColumnBetween {
 

http://git-wip-us.apache.org/repos/asf/hive/blob/016afe0d/ql/src/test/queries/clientpositive/vectorized_dynamic_semijoin_reduction2.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vectorized_dynamic_semijoin_reduction2.q b/ql/src/test/queries/clientpositive/vectorized_dynamic_semijoin_reduction2.q
index 446407d..be8e4af 100644
--- a/ql/src/test/queries/clientpositive/vectorized_dynamic_semijoin_reduction2.q
+++ b/ql/src/test/queries/clientpositive/vectorized_dynamic_semijoin_reduction2.q
@@ -18,7 +18,10 @@ create table dsrv2_big stored as orc as
   cast(L_PARTKEY as decimal(10,1)) as partkey_decimal,
   cast(L_PARTKEY as double) as partkey_double,
   cast(l_shipdate as date) as shipdate_date,
-  cast(cast(l_shipdate as date) as timestamp) as shipdate_ts
+  cast(cast(l_shipdate as date) as timestamp) as shipdate_ts,
+  cast(l_shipdate as string) as shipdate_string,
+  cast(l_shipdate as char(10)) as shipdate_char,
+  cast(l_shipdate as varchar(10)) as shipdate_varchar
   from lineitem;
 create table dsrv2_small stored as orc as select * from dsrv2_big limit 20;
 analyze table dsrv2_big compute statistics;
@@ -46,5 +49,17 @@ select count(*) from dsrv2_big a join dsrv2_small b on (a.shipdate_date = b.ship
 EXPLAIN select count(*) from dsrv2_big a join dsrv2_small b on (a.shipdate_ts = b.shipdate_ts);
 select count(*) from dsrv2_big a join dsrv2_small b on (a.shipdate_ts = b.shipdate_ts);
 
+-- single key (string)
+EXPLAIN select count(*) from dsrv2_big a join dsrv2_small b on (a.shipdate_string = b.shipdate_string);
+select count(*) from dsrv2_big a join dsrv2_small b on (a.shipdate_string = b.shipdate_string);
+
+-- single key (char)
+EXPLAIN select count(*) from dsrv2_big a join dsrv2_small b on (a.shipdate_char = b.shipdate_char);
+select count(*) from dsrv2_big a join dsrv2_small b on (a.shipdate_char = b.shipdate_char);
+
+-- single key (varchar)
+EXPLAIN select count(*) from dsrv2_big a join dsrv2_small b on (a.shipdate_varchar = b.shipdate_varchar);
+select count(*) from dsrv2_big a join dsrv2_small b on (a.shipdate_varchar = b.shipdate_varchar);
+
 drop table dsrv2_big;
 drop table dsrv2_small;

http://git-wip-us.apache.org/repos/asf/hive/blob/016afe0d/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction2.q.out b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction2.q.out
index 27d8152..062fef6 100644
--- a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction2.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction2.q.out
@@ -4,7 +4,10 @@ PREHOOK: query: create table dsrv2_big stored as orc as
   cast(L_PARTKEY as decimal(10,1)) as partkey_decimal,
   cast(L_PARTKEY as double) as partkey_double,
   cast(l_shipdate as date) as shipdate_date,
-  cast(cast(l_shipdate as date) as timestamp) as shipdate_ts
+  cast(cast(l_shipdate as date) as timestamp) as shipdate_ts,
+  cast(l_shipdate as string) as shipdate_string,
+  cast(l_shipdate as char(10)) as shipdate_char,
+  cast(l_shipdate as varchar(10)) as shipdate_varchar
   from lineitem
 PREHOOK: type: CREATETABLE_AS_SELECT
 PREHOOK: Input: default@lineitem
@@ -16,7 +19,10 @@ POSTHOOK: query: create table dsrv2_big stored as orc as
   cast(L_PARTKEY as decimal(10,1)) as partkey_decimal,
   cast(L_PARTKEY as double) as partkey_double,
   cast(l_shipdate as date) as shipdate_date,
-  cast(cast(l_shipdate as date) as timestamp) as shipdate_ts
+  cast(cast(l_shipdate as date) as timestamp) as shipdate_ts,
+  cast(l_shipdate as string) as shipdate_string,
+  cast(l_shipdate as char(10)) as shipdate_char,
+  cast(l_shipdate as varchar(10)) as shipdate_varchar
   from lineitem
 POSTHOOK: type: CREATETABLE_AS_SELECT
 POSTHOOK: Input: default@lineitem
@@ -25,8 +31,11 @@ POSTHOOK: Output: default@dsrv2_big
 POSTHOOK: Lineage: dsrv2_big.partkey_bigint EXPRESSION 
[(lineitem)lineitem.FieldSchema(name:l_partkey, type:int, comment:null), ]
 POSTHOOK: Lineage: dsrv2_big.partkey_decimal EXPRESSION 
[(lineitem)lineitem.FieldSchema(name:l_partkey, type:int, comment:null), ]
 POSTHOOK: Lineage: dsrv2_big.partkey_double EXPRESSION 
[(lineitem)lineitem.FieldSchema(name:l_partkey, type:int, comment:null), ]
+POSTHOOK: Lineage: dsrv2_big.shipdate_char EXPRESSION 
[(lineitem)lineitem.FieldSchema(name:l_shipdate, type:string, comment:null), ]
 POSTHOOK: Lineage: dsrv2_big.shipdate_date EXPRESSION 
[(lineitem)lineitem.FieldSchema(name:l_shipdate, type:string, comment:null), ]
+POSTHOOK: Lineage: dsrv2_big.shipdate_string SIMPLE 
[(lineitem)lineitem.FieldSchema(name:l_shipdate, type:string, comment:null), ]
 POSTHOOK: Lineage: dsrv2_big.shipdate_ts EXPRESSION 
[(lineitem)lineitem.FieldSchema(name:l_shipdate, type:string, comment:null), ]
+POSTHOOK: Lineage: dsrv2_big.shipdate_varchar EXPRESSION 
[(lineitem)lineitem.FieldSchema(name:l_shipdate, type:string, comment:null), ]
 PREHOOK: query: create table dsrv2_small stored as orc as select * from 
dsrv2_big limit 20
 PREHOOK: type: CREATETABLE_AS_SELECT
 PREHOOK: Input: default@dsrv2_big
@@ -40,8 +49,11 @@ POSTHOOK: Output: default@dsrv2_small
 POSTHOOK: Lineage: dsrv2_small.partkey_bigint SIMPLE 
[(dsrv2_big)dsrv2_big.FieldSchema(name:partkey_bigint, type:bigint, 
comment:null), ]
 POSTHOOK: Lineage: dsrv2_small.partkey_decimal SIMPLE 
[(dsrv2_big)dsrv2_big.FieldSchema(name:partkey_decimal, type:decimal(10,1), 
comment:null), ]
 POSTHOOK: Lineage: dsrv2_small.partkey_double SIMPLE 
[(dsrv2_big)dsrv2_big.FieldSchema(name:partkey_double, type:double, 
comment:null), ]
+POSTHOOK: Lineage: dsrv2_small.shipdate_char SIMPLE 
[(dsrv2_big)dsrv2_big.FieldSchema(name:shipdate_char, type:char(10), 
comment:null), ]
 POSTHOOK: Lineage: dsrv2_small.shipdate_date SIMPLE 
[(dsrv2_big)dsrv2_big.FieldSchema(name:shipdate_date, type:date, comment:null), 
]
+POSTHOOK: Lineage: dsrv2_small.shipdate_string SIMPLE 
[(dsrv2_big)dsrv2_big.FieldSchema(name:shipdate_string, type:string, 
comment:null), ]
 POSTHOOK: Lineage: dsrv2_small.shipdate_ts SIMPLE 
[(dsrv2_big)dsrv2_big.FieldSchema(name:shipdate_ts, type:timestamp, 
comment:null), ]
+POSTHOOK: Lineage: dsrv2_small.shipdate_varchar SIMPLE 
[(dsrv2_big)dsrv2_big.FieldSchema(name:shipdate_varchar, type:varchar(10), 
comment:null), ]
 PREHOOK: query: analyze table dsrv2_big compute statistics
 PREHOOK: type: QUERY
 PREHOOK: Input: default@dsrv2_big
@@ -247,7 +259,7 @@ STAGE PLANS:
                         sort order: +
                         Map-reduce partition columns: _col0 (type: 
decimal(10,1))
                         Statistics: Num rows: 100 Data size: 11200 Basic 
stats: COMPLETE Column stats: COMPLETE
-            Execution mode: llap
+            Execution mode: vectorized, llap
             LLAP IO: all inputs
         Map 4 
             Map Operator Tree:
@@ -655,7 +667,7 @@ STAGE PLANS:
                         sort order: +
                         Map-reduce partition columns: _col0 (type: timestamp)
                         Statistics: Num rows: 100 Data size: 4000 Basic stats: 
COMPLETE Column stats: COMPLETE
-            Execution mode: llap
+            Execution mode: vectorized, llap
             LLAP IO: all inputs
         Map 4 
             Map Operator Tree:
@@ -754,6 +766,414 @@ POSTHOOK: Input: default@dsrv2_big
 POSTHOOK: Input: default@dsrv2_small
 #### A masked pattern was here ####
 23
+PREHOOK: query: EXPLAIN select count(*) from dsrv2_big a join dsrv2_small b on 
(a.shipdate_string = b.shipdate_string)
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN select count(*) from dsrv2_big a join dsrv2_small b 
on (a.shipdate_string = b.shipdate_string)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Map 1 <- Reducer 5 (BROADCAST_EDGE)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+        Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: a
+                  filterExpr: (shipdate_string is not null and shipdate_string 
BETWEEN DynamicValue(RS_7_b_shipdate_string_min) AND 
DynamicValue(RS_7_b_shipdate_string_max) and in_bloom_filter(shipdate_string, 
DynamicValue(RS_7_b_shipdate_string_bloom_filter))) (type: boolean)
+                  Statistics: Num rows: 100 Data size: 9400 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  Filter Operator
+                    predicate: (shipdate_string is not null and 
shipdate_string BETWEEN DynamicValue(RS_7_b_shipdate_string_min) AND 
DynamicValue(RS_7_b_shipdate_string_max) and in_bloom_filter(shipdate_string, 
DynamicValue(RS_7_b_shipdate_string_bloom_filter))) (type: boolean)
+                    Statistics: Num rows: 100 Data size: 9400 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: shipdate_string (type: string)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 100 Data size: 9400 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 100 Data size: 9400 Basic stats: 
COMPLETE Column stats: COMPLETE
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+        Map 4 
+            Map Operator Tree:
+                TableScan
+                  alias: b
+                  filterExpr: shipdate_string is not null (type: boolean)
+                  Statistics: Num rows: 20 Data size: 1880 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  Filter Operator
+                    predicate: shipdate_string is not null (type: boolean)
+                    Statistics: Num rows: 20 Data size: 1880 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: shipdate_string (type: string)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 20 Data size: 1880 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 20 Data size: 1880 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Select Operator
+                        expressions: _col0 (type: string)
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 20 Data size: 1880 Basic stats: 
COMPLETE Column stats: COMPLETE
+                        Group By Operator
+                          aggregations: min(_col0), max(_col0), 
bloom_filter(_col0, expectedEntries=15)
+                          mode: hash
+                          outputColumnNames: _col0, _col1, _col2
+                          Statistics: Num rows: 1 Data size: 552 Basic stats: 
COMPLETE Column stats: COMPLETE
+                          Reduce Output Operator
+                            sort order: 
+                            Statistics: Num rows: 1 Data size: 552 Basic 
stats: COMPLETE Column stats: COMPLETE
+                            value expressions: _col0 (type: string), _col1 
(type: string), _col2 (type: binary)
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                keys:
+                  0 _col0 (type: string)
+                  1 _col0 (type: string)
+                Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE 
Column stats: COMPLETE
+                Group By Operator
+                  aggregations: count()
+                  mode: hash
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  Reduce Output Operator
+                    sort order: 
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: COMPLETE
+                    value expressions: _col0 (type: bigint)
+        Reducer 3 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 5 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: min(VALUE._col0), max(VALUE._col1), 
bloom_filter(VALUE._col2, expectedEntries=15)
+                mode: final
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE 
Column stats: COMPLETE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  value expressions: _col0 (type: string), _col1 (type: 
string), _col2 (type: binary)
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select count(*) from dsrv2_big a join dsrv2_small b on 
(a.shipdate_string = b.shipdate_string)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dsrv2_big
+PREHOOK: Input: default@dsrv2_small
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from dsrv2_big a join dsrv2_small b on 
(a.shipdate_string = b.shipdate_string)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dsrv2_big
+POSTHOOK: Input: default@dsrv2_small
+#### A masked pattern was here ####
+23
+PREHOOK: query: EXPLAIN select count(*) from dsrv2_big a join dsrv2_small b on 
(a.shipdate_char = b.shipdate_char)
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN select count(*) from dsrv2_big a join dsrv2_small b 
on (a.shipdate_char = b.shipdate_char)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Map 1 <- Reducer 5 (BROADCAST_EDGE)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+        Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: a
+                  filterExpr: (shipdate_char is not null and shipdate_char 
BETWEEN DynamicValue(RS_7_b_shipdate_char_min) AND 
DynamicValue(RS_7_b_shipdate_char_max) and in_bloom_filter(shipdate_char, 
DynamicValue(RS_7_b_shipdate_char_bloom_filter))) (type: boolean)
+                  Statistics: Num rows: 100 Data size: 9400 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  Filter Operator
+                    predicate: (shipdate_char is not null and shipdate_char 
BETWEEN DynamicValue(RS_7_b_shipdate_char_min) AND 
DynamicValue(RS_7_b_shipdate_char_max) and in_bloom_filter(shipdate_char, 
DynamicValue(RS_7_b_shipdate_char_bloom_filter))) (type: boolean)
+                    Statistics: Num rows: 100 Data size: 9400 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: shipdate_char (type: char(10))
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 100 Data size: 9400 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: char(10))
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: char(10))
+                        Statistics: Num rows: 100 Data size: 9400 Basic stats: 
COMPLETE Column stats: COMPLETE
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+        Map 4 
+            Map Operator Tree:
+                TableScan
+                  alias: b
+                  filterExpr: shipdate_char is not null (type: boolean)
+                  Statistics: Num rows: 20 Data size: 1880 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  Filter Operator
+                    predicate: shipdate_char is not null (type: boolean)
+                    Statistics: Num rows: 20 Data size: 1880 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: shipdate_char (type: char(10))
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 20 Data size: 1880 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: char(10))
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: char(10))
+                        Statistics: Num rows: 20 Data size: 1880 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Select Operator
+                        expressions: _col0 (type: char(10))
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 20 Data size: 1880 Basic stats: 
COMPLETE Column stats: COMPLETE
+                        Group By Operator
+                          aggregations: min(_col0), max(_col0), 
bloom_filter(_col0, expectedEntries=15)
+                          mode: hash
+                          outputColumnNames: _col0, _col1, _col2
+                          Statistics: Num rows: 1 Data size: 282 Basic stats: 
COMPLETE Column stats: COMPLETE
+                          Reduce Output Operator
+                            sort order: 
+                            Statistics: Num rows: 1 Data size: 282 Basic 
stats: COMPLETE Column stats: COMPLETE
+                            value expressions: _col0 (type: char(10)), _col1 
(type: char(10)), _col2 (type: binary)
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                keys:
+                  0 _col0 (type: char(10))
+                  1 _col0 (type: char(10))
+                Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE 
Column stats: COMPLETE
+                Group By Operator
+                  aggregations: count()
+                  mode: hash
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  Reduce Output Operator
+                    sort order: 
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: COMPLETE
+                    value expressions: _col0 (type: bigint)
+        Reducer 3 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 5 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: min(VALUE._col0), max(VALUE._col1), 
bloom_filter(VALUE._col2, expectedEntries=15)
+                mode: final
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 282 Basic stats: COMPLETE 
Column stats: COMPLETE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 1 Data size: 282 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  value expressions: _col0 (type: char(10)), _col1 (type: 
char(10)), _col2 (type: binary)
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select count(*) from dsrv2_big a join dsrv2_small b on 
(a.shipdate_char = b.shipdate_char)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dsrv2_big
+PREHOOK: Input: default@dsrv2_small
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from dsrv2_big a join dsrv2_small b on 
(a.shipdate_char = b.shipdate_char)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dsrv2_big
+POSTHOOK: Input: default@dsrv2_small
+#### A masked pattern was here ####
+23
+PREHOOK: query: EXPLAIN select count(*) from dsrv2_big a join dsrv2_small b on 
(a.shipdate_varchar = b.shipdate_varchar)
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN select count(*) from dsrv2_big a join dsrv2_small b 
on (a.shipdate_varchar = b.shipdate_varchar)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Map 1 <- Reducer 5 (BROADCAST_EDGE)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+        Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: a
+                  filterExpr: (shipdate_varchar is not null and 
shipdate_varchar BETWEEN DynamicValue(RS_7_b_shipdate_varchar_min) AND 
DynamicValue(RS_7_b_shipdate_varchar_max) and in_bloom_filter(shipdate_varchar, 
DynamicValue(RS_7_b_shipdate_varchar_bloom_filter))) (type: boolean)
+                  Statistics: Num rows: 100 Data size: 9400 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  Filter Operator
+                    predicate: (shipdate_varchar is not null and 
shipdate_varchar BETWEEN DynamicValue(RS_7_b_shipdate_varchar_min) AND 
DynamicValue(RS_7_b_shipdate_varchar_max) and in_bloom_filter(shipdate_varchar, 
DynamicValue(RS_7_b_shipdate_varchar_bloom_filter))) (type: boolean)
+                    Statistics: Num rows: 100 Data size: 9400 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: shipdate_varchar (type: varchar(10))
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 100 Data size: 9400 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: varchar(10))
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: varchar(10))
+                        Statistics: Num rows: 100 Data size: 9400 Basic stats: 
COMPLETE Column stats: COMPLETE
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+        Map 4 
+            Map Operator Tree:
+                TableScan
+                  alias: b
+                  filterExpr: shipdate_varchar is not null (type: boolean)
+                  Statistics: Num rows: 20 Data size: 1880 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  Filter Operator
+                    predicate: shipdate_varchar is not null (type: boolean)
+                    Statistics: Num rows: 20 Data size: 1880 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: shipdate_varchar (type: varchar(10))
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 20 Data size: 1880 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: varchar(10))
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: varchar(10))
+                        Statistics: Num rows: 20 Data size: 1880 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Select Operator
+                        expressions: _col0 (type: varchar(10))
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 20 Data size: 1880 Basic stats: 
COMPLETE Column stats: COMPLETE
+                        Group By Operator
+                          aggregations: min(_col0), max(_col0), 
bloom_filter(_col0, expectedEntries=15)
+                          mode: hash
+                          outputColumnNames: _col0, _col1, _col2
+                          Statistics: Num rows: 1 Data size: 282 Basic stats: 
COMPLETE Column stats: COMPLETE
+                          Reduce Output Operator
+                            sort order: 
+                            Statistics: Num rows: 1 Data size: 282 Basic 
stats: COMPLETE Column stats: COMPLETE
+                            value expressions: _col0 (type: varchar(10)), 
_col1 (type: varchar(10)), _col2 (type: binary)
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                keys:
+                  0 _col0 (type: varchar(10))
+                  1 _col0 (type: varchar(10))
+                Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE 
Column stats: COMPLETE
+                Group By Operator
+                  aggregations: count()
+                  mode: hash
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  Reduce Output Operator
+                    sort order: 
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: COMPLETE
+                    value expressions: _col0 (type: bigint)
+        Reducer 3 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 5 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: min(VALUE._col0), max(VALUE._col1), 
bloom_filter(VALUE._col2, expectedEntries=15)
+                mode: final
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 282 Basic stats: COMPLETE 
Column stats: COMPLETE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 1 Data size: 282 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  value expressions: _col0 (type: varchar(10)), _col1 (type: 
varchar(10)), _col2 (type: binary)
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select count(*) from dsrv2_big a join dsrv2_small b on 
(a.shipdate_varchar = b.shipdate_varchar)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dsrv2_big
+PREHOOK: Input: default@dsrv2_small
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from dsrv2_big a join dsrv2_small b on 
(a.shipdate_varchar = b.shipdate_varchar)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dsrv2_big
+POSTHOOK: Input: default@dsrv2_small
+#### A masked pattern was here ####
+23
 PREHOOK: query: drop table dsrv2_big
 PREHOOK: type: DROPTABLE
 PREHOOK: Input: default@dsrv2_big

http://git-wip-us.apache.org/repos/asf/hive/blob/016afe0d/vector-code-gen/src/org/apache/hadoop/hive/tools/GenVectorCode.java
----------------------------------------------------------------------
diff --git a/vector-code-gen/src/org/apache/hadoop/hive/tools/GenVectorCode.java b/vector-code-gen/src/org/apache/hadoop/hive/tools/GenVectorCode.java
index 55cfb7b..926321e 100644
--- a/vector-code-gen/src/org/apache/hadoop/hive/tools/GenVectorCode.java
+++ b/vector-code-gen/src/org/apache/hadoop/hive/tools/GenVectorCode.java
@@ -1418,7 +1418,7 @@ public class GenVectorCode extends Task {
       getValueMethod = "";
       conversionMethod = "";
     } else if (operandType.equals("decimal")) {
-      defaultValue = "null";
+      defaultValue = "HiveDecimal.ZERO";
       vectorType = "HiveDecimal";
       getPrimitiveMethod = "getHiveDecimal";
       getValueMethod = "";
@@ -1430,13 +1430,13 @@ public class GenVectorCode extends Task {
       getValueMethod = ".getBytes()";
       conversionMethod = "";
     } else if (operandType.equals("char")) {
-      defaultValue = "null";
+      defaultValue = "new HiveChar(\"\", 1)";
       vectorType = "byte[]";
       getPrimitiveMethod = "getHiveChar";
       getValueMethod = ".getStrippedValue().getBytes()";  // Does vectorization use stripped char values?
       conversionMethod = "";
     } else if (operandType.equals("varchar")) {
-      defaultValue = "null";
+      defaultValue = "new HiveVarchar(\"\", 1)";
       vectorType = "byte[]";
       getPrimitiveMethod = "getHiveVarchar";
       getValueMethod = ".getValue().getBytes()";
@@ -1450,7 +1450,7 @@ public class GenVectorCode extends Task {
       // Special case - Date requires its own specific BetweenDynamicValue class, but derives from FilterLongColumnBetween
       typeName = "Long";
     } else if (operandType.equals("timestamp")) {
-      defaultValue = "null";
+      defaultValue = "new Timestamp(0)";
       vectorType = "Timestamp";
       getPrimitiveMethod = "getTimestamp";
       getValueMethod = "";

Reply via email to