Repository: hive
Updated Branches:
  refs/heads/master 9aac80609 -> 0ad71121d


HIVE-20240 : Semijoin Reduction : Use local variable to check for external 
table condition (Deepak Jaiswal, reviewed by Jason Dere)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/0ad71121
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/0ad71121
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/0ad71121

Branch: refs/heads/master
Commit: 0ad71121d1883b56d364ca855ccda9bb7bfdb67a
Parents: 9aac806
Author: Deepak Jaiswal <[email protected]>
Authored: Thu Jul 26 11:59:56 2018 -0700
Committer: Deepak Jaiswal <[email protected]>
Committed: Fri Jul 27 11:24:46 2018 -0700

----------------------------------------------------------------------
 .../DynamicPartitionPruningOptimization.java    |   4 +-
 .../dynamic_semijoin_reduction_4.q              |  11 +
 .../llap/dynamic_semijoin_reduction_4.q.out     | 364 +++++++++++++++++++
 3 files changed, 377 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/0ad71121/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java
index caec2c0..a1401aa 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java
@@ -190,8 +190,8 @@ public class DynamicPartitionPruningOptimization implements NodeProcessor {
           }
         } else {
           LOG.debug("Column " + column + " is not a partition column");
-          semiJoin = semiJoin && !disableSemiJoinOptDueToExternalTable(parseContext.getConf(), ts, ctx);
-          if (semiJoin && ts.getConf().getFilterExpr() != null) {
+          if (semiJoin && !disableSemiJoinOptDueToExternalTable(parseContext.getConf(), ts, ctx)
+                  && ts.getConf().getFilterExpr() != null) {
             LOG.debug("Initiate semijoin reduction for " + column + " ("
                 + ts.getConf().getFilterExpr().getExprString());
 

http://git-wip-us.apache.org/repos/asf/hive/blob/0ad71121/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction_4.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction_4.q b/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction_4.q
index a04ab66..9b1723a 100644
--- a/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction_4.q
+++ b/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction_4.q
@@ -18,6 +18,7 @@ set hive.disable.unsafe.external.table.operations=true;
 -- Create Tables
 create table srcpart_date_n1 (key string, value string) partitioned by (ds string ) stored as ORC;
 CREATE TABLE srcpart_small_n0(key1 STRING, value1 STRING) partitioned by (ds string) STORED as ORC;
+CREATE TABLE srcpart_medium_n0(key2 STRING, value2 STRING) partitioned by (ds string) STORED as ORC;
 create external table srcpart_date_ext (key string, value string) partitioned by (ds string ) stored as ORC;
 CREATE external TABLE srcpart_small_ext(key1 STRING, value1 STRING) partitioned by (ds string) STORED as ORC;
 
@@ -28,6 +29,8 @@ alter table srcpart_date_n1 add partition (ds = "2008-04-09");
 alter table srcpart_small_n0 add partition (ds = "2008-04-08");
 alter table srcpart_small_n0 add partition (ds = "2008-04-09");
 
+alter table srcpart_medium_n0 add partition (ds = "2008-04-08");
+
 alter table srcpart_date_ext add partition (ds = "2008-04-08");
 alter table srcpart_date_ext add partition (ds = "2008-04-09");
 
@@ -38,6 +41,7 @@ alter table srcpart_small_ext add partition (ds = "2008-04-09");
 insert overwrite table srcpart_date_n1 partition (ds = "2008-04-08" ) select key, value from srcpart where ds = "2008-04-08";
 insert overwrite table srcpart_date_n1 partition (ds = "2008-04-09") select key, value from srcpart where ds = "2008-04-09";
 insert overwrite table srcpart_small_n0 partition (ds = "2008-04-09") select key, value from srcpart where ds = "2008-04-09" limit 20;
+insert overwrite table srcpart_medium_n0 partition (ds = "2008-04-08") select key, value from srcpart where ds = "2008-04-09" limit 50;
 
 insert overwrite table srcpart_date_ext partition (ds = "2008-04-08" ) select key, value from srcpart where ds = "2008-04-08";
 insert overwrite table srcpart_date_ext partition (ds = "2008-04-09") select key, value from srcpart where ds = "2008-04-09";
@@ -45,6 +49,7 @@ insert overwrite table srcpart_small_ext partition (ds = "2008-04-09") select ke
 
 analyze table srcpart_date_n1 compute statistics for columns;
 analyze table srcpart_small_n0 compute statistics for columns;
+analyze table srcpart_medium_n0 compute statistics for columns;
 
 analyze table srcpart_date_ext compute statistics for columns;
 analyze table srcpart_small_ext compute statistics for columns;
@@ -54,6 +59,8 @@ analyze table srcpart_small_ext compute statistics for columns;
 set test.comment=This query should use semijoin reduction optimization;
 set test.comment;
 EXPLAIN select count(*) from srcpart_date_n1 join srcpart_small_n0 on (srcpart_date_n1.key = srcpart_small_n0.key1);
+-- multiple sources, single key
+EXPLAIN select count(*) from srcpart_date_n1 join srcpart_small_n0 on (srcpart_date_n1.key = srcpart_small_n0.key1) join srcpart_medium_n0 on (srcpart_medium_n0.key2 = srcpart_date_n1.key);
 
 set test.comment=Big table is external table - no semijoin reduction opt;
 set test.comment;
@@ -63,3 +70,7 @@ set test.comment=Small table is external table - no semijoin reduction opt;
 set test.comment;
 EXPLAIN select count(*) from srcpart_date_n1 join srcpart_small_ext on (srcpart_date_n1.key = srcpart_small_ext.key1);
 
+set test.comment=Small table is external table - no semijoin reduction opt for ext table but semijoin reduction opt for regular table;
+set test.comment;
+EXPLAIN select count(*) from srcpart_date_n1 join srcpart_small_ext on (srcpart_date_n1.key = srcpart_small_ext.key1)  join srcpart_medium_n0 on (srcpart_medium_n0.key2 = srcpart_date_n1.key);
+

http://git-wip-us.apache.org/repos/asf/hive/blob/0ad71121/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_4.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_4.q.out b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_4.q.out
index 0feb362..4c04eb1 100644
--- a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_4.q.out
+++ b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_4.q.out
@@ -14,6 +14,14 @@ POSTHOOK: query: CREATE TABLE srcpart_small_n0(key1 STRING, 
value1 STRING) parti
 POSTHOOK: type: CREATETABLE
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@srcpart_small_n0
+PREHOOK: query: CREATE TABLE srcpart_medium_n0(key2 STRING, value2 STRING) 
partitioned by (ds string) STORED as ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@srcpart_medium_n0
+POSTHOOK: query: CREATE TABLE srcpart_medium_n0(key2 STRING, value2 STRING) 
partitioned by (ds string) STORED as ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@srcpart_medium_n0
 PREHOOK: query: create external table srcpart_date_ext (key string, value 
string) partitioned by (ds string ) stored as ORC
 PREHOOK: type: CREATETABLE
 PREHOOK: Output: database:default
@@ -58,6 +66,13 @@ POSTHOOK: query: alter table srcpart_small_n0 add partition 
(ds = "2008-04-09")
 POSTHOOK: type: ALTERTABLE_ADDPARTS
 POSTHOOK: Output: default@srcpart_small_n0
 POSTHOOK: Output: default@srcpart_small_n0@ds=2008-04-09
+PREHOOK: query: alter table srcpart_medium_n0 add partition (ds = "2008-04-08")
+PREHOOK: type: ALTERTABLE_ADDPARTS
+PREHOOK: Output: default@srcpart_medium_n0
+POSTHOOK: query: alter table srcpart_medium_n0 add partition (ds = 
"2008-04-08")
+POSTHOOK: type: ALTERTABLE_ADDPARTS
+POSTHOOK: Output: default@srcpart_medium_n0
+POSTHOOK: Output: default@srcpart_medium_n0@ds=2008-04-08
 PREHOOK: query: alter table srcpart_date_ext add partition (ds = "2008-04-08")
 PREHOOK: type: ALTERTABLE_ADDPARTS
 PREHOOK: Output: default@srcpart_date_ext
@@ -128,6 +143,20 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
 POSTHOOK: Output: default@srcpart_small_n0@ds=2008-04-09
 POSTHOOK: Lineage: srcpart_small_n0 PARTITION(ds=2008-04-09).key1 SIMPLE 
[(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
 POSTHOOK: Lineage: srcpart_small_n0 PARTITION(ds=2008-04-09).value1 SIMPLE 
[(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: insert overwrite table srcpart_medium_n0 partition (ds = 
"2008-04-08") select key, value from srcpart where ds = "2008-04-09" limit 50
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+PREHOOK: Output: default@srcpart_medium_n0@ds=2008-04-08
+POSTHOOK: query: insert overwrite table srcpart_medium_n0 partition (ds = 
"2008-04-08") select key, value from srcpart where ds = "2008-04-09" limit 50
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+POSTHOOK: Output: default@srcpart_medium_n0@ds=2008-04-08
+POSTHOOK: Lineage: srcpart_medium_n0 PARTITION(ds=2008-04-08).key2 SIMPLE 
[(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: srcpart_medium_n0 PARTITION(ds=2008-04-08).value2 SIMPLE 
[(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
 PREHOOK: query: insert overwrite table srcpart_date_ext partition (ds = 
"2008-04-08" ) select key, value from srcpart where ds = "2008-04-08"
 PREHOOK: type: QUERY
 PREHOOK: Input: default@srcpart
@@ -206,6 +235,20 @@ POSTHOOK: Output: default@srcpart_small_n0
 POSTHOOK: Output: default@srcpart_small_n0@ds=2008-04-08
 POSTHOOK: Output: default@srcpart_small_n0@ds=2008-04-09
 #### A masked pattern was here ####
+PREHOOK: query: analyze table srcpart_medium_n0 compute statistics for columns
+PREHOOK: type: ANALYZE_TABLE
+PREHOOK: Input: default@srcpart_medium_n0
+PREHOOK: Input: default@srcpart_medium_n0@ds=2008-04-08
+PREHOOK: Output: default@srcpart_medium_n0
+PREHOOK: Output: default@srcpart_medium_n0@ds=2008-04-08
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table srcpart_medium_n0 compute statistics for columns
+POSTHOOK: type: ANALYZE_TABLE
+POSTHOOK: Input: default@srcpart_medium_n0
+POSTHOOK: Input: default@srcpart_medium_n0@ds=2008-04-08
+POSTHOOK: Output: default@srcpart_medium_n0
+POSTHOOK: Output: default@srcpart_medium_n0@ds=2008-04-08
+#### A masked pattern was here ####
 PREHOOK: query: analyze table srcpart_date_ext compute statistics for columns
 PREHOOK: type: ANALYZE_TABLE
 PREHOOK: Input: default@srcpart_date_ext
@@ -368,6 +411,179 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
+PREHOOK: query: EXPLAIN select count(*) from srcpart_date_n1 join 
srcpart_small_n0 on (srcpart_date_n1.key = srcpart_small_n0.key1) join 
srcpart_medium_n0 on (srcpart_medium_n0.key2 = srcpart_date_n1.key)
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN select count(*) from srcpart_date_n1 join 
srcpart_small_n0 on (srcpart_date_n1.key = srcpart_small_n0.key1) join 
srcpart_medium_n0 on (srcpart_medium_n0.key2 = srcpart_date_n1.key)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Map 1 <- Reducer 5 (BROADCAST_EDGE), Reducer 7 (BROADCAST_EDGE)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 6 
(SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+        Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE)
+        Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: srcpart_date_n1
+                  filterExpr: (key is not null and (key BETWEEN 
DynamicValue(RS_10_srcpart_medium_n0_key2_min) AND 
DynamicValue(RS_10_srcpart_medium_n0_key2_max) and in_bloom_filter(key, 
DynamicValue(RS_10_srcpart_medium_n0_key2_bloom_filter))) and (key BETWEEN 
DynamicValue(RS_11_srcpart_small_n0_key1_min) AND 
DynamicValue(RS_11_srcpart_small_n0_key1_max) and in_bloom_filter(key, 
DynamicValue(RS_11_srcpart_small_n0_key1_bloom_filter)))) (type: boolean)
+                  Statistics: Num rows: 2000 Data size: 174000 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  Filter Operator
+                    predicate: ((key BETWEEN 
DynamicValue(RS_10_srcpart_medium_n0_key2_min) AND 
DynamicValue(RS_10_srcpart_medium_n0_key2_max) and in_bloom_filter(key, 
DynamicValue(RS_10_srcpart_medium_n0_key2_bloom_filter))) and (key BETWEEN 
DynamicValue(RS_11_srcpart_small_n0_key1_min) AND 
DynamicValue(RS_11_srcpart_small_n0_key1_max) and in_bloom_filter(key, 
DynamicValue(RS_11_srcpart_small_n0_key1_bloom_filter))) and key is not null) 
(type: boolean)
+                    Statistics: Num rows: 2000 Data size: 174000 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: key (type: string)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 2000 Data size: 174000 Basic 
stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 2000 Data size: 174000 Basic 
stats: COMPLETE Column stats: COMPLETE
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+        Map 4 
+            Map Operator Tree:
+                TableScan
+                  alias: srcpart_medium_n0
+                  filterExpr: key2 is not null (type: boolean)
+                  Statistics: Num rows: 50 Data size: 4350 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  Filter Operator
+                    predicate: key2 is not null (type: boolean)
+                    Statistics: Num rows: 50 Data size: 4350 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: key2 (type: string)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 50 Data size: 4350 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 50 Data size: 4350 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Select Operator
+                        expressions: _col0 (type: string)
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 50 Data size: 4350 Basic stats: 
COMPLETE Column stats: COMPLETE
+                        Group By Operator
+                          aggregations: min(_col0), max(_col0), 
bloom_filter(_col0, expectedEntries=49)
+                          mode: hash
+                          outputColumnNames: _col0, _col1, _col2
+                          Statistics: Num rows: 1 Data size: 552 Basic stats: 
COMPLETE Column stats: COMPLETE
+                          Reduce Output Operator
+                            sort order: 
+                            Statistics: Num rows: 1 Data size: 552 Basic 
stats: COMPLETE Column stats: COMPLETE
+                            value expressions: _col0 (type: string), _col1 
(type: string), _col2 (type: binary)
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+        Map 6 
+            Map Operator Tree:
+                TableScan
+                  alias: srcpart_small_n0
+                  filterExpr: key1 is not null (type: boolean)
+                  Statistics: Num rows: 20 Data size: 1740 Basic stats: 
PARTIAL Column stats: PARTIAL
+                  Filter Operator
+                    predicate: key1 is not null (type: boolean)
+                    Statistics: Num rows: 20 Data size: 1740 Basic stats: 
PARTIAL Column stats: PARTIAL
+                    Select Operator
+                      expressions: key1 (type: string)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 20 Data size: 1740 Basic stats: 
PARTIAL Column stats: PARTIAL
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 20 Data size: 1740 Basic stats: 
PARTIAL Column stats: PARTIAL
+                      Select Operator
+                        expressions: _col0 (type: string)
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 20 Data size: 1740 Basic stats: 
PARTIAL Column stats: PARTIAL
+                        Group By Operator
+                          aggregations: min(_col0), max(_col0), 
bloom_filter(_col0, expectedEntries=20)
+                          mode: hash
+                          outputColumnNames: _col0, _col1, _col2
+                          Statistics: Num rows: 1 Data size: 639 Basic stats: 
PARTIAL Column stats: PARTIAL
+                          Reduce Output Operator
+                            sort order: 
+                            Statistics: Num rows: 1 Data size: 639 Basic 
stats: PARTIAL Column stats: PARTIAL
+                            value expressions: _col0 (type: string), _col1 
(type: string), _col2 (type: binary)
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                     Inner Join 0 to 2
+                keys:
+                  0 _col0 (type: string)
+                  1 _col0 (type: string)
+                  2 _col0 (type: string)
+                Statistics: Num rows: 4400 Data size: 382800 Basic stats: 
PARTIAL Column stats: NONE
+                Group By Operator
+                  aggregations: count()
+                  mode: hash
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL 
Column stats: NONE
+                  Reduce Output Operator
+                    sort order: 
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL 
Column stats: NONE
+                    value expressions: _col0 (type: bigint)
+        Reducer 3 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL 
Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL 
Column stats: NONE
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 5 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: min(VALUE._col0), max(VALUE._col1), 
bloom_filter(VALUE._col2, expectedEntries=49)
+                mode: final
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE 
Column stats: COMPLETE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  value expressions: _col0 (type: string), _col1 (type: 
string), _col2 (type: binary)
+        Reducer 7 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: min(VALUE._col0), max(VALUE._col1), 
bloom_filter(VALUE._col2, expectedEntries=20)
+                mode: final
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 639 Basic stats: PARTIAL 
Column stats: PARTIAL
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 1 Data size: 639 Basic stats: PARTIAL 
Column stats: PARTIAL
+                  value expressions: _col0 (type: string), _col1 (type: 
string), _col2 (type: binary)
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
 test.comment=Big table is external table - no semijoin reduction opt
 PREHOOK: query: EXPLAIN select count(*) from srcpart_date_ext join 
srcpart_small_n0 on (srcpart_date_ext.key = srcpart_small_n0.key1)
 PREHOOK: type: QUERY
@@ -566,3 +782,151 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
+test.comment=Small table is external table - no semijoin reduction opt for ext 
table but semijoin reduction opt for regular table
+PREHOOK: query: EXPLAIN select count(*) from srcpart_date_n1 join 
srcpart_small_ext on (srcpart_date_n1.key = srcpart_small_ext.key1)  join 
srcpart_medium_n0 on (srcpart_medium_n0.key2 = srcpart_date_n1.key)
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN select count(*) from srcpart_date_n1 join 
srcpart_small_ext on (srcpart_date_n1.key = srcpart_small_ext.key1)  join 
srcpart_medium_n0 on (srcpart_medium_n0.key2 = srcpart_date_n1.key)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Map 1 <- Reducer 5 (BROADCAST_EDGE)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 6 
(SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+        Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: srcpart_date_n1
+                  filterExpr: (key is not null and (key BETWEEN 
DynamicValue(RS_10_srcpart_medium_n0_key2_min) AND 
DynamicValue(RS_10_srcpart_medium_n0_key2_max) and in_bloom_filter(key, 
DynamicValue(RS_10_srcpart_medium_n0_key2_bloom_filter)))) (type: boolean)
+                  Statistics: Num rows: 2000 Data size: 174000 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  Filter Operator
+                    predicate: ((key BETWEEN 
DynamicValue(RS_10_srcpart_medium_n0_key2_min) AND 
DynamicValue(RS_10_srcpart_medium_n0_key2_max) and in_bloom_filter(key, 
DynamicValue(RS_10_srcpart_medium_n0_key2_bloom_filter))) and key is not null) 
(type: boolean)
+                    Statistics: Num rows: 2000 Data size: 174000 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: key (type: string)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 2000 Data size: 174000 Basic 
stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 2000 Data size: 174000 Basic 
stats: COMPLETE Column stats: COMPLETE
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+        Map 4 
+            Map Operator Tree:
+                TableScan
+                  alias: srcpart_medium_n0
+                  filterExpr: key2 is not null (type: boolean)
+                  Statistics: Num rows: 50 Data size: 4350 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  Filter Operator
+                    predicate: key2 is not null (type: boolean)
+                    Statistics: Num rows: 50 Data size: 4350 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: key2 (type: string)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 50 Data size: 4350 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 50 Data size: 4350 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Select Operator
+                        expressions: _col0 (type: string)
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 50 Data size: 4350 Basic stats: 
COMPLETE Column stats: COMPLETE
+                        Group By Operator
+                          aggregations: min(_col0), max(_col0), 
bloom_filter(_col0, expectedEntries=49)
+                          mode: hash
+                          outputColumnNames: _col0, _col1, _col2
+                          Statistics: Num rows: 1 Data size: 552 Basic stats: 
COMPLETE Column stats: COMPLETE
+                          Reduce Output Operator
+                            sort order: 
+                            Statistics: Num rows: 1 Data size: 552 Basic 
stats: COMPLETE Column stats: COMPLETE
+                            value expressions: _col0 (type: string), _col1 
(type: string), _col2 (type: binary)
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+        Map 6 
+            Map Operator Tree:
+                TableScan
+                  alias: srcpart_small_ext
+                  filterExpr: key1 is not null (type: boolean)
+                  Statistics: Num rows: 20 Data size: 1740 Basic stats: 
PARTIAL Column stats: PARTIAL
+                  Filter Operator
+                    predicate: key1 is not null (type: boolean)
+                    Statistics: Num rows: 20 Data size: 1740 Basic stats: 
PARTIAL Column stats: PARTIAL
+                    Select Operator
+                      expressions: key1 (type: string)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 20 Data size: 1740 Basic stats: 
PARTIAL Column stats: PARTIAL
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 20 Data size: 1740 Basic stats: 
PARTIAL Column stats: PARTIAL
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                     Inner Join 0 to 2
+                keys:
+                  0 _col0 (type: string)
+                  1 _col0 (type: string)
+                  2 _col0 (type: string)
+                Statistics: Num rows: 4400 Data size: 382800 Basic stats: 
PARTIAL Column stats: NONE
+                Group By Operator
+                  aggregations: count()
+                  mode: hash
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL 
Column stats: NONE
+                  Reduce Output Operator
+                    sort order: 
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL 
Column stats: NONE
+                    value expressions: _col0 (type: bigint)
+        Reducer 3 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL 
Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL 
Column stats: NONE
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 5 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: min(VALUE._col0), max(VALUE._col1), 
bloom_filter(VALUE._col2, expectedEntries=49)
+                mode: final
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE 
Column stats: COMPLETE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  value expressions: _col0 (type: string), _col1 (type: 
string), _col2 (type: binary)
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+

Reply via email to