This is an automated email from the ASF dual-hosted git repository.
krisztiankasa pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new ae68ec91652 HIVE-26659: Proceed MapJoin with empty storage if it
performs AntiJoin (Seonggon Namgung, reviewed by Ramesh Kumar Thangarajan, Aman
Sinha, Stephen Carlin)
ae68ec91652 is described below
commit ae68ec91652b80a4a7a76d5df5fe316b590b319a
Author: seonggon <[email protected]>
AuthorDate: Wed May 3 21:13:22 2023 +0900
HIVE-26659: Proceed MapJoin with empty storage if it performs AntiJoin
(Seonggon Namgung, reviewed by Ramesh Kumar Thangarajan, Aman Sinha, Stephen
Carlin)
---
.../java/org/apache/hadoop/hive/conf/HiveConf.java | 7 +-
.../hadoop/hive/ql/exec/MapJoinOperator.java | 4 +
ql/src/test/queries/clientpositive/antijoin2.q | 100 ++
.../results/clientpositive/llap/antijoin2.q.out | 1112 ++++++++++++++++++++
4 files changed, 1218 insertions(+), 5 deletions(-)
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index b6835928011..60ef76474a0 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -2363,11 +2363,8 @@ public class HiveConf extends Configuration {
"Whether Hive enables the optimization about converting common join
into mapjoin based on the input file size. \n" +
"If this parameter is on, and the sum of size for n-1 of the
tables/partitions for a n-way join is smaller than the\n" +
"specified size, the join is directly converted to a mapjoin (there is
no conditional task)."),
- HIVE_CONVERT_ANTI_JOIN("hive.auto.convert.anti.join", false,
- "Whether Hive enables the optimization about converting join with null
filter to anti join. The " +
- "default is currently false because of HIVE-26659. There is the
possibility of incorrect results. " +
- "The incorrect results shown on testing were zeros and nulls on
aggregate tpcds queries so it was " +
- "fairly obvious they were incorrect. But be wary when turning on this
flag."),
+ HIVE_CONVERT_ANTI_JOIN("hive.auto.convert.anti.join", true,
+ "Whether Hive enables the optimization about converting join with null
filter to anti join."),
HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD("hive.auto.convert.join.noconditionaltask.size",
10000000L,
"If hive.auto.convert.join.noconditionaltask is off, this parameter
does not take affect. \n" +
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java
b/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java
index 1897413de4a..02352c40084 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java
@@ -556,6 +556,10 @@ public class MapJoinOperator extends
AbstractMapJoinOperator<MapJoinDesc> implem
}
} else {
storage[pos] = emptyList;
+ if (pos != 0 && condn[pos - 1].getType() == JoinDesc.ANTI_JOIN) {
+ // For AntiJoin, we should call checkAndGenObject() when right
side is empty.
+ joinNeeded = true;
+ }
}
} else {
joinNeeded = true;
diff --git a/ql/src/test/queries/clientpositive/antijoin2.q
b/ql/src/test/queries/clientpositive/antijoin2.q
new file mode 100644
index 00000000000..be7e7867bfa
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/antijoin2.q
@@ -0,0 +1,100 @@
+set hive.merge.nway.joins=false;
+set hive.vectorized.execution.enabled=false;
+set hive.auto.convert.join=true;
+set hive.auto.convert.anti.join=true;
+
+drop table if exists tt1;
+drop table if exists tt2;
+drop table if exists tt3;
+
+create table tt1 (ws_order_number bigint, ws_ext_ship_cost decimal(7, 2));
+create table tt2 (ws_order_number bigint);
+create table tt3 (wr_order_number bigint);
+
+insert into tt1 values (42, 3093.96), (1041, 299.28), (1378, 85.56), (1378,
719.44), (1395, 145.68);
+insert into tt2 values (1378), (1395);
+insert into tt3 values (42), (1041);
+
+-- The result should be the same regardless of vectorization.
+
+explain
+select sum(ws_ext_ship_cost) from tt1 ws1, tt2 ws2
+where ws1.ws_order_number = ws2.ws_order_number
+and not exists(select * from tt3 wr1 where ws1.ws_order_number =
wr1.wr_order_number);
+explain cbo
+select sum(ws_ext_ship_cost) from tt1 ws1, tt2 ws2
+where ws1.ws_order_number = ws2.ws_order_number
+and not exists(select * from tt3 wr1 where ws1.ws_order_number =
wr1.wr_order_number);
+select sum(ws_ext_ship_cost) from tt1 ws1, tt2 ws2
+where ws1.ws_order_number = ws2.ws_order_number
+and not exists(select * from tt3 wr1 where ws1.ws_order_number =
wr1.wr_order_number);
+
+explain
+select * from tt1 where not exists(select * from tt2 where tt1.ws_order_number
= tt2.ws_order_number);
+explain cbo
+select * from tt1 where not exists(select * from tt2 where tt1.ws_order_number
= tt2.ws_order_number);
+select * from tt1 where not exists(select * from tt2 where tt1.ws_order_number
= tt2.ws_order_number);
+
+
+set hive.vectorized.execution.enabled=true;
+
+explain
+select sum(ws_ext_ship_cost) from tt1 ws1, tt2 ws2
+where ws1.ws_order_number = ws2.ws_order_number
+and not exists(select * from tt3 wr1 where ws1.ws_order_number =
wr1.wr_order_number);
+explain cbo
+select sum(ws_ext_ship_cost) from tt1 ws1, tt2 ws2
+where ws1.ws_order_number = ws2.ws_order_number
+and not exists(select * from tt3 wr1 where ws1.ws_order_number =
wr1.wr_order_number);
+select sum(ws_ext_ship_cost) from tt1 ws1, tt2 ws2
+where ws1.ws_order_number = ws2.ws_order_number
+and not exists(select * from tt3 wr1 where ws1.ws_order_number =
wr1.wr_order_number);
+
+explain
+select * from tt1 where not exists(select * from tt2 where tt1.ws_order_number
= tt2.ws_order_number);
+explain cbo
+select * from tt1 where not exists(select * from tt2 where tt1.ws_order_number
= tt2.ws_order_number);
+select * from tt1 where not exists(select * from tt2 where tt1.ws_order_number
= tt2.ws_order_number);
+
+
+-- Test n-way join which contains AntiJoin
+
+set hive.vectorized.execution.enabled=false;
+set hive.merge.nway.joins=true;
+
+explain
+select sum(ws_ext_ship_cost) from tt1 ws1, tt2 ws2
+where ws1.ws_order_number = ws2.ws_order_number
+and not exists(select * from tt3 wr1 where ws1.ws_order_number =
wr1.wr_order_number);
+explain cbo
+select sum(ws_ext_ship_cost) from tt1 ws1, tt2 ws2
+where ws1.ws_order_number = ws2.ws_order_number
+and not exists(select * from tt3 wr1 where ws1.ws_order_number =
wr1.wr_order_number);
+select sum(ws_ext_ship_cost) from tt1 ws1, tt2 ws2
+where ws1.ws_order_number = ws2.ws_order_number
+and not exists(select * from tt3 wr1 where ws1.ws_order_number =
wr1.wr_order_number);
+
+
+-- Test MergeJoin -> MapJoin pattern
+
+set hive.merge.nway.joins=false;
+set hive.vectorized.execution.enabled=false;
+set hive.auto.convert.join=true;
+set hive.auto.convert.anti.join=true;
+
+alter table tt1 update statistics set ('numRows'='10000000');
+alter table tt2 update statistics set ('numRows'='10000000');
+alter table tt3 update statistics set ('numRows'='2');
+
+explain
+select sum(ws_ext_ship_cost) from tt1 ws1, tt2 ws2
+where ws1.ws_order_number = ws2.ws_order_number
+and not exists(select * from tt3 wr1 where ws1.ws_order_number =
wr1.wr_order_number);
+explain cbo
+select sum(ws_ext_ship_cost) from tt1 ws1, tt2 ws2
+where ws1.ws_order_number = ws2.ws_order_number
+and not exists(select * from tt3 wr1 where ws1.ws_order_number =
wr1.wr_order_number);
+select sum(ws_ext_ship_cost) from tt1 ws1, tt2 ws2
+where ws1.ws_order_number = ws2.ws_order_number
+and not exists(select * from tt3 wr1 where ws1.ws_order_number =
wr1.wr_order_number);
+
diff --git a/ql/src/test/results/clientpositive/llap/antijoin2.q.out
b/ql/src/test/results/clientpositive/llap/antijoin2.q.out
new file mode 100644
index 00000000000..d09fd451f2f
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/antijoin2.q.out
@@ -0,0 +1,1112 @@
+PREHOOK: query: drop table if exists tt1
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists tt1
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table if exists tt2
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists tt2
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table if exists tt3
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists tt3
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table tt1 (ws_order_number bigint, ws_ext_ship_cost
decimal(7, 2))
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tt1
+POSTHOOK: query: create table tt1 (ws_order_number bigint, ws_ext_ship_cost
decimal(7, 2))
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tt1
+PREHOOK: query: create table tt2 (ws_order_number bigint)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tt2
+POSTHOOK: query: create table tt2 (ws_order_number bigint)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tt2
+PREHOOK: query: create table tt3 (wr_order_number bigint)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tt3
+POSTHOOK: query: create table tt3 (wr_order_number bigint)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tt3
+PREHOOK: query: insert into tt1 values (42, 3093.96), (1041, 299.28), (1378,
85.56), (1378, 719.44), (1395, 145.68)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@tt1
+POSTHOOK: query: insert into tt1 values (42, 3093.96), (1041, 299.28), (1378,
85.56), (1378, 719.44), (1395, 145.68)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@tt1
+POSTHOOK: Lineage: tt1.ws_ext_ship_cost SCRIPT []
+POSTHOOK: Lineage: tt1.ws_order_number SCRIPT []
+PREHOOK: query: insert into tt2 values (1378), (1395)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@tt2
+POSTHOOK: query: insert into tt2 values (1378), (1395)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@tt2
+POSTHOOK: Lineage: tt2.ws_order_number SCRIPT []
+PREHOOK: query: insert into tt3 values (42), (1041)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@tt3
+POSTHOOK: query: insert into tt3 values (42), (1041)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@tt3
+POSTHOOK: Lineage: tt3.wr_order_number SCRIPT []
+PREHOOK: query: explain
+select sum(ws_ext_ship_cost) from tt1 ws1, tt2 ws2
+where ws1.ws_order_number = ws2.ws_order_number
+and not exists(select * from tt3 wr1 where ws1.ws_order_number =
wr1.wr_order_number)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tt1
+PREHOOK: Input: default@tt2
+PREHOOK: Input: default@tt3
+#### A masked pattern was here ####
+POSTHOOK: query: explain
+select sum(ws_ext_ship_cost) from tt1 ws1, tt2 ws2
+where ws1.ws_order_number = ws2.ws_order_number
+and not exists(select * from tt3 wr1 where ws1.ws_order_number =
wr1.wr_order_number)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tt1
+POSTHOOK: Input: default@tt2
+POSTHOOK: Input: default@tt3
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE)
+ Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: ws1
+ filterExpr: ws_order_number is not null (type: boolean)
+ probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_44_container,
bigKeyColName:ws_order_number, smallTablePos:1, keyRatio:0.2
+ Statistics: Num rows: 5 Data size: 600 Basic stats: COMPLETE
Column stats: COMPLETE
+ Filter Operator
+ predicate: ws_order_number is not null (type: boolean)
+ Statistics: Num rows: 5 Data size: 600 Basic stats:
COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: ws_order_number (type: bigint),
ws_ext_ship_cost (type: decimal(7,2))
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 5 Data size: 600 Basic stats:
COMPLETE Column stats: COMPLETE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: bigint)
+ 1 _col0 (type: bigint)
+ outputColumnNames: _col0, _col1
+ input vertices:
+ 1 Map 3
+ Statistics: Num rows: 2 Data size: 240 Basic stats:
COMPLETE Column stats: COMPLETE
+ Map Join Operator
+ condition map:
+ Anti Join 0 to 1
+ keys:
+ 0 _col0 (type: bigint)
+ 1 _col0 (type: bigint)
+ outputColumnNames: _col1
+ input vertices:
+ 1 Map 4
+ Statistics: Num rows: 1 Data size: 112 Basic stats:
COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: sum(_col1)
+ minReductionHashAggr: 0.4
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 112 Basic
stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 112 Basic
stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: decimal(17,2))
+ Execution mode: llap
+ LLAP IO: all inputs
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: ws2
+ filterExpr: ws_order_number is not null (type: boolean)
+ Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE
Column stats: COMPLETE
+ Filter Operator
+ predicate: ws_order_number is not null (type: boolean)
+ Statistics: Num rows: 2 Data size: 16 Basic stats:
COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: ws_order_number (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 2 Data size: 16 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: bigint)
+ Statistics: Num rows: 2 Data size: 16 Basic stats:
COMPLETE Column stats: COMPLETE
+ Execution mode: llap
+ LLAP IO: all inputs
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: wr1
+ filterExpr: wr_order_number is not null (type: boolean)
+ Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE
Column stats: COMPLETE
+ Filter Operator
+ predicate: wr_order_number is not null (type: boolean)
+ Statistics: Num rows: 2 Data size: 16 Basic stats:
COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: wr_order_number (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 2 Data size: 16 Basic stats:
COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: _col0 (type: bigint)
+ minReductionHashAggr: 0.4
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 2 Data size: 16 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: bigint)
+ Statistics: Num rows: 2 Data size: 16 Basic stats:
COMPLETE Column stats: COMPLETE
+ Execution mode: llap
+ LLAP IO: all inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE
Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE
Column stats: COMPLETE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: explain cbo
+select sum(ws_ext_ship_cost) from tt1 ws1, tt2 ws2
+where ws1.ws_order_number = ws2.ws_order_number
+and not exists(select * from tt3 wr1 where ws1.ws_order_number =
wr1.wr_order_number)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tt1
+PREHOOK: Input: default@tt2
+PREHOOK: Input: default@tt3
+#### A masked pattern was here ####
+POSTHOOK: query: explain cbo
+select sum(ws_ext_ship_cost) from tt1 ws1, tt2 ws2
+where ws1.ws_order_number = ws2.ws_order_number
+and not exists(select * from tt3 wr1 where ws1.ws_order_number =
wr1.wr_order_number)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tt1
+POSTHOOK: Input: default@tt2
+POSTHOOK: Input: default@tt3
+#### A masked pattern was here ####
+CBO PLAN:
+HiveAggregate(group=[{}], agg#0=[sum($1)])
+ HiveAntiJoin(condition=[=($0, $4)], joinType=[anti])
+ HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none],
cost=[not available])
+ HiveProject(ws_order_number=[$0], ws_ext_ship_cost=[$1])
+ HiveFilter(condition=[IS NOT NULL($0)])
+ HiveTableScan(table=[[default, tt1]], table:alias=[ws1])
+ HiveProject(ws_order_number=[$0])
+ HiveFilter(condition=[IS NOT NULL($0)])
+ HiveTableScan(table=[[default, tt2]], table:alias=[ws2])
+ HiveProject(literalTrue=[true], wr_order_number=[$0])
+ HiveFilter(condition=[IS NOT NULL($0)])
+ HiveTableScan(table=[[default, tt3]], table:alias=[wr1])
+
+PREHOOK: query: select sum(ws_ext_ship_cost) from tt1 ws1, tt2 ws2
+where ws1.ws_order_number = ws2.ws_order_number
+and not exists(select * from tt3 wr1 where ws1.ws_order_number =
wr1.wr_order_number)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tt1
+PREHOOK: Input: default@tt2
+PREHOOK: Input: default@tt3
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(ws_ext_ship_cost) from tt1 ws1, tt2 ws2
+where ws1.ws_order_number = ws2.ws_order_number
+and not exists(select * from tt3 wr1 where ws1.ws_order_number =
wr1.wr_order_number)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tt1
+POSTHOOK: Input: default@tt2
+POSTHOOK: Input: default@tt3
+#### A masked pattern was here ####
+950.68
+PREHOOK: query: explain
+select * from tt1 where not exists(select * from tt2 where tt1.ws_order_number
= tt2.ws_order_number)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tt1
+PREHOOK: Input: default@tt2
+#### A masked pattern was here ####
+POSTHOOK: query: explain
+select * from tt1 where not exists(select * from tt2 where tt1.ws_order_number
= tt2.ws_order_number)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tt1
+POSTHOOK: Input: default@tt2
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Map 1 <- Map 2 (BROADCAST_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: tt1
+ Statistics: Num rows: 5 Data size: 600 Basic stats: COMPLETE
Column stats: COMPLETE
+ Select Operator
+ expressions: ws_order_number (type: bigint),
ws_ext_ship_cost (type: decimal(7,2))
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 5 Data size: 600 Basic stats:
COMPLETE Column stats: COMPLETE
+ Map Join Operator
+ condition map:
+ Anti Join 0 to 1
+ keys:
+ 0 _col0 (type: bigint)
+ 1 _col0 (type: bigint)
+ outputColumnNames: _col0, _col1
+ input vertices:
+ 1 Map 2
+ Statistics: Num rows: 3 Data size: 360 Basic stats:
COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 3 Data size: 360 Basic stats:
COMPLETE Column stats: COMPLETE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde:
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: llap
+ LLAP IO: all inputs
+ Map 2
+ Map Operator Tree:
+ TableScan
+ alias: tt2
+ filterExpr: ws_order_number is not null (type: boolean)
+ Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE
Column stats: COMPLETE
+ Filter Operator
+ predicate: ws_order_number is not null (type: boolean)
+ Statistics: Num rows: 2 Data size: 16 Basic stats:
COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: ws_order_number (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 2 Data size: 16 Basic stats:
COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: _col0 (type: bigint)
+ minReductionHashAggr: 0.4
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 2 Data size: 16 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: bigint)
+ Statistics: Num rows: 2 Data size: 16 Basic stats:
COMPLETE Column stats: COMPLETE
+ Execution mode: llap
+ LLAP IO: all inputs
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: explain cbo
+select * from tt1 where not exists(select * from tt2 where tt1.ws_order_number
= tt2.ws_order_number)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tt1
+PREHOOK: Input: default@tt2
+#### A masked pattern was here ####
+POSTHOOK: query: explain cbo
+select * from tt1 where not exists(select * from tt2 where tt1.ws_order_number
= tt2.ws_order_number)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tt1
+POSTHOOK: Input: default@tt2
+#### A masked pattern was here ####
+CBO PLAN:
+HiveAntiJoin(condition=[=($0, $3)], joinType=[anti])
+ HiveProject(ws_order_number=[$0], ws_ext_ship_cost=[$1])
+ HiveTableScan(table=[[default, tt1]], table:alias=[tt1])
+ HiveProject(literalTrue=[true], ws_order_number=[$0])
+ HiveFilter(condition=[IS NOT NULL($0)])
+ HiveTableScan(table=[[default, tt2]], table:alias=[tt2])
+
+PREHOOK: query: select * from tt1 where not exists(select * from tt2 where
tt1.ws_order_number = tt2.ws_order_number)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tt1
+PREHOOK: Input: default@tt2
+#### A masked pattern was here ####
+POSTHOOK: query: select * from tt1 where not exists(select * from tt2 where
tt1.ws_order_number = tt2.ws_order_number)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tt1
+POSTHOOK: Input: default@tt2
+#### A masked pattern was here ####
+42 3093.96
+1041 299.28
+PREHOOK: query: explain
+select sum(ws_ext_ship_cost) from tt1 ws1, tt2 ws2
+where ws1.ws_order_number = ws2.ws_order_number
+and not exists(select * from tt3 wr1 where ws1.ws_order_number =
wr1.wr_order_number)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tt1
+PREHOOK: Input: default@tt2
+PREHOOK: Input: default@tt3
+#### A masked pattern was here ####
+POSTHOOK: query: explain
+select sum(ws_ext_ship_cost) from tt1 ws1, tt2 ws2
+where ws1.ws_order_number = ws2.ws_order_number
+and not exists(select * from tt3 wr1 where ws1.ws_order_number =
wr1.wr_order_number)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tt1
+POSTHOOK: Input: default@tt2
+POSTHOOK: Input: default@tt3
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE)
+ Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: ws1
+ filterExpr: ws_order_number is not null (type: boolean)
+ probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_44_container,
bigKeyColName:ws_order_number, smallTablePos:1, keyRatio:0.2
+ Statistics: Num rows: 5 Data size: 600 Basic stats: COMPLETE
Column stats: COMPLETE
+ Filter Operator
+ predicate: ws_order_number is not null (type: boolean)
+ Statistics: Num rows: 5 Data size: 600 Basic stats:
COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: ws_order_number (type: bigint),
ws_ext_ship_cost (type: decimal(7,2))
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 5 Data size: 600 Basic stats:
COMPLETE Column stats: COMPLETE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: bigint)
+ 1 _col0 (type: bigint)
+ outputColumnNames: _col0, _col1
+ input vertices:
+ 1 Map 3
+ Statistics: Num rows: 2 Data size: 240 Basic stats:
COMPLETE Column stats: COMPLETE
+ Map Join Operator
+ condition map:
+ Anti Join 0 to 1
+ keys:
+ 0 _col0 (type: bigint)
+ 1 _col0 (type: bigint)
+ outputColumnNames: _col1
+ input vertices:
+ 1 Map 4
+ Statistics: Num rows: 1 Data size: 112 Basic stats:
COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: sum(_col1)
+ minReductionHashAggr: 0.4
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 112 Basic
stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 112 Basic
stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: decimal(17,2))
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: ws2
+ filterExpr: ws_order_number is not null (type: boolean)
+ Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE
Column stats: COMPLETE
+ Filter Operator
+ predicate: ws_order_number is not null (type: boolean)
+ Statistics: Num rows: 2 Data size: 16 Basic stats:
COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: ws_order_number (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 2 Data size: 16 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: bigint)
+ Statistics: Num rows: 2 Data size: 16 Basic stats:
COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: wr1
+ filterExpr: wr_order_number is not null (type: boolean)
+ Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE
Column stats: COMPLETE
+ Filter Operator
+ predicate: wr_order_number is not null (type: boolean)
+ Statistics: Num rows: 2 Data size: 16 Basic stats:
COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: wr_order_number (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 2 Data size: 16 Basic stats:
COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: _col0 (type: bigint)
+ minReductionHashAggr: 0.4
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 2 Data size: 16 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: bigint)
+ Statistics: Num rows: 2 Data size: 16 Basic stats:
COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE
Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE
Column stats: COMPLETE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: explain cbo
+select sum(ws_ext_ship_cost) from tt1 ws1, tt2 ws2
+where ws1.ws_order_number = ws2.ws_order_number
+and not exists(select * from tt3 wr1 where ws1.ws_order_number =
wr1.wr_order_number)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tt1
+PREHOOK: Input: default@tt2
+PREHOOK: Input: default@tt3
+#### A masked pattern was here ####
+POSTHOOK: query: explain cbo
+select sum(ws_ext_ship_cost) from tt1 ws1, tt2 ws2
+where ws1.ws_order_number = ws2.ws_order_number
+and not exists(select * from tt3 wr1 where ws1.ws_order_number =
wr1.wr_order_number)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tt1
+POSTHOOK: Input: default@tt2
+POSTHOOK: Input: default@tt3
+#### A masked pattern was here ####
+CBO PLAN:
+HiveAggregate(group=[{}], agg#0=[sum($1)])
+ HiveAntiJoin(condition=[=($0, $4)], joinType=[anti])
+ HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none],
cost=[not available])
+ HiveProject(ws_order_number=[$0], ws_ext_ship_cost=[$1])
+ HiveFilter(condition=[IS NOT NULL($0)])
+ HiveTableScan(table=[[default, tt1]], table:alias=[ws1])
+ HiveProject(ws_order_number=[$0])
+ HiveFilter(condition=[IS NOT NULL($0)])
+ HiveTableScan(table=[[default, tt2]], table:alias=[ws2])
+ HiveProject(literalTrue=[true], wr_order_number=[$0])
+ HiveFilter(condition=[IS NOT NULL($0)])
+ HiveTableScan(table=[[default, tt3]], table:alias=[wr1])
+
+PREHOOK: query: select sum(ws_ext_ship_cost) from tt1 ws1, tt2 ws2
+where ws1.ws_order_number = ws2.ws_order_number
+and not exists(select * from tt3 wr1 where ws1.ws_order_number =
wr1.wr_order_number)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tt1
+PREHOOK: Input: default@tt2
+PREHOOK: Input: default@tt3
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(ws_ext_ship_cost) from tt1 ws1, tt2 ws2
+where ws1.ws_order_number = ws2.ws_order_number
+and not exists(select * from tt3 wr1 where ws1.ws_order_number =
wr1.wr_order_number)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tt1
+POSTHOOK: Input: default@tt2
+POSTHOOK: Input: default@tt3
+#### A masked pattern was here ####
+950.68
+PREHOOK: query: explain
+select * from tt1 where not exists(select * from tt2 where tt1.ws_order_number
= tt2.ws_order_number)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tt1
+PREHOOK: Input: default@tt2
+#### A masked pattern was here ####
+POSTHOOK: query: explain
+select * from tt1 where not exists(select * from tt2 where tt1.ws_order_number
= tt2.ws_order_number)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tt1
+POSTHOOK: Input: default@tt2
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Map 1 <- Map 2 (BROADCAST_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: tt1
+ Statistics: Num rows: 5 Data size: 600 Basic stats: COMPLETE
Column stats: COMPLETE
+ Select Operator
+ expressions: ws_order_number (type: bigint),
ws_ext_ship_cost (type: decimal(7,2))
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 5 Data size: 600 Basic stats:
COMPLETE Column stats: COMPLETE
+ Map Join Operator
+ condition map:
+ Anti Join 0 to 1
+ keys:
+ 0 _col0 (type: bigint)
+ 1 _col0 (type: bigint)
+ outputColumnNames: _col0, _col1
+ input vertices:
+ 1 Map 2
+ Statistics: Num rows: 3 Data size: 360 Basic stats:
COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 3 Data size: 360 Basic stats:
COMPLETE Column stats: COMPLETE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde:
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map 2
+ Map Operator Tree:
+ TableScan
+ alias: tt2
+ filterExpr: ws_order_number is not null (type: boolean)
+ Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE
Column stats: COMPLETE
+ Filter Operator
+ predicate: ws_order_number is not null (type: boolean)
+ Statistics: Num rows: 2 Data size: 16 Basic stats:
COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: ws_order_number (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 2 Data size: 16 Basic stats:
COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: _col0 (type: bigint)
+ minReductionHashAggr: 0.4
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 2 Data size: 16 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: bigint)
+ Statistics: Num rows: 2 Data size: 16 Basic stats:
COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: explain cbo
+select * from tt1 where not exists(select * from tt2 where tt1.ws_order_number
= tt2.ws_order_number)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tt1
+PREHOOK: Input: default@tt2
+#### A masked pattern was here ####
+POSTHOOK: query: explain cbo
+select * from tt1 where not exists(select * from tt2 where tt1.ws_order_number
= tt2.ws_order_number)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tt1
+POSTHOOK: Input: default@tt2
+#### A masked pattern was here ####
+CBO PLAN:
+HiveAntiJoin(condition=[=($0, $3)], joinType=[anti])
+ HiveProject(ws_order_number=[$0], ws_ext_ship_cost=[$1])
+ HiveTableScan(table=[[default, tt1]], table:alias=[tt1])
+ HiveProject(literalTrue=[true], ws_order_number=[$0])
+ HiveFilter(condition=[IS NOT NULL($0)])
+ HiveTableScan(table=[[default, tt2]], table:alias=[tt2])
+
+PREHOOK: query: select * from tt1 where not exists(select * from tt2 where
tt1.ws_order_number = tt2.ws_order_number)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tt1
+PREHOOK: Input: default@tt2
+#### A masked pattern was here ####
+POSTHOOK: query: select * from tt1 where not exists(select * from tt2 where
tt1.ws_order_number = tt2.ws_order_number)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tt1
+POSTHOOK: Input: default@tt2
+#### A masked pattern was here ####
+42 3093.96
+1041 299.28
+PREHOOK: query: explain
+select sum(ws_ext_ship_cost) from tt1 ws1, tt2 ws2
+where ws1.ws_order_number = ws2.ws_order_number
+and not exists(select * from tt3 wr1 where ws1.ws_order_number =
wr1.wr_order_number)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tt1
+PREHOOK: Input: default@tt2
+PREHOOK: Input: default@tt3
+#### A masked pattern was here ####
+POSTHOOK: query: explain
+select sum(ws_ext_ship_cost) from tt1 ws1, tt2 ws2
+where ws1.ws_order_number = ws2.ws_order_number
+and not exists(select * from tt3 wr1 where ws1.ws_order_number =
wr1.wr_order_number)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tt1
+POSTHOOK: Input: default@tt2
+POSTHOOK: Input: default@tt3
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE)
+ Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: ws1
+ filterExpr: ws_order_number is not null (type: boolean)
+ probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_40_container,
bigKeyColName:ws_order_number, smallTablePos:1, keyRatio:0.4
+ Statistics: Num rows: 5 Data size: 600 Basic stats: COMPLETE
Column stats: COMPLETE
+ Filter Operator
+ predicate: ws_order_number is not null (type: boolean)
+ Statistics: Num rows: 5 Data size: 600 Basic stats:
COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: ws_order_number (type: bigint),
ws_ext_ship_cost (type: decimal(7,2))
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 5 Data size: 600 Basic stats:
COMPLETE Column stats: COMPLETE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Anti Join 0 to 2
+ keys:
+ 0 _col0 (type: bigint)
+ 1 _col0 (type: bigint)
+ 2 _col0 (type: bigint)
+ outputColumnNames: _col1
+ input vertices:
+ 1 Map 3
+ 2 Map 4
+ Statistics: Num rows: 2 Data size: 224 Basic stats:
COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: sum(_col1)
+ minReductionHashAggr: 0.5
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 112 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 112 Basic
stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: decimal(17,2))
+ Execution mode: llap
+ LLAP IO: all inputs
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: ws2
+ filterExpr: ws_order_number is not null (type: boolean)
+ Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE
Column stats: COMPLETE
+ Filter Operator
+ predicate: ws_order_number is not null (type: boolean)
+ Statistics: Num rows: 2 Data size: 16 Basic stats:
COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: ws_order_number (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 2 Data size: 16 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: bigint)
+ Statistics: Num rows: 2 Data size: 16 Basic stats:
COMPLETE Column stats: COMPLETE
+ Execution mode: llap
+ LLAP IO: all inputs
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: wr1
+ filterExpr: wr_order_number is not null (type: boolean)
+ Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE
Column stats: COMPLETE
+ Filter Operator
+ predicate: wr_order_number is not null (type: boolean)
+ Statistics: Num rows: 2 Data size: 16 Basic stats:
COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: wr_order_number (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 2 Data size: 16 Basic stats:
COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: _col0 (type: bigint)
+ minReductionHashAggr: 0.4
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 2 Data size: 16 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: bigint)
+ Statistics: Num rows: 2 Data size: 16 Basic stats:
COMPLETE Column stats: COMPLETE
+ Execution mode: llap
+ LLAP IO: all inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE
Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE
Column stats: COMPLETE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: explain cbo
+select sum(ws_ext_ship_cost) from tt1 ws1, tt2 ws2
+where ws1.ws_order_number = ws2.ws_order_number
+and not exists(select * from tt3 wr1 where ws1.ws_order_number =
wr1.wr_order_number)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tt1
+PREHOOK: Input: default@tt2
+PREHOOK: Input: default@tt3
+#### A masked pattern was here ####
+POSTHOOK: query: explain cbo
+select sum(ws_ext_ship_cost) from tt1 ws1, tt2 ws2
+where ws1.ws_order_number = ws2.ws_order_number
+and not exists(select * from tt3 wr1 where ws1.ws_order_number =
wr1.wr_order_number)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tt1
+POSTHOOK: Input: default@tt2
+POSTHOOK: Input: default@tt3
+#### A masked pattern was here ####
+CBO PLAN:
+HiveAggregate(group=[{}], agg#0=[sum($1)])
+ HiveAntiJoin(condition=[=($0, $4)], joinType=[anti])
+ HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none],
cost=[not available])
+ HiveProject(ws_order_number=[$0], ws_ext_ship_cost=[$1])
+ HiveFilter(condition=[IS NOT NULL($0)])
+ HiveTableScan(table=[[default, tt1]], table:alias=[ws1])
+ HiveProject(ws_order_number=[$0])
+ HiveFilter(condition=[IS NOT NULL($0)])
+ HiveTableScan(table=[[default, tt2]], table:alias=[ws2])
+ HiveProject(literalTrue=[true], wr_order_number=[$0])
+ HiveFilter(condition=[IS NOT NULL($0)])
+ HiveTableScan(table=[[default, tt3]], table:alias=[wr1])
+
+PREHOOK: query: select sum(ws_ext_ship_cost) from tt1 ws1, tt2 ws2
+where ws1.ws_order_number = ws2.ws_order_number
+and not exists(select * from tt3 wr1 where ws1.ws_order_number =
wr1.wr_order_number)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tt1
+PREHOOK: Input: default@tt2
+PREHOOK: Input: default@tt3
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(ws_ext_ship_cost) from tt1 ws1, tt2 ws2
+where ws1.ws_order_number = ws2.ws_order_number
+and not exists(select * from tt3 wr1 where ws1.ws_order_number =
wr1.wr_order_number)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tt1
+POSTHOOK: Input: default@tt2
+POSTHOOK: Input: default@tt3
+#### A masked pattern was here ####
+950.68
+PREHOOK: query: alter table tt1 update statistics set ('numRows'='10000000')
+PREHOOK: type: ALTERTABLE_UPDATETABLESTATS
+PREHOOK: Input: default@tt1
+PREHOOK: Output: default@tt1
+POSTHOOK: query: alter table tt1 update statistics set ('numRows'='10000000')
+POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS
+POSTHOOK: Input: default@tt1
+POSTHOOK: Output: default@tt1
+PREHOOK: query: alter table tt2 update statistics set ('numRows'='10000000')
+PREHOOK: type: ALTERTABLE_UPDATETABLESTATS
+PREHOOK: Input: default@tt2
+PREHOOK: Output: default@tt2
+POSTHOOK: query: alter table tt2 update statistics set ('numRows'='10000000')
+POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS
+POSTHOOK: Input: default@tt2
+POSTHOOK: Output: default@tt2
+PREHOOK: query: alter table tt3 update statistics set ('numRows'='2')
+PREHOOK: type: ALTERTABLE_UPDATETABLESTATS
+PREHOOK: Input: default@tt3
+PREHOOK: Output: default@tt3
+POSTHOOK: query: alter table tt3 update statistics set ('numRows'='2')
+POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS
+POSTHOOK: Input: default@tt3
+POSTHOOK: Output: default@tt3
+PREHOOK: query: explain
+select sum(ws_ext_ship_cost) from tt1 ws1, tt2 ws2
+where ws1.ws_order_number = ws2.ws_order_number
+and not exists(select * from tt3 wr1 where ws1.ws_order_number =
wr1.wr_order_number)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tt1
+PREHOOK: Input: default@tt2
+PREHOOK: Input: default@tt3
+#### A masked pattern was here ####
+POSTHOOK: query: explain
+select sum(ws_ext_ship_cost) from tt1 ws1, tt2 ws2
+where ws1.ws_order_number = ws2.ws_order_number
+and not exists(select * from tt3 wr1 where ws1.ws_order_number =
wr1.wr_order_number)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tt1
+POSTHOOK: Input: default@tt2
+POSTHOOK: Input: default@tt3
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5
(BROADCAST_EDGE)
+ Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: ws1
+ filterExpr: ws_order_number is not null (type: boolean)
+ Statistics: Num rows: 10000000 Data size: 1200000000 Basic
stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: ws_order_number is not null (type: boolean)
+ Statistics: Num rows: 10000000 Data size: 1200000000 Basic
stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: ws_order_number (type: bigint),
ws_ext_ship_cost (type: decimal(7,2))
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 10000000 Data size: 1200000000
Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: bigint)
+ Statistics: Num rows: 10000000 Data size: 1200000000
Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: decimal(7,2))
+ Execution mode: llap
+ LLAP IO: all inputs
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: ws2
+ filterExpr: ws_order_number is not null (type: boolean)
+ Statistics: Num rows: 10000000 Data size: 80000000 Basic
stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: ws_order_number is not null (type: boolean)
+ Statistics: Num rows: 10000000 Data size: 80000000 Basic
stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: ws_order_number (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 10000000 Data size: 80000000 Basic
stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: bigint)
+ Statistics: Num rows: 10000000 Data size: 80000000
Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: llap
+ LLAP IO: all inputs
+ Map 5
+ Map Operator Tree:
+ TableScan
+ alias: wr1
+ filterExpr: wr_order_number is not null (type: boolean)
+ Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE
Column stats: COMPLETE
+ Filter Operator
+ predicate: wr_order_number is not null (type: boolean)
+ Statistics: Num rows: 2 Data size: 16 Basic stats:
COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: wr_order_number (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 2 Data size: 16 Basic stats:
COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: _col0 (type: bigint)
+ minReductionHashAggr: 0.4
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 2 Data size: 16 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: bigint)
+ Statistics: Num rows: 2 Data size: 16 Basic stats:
COMPLETE Column stats: COMPLETE
+ Execution mode: llap
+ LLAP IO: all inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: bigint)
+ 1 _col0 (type: bigint)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 25000000000000 Data size:
3000000000000000 Basic stats: COMPLETE Column stats: COMPLETE
+ Map Join Operator
+ condition map:
+ Anti Join 0 to 1
+ keys:
+ 0 _col0 (type: bigint)
+ 1 _col0 (type: bigint)
+ outputColumnNames: _col1
+ input vertices:
+ 1 Map 5
+ Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE
Column stats: COMPLETE
+ Group By Operator
+ aggregations: sum(_col1)
+ minReductionHashAggr: 0.4
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 112 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 112 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: decimal(17,2))
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE
Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE
Column stats: COMPLETE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: explain cbo
+select sum(ws_ext_ship_cost) from tt1 ws1, tt2 ws2
+where ws1.ws_order_number = ws2.ws_order_number
+and not exists(select * from tt3 wr1 where ws1.ws_order_number =
wr1.wr_order_number)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tt1
+PREHOOK: Input: default@tt2
+PREHOOK: Input: default@tt3
+#### A masked pattern was here ####
+POSTHOOK: query: explain cbo
+select sum(ws_ext_ship_cost) from tt1 ws1, tt2 ws2
+where ws1.ws_order_number = ws2.ws_order_number
+and not exists(select * from tt3 wr1 where ws1.ws_order_number =
wr1.wr_order_number)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tt1
+POSTHOOK: Input: default@tt2
+POSTHOOK: Input: default@tt3
+#### A masked pattern was here ####
+CBO PLAN:
+HiveAggregate(group=[{}], agg#0=[sum($1)])
+ HiveAntiJoin(condition=[=($0, $4)], joinType=[anti])
+ HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none],
cost=[not available])
+ HiveProject(ws_order_number=[$0], ws_ext_ship_cost=[$1])
+ HiveFilter(condition=[IS NOT NULL($0)])
+ HiveTableScan(table=[[default, tt1]], table:alias=[ws1])
+ HiveProject(ws_order_number=[$0])
+ HiveFilter(condition=[IS NOT NULL($0)])
+ HiveTableScan(table=[[default, tt2]], table:alias=[ws2])
+ HiveProject(literalTrue=[true], wr_order_number=[$0])
+ HiveFilter(condition=[IS NOT NULL($0)])
+ HiveTableScan(table=[[default, tt3]], table:alias=[wr1])
+
+PREHOOK: query: select sum(ws_ext_ship_cost) from tt1 ws1, tt2 ws2
+where ws1.ws_order_number = ws2.ws_order_number
+and not exists(select * from tt3 wr1 where ws1.ws_order_number =
wr1.wr_order_number)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tt1
+PREHOOK: Input: default@tt2
+PREHOOK: Input: default@tt3
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(ws_ext_ship_cost) from tt1 ws1, tt2 ws2
+where ws1.ws_order_number = ws2.ws_order_number
+and not exists(select * from tt3 wr1 where ws1.ws_order_number =
wr1.wr_order_number)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tt1
+POSTHOOK: Input: default@tt2
+POSTHOOK: Input: default@tt3
+#### A masked pattern was here ####
+950.68