This is an automated email from the ASF dual-hosted git repository.
sankarh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 5e6ce2a6640 HIVE-27324: Hive query with NOT IN condition is giving
incorrect results when the sub query table contains the null value (Diksha,
reviewed by Mahesh Kumar Behera, Sankar Hariappan)
5e6ce2a6640 is described below
commit 5e6ce2a66404ef0267c27f407f14e601e566dfc0
Author: Diksha628 <[email protected]>
AuthorDate: Fri Nov 3 21:08:26 2023 +0530
HIVE-27324: Hive query with NOT IN condition is giving incorrect results
when the sub query table contains the null value (Diksha, reviewed by Mahesh
Kumar Behera, Sankar Hariappan)
Signed-off-by: Sankar Hariappan <[email protected]>
Closes (#4636)
---
.../test/resources/testconfiguration.properties | 1 +
.../hadoop/hive/ql/parse/SemanticAnalyzer.java | 42 +-
ql/src/test/queries/clientpositive/notInTest.q | 93 +
.../llap/create_view_disable_cbo.q.out | 4 +-
.../results/clientpositive/llap/notInTest.q.out | 1825 ++++++++++++++++++++
.../llap/special_character_in_tabnames_1.q.out | 185 +-
.../special_character_in_tabnames_quotes_1.q.out | 185 +-
.../llap/subquery_unqual_corr_expr.q.out | 36 +-
8 files changed, 2225 insertions(+), 146 deletions(-)
diff --git a/itests/src/test/resources/testconfiguration.properties
b/itests/src/test/resources/testconfiguration.properties
index e56f6ba8bdb..367b922d130 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -126,6 +126,7 @@ minillap.query.files=\
multi_count_distinct_null.q,\
newline.q,\
nonreserved_keywords_insert_into1.q,\
+ notInTest.q,\
nullscript.q,\
orc_createas1.q,\
orc_llap_counters.q,\
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 250f7c2fcbc..ca0cc179876 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -3704,7 +3704,14 @@ public class SemanticAnalyzer extends
BaseSemanticAnalyzer {
* push filters only for this QBJoinTree. Child QBJoinTrees have
already been handled.
*/
pushJoinFilters(qb, joinTree, aliasToOpInfo, false);
- input = genJoinOperator(qbSQ, joinTree, aliasToOpInfo, input);
+
+ /*
+ * Note: for multi-destination queries, even when only one destination
contains a NOT IN operator, the behavior is intentionally left unchanged for now.
+ * That case is tracked as a separate bug:
https://issues.apache.org/jira/browse/HIVE-27844
+ */
+ boolean notInCheckPresent = (subQuery.getNotInCheck() != null &&
!qb.isMultiDestQuery());
+ input = genJoinOperator(qbSQ, joinTree, aliasToOpInfo , input,
notInCheckPresent);
+
searchCond = subQuery.updateOuterQueryFilter(clonedSearchCond);
}
}
@@ -3771,14 +3778,26 @@ public class SemanticAnalyzer extends
BaseSemanticAnalyzer {
* for inner joins push a 'is not null predicate' to the join sources for
* every non nullSafe predicate.
*/
+
private Operator genNotNullFilterForJoinSourcePlan(QB qb, Operator input,
- QBJoinTree joinTree,
ExprNodeDesc[] joinKeys) throws SemanticException {
+ QBJoinTree joinTree,
ExprNodeDesc[] joinKeys) throws SemanticException {
+ return genNotNullFilterForJoinSourcePlan(qb, input, joinTree, joinKeys,
false);
+ }
+
+ private Operator genNotNullFilterForJoinSourcePlan(QB qb, Operator input,
+ QBJoinTree joinTree,
ExprNodeDesc[] joinKeys, boolean OuternotInCheck) throws SemanticException {
+
+ /*
+ * The OuternotInCheck param is used for the purpose of adding an
+ * (outerQueryTable.outerQueryCol is not null) predicate to the join source,
+ * since it is otherwise skipped when the join tree contains an outer join.
+ */
if (qb == null || joinTree == null) {
return input;
}
- if (!joinTree.getNoOuterJoin()) {
+ if (!joinTree.getNoOuterJoin() && !OuternotInCheck) {
return input;
}
@@ -3843,6 +3862,8 @@ public class SemanticAnalyzer extends
BaseSemanticAnalyzer {
return output;
}
+
+
Integer genExprNodeDescRegex(String colRegex, String tabAlias, ASTNode sel,
List<ExprNodeDesc> exprList, Set<ColumnInfo> excludeCols, RowResolver
input,
RowResolver colSrcRR, Integer pos, RowResolver output, List<String>
aliases,
@@ -9855,8 +9876,15 @@ public class SemanticAnalyzer extends
BaseSemanticAnalyzer {
private Operator genJoinOperator(QB qb, QBJoinTree joinTree,
Map<String, Operator> map,
Operator joiningOp) throws
SemanticException {
+ return genJoinOperator(qb, joinTree, map, joiningOp, false);
+ }
+
+ private Operator genJoinOperator(QB qb, QBJoinTree joinTree,
+ Map<String, Operator> map,
+ Operator joiningOp, boolean
notInCheckPresent) throws SemanticException {
QBJoinTree leftChild = joinTree.getJoinSrc();
Operator joinSrcOp = joiningOp instanceof JoinOperator ? joiningOp : null;
+ Operator OuterSrcOp = joiningOp;
if (joinSrcOp == null && leftChild != null) {
joinSrcOp = genJoinOperator(qb, leftChild, map, null);
@@ -9910,7 +9938,13 @@ public class SemanticAnalyzer extends
BaseSemanticAnalyzer {
// generate a ReduceSink operator for the join
String[] srcs = baseSrc[i] != null ? new String[] {baseSrc[i]} :
joinTree.getLeftAliases();
if (!isCBOExecuted()) {
- srcOps[i] = genNotNullFilterForJoinSourcePlan(qb, srcOps[i], joinTree,
joinKeys[i]);
+ /*
+ * The condition srcOps[i] == OuterSrcOp is used to make sure that the
not-null check predicate
+ * is added only for the outer query source (outerQueryTable.outerQueryCol),
+ * even though the join tree contains an outer join.
+ */
+ boolean outerNotInCheck = (notInCheckPresent && (srcOps[i] ==
OuterSrcOp));
+ srcOps[i] = genNotNullFilterForJoinSourcePlan(qb, srcOps[i], joinTree,
joinKeys[i], outerNotInCheck);
}
srcOps[i] = genJoinReduceSinkChild(joinKeys[i], srcOps[i], srcs,
joinTree.getNextTag());
}
diff --git a/ql/src/test/queries/clientpositive/notInTest.q
b/ql/src/test/queries/clientpositive/notInTest.q
new file mode 100644
index 00000000000..19da7c213af
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/notInTest.q
@@ -0,0 +1,93 @@
+create table t3 (id int,name string, age int);
+insert into t3
values(1,'Sagar',23),(2,'Sultan',NULL),(3,'Surya',23),(4,'Raman',45),(5,'Scott',23),(6,'Ramya',5),(7,'',23),(8,'',23),(9,'ron',3),(10,'Sam',22),(11,'nick',19),(12,'fed',18),(13,'kong',13),(14,'hela',45);
+
+create table t4 (id int,name string, age int);
+insert into t4
values(1,'Sagar',23),(3,'Surya',23),(4,'Raman',45),(5,'Scott',23),(6,'Ramya',5),(7,'',23),(8,'',23);
+
+create table t5 (id int,name string, ages int);
+insert into t5
values(1,'Sagar',23),(3,'Surya',NULL),(4,'Raman',45),(5,'Scott',23),(6,'Ramya',5),(7,'',23),(8,'',23);
+
+set hive.cbo.enable = false;
+
+select * from t3
+where age in (select distinct(age) age from t4)
+order by age ;
+
+select * from t3
+where age not in (select distinct(age) age from t4 )
+order by age ;
+
+
+select * from t3
+where age not in (select distinct(ages) ages from t5 where t5.ages is not null)
+order by age ;
+
+
+select * from t3
+where age not in (select distinct(ages) ages from t5 )
+order by age ;
+
+select count(*) from t3
+where age not in (23,22, null );
+
+explain select * from t3
+ where age not in (select distinct(age) age from t4);
+
+explain select * from t3
+where age not in (select distinct(ages) ages from t5 );
+
+explain select * from t3
+ where age not in (select distinct(ages) ages from t5 where t5.ages is
not null);
+
+select count(*) from t3
+where age not in (select distinct(age)age from t3 t1 where t1.age > 10);
+
+
+
+explain select id, name, age
+from t3 b where b.age not in
+(select min(age)
+ from (select id, age from t3) a
+ where age < 10 and b.age = a.age)
+ order by name;
+
+set hive.cbo.enable = true;
+
+select * from t3
+where age in (select distinct(age) age from t4)
+order by age ;
+
+select * from t3
+where age not in (select distinct(age) age from t4 )
+order by age ;
+
+select * from t3
+where age not in (select distinct(ages) ages from t5 where t5.ages is not null)
+order by age ;
+
+
+select * from t3
+where age not in (select distinct(ages) ages from t5 )
+order by age ;
+
+select count(*) from t3
+where age not in (23,22, null );
+
+explain select * from t3
+ where age not in (select distinct(age) age from t4);
+
+explain select * from t3
+where age not in (select distinct(ages) ages from t5 );
+
+explain select * from t3
+ where age not in (select distinct(ages) ages from t5 where t5.ages is
not null);
+
+select count(*) from t3
+where age not in (select distinct(age)age from t3 t1 where t1.age > 10);
+
+ explain select id, name, age
+ from t3 b where b.age not in
+ (select min(age)
+ from (select id, age from t3) a
+ where age < 10 and b.age = a.age)
+ order by name;
diff --git
a/ql/src/test/results/clientpositive/llap/create_view_disable_cbo.q.out
b/ql/src/test/results/clientpositive/llap/create_view_disable_cbo.q.out
index 31387cb4a97..d7a10c1ab41 100644
--- a/ql/src/test/results/clientpositive/llap/create_view_disable_cbo.q.out
+++ b/ql/src/test/results/clientpositive/llap/create_view_disable_cbo.q.out
@@ -53,7 +53,7 @@ POSTHOOK: Input: cdh_82023_repro_db@data
POSTHOOK: Output: cdh_82023_repro_db@background
POSTHOOK: Output: database:cdh_82023_repro_db
POSTHOOK: Lineage: background.text EXPRESSION
[(data)xouter.FieldSchema(name:text, type:string, comment:null), ]
-Warning: Shuffle Join MERGEJOIN[42][tables = [xouter, sq_1_notin_nullcheck]]
in Stage 'Reducer 2' is a cross product
+Warning: Shuffle Join MERGEJOIN[45][tables = [xouter, sq_1_notin_nullcheck]]
in Stage 'Reducer 2' is a cross product
PREHOOK: query: SELECT * FROM `cdh_82023_repro_db`.`background`
PREHOOK: type: QUERY
PREHOOK: Input: cdh_82023_repro_db@background
@@ -85,7 +85,7 @@ POSTHOOK: Input: cdh_82023_repro_db@data
POSTHOOK: Output: cdh_82023_repro_db@foreground
POSTHOOK: Output: database:cdh_82023_repro_db
POSTHOOK: Lineage: foreground.text EXPRESSION
[(data)xouter.FieldSchema(name:text, type:string, comment:null), ]
-Warning: Shuffle Join MERGEJOIN[43][tables = [xouter, sq_1_notin_nullcheck]]
in Stage 'Reducer 2' is a cross product
+Warning: Shuffle Join MERGEJOIN[46][tables = [xouter, sq_1_notin_nullcheck]]
in Stage 'Reducer 2' is a cross product
PREHOOK: query: SELECT * FROM `cdh_82023_repro_db`.`foreground`
PREHOOK: type: QUERY
PREHOOK: Input: cdh_82023_repro_db@background
diff --git a/ql/src/test/results/clientpositive/llap/notInTest.q.out
b/ql/src/test/results/clientpositive/llap/notInTest.q.out
new file mode 100644
index 00000000000..75a26c4b71f
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/notInTest.q.out
@@ -0,0 +1,1825 @@
+PREHOOK: query: create table t3 (id int,name string, age int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t3
+POSTHOOK: query: create table t3 (id int,name string, age int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t3
+PREHOOK: query: insert into t3
values(1,'Sagar',23),(2,'Sultan',NULL),(3,'Surya',23),(4,'Raman',45),(5,'Scott',23),(6,'Ramya',5),(7,'',23),(8,'',23),(9,'ron',3),(10,'Sam',22),(11,'nick',19),(12,'fed',18),(13,'kong',13),(14,'hela',45)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t3
+POSTHOOK: query: insert into t3
values(1,'Sagar',23),(2,'Sultan',NULL),(3,'Surya',23),(4,'Raman',45),(5,'Scott',23),(6,'Ramya',5),(7,'',23),(8,'',23),(9,'ron',3),(10,'Sam',22),(11,'nick',19),(12,'fed',18),(13,'kong',13),(14,'hela',45)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t3
+POSTHOOK: Lineage: t3.age SCRIPT []
+POSTHOOK: Lineage: t3.id SCRIPT []
+POSTHOOK: Lineage: t3.name SCRIPT []
+PREHOOK: query: create table t4 (id int,name string, age int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t4
+POSTHOOK: query: create table t4 (id int,name string, age int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t4
+PREHOOK: query: insert into t4
values(1,'Sagar',23),(3,'Surya',23),(4,'Raman',45),(5,'Scott',23),(6,'Ramya',5),(7,'',23),(8,'',23)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t4
+POSTHOOK: query: insert into t4
values(1,'Sagar',23),(3,'Surya',23),(4,'Raman',45),(5,'Scott',23),(6,'Ramya',5),(7,'',23),(8,'',23)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t4
+POSTHOOK: Lineage: t4.age SCRIPT []
+POSTHOOK: Lineage: t4.id SCRIPT []
+POSTHOOK: Lineage: t4.name SCRIPT []
+PREHOOK: query: create table t5 (id int,name string, ages int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t5
+POSTHOOK: query: create table t5 (id int,name string, ages int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t5
+PREHOOK: query: insert into t5
values(1,'Sagar',23),(3,'Surya',NULL),(4,'Raman',45),(5,'Scott',23),(6,'Ramya',5),(7,'',23),(8,'',23)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t5
+POSTHOOK: query: insert into t5
values(1,'Sagar',23),(3,'Surya',NULL),(4,'Raman',45),(5,'Scott',23),(6,'Ramya',5),(7,'',23),(8,'',23)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t5
+POSTHOOK: Lineage: t5.ages SCRIPT []
+POSTHOOK: Lineage: t5.id SCRIPT []
+POSTHOOK: Lineage: t5.name SCRIPT []
+PREHOOK: query: select * from t3
+where age in (select distinct(age) age from t4)
+order by age
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t3
+PREHOOK: Input: default@t4
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: select * from t3
+where age in (select distinct(age) age from t4)
+order by age
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t3
+POSTHOOK: Input: default@t4
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+6 Ramya 5
+1 Sagar 23
+3 Surya 23
+5 Scott 23
+7 23
+8 23
+4 Raman 45
+14 hela 45
+Warning: Shuffle Join MERGEJOIN[46][tables = [t3, sq_1_notin_nullcheck]] in
Stage 'Reducer 2' is a cross product
+PREHOOK: query: select * from t3
+where age not in (select distinct(age) age from t4 )
+order by age
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t3
+PREHOOK: Input: default@t4
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: select * from t3
+where age not in (select distinct(age) age from t4 )
+order by age
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t3
+POSTHOOK: Input: default@t4
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+9 ron 3
+13 kong 13
+12 fed 18
+11 nick 19
+10 Sam 22
+Warning: Shuffle Join MERGEJOIN[48][tables = [t3, sq_1_notin_nullcheck]] in
Stage 'Reducer 2' is a cross product
+PREHOOK: query: select * from t3
+where age not in (select distinct(ages) ages from t5 where t5.ages is not null)
+order by age
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t3
+PREHOOK: Input: default@t5
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: select * from t3
+where age not in (select distinct(ages) ages from t5 where t5.ages is not null)
+order by age
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t3
+POSTHOOK: Input: default@t5
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+9 ron 3
+13 kong 13
+12 fed 18
+11 nick 19
+10 Sam 22
+Warning: Shuffle Join MERGEJOIN[46][tables = [t3, sq_1_notin_nullcheck]] in
Stage 'Reducer 2' is a cross product
+PREHOOK: query: select * from t3
+where age not in (select distinct(ages) ages from t5 )
+order by age
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t3
+PREHOOK: Input: default@t5
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: select * from t3
+where age not in (select distinct(ages) ages from t5 )
+order by age
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t3
+POSTHOOK: Input: default@t5
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+PREHOOK: query: select count(*) from t3
+where age not in (23,22, null )
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t3
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: select count(*) from t3
+where age not in (23,22, null )
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t3
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+0
+Warning: Shuffle Join MERGEJOIN[44][tables = [t3, sq_1_notin_nullcheck]] in
Stage 'Reducer 2' is a cross product
+PREHOOK: query: explain select * from t3
+ where age not in (select distinct(age) age from t4)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t3
+PREHOOK: Input: default@t4
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: explain select * from t3
+ where age not in (select distinct(age) age from t4)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t3
+POSTHOOK: Input: default@t4
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (XPROD_EDGE), Reducer 7 (XPROD_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE)
+ Reducer 5 <- Map 4 (SIMPLE_EDGE)
+ Reducer 6 <- Map 4 (SIMPLE_EDGE)
+ Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: t3
+ filterExpr: age is not null (type: boolean)
+ Statistics: Num rows: 14 Data size: 1344 Basic stats:
COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: age is not null (type: boolean)
+ Statistics: Num rows: 13 Data size: 1248 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 13 Data size: 1248 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: id (type: int), name (type: string),
age (type: int)
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: t4
+ filterExpr: (age is not null or age is null) (type: boolean)
+ Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE
Column stats: COMPLETE
+ Filter Operator
+ predicate: age is not null (type: boolean)
+ Statistics: Num rows: 7 Data size: 28 Basic stats:
COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: age (type: int)
+ minReductionHashAggr: 0.57142854
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 3 Data size: 12 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 3 Data size: 12 Basic stats:
COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: age is null (type: boolean)
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE
Column stats: COMPLETE
+ Select Operator
+ Statistics: Num rows: 1 Data size: 4 Basic stats:
COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: null (type: int)
+ minReductionHashAggr: 0.4
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 4 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: null (type: int)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: null (type: int)
+ Statistics: Num rows: 1 Data size: 4 Basic stats:
COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Left Semi Join 0 to 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 13 Data size: 1248 Basic stats: COMPLETE
Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col2 (type: int)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col2 (type: int)
+ Statistics: Num rows: 13 Data size: 1248 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int), _col1 (type: string)
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Left Outer Join 0 to 1
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col7
+ Statistics: Num rows: 17 Data size: 1652 Basic stats: COMPLETE
Column stats: COMPLETE
+ Filter Operator
+ predicate: _col7 is null (type: boolean)
+ Statistics: Num rows: 13 Data size: 1264 Basic stats:
COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string),
_col2 (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 13 Data size: 1264 Basic stats:
COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 13 Data size: 1264 Basic stats:
COMPLETE Column stats: COMPLETE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde:
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 5
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE
Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE
Column stats: COMPLETE
+ Reducer 6
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: null (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE
Column stats: COMPLETE
+ Select Operator
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE
Column stats: COMPLETE
+ Group By Operator
+ aggregations: count()
+ minReductionHashAggr: 0.4
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: COMPLETE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: bigint)
+ Reducer 7
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: COMPLETE
+ Filter Operator
+ predicate: (_col0 = 0L) (type: boolean)
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: COMPLETE
+ Select Operator
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: COMPLETE
+ Group By Operator
+ keys: 0L (type: bigint)
+ minReductionHashAggr: 0.4
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats:
COMPLETE Column stats: COMPLETE
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+Warning: Shuffle Join MERGEJOIN[44][tables = [t3, sq_1_notin_nullcheck]] in
Stage 'Reducer 2' is a cross product
+PREHOOK: query: explain select * from t3
+where age not in (select distinct(ages) ages from t5 )
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t3
+PREHOOK: Input: default@t5
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: explain select * from t3
+where age not in (select distinct(ages) ages from t5 )
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t3
+POSTHOOK: Input: default@t5
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (XPROD_EDGE), Reducer 7 (XPROD_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE)
+ Reducer 5 <- Map 4 (SIMPLE_EDGE)
+ Reducer 6 <- Map 4 (SIMPLE_EDGE)
+ Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: t3
+ filterExpr: age is not null (type: boolean)
+ Statistics: Num rows: 14 Data size: 1344 Basic stats:
COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: age is not null (type: boolean)
+ Statistics: Num rows: 13 Data size: 1248 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 13 Data size: 1248 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: id (type: int), name (type: string),
age (type: int)
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: t5
+ filterExpr: (ages is not null or ages is null) (type:
boolean)
+ Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE
Column stats: COMPLETE
+ Filter Operator
+ predicate: ages is not null (type: boolean)
+ Statistics: Num rows: 6 Data size: 24 Basic stats:
COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: ages (type: int)
+ minReductionHashAggr: 0.4
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 3 Data size: 12 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 3 Data size: 12 Basic stats:
COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: ages is null (type: boolean)
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE
Column stats: COMPLETE
+ Select Operator
+ Statistics: Num rows: 1 Data size: 4 Basic stats:
COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: null (type: int)
+ minReductionHashAggr: 0.4
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 4 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: null (type: int)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: null (type: int)
+ Statistics: Num rows: 1 Data size: 4 Basic stats:
COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Left Semi Join 0 to 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 13 Data size: 1248 Basic stats: COMPLETE
Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col2 (type: int)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col2 (type: int)
+ Statistics: Num rows: 13 Data size: 1248 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int), _col1 (type: string)
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Left Outer Join 0 to 1
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col7
+ Statistics: Num rows: 17 Data size: 1648 Basic stats: COMPLETE
Column stats: COMPLETE
+ Filter Operator
+ predicate: _col7 is null (type: boolean)
+ Statistics: Num rows: 14 Data size: 1356 Basic stats:
COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string),
_col2 (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 14 Data size: 1356 Basic stats:
COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 14 Data size: 1356 Basic stats:
COMPLETE Column stats: COMPLETE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde:
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 5
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE
Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE
Column stats: COMPLETE
+ Reducer 6
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: null (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE
Column stats: COMPLETE
+ Select Operator
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE
Column stats: COMPLETE
+ Group By Operator
+ aggregations: count()
+ minReductionHashAggr: 0.4
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: COMPLETE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: bigint)
+ Reducer 7
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: COMPLETE
+ Filter Operator
+ predicate: (_col0 = 0L) (type: boolean)
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: COMPLETE
+ Select Operator
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: COMPLETE
+ Group By Operator
+ keys: 0L (type: bigint)
+ minReductionHashAggr: 0.4
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats:
COMPLETE Column stats: COMPLETE
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+Warning: Shuffle Join MERGEJOIN[46][tables = [t3, sq_1_notin_nullcheck]] in
Stage 'Reducer 2' is a cross product
+PREHOOK: query: explain select * from t3
+ where age not in (select distinct(ages) ages from t5 where t5.ages is
not null)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t3
+PREHOOK: Input: default@t5
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: explain select * from t3
+ where age not in (select distinct(ages) ages from t5 where t5.ages is
not null)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t3
+POSTHOOK: Input: default@t5
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (XPROD_EDGE), Reducer 7 (XPROD_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE)
+ Reducer 5 <- Map 4 (SIMPLE_EDGE)
+ Reducer 6 <- Map 4 (SIMPLE_EDGE)
+ Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: t3
+ filterExpr: age is not null (type: boolean)
+ Statistics: Num rows: 14 Data size: 1344 Basic stats:
COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: age is not null (type: boolean)
+ Statistics: Num rows: 13 Data size: 1248 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 13 Data size: 1248 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: id (type: int), name (type: string),
age (type: int)
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: t5
+ filterExpr: (ages is not null or (ages is not null and ages
is null)) (type: boolean)
+ Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE
Column stats: COMPLETE
+ Filter Operator
+ predicate: ages is not null (type: boolean)
+ Statistics: Num rows: 6 Data size: 24 Basic stats:
COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: ages (type: int)
+ minReductionHashAggr: 0.4
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 3 Data size: 12 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 3 Data size: 12 Basic stats:
COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (ages is not null and ages is null) (type:
boolean)
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE
Column stats: COMPLETE
+ Select Operator
+ Statistics: Num rows: 1 Data size: 4 Basic stats:
COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: null (type: int)
+ minReductionHashAggr: 0.4
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 4 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: null (type: int)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: null (type: int)
+ Statistics: Num rows: 1 Data size: 4 Basic stats:
COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Left Semi Join 0 to 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 13 Data size: 1248 Basic stats: COMPLETE
Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col2 (type: int)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col2 (type: int)
+ Statistics: Num rows: 13 Data size: 1248 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int), _col1 (type: string)
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Left Outer Join 0 to 1
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col7
+ Statistics: Num rows: 17 Data size: 1648 Basic stats: COMPLETE
Column stats: COMPLETE
+ Filter Operator
+ predicate: _col7 is null (type: boolean)
+ Statistics: Num rows: 14 Data size: 1356 Basic stats:
COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string),
_col2 (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 14 Data size: 1356 Basic stats:
COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 14 Data size: 1356 Basic stats:
COMPLETE Column stats: COMPLETE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde:
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 5
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE
Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE
Column stats: COMPLETE
+ Reducer 6
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: null (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE
Column stats: COMPLETE
+ Select Operator
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE
Column stats: COMPLETE
+ Group By Operator
+ aggregations: count()
+ minReductionHashAggr: 0.4
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: COMPLETE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: bigint)
+ Reducer 7
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: COMPLETE
+ Filter Operator
+ predicate: (_col0 = 0L) (type: boolean)
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: COMPLETE
+ Select Operator
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: COMPLETE
+ Group By Operator
+ keys: 0L (type: bigint)
+ minReductionHashAggr: 0.4
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats:
COMPLETE Column stats: COMPLETE
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+Warning: Shuffle Join MERGEJOIN[50][tables = [t3, sq_1_notin_nullcheck]] in
Stage 'Reducer 2' is a cross product
+PREHOOK: query: select count(*) from t3
+where age not in (select distinct(age)age from t3 t1 where t1.age > 10)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t3
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: select count(*) from t3
+where age not in (select distinct(age)age from t3 t1 where t1.age > 10)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t3
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+2
+Warning: Shuffle Join MERGEJOIN[52][tables = [b, sq_1_notin_nullcheck]] in
Stage 'Reducer 2' is a cross product
+PREHOOK: query: explain select id, name, age
+from t3 b where b.age not in
+(select min(age)
+ from (select id, age from t3) a
+ where age < 10 and b.age = a.age)
+ order by name
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t3
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: explain select id, name, age
+from t3 b where b.age not in
+(select min(age)
+ from (select id, age from t3) a
+ where age < 10 and b.age = a.age)
+ order by name
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t3
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (XPROD_EDGE), Reducer 6 (XPROD_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE)
+ Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
+ Reducer 5 <- Map 1 (SIMPLE_EDGE)
+ Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: b
+ filterExpr: (age is not null or (age < 10)) (type: boolean)
+ Statistics: Num rows: 14 Data size: 1344 Basic stats:
COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: age is not null (type: boolean)
+ Statistics: Num rows: 13 Data size: 1248 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 13 Data size: 1248 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: id (type: int), name (type: string),
age (type: int)
+ Filter Operator
+ predicate: (age < 10) (type: boolean)
+ Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE
Column stats: COMPLETE
+ Select Operator
+ expressions: age (type: int)
+ outputColumnNames: _col1
+ Statistics: Num rows: 2 Data size: 8 Basic stats:
COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: min(_col1)
+ keys: _col1 (type: int)
+ minReductionHashAggr: 0.4
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 2 Data size: 16 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 2 Data size: 16 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: int)
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Left Semi Join 0 to 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 13 Data size: 1248 Basic stats: COMPLETE
Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col2 (type: int), _col2 (type: int)
+ null sort order: zz
+ sort order: ++
+ Map-reduce partition columns: _col2 (type: int), _col2
(type: int)
+ Statistics: Num rows: 13 Data size: 1248 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int), _col1 (type: string)
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Left Outer Join 0 to 1
+ keys:
+ 0 _col2 (type: int), _col2 (type: int)
+ 1 _col0 (type: int), _col1 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col7
+ Statistics: Num rows: 16 Data size: 1552 Basic stats: COMPLETE
Column stats: COMPLETE
+ Filter Operator
+ predicate: _col7 is null (type: boolean)
+ Statistics: Num rows: 13 Data size: 1260 Basic stats:
COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string),
_col2 (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 13 Data size: 1248 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ null sort order: z
+ sort order: +
+ Statistics: Num rows: 13 Data size: 1248 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int), _col2 (type: int)
+ Reducer 4
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: int), KEY.reducesinkkey0
(type: string), VALUE._col1 (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 13 Data size: 1248 Basic stats: COMPLETE
Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 13 Data size: 1248 Basic stats:
COMPLETE Column stats: COMPLETE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 5
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0)
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE
Column stats: COMPLETE
+ Filter Operator
+ predicate: _col1 is not null (type: boolean)
+ Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE
Column stats: COMPLETE
+ Select Operator
+ expressions: _col1 (type: int), _col0 (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 2 Data size: 16 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int)
+ null sort order: zz
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1
(type: int)
+ Statistics: Num rows: 2 Data size: 16 Basic stats:
COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (_col1 is null or _col0 is null) (type: boolean)
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: COMPLETE
+ Select Operator
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: COMPLETE
+ Group By Operator
+ aggregations: count()
+ minReductionHashAggr: 0.4
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: bigint)
+ Reducer 6
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: COMPLETE
+ Filter Operator
+ predicate: (_col0 = 0L) (type: boolean)
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: COMPLETE
+ Select Operator
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: COMPLETE
+ Group By Operator
+ keys: 0L (type: bigint)
+ minReductionHashAggr: 0.4
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats:
COMPLETE Column stats: COMPLETE
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select * from t3
+where age in (select distinct(age) age from t4)
+order by age
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t3
+PREHOOK: Input: default@t4
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: select * from t3
+where age in (select distinct(age) age from t4)
+order by age
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t3
+POSTHOOK: Input: default@t4
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+6 Ramya 5
+1 Sagar 23
+3 Surya 23
+5 Scott 23
+7 23
+8 23
+4 Raman 45
+14 hela 45
+Warning: Shuffle Join MERGEJOIN[40][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in
Stage 'Reducer 3' is a cross product
+PREHOOK: query: select * from t3
+where age not in (select distinct(age) age from t4 )
+order by age
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t3
+PREHOOK: Input: default@t4
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: select * from t3
+where age not in (select distinct(age) age from t4 )
+order by age
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t3
+POSTHOOK: Input: default@t4
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+9 ron 3
+13 kong 13
+12 fed 18
+11 nick 19
+10 Sam 22
+Warning: Shuffle Join MERGEJOIN[42][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in
Stage 'Reducer 3' is a cross product
+PREHOOK: query: select * from t3
+where age not in (select distinct(ages) ages from t5 where t5.ages is not null)
+order by age
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t3
+PREHOOK: Input: default@t5
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: select * from t3
+where age not in (select distinct(ages) ages from t5 where t5.ages is not null)
+order by age
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t3
+POSTHOOK: Input: default@t5
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+9 ron 3
+13 kong 13
+12 fed 18
+11 nick 19
+10 Sam 22
+Warning: Shuffle Join MERGEJOIN[40][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in
Stage 'Reducer 3' is a cross product
+PREHOOK: query: select * from t3
+where age not in (select distinct(ages) ages from t5 )
+order by age
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t3
+PREHOOK: Input: default@t5
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: select * from t3
+where age not in (select distinct(ages) ages from t5 )
+order by age
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t3
+POSTHOOK: Input: default@t5
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+PREHOOK: query: select count(*) from t3
+where age not in (23,22, null )
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Input: default@t3
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: select count(*) from t3
+where age not in (23,22, null )
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Input: default@t3
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+0
+Warning: Shuffle Join MERGEJOIN[38][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in
Stage 'Reducer 3' is a cross product
+PREHOOK: query: explain select * from t3
+ where age not in (select distinct(age) age from t4)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t3
+PREHOOK: Input: default@t4
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: explain select * from t3
+ where age not in (select distinct(age) age from t4)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t3
+POSTHOOK: Input: default@t4
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (XPROD_EDGE), Reducer 7 (XPROD_EDGE)
+ Reducer 5 <- Map 4 (SIMPLE_EDGE)
+ Reducer 6 <- Map 4 (SIMPLE_EDGE)
+ Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: t3
+ Statistics: Num rows: 14 Data size: 1344 Basic stats:
COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: id (type: int), name (type: string), age
(type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 14 Data size: 1344 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col2 (type: int)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col2 (type: int)
+ Statistics: Num rows: 14 Data size: 1344 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int), _col1 (type:
string)
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: t4
+ Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE
Column stats: COMPLETE
+ Filter Operator
+ predicate: age is not null (type: boolean)
+ Statistics: Num rows: 7 Data size: 28 Basic stats:
COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: age (type: int)
+ minReductionHashAggr: 0.57142854
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 3 Data size: 12 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 3 Data size: 12 Basic stats:
COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: age (type: int)
+ outputColumnNames: age
+ Statistics: Num rows: 7 Data size: 28 Basic stats:
COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: age (type: int)
+ minReductionHashAggr: 0.57142854
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 3 Data size: 12 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 3 Data size: 12 Basic stats:
COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Left Outer Join 0 to 1
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col4
+ Statistics: Num rows: 19 Data size: 1848 Basic stats: COMPLETE
Column stats: COMPLETE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 19 Data size: 1848 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int), _col1 (type: string),
_col2 (type: int), _col4 (type: boolean)
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col6
+ Statistics: Num rows: 19 Data size: 2152 Basic stats: COMPLETE
Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), _col2
(type: int), _col5 (type: bigint), _col6 (type: bigint), _col4 (type: boolean)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col6
+ Statistics: Num rows: 19 Data size: 2152 Basic stats:
COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: ((_col3 = 0L) or (_col6 is null and (_col4 >=
_col3) and _col2 is not null)) (type: boolean)
+ Statistics: Num rows: 19 Data size: 2152 Basic stats:
COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string),
_col2 (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 19 Data size: 1824 Basic stats:
COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 19 Data size: 1824 Basic stats:
COMPLETE Column stats: COMPLETE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde:
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 5
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE
Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: int), true (type: boolean)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE
Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 3 Data size: 24 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: boolean)
+ Reducer 6
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE
Column stats: COMPLETE
+ Group By Operator
+ aggregations: count(), count(_col0)
+ minReductionHashAggr: 0.6666666
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE
Column stats: COMPLETE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 16 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: bigint), _col1 (type:
bigint)
+ Reducer 7
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0), count(VALUE._col1)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE
Column stats: COMPLETE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE
Column stats: COMPLETE
+ value expressions: _col0 (type: bigint), _col1 (type: bigint)
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+Warning: Shuffle Join MERGEJOIN[38][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in
Stage 'Reducer 3' is a cross product
+PREHOOK: query: explain select * from t3
+where age not in (select distinct(ages) ages from t5 )
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t3
+PREHOOK: Input: default@t5
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: explain select * from t3
+where age not in (select distinct(ages) ages from t5 )
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t3
+POSTHOOK: Input: default@t5
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (XPROD_EDGE), Reducer 7 (XPROD_EDGE)
+ Reducer 5 <- Map 4 (SIMPLE_EDGE)
+ Reducer 6 <- Map 4 (SIMPLE_EDGE)
+ Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: t3
+ Statistics: Num rows: 14 Data size: 1344 Basic stats:
COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: id (type: int), name (type: string), age
(type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 14 Data size: 1344 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col2 (type: int)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col2 (type: int)
+ Statistics: Num rows: 14 Data size: 1344 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int), _col1 (type:
string)
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: t5
+ Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE
Column stats: COMPLETE
+ Filter Operator
+ predicate: ages is not null (type: boolean)
+ Statistics: Num rows: 6 Data size: 24 Basic stats:
COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: ages (type: int)
+ minReductionHashAggr: 0.4
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 3 Data size: 12 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 3 Data size: 12 Basic stats:
COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: ages (type: int)
+ outputColumnNames: ages
+ Statistics: Num rows: 7 Data size: 28 Basic stats:
COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: ages (type: int)
+ minReductionHashAggr: 0.57142854
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 3 Data size: 12 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 3 Data size: 12 Basic stats:
COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Left Outer Join 0 to 1
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col4
+ Statistics: Num rows: 19 Data size: 1848 Basic stats: COMPLETE
Column stats: COMPLETE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 19 Data size: 1848 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int), _col1 (type: string),
_col2 (type: int), _col4 (type: boolean)
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col6
+ Statistics: Num rows: 19 Data size: 2152 Basic stats: COMPLETE
Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), _col2
(type: int), _col5 (type: bigint), _col6 (type: bigint), _col4 (type: boolean)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col6
+ Statistics: Num rows: 19 Data size: 2152 Basic stats:
COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: ((_col3 = 0L) or (_col6 is null and (_col4 >=
_col3) and _col2 is not null)) (type: boolean)
+ Statistics: Num rows: 19 Data size: 2152 Basic stats:
COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string),
_col2 (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 19 Data size: 1824 Basic stats:
COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 19 Data size: 1824 Basic stats:
COMPLETE Column stats: COMPLETE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde:
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 5
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE
Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: int), true (type: boolean)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE
Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 3 Data size: 24 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: boolean)
+ Reducer 6
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE
Column stats: COMPLETE
+ Group By Operator
+ aggregations: count(), count(_col0)
+ minReductionHashAggr: 0.6666666
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE
Column stats: COMPLETE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 16 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: bigint), _col1 (type:
bigint)
+ Reducer 7
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0), count(VALUE._col1)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE
Column stats: COMPLETE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE
Column stats: COMPLETE
+ value expressions: _col0 (type: bigint), _col1 (type: bigint)
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+Warning: Shuffle Join MERGEJOIN[40][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in
Stage 'Reducer 3' is a cross product
+PREHOOK: query: explain select * from t3
+ where age not in (select distinct(ages) ages from t5 where t5.ages is
not null)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t3
+PREHOOK: Input: default@t5
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: explain select * from t3
+ where age not in (select distinct(ages) ages from t5 where t5.ages is
not null)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t3
+POSTHOOK: Input: default@t5
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (XPROD_EDGE), Reducer 6 (XPROD_EDGE)
+ Reducer 5 <- Map 4 (SIMPLE_EDGE)
+ Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: t3
+ Statistics: Num rows: 14 Data size: 1344 Basic stats:
COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: id (type: int), name (type: string), age
(type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 14 Data size: 1344 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col2 (type: int)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col2 (type: int)
+ Statistics: Num rows: 14 Data size: 1344 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int), _col1 (type:
string)
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: t5
+ filterExpr: ages is not null (type: boolean)
+ Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE
Column stats: COMPLETE
+ Filter Operator
+ predicate: ages is not null (type: boolean)
+ Statistics: Num rows: 6 Data size: 24 Basic stats:
COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: ages (type: int)
+ minReductionHashAggr: 0.4
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 3 Data size: 12 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 3 Data size: 12 Basic stats:
COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Left Outer Join 0 to 1
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col4
+ Statistics: Num rows: 19 Data size: 1848 Basic stats: COMPLETE
Column stats: COMPLETE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 19 Data size: 1848 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int), _col1 (type: string),
_col2 (type: int), _col4 (type: boolean)
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col6
+ Statistics: Num rows: 19 Data size: 2152 Basic stats: COMPLETE
Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), _col2
(type: int), _col5 (type: bigint), _col6 (type: bigint), _col4 (type: boolean)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col6
+ Statistics: Num rows: 19 Data size: 2152 Basic stats:
COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: ((_col3 = 0L) or (_col6 is null and (_col4 >=
_col3) and _col2 is not null)) (type: boolean)
+ Statistics: Num rows: 19 Data size: 2152 Basic stats:
COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string),
_col2 (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 19 Data size: 1824 Basic stats:
COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 19 Data size: 1824 Basic stats:
COMPLETE Column stats: COMPLETE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde:
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 5
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE
Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: int), true (type: boolean)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE
Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 3 Data size: 24 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: boolean)
+ Group By Operator
+ aggregations: count(), count(_col0)
+ minReductionHashAggr: 0.6666666
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE
Column stats: COMPLETE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 16 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: bigint), _col1 (type:
bigint)
+ Reducer 6
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0), count(VALUE._col1)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE
Column stats: COMPLETE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE
Column stats: COMPLETE
+ value expressions: _col0 (type: bigint), _col1 (type: bigint)
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+Warning: Shuffle Join MERGEJOIN[44][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in
Stage 'Reducer 3' is a cross product
+PREHOOK: query: select count(*) from t3
+where age not in (select distinct(age)age from t3 t1 where t1.age > 10)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t3
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: select count(*) from t3
+where age not in (select distinct(age)age from t3 t1 where t1.age > 10)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t3
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+2
+PREHOOK: query: explain select id, name, age
+ from t3 b where b.age not in
+ (select min(age)
+ from (select id, age from t3) a
+ where age < 10 and b.age = a.age)
+ order by name
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t3
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: explain select id, name, age
+ from t3 b where b.age not in
+ (select min(age)
+ from (select id, age from t3) a
+ where age < 10 and b.age = a.age)
+ order by name
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t3
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE)
+ Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE)
+ Reducer 5 <- Reducer 4 (SIMPLE_EDGE)
+ Reducer 6 <- Map 1 (SIMPLE_EDGE)
+ Reducer 7 <- Map 1 (SIMPLE_EDGE)
+ Reducer 8 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 14 Data size: 1344 Basic stats:
COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: id (type: int), name (type: string), age
(type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 14 Data size: 1344 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col2 (type: int)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col2 (type: int)
+ Statistics: Num rows: 14 Data size: 1344 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int), _col1 (type:
string)
+ Filter Operator
+ predicate: (age < 10) (type: boolean)
+ Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE
Column stats: COMPLETE
+ Group By Operator
+ keys: age (type: int)
+ minReductionHashAggr: 0.4
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 2 Data size: 8 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 2 Data size: 8 Basic stats:
COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: min(age)
+ keys: age (type: int)
+ minReductionHashAggr: 0.4
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 2 Data size: 16 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 2 Data size: 16 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: int)
+ Group By Operator
+ aggregations: min(age)
+ keys: age (type: int)
+ minReductionHashAggr: 0.4
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 2 Data size: 16 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 2 Data size: 16 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: int)
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Left Outer Join 0 to 1
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col4
+ Statistics: Num rows: 17 Data size: 1664 Basic stats: COMPLETE
Column stats: COMPLETE
+ Filter Operator
+ predicate: if(_col4 is null, sq_count_check(0L, true),
sq_count_check(_col4, true)) (type: boolean)
+ Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE
Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string),
_col2 (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 8 Data size: 768 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col2 (type: int)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col2 (type: int)
+ Statistics: Num rows: 8 Data size: 768 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int), _col1 (type:
string)
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Left Outer Join 0 to 1
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col4, _col5
+ Statistics: Num rows: 8 Data size: 896 Basic stats: COMPLETE
Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col2 (type: int)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col2 (type: int)
+ Statistics: Num rows: 8 Data size: 896 Basic stats: COMPLETE
Column stats: COMPLETE
+ value expressions: _col0 (type: int), _col1 (type: string),
_col4 (type: bigint), _col5 (type: bigint)
+ Reducer 4
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Left Outer Join 0 to 1
+ keys:
+ 0 _col2 (type: int)
+ 1 _col1 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col6
+ Statistics: Num rows: 8 Data size: 916 Basic stats: COMPLETE
Column stats: COMPLETE
+ Filter Operator
+ predicate: (_col4 is null or (_col4 = 0L) or (_col6 is not
null or _col2 is null or (_col5 < _col4)) is not true) (type: boolean)
+ Statistics: Num rows: 8 Data size: 916 Basic stats: COMPLETE
Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string),
_col2 (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 8 Data size: 768 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ null sort order: z
+ sort order: +
+ Statistics: Num rows: 8 Data size: 768 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int), _col2 (type: int)
+ Reducer 5
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: int), KEY.reducesinkkey0
(type: string), VALUE._col1 (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 8 Data size: 768 Basic stats: COMPLETE
Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 8 Data size: 768 Basic stats: COMPLETE
Column stats: COMPLETE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 6
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE
Column stats: COMPLETE
+ Group By Operator
+ aggregations: count()
+ keys: _col0 (type: int)
+ mode: complete
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE
Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 2 Data size: 24 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint)
+ Reducer 7
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0)
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE
Column stats: COMPLETE
+ Group By Operator
+ aggregations: count(), count(_col1)
+ keys: _col0 (type: int)
+ mode: complete
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE
Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 2 Data size: 40 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint), _col2 (type:
bigint)
+ Reducer 8
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0)
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE
Column stats: COMPLETE
+ Filter Operator
+ predicate: (_col0 = _col1) (type: boolean)
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: COMPLETE
+ Select Operator
+ expressions: true (type: boolean), _col0 (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 1 Data size: 8 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: boolean)
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
diff --git
a/ql/src/test/results/clientpositive/llap/special_character_in_tabnames_1.q.out
b/ql/src/test/results/clientpositive/llap/special_character_in_tabnames_1.q.out
index c386a6dd0a0..335cc4a4b99 100644
---
a/ql/src/test/results/clientpositive/llap/special_character_in_tabnames_1.q.out
+++
b/ql/src/test/results/clientpositive/llap/special_character_in_tabnames_1.q.out
@@ -2386,12 +2386,16 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: src/_/cbo
+ filterExpr: (key is not null or (key > '2') or ((key > '2')
and key is null)) (type: boolean)
Statistics: Num rows: 500 Data size: 89000 Basic stats:
COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- null sort order:
- sort order:
+ Filter Operator
+ predicate: key is not null (type: boolean)
Statistics: Num rows: 500 Data size: 89000 Basic stats:
COMPLETE Column stats: COMPLETE
- value expressions: key (type: string), value (type: string)
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 500 Data size: 89000 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: key (type: string), value (type:
string)
Filter Operator
predicate: (key > '2') (type: boolean)
Statistics: Num rows: 166 Data size: 14442 Basic stats:
COMPLETE Column stats: COMPLETE
@@ -2560,14 +2564,18 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: b
+ filterExpr: ((p_name is not null and p_mfgr is not null) or
((p_size < 10) and p_name is not null and p_mfgr is not null) or ((p_size < 10)
and (p_name is null or p_mfgr is null))) (type: boolean)
Statistics: Num rows: 26 Data size: 5798 Basic stats:
COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- null sort order:
- sort order:
+ Filter Operator
+ predicate: (p_name is not null and p_mfgr is not null)
(type: boolean)
Statistics: Num rows: 26 Data size: 5798 Basic stats:
COMPLETE Column stats: COMPLETE
- value expressions: p_name (type: string), p_mfgr (type:
string), p_size (type: int)
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 26 Data size: 5798 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: p_name (type: string), p_mfgr (type:
string), p_size (type: int)
Filter Operator
- predicate: (p_size < 10) (type: boolean)
+ predicate: ((p_size < 10) and p_name is not null and
p_mfgr is not null) (type: boolean)
Statistics: Num rows: 5 Data size: 1115 Basic stats:
COMPLETE Column stats: COMPLETE
Select Operator
expressions: p_name (type: string), p_mfgr (type: string)
@@ -2734,12 +2742,16 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: p/a/r/t
+ filterExpr: (UDFToDouble(p_size) is not null or (p_size <
10)) (type: boolean)
Statistics: Num rows: 26 Data size: 3250 Basic stats:
COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- null sort order:
- sort order:
+ Filter Operator
+ predicate: UDFToDouble(p_size) is not null (type: boolean)
Statistics: Num rows: 26 Data size: 3250 Basic stats:
COMPLETE Column stats: COMPLETE
- value expressions: p_name (type: string), p_size (type:
int)
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 26 Data size: 3250 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: p_name (type: string), p_size (type:
int)
Filter Operator
predicate: (p_size < 10) (type: boolean)
Statistics: Num rows: 5 Data size: 20 Basic stats:
COMPLETE Column stats: COMPLETE
@@ -2824,12 +2836,15 @@ STAGE PLANS:
mode: mergepartial
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: double)
- null sort order: z
- sort order: +
- Map-reduce partition columns: _col0 (type: double)
+ Filter Operator
+ predicate: _col0 is not null (type: boolean)
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: double)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: double)
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: COMPLETE
Filter Operator
predicate: _col0 is null (type: boolean)
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: COMPLETE
@@ -2899,23 +2914,49 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 6 (CUSTOM_SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 7 (CUSTOM_SIMPLE_EDGE)
Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE)
Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
Reducer 5 <- Map 1 (SIMPLE_EDGE)
- Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE)
+ Reducer 6 <- Map 1 (SIMPLE_EDGE)
+ Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: b
+ filterExpr: ((p_size is not null and p_mfgr is not null) or
((p_size < 10) and p_mfgr is not null) or (p_size < 10)) (type: boolean)
Statistics: Num rows: 26 Data size: 5798 Basic stats:
COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- null sort order:
- sort order:
+ Filter Operator
+ predicate: (p_size is not null and p_mfgr is not null)
(type: boolean)
Statistics: Num rows: 26 Data size: 5798 Basic stats:
COMPLETE Column stats: COMPLETE
- value expressions: p_name (type: string), p_mfgr (type:
string), p_size (type: int)
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 26 Data size: 5798 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: p_name (type: string), p_mfgr (type:
string), p_size (type: int)
+ Filter Operator
+ predicate: ((p_size < 10) and p_mfgr is not null) (type:
boolean)
+ Statistics: Num rows: 5 Data size: 510 Basic stats:
COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: p_mfgr (type: string), p_size (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 5 Data size: 510 Basic stats:
COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: min(_col1)
+ keys: _col0 (type: string)
+ minReductionHashAggr: 0.4
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 5 Data size: 510 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 5 Data size: 510 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: int)
Filter Operator
predicate: (p_size < 10) (type: boolean)
Statistics: Num rows: 5 Data size: 510 Basic stats:
COMPLETE Column stats: COMPLETE
@@ -3004,16 +3045,28 @@ STAGE PLANS:
mode: mergepartial
outputColumnNames: _col0, _col1
Statistics: Num rows: 5 Data size: 510 Basic stats: COMPLETE
Column stats: COMPLETE
- Select Operator
- expressions: _col1 (type: int), _col0 (type: string)
- outputColumnNames: _col0, _col1
+ Filter Operator
+ predicate: _col1 is not null (type: boolean)
Statistics: Num rows: 5 Data size: 510 Basic stats: COMPLETE
Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: int), _col1 (type: string)
- null sort order: zz
- sort order: ++
- Map-reduce partition columns: _col0 (type: int), _col1
(type: string)
+ Select Operator
+ expressions: _col1 (type: int), _col0 (type: string)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 5 Data size: 510 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: string)
+ null sort order: zz
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1
(type: string)
+ Statistics: Num rows: 5 Data size: 510 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reducer 6
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 5 Data size: 510 Basic stats: COMPLETE
Column stats: COMPLETE
Filter Operator
predicate: (_col1 is null or _col0 is null) (type: boolean)
Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE
Column stats: COMPLETE
@@ -3030,7 +3083,7 @@ STAGE PLANS:
sort order:
Statistics: Num rows: 1 Data size: 8 Basic stats:
COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: bigint)
- Reducer 6
+ Reducer 7
Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
@@ -3104,10 +3157,10 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: line/item
- filterExpr: ((l_shipmode = 'AIR') or ((l_shipmode = 'AIR')
and l_orderkey is null) or (l_linenumber = 1)) (type: boolean)
+ filterExpr: (((l_shipmode = 'AIR') and l_orderkey is not
null) or ((l_shipmode = 'AIR') and l_orderkey is null) or (l_orderkey is not
null and (l_linenumber = 1))) (type: boolean)
Statistics: Num rows: 100 Data size: 9200 Basic stats:
COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: (l_shipmode = 'AIR') (type: boolean)
+ predicate: ((l_shipmode = 'AIR') and l_orderkey is not
null) (type: boolean)
Statistics: Num rows: 14 Data size: 1288 Basic stats:
COMPLETE Column stats: COMPLETE
Select Operator
expressions: l_orderkey (type: int)
@@ -3136,7 +3189,7 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 8 Basic stats:
COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: bigint)
Filter Operator
- predicate: (l_linenumber = 1) (type: boolean)
+ predicate: (l_orderkey is not null and (l_linenumber = 1))
(type: boolean)
Statistics: Num rows: 14 Data size: 168 Basic stats:
COMPLETE Column stats: COMPLETE
Reduce Output Operator
null sort order:
@@ -3349,20 +3402,23 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 5 Data size: 970 Basic stats:
COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: double), _col2 (type:
double), _col3 (type: struct<count:bigint,sum:double,input:double>)
- Group By Operator
- aggregations: min(p_retailprice), max(p_retailprice),
avg(p_retailprice)
- keys: p_mfgr (type: string)
- minReductionHashAggr: 0.8076923
- mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 5 Data size: 970 Basic stats:
COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- null sort order: z
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
+ Filter Operator
+ predicate: p_mfgr is not null (type: boolean)
+ Statistics: Num rows: 26 Data size: 2756 Basic stats:
COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: min(p_retailprice), max(p_retailprice),
avg(p_retailprice)
+ keys: p_mfgr (type: string)
+ minReductionHashAggr: 0.8076923
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 5 Data size: 970 Basic stats:
COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: double), _col2 (type:
double), _col3 (type: struct<count:bigint,sum:double,input:double>)
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 5 Data size: 970 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: double), _col2 (type:
double), _col3 (type: struct<count:bigint,sum:double,input:double>)
Execution mode: vectorized, llap
LLAP IO: all inputs
Reducer 2
@@ -3500,7 +3556,7 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 5 Data size: 570 Basic stats: COMPLETE
Column stats: COMPLETE
Filter Operator
- predicate: ((_col2 - _col1) > 600) (type: boolean)
+ predicate: (((_col2 - _col1) > 600) and _col1 is not null)
(type: boolean)
Statistics: Num rows: 1 Data size: 114 Basic stats:
COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: string), _col1 (type: double)
@@ -3604,20 +3660,23 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 5 Data size: 530 Basic stats:
COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: double)
- Group By Operator
- aggregations: max(p_retailprice), min(p_retailprice)
- keys: p_mfgr (type: string)
- minReductionHashAggr: 0.8076923
- mode: hash
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 5 Data size: 570 Basic stats:
COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- null sort order: z
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
+ Filter Operator
+ predicate: p_mfgr is not null (type: boolean)
+ Statistics: Num rows: 26 Data size: 2756 Basic stats:
COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: max(p_retailprice), min(p_retailprice)
+ keys: p_mfgr (type: string)
+ minReductionHashAggr: 0.8076923
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 5 Data size: 570 Basic stats:
COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: double), _col2 (type:
double)
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 5 Data size: 570 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: double), _col2 (type:
double)
Filter Operator
predicate: p_mfgr is null (type: boolean)
Statistics: Num rows: 1 Data size: 106 Basic stats:
COMPLETE Column stats: COMPLETE
diff --git
a/ql/src/test/results/clientpositive/llap/special_character_in_tabnames_quotes_1.q.out
b/ql/src/test/results/clientpositive/llap/special_character_in_tabnames_quotes_1.q.out
index 56e922b69cd..ca3e3cc8ee8 100644
---
a/ql/src/test/results/clientpositive/llap/special_character_in_tabnames_quotes_1.q.out
+++
b/ql/src/test/results/clientpositive/llap/special_character_in_tabnames_quotes_1.q.out
@@ -2558,12 +2558,16 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: src/_/cbo
+ filterExpr: (key is not null or (key > '2') or ((key > '2')
and key is null)) (type: boolean)
Statistics: Num rows: 500 Data size: 89000 Basic stats:
COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- null sort order:
- sort order:
+ Filter Operator
+ predicate: key is not null (type: boolean)
Statistics: Num rows: 500 Data size: 89000 Basic stats:
COMPLETE Column stats: COMPLETE
- value expressions: key (type: string), value (type: string)
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 500 Data size: 89000 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: key (type: string), value (type:
string)
Filter Operator
predicate: (key > '2') (type: boolean)
Statistics: Num rows: 166 Data size: 14442 Basic stats:
COMPLETE Column stats: COMPLETE
@@ -2732,14 +2736,18 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: b
+ filterExpr: ((p_name is not null and p_mfgr is not null) or
((p_size < 10) and p_name is not null and p_mfgr is not null) or ((p_size < 10)
and (p_name is null or p_mfgr is null))) (type: boolean)
Statistics: Num rows: 26 Data size: 5798 Basic stats:
COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- null sort order:
- sort order:
+ Filter Operator
+ predicate: (p_name is not null and p_mfgr is not null)
(type: boolean)
Statistics: Num rows: 26 Data size: 5798 Basic stats:
COMPLETE Column stats: COMPLETE
- value expressions: p_name (type: string), p_mfgr (type:
string), p_size (type: int)
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 26 Data size: 5798 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: p_name (type: string), p_mfgr (type:
string), p_size (type: int)
Filter Operator
- predicate: (p_size < 10) (type: boolean)
+ predicate: ((p_size < 10) and p_name is not null and
p_mfgr is not null) (type: boolean)
Statistics: Num rows: 5 Data size: 1115 Basic stats:
COMPLETE Column stats: COMPLETE
Select Operator
expressions: p_name (type: string), p_mfgr (type: string)
@@ -2906,12 +2914,16 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: p/a/r/t
+ filterExpr: (UDFToDouble(p_size) is not null or (p_size <
10)) (type: boolean)
Statistics: Num rows: 26 Data size: 3250 Basic stats:
COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- null sort order:
- sort order:
+ Filter Operator
+ predicate: UDFToDouble(p_size) is not null (type: boolean)
Statistics: Num rows: 26 Data size: 3250 Basic stats:
COMPLETE Column stats: COMPLETE
- value expressions: p_name (type: string), p_size (type:
int)
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 26 Data size: 3250 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: p_name (type: string), p_size (type:
int)
Filter Operator
predicate: (p_size < 10) (type: boolean)
Statistics: Num rows: 5 Data size: 20 Basic stats:
COMPLETE Column stats: COMPLETE
@@ -2996,12 +3008,15 @@ STAGE PLANS:
mode: mergepartial
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: double)
- null sort order: z
- sort order: +
- Map-reduce partition columns: _col0 (type: double)
+ Filter Operator
+ predicate: _col0 is not null (type: boolean)
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: double)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: double)
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: COMPLETE
Filter Operator
predicate: _col0 is null (type: boolean)
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: COMPLETE
@@ -3071,23 +3086,49 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 6 (CUSTOM_SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 7 (CUSTOM_SIMPLE_EDGE)
Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE)
Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
Reducer 5 <- Map 1 (SIMPLE_EDGE)
- Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE)
+ Reducer 6 <- Map 1 (SIMPLE_EDGE)
+ Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: b
+ filterExpr: ((p_size is not null and p_mfgr is not null) or
((p_size < 10) and p_mfgr is not null) or (p_size < 10)) (type: boolean)
Statistics: Num rows: 26 Data size: 5798 Basic stats:
COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- null sort order:
- sort order:
+ Filter Operator
+ predicate: (p_size is not null and p_mfgr is not null)
(type: boolean)
Statistics: Num rows: 26 Data size: 5798 Basic stats:
COMPLETE Column stats: COMPLETE
- value expressions: p_name (type: string), p_mfgr (type:
string), p_size (type: int)
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 26 Data size: 5798 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: p_name (type: string), p_mfgr (type:
string), p_size (type: int)
+ Filter Operator
+ predicate: ((p_size < 10) and p_mfgr is not null) (type:
boolean)
+ Statistics: Num rows: 5 Data size: 510 Basic stats:
COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: p_mfgr (type: string), p_size (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 5 Data size: 510 Basic stats:
COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: min(_col1)
+ keys: _col0 (type: string)
+ minReductionHashAggr: 0.4
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 5 Data size: 510 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 5 Data size: 510 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: int)
Filter Operator
predicate: (p_size < 10) (type: boolean)
Statistics: Num rows: 5 Data size: 510 Basic stats:
COMPLETE Column stats: COMPLETE
@@ -3176,16 +3217,28 @@ STAGE PLANS:
mode: mergepartial
outputColumnNames: _col0, _col1
Statistics: Num rows: 5 Data size: 510 Basic stats: COMPLETE
Column stats: COMPLETE
- Select Operator
- expressions: _col1 (type: int), _col0 (type: string)
- outputColumnNames: _col0, _col1
+ Filter Operator
+ predicate: _col1 is not null (type: boolean)
Statistics: Num rows: 5 Data size: 510 Basic stats: COMPLETE
Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: int), _col1 (type: string)
- null sort order: zz
- sort order: ++
- Map-reduce partition columns: _col0 (type: int), _col1
(type: string)
+ Select Operator
+ expressions: _col1 (type: int), _col0 (type: string)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 5 Data size: 510 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: string)
+ null sort order: zz
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1
(type: string)
+ Statistics: Num rows: 5 Data size: 510 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reducer 6
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 5 Data size: 510 Basic stats: COMPLETE
Column stats: COMPLETE
Filter Operator
predicate: (_col1 is null or _col0 is null) (type: boolean)
Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE
Column stats: COMPLETE
@@ -3202,7 +3255,7 @@ STAGE PLANS:
sort order:
Statistics: Num rows: 1 Data size: 8 Basic stats:
COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: bigint)
- Reducer 6
+ Reducer 7
Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
@@ -3276,10 +3329,10 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: line/item
- filterExpr: ((l_shipmode = 'AIR') or ((l_shipmode = 'AIR')
and l_orderkey is null) or (l_linenumber = 1)) (type: boolean)
+ filterExpr: (((l_shipmode = 'AIR') and l_orderkey is not
null) or ((l_shipmode = 'AIR') and l_orderkey is null) or (l_orderkey is not
null and (l_linenumber = 1))) (type: boolean)
Statistics: Num rows: 100 Data size: 9200 Basic stats:
COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: (l_shipmode = 'AIR') (type: boolean)
+ predicate: ((l_shipmode = 'AIR') and l_orderkey is not
null) (type: boolean)
Statistics: Num rows: 14 Data size: 1288 Basic stats:
COMPLETE Column stats: COMPLETE
Select Operator
expressions: l_orderkey (type: int)
@@ -3308,7 +3361,7 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 8 Basic stats:
COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: bigint)
Filter Operator
- predicate: (l_linenumber = 1) (type: boolean)
+ predicate: (l_orderkey is not null and (l_linenumber = 1))
(type: boolean)
Statistics: Num rows: 14 Data size: 168 Basic stats:
COMPLETE Column stats: COMPLETE
Reduce Output Operator
null sort order:
@@ -3521,20 +3574,23 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 5 Data size: 970 Basic stats:
COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: double), _col2 (type:
double), _col3 (type: struct<count:bigint,sum:double,input:double>)
- Group By Operator
- aggregations: min(p_retailprice), max(p_retailprice),
avg(p_retailprice)
- keys: p_mfgr (type: string)
- minReductionHashAggr: 0.8076923
- mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 5 Data size: 970 Basic stats:
COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- null sort order: z
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
+ Filter Operator
+ predicate: p_mfgr is not null (type: boolean)
+ Statistics: Num rows: 26 Data size: 2756 Basic stats:
COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: min(p_retailprice), max(p_retailprice),
avg(p_retailprice)
+ keys: p_mfgr (type: string)
+ minReductionHashAggr: 0.8076923
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 5 Data size: 970 Basic stats:
COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: double), _col2 (type:
double), _col3 (type: struct<count:bigint,sum:double,input:double>)
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 5 Data size: 970 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: double), _col2 (type:
double), _col3 (type: struct<count:bigint,sum:double,input:double>)
Execution mode: vectorized, llap
LLAP IO: all inputs
Reducer 2
@@ -3672,7 +3728,7 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 5 Data size: 570 Basic stats: COMPLETE
Column stats: COMPLETE
Filter Operator
- predicate: ((_col2 - _col1) > 600) (type: boolean)
+ predicate: (((_col2 - _col1) > 600) and _col1 is not null)
(type: boolean)
Statistics: Num rows: 1 Data size: 114 Basic stats:
COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: string), _col1 (type: double)
@@ -3776,20 +3832,23 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 5 Data size: 530 Basic stats:
COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: double)
- Group By Operator
- aggregations: max(p_retailprice), min(p_retailprice)
- keys: p_mfgr (type: string)
- minReductionHashAggr: 0.8076923
- mode: hash
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 5 Data size: 570 Basic stats:
COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- null sort order: z
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
+ Filter Operator
+ predicate: p_mfgr is not null (type: boolean)
+ Statistics: Num rows: 26 Data size: 2756 Basic stats:
COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: max(p_retailprice), min(p_retailprice)
+ keys: p_mfgr (type: string)
+ minReductionHashAggr: 0.8076923
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 5 Data size: 570 Basic stats:
COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: double), _col2 (type:
double)
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 5 Data size: 570 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: double), _col2 (type:
double)
Filter Operator
predicate: p_mfgr is null (type: boolean)
Statistics: Num rows: 1 Data size: 106 Basic stats:
COMPLETE Column stats: COMPLETE
diff --git
a/ql/src/test/results/clientpositive/llap/subquery_unqual_corr_expr.q.out
b/ql/src/test/results/clientpositive/llap/subquery_unqual_corr_expr.q.out
index 5e193274860..91b88fbc48f 100644
--- a/ql/src/test/results/clientpositive/llap/subquery_unqual_corr_expr.q.out
+++ b/ql/src/test/results/clientpositive/llap/subquery_unqual_corr_expr.q.out
@@ -109,7 +109,7 @@ POSTHOOK: Input: default@src
484 val_484
86 val_86
98 val_98
-Warning: Shuffle Join MERGEJOIN[28][tables = [src, sq_1_notin_nullcheck]] in
Stage 'Reducer 2' is a cross product
+Warning: Shuffle Join MERGEJOIN[31][tables = [src, sq_1_notin_nullcheck]] in
Stage 'Reducer 2' is a cross product
PREHOOK: query: explain
select * from src tablesample (10 rows) where concat(key,value) not in (select
key from src)
PREHOOK: type: QUERY
@@ -138,30 +138,38 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: src
+ filterExpr: concat(key, value) is not null (type: boolean)
Row Limit Per Split: 10
Statistics: Num rows: 500 Data size: 89000 Basic stats:
COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- null sort order:
- sort order:
+ Filter Operator
+ predicate: concat(key, value) is not null (type: boolean)
Statistics: Num rows: 500 Data size: 89000 Basic stats:
COMPLETE Column stats: COMPLETE
- value expressions: key (type: string), value (type: string)
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 500 Data size: 89000 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: key (type: string), value (type:
string)
Execution mode: vectorized, llap
LLAP IO: all inputs
Map 4
Map Operator Tree:
TableScan
alias: src
+ filterExpr: (key is not null or key is null) (type: boolean)
Statistics: Num rows: 500 Data size: 43500 Basic stats:
COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: key (type: string)
- outputColumnNames: _col0
+ Filter Operator
+ predicate: key is not null (type: boolean)
Statistics: Num rows: 500 Data size: 43500 Basic stats:
COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- null sort order: z
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
Statistics: Num rows: 500 Data size: 43500 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 43500 Basic
stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: key is null (type: boolean)
Statistics: Num rows: 1 Data size: 87 Basic stats:
COMPLETE Column stats: COMPLETE
@@ -253,7 +261,7 @@ STAGE PLANS:
Processor Tree:
ListSink
-Warning: Shuffle Join MERGEJOIN[28][tables = [src, sq_1_notin_nullcheck]] in
Stage 'Reducer 2' is a cross product
+Warning: Shuffle Join MERGEJOIN[31][tables = [src, sq_1_notin_nullcheck]] in
Stage 'Reducer 2' is a cross product
PREHOOK: query: select * from src tablesample (10 rows) where
concat(key,value) not in (select key from src)
PREHOOK: type: QUERY
PREHOOK: Input: default@src