This is an automated email from the ASF dual-hosted git repository.
vgarg pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 223574b HIVE-22788: Query cause NPE due to implicit cast on ROW__ID
(Krisztian Kasa, Vineet Garg reviewed by Jesus Camacho Rodriguez)
223574b is described below
commit 223574ba137b2738f0e3094b0c249546dec5f157
Author: Vineet Garg <[email protected]>
AuthorDate: Wed Jan 29 14:39:27 2020 -0800
HIVE-22788: Query cause NPE due to implicit cast on ROW__ID (Krisztian
Kasa, Vineet Garg reviewed by Jesus Camacho Rodriguez)
---
.../optimizer/calcite/translator/ASTConverter.java | 11 +-
.../test/queries/clientpositive/intersect_all_rj.q | 90 ++++++++
.../results/clientpositive/intersect_all_rj.q.out | 256 +++++++++++++++++++++
.../clientpositive/llap/intersect_all_rj.q.out | 256 +++++++++++++++++++++
4 files changed, 610 insertions(+), 3 deletions(-)
diff --git
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java
index 718a2d0..e03e96f 100644
---
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java
+++
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java
@@ -722,6 +722,13 @@ public class ASTConverter {
}
break;
case CAST:
+ assert(call.getOperands().size() == 1);
+ if(call.getType().isStruct()) {
+ // cast for struct types can be ignored safely because explicit
casting on struct
+ // types are not possible, implicit casting e.g. CAST(ROW__ID as
<...>) can be ignored
+ return call.getOperands().get(0).accept(this);
+ }
+
HiveToken ht = TypeConverter.hiveToken(call.getType());
ASTBuilder astBldr = ASTBuilder.construct(ht.type, ht.text);
if (ht.args != null) {
@@ -730,9 +737,7 @@ public class ASTConverter {
}
}
astNodeLst.add(astBldr.node());
- for (RexNode operand : call.operands) {
- astNodeLst.add(operand.accept(this));
- }
+ astNodeLst.add(call.getOperands().get(0).accept(this));
break;
case EXTRACT:
// Extract on date: special handling since function in Hive does
diff --git a/ql/src/test/queries/clientpositive/intersect_all_rj.q
b/ql/src/test/queries/clientpositive/intersect_all_rj.q
new file mode 100644
index 0000000..30d15b7
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/intersect_all_rj.q
@@ -0,0 +1,90 @@
+CREATE TABLE table_16 (
+timestamp_col_19 timestamp,
+timestamp_col_29 timestamp
+);
+
+INSERT INTO table_16(timestamp_col_19, timestamp_col_29) VALUES
+('2018-01-10 15:03:55.0', '2018-01-10 15:04:55.0'),
+('2018-01-10 15:03:55.0', '2018-01-10 15:04:55.0'),
+('2018-02-10 07:12:55.0', '2018-02-10 07:12:55.0'),
+('2020-01-01 00:00:01.0', '2020-01-01 00:00:02.0');
+
+
+CREATE TABLE table_7 (
+int_col_10 int,
+bigint_col_3 bigint
+);
+
+INSERT INTO table_7(int_col_10, bigint_col_3) VALUES
+(3, 200),
+(3, 100),
+(2, 250),
+(2, 280),
+(2, 50);
+
+
+CREATE TABLE table_10 (
+boolean_col_16 boolean,
+timestamp_col_5 timestamp,
+timestamp_col_15 timestamp,
+timestamp_col_30 timestamp,
+int_col_18 int
+);
+
+INSERT INTO table_10(boolean_col_16, timestamp_col_5, timestamp_col_15,
timestamp_col_30, int_col_18) VALUES
+(true, '2018-01-10 15:03:55.0', '2018-01-10 15:03:55.0', '2018-01-10
15:03:55.0', 11),
+(true, '2018-01-10 15:03:55.0', '2018-01-10 15:03:55.0', '2018-01-10
15:03:55.0', 11),
+(true, '2018-01-10 15:03:55.0', '2018-01-10 15:03:55.0', '2018-01-10
15:03:55.0', 11),
+(true, '2018-02-10 07:12:55.0', '2018-02-10 07:12:55.0', '2018-02-10
07:12:55.0', 15),
+(true, '2018-02-10 07:12:55.0', '2018-02-10 07:12:55.0', '2018-02-10
07:12:55.0', 15),
+(true, '2018-03-10 03:05:01.0', '2018-03-10 03:05:01.0', '2018-03-10
03:05:01.0', 18);
+
+
+explain cbo
+SELECT
+ DISTINCT COALESCE(a4.timestamp_col_15, IF(a4.boolean_col_16,
a4.timestamp_col_30, a4.timestamp_col_5)) AS timestamp_col
+FROM table_7 a3
+RIGHT JOIN table_10 a4
+WHERE (a3.bigint_col_3) >= (a4.int_col_18)
+INTERSECT ALL
+SELECT
+ COALESCE(LEAST(
+ COALESCE(a1.timestamp_col_19, CAST('2010-03-29 00:00:00' AS
TIMESTAMP)),
+ COALESCE(a1.timestamp_col_29, CAST('2014-08-16 00:00:00' AS TIMESTAMP))
+ ),
+ GREATEST(COALESCE(a1.timestamp_col_19, CAST('2013-07-01 00:00:00' AS
TIMESTAMP)),
+ COALESCE(a1.timestamp_col_29, CAST('2028-06-18 00:00:00' AS
TIMESTAMP)))
+ ) AS timestamp_col
+FROM table_16 a1
+ GROUP BY COALESCE(LEAST(
+ COALESCE(a1.timestamp_col_19, CAST('2010-03-29 00:00:00' AS
TIMESTAMP)),
+ COALESCE(a1.timestamp_col_29, CAST('2014-08-16 00:00:00' AS TIMESTAMP))
+ ),
+ GREATEST(
+ COALESCE(a1.timestamp_col_19, CAST('2013-07-01 00:00:00' AS
TIMESTAMP)),
+ COALESCE(a1.timestamp_col_29, CAST('2028-06-18 00:00:00' AS
TIMESTAMP)))
+ );
+
+SELECT
+ DISTINCT COALESCE(a4.timestamp_col_15, IF(a4.boolean_col_16,
a4.timestamp_col_30, a4.timestamp_col_5)) AS timestamp_col
+FROM table_7 a3
+RIGHT JOIN table_10 a4
+WHERE (a3.bigint_col_3) >= (a4.int_col_18)
+INTERSECT ALL
+SELECT
+ COALESCE(LEAST(
+ COALESCE(a1.timestamp_col_19, CAST('2010-03-29 00:00:00' AS
TIMESTAMP)),
+ COALESCE(a1.timestamp_col_29, CAST('2014-08-16 00:00:00' AS TIMESTAMP))
+ ),
+ GREATEST(COALESCE(a1.timestamp_col_19, CAST('2013-07-01 00:00:00' AS
TIMESTAMP)),
+ COALESCE(a1.timestamp_col_29, CAST('2028-06-18 00:00:00' AS
TIMESTAMP)))
+ ) AS timestamp_col
+FROM table_16 a1
+ GROUP BY COALESCE(LEAST(
+ COALESCE(a1.timestamp_col_19, CAST('2010-03-29 00:00:00' AS
TIMESTAMP)),
+ COALESCE(a1.timestamp_col_29, CAST('2014-08-16 00:00:00' AS TIMESTAMP))
+ ),
+ GREATEST(
+ COALESCE(a1.timestamp_col_19, CAST('2013-07-01 00:00:00' AS
TIMESTAMP)),
+ COALESCE(a1.timestamp_col_29, CAST('2028-06-18 00:00:00' AS
TIMESTAMP)))
+ );
diff --git a/ql/src/test/results/clientpositive/intersect_all_rj.q.out
b/ql/src/test/results/clientpositive/intersect_all_rj.q.out
new file mode 100644
index 0000000..b8ff98a
--- /dev/null
+++ b/ql/src/test/results/clientpositive/intersect_all_rj.q.out
@@ -0,0 +1,256 @@
+PREHOOK: query: CREATE TABLE table_16 (
+timestamp_col_19 timestamp,
+timestamp_col_29 timestamp
+)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@table_16
+POSTHOOK: query: CREATE TABLE table_16 (
+timestamp_col_19 timestamp,
+timestamp_col_29 timestamp
+)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@table_16
+PREHOOK: query: INSERT INTO table_16(timestamp_col_19, timestamp_col_29) VALUES
+('2018-01-10 15:03:55.0', '2018-01-10 15:04:55.0'),
+('2018-01-10 15:03:55.0', '2018-01-10 15:04:55.0'),
+('2018-02-10 07:12:55.0', '2018-02-10 07:12:55.0'),
+('2020-01-01 00:00:01.0', '2020-01-01 00:00:02.0')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@table_16
+POSTHOOK: query: INSERT INTO table_16(timestamp_col_19, timestamp_col_29)
VALUES
+('2018-01-10 15:03:55.0', '2018-01-10 15:04:55.0'),
+('2018-01-10 15:03:55.0', '2018-01-10 15:04:55.0'),
+('2018-02-10 07:12:55.0', '2018-02-10 07:12:55.0'),
+('2020-01-01 00:00:01.0', '2020-01-01 00:00:02.0')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@table_16
+POSTHOOK: Lineage: table_16.timestamp_col_19 SCRIPT []
+POSTHOOK: Lineage: table_16.timestamp_col_29 SCRIPT []
+PREHOOK: query: CREATE TABLE table_7 (
+int_col_10 int,
+bigint_col_3 bigint
+)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@table_7
+POSTHOOK: query: CREATE TABLE table_7 (
+int_col_10 int,
+bigint_col_3 bigint
+)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@table_7
+PREHOOK: query: INSERT INTO table_7(int_col_10, bigint_col_3) VALUES
+(3, 200),
+(3, 100),
+(2, 250),
+(2, 280),
+(2, 50)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@table_7
+POSTHOOK: query: INSERT INTO table_7(int_col_10, bigint_col_3) VALUES
+(3, 200),
+(3, 100),
+(2, 250),
+(2, 280),
+(2, 50)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@table_7
+POSTHOOK: Lineage: table_7.bigint_col_3 SCRIPT []
+POSTHOOK: Lineage: table_7.int_col_10 SCRIPT []
+PREHOOK: query: CREATE TABLE table_10 (
+boolean_col_16 boolean,
+timestamp_col_5 timestamp,
+timestamp_col_15 timestamp,
+timestamp_col_30 timestamp,
+int_col_18 int
+)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@table_10
+POSTHOOK: query: CREATE TABLE table_10 (
+boolean_col_16 boolean,
+timestamp_col_5 timestamp,
+timestamp_col_15 timestamp,
+timestamp_col_30 timestamp,
+int_col_18 int
+)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@table_10
+PREHOOK: query: INSERT INTO table_10(boolean_col_16, timestamp_col_5,
timestamp_col_15, timestamp_col_30, int_col_18) VALUES
+(true, '2018-01-10 15:03:55.0', '2018-01-10 15:03:55.0', '2018-01-10
15:03:55.0', 11),
+(true, '2018-01-10 15:03:55.0', '2018-01-10 15:03:55.0', '2018-01-10
15:03:55.0', 11),
+(true, '2018-01-10 15:03:55.0', '2018-01-10 15:03:55.0', '2018-01-10
15:03:55.0', 11),
+(true, '2018-02-10 07:12:55.0', '2018-02-10 07:12:55.0', '2018-02-10
07:12:55.0', 15),
+(true, '2018-02-10 07:12:55.0', '2018-02-10 07:12:55.0', '2018-02-10
07:12:55.0', 15),
+(true, '2018-03-10 03:05:01.0', '2018-03-10 03:05:01.0', '2018-03-10
03:05:01.0', 18)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@table_10
+POSTHOOK: query: INSERT INTO table_10(boolean_col_16, timestamp_col_5,
timestamp_col_15, timestamp_col_30, int_col_18) VALUES
+(true, '2018-01-10 15:03:55.0', '2018-01-10 15:03:55.0', '2018-01-10
15:03:55.0', 11),
+(true, '2018-01-10 15:03:55.0', '2018-01-10 15:03:55.0', '2018-01-10
15:03:55.0', 11),
+(true, '2018-01-10 15:03:55.0', '2018-01-10 15:03:55.0', '2018-01-10
15:03:55.0', 11),
+(true, '2018-02-10 07:12:55.0', '2018-02-10 07:12:55.0', '2018-02-10
07:12:55.0', 15),
+(true, '2018-02-10 07:12:55.0', '2018-02-10 07:12:55.0', '2018-02-10
07:12:55.0', 15),
+(true, '2018-03-10 03:05:01.0', '2018-03-10 03:05:01.0', '2018-03-10
03:05:01.0', 18)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@table_10
+POSTHOOK: Lineage: table_10.boolean_col_16 SCRIPT []
+POSTHOOK: Lineage: table_10.int_col_18 SCRIPT []
+POSTHOOK: Lineage: table_10.timestamp_col_15 SCRIPT []
+POSTHOOK: Lineage: table_10.timestamp_col_30 SCRIPT []
+POSTHOOK: Lineage: table_10.timestamp_col_5 SCRIPT []
+Warning: Shuffle Join JOIN[8][tables = [$hdt$_0, $hdt$_1]] in Stage
'Stage-1:MAPRED' is a cross product
+PREHOOK: query: explain cbo
+SELECT
+ DISTINCT COALESCE(a4.timestamp_col_15, IF(a4.boolean_col_16,
a4.timestamp_col_30, a4.timestamp_col_5)) AS timestamp_col
+FROM table_7 a3
+RIGHT JOIN table_10 a4
+WHERE (a3.bigint_col_3) >= (a4.int_col_18)
+INTERSECT ALL
+SELECT
+ COALESCE(LEAST(
+ COALESCE(a1.timestamp_col_19, CAST('2010-03-29 00:00:00' AS
TIMESTAMP)),
+ COALESCE(a1.timestamp_col_29, CAST('2014-08-16 00:00:00' AS TIMESTAMP))
+ ),
+ GREATEST(COALESCE(a1.timestamp_col_19, CAST('2013-07-01 00:00:00' AS
TIMESTAMP)),
+ COALESCE(a1.timestamp_col_29, CAST('2028-06-18 00:00:00' AS
TIMESTAMP)))
+ ) AS timestamp_col
+FROM table_16 a1
+ GROUP BY COALESCE(LEAST(
+ COALESCE(a1.timestamp_col_19, CAST('2010-03-29 00:00:00' AS
TIMESTAMP)),
+ COALESCE(a1.timestamp_col_29, CAST('2014-08-16 00:00:00' AS TIMESTAMP))
+ ),
+ GREATEST(
+ COALESCE(a1.timestamp_col_19, CAST('2013-07-01 00:00:00' AS
TIMESTAMP)),
+ COALESCE(a1.timestamp_col_29, CAST('2028-06-18 00:00:00' AS
TIMESTAMP)))
+ )
+PREHOOK: type: QUERY
+PREHOOK: Input: default@table_10
+PREHOOK: Input: default@table_16
+PREHOOK: Input: default@table_7
+#### A masked pattern was here ####
+POSTHOOK: query: explain cbo
+SELECT
+ DISTINCT COALESCE(a4.timestamp_col_15, IF(a4.boolean_col_16,
a4.timestamp_col_30, a4.timestamp_col_5)) AS timestamp_col
+FROM table_7 a3
+RIGHT JOIN table_10 a4
+WHERE (a3.bigint_col_3) >= (a4.int_col_18)
+INTERSECT ALL
+SELECT
+ COALESCE(LEAST(
+ COALESCE(a1.timestamp_col_19, CAST('2010-03-29 00:00:00' AS
TIMESTAMP)),
+ COALESCE(a1.timestamp_col_29, CAST('2014-08-16 00:00:00' AS TIMESTAMP))
+ ),
+ GREATEST(COALESCE(a1.timestamp_col_19, CAST('2013-07-01 00:00:00' AS
TIMESTAMP)),
+ COALESCE(a1.timestamp_col_29, CAST('2028-06-18 00:00:00' AS
TIMESTAMP)))
+ ) AS timestamp_col
+FROM table_16 a1
+ GROUP BY COALESCE(LEAST(
+ COALESCE(a1.timestamp_col_19, CAST('2010-03-29 00:00:00' AS
TIMESTAMP)),
+ COALESCE(a1.timestamp_col_29, CAST('2014-08-16 00:00:00' AS TIMESTAMP))
+ ),
+ GREATEST(
+ COALESCE(a1.timestamp_col_19, CAST('2013-07-01 00:00:00' AS
TIMESTAMP)),
+ COALESCE(a1.timestamp_col_29, CAST('2028-06-18 00:00:00' AS
TIMESTAMP)))
+ )
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@table_10
+POSTHOOK: Input: default@table_16
+POSTHOOK: Input: default@table_7
+#### A masked pattern was here ####
+CBO PLAN:
+HiveProject($f0=[$1])
+ HiveTableFunctionScan(invocation=[replicate_rows($0, $1)],
rowType=[RecordType(BIGINT $f0, TIMESTAMP(9) $f1)])
+ HiveProject($f0=[$2], $f1=[$0])
+ HiveFilter(condition=[=($1, 2)])
+ HiveAggregate(group=[{0}], agg#0=[count($1)], agg#1=[min($1)])
+ HiveProject($f0=[$0], $f1=[$1])
+ HiveUnion(all=[true])
+ HiveProject($f0=[$0], $f1=[$1])
+ HiveAggregate(group=[{0}], agg#0=[count()])
+ HiveProject($f0=[$0])
+ HiveAggregate(group=[{0}])
+ HiveProject($f0=[CASE(IS NOT NULL($7), $7, if($5, $8,
$6))])
+ HiveJoin(condition=[>=($1, $13)], joinType=[inner],
algorithm=[none], cost=[not available])
+ HiveProject(int_col_10=[$0], bigint_col_3=[$1],
BLOCK__OFFSET__INSIDE__FILE=[$2], INPUT__FILE__NAME=[$3],
CAST=[CAST($4):RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid)])
+ HiveFilter(condition=[IS NOT NULL($1)])
+ HiveTableScan(table=[[default, table_7]],
table:alias=[a3])
+ HiveProject(boolean_col_16=[$0],
timestamp_col_5=[$1], timestamp_col_15=[$2], timestamp_col_30=[$3],
int_col_18=[$4], BLOCK__OFFSET__INSIDE__FILE=[$5], INPUT__FILE__NAME=[$6],
ROW__ID=[$7], CAST=[CAST($4):BIGINT])
+ HiveFilter(condition=[IS NOT
NULL(CAST($4):BIGINT)])
+ HiveTableScan(table=[[default, table_10]],
table:alias=[a4])
+ HiveProject($f0=[$0], $f1=[$1])
+ HiveAggregate(group=[{0}], agg#0=[count()])
+ HiveProject($f0=[$0])
+ HiveAggregate(group=[{0}])
+ HiveProject($f0=[CASE(IS NOT NULL(least(CASE(IS NOT
NULL($0), $0, 2010-03-29 00:00:00:TIMESTAMP(9)), CASE(IS NOT NULL($1), $1,
2014-08-16 00:00:00:TIMESTAMP(9)))), least(CASE(IS NOT NULL($0), $0, 2010-03-29
00:00:00:TIMESTAMP(9)), CASE(IS NOT NULL($1), $1, 2014-08-16
00:00:00:TIMESTAMP(9))), greatest(CASE(IS NOT NULL($0), $0, 2013-07-01
00:00:00:TIMESTAMP(9)), CASE(IS NOT NULL($1), $1, 2028-06-18
00:00:00:TIMESTAMP(9))))])
+ HiveTableScan(table=[[default, table_16]],
table:alias=[a1])
+
+Warning: Shuffle Join JOIN[8][tables = [$hdt$_0, $hdt$_1]] in Stage
'Stage-1:MAPRED' is a cross product
+PREHOOK: query: SELECT
+ DISTINCT COALESCE(a4.timestamp_col_15, IF(a4.boolean_col_16,
a4.timestamp_col_30, a4.timestamp_col_5)) AS timestamp_col
+FROM table_7 a3
+RIGHT JOIN table_10 a4
+WHERE (a3.bigint_col_3) >= (a4.int_col_18)
+INTERSECT ALL
+SELECT
+ COALESCE(LEAST(
+ COALESCE(a1.timestamp_col_19, CAST('2010-03-29 00:00:00' AS
TIMESTAMP)),
+ COALESCE(a1.timestamp_col_29, CAST('2014-08-16 00:00:00' AS TIMESTAMP))
+ ),
+ GREATEST(COALESCE(a1.timestamp_col_19, CAST('2013-07-01 00:00:00' AS
TIMESTAMP)),
+ COALESCE(a1.timestamp_col_29, CAST('2028-06-18 00:00:00' AS
TIMESTAMP)))
+ ) AS timestamp_col
+FROM table_16 a1
+ GROUP BY COALESCE(LEAST(
+ COALESCE(a1.timestamp_col_19, CAST('2010-03-29 00:00:00' AS
TIMESTAMP)),
+ COALESCE(a1.timestamp_col_29, CAST('2014-08-16 00:00:00' AS TIMESTAMP))
+ ),
+ GREATEST(
+ COALESCE(a1.timestamp_col_19, CAST('2013-07-01 00:00:00' AS
TIMESTAMP)),
+ COALESCE(a1.timestamp_col_29, CAST('2028-06-18 00:00:00' AS
TIMESTAMP)))
+ )
+PREHOOK: type: QUERY
+PREHOOK: Input: default@table_10
+PREHOOK: Input: default@table_16
+PREHOOK: Input: default@table_7
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT
+ DISTINCT COALESCE(a4.timestamp_col_15, IF(a4.boolean_col_16,
a4.timestamp_col_30, a4.timestamp_col_5)) AS timestamp_col
+FROM table_7 a3
+RIGHT JOIN table_10 a4
+WHERE (a3.bigint_col_3) >= (a4.int_col_18)
+INTERSECT ALL
+SELECT
+ COALESCE(LEAST(
+ COALESCE(a1.timestamp_col_19, CAST('2010-03-29 00:00:00' AS
TIMESTAMP)),
+ COALESCE(a1.timestamp_col_29, CAST('2014-08-16 00:00:00' AS TIMESTAMP))
+ ),
+ GREATEST(COALESCE(a1.timestamp_col_19, CAST('2013-07-01 00:00:00' AS
TIMESTAMP)),
+ COALESCE(a1.timestamp_col_29, CAST('2028-06-18 00:00:00' AS
TIMESTAMP)))
+ ) AS timestamp_col
+FROM table_16 a1
+ GROUP BY COALESCE(LEAST(
+ COALESCE(a1.timestamp_col_19, CAST('2010-03-29 00:00:00' AS
TIMESTAMP)),
+ COALESCE(a1.timestamp_col_29, CAST('2014-08-16 00:00:00' AS TIMESTAMP))
+ ),
+ GREATEST(
+ COALESCE(a1.timestamp_col_19, CAST('2013-07-01 00:00:00' AS
TIMESTAMP)),
+ COALESCE(a1.timestamp_col_29, CAST('2028-06-18 00:00:00' AS
TIMESTAMP)))
+ )
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@table_10
+POSTHOOK: Input: default@table_16
+POSTHOOK: Input: default@table_7
+#### A masked pattern was here ####
+2018-01-10 15:03:55
+2018-02-10 07:12:55
diff --git a/ql/src/test/results/clientpositive/llap/intersect_all_rj.q.out
b/ql/src/test/results/clientpositive/llap/intersect_all_rj.q.out
new file mode 100644
index 0000000..cdfbc22
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/intersect_all_rj.q.out
@@ -0,0 +1,256 @@
+PREHOOK: query: CREATE TABLE table_16 (
+timestamp_col_19 timestamp,
+timestamp_col_29 timestamp
+)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@table_16
+POSTHOOK: query: CREATE TABLE table_16 (
+timestamp_col_19 timestamp,
+timestamp_col_29 timestamp
+)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@table_16
+PREHOOK: query: INSERT INTO table_16(timestamp_col_19, timestamp_col_29) VALUES
+('2018-01-10 15:03:55.0', '2018-01-10 15:04:55.0'),
+('2018-01-10 15:03:55.0', '2018-01-10 15:04:55.0'),
+('2018-02-10 07:12:55.0', '2018-02-10 07:12:55.0'),
+('2020-01-01 00:00:01.0', '2020-01-01 00:00:02.0')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@table_16
+POSTHOOK: query: INSERT INTO table_16(timestamp_col_19, timestamp_col_29)
VALUES
+('2018-01-10 15:03:55.0', '2018-01-10 15:04:55.0'),
+('2018-01-10 15:03:55.0', '2018-01-10 15:04:55.0'),
+('2018-02-10 07:12:55.0', '2018-02-10 07:12:55.0'),
+('2020-01-01 00:00:01.0', '2020-01-01 00:00:02.0')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@table_16
+POSTHOOK: Lineage: table_16.timestamp_col_19 SCRIPT []
+POSTHOOK: Lineage: table_16.timestamp_col_29 SCRIPT []
+PREHOOK: query: CREATE TABLE table_7 (
+int_col_10 int,
+bigint_col_3 bigint
+)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@table_7
+POSTHOOK: query: CREATE TABLE table_7 (
+int_col_10 int,
+bigint_col_3 bigint
+)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@table_7
+PREHOOK: query: INSERT INTO table_7(int_col_10, bigint_col_3) VALUES
+(3, 200),
+(3, 100),
+(2, 250),
+(2, 280),
+(2, 50)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@table_7
+POSTHOOK: query: INSERT INTO table_7(int_col_10, bigint_col_3) VALUES
+(3, 200),
+(3, 100),
+(2, 250),
+(2, 280),
+(2, 50)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@table_7
+POSTHOOK: Lineage: table_7.bigint_col_3 SCRIPT []
+POSTHOOK: Lineage: table_7.int_col_10 SCRIPT []
+PREHOOK: query: CREATE TABLE table_10 (
+boolean_col_16 boolean,
+timestamp_col_5 timestamp,
+timestamp_col_15 timestamp,
+timestamp_col_30 timestamp,
+int_col_18 int
+)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@table_10
+POSTHOOK: query: CREATE TABLE table_10 (
+boolean_col_16 boolean,
+timestamp_col_5 timestamp,
+timestamp_col_15 timestamp,
+timestamp_col_30 timestamp,
+int_col_18 int
+)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@table_10
+PREHOOK: query: INSERT INTO table_10(boolean_col_16, timestamp_col_5,
timestamp_col_15, timestamp_col_30, int_col_18) VALUES
+(true, '2018-01-10 15:03:55.0', '2018-01-10 15:03:55.0', '2018-01-10
15:03:55.0', 11),
+(true, '2018-01-10 15:03:55.0', '2018-01-10 15:03:55.0', '2018-01-10
15:03:55.0', 11),
+(true, '2018-01-10 15:03:55.0', '2018-01-10 15:03:55.0', '2018-01-10
15:03:55.0', 11),
+(true, '2018-02-10 07:12:55.0', '2018-02-10 07:12:55.0', '2018-02-10
07:12:55.0', 15),
+(true, '2018-02-10 07:12:55.0', '2018-02-10 07:12:55.0', '2018-02-10
07:12:55.0', 15),
+(true, '2018-03-10 03:05:01.0', '2018-03-10 03:05:01.0', '2018-03-10
03:05:01.0', 18)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@table_10
+POSTHOOK: query: INSERT INTO table_10(boolean_col_16, timestamp_col_5,
timestamp_col_15, timestamp_col_30, int_col_18) VALUES
+(true, '2018-01-10 15:03:55.0', '2018-01-10 15:03:55.0', '2018-01-10
15:03:55.0', 11),
+(true, '2018-01-10 15:03:55.0', '2018-01-10 15:03:55.0', '2018-01-10
15:03:55.0', 11),
+(true, '2018-01-10 15:03:55.0', '2018-01-10 15:03:55.0', '2018-01-10
15:03:55.0', 11),
+(true, '2018-02-10 07:12:55.0', '2018-02-10 07:12:55.0', '2018-02-10
07:12:55.0', 15),
+(true, '2018-02-10 07:12:55.0', '2018-02-10 07:12:55.0', '2018-02-10
07:12:55.0', 15),
+(true, '2018-03-10 03:05:01.0', '2018-03-10 03:05:01.0', '2018-03-10
03:05:01.0', 18)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@table_10
+POSTHOOK: Lineage: table_10.boolean_col_16 SCRIPT []
+POSTHOOK: Lineage: table_10.int_col_18 SCRIPT []
+POSTHOOK: Lineage: table_10.timestamp_col_15 SCRIPT []
+POSTHOOK: Lineage: table_10.timestamp_col_30 SCRIPT []
+POSTHOOK: Lineage: table_10.timestamp_col_5 SCRIPT []
+Warning: Shuffle Join MERGEJOIN[48][tables = [$hdt$_0, $hdt$_1]] in Stage
'Reducer 2' is a cross product
+PREHOOK: query: explain cbo
+SELECT
+ DISTINCT COALESCE(a4.timestamp_col_15, IF(a4.boolean_col_16,
a4.timestamp_col_30, a4.timestamp_col_5)) AS timestamp_col
+FROM table_7 a3
+RIGHT JOIN table_10 a4
+WHERE (a3.bigint_col_3) >= (a4.int_col_18)
+INTERSECT ALL
+SELECT
+ COALESCE(LEAST(
+ COALESCE(a1.timestamp_col_19, CAST('2010-03-29 00:00:00' AS
TIMESTAMP)),
+ COALESCE(a1.timestamp_col_29, CAST('2014-08-16 00:00:00' AS TIMESTAMP))
+ ),
+ GREATEST(COALESCE(a1.timestamp_col_19, CAST('2013-07-01 00:00:00' AS
TIMESTAMP)),
+ COALESCE(a1.timestamp_col_29, CAST('2028-06-18 00:00:00' AS
TIMESTAMP)))
+ ) AS timestamp_col
+FROM table_16 a1
+ GROUP BY COALESCE(LEAST(
+ COALESCE(a1.timestamp_col_19, CAST('2010-03-29 00:00:00' AS
TIMESTAMP)),
+ COALESCE(a1.timestamp_col_29, CAST('2014-08-16 00:00:00' AS TIMESTAMP))
+ ),
+ GREATEST(
+ COALESCE(a1.timestamp_col_19, CAST('2013-07-01 00:00:00' AS
TIMESTAMP)),
+ COALESCE(a1.timestamp_col_29, CAST('2028-06-18 00:00:00' AS
TIMESTAMP)))
+ )
+PREHOOK: type: QUERY
+PREHOOK: Input: default@table_10
+PREHOOK: Input: default@table_16
+PREHOOK: Input: default@table_7
+#### A masked pattern was here ####
+POSTHOOK: query: explain cbo
+SELECT
+ DISTINCT COALESCE(a4.timestamp_col_15, IF(a4.boolean_col_16,
a4.timestamp_col_30, a4.timestamp_col_5)) AS timestamp_col
+FROM table_7 a3
+RIGHT JOIN table_10 a4
+WHERE (a3.bigint_col_3) >= (a4.int_col_18)
+INTERSECT ALL
+SELECT
+ COALESCE(LEAST(
+ COALESCE(a1.timestamp_col_19, CAST('2010-03-29 00:00:00' AS
TIMESTAMP)),
+ COALESCE(a1.timestamp_col_29, CAST('2014-08-16 00:00:00' AS TIMESTAMP))
+ ),
+ GREATEST(COALESCE(a1.timestamp_col_19, CAST('2013-07-01 00:00:00' AS
TIMESTAMP)),
+ COALESCE(a1.timestamp_col_29, CAST('2028-06-18 00:00:00' AS
TIMESTAMP)))
+ ) AS timestamp_col
+FROM table_16 a1
+ GROUP BY COALESCE(LEAST(
+ COALESCE(a1.timestamp_col_19, CAST('2010-03-29 00:00:00' AS
TIMESTAMP)),
+ COALESCE(a1.timestamp_col_29, CAST('2014-08-16 00:00:00' AS TIMESTAMP))
+ ),
+ GREATEST(
+ COALESCE(a1.timestamp_col_19, CAST('2013-07-01 00:00:00' AS
TIMESTAMP)),
+ COALESCE(a1.timestamp_col_29, CAST('2028-06-18 00:00:00' AS
TIMESTAMP)))
+ )
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@table_10
+POSTHOOK: Input: default@table_16
+POSTHOOK: Input: default@table_7
+#### A masked pattern was here ####
+CBO PLAN:
+HiveProject($f0=[$1])
+ HiveTableFunctionScan(invocation=[replicate_rows($0, $1)],
rowType=[RecordType(BIGINT $f0, TIMESTAMP(9) $f1)])
+ HiveProject($f0=[$2], $f1=[$0])
+ HiveFilter(condition=[=($1, 2)])
+ HiveAggregate(group=[{0}], agg#0=[count($1)], agg#1=[min($1)])
+ HiveProject($f0=[$0], $f1=[$1])
+ HiveUnion(all=[true])
+ HiveProject($f0=[$0], $f1=[$1])
+ HiveAggregate(group=[{0}], agg#0=[count()])
+ HiveProject($f0=[$0])
+ HiveAggregate(group=[{0}])
+ HiveProject($f0=[CASE(IS NOT NULL($7), $7, if($5, $8,
$6))])
+ HiveJoin(condition=[>=($1, $13)], joinType=[inner],
algorithm=[none], cost=[not available])
+ HiveProject(int_col_10=[$0], bigint_col_3=[$1],
BLOCK__OFFSET__INSIDE__FILE=[$2], INPUT__FILE__NAME=[$3],
CAST=[CAST($4):RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid)])
+ HiveFilter(condition=[IS NOT NULL($1)])
+ HiveTableScan(table=[[default, table_7]],
table:alias=[a3])
+ HiveProject(boolean_col_16=[$0],
timestamp_col_5=[$1], timestamp_col_15=[$2], timestamp_col_30=[$3],
int_col_18=[$4], BLOCK__OFFSET__INSIDE__FILE=[$5], INPUT__FILE__NAME=[$6],
ROW__ID=[$7], CAST=[CAST($4):BIGINT])
+ HiveFilter(condition=[IS NOT
NULL(CAST($4):BIGINT)])
+ HiveTableScan(table=[[default, table_10]],
table:alias=[a4])
+ HiveProject($f0=[$0], $f1=[$1])
+ HiveAggregate(group=[{0}], agg#0=[count()])
+ HiveProject($f0=[$0])
+ HiveAggregate(group=[{0}])
+ HiveProject($f0=[CASE(IS NOT NULL(least(CASE(IS NOT
NULL($0), $0, 2010-03-29 00:00:00:TIMESTAMP(9)), CASE(IS NOT NULL($1), $1,
2014-08-16 00:00:00:TIMESTAMP(9)))), least(CASE(IS NOT NULL($0), $0, 2010-03-29
00:00:00:TIMESTAMP(9)), CASE(IS NOT NULL($1), $1, 2014-08-16
00:00:00:TIMESTAMP(9))), greatest(CASE(IS NOT NULL($0), $0, 2013-07-01
00:00:00:TIMESTAMP(9)), CASE(IS NOT NULL($1), $1, 2028-06-18
00:00:00:TIMESTAMP(9))))])
+ HiveTableScan(table=[[default, table_16]],
table:alias=[a1])
+
+Warning: Shuffle Join MERGEJOIN[48][tables = [$hdt$_0, $hdt$_1]] in Stage
'Reducer 2' is a cross product
+PREHOOK: query: SELECT
+ DISTINCT COALESCE(a4.timestamp_col_15, IF(a4.boolean_col_16,
a4.timestamp_col_30, a4.timestamp_col_5)) AS timestamp_col
+FROM table_7 a3
+RIGHT JOIN table_10 a4
+WHERE (a3.bigint_col_3) >= (a4.int_col_18)
+INTERSECT ALL
+SELECT
+ COALESCE(LEAST(
+ COALESCE(a1.timestamp_col_19, CAST('2010-03-29 00:00:00' AS
TIMESTAMP)),
+ COALESCE(a1.timestamp_col_29, CAST('2014-08-16 00:00:00' AS TIMESTAMP))
+ ),
+ GREATEST(COALESCE(a1.timestamp_col_19, CAST('2013-07-01 00:00:00' AS
TIMESTAMP)),
+ COALESCE(a1.timestamp_col_29, CAST('2028-06-18 00:00:00' AS
TIMESTAMP)))
+ ) AS timestamp_col
+FROM table_16 a1
+ GROUP BY COALESCE(LEAST(
+ COALESCE(a1.timestamp_col_19, CAST('2010-03-29 00:00:00' AS
TIMESTAMP)),
+ COALESCE(a1.timestamp_col_29, CAST('2014-08-16 00:00:00' AS TIMESTAMP))
+ ),
+ GREATEST(
+ COALESCE(a1.timestamp_col_19, CAST('2013-07-01 00:00:00' AS
TIMESTAMP)),
+ COALESCE(a1.timestamp_col_29, CAST('2028-06-18 00:00:00' AS
TIMESTAMP)))
+ )
+PREHOOK: type: QUERY
+PREHOOK: Input: default@table_10
+PREHOOK: Input: default@table_16
+PREHOOK: Input: default@table_7
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT
+ DISTINCT COALESCE(a4.timestamp_col_15, IF(a4.boolean_col_16,
a4.timestamp_col_30, a4.timestamp_col_5)) AS timestamp_col
+FROM table_7 a3
+RIGHT JOIN table_10 a4
+WHERE (a3.bigint_col_3) >= (a4.int_col_18)
+INTERSECT ALL
+SELECT
+ COALESCE(LEAST(
+ COALESCE(a1.timestamp_col_19, CAST('2010-03-29 00:00:00' AS
TIMESTAMP)),
+ COALESCE(a1.timestamp_col_29, CAST('2014-08-16 00:00:00' AS TIMESTAMP))
+ ),
+ GREATEST(COALESCE(a1.timestamp_col_19, CAST('2013-07-01 00:00:00' AS
TIMESTAMP)),
+ COALESCE(a1.timestamp_col_29, CAST('2028-06-18 00:00:00' AS
TIMESTAMP)))
+ ) AS timestamp_col
+FROM table_16 a1
+ GROUP BY COALESCE(LEAST(
+ COALESCE(a1.timestamp_col_19, CAST('2010-03-29 00:00:00' AS
TIMESTAMP)),
+ COALESCE(a1.timestamp_col_29, CAST('2014-08-16 00:00:00' AS TIMESTAMP))
+ ),
+ GREATEST(
+ COALESCE(a1.timestamp_col_19, CAST('2013-07-01 00:00:00' AS
TIMESTAMP)),
+ COALESCE(a1.timestamp_col_29, CAST('2028-06-18 00:00:00' AS
TIMESTAMP)))
+ )
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@table_10
+POSTHOOK: Input: default@table_16
+POSTHOOK: Input: default@table_7
+#### A masked pattern was here ####
+2018-02-10 07:12:55
+2018-01-10 15:03:55