http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/queries/clientpositive/mapjoin2.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/mapjoin2.q b/ql/src/test/queries/clientpositive/mapjoin2.q index e194bd0..014dabe 100644 --- a/ql/src/test/queries/clientpositive/mapjoin2.q +++ b/ql/src/test/queries/clientpositive/mapjoin2.q @@ -6,16 +6,30 @@ create table tbl_n1 (n bigint, t string); insert into tbl_n1 values (1, 'one'); insert into tbl_n1 values(2, 'two'); +explain +select a.n, a.t, isnull(b.n), isnull(b.t) from (select * from tbl_n1 where n = 1) a left outer join (select * from tbl_n1 where 1 = 2) b on a.n = b.n; select a.n, a.t, isnull(b.n), isnull(b.t) from (select * from tbl_n1 where n = 1) a left outer join (select * from tbl_n1 where 1 = 2) b on a.n = b.n; +explain +select isnull(a.n), isnull(a.t), b.n, b.t from (select * from tbl_n1 where 2 = 1) a right outer join (select * from tbl_n1 where n = 2) b on a.n = b.n; select isnull(a.n), isnull(a.t), b.n, b.t from (select * from tbl_n1 where 2 = 1) a right outer join (select * from tbl_n1 where n = 2) b on a.n = b.n; +explain +select isnull(a.n), isnull(a.t), isnull(b.n), isnull(b.t) from (select * from tbl_n1 where n = 1) a full outer join (select * from tbl_n1 where n = 2) b on a.n = b.n; select isnull(a.n), isnull(a.t), isnull(b.n), isnull(b.t) from (select * from tbl_n1 where n = 1) a full outer join (select * from tbl_n1 where n = 2) b on a.n = b.n; +explain +select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key; select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key; +explain +select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a left outer join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key; select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a left outer join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key; +explain +select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a right outer join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key; select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a right outer join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key; +explain +select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a right outer join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key; select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a full outer join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key;
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/queries/clientpositive/mapjoin46.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/mapjoin46.q b/ql/src/test/queries/clientpositive/mapjoin46.q index 9de7113..81f9610 100644 --- a/ql/src/test/queries/clientpositive/mapjoin46.q +++ b/ql/src/test/queries/clientpositive/mapjoin46.q @@ -3,6 +3,8 @@ set hive.auto.convert.join=true; set hive.strict.checks.cartesian.product=false; set hive.join.emit.interval=2; +-- SORT_QUERY_RESULTS + CREATE TABLE test1_n4 (key INT, value INT, col_1 STRING); INSERT INTO test1_n4 VALUES (NULL, NULL, 'None'), (98, NULL, 'None'), (99, 0, 'Alice'), (99, 2, 'Mat'), (100, 1, 'Bob'), (101, 2, 'Car'); @@ -173,6 +175,22 @@ ON (test1_n4.value=test2_n2.value OR test2_n2.key between 100 and 102)); -- Disjunction with pred on multiple inputs and single inputs (full outer join) +SET hive.mapjoin.full.outer=false; +EXPLAIN +SELECT * +FROM test1_n4 FULL OUTER JOIN test2_n2 +ON (test1_n4.value=test2_n2.value + OR test1_n4.key between 100 and 102 + OR test2_n2.key between 100 and 102); + +SELECT * +FROM test1_n4 FULL OUTER JOIN test2_n2 +ON (test1_n4.value=test2_n2.value + OR test1_n4.key between 100 and 102 + OR test2_n2.key between 100 and 102); + +SET hive.mapjoin.full.outer=true; +SET hive.merge.nway.joins=false; EXPLAIN SELECT * FROM test1_n4 FULL OUTER JOIN test2_n2 @@ -185,8 +203,23 @@ FROM test1_n4 FULL OUTER JOIN test2_n2 ON (test1_n4.value=test2_n2.value OR test1_n4.key between 100 and 102 OR test2_n2.key between 100 and 102); +SET hive.merge.nway.joins=true; -- Disjunction with pred on multiple inputs and left input (full outer join) +SET hive.mapjoin.full.outer=false; +EXPLAIN +SELECT * +FROM test1_n4 FULL OUTER JOIN test2_n2 +ON (test1_n4.value=test2_n2.value + OR test1_n4.key between 100 and 102); + +SELECT * +FROM test1_n4 FULL OUTER JOIN test2_n2 +ON (test1_n4.value=test2_n2.value + OR test1_n4.key between 100 and 102); + +SET hive.mapjoin.full.outer=true; +SET hive.merge.nway.joins=false; EXPLAIN SELECT * FROM test1_n4 FULL OUTER JOIN test2_n2 @@ -197,8 +230,23 @@ SELECT * FROM test1_n4 FULL OUTER JOIN test2_n2 ON (test1_n4.value=test2_n2.value OR test1_n4.key between 100 and 102); +SET hive.merge.nway.joins=true; -- Disjunction with pred on multiple inputs and right input (full outer join) +SET hive.mapjoin.full.outer=false; +EXPLAIN +SELECT * +FROM test1_n4 FULL OUTER JOIN test2_n2 +ON (test1_n4.value=test2_n2.value + OR test2_n2.key between 100 and 102); + +SELECT * +FROM test1_n4 FULL OUTER JOIN test2_n2 +ON (test1_n4.value=test2_n2.value + OR test2_n2.key between 100 and 102); + +SET hive.mapjoin.full.outer=true; +SET hive.merge.nway.joins=false; EXPLAIN SELECT * FROM test1_n4 FULL OUTER JOIN test2_n2 @@ -209,8 +257,25 @@ SELECT * FROM test1_n4 FULL OUTER JOIN test2_n2 ON (test1_n4.value=test2_n2.value OR test2_n2.key between 100 and 102); +SET hive.merge.nway.joins=true; -- Keys plus residual (full outer join) +SET hive.mapjoin.full.outer=false; +EXPLAIN +SELECT * +FROM test1_n4 FULL OUTER JOIN test2_n2 +ON (test1_n4.value=test2_n2.value + AND (test1_n4.key between 100 and 102 + OR test2_n2.key between 100 and 102)); + +SELECT * +FROM test1_n4 FULL OUTER JOIN test2_n2 +ON (test1_n4.value=test2_n2.value + AND (test1_n4.key between 100 and 102 + OR test2_n2.key between 100 and 102)); + +SET hive.mapjoin.full.outer=true; +SET hive.merge.nway.joins=false; EXPLAIN SELECT * FROM test1_n4 FULL OUTER JOIN test2_n2 @@ -223,8 +288,51 @@ FROM test1_n4 FULL OUTER JOIN test2_n2 ON (test1_n4.value=test2_n2.value AND (test1_n4.key between 100 and 102 OR test2_n2.key between 100 and 102)); +SET hive.merge.nway.joins=true; -- Mixed ( FOJ (ROJ, LOJ) ) +SET hive.mapjoin.full.outer=false; +EXPLAIN +SELECT * +FROM ( + SELECT test1_n4.key AS key1, test1_n4.value AS value1, test1_n4.col_1 AS col_1, + test2_n2.key AS key2, test2_n2.value AS value2, test2_n2.col_2 AS col_2 + FROM test1_n4 RIGHT OUTER JOIN test2_n2 + ON (test1_n4.value=test2_n2.value + AND (test1_n4.key between 100 and 102 + OR test2_n2.key between 100 and 102)) + ) sq1 +FULL OUTER JOIN ( + SELECT test1_n4.key AS key3, test1_n4.value AS value3, test1_n4.col_1 AS col_3, + test2_n2.key AS key4, test2_n2.value AS value4, test2_n2.col_2 AS col_4 + FROM test1_n4 LEFT OUTER JOIN test2_n2 + ON (test1_n4.value=test2_n2.value + AND (test1_n4.key between 100 and 102 + OR test2_n2.key between 100 and 102)) + ) sq2 +ON (sq1.value1 is null or sq2.value4 is null and sq2.value3 != sq1.value2); + +SELECT * +FROM ( + SELECT test1_n4.key AS key1, test1_n4.value AS value1, test1_n4.col_1 AS col_1, + test2_n2.key AS key2, test2_n2.value AS value2, test2_n2.col_2 AS col_2 + FROM test1_n4 RIGHT OUTER JOIN test2_n2 + ON (test1_n4.value=test2_n2.value + AND (test1_n4.key between 100 and 102 + OR test2_n2.key between 100 and 102)) + ) sq1 +FULL OUTER JOIN ( + SELECT test1_n4.key AS key3, test1_n4.value AS value3, test1_n4.col_1 AS col_3, + test2_n2.key AS key4, test2_n2.value AS value4, test2_n2.col_2 AS col_4 + FROM test1_n4 LEFT OUTER JOIN test2_n2 + ON (test1_n4.value=test2_n2.value + AND (test1_n4.key between 100 and 102 + OR test2_n2.key between 100 and 102)) + ) sq2 +ON (sq1.value1 is null or sq2.value4 is null and sq2.value3 != sq1.value2); + +SET hive.mapjoin.full.outer=true; +SET hive.merge.nway.joins=false; EXPLAIN SELECT * FROM ( @@ -263,3 +371,4 @@ FULL OUTER JOIN ( OR test2_n2.key between 100 and 102)) ) sq2 ON (sq1.value1 is null or sq2.value4 is null and sq2.value3 != sq1.value2); +SET hive.merge.nway.joins=true; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/queries/clientpositive/parquet_vectorization_0.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/parquet_vectorization_0.q b/ql/src/test/queries/clientpositive/parquet_vectorization_0.q index fa8ec86..55816d8 100644 --- a/ql/src/test/queries/clientpositive/parquet_vectorization_0.q +++ b/ql/src/test/queries/clientpositive/parquet_vectorization_0.q @@ -10,7 +10,7 @@ set hive.fetch.task.conversion=none; -- SORT_QUERY_RESULTS -- Use ORDER BY clauses to generate 2 stages. -EXPLAIN VECTORIZATION DETAIL +EXPLAIN VECTORIZATION EXPRESSION SELECT MIN(ctinyint) as c1, MAX(ctinyint), COUNT(ctinyint), @@ -25,7 +25,7 @@ SELECT MIN(ctinyint) as c1, FROM alltypesparquet ORDER BY c1; -EXPLAIN VECTORIZATION DETAIL +EXPLAIN VECTORIZATION EXPRESSION SELECT SUM(ctinyint) as c1 FROM alltypesparquet ORDER BY c1; @@ -59,7 +59,7 @@ SELECT FROM alltypesparquet ORDER BY c1; -EXPLAIN VECTORIZATION DETAIL +EXPLAIN VECTORIZATION EXPRESSION SELECT MIN(cbigint) as c1, MAX(cbigint), COUNT(cbigint), @@ -74,7 +74,7 @@ SELECT MIN(cbigint) as c1, FROM alltypesparquet ORDER BY c1; -EXPLAIN VECTORIZATION DETAIL +EXPLAIN VECTORIZATION EXPRESSION SELECT SUM(cbigint) as c1 FROM alltypesparquet ORDER BY c1; @@ -108,7 +108,7 @@ SELECT FROM alltypesparquet ORDER BY c1; -EXPLAIN VECTORIZATION DETAIL +EXPLAIN VECTORIZATION EXPRESSION SELECT MIN(cfloat) as c1, MAX(cfloat), COUNT(cfloat), @@ -123,7 +123,7 @@ SELECT MIN(cfloat) as c1, FROM alltypesparquet ORDER BY c1; -EXPLAIN VECTORIZATION DETAIL +EXPLAIN VECTORIZATION EXPRESSION SELECT SUM(cfloat) as c1 FROM alltypesparquet ORDER BY c1; @@ -157,7 +157,7 @@ SELECT FROM alltypesparquet ORDER BY c1; -EXPLAIN VECTORIZATION DETAIL +EXPLAIN VECTORIZATION EXPRESSION SELECT AVG(cbigint), (-(AVG(cbigint))), (-6432 + AVG(cbigint)), http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/queries/clientpositive/parquet_vectorization_1.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/parquet_vectorization_1.q b/ql/src/test/queries/clientpositive/parquet_vectorization_1.q index 311334d..33c5698 100644 --- a/ql/src/test/queries/clientpositive/parquet_vectorization_1.q +++ b/ql/src/test/queries/clientpositive/parquet_vectorization_1.q @@ -5,7 +5,7 @@ set hive.vectorized.execution.reduce.enabled=true; -- SORT_QUERY_RESULTS -EXPLAIN VECTORIZATION DETAIL +EXPLAIN VECTORIZATION EXPRESSION SELECT VAR_POP(ctinyint), (VAR_POP(ctinyint) / -26.28), SUM(cfloat), http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/queries/clientpositive/parquet_vectorization_10.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/parquet_vectorization_10.q b/ql/src/test/queries/clientpositive/parquet_vectorization_10.q index c560515..1329a47 100644 --- a/ql/src/test/queries/clientpositive/parquet_vectorization_10.q +++ b/ql/src/test/queries/clientpositive/parquet_vectorization_10.q @@ -4,7 +4,7 @@ set hive.fetch.task.conversion=none; -- SORT_QUERY_RESULTS -EXPLAIN VECTORIZATION DETAIL +EXPLAIN VECTORIZATION EXPRESSION SELECT cdouble, ctimestamp1, ctinyint, http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/queries/clientpositive/parquet_vectorization_11.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/parquet_vectorization_11.q b/ql/src/test/queries/clientpositive/parquet_vectorization_11.q index a6e6810..4115c29 100644 --- a/ql/src/test/queries/clientpositive/parquet_vectorization_11.q +++ b/ql/src/test/queries/clientpositive/parquet_vectorization_11.q @@ -4,7 +4,7 @@ set hive.fetch.task.conversion=none; -- SORT_QUERY_RESULTS -EXPLAIN VECTORIZATION DETAIL +EXPLAIN VECTORIZATION EXPRESSION SELECT cstring1, cboolean1, cdouble, http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/queries/clientpositive/parquet_vectorization_12.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/parquet_vectorization_12.q b/ql/src/test/queries/clientpositive/parquet_vectorization_12.q index ff6949a..e878755 100644 --- a/ql/src/test/queries/clientpositive/parquet_vectorization_12.q +++ b/ql/src/test/queries/clientpositive/parquet_vectorization_12.q @@ -5,7 +5,7 @@ set hive.fetch.task.conversion=none; -- SORT_QUERY_RESULTS -EXPLAIN VECTORIZATION DETAIL +EXPLAIN VECTORIZATION EXPRESSION SELECT cbigint, cboolean1, cstring1, http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/queries/clientpositive/parquet_vectorization_13.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/parquet_vectorization_13.q b/ql/src/test/queries/clientpositive/parquet_vectorization_13.q index 0b23f50..d13ae73 100644 --- a/ql/src/test/queries/clientpositive/parquet_vectorization_13.q +++ b/ql/src/test/queries/clientpositive/parquet_vectorization_13.q @@ -6,7 +6,7 @@ set hive.fetch.task.conversion=none; -- SORT_QUERY_RESULTS -EXPLAIN VECTORIZATION DETAIL +EXPLAIN VECTORIZATION EXPRESSION SELECT cboolean1, ctinyint, ctimestamp1, http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/queries/clientpositive/parquet_vectorization_14.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/parquet_vectorization_14.q b/ql/src/test/queries/clientpositive/parquet_vectorization_14.q index 3ec437f..868c899 100644 --- a/ql/src/test/queries/clientpositive/parquet_vectorization_14.q +++ b/ql/src/test/queries/clientpositive/parquet_vectorization_14.q @@ -6,7 +6,7 @@ set hive.fetch.task.conversion=none; -- SORT_QUERY_RESULTS -EXPLAIN VECTORIZATION DETAIL +EXPLAIN VECTORIZATION EXPRESSION SELECT ctimestamp1, cfloat, cstring1, http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/queries/clientpositive/parquet_vectorization_15.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/parquet_vectorization_15.q b/ql/src/test/queries/clientpositive/parquet_vectorization_15.q index 6906523..70cf175 100644 --- a/ql/src/test/queries/clientpositive/parquet_vectorization_15.q +++ b/ql/src/test/queries/clientpositive/parquet_vectorization_15.q @@ -9,7 +9,7 @@ set hive.vectorized.execution.reduce.enabled=false; -- SORT_QUERY_RESULTS -EXPLAIN VECTORIZATION DETAIL +EXPLAIN VECTORIZATION EXPRESSION SELECT cfloat, cboolean1, cdouble, http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/queries/clientpositive/parquet_vectorization_16.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/parquet_vectorization_16.q b/ql/src/test/queries/clientpositive/parquet_vectorization_16.q index 49d7b5a..afae446 100644 --- a/ql/src/test/queries/clientpositive/parquet_vectorization_16.q +++ b/ql/src/test/queries/clientpositive/parquet_vectorization_16.q @@ -6,7 +6,7 @@ set hive.fetch.task.conversion=none; -- SORT_QUERY_RESULTS -EXPLAIN VECTORIZATION DETAIL +EXPLAIN VECTORIZATION EXPRESSION SELECT cstring1, cdouble, ctimestamp1, http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/queries/clientpositive/parquet_vectorization_17.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/parquet_vectorization_17.q b/ql/src/test/queries/clientpositive/parquet_vectorization_17.q index dafe677..1af4dde 100644 --- a/ql/src/test/queries/clientpositive/parquet_vectorization_17.q +++ b/ql/src/test/queries/clientpositive/parquet_vectorization_17.q @@ -6,7 +6,7 @@ set hive.fetch.task.conversion=none; -- SORT_QUERY_RESULTS -EXPLAIN VECTORIZATION DETAIL +EXPLAIN VECTORIZATION EXPRESSION SELECT cfloat, cstring1, cint, http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/queries/clientpositive/parquet_vectorization_2.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/parquet_vectorization_2.q b/ql/src/test/queries/clientpositive/parquet_vectorization_2.q index a2c87f7..8bec598 100644 --- a/ql/src/test/queries/clientpositive/parquet_vectorization_2.q +++ b/ql/src/test/queries/clientpositive/parquet_vectorization_2.q @@ -4,7 +4,7 @@ set hive.fetch.task.conversion=none; -- SORT_QUERY_RESULTS -EXPLAIN VECTORIZATION DETAIL +EXPLAIN VECTORIZATION EXPRESSION SELECT AVG(csmallint), (AVG(csmallint) % -563), (AVG(csmallint) + 762), http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/queries/clientpositive/parquet_vectorization_3.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/parquet_vectorization_3.q b/ql/src/test/queries/clientpositive/parquet_vectorization_3.q index 7b08952..4f4bd57 100644 --- a/ql/src/test/queries/clientpositive/parquet_vectorization_3.q +++ b/ql/src/test/queries/clientpositive/parquet_vectorization_3.q @@ -5,7 +5,7 @@ set hive.fetch.task.conversion=none; -- SORT_QUERY_RESULTS -EXPLAIN VECTORIZATION DETAIL +EXPLAIN VECTORIZATION EXPRESSION SELECT STDDEV_SAMP(csmallint), (STDDEV_SAMP(csmallint) - 10.175), STDDEV_POP(ctinyint), http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/queries/clientpositive/parquet_vectorization_4.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/parquet_vectorization_4.q b/ql/src/test/queries/clientpositive/parquet_vectorization_4.q index d2bd832..380cda8 100644 --- a/ql/src/test/queries/clientpositive/parquet_vectorization_4.q +++ b/ql/src/test/queries/clientpositive/parquet_vectorization_4.q @@ -4,7 +4,7 @@ set hive.fetch.task.conversion=none; -- SORT_QUERY_RESULTS -EXPLAIN VECTORIZATION DETAIL +EXPLAIN VECTORIZATION EXPRESSION SELECT SUM(cint), (SUM(cint) * -563), (-3728 + SUM(cint)), http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/queries/clientpositive/parquet_vectorization_5.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/parquet_vectorization_5.q b/ql/src/test/queries/clientpositive/parquet_vectorization_5.q index 071048c..4dd9705 100644 --- a/ql/src/test/queries/clientpositive/parquet_vectorization_5.q +++ b/ql/src/test/queries/clientpositive/parquet_vectorization_5.q @@ -4,7 +4,7 @@ set hive.fetch.task.conversion=none; -- SORT_QUERY_RESULTS -EXPLAIN VECTORIZATION DETAIL +EXPLAIN VECTORIZATION EXPRESSION SELECT MAX(csmallint), (MAX(csmallint) * -75), COUNT(*), http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/queries/clientpositive/parquet_vectorization_6.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/parquet_vectorization_6.q b/ql/src/test/queries/clientpositive/parquet_vectorization_6.q index 7a7a18b..bee0c53 100644 --- a/ql/src/test/queries/clientpositive/parquet_vectorization_6.q +++ b/ql/src/test/queries/clientpositive/parquet_vectorization_6.q @@ -4,7 +4,7 @@ set hive.fetch.task.conversion=none; -- SORT_QUERY_RESULTS -EXPLAIN VECTORIZATION DETAIL +EXPLAIN VECTORIZATION EXPRESSION SELECT cboolean1, cfloat, cstring1, http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/queries/clientpositive/parquet_vectorization_7.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/parquet_vectorization_7.q b/ql/src/test/queries/clientpositive/parquet_vectorization_7.q index 55f21af..70717f4 100644 --- a/ql/src/test/queries/clientpositive/parquet_vectorization_7.q +++ b/ql/src/test/queries/clientpositive/parquet_vectorization_7.q @@ -6,7 +6,7 @@ set hive.fetch.task.conversion=none; -- SORT_QUERY_RESULTS -EXPLAIN VECTORIZATION DETAIL +EXPLAIN VECTORIZATION EXPRESSION SELECT cboolean1, cbigint, csmallint, http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/queries/clientpositive/parquet_vectorization_8.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/parquet_vectorization_8.q b/ql/src/test/queries/clientpositive/parquet_vectorization_8.q index 4d97cad..6a5a2c5 100644 --- a/ql/src/test/queries/clientpositive/parquet_vectorization_8.q +++ b/ql/src/test/queries/clientpositive/parquet_vectorization_8.q @@ -6,7 +6,7 @@ set hive.fetch.task.conversion=none; -- SORT_QUERY_RESULTS -EXPLAIN VECTORIZATION DETAIL +EXPLAIN VECTORIZATION EXPRESSION SELECT ctimestamp1, cdouble, cboolean1, http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/queries/clientpositive/parquet_vectorization_9.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/parquet_vectorization_9.q b/ql/src/test/queries/clientpositive/parquet_vectorization_9.q index 49d7b5a..afae446 100644 --- a/ql/src/test/queries/clientpositive/parquet_vectorization_9.q +++ b/ql/src/test/queries/clientpositive/parquet_vectorization_9.q @@ -6,7 +6,7 @@ set hive.fetch.task.conversion=none; -- SORT_QUERY_RESULTS -EXPLAIN VECTORIZATION DETAIL +EXPLAIN VECTORIZATION EXPRESSION SELECT cstring1, cdouble, ctimestamp1, http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/queries/clientpositive/parquet_vectorization_limit.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/parquet_vectorization_limit.q b/ql/src/test/queries/clientpositive/parquet_vectorization_limit.q index 399c67e..6dac0d6 100644 --- a/ql/src/test/queries/clientpositive/parquet_vectorization_limit.q +++ b/ql/src/test/queries/clientpositive/parquet_vectorization_limit.q @@ -14,31 +14,31 @@ set hive.limit.pushdown.memory.usage=0.3f; -- HIVE-3562 Some limit can be pushed down to map stage - c/p parts from limit_pushdown -explain vectorization detail +explain VECTORIZATION EXPRESSION select ctinyint,cdouble,csmallint from alltypesparquet where ctinyint is not null order by ctinyint,cdouble limit 20; select ctinyint,cdouble,csmallint from alltypesparquet where ctinyint is not null order by ctinyint,cdouble limit 20; -- deduped RS -explain vectorization detail +explain VECTORIZATION EXPRESSION select ctinyint,avg(cdouble + 1) from alltypesparquet group by ctinyint order by ctinyint limit 20; select ctinyint,avg(cdouble + 1) from alltypesparquet group by ctinyint order by ctinyint limit 20; -- distincts -explain vectorization detail +explain VECTORIZATION EXPRESSION select distinct(ctinyint) from alltypesparquet limit 20; select distinct(ctinyint) from alltypesparquet limit 20; -explain vectorization detail +explain VECTORIZATION EXPRESSION select ctinyint, count(distinct(cdouble)) from alltypesparquet group by ctinyint order by ctinyint limit 20; select ctinyint, count(distinct(cdouble)) from alltypesparquet group by ctinyint order by ctinyint limit 20; -- limit zero -explain vectorization detail +explain VECTORIZATION EXPRESSION select ctinyint,cdouble from alltypesparquet order by ctinyint limit 0; select ctinyint,cdouble from alltypesparquet order by ctinyint limit 0; -- 2MR (applied to last RS) -explain vectorization detail +explain VECTORIZATION EXPRESSION select cdouble, sum(ctinyint) as sum from alltypesparquet where ctinyint is not null group by cdouble order by sum, cdouble limit 20; select cdouble, sum(ctinyint) as sum from alltypesparquet where ctinyint is not null group by cdouble order by sum, cdouble limit 20; http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/queries/clientpositive/tez_dynpart_hashjoin_3.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/tez_dynpart_hashjoin_3.q b/ql/src/test/queries/clientpositive/tez_dynpart_hashjoin_3.q index 0500a62..10e982e 100644 --- a/ql/src/test/queries/clientpositive/tez_dynpart_hashjoin_3.q +++ b/ql/src/test/queries/clientpositive/tez_dynpart_hashjoin_3.q @@ -24,3 +24,9 @@ select a.* from alltypesorc a left outer join src b on a.cint = cast(b.key as int) and (a.cint < 100) limit 1; + +explain +select a.* +from alltypesorc a left outer join src b +on a.cint = cast(b.key as int) +limit 1; http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/queries/clientpositive/union14.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/union14.q b/ql/src/test/queries/clientpositive/union14.q index 34f73cd..f56e53d 100644 --- a/ql/src/test/queries/clientpositive/union14.q +++ b/ql/src/test/queries/clientpositive/union14.q @@ -2,7 +2,9 @@ --! qt:dataset:src set hive.mapred.mode=nonstrict; set hive.map.aggr = true; --- SORT_BEFORE_DIFF + +-- SORT_QUERY_RESULTS + -- union case: 1 subquery is a map-reduce job, different inputs for sub-queries, followed by reducesink explain http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/queries/clientpositive/union7.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/union7.q b/ql/src/test/queries/clientpositive/union7.q index 4d3eed5..8668be0 100644 --- a/ql/src/test/queries/clientpositive/union7.q +++ b/ql/src/test/queries/clientpositive/union7.q @@ -4,7 +4,8 @@ set hive.mapred.mode=nonstrict; set hive.explain.user=false; set hive.map.aggr = true; --- SORT_BEFORE_DIFF +-- SORT_QUERY_RESULTS + -- union case: 1 subquery is a map-reduce job, different inputs for sub-queries, followed by reducesink explain http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/queries/clientpositive/union_null.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/union_null.q b/ql/src/test/queries/clientpositive/union_null.q index 49a0e08..44d148f 100644 --- a/ql/src/test/queries/clientpositive/union_null.q +++ b/ql/src/test/queries/clientpositive/union_null.q @@ -1,6 +1,7 @@ --! qt:dataset:src1 --! qt:dataset:src --- SORT_BEFORE_DIFF + +-- SORT_QUERY_RESULTS -- HIVE-2901 select x from (select * from (select value as x from src order by x limit 5)a union all select * from (select cast(NULL as string) as x from src limit 5)b )a; http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/queries/clientpositive/union_view.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/union_view.q b/ql/src/test/queries/clientpositive/union_view.q index 186cb02..a378707 100644 --- a/ql/src/test/queries/clientpositive/union_view.q +++ b/ql/src/test/queries/clientpositive/union_view.q @@ -3,6 +3,8 @@ set hive.mapred.mode=nonstrict; set hive.stats.dbclass=fs; set hive.explain.user=false; +-- SORT_QUERY_RESULTS + CREATE TABLE src_union_1_n0 (key int, value string) PARTITIONED BY (ds string); CREATE TABLE src_union_2_n0 (key int, value string) PARTITIONED BY (ds string, part_1 string); http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/queries/clientpositive/vector_full_outer_join.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/vector_full_outer_join.q b/ql/src/test/queries/clientpositive/vector_full_outer_join.q new file mode 100644 index 0000000..cc77488 --- /dev/null +++ b/ql/src/test/queries/clientpositive/vector_full_outer_join.q @@ -0,0 +1,82 @@ +set hive.cli.print.header=true; +set hive.mapred.mode=nonstrict; +set hive.explain.user=false; +set hive.fetch.task.conversion=none; +set hive.auto.convert.join=true; +set hive.auto.convert.join.noconditionaltask=true; +set hive.auto.convert.join.noconditionaltask.size=10000; + +-- SORT_QUERY_RESULTS + +drop table if exists TJOIN1; +drop table if exists TJOIN2; +create table if not exists TJOIN1 (RNUM int , C1 int, C2 int) STORED AS orc; +create table if not exists TJOIN2 (RNUM int , C1 int, C2 char(2)) STORED AS orc; +create table if not exists TJOIN1STAGE (RNUM int , C1 int, C2 char(2)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE ; +create table if not exists TJOIN2STAGE (RNUM int , C1 int, C2 char(2)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE ; +LOAD DATA LOCAL INPATH '../../data/files/tjoin1.txt' OVERWRITE INTO TABLE TJOIN1STAGE; +LOAD DATA LOCAL INPATH '../../data/files/tjoin2.txt' OVERWRITE INTO TABLE TJOIN2STAGE; +INSERT INTO TABLE TJOIN1 SELECT * from TJOIN1STAGE; +INSERT INTO TABLE TJOIN2 SELECT * from TJOIN2STAGE; + +SET hive.mapjoin.full.outer=true; + +set hive.vectorized.execution.enabled=false; +set hive.mapjoin.hybridgrace.hashtable=false; +explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ); + +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ); + +set hive.vectorized.execution.enabled=false; +set hive.mapjoin.hybridgrace.hashtable=true; +explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ); + +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ); + + +set hive.vectorized.execution.enabled=true; +set hive.mapjoin.hybridgrace.hashtable=false; +SET hive.vectorized.execution.mapjoin.native.enabled=false; +explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ); + +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ); + +set hive.vectorized.execution.enabled=true; +set hive.mapjoin.hybridgrace.hashtable=true; +SET hive.vectorized.execution.mapjoin.native.enabled=false; +explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ); + +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ); + +set hive.vectorized.execution.enabled=true; +set hive.mapjoin.hybridgrace.hashtable=false; +SET hive.vectorized.execution.mapjoin.native.enabled=true; +explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ); + +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ); + +set hive.vectorized.execution.enabled=true; +set hive.mapjoin.hybridgrace.hashtable=true; +SET hive.vectorized.execution.mapjoin.native.enabled=true; +explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ); + +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ); + + +-- Omit tjoin2.c1 +explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ); + +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ); + +-- Omit tjoin2.c1 and tjoin2.c2 +explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ); + +select tjoin1.rnum, tjoin1.c1, tjoin1.c2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ); http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/queries/clientpositive/vector_fullouter_mapjoin_1_fast.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/vector_fullouter_mapjoin_1_fast.q b/ql/src/test/queries/clientpositive/vector_fullouter_mapjoin_1_fast.q new file mode 100644 index 0000000..1685f35 --- /dev/null +++ b/ql/src/test/queries/clientpositive/vector_fullouter_mapjoin_1_fast.q @@ -0,0 +1,290 @@ +set hive.mapred.mode=nonstrict; +set hive.explain.user=false; +set hive.vectorized.execution.enabled=true; +set hive.vectorized.execution.mapjoin.native.enabled=true; +set hive.vectorized.execution.mapjoin.native.fast.hashtable.enabled=true; + +set hive.auto.convert.join=true; +SET hive.auto.convert.join.noconditionaltask=true; +set hive.stats.fetch.column.stats=false; + +------------------------------------------------------------------------------------------ +-- FULL OUTER Vectorized Native MapJoin variation for FAST hash table implementation. +------------------------------------------------------------------------------------------ + +-- SORT_QUERY_RESULTS + +------------------------------------------------------------------------------------------ +-- DYNAMIC PARTITION HASH JOIN +------------------------------------------------------------------------------------------ + +set hive.optimize.dynamic.partition.hashjoin=true; + +set hive.mapjoin.hybridgrace.hashtable=false; + +-- NOTE: Use very small sizes here to skip SHARED MEMORY MapJoin and force usage +-- NOTE: of DYNAMIC PARTITION HASH JOIN instead. +set hive.auto.convert.join.noconditionaltask.size=500; +set hive.exec.reducers.bytes.per.reducer=500; + +------------------------------------------------------------------------------------------ +-- Single LONG key +------------------------------------------------------------------------------------------ + +CREATE TABLE fullouter_long_big_1a_txt(key bigint) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_txt; +CREATE TABLE fullouter_long_big_1a STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_txt; + +CREATE TABLE fullouter_long_big_1a_nonull_txt(key bigint) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_nonull_txt; +CREATE TABLE fullouter_long_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_nonull_txt; + +CREATE TABLE fullouter_long_small_1a_txt(key bigint, s_date date) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_txt; +CREATE TABLE fullouter_long_small_1a STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_txt; + +CREATE TABLE fullouter_long_small_1a_nonull_txt(key bigint, s_date date) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_nonull_txt; +CREATE TABLE fullouter_long_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_nonull_txt; + +analyze table fullouter_long_big_1a compute statistics; +analyze table fullouter_long_big_1a compute statistics for columns; +analyze table fullouter_long_big_1a_nonull compute statistics; +analyze table fullouter_long_big_1a_nonull compute statistics for columns; +analyze table fullouter_long_small_1a compute statistics; +analyze table fullouter_long_small_1a compute statistics for columns; +analyze table fullouter_long_small_1a_nonull compute statistics; +analyze table fullouter_long_small_1a_nonull compute statistics for columns; + +-- Do first one with FULL OUTER MapJoin NOT Enabled. +SET hive.mapjoin.full.outer=false; +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +SET hive.mapjoin.full.outer=true; + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +-- Big table without NULL key(s). +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +-- Small table without NULL key(s). +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + +-- Both Big and Small tables without NULL key(s). +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + + +CREATE TABLE fullouter_long_big_1b(key smallint) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1b.txt' OVERWRITE INTO TABLE fullouter_long_big_1b; + +CREATE TABLE fullouter_long_small_1b(key smallint, s_timestamp timestamp) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1b.txt' OVERWRITE INTO TABLE fullouter_long_small_1b; + +analyze table fullouter_long_big_1b compute statistics; +analyze table fullouter_long_big_1b compute statistics for columns; +analyze table fullouter_long_small_1b compute statistics; +analyze table fullouter_long_small_1b compute statistics for columns; + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key; + + +CREATE TABLE fullouter_long_big_1c(key int, b_string string) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1c.txt' OVERWRITE INTO TABLE fullouter_long_big_1c; + +CREATE TABLE fullouter_long_small_1c(key int, s_decimal decimal(38, 18)) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1c.txt' OVERWRITE INTO TABLE fullouter_long_small_1c; + +analyze table fullouter_long_big_1c compute statistics; +analyze table fullouter_long_big_1c compute statistics for columns; +analyze table fullouter_long_small_1c compute statistics; +analyze table fullouter_long_small_1c compute statistics for columns; + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key; + +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key; + + +CREATE TABLE fullouter_long_big_1d(key int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1d.txt' OVERWRITE INTO TABLE fullouter_long_big_1d; + +CREATE TABLE fullouter_long_small_1d(key int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1d.txt' OVERWRITE INTO TABLE fullouter_long_small_1d; + +analyze table fullouter_long_big_1d compute statistics; +analyze table fullouter_long_big_1d compute statistics for columns; +analyze table fullouter_long_small_1d compute statistics; +analyze table fullouter_long_small_1d compute statistics for columns; + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key; + + +------------------------------------------------------------------------------------------ +-- MULTI-KEY key +------------------------------------------------------------------------------------------ + +CREATE TABLE fullouter_multikey_big_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_txt; +CREATE TABLE fullouter_multikey_big_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_txt; + +CREATE TABLE fullouter_multikey_big_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_nonull_txt; +CREATE TABLE fullouter_multikey_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_nonull_txt; + +CREATE TABLE fullouter_multikey_small_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_txt; +CREATE TABLE fullouter_multikey_small_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_txt; + +CREATE TABLE fullouter_multikey_small_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_nonull_txt; +CREATE TABLE fullouter_multikey_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_nonull_txt; + +analyze table fullouter_multikey_big_1a compute statistics; +analyze table fullouter_multikey_big_1a compute statistics for columns; +analyze table fullouter_multikey_big_1a_nonull compute statistics; +analyze table fullouter_multikey_big_1a_nonull compute statistics for columns; +analyze table fullouter_multikey_small_1a compute statistics; +analyze table fullouter_multikey_small_1a compute statistics for columns; +analyze table fullouter_multikey_small_1a_nonull compute statistics; +analyze table fullouter_multikey_small_1a_nonull compute statistics for columns; + + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Big table without NULL key(s). +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Small table without NULL key(s). +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Both Big and Small tables without NULL key(s). +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + + + + +CREATE TABLE fullouter_multikey_big_1b_txt(key0 timestamp, key1 smallint, key2 string) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1b_txt; +CREATE TABLE fullouter_multikey_big_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1b_txt; + +CREATE TABLE fullouter_multikey_small_1b_txt(key0 timestamp, key1 smallint, key2 string, s_decimal decimal(38, 18)) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1b_txt; +CREATE TABLE fullouter_multikey_small_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1b_txt; + +analyze table fullouter_multikey_big_1b_txt compute statistics; +analyze table fullouter_multikey_big_1b_txt compute statistics for columns; +analyze table fullouter_multikey_small_1b_txt compute statistics; +analyze table fullouter_multikey_small_1b_txt compute statistics for columns; + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1; + + +------------------------------------------------------------------------------------------ +-- Single STRING key +------------------------------------------------------------------------------------------ + +CREATE TABLE fullouter_string_big_1a_txt(key string) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_txt; +CREATE TABLE fullouter_string_big_1a STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_txt; + +CREATE TABLE fullouter_string_big_1a_nonull_txt(key string) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_nonull_txt; +CREATE TABLE fullouter_string_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_nonull_txt; + +CREATE TABLE fullouter_string_small_1a_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_txt; +CREATE TABLE fullouter_string_small_1a STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_txt; + +CREATE TABLE fullouter_string_small_1a_nonull_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_nonull_txt; +CREATE TABLE fullouter_string_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_nonull_txt; + +analyze table fullouter_string_big_1a compute statistics; +analyze table fullouter_string_big_1a compute statistics for columns; +analyze table fullouter_string_big_1a_nonull compute statistics; +analyze table fullouter_string_big_1a_nonull compute statistics for columns; +analyze table fullouter_string_small_1a compute statistics; +analyze table fullouter_string_small_1a compute statistics for columns; +analyze table fullouter_string_small_1a_nonull compute statistics; +analyze table fullouter_string_small_1a_nonull compute statistics for columns; + + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +-- Big table without NULL key(s). +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +-- Small table without NULL key(s). +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + +-- Both Big and Small tables without NULL key(s). +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + + + http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/queries/clientpositive/vector_fullouter_mapjoin_1_optimized.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/vector_fullouter_mapjoin_1_optimized.q b/ql/src/test/queries/clientpositive/vector_fullouter_mapjoin_1_optimized.q new file mode 100644 index 0000000..8b59266 --- /dev/null +++ b/ql/src/test/queries/clientpositive/vector_fullouter_mapjoin_1_optimized.q @@ -0,0 +1,290 @@ +set hive.mapred.mode=nonstrict; +set hive.explain.user=false; +set hive.vectorized.execution.enabled=true; +set hive.vectorized.execution.mapjoin.native.enabled=true; +set hive.vectorized.execution.mapjoin.native.fast.hashtable.enabled=false; + +set hive.auto.convert.join=true; +SET hive.auto.convert.join.noconditionaltask=true; +set hive.stats.fetch.column.stats=false; + +------------------------------------------------------------------------------------------ +-- FULL OUTER Vectorized Native MapJoin variation for OPTIMIZED hash table implementation. +------------------------------------------------------------------------------------------ + +-- SORT_QUERY_RESULTS + +------------------------------------------------------------------------------------------ +-- DYNAMIC PARTITION HASH JOIN +------------------------------------------------------------------------------------------ + +set hive.optimize.dynamic.partition.hashjoin=true; + +set hive.mapjoin.hybridgrace.hashtable=false; + +-- NOTE: Use very small sizes here to skip SHARED MEMORY MapJoin and force usage +-- NOTE: of DYNAMIC PARTITION HASH JOIN instead. +set hive.auto.convert.join.noconditionaltask.size=500; +set hive.exec.reducers.bytes.per.reducer=500; + +------------------------------------------------------------------------------------------ +-- Single LONG key +------------------------------------------------------------------------------------------ + +CREATE TABLE fullouter_long_big_1a_txt(key bigint) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_txt; +CREATE TABLE fullouter_long_big_1a STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_txt; + +CREATE TABLE fullouter_long_big_1a_nonull_txt(key bigint) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_nonull_txt; +CREATE TABLE fullouter_long_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_nonull_txt; + +CREATE TABLE fullouter_long_small_1a_txt(key bigint, s_date date) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_txt; +CREATE TABLE fullouter_long_small_1a STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_txt; + +CREATE TABLE fullouter_long_small_1a_nonull_txt(key bigint, s_date date) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_nonull_txt; +CREATE TABLE fullouter_long_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_nonull_txt; + +analyze table fullouter_long_big_1a compute statistics; +analyze table fullouter_long_big_1a compute statistics for columns; +analyze table fullouter_long_big_1a_nonull compute statistics; +analyze table fullouter_long_big_1a_nonull compute statistics for columns; +analyze table fullouter_long_small_1a compute statistics; +analyze table fullouter_long_small_1a compute statistics for columns; +analyze table fullouter_long_small_1a_nonull compute statistics; +analyze table fullouter_long_small_1a_nonull compute statistics for columns; + +-- Do first one with FULL OUTER MapJoin NOT Enabled. +SET hive.mapjoin.full.outer=false; +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +SET hive.mapjoin.full.outer=true; + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +-- Big table without NULL key(s). +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +-- Small table without NULL key(s). +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + +-- Both Big and Small tables without NULL key(s). +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + + +CREATE TABLE fullouter_long_big_1b(key smallint) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1b.txt' OVERWRITE INTO TABLE fullouter_long_big_1b; + +CREATE TABLE fullouter_long_small_1b(key smallint, s_timestamp timestamp) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1b.txt' OVERWRITE INTO TABLE fullouter_long_small_1b; + +analyze table fullouter_long_big_1b compute statistics; +analyze table fullouter_long_big_1b compute statistics for columns; +analyze table fullouter_long_small_1b compute statistics; +analyze table fullouter_long_small_1b compute statistics for columns; + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key; + + +CREATE TABLE fullouter_long_big_1c(key int, b_string string) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1c.txt' OVERWRITE INTO TABLE fullouter_long_big_1c; + +CREATE TABLE fullouter_long_small_1c(key int, s_decimal decimal(38, 18)) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1c.txt' OVERWRITE INTO TABLE fullouter_long_small_1c; + +analyze table fullouter_long_big_1c compute statistics; +analyze table fullouter_long_big_1c compute statistics for columns; +analyze table fullouter_long_small_1c compute statistics; +analyze table fullouter_long_small_1c compute statistics for columns; + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key; + +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key; + + +CREATE TABLE fullouter_long_big_1d(key int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1d.txt' OVERWRITE INTO TABLE fullouter_long_big_1d; + +CREATE TABLE fullouter_long_small_1d(key int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1d.txt' OVERWRITE INTO TABLE fullouter_long_small_1d; + +analyze table fullouter_long_big_1d compute statistics; +analyze table fullouter_long_big_1d compute statistics for columns; +analyze table fullouter_long_small_1d compute statistics; +analyze table fullouter_long_small_1d compute statistics for columns; + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key; + + +------------------------------------------------------------------------------------------ +-- MULTI-KEY key +------------------------------------------------------------------------------------------ + +CREATE TABLE fullouter_multikey_big_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_txt; +CREATE TABLE fullouter_multikey_big_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_txt; + +CREATE TABLE fullouter_multikey_big_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_nonull_txt; +CREATE TABLE fullouter_multikey_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_nonull_txt; + +CREATE TABLE fullouter_multikey_small_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_txt; +CREATE TABLE fullouter_multikey_small_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_txt; + +CREATE TABLE fullouter_multikey_small_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_nonull_txt; +CREATE TABLE fullouter_multikey_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_nonull_txt; + +analyze table fullouter_multikey_big_1a compute statistics; +analyze table fullouter_multikey_big_1a compute statistics for columns; +analyze table fullouter_multikey_big_1a_nonull compute statistics; +analyze table fullouter_multikey_big_1a_nonull compute statistics for columns; +analyze table fullouter_multikey_small_1a compute statistics; +analyze table fullouter_multikey_small_1a compute statistics for columns; +analyze table fullouter_multikey_small_1a_nonull compute statistics; +analyze table fullouter_multikey_small_1a_nonull compute statistics for columns; + + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Big table without NULL key(s). +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Small table without NULL key(s). +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Both Big and Small tables without NULL key(s). +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + + + + +CREATE TABLE fullouter_multikey_big_1b_txt(key0 timestamp, key1 smallint, key2 string) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1b_txt; +CREATE TABLE fullouter_multikey_big_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1b_txt; + +CREATE TABLE fullouter_multikey_small_1b_txt(key0 timestamp, key1 smallint, key2 string, s_decimal decimal(38, 18)) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1b_txt; +CREATE TABLE fullouter_multikey_small_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1b_txt; + +analyze table fullouter_multikey_big_1b_txt compute statistics; +analyze table fullouter_multikey_big_1b_txt compute statistics for columns; +analyze table fullouter_multikey_small_1b_txt compute statistics; +analyze table fullouter_multikey_small_1b_txt compute statistics for columns; + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1; + + +------------------------------------------------------------------------------------------ +-- Single STRING key +------------------------------------------------------------------------------------------ + +CREATE TABLE fullouter_string_big_1a_txt(key string) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_txt; +CREATE TABLE fullouter_string_big_1a STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_txt; + +CREATE TABLE fullouter_string_big_1a_nonull_txt(key string) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_nonull_txt; +CREATE TABLE fullouter_string_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_nonull_txt; + +CREATE TABLE fullouter_string_small_1a_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_txt; +CREATE TABLE fullouter_string_small_1a STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_txt; + +CREATE TABLE fullouter_string_small_1a_nonull_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_nonull_txt; +CREATE TABLE fullouter_string_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_nonull_txt; + +analyze table fullouter_string_big_1a compute statistics; +analyze table fullouter_string_big_1a compute statistics for columns; +analyze table fullouter_string_big_1a_nonull compute statistics; +analyze table fullouter_string_big_1a_nonull compute statistics for columns; +analyze table fullouter_string_small_1a compute statistics; +analyze table fullouter_string_small_1a compute statistics for columns; +analyze table fullouter_string_small_1a_nonull compute statistics; +analyze table fullouter_string_small_1a_nonull compute statistics for columns; + + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +-- Big table without NULL key(s). +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +-- Small table without NULL key(s). +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + +-- Both Big and Small tables without NULL key(s). +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + + + http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/queries/clientpositive/vector_fullouter_mapjoin_1_optimized_passthru.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/vector_fullouter_mapjoin_1_optimized_passthru.q b/ql/src/test/queries/clientpositive/vector_fullouter_mapjoin_1_optimized_passthru.q new file mode 100644 index 0000000..869668e --- /dev/null +++ b/ql/src/test/queries/clientpositive/vector_fullouter_mapjoin_1_optimized_passthru.q @@ -0,0 +1,290 @@ +set hive.mapred.mode=nonstrict; +set hive.explain.user=false; +set hive.vectorized.execution.enabled=true; +set hive.vectorized.execution.mapjoin.native.enabled=false; +set hive.vectorized.execution.mapjoin.native.fast.hashtable.enabled=false; + +set hive.auto.convert.join=true; +SET hive.auto.convert.join.noconditionaltask=true; +set hive.stats.fetch.column.stats=false; + +------------------------------------------------------------------------------------------ +-- FULL OUTER Vectorized PASS-TRUE Mode MapJoin variation for OPTIMIZED hash table implementation. +------------------------------------------------------------------------------------------ + +-- SORT_QUERY_RESULTS + +------------------------------------------------------------------------------------------ +-- DYNAMIC PARTITION HASH JOIN +------------------------------------------------------------------------------------------ + +set hive.optimize.dynamic.partition.hashjoin=true; + +set hive.mapjoin.hybridgrace.hashtable=false; + +-- NOTE: Use very small sizes here to skip SHARED MEMORY MapJoin and force usage +-- NOTE: of DYNAMIC PARTITION HASH JOIN instead. +set hive.auto.convert.join.noconditionaltask.size=500; +set hive.exec.reducers.bytes.per.reducer=500; + +------------------------------------------------------------------------------------------ +-- Single LONG key +------------------------------------------------------------------------------------------ + +CREATE TABLE fullouter_long_big_1a_txt(key bigint) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_txt; +CREATE TABLE fullouter_long_big_1a STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_txt; + +CREATE TABLE fullouter_long_big_1a_nonull_txt(key bigint) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_nonull_txt; +CREATE TABLE fullouter_long_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_nonull_txt; + +CREATE TABLE fullouter_long_small_1a_txt(key bigint, s_date date) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_txt; +CREATE TABLE fullouter_long_small_1a STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_txt; + +CREATE TABLE fullouter_long_small_1a_nonull_txt(key bigint, s_date date) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_nonull_txt; +CREATE TABLE fullouter_long_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_nonull_txt; + +analyze table fullouter_long_big_1a compute statistics; +analyze table fullouter_long_big_1a compute statistics for columns; +analyze table fullouter_long_big_1a_nonull compute statistics; +analyze table fullouter_long_big_1a_nonull compute statistics for columns; +analyze table fullouter_long_small_1a compute statistics; +analyze table fullouter_long_small_1a compute statistics for columns; +analyze table fullouter_long_small_1a_nonull compute statistics; +analyze table fullouter_long_small_1a_nonull compute statistics for columns; + +-- Do first one with FULL OUTER MapJoin NOT Enabled. +SET hive.mapjoin.full.outer=false; +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +SET hive.mapjoin.full.outer=true; + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +-- Big table without NULL key(s). +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +-- Small table without NULL key(s). +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + +-- Both Big and Small tables without NULL key(s). +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + + +CREATE TABLE fullouter_long_big_1b(key smallint) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1b.txt' OVERWRITE INTO TABLE fullouter_long_big_1b; + +CREATE TABLE fullouter_long_small_1b(key smallint, s_timestamp timestamp) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1b.txt' OVERWRITE INTO TABLE fullouter_long_small_1b; + +analyze table fullouter_long_big_1b compute statistics; +analyze table fullouter_long_big_1b compute statistics for columns; +analyze table fullouter_long_small_1b compute statistics; +analyze table fullouter_long_small_1b compute statistics for columns; + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key; + + +CREATE TABLE fullouter_long_big_1c(key int, b_string string) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1c.txt' OVERWRITE INTO TABLE fullouter_long_big_1c; + +CREATE TABLE fullouter_long_small_1c(key int, s_decimal decimal(38, 18)) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1c.txt' OVERWRITE INTO TABLE fullouter_long_small_1c; + +analyze table fullouter_long_big_1c compute statistics; +analyze table fullouter_long_big_1c compute statistics for columns; +analyze table fullouter_long_small_1c compute statistics; +analyze table fullouter_long_small_1c compute statistics for columns; + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key; + +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key; + + +CREATE TABLE fullouter_long_big_1d(key int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1d.txt' OVERWRITE INTO TABLE fullouter_long_big_1d; + +CREATE TABLE fullouter_long_small_1d(key int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1d.txt' OVERWRITE INTO TABLE fullouter_long_small_1d; + +analyze table fullouter_long_big_1d compute statistics; +analyze table fullouter_long_big_1d compute statistics for columns; +analyze table fullouter_long_small_1d compute statistics; +analyze table fullouter_long_small_1d compute statistics for columns; + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key; + + +------------------------------------------------------------------------------------------ +-- MULTI-KEY key +------------------------------------------------------------------------------------------ + +CREATE TABLE fullouter_multikey_big_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_txt; +CREATE TABLE fullouter_multikey_big_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_txt; + +CREATE TABLE fullouter_multikey_big_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_nonull_txt; +CREATE TABLE fullouter_multikey_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_nonull_txt; + +CREATE TABLE fullouter_multikey_small_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_txt; +CREATE TABLE fullouter_multikey_small_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_txt; + +CREATE TABLE fullouter_multikey_small_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_nonull_txt; +CREATE TABLE fullouter_multikey_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_nonull_txt; + +analyze table fullouter_multikey_big_1a compute statistics; +analyze table fullouter_multikey_big_1a compute statistics for columns; +analyze table fullouter_multikey_big_1a_nonull compute statistics; +analyze table fullouter_multikey_big_1a_nonull compute statistics for columns; +analyze table fullouter_multikey_small_1a compute statistics; +analyze table fullouter_multikey_small_1a compute statistics for columns; +analyze table fullouter_multikey_small_1a_nonull compute statistics; +analyze table fullouter_multikey_small_1a_nonull compute statistics for columns; + + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Big table without NULL key(s). +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Small table without NULL key(s). +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Both Big and Small tables without NULL key(s). +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + + + + +CREATE TABLE fullouter_multikey_big_1b_txt(key0 timestamp, key1 smallint, key2 string) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1b_txt; +CREATE TABLE fullouter_multikey_big_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1b_txt; + +CREATE TABLE fullouter_multikey_small_1b_txt(key0 timestamp, key1 smallint, key2 string, s_decimal decimal(38, 18)) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1b_txt; +CREATE TABLE fullouter_multikey_small_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1b_txt; + +analyze table fullouter_multikey_big_1b_txt compute statistics; +analyze table fullouter_multikey_big_1b_txt compute statistics for columns; +analyze table fullouter_multikey_small_1b_txt compute statistics; +analyze table fullouter_multikey_small_1b_txt compute statistics for columns; + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1; + + +------------------------------------------------------------------------------------------ +-- Single STRING key +------------------------------------------------------------------------------------------ + +CREATE TABLE fullouter_string_big_1a_txt(key string) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_txt; +CREATE TABLE fullouter_string_big_1a STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_txt; + +CREATE TABLE fullouter_string_big_1a_nonull_txt(key string) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_nonull_txt; +CREATE TABLE fullouter_string_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_nonull_txt; + +CREATE TABLE fullouter_string_small_1a_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_txt; +CREATE TABLE fullouter_string_small_1a STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_txt; + +CREATE TABLE fullouter_string_small_1a_nonull_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_nonull_txt; +CREATE TABLE fullouter_string_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_nonull_txt; + +analyze table fullouter_string_big_1a compute statistics; +analyze table fullouter_string_big_1a compute statistics for columns; +analyze table fullouter_string_big_1a_nonull compute statistics; +analyze table fullouter_string_big_1a_nonull compute statistics for columns; +analyze table fullouter_string_small_1a compute statistics; +analyze table fullouter_string_small_1a compute statistics for columns; +analyze table fullouter_string_small_1a_nonull compute statistics; +analyze table fullouter_string_small_1a_nonull compute statistics for columns; + + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +-- Big table without NULL key(s). +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +-- Small table without NULL key(s). +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + +-- Both Big and Small tables without NULL key(s). +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + + +