HIVE-10903: Add hive.in.test for HoS tests (Rui reviewed by Xuefu)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/02921ed8 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/02921ed8 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/02921ed8 Branch: refs/heads/spark Commit: 02921ed893bac87340a0fed7ddf13c567fbf88f0 Parents: 8cf9535 Author: Rui Li <rui...@intel.com> Authored: Wed Jun 10 15:11:36 2015 +0800 Committer: Rui Li <rui...@intel.com> Committed: Wed Jun 10 15:11:36 2015 +0800 ---------------------------------------------------------------------- data/conf/spark/standalone/hive-site.xml | 6 + data/conf/spark/yarn-client/hive-site.xml | 6 + .../test/queries/clientpositive/cbo_subq_in.q | 1 + ...groupby_complex_types_multi_single_reducer.q | 8 +- .../results/clientpositive/cbo_subq_in.q.out | 2 + ...pby_complex_types_multi_single_reducer.q.out | 68 +-- .../spark/annotate_stats_join.q.out | 204 +++---- .../clientpositive/spark/auto_join1.q.out | 60 +- .../clientpositive/spark/auto_join10.q.out | 28 +- .../clientpositive/spark/auto_join11.q.out | 32 +- .../clientpositive/spark/auto_join14.q.out | 64 +- .../clientpositive/spark/auto_join16.q.out | 68 ++- .../clientpositive/spark/auto_join17.q.out | 60 +- .../clientpositive/spark/auto_join18.q.out | 30 +- .../spark/auto_join18_multi_distinct.q.out | 30 +- .../clientpositive/spark/auto_join19.q.out | 56 +- .../clientpositive/spark/auto_join26.q.out | 68 ++- .../clientpositive/spark/auto_join27.q.out | 24 +- .../clientpositive/spark/auto_join4.q.out | 14 +- .../clientpositive/spark/auto_join5.q.out | 24 +- .../clientpositive/spark/auto_join6.q.out | 10 +- .../clientpositive/spark/auto_join8.q.out | 16 +- .../clientpositive/spark/auto_join9.q.out | 56 +- .../spark/auto_join_filters.q.out | 4 +- .../clientpositive/spark/auto_join_nulls.q.out | 6 +- .../spark/auto_join_without_localtask.q.out | 32 +- .../clientpositive/spark/cbo_subq_in.q.out | 2 + .../spark/constprog_partitioner.q.out | 80 +-- .../results/clientpositive/spark/count.q.out | 14 +- .../clientpositive/spark/cross_join.q.out | 74 ++- .../spark/cross_product_check_1.q.out | 40 +- .../spark/cross_product_check_2.q.out | 38 +- .../results/clientpositive/spark/groupby1.q.out | 8 +- .../results/clientpositive/spark/groupby4.q.out | 6 +- ...pby_complex_types_multi_single_reducer.q.out | 90 ++- .../clientpositive/spark/groupby_cube1.q.out | 24 +- .../clientpositive/spark/groupby_position.q.out | 130 +++-- .../clientpositive/spark/groupby_rollup1.q.out | 20 +- .../spark/groupby_sort_1_23.q.out | 82 +-- .../spark/groupby_sort_skew_1_23.q.out | 82 +-- .../results/clientpositive/spark/having.q.out | 66 ++- .../spark/index_auto_self_join.q.out | 116 ++-- .../spark/infer_bucket_sort_map_operators.q.out | 5 +- .../spark/infer_bucket_sort_merge.q.out | 4 +- .../clientpositive/spark/innerjoin.q.out | 54 +- .../results/clientpositive/spark/join1.q.out | 38 +- .../results/clientpositive/spark/join10.q.out | 28 +- .../results/clientpositive/spark/join11.q.out | 18 +- .../results/clientpositive/spark/join14.q.out | 54 +- .../results/clientpositive/spark/join16.q.out | 36 +- .../results/clientpositive/spark/join17.q.out | 52 +- .../results/clientpositive/spark/join18.q.out | 12 +- .../spark/join18_multi_distinct.q.out | 12 +- .../results/clientpositive/spark/join29.q.out | 66 ++- .../results/clientpositive/spark/join31.q.out | 104 ++-- .../results/clientpositive/spark/join34.q.out | 36 +- .../results/clientpositive/spark/join35.q.out | 92 +-- .../results/clientpositive/spark/join4.q.out | 16 +- .../results/clientpositive/spark/join40.q.out | 116 ++-- .../results/clientpositive/spark/join5.q.out | 20 +- .../results/clientpositive/spark/join6.q.out | 10 +- .../results/clientpositive/spark/join8.q.out | 18 +- .../results/clientpositive/spark/join9.q.out | 42 +- .../clientpositive/spark/join_alt_syntax.q.out | 42 +- .../results/clientpositive/spark/join_vc.q.out | 40 +- .../spark/limit_partition_metadataonly.q.out | 4 +- .../clientpositive/spark/limit_pushdown.q.out | 138 ++--- .../clientpositive/spark/load_dyn_part13.q.out | 4 +- .../clientpositive/spark/louter_join_ppr.q.out | 514 +++++++--------- .../results/clientpositive/spark/merge1.q.out | 4 +- .../results/clientpositive/spark/merge2.q.out | 4 +- .../spark/metadata_only_queries.q.out | 12 +- .../results/clientpositive/spark/order2.q.out | 2 +- .../spark/outer_join_ppr.q.java1.7.out | 366 +++++------- .../spark/outer_join_ppr.q.java1.8.out | 366 +++++------- .../clientpositive/spark/parallel_join1.q.out | 38 +- .../test/results/clientpositive/spark/pcr.q.out | 4 +- .../clientpositive/spark/ppd_gby_join.q.out | 132 +++-- .../results/clientpositive/spark/ppd_join.q.out | 118 ++-- .../clientpositive/spark/ppd_join_filter.q.out | 442 +++++++------- .../clientpositive/spark/ppd_outer_join1.q.out | 120 ++-- .../clientpositive/spark/ppd_outer_join2.q.out | 158 ++--- .../clientpositive/spark/ppd_outer_join3.q.out | 154 +++-- .../spark/ql_rewrite_gbtoidx_cbo_1.q.out | 208 +++---- .../clientpositive/spark/router_join_ppr.q.out | 582 ++++++++----------- .../results/clientpositive/spark/semijoin.q.out | 26 +- .../results/clientpositive/spark/skewjoin.q.out | 160 ++--- .../clientpositive/spark/skewjoin_noskew.q.out | 32 +- .../results/clientpositive/spark/stats1.q.out | 2 +- .../clientpositive/spark/stats_only_null.q.out | 8 +- .../clientpositive/spark/subquery_exists.q.out | 20 +- .../clientpositive/spark/subquery_in.q.out | 230 ++++---- .../spark/subquery_multiinsert.q.java1.8.out | 14 +- .../spark/table_access_keys_stats.q.out | 18 +- .../clientpositive/spark/temp_table.q.out | 4 +- .../results/clientpositive/spark/union.q.out | 4 +- .../results/clientpositive/spark/union10.q.out | 6 +- .../results/clientpositive/spark/union11.q.out | 82 ++- .../results/clientpositive/spark/union12.q.out | 2 +- .../results/clientpositive/spark/union13.q.out | 2 +- .../results/clientpositive/spark/union14.q.out | 52 +- .../results/clientpositive/spark/union15.q.out | 80 ++- .../results/clientpositive/spark/union2.q.out | 2 +- .../results/clientpositive/spark/union20.q.out | 10 +- .../results/clientpositive/spark/union24.q.out | 36 +- .../results/clientpositive/spark/union25.q.out | 38 +- .../results/clientpositive/spark/union28.q.out | 78 +-- .../results/clientpositive/spark/union30.q.out | 78 +-- .../results/clientpositive/spark/union33.q.out | 12 +- .../results/clientpositive/spark/union4.q.out | 4 +- .../results/clientpositive/spark/union5.q.out | 54 +- .../results/clientpositive/spark/union6.q.out | 2 +- .../results/clientpositive/spark/union7.q.out | 52 +- .../results/clientpositive/spark/union8.q.out | 4 +- .../results/clientpositive/spark/union9.q.out | 4 +- .../clientpositive/spark/union_remove_1.q.out | 8 +- .../clientpositive/spark/union_remove_10.q.out | 8 +- .../clientpositive/spark/union_remove_15.q.out | 8 +- .../clientpositive/spark/union_remove_16.q.out | 8 +- .../clientpositive/spark/union_remove_18.q.out | 8 +- .../clientpositive/spark/union_remove_19.q.out | 192 +++--- .../clientpositive/spark/union_remove_2.q.out | 8 +- .../clientpositive/spark/union_remove_20.q.out | 8 +- .../clientpositive/spark/union_remove_21.q.out | 70 +-- .../clientpositive/spark/union_remove_22.q.out | 8 +- .../clientpositive/spark/union_remove_24.q.out | 8 +- .../clientpositive/spark/union_remove_25.q.out | 8 +- .../clientpositive/spark/union_remove_4.q.out | 8 +- .../clientpositive/spark/union_remove_5.q.out | 8 +- .../spark/union_remove_6_subq.q.out | 8 +- .../clientpositive/spark/union_remove_7.q.out | 8 +- .../clientpositive/spark/union_remove_8.q.out | 8 +- .../clientpositive/spark/union_remove_9.q.out | 4 +- .../clientpositive/spark/union_top_level.q.out | 24 +- .../spark/vector_cast_constant.q.java1.7.out | 6 +- .../spark/vector_cast_constant.q.java1.8.out | 6 +- .../spark/vector_decimal_aggregate.q.out | 12 +- .../spark/vector_distinct_2.q.out | 4 +- .../clientpositive/spark/vector_elt.q.out | 2 +- .../clientpositive/spark/vector_groupby_3.q.out | 6 +- .../spark/vector_mapjoin_reduce.q.out | 261 +++++---- .../clientpositive/spark/vector_orderby_5.q.out | 6 +- .../spark/vector_string_concat.q.out | 6 +- .../clientpositive/spark/vectorization_0.q.out | 20 +- .../clientpositive/spark/vectorization_13.q.out | 20 +- .../clientpositive/spark/vectorization_14.q.out | 12 +- .../clientpositive/spark/vectorization_15.q.out | 10 +- .../clientpositive/spark/vectorization_16.q.out | 8 +- .../clientpositive/spark/vectorization_9.q.out | 8 +- .../spark/vectorization_div0.q.out | 6 +- .../spark/vectorization_part_project.q.out | 2 +- .../spark/vectorization_pushdown.q.out | 8 +- .../spark/vectorization_short_regress.q.out | 112 ++-- .../clientpositive/spark/vectorized_case.q.out | 2 +- .../spark/vectorized_mapjoin.q.out | 60 +- .../spark/vectorized_shufflejoin.q.out | 52 +- .../spark/vectorized_timestamp_funcs.q.out | 12 +- .../clientpositive/tez/cbo_subq_in.q.out | 2 + 158 files changed, 4207 insertions(+), 4096 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/02921ed8/data/conf/spark/standalone/hive-site.xml ---------------------------------------------------------------------- diff --git a/data/conf/spark/standalone/hive-site.xml b/data/conf/spark/standalone/hive-site.xml index 016f568..abc73f8 100644 --- a/data/conf/spark/standalone/hive-site.xml +++ b/data/conf/spark/standalone/hive-site.xml @@ -230,4 +230,10 @@ <value>hive_admin_user</value> </property> +<property> + <name>hive.in.test</name> + <value>true</value> + <description>Internal marker for test. Used for masking env-dependent values</description> +</property> + </configuration> http://git-wip-us.apache.org/repos/asf/hive/blob/02921ed8/data/conf/spark/yarn-client/hive-site.xml ---------------------------------------------------------------------- diff --git a/data/conf/spark/yarn-client/hive-site.xml b/data/conf/spark/yarn-client/hive-site.xml index 39ba20e..b7211ee 100644 --- a/data/conf/spark/yarn-client/hive-site.xml +++ b/data/conf/spark/yarn-client/hive-site.xml @@ -250,4 +250,10 @@ <value>hive_admin_user</value> </property> +<property> + <name>hive.in.test</name> + <value>true</value> + <description>Internal marker for test. Used for masking env-dependent values</description> +</property> + </configuration> http://git-wip-us.apache.org/repos/asf/hive/blob/02921ed8/ql/src/test/queries/clientpositive/cbo_subq_in.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/cbo_subq_in.q b/ql/src/test/queries/clientpositive/cbo_subq_in.q index 7a0bfba..0a25b9c 100644 --- a/ql/src/test/queries/clientpositive/cbo_subq_in.q +++ b/ql/src/test/queries/clientpositive/cbo_subq_in.q @@ -29,6 +29,7 @@ select p.p_partkey, li.l_suppkey from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey where li.l_linenumber = 1 and li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber) + order by p.p_partkey ; -- where and having http://git-wip-us.apache.org/repos/asf/hive/blob/02921ed8/ql/src/test/queries/clientpositive/groupby_complex_types_multi_single_reducer.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/groupby_complex_types_multi_single_reducer.q b/ql/src/test/queries/clientpositive/groupby_complex_types_multi_single_reducer.q index b7e1bf1..0a1f137 100644 --- a/ql/src/test/queries/clientpositive/groupby_complex_types_multi_single_reducer.q +++ b/ql/src/test/queries/clientpositive/groupby_complex_types_multi_single_reducer.q @@ -7,12 +7,12 @@ CREATE TABLE DEST2(key MAP<STRING, STRING>, value BIGINT) STORED AS TEXTFILE; EXPLAIN FROM SRC -INSERT OVERWRITE TABLE DEST1 SELECT ARRAY(SRC.key), COUNT(1) GROUP BY ARRAY(SRC.key) limit 10 -INSERT OVERWRITE TABLE DEST2 SELECT MAP(SRC.key, SRC.value), COUNT(1) GROUP BY MAP(SRC.key, SRC.value) limit 10; +INSERT OVERWRITE TABLE DEST1 SELECT ARRAY(SRC.key) as keyarray, COUNT(1) GROUP BY ARRAY(SRC.key) ORDER BY keyarray limit 10 +INSERT OVERWRITE TABLE DEST2 SELECT MAP(SRC.key, SRC.value) as kvmap, COUNT(1) GROUP BY MAP(SRC.key, SRC.value) ORDER BY kvmap limit 10; FROM SRC -INSERT OVERWRITE TABLE DEST1 SELECT ARRAY(SRC.key), COUNT(1) GROUP BY ARRAY(SRC.key) limit 10 -INSERT OVERWRITE TABLE DEST2 SELECT MAP(SRC.key, SRC.value), COUNT(1) GROUP BY MAP(SRC.key, SRC.value) limit 10; +INSERT OVERWRITE TABLE DEST1 SELECT ARRAY(SRC.key) as keyarray, COUNT(1) GROUP BY ARRAY(SRC.key) ORDER BY keyarray limit 10 +INSERT OVERWRITE TABLE DEST2 SELECT MAP(SRC.key, SRC.value) as kvmap, COUNT(1) GROUP BY MAP(SRC.key, SRC.value) ORDER BY kvmap limit 10; SELECT DEST1.* FROM DEST1; SELECT DEST2.* FROM DEST2; http://git-wip-us.apache.org/repos/asf/hive/blob/02921ed8/ql/src/test/results/clientpositive/cbo_subq_in.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/cbo_subq_in.q.out b/ql/src/test/results/clientpositive/cbo_subq_in.q.out index c1f3de7..f6bfad2 100644 --- a/ql/src/test/results/clientpositive/cbo_subq_in.q.out +++ b/ql/src/test/results/clientpositive/cbo_subq_in.q.out @@ -69,6 +69,7 @@ select p.p_partkey, li.l_suppkey from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey where li.l_linenumber = 1 and li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber) + order by p.p_partkey PREHOOK: type: QUERY PREHOOK: Input: default@lineitem #### A masked pattern was here #### @@ -77,6 +78,7 @@ select p.p_partkey, li.l_suppkey from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey where li.l_linenumber = 1 and li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber) + order by p.p_partkey POSTHOOK: type: QUERY POSTHOOK: Input: default@lineitem #### A masked pattern was here #### http://git-wip-us.apache.org/repos/asf/hive/blob/02921ed8/ql/src/test/results/clientpositive/groupby_complex_types_multi_single_reducer.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/groupby_complex_types_multi_single_reducer.q.out b/ql/src/test/results/clientpositive/groupby_complex_types_multi_single_reducer.q.out index 4611f60..0564056 100644 --- a/ql/src/test/results/clientpositive/groupby_complex_types_multi_single_reducer.q.out +++ b/ql/src/test/results/clientpositive/groupby_complex_types_multi_single_reducer.q.out @@ -20,13 +20,13 @@ POSTHOOK: Output: database:default POSTHOOK: Output: default@DEST2 PREHOOK: query: EXPLAIN FROM SRC -INSERT OVERWRITE TABLE DEST1 SELECT ARRAY(SRC.key), COUNT(1) GROUP BY ARRAY(SRC.key) limit 10 -INSERT OVERWRITE TABLE DEST2 SELECT MAP(SRC.key, SRC.value), COUNT(1) GROUP BY MAP(SRC.key, SRC.value) limit 10 +INSERT OVERWRITE TABLE DEST1 SELECT ARRAY(SRC.key) as keyarray, COUNT(1) GROUP BY ARRAY(SRC.key) ORDER BY keyarray limit 10 +INSERT OVERWRITE TABLE DEST2 SELECT MAP(SRC.key, SRC.value) as kvmap, COUNT(1) GROUP BY MAP(SRC.key, SRC.value) ORDER BY kvmap limit 10 PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN FROM SRC -INSERT OVERWRITE TABLE DEST1 SELECT ARRAY(SRC.key), COUNT(1) GROUP BY ARRAY(SRC.key) limit 10 -INSERT OVERWRITE TABLE DEST2 SELECT MAP(SRC.key, SRC.value), COUNT(1) GROUP BY MAP(SRC.key, SRC.value) limit 10 +INSERT OVERWRITE TABLE DEST1 SELECT ARRAY(SRC.key) as keyarray, COUNT(1) GROUP BY ARRAY(SRC.key) ORDER BY keyarray limit 10 +INSERT OVERWRITE TABLE DEST2 SELECT MAP(SRC.key, SRC.value) as kvmap, COUNT(1) GROUP BY MAP(SRC.key, SRC.value) ORDER BY kvmap limit 10 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage @@ -84,29 +84,27 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-3 Map Reduce Map Operator Tree: TableScan Reduce Output Operator - sort order: - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: array<string>), _col1 (type: bigint) + key expressions: _col0 (type: array<string>) + sort order: + + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: array<string>), VALUE._col1 (type: bigint) + expressions: KEY.reducesinkkey0 (type: array<string>), VALUE._col0 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE @@ -149,29 +147,27 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-6 Map Reduce Map Operator Tree: TableScan Reduce Output Operator - sort order: - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: map<string,string>), _col1 (type: bigint) + key expressions: _col0 (type: map<string,string>) + sort order: + + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: map<string,string>), VALUE._col1 (type: bigint) + expressions: KEY.reducesinkkey0 (type: map<string,string>), VALUE._col0 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE @@ -198,15 +194,15 @@ STAGE PLANS: Stats-Aggr Operator PREHOOK: query: FROM SRC -INSERT OVERWRITE TABLE DEST1 SELECT ARRAY(SRC.key), COUNT(1) GROUP BY ARRAY(SRC.key) limit 10 -INSERT OVERWRITE TABLE DEST2 SELECT MAP(SRC.key, SRC.value), COUNT(1) GROUP BY MAP(SRC.key, SRC.value) limit 10 +INSERT OVERWRITE TABLE DEST1 SELECT ARRAY(SRC.key) as keyarray, COUNT(1) GROUP BY ARRAY(SRC.key) ORDER BY keyarray limit 10 +INSERT OVERWRITE TABLE DEST2 SELECT MAP(SRC.key, SRC.value) as kvmap, COUNT(1) GROUP BY MAP(SRC.key, SRC.value) ORDER BY kvmap limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@dest1 PREHOOK: Output: default@dest2 POSTHOOK: query: FROM SRC -INSERT OVERWRITE TABLE DEST1 SELECT ARRAY(SRC.key), COUNT(1) GROUP BY ARRAY(SRC.key) limit 10 -INSERT OVERWRITE TABLE DEST2 SELECT MAP(SRC.key, SRC.value), COUNT(1) GROUP BY MAP(SRC.key, SRC.value) limit 10 +INSERT OVERWRITE TABLE DEST1 SELECT ARRAY(SRC.key) as keyarray, COUNT(1) GROUP BY ARRAY(SRC.key) ORDER BY keyarray limit 10 +INSERT OVERWRITE TABLE DEST2 SELECT MAP(SRC.key, SRC.value) as kvmap, COUNT(1) GROUP BY MAP(SRC.key, SRC.value) ORDER BY kvmap limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@dest1 http://git-wip-us.apache.org/repos/asf/hive/blob/02921ed8/ql/src/test/results/clientpositive/spark/annotate_stats_join.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/annotate_stats_join.q.out b/ql/src/test/results/clientpositive/spark/annotate_stats_join.q.out index 032926d..8955a61 100644 --- a/ql/src/test/results/clientpositive/spark/annotate_stats_join.q.out +++ b/ql/src/test/results/clientpositive/spark/annotate_stats_join.q.out @@ -173,12 +173,16 @@ STAGE PLANS: Filter Operator predicate: deptid is not null (type: boolean) Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: deptid (type: int) - sort order: + - Map-reduce partition columns: deptid (type: int) + Select Operator + expressions: lastname (type: string), deptid (type: int), locid (type: int) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: lastname (type: string), locid (type: int) + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col2 (type: int) Map 3 Map Operator Tree: TableScan @@ -187,33 +191,33 @@ STAGE PLANS: Filter Operator predicate: deptid is not null (type: boolean) Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: deptid (type: int) - sort order: + - Map-reduce partition columns: deptid (type: int) + Select Operator + expressions: deptid (type: int), deptname (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: deptname (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) Reducer 2 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 deptid (type: int) - 1 deptid (type: int) - outputColumnNames: _col0, _col1, _col2, _col6, _col7 + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 48 Data size: 9312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col6 (type: int), _col7 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 + File Output Operator + compressed: false Statistics: Num rows: 48 Data size: 9312 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 9312 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -248,12 +252,16 @@ STAGE PLANS: Filter Operator predicate: (deptid is not null and lastname is not null) (type: boolean) Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: deptid (type: int), lastname (type: string) - sort order: ++ - Map-reduce partition columns: deptid (type: int), lastname (type: string) + Select Operator + expressions: lastname (type: string), deptid (type: int), locid (type: int) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: locid (type: int) + Reduce Output Operator + key expressions: _col1 (type: int), _col0 (type: string) + sort order: ++ + Map-reduce partition columns: _col1 (type: int), _col0 (type: string) + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int) Map 3 Map Operator Tree: TableScan @@ -262,35 +270,32 @@ STAGE PLANS: Filter Operator predicate: (deptid is not null and deptname is not null) (type: boolean) Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: deptid (type: int), deptname (type: string) - sort order: ++ - Map-reduce partition columns: deptid (type: int), deptname (type: string) + Select Operator + expressions: deptid (type: int), deptname (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE Reducer 2 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 deptid (type: int), lastname (type: string) - 1 deptid (type: int), deptname (type: string) - outputColumnNames: _col0, _col1, _col2, _col6, _col7 + 0 _col1 (type: int), _col0 (type: string) + 1 _col0 (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 6 Data size: 1164 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((_col1 = _col6) and (_col0 = _col7)) (type: boolean) - Statistics: Num rows: 1 Data size: 194 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col6 (type: int), _col7 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 194 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 194 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 1164 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -321,12 +326,16 @@ STAGE PLANS: Filter Operator predicate: (deptid is not null and lastname is not null) (type: boolean) Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: deptid (type: int), lastname (type: string) - sort order: ++ - Map-reduce partition columns: deptid (type: int), lastname (type: string) + Select Operator + expressions: lastname (type: string), deptid (type: int), locid (type: int) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: locid (type: int) + Reduce Output Operator + key expressions: _col1 (type: int), _col0 (type: string) + sort order: ++ + Map-reduce partition columns: _col1 (type: int), _col0 (type: string) + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int) Map 3 Map Operator Tree: TableScan @@ -335,32 +344,32 @@ STAGE PLANS: Filter Operator predicate: (deptid is not null and deptname is not null) (type: boolean) Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: deptid (type: int), deptname (type: string) - sort order: ++ - Map-reduce partition columns: deptid (type: int), deptname (type: string) + Select Operator + expressions: deptid (type: int), deptname (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE Reducer 2 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 deptid (type: int), lastname (type: string) - 1 deptid (type: int), deptname (type: string) - outputColumnNames: _col0, _col1, _col2, _col6, _col7 + 0 _col1 (type: int), _col0 (type: string) + 1 _col0 (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 6 Data size: 1164 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col6 (type: int), _col7 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 + File Output Operator + compressed: false Statistics: Num rows: 6 Data size: 1164 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 6 Data size: 1164 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -395,12 +404,16 @@ STAGE PLANS: Filter Operator predicate: (deptid is not null and lastname is not null) (type: boolean) Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: deptid (type: int), lastname (type: string), lastname (type: string) - sort order: +++ - Map-reduce partition columns: deptid (type: int), lastname (type: string), lastname (type: string) + Select Operator + expressions: lastname (type: string), deptid (type: int), locid (type: int) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: locid (type: int) + Reduce Output Operator + key expressions: _col1 (type: int), _col0 (type: string), _col0 (type: string) + sort order: +++ + Map-reduce partition columns: _col1 (type: int), _col0 (type: string), _col0 (type: string) + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int) Map 3 Map Operator Tree: TableScan @@ -409,35 +422,32 @@ STAGE PLANS: Filter Operator predicate: (deptid is not null and deptname is not null) (type: boolean) Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: deptid (type: int), deptname (type: string), deptname (type: string) - sort order: +++ - Map-reduce partition columns: deptid (type: int), deptname (type: string), deptname (type: string) + Select Operator + expressions: deptid (type: int), deptname (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string), _col1 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string), _col1 (type: string) + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE Reducer 2 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 deptid (type: int), lastname (type: string), lastname (type: string) - 1 deptid (type: int), deptname (type: string), deptname (type: string) - outputColumnNames: _col0, _col1, _col2, _col6, _col7 + 0 _col1 (type: int), _col0 (type: string), _col0 (type: string) + 1 _col0 (type: int), _col1 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 11 Data size: 2134 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (((_col1 = _col6) and (_col0 = _col7)) and (_col7 = _col0)) (type: boolean) - Statistics: Num rows: 1 Data size: 194 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col6 (type: int), _col7 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 194 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 194 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + Statistics: Num rows: 11 Data size: 2134 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator http://git-wip-us.apache.org/repos/asf/hive/blob/02921ed8/ql/src/test/results/clientpositive/spark/auto_join1.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/auto_join1.q.out b/ql/src/test/results/clientpositive/spark/auto_join1.q.out index d9215f8..d26a33e 100644 --- a/ql/src/test/results/clientpositive/spark/auto_join1.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_join1.q.out @@ -32,15 +32,19 @@ STAGE PLANS: Map 2 Map Operator Tree: TableScan - alias: src2 + alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 key (type: string) - 1 key (type: string) + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) Local Work: Map Reduce Local Work @@ -56,28 +60,32 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col6 - input vertices: - 1 Map 2 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1 + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col2 + input vertices: + 1 Map 2 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Select Operator + expressions: UDFToInteger(_col2) (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 Local Work: Map Reduce Local Work @@ -105,7 +113,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@dest_j1 POSTHOOK: Lineage: dest_j1.key EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest_j1.value SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j1.value SIMPLE [(src)src1.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: SELECT sum(hash(dest_j1.key,dest_j1.value)) FROM dest_j1 PREHOOK: type: QUERY PREHOOK: Input: default@dest_j1 http://git-wip-us.apache.org/repos/asf/hive/blob/02921ed8/ql/src/test/results/clientpositive/spark/auto_join10.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/auto_join10.q.out b/ql/src/test/results/clientpositive/spark/auto_join10.q.out index cd50576..b43e55c 100644 --- a/ql/src/test/results/clientpositive/spark/auto_join10.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_join10.q.out @@ -33,8 +33,8 @@ STAGE PLANS: predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: @@ -58,8 +58,8 @@ STAGE PLANS: predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -67,19 +67,23 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col2, _col3 + outputColumnNames: _col0, _col1 input vertices: 1 Map 3 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - mode: hash + Select Operator + expressions: hash(_col0,_col1) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Reducer 2 http://git-wip-us.apache.org/repos/asf/hive/blob/02921ed8/ql/src/test/results/clientpositive/spark/auto_join11.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/auto_join11.q.out b/ql/src/test/results/clientpositive/spark/auto_join11.q.out index ed32dec..f8fc309 100644 --- a/ql/src/test/results/clientpositive/spark/auto_join11.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_join11.q.out @@ -30,11 +30,11 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key < 100) (type: boolean) + predicate: (UDFToDouble(key) < 100.0) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: @@ -55,11 +55,11 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key < 100) (type: boolean) + predicate: (UDFToDouble(key) < 100.0) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -67,19 +67,23 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col3 + outputColumnNames: _col1, _col2 input vertices: 1 Map 3 Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0,_col3)) - mode: hash + Select Operator + expressions: hash(_col2,_col1) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Reducer 2 http://git-wip-us.apache.org/repos/asf/hive/blob/02921ed8/ql/src/test/results/clientpositive/spark/auto_join14.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/auto_join14.q.out b/ql/src/test/results/clientpositive/spark/auto_join14.q.out index 830314e..30be2e8 100644 --- a/ql/src/test/results/clientpositive/spark/auto_join14.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_join14.q.out @@ -29,18 +29,22 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key > 100) (type: boolean) + predicate: (UDFToDouble(key) > 100.0) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 key (type: string) - 1 key (type: string) + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) Local Work: Map Reduce Local Work @@ -48,36 +52,40 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: srcpart Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key > 100) and key is not null) (type: boolean) + predicate: ((UDFToDouble(key) > 100.0) and key is not null) (type: boolean) Statistics: Num rows: 167 Data size: 1774 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col6 - input vertices: - 0 Map 1 - Statistics: Num rows: 183 Data size: 1951 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1 + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 167 Data size: 1774 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col3 + input vertices: + 1 Map 2 Statistics: Num rows: 183 Data size: 1951 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Select Operator + expressions: UDFToInteger(_col3) (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 183 Data size: 1951 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 + File Output Operator + compressed: false + Statistics: Num rows: 183 Data size: 1951 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 Local Work: Map Reduce Local Work http://git-wip-us.apache.org/repos/asf/hive/blob/02921ed8/ql/src/test/results/clientpositive/spark/auto_join16.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/auto_join16.q.out b/ql/src/test/results/clientpositive/spark/auto_join16.q.out index 8c166b0..5c4bbb3 100644 --- a/ql/src/test/results/clientpositive/spark/auto_join16.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_join16.q.out @@ -24,61 +24,69 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((key > 10) and value is not null) and (key > 20)) and (value < 200)) (type: boolean) - Statistics: Num rows: 9 Data size: 95 Basic stats: COMPLETE Column stats: NONE + predicate: (((((UDFToDouble(value) < 200.0) and (UDFToDouble(key) > 10.0)) and (UDFToDouble(key) > 20.0)) and key is not null) and value is not null) (type: boolean) + Statistics: Num rows: 5 Data size: 53 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 9 Data size: 95 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 53 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: 0 _col0 (type: string), _col1 (type: string) - 1 key (type: string), value (type: string) + 1 _col0 (type: string), _col1 (type: string) Local Work: Map Reduce Local Work Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: tab + alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((key > 20) and value is not null) and key is not null) and (value < 200)) (type: boolean) - Statistics: Num rows: 14 Data size: 148 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string), _col1 (type: string) - 1 key (type: string), value (type: string) - outputColumnNames: _col0, _col3 - input vertices: - 0 Map 1 - Statistics: Num rows: 15 Data size: 162 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0,_col3)) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + predicate: (((((UDFToDouble(key) > 10.0) and (UDFToDouble(key) > 20.0)) and (UDFToDouble(value) < 200.0)) and key is not null) and value is not null) (type: boolean) + Statistics: Num rows: 5 Data size: 53 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 53 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string), _col1 (type: string) + 1 _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col3 + input vertices: + 1 Map 3 + Statistics: Num rows: 5 Data size: 58 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: hash(_col0,_col3) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 58 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work - Reducer 3 + Reducer 2 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) http://git-wip-us.apache.org/repos/asf/hive/blob/02921ed8/ql/src/test/results/clientpositive/spark/auto_join17.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/auto_join17.q.out b/ql/src/test/results/clientpositive/spark/auto_join17.q.out index 3144db6..bc492c9 100644 --- a/ql/src/test/results/clientpositive/spark/auto_join17.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_join17.q.out @@ -28,15 +28,19 @@ STAGE PLANS: Map 2 Map Operator Tree: TableScan - alias: src2 + alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 key (type: string) - 1 key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) Local Work: Map Reduce Local Work @@ -52,28 +56,32 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 2 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col5) (type: int), _col6 (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 2 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col2) (type: int), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 Local Work: Map Reduce Local Work @@ -101,9 +109,9 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@dest1 POSTHOOK: Lineage: dest1.key1 EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest1.key2 EXPRESSION [(src)src2.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key2 EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: dest1.value1 SIMPLE [(src)src1.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest1.value2 SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value2 SIMPLE [(src)src1.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: SELECT sum(hash(dest1.key1,dest1.value1,dest1.key2,dest1.value2)) FROM dest1 PREHOOK: type: QUERY PREHOOK: Input: default@dest1 http://git-wip-us.apache.org/repos/asf/hive/blob/02921ed8/ql/src/test/results/clientpositive/spark/auto_join18.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/auto_join18.q.out b/ql/src/test/results/clientpositive/spark/auto_join18.q.out index f5a2227..eaef06c 100644 --- a/ql/src/test/results/clientpositive/spark/auto_join18.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_join18.q.out @@ -45,11 +45,11 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) - outputColumnNames: key, value + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(value) - keys: key (type: string) + aggregations: count(_col1) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -66,11 +66,11 @@ STAGE PLANS: Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) - outputColumnNames: key, value + outputColumnNames: _col0, _col1 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(DISTINCT value) - keys: key (type: string), value (type: string) + aggregations: count(DISTINCT _col1) + keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE @@ -103,15 +103,19 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0,_col1,_col2,_col3)) - mode: hash + Select Operator + expressions: hash(_col0,_col1,_col2,_col3) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Reducer 4 Reduce Operator Tree: Group By Operator http://git-wip-us.apache.org/repos/asf/hive/blob/02921ed8/ql/src/test/results/clientpositive/spark/auto_join18_multi_distinct.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/auto_join18_multi_distinct.q.out b/ql/src/test/results/clientpositive/spark/auto_join18_multi_distinct.q.out index 07c7aa5..df737c2 100644 --- a/ql/src/test/results/clientpositive/spark/auto_join18_multi_distinct.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_join18_multi_distinct.q.out @@ -47,11 +47,11 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) - outputColumnNames: key, value + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(value) - keys: key (type: string) + aggregations: count(_col1) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -68,11 +68,11 @@ STAGE PLANS: Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) - outputColumnNames: key, value + outputColumnNames: _col0, _col1 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(DISTINCT value), count(DISTINCT key) - keys: key (type: string), value (type: string) + aggregations: count(DISTINCT _col1), count(DISTINCT _col0) + keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE @@ -105,15 +105,19 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0,_col1,_col2,_col3,_col4)) - mode: hash + Select Operator + expressions: hash(_col0,_col1,_col2,_col3,_col4) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Reducer 4 Reduce Operator Tree: Group By Operator http://git-wip-us.apache.org/repos/asf/hive/blob/02921ed8/ql/src/test/results/clientpositive/spark/auto_join19.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/auto_join19.q.out b/ql/src/test/results/clientpositive/spark/auto_join19.q.out index f2b0140..9e4fb8f 100644 --- a/ql/src/test/results/clientpositive/spark/auto_join19.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_join19.q.out @@ -35,10 +35,14 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 key (type: string) - 1 key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) Local Work: Map Reduce Local Work @@ -54,28 +58,32 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col8 - input vertices: - 1 Map 2 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col8 (type: string) - outputColumnNames: _col0, _col1 + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col4 + input vertices: + 1 Map 2 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col4 (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 + File Output Operator + compressed: false + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 Local Work: Map Reduce Local Work http://git-wip-us.apache.org/repos/asf/hive/blob/02921ed8/ql/src/test/results/clientpositive/spark/auto_join26.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/auto_join26.q.out b/ql/src/test/results/clientpositive/spark/auto_join26.q.out index 58821e9..3c437a1 100644 --- a/ql/src/test/results/clientpositive/spark/auto_join26.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_join26.q.out @@ -29,7 +29,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: x @@ -37,20 +37,24 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 key (type: string) - 1 key (type: string) + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) Local Work: Map Reduce Local Work Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (GROUP, 2) + Reducer 2 <- Map 1 (GROUP, 2) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: y @@ -58,31 +62,39 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: string) - 1 key (type: string) + Select Operator + expressions: key (type: string) outputColumnNames: _col0 - input vertices: - 0 Map 1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1 + input vertices: + 1 Map 3 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + Select Operator + expressions: _col1 (type: string) + outputColumnNames: _col0 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Local Work: Map Reduce Local Work - Reducer 3 + Reducer 2 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -128,7 +140,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Input: default@src1 POSTHOOK: Output: default@dest_j1 -POSTHOOK: Lineage: dest_j1.cnt EXPRESSION [(src1)x.null, (src)y.null, ] +POSTHOOK: Lineage: dest_j1.cnt EXPRESSION [(src1)x.null, ] POSTHOOK: Lineage: dest_j1.key EXPRESSION [(src1)x.FieldSchema(name:key, type:string, comment:default), ] PREHOOK: query: select * from dest_j1 PREHOOK: type: QUERY