HIVE-14767: Migrate slow MiniMr tests to faster options (Prasanth Jayachandran reviewed by Siddharth Seth)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/05e25103 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/05e25103 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/05e25103 Branch: refs/heads/hive-14535 Commit: 05e251036ce66be279749ca30ac5ee0cb97861c7 Parents: 6b51023 Author: Prasanth Jayachandran <prasan...@apache.org> Authored: Sat Sep 17 00:03:54 2016 -0700 Committer: Prasanth Jayachandran <prasan...@apache.org> Committed: Sat Sep 17 00:03:54 2016 -0700 ---------------------------------------------------------------------- .../test/resources/testconfiguration.properties | 87 ++- ql/src/test/queries/clientpositive/bucket5.q | 1 + .../results/clientpositive/llap/bucket5.q.out | 586 +++++++++++++++++++ .../results/clientpositive/llap/bucket6.q.out | 203 +++++++ .../clientpositive/llap/bucket_many.q.out | 223 +++++++ .../llap/bucketizedhiveinputformat.q.out | 112 ++++ .../clientpositive/llap/bucketmapjoin6.q.out | 146 +++++ .../clientpositive/llap/bucketmapjoin7.q.out | 315 ++++++++++ .../llap/empty_dir_in_table.q.out | 46 ++ .../llap/exchgpartition2lel.q.out | 203 +++++++ ...rnal_table_with_space_in_location_path.q.out | 88 +++ .../llap/file_with_header_footer.q.out | 184 ++++++ .../llap/import_exported_table.q.out | 28 + .../llap/infer_bucket_sort_bucketed_table.q.out | 121 ++++ .../clientpositive/llap/input16_cc.q.out | 534 +++++++++++++++++ .../clientpositive/llap/insert_dir_distcp.q.out | 14 + .../llap/join_acid_non_acid.q.out | 58 ++ .../clientpositive/llap/leftsemijoin_mr.q.out | 98 ++++ .../llap/list_bucket_dml_10.q.out | 256 ++++++++ .../results/clientpositive/llap/load_fs2.q.out | 121 ++++ .../load_hdfs_file_with_space_in_the_name.q.out | 25 + .../llap/non_native_window_udf.q.out | 52 ++ .../clientpositive/llap/quotedid_smb.q.out | 81 +++ .../llap/reduce_deduplicate.q.out | 387 ++++++++++++ .../clientpositive/llap/remote_script.q.out | 49 ++ .../clientpositive/llap/schemeAuthority.q.out | 84 +++ .../clientpositive/llap/schemeAuthority2.q.out | 53 ++ .../llap/table_nonprintable.q.out | 72 +++ .../llap/temp_table_external.q.out | 34 ++ .../clientpositive/llap/uber_reduce.q.out | 31 + 30 files changed, 4247 insertions(+), 45 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/05e25103/itests/src/test/resources/testconfiguration.properties ---------------------------------------------------------------------- diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index 96a03f6..4597ace 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -1,53 +1,18 @@ # NOTE: files should be listed in alphabetical order -minimr.query.files=auto_sortmerge_join_16.q,\ - bucket4.q,\ - bucket5.q,\ - bucket6.q,\ - bucket_many.q,\ - bucket_num_reducers.q,\ - bucket_num_reducers2.q,\ - bucketizedhiveinputformat.q,\ - bucketmapjoin6.q,\ - bucketmapjoin7.q,\ - disable_merge_for_bucketing.q,\ - empty_dir_in_table.q,\ - exchgpartition2lel.q,\ - external_table_with_space_in_location_path.q,\ - file_with_header_footer.q,\ - groupby2.q,\ - import_exported_table.q,\ - index_bitmap3.q,\ - index_bitmap_auto.q,\ - infer_bucket_sort_bucketed_table.q,\ +minimr.query.files=infer_bucket_sort_map_operators.q,\ infer_bucket_sort_dyn_part.q,\ - infer_bucket_sort_map_operators.q,\ infer_bucket_sort_merge.q,\ - infer_bucket_sort_num_buckets.q,\ infer_bucket_sort_reducers_power_two.q,\ - input16_cc.q,\ - insert_dir_distcp.q,\ - join1.q,\ - join_acid_non_acid.q,\ - leftsemijoin_mr.q,\ - list_bucket_dml_10.q,\ - load_fs2.q,\ - load_hdfs_file_with_space_in_the_name.q,\ - non_native_window_udf.q, \ - parallel_orderby.q,\ - quotedid_smb.q,\ - reduce_deduplicate.q,\ - remote_script.q,\ + infer_bucket_sort_num_buckets.q,\ root_dir_external_table.q,\ - schemeAuthority.q,\ - schemeAuthority2.q,\ + parallel_orderby.q,\ + bucket_num_reducers.q,\ + udf_using.q,\ + index_bitmap3.q,\ + index_bitmap_auto.q,\ scriptfile1.q,\ - scriptfile1_win.q,\ - skewjoin_onesideskew.q,\ - table_nonprintable.q,\ - temp_table_external.q,\ - truncate_column_buckets.q,\ - uber_reduce.q,\ - udf_using.q + bucket_num_reducers2.q,\ + scriptfile1_win.q # These tests are disabled for minimr # ql_rewrite_gbtoidx.q,\ @@ -245,7 +210,6 @@ minillap.shared.query.files=acid_globallimit.q,\ script_env_var1.q,\ script_env_var2.q,\ script_pipe.q,\ - scriptfile1.q,\ selectDistinctStar.q,\ select_dummy_source.q,\ skewjoin.q,\ @@ -440,29 +404,57 @@ minillap.shared.query.files=acid_globallimit.q,\ minillap.query.files=acid_bucket_pruning.q,\ acid_vectorization_missing_cols.q,\ + auto_sortmerge_join_16.q,\ + bucket4.q,\ + bucket5.q,\ + bucket6.q,\ + bucket_many.q,\ bucket_map_join_tez1.q,\ bucket_map_join_tez2.q,\ + bucketizedhiveinputformat.q,\ + bucketmapjoin6.q,\ + bucketmapjoin7.q,\ bucketpruning1.q,\ constprog_dpp.q,\ + disable_merge_for_bucketing.q,\ dynamic_partition_pruning.q,\ dynamic_partition_pruning_2.q,\ + empty_dir_in_table.q,\ + exchgpartition2lel.q,\ explainuser_1.q,\ explainuser_2.q,\ explainuser_4.q,\ + external_table_with_space_in_location_path.q,\ + file_with_header_footer.q,\ + groupby2.q,\ hybridgrace_hashjoin_1.q,\ hybridgrace_hashjoin_2.q,\ + import_exported_table.q,\ + infer_bucket_sort_bucketed_table.q,\ + input16_cc.q,\ + insert_dir_distcp.q,\ + join1.q,\ + join_acid_non_acid.q,\ + leftsemijoin_mr.q,\ + list_bucket_dml_10.q,\ llap_nullscan.q,\ llap_udf.q,\ llapdecider.q,\ + load_fs2.q,\ + load_hdfs_file_with_space_in_the_name.q,\ lvj_mapjoin.q,\ mapjoin_decimal.q,\ mergejoin_3way.q,\ mrr.q,\ + non_native_window_udf.q,\ orc_llap.q,\ orc_llap_counters.q,\ orc_llap_counters1.q,\ orc_llap_nonvector.q,\ orc_ppd_basic.q,\ + quotedid_smb.q,\ + reduce_deduplicate.q,\ + remote_script.q,\ schema_evol_orc_acid_part.q,\ schema_evol_orc_acid_part_update.q,\ schema_evol_orc_acid_table.q,\ @@ -492,7 +484,11 @@ minillap.query.files=acid_bucket_pruning.q,\ schema_evol_text_vecrow_part_all_complex.q,\ schema_evol_text_vecrow_part_all_primitive.q,\ schema_evol_text_vecrow_table.q,\ + schemeAuthority.q,\ + schemeAuthority2.q,\ smb_cache.q,\ + table_nonprintable.q,\ + temp_table_external.q,\ tez_aggr_part_stats.q,\ tez_bmj_schema_evolution.q,\ tez_dml.q,\ @@ -520,6 +516,7 @@ minillap.query.files=acid_bucket_pruning.q,\ tez_union_view.q,\ tez_vector_dynpart_hashjoin_1.q,\ tez_vector_dynpart_hashjoin_2.q,\ + uber_reduce.q,\ vectorized_dynamic_partition_pruning.q,\ windowing_gby.q http://git-wip-us.apache.org/repos/asf/hive/blob/05e25103/ql/src/test/queries/clientpositive/bucket5.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/bucket5.q b/ql/src/test/queries/clientpositive/bucket5.q index 0b3bcc5..bf4fbb0 100644 --- a/ql/src/test/queries/clientpositive/bucket5.q +++ b/ql/src/test/queries/clientpositive/bucket5.q @@ -4,6 +4,7 @@ set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; set hive.exec.reducers.max = 1; set hive.merge.mapfiles = true; set hive.merge.mapredfiles = true; +set hive.merge.tezfiles = true; set mapred.reduce.tasks = 2; -- Tests that when a multi insert inserts into a bucketed table and a table which is not bucketed http://git-wip-us.apache.org/repos/asf/hive/blob/05e25103/ql/src/test/results/clientpositive/llap/bucket5.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/bucket5.q.out b/ql/src/test/results/clientpositive/llap/bucket5.q.out new file mode 100644 index 0000000..708d1a0 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/bucket5.q.out @@ -0,0 +1,586 @@ +PREHOOK: query: -- Tests that when a multi insert inserts into a bucketed table and a table which is not bucketed +-- the bucketed table is not merged and the table which is not bucketed is + +CREATE TABLE bucketed_table(key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@bucketed_table +POSTHOOK: query: -- Tests that when a multi insert inserts into a bucketed table and a table which is not bucketed +-- the bucketed table is not merged and the table which is not bucketed is + +CREATE TABLE bucketed_table(key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@bucketed_table +PREHOOK: query: CREATE TABLE unbucketed_table(key INT, value STRING) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@unbucketed_table +POSTHOOK: query: CREATE TABLE unbucketed_table(key INT, value STRING) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@unbucketed_table +PREHOOK: query: EXPLAIN EXTENDED +FROM src +INSERT OVERWRITE TABLE bucketed_table SELECT key, value +INSERT OVERWRITE TABLE unbucketed_table SELECT key, value cluster by key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN EXTENDED +FROM src +INSERT OVERWRITE TABLE bucketed_table SELECT key, value +INSERT OVERWRITE TABLE unbucketed_table SELECT key, value cluster by key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-3 depends on stages: Stage-2, Stage-7, Stage-6, Stage-9 + Stage-0 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-1 + Stage-10 depends on stages: Stage-2 , consists of Stage-7, Stage-6, Stage-8 + Stage-7 + Stage-6 + Stage-8 + Stage-9 depends on stages: Stage-8 + +STAGE PLANS: + Stage: Stage-2 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + GatherStats: false + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: UDFToInteger(_col0) (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: UDFToInteger(_col0) (type: int) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + value expressions: _col0 (type: string), _col1 (type: string) + auto parallelism: false + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + value expressions: _col1 (type: string) + auto parallelism: false + Execution mode: llap + LLAP IO: no inputs + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: src + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src + Truncated Path -> Alias: + /src [src] + Reducer 2 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Select Operator + expressions: UDFToInteger(VALUE._col0) (type: int), VALUE._col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + SORTBUCKETCOLSPREFIX TRUE + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.bucketed_table + numFiles 0 + numRows 0 + rawDataSize 0 + serialization.ddl struct bucketed_table { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 0 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucketed_table + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + Reducer 3 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Select Operator + expressions: UDFToInteger(KEY.reducesinkkey0) (type: int), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 2 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.unbucketed_table + numFiles 0 + numRows 0 + rawDataSize 0 + serialization.ddl struct unbucketed_table { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 0 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.unbucketed_table + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + + Stage: Stage-3 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + SORTBUCKETCOLSPREFIX TRUE + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.bucketed_table + numFiles 0 + numRows 0 + rawDataSize 0 + serialization.ddl struct bucketed_table { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 0 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucketed_table + + Stage: Stage-4 + Stats-Aggr Operator +#### A masked pattern was here #### + + Stage: Stage-1 + Move Operator + tables: + replace: true +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.unbucketed_table + numFiles 0 + numRows 0 + rawDataSize 0 + serialization.ddl struct unbucketed_table { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 0 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.unbucketed_table + + Stage: Stage-5 + Stats-Aggr Operator +#### A masked pattern was here #### + + Stage: Stage-10 + Conditional Operator + + Stage: Stage-7 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-6 + Tez +#### A masked pattern was here #### + Vertices: + File Merge + Map Operator Tree: + TableScan + GatherStats: false + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.unbucketed_table + numFiles 0 + numRows 0 + rawDataSize 0 + serialization.ddl struct unbucketed_table { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 0 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.unbucketed_table + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -ext-10004 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.unbucketed_table + numFiles 0 + numRows 0 + rawDataSize 0 + serialization.ddl struct unbucketed_table { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 0 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.unbucketed_table + numFiles 0 + numRows 0 + rawDataSize 0 + serialization.ddl struct unbucketed_table { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 0 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.unbucketed_table + name: default.unbucketed_table + Truncated Path -> Alias: +#### A masked pattern was here #### + + Stage: Stage-8 + Tez +#### A masked pattern was here #### + Vertices: + File Merge + Map Operator Tree: + TableScan + GatherStats: false + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.unbucketed_table + numFiles 0 + numRows 0 + rawDataSize 0 + serialization.ddl struct unbucketed_table { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 0 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.unbucketed_table + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -ext-10004 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.unbucketed_table + numFiles 0 + numRows 0 + rawDataSize 0 + serialization.ddl struct unbucketed_table { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 0 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.unbucketed_table + numFiles 0 + numRows 0 + rawDataSize 0 + serialization.ddl struct unbucketed_table { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 0 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.unbucketed_table + name: default.unbucketed_table + Truncated Path -> Alias: +#### A masked pattern was here #### + + Stage: Stage-9 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + +PREHOOK: query: FROM src +INSERT OVERWRITE TABLE bucketed_table SELECT key, value +INSERT OVERWRITE TABLE unbucketed_table SELECT key, value cluster by key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@bucketed_table +PREHOOK: Output: default@unbucketed_table +POSTHOOK: query: FROM src +INSERT OVERWRITE TABLE bucketed_table SELECT key, value +INSERT OVERWRITE TABLE unbucketed_table SELECT key, value cluster by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@bucketed_table +POSTHOOK: Output: default@unbucketed_table +POSTHOOK: Lineage: bucketed_table.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: bucketed_table.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: unbucketed_table.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: unbucketed_table.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: DESC FORMATTED bucketed_table +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@bucketed_table +POSTHOOK: query: DESC FORMATTED bucketed_table +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@bucketed_table +# col_name data_type comment + +key int +value string + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + SORTBUCKETCOLSPREFIX TRUE + numFiles 2 + numRows 500 + rawDataSize 5312 + totalSize 5812 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: 2 +Bucket Columns: [key] +Sort Columns: [Order(col:key, order:1)] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: SELECT * FROM bucketed_table TABLESAMPLE (BUCKET 1 OUT OF 2) s LIMIT 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketed_table +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM bucketed_table TABLESAMPLE (BUCKET 1 OUT OF 2) s LIMIT 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketed_table +#### A masked pattern was here #### +0 val_0 +0 val_0 +0 val_0 +2 val_2 +4 val_4 +8 val_8 +10 val_10 +12 val_12 +12 val_12 +18 val_18 +PREHOOK: query: SELECT * FROM bucketed_table TABLESAMPLE (BUCKET 2 OUT OF 2) s LIMIT 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketed_table +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM bucketed_table TABLESAMPLE (BUCKET 2 OUT OF 2) s LIMIT 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketed_table +#### A masked pattern was here #### +5 val_5 +5 val_5 +5 val_5 +9 val_9 +11 val_11 +15 val_15 +15 val_15 +17 val_17 +19 val_19 +27 val_27 +PREHOOK: query: -- Should be 2 (not merged) +SELECT COUNT(DISTINCT INPUT__FILE__NAME) FROM bucketed_table +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketed_table +#### A masked pattern was here #### +POSTHOOK: query: -- Should be 2 (not merged) +SELECT COUNT(DISTINCT INPUT__FILE__NAME) FROM bucketed_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketed_table +#### A masked pattern was here #### +2 +PREHOOK: query: -- Should be 1 (merged) +SELECT COUNT(DISTINCT INPUT__FILE__NAME) FROM unbucketed_table +PREHOOK: type: QUERY +PREHOOK: Input: default@unbucketed_table +#### A masked pattern was here #### +POSTHOOK: query: -- Should be 1 (merged) +SELECT COUNT(DISTINCT INPUT__FILE__NAME) FROM unbucketed_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@unbucketed_table +#### A masked pattern was here #### +1 http://git-wip-us.apache.org/repos/asf/hive/blob/05e25103/ql/src/test/results/clientpositive/llap/bucket6.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/bucket6.q.out b/ql/src/test/results/clientpositive/llap/bucket6.q.out new file mode 100644 index 0000000..20895f8 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/bucket6.q.out @@ -0,0 +1,203 @@ +PREHOOK: query: CREATE TABLE src_bucket(key STRING, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@src_bucket +POSTHOOK: query: CREATE TABLE src_bucket(key STRING, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@src_bucket +PREHOOK: query: explain +insert into table src_bucket select key,value from srcpart +PREHOOK: type: QUERY +POSTHOOK: query: explain +insert into table src_bucket select key,value from srcpart +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: srcpart + Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_bucket + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_bucket + + Stage: Stage-3 + Stats-Aggr Operator + +PREHOOK: query: insert into table src_bucket select key,value from srcpart +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +PREHOOK: Output: default@src_bucket +POSTHOOK: query: insert into table src_bucket select key,value from srcpart +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@src_bucket +POSTHOOK: Lineage: src_bucket.key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: src_bucket.value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select * from src_bucket limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@src_bucket +#### A masked pattern was here #### +POSTHOOK: query: select * from src_bucket limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_bucket +#### A masked pattern was here #### +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +103 val_103 +103 val_103 +103 val_103 +103 val_103 +103 val_103 +103 val_103 +103 val_103 +103 val_103 +105 val_105 +105 val_105 +105 val_105 +105 val_105 +11 val_11 +11 val_11 +11 val_11 +11 val_11 +114 val_114 +114 val_114 +114 val_114 +114 val_114 +116 val_116 +116 val_116 +116 val_116 +116 val_116 +118 val_118 +118 val_118 +118 val_118 +118 val_118 +118 val_118 +118 val_118 +118 val_118 +118 val_118 +125 val_125 +125 val_125 +125 val_125 +125 val_125 +125 val_125 +125 val_125 +125 val_125 +125 val_125 +129 val_129 +129 val_129 +129 val_129 +129 val_129 +129 val_129 +129 val_129 +129 val_129 +129 val_129 +134 val_134 +134 val_134 +134 val_134 +134 val_134 +134 val_134 +134 val_134 +134 val_134 +134 val_134 +136 val_136 +136 val_136 +136 val_136 +136 val_136 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +143 val_143 +143 val_143 +143 val_143 +143 val_143 +145 val_145 +145 val_145 +145 val_145 +145 val_145 +149 val_149 +149 val_149 +149 val_149 +149 val_149 http://git-wip-us.apache.org/repos/asf/hive/blob/05e25103/ql/src/test/results/clientpositive/llap/bucket_many.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/bucket_many.q.out b/ql/src/test/results/clientpositive/llap/bucket_many.q.out new file mode 100644 index 0000000..8433022 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/bucket_many.q.out @@ -0,0 +1,223 @@ +PREHOOK: query: create table bucket_many(key int, value string) clustered by (key) into 256 buckets +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@bucket_many +POSTHOOK: query: create table bucket_many(key int, value string) clustered by (key) into 256 buckets +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@bucket_many +PREHOOK: query: explain extended +insert overwrite table bucket_many +select * from src +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +insert overwrite table bucket_many +select * from src +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + GatherStats: false + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Map-reduce partition columns: UDFToInteger(_col0) (type: int) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + value expressions: _col0 (type: string), _col1 (type: string) + auto parallelism: false + Execution mode: llap + LLAP IO: no inputs + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: src + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src + Truncated Path -> Alias: + /src [src] + Reducer 2 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Select Operator + expressions: UDFToInteger(VALUE._col0) (type: int), VALUE._col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 16 + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count 256 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.bucket_many + numFiles 0 + numRows 0 + rawDataSize 0 + serialization.ddl struct bucket_many { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 0 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_many + TotalFiles: 256 + GatherStats: true + MultiFileSpray: true + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count 256 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.bucket_many + numFiles 0 + numRows 0 + rawDataSize 0 + serialization.ddl struct bucket_many { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 0 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_many + + Stage: Stage-3 + Stats-Aggr Operator +#### A masked pattern was here #### + +PREHOOK: query: insert overwrite table bucket_many +select * from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@bucket_many +POSTHOOK: query: insert overwrite table bucket_many +select * from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@bucket_many +POSTHOOK: Lineage: bucket_many.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: bucket_many.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: explain +select * from bucket_many tablesample (bucket 1 out of 256) s +PREHOOK: type: QUERY +POSTHOOK: query: explain +select * from bucket_many tablesample (bucket 1 out of 256) s +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: s + Filter Operator + predicate: (((hash(key) & 2147483647) % 256) = 0) (type: boolean) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + ListSink + +PREHOOK: query: select * from bucket_many tablesample (bucket 1 out of 256) s +PREHOOK: type: QUERY +PREHOOK: Input: default@bucket_many +#### A masked pattern was here #### +POSTHOOK: query: select * from bucket_many tablesample (bucket 1 out of 256) s +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucket_many +#### A masked pattern was here #### +0 val_0 +0 val_0 +256 val_256 +256 val_256 +0 val_0 http://git-wip-us.apache.org/repos/asf/hive/blob/05e25103/ql/src/test/results/clientpositive/llap/bucketizedhiveinputformat.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/bucketizedhiveinputformat.q.out b/ql/src/test/results/clientpositive/llap/bucketizedhiveinputformat.q.out new file mode 100644 index 0000000..163e819 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/bucketizedhiveinputformat.q.out @@ -0,0 +1,112 @@ +PREHOOK: query: CREATE TABLE T1(name STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T1 +POSTHOOK: query: CREATE TABLE T1(name STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' INTO TABLE T1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' INTO TABLE T1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1 +PREHOOK: query: CREATE TABLE T2(name STRING) STORED AS SEQUENCEFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T2 +POSTHOOK: query: CREATE TABLE T2(name STRING) STORED AS SEQUENCEFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T2 +Warning: Shuffle Join MERGEJOIN[16][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: INSERT OVERWRITE TABLE T2 SELECT * FROM ( +SELECT tmp1.name as name FROM ( + SELECT name, 'MMM' AS n FROM T1) tmp1 + JOIN (SELECT 'MMM' AS n FROM T1) tmp2 + JOIN (SELECT 'MMM' AS n FROM T1) tmp3 + ON tmp1.n = tmp2.n AND tmp1.n = tmp3.n) ttt LIMIT 5000000 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Output: default@t2 +POSTHOOK: query: INSERT OVERWRITE TABLE T2 SELECT * FROM ( +SELECT tmp1.name as name FROM ( + SELECT name, 'MMM' AS n FROM T1) tmp1 + JOIN (SELECT 'MMM' AS n FROM T1) tmp2 + JOIN (SELECT 'MMM' AS n FROM T1) tmp3 + ON tmp1.n = tmp2.n AND tmp1.n = tmp3.n) ttt LIMIT 5000000 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Output: default@t2 +POSTHOOK: Lineage: t2.name SIMPLE [(t1)t1.FieldSchema(name:name, type:string, comment:null), ] +PREHOOK: query: CREATE TABLE T3(name STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T3 +POSTHOOK: query: CREATE TABLE T3(name STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T3 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' INTO TABLE T3 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t3 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' INTO TABLE T3 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t3 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv2.txt' INTO TABLE T3 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t3 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv2.txt' INTO TABLE T3 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t3 +PREHOOK: query: -- 2 split by max.split.size +SELECT COUNT(1) FROM T2 +PREHOOK: type: QUERY +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: -- 2 split by max.split.size +SELECT COUNT(1) FROM T2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t2 +#### A masked pattern was here #### +5000000 +PREHOOK: query: -- 1 split for two file +SELECT COUNT(1) FROM T3 +PREHOOK: type: QUERY +PREHOOK: Input: default@t3 +#### A masked pattern was here #### +POSTHOOK: query: -- 1 split for two file +SELECT COUNT(1) FROM T3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t3 +#### A masked pattern was here #### +1000 +PREHOOK: query: -- 1 split +SELECT COUNT(1) FROM T2 +PREHOOK: type: QUERY +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: -- 1 split +SELECT COUNT(1) FROM T2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t2 +#### A masked pattern was here #### +5000000 +PREHOOK: query: -- 2 split for two file +SELECT COUNT(1) FROM T3 +PREHOOK: type: QUERY +PREHOOK: Input: default@t3 +#### A masked pattern was here #### +POSTHOOK: query: -- 2 split for two file +SELECT COUNT(1) FROM T3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t3 +#### A masked pattern was here #### +1000 http://git-wip-us.apache.org/repos/asf/hive/blob/05e25103/ql/src/test/results/clientpositive/llap/bucketmapjoin6.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/bucketmapjoin6.q.out b/ql/src/test/results/clientpositive/llap/bucketmapjoin6.q.out new file mode 100644 index 0000000..198404b --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/bucketmapjoin6.q.out @@ -0,0 +1,146 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +create table tmp1 (a string, b string) clustered by (a) sorted by (a) into 10 buckets +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tmp1 +POSTHOOK: query: -- SORT_QUERY_RESULTS + +create table tmp1 (a string, b string) clustered by (a) sorted by (a) into 10 buckets +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tmp1 +PREHOOK: query: create table tmp2 (a string, b string) clustered by (a) sorted by (a) into 10 buckets +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tmp2 +POSTHOOK: query: create table tmp2 (a string, b string) clustered by (a) sorted by (a) into 10 buckets +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tmp2 +PREHOOK: query: insert overwrite table tmp1 select * from src where key < 50 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@tmp1 +POSTHOOK: query: insert overwrite table tmp1 select * from src where key < 50 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@tmp1 +POSTHOOK: Lineage: tmp1.a SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tmp1.b SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert overwrite table tmp2 select * from src where key < 50 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@tmp2 +POSTHOOK: query: insert overwrite table tmp2 select * from src where key < 50 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@tmp2 +POSTHOOK: Lineage: tmp2.a SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tmp2.b SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: create table tmp3 (a string, b string, c string) clustered by (a) sorted by (a) into 10 buckets +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tmp3 +POSTHOOK: query: create table tmp3 (a string, b string, c string) clustered by (a) sorted by (a) into 10 buckets +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tmp3 +PREHOOK: query: insert overwrite table tmp3 + select /*+ MAPJOIN(l) */ i.a, i.b, l.b + from tmp1 i join tmp2 l ON i.a = l.a +PREHOOK: type: QUERY +PREHOOK: Input: default@tmp1 +PREHOOK: Input: default@tmp2 +PREHOOK: Output: default@tmp3 +POSTHOOK: query: insert overwrite table tmp3 + select /*+ MAPJOIN(l) */ i.a, i.b, l.b + from tmp1 i join tmp2 l ON i.a = l.a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tmp1 +POSTHOOK: Input: default@tmp2 +POSTHOOK: Output: default@tmp3 +POSTHOOK: Lineage: tmp3.a SIMPLE [(tmp1)i.FieldSchema(name:a, type:string, comment:null), ] +POSTHOOK: Lineage: tmp3.b SIMPLE [(tmp1)i.FieldSchema(name:b, type:string, comment:null), ] +POSTHOOK: Lineage: tmp3.c SIMPLE [(tmp2)l.FieldSchema(name:b, type:string, comment:null), ] +PREHOOK: query: select * from tmp3 +PREHOOK: type: QUERY +PREHOOK: Input: default@tmp3 +#### A masked pattern was here #### +POSTHOOK: query: select * from tmp3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tmp3 +#### A masked pattern was here #### +0 val_0 val_0 +0 val_0 val_0 +0 val_0 val_0 +0 val_0 val_0 +0 val_0 val_0 +0 val_0 val_0 +0 val_0 val_0 +0 val_0 val_0 +0 val_0 val_0 +10 val_10 val_10 +11 val_11 val_11 +12 val_12 val_12 +12 val_12 val_12 +12 val_12 val_12 +12 val_12 val_12 +15 val_15 val_15 +15 val_15 val_15 +15 val_15 val_15 +15 val_15 val_15 +17 val_17 val_17 +18 val_18 val_18 +18 val_18 val_18 +18 val_18 val_18 +18 val_18 val_18 +19 val_19 val_19 +2 val_2 val_2 +20 val_20 val_20 +24 val_24 val_24 +24 val_24 val_24 +24 val_24 val_24 +24 val_24 val_24 +26 val_26 val_26 +26 val_26 val_26 +26 val_26 val_26 +26 val_26 val_26 +27 val_27 val_27 +28 val_28 val_28 +30 val_30 val_30 +33 val_33 val_33 +34 val_34 val_34 +35 val_35 val_35 +35 val_35 val_35 +35 val_35 val_35 +35 val_35 val_35 +35 val_35 val_35 +35 val_35 val_35 +35 val_35 val_35 +35 val_35 val_35 +35 val_35 val_35 +37 val_37 val_37 +37 val_37 val_37 +37 val_37 val_37 +37 val_37 val_37 +4 val_4 val_4 +41 val_41 val_41 +42 val_42 val_42 +42 val_42 val_42 +42 val_42 val_42 +42 val_42 val_42 +43 val_43 val_43 +44 val_44 val_44 +47 val_47 val_47 +5 val_5 val_5 +5 val_5 val_5 +5 val_5 val_5 +5 val_5 val_5 +5 val_5 val_5 +5 val_5 val_5 +5 val_5 val_5 +5 val_5 val_5 +5 val_5 val_5 +8 val_8 val_8 +9 val_9 val_9 http://git-wip-us.apache.org/repos/asf/hive/blob/05e25103/ql/src/test/results/clientpositive/llap/bucketmapjoin7.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/bucketmapjoin7.q.out b/ql/src/test/results/clientpositive/llap/bucketmapjoin7.q.out new file mode 100644 index 0000000..b515af6 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/bucketmapjoin7.q.out @@ -0,0 +1,315 @@ +PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part_1 (key INT, value STRING) PARTITIONED BY (ds STRING, hr STRING) +CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@srcbucket_mapjoin_part_1 +POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part_1 (key INT, value STRING) PARTITIONED BY (ds STRING, hr STRING) +CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@srcbucket_mapjoin_part_1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (ds='2008-04-08', hr='0') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part_1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (ds='2008-04-08', hr='0') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part_1 +POSTHOOK: Output: default@srcbucket_mapjoin_part_1@ds=2008-04-08/hr=0 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (ds='2008-04-08', hr='0') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part_1@ds=2008-04-08/hr=0 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (ds='2008-04-08', hr='0') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part_1@ds=2008-04-08/hr=0 +PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part_2 (key INT, value STRING) PARTITIONED BY (ds STRING, hr STRING) +CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@srcbucket_mapjoin_part_2 +POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part_2 (key INT, value STRING) PARTITIONED BY (ds STRING, hr STRING) +CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@srcbucket_mapjoin_part_2 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (ds='2008-04-08', hr='0') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part_2 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (ds='2008-04-08', hr='0') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part_2 +POSTHOOK: Output: default@srcbucket_mapjoin_part_2@ds=2008-04-08/hr=0 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (ds='2008-04-08', hr='0') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part_2@ds=2008-04-08/hr=0 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (ds='2008-04-08', hr='0') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part_2@ds=2008-04-08/hr=0 +PREHOOK: query: -- Tests that bucket map join works with a table with more than one level of partitioning + +EXPLAIN EXTENDED +SELECT /*+ MAPJOIN(b) */ a.key, b.value +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key AND a.ds = '2008-04-08' AND b.ds = '2008-04-08' +ORDER BY a.key, b.value LIMIT 1 +PREHOOK: type: QUERY +POSTHOOK: query: -- Tests that bucket map join works with a table with more than one level of partitioning + +EXPLAIN EXTENDED +SELECT /*+ MAPJOIN(b) */ a.key, b.value +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key AND a.ds = '2008-04-08' AND b.ds = '2008-04-08' +ORDER BY a.key, b.value LIMIT 1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 687 Data size: 8246 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 687 Data size: 8246 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 687 Data size: 8246 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: true + Execution mode: llap + LLAP IO: no inputs + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: hr=0 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 0 + properties: + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_1 + numFiles 2 + numRows 0 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 0 + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2750 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_1 + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcbucket_mapjoin_part_1 + name: default.srcbucket_mapjoin_part_1 + Truncated Path -> Alias: + /srcbucket_mapjoin_part_1/ds=2008-04-08/hr=0 [a] + Map 4 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 26 Data size: 2958 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 26 Data size: 2958 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 26 Data size: 2958 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: value (type: string) + auto parallelism: true + Execution mode: llap + LLAP IO: no inputs + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: hr=0 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 0 + properties: + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_2 + numFiles 2 + numRows 0 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 0 + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2750 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_2 + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcbucket_mapjoin_part_2 + name: default.srcbucket_mapjoin_part_2 + Truncated Path -> Alias: + /srcbucket_mapjoin_part_2/ds=2008-04-08/hr=0 [b] + Reducer 2 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col8 + Position of Big Table: 0 + Statistics: Num rows: 755 Data size: 9070 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col8 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 755 Data size: 9070 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + null sort order: aa + sort order: ++ + Statistics: Num rows: 755 Data size: 9070 Basic stats: COMPLETE Column stats: NONE + tag: -1 + TopN: 1 + TopN Hash Memory Usage: 0.1 + auto parallelism: false + Reducer 3 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 755 Data size: 9070 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types int:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: 1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT /*+ MAPJOIN(b) */ a.key, b.value +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key AND a.ds = '2008-04-08' AND b.ds = '2008-04-08' +ORDER BY a.key, b.value LIMIT 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin_part_1 +PREHOOK: Input: default@srcbucket_mapjoin_part_1@ds=2008-04-08/hr=0 +PREHOOK: Input: default@srcbucket_mapjoin_part_2 +PREHOOK: Input: default@srcbucket_mapjoin_part_2@ds=2008-04-08/hr=0 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ a.key, b.value +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key AND a.ds = '2008-04-08' AND b.ds = '2008-04-08' +ORDER BY a.key, b.value LIMIT 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin_part_1 +POSTHOOK: Input: default@srcbucket_mapjoin_part_1@ds=2008-04-08/hr=0 +POSTHOOK: Input: default@srcbucket_mapjoin_part_2 +POSTHOOK: Input: default@srcbucket_mapjoin_part_2@ds=2008-04-08/hr=0 +#### A masked pattern was here #### +0 val_0 http://git-wip-us.apache.org/repos/asf/hive/blob/05e25103/ql/src/test/results/clientpositive/llap/empty_dir_in_table.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/empty_dir_in_table.q.out b/ql/src/test/results/clientpositive/llap/empty_dir_in_table.q.out new file mode 100644 index 0000000..e804de3 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/empty_dir_in_table.q.out @@ -0,0 +1,46 @@ +#### A masked pattern was here #### +PREHOOK: type: CREATETABLE +#### A masked pattern was here #### +PREHOOK: Output: database:default +PREHOOK: Output: default@roottable +#### A masked pattern was here #### +POSTHOOK: type: CREATETABLE +#### A masked pattern was here #### +POSTHOOK: Output: database:default +POSTHOOK: Output: default@roottable +PREHOOK: query: select count(*) from roottable +PREHOOK: type: QUERY +PREHOOK: Input: default@roottable +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from roottable +POSTHOOK: type: QUERY +POSTHOOK: Input: default@roottable +#### A masked pattern was here #### +0 +PREHOOK: query: insert into table roottable select key from src where (key < 20) order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@roottable +POSTHOOK: query: insert into table roottable select key from src where (key < 20) order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@roottable +POSTHOOK: Lineage: roottable.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: select count(*) from roottable +PREHOOK: type: QUERY +PREHOOK: Input: default@roottable +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from roottable +POSTHOOK: type: QUERY +POSTHOOK: Input: default@roottable +#### A masked pattern was here #### +20 +PREHOOK: query: select count(*) from roottable +PREHOOK: type: QUERY +PREHOOK: Input: default@roottable +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from roottable +POSTHOOK: type: QUERY +POSTHOOK: Input: default@roottable +#### A masked pattern was here #### +20 http://git-wip-us.apache.org/repos/asf/hive/blob/05e25103/ql/src/test/results/clientpositive/llap/exchgpartition2lel.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/exchgpartition2lel.q.out b/ql/src/test/results/clientpositive/llap/exchgpartition2lel.q.out new file mode 100644 index 0000000..69d6d88 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/exchgpartition2lel.q.out @@ -0,0 +1,203 @@ +PREHOOK: query: DROP TABLE IF EXISTS t1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS t1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE IF EXISTS t2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS t2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE IF EXISTS t3 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS t3 +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE IF EXISTS t4 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS t4 +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE t1 (a int) PARTITIONED BY (d1 int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t1 +POSTHOOK: query: CREATE TABLE t1 (a int) PARTITIONED BY (d1 int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t1 +PREHOOK: query: CREATE TABLE t2 (a int) PARTITIONED BY (d1 int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t2 +POSTHOOK: query: CREATE TABLE t2 (a int) PARTITIONED BY (d1 int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t2 +PREHOOK: query: CREATE TABLE t3 (a int) PARTITIONED BY (d1 int, d2 int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t3 +POSTHOOK: query: CREATE TABLE t3 (a int) PARTITIONED BY (d1 int, d2 int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t3 +PREHOOK: query: CREATE TABLE t4 (a int) PARTITIONED BY (d1 int, d2 int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t4 +POSTHOOK: query: CREATE TABLE t4 (a int) PARTITIONED BY (d1 int, d2 int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t4 +PREHOOK: query: CREATE TABLE t5 (a int) PARTITIONED BY (d1 int, d2 int, d3 int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t5 +POSTHOOK: query: CREATE TABLE t5 (a int) PARTITIONED BY (d1 int, d2 int, d3 int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t5 +PREHOOK: query: CREATE TABLE t6 (a int) PARTITIONED BY (d1 int, d2 int, d3 int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t6 +POSTHOOK: query: CREATE TABLE t6 (a int) PARTITIONED BY (d1 int, d2 int, d3 int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t6 +PREHOOK: query: INSERT OVERWRITE TABLE t1 PARTITION (d1 = 1) SELECT key FROM src where key = 100 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@t1@d1=1 +POSTHOOK: query: INSERT OVERWRITE TABLE t1 PARTITION (d1 = 1) SELECT key FROM src where key = 100 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@t1@d1=1 +POSTHOOK: Lineage: t1 PARTITION(d1=1).a EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: INSERT OVERWRITE TABLE t3 PARTITION (d1 = 1, d2 = 1) SELECT key FROM src where key = 100 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@t3@d1=1/d2=1 +POSTHOOK: query: INSERT OVERWRITE TABLE t3 PARTITION (d1 = 1, d2 = 1) SELECT key FROM src where key = 100 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@t3@d1=1/d2=1 +POSTHOOK: Lineage: t3 PARTITION(d1=1,d2=1).a EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: INSERT OVERWRITE TABLE t5 PARTITION (d1 = 1, d2 = 1, d3=1) SELECT key FROM src where key = 100 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@t5@d1=1/d2=1/d3=1 +POSTHOOK: query: INSERT OVERWRITE TABLE t5 PARTITION (d1 = 1, d2 = 1, d3=1) SELECT key FROM src where key = 100 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@t5@d1=1/d2=1/d3=1 +POSTHOOK: Lineage: t5 PARTITION(d1=1,d2=1,d3=1).a EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: SELECT * FROM t1 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t1@d1=1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM t1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t1@d1=1 +#### A masked pattern was here #### +100 1 +PREHOOK: query: SELECT * FROM t3 +PREHOOK: type: QUERY +PREHOOK: Input: default@t3 +PREHOOK: Input: default@t3@d1=1/d2=1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM t3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t3 +POSTHOOK: Input: default@t3@d1=1/d2=1 +#### A masked pattern was here #### +100 1 1 +PREHOOK: query: ALTER TABLE t2 EXCHANGE PARTITION (d1 = 1) WITH TABLE t1 +PREHOOK: type: ALTERTABLE_EXCHANGEPARTITION +PREHOOK: Input: default@t1 +PREHOOK: Output: default@t2 +POSTHOOK: query: ALTER TABLE t2 EXCHANGE PARTITION (d1 = 1) WITH TABLE t1 +POSTHOOK: type: ALTERTABLE_EXCHANGEPARTITION +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t1@d1=1 +POSTHOOK: Output: default@t1@d1=1 +POSTHOOK: Output: default@t2 +POSTHOOK: Output: default@t2@d1=1 +PREHOOK: query: SELECT * FROM t1 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM t1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +PREHOOK: query: SELECT * FROM t2 +PREHOOK: type: QUERY +PREHOOK: Input: default@t2 +PREHOOK: Input: default@t2@d1=1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM t2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t2 +POSTHOOK: Input: default@t2@d1=1 +#### A masked pattern was here #### +100 1 +PREHOOK: query: ALTER TABLE t4 EXCHANGE PARTITION (d1 = 1, d2 = 1) WITH TABLE t3 +PREHOOK: type: ALTERTABLE_EXCHANGEPARTITION +PREHOOK: Input: default@t3 +PREHOOK: Output: default@t4 +POSTHOOK: query: ALTER TABLE t4 EXCHANGE PARTITION (d1 = 1, d2 = 1) WITH TABLE t3 +POSTHOOK: type: ALTERTABLE_EXCHANGEPARTITION +POSTHOOK: Input: default@t3 +POSTHOOK: Input: default@t3@d1=1/d2=1 +POSTHOOK: Output: default@t3@d1=1/d2=1 +POSTHOOK: Output: default@t4 +POSTHOOK: Output: default@t4@d1=1/d2=1 +PREHOOK: query: SELECT * FROM t3 +PREHOOK: type: QUERY +PREHOOK: Input: default@t3 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM t3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t3 +#### A masked pattern was here #### +PREHOOK: query: SELECT * FROM t4 +PREHOOK: type: QUERY +PREHOOK: Input: default@t4 +PREHOOK: Input: default@t4@d1=1/d2=1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM t4 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t4 +POSTHOOK: Input: default@t4@d1=1/d2=1 +#### A masked pattern was here #### +100 1 1 +PREHOOK: query: ALTER TABLE t6 EXCHANGE PARTITION (d1 = 1, d2 = 1, d3 = 1) WITH TABLE t5 +PREHOOK: type: ALTERTABLE_EXCHANGEPARTITION +PREHOOK: Input: default@t5 +PREHOOK: Output: default@t6 +POSTHOOK: query: ALTER TABLE t6 EXCHANGE PARTITION (d1 = 1, d2 = 1, d3 = 1) WITH TABLE t5 +POSTHOOK: type: ALTERTABLE_EXCHANGEPARTITION +POSTHOOK: Input: default@t5 +POSTHOOK: Input: default@t5@d1=1/d2=1/d3=1 +POSTHOOK: Output: default@t5@d1=1/d2=1/d3=1 +POSTHOOK: Output: default@t6 +POSTHOOK: Output: default@t6@d1=1/d2=1/d3=1 +PREHOOK: query: SELECT * FROM t5 +PREHOOK: type: QUERY +PREHOOK: Input: default@t5 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM t5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t5 +#### A masked pattern was here #### +PREHOOK: query: SELECT * FROM t6 +PREHOOK: type: QUERY +PREHOOK: Input: default@t6 +PREHOOK: Input: default@t6@d1=1/d2=1/d3=1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM t6 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t6 +POSTHOOK: Input: default@t6@d1=1/d2=1/d3=1 +#### A masked pattern was here #### +100 1 1 1