HIVE-15114: Remove extra MoveTask operators from the ConditionalTask (Sergio Pena, reviewed by Sahil Takiar and Aihua Xu)
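The effect of the patch: with hive.blobstore.optimizations.enabled=true, plans that used to chain two MoveTasks behind the ConditionalTask (MapReduce output to a staging directory, then staging directory to the final table or directory) now keep a single MoveTask that writes straight to the final destination; the new .q.out plans below show Move Operator stages that load the table directly instead of first relocating the staging directory. A minimal, self-contained sketch of that merge idea, using hypothetical names rather than Hive's actual GenMapRedUtils API:

// Hypothetical sketch (not Hive's actual GenMapRedUtils code). A
// ConditionalTask branch used to carry a chain of MoveTasks; on a
// blobstore every hop is a full copy of the data, so consecutive
// moves whose paths line up can be merged into a single MoveTask.
import java.util.ArrayList;
import java.util.List;

final class MoveTaskSketch {
    final String source;
    final String destination;

    MoveTaskSketch(String source, String destination) {
        this.source = source;
        this.destination = destination;
    }

    @Override
    public String toString() {
        return source + " -> " + destination;
    }

    /**
     * Whenever one move's destination is exactly the next move's source,
     * replace the pair with a single move from the first source to the
     * last destination. Applied left to right this flattens any chain.
     */
    static List<MoveTaskSketch> mergeMoveChains(List<MoveTaskSketch> tasks) {
        List<MoveTaskSketch> merged = new ArrayList<>();
        for (MoveTaskSketch task : tasks) {
            MoveTaskSketch last = merged.isEmpty() ? null : merged.get(merged.size() - 1);
            if (last != null && last.destination.equals(task.source)) {
                merged.set(merged.size() - 1,
                        new MoveTaskSketch(last.source, task.destination));
            } else {
                merged.add(task);
            }
        }
        return merged;
    }

    public static void main(String[] args) {
        // Two hops through an S3 staging area collapse into one direct move.
        List<MoveTaskSketch> plan = List.of(
                new MoveTaskSketch("s3a://bucket/.hive-staging/-ext-10002",
                                   "s3a://bucket/.hive-staging/-ext-10000"),
                new MoveTaskSketch("s3a://bucket/.hive-staging/-ext-10000",
                                   "s3a://bucket/warehouse/table1"));
        mergeMoveChains(plan).forEach(System.out::println);
        // prints: s3a://bucket/.hive-staging/-ext-10002 -> s3a://bucket/warehouse/table1
    }
}

In Hive itself the rewrite lives in ql/optimizer/GenMapRedUtils.java (see the diffstat below) and, judging by the settings the new tests pin down, only applies when the blobstore optimization flags are enabled.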
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/e00b1a33
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/e00b1a33
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/e00b1a33

Branch: refs/heads/master
Commit: e00b1a3392bb4474f89ca8eea4f253e185596299
Parents: 0d49b36
Author: Sergio Pena <[email protected]>
Authored: Tue Nov 29 10:03:13 2016 -0600
Committer: Sergio Pena <[email protected]>
Committed: Tue Nov 29 10:03:13 2016 -0600

----------------------------------------------------------------------
 .../test/queries/clientpositive/insert_into.q   |   8 -
 .../queries/clientpositive/insert_into_table.q  |  20 +
 .../clientpositive/insert_overwrite_directory.q |  27 +
 .../clientpositive/insert_overwrite_table.q     |  22 +
 .../results/clientpositive/insert_into.q.out    | 343 ----------
 .../clientpositive/insert_into_table.q.out      | 599 +++++++++++++++++
 .../insert_overwrite_directory.q.out            | 653 ++++++++++++++++++
 .../clientpositive/insert_overwrite_table.q.out | 655 +++++++++++++++++++
 .../hive/ql/optimizer/GenMapRedUtils.java       | 134 +++-
 ...TestGenMapRedUtilsCreateConditionalTask.java | 199 ++++++
 10 files changed, 2308 insertions(+), 352 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/hive/blob/e00b1a33/itests/hive-blobstore/src/test/queries/clientpositive/insert_into.q
----------------------------------------------------------------------
diff --git a/itests/hive-blobstore/src/test/queries/clientpositive/insert_into.q b/itests/hive-blobstore/src/test/queries/clientpositive/insert_into.q
deleted file mode 100644
index c9ed57d..0000000
--- a/itests/hive-blobstore/src/test/queries/clientpositive/insert_into.q
+++ /dev/null
@@ -1,8 +0,0 @@
-set hive.blobstore.use.blobstore.as.scratchdir=true;
-
-DROP TABLE qtest;
-CREATE TABLE qtest (value int) LOCATION '${hiveconf:test.blobstore.path.unique}/qtest/';
-INSERT INTO qtest VALUES (1), (10), (100), (1000);
-INSERT INTO qtest VALUES (2), (20), (200), (2000);
-EXPLAIN EXTENDED INSERT INTO qtest VALUES (1), (10), (100), (1000);
-SELECT * FROM qtest;

http://git-wip-us.apache.org/repos/asf/hive/blob/e00b1a33/itests/hive-blobstore/src/test/queries/clientpositive/insert_into_table.q
----------------------------------------------------------------------
diff --git a/itests/hive-blobstore/src/test/queries/clientpositive/insert_into_table.q b/itests/hive-blobstore/src/test/queries/clientpositive/insert_into_table.q
new file mode 100644
index 0000000..25e2e70
--- /dev/null
+++ b/itests/hive-blobstore/src/test/queries/clientpositive/insert_into_table.q
@@ -0,0 +1,20 @@
+SET hive.blobstore.optimizations.enabled=true;
+SET hive.blobstore.use.blobstore.as.scratchdir=true;
+set hive.exec.dynamic.partition.mode=nonstrict;
+
+-- Insert unpartitioned table;
+DROP TABLE table1;
+CREATE TABLE table1 (id int) LOCATION '${hiveconf:test.blobstore.path.unique}/table1/';
+INSERT INTO TABLE table1 VALUES (1);
+INSERT INTO TABLE table1 VALUES (2);
+SELECT * FROM table1;
+EXPLAIN EXTENDED INSERT INTO TABLE table1 VALUES (1);
+DROP TABLE table1;
+
+-- Insert dynamic partitions;
+CREATE TABLE table1 (id int) partitioned by (key string) clustered by (id) into 2 buckets LOCATION '${hiveconf:test.blobstore.path.unique}/table1/';
+INSERT INTO TABLE table1 PARTITION (key) VALUES (1, '101'), (2, '202'), (3, '303'), (4, '404'), (5, '505');
+INSERT INTO TABLE table1 PARTITION (key) VALUES (1, '101'), (2, '202'), (3, '303'), (4, '404'), (5, '505');
+SELECT * FROM table1;
+EXPLAIN EXTENDED INSERT INTO TABLE table1 PARTITION (key) VALUES (1, '101'), (2, '202'), (3, '303'), (4, '404'), (5, '505');
+DROP TABLE table1;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/e00b1a33/itests/hive-blobstore/src/test/queries/clientpositive/insert_overwrite_directory.q
----------------------------------------------------------------------
diff --git a/itests/hive-blobstore/src/test/queries/clientpositive/insert_overwrite_directory.q b/itests/hive-blobstore/src/test/queries/clientpositive/insert_overwrite_directory.q
new file mode 100644
index 0000000..f1b5a0b
--- /dev/null
+++ b/itests/hive-blobstore/src/test/queries/clientpositive/insert_overwrite_directory.q
@@ -0,0 +1,27 @@
+SET hive.blobstore.optimizations.enabled=true;
+SET hive.blobstore.use.blobstore.as.scratchdir=true;
+
+-- Create a simple source table;
+DROP TABLE table1;
+CREATE TABLE table1 (id int, key string) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';
+INSERT INTO TABLE table1 VALUES (1, 'k1');
+INSERT INTO TABLE table1 VALUES (2, 'k2');
+
+-- Write and verify data on the directory;
+INSERT OVERWRITE DIRECTORY '${hiveconf:test.blobstore.path.unique}/table1.dir/' SELECT * FROM table1;
+dfs -cat ${hiveconf:test.blobstore.path.unique}/table1.dir/000000_0;
+
+-- Write and verify data using FROM ... INSERT OVERWRITE DIRECTORY;
+FROM table1
+INSERT OVERWRITE DIRECTORY '${hiveconf:test.blobstore.path.unique}/table1.dir/' SELECT id
+INSERT OVERWRITE DIRECTORY '${hiveconf:test.blobstore.path.unique}/table2.dir/' SELECT key;
+
+dfs -cat ${hiveconf:test.blobstore.path.unique}/table1.dir/000000_0;
+dfs -cat ${hiveconf:test.blobstore.path.unique}/table2.dir/000000_0;
+
+-- Verify plan is optimized
+EXPLAIN EXTENDED INSERT OVERWRITE DIRECTORY '${hiveconf:test.blobstore.path.unique}/table1.dir/' SELECT * FROM table1;
+
+EXPLAIN EXTENDED FROM table1
+ INSERT OVERWRITE DIRECTORY '${hiveconf:test.blobstore.path.unique}/table1.dir/' SELECT id
+ INSERT OVERWRITE DIRECTORY '${hiveconf:test.blobstore.path.unique}/table2.dir/' SELECT key;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/e00b1a33/itests/hive-blobstore/src/test/queries/clientpositive/insert_overwrite_table.q
----------------------------------------------------------------------
diff --git a/itests/hive-blobstore/src/test/queries/clientpositive/insert_overwrite_table.q b/itests/hive-blobstore/src/test/queries/clientpositive/insert_overwrite_table.q
new file mode 100644
index 0000000..846b2b1
--- /dev/null
+++ b/itests/hive-blobstore/src/test/queries/clientpositive/insert_overwrite_table.q
@@ -0,0 +1,22 @@
+SET hive.blobstore.optimizations.enabled=true;
+SET hive.blobstore.use.blobstore.as.scratchdir=true;
+set hive.exec.dynamic.partition.mode=nonstrict;
+
+-- Insert unpartitioned table;
+DROP TABLE table1;
+CREATE TABLE table1 (id int) LOCATION '${hiveconf:test.blobstore.path.unique}/table1/';
+INSERT OVERWRITE TABLE table1 VALUES (1);
+SELECT * FROM table1;
+INSERT OVERWRITE TABLE table1 VALUES (2);
+SELECT * FROM table1;
+EXPLAIN EXTENDED INSERT OVERWRITE TABLE table1 VALUES (1);
+DROP TABLE table1;
+
+-- Insert dynamic partitions;
+CREATE TABLE table1 (id int) partitioned by (key string) clustered by (id) into 2 buckets LOCATION '${hiveconf:test.blobstore.path.unique}/table1/';
+INSERT OVERWRITE TABLE table1 PARTITION (key) VALUES (1, '101'), (2, '202'), (3, '303'), (4, '404'), (5, '505');
+SELECT * FROM table1;
+INSERT OVERWRITE TABLE table1 PARTITION (key) VALUES (1, '101'), (2,
'202'), (3, '303'), (4, '404'), (5, '505'); +SELECT * FROM table1; +EXPLAIN EXTENDED INSERT OVERWRITE TABLE table1 PARTITION (key) VALUES (1, '101'), (2, '202'), (3, '303'), (4, '404'), (5, '505'); +DROP TABLE table1; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hive/blob/e00b1a33/itests/hive-blobstore/src/test/results/clientpositive/insert_into.q.out ---------------------------------------------------------------------- diff --git a/itests/hive-blobstore/src/test/results/clientpositive/insert_into.q.out b/itests/hive-blobstore/src/test/results/clientpositive/insert_into.q.out deleted file mode 100644 index 00ad136..0000000 --- a/itests/hive-blobstore/src/test/results/clientpositive/insert_into.q.out +++ /dev/null @@ -1,343 +0,0 @@ -PREHOOK: query: DROP TABLE qtest -PREHOOK: type: DROPTABLE -POSTHOOK: query: DROP TABLE qtest -POSTHOOK: type: DROPTABLE -#### A masked pattern was here #### -PREHOOK: type: CREATETABLE -PREHOOK: Input: ### test.blobstore.path ###/qtest -PREHOOK: Output: database:default -PREHOOK: Output: default@qtest -#### A masked pattern was here #### -POSTHOOK: type: CREATETABLE -POSTHOOK: Input: ### test.blobstore.path ###/qtest -POSTHOOK: Output: database:default -POSTHOOK: Output: default@qtest -PREHOOK: query: INSERT INTO qtest VALUES (1), (10), (100), (1000) -PREHOOK: type: QUERY -PREHOOK: Input: default@values__tmp__table__1 -PREHOOK: Output: default@qtest -POSTHOOK: query: INSERT INTO qtest VALUES (1), (10), (100), (1000) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@values__tmp__table__1 -POSTHOOK: Output: default@qtest -POSTHOOK: Lineage: qtest.value EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -PREHOOK: query: INSERT INTO qtest VALUES (2), (20), (200), (2000) -PREHOOK: type: QUERY -PREHOOK: Input: default@values__tmp__table__2 -PREHOOK: Output: default@qtest -POSTHOOK: query: INSERT INTO qtest VALUES (2), (20), (200), (2000) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@values__tmp__table__2 -POSTHOOK: Output: default@qtest -POSTHOOK: Lineage: qtest.value EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -PREHOOK: query: EXPLAIN EXTENDED INSERT INTO qtest VALUES (1), (10), (100), (1000) -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN EXTENDED INSERT INTO qtest VALUES (1), (10), (100), (1000) -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 - Stage-4 - Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 - Stage-2 depends on stages: Stage-0 - Stage-3 - Stage-5 - Stage-6 depends on stages: Stage-5 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: values__tmp__table__3 - Statistics: Num rows: 1 Data size: 14 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Select Operator - expressions: UDFToInteger(tmp_values_col1) (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 14 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 - directory: ### BLOBSTORE_STAGING_PATH ### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 14 Basic stats: COMPLETE Column stats: NONE - Stats Publishing Key Prefix: ### BLOBSTORE_STAGING_PATH ### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - 
properties: - bucket_count -1 - columns value - columns.comments - columns.types int -#### A masked pattern was here #### - location ### test.blobstore.path ###/qtest - name default.qtest - numFiles 2 - serialization.ddl struct qtest { i32 value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 28 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.qtest - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: Values__Tmp__Table__3 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns tmp_values_col1 - columns.comments - columns.types string -#### A masked pattern was here #### - name default.values__tmp__table__3 - serialization.ddl struct values__tmp__table__3 { string tmp_values_col1} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns tmp_values_col1 - columns.comments - columns.types string -#### A masked pattern was here #### - name default.values__tmp__table__3 - serialization.ddl struct values__tmp__table__3 { string tmp_values_col1} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.values__tmp__table__3 - name: default.values__tmp__table__3 - Truncated Path -> Alias: -#### A masked pattern was here #### - - Stage: Stage-7 - Conditional Operator - - Stage: Stage-4 - Move Operator - files: - hdfs directory: true - source: ### BLOBSTORE_STAGING_PATH ### - destination: ### BLOBSTORE_STAGING_PATH ### - - Stage: Stage-0 - Move Operator - tables: - replace: false - source: ### BLOBSTORE_STAGING_PATH ### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns value - columns.comments - columns.types int -#### A masked pattern was here #### - location ### test.blobstore.path ###/qtest - name default.qtest - numFiles 2 - serialization.ddl struct qtest { i32 value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 28 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.qtest - - Stage: Stage-2 - Stats-Aggr Operator - Stats Aggregation Key Prefix: ### BLOBSTORE_STAGING_PATH ### - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - File Output Operator - compressed: false - GlobalTableId: 0 - directory: ### BLOBSTORE_STAGING_PATH ### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns value - columns.comments - columns.types int -#### A masked pattern was here #### - location ### test.blobstore.path ###/qtest - name default.qtest - numFiles 2 - serialization.ddl struct qtest { i32 value} - 
serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 28 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.qtest - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: - ### BLOBSTORE_STAGING_PATH ### - Path -> Partition: - ### BLOBSTORE_STAGING_PATH ### - Partition - base file name: -ext-10002 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns value - columns.comments - columns.types int -#### A masked pattern was here #### - location ### test.blobstore.path ###/qtest - name default.qtest - numFiles 2 - serialization.ddl struct qtest { i32 value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 28 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns value - columns.comments - columns.types int -#### A masked pattern was here #### - location ### test.blobstore.path ###/qtest - name default.qtest - numFiles 2 - serialization.ddl struct qtest { i32 value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 28 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.qtest - name: default.qtest - Truncated Path -> Alias: - ### BLOBSTORE_STAGING_PATH ### - - Stage: Stage-5 - Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - File Output Operator - compressed: false - GlobalTableId: 0 - directory: ### BLOBSTORE_STAGING_PATH ### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns value - columns.comments - columns.types int -#### A masked pattern was here #### - location ### test.blobstore.path ###/qtest - name default.qtest - numFiles 2 - serialization.ddl struct qtest { i32 value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 28 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.qtest - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: - ### BLOBSTORE_STAGING_PATH ### - Path -> Partition: - ### BLOBSTORE_STAGING_PATH ### - Partition - base file name: -ext-10002 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns value - columns.comments - columns.types int -#### A masked pattern was here #### - location ### test.blobstore.path ###/qtest - name default.qtest - numFiles 2 - serialization.ddl struct qtest { i32 value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 28 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns value - 
columns.comments - columns.types int -#### A masked pattern was here #### - location ### test.blobstore.path ###/qtest - name default.qtest - numFiles 2 - serialization.ddl struct qtest { i32 value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 28 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.qtest - name: default.qtest - Truncated Path -> Alias: - ### BLOBSTORE_STAGING_PATH ### - - Stage: Stage-6 - Move Operator - files: - hdfs directory: true - source: ### BLOBSTORE_STAGING_PATH ### - destination: ### BLOBSTORE_STAGING_PATH ### - -PREHOOK: query: SELECT * FROM qtest -PREHOOK: type: QUERY -PREHOOK: Input: default@qtest -#### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM qtest -POSTHOOK: type: QUERY -POSTHOOK: Input: default@qtest -#### A masked pattern was here #### -1 -10 -100 -1000 -2 -20 -200 -2000 http://git-wip-us.apache.org/repos/asf/hive/blob/e00b1a33/itests/hive-blobstore/src/test/results/clientpositive/insert_into_table.q.out ---------------------------------------------------------------------- diff --git a/itests/hive-blobstore/src/test/results/clientpositive/insert_into_table.q.out b/itests/hive-blobstore/src/test/results/clientpositive/insert_into_table.q.out new file mode 100644 index 0000000..fbb52c1 --- /dev/null +++ b/itests/hive-blobstore/src/test/results/clientpositive/insert_into_table.q.out @@ -0,0 +1,599 @@ +PREHOOK: query: -- Insert unpartitioned table; +DROP TABLE table1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: -- Insert unpartitioned table; +DROP TABLE table1 +POSTHOOK: type: DROPTABLE +#### A masked pattern was here #### +PREHOOK: type: CREATETABLE +PREHOOK: Input: ### test.blobstore.path ###/table1 +PREHOOK: Output: database:default +PREHOOK: Output: default@table1 +#### A masked pattern was here #### +POSTHOOK: type: CREATETABLE +POSTHOOK: Input: ### test.blobstore.path ###/table1 +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table1 +PREHOOK: query: INSERT INTO TABLE table1 VALUES (1) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@table1 +POSTHOOK: query: INSERT INTO TABLE table1 VALUES (1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@table1 +PREHOOK: query: INSERT INTO TABLE table1 VALUES (2) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__2 +PREHOOK: Output: default@table1 +POSTHOOK: query: INSERT INTO TABLE table1 VALUES (2) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__2 +POSTHOOK: Output: default@table1 +PREHOOK: query: SELECT * FROM table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +1 +2 +PREHOOK: query: EXPLAIN EXTENDED INSERT INTO TABLE table1 VALUES (1) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN EXTENDED INSERT INTO TABLE table1 VALUES (1) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 + Stage-4 + Stage-2 depends on stages: Stage-0, Stage-4, Stage-6 + Stage-3 + Stage-0 depends on stages: Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: values__tmp__table__3 + Statistics: Num 
rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Select Operator + expressions: UDFToInteger(tmp_values_col1) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 + directory: ### BLOBSTORE_STAGING_PATH ### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Stats Publishing Key Prefix: ### BLOBSTORE_STAGING_PATH ### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns id + columns.comments + columns.types int +#### A masked pattern was here #### + location ### test.blobstore.path ###/table1 + name default.table1 + numFiles 2 + serialization.ddl struct table1 { i32 id} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 4 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.table1 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: Values__Tmp__Table__3 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns tmp_values_col1 + columns.comments + columns.types string +#### A masked pattern was here #### + name default.values__tmp__table__3 + serialization.ddl struct values__tmp__table__3 { string tmp_values_col1} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns tmp_values_col1 + columns.comments + columns.types string +#### A masked pattern was here #### + name default.values__tmp__table__3 + serialization.ddl struct values__tmp__table__3 { string tmp_values_col1} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.values__tmp__table__3 + name: default.values__tmp__table__3 + Truncated Path -> Alias: +#### A masked pattern was here #### + + Stage: Stage-7 + Conditional Operator + + Stage: Stage-4 + Move Operator + tables: + replace: false + source: ### BLOBSTORE_STAGING_PATH ### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns id + columns.comments + columns.types int +#### A masked pattern was here #### + location ### test.blobstore.path ###/table1 + name default.table1 + numFiles 2 + serialization.ddl struct table1 { i32 id} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 4 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.table1 + + Stage: Stage-2 + Stats-Aggr Operator + Stats Aggregation Key Prefix: ### BLOBSTORE_STAGING_PATH ### + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + 
File Output Operator + compressed: false + GlobalTableId: 0 + directory: ### BLOBSTORE_STAGING_PATH ### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns id + columns.comments + columns.types int +#### A masked pattern was here #### + location ### test.blobstore.path ###/table1 + name default.table1 + numFiles 2 + serialization.ddl struct table1 { i32 id} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 4 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.table1 + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: + ### BLOBSTORE_STAGING_PATH ### + Path -> Partition: + ### BLOBSTORE_STAGING_PATH ### + Partition + base file name: -ext-10002 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns id + columns.comments + columns.types int +#### A masked pattern was here #### + location ### test.blobstore.path ###/table1 + name default.table1 + numFiles 2 + serialization.ddl struct table1 { i32 id} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 4 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns id + columns.comments + columns.types int +#### A masked pattern was here #### + location ### test.blobstore.path ###/table1 + name default.table1 + numFiles 2 + serialization.ddl struct table1 { i32 id} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 4 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.table1 + name: default.table1 + Truncated Path -> Alias: + ### BLOBSTORE_STAGING_PATH ### + + Stage: Stage-0 + Move Operator + tables: + replace: false + source: ### BLOBSTORE_STAGING_PATH ### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns id + columns.comments + columns.types int +#### A masked pattern was here #### + location ### test.blobstore.path ###/table1 + name default.table1 + numFiles 2 + serialization.ddl struct table1 { i32 id} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 4 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.table1 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + File Output Operator + compressed: false + GlobalTableId: 0 + directory: ### BLOBSTORE_STAGING_PATH ### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns id + columns.comments + columns.types int +#### A masked pattern was here #### + location ### test.blobstore.path ###/table1 + name default.table1 + numFiles 2 + serialization.ddl struct table1 { i32 
id} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 4 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.table1 + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: + ### BLOBSTORE_STAGING_PATH ### + Path -> Partition: + ### BLOBSTORE_STAGING_PATH ### + Partition + base file name: -ext-10002 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns id + columns.comments + columns.types int +#### A masked pattern was here #### + location ### test.blobstore.path ###/table1 + name default.table1 + numFiles 2 + serialization.ddl struct table1 { i32 id} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 4 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns id + columns.comments + columns.types int +#### A masked pattern was here #### + location ### test.blobstore.path ###/table1 + name default.table1 + numFiles 2 + serialization.ddl struct table1 { i32 id} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 4 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.table1 + name: default.table1 + Truncated Path -> Alias: + ### BLOBSTORE_STAGING_PATH ### + + Stage: Stage-6 + Move Operator + tables: + replace: false + source: ### BLOBSTORE_STAGING_PATH ### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns id + columns.comments + columns.types int +#### A masked pattern was here #### + location ### test.blobstore.path ###/table1 + name default.table1 + numFiles 2 + serialization.ddl struct table1 { i32 id} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 4 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.table1 + +PREHOOK: query: DROP TABLE table1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@table1 +PREHOOK: Output: default@table1 +POSTHOOK: query: DROP TABLE table1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@table1 +POSTHOOK: Output: default@table1 +PREHOOK: query: -- Insert dynamic partitions; +#### A masked pattern was here #### +PREHOOK: type: CREATETABLE +PREHOOK: Input: ### test.blobstore.path ###/table1 +PREHOOK: Output: database:default +PREHOOK: Output: default@table1 +POSTHOOK: query: -- Insert dynamic partitions; +#### A masked pattern was here #### +POSTHOOK: type: CREATETABLE +POSTHOOK: Input: ### test.blobstore.path ###/table1 +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table1 +PREHOOK: query: INSERT INTO TABLE table1 PARTITION (key) VALUES (1, '101'), (2, '202'), (3, '303'), (4, '404'), (5, '505') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__4 +PREHOOK: Output: default@table1 +POSTHOOK: query: INSERT INTO TABLE table1 PARTITION (key) VALUES (1, '101'), (2, '202'), (3, '303'), (4, '404'), (5, '505') +POSTHOOK: type: QUERY +POSTHOOK: 
Input: default@values__tmp__table__4 +POSTHOOK: Output: default@table1@key=101 +POSTHOOK: Output: default@table1@key=202 +POSTHOOK: Output: default@table1@key=303 +POSTHOOK: Output: default@table1@key=404 +POSTHOOK: Output: default@table1@key=505 +POSTHOOK: Lineage: table1 PARTITION(key=101).id EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table1 PARTITION(key=202).id EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table1 PARTITION(key=303).id EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table1 PARTITION(key=404).id EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table1 PARTITION(key=505).id EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: INSERT INTO TABLE table1 PARTITION (key) VALUES (1, '101'), (2, '202'), (3, '303'), (4, '404'), (5, '505') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__5 +PREHOOK: Output: default@table1 +POSTHOOK: query: INSERT INTO TABLE table1 PARTITION (key) VALUES (1, '101'), (2, '202'), (3, '303'), (4, '404'), (5, '505') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__5 +POSTHOOK: Output: default@table1@key=101 +POSTHOOK: Output: default@table1@key=202 +POSTHOOK: Output: default@table1@key=303 +POSTHOOK: Output: default@table1@key=404 +POSTHOOK: Output: default@table1@key=505 +POSTHOOK: Lineage: table1 PARTITION(key=101).id EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table1 PARTITION(key=202).id EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table1 PARTITION(key=303).id EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table1 PARTITION(key=404).id EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table1 PARTITION(key=505).id EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: SELECT * FROM table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +PREHOOK: Input: default@table1@key=101 +PREHOOK: Input: default@table1@key=202 +PREHOOK: Input: default@table1@key=303 +PREHOOK: Input: default@table1@key=404 +PREHOOK: Input: default@table1@key=505 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +POSTHOOK: Input: default@table1@key=101 +POSTHOOK: Input: default@table1@key=202 +POSTHOOK: Input: default@table1@key=303 +POSTHOOK: Input: default@table1@key=404 +POSTHOOK: Input: default@table1@key=505 +#### A masked pattern was here #### +1 101 +1 101 +2 202 +2 202 +3 303 +3 303 +4 404 +4 404 +5 505 +5 505 +PREHOOK: query: EXPLAIN EXTENDED INSERT INTO TABLE table1 PARTITION (key) VALUES (1, '101'), (2, '202'), (3, '303'), (4, '404'), (5, '505') +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN EXTENDED INSERT INTO TABLE table1 PARTITION (key) VALUES (1, '101'), (2, '202'), (3, 
'303'), (4, '404'), (5, '505') +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: values__tmp__table__6 + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Select Operator + expressions: UDFToInteger(tmp_values_col1) (type: int), tmp_values_col2 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string), '_bucket_number' (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: int) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: Values__Tmp__Table__6 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns tmp_values_col1,tmp_values_col2 + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.values__tmp__table__6 + serialization.ddl struct values__tmp__table__6 { string tmp_values_col1, string tmp_values_col2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns tmp_values_col1,tmp_values_col2 + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.values__tmp__table__6 + serialization.ddl struct values__tmp__table__6 { string tmp_values_col1, string tmp_values_col2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.values__tmp__table__6 + name: default.values__tmp__table__6 + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), KEY._col1 (type: string), KEY.'_bucket_number' (type: string) + outputColumnNames: _col0, _col1, '_bucket_number' + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 + directory: ### BLOBSTORE_STAGING_PATH ### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Stats Publishing Key Prefix: ### BLOBSTORE_STAGING_PATH ### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 2 + bucket_field_name id + columns id + columns.comments + columns.types int +#### A masked pattern was here #### + location ### test.blobstore.path ###/table1 + name default.table1 + partition_columns key + partition_columns.types string + serialization.ddl struct table1 { i32 id} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe 
+#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.table1 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + + Stage: Stage-0 + Move Operator + tables: + partition: + key + replace: false + source: ### BLOBSTORE_STAGING_PATH ### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 2 + bucket_field_name id + columns id + columns.comments + columns.types int +#### A masked pattern was here #### + location ### test.blobstore.path ###/table1 + name default.table1 + partition_columns key + partition_columns.types string + serialization.ddl struct table1 { i32 id} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.table1 + + Stage: Stage-2 + Stats-Aggr Operator + Stats Aggregation Key Prefix: ### BLOBSTORE_STAGING_PATH ### + +PREHOOK: query: DROP TABLE table1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@table1 +PREHOOK: Output: default@table1 +POSTHOOK: query: DROP TABLE table1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@table1 +POSTHOOK: Output: default@table1 http://git-wip-us.apache.org/repos/asf/hive/blob/e00b1a33/itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_directory.q.out ---------------------------------------------------------------------- diff --git a/itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_directory.q.out b/itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_directory.q.out new file mode 100644 index 0000000..9f575a6 --- /dev/null +++ b/itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_directory.q.out @@ -0,0 +1,653 @@ +PREHOOK: query: -- Create a simple source table; +DROP TABLE table1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: -- Create a simple source table; +DROP TABLE table1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE table1 (id int, key string) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table1 +POSTHOOK: query: CREATE TABLE table1 (id int, key string) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table1 +PREHOOK: query: INSERT INTO TABLE table1 VALUES (1, 'k1') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@table1 +POSTHOOK: query: INSERT INTO TABLE table1 VALUES (1, 'k1') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@table1 +POSTHOOK: Lineage: table1.id EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table1.key SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: INSERT INTO TABLE table1 VALUES (2, 'k2') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__2 +PREHOOK: Output: default@table1 +POSTHOOK: query: INSERT INTO TABLE table1 VALUES (2, 'k2') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__2 +POSTHOOK: Output: default@table1 +POSTHOOK: Lineage: table1.id EXPRESSION 
[(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: table1.key SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: -- Write and verify data on the directory;
+INSERT OVERWRITE DIRECTORY '### test.blobstore.path ###/table1.dir/' SELECT * FROM table1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@table1
+PREHOOK: Output: ### test.blobstore.path ###/table1.dir
+POSTHOOK: query: -- Write and verify data on the directory;
+INSERT OVERWRITE DIRECTORY '### test.blobstore.path ###/table1.dir/' SELECT * FROM table1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@table1
+POSTHOOK: Output: ### test.blobstore.path ###/table1.dir
+1k1
+2k2
+PREHOOK: query: -- Write and verify data using FROM ... INSERT OVERWRITE DIRECTORY;
+FROM table1
+INSERT OVERWRITE DIRECTORY '### test.blobstore.path ###/table1.dir/' SELECT id
+INSERT OVERWRITE DIRECTORY '### test.blobstore.path ###/table2.dir/' SELECT key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@table1
+PREHOOK: Output: ### test.blobstore.path ###/table1.dir
+PREHOOK: Output: ### test.blobstore.path ###/table2.dir
+POSTHOOK: query: -- Write and verify data using FROM ... INSERT OVERWRITE DIRECTORY;
+FROM table1
+INSERT OVERWRITE DIRECTORY '### test.blobstore.path ###/table1.dir/' SELECT id
+INSERT OVERWRITE DIRECTORY '### test.blobstore.path ###/table2.dir/' SELECT key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@table1
+POSTHOOK: Output: ### test.blobstore.path ###/table1.dir
+POSTHOOK: Output: ### test.blobstore.path ###/table2.dir
+1
+2
+k1
+k2
+PREHOOK: query: -- Verify plan is optimized
+EXPLAIN EXTENDED INSERT OVERWRITE DIRECTORY '### test.blobstore.path ###/table1.dir/' SELECT * FROM table1
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Verify plan is optimized
+EXPLAIN EXTENDED INSERT OVERWRITE DIRECTORY '### test.blobstore.path ###/table1.dir/' SELECT * FROM table1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-6 depends on stages: Stage-1 , consists of Stage-3, Stage-2, Stage-4
+ Stage-3
+ Stage-2
+ Stage-0 depends on stages: Stage-2
+ Stage-4
+ Stage-5 depends on stages: Stage-4
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: table1
+ Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Select Operator
+ expressions: id (type: int), key (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ directory: ### BLOBSTORE_STAGING_PATH ###
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Stats Publishing Key Prefix: ### BLOBSTORE_STAGING_PATH ###
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ columns _col0,_col1
+ columns.types int:string
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: table1
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count -1 + columns id,key + columns.comments + columns.types int:string + field.delim , +#### A masked pattern was here #### + name default.table1 + numFiles 2 + numRows 2 + rawDataSize 8 + serialization.ddl struct table1 { i32 id, string key} + serialization.format , + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 10 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count -1 + columns id,key + columns.comments + columns.types int:string + field.delim , +#### A masked pattern was here #### + name default.table1 + numFiles 2 + numRows 2 + rawDataSize 8 + serialization.ddl struct table1 { i32 id, string key} + serialization.format , + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 10 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.table1 + name: default.table1 + Truncated Path -> Alias: + /table1 [table1] + + Stage: Stage-6 + Conditional Operator + + Stage: Stage-3 + Move Operator + files: + hdfs directory: true + source: ### BLOBSTORE_STAGING_PATH ### + destination: ### test.blobstore.path ###/table1.dir + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + File Output Operator + compressed: false + GlobalTableId: 0 + directory: ### BLOBSTORE_STAGING_PATH ### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types int:string + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: + ### BLOBSTORE_STAGING_PATH ### + Path -> Partition: + ### BLOBSTORE_STAGING_PATH ### + Partition + base file name: -ext-10002 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types int:string + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types int:string + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Truncated Path -> Alias: + ### BLOBSTORE_STAGING_PATH ### + + Stage: Stage-0 + Move Operator + files: + hdfs directory: true + source: ### BLOBSTORE_STAGING_PATH ### + destination: ### test.blobstore.path ###/table1.dir + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + File Output Operator + compressed: false + GlobalTableId: 0 + directory: ### BLOBSTORE_STAGING_PATH ### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types int:string + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: + ### BLOBSTORE_STAGING_PATH ### + Path -> Partition: + ### BLOBSTORE_STAGING_PATH ### + Partition + base file name: -ext-10002 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types int:string + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types int:string + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Truncated Path -> Alias: + ### BLOBSTORE_STAGING_PATH ### + + Stage: Stage-5 + Move Operator + files: + hdfs directory: true + source: ### BLOBSTORE_STAGING_PATH ### + destination: ### test.blobstore.path ###/table1.dir + +PREHOOK: query: EXPLAIN EXTENDED FROM table1 + INSERT OVERWRITE DIRECTORY '### test.blobstore.path ###/table1.dir/' SELECT id + INSERT OVERWRITE DIRECTORY '### test.blobstore.path ###/table2.dir/' SELECT key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN EXTENDED FROM table1 + INSERT OVERWRITE DIRECTORY '### test.blobstore.path ###/table1.dir/' SELECT id + INSERT OVERWRITE DIRECTORY '### test.blobstore.path ###/table2.dir/' SELECT key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-7 depends on stages: Stage-2 , consists of Stage-4, Stage-3, Stage-5 + Stage-4 + Stage-3 + Stage-0 depends on stages: Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 + Stage-12 depends on stages: Stage-2 , consists of Stage-9, Stage-8, Stage-10 + Stage-9 + Stage-8 + Stage-1 depends on stages: Stage-8 + Stage-10 + Stage-11 depends on stages: Stage-10 + +STAGE PLANS: + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + alias: table1 + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Select Operator + expressions: id (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 + directory: ### BLOBSTORE_STAGING_PATH ### + NumFilesPerFileSink: 1 + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Stats Publishing Key Prefix: ### BLOBSTORE_STAGING_PATH ### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types int + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 2 + 
directory: ### BLOBSTORE_STAGING_PATH ### + NumFilesPerFileSink: 1 + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Stats Publishing Key Prefix: ### BLOBSTORE_STAGING_PATH ### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types string + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: table1 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count -1 + columns id,key + columns.comments + columns.types int:string + field.delim , +#### A masked pattern was here #### + name default.table1 + numFiles 2 + numRows 2 + rawDataSize 8 + serialization.ddl struct table1 { i32 id, string key} + serialization.format , + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 10 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count -1 + columns id,key + columns.comments + columns.types int:string + field.delim , +#### A masked pattern was here #### + name default.table1 + numFiles 2 + numRows 2 + rawDataSize 8 + serialization.ddl struct table1 { i32 id, string key} + serialization.format , + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 10 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.table1 + name: default.table1 + Truncated Path -> Alias: + /table1 [table1] + + Stage: Stage-7 + Conditional Operator + + Stage: Stage-4 + Move Operator + files: + hdfs directory: true + source: ### BLOBSTORE_STAGING_PATH ### + destination: ### test.blobstore.path ###/table1.dir + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + File Output Operator + compressed: false + GlobalTableId: 0 + directory: ### BLOBSTORE_STAGING_PATH ### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types int + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: + ### BLOBSTORE_STAGING_PATH ### + Path -> Partition: + ### BLOBSTORE_STAGING_PATH ### + Partition + base file name: -ext-10004 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types int + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + 
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types int + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Truncated Path -> Alias: + ### BLOBSTORE_STAGING_PATH ### + + Stage: Stage-0 + Move Operator + files: + hdfs directory: true + source: ### BLOBSTORE_STAGING_PATH ### + destination: ### test.blobstore.path ###/table1.dir + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + File Output Operator + compressed: false + GlobalTableId: 0 + directory: ### BLOBSTORE_STAGING_PATH ### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types int + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: + ### BLOBSTORE_STAGING_PATH ### + Path -> Partition: + ### BLOBSTORE_STAGING_PATH ### + Partition + base file name: -ext-10004 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types int + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types int + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Truncated Path -> Alias: + ### BLOBSTORE_STAGING_PATH ### + + Stage: Stage-6 + Move Operator + files: + hdfs directory: true + source: ### BLOBSTORE_STAGING_PATH ### + destination: ### test.blobstore.path ###/table1.dir + + Stage: Stage-12 + Conditional Operator + + Stage: Stage-9 + Move Operator + files: + hdfs directory: true + source: ### BLOBSTORE_STAGING_PATH ### + destination: ### test.blobstore.path ###/table2.dir + + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + File Output Operator + compressed: false + GlobalTableId: 0 + directory: ### BLOBSTORE_STAGING_PATH ### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types string + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: + ### BLOBSTORE_STAGING_PATH ### + Path -> Partition: + ### BLOBSTORE_STAGING_PATH ### + Partition + base file name: -ext-10005 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types string + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat 
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types string + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Truncated Path -> Alias: + ### BLOBSTORE_STAGING_PATH ### + + Stage: Stage-1 + Move Operator + files: + hdfs directory: true + source: ### BLOBSTORE_STAGING_PATH ### + destination: ### test.blobstore.path ###/table2.dir + + Stage: Stage-10 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + File Output Operator + compressed: false + GlobalTableId: 0 + directory: ### BLOBSTORE_STAGING_PATH ### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types string + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: + ### BLOBSTORE_STAGING_PATH ### + Path -> Partition: + ### BLOBSTORE_STAGING_PATH ### + Partition + base file name: -ext-10005 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types string + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types string + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Truncated Path -> Alias: + ### BLOBSTORE_STAGING_PATH ### + + Stage: Stage-11 + Move Operator + files: + hdfs directory: true + source: ### BLOBSTORE_STAGING_PATH ### + destination: ### test.blobstore.path ###/table2.dir +
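Per the diffstat, the behavior change itself lands in ql/optimizer/GenMapRedUtils.java, covered by the new TestGenMapRedUtilsCreateConditionalTask. An illustrative check of the property such a test can pin down (after optimization, no MoveTask exists solely to feed another MoveTask), again with hypothetical names rather than the actual test code:

// Illustrative only, not the actual TestGenMapRedUtilsCreateConditionalTask.
// Models each MoveTask as a source->destination entry and checks that an
// optimized plan contains no intermediate hop, i.e. no move whose
// destination is merely the source of another move.
import java.util.Map;

public class MoveChainInvariantSketch {
    static boolean hasNoIntermediateHop(Map<String, String> moves) {
        // A hop exists when some move's destination shows up as another move's source.
        return moves.values().stream().noneMatch(moves::containsKey);
    }

    public static void main(String[] args) {
        Map<String, String> optimized = Map.of(
                "s3a://bucket/.hive-staging/-ext-10002", "s3a://bucket/warehouse/table1");
        Map<String, String> unoptimized = Map.of(
                "s3a://bucket/.hive-staging/-ext-10002", "s3a://bucket/.hive-staging/-ext-10000",
                "s3a://bucket/.hive-staging/-ext-10000", "s3a://bucket/warehouse/table1");
        // Run with `java -ea MoveChainInvariantSketch` so the assertions fire.
        assert hasNoIntermediateHop(optimized);
        assert !hasNoIntermediateHop(unoptimized);
        System.out.println("move-chain invariant holds for the optimized plan");
    }
}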
