Repository: hive
Updated Branches:
  refs/heads/master 6b03a9c5a -> 2b71ad306


HIVE-16784: Missing lineage information when hive.blobstore.optimizations.enabled is true (Barna Zsombor Klara, reviewed by Peter Vary, Sahil Takiar)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/2b71ad30
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/2b71ad30
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/2b71ad30

Branch: refs/heads/master
Commit: 2b71ad306addd9513aeda023ae955484d3173db7
Parents: 6b03a9c
Author: Barna Zsombor Klara <zsombor.kl...@cloudera.com>
Authored: Tue Aug 8 08:01:25 2017 -0700
Committer: Sahil Takiar <stak...@cloudera.com>
Committed: Tue Aug 8 08:01:25 2017 -0700

----------------------------------------------------------------------
 .../insert_blobstore_to_blobstore.q.out         |  6 +++++
 .../insert_empty_into_blobstore.q.out           |  3 +++
 .../clientpositive/insert_into_table.q.out      |  2 ++
 .../clientpositive/insert_overwrite_table.q.out |  2 ++
 .../clientpositive/orc_format_nonpart.q.out     |  6 +++++
 .../clientpositive/orc_format_part.q.out        | 16 +++++++++++++
 .../orc_nonstd_partitions_loc.q.out             | 24 ++++++++++++++++++++
 .../clientpositive/rcfile_format_nonpart.q.out  |  6 +++++
 .../clientpositive/rcfile_format_part.q.out     | 16 +++++++++++++
 .../rcfile_nonstd_partitions_loc.q.out          | 24 ++++++++++++++++++++
 .../clientpositive/zero_rows_blobstore.q.out    |  2 ++
 .../hive/ql/optimizer/GenMapRedUtils.java       |  5 ++++
 .../hadoop/hive/ql/session/LineageState.java    |  7 ++++++
 ...TestGenMapRedUtilsCreateConditionalTask.java |  2 ++
 14 files changed, 121 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/2b71ad30/itests/hive-blobstore/src/test/results/clientpositive/insert_blobstore_to_blobstore.q.out
----------------------------------------------------------------------
diff --git 
a/itests/hive-blobstore/src/test/results/clientpositive/insert_blobstore_to_blobstore.q.out
 
b/itests/hive-blobstore/src/test/results/clientpositive/insert_blobstore_to_blobstore.q.out
index 4651899..6b6a54b 100644
--- 
a/itests/hive-blobstore/src/test/results/clientpositive/insert_blobstore_to_blobstore.q.out
+++ 
b/itests/hive-blobstore/src/test/results/clientpositive/insert_blobstore_to_blobstore.q.out
@@ -60,6 +60,9 @@ POSTHOOK: query: INSERT OVERWRITE TABLE blobstore_table 
SELECT * FROM blobstore_
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@blobstore_source
 POSTHOOK: Output: default@blobstore_table
+POSTHOOK: Lineage: blobstore_table.a SIMPLE 
[(blobstore_source)blobstore_source.FieldSchema(name:a, type:string, 
comment:null), ]
+POSTHOOK: Lineage: blobstore_table.b SIMPLE 
[(blobstore_source)blobstore_source.FieldSchema(name:b, type:string, 
comment:null), ]
+POSTHOOK: Lineage: blobstore_table.c SIMPLE 
[(blobstore_source)blobstore_source.FieldSchema(name:c, type:double, 
comment:null), ]
 PREHOOK: query: SELECT COUNT(*) FROM blobstore_table
 PREHOOK: type: QUERY
 PREHOOK: Input: default@blobstore_table
@@ -77,6 +80,9 @@ POSTHOOK: query: INSERT INTO TABLE blobstore_table SELECT * 
FROM blobstore_sourc
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@blobstore_source
 POSTHOOK: Output: default@blobstore_table
+POSTHOOK: Lineage: blobstore_table.a SIMPLE 
[(blobstore_source)blobstore_source.FieldSchema(name:a, type:string, 
comment:null), ]
+POSTHOOK: Lineage: blobstore_table.b SIMPLE 
[(blobstore_source)blobstore_source.FieldSchema(name:b, type:string, 
comment:null), ]
+POSTHOOK: Lineage: blobstore_table.c SIMPLE 
[(blobstore_source)blobstore_source.FieldSchema(name:c, type:double, 
comment:null), ]
 PREHOOK: query: SELECT COUNT(*) FROM blobstore_table
 PREHOOK: type: QUERY
 PREHOOK: Input: default@blobstore_table

http://git-wip-us.apache.org/repos/asf/hive/blob/2b71ad30/itests/hive-blobstore/src/test/results/clientpositive/insert_empty_into_blobstore.q.out
----------------------------------------------------------------------
diff --git 
a/itests/hive-blobstore/src/test/results/clientpositive/insert_empty_into_blobstore.q.out
 
b/itests/hive-blobstore/src/test/results/clientpositive/insert_empty_into_blobstore.q.out
index 8e5e096..ccd9ba5 100644
--- 
a/itests/hive-blobstore/src/test/results/clientpositive/insert_empty_into_blobstore.q.out
+++ 
b/itests/hive-blobstore/src/test/results/clientpositive/insert_empty_into_blobstore.q.out
@@ -108,6 +108,9 @@ POSTHOOK: query: INSERT INTO TABLE blobstore_list_bucketing 
PARTITION (pt='a', d
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@empty
 POSTHOOK: Output: default@blobstore_list_bucketing@pt=a/dt=a/hr=a
+POSTHOOK: Lineage: blobstore_list_bucketing PARTITION(pt=a,dt=a,hr=a).dept 
SIMPLE [(empty)empty.FieldSchema(name:dept, type:string, comment:null), ]
+POSTHOOK: Lineage: blobstore_list_bucketing PARTITION(pt=a,dt=a,hr=a).id 
SIMPLE [(empty)empty.FieldSchema(name:id, type:int, comment:null), ]
+POSTHOOK: Lineage: blobstore_list_bucketing PARTITION(pt=a,dt=a,hr=a).name 
SIMPLE [(empty)empty.FieldSchema(name:name, type:string, comment:null), ]
 PREHOOK: query: SELECT COUNT(*) FROM blobstore_list_bucketing
 PREHOOK: type: QUERY
 PREHOOK: Input: default@blobstore_list_bucketing

http://git-wip-us.apache.org/repos/asf/hive/blob/2b71ad30/itests/hive-blobstore/src/test/results/clientpositive/insert_into_table.q.out
----------------------------------------------------------------------
diff --git 
a/itests/hive-blobstore/src/test/results/clientpositive/insert_into_table.q.out 
b/itests/hive-blobstore/src/test/results/clientpositive/insert_into_table.q.out
index 663a572..f50f4af 100644
--- 
a/itests/hive-blobstore/src/test/results/clientpositive/insert_into_table.q.out
+++ 
b/itests/hive-blobstore/src/test/results/clientpositive/insert_into_table.q.out
@@ -18,12 +18,14 @@ PREHOOK: Output: default@table1
 POSTHOOK: query: INSERT INTO TABLE table1 VALUES (1)
 POSTHOOK: type: QUERY
 POSTHOOK: Output: default@table1
+POSTHOOK: Lineage: table1.id EXPRESSION 
[(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, 
type:string, comment:), ]
 PREHOOK: query: INSERT INTO TABLE table1 VALUES (2)
 PREHOOK: type: QUERY
 PREHOOK: Output: default@table1
 POSTHOOK: query: INSERT INTO TABLE table1 VALUES (2)
 POSTHOOK: type: QUERY
 POSTHOOK: Output: default@table1
+POSTHOOK: Lineage: table1.id EXPRESSION 
[(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, 
type:string, comment:), ]
 PREHOOK: query: SELECT * FROM table1
 PREHOOK: type: QUERY
 PREHOOK: Input: default@table1

http://git-wip-us.apache.org/repos/asf/hive/blob/2b71ad30/itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_table.q.out
----------------------------------------------------------------------
diff --git 
a/itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_table.q.out
 
b/itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_table.q.out
index 8052fd8..ba0e83d 100644
--- 
a/itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_table.q.out
+++ 
b/itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_table.q.out
@@ -18,6 +18,7 @@ PREHOOK: Output: default@table1
 POSTHOOK: query: INSERT OVERWRITE TABLE table1 VALUES (1)
 POSTHOOK: type: QUERY
 POSTHOOK: Output: default@table1
+POSTHOOK: Lineage: table1.id EXPRESSION 
[(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, 
type:string, comment:), ]
 PREHOOK: query: SELECT * FROM table1
 PREHOOK: type: QUERY
 PREHOOK: Input: default@table1
@@ -33,6 +34,7 @@ PREHOOK: Output: default@table1
 POSTHOOK: query: INSERT OVERWRITE TABLE table1 VALUES (2)
 POSTHOOK: type: QUERY
 POSTHOOK: Output: default@table1
+POSTHOOK: Lineage: table1.id EXPRESSION 
[(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, 
type:string, comment:), ]
 PREHOOK: query: SELECT * FROM table1
 PREHOOK: type: QUERY
 PREHOOK: Input: default@table1

http://git-wip-us.apache.org/repos/asf/hive/blob/2b71ad30/itests/hive-blobstore/src/test/results/clientpositive/orc_format_nonpart.q.out
----------------------------------------------------------------------
diff --git 
a/itests/hive-blobstore/src/test/results/clientpositive/orc_format_nonpart.q.out
 
b/itests/hive-blobstore/src/test/results/clientpositive/orc_format_nonpart.q.out
index f470568..954a62a 100644
--- 
a/itests/hive-blobstore/src/test/results/clientpositive/orc_format_nonpart.q.out
+++ 
b/itests/hive-blobstore/src/test/results/clientpositive/orc_format_nonpart.q.out
@@ -56,6 +56,9 @@ SELECT * FROM blobstore_source
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@blobstore_source
 POSTHOOK: Output: default@orc_table
+POSTHOOK: Lineage: orc_table.a EXPRESSION 
[(blobstore_source)blobstore_source.FieldSchema(name:a, type:string, 
comment:null), ]
+POSTHOOK: Lineage: orc_table.b SIMPLE 
[(blobstore_source)blobstore_source.FieldSchema(name:b, type:string, 
comment:null), ]
+POSTHOOK: Lineage: orc_table.value SIMPLE 
[(blobstore_source)blobstore_source.FieldSchema(name:c, type:double, 
comment:null), ]
 PREHOOK: query: SELECT * FROM orc_table
 PREHOOK: type: QUERY
 PREHOOK: Input: default@orc_table
@@ -126,6 +129,9 @@ SELECT * FROM blobstore_source
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@blobstore_source
 POSTHOOK: Output: default@orc_table
+POSTHOOK: Lineage: orc_table.a EXPRESSION 
[(blobstore_source)blobstore_source.FieldSchema(name:a, type:string, 
comment:null), ]
+POSTHOOK: Lineage: orc_table.b SIMPLE 
[(blobstore_source)blobstore_source.FieldSchema(name:b, type:string, 
comment:null), ]
+POSTHOOK: Lineage: orc_table.value SIMPLE 
[(blobstore_source)blobstore_source.FieldSchema(name:c, type:double, 
comment:null), ]
 PREHOOK: query: SELECT * FROM orc_table
 PREHOOK: type: QUERY
 PREHOOK: Input: default@orc_table

http://git-wip-us.apache.org/repos/asf/hive/blob/2b71ad30/itests/hive-blobstore/src/test/results/clientpositive/orc_format_part.q.out
----------------------------------------------------------------------
diff --git 
a/itests/hive-blobstore/src/test/results/clientpositive/orc_format_part.q.out 
b/itests/hive-blobstore/src/test/results/clientpositive/orc_format_part.q.out
index 1ef9810..7b25613 100644
--- 
a/itests/hive-blobstore/src/test/results/clientpositive/orc_format_part.q.out
+++ 
b/itests/hive-blobstore/src/test/results/clientpositive/orc_format_part.q.out
@@ -242,6 +242,14 @@ WHERE SUBSTR(run_date,1,6)='201209' AND game_id=39 AND 
event_name='hq_change'
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src_events
 POSTHOOK: Output: 
default@orc_events@run_date=201209/game_id=39/event_name=hq_change
+POSTHOOK: Lineage: orc_events 
PARTITION(run_date=201209,game_id=39,event_name=hq_change).event_data SIMPLE 
[(src_events)src_events.FieldSchema(name:event_data, type:string, 
comment:null), ]
+POSTHOOK: Lineage: orc_events 
PARTITION(run_date=201209,game_id=39,event_name=hq_change).full_uid SIMPLE 
[(src_events)src_events.FieldSchema(name:full_uid, type:bigint, comment:null), ]
+POSTHOOK: Lineage: orc_events 
PARTITION(run_date=201209,game_id=39,event_name=hq_change).log_id SIMPLE 
[(src_events)src_events.FieldSchema(name:log_id, type:bigint, comment:null), ]
+POSTHOOK: Lineage: orc_events 
PARTITION(run_date=201209,game_id=39,event_name=hq_change).session_id SIMPLE 
[(src_events)src_events.FieldSchema(name:session_id, type:string, 
comment:null), ]
+POSTHOOK: Lineage: orc_events 
PARTITION(run_date=201209,game_id=39,event_name=hq_change).time SIMPLE 
[(src_events)src_events.FieldSchema(name:time, type:bigint, comment:null), ]
+POSTHOOK: Lineage: orc_events 
PARTITION(run_date=201209,game_id=39,event_name=hq_change).type SIMPLE 
[(src_events)src_events.FieldSchema(name:type, type:int, comment:null), ]
+POSTHOOK: Lineage: orc_events 
PARTITION(run_date=201209,game_id=39,event_name=hq_change).uid SIMPLE 
[(src_events)src_events.FieldSchema(name:uid, type:bigint, comment:null), ]
+POSTHOOK: Lineage: orc_events 
PARTITION(run_date=201209,game_id=39,event_name=hq_change).user_id SIMPLE 
[(src_events)src_events.FieldSchema(name:user_id, type:bigint, comment:null), ]
 PREHOOK: query: SELECT COUNT(*) FROM orc_events
 PREHOOK: type: QUERY
 PREHOOK: Input: default@orc_events
@@ -263,6 +271,14 @@ WHERE SUBSTR(run_date,1,6)='201209' AND game_id=39 AND 
event_name='hq_change'
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src_events
 POSTHOOK: Output: 
default@orc_events@run_date=201209/game_id=39/event_name=hq_change
+POSTHOOK: Lineage: orc_events 
PARTITION(run_date=201209,game_id=39,event_name=hq_change).event_data SIMPLE 
[(src_events)src_events.FieldSchema(name:event_data, type:string, 
comment:null), ]
+POSTHOOK: Lineage: orc_events 
PARTITION(run_date=201209,game_id=39,event_name=hq_change).full_uid SIMPLE 
[(src_events)src_events.FieldSchema(name:full_uid, type:bigint, comment:null), ]
+POSTHOOK: Lineage: orc_events 
PARTITION(run_date=201209,game_id=39,event_name=hq_change).log_id SIMPLE 
[(src_events)src_events.FieldSchema(name:log_id, type:bigint, comment:null), ]
+POSTHOOK: Lineage: orc_events 
PARTITION(run_date=201209,game_id=39,event_name=hq_change).session_id SIMPLE 
[(src_events)src_events.FieldSchema(name:session_id, type:string, 
comment:null), ]
+POSTHOOK: Lineage: orc_events 
PARTITION(run_date=201209,game_id=39,event_name=hq_change).time SIMPLE 
[(src_events)src_events.FieldSchema(name:time, type:bigint, comment:null), ]
+POSTHOOK: Lineage: orc_events 
PARTITION(run_date=201209,game_id=39,event_name=hq_change).type SIMPLE 
[(src_events)src_events.FieldSchema(name:type, type:int, comment:null), ]
+POSTHOOK: Lineage: orc_events 
PARTITION(run_date=201209,game_id=39,event_name=hq_change).uid SIMPLE 
[(src_events)src_events.FieldSchema(name:uid, type:bigint, comment:null), ]
+POSTHOOK: Lineage: orc_events 
PARTITION(run_date=201209,game_id=39,event_name=hq_change).user_id SIMPLE 
[(src_events)src_events.FieldSchema(name:user_id, type:bigint, comment:null), ]
 PREHOOK: query: SELECT COUNT(*) FROM orc_events
 PREHOOK: type: QUERY
 PREHOOK: Input: default@orc_events

http://git-wip-us.apache.org/repos/asf/hive/blob/2b71ad30/itests/hive-blobstore/src/test/results/clientpositive/orc_nonstd_partitions_loc.q.out
----------------------------------------------------------------------
diff --git 
a/itests/hive-blobstore/src/test/results/clientpositive/orc_nonstd_partitions_loc.q.out
 
b/itests/hive-blobstore/src/test/results/clientpositive/orc_nonstd_partitions_loc.q.out
index 9de4190..1201ce2 100644
--- 
a/itests/hive-blobstore/src/test/results/clientpositive/orc_nonstd_partitions_loc.q.out
+++ 
b/itests/hive-blobstore/src/test/results/clientpositive/orc_nonstd_partitions_loc.q.out
@@ -167,6 +167,14 @@ WHERE SUBSTR(run_date,1,6)='201211'
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src_events
 POSTHOOK: Output: 
default@orc_events@run_date=201211/game_id=39/event_name=hq_change
+POSTHOOK: Lineage: orc_events 
PARTITION(run_date=201211,game_id=39,event_name=hq_change).event_data SIMPLE 
[(src_events)src_events.FieldSchema(name:event_data, type:string, 
comment:null), ]
+POSTHOOK: Lineage: orc_events 
PARTITION(run_date=201211,game_id=39,event_name=hq_change).full_uid SIMPLE 
[(src_events)src_events.FieldSchema(name:full_uid, type:bigint, comment:null), ]
+POSTHOOK: Lineage: orc_events 
PARTITION(run_date=201211,game_id=39,event_name=hq_change).log_id SIMPLE 
[(src_events)src_events.FieldSchema(name:log_id, type:bigint, comment:null), ]
+POSTHOOK: Lineage: orc_events 
PARTITION(run_date=201211,game_id=39,event_name=hq_change).session_id SIMPLE 
[(src_events)src_events.FieldSchema(name:session_id, type:string, 
comment:null), ]
+POSTHOOK: Lineage: orc_events 
PARTITION(run_date=201211,game_id=39,event_name=hq_change).time SIMPLE 
[(src_events)src_events.FieldSchema(name:time, type:bigint, comment:null), ]
+POSTHOOK: Lineage: orc_events 
PARTITION(run_date=201211,game_id=39,event_name=hq_change).type SIMPLE 
[(src_events)src_events.FieldSchema(name:type, type:int, comment:null), ]
+POSTHOOK: Lineage: orc_events 
PARTITION(run_date=201211,game_id=39,event_name=hq_change).uid SIMPLE 
[(src_events)src_events.FieldSchema(name:uid, type:bigint, comment:null), ]
+POSTHOOK: Lineage: orc_events 
PARTITION(run_date=201211,game_id=39,event_name=hq_change).user_id SIMPLE 
[(src_events)src_events.FieldSchema(name:user_id, type:bigint, comment:null), ]
 PREHOOK: query: SHOW PARTITIONS orc_events
 PREHOOK: type: SHOWPARTITIONS
 PREHOOK: Input: default@orc_events
@@ -198,6 +206,14 @@ WHERE SUBSTR(run_date,1,6)='201211'
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src_events
 POSTHOOK: Output: 
default@orc_events@run_date=201211/game_id=39/event_name=hq_change
+POSTHOOK: Lineage: orc_events 
PARTITION(run_date=201211,game_id=39,event_name=hq_change).event_data SIMPLE 
[(src_events)src_events.FieldSchema(name:event_data, type:string, 
comment:null), ]
+POSTHOOK: Lineage: orc_events 
PARTITION(run_date=201211,game_id=39,event_name=hq_change).full_uid SIMPLE 
[(src_events)src_events.FieldSchema(name:full_uid, type:bigint, comment:null), ]
+POSTHOOK: Lineage: orc_events 
PARTITION(run_date=201211,game_id=39,event_name=hq_change).log_id SIMPLE 
[(src_events)src_events.FieldSchema(name:log_id, type:bigint, comment:null), ]
+POSTHOOK: Lineage: orc_events 
PARTITION(run_date=201211,game_id=39,event_name=hq_change).session_id SIMPLE 
[(src_events)src_events.FieldSchema(name:session_id, type:string, 
comment:null), ]
+POSTHOOK: Lineage: orc_events 
PARTITION(run_date=201211,game_id=39,event_name=hq_change).time SIMPLE 
[(src_events)src_events.FieldSchema(name:time, type:bigint, comment:null), ]
+POSTHOOK: Lineage: orc_events 
PARTITION(run_date=201211,game_id=39,event_name=hq_change).type SIMPLE 
[(src_events)src_events.FieldSchema(name:type, type:int, comment:null), ]
+POSTHOOK: Lineage: orc_events 
PARTITION(run_date=201211,game_id=39,event_name=hq_change).uid SIMPLE 
[(src_events)src_events.FieldSchema(name:uid, type:bigint, comment:null), ]
+POSTHOOK: Lineage: orc_events 
PARTITION(run_date=201211,game_id=39,event_name=hq_change).user_id SIMPLE 
[(src_events)src_events.FieldSchema(name:user_id, type:bigint, comment:null), ]
 PREHOOK: query: SHOW PARTITIONS orc_events
 PREHOOK: type: SHOWPARTITIONS
 PREHOOK: Input: default@orc_events
@@ -367,6 +383,14 @@ WHERE SUBSTR(run_date,1,6)='201209'
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src_events
 POSTHOOK: Output: 
default@orc_events@run_date=201207/game_id=39/event_name=hq_change
+POSTHOOK: Lineage: orc_events 
PARTITION(run_date=201207,game_id=39,event_name=hq_change).event_data SIMPLE 
[(src_events)src_events.FieldSchema(name:event_data, type:string, 
comment:null), ]
+POSTHOOK: Lineage: orc_events 
PARTITION(run_date=201207,game_id=39,event_name=hq_change).full_uid SIMPLE 
[(src_events)src_events.FieldSchema(name:full_uid, type:bigint, comment:null), ]
+POSTHOOK: Lineage: orc_events 
PARTITION(run_date=201207,game_id=39,event_name=hq_change).log_id SIMPLE 
[(src_events)src_events.FieldSchema(name:log_id, type:bigint, comment:null), ]
+POSTHOOK: Lineage: orc_events 
PARTITION(run_date=201207,game_id=39,event_name=hq_change).session_id SIMPLE 
[(src_events)src_events.FieldSchema(name:session_id, type:string, 
comment:null), ]
+POSTHOOK: Lineage: orc_events 
PARTITION(run_date=201207,game_id=39,event_name=hq_change).time SIMPLE 
[(src_events)src_events.FieldSchema(name:time, type:bigint, comment:null), ]
+POSTHOOK: Lineage: orc_events 
PARTITION(run_date=201207,game_id=39,event_name=hq_change).type SIMPLE 
[(src_events)src_events.FieldSchema(name:type, type:int, comment:null), ]
+POSTHOOK: Lineage: orc_events 
PARTITION(run_date=201207,game_id=39,event_name=hq_change).uid SIMPLE 
[(src_events)src_events.FieldSchema(name:uid, type:bigint, comment:null), ]
+POSTHOOK: Lineage: orc_events 
PARTITION(run_date=201207,game_id=39,event_name=hq_change).user_id SIMPLE 
[(src_events)src_events.FieldSchema(name:user_id, type:bigint, comment:null), ]
 PREHOOK: query: SHOW PARTITIONS orc_events
 PREHOOK: type: SHOWPARTITIONS
 PREHOOK: Input: default@orc_events

http://git-wip-us.apache.org/repos/asf/hive/blob/2b71ad30/itests/hive-blobstore/src/test/results/clientpositive/rcfile_format_nonpart.q.out
----------------------------------------------------------------------
diff --git 
a/itests/hive-blobstore/src/test/results/clientpositive/rcfile_format_nonpart.q.out
 
b/itests/hive-blobstore/src/test/results/clientpositive/rcfile_format_nonpart.q.out
index 44a1f11..de5104b 100644
--- 
a/itests/hive-blobstore/src/test/results/clientpositive/rcfile_format_nonpart.q.out
+++ 
b/itests/hive-blobstore/src/test/results/clientpositive/rcfile_format_nonpart.q.out
@@ -56,6 +56,9 @@ SELECT * FROM blobstore_source
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@blobstore_source
 POSTHOOK: Output: default@rcfile_table
+POSTHOOK: Lineage: rcfile_table.a EXPRESSION 
[(blobstore_source)blobstore_source.FieldSchema(name:a, type:string, 
comment:null), ]
+POSTHOOK: Lineage: rcfile_table.b SIMPLE 
[(blobstore_source)blobstore_source.FieldSchema(name:b, type:string, 
comment:null), ]
+POSTHOOK: Lineage: rcfile_table.value SIMPLE 
[(blobstore_source)blobstore_source.FieldSchema(name:c, type:double, 
comment:null), ]
 PREHOOK: query: SELECT * FROM rcfile_table
 PREHOOK: type: QUERY
 PREHOOK: Input: default@rcfile_table
@@ -126,6 +129,9 @@ SELECT * FROM blobstore_source
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@blobstore_source
 POSTHOOK: Output: default@rcfile_table
+POSTHOOK: Lineage: rcfile_table.a EXPRESSION 
[(blobstore_source)blobstore_source.FieldSchema(name:a, type:string, 
comment:null), ]
+POSTHOOK: Lineage: rcfile_table.b SIMPLE 
[(blobstore_source)blobstore_source.FieldSchema(name:b, type:string, 
comment:null), ]
+POSTHOOK: Lineage: rcfile_table.value SIMPLE 
[(blobstore_source)blobstore_source.FieldSchema(name:c, type:double, 
comment:null), ]
 PREHOOK: query: SELECT * FROM rcfile_table
 PREHOOK: type: QUERY
 PREHOOK: Input: default@rcfile_table

http://git-wip-us.apache.org/repos/asf/hive/blob/2b71ad30/itests/hive-blobstore/src/test/results/clientpositive/rcfile_format_part.q.out
----------------------------------------------------------------------
diff --git 
a/itests/hive-blobstore/src/test/results/clientpositive/rcfile_format_part.q.out
 
b/itests/hive-blobstore/src/test/results/clientpositive/rcfile_format_part.q.out
index defca3b..24fc525 100644
--- 
a/itests/hive-blobstore/src/test/results/clientpositive/rcfile_format_part.q.out
+++ 
b/itests/hive-blobstore/src/test/results/clientpositive/rcfile_format_part.q.out
@@ -242,6 +242,14 @@ WHERE SUBSTR(run_date,1,6)='201209' AND game_id=39 AND 
event_name='hq_change'
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src_events
 POSTHOOK: Output: 
default@rcfile_events@run_date=201209/game_id=39/event_name=hq_change
+POSTHOOK: Lineage: rcfile_events 
PARTITION(run_date=201209,game_id=39,event_name=hq_change).event_data SIMPLE 
[(src_events)src_events.FieldSchema(name:event_data, type:string, 
comment:null), ]
+POSTHOOK: Lineage: rcfile_events 
PARTITION(run_date=201209,game_id=39,event_name=hq_change).full_uid SIMPLE 
[(src_events)src_events.FieldSchema(name:full_uid, type:bigint, comment:null), ]
+POSTHOOK: Lineage: rcfile_events 
PARTITION(run_date=201209,game_id=39,event_name=hq_change).log_id SIMPLE 
[(src_events)src_events.FieldSchema(name:log_id, type:bigint, comment:null), ]
+POSTHOOK: Lineage: rcfile_events 
PARTITION(run_date=201209,game_id=39,event_name=hq_change).session_id SIMPLE 
[(src_events)src_events.FieldSchema(name:session_id, type:string, 
comment:null), ]
+POSTHOOK: Lineage: rcfile_events 
PARTITION(run_date=201209,game_id=39,event_name=hq_change).time SIMPLE 
[(src_events)src_events.FieldSchema(name:time, type:bigint, comment:null), ]
+POSTHOOK: Lineage: rcfile_events 
PARTITION(run_date=201209,game_id=39,event_name=hq_change).type SIMPLE 
[(src_events)src_events.FieldSchema(name:type, type:int, comment:null), ]
+POSTHOOK: Lineage: rcfile_events 
PARTITION(run_date=201209,game_id=39,event_name=hq_change).uid SIMPLE 
[(src_events)src_events.FieldSchema(name:uid, type:bigint, comment:null), ]
+POSTHOOK: Lineage: rcfile_events 
PARTITION(run_date=201209,game_id=39,event_name=hq_change).user_id SIMPLE 
[(src_events)src_events.FieldSchema(name:user_id, type:bigint, comment:null), ]
 PREHOOK: query: SELECT COUNT(*) FROM rcfile_events
 PREHOOK: type: QUERY
 PREHOOK: Input: default@rcfile_events
@@ -263,6 +271,14 @@ WHERE SUBSTR(run_date,1,6)='201209' AND game_id=39 AND 
event_name='hq_change'
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src_events
 POSTHOOK: Output: 
default@rcfile_events@run_date=201209/game_id=39/event_name=hq_change
+POSTHOOK: Lineage: rcfile_events 
PARTITION(run_date=201209,game_id=39,event_name=hq_change).event_data SIMPLE 
[(src_events)src_events.FieldSchema(name:event_data, type:string, 
comment:null), ]
+POSTHOOK: Lineage: rcfile_events 
PARTITION(run_date=201209,game_id=39,event_name=hq_change).full_uid SIMPLE 
[(src_events)src_events.FieldSchema(name:full_uid, type:bigint, comment:null), ]
+POSTHOOK: Lineage: rcfile_events 
PARTITION(run_date=201209,game_id=39,event_name=hq_change).log_id SIMPLE 
[(src_events)src_events.FieldSchema(name:log_id, type:bigint, comment:null), ]
+POSTHOOK: Lineage: rcfile_events 
PARTITION(run_date=201209,game_id=39,event_name=hq_change).session_id SIMPLE 
[(src_events)src_events.FieldSchema(name:session_id, type:string, 
comment:null), ]
+POSTHOOK: Lineage: rcfile_events 
PARTITION(run_date=201209,game_id=39,event_name=hq_change).time SIMPLE 
[(src_events)src_events.FieldSchema(name:time, type:bigint, comment:null), ]
+POSTHOOK: Lineage: rcfile_events 
PARTITION(run_date=201209,game_id=39,event_name=hq_change).type SIMPLE 
[(src_events)src_events.FieldSchema(name:type, type:int, comment:null), ]
+POSTHOOK: Lineage: rcfile_events 
PARTITION(run_date=201209,game_id=39,event_name=hq_change).uid SIMPLE 
[(src_events)src_events.FieldSchema(name:uid, type:bigint, comment:null), ]
+POSTHOOK: Lineage: rcfile_events 
PARTITION(run_date=201209,game_id=39,event_name=hq_change).user_id SIMPLE 
[(src_events)src_events.FieldSchema(name:user_id, type:bigint, comment:null), ]
 PREHOOK: query: SELECT COUNT(*) FROM rcfile_events
 PREHOOK: type: QUERY
 PREHOOK: Input: default@rcfile_events

http://git-wip-us.apache.org/repos/asf/hive/blob/2b71ad30/itests/hive-blobstore/src/test/results/clientpositive/rcfile_nonstd_partitions_loc.q.out
----------------------------------------------------------------------
diff --git 
a/itests/hive-blobstore/src/test/results/clientpositive/rcfile_nonstd_partitions_loc.q.out
 
b/itests/hive-blobstore/src/test/results/clientpositive/rcfile_nonstd_partitions_loc.q.out
index 5db9c7e..6bcfe41 100644
--- 
a/itests/hive-blobstore/src/test/results/clientpositive/rcfile_nonstd_partitions_loc.q.out
+++ 
b/itests/hive-blobstore/src/test/results/clientpositive/rcfile_nonstd_partitions_loc.q.out
@@ -167,6 +167,14 @@ WHERE SUBSTR(run_date,1,6)='201211'
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src_events
 POSTHOOK: Output: 
default@rcfile_events@run_date=201211/game_id=39/event_name=hq_change
+POSTHOOK: Lineage: rcfile_events 
PARTITION(run_date=201211,game_id=39,event_name=hq_change).event_data SIMPLE 
[(src_events)src_events.FieldSchema(name:event_data, type:string, 
comment:null), ]
+POSTHOOK: Lineage: rcfile_events 
PARTITION(run_date=201211,game_id=39,event_name=hq_change).full_uid SIMPLE 
[(src_events)src_events.FieldSchema(name:full_uid, type:bigint, comment:null), ]
+POSTHOOK: Lineage: rcfile_events 
PARTITION(run_date=201211,game_id=39,event_name=hq_change).log_id SIMPLE 
[(src_events)src_events.FieldSchema(name:log_id, type:bigint, comment:null), ]
+POSTHOOK: Lineage: rcfile_events 
PARTITION(run_date=201211,game_id=39,event_name=hq_change).session_id SIMPLE 
[(src_events)src_events.FieldSchema(name:session_id, type:string, 
comment:null), ]
+POSTHOOK: Lineage: rcfile_events 
PARTITION(run_date=201211,game_id=39,event_name=hq_change).time SIMPLE 
[(src_events)src_events.FieldSchema(name:time, type:bigint, comment:null), ]
+POSTHOOK: Lineage: rcfile_events 
PARTITION(run_date=201211,game_id=39,event_name=hq_change).type SIMPLE 
[(src_events)src_events.FieldSchema(name:type, type:int, comment:null), ]
+POSTHOOK: Lineage: rcfile_events 
PARTITION(run_date=201211,game_id=39,event_name=hq_change).uid SIMPLE 
[(src_events)src_events.FieldSchema(name:uid, type:bigint, comment:null), ]
+POSTHOOK: Lineage: rcfile_events 
PARTITION(run_date=201211,game_id=39,event_name=hq_change).user_id SIMPLE 
[(src_events)src_events.FieldSchema(name:user_id, type:bigint, comment:null), ]
 PREHOOK: query: SHOW PARTITIONS rcfile_events
 PREHOOK: type: SHOWPARTITIONS
 PREHOOK: Input: default@rcfile_events
@@ -198,6 +206,14 @@ WHERE SUBSTR(run_date,1,6)='201211'
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src_events
 POSTHOOK: Output: 
default@rcfile_events@run_date=201211/game_id=39/event_name=hq_change
+POSTHOOK: Lineage: rcfile_events 
PARTITION(run_date=201211,game_id=39,event_name=hq_change).event_data SIMPLE 
[(src_events)src_events.FieldSchema(name:event_data, type:string, 
comment:null), ]
+POSTHOOK: Lineage: rcfile_events 
PARTITION(run_date=201211,game_id=39,event_name=hq_change).full_uid SIMPLE 
[(src_events)src_events.FieldSchema(name:full_uid, type:bigint, comment:null), ]
+POSTHOOK: Lineage: rcfile_events 
PARTITION(run_date=201211,game_id=39,event_name=hq_change).log_id SIMPLE 
[(src_events)src_events.FieldSchema(name:log_id, type:bigint, comment:null), ]
+POSTHOOK: Lineage: rcfile_events 
PARTITION(run_date=201211,game_id=39,event_name=hq_change).session_id SIMPLE 
[(src_events)src_events.FieldSchema(name:session_id, type:string, 
comment:null), ]
+POSTHOOK: Lineage: rcfile_events 
PARTITION(run_date=201211,game_id=39,event_name=hq_change).time SIMPLE 
[(src_events)src_events.FieldSchema(name:time, type:bigint, comment:null), ]
+POSTHOOK: Lineage: rcfile_events 
PARTITION(run_date=201211,game_id=39,event_name=hq_change).type SIMPLE 
[(src_events)src_events.FieldSchema(name:type, type:int, comment:null), ]
+POSTHOOK: Lineage: rcfile_events 
PARTITION(run_date=201211,game_id=39,event_name=hq_change).uid SIMPLE 
[(src_events)src_events.FieldSchema(name:uid, type:bigint, comment:null), ]
+POSTHOOK: Lineage: rcfile_events 
PARTITION(run_date=201211,game_id=39,event_name=hq_change).user_id SIMPLE 
[(src_events)src_events.FieldSchema(name:user_id, type:bigint, comment:null), ]
 PREHOOK: query: SHOW PARTITIONS rcfile_events
 PREHOOK: type: SHOWPARTITIONS
 PREHOOK: Input: default@rcfile_events
@@ -387,6 +403,14 @@ WHERE SUBSTR(run_date,1,6)='201209'
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src_events
 POSTHOOK: Output: 
default@rcfile_events@run_date=201207/game_id=39/event_name=hq_change
+POSTHOOK: Lineage: rcfile_events 
PARTITION(run_date=201207,game_id=39,event_name=hq_change).event_data SIMPLE 
[(src_events)src_events.FieldSchema(name:event_data, type:string, 
comment:null), ]
+POSTHOOK: Lineage: rcfile_events 
PARTITION(run_date=201207,game_id=39,event_name=hq_change).full_uid SIMPLE 
[(src_events)src_events.FieldSchema(name:full_uid, type:bigint, comment:null), ]
+POSTHOOK: Lineage: rcfile_events 
PARTITION(run_date=201207,game_id=39,event_name=hq_change).log_id SIMPLE 
[(src_events)src_events.FieldSchema(name:log_id, type:bigint, comment:null), ]
+POSTHOOK: Lineage: rcfile_events 
PARTITION(run_date=201207,game_id=39,event_name=hq_change).session_id SIMPLE 
[(src_events)src_events.FieldSchema(name:session_id, type:string, 
comment:null), ]
+POSTHOOK: Lineage: rcfile_events 
PARTITION(run_date=201207,game_id=39,event_name=hq_change).time SIMPLE 
[(src_events)src_events.FieldSchema(name:time, type:bigint, comment:null), ]
+POSTHOOK: Lineage: rcfile_events 
PARTITION(run_date=201207,game_id=39,event_name=hq_change).type SIMPLE 
[(src_events)src_events.FieldSchema(name:type, type:int, comment:null), ]
+POSTHOOK: Lineage: rcfile_events 
PARTITION(run_date=201207,game_id=39,event_name=hq_change).uid SIMPLE 
[(src_events)src_events.FieldSchema(name:uid, type:bigint, comment:null), ]
+POSTHOOK: Lineage: rcfile_events 
PARTITION(run_date=201207,game_id=39,event_name=hq_change).user_id SIMPLE 
[(src_events)src_events.FieldSchema(name:user_id, type:bigint, comment:null), ]
 PREHOOK: query: SHOW PARTITIONS rcfile_events
 PREHOOK: type: SHOWPARTITIONS
 PREHOOK: Input: default@rcfile_events

http://git-wip-us.apache.org/repos/asf/hive/blob/2b71ad30/itests/hive-blobstore/src/test/results/clientpositive/zero_rows_blobstore.q.out
----------------------------------------------------------------------
diff --git 
a/itests/hive-blobstore/src/test/results/clientpositive/zero_rows_blobstore.q.out
 
b/itests/hive-blobstore/src/test/results/clientpositive/zero_rows_blobstore.q.out
index 590f947..20f8311 100644
--- 
a/itests/hive-blobstore/src/test/results/clientpositive/zero_rows_blobstore.q.out
+++ 
b/itests/hive-blobstore/src/test/results/clientpositive/zero_rows_blobstore.q.out
@@ -63,6 +63,7 @@ POSTHOOK: query: INSERT OVERWRITE TABLE blobstore_target 
SELECT key FROM blobsto
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@blobstore_source
 POSTHOOK: Output: default@blobstore_target
+POSTHOOK: Lineage: blobstore_target.key SIMPLE 
[(blobstore_source)blobstore_source.FieldSchema(name:key, type:int, 
comment:null), ]
 PREHOOK: query: SELECT COUNT(*) FROM blobstore_target
 PREHOOK: type: QUERY
 PREHOOK: Input: default@blobstore_target
@@ -80,6 +81,7 @@ POSTHOOK: query: INSERT OVERWRITE TABLE blobstore_target 
SELECT key FROM blobsto
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@blobstore_source
 POSTHOOK: Output: default@blobstore_target
+POSTHOOK: Lineage: blobstore_target.key SIMPLE 
[(blobstore_source)blobstore_source.FieldSchema(name:key, type:int, 
comment:null), ]
 PREHOOK: query: SELECT COUNT(*) FROM blobstore_target
 PREHOOK: type: QUERY
 PREHOOK: Input: default@blobstore_target

http://git-wip-us.apache.org/repos/asf/hive/blob/2b71ad30/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
index 88bf829..da153e3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
@@ -115,6 +115,8 @@ import org.apache.hadoop.hive.ql.plan.StatsWork;
 import org.apache.hadoop.hive.ql.plan.TableDesc;
 import org.apache.hadoop.hive.ql.plan.TableScanDesc;
 import org.apache.hadoop.hive.ql.plan.TezWork;
+import org.apache.hadoop.hive.ql.session.LineageState;
+import org.apache.hadoop.hive.ql.session.SessionState;
 import org.apache.hadoop.hive.serde2.SerDeException;
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
@@ -1679,12 +1681,15 @@ public final class GenMapRedUtils {
     LoadFileDesc fileDesc = null;
     LoadTableDesc tableDesc = null;
 
+    LineageState lineageState = SessionState.get().getLineageState();
     if (linkedMoveWork.getLoadFileWork() != null) {
       fileDesc = new LoadFileDesc(linkedMoveWork.getLoadFileWork());
       fileDesc.setSourcePath(condInputPath);
+      lineageState.updateDirToOpMap(condInputPath, linkedMoveWork.getLoadFileWork().getSourcePath());
     } else if (linkedMoveWork.getLoadTableWork() != null) {
       tableDesc = new LoadTableDesc(linkedMoveWork.getLoadTableWork());
       tableDesc.setSourcePath(condInputPath);
+      lineageState.updateDirToOpMap(condInputPath, linkedMoveWork.getLoadTableWork().getSourcePath());
     } else {
     throw new IllegalArgumentException("Merging a path with a MoveWork with multi-files work is not allowed.");
     }

http://git-wip-us.apache.org/repos/asf/hive/blob/2b71ad30/ql/src/java/org/apache/hadoop/hive/ql/session/LineageState.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/session/LineageState.java b/ql/src/java/org/apache/hadoop/hive/ql/session/LineageState.java
index 0f95063..2a606a4 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/session/LineageState.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/session/LineageState.java
@@ -76,6 +76,13 @@ public class LineageState {
     dirToFop.put(dir, fop);
   }
 
+  public void updateDirToOpMap(Path newPath, Path oldPath) {
+    Operator op = dirToFop.remove(oldPath);
+    if (op != null) {
+      dirToFop.put(newPath, op);
+    }
+  }
+
   /**
    * Set the lineage information for the associated directory.
    *

http://git-wip-us.apache.org/repos/asf/hive/blob/2b71ad30/ql/src/test/org/apache/hadoop/hive/ql/optimizer/TestGenMapRedUtilsCreateConditionalTask.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/optimizer/TestGenMapRedUtilsCreateConditionalTask.java b/ql/src/test/org/apache/hadoop/hive/ql/optimizer/TestGenMapRedUtilsCreateConditionalTask.java
index 68ccda9..e7ce234 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/optimizer/TestGenMapRedUtilsCreateConditionalTask.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/optimizer/TestGenMapRedUtilsCreateConditionalTask.java
@@ -26,6 +26,7 @@ import org.apache.hadoop.hive.ql.io.HiveInputFormat;
 import org.apache.hadoop.hive.ql.io.HiveOutputFormat;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
 import org.apache.hadoop.hive.ql.plan.*;
+import org.apache.hadoop.hive.ql.session.SessionState;
 import org.junit.Before;
 import org.junit.BeforeClass;
 import org.junit.Test;
@@ -53,6 +54,7 @@ public class TestGenMapRedUtilsCreateConditionalTask {
   @Before
   public void setUp() {
     dummyMRTask = new MapRedTask();
+    SessionState.start(hiveConf);
   }
 
   @Test

Reply via email to