Repository: hive Updated Branches: refs/heads/master a036e52df -> 36653c2cd
HIVE-20593 : Load Data for partitioned ACID tables fails with bucketId out of range: -1 (Deepak Jaiswal, reviewed by Eugene Koifman) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/36653c2c Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/36653c2c Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/36653c2c Branch: refs/heads/master Commit: 36653c2cd4af9815151a2453d1b50065510a4fe9 Parents: a036e52 Author: Deepak Jaiswal <djais...@apache.org> Authored: Tue Sep 25 22:55:35 2018 -0700 Committer: Deepak Jaiswal <djais...@apache.org> Committed: Tue Sep 25 22:55:35 2018 -0700 ---------------------------------------------------------------------- .../hive/ql/parse/LoadSemanticAnalyzer.java | 6 + .../clientpositive/load_data_using_job.q | 18 ++- .../llap/load_data_using_job.q.out | 110 ++++++++++++++++++- 3 files changed, 128 insertions(+), 6 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/36653c2c/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java index 8d33cf5..308297e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java @@ -23,11 +23,13 @@ import java.net.URI; import java.net.URISyntaxException; import java.util.ArrayList; import java.util.Arrays; +import java.util.HashMap; import java.util.HashSet; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; + import org.antlr.runtime.tree.Tree; import org.apache.commons.codec.DecoderException; import org.apache.commons.codec.net.URLCodec; @@ -40,6 +42,7 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.StrictChecks; import org.apache.hadoop.hive.metastore.TableType; import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; import org.apache.hadoop.hive.ql.Context; import org.apache.hadoop.hive.ql.ErrorMsg; import org.apache.hadoop.hive.ql.QueryState; @@ -474,6 +477,9 @@ public class LoadSemanticAnalyzer extends SemanticAnalyzer { // wipe out partition columns tempTableObj.setPartCols(new ArrayList<>()); + // Reset table params + tempTableObj.setParameters(new HashMap<>()); + // Set data location and input format, it must be text tempTableObj.setDataLocation(new Path(fromURI)); if (inputFormatClassName != null && serDeClassName != null) { http://git-wip-us.apache.org/repos/asf/hive/blob/36653c2c/ql/src/test/queries/clientpositive/load_data_using_job.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/load_data_using_job.q b/ql/src/test/queries/clientpositive/load_data_using_job.q index b760d9b..970a752 100644 --- a/ql/src/test/queries/clientpositive/load_data_using_job.q +++ b/ql/src/test/queries/clientpositive/load_data_using_job.q @@ -91,4 +91,20 @@ load data local inpath '../../data/files/load_data_job/load_data_1_partition.txt INPUTFORMAT 'org.apache.hadoop.mapred.TextInputFormat' SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'; select * from srcbucket_mapjoin_n8; -drop table srcbucket_mapjoin_n8; \ No newline at end of file +drop table srcbucket_mapjoin_n8; + +-- Load into ACID table using ORC files +set hive.mapred.mode=nonstrict; +set hive.optimize.ppd=true; +set hive.optimize.index.filter=true; +set hive.tez.bucket.pruning=true; +set hive.explain.user=false; +set hive.fetch.task.conversion=none; +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; + +CREATE TABLE orc_test_txn (`id` integer, name string, dept string) PARTITIONED BY (year integer) STORED AS ORC TBLPROPERTIES('transactional'='true'); +explain load data local inpath '../../data/files/load_data_job_acid' into table orc_test_txn; +load data local inpath '../../data/files/load_data_job_acid' into table orc_test_txn; + +select * from orc_test_txn; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hive/blob/36653c2c/ql/src/test/results/clientpositive/llap/load_data_using_job.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/load_data_using_job.q.out b/ql/src/test/results/clientpositive/llap/load_data_using_job.q.out index 765ffdf..8a82467 100644 --- a/ql/src/test/results/clientpositive/llap/load_data_using_job.q.out +++ b/ql/src/test/results/clientpositive/llap/load_data_using_job.q.out @@ -977,16 +977,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcbucket_mapjoin_n8__temp_table_for_load_data__ - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 47 Data size: 8648 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 47 Data size: 8648 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 47 Data size: 8648 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs @@ -996,10 +996,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 47 Data size: 8648 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 47 Data size: 8648 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -3018,3 +3018,103 @@ POSTHOOK: query: drop table srcbucket_mapjoin_n8 POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@srcbucket_mapjoin_n8 POSTHOOK: Output: default@srcbucket_mapjoin_n8 +PREHOOK: query: CREATE TABLE orc_test_txn (`id` integer, name string, dept string) PARTITIONED BY (year integer) STORED AS ORC TBLPROPERTIES('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@orc_test_txn +POSTHOOK: query: CREATE TABLE orc_test_txn (`id` integer, name string, dept string) PARTITIONED BY (year integer) STORED AS ORC TBLPROPERTIES('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@orc_test_txn +#### A masked pattern was here #### +PREHOOK: type: QUERY +#### A masked pattern was here #### +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: orc_test_txn__temp_table_for_load_data__ + Statistics: Num rows: 24 Data size: 9024 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: id (type: int), name (type: string), dept (type: string), year (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 24 Data size: 9024 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 24 Data size: 9024 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.orc_test_txn + Write Type: INSERT + Execution mode: vectorized, llap + LLAP IO: all inputs + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + partition: + year + replace: false + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.orc_test_txn + Write Type: INSERT + + Stage: Stage-3 + Stats Work + Basic Stats Work: + +#### A masked pattern was here #### +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_test_txn__temp_table_for_load_data__ +PREHOOK: Output: default@orc_test_txn +#### A masked pattern was here #### +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_test_txn__temp_table_for_load_data__ +POSTHOOK: Output: default@orc_test_txn@year=2016 +POSTHOOK: Output: default@orc_test_txn@year=2017 +POSTHOOK: Output: default@orc_test_txn@year=2018 +POSTHOOK: Lineage: orc_test_txn PARTITION(year=2016).dept SIMPLE [(orc_test_txn__temp_table_for_load_data__)orc_test_txn__temp_table_for_load_data__.FieldSchema(name:dept, type:string, comment:null), ] +POSTHOOK: Lineage: orc_test_txn PARTITION(year=2016).id SIMPLE [(orc_test_txn__temp_table_for_load_data__)orc_test_txn__temp_table_for_load_data__.FieldSchema(name:id, type:int, comment:null), ] +POSTHOOK: Lineage: orc_test_txn PARTITION(year=2016).name SIMPLE [(orc_test_txn__temp_table_for_load_data__)orc_test_txn__temp_table_for_load_data__.FieldSchema(name:name, type:string, comment:null), ] +POSTHOOK: Lineage: orc_test_txn PARTITION(year=2017).dept SIMPLE [(orc_test_txn__temp_table_for_load_data__)orc_test_txn__temp_table_for_load_data__.FieldSchema(name:dept, type:string, comment:null), ] +POSTHOOK: Lineage: orc_test_txn PARTITION(year=2017).id SIMPLE [(orc_test_txn__temp_table_for_load_data__)orc_test_txn__temp_table_for_load_data__.FieldSchema(name:id, type:int, comment:null), ] +POSTHOOK: Lineage: orc_test_txn PARTITION(year=2017).name SIMPLE [(orc_test_txn__temp_table_for_load_data__)orc_test_txn__temp_table_for_load_data__.FieldSchema(name:name, type:string, comment:null), ] +POSTHOOK: Lineage: orc_test_txn PARTITION(year=2018).dept SIMPLE [(orc_test_txn__temp_table_for_load_data__)orc_test_txn__temp_table_for_load_data__.FieldSchema(name:dept, type:string, comment:null), ] +POSTHOOK: Lineage: orc_test_txn PARTITION(year=2018).id SIMPLE [(orc_test_txn__temp_table_for_load_data__)orc_test_txn__temp_table_for_load_data__.FieldSchema(name:id, type:int, comment:null), ] +POSTHOOK: Lineage: orc_test_txn PARTITION(year=2018).name SIMPLE [(orc_test_txn__temp_table_for_load_data__)orc_test_txn__temp_table_for_load_data__.FieldSchema(name:name, type:string, comment:null), ] +PREHOOK: query: select * from orc_test_txn +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_test_txn +PREHOOK: Input: default@orc_test_txn@year=2016 +PREHOOK: Input: default@orc_test_txn@year=2017 +PREHOOK: Input: default@orc_test_txn@year=2018 +#### A masked pattern was here #### +POSTHOOK: query: select * from orc_test_txn +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_test_txn +POSTHOOK: Input: default@orc_test_txn@year=2016 +POSTHOOK: Input: default@orc_test_txn@year=2017 +POSTHOOK: Input: default@orc_test_txn@year=2018 +#### A masked pattern was here #### +9 Harris CSE 2017 +8 Henry CSE 2016 +10 Haley CSE 2018