HIVE-11198: Fix LOAD DATA query file format check for partitioned tables (Prasanth Jayachandran, reviewed by Sushanth Sowmyan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/a2dabcb8 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/a2dabcb8 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/a2dabcb8 Branch: refs/heads/parquet Commit: a2dabcb8c7bc52bd8def1402fa649420ef41e3bd Parents: be89eac Author: Prasanth Jayachandran <j.prasant...@gmail.com> Authored: Thu Jul 9 11:20:29 2015 -0700 Committer: Prasanth Jayachandran <j.prasant...@gmail.com> Committed: Thu Jul 9 11:20:58 2015 -0700 ---------------------------------------------------------------------- .../hive/ql/parse/LoadSemanticAnalyzer.java | 12 +++- .../clientnegative/load_orc_negative_part.q | 14 ++++ .../test/queries/clientpositive/load_orc_part.q | 15 +++++ .../clientnegative/load_orc_negative_part.q.out | 52 +++++++++++++++ .../results/clientpositive/load_orc_part.q.out | 70 ++++++++++++++++++++ 5 files changed, 162 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/a2dabcb8/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java index 187dc20..944cee4 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java @@ -335,7 +335,17 @@ public class LoadSemanticAnalyzer extends BaseSemanticAnalyzer { } private void ensureFileFormatsMatch(TableSpec ts, URI fromURI) throws SemanticException { - Class<? extends InputFormat> destInputFormat = ts.tableHandle.getInputFormatClass(); + final Class<? 
extends InputFormat> destInputFormat; + try { + if (ts.getPartSpec() == null || ts.getPartSpec().isEmpty()) { + destInputFormat = ts.tableHandle.getInputFormatClass(); + } else { + destInputFormat = ts.partHandle.getInputFormatClass(); + } + } catch (HiveException e) { + throw new SemanticException(e); + } + // Other file formats should do similar check to make sure file formats match // when doing LOAD DATA .. INTO TABLE if (OrcInputFormat.class.equals(destInputFormat)) { http://git-wip-us.apache.org/repos/asf/hive/blob/a2dabcb8/ql/src/test/queries/clientnegative/load_orc_negative_part.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientnegative/load_orc_negative_part.q b/ql/src/test/queries/clientnegative/load_orc_negative_part.q new file mode 100644 index 0000000..5de4917 --- /dev/null +++ b/ql/src/test/queries/clientnegative/load_orc_negative_part.q @@ -0,0 +1,14 @@ +set hive.default.fileformat=ORC; +create table orc_staging (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp); +create table orc_test (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp) partitioned by (ds string); + +load data local inpath '../../data/files/orc_split_elim.orc' into table orc_staging; +dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/orc_staging/; + +load data inpath '${hiveconf:hive.metastore.warehouse.dir}/orc_staging/orc_split_elim.orc' into table orc_test partition (ds='10'); +load data local inpath '../../data/files/orc_split_elim.orc' into table orc_test partition (ds='10'); +dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/orc_test/ds=10/; + +alter table orc_test add partition(ds='11'); +load data local inpath '../../data/files/kv1.txt' into table orc_test partition(ds='11'); +dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/orc_test/ds=11/; http://git-wip-us.apache.org/repos/asf/hive/blob/a2dabcb8/ql/src/test/queries/clientpositive/load_orc_part.q 
---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/load_orc_part.q b/ql/src/test/queries/clientpositive/load_orc_part.q new file mode 100644 index 0000000..0927ea4 --- /dev/null +++ b/ql/src/test/queries/clientpositive/load_orc_part.q @@ -0,0 +1,15 @@ +set hive.default.fileformat=ORC; +create table orc_staging (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp); +create table orc_test (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp) partitioned by (ds string); + +load data local inpath '../../data/files/orc_split_elim.orc' into table orc_staging; +dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/orc_staging/; + +load data inpath '${hiveconf:hive.metastore.warehouse.dir}/orc_staging/orc_split_elim.orc' into table orc_test partition (ds='10'); +load data local inpath '../../data/files/orc_split_elim.orc' into table orc_test partition (ds='10'); +dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/orc_test/ds=10/; + +alter table orc_test add partition(ds='11'); +alter table orc_test partition(ds='11') set fileformat textfile; +load data local inpath '../../data/files/kv1.txt' into table orc_test partition(ds='11'); +dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/orc_test/ds=11/; http://git-wip-us.apache.org/repos/asf/hive/blob/a2dabcb8/ql/src/test/results/clientnegative/load_orc_negative_part.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientnegative/load_orc_negative_part.q.out b/ql/src/test/results/clientnegative/load_orc_negative_part.q.out new file mode 100644 index 0000000..32dd627 --- /dev/null +++ b/ql/src/test/results/clientnegative/load_orc_negative_part.q.out @@ -0,0 +1,52 @@ +PREHOOK: query: create table orc_staging (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: 
Output: default@orc_staging +POSTHOOK: query: create table orc_staging (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@orc_staging +PREHOOK: query: create table orc_test (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp) partitioned by (ds string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@orc_test +POSTHOOK: query: create table orc_test (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp) partitioned by (ds string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@orc_test +PREHOOK: query: load data local inpath '../../data/files/orc_split_elim.orc' into table orc_staging +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@orc_staging +POSTHOOK: query: load data local inpath '../../data/files/orc_split_elim.orc' into table orc_staging +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@orc_staging +Found 1 items +#### A masked pattern was here #### +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@orc_test +#### A masked pattern was here #### +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@orc_test +POSTHOOK: Output: default@orc_test@ds=10 +PREHOOK: query: load data local inpath '../../data/files/orc_split_elim.orc' into table orc_test partition (ds='10') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@orc_test@ds=10 +POSTHOOK: query: load data local inpath '../../data/files/orc_split_elim.orc' into table orc_test partition (ds='10') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@orc_test@ds=10 +Found 2 items +#### A masked pattern was here #### +PREHOOK: query: alter table orc_test add 
partition(ds='11') +PREHOOK: type: ALTERTABLE_ADDPARTS +PREHOOK: Output: default@orc_test +POSTHOOK: query: alter table orc_test add partition(ds='11') +POSTHOOK: type: ALTERTABLE_ADDPARTS +POSTHOOK: Output: default@orc_test +POSTHOOK: Output: default@orc_test@ds=11 +FAILED: SemanticException [Error 30019]: The file that you are trying to load does not match the file format of the destination table. Destination table is stored as ORC but the file being loaded is not a valid ORC file. http://git-wip-us.apache.org/repos/asf/hive/blob/a2dabcb8/ql/src/test/results/clientpositive/load_orc_part.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/load_orc_part.q.out b/ql/src/test/results/clientpositive/load_orc_part.q.out new file mode 100644 index 0000000..34ca493 --- /dev/null +++ b/ql/src/test/results/clientpositive/load_orc_part.q.out @@ -0,0 +1,70 @@ +PREHOOK: query: create table orc_staging (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@orc_staging +POSTHOOK: query: create table orc_staging (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@orc_staging +PREHOOK: query: create table orc_test (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp) partitioned by (ds string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@orc_test +POSTHOOK: query: create table orc_test (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp) partitioned by (ds string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@orc_test +PREHOOK: query: load data local inpath '../../data/files/orc_split_elim.orc' into table orc_staging +PREHOOK: type: LOAD 
+#### A masked pattern was here #### +PREHOOK: Output: default@orc_staging +POSTHOOK: query: load data local inpath '../../data/files/orc_split_elim.orc' into table orc_staging +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@orc_staging +Found 1 items +#### A masked pattern was here #### +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@orc_test +#### A masked pattern was here #### +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@orc_test +POSTHOOK: Output: default@orc_test@ds=10 +PREHOOK: query: load data local inpath '../../data/files/orc_split_elim.orc' into table orc_test partition (ds='10') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@orc_test@ds=10 +POSTHOOK: query: load data local inpath '../../data/files/orc_split_elim.orc' into table orc_test partition (ds='10') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@orc_test@ds=10 +Found 2 items +#### A masked pattern was here #### +PREHOOK: query: alter table orc_test add partition(ds='11') +PREHOOK: type: ALTERTABLE_ADDPARTS +PREHOOK: Output: default@orc_test +POSTHOOK: query: alter table orc_test add partition(ds='11') +POSTHOOK: type: ALTERTABLE_ADDPARTS +POSTHOOK: Output: default@orc_test +POSTHOOK: Output: default@orc_test@ds=11 +PREHOOK: query: alter table orc_test partition(ds='11') set fileformat textfile +PREHOOK: type: ALTERPARTITION_FILEFORMAT +PREHOOK: Input: default@orc_test +PREHOOK: Output: default@orc_test@ds=11 +POSTHOOK: query: alter table orc_test partition(ds='11') set fileformat textfile +POSTHOOK: type: ALTERPARTITION_FILEFORMAT +POSTHOOK: Input: default@orc_test +POSTHOOK: Input: default@orc_test@ds=11 +POSTHOOK: Output: default@orc_test@ds=11 +PREHOOK: query: load data local inpath '../../data/files/kv1.txt' into table orc_test partition(ds='11') +PREHOOK: type: LOAD +#### A masked pattern was here #### 
+PREHOOK: Output: default@orc_test@ds=11 +POSTHOOK: query: load data local inpath '../../data/files/kv1.txt' into table orc_test partition(ds='11') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@orc_test@ds=11 +Found 1 items +#### A masked pattern was here ####