HIVE-16080: Add parquet to possible values for hive.default.fileformat and hive.default.fileformat.managed (Sahil Takiar, reviewed by Sergio Pena)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/06b24122 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/06b24122 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/06b24122 Branch: refs/heads/hive-14535 Commit: 06b24122eed6be9b33c215ee710c895427ef1a48 Parents: ddbe226 Author: Sahil Takiar <[email protected]> Authored: Wed Mar 15 10:59:03 2017 -0500 Committer: Sergio Pena <[email protected]> Committed: Wed Mar 15 10:59:03 2017 -0500 ---------------------------------------------------------------------- .../org/apache/hadoop/hive/conf/HiveConf.java | 4 +- .../clientpositive/default_file_format.q | 35 ++ .../clientpositive/default_file_format.q.out | 347 +++++++++++++++++++ 3 files changed, 384 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/06b24122/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java ---------------------------------------------------------------------- diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index bd66d89..99e3294 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -1176,10 +1176,10 @@ public class HiveConf extends Configuration { "when using UDTF's to prevent the task getting killed because of inactivity. Users should be cautious \n" + "because this may prevent TaskTracker from killing tasks with infinite loops."), - HIVEDEFAULTFILEFORMAT("hive.default.fileformat", "TextFile", new StringSet("TextFile", "SequenceFile", "RCfile", "ORC"), + HIVEDEFAULTFILEFORMAT("hive.default.fileformat", "TextFile", new StringSet("TextFile", "SequenceFile", "RCfile", "ORC", "parquet"), "Default file format for CREATE TABLE statement. Users can explicitly override it by CREATE TABLE ... STORED AS [FORMAT]"), HIVEDEFAULTMANAGEDFILEFORMAT("hive.default.fileformat.managed", "none", - new StringSet("none", "TextFile", "SequenceFile", "RCfile", "ORC"), + new StringSet("none", "TextFile", "SequenceFile", "RCfile", "ORC", "parquet"), "Default file format for CREATE TABLE statement applied to managed tables only. External tables will be \n" + "created with format specified by hive.default.fileformat. Leaving this null will result in using hive.default.fileformat \n" + "for all tables."), http://git-wip-us.apache.org/repos/asf/hive/blob/06b24122/ql/src/test/queries/clientpositive/default_file_format.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/default_file_format.q b/ql/src/test/queries/clientpositive/default_file_format.q index 577fa36..24f4c17 100644 --- a/ql/src/test/queries/clientpositive/default_file_format.q +++ b/ql/src/test/queries/clientpositive/default_file_format.q @@ -11,18 +11,53 @@ create table i (c int) location 'pfile://${system:test.tmp.dir}/bar'; set hive.default.fileformat=orc; create table io (c int); +create external table e2 (c int) location 'pfile://${system:test.tmp.dir}/bar'; describe formatted t; describe formatted o; describe formatted io; describe formatted e; describe formatted i; +describe formatted e2; drop table t; drop table o; drop table io; drop table e; drop table i; +drop table e2; + +set hive.default.fileformat=TextFile; +set hive.default.fileformat.managed=none; + +create table t (c int); + +set hive.default.fileformat.managed=parquet; + +create table o (c int); + +create external table e (c int) location 'pfile://${system:test.tmp.dir}/foo'; + +create table i (c int) location 'pfile://${system:test.tmp.dir}/bar'; + +set hive.default.fileformat=parquet; + +create table io (c int); +create external table e2 (c int) location 'pfile://${system:test.tmp.dir}/bar'; + +describe formatted t; +describe formatted o; +describe formatted io; +describe formatted e; +describe formatted i; +describe formatted e2; + +drop table t; +drop table o; +drop table io; +drop table e; +drop table i; +drop table e2; set hive.default.fileformat=TextFile; set hive.default.fileformat.managed=none; http://git-wip-us.apache.org/repos/asf/hive/blob/06b24122/ql/src/test/results/clientpositive/default_file_format.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/default_file_format.q.out b/ql/src/test/results/clientpositive/default_file_format.q.out index 4e5f27d..ef0ca52 100644 --- a/ql/src/test/results/clientpositive/default_file_format.q.out +++ b/ql/src/test/results/clientpositive/default_file_format.q.out @@ -42,6 +42,16 @@ POSTHOOK: query: create table io (c int) POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@io +#### A masked pattern was here #### +PREHOOK: type: CREATETABLE +#### A masked pattern was here #### +PREHOOK: Output: database:default +PREHOOK: Output: default@e2 +#### A masked pattern was here #### +POSTHOOK: type: CREATETABLE +#### A masked pattern was here #### +POSTHOOK: Output: database:default +POSTHOOK: Output: default@e2 PREHOOK: query: describe formatted t PREHOOK: type: DESCTABLE PREHOOK: Input: default@t @@ -203,6 +213,38 @@ Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 +PREHOOK: query: describe formatted e2 +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@e2 +POSTHOOK: query: describe formatted e2 +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@e2 +# col_name data_type comment + +c int + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: EXTERNAL_TABLE +Table Parameters: + EXTERNAL TRUE + numFiles 0 + totalSize 0 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde +InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 PREHOOK: query: drop table t PREHOOK: type: DROPTABLE PREHOOK: Input: default@t @@ -243,3 +285,308 @@ POSTHOOK: query: drop table i POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@i POSTHOOK: Output: default@i +PREHOOK: query: drop table e2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@e2 +PREHOOK: Output: default@e2 +POSTHOOK: query: drop table e2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@e2 +POSTHOOK: Output: default@e2 +PREHOOK: query: create table t (c int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t +POSTHOOK: query: create table t (c int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t +PREHOOK: query: create table o (c int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@o +POSTHOOK: query: create table o (c int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@o +#### A masked pattern was here #### +PREHOOK: type: CREATETABLE +#### A masked pattern was here #### +PREHOOK: Output: database:default +PREHOOK: Output: default@e +#### A masked pattern was here #### +POSTHOOK: type: CREATETABLE +#### A masked pattern was here #### +POSTHOOK: Output: database:default +POSTHOOK: Output: default@e +#### A masked pattern was here #### +PREHOOK: type: CREATETABLE +#### A masked pattern was here #### +PREHOOK: Output: database:default +PREHOOK: Output: default@i +#### A masked pattern was here #### +POSTHOOK: type: CREATETABLE +#### A masked pattern was here #### +POSTHOOK: Output: database:default +POSTHOOK: Output: default@i +PREHOOK: query: create table io (c int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@io +POSTHOOK: query: create table io (c int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@io +#### A masked pattern was here #### +PREHOOK: type: CREATETABLE +#### A masked pattern was here #### +PREHOOK: Output: database:default +PREHOOK: Output: default@e2 +#### A masked pattern was here #### +POSTHOOK: type: CREATETABLE +#### A masked pattern was here #### +POSTHOOK: Output: database:default +POSTHOOK: Output: default@e2 +PREHOOK: query: describe formatted t +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@t +POSTHOOK: query: describe formatted t +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@t +# col_name data_type comment + +c int + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + numFiles 0 + numRows 0 + rawDataSize 0 + totalSize 0 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: describe formatted o +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@o +POSTHOOK: query: describe formatted o +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@o +# col_name data_type comment + +c int + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + numFiles 0 + numRows 0 + rawDataSize 0 + totalSize 0 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe +InputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: describe formatted io +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@io +POSTHOOK: query: describe formatted io +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@io +# col_name data_type comment + +c int + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + numFiles 0 + numRows 0 + rawDataSize 0 + totalSize 0 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe +InputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: describe formatted e +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@e +POSTHOOK: query: describe formatted e +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@e +# col_name data_type comment + +c int + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: EXTERNAL_TABLE +Table Parameters: + EXTERNAL TRUE + numFiles 0 + totalSize 0 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: describe formatted i +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@i +POSTHOOK: query: describe formatted i +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@i +# col_name data_type comment + +c int + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe +InputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: describe formatted e2 +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@e2 +POSTHOOK: query: describe formatted e2 +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@e2 +# col_name data_type comment + +c int + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: EXTERNAL_TABLE +Table Parameters: + EXTERNAL TRUE + numFiles 0 + totalSize 0 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe +InputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: drop table t +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@t +PREHOOK: Output: default@t +POSTHOOK: query: drop table t +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@t +POSTHOOK: Output: default@t +PREHOOK: query: drop table o +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@o +PREHOOK: Output: default@o +POSTHOOK: query: drop table o +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@o +POSTHOOK: Output: default@o +PREHOOK: query: drop table io +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@io +PREHOOK: Output: default@io +POSTHOOK: query: drop table io +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@io +POSTHOOK: Output: default@io +PREHOOK: query: drop table e +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@e +PREHOOK: Output: default@e +POSTHOOK: query: drop table e +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@e +POSTHOOK: Output: default@e +PREHOOK: query: drop table i +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@i +PREHOOK: Output: default@i +POSTHOOK: query: drop table i +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@i +POSTHOOK: Output: default@i +PREHOOK: query: drop table e2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@e2 +PREHOOK: Output: default@e2 +POSTHOOK: query: drop table e2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@e2 +POSTHOOK: Output: default@e2
