Repository: incubator-impala Updated Branches: refs/heads/master 6604083f5 -> 26a3d04b8
IMPALA-2525: Treat parquet ENUMs as STRINGs when creating impala tables. Change-Id: Ia7a2e20c3ab83eb3fac422c3b33c117856fec475 Reviewed-on: http://gerrit.cloudera.org:8080/6550 Reviewed-by: Alex Behm <[email protected]> Tested-by: Impala Public Jenkins Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/0992a6af Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/0992a6af Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/0992a6af Branch: refs/heads/master Commit: 0992a6afdaf96df744f8f25c5e747e5bb82d8a40 Parents: 6604083 Author: Jakub Kukul <[email protected]> Authored: Mon Mar 13 15:52:08 2017 +0100 Committer: Impala Public Jenkins <[email protected]> Committed: Wed Jun 7 02:51:54 2017 +0000 ---------------------------------------------------------------------- docs/topics/impala_parquet.xml | 1 + .../analysis/CreateTableLikeFileStmt.java | 6 ++++- .../apache/impala/analysis/AnalyzeDDLTest.java | 2 ++ testdata/bin/create-load-data.sh | 1 + testdata/data/schemas/enum/enum.parquet | Bin 0 -> 696 bytes .../QueryTest/create-table-like-file.test | 24 +++++++++++++++++++ 6 files changed, 33 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/0992a6af/docs/topics/impala_parquet.xml ---------------------------------------------------------------------- diff --git a/docs/topics/impala_parquet.xml b/docs/topics/impala_parquet.xml index ef1af24..fee3180 100644 --- a/docs/topics/impala_parquet.xml +++ b/docs/topics/impala_parquet.xml @@ -1151,6 +1151,7 @@ INT96 -> TIMESTAMP </p> <codeblock>BINARY + OriginalType UTF8 -> STRING +BINARY + OriginalType ENUM -> STRING BINARY + OriginalType DECIMAL -> DECIMAL </codeblock> 
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/0992a6af/fe/src/main/java/org/apache/impala/analysis/CreateTableLikeFileStmt.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/org/apache/impala/analysis/CreateTableLikeFileStmt.java b/fe/src/main/java/org/apache/impala/analysis/CreateTableLikeFileStmt.java index a9cdd86..de2901f 100644 --- a/fe/src/main/java/org/apache/impala/analysis/CreateTableLikeFileStmt.java +++ b/fe/src/main/java/org/apache/impala/analysis/CreateTableLikeFileStmt.java @@ -270,8 +270,12 @@ public class CreateTableLikeFileStmt extends CreateTableStmt { PrimitiveType prim = parquetType.asPrimitiveType(); if (prim.getPrimitiveTypeName() == PrimitiveType.PrimitiveTypeName.BINARY && - orig == OriginalType.UTF8) { + (orig == OriginalType.UTF8 || orig == OriginalType.ENUM)) { // UTF8 is the type annotation Parquet uses for strings + // ENUM is the type annotation Parquet uses to indicate that + // the original data type, before conversion to parquet, had been enum. + // Applications which do not have enumerated types (e.g. Impala) + // should interpret it as a string. // We check to make sure it applies to BINARY to avoid errors if there is a bad // annotation. 
return Type.STRING; http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/0992a6af/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java ---------------------------------------------------------------------- diff --git a/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java b/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java index e5ba4bc..d950915 100644 --- a/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java +++ b/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java @@ -1406,6 +1406,8 @@ public class AnalyzeDDLTest extends FrontendTestBase { + "'/test-warehouse/schemas/zipcode_incomes.parquet'"); AnalyzesOk("create table if not exists newtbl_DNE like parquet " + "'/test-warehouse/schemas/decimal.parquet'"); + AnalyzesOk("create table if not exists newtbl_DNE like parquet " + + "'/test-warehouse/schemas/enum/enum.parquet'"); // check we error in the same situations as standard create table AnalysisError("create table functional.zipcode_incomes like parquet " http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/0992a6af/testdata/bin/create-load-data.sh ---------------------------------------------------------------------- diff --git a/testdata/bin/create-load-data.sh b/testdata/bin/create-load-data.sh index bc24485..6e43380 100755 --- a/testdata/bin/create-load-data.sh +++ b/testdata/bin/create-load-data.sh @@ -141,6 +141,7 @@ function load-custom-schemas { hadoop fs -mkdir ${SCHEMA_DEST_DIR} hadoop fs -put $SCHEMA_SRC_DIR/zipcode_incomes.parquet ${SCHEMA_DEST_DIR}/ hadoop fs -put $SCHEMA_SRC_DIR/alltypestiny.parquet ${SCHEMA_DEST_DIR}/ + hadoop fs -put $SCHEMA_SRC_DIR/enum ${SCHEMA_DEST_DIR}/ hadoop fs -put $SCHEMA_SRC_DIR/malformed_decimal_tiny.parquet ${SCHEMA_DEST_DIR}/ hadoop fs -put $SCHEMA_SRC_DIR/decimal.parquet ${SCHEMA_DEST_DIR}/ hadoop fs -put $SCHEMA_SRC_DIR/nested/modern_nested.parquet ${SCHEMA_DEST_DIR}/ 
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/0992a6af/testdata/data/schemas/enum/enum.parquet ---------------------------------------------------------------------- diff --git a/testdata/data/schemas/enum/enum.parquet b/testdata/data/schemas/enum/enum.parquet new file mode 100644 index 0000000..ae97856 Binary files /dev/null and b/testdata/data/schemas/enum/enum.parquet differ http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/0992a6af/testdata/workloads/functional-query/queries/QueryTest/create-table-like-file.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-query/queries/QueryTest/create-table-like-file.test b/testdata/workloads/functional-query/queries/QueryTest/create-table-like-file.test index 43a7929..a67d450 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/create-table-like-file.test +++ b/testdata/workloads/functional-query/queries/QueryTest/create-table-like-file.test @@ -52,6 +52,30 @@ describe $DATABASE.like_alltypestiny_file STRING, STRING, STRING ==== ---- QUERY +# Creating a table from a file containing parquet logical types +# used to annotate string columns (UTF8, ENUM). +create table $DATABASE.like_enumtype_file like parquet +'$FILESYSTEM_PREFIX/test-warehouse/schemas/enum/enum.parquet' +STORED AS PARQUET +LOCATION '$FILESYSTEM_PREFIX/test-warehouse/schemas/enum' +---- RESULTS +==== +---- QUERY +describe $DATABASE.like_enumtype_file +---- RESULTS +'species','string','Inferred from Parquet file.' +'name','string','Inferred from Parquet file.' +---- TYPES +STRING, STRING, STRING +==== +---- QUERY +select * FROM $DATABASE.like_enumtype_file +---- RESULTS +'BEAR','Winnie' +---- TYPES +STRING, STRING +==== +---- QUERY # Make sure creating a table with the same name doesn't throw an error when # IF NOT EXISTS is specified. create table if not exists $DATABASE.like_alltypestiny_file like parquet
