Repository: incubator-impala
Updated Branches:
  refs/heads/master 6604083f5 -> 26a3d04b8


IMPALA-2525: Treat parquet ENUMs as STRINGs when creating impala tables.

Change-Id: Ia7a2e20c3ab83eb3fac422c3b33c117856fec475
Reviewed-on: http://gerrit.cloudera.org:8080/6550
Reviewed-by: Alex Behm <[email protected]>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/0992a6af
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/0992a6af
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/0992a6af

Branch: refs/heads/master
Commit: 0992a6afdaf96df744f8f25c5e747e5bb82d8a40
Parents: 6604083
Author: Jakub Kukul <[email protected]>
Authored: Mon Mar 13 15:52:08 2017 +0100
Committer: Impala Public Jenkins <[email protected]>
Committed: Wed Jun 7 02:51:54 2017 +0000

----------------------------------------------------------------------
 docs/topics/impala_parquet.xml                  |   1 +
 .../analysis/CreateTableLikeFileStmt.java       |   6 ++++-
 .../apache/impala/analysis/AnalyzeDDLTest.java  |   2 ++
 testdata/bin/create-load-data.sh                |   1 +
 testdata/data/schemas/enum/enum.parquet         | Bin 0 -> 696 bytes
 .../QueryTest/create-table-like-file.test       |  24 +++++++++++++++++++
 6 files changed, 33 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/0992a6af/docs/topics/impala_parquet.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_parquet.xml b/docs/topics/impala_parquet.xml
index ef1af24..fee3180 100644
--- a/docs/topics/impala_parquet.xml
+++ b/docs/topics/impala_parquet.xml
@@ -1151,6 +1151,7 @@ INT96 -&gt; TIMESTAMP
       </p>
 
 <codeblock>BINARY + OriginalType UTF8 -&gt; STRING
+BINARY + OriginalType ENUM -&gt; STRING
 BINARY + OriginalType DECIMAL -&gt; DECIMAL
 </codeblock>
 

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/0992a6af/fe/src/main/java/org/apache/impala/analysis/CreateTableLikeFileStmt.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/analysis/CreateTableLikeFileStmt.java b/fe/src/main/java/org/apache/impala/analysis/CreateTableLikeFileStmt.java
index a9cdd86..de2901f 100644
--- a/fe/src/main/java/org/apache/impala/analysis/CreateTableLikeFileStmt.java
+++ b/fe/src/main/java/org/apache/impala/analysis/CreateTableLikeFileStmt.java
@@ -270,8 +270,12 @@ public class CreateTableLikeFileStmt extends CreateTableStmt {
 
     PrimitiveType prim = parquetType.asPrimitiveType();
     if (prim.getPrimitiveTypeName() == PrimitiveType.PrimitiveTypeName.BINARY &&
-        orig == OriginalType.UTF8) {
+        (orig == OriginalType.UTF8 || orig == OriginalType.ENUM)) {
       // UTF8 is the type annotation Parquet uses for strings
+      // ENUM is the type annotation Parquet uses to indicate that
+      // the original data type, before conversion to parquet, had been enum.
+      // Applications which do not have enumerated types (e.g. Impala)
+      // should interpret it as a string.
       // We check to make sure it applies to BINARY to avoid errors if there is a bad
       // annotation.
       return Type.STRING;

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/0992a6af/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java
----------------------------------------------------------------------
diff --git a/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java b/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java
index e5ba4bc..d950915 100644
--- a/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java
+++ b/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java
@@ -1406,6 +1406,8 @@ public class AnalyzeDDLTest extends FrontendTestBase {
         + "'/test-warehouse/schemas/zipcode_incomes.parquet'");
     AnalyzesOk("create table if not exists newtbl_DNE like parquet "
         + "'/test-warehouse/schemas/decimal.parquet'");
+    AnalyzesOk("create table if not exists newtbl_DNE like parquet "
+        + "'/test-warehouse/schemas/enum/enum.parquet'");
 
     // check we error in the same situations as standard create table
     AnalysisError("create table functional.zipcode_incomes like parquet "

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/0992a6af/testdata/bin/create-load-data.sh
----------------------------------------------------------------------
diff --git a/testdata/bin/create-load-data.sh b/testdata/bin/create-load-data.sh
index bc24485..6e43380 100755
--- a/testdata/bin/create-load-data.sh
+++ b/testdata/bin/create-load-data.sh
@@ -141,6 +141,7 @@ function load-custom-schemas {
   hadoop fs -mkdir ${SCHEMA_DEST_DIR}
   hadoop fs -put $SCHEMA_SRC_DIR/zipcode_incomes.parquet ${SCHEMA_DEST_DIR}/
   hadoop fs -put $SCHEMA_SRC_DIR/alltypestiny.parquet ${SCHEMA_DEST_DIR}/
+  hadoop fs -put $SCHEMA_SRC_DIR/enum ${SCHEMA_DEST_DIR}/
  hadoop fs -put $SCHEMA_SRC_DIR/malformed_decimal_tiny.parquet ${SCHEMA_DEST_DIR}/
   hadoop fs -put $SCHEMA_SRC_DIR/decimal.parquet ${SCHEMA_DEST_DIR}/
  hadoop fs -put $SCHEMA_SRC_DIR/nested/modern_nested.parquet ${SCHEMA_DEST_DIR}/

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/0992a6af/testdata/data/schemas/enum/enum.parquet
----------------------------------------------------------------------
diff --git a/testdata/data/schemas/enum/enum.parquet b/testdata/data/schemas/enum/enum.parquet
new file mode 100644
index 0000000..ae97856
Binary files /dev/null and b/testdata/data/schemas/enum/enum.parquet differ

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/0992a6af/testdata/workloads/functional-query/queries/QueryTest/create-table-like-file.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-query/queries/QueryTest/create-table-like-file.test b/testdata/workloads/functional-query/queries/QueryTest/create-table-like-file.test
index 43a7929..a67d450 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/create-table-like-file.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/create-table-like-file.test
@@ -52,6 +52,30 @@ describe $DATABASE.like_alltypestiny_file
 STRING, STRING, STRING
 ====
 ---- QUERY
+# Creating a table from a file containing parquet logical types
+# used to annotate string columns (UTF8, ENUM).
+create table $DATABASE.like_enumtype_file like parquet
+'$FILESYSTEM_PREFIX/test-warehouse/schemas/enum/enum.parquet'
+STORED AS PARQUET
+LOCATION '$FILESYSTEM_PREFIX/test-warehouse/schemas/enum'
+---- RESULTS
+====
+---- QUERY
+describe $DATABASE.like_enumtype_file
+---- RESULTS
+'species','string','Inferred from Parquet file.'
+'name','string','Inferred from Parquet file.'
+---- TYPES
+STRING, STRING, STRING
+====
+---- QUERY
+select * FROM $DATABASE.like_enumtype_file
+---- RESULTS
+'BEAR','Winnie'
+---- TYPES
+STRING, STRING
+====
+---- QUERY
 # Make sure creating a table with the same name doesn't throw an error when
 # IF NOT EXISTS is specified.
 create table if not exists $DATABASE.like_alltypestiny_file like parquet

Reply via email to