IMPALA-5994: Lower case struct-field names

Impala tries to always store column names in lower case. As part of a
cleanup of issues related to upper case Kudu column names, a check was
added in Analyzer to enforce this.

The check fails when doing star expansion on a struct to select all
fields in the case where a table was created in Hive with upper case
letters in a struct field name. This happens because Hive does not
convert struct field names to all lower case in HMS.

The solution is to force StructField names to lower case.

Testing:
- Added a test in test_nested_types.py
- Fixed FE test that expected struct field to be output in upper case.

Change-Id: Iacd9714ac2301a55ee8b64f0102f6f156fb0370e
Reviewed-on: http://gerrit.cloudera.org:8080/8169
Reviewed-by: Thomas Tauber-Marshall <[email protected]>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/c07391ce
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/c07391ce
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/c07391ce

Branch: refs/heads/master
Commit: c07391ce518ffec0441839f70a822f78683acffc
Parents: 5d92264
Author: Thomas Tauber-Marshall <[email protected]>
Authored: Thu Sep 28 14:13:42 2017 -0700
Committer: Impala Public Jenkins <[email protected]>
Committed: Sat Sep 30 01:13:19 2017 +0000

----------------------------------------------------------------------
 .../java/org/apache/impala/catalog/StructField.java  |  4 +++-
 .../org/apache/impala/analysis/AnalyzeDDLTest.java   |  2 +-
 tests/query_test/test_nested_types.py                | 15 +++++++++++++++
 3 files changed, 19 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c07391ce/fe/src/main/java/org/apache/impala/catalog/StructField.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/catalog/StructField.java 
b/fe/src/main/java/org/apache/impala/catalog/StructField.java
index 6e44714..7f4a8a3 100644
--- a/fe/src/main/java/org/apache/impala/catalog/StructField.java
+++ b/fe/src/main/java/org/apache/impala/catalog/StructField.java
@@ -34,7 +34,9 @@ public class StructField {
   protected int position_;  // in struct
 
   public StructField(String name, Type type, String comment) {
-    name_ = name;
+    // Impala expects field names to be in lower case, but type strings stored 
in the HMS
+    // are not guaranteed to be lower case.
+    name_ = name.toLowerCase();
     type_ = type;
     comment_ = comment;
   }

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c07391ce/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java
----------------------------------------------------------------------
diff --git a/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java 
b/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java
index 4d5ac0a..1935754 100644
--- a/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java
+++ b/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java
@@ -2236,7 +2236,7 @@ public class AnalyzeDDLTest extends FrontendTestBase {
     // Test unsupported Kudu types
     List<String> unsupportedTypes = Lists.newArrayList(
         "DECIMAL(9,0)", "VARCHAR(20)", "CHAR(20)",
-        "STRUCT<F1:INT,F2:STRING>", "ARRAY<INT>", "MAP<STRING,STRING>");
+        "STRUCT<f1:INT,f2:STRING>", "ARRAY<INT>", "MAP<STRING,STRING>");
     for (String t: unsupportedTypes) {
       String expectedError = String.format(
           "Cannot create table 'tab': Type %s is not supported in Kudu", t);

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c07391ce/tests/query_test/test_nested_types.py
----------------------------------------------------------------------
diff --git a/tests/query_test/test_nested_types.py 
b/tests/query_test/test_nested_types.py
index bb74faa..20def2c 100644
--- a/tests/query_test/test_nested_types.py
+++ b/tests/query_test/test_nested_types.py
@@ -85,6 +85,21 @@ class TestNestedTypes(ImpalaTestSuite):
     vector.get_value('exec_option')['num_nodes'] = 1
     self.run_test_case('QueryTest/nested-types-parquet-stats', vector)
 
+  @SkipIfIsilon.hive
+  @SkipIfS3.hive
+  @SkipIfADLS.hive
+  @SkipIfLocal.hive
+  def test_upper_case_field_name(self, unique_database):
+    """IMPALA-5994: Tests that a Hive-created table with a struct field name 
with upper
+    case characters can be selected."""
+    table_name = "%s.upper_case_test" % unique_database
+    create_table = "CREATE TABLE %s (s struct<upperCaseName:int>) STORED AS 
PARQUET" % \
+        table_name
+    self.run_stmt_in_hive(create_table)
+    self.client.execute("invalidate metadata %s" % table_name)
+    self.client.execute("select s.UppercasenamE from %s" % table_name)
+    self.client.execute("select s.* from %s" % table_name)
+
 class TestParquetArrayEncodings(ImpalaTestSuite):
   TESTFILE_DIR = os.path.join(os.environ['IMPALA_HOME'],
                               "testdata/parquet_nested_types_encodings")

Reply via email to