IMPALA-5994: Lower case struct-field names Impala tries to always store column names in lower case. As part of a cleanup of issues related to upper case Kudu column names, a check was added in Analyzer to enforce this.
The check fails when doing star expansion on a struct to select all fields in the case where a table was created in Hive with upper case letters in a struct field name. This happens because Hive does not convert struct field names to all lower case in HMS. The solution is to force StructField names to lower case. Testing: - Added a test in test_nested_types.py - Fixed FE test that expected struct field to be output in upper case. Change-Id: Iacd9714ac2301a55ee8b64f0102f6f156fb0370e Reviewed-on: http://gerrit.cloudera.org:8080/8169 Reviewed-by: Thomas Tauber-Marshall <[email protected]> Tested-by: Impala Public Jenkins Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/c07391ce Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/c07391ce Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/c07391ce Branch: refs/heads/master Commit: c07391ce518ffec0441839f70a822f78683acffc Parents: 5d92264 Author: Thomas Tauber-Marshall <[email protected]> Authored: Thu Sep 28 14:13:42 2017 -0700 Committer: Impala Public Jenkins <[email protected]> Committed: Sat Sep 30 01:13:19 2017 +0000 ---------------------------------------------------------------------- .../java/org/apache/impala/catalog/StructField.java | 4 +++- .../org/apache/impala/analysis/AnalyzeDDLTest.java | 2 +- tests/query_test/test_nested_types.py | 15 +++++++++++++++ 3 files changed, 19 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c07391ce/fe/src/main/java/org/apache/impala/catalog/StructField.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/org/apache/impala/catalog/StructField.java b/fe/src/main/java/org/apache/impala/catalog/StructField.java index 6e44714..7f4a8a3 100644 --- 
a/fe/src/main/java/org/apache/impala/catalog/StructField.java +++ b/fe/src/main/java/org/apache/impala/catalog/StructField.java @@ -34,7 +34,9 @@ public class StructField { protected int position_; // in struct public StructField(String name, Type type, String comment) { - name_ = name; + // Impala expects field names to be in lower case, but type strings stored in the HMS + // are not guaranteed to be lower case. + name_ = name.toLowerCase(); type_ = type; comment_ = comment; } http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c07391ce/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java ---------------------------------------------------------------------- diff --git a/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java b/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java index 4d5ac0a..1935754 100644 --- a/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java +++ b/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java @@ -2236,7 +2236,7 @@ public class AnalyzeDDLTest extends FrontendTestBase { // Test unsupported Kudu types List<String> unsupportedTypes = Lists.newArrayList( "DECIMAL(9,0)", "VARCHAR(20)", "CHAR(20)", - "STRUCT<F1:INT,F2:STRING>", "ARRAY<INT>", "MAP<STRING,STRING>"); + "STRUCT<f1:INT,f2:STRING>", "ARRAY<INT>", "MAP<STRING,STRING>"); for (String t: unsupportedTypes) { String expectedError = String.format( "Cannot create table 'tab': Type %s is not supported in Kudu", t); http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c07391ce/tests/query_test/test_nested_types.py ---------------------------------------------------------------------- diff --git a/tests/query_test/test_nested_types.py b/tests/query_test/test_nested_types.py index bb74faa..20def2c 100644 --- a/tests/query_test/test_nested_types.py +++ b/tests/query_test/test_nested_types.py @@ -85,6 +85,21 @@ class TestNestedTypes(ImpalaTestSuite): vector.get_value('exec_option')['num_nodes'] = 1 
self.run_test_case('QueryTest/nested-types-parquet-stats', vector) + @SkipIfIsilon.hive + @SkipIfS3.hive + @SkipIfADLS.hive + @SkipIfLocal.hive + def test_upper_case_field_name(self, unique_database): + """IMPALA-5994: Tests that a Hive-created table with a struct field name with upper + case characters can be selected.""" + table_name = "%s.upper_case_test" % unique_database + create_table = "CREATE TABLE %s (s struct<upperCaseName:int>) STORED AS PARQUET" % \ + table_name + self.run_stmt_in_hive(create_table) + self.client.execute("invalidate metadata %s" % table_name) + self.client.execute("select s.UppercasenamE from %s" % table_name) + self.client.execute("select s.* from %s" % table_name) + class TestParquetArrayEncodings(ImpalaTestSuite): TESTFILE_DIR = os.path.join(os.environ['IMPALA_HOME'], "testdata/parquet_nested_types_encodings")
