IMPALA-4372: 'Describe formatted' returns types in upper case

A recent change caused 'describe formatted' to display the types in all upper case, but we want 'describe formatted' to match Hive's 'describe' output, which displays the types in lower case.
This patch also fixes several problems with test_describe_formatted, which was encountering an error but reporting success. Change-Id: I274b97d4d1247244247fb38a5ca7f4c10bba8d22 Reviewed-on: http://gerrit.cloudera.org:8080/4861 Reviewed-by: Dimitris Tsirogiannis <[email protected]> Tested-by: Internal Jenkins Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/e6e2baea Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/e6e2baea Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/e6e2baea Branch: refs/heads/hadoop-next Commit: e6e2baea33eca0fbad9958c7c2f6087ee1019c46 Parents: 0ea4a66 Author: Thomas Tauber-Marshall <[email protected]> Authored: Wed Oct 26 19:41:19 2016 -0700 Committer: Internal Jenkins <[email protected]> Committed: Tue Nov 15 05:38:12 2016 +0000 ---------------------------------------------------------------------- .../java/org/apache/impala/catalog/Column.java | 2 +- .../queries/QueryTest/avro-schema-changes.test | 6 +-- tests/common/impala_test_suite.py | 9 ++-- .../metadata/test_metadata_query_statements.py | 47 +++++++++++++++----- tests/performance/query_exec_functions.py | 10 ++--- tests/performance/query_executor.py | 1 + 6 files changed, 49 insertions(+), 26 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/e6e2baea/fe/src/main/java/org/apache/impala/catalog/Column.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/org/apache/impala/catalog/Column.java b/fe/src/main/java/org/apache/impala/catalog/Column.java index ab064e7..91928aa 100644 --- a/fe/src/main/java/org/apache/impala/catalog/Column.java +++ b/fe/src/main/java/org/apache/impala/catalog/Column.java @@ -124,7 +124,7 @@ public class Column { return Lists.transform(columns, new Function<Column, FieldSchema>() { public 
FieldSchema apply(Column column) { Preconditions.checkNotNull(column.getType()); - return new FieldSchema(column.getName(), column.getType().toSql(), + return new FieldSchema(column.getName(), column.getType().toSql().toLowerCase(), column.getComment()); } }); http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/e6e2baea/testdata/workloads/functional-query/queries/QueryTest/avro-schema-changes.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-query/queries/QueryTest/avro-schema-changes.test b/testdata/workloads/functional-query/queries/QueryTest/avro-schema-changes.test index 1e3eac9..8233a02 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/avro-schema-changes.test +++ b/testdata/workloads/functional-query/queries/QueryTest/avro-schema-changes.test @@ -101,8 +101,8 @@ DESCRIBE FORMATTED avro_alter_schema_add_new_column; ---- TYPES string,string,string ---- RESULTS: VERIFY_IS_SUBSET -'old_col','STRING','from deserializer' -'new_col','STRING','from deserializer' +'old_col','string','from deserializer' +'new_col','string','from deserializer' ==== ---- QUERY # IMPALA-3776: Create an Avro table, remove a column from the Avro schema and make sure @@ -134,5 +134,5 @@ DESCRIBE FORMATTED avro_alter_schema_remove_column; ---- TYPES string,string,string ---- RESULTS: VERIFY_IS_SUBSET -'col1','STRING','from deserializer' +'col1','string','from deserializer' ==== http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/e6e2baea/tests/common/impala_test_suite.py ---------------------------------------------------------------------- diff --git a/tests/common/impala_test_suite.py b/tests/common/impala_test_suite.py index afa8e2c..e287360 100644 --- a/tests/common/impala_test_suite.py +++ b/tests/common/impala_test_suite.py @@ -467,24 +467,23 @@ class ImpalaTestSuite(BaseTestSuite): assert len(result.data) <= 1, 'Multiple values returned from scalar' return result.data[0] if 
len(result.data) == 1 else None - def exec_and_compare_hive_and_impala_hs2(self, stmt): + def exec_and_compare_hive_and_impala_hs2(self, stmt, compare = lambda x, y: x == y): """Compare Hive and Impala results when executing the same statment over HS2""" # execute_using_jdbc expects a Query object. Convert the query string into a Query # object query = Query() query.query_str = stmt # Run the statement targeting Hive - exec_opts = JdbcQueryExecConfig(impalad=HIVE_HS2_HOST_PORT) + exec_opts = JdbcQueryExecConfig(impalad=HIVE_HS2_HOST_PORT, transport='SASL') hive_results = execute_using_jdbc(query, exec_opts).data # Run the statement targeting Impala - exec_opts = JdbcQueryExecConfig(impalad=IMPALAD_HS2_HOST_PORT) + exec_opts = JdbcQueryExecConfig(impalad=IMPALAD_HS2_HOST_PORT, transport='NOSASL') impala_results = execute_using_jdbc(query, exec_opts).data # Compare the results assert (impala_results is not None) and (hive_results is not None) - for impala, hive in zip(impala_results, hive_results): - assert impala == hive + assert compare(impala_results, hive_results) def load_query_test_file(self, workload, file_name, valid_section_names=None, encoding=None): http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/e6e2baea/tests/metadata/test_metadata_query_statements.py ---------------------------------------------------------------------- diff --git a/tests/metadata/test_metadata_query_statements.py b/tests/metadata/test_metadata_query_statements.py index 0a51acb..8e25a2a 100644 --- a/tests/metadata/test_metadata_query_statements.py +++ b/tests/metadata/test_metadata_query_statements.py @@ -18,6 +18,7 @@ # Impala tests for queries that query metadata and set session settings import pytest +import re from tests.beeswax.impala_beeswax import ImpalaBeeswaxException from tests.common.impala_test_suite import ImpalaTestSuite @@ -75,13 +76,36 @@ class TestMetadataQueryStatements(ImpalaTestSuite): @SkipIfS3.hive @SkipIfLocal.hive def test_describe_formatted(self, 
vector, unique_database): + # For describe formmated, we try to match Hive's output as closely as possible. + # However, we're inconsistent with our handling of NULLs vs theirs - Impala sometimes + # specifies 'NULL' where Hive uses an empty string, and Hive somtimes specifies 'null' + # with padding where Impala uses a sequence of blank spaces - and for now + # we want to leave it that way to not affect users who rely on this output. + def compare_describe_formatted(impala_results, hive_results): + for impala, hive in zip(re.split(',|\n', impala_results), + re.split(',|\n', hive_results)): + + if impala != hive: + # If they don't match, check if it's because of the inconsistent null handling. + impala = impala.replace(' ', '').lower() + hive = hive.replace(' ', '').lower() + if not ((impala == "'null'" and hive == "''") or + (impala == "''" and hive == "'null'")): + return False + return True + # Describe a partitioned table. - self.exec_and_compare_hive_and_impala_hs2("describe formatted functional.alltypes") + self.exec_and_compare_hive_and_impala_hs2("describe formatted functional.alltypes", + compare=compare_describe_formatted) self.exec_and_compare_hive_and_impala_hs2( - "describe formatted functional_text_lzo.alltypes") + "describe formatted functional_text_lzo.alltypes", + compare=compare_describe_formatted) + # Describe an unpartitioned table. - self.exec_and_compare_hive_and_impala_hs2("describe formatted tpch.lineitem") - self.exec_and_compare_hive_and_impala_hs2("describe formatted functional.jointbl") + self.exec_and_compare_hive_and_impala_hs2("describe formatted tpch.lineitem", + compare=compare_describe_formatted) + self.exec_and_compare_hive_and_impala_hs2("describe formatted functional.jointbl", + compare=compare_describe_formatted) # Create and describe an unpartitioned and partitioned Avro table created # by Impala without any column definitions. 
@@ -91,20 +115,19 @@ class TestMetadataQueryStatements(ImpalaTestSuite): self.client.execute(( "create table %s.%s with serdeproperties ('avro.schema.url'='%s') stored as avro" % (unique_database, "avro_alltypes_nopart", self.AVRO_SCHEMA_LOC))) - self.exec_and_compare_hive_and_impala_hs2("describe formatted avro_alltypes_nopart") + self.exec_and_compare_hive_and_impala_hs2("describe formatted avro_alltypes_nopart", + compare=compare_describe_formatted) self.client.execute(( "create table %s.%s partitioned by (year int, month int) " "with serdeproperties ('avro.schema.url'='%s') stored as avro" % (unique_database, "avro_alltypes_part", self.AVRO_SCHEMA_LOC))) - self.exec_and_compare_hive_and_impala_hs2("describe formatted avro_alltypes_part") + self.exec_and_compare_hive_and_impala_hs2("describe formatted avro_alltypes_part", + compare=compare_describe_formatted) - try: - # Describe a view - self.exec_and_compare_hive_and_impala_hs2(\ - "describe formatted functional.alltypes_view_sub") - except AssertionError: - pytest.xfail("Investigate minor difference in displaying null vs empty values") + self.exec_and_compare_hive_and_impala_hs2(\ + "describe formatted functional.alltypes_view_sub", + compare=compare_describe_formatted) @pytest.mark.execute_serially # due to data src setup/teardown def test_show_data_sources(self, vector): http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/e6e2baea/tests/performance/query_exec_functions.py ---------------------------------------------------------------------- diff --git a/tests/performance/query_exec_functions.py b/tests/performance/query_exec_functions.py index 3abcae2..f7d00ec 100644 --- a/tests/performance/query_exec_functions.py +++ b/tests/performance/query_exec_functions.py @@ -245,7 +245,7 @@ def execute_using_jdbc(query, query_config): cmd = query_config.jdbc_client_cmd + " -q \"%s\"" % query_string return run_query_capture_results(cmd, query, exit_on_error=False) -def parse_jdbc_query_results(stdout, 
stderr): +def parse_jdbc_query_results(stdout, stderr, query): """ Parse query execution results for the Impala JDBC client @@ -260,10 +260,10 @@ def parse_jdbc_query_results(stdout, stderr): time_taken = float(('%s.%s') % (match.group(1), match.group(2))) break result_data = re.findall(r'\[START\]----\n(.*?)\n----\[END\]', stdout, re.DOTALL)[0] - return create_exec_result(time_taken, result_data) + return create_exec_result(time_taken, result_data, query) -def create_exec_result(time_taken, result_data): - exec_result = HiveQueryResult() +def create_exec_result(time_taken, result_data, query): + exec_result = HiveQueryResult(query) if result_data: LOG.debug('Data:\n%s\n' % result_data) exec_result.data = result_data @@ -296,7 +296,7 @@ def run_query_capture_results(cmd, query, exit_on_error): exec_result.query_error = msg return exec_result # The command completed - exec_result = parse_jdbc_query_results(stdout, stderr) + exec_result = parse_jdbc_query_results(stdout, stderr, query) exec_result.query = query exec_result.start_time = start_time return exec_result http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/e6e2baea/tests/performance/query_executor.py ---------------------------------------------------------------------- diff --git a/tests/performance/query_executor.py b/tests/performance/query_executor.py index 56065b8..5d9fc61 100644 --- a/tests/performance/query_executor.py +++ b/tests/performance/query_executor.py @@ -91,6 +91,7 @@ class JdbcQueryExecConfig(ImpalaQueryExecConfig): Constructed on the fly, since the impalad it points to can change. """ + assert self.transport is not None return JdbcQueryExecConfig.JDBC_CLIENT_PATH + ' -i "%s" -t %s' % (self._impalad, self.transport)
