IMPALA-3491: Use unique_database fixture in test_col_stats.py. The patch also addresses a TODO that asked for test_col_stats.py to be merged into test_compute_stats.py.
Testing: I ran the test by itself in a loop 10 times, and the whole test_compute_stats.py locally. Also did a private core/hdfs run. Change-Id: I88aa77464a95993c018e19a52eeb496d7c3eef08 Reviewed-on: http://gerrit.cloudera.org:8080/2963 Reviewed-by: Alex Behm <[email protected]> Tested-by: Internal Jenkins Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/bff194ce Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/bff194ce Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/bff194ce Branch: refs/heads/master Commit: bff194ce17ea62c71f941b8091ecddfa791b39e1 Parents: 5cae398 Author: Alex Behm <[email protected]> Authored: Wed May 4 19:48:56 2016 -0700 Committer: Tim Armstrong <[email protected]> Committed: Thu May 12 14:17:58 2016 -0700 ---------------------------------------------------------------------- tests/metadata/test_col_stats.py | 89 ------------------------------- tests/metadata/test_compute_stats.py | 42 ++++++++++++++- 2 files changed, 40 insertions(+), 91 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/bff194ce/tests/metadata/test_col_stats.py ---------------------------------------------------------------------- diff --git a/tests/metadata/test_col_stats.py b/tests/metadata/test_col_stats.py deleted file mode 100644 index 447861a..0000000 --- a/tests/metadata/test_col_stats.py +++ /dev/null @@ -1,89 +0,0 @@ -# Copyright (c) 2012 Cloudera, Inc. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# Impala tests for column statistics - -import logging -import pytest -import shlex -from tests.common.test_result_verifier import * -from subprocess import call -from tests.common.test_vector import * -from tests.common.impala_test_suite import * -from tests.common.skip import SkipIfS3 - -TEST_DB = 'colstats_test_db' - -# End-to-end validation of Impala column stats usage. -class TestColStats(ImpalaTestSuite): - @classmethod - def get_workload(self): - return 'functional-query' - - @classmethod - def add_test_dimensions(cls): - super(TestColStats, cls).add_test_dimensions() - # There is no reason to run these tests using all dimensions. - cls.TestMatrix.add_dimension(create_single_exec_option_dimension()) - cls.TestMatrix.add_dimension(create_uncompressed_text_dimension(cls.get_workload())) - - def setup_method(self, method): - self.__cleanup() - - def teardown_method(self, method): - self.__cleanup() - - def __cleanup(self): - self.cleanup_db(TEST_DB) - - def test_incompatible_col_stats(self, vector): - """Tests Impala is able to use tables when the column stats data is not compatible - with the column type. Regression test for IMPALA-588.""" - - # Create a test database. - self.client.execute("create database " + TEST_DB); - self.client.execute("use " + TEST_DB) - - # Create a table with a string column and populate it with some data. 
- self.client.execute("create table badstats(s string)") - self.client.execute("insert into table badstats select cast(int_col as string) "\ - "from functional.alltypes limit 10") - - # Compute stats for this table, they will be for the string column type. - self.__compute_table_stats(TEST_DB, 'badstats') - self.client.execute("refresh badstats") - - # Change the column type to int which will cause a mismatch between the column - # stats data and the column type metadata. - self.client.execute("alter table badstats change s s int") - # Should still be able to query the table - result = self.client.execute("select s from badstats") - assert len(result.data) == 10 - - # Recompute stats with the new column type. Impala should now have stats for this - # column and should be able to access the table. - # TODO: Currently this just verifies Impala can query the table, it does not - # verify the stats are there or correct. Expand the verification once Impala has a - # mechanism to expose this metadata. 
- self.__compute_table_stats(TEST_DB, 'badstats') - self.client.execute("refresh badstats") - result = self.client.execute("select s from badstats") - assert len(result.data) == 10 - - def __compute_table_stats(self, db_name, table_name): - compute_stats_script =\ - os.path.join(os.environ['IMPALA_HOME'],'tests/util/compute_table_stats.py') - rval = call([compute_stats_script, - '--db_names=' + db_name, '--table_names=' + table_name]) - assert rval == 0, 'Compute table stats failed on: %s.%s' % (db_name, table_name) http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/bff194ce/tests/metadata/test_compute_stats.py ---------------------------------------------------------------------- diff --git a/tests/metadata/test_compute_stats.py b/tests/metadata/test_compute_stats.py index f4ae2ff..16c96ab 100644 --- a/tests/metadata/test_compute_stats.py +++ b/tests/metadata/test_compute_stats.py @@ -21,7 +21,6 @@ from tests.common.skip import SkipIfS3, SkipIfIsilon, SkipIf, SkipIfLocal from tests.util.filesystem_utils import WAREHOUSE # Tests the COMPUTE STATS command for gathering table and column stats. 
-# TODO: Merge this test file with test_col_stats.py @SkipIf.not_default_fs # Isilon: Missing coverage: compute stats class TestComputeStats(ImpalaTestSuite): @classmethod @@ -115,7 +114,6 @@ class TestComputeStats(ImpalaTestSuite): @SkipIf.not_default_fs # Isilon: Missing coverage: compute stats class TestCorruptTableStats(TestComputeStats): - @classmethod def add_test_dimensions(cls): super(TestComputeStats, cls).add_test_dimensions() @@ -130,3 +128,43 @@ class TestCorruptTableStats(TestComputeStats): issued and the small query optimization is disabled.""" if self.exploration_strategy() != 'exhaustive': pytest.skip("Only run in exhaustive") self.run_test_case('QueryTest/corrupt-stats', vector, unique_database) + + +class TestIncompatibleColStats(TestComputeStats): + @classmethod + def add_test_dimensions(cls): + super(TestIncompatibleColStats, cls).add_test_dimensions() + # There is no reason to run these tests using all dimensions. + cls.TestMatrix.add_dimension(create_single_exec_option_dimension()) + cls.TestMatrix.add_dimension(create_uncompressed_text_dimension(cls.get_workload())) + + def test_incompatible_col_stats(self, vector, unique_database): + """Tests Impala is able to use tables when the column stats data is not compatible + with the column type. Regression test for IMPALA-588.""" + + # Create a table with a string column and populate it with some data. + table_name = unique_database + ".badstats" + self.client.execute("create table %s (s string)" % table_name) + self.client.execute("insert into table %s select cast(int_col as string) " + "from functional.alltypes limit 10" % table_name) + + # Compute stats for this table, they will be for the string column type. + self.client.execute("compute stats %s" % table_name) + + # Change the column type to int which will cause a mismatch between the column + # stats data and the column type metadata. 
+ self.client.execute("alter table %s change s s int" % table_name) + # Force a reload of the table metadata. + self.client.execute("invalidate metadata %s" % table_name) + # Should still be able to load the metadata and query the table. + result = self.client.execute("select s from %s" % table_name) + assert len(result.data) == 10 + + # Recompute stats with the new column type. Impala should now have stats for this + # column and should be able to access the table. + # TODO: Currently this just verifies Impala can query the table, it does not + # verify the stats are there or correct. Expand the verification once Impala has a + # mechanism to expose this metadata. + self.client.execute("compute stats %s" % table_name) + result = self.client.execute("select s from %s" % table_name) + assert len(result.data) == 10
