IMPALA-3491: Merge test_hbase_metadata.py into compute_stats.py. Use unique db fixture.
- Moves the test into compute_stats.py - Changes some test classes in compute_stats.py to inherit from ImpalaTestSuite and not from TestComputeStats because that will cause all tests in TestComputeStats to be run in the subclasses again (redundantly). - Clean up and add more coverage to testing incremental stats on HBase which was probably broken in this commit 6b32ff06. - Fixes a side effect that the original test had for testing incremental stats on HBase. It computes stats on a functional table which was not supposed to have stats. Testing: Ran compute_stats.py on exhaustive locally in a loop 10 times. Did a private hdfs/core run. Change-Id: Iee8b84e30948c3c98166e08cae2666574777730c Reviewed-on: http://gerrit.cloudera.org:8080/3074 Reviewed-by: Alex Behm <[email protected]> Tested-by: Internal Jenkins Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/ea45de84 Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/ea45de84 Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/ea45de84 Branch: refs/heads/master Commit: ea45de84f49412778b2f3f009bf2f5819307bfd6 Parents: 1ccfc45 Author: Alex Behm <[email protected]> Authored: Thu May 12 10:28:13 2016 -0700 Committer: Tim Armstrong <[email protected]> Committed: Mon May 23 08:40:19 2016 -0700 ---------------------------------------------------------------------- .../hbase-compute-stats-incremental.test | 43 +++++++++++++++++- .../queries/QueryTest/hbase-compute-stats.test | 33 +++++--------- tests/metadata/test_compute_stats.py | 40 ++++++++++++++--- tests/metadata/test_hbase_metadata.py | 47 -------------------- 4 files changed, 88 insertions(+), 75 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/ea45de84/testdata/workloads/functional-query/queries/QueryTest/hbase-compute-stats-incremental.test 
---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-query/queries/QueryTest/hbase-compute-stats-incremental.test b/testdata/workloads/functional-query/queries/QueryTest/hbase-compute-stats-incremental.test index 4d536bc..ea76090 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/hbase-compute-stats-incremental.test +++ b/testdata/workloads/functional-query/queries/QueryTest/hbase-compute-stats-incremental.test @@ -1,7 +1,48 @@ ==== ---- QUERY -compute incremental stats alltypes; +create table alltypessmall_hbase like functional_hbase.alltypessmall +---- RESULTS +==== +---- QUERY +compute incremental stats alltypessmall_hbase ---- RESULTS 'Updated 1 partition(s) and 13 column(s).' ---- TYPES STRING +==== +---- QUERY +show table stats alltypessmall_hbase +---- LABELS +REGION LOCATION, START ROWKEY, EST. #ROWS, SIZE +---- RESULTS: VERIFY_IS_EQUAL +regex:.+,'',regex:.+,regex:.+B +regex:.+,'1',regex:.+,regex:.+B +regex:.+,'3',regex:.+,regex:.+B +regex:.+,'5',regex:.+,regex:.+B +regex:.+,'7',regex:.+,regex:.+B +regex:.+,'9',regex:.+,regex:.+B +'Total','',regex:.+,regex:.+B +---- TYPES +STRING, STRING, BIGINT, STRING +==== +---- QUERY +show column stats alltypessmall_hbase +---- LABELS +COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE +---- RESULTS +'id','INT',99,-1,4,4 +'bigint_col','BIGINT',10,-1,8,8 +'bool_col','BOOLEAN',2,-1,1,1 +'date_string_col','STRING',12,-1,8,8 +'double_col','DOUBLE',10,-1,8,8 +'float_col','FLOAT',10,-1,4,4 +'int_col','INT',10,-1,4,4 +'month','INT',4,-1,4,4 +'smallint_col','SMALLINT',10,-1,2,2 +'string_col','STRING',10,-1,1,1 +'timestamp_col','TIMESTAMP',101,-1,16,16 +'tinyint_col','TINYINT',10,-1,1,1 +'year','INT',1,-1,4,4 +---- TYPES +STRING, STRING, BIGINT, BIGINT, INT, DOUBLE +==== http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/ea45de84/testdata/workloads/functional-query/queries/QueryTest/hbase-compute-stats.test 
---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-query/queries/QueryTest/hbase-compute-stats.test b/testdata/workloads/functional-query/queries/QueryTest/hbase-compute-stats.test index eae6cec..8ec19b5 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/hbase-compute-stats.test +++ b/testdata/workloads/functional-query/queries/QueryTest/hbase-compute-stats.test @@ -1,18 +1,17 @@ ==== ---- QUERY # test computing stats on an HBase table -create table compute_stats_db_hbase.alltypessmall_hbase -like alltypessmall; +create table alltypessmall_hbase like functional_hbase.alltypessmall ==== ---- QUERY -compute stats compute_stats_db_hbase.alltypessmall_hbase +compute stats alltypessmall_hbase ---- RESULTS 'Updated 1 partition(s) and 13 column(s).' ---- TYPES STRING ==== ---- QUERY -show table stats compute_stats_db_hbase.alltypessmall_hbase +show table stats alltypessmall_hbase ---- LABELS REGION LOCATION, START ROWKEY, EST. #ROWS, SIZE ---- RESULTS: VERIFY_IS_EQUAL @@ -27,7 +26,7 @@ regex:.+,'9',regex:.+,regex:.+B STRING, STRING, BIGINT, STRING ==== ---- QUERY -show column stats compute_stats_db_hbase.alltypessmall_hbase +show column stats alltypessmall_hbase ---- LABELS COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE ---- RESULTS @@ -49,18 +48,17 @@ STRING, STRING, BIGINT, BIGINT, INT, DOUBLE ==== ---- QUERY # test computing stats on an binary HBase table -create table compute_stats_db_hbase.alltypessmall_hbase_bin -like alltypessmallbinary; +create table alltypessmall_hbase_bin like functional_hbase.alltypessmallbinary ==== ---- QUERY -compute stats compute_stats_db_hbase.alltypessmall_hbase_bin +compute stats alltypessmall_hbase_bin ---- RESULTS 'Updated 1 partition(s) and 13 column(s).' 
---- TYPES STRING ==== ---- QUERY: VERIFY_IS_EQUAL -show table stats compute_stats_db_hbase.alltypessmall_hbase_bin +show table stats alltypessmall_hbase_bin ---- LABELS REGION LOCATION, START ROWKEY, EST. #ROWS, SIZE ---- RESULTS @@ -69,7 +67,7 @@ regex:.+,'',regex:.+,regex:.+ STRING, STRING, BIGINT, STRING ==== ---- QUERY -show column stats compute_stats_db_hbase.alltypessmall_hbase_bin +show column stats alltypessmall_hbase_bin ---- LABELS COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE ---- RESULTS @@ -92,18 +90,17 @@ STRING, STRING, BIGINT, BIGINT, INT, DOUBLE ---- QUERY # IMP-1227: Test computing stats on an HBase table that has a # complex-typed column that Impala does not yet support. -create table compute_stats_db_hbase.allcomplextypes -like allcomplextypes +create table allcomplextypes_hbase like functional_hbase.allcomplextypes ==== ---- QUERY -compute stats compute_stats_db_hbase.allcomplextypes +compute stats allcomplextypes_hbase ---- RESULTS 'Updated 1 partition(s) and 3 column(s).' ---- TYPES STRING ==== ---- QUERY: VERIFY_IS_EQUAL -show table stats compute_stats_db_hbase.allcomplextypes +show table stats allcomplextypes_hbase ---- LABELS REGION LOCATION, START ROWKEY, EST. #ROWS, SIZE ---- RESULTS @@ -112,7 +109,7 @@ regex:.+,'',regex:.+,regex:.+ STRING, STRING, BIGINT, STRING ==== ---- QUERY -show column stats compute_stats_db_hbase.allcomplextypes +show column stats allcomplextypes_hbase ---- LABELS COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE ---- RESULTS @@ -134,9 +131,3 @@ COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE ---- TYPES STRING, STRING, BIGINT, BIGINT, INT, DOUBLE ==== ----- QUERY -compute incremental stats alltypes; ----- RESULTS -'Updated 1 partition(s) and 13 column(s).' 
----- TYPES -STRING http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/ea45de84/tests/metadata/test_compute_stats.py ---------------------------------------------------------------------- diff --git a/tests/metadata/test_compute_stats.py b/tests/metadata/test_compute_stats.py index 16c96ab..925fce2 100644 --- a/tests/metadata/test_compute_stats.py +++ b/tests/metadata/test_compute_stats.py @@ -48,7 +48,7 @@ class TestComputeStats(ImpalaTestSuite): def test_compute_stats_many_partitions(self, vector): # To cut down on test execution time, only run the compute stats test against many # partitions if performing an exhaustive test run. - if self.exploration_strategy() != 'exhaustive': return + if self.exploration_strategy() != 'exhaustive': pytest.skip() self.run_test_case('QueryTest/compute-stats-many-partitions', vector) @pytest.mark.execute_serially @@ -111,16 +111,40 @@ class TestComputeStats(ImpalaTestSuite): assert(len(show_result.data) == 2) assert("1\tpval\t8" in show_result.data[0]) +# Tests compute stats on HBase tables. This test is separate from TestComputeStats, +# because we want to use the existing mechanism to disable running tests on hbase/none +# based on the filesystem type (S3, Isilon, etc.). 
+class TestHbaseComputeStats(ImpalaTestSuite): + @classmethod + def get_workload(self): + return 'functional-query' + + @classmethod + def add_test_dimensions(cls): + super(TestHbaseComputeStats, cls).add_test_dimensions() + cls.TestMatrix.add_dimension(create_single_exec_option_dimension()) + cls.TestMatrix.add_constraint( + lambda v: v.get_value('table_format').file_format == 'hbase') + + def test_hbase_compute_stats(self, vector, unique_database): + self.run_test_case('QueryTest/hbase-compute-stats', vector, unique_database) + + def test_hbase_compute_stats_incremental(self, vector, unique_database): + self.run_test_case('QueryTest/hbase-compute-stats-incremental', vector, + unique_database) + @SkipIf.not_default_fs # Isilon: Missing coverage: compute stats -class TestCorruptTableStats(TestComputeStats): +class TestCorruptTableStats(ImpalaTestSuite): + @classmethod + def get_workload(self): + return 'functional-query' + @classmethod def add_test_dimensions(cls): - super(TestComputeStats, cls).add_test_dimensions() + super(TestCorruptTableStats, cls).add_test_dimensions() cls.TestMatrix.add_dimension(create_exec_option_dimension( disable_codegen_options=[False], exec_single_node_option=[100])) - # Do not run these tests using all dimensions because the expected results - # are different for different file formats. 
cls.TestMatrix.add_dimension(create_uncompressed_text_dimension(cls.get_workload())) def test_corrupt_stats(self, vector, unique_database): @@ -130,7 +154,11 @@ class TestCorruptTableStats(TestComputeStats): self.run_test_case('QueryTest/corrupt-stats', vector, unique_database) -class TestIncompatibleColStats(TestComputeStats): +class TestIncompatibleColStats(ImpalaTestSuite): + @classmethod + def get_workload(self): + return 'functional-query' + @classmethod def add_test_dimensions(cls): super(TestIncompatibleColStats, cls).add_test_dimensions() http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/ea45de84/tests/metadata/test_hbase_metadata.py ---------------------------------------------------------------------- diff --git a/tests/metadata/test_hbase_metadata.py b/tests/metadata/test_hbase_metadata.py deleted file mode 100644 index 81b87cb..0000000 --- a/tests/metadata/test_hbase_metadata.py +++ /dev/null @@ -1,47 +0,0 @@ -# Copyright (c) 2012 Cloudera, Inc. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from tests.common.test_vector import * -from tests.common.impala_test_suite import * -from tests.common.test_dimensions import create_uncompressed_text_dimension - -# Tests the COMPUTE STATS command for gathering table and column stats. 
-# TODO: Merge this test file with test_col_stats.py -@pytest.mark.execute_serially -class TestHbaseMetadata(ImpalaTestSuite): - TEST_DB_NAME = "compute_stats_db_hbase" - - @classmethod - def get_workload(self): - return 'functional-query' - - @classmethod - def add_test_dimensions(cls): - super(TestHbaseMetadata, cls).add_test_dimensions() - cls.TestMatrix.add_dimension(create_single_exec_option_dimension()) - cls.TestMatrix.add_constraint(\ - lambda v: v.get_value('table_format').file_format == 'hbase') - - def setup_method(self, method): - # cleanup and create a fresh test database - self.cleanup_db(self.TEST_DB_NAME) - self.execute_query("create database %s" % (self.TEST_DB_NAME)) - - def teardown_method(self, method): - self.cleanup_db(self.TEST_DB_NAME) - - def test_hbase_compute_stats(self, vector): - self.run_test_case('QueryTest/hbase-compute-stats', vector) - - def test_hbase_compute_stats_incremental(self, vector): - self.run_test_case('QueryTest/hbase-compute-stats-incremental', vector)
