IMPALA-3491: Use unique database fixture in test_insert_parquet.py

Testing: Ran the test locally in a loop. Did a private debug/core/hdfs build.
Change-Id: I790b2ed5236640c7263826d1d2a74b64d43ac6f7
Reviewed-on: http://gerrit.cloudera.org:8080/4317
Reviewed-by: Alex Behm <[email protected]>
Tested-by: Internal Jenkins

Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/f0ffbca2
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/f0ffbca2
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/f0ffbca2

Branch: refs/heads/master
Commit: f0ffbca2c36ce3ad8ea5b3629d91e6b579d7dfcf
Parents: 157c800
Author: Alex Behm <[email protected]>
Authored: Fri Sep 2 10:44:20 2016 -0700
Committer: Internal Jenkins <[email protected]>
Committed: Thu Sep 8 03:25:29 2016 +0000

----------------------------------------------------------------------
 .../workloads/tpch/queries/insert_parquet.test |  6 +-
 tests/query_test/test_insert_parquet.py        | 60 ++++++++------------
 2 files changed, 26 insertions(+), 40 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/f0ffbca2/testdata/workloads/tpch/queries/insert_parquet.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/tpch/queries/insert_parquet.test b/testdata/workloads/tpch/queries/insert_parquet.test
index 35b7cde..4707b7b 100644
--- a/testdata/workloads/tpch/queries/insert_parquet.test
+++ b/testdata/workloads/tpch/queries/insert_parquet.test
@@ -1,8 +1,8 @@
 ====
 ---- QUERY
 # Tests using a larger table.
-create table if not exists orders_insert_test like orders location
-'$FILESYSTEM_PREFIX/test-warehouse/orders_insert_table';
+create table if not exists orders_insert_test like tpch_parquet.orders
+location '$FILESYSTEM_PREFIX/test-warehouse/$DATABASE.db/orders_insert_table';
 insert overwrite table orders_insert_test select * from tpch.orders
 ---- RESULTS
 : 1500000
@@ -56,7 +56,7 @@ bigint
 ---- QUERY
 # Test to verify that huge (larger than 64k) values can be written, see IMPALA-1705
 create table if not exists test_insert_huge_vals (s string) stored as parquet
-location '$FILESYSTEM_PREFIX/test-warehouse/test_insert_huge_vals';
+location '$FILESYSTEM_PREFIX/test-warehouse/$DATABASE.db/test_insert_huge_vals';
 insert overwrite table test_insert_huge_vals
 select cast(l_orderkey as string) from tpch.lineitem union
 select group_concat(concat(s_name, s_address, s_phone)) from tpch.supplier

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/f0ffbca2/tests/query_test/test_insert_parquet.py
----------------------------------------------------------------------
diff --git a/tests/query_test/test_insert_parquet.py b/tests/query_test/test_insert_parquet.py
index 70d3545..d5b8c17 100644
--- a/tests/query_test/test_insert_parquet.py
+++ b/tests/query_test/test_insert_parquet.py
@@ -26,6 +26,7 @@ from tempfile import mkdtemp as make_tmp_dir
 
 from tests.common.environ import impalad_basedir
 from tests.common.impala_test_suite import ImpalaTestSuite
+from tests.common.parametrize import UniqueDatabase
 from tests.common.skip import SkipIfIsilon, SkipIfLocal
 from tests.common.test_dimensions import create_exec_option_dimension
 from tests.common.test_vector import TestDimension
@@ -63,18 +64,14 @@ class TestInsertParquetQueries(ImpalaTestSuite):
     cls.TestMatrix.add_constraint(lambda v:\
         v.get_value('table_format').compression_codec == 'none')
 
-  @classmethod
-  def setup_class(cls):
-    super(TestInsertParquetQueries, cls).setup_class()
-
-
   @pytest.mark.execute_serially
   @SkipIfLocal.multiple_impalad
-  def test_insert_parquet(self, vector):
+  @UniqueDatabase.parametrize(sync_ddl=True)
+  def test_insert_parquet(self, vector, unique_database):
     vector.get_value('exec_option')['PARQUET_FILE_SIZE'] = \
         vector.get_value('file_size')
     vector.get_value('exec_option')['COMPRESSION_CODEC'] = \
         vector.get_value('compression_codec')
-    self.run_test_case('insert_parquet', vector, multiple_impalad=True)
+    self.run_test_case('insert_parquet', vector, unique_database, multiple_impalad=True)
 
 class TestInsertParquetInvalidCodec(ImpalaTestSuite):
   @classmethod
@@ -94,10 +91,6 @@ class TestInsertParquetInvalidCodec(ImpalaTestSuite):
     cls.TestMatrix.add_constraint(lambda v:\
         v.get_value('table_format').compression_codec == 'none')
 
-  @classmethod
-  def setup_class(cls):
-    super(TestInsertParquetInvalidCodec, cls).setup_class()
-
   @SkipIfLocal.multiple_impalad
   def test_insert_parquet_invalid_codec(self, vector):
     vector.get_value('exec_option')['COMPRESSION_CODEC'] = \
@@ -124,40 +117,34 @@ class TestInsertParquetVerifySize(ImpalaTestSuite):
         v.get_value('table_format').compression_codec == 'none')
     cls.TestMatrix.add_dimension(TestDimension("compression_codec", *PARQUET_CODECS));
 
-  @classmethod
-  def setup_class(cls):
-    super(TestInsertParquetVerifySize, cls).setup_class()
-
-
   @pytest.mark.execute_serially
   @SkipIfIsilon.hdfs_block_size
   @SkipIfLocal.hdfs_client
-  def test_insert_parquet_verify_size(self, vector):
-    # Test to verify that the result file size is close to what we expect.i
-    TBL = "parquet_insert_size"
-    DROP = "drop table if exists {0}".format(TBL)
-    CREATE = ("create table parquet_insert_size like tpch_parquet.orders"
-              " stored as parquet location '{0}/{1}'".format(WAREHOUSE, TBL))
-    QUERY = "insert overwrite {0} select * from tpch.orders".format(TBL)
-    DIR = get_fs_path("test-warehouse/{0}/".format(TBL))
-    BLOCK_SIZE = 40 * 1024 * 1024
-
-    self.execute_query(DROP)
-    self.execute_query(CREATE)
-
-    vector.get_value('exec_option')['PARQUET_FILE_SIZE'] = BLOCK_SIZE
+  def test_insert_parquet_verify_size(self, vector, unique_database):
+    # Test to verify that the result file size is close to what we expect.
+    tbl_name = "parquet_insert_size"
+    fq_tbl_name = unique_database + "." + tbl_name
+    location = get_fs_path("test-warehouse/{0}.db/{1}/"
+                           .format(unique_database, tbl_name))
+    create = ("create table {0} like tpch_parquet.orders stored as parquet"
+              .format(fq_tbl_name, location))
+    query = "insert overwrite {0} select * from tpch.orders".format(fq_tbl_name)
+    block_size = 40 * 1024 * 1024
+
+    self.execute_query(create)
+    vector.get_value('exec_option')['PARQUET_FILE_SIZE'] = block_size
     vector.get_value('exec_option')['COMPRESSION_CODEC'] =\
         vector.get_value('compression_codec')
     vector.get_value('exec_option')['num_nodes'] = 1
-    self.execute_query(QUERY, vector.get_value('exec_option'))
+    self.execute_query(query, vector.get_value('exec_option'))
 
     # Get the files in hdfs and verify. There can be at most 1 file that is smaller
-    # that the BLOCK_SIZE. The rest should be within 80% of it and not over.
+    # that the block_size. The rest should be within 80% of it and not over.
     found_small_file = False
-    sizes = self.filesystem_client.get_all_file_sizes(DIR)
+    sizes = self.filesystem_client.get_all_file_sizes(location)
     for size in sizes:
-      assert size < BLOCK_SIZE, "File size greater than expected.\
-          Expected: {0}, Got: {1}".format(BLOCK_SIZE, size)
-      if size < BLOCK_SIZE * 0.80:
+      assert size < block_size, "File size greater than expected.\
+          Expected: {0}, Got: {1}".format(block_size, size)
+      if size < block_size * 0.80:
         assert found_small_file == False
         found_small_file = True
@@ -179,7 +166,6 @@ class TestHdfsParquetTableWriter(ImpalaTestSuite):
     """
     table_name = "test_hdfs_parquet_table_writer"
    qualified_table_name = "%s.%s" % (unique_database, table_name)
-    self.execute_query("drop table if exists %s" % qualified_table_name)
     self.execute_query("create table %s stored as parquet as select l_linenumber from "
                        "tpch_parquet.lineitem limit 180000" % qualified_table_name)
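
For context on the pattern this change adopts: the unique_database fixture hands each
test its own database and tears it down afterwards, which is what makes the
setup_class overrides and the "drop table if exists" boilerplate above removable.
Below is a minimal, hypothetical sketch of such a fixture; the real one is provided
by Impala's test framework (applied via UniqueDatabase.parametrize in
tests/common/parametrize.py), and the execute() helper here merely stands in for a
real Impala connection.

  import uuid

  import pytest


  def execute(stmt):
    # Stand-in for issuing a statement through a real Impala client.
    print(stmt)


  @pytest.fixture
  def unique_database(request):
    # A name derived from the test plus a random suffix cannot collide with
    # concurrently running tests, so no shared-table cleanup is needed.
    db_name = "%s_%s" % (request.node.name, uuid.uuid4().hex[:8])
    execute("CREATE DATABASE IF NOT EXISTS %s" % db_name)
    yield db_name
    # Teardown drops everything the test created in one statement.
    execute("DROP DATABASE IF EXISTS %s CASCADE" % db_name)


  def test_insert(unique_database):
    execute("CREATE TABLE %s.t (i INT) STORED AS PARQUET" % unique_database)

Because every table a test creates lives under a database no other test shares,
tests cannot collide on table names or warehouse paths.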

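The size check in test_insert_parquet_verify_size encodes a simple invariant: every
output file must stay under the requested PARQUET_FILE_SIZE, and at most one file
(the writer's final, partially filled one) may fall below 80% of it. A
self-contained sketch of that check, using hypothetical example sizes:

  def verify_file_sizes(sizes, block_size):
    found_small_file = False
    for size in sizes:
      # No file may exceed the requested file size.
      assert size < block_size, \
          "File size greater than expected. Expected: {0}, Got: {1}".format(
              block_size, size)
      if size < block_size * 0.80:
        # Only one under-sized trailing file is tolerated.
        assert not found_small_file
        found_small_file = True


  # Example: three near-full files plus one small trailer pass the check.
  block_size = 40 * 1024 * 1024
  verify_file_sizes([block_size - 1, block_size - 1, block_size - 1, 4096],
                    block_size)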