This is an automated email from the ASF dual-hosted git repository. stigahuang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit 2c4d84b1e11ae1f8857abebf7e7983e7449ff3e1 Author: Zoltan Borok-Nagy <[email protected]> AuthorDate: Thu Mar 14 20:56:42 2024 +0100 IMPALA-12904: test_type_conversions_hive3 silently passes because of wrongly defined test dimensions test_type_conversions_hive3 silently passes because we are not creating the test dimension for query option orc_schema_resolution correctly. If we set orc_schema_resolution correctly, i.e. to also exercise the name-based schema resolution, the test fails. The cause of the failure is that the ill-typed tables have dummy column names like 'c1', 'c2', etc. These are completely fine for position-based schema resolution, but they are not OK for name-based schema resolution. The test just wants to check error messages related to type errors; the column names are irrelevant, so we can just use the correct names. Change-Id: I786a5eaae9243b4728484f3f3b1427b20a1d2d28 Reviewed-on: http://gerrit.cloudera.org:8080/21151 Reviewed-by: Impala Public Jenkins <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> --- .../queries/DataErrorsTest/orc-type-checks.test | 40 +++++++++++----------- tests/query_test/test_scanners.py | 32 ++++++++++------- 2 files changed, 40 insertions(+), 32 deletions(-) diff --git a/testdata/workloads/functional-query/queries/DataErrorsTest/orc-type-checks.test b/testdata/workloads/functional-query/queries/DataErrorsTest/orc-type-checks.test index 244898d0d..52d8595e6 100644 --- a/testdata/workloads/functional-query/queries/DataErrorsTest/orc-type-checks.test +++ b/testdata/workloads/functional-query/queries/DataErrorsTest/orc-type-checks.test @@ -1,71 +1,71 @@ ==== ---- QUERY -select c1 from illtypes +select id from illtypes ---- CATCH Type mismatch: table column BOOLEAN is map to column int in ORC file ==== ---- QUERY -select c2 from illtypes +select bool_col from illtypes ---- CATCH Type mismatch: table column FLOAT is map to column boolean in ORC file ==== ---- QUERY -select c3 from illtypes +select 
tinyint_col from illtypes ---- CATCH Type mismatch: table column BOOLEAN is map to column tinyint in ORC file ==== ---- QUERY -select c4 from illtypes +select smallint_col from illtypes ---- CATCH Type mismatch: table column TINYINT is map to column smallint in ORC file ==== ---- QUERY -select c5 from illtypes +select int_col from illtypes ---- CATCH Type mismatch: table column SMALLINT is map to column int in ORC file ==== ---- QUERY -select c6 from illtypes +select bigint_col from illtypes ---- CATCH Type mismatch: table column INT is map to column bigint in ORC file ==== ---- QUERY -select c7 from illtypes +select float_col from illtypes ---- CATCH Type mismatch: table column BOOLEAN is map to column float in ORC file ==== ---- QUERY -select c8 from illtypes +select double_col from illtypes ---- CATCH Type mismatch: table column STRING is map to column double in ORC file ==== ---- QUERY -select c9 from illtypes +select date_string_col from illtypes ---- CATCH Type mismatch: table column INT is map to column string in ORC file ==== ---- QUERY -select c10 from illtypes +select string_col from illtypes ---- CATCH Type mismatch: table column FLOAT is map to column string in ORC file ==== ---- QUERY -select c11 from illtypes +select timestamp_col from illtypes ---- CATCH Type mismatch: table column BIGINT is map to column timestamp in ORC file ==== ---- QUERY -select c11 from illtypes_ts_to_date +select timestamp_col from illtypes_ts_to_date ---- CATCH Type mismatch: table column DATE is map to column timestamp in ORC file ==== ---- QUERY -select c2 from illtypes_date_tbl +select date_col from illtypes_date_tbl ---- CATCH Type mismatch: table column TIMESTAMP is map to column date in ORC file ==== ---- QUERY -select * from safetypes order by c1 +select * from safetypes order by id ---- TYPES bigint,boolean,smallint,int,bigint,bigint,double,double,char,string,timestamp,int,int ---- RESULTS @@ -150,8 +150,8 @@ insert into $DATABASE.union_complextypes create external 
table $DATABASE.ill_complextypes( id int, u map<int, double>, - int_map map<string, int>, - int_array array<int>) + int_array map<string, int>, + int_map array<int>) stored as orc location '$NAMENODE/test-warehouse/$DATABASE.db/union_complextypes'; ==== @@ -172,17 +172,17 @@ File '$NAMENODE/test-warehouse/$DATABASE.db/union_complextypes/000000_0' Column type: map, ORC schema: uniontype<int,boolean> ==== ---- QUERY -select m.key, m.value from ill_complextypes.int_map m +select m.key, m.value from ill_complextypes.int_array m ---- CATCH File '$NAMENODE/test-warehouse/$DATABASE.db/union_complextypes/000000_0' - has an incompatible ORC schema for column '$DATABASE.ill_complextypes.int_map', + has an incompatible ORC schema for column '$DATABASE.ill_complextypes.int_array', Column type: map, ORC schema: array<int> ==== ---- QUERY -select a.item from ill_complextypes.int_array a +select a.item from ill_complextypes.int_map a ---- CATCH File '$NAMENODE/test-warehouse/$DATABASE.db/union_complextypes/000000_0' - has an incompatible ORC schema for column '$DATABASE.ill_complextypes.int_array', + has an incompatible ORC schema for column '$DATABASE.ill_complextypes.int_map', Column type: array, ORC schema: map<string,int> ==== ---- HIVE_QUERY diff --git a/tests/query_test/test_scanners.py b/tests/query_test/test_scanners.py index daa163504..4a5e5bc78 100644 --- a/tests/query_test/test_scanners.py +++ b/tests/query_test/test_scanners.py @@ -42,6 +42,7 @@ from tests.common.skip import ( SkipIfLocal, SkipIfNotHdfsMinicluster) from tests.common.test_dimensions import ( + add_exec_option_dimension, create_single_exec_option_dimension, create_exec_option_dimension, create_uncompressed_text_dimension) @@ -1639,7 +1640,7 @@ class TestOrc(ImpalaTestSuite): super(TestOrc, cls).add_test_dimensions() cls.ImpalaTestMatrix.add_constraint( lambda v: v.get_value('table_format').file_format == 'orc') - cls.ImpalaTestMatrix.add_dimension(ImpalaTestDimension('orc_schema_resolution', 0, 1)) + 
add_exec_option_dimension(cls, 'orc_schema_resolution', [0, 1]) @SkipIfFS.hdfs_small_block @SkipIfLocal.multiple_impalad @@ -1713,6 +1714,8 @@ class TestOrc(ImpalaTestSuite): # queries that hang in some cases (IMPALA-9345). It would be possible to separate # the tests that use Hive and run most tests on S3, but I think that running these on # S3 doesn't add too much coverage. + # TODO(IMPALA-12349): Rename the columns to use the correct names (see + # test_type_conversions_hive3). @SkipIfFS.hive @SkipIfHive3.non_acid def test_type_conversions_hive2(self, vector, unique_database): @@ -1772,23 +1775,28 @@ class TestOrc(ImpalaTestSuite): tmp_alltypes = unique_database + ".alltypes" create_plain_orc_table("functional.alltypestiny", tmp_alltypes) tbl_loc = self._get_table_location(tmp_alltypes, vector) - self.client.execute("""create table %s.illtypes (c1 boolean, c2 float, - c3 boolean, c4 tinyint, c5 smallint, c6 int, c7 boolean, c8 string, c9 int, - c10 float, c11 bigint) partitioned by (year int, month int) stored as ORC + self.client.execute("""create table %s.illtypes (id boolean, bool_col float, + tinyint_col boolean, smallint_col tinyint, int_col smallint, bigint_col int, + float_col boolean, double_col string, date_string_col int, string_col float, + timestamp_col bigint) partitioned by (year int, month int) stored as ORC location '%s'""" % (unique_database, tbl_loc)) - self.client.execute("""create table %s.illtypes_ts_to_date (c1 boolean, - c2 float, c3 boolean, c4 tinyint, c5 smallint, c6 int, c7 boolean, c8 string, - c9 int, c10 float, c11 date) partitioned by (year int, month int) stored as ORC + self.client.execute("""create table %s.illtypes_ts_to_date (id boolean, + bool_col float, tinyint_col boolean, smallint_col tinyint, int_col smallint, + bigint_col int, float_col boolean, double_col string, + date_string_col int, string_col float, timestamp_col date) + partitioned by (year int, month int) stored as ORC location '%s'""" % (unique_database, 
tbl_loc)) - self.client.execute("""create table %s.safetypes (c1 bigint, c2 boolean, - c3 smallint, c4 int, c5 bigint, c6 bigint, c7 double, c8 double, c9 char(3), - c10 varchar(3), c11 timestamp) partitioned by (year int, month int) stored as ORC + self.client.execute("""create table %s.safetypes (id bigint, bool_col boolean, + tinyint_col smallint, smallint_col int, int_col bigint, bigint_col bigint, + float_col double, double_col double, date_string_col char(3), + string_col varchar(3), timestamp_col timestamp) + partitioned by (year int, month int) stored as ORC location '%s'""" % (unique_database, tbl_loc)) tmp_date_tbl = unique_database + ".date_tbl" create_plain_orc_table("functional.date_tbl", tmp_date_tbl) date_tbl_loc = self._get_table_location(tmp_date_tbl, vector) - self.client.execute("""create table %s.illtypes_date_tbl (c1 boolean, - c2 timestamp) partitioned by (date_part date) stored as ORC location '%s'""" + self.client.execute("""create table %s.illtypes_date_tbl (id boolean, + date_col timestamp) partitioned by (date_part date) stored as ORC location '%s'""" % (unique_database, date_tbl_loc)) self.client.execute("alter table %s.illtypes recover partitions" % unique_database) self.client.execute("alter table %s.illtypes_ts_to_date recover partitions"
