(impala) 02/02: IMPALA-12904: test_type_conversions_hive3 silently passes because of wrongly defined test dimensions

stigahuang Mon, 18 Mar 2024 23:46:43 -0700

This is an automated email from the ASF dual-hosted git repository.

stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


commit 2c4d84b1e11ae1f8857abebf7e7983e7449ff3e1
Author: Zoltan Borok-Nagy <[email protected]>
AuthorDate: Thu Mar 14 20:56:42 2024 +0100

    IMPALA-12904: test_type_conversions_hive3 silently passes because of wrongly defined test dimensions
    
    test_type_conversions_hive3 silently passes because we are not creating
    the test dimension for query option orc_schema_resolution correctly. If
    we set orc_schema_resolution correctly, i.e. to also exercise the
    name-based schema resolution, the test fails. The cause of the failure
    is that the ill-typed tables have dummy column names like 'c1', 'c2',
    etc. These are completely fine for position-based schema resolution,
    but they are not OK for name-based schema resolution.
    
    The test just wants to check error messages related to type errors;
    the column names are irrelevant to that, so we can just use the correct
    names, i.e. the column names of the source tables.
    
    Change-Id: I786a5eaae9243b4728484f3f3b1427b20a1d2d28
    Reviewed-on: http://gerrit.cloudera.org:8080/21151
    Reviewed-by: Impala Public Jenkins <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
---
 .../queries/DataErrorsTest/orc-type-checks.test    | 40 +++++++++++-----------
 tests/query_test/test_scanners.py                  | 32 ++++++++++-------
 2 files changed, 40 insertions(+), 32 deletions(-)

diff --git 
a/testdata/workloads/functional-query/queries/DataErrorsTest/orc-type-checks.test
 
b/testdata/workloads/functional-query/queries/DataErrorsTest/orc-type-checks.test
index 244898d0d..52d8595e6 100644
--- 
a/testdata/workloads/functional-query/queries/DataErrorsTest/orc-type-checks.test
+++ 
b/testdata/workloads/functional-query/queries/DataErrorsTest/orc-type-checks.test
@@ -1,71 +1,71 @@
 ====
 ---- QUERY
-select c1 from illtypes
+select id from illtypes
 ---- CATCH
 Type mismatch: table column BOOLEAN is map to column int in ORC file
 ====
 ---- QUERY
-select c2 from illtypes
+select bool_col from illtypes
 ---- CATCH
 Type mismatch: table column FLOAT is map to column boolean in ORC file
 ====
 ---- QUERY
-select c3 from illtypes
+select tinyint_col from illtypes
 ---- CATCH
 Type mismatch: table column BOOLEAN is map to column tinyint in ORC file
 ====
 ---- QUERY
-select c4 from illtypes
+select smallint_col from illtypes
 ---- CATCH
 Type mismatch: table column TINYINT is map to column smallint in ORC file
 ====
 ---- QUERY
-select c5 from illtypes
+select int_col from illtypes
 ---- CATCH
 Type mismatch: table column SMALLINT is map to column int in ORC file
 ====
 ---- QUERY
-select c6 from illtypes
+select bigint_col from illtypes
 ---- CATCH
 Type mismatch: table column INT is map to column bigint in ORC file
 ====
 ---- QUERY
-select c7 from illtypes
+select float_col from illtypes
 ---- CATCH
 Type mismatch: table column BOOLEAN is map to column float in ORC file
 ====
 ---- QUERY
-select c8 from illtypes
+select double_col from illtypes
 ---- CATCH
 Type mismatch: table column STRING is map to column double in ORC file
 ====
 ---- QUERY
-select c9 from illtypes
+select date_string_col from illtypes
 ---- CATCH
 Type mismatch: table column INT is map to column string in ORC file
 ====
 ---- QUERY
-select c10 from illtypes
+select string_col from illtypes
 ---- CATCH
 Type mismatch: table column FLOAT is map to column string in ORC file
 ====
 ---- QUERY
-select c11 from illtypes
+select timestamp_col from illtypes
 ---- CATCH
 Type mismatch: table column BIGINT is map to column timestamp in ORC file
 ====
 ---- QUERY
-select c11 from illtypes_ts_to_date
+select timestamp_col from illtypes_ts_to_date
 ---- CATCH
 Type mismatch: table column DATE is map to column timestamp in ORC file
 ====
 ---- QUERY
-select c2 from illtypes_date_tbl
+select date_col from illtypes_date_tbl
 ---- CATCH
 Type mismatch: table column TIMESTAMP is map to column date in ORC file
 ====
 ---- QUERY
-select * from safetypes order by c1
+select * from safetypes order by id
 ---- TYPES
 
bigint,boolean,smallint,int,bigint,bigint,double,double,char,string,timestamp,int,int
 ---- RESULTS
@@ -150,8 +150,8 @@ insert into $DATABASE.union_complextypes
 create external table $DATABASE.ill_complextypes(
   id int,
   u map<int, double>,
-  int_map map<string, int>,
-  int_array array<int>)
+  int_array map<string, int>,
+  int_map array<int>)
 stored as orc
 location '$NAMENODE/test-warehouse/$DATABASE.db/union_complextypes';
 ====
@@ -172,17 +172,17 @@ File 
'$NAMENODE/test-warehouse/$DATABASE.db/union_complextypes/000000_0'
  Column type: map, ORC schema: uniontype<int,boolean>
 ====
 ---- QUERY
-select m.key, m.value from ill_complextypes.int_map m
+select m.key, m.value from ill_complextypes.int_array m
 ---- CATCH
 File '$NAMENODE/test-warehouse/$DATABASE.db/union_complextypes/000000_0'
- has an incompatible ORC schema for column 
'$DATABASE.ill_complextypes.int_map',
+ has an incompatible ORC schema for column 
'$DATABASE.ill_complextypes.int_array',
  Column type: map, ORC schema: array<int>
 ====
 ---- QUERY
-select a.item from ill_complextypes.int_array a
+select a.item from ill_complextypes.int_map a
 ---- CATCH
 File '$NAMENODE/test-warehouse/$DATABASE.db/union_complextypes/000000_0'
- has an incompatible ORC schema for column 
'$DATABASE.ill_complextypes.int_array',
+ has an incompatible ORC schema for column 
'$DATABASE.ill_complextypes.int_map',
  Column type: array, ORC schema: map<string,int>
 ====
 ---- HIVE_QUERY
diff --git a/tests/query_test/test_scanners.py 
b/tests/query_test/test_scanners.py
index daa163504..4a5e5bc78 100644
--- a/tests/query_test/test_scanners.py
+++ b/tests/query_test/test_scanners.py
@@ -42,6 +42,7 @@ from tests.common.skip import (
     SkipIfLocal,
     SkipIfNotHdfsMinicluster)
 from tests.common.test_dimensions import (
+    add_exec_option_dimension,
     create_single_exec_option_dimension,
     create_exec_option_dimension,
     create_uncompressed_text_dimension)
@@ -1639,7 +1640,7 @@ class TestOrc(ImpalaTestSuite):
     super(TestOrc, cls).add_test_dimensions()
     cls.ImpalaTestMatrix.add_constraint(
       lambda v: v.get_value('table_format').file_format == 'orc')
-    
cls.ImpalaTestMatrix.add_dimension(ImpalaTestDimension('orc_schema_resolution', 
0, 1))
+    add_exec_option_dimension(cls, 'orc_schema_resolution', [0, 1])
 
   @SkipIfFS.hdfs_small_block
   @SkipIfLocal.multiple_impalad
@@ -1713,6 +1714,8 @@ class TestOrc(ImpalaTestSuite):
   # queries that hang in some cases (IMPALA-9345). It would be possible to 
separate
   # the tests that use Hive and run most tests on S3, but I think that running 
these on
   # S3 doesn't add too much coverage.
+  # TODO(IMPALA-12349): Rename the columns to use the correct names (see
+  # test_type_conversions_hive3).
   @SkipIfFS.hive
   @SkipIfHive3.non_acid
   def test_type_conversions_hive2(self, vector, unique_database):
@@ -1772,23 +1775,28 @@ class TestOrc(ImpalaTestSuite):
     tmp_alltypes = unique_database + ".alltypes"
     create_plain_orc_table("functional.alltypestiny", tmp_alltypes)
     tbl_loc = self._get_table_location(tmp_alltypes, vector)
-    self.client.execute("""create table %s.illtypes (c1 boolean, c2 float,
-        c3 boolean, c4 tinyint, c5 smallint, c6 int, c7 boolean, c8 string, c9 
int,
-        c10 float, c11 bigint) partitioned by (year int, month int) stored as 
ORC
+    self.client.execute("""create table %s.illtypes (id boolean, bool_col 
float,
+        tinyint_col boolean, smallint_col tinyint, int_col smallint, 
bigint_col int,
+        float_col boolean, double_col string, date_string_col int, string_col 
float,
+        timestamp_col bigint) partitioned by (year int, month int) stored as 
ORC
         location '%s'""" % (unique_database, tbl_loc))
-    self.client.execute("""create table %s.illtypes_ts_to_date (c1 boolean,
-        c2 float, c3 boolean, c4 tinyint, c5 smallint, c6 int, c7 boolean, c8 
string,
-        c9 int, c10 float, c11 date) partitioned by (year int, month int) 
stored as ORC
+    self.client.execute("""create table %s.illtypes_ts_to_date (id boolean,
+        bool_col float, tinyint_col boolean, smallint_col tinyint, int_col 
smallint,
+        bigint_col int, float_col boolean, double_col string,
+        date_string_col int, string_col float, timestamp_col date)
+        partitioned by (year int, month int) stored as ORC
         location '%s'""" % (unique_database, tbl_loc))
-    self.client.execute("""create table %s.safetypes (c1 bigint, c2 boolean,
-        c3 smallint, c4 int, c5 bigint, c6 bigint, c7 double, c8 double, c9 
char(3),
-        c10 varchar(3), c11 timestamp) partitioned by (year int, month int) 
stored as ORC
+    self.client.execute("""create table %s.safetypes (id bigint, bool_col 
boolean,
+        tinyint_col smallint, smallint_col int, int_col bigint, bigint_col 
bigint,
+        float_col double, double_col double, date_string_col char(3),
+        string_col varchar(3), timestamp_col timestamp)
+        partitioned by (year int, month int) stored as ORC
         location '%s'""" % (unique_database, tbl_loc))
     tmp_date_tbl = unique_database + ".date_tbl"
     create_plain_orc_table("functional.date_tbl", tmp_date_tbl)
     date_tbl_loc = self._get_table_location(tmp_date_tbl, vector)
-    self.client.execute("""create table %s.illtypes_date_tbl (c1 boolean,
-        c2 timestamp) partitioned by (date_part date) stored as ORC location 
'%s'"""
+    self.client.execute("""create table %s.illtypes_date_tbl (id boolean,
+        date_col timestamp) partitioned by (date_part date) stored as ORC 
location '%s'"""
         % (unique_database, date_tbl_loc))
     self.client.execute("alter table %s.illtypes recover partitions" % 
unique_database)
     self.client.execute("alter table %s.illtypes_ts_to_date recover partitions"

(impala) 02/02: IMPALA-12904: test_type_conversions_hive3 silently passes because of wrongly defined test dimensions

Reply via email to