IMPALA-6245: Tolerate column indenting from Hive The fix for HIVE-3140 started indenting multi-line comments, which breaks Impala testing when run against Hive 2.1.1.
To test this using the pure test runner proved difficult since it would require extensive changes to support both row_regexes (since the columns changed order) and subset support (since the number of rows changed). Instead, we manually verify the hints are present in the output in the python test. The fact that the hints have been reformatted leaves us in an uncertain state as to whether they actually get applied, so a new test case has been added to run EXPLAIN SELECT on the view and verify the joins happen exactly as we expect. Testing: Ran the views-ddl test against Impala mini-cluster setups using both Hive 2.1.1 and Hive 1.1.0 Change-Id: I49e53b1230520ca6e850af28078526e6627d69de Reviewed-on: http://gerrit.cloudera.org:8080/8719 Reviewed-by: Alex Behm <alex.b...@cloudera.com> Tested-by: Impala Public Jenkins Project: http://git-wip-us.apache.org/repos/asf/impala/repo Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/245df3c6 Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/245df3c6 Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/245df3c6 Branch: refs/heads/master Commit: 245df3c69a932abf4accc57cb359c7bcda6d8aec Parents: b4cf5f2 Author: Zach Amsden <zams...@cloudera.com> Authored: Tue Nov 28 00:07:20 2017 +0000 Committer: Impala Public Jenkins <impala-public-jenk...@gerrit.cloudera.org> Committed: Tue Dec 12 00:17:56 2017 +0000 ---------------------------------------------------------------------- .../queries/QueryTest/views-ddl.test | 39 ------------------ tests/metadata/test_ddl.py | 42 ++++++++++++++++++++ 2 files changed, 42 insertions(+), 39 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/impala/blob/245df3c6/testdata/workloads/functional-query/queries/QueryTest/views-ddl.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-query/queries/QueryTest/views-ddl.test b/testdata/workloads/functional-query/queries/QueryTest/views-ddl.test index b62955e..98f116f 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/views-ddl.test +++ b/testdata/workloads/functional-query/queries/QueryTest/views-ddl.test @@ -277,42 +277,3 @@ select * from $DATABASE.decimal_view 2345,111,12.3456789000,0.12345678900000000000000000000000000000,3.14100,1 ---- TYPES decimal,decimal,decimal,decimal,decimal,decimal -==== ----- QUERY -# Test that plan hints are stored in the view as end-of-line commented hints -# for view-compatibility with Hive. -create view $DATABASE.hints_test as -select /* +straight_join */ a.* from functional.alltypestiny a -inner join /* +broadcast */ functional.alltypes b on a.id = b.id -inner join /* +shuffle */ functional.alltypessmall c on b.id = c.id ----- RESULTS -==== ----- QUERY -describe formatted $DATABASE.hints_test ----- TYPES -string,string,string ----- RESULTS: VERIFY_IS_SUBSET -'# View Information','NULL','NULL' -'View Original Text: ','SELECT ','NULL' -'-- +straight_join','NULL','NULL' -' a.* FROM functional.alltypestiny a INNER JOIN ','NULL','NULL' -'-- +broadcast','NULL','NULL' -' functional.alltypes b ON a.id = b.id INNER JOIN ','NULL','NULL' -'-- +shuffle','NULL','NULL' -' functional.alltypessmall c ON b.id = c.id','NULL','NULL' -'View Expanded Text: ','SELECT ','NULL' -'-- +straight_join','NULL','NULL' -' a.* FROM functional.alltypestiny a INNER JOIN ','NULL','NULL' -'-- +broadcast','NULL','NULL' -' functional.alltypes b ON a.id = b.id INNER JOIN ','NULL','NULL' -'-- +shuffle','NULL','NULL' -' functional.alltypessmall c ON b.id = c.id','NULL','NULL' -==== ----- QUERY -# Test querying the hinted view. -select count(*) from $DATABASE.hints_test ----- RESULTS -8 ----- TYPES -bigint -==== http://git-wip-us.apache.org/repos/asf/impala/blob/245df3c6/tests/metadata/test_ddl.py ---------------------------------------------------------------------- diff --git a/tests/metadata/test_ddl.py b/tests/metadata/test_ddl.py index d8c6000..27748bd 100644 --- a/tests/metadata/test_ddl.py +++ b/tests/metadata/test_ddl.py @@ -319,6 +319,48 @@ class TestDdlStatements(TestDdlBase): self.run_test_case('QueryTest/views-ddl', vector, use_db=unique_database, multiple_impalad=self._use_multiple_impalad(vector)) + @UniqueDatabase.parametrize() + def test_view_hints(self, vector, unique_database): + # Test that plan hints are stored in the view's comment field; this should work + # regardless of how Hive formats the output. Getting this to work with the + # automated test case runner is rather difficult, so verify directly. There + # should be two # of each join hint, one for the original text, one for the expanded + self.client.execute(""" + create view {0}.hints_test as + select /* +straight_join */ a.* from functional.alltypestiny a + inner join /* +broadcast */ functional.alltypes b on a.id = b.id + inner join /* +shuffle */ functional.alltypessmall c on b.id = c.id + """.format(unique_database)) + results = self.execute_query("describe formatted %s.hints_test" % unique_database) + sj, bc, shuf = 0,0,0 + for row in results.data: + sj += '-- +straight_join' in row + bc += '-- +broadcast' in row + shuf += '-- +shuffle' in row + assert sj == 2 + assert bc == 2 + assert shuf == 2 + + # Test querying the hinted view. + results = self.execute_query("select count(*) from %s.hints_test" % unique_database) + assert results.success + assert len(results.data) == 1 + assert results.data[0] == '8' + + # Test the plan to make sure hints were applied correctly + plan = self.execute_query("explain select * from %s.hints_test" % unique_database, + query_options={'explain_level':0}) + assert """PLAN-ROOT SINK +08:EXCHANGE [UNPARTITIONED] +04:HASH JOIN [INNER JOIN, PARTITIONED] +|--07:EXCHANGE [HASH(c.id)] +| 02:SCAN HDFS [functional.alltypessmall c] +06:EXCHANGE [HASH(b.id)] +03:HASH JOIN [INNER JOIN, BROADCAST] +|--05:EXCHANGE [BROADCAST] +| 01:SCAN HDFS [functional.alltypes b] +00:SCAN HDFS [functional.alltypestiny a]""" in '\n'.join(plan.data) + @UniqueDatabase.parametrize(sync_ddl=True) def test_functions_ddl(self, vector, unique_database): self.run_test_case('QueryTest/functions-ddl', vector, use_db=unique_database,