IMPALA-6245: Tolerate column indenting from Hive

The fix for HIVE-3140 started indenting multi-line comments,
which breaks Impala testing when run against Hive 2.1.1.

To test this using the pure test runner proved difficult
since it would require extensive changes to support both
row_regexes (since the columns changed order) and subset
support (since the number of rows changed).

Instead, we manually verify the hints are present in the
output in the python test.

The fact that the hints have been reformatted leaves us
in an uncertain state as to whether they actually get applied,
so a new test case has been added to run EXPLAIN SELECT
on the view and verify the joins happen exactly as we
expect.

Testing: Ran the views-ddl test against Impala mini-cluster
setups using both Hive 2.1.1 and Hive 1.1.0

Change-Id: I49e53b1230520ca6e850af28078526e6627d69de
Reviewed-on: http://gerrit.cloudera.org:8080/8719
Reviewed-by: Alex Behm <alex.b...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/245df3c6
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/245df3c6
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/245df3c6

Branch: refs/heads/master
Commit: 245df3c69a932abf4accc57cb359c7bcda6d8aec
Parents: b4cf5f2
Author: Zach Amsden <zams...@cloudera.com>
Authored: Tue Nov 28 00:07:20 2017 +0000
Committer: Impala Public Jenkins <impala-public-jenk...@gerrit.cloudera.org>
Committed: Tue Dec 12 00:17:56 2017 +0000

----------------------------------------------------------------------
 .../queries/QueryTest/views-ddl.test            | 39 ------------------
 tests/metadata/test_ddl.py                      | 42 ++++++++++++++++++++
 2 files changed, 42 insertions(+), 39 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/245df3c6/testdata/workloads/functional-query/queries/QueryTest/views-ddl.test
----------------------------------------------------------------------
diff --git 
a/testdata/workloads/functional-query/queries/QueryTest/views-ddl.test 
b/testdata/workloads/functional-query/queries/QueryTest/views-ddl.test
index b62955e..98f116f 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/views-ddl.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/views-ddl.test
@@ -277,42 +277,3 @@ select * from $DATABASE.decimal_view
 2345,111,12.3456789000,0.12345678900000000000000000000000000000,3.14100,1
 ---- TYPES
 decimal,decimal,decimal,decimal,decimal,decimal
-====
----- QUERY
-# Test that plan hints are stored in the view as end-of-line commented hints
-# for view-compatibility with Hive.
-create view $DATABASE.hints_test as
-select /* +straight_join */ a.* from functional.alltypestiny a
-inner join /* +broadcast */ functional.alltypes b on a.id = b.id
-inner join /* +shuffle */ functional.alltypessmall c on b.id = c.id
----- RESULTS
-====
----- QUERY
-describe formatted $DATABASE.hints_test
----- TYPES
-string,string,string
----- RESULTS: VERIFY_IS_SUBSET
-'# View Information','NULL','NULL'
-'View Original Text: ','SELECT ','NULL'
-'-- +straight_join','NULL','NULL'
-' a.* FROM functional.alltypestiny a INNER JOIN ','NULL','NULL'
-'-- +broadcast','NULL','NULL'
-' functional.alltypes b ON a.id = b.id INNER JOIN ','NULL','NULL'
-'-- +shuffle','NULL','NULL'
-' functional.alltypessmall c ON b.id = c.id','NULL','NULL'
-'View Expanded Text: ','SELECT ','NULL'
-'-- +straight_join','NULL','NULL'
-' a.* FROM functional.alltypestiny a INNER JOIN ','NULL','NULL'
-'-- +broadcast','NULL','NULL'
-' functional.alltypes b ON a.id = b.id INNER JOIN ','NULL','NULL'
-'-- +shuffle','NULL','NULL'
-' functional.alltypessmall c ON b.id = c.id','NULL','NULL'
-====
----- QUERY
-# Test querying the hinted view.
-select count(*) from $DATABASE.hints_test
----- RESULTS
-8
----- TYPES
-bigint
-====

http://git-wip-us.apache.org/repos/asf/impala/blob/245df3c6/tests/metadata/test_ddl.py
----------------------------------------------------------------------
diff --git a/tests/metadata/test_ddl.py b/tests/metadata/test_ddl.py
index d8c6000..27748bd 100644
--- a/tests/metadata/test_ddl.py
+++ b/tests/metadata/test_ddl.py
@@ -319,6 +319,48 @@ class TestDdlStatements(TestDdlBase):
     self.run_test_case('QueryTest/views-ddl', vector, use_db=unique_database,
         multiple_impalad=self._use_multiple_impalad(vector))
 
+  @UniqueDatabase.parametrize()
+  def test_view_hints(self, vector, unique_database):
+    # Test that plan hints are stored in the view's comment field; this should 
work
+    # regardless of how Hive formats the output.  Getting this to work with the
+    # automated test case runner is rather difficult, so verify directly.  
There
+    # should be two # of each join hint, one for the original text, one for 
the expanded
+    self.client.execute("""
+        create view {0}.hints_test as
+        select /* +straight_join */ a.* from functional.alltypestiny a
+        inner join /* +broadcast */ functional.alltypes b on a.id = b.id
+        inner join /* +shuffle */ functional.alltypessmall c on b.id = c.id
+        """.format(unique_database))
+    results = self.execute_query("describe formatted %s.hints_test" % 
unique_database)
+    sj, bc, shuf = 0,0,0
+    for row in results.data:
+        sj += '-- +straight_join' in row
+        bc += '-- +broadcast' in row
+        shuf += '-- +shuffle' in row
+    assert sj == 2
+    assert bc == 2
+    assert shuf == 2
+
+    # Test querying the hinted view.
+    results = self.execute_query("select count(*) from %s.hints_test" % 
unique_database)
+    assert results.success
+    assert len(results.data) == 1
+    assert results.data[0] == '8'
+
+    # Test the plan to make sure hints were applied correctly
+    plan = self.execute_query("explain select * from %s.hints_test" % 
unique_database,
+        query_options={'explain_level':0})
+    assert """PLAN-ROOT SINK
+08:EXCHANGE [UNPARTITIONED]
+04:HASH JOIN [INNER JOIN, PARTITIONED]
+|--07:EXCHANGE [HASH(c.id)]
+|  02:SCAN HDFS [functional.alltypessmall c]
+06:EXCHANGE [HASH(b.id)]
+03:HASH JOIN [INNER JOIN, BROADCAST]
+|--05:EXCHANGE [BROADCAST]
+|  01:SCAN HDFS [functional.alltypes b]
+00:SCAN HDFS [functional.alltypestiny a]""" in '\n'.join(plan.data)
+
   @UniqueDatabase.parametrize(sync_ddl=True)
   def test_functions_ddl(self, vector, unique_database):
     self.run_test_case('QueryTest/functions-ddl', vector, 
use_db=unique_database,

Reply via email to