This is an automated email from the ASF dual-hosted git repository.

dbecker pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit f59c0917fe382f675c6fa0f55e9b84791db579e5
Author: Riza Suminto <[email protected]>
AuthorDate: Mon Feb 24 23:31:24 2025 -0800

    IMPALA-13786: Skip rewriting expr of Hive auto-generated label
    
    IMPALA-10836 introduced the SimplifyCastExprRule optimization to
    simplify CAST expressions. However, applying this rewrite rule over
    expression referred by Hive auto-generated label has caused
    AnalysisException like following:
    
    AnalysisException: Could not resolve column/field reference:
    'failing_view._c0'
    
    It is most likely that, before IMPALA-10836, expression referred by Hive
    auto-generated label never effectively being rewritten. Thus, the
    ExprSubstitutionMap across multiple InlineViewRef was intact.
    
    This patch attempt to fix the issue by making any expression in
    SelectList that mapped to Hive auto-generated label ineligible for any
    kind of expression rewrite.
    
    Also addressed some flake8 errors in test_views_compatibility.py.
    
    Testing:
    - Add test case in views-compatibility.test.
    - Break test_view_compatibility_hive into 3 separate tests.
    - Refactor test_views_compatibility.py to run both EXPLAIN and SELECT
      query over the test view.
    - Pass test_views_compatibility.py in exhaustive exploration.
    - Pass core tests.
    
    Change-Id: I4b8bbd0afd6da0532bf2ef460989d4f01337d198
    Reviewed-on: http://gerrit.cloudera.org:8080/22546
    Reviewed-by: Impala Public Jenkins <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
---
 .../org/apache/impala/analysis/SelectList.java     |   1 +
 .../org/apache/impala/analysis/SelectListItem.java |  10 +-
 .../queries/QueryTest/views-compatibility.test     |  27 ++++
 tests/metadata/test_views_compatibility.py         | 152 ++++++++++-----------
 4 files changed, 112 insertions(+), 78 deletions(-)

diff --git a/fe/src/main/java/org/apache/impala/analysis/SelectList.java 
b/fe/src/main/java/org/apache/impala/analysis/SelectList.java
index 9ed28c0e9..cb52ce0c8 100644
--- a/fe/src/main/java/org/apache/impala/analysis/SelectList.java
+++ b/fe/src/main/java/org/apache/impala/analysis/SelectList.java
@@ -97,6 +97,7 @@ public class SelectList {
     List<Subquery> subqueryExprs = new ArrayList<>();
     for (SelectListItem item: items_) {
       if (item.isStar()) continue;
+      if (item.isSkipExprRewrite()) continue;
       item.setExpr(rewriter.rewrite(item.getExpr(), analyzer));
       item.getExpr().collect(Subquery.class, subqueryExprs);
     }
diff --git a/fe/src/main/java/org/apache/impala/analysis/SelectListItem.java 
b/fe/src/main/java/org/apache/impala/analysis/SelectListItem.java
index 17884e53d..200cba717 100644
--- a/fe/src/main/java/org/apache/impala/analysis/SelectListItem.java
+++ b/fe/src/main/java/org/apache/impala/analysis/SelectListItem.java
@@ -34,6 +34,10 @@ public class SelectListItem {
   // True if the item shouldn't be included in star expansion
   private final boolean isHidden_;
 
+  // If True, this expr_ must not be rewritten for correctness reason.
+  // Set to True by toColumnLabel() if it returns Hive's auto-generated label.
+  private boolean isSkipExprRewrite_ = false;
+
   public SelectListItem(Expr expr, String alias, boolean isHidden) {
     Preconditions.checkNotNull(expr);
     expr_ = expr;
@@ -65,6 +69,7 @@ public class SelectListItem {
   public String getAlias() { return alias_; }
   public List<String> getRawPath() { return rawPath_; }
   public boolean isHidden() { return isHidden_; }
+  public boolean isSkipExprRewrite() { return isSkipExprRewrite_; }
 
   @Override
   public String toString() {
@@ -122,7 +127,10 @@ public class SelectListItem {
       return Joiner.on(".").join(slotRef.getResolvedPath().getRawPath());
     }
     // Optionally return auto-generated column label.
-    if (useHiveColLabels) return "_c" + selectListPos;
+    if (useHiveColLabels) {
+      isSkipExprRewrite_ = true;
+      return "_c" + selectListPos;
+    }
     // Abbreviate the toSql() for analytic exprs.
     if (expr_ instanceof AnalyticExpr) {
       AnalyticExpr expr = (AnalyticExpr) expr_;
diff --git 
a/testdata/workloads/functional-query/queries/QueryTest/views-compatibility.test
 
b/testdata/workloads/functional-query/queries/QueryTest/views-compatibility.test
index c8d6e85c5..5e765e389 100644
--- 
a/testdata/workloads/functional-query/queries/QueryTest/views-compatibility.test
+++ 
b/testdata/workloads/functional-query/queries/QueryTest/views-compatibility.test
@@ -261,3 +261,30 @@ HIVE=FAILURE
 IMPALA=SUCCESS
 HIVE=FAILURE
 ====
+---- CREATE_VIEW
+# IMPALA-13786: Regression case for SimplifyCastExprRule.
+# The topmost SELECT column does not have explicit alias and will be referred
+# by a Hive auto-generated alias (_c0).
+CREATE VIEW test (`varchar_col`) AS
+SELECT
+  CAST(`alias2` AS VARCHAR(5))
+FROM
+  (
+    SELECT
+      CAST(`alias1` AS VARCHAR(5)) `alias2`
+    FROM
+      (
+        SELECT
+          "varchar val" AS `alias1`
+      ) b
+  ) a
+---- CREATE_VIEW_RESULTS
+IMPALA=SUCCESS
+HIVE=SUCCESS
+---- QUERY_HIVE_VIEW_RESULTS
+IMPALA=SUCCESS
+HIVE=SUCCESS
+---- QUERY_IMPALA_VIEW_RESULTS
+IMPALA=SUCCESS
+HIVE=SUCCESS
+====
diff --git a/tests/metadata/test_views_compatibility.py 
b/tests/metadata/test_views_compatibility.py
index 8d19fceb1..ced54f738 100644
--- a/tests/metadata/test_views_compatibility.py
+++ b/tests/metadata/test_views_compatibility.py
@@ -19,21 +19,22 @@ from __future__ import absolute_import, division, 
print_function
 import pprint
 import pytest
 import shlex
-from subprocess import call
 
-from tests.beeswax.impala_beeswax import ImpalaBeeswaxException
 from tests.common.environ import HIVE_MAJOR_VERSION
 from tests.common.impala_test_suite import ImpalaTestSuite
 from tests.common.skip import SkipIfFS
 from tests.common.test_dimensions import create_uncompressed_text_dimension
+from tests.common.test_vector import ImpalaTestDimension
 from tests.util.test_file_parser import QueryTestSectionReader
 
 # The purpose of view compatibility testing is to check whether views created 
in Hive
 # can be queried in Impala and vice versa. A test typically consists of
 # the following actions specified in different test sections.
 # 1. create a view with a certain definition using Hive and Impala
-# 2. explain a "select *" query on the view created by Hive using Hive and 
Impala
-# 3. explain a "select *" query on the view created by Impala using Hive and 
Impala
+# 2. explain and run a "select *" query on the view created by Hive
+#    using Hive and Impala
+# 3. explain and run a "select *" query on the view created by Impala
+#    using Hive and Impala
 # For each of the steps above its corresponding test section specifies our 
expectations
 # on whether Impala and Hive will succeed or fail.
 #
@@ -44,13 +45,17 @@ from tests.util.test_file_parser import 
QueryTestSectionReader
 # potentially change in Impala and/or Hive simply testing various SQL 
statements
 # in Impala and Hive would be insufficient.
 
+HIVE = 'HIVE'
+IMPALA = 'IMPALA'
+VALID_ENGINES = [HIVE, IMPALA]
+
 
 # Missing Coverage: Views created by Hive and Impala being visible and 
queryable by each
 # other on non hdfs storage.
 @SkipIfFS.hive
 class TestViewCompatibility(ImpalaTestSuite):
-  VALID_SECTION_NAMES = ["CREATE_VIEW", "CREATE_VIEW_RESULTS",\
-                        "QUERY_HIVE_VIEW_RESULTS", "QUERY_IMPALA_VIEW_RESULTS"]
+  VALID_SECTION_NAMES = ["CREATE_VIEW", "CREATE_VIEW_RESULTS",
+                         "QUERY_HIVE_VIEW_RESULTS", 
"QUERY_IMPALA_VIEW_RESULTS"]
 
   @classmethod
   def get_workload(self):
@@ -68,16 +73,23 @@ class TestViewCompatibility(ImpalaTestSuite):
     # There is no reason to run these tests using all dimensions.
     cls.ImpalaTestMatrix.add_dimension(
         create_uncompressed_text_dimension(cls.get_workload()))
+    # Add creator_engine dimension to parallelize test.
+    cls.ImpalaTestMatrix.add_dimension(
+      ImpalaTestDimension('creator_engine', *VALID_ENGINES))
 
-  def test_view_compatibility(self, vector, unique_database):
+  def test_view_compatibility_hive_all(self, vector, unique_database):
     self._run_view_compat_test_case('QueryTest/views-compatibility', vector,
       unique_database)
-    if HIVE_MAJOR_VERSION == 2:
-      
self._run_view_compat_test_case('QueryTest/views-compatibility-hive2-only', 
vector,
-          unique_database)
-    if HIVE_MAJOR_VERSION >= 3:
-      
self._run_view_compat_test_case('QueryTest/views-compatibility-hive3-only', 
vector,
-          unique_database)
+
+  @pytest.mark.skipif(HIVE_MAJOR_VERSION != 2, reason="Test require Hive 2")
+  def test_view_compatibility_hive_2(self, vector, unique_database):
+    
self._run_view_compat_test_case('QueryTest/views-compatibility-hive2-only', 
vector,
+        unique_database)
+
+  @pytest.mark.skipif(HIVE_MAJOR_VERSION < 3, reason="Test require Hive 3+")
+  def test_view_compatibility_hive_3(self, vector, unique_database):
+    
self._run_view_compat_test_case('QueryTest/views-compatibility-hive3-only', 
vector,
+        unique_database)
 
   def _run_view_compat_test_case(self, test_file_name, vector, test_db_name):
     """
@@ -92,76 +104,63 @@ class TestViewCompatibility(ImpalaTestSuite):
     ---- QUERY_IMPALA_VIEW_RESULTS
     whether we expect to be able to query the view created by Impala in 
Hive/Impala
     """
+    creator_engine = vector.get_value('creator_engine')
+    engine_to_exec = {
+      HIVE: self._exec_in_hive,
+      IMPALA: self._exec_in_impala
+    }
 
-    sections = self.load_query_test_file(self.get_workload(), test_file_name,\
+    sections = self.load_query_test_file(self.get_workload(), test_file_name,
                                          self.VALID_SECTION_NAMES)
 
     for test_section in sections:
       # validate the test
       test_case = ViewCompatTestCase(test_section, test_file_name, 
test_db_name)
 
-      # create views in Hive and Impala checking against the expected results
-      self._exec_in_hive(test_case.get_create_view_sql('HIVE'),\
-                          test_case.get_create_view_sql('HIVE'),\
-                          test_case.get_create_exp_res())
-      # The table may or may not have been created in Hive. And so, 
"invalidate metadata"
-      # may throw an exception.
-      try:
-        self.client.execute("invalidate metadata 
{0}".format(test_case.hive_view_name))
-      except ImpalaBeeswaxException as e:
-        assert "TableNotFoundException" in str(e)
-
-      self._exec_in_impala(test_case.get_create_view_sql('IMPALA'),\
-                            test_case.get_create_view_sql('IMPALA'),\
-                            test_case.get_create_exp_res())
-
-      # explain a simple query on the view created by Hive in Hive and Impala
-      if test_case.has_query_hive_section():
-        exp_res = test_case.get_query_exp_res('HIVE');
-        if 'HIVE' in exp_res:
-          self._exec_in_hive(test_case.get_query_view_sql('HIVE'),\
-                              test_case.get_create_view_sql('HIVE'), exp_res)
-        if 'IMPALA' in exp_res:
-          self._exec_in_impala(test_case.get_query_view_sql('HIVE'),\
-                                test_case.get_create_view_sql('HIVE'), exp_res)
-
-      # explain a simple query on the view created by Impala in Hive and Impala
-      if test_case.has_query_impala_section():
-        exp_res = test_case.get_query_exp_res('IMPALA');
-        if 'HIVE' in exp_res:
-          self._exec_in_hive(test_case.get_query_view_sql('IMPALA'),\
-                              test_case.get_create_view_sql('IMPALA'), exp_res)
-        if 'IMPALA' in exp_res:
-          self._exec_in_impala(test_case.get_query_view_sql('IMPALA'),\
-                                test_case.get_create_view_sql('IMPALA'), 
exp_res)
+      # create views in creator_engine checking against the expected results
+      create_query = test_case.get_create_view_sql(creator_engine)
+      engine_to_exec[creator_engine](create_query, create_query,
+                                     test_case.get_create_exp_res())
+
+      # explain and run a simple query on the view created by creator_engine in
+      # reader_engine
+      exp_res = test_case.get_query_exp_res(creator_engine)
+      if exp_res:
+        for reader_engine, exec_func in engine_to_exec.items():
+          if reader_engine in exp_res:
+            if creator_engine == HIVE and reader_engine == IMPALA:
+              # Run invalidate query from Impala so the view that just created 
by Hive
+              # is queryable by Impala.
+              invalidate_query = "invalidate metadata {0}".format(
+                  test_case.hive_view_name)
+              engine_to_exec[IMPALA](invalidate_query, create_query, None)
+            explain_query = test_case.get_query_view_sql(creator_engine)
+            exec_func(explain_query, create_query, exp_res)
+            if exp_res[reader_engine]:
+              select_query = explain_query.replace('explain ', '')
+              assert select_query != explain_query
+              exec_func(select_query, create_query, exp_res)
 
       # drop the views without checking success or failure
-      self._exec_in_hive(test_case.get_drop_view_sql('HIVE'),\
-                          test_case.get_create_view_sql('HIVE'), None)
-      try:
-        self.client.execute("invalidate metadata 
{0}".format(test_case.hive_view_name))
-      except ImpalaBeeswaxException as e:
-        assert "TableNotFoundException" in str(e)
-
-      self._exec_in_impala(test_case.get_drop_view_sql('IMPALA'),\
-                            test_case.get_create_view_sql('IMPALA'), None)
+      drop_query = test_case.get_drop_view_sql(creator_engine)
+      engine_to_exec[creator_engine](drop_query, create_query, None)
 
   def _exec_in_hive(self, sql_str, create_view_sql, exp_res):
     try:
       self.run_stmt_in_hive(sql_str)
       success = True
-    except: # consider any exception a failure
+    except Exception:  # consider any exception a failure
       success = False
-    self._cmp_expected(sql_str, create_view_sql, exp_res, "HIVE", success)
+    self._cmp_expected(sql_str, create_view_sql, exp_res, HIVE, success)
 
   def _exec_in_impala(self, sql_str, create_view_sql, exp_res):
     success = True
     try:
       impala_ret = self.execute_query(sql_str)
       success = impala_ret.success
-    except: # consider any exception a failure
+    except Exception:  # consider any exception a failure
       success = False
-    self._cmp_expected(sql_str, create_view_sql, exp_res, "IMPALA", success)
+    self._cmp_expected(sql_str, create_view_sql, exp_res, IMPALA, success)
 
   def _cmp_expected(self, sql_str, create_view_sql, exp_res, engine, success):
     if exp_res is None:
@@ -173,10 +172,10 @@ class TestViewCompatibility(ImpalaTestSuite):
       assert 0, '%s unexpectedly succeeded in executing\n%s\nwhile testing '\
           'a view created as\n%s' % (engine, create_view_sql, sql_str)
 
+
 # Represents one view-compatibility test case. Performs validation of the test 
sections
 # and provides SQL to execute for each section.
 class ViewCompatTestCase(object):
-  RESULT_KEYS = ["IMPALA", "HIVE"]
 
   def __init__(self, test_section, test_file_name, test_db_name):
     if 'CREATE_VIEW' not in test_section:
@@ -268,15 +267,15 @@ class ViewCompatTestCase(object):
 
     # check that the results section contains at least one entry
     if not (lambda a, b: any(i in b for i in a)):
-      assert 0, 'No valid entry in expected-results section. '\
-      'Expected an IMPALA or HIVE entry.'
+      assert 0, ('No valid entry in expected-results section. '
+                 'Expected an IMPALA or HIVE entry.')
     return exp_res
 
   def get_create_view_sql(self, engine):
-    engine = engine.upper();
-    if engine == "HIVE":
+    engine = engine.upper()
+    if engine == HIVE:
       return self.hive_create_view_sql
-    elif engine == "IMPALA":
+    elif engine == IMPALA:
       return self.impala_create_view_sql
     else:
       assert 0, "Unknown execution engine %s" % (engine)
@@ -285,32 +284,31 @@ class ViewCompatTestCase(object):
     return self.create_exp_res
 
   def get_drop_view_sql(self, engine):
-    engine = engine.upper();
-    if engine == "HIVE":
+    engine = engine.upper()
+    if engine == HIVE:
       return self.drop_hive_view_sql
-    elif engine == "IMPALA":
+    elif engine == IMPALA:
       return self.drop_impala_view_sql
     else:
       assert 0, "Unknown execution engine %s" % (engine)
 
   def get_query_exp_res(self, engine):
-    engine = engine.upper();
-    if engine == "HIVE":
+    engine = engine.upper()
+    if engine == HIVE:
       return self.query_hive_exp_res
-    elif engine == "IMPALA":
+    elif engine == IMPALA:
       return self.query_impala_exp_res
     else:
       assert 0, "Unknown execution engine %s" % (engine)
 
   def get_query_view_sql(self, engine):
-    engine = engine.upper();
-    if engine == "HIVE":
+    engine = engine.upper()
+    if engine == HIVE:
       return self.query_hive_view_sql
-    elif engine == "IMPALA":
+    elif engine == IMPALA:
       return self.query_impala_view_sql
     else:
       assert 0, "Unknown execution engine %s" % (engine)
-    return self.query_hive_view_sql
 
   def has_query_hive_section(self):
     return hasattr(self, 'query_hive_view_sql')

Reply via email to