This is an automated email from the ASF dual-hosted git repository.

michaelsmith pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 41d7a2b449218209b8e7a58649f589fd2252113f
Author: Steve Carlin <[email protected]>
AuthorDate: Wed Jan 28 17:04:31 2026 -0800

    IMPALA-14746: Calcite planner: Allow overrides in test framework
    
    This commit allows test files to override expected output for the
    Calcite planner where it differs from that of the original planner.
    
    The overrides will be used if the USE_CALCITE_PLANNER environment
    variable is set to "true".
    
    The allowable overridden sections are: RESULTS, CATCH, RUNTIME_PROFILE.
    
    A section is overridden if the environment variable is set to "true"
    and the test case also contains the same section name with a
    CALCITE_PLANNER_ prefix (e.g. CALCITE_PLANNER_RESULTS is used
    instead of RESULTS).
    
    A sample is given in the subquery.test file.
    
    Change-Id: Id7e22c63b27232bffc442a75952b9942067f0e85
    Reviewed-on: http://gerrit.cloudera.org:8080/23907
    Reviewed-by: Impala Public Jenkins <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
---
 bin/impala-config.sh                               |  2 ++
 bin/start-impala-cluster.py                        |  6 ++--
 .../queries/QueryTest/subquery.test                | 15 ++++++++-
 tests/common/environ.py                            |  1 +
 tests/common/impala_test_suite.py                  | 39 ++++++++++++++--------
 tests/util/test_file_parser.py                     | 14 ++++----
 6 files changed, 55 insertions(+), 22 deletions(-)

diff --git a/bin/impala-config.sh b/bin/impala-config.sh
index 9f923e426..074e1c14d 100755
--- a/bin/impala-config.sh
+++ b/bin/impala-config.sh
@@ -333,6 +333,8 @@ export IMPALA_DOCKER_JAVA=${IMPALA_DOCKER_JAVA:-"17"}
 # to true due to the large performance benefits.
 export IMPALA_USE_CLOUDFLARE_ZLIB=${IMPALA_USE_CLOUDFLARE_ZLIB:-"true"}
 
+export USE_CALCITE_PLANNER=${USE_CALCITE_PLANNER:-"false"}
+
 # When IMPALA_(CDP_COMPONENT)_URL are overridden, they may contain 
'$(platform_label)'
 # which will be substituted for the CDP platform label in 
bootstrap_toolchain.py
 unset IMPALA_HADOOP_URL
diff --git a/bin/start-impala-cluster.py b/bin/start-impala-cluster.py
index 4dee99b8b..f452b0a1a 100755
--- a/bin/start-impala-cluster.py
+++ b/bin/start-impala-cluster.py
@@ -56,6 +56,8 @@ INTERNAL_LISTEN_HOST = os.getenv("INTERNAL_LISTEN_HOST", 
"localhost")
 TARGET_FILESYSTEM = os.getenv("TARGET_FILESYSTEM") or "hdfs"
 HOST_TZ = os.getenv("TZ", None)
 
+BOOLEAN_STRINGS = ["true", "false"]
+
 # Options
 parser = OptionParser()
 parser.add_option("-s", "--cluster_size", type="int", dest="cluster_size", 
default=3,
@@ -201,8 +203,8 @@ parser.add_option("--tuple_cache_debug_dump_dir", 
dest="tuple_cache_debug_dump_d
 parser.add_option("--tuple_cache_eviction_policy", 
dest="tuple_cache_eviction_policy",
                   default="LRU", help="This specifies the cache eviction 
policy to use "
                   "for the tuple cache.")
-parser.add_option("--use_calcite_planner", default="False", type="choice",
-                  choices=["true", "True", "false", "False"],
+parser.add_option("--use_calcite_planner", type="choice", 
choices=BOOLEAN_STRINGS,
+                  default=os.environ.get("USE_CALCITE_PLANNER", "false"),
                   help="If true, use the Calcite planner for query 
optimization "
                   "instead of the Impala planner")
 parser.add_option("--enable_ranger_authz", dest="enable_ranger_authz",
diff --git 
a/testdata/workloads/functional-query/queries/QueryTest/subquery.test 
b/testdata/workloads/functional-query/queries/QueryTest/subquery.test
index ebe76dd1c..cab6a3e59 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/subquery.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/subquery.test
@@ -1017,7 +1017,7 @@ SELECT id FROM alltypes
 WHERE id = (SELECT bigint_col FROM functional.alltypes_view)
 ---- RESULTS
 ---- CATCH
-Subquery must not return more than one row: SELECT bigint_col FROM 
functional.alltypes_view
+row_regex:.*Subquery must not return more than one row.*
 ====
 ---- QUERY
 # Runtime scalar subquery with offset.
@@ -1092,8 +1092,21 @@ select id, (select count(id) from alltypessmall where 
id=t.id)
   from alltypestiny t
 order by id
 ---- RESULTS
+---- CALCITE_PLANNER_RESULTS
+0,1
+1,1
+2,1
+3,1
+4,1
+5,1
+6,1
+7,1
+---- TYPES
+INT, BIGINT
 ---- CATCH
 A correlated scalar subquery is not supported in the expression:
+---- CALCITE_PLANNER_CATCH
+__NO_ERROR__
 ====
 ---- QUERY
 # Uncorrelated Scalar Aggregate in select list combined with aggregation in 
parent query
diff --git a/tests/common/environ.py b/tests/common/environ.py
index bba7df8fc..53701d990 100644
--- a/tests/common/environ.py
+++ b/tests/common/environ.py
@@ -127,6 +127,7 @@ IS_TUPLE_CACHE_CORRECT_CHECK = (
     os.getenv("TUPLE_CACHE_DEBUG_DUMP_DIR", "") != ""
 )
 
+IS_CALCITE_PLANNER = os.environ.get("USE_CALCITE_PLANNER", False) == 'true'
 
 class ImpalaBuildFlavors:
   """
diff --git a/tests/common/impala_test_suite.py 
b/tests/common/impala_test_suite.py
index fe59cd024..ab710a387 100644
--- a/tests/common/impala_test_suite.py
+++ b/tests/common/impala_test_suite.py
@@ -48,6 +48,7 @@ from tests.common.environ import (
     MANAGED_WAREHOUSE_DIR,
     EXTERNAL_WAREHOUSE_DIR,
     ICEBERG_DEFAULT_FORMAT_VERSION,
+    IS_CALCITE_PLANNER,
     ImpalaTestClusterProperties)
 from tests.common.errors import Timeout
 from tests.common.impala_connection import create_connection
@@ -723,14 +724,15 @@ class ImpalaTestSuite(BaseTestSuite):
     assert False, 'Unexpected exception string. Expected: %s\nNot found in 
actual: %s' % \
       (expected_str, actual_str)
 
-  def __verify_results_and_errors(self, vector, test_section, result, use_db):
+  def __verify_results_and_errors(self, vector, test_section, 
result_section_name,
+      result, use_db):
     """Verifies that both results and error sections are as expected. Rewrites 
both
       by replacing $NAMENODE, $DATABASE and $IMPALA_HOME with their actual 
values, and
       optionally rewriting filenames with __HDFS_FILENAME__, to ensure that 
expected and
       actual values are easily compared.
     """
     replace_filenames_with_placeholder = True
-    for section_name in ('RESULTS', 'ERRORS'):
+    for section_name in (result_section_name, 'ERRORS'):
       if section_name in test_section:
         if "$NAMENODE" in test_section[section_name]:
           replace_filenames_with_placeholder = False
@@ -751,7 +753,7 @@ class ImpalaTestSuite(BaseTestSuite):
         if use_db:
           test_section[section_name] = test_section[section_name].replace(
               '$DATABASE', use_db)
-    result_section, type_section = 'RESULTS', 'TYPES'
+    result_section, type_section = result_section_name, 'TYPES'
     verify_raw_results(test_section, result, vector,
                        result_section, type_section,
                        self.pytest_config().option.update_results,
@@ -894,18 +896,22 @@ class ImpalaTestSuite(BaseTestSuite):
           LOG.info('Query Name: \n%s\n' % test_section['QUERY_NAME'])
 
         result = None
+        catch_section_name = 'CALCITE_PLANNER_CATCH' \
+            if IS_CALCITE_PLANNER and 'CALCITE_PLANNER_CATCH' in test_section \
+            else 'CATCH'
         try:
           result = exec_fn(query, user=test_section.get('USER', '').strip() or 
None)
         except Exception as e:
-          if 'CATCH' in test_section:
-            self.__verify_exceptions(test_section['CATCH'], str(e), use_db)
+          if catch_section_name in test_section:
+            self.__verify_exceptions(test_section[catch_section_name], str(e), 
use_db)
             assert error_msg_startswith(str(e))
             continue
           raise
 
-        if 'CATCH' in test_section and '__NO_ERROR__' not in 
test_section['CATCH']:
+        if catch_section_name in test_section \
+            and '__NO_ERROR__' not in test_section[catch_section_name]:
           expected_str = self.__do_replacements(
-              " or ".join(test_section['CATCH']).strip(),
+              " or ".join(test_section[catch_section_name]).strip(),
               use_db=use_db,
               extra=test_file_vars)
           assert False, "Expected exception: {0}\n\nwhen 
running:\n\n{1}".format(
@@ -918,15 +924,19 @@ class ImpalaTestSuite(BaseTestSuite):
         if encoding and result.data:
             result.data = [row.decode(encoding) for row in result.data]
         # Replace $NAMENODE in the expected results with the actual namenode 
URI.
-        if 'RESULTS' in test_section:
+        results_section_name = 'CALCITE_PLANNER_RESULTS' \
+            if IS_CALCITE_PLANNER and 'CALCITE_PLANNER_RESULTS' in 
test_section \
+            else 'RESULTS'
+        if results_section_name in test_section:
           # Combining 'RESULTS' with 'DML_RESULTS" is currently unsupported 
because
           # __verify_results_and_errors calls verify_raw_results which always 
checks
           # ERRORS, TYPES, LABELS, etc. which doesn't make sense if there are 
two
           # different result sets to consider (IMPALA-4471).
           assert 'DML_RESULTS' not in test_section
-          test_section['RESULTS'] = self.__do_replacements(
-              test_section['RESULTS'], use_db=use_db, extra=test_file_vars)
-          self.__verify_results_and_errors(vector, test_section, result, 
use_db)
+          test_section[results_section_name] = self.__do_replacements(
+              test_section[results_section_name], use_db=use_db, 
extra=test_file_vars)
+          self.__verify_results_and_errors(vector, test_section, 
results_section_name,
+              result, use_db)
         else:
           # TODO: Can't validate errors without expected results for now.
           assert 'ERRORS' not in test_section,\
@@ -934,8 +944,9 @@ class ImpalaTestSuite(BaseTestSuite):
         # If --update_results, then replace references to the namenode URI 
with $NAMENODE.
         # TODO(todd) consider running do_replacements in reverse, though that 
may cause
         # some false replacements for things like username.
-        if self.pytest_config().option.update_results and 'RESULTS' in 
test_section:
-          test_section['RESULTS'] = test_section['RESULTS'] \
+        if self.pytest_config().option.update_results \
+            and results_section_name in test_section:
+          test_section[results_section_name] = 
test_section[results_section_name] \
               .replace(NAMENODE, '$NAMENODE') \
               .replace(IMPALA_HOME, '$IMPALA_HOME') \
               .replace(INTERNAL_LISTEN_HOST, '$INTERNAL_LISTEN_HOST') \
@@ -945,6 +956,8 @@ class ImpalaTestSuite(BaseTestSuite):
           # If this table format has a RUNTIME_PROFILE section specifically 
for it,
           # evaluate that section and ignore any general RUNTIME_PROFILE 
sections.
           rt_profile_info = 'RUNTIME_PROFILE_%s' % 
table_format_info.file_format
+        elif IS_CALCITE_PLANNER and 'CALCITE_PLANNER_RUNTIME_PROFILE' in 
test_section:
+            rt_profile_info = 'CALCITE_PLANNER_RUNTIME_PROFILE'
         elif 'RUNTIME_PROFILE' in test_section:
           rt_profile_info = 'RUNTIME_PROFILE'
 
diff --git a/tests/util/test_file_parser.py b/tests/util/test_file_parser.py
index b6e3601f8..5f80ff3d9 100644
--- a/tests/util/test_file_parser.py
+++ b/tests/util/test_file_parser.py
@@ -102,7 +102,9 @@ def parse_query_test_file(file_name, 
valid_section_names=None, encoding=None):
   if section_names is None:
     section_names = ['QUERY', 'HIVE_QUERY', 'RESULTS', 'TYPES', 'LABELS', 
'SETUP',
         'CATCH', 'ERRORS', 'USER', 'RUNTIME_PROFILE', 'SHELL', 'DML_RESULTS',
-        'HS2_TYPES', 'HIVE_MAJOR_VERSION', 'LINEAGE', 'IS_HDFS_ONLY']
+        'HS2_TYPES', 'HIVE_MAJOR_VERSION', 'LINEAGE', 'IS_HDFS_ONLY',
+        'CALCITE_PLANNER_RESULTS', 'CALCITE_PLANNER_CATCH',
+        'CALCITE_PLANNER_RUNTIME_PROFILE']
   return parse_test_file(file_name, section_names, encoding=encoding,
       skip_unknown_sections=False)
 
@@ -266,15 +268,15 @@ def parse_test_file_text(text, valid_section_names, 
skip_unknown_sections=True):
           else:
             raise RuntimeError('Unknown subsection comment: %s' % comment)
 
-      if subsection_name == 'CATCH':
-        parsed_sections['CATCH'] = list()
+      if 'CATCH' in subsection_name:
+        parsed_sections[subsection_name] = list()
         if subsection_comment is None:
-          parsed_sections['CATCH'].append(subsection_str)
+          parsed_sections[subsection_name].append(subsection_str)
         elif subsection_comment == 'ANY_OF':
-          parsed_sections['CATCH'].extend(lines_content)
+          parsed_sections[subsection_name].extend(lines_content)
         else:
           raise RuntimeError('Unknown subsection comment: %s' % 
subsection_comment)
-        for exception_str in parsed_sections['CATCH']:
+        for exception_str in parsed_sections[subsection_name]:
           assert exception_str.strip(), "Empty exception string."
         continue
 

Reply via email to