This is an automated email from the ASF dual-hosted git repository.

wzhou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


The following commit(s) were added to refs/heads/master by this push:
     new 1233ac3c5 IMPALA-13061: Create query live as external table
1233ac3c5 is described below

commit 1233ac3c579b5929866dba23debae63e5d2aae90
Author: Michael Smith <[email protected]>
AuthorDate: Mon May 6 16:10:02 2024 -0700

    IMPALA-13061: Create query live as external table
    
    Impala determines whether a managed table is transactional based on the
    'transactional' table property. It assumes any managed table with
    transactional=true returns non-null getValidWriteIds.
    
    When 'default_transactional_type=insert_only' is set at startup (via
    default_query_options), impala_query_live is created as a managed table
    with transactional=true, but SystemTables don't implement
    getValidWriteIds and are not meant to be transactional.
    
    DataSourceTable has a similar problem, and when a JDBC table is
    created setJdbcDataSourceProperties sets transactional=false. This
    patch uses CREATE EXTERNAL TABLE sys.impala_query_live so that it is not
    created as a managed table and 'transactional' is not set. That avoids
    creating a SystemTable that Impala can't read (it encounters an
    IllegalStateException).
    
    Change-Id: Ie60a2bd03fabc63c85bcd9fa2489e9d47cd2aa65
    Reviewed-on: http://gerrit.cloudera.org:8080/21401
    Reviewed-by: Impala Public Jenkins <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
---
 be/src/service/workload-management.cc   |  5 ++++-
 tests/custom_cluster/test_query_live.py | 33 +++++++++++++++++++++++++++++----
 2 files changed, 33 insertions(+), 5 deletions(-)

diff --git a/be/src/service/workload-management.cc 
b/be/src/service/workload-management.cc
index cf88365d3..15527fa97 100644
--- a/be/src/service/workload-management.cc
+++ b/be/src/service/workload-management.cc
@@ -122,7 +122,10 @@ static const Status SetupTable(InternalServer* server, 
const string& table_name,
   insert_query_opts[TImpalaQueryOptions::SYNC_DDL] = "true";
 
   StringStreamPop create_table_sql;
-  create_table_sql << "CREATE TABLE IF NOT EXISTS " << table_name << "(";
+  create_table_sql << "CREATE ";
+  // System tables do not have anything to purge, and must not be managed 
tables.
+  if (is_system_table) create_table_sql << "EXTERNAL ";
+  create_table_sql << "TABLE IF NOT EXISTS " << table_name << "(";
 
   for (const auto& field : FIELD_DEFINITIONS) {
     create_table_sql << GetColumnName(field) << " " << field.db_column_type;
diff --git a/tests/custom_cluster/test_query_live.py 
b/tests/custom_cluster/test_query_live.py
index b1ffba09c..f315f402b 100644
--- a/tests/custom_cluster/test_query_live.py
+++ b/tests/custom_cluster/test_query_live.py
@@ -34,10 +34,20 @@ class TestQueryLive(CustomClusterTestSuite):
   def setup_method(self, method):
     super(TestQueryLive, self).setup_method(method)
     create_match = self.assert_impalad_log_contains("INFO", 
r'\]\s+(\w+:\w+)\]\s+'
-        r'Analyzing query: CREATE TABLE IF NOT EXISTS sys.impala_query_live')
+        r'Analyzing query: CREATE EXTERNAL TABLE IF NOT EXISTS 
sys.impala_query_live')
     self.assert_impalad_log_contains("INFO", r'Query successfully 
unregistered: '
         r'query_id={}'.format(create_match.group(1)))
 
+  def assert_describe_extended(self):
+    describe_ext_result = self.execute_query('describe extended 
sys.impala_query_live')
+    assert len(describe_ext_result.data) == 80
+    system_table_re = re.compile(r'__IMPALA_SYSTEM_TABLE\s+true')
+    assert list(filter(system_table_re.search, describe_ext_result.data))
+    external_re = re.compile(r'EXTERNAL\s+TRUE')
+    assert list(filter(external_re.search, describe_ext_result.data))
+    external_table_re = re.compile(r'Table Type:\s+EXTERNAL_TABLE')
+    assert list(filter(external_table_re.search, describe_ext_result.data))
+
   def assert_impalads(self, profile, present=[0, 1, 2], absent=[]):
     for port_idx in present:
       assert ":" + str(DEFAULT_KRPC_PORT + port_idx) + ":" in profile
@@ -120,9 +130,7 @@ class TestQueryLive(CustomClusterTestSuite):
     # describe query
     describe_result = self.execute_query('describe sys.impala_query_live')
     assert len(describe_result.data) == 49
-
-    describe_ext_result = self.execute_query('describe extended 
sys.impala_query_live')
-    assert len(describe_ext_result.data) == 82
+    self.assert_describe_extended()
 
     # show create table
     show_create_tbl = self.execute_query('show create table 
sys.impala_query_live')
@@ -161,6 +169,23 @@ class TestQueryLive(CustomClusterTestSuite):
     # Drop table at the end, it's only recreated on impalad startup.
     self.execute_query_expect_success(self.client, 'drop table 
sys.impala_query_live')
 
+  # Must come directly after "drop table sys.impala_query_live"
+  @CustomClusterTestSuite.with_args(impalad_args="--enable_workload_mgmt "
+                                                 "--cluster_id=test_query_live 
"
+                                                 "--use_local_catalog=true",
+                                    catalogd_args="--enable_workload_mgmt "
+                                                  
"--catalog_topic_mode=minimal",
+                                    default_query_options=[
+                                      ('default_transactional_type', 
'insert_only')])
+  def test_default_transactional(self):
+    """Asserts the query live table works when impala is started with
+    default_transactional_type=insert_only."""
+    result = self.client.execute("select * from functional.alltypes",
+        fetch_profile_after_close=True)
+    assert_query('sys.impala_query_live', self.client, 'test_query_live',
+                 result.runtime_profile)
+    self.assert_describe_extended()
+
   @CustomClusterTestSuite.with_args(impalad_args="--enable_workload_mgmt "
                                                  "--cluster_id=test_query_live 
"
                                                  "--use_local_catalog=true",

Reply via email to