This is an automated email from the ASF dual-hosted git repository.

dbecker pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


The following commit(s) were added to refs/heads/master by this push:
     new 3dd5c9e66 IMPALA-3880: Add list of all tables queried to runtime 
profile
3dd5c9e66 is described below

commit 3dd5c9e661c45f1793f3ca4d86e9ab83d12e8b3b
Author: Peter Rozsa <[email protected]>
AuthorDate: Thu Jan 5 10:32:38 2023 +0100

    IMPALA-3880: Add list of all tables queried to runtime profile
    
    This change adds a new info string to the frontend runtime profile
    that lists the tables referenced by the query in a comma-separated
    format.
    
    Tests:
     - Added tests to check if the referenced tables are enumerated
       correctly
     - Added test to check that the referenced tables are filled in
       properly for different DDL and DML statements
    
    Change-Id: Ib474a5c6522032679701103aa225a18edca62f5a
    Reviewed-on: http://gerrit.cloudera.org:8080/19401
    Reviewed-by: Daniel Becker <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
---
 .../java/org/apache/impala/service/Frontend.java   |  7 +++
 tests/query_test/test_observability.py             |  1 +
 tests/shell/test_shell_commandline.py              | 57 +++++++++++++++++++++-
 3 files changed, 64 insertions(+), 1 deletion(-)

diff --git a/fe/src/main/java/org/apache/impala/service/Frontend.java 
b/fe/src/main/java/org/apache/impala/service/Frontend.java
index c28398e77..df89ce079 100644
--- a/fe/src/main/java/org/apache/impala/service/Frontend.java
+++ b/fe/src/main/java/org/apache/impala/service/Frontend.java
@@ -2054,6 +2054,13 @@ public class Frontend {
     //TODO (IMPALA-8788): should load table write ids in transaction context.
     StmtTableCache stmtTableCache = metadataLoader.loadTables(stmt);
 
+    // Add referenced tables to frontend profile
+    FrontendProfile.getCurrent().addInfoString("Referenced Tables",
+        stmtTableCache.tables.keySet()
+            .stream()
+            .map(TableName::toString)
+            .collect(Collectors.joining(", ")));
+
     // Analyze and authorize stmt
     AnalysisContext analysisCtx = new AnalysisContext(queryCtx, authzFactory_, 
timeline);
     AnalysisResult analysisResult = analysisCtx.analyzeAndAuthorize(stmt, 
stmtTableCache,
diff --git a/tests/query_test/test_observability.py 
b/tests/query_test/test_observability.py
index a92347f88..25bc2347e 100644
--- a/tests/query_test/test_observability.py
+++ b/tests/query_test/test_observability.py
@@ -288,6 +288,7 @@ class TestObservability(ImpalaTestSuite):
     else:
       load_event_regexes = [
         r'Frontend:',
+        r'Referenced Tables:',
         r'CatalogFetch.ColumnStats.Hits',
         r'CatalogFetch.ColumnStats.Misses',
         r'CatalogFetch.ColumnStats.Requests',
diff --git a/tests/shell/test_shell_commandline.py 
b/tests/shell/test_shell_commandline.py
index c91c31797..d097ccdaf 100644
--- a/tests/shell/test_shell_commandline.py
+++ b/tests/shell/test_shell_commandline.py
@@ -374,7 +374,7 @@ class TestImpalaShell(ImpalaTestSuite):
 
   def test_runtime_profile(self, vector):
     if vector.get_value('strict_hs2_protocol'):
-      pytest.skip("Runtime profile not support in strict hs2 mode.")
+      pytest.skip("Runtime profile is not supported in strict hs2 mode.")
     # test summary is in both the profile printed by the
     # -p option and the one printed by the profile command
     args = ['-p', '-q', 'select 1; profile;']
@@ -385,6 +385,61 @@ class TestImpalaShell(ImpalaTestSuite):
     assert len(re.findall(regex, result_set.stdout)) == 2, \
         "Could not detect two profiles, stdout: %s" % result_set.stdout
 
+  def test_runtime_profile_referenced_tables(self, vector, unique_database):
+    if vector.get_value('strict_hs2_protocol'):
+      pytest.skip("Runtime profile is not supported in strict hs2 mode.")
+    db = unique_database
+    base_args = ['-p', '-q']
+
+    statements = ['select id from %s.shell_profile_test' % db,
+                  'alter table %s.shell_profile_test add column b int' % db,
+                  'insert into %s.shell_profile_test(id) values (1)' % db,
+                  'truncate table %s.shell_profile_test' % db,
+                  'drop table %s.shell_profile_test' % db]
+
+    args = base_args + ['create table %s.shell_profile_test (id int)' % db]
+    create = run_impala_shell_cmd(vector, args)
+    assert "Referenced Tables: \n" in create.stdout
+
+    for statement in statements:
+      args = base_args + [statement]
+      result = run_impala_shell_cmd(vector, args)
+      assert "Referenced Tables: %s.shell_profile_test" % unique_database in 
result.stdout
+
+  def test_runtime_profile_multiple_referenced_tables(self, vector, 
unique_database):
+    if vector.get_value('strict_hs2_protocol'):
+      pytest.skip("Runtime profile is not supported in strict hs2 mode.")
+
+    def get_referenced_tables(profile):
+      return re.findall(r'Referenced Tables: (.*)', profile)[0].split(', ')
+
+    db = unique_database
+    base_args = ['-p', '-q']
+
+    for i in range(0, 2):
+      args = base_args + ['create table %s.shell_profile_test%d (id int)' % 
(db, i)]
+      run_impala_shell_cmd(vector, args)
+
+    args = base_args + ["select * from {db}.shell_profile_test0 t0 inner join "
+                        "{db}.shell_profile_test1 t1 on t0.id = 
t1.id".format(db=db)]
+    result = run_impala_shell_cmd(vector, args)
+    referenced_tables = get_referenced_tables(result.stdout)
+
+    assert len(referenced_tables) == 2
+    for i in range(0, 2):
+      assert "{db}.shell_profile_test{index}".format(db=db, index=i) in 
referenced_tables
+
+    args = base_args + ["select * from {db}.shell_profile_test0 t0 inner join "
+                        "{db}.shell_profile_test1 t1 on t0.id = t1.id inner 
join "
+                        "{db}.shell_profile_test1 t11 on t0.id = 
t11.id".format(db=db)]
+
+    result = run_impala_shell_cmd(vector, args)
+    referenced_tables = get_referenced_tables(result.stdout)
+
+    assert len(referenced_tables) == 2
+    for i in range(0, 2):
+      assert "{db}.shell_profile_test{index}".format(db=db, index=i) in 
referenced_tables
+
   def test_summary(self, vector):
     if vector.get_value('strict_hs2_protocol'):
       pytest.skip("Summary not supported in strict hs2 mode.")

Reply via email to