This is an automated email from the ASF dual-hosted git repository. wzhou pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit f68986d4522aa2e581c4bbf464a0454f881d7553 Author: stiga-huang <[email protected]> AuthorDate: Sun Apr 23 12:17:45 2023 +0800 IMPALA-12082: Fix db not found error of INVALIDATE METADATA under unloaded db INVALIDATE METADATA can be executed on tables under an unloaded db. It will bring up the metadata of the db along the way. However, this feature has been broken since IMPALA-11808, in which we try to get the table from the catalog cache assuming it's loaded. This causes the above use case to fail with a DatabaseNotFoundException. This patch fixes the regression by not getting the table from the catalog cache for INVALIDATE METADATA commands. We only do so for REFRESH commands. After the INVALIDATE METADATA command succeeds, if we need to fire reload events, we get the table from the catalog cache. Tests: - Add e2e tests for when the event-processor is disabled and enabled. Change-Id: Ifd0a9e87f06c38f569c32bd10cc2668403681fd4 Reviewed-on: http://gerrit.cloudera.org:8080/19786 Reviewed-by: Michael Smith <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> --- .../org/apache/impala/service/CatalogOpExecutor.java | 18 +++++++++++++----- .../test_metadata_no_events_processing.py | 14 ++++++++++++++ tests/metadata/test_hms_integration.py | 10 ++++++++++ 3 files changed, 37 insertions(+), 5 deletions(-) diff --git a/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java b/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java index c62646e71..56f08e28c 100644 --- a/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java +++ b/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java @@ -6404,10 +6404,16 @@ public class CatalogOpExecutor { // Thrift representation of the result of the invalidate/refresh operation. 
TCatalogObject updatedThriftTable = null; TableName tblName = TableName.fromThrift(req.getTable_name()); - Table tbl = catalog_.getTable(tblName.getDb(), tblName.getTbl()); - if (req.isIs_refresh()) { + Table tbl = null; + if (!req.isIs_refresh()) { + // For INVALIDATE METADATA <db>.<table>, the db might be unloaded. + // So we can't update 'tbl' here. + updatedThriftTable = catalog_.invalidateTable( + req.getTable_name(), tblWasRemoved, dbWasAdded); + } else { // Quick check to see if the table exists in the catalog without triggering // a table load. + tbl = catalog_.getTable(tblName.getDb(), tblName.getTbl()); if (tbl != null) { // If the table is not loaded, no need to perform refresh after the initial // metadata load. @@ -6454,9 +6460,6 @@ public class CatalogOpExecutor { } } } - } else { - updatedThriftTable = catalog_.invalidateTable( - req.getTable_name(), tblWasRemoved, dbWasAdded); } if (updatedThriftTable == null) { @@ -6467,6 +6470,11 @@ public class CatalogOpExecutor { } if (BackendConfig.INSTANCE.enableReloadEvents()) { + // For INVALIDATE METADATA <table>, 'tbl' can only be got after it succeeds. 
+ if (!req.isIs_refresh()) { + tbl = catalog_.getTable(tblName.getDb(), tblName.getTbl()); + } + Preconditions.checkNotNull(tbl, "tbl is null in " + cmdString); // fire event for refresh event and update the last refresh event id fireReloadEventAndUpdateRefreshEventId(req, updatedThriftTable, tblName, tbl); } diff --git a/tests/custom_cluster/test_metadata_no_events_processing.py b/tests/custom_cluster/test_metadata_no_events_processing.py index ee6af0821..fd6bc9b80 100644 --- a/tests/custom_cluster/test_metadata_no_events_processing.py +++ b/tests/custom_cluster/test_metadata_no_events_processing.py @@ -296,3 +296,17 @@ class TestMetadataNoEventsProcessing(CustomClusterTestSuite): result = self.client.execute("show partitions %s" % tbl) assert result.get_data().startswith("1\t1\t2"),\ "Incorrect partition stats %s" % result.get_data() + + @CustomClusterTestSuite.with_args(catalogd_args="--hms_event_polling_interval_s=0") + def test_invalidate_metadata(self, unique_name): + """Verify invalidate metadata on tables under unloaded db won't fail""" + db = unique_name + "_db" + tbl = db + "." + unique_name + "_tbl" + try: + self.run_stmt_in_hive("create database " + db) + self.run_stmt_in_hive("create table %s (i int)" % tbl) + self.client.execute("invalidate metadata %s" % tbl) + res = self.client.execute("describe %s" % tbl) + assert res.data == ["i\tint\t"] + finally: + self.run_stmt_in_hive("drop database %s cascade" % db) diff --git a/tests/metadata/test_hms_integration.py b/tests/metadata/test_hms_integration.py index b67d0cac9..d1cbd4cc7 100644 --- a/tests/metadata/test_hms_integration.py +++ b/tests/metadata/test_hms_integration.py @@ -144,6 +144,16 @@ class TestHmsIntegrationSanity(ImpalaTestSuite): else: assert False + def test_invalidate_metadata(self, unique_name): + """Verify invalidate metadata on tables under unloaded db won't fail""" + db = unique_name + "_db" + tbl = db + "." 
+ unique_name + "_tbl" + try: + self.run_stmt_in_hive("create database " + db) + self.run_stmt_in_hive("create table %s (i int)" % tbl) + self.client.execute("invalidate metadata %s" % tbl) + finally: + self.run_stmt_in_hive("drop database %s cascade" % db) @SkipIfFS.hive class TestHmsIntegration(ImpalaTestSuite):
