IMPALA-7224. Improve performance of UpdateCatalogMetrics This function is called after every DDL query, and was implemented by fetching the entire list of table names, even though only the length of that list was needed. In workloads with millions of tables, this could add several seconds of overhead following even simple requests like 'USE' or 'DESCRIBE'.
I tested a backported version of this patch against one such workload. It reduced the time taken for a simple DESCRIBE query from 12-14sec down to about 40ms. I also tested locally that the metrics on impalad were still updated by DDL operations. Change-Id: Ic5467adbce1e760ff93996925db5611748efafc0 Reviewed-on: http://gerrit.cloudera.org:8080/10846 Reviewed-by: Vuk Ercegovac <[email protected]> Reviewed-by: Tim Armstrong <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> Project: http://git-wip-us.apache.org/repos/asf/impala/repo Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/2b6d71fe Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/2b6d71fe Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/2b6d71fe Branch: refs/heads/master Commit: 2b6d71fee779088af54cc416ee25027bbd415954 Parents: 0a47016 Author: Todd Lipcon <[email protected]> Authored: Fri Jun 29 15:38:14 2018 -0700 Committer: Tim Armstrong <[email protected]> Committed: Tue Jul 3 15:37:27 2018 +0000 ---------------------------------------------------------------------- be/src/service/frontend.cc | 5 +++++ be/src/service/frontend.h | 4 ++++ be/src/service/impala-server.cc | 15 ++++----------- common/thrift/Frontend.thrift | 6 ++++++ .../java/org/apache/impala/service/Frontend.java | 10 ++++++++++ .../java/org/apache/impala/service/JniFrontend.java | 11 +++++++++++ 6 files changed, 40 insertions(+), 11 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/impala/blob/2b6d71fe/be/src/service/frontend.cc ---------------------------------------------------------------------- diff --git a/be/src/service/frontend.cc b/be/src/service/frontend.cc index 592a1dd..9ae9f90 100644 --- a/be/src/service/frontend.cc +++ b/be/src/service/frontend.cc @@ -82,6 +82,7 @@ Frontend::Frontend() { {"checkConfiguration", "()Ljava/lang/String;", &check_config_id_}, {"updateCatalogCache", "([B)[B", &update_catalog_cache_id_}, {"updateMembership", "([B)V", &update_membership_id_}, + {"getCatalogMetrics", "()[B", &get_catalog_metrics_id_}, {"getTableNames", "([B)[B", &get_table_names_id_}, {"describeDb", "([B)[B", &describe_db_id_}, {"describeTable", "([B)[B", &describe_table_id_}, @@ -152,6 +153,10 @@ Status Frontend::ShowCreateFunction(const TGetFunctionsParams& params, string* r return JniUtil::CallJniMethod(fe_, show_create_function_id_, params, response); } +Status Frontend::GetCatalogMetrics(TGetCatalogMetricsResult* resp) { + return JniUtil::CallJniMethod(fe_, get_catalog_metrics_id_, resp); +} + Status Frontend::GetTableNames(const string& db, const string* pattern, const TSessionState* session, TGetTablesResult* table_names) { TGetTablesParams params; http://git-wip-us.apache.org/repos/asf/impala/blob/2b6d71fe/be/src/service/frontend.h ---------------------------------------------------------------------- diff --git a/be/src/service/frontend.h b/be/src/service/frontend.h index 08123b3..77836ab 100644 --- a/be/src/service/frontend.h +++ b/be/src/service/frontend.h @@ -55,6 +55,9 @@ class Frontend { /// Call FE to get TExecRequest. Status GetExecRequest(const TQueryCtx& query_ctx, TExecRequest* result); + /// Get the metrics from the catalog used by this frontend. + Status GetCatalogMetrics(TGetCatalogMetricsResult* resp); + /// Returns all matching table names, per Hive's "SHOW TABLES <pattern>". Each /// table name returned is unqualified. /// If pattern is NULL, match all tables otherwise match only those tables that @@ -194,6 +197,7 @@ class Frontend { jmethodID check_config_id_; // JniFrontend.checkConfiguration() jmethodID update_catalog_cache_id_; // JniFrontend.updateCatalogCache(byte[][]) jmethodID update_membership_id_; // JniFrontend.updateMembership() + jmethodID get_catalog_metrics_id_; // JniFrontend.getCatalogMetrics() jmethodID get_table_names_id_; // JniFrontend.getTableNames jmethodID describe_db_id_; // JniFrontend.describeDb jmethodID describe_table_id_; // JniFrontend.describeTable http://git-wip-us.apache.org/repos/asf/impala/blob/2b6d71fe/be/src/service/impala-server.cc ---------------------------------------------------------------------- diff --git a/be/src/service/impala-server.cc b/be/src/service/impala-server.cc index e9ddbe4..c1f00e3 100644 --- a/be/src/service/impala-server.cc +++ b/be/src/service/impala-server.cc @@ -1154,17 +1154,10 @@ Status ImpalaServer::UnregisterQuery(const TUniqueId& query_id, bool check_infli } Status ImpalaServer::UpdateCatalogMetrics() { - TGetDbsResult dbs; - RETURN_IF_ERROR(exec_env_->frontend()->GetDbs(nullptr, nullptr, &dbs)); - ImpaladMetrics::CATALOG_NUM_DBS->SetValue(dbs.dbs.size()); - ImpaladMetrics::CATALOG_NUM_TABLES->SetValue(0L); - for (const TDatabase& db: dbs.dbs) { - TGetTablesResult table_names; - RETURN_IF_ERROR(exec_env_->frontend()->GetTableNames(db.db_name, nullptr, nullptr, - &table_names)); - ImpaladMetrics::CATALOG_NUM_TABLES->Increment(table_names.tables.size()); - } - + TGetCatalogMetricsResult metrics; + RETURN_IF_ERROR(exec_env_->frontend()->GetCatalogMetrics(&metrics)); + ImpaladMetrics::CATALOG_NUM_DBS->SetValue(metrics.num_dbs); + ImpaladMetrics::CATALOG_NUM_TABLES->SetValue(metrics.num_tables); return Status::OK(); } http://git-wip-us.apache.org/repos/asf/impala/blob/2b6d71fe/common/thrift/Frontend.thrift ---------------------------------------------------------------------- diff --git a/common/thrift/Frontend.thrift b/common/thrift/Frontend.thrift index 8d32b53..6cbbf79 100644 --- a/common/thrift/Frontend.thrift +++ b/common/thrift/Frontend.thrift @@ -98,6 +98,12 @@ struct TGetTableMetricsResponse { 1: required string metrics } +// Response from a call to getCatalogMetrics. +struct TGetCatalogMetricsResult { + 1: required i32 num_dbs + 2: required i32 num_tables +} + // Arguments to getDbs, which returns a list of dbs that match an optional pattern struct TGetDbsParams { // If not set, match every database http://git-wip-us.apache.org/repos/asf/impala/blob/2b6d71fe/fe/src/main/java/org/apache/impala/service/Frontend.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/org/apache/impala/service/Frontend.java b/fe/src/main/java/org/apache/impala/service/Frontend.java index ab330fd..9208184 100644 --- a/fe/src/main/java/org/apache/impala/service/Frontend.java +++ b/fe/src/main/java/org/apache/impala/service/Frontend.java @@ -114,6 +114,7 @@ import org.apache.impala.thrift.TExecRequest; import org.apache.impala.thrift.TExplainResult; import org.apache.impala.thrift.TFinalizeParams; import org.apache.impala.thrift.TFunctionCategory; +import org.apache.impala.thrift.TGetCatalogMetricsResult; import org.apache.impala.thrift.TGrantRevokePrivParams; import org.apache.impala.thrift.TGrantRevokeRoleParams; import org.apache.impala.thrift.TLineageGraph; @@ -614,6 +615,15 @@ public class Frontend { return stringBuilder.toString(); } + public TGetCatalogMetricsResult getCatalogMetrics() throws ImpalaException { + TGetCatalogMetricsResult resp = new TGetCatalogMetricsResult(); + for (FeDb db : getCatalog().getDbs(PatternMatcher.MATCHER_MATCH_ALL)) { + resp.num_dbs++; + resp.num_tables += db.getAllTableNames().size(); + } + return resp; + } + /** * Returns all tables in database 'dbName' that match the pattern of 'matcher' and are * accessible to 'user'. http://git-wip-us.apache.org/repos/asf/impala/blob/2b6d71fe/fe/src/main/java/org/apache/impala/service/JniFrontend.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/org/apache/impala/service/JniFrontend.java b/fe/src/main/java/org/apache/impala/service/JniFrontend.java index 25dee1c..ad8b165 100644 --- a/fe/src/main/java/org/apache/impala/service/JniFrontend.java +++ b/fe/src/main/java/org/apache/impala/service/JniFrontend.java @@ -68,6 +68,7 @@ import org.apache.impala.thrift.TDescriptorTable; import org.apache.impala.thrift.TExecRequest; import org.apache.impala.thrift.TFunctionCategory; import org.apache.impala.thrift.TGetAllHadoopConfigsResponse; +import org.apache.impala.thrift.TGetCatalogMetricsResult; import org.apache.impala.thrift.TGetDataSrcsParams; import org.apache.impala.thrift.TGetDataSrcsResult; import org.apache.impala.thrift.TGetDbsParams; @@ -224,6 +225,16 @@ public class JniFrontend { return plan; } + public byte[] getCatalogMetrics() throws ImpalaException { + TGetCatalogMetricsResult metrics = frontend_.getCatalogMetrics(); + TSerializer serializer = new TSerializer(protocolFactory_); + try { + return serializer.serialize(metrics); + } catch (TException e) { + throw new InternalException(e.getMessage()); + } + } + /** * Implement Hive's pattern-matching semantics for "SHOW TABLE [[LIKE] 'pattern']", and * return a list of table names matching an optional pattern.
