IMPALA-7531: Daemon level catalog cache metrics This patch adds the aggregated CatalogdMetaProvider cache stats to the catalog metrics on the coordinators. They can be accessed under <coordinator>:<web-port>/metrics#catalog.
These metrics are refreshed at the end of planning, for each query run. Testing: ------- Visual inspection by running a few queries locally and making sure stats are updated. Also modified existing tests to account for this behavior. Change-Id: I23c131b77ca84aa4df8919213bbd83944fa112a5 Reviewed-on: http://gerrit.cloudera.org:8080/11511 Reviewed-by: Bharath Vissapragada <bhara...@cloudera.com> Tested-by: Impala Public Jenkins <impala-public-jenk...@cloudera.com> Project: http://git-wip-us.apache.org/repos/asf/impala/repo Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/ac33c0c4 Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/ac33c0c4 Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/ac33c0c4 Branch: refs/heads/master Commit: ac33c0c42e1e7cc898893b1ae1f69c13287d20a8 Parents: e83fe23 Author: Bharath Vissapragada <bhara...@cloudera.com> Authored: Mon Sep 24 15:31:17 2018 -0700 Committer: Impala Public Jenkins <impala-public-jenk...@cloudera.com> Committed: Fri Sep 28 02:15:44 2018 +0000 ---------------------------------------------------------------------- be/src/service/impala-server.cc | 37 +++++- be/src/service/impala-server.h | 5 +- be/src/util/impalad-metrics.cc | 93 +++++++++++--- be/src/util/impalad-metrics.h | 58 +++++++++ common/thrift/Frontend.thrift | 14 +++ common/thrift/metrics.json | 120 +++++++++++++++++++ .../apache/impala/catalog/FeCatalogUtils.java | 37 ++++++ .../impala/catalog/local/LocalCatalog.java | 3 +- .../impala/catalog/local/MetaProvider.java | 2 +- .../org/apache/impala/service/Frontend.java | 2 + tests/custom_cluster/test_local_catalog.py | 61 ++++++++-- 11 files changed, 402 insertions(+), 30 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/impala/blob/ac33c0c4/be/src/service/impala-server.cc ---------------------------------------------------------------------- diff --git a/be/src/service/impala-server.cc b/be/src/service/impala-server.cc 
index c46bebe..af9414c 100644 --- a/be/src/service/impala-server.cc +++ b/be/src/service/impala-server.cc @@ -974,11 +974,9 @@ Status ImpalaServer::ExecuteInternal( // start execution of query; also starts fragment status reports RETURN_IF_ERROR((*request_state)->Exec(&result)); - if (result.stmt_type == TStmtType::DDL) { - Status status = UpdateCatalogMetrics(); - if (!status.ok()) { - VLOG_QUERY << "Couldn't update catalog metrics: " << status.GetDetail(); - } + Status status = UpdateCatalogMetrics(); + if (!status.ok()) { + VLOG_QUERY << "Couldn't update catalog metrics: " << status.GetDetail(); } if ((*request_state)->schedule() != nullptr) { @@ -1203,7 +1201,36 @@ Status ImpalaServer::UpdateCatalogMetrics() { RETURN_IF_ERROR(exec_env_->frontend()->GetCatalogMetrics(&metrics)); ImpaladMetrics::CATALOG_NUM_DBS->SetValue(metrics.num_dbs); ImpaladMetrics::CATALOG_NUM_TABLES->SetValue(metrics.num_tables); + if (!FLAGS_use_local_catalog) return Status::OK(); + DCHECK(metrics.__isset.cache_eviction_count); + DCHECK(metrics.__isset.cache_hit_count); + DCHECK(metrics.__isset.cache_load_count); + DCHECK(metrics.__isset.cache_load_exception_count); + DCHECK(metrics.__isset.cache_load_success_count); + DCHECK(metrics.__isset.cache_miss_count); + DCHECK(metrics.__isset.cache_request_count); + DCHECK(metrics.__isset.cache_total_load_time); + DCHECK(metrics.__isset.cache_avg_load_time); + DCHECK(metrics.__isset.cache_hit_rate); + DCHECK(metrics.__isset.cache_load_exception_rate); + DCHECK(metrics.__isset.cache_miss_rate); + ImpaladMetrics::CATALOG_CACHE_EVICTION_COUNT->SetValue(metrics.cache_eviction_count); + ImpaladMetrics::CATALOG_CACHE_HIT_COUNT->SetValue(metrics.cache_hit_count); + ImpaladMetrics::CATALOG_CACHE_LOAD_COUNT->SetValue(metrics.cache_load_count); + ImpaladMetrics::CATALOG_CACHE_LOAD_EXCEPTION_COUNT->SetValue( + metrics.cache_load_exception_count); + ImpaladMetrics::CATALOG_CACHE_LOAD_SUCCESS_COUNT->SetValue( + metrics.cache_load_success_count); + 
ImpaladMetrics::CATALOG_CACHE_MISS_COUNT->SetValue(metrics.cache_miss_count); + ImpaladMetrics::CATALOG_CACHE_REQUEST_COUNT->SetValue(metrics.cache_request_count); + ImpaladMetrics::CATALOG_CACHE_TOTAL_LOAD_TIME->SetValue(metrics.cache_total_load_time); + ImpaladMetrics::CATALOG_CACHE_AVG_LOAD_TIME->SetValue(metrics.cache_avg_load_time); + ImpaladMetrics::CATALOG_CACHE_HIT_RATE->SetValue(metrics.cache_hit_rate); + ImpaladMetrics::CATALOG_CACHE_LOAD_EXCEPTION_RATE->SetValue( + metrics.cache_load_exception_rate); + ImpaladMetrics::CATALOG_CACHE_MISS_RATE->SetValue(metrics.cache_miss_rate); return Status::OK(); + } Status ImpalaServer::CancelInternal(const TUniqueId& query_id, bool check_inflight, http://git-wip-us.apache.org/repos/asf/impala/blob/ac33c0c4/be/src/service/impala-server.h ---------------------------------------------------------------------- diff --git a/be/src/service/impala-server.h b/be/src/service/impala-server.h index 7db5ce4..1deb7c2 100644 --- a/be/src/service/impala-server.h +++ b/be/src/service/impala-server.h @@ -549,7 +549,10 @@ class ImpalaServer : public ImpalaServiceIf, std::shared_ptr<ClientRequestState> GetClientRequestState( const TUniqueId& query_id); - /// Updates the number of databases / tables metrics from the FE catalog + /// Updates a set of Impalad catalog metrics including the number of tables/databases and + /// some cache metrics applicable to local catalog mode (if configured). Called at + /// the end of statestore update callback (to account for metadata object changes) + /// and at the end of query planning to update local catalog cache access metrics. 
Status UpdateCatalogMetrics() WARN_UNUSED_RESULT; /// Depending on the query type, this either submits the query to the admission http://git-wip-us.apache.org/repos/asf/impala/blob/ac33c0c4/be/src/util/impalad-metrics.cc ---------------------------------------------------------------------- diff --git a/be/src/util/impalad-metrics.cc b/be/src/util/impalad-metrics.cc index b8bb90c..6ac9401 100644 --- a/be/src/util/impalad-metrics.cc +++ b/be/src/util/impalad-metrics.cc @@ -22,6 +22,8 @@ #include "common/names.h" +DECLARE_bool(use_local_catalog); + namespace impala { // Naming convention: Components should be separated by '.' and words should @@ -75,8 +77,28 @@ const char* ImpaladMetricKeys::CATALOG_NUM_TABLES = const char* ImpaladMetricKeys::CATALOG_VERSION = "catalog.curr-version"; const char* ImpaladMetricKeys::CATALOG_TOPIC_VERSION = "catalog.curr-topic"; const char* ImpaladMetricKeys::CATALOG_SERVICE_ID = "catalog.curr-serviceid"; -const char* ImpaladMetricKeys::CATALOG_READY = - "catalog.ready"; +const char* ImpaladMetricKeys::CATALOG_READY = "catalog.ready"; +const char* ImpaladMetricKeys::CATALOG_CACHE_AVG_LOAD_TIME = + "catalog.cache.average-load-time"; +const char* ImpaladMetricKeys::CATALOG_CACHE_EVICTION_COUNT = + "catalog.cache.eviction-count"; +const char* ImpaladMetricKeys::CATALOG_CACHE_HIT_COUNT = "catalog.cache.hit-count"; +const char* ImpaladMetricKeys::CATALOG_CACHE_HIT_RATE ="catalog.cache.hit-rate"; +const char* ImpaladMetricKeys::CATALOG_CACHE_LOAD_COUNT = "catalog.cache.load-count"; +const char* ImpaladMetricKeys::CATALOG_CACHE_LOAD_EXCEPTION_COUNT = + "catalog.cache.load-exception-count"; +const char* ImpaladMetricKeys::CATALOG_CACHE_LOAD_EXCEPTION_RATE = + "catalog.cache.load-exception-rate"; +const char* ImpaladMetricKeys::CATALOG_CACHE_LOAD_SUCCESS_COUNT = + "catalog.cache.load-success-count"; +const char* ImpaladMetricKeys::CATALOG_CACHE_MISS_COUNT = + "catalog.cache.miss-count"; +const char* ImpaladMetricKeys::CATALOG_CACHE_MISS_RATE = + 
"catalog.cache.miss-rate"; +const char* ImpaladMetricKeys::CATALOG_CACHE_REQUEST_COUNT = + "catalog.cache.request-count"; +const char* ImpaladMetricKeys::CATALOG_CACHE_TOTAL_LOAD_TIME = + "catalog.cache.total-load-time"; const char* ImpaladMetricKeys::NUM_FILES_OPEN_FOR_INSERT = "impala-server.num-files-open-for-insert"; const char* ImpaladMetricKeys::IMPALA_SERVER_NUM_OPEN_HS2_SESSIONS = @@ -122,6 +144,14 @@ IntCounter* ImpaladMetrics::IO_MGR_BYTES_WRITTEN = NULL; IntCounter* ImpaladMetrics::IO_MGR_CACHED_FILE_HANDLES_REOPENED = NULL; IntCounter* ImpaladMetrics::HEDGED_READ_OPS = NULL; IntCounter* ImpaladMetrics::HEDGED_READ_OPS_WIN = NULL; +IntCounter* ImpaladMetrics::CATALOG_CACHE_EVICTION_COUNT = NULL; +IntCounter* ImpaladMetrics::CATALOG_CACHE_HIT_COUNT = NULL; +IntCounter* ImpaladMetrics::CATALOG_CACHE_LOAD_COUNT = NULL; +IntCounter* ImpaladMetrics::CATALOG_CACHE_LOAD_EXCEPTION_COUNT = NULL; +IntCounter* ImpaladMetrics::CATALOG_CACHE_LOAD_SUCCESS_COUNT = NULL; +IntCounter* ImpaladMetrics::CATALOG_CACHE_MISS_COUNT = NULL; +IntCounter* ImpaladMetrics::CATALOG_CACHE_REQUEST_COUNT = NULL; +IntCounter* ImpaladMetrics::CATALOG_CACHE_TOTAL_LOAD_TIME = NULL; // Gauges IntGauge* ImpaladMetrics::CATALOG_NUM_DBS = NULL; @@ -143,6 +173,10 @@ IntGauge* ImpaladMetrics::NUM_FILES_OPEN_FOR_INSERT = NULL; IntGauge* ImpaladMetrics::NUM_QUERIES_REGISTERED = NULL; IntGauge* ImpaladMetrics::RESULTSET_CACHE_TOTAL_NUM_ROWS = NULL; IntGauge* ImpaladMetrics::RESULTSET_CACHE_TOTAL_BYTES = NULL; +DoubleGauge* ImpaladMetrics::CATALOG_CACHE_AVG_LOAD_TIME = NULL; +DoubleGauge* ImpaladMetrics::CATALOG_CACHE_HIT_RATE = NULL; +DoubleGauge* ImpaladMetrics::CATALOG_CACHE_LOAD_EXCEPTION_RATE = NULL; +DoubleGauge* ImpaladMetrics::CATALOG_CACHE_MISS_RATE = NULL; // Properties BooleanProperty* ImpaladMetrics::CATALOG_READY = NULL; @@ -158,6 +192,48 @@ HistogramMetric* ImpaladMetrics::DDL_DURATIONS = NULL; StatsMetric<uint64_t, StatsType::MEAN>* ImpaladMetrics::IO_MGR_CACHED_FILE_HANDLES_HIT_RATIO 
= NULL; +void ImpaladMetrics::InitCatalogMetrics(MetricGroup* m) { + // Initialize catalog metrics + MetricGroup* catalog_metrics = m->GetOrCreateChildGroup("catalog"); + CATALOG_NUM_DBS = catalog_metrics->AddGauge(ImpaladMetricKeys::CATALOG_NUM_DBS, 0); + CATALOG_NUM_TABLES = + catalog_metrics->AddGauge(ImpaladMetricKeys::CATALOG_NUM_TABLES, 0); + CATALOG_VERSION = catalog_metrics->AddGauge(ImpaladMetricKeys::CATALOG_VERSION, 0); + CATALOG_TOPIC_VERSION = + catalog_metrics->AddGauge(ImpaladMetricKeys::CATALOG_TOPIC_VERSION, 0); + CATALOG_SERVICE_ID = + catalog_metrics->AddProperty<string>(ImpaladMetricKeys::CATALOG_SERVICE_ID, ""); + CATALOG_READY = + catalog_metrics->AddProperty<bool>(ImpaladMetricKeys::CATALOG_READY, false); + // CatalogdMetaProvider cache metrics. Valid only when --use_local_catalog is set. + if (FLAGS_use_local_catalog) { + CATALOG_CACHE_AVG_LOAD_TIME = catalog_metrics->AddDoubleGauge( + ImpaladMetricKeys::CATALOG_CACHE_AVG_LOAD_TIME, 0); + CATALOG_CACHE_EVICTION_COUNT = + catalog_metrics->AddCounter(ImpaladMetricKeys::CATALOG_CACHE_EVICTION_COUNT, 0); + CATALOG_CACHE_HIT_COUNT = + catalog_metrics->AddCounter(ImpaladMetricKeys::CATALOG_CACHE_HIT_COUNT, 0); + CATALOG_CACHE_HIT_RATE = + catalog_metrics->AddDoubleGauge(ImpaladMetricKeys::CATALOG_CACHE_HIT_RATE, 0); + CATALOG_CACHE_LOAD_COUNT = + catalog_metrics->AddCounter(ImpaladMetricKeys::CATALOG_CACHE_LOAD_COUNT, 0); + CATALOG_CACHE_LOAD_EXCEPTION_COUNT = catalog_metrics->AddCounter( + ImpaladMetricKeys::CATALOG_CACHE_LOAD_EXCEPTION_COUNT, 0); + CATALOG_CACHE_LOAD_EXCEPTION_RATE = catalog_metrics->AddDoubleGauge( + ImpaladMetricKeys::CATALOG_CACHE_LOAD_EXCEPTION_RATE, 0); + CATALOG_CACHE_LOAD_SUCCESS_COUNT = catalog_metrics->AddCounter( + ImpaladMetricKeys::CATALOG_CACHE_LOAD_SUCCESS_COUNT, 0); + CATALOG_CACHE_MISS_COUNT = + catalog_metrics->AddCounter(ImpaladMetricKeys::CATALOG_CACHE_MISS_COUNT, 0); + CATALOG_CACHE_MISS_RATE = + 
catalog_metrics->AddDoubleGauge(ImpaladMetricKeys::CATALOG_CACHE_MISS_RATE, 0); + CATALOG_CACHE_REQUEST_COUNT = + catalog_metrics->AddCounter(ImpaladMetricKeys::CATALOG_CACHE_REQUEST_COUNT, 0); + CATALOG_CACHE_TOTAL_LOAD_TIME = + catalog_metrics->AddCounter(ImpaladMetricKeys::CATALOG_CACHE_TOTAL_LOAD_TIME, 0); + } +} + void ImpaladMetrics::CreateMetrics(MetricGroup* m) { // Initialize impalad metrics IMPALA_SERVER_VERSION = m->AddProperty<string>( @@ -235,18 +311,7 @@ void ImpaladMetrics::CreateMetrics(MetricGroup* m) { StatsMetric<uint64_t, StatsType::MEAN>::CreateAndRegister(m, ImpaladMetricKeys::IO_MGR_CACHED_FILE_HANDLES_HIT_RATIO); - // Initialize catalog metrics - MetricGroup* catalog_metrics = m->GetOrCreateChildGroup("catalog"); - CATALOG_NUM_DBS = catalog_metrics->AddGauge(ImpaladMetricKeys::CATALOG_NUM_DBS, 0); - CATALOG_NUM_TABLES = - catalog_metrics->AddGauge(ImpaladMetricKeys::CATALOG_NUM_TABLES, 0); - CATALOG_VERSION = catalog_metrics->AddGauge(ImpaladMetricKeys::CATALOG_VERSION, 0); - CATALOG_TOPIC_VERSION = - catalog_metrics->AddGauge(ImpaladMetricKeys::CATALOG_TOPIC_VERSION, 0); - CATALOG_SERVICE_ID = - catalog_metrics->AddProperty<string>(ImpaladMetricKeys::CATALOG_SERVICE_ID, ""); - CATALOG_READY = - catalog_metrics->AddProperty<bool>(ImpaladMetricKeys::CATALOG_READY, false); + InitCatalogMetrics(m); // Maximum duration to be tracked by the query durations metric. No particular reasoning // behind five hours, except to say that there's some threshold beyond which queries http://git-wip-us.apache.org/repos/asf/impala/blob/ac33c0c4/be/src/util/impalad-metrics.h ---------------------------------------------------------------------- diff --git a/be/src/util/impalad-metrics.h b/be/src/util/impalad-metrics.h index 7de7aa8..36d9d1b 100644 --- a/be/src/util/impalad-metrics.h +++ b/be/src/util/impalad-metrics.h @@ -121,6 +121,47 @@ class ImpaladMetricKeys { /// a catalog server with an unexpected ID. 
static const char* CATALOG_READY; + /// Average time spent loading new values into the Impalad Catalog Cache. + static const char* CATALOG_CACHE_AVG_LOAD_TIME; + + /// Total number of evictions from the Impalad Catalog Cache. Does not include manual + /// cache invalidate calls. + static const char* CATALOG_CACHE_EVICTION_COUNT; + + /// Total number of Impalad Catalog cache hits. + static const char* CATALOG_CACHE_HIT_COUNT; + + /// Ratio of Impalad Catalog cache requests that were hits. Accounts for all the + /// requests since the process boot time. + static const char* CATALOG_CACHE_HIT_RATE; + + /// Total number of Impalad Catalog cache requests that loaded new values. + static const char* CATALOG_CACHE_LOAD_COUNT; + + /// Total number of Impalad Catalog cache requests that threw exceptions loading + /// new values. + static const char* CATALOG_CACHE_LOAD_EXCEPTION_COUNT; + + /// Ratio of Impalad Catalog cache requests that threw exceptions loading new values. + /// Accounts for all the requests since the process boot time. + static const char* CATALOG_CACHE_LOAD_EXCEPTION_RATE; + + /// Number of Impalad Catalog cache requests that successfully loaded new values. + static const char* CATALOG_CACHE_LOAD_SUCCESS_COUNT; + + /// Number of Impalad Catalog cache requests that returned uncached values. + static const char* CATALOG_CACHE_MISS_COUNT; + + /// Ratio of Impalad Catalog cache requests that were misses. Accounts for all the + /// requests since the process boot time. + static const char* CATALOG_CACHE_MISS_RATE; + + /// Total number of Impalad Catalog cache requests. + static const char* CATALOG_CACHE_REQUEST_COUNT; + + /// Total time spent in Impalad Catalog cache loading new values. 
+ static const char* CATALOG_CACHE_TOTAL_LOAD_TIME; + /// Number of files open for insert static const char* NUM_FILES_OPEN_FOR_INSERT; @@ -180,12 +221,24 @@ class ImpaladMetrics { static IntCounter* IO_MGR_CACHED_FILE_HANDLES_REOPENED; static IntCounter* HEDGED_READ_OPS; static IntCounter* HEDGED_READ_OPS_WIN; + static IntCounter* CATALOG_CACHE_EVICTION_COUNT; + static IntCounter* CATALOG_CACHE_HIT_COUNT; + static IntCounter* CATALOG_CACHE_LOAD_COUNT; + static IntCounter* CATALOG_CACHE_LOAD_EXCEPTION_COUNT; + static IntCounter* CATALOG_CACHE_LOAD_SUCCESS_COUNT; + static IntCounter* CATALOG_CACHE_MISS_COUNT; + static IntCounter* CATALOG_CACHE_REQUEST_COUNT; + static IntCounter* CATALOG_CACHE_TOTAL_LOAD_TIME; // Gauges static IntGauge* CATALOG_NUM_DBS; static IntGauge* CATALOG_NUM_TABLES; static IntGauge* CATALOG_VERSION; static IntGauge* CATALOG_TOPIC_VERSION; + static DoubleGauge* CATALOG_CACHE_AVG_LOAD_TIME; + static DoubleGauge* CATALOG_CACHE_HIT_RATE; + static DoubleGauge* CATALOG_CACHE_LOAD_EXCEPTION_RATE; + static DoubleGauge* CATALOG_CACHE_MISS_RATE; static IntGauge* IMPALA_SERVER_NUM_OPEN_BEESWAX_SESSIONS; static IntGauge* IMPALA_SERVER_NUM_OPEN_HS2_SESSIONS; static IntGauge* IO_MGR_NUM_BUFFERS; @@ -201,6 +254,7 @@ class ImpaladMetrics { static IntGauge* NUM_QUERIES_REGISTERED; static IntGauge* RESULTSET_CACHE_TOTAL_NUM_ROWS; static IntGauge* RESULTSET_CACHE_TOTAL_BYTES; + // Properties static BooleanProperty* CATALOG_READY; static BooleanProperty* IMPALA_SERVER_READY; @@ -215,6 +269,10 @@ class ImpaladMetrics { // Creates and initializes all metrics above in 'm'. static void CreateMetrics(MetricGroup* m); + + private: + // Initializes the metrics for this coordinator's metadata catalog. 
+ static void InitCatalogMetrics(MetricGroup* m); }; http://git-wip-us.apache.org/repos/asf/impala/blob/ac33c0c4/common/thrift/Frontend.thrift ---------------------------------------------------------------------- diff --git a/common/thrift/Frontend.thrift b/common/thrift/Frontend.thrift index 86588d3..48e915a 100644 --- a/common/thrift/Frontend.thrift +++ b/common/thrift/Frontend.thrift @@ -102,6 +102,20 @@ struct TGetTableMetricsResponse { struct TGetCatalogMetricsResult { 1: required i32 num_dbs 2: required i32 num_tables + // Following cache metrics are set only in local catalog mode. These map to Guava's + // CacheStats. Accounts for all the cache requests since the process boot time. + 3: optional i64 cache_eviction_count + 4: optional i64 cache_hit_count + 5: optional i64 cache_load_count + 6: optional i64 cache_load_exception_count + 7: optional i64 cache_load_success_count + 8: optional i64 cache_miss_count + 9: optional i64 cache_request_count + 10: optional i64 cache_total_load_time + 11: optional double cache_avg_load_time + 12: optional double cache_hit_rate + 13: optional double cache_load_exception_rate + 14: optional double cache_miss_rate } // Arguments to getDbs, which returns a list of dbs that match an optional pattern http://git-wip-us.apache.org/repos/asf/impala/blob/ac33c0c4/common/thrift/metrics.json ---------------------------------------------------------------------- diff --git a/common/thrift/metrics.json b/common/thrift/metrics.json index f0cdcb6..c40716a 100644 --- a/common/thrift/metrics.json +++ b/common/thrift/metrics.json @@ -1740,5 +1740,125 @@ "units": "NONE", "kind": "PROPERTY", "key": "tzdata-path" + }, + { + "description": "Average time spent loading new values into the Impalad Catalog Cache.", + "contexts": [ + "IMPALAD" + ], + "label": "Average Impalad catalog cache load time", + "units": "TIME_NS", + "kind": "GAUGE", + "key": "catalog.cache.average-load-time" + }, + { + "description": "Total number of evictions from the 
Impalad Catalog Cache.", + "contexts": [ + "IMPALAD" + ], + "label": "Impalad catalog cache eviction count", + "units": "NONE", + "kind": "COUNTER", + "key": "catalog.cache.eviction-count" + }, + { + "description": "Total number of Impalad Catalog cache hits.", + "contexts": [ + "IMPALAD" + ], + "label": "Impalad catalog cache hit count", + "units": "NONE", + "kind": "COUNTER", + "key": "catalog.cache.hit-count" + }, + { + "description": "Ratio of Impalad Catalog cache requests that were hits.", + "contexts": [ + "IMPALAD" + ], + "label": "Impalad catalog cache hit rate", + "units": "NONE", + "kind": "GAUGE", + "key": "catalog.cache.hit-rate" + }, + { + "description": "Total requests to Impalad Catalog cache requests that loaded new values.", + "contexts": [ + "IMPALAD" + ], + "label": "Impalad catalog cache load count", + "units": "NONE", + "kind": "COUNTER", + "key": "catalog.cache.load-count" + }, + { + "description": "Total requests to Impalad Catalog cache requests that threw exceptions loading new values.", + "contexts": [ + "IMPALAD" + ], + "label": "Impalad catalog cache load exception count", + "units": "NONE", + "kind": "COUNTER", + "key": "catalog.cache.load-exception-count" + }, + { + "description": "Ratio of Impalad Catalog cache requests that threw exceptions loading new values.", + "contexts": [ + "IMPALAD" + ], + "label": "Impalad catalog cache load exception rate", + "units": "NONE", + "kind": "GAUGE", + "key": "catalog.cache.load-exception-rate" + }, + { + "description": "Number of Impalad Catalog cache requests that successfully loaded new values.", + "contexts": [ + "IMPALAD" + ], + "label": "Impalad catalog cache load success count", + "units": "NONE", + "kind": "COUNTER", + "key": "catalog.cache.load-success-count" + }, + { + "description": "Number of Impalad Catalog cache requests that returned uncached values.", + "contexts": [ + "IMPALAD" + ], + "label": "Impalad catalog cache load miss count", + "units": "NONE", + "kind": "COUNTER", + 
"key": "catalog.cache.miss-count" + }, + { + "description": "Ratio of Impalad Catalog cache requests that were misses.", + "contexts": [ + "IMPALAD" + ], + "label": "Impalad catalog cache load miss rate", + "units": "NONE", + "kind": "GAUGE", + "key": "catalog.cache.miss-rate" + }, + { + "description": "Total number of Impalad Catalog cache requests.", + "contexts": [ + "IMPALAD" + ], + "label": "Impalad catalog cache request count", + "units": "NONE", + "kind": "COUNTER", + "key": "catalog.cache.request-count" + }, + { + "description": "Total time spent in Impalad Catalog cache loading new values.", + "contexts": [ + "IMPALAD" + ], + "label": "Impalad catalog cache time spent in loads", + "units": "TIME_NS", + "kind": "COUNTER", + "key": "catalog.cache.total-load-time" } ] http://git-wip-us.apache.org/repos/asf/impala/blob/ac33c0c4/fe/src/main/java/org/apache/impala/catalog/FeCatalogUtils.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/org/apache/impala/catalog/FeCatalogUtils.java b/fe/src/main/java/org/apache/impala/catalog/FeCatalogUtils.java index 0e76a37..8c865fe 100644 --- a/fe/src/main/java/org/apache/impala/catalog/FeCatalogUtils.java +++ b/fe/src/main/java/org/apache/impala/catalog/FeCatalogUtils.java @@ -23,6 +23,7 @@ import java.util.List; import java.util.Map; import java.util.Set; +import com.google.common.cache.CacheStats; import org.apache.hadoop.hive.common.FileUtils; import org.apache.hadoop.hive.common.StatsSetupConst; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; @@ -32,9 +33,14 @@ import org.apache.impala.analysis.LiteralExpr; import org.apache.impala.analysis.NullLiteral; import org.apache.impala.analysis.PartitionKeyValue; import org.apache.impala.analysis.ToSqlUtils; +import org.apache.impala.catalog.local.CatalogdMetaProvider; import org.apache.impala.catalog.CatalogObject.ThriftObjectType; import org.apache.impala.catalog.HdfsPartition.FileDescriptor; +import 
org.apache.impala.catalog.local.LocalCatalog; +import org.apache.impala.catalog.local.MetaProvider; +import org.apache.impala.service.BackendConfig; import org.apache.impala.thrift.TColumnDescriptor; +import org.apache.impala.thrift.TGetCatalogMetricsResult; import org.apache.impala.thrift.THdfsPartition; import org.apache.impala.thrift.TTableStats; import org.slf4j.Logger; @@ -348,4 +354,35 @@ public abstract class FeCatalogUtils { } return thriftHdfsPart; } + + /** + * Populates cache metrics in the input TGetCatalogMetricsResult object. + * No-op if CatalogdMetaProvider is not the configured metadata provider. + */ + public static void populateCacheMetrics( + FeCatalog catalog, TGetCatalogMetricsResult metrics) { + Preconditions.checkNotNull(catalog); + Preconditions.checkNotNull(metrics); + // Populate cache stats only if configured in local mode. + if (!BackendConfig.INSTANCE.getBackendCfg().use_local_catalog) return; + Preconditions.checkState(catalog instanceof LocalCatalog); + MetaProvider provider = ((LocalCatalog) catalog).getMetaProvider(); + if (!(provider instanceof CatalogdMetaProvider)) return; + + CacheStats stats = ((CatalogdMetaProvider) provider).getCacheStats(); + metrics.setCache_eviction_count(stats.evictionCount()); + metrics.setCache_hit_count(stats.hitCount()); + metrics.setCache_load_count(stats.loadCount()); + metrics.setCache_load_exception_count(stats.loadExceptionCount()); + metrics.setCache_load_success_count(stats.loadSuccessCount()); + metrics.setCache_miss_count(stats.missCount()); + metrics.setCache_request_count(stats.requestCount()); + metrics.setCache_total_load_time(stats.totalLoadTime()); + metrics.setCache_avg_load_time(stats.averageLoadPenalty()); + metrics.setCache_hit_rate(stats.hitRate()); + metrics.setCache_load_exception_rate(stats.loadExceptionRate()); + metrics.setCache_miss_rate(stats.missRate()); + } + + } 
http://git-wip-us.apache.org/repos/asf/impala/blob/ac33c0c4/fe/src/main/java/org/apache/impala/catalog/local/LocalCatalog.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/org/apache/impala/catalog/local/LocalCatalog.java b/fe/src/main/java/org/apache/impala/catalog/local/LocalCatalog.java index 683ed18..96e3325 100644 --- a/fe/src/main/java/org/apache/impala/catalog/local/LocalCatalog.java +++ b/fe/src/main/java/org/apache/impala/catalog/local/LocalCatalog.java @@ -38,7 +38,6 @@ import org.apache.impala.catalog.FeTable; import org.apache.impala.catalog.Function; import org.apache.impala.catalog.Function.CompareMode; import org.apache.impala.common.InternalException; -import org.apache.impala.service.FeSupport; import org.apache.impala.catalog.HdfsCachePool; import org.apache.impala.catalog.PartitionNotFoundException; import org.apache.impala.catalog.PrunablePartition; @@ -256,7 +255,7 @@ public class LocalCatalog implements FeCatalog { // This appears to only be used in some tests. } - MetaProvider getMetaProvider() { + public MetaProvider getMetaProvider() { return metaProvider_; } } http://git-wip-us.apache.org/repos/asf/impala/blob/ac33c0c4/fe/src/main/java/org/apache/impala/catalog/local/MetaProvider.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/org/apache/impala/catalog/local/MetaProvider.java b/fe/src/main/java/org/apache/impala/catalog/local/MetaProvider.java index 60739d3..ade99bc 100644 --- a/fe/src/main/java/org/apache/impala/catalog/local/MetaProvider.java +++ b/fe/src/main/java/org/apache/impala/catalog/local/MetaProvider.java @@ -44,7 +44,7 @@ import com.google.errorprone.annotations.Immutable; * Implementations may directly access the metadata from the source systems * or may include caching, etc. */ -interface MetaProvider { +public interface MetaProvider { /** * Get the authorization policy. 
This acts as a repository of authorization http://git-wip-us.apache.org/repos/asf/impala/blob/ac33c0c4/fe/src/main/java/org/apache/impala/service/Frontend.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/org/apache/impala/service/Frontend.java b/fe/src/main/java/org/apache/impala/service/Frontend.java index 847e8b1..fad530c 100644 --- a/fe/src/main/java/org/apache/impala/service/Frontend.java +++ b/fe/src/main/java/org/apache/impala/service/Frontend.java @@ -77,6 +77,7 @@ import org.apache.impala.catalog.CatalogException; import org.apache.impala.catalog.Column; import org.apache.impala.catalog.DatabaseNotFoundException; import org.apache.impala.catalog.FeCatalog; +import org.apache.impala.catalog.FeCatalogUtils; import org.apache.impala.catalog.FeDataSource; import org.apache.impala.catalog.FeDataSourceTable; import org.apache.impala.catalog.FeDb; @@ -650,6 +651,7 @@ public class Frontend { resp.num_dbs++; resp.num_tables += db.getAllTableNames().size(); } + FeCatalogUtils.populateCacheMetrics(getCatalog(), resp); return resp; } http://git-wip-us.apache.org/repos/asf/impala/blob/ac33c0c4/tests/custom_cluster/test_local_catalog.py ---------------------------------------------------------------------- diff --git a/tests/custom_cluster/test_local_catalog.py b/tests/custom_cluster/test_local_catalog.py index d6bd9b6..7ac7783 100644 --- a/tests/custom_cluster/test_local_catalog.py +++ b/tests/custom_cluster/test_local_catalog.py @@ -27,6 +27,23 @@ from tests.common.custom_cluster_test_suite import CustomClusterTestSuite RETRY_PROFILE_MSG = 'Retrying query planning due to inconsistent metadata' class TestCompactCatalogUpdates(CustomClusterTestSuite): + + def get_catalog_cache_metrics(self, impalad): + """ Returns catalog cache metrics as a dict by scraping the json metrics page on the + given impalad""" + child_groups =\ + impalad.service.get_debug_webpage_json('metrics')['metric_group']['child_groups'] + for 
group in child_groups: + if group['name'] != 'impala-server': continue + # Filter catalog cache metrics. + for child_group in group['child_groups']: + if child_group['name'] != 'catalog': continue + metrics_data = [(metric['name'], metric['value']) + for metric in child_group['metrics'] if 'catalog.cache' in metric['name']] + return dict(metrics_data) + assert False, "Catalog cache metrics not found in %s" % child_groups + + @pytest.mark.execute_serially @CustomClusterTestSuite.with_args( impalad_args="--use_local_catalog=true", @@ -215,15 +232,45 @@ class TestCompactCatalogUpdates(CustomClusterTestSuite): @CustomClusterTestSuite.with_args( impalad_args="--use_local_catalog=true", catalogd_args="--catalog_topic_mode=minimal") - def test_cache_profile_metrics(self): + def test_cache_metrics(self, unique_database): """ - Test that profile output includes impalad local cache metrics. + Test that profile output includes impalad local cache metrics. Also verifies that + the daemon level metrics are updated between query runs. """ try: - client = self.cluster.impalads[0].service.create_beeswax_client() - query = "select count(*) from functional.alltypes" - ret = self.execute_query_expect_success(client, query) - assert ret.runtime_profile.count("Frontend:") == 1 - assert ret.runtime_profile.count("CatalogFetch") > 1 + impalad = self.cluster.impalads[0] + client = impalad.service.create_beeswax_client() + cache_hit_rate_metric_key = "catalog.cache.hit-rate" + cache_miss_rate_metric_key = "catalog.cache.miss-rate" + cache_hit_count_metric_key = "catalog.cache.hit-count" + cache_request_count_metric_key = "catalog.cache.request-count" + cache_request_count_prev_run = 0 + cache_hit_count_prev_run = 0 + test_table_name = "%s.test_cache_metrics_test_tbl" % unique_database + # A mix of queries of various types. 
+ queries_to_test = ["select count(*) from functional.alltypes", + "explain select count(*) from functional.alltypes", + "create table %s (a int)" % test_table_name, + "drop table %s" % test_table_name] + for _ in xrange(0, 10): + for query in queries_to_test: + ret = self.execute_query_expect_success(client, query) + assert ret.runtime_profile.count("Frontend:") == 1 + assert ret.runtime_profile.count("CatalogFetch") > 1 + cache_metrics = self.get_catalog_cache_metrics(impalad) + cache_hit_rate = cache_metrics[cache_hit_rate_metric_key] + cache_miss_rate = cache_metrics[cache_miss_rate_metric_key] + cache_hit_count = cache_metrics[cache_hit_count_metric_key] + cache_request_count = cache_metrics[cache_request_count_metric_key] + assert cache_hit_rate > 0.0 and cache_hit_rate < 1.0 + assert cache_miss_rate > 0.0 and cache_miss_rate < 1.0 + assert cache_hit_count > cache_hit_count_prev_run,\ + "%s not updated between two query runs, query - %s"\ + % (cache_hit_count_metric_key, query) + assert cache_request_count > cache_request_count_prev_run,\ + "%s not updated betweeen two query runs, query - %s"\ + % (cache_request_count_metric_key, query) + cache_hit_count_prev_run = cache_hit_count + cache_request_count_prev_run = cache_request_count finally: client.close()