This is an automated email from the ASF dual-hosted git repository. chhsiao pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/mesos.git
commit ae67c324a40222529e16f40765814f5343385d03 Author: Chun-Hung Hsiao <[email protected]> AuthorDate: Mon Apr 1 23:24:02 2019 -0700 Made CSI plugin RPC metrics agnostic to CSI versions. Since the fine-grained version-specific per-CSI-call metrics are not very useful for operators, and most informations are highly correlated to per-operation metrics, these metrics are aggregated and made CSI version agnostic. Review: https://reviews.apache.org/r/70225/ --- docs/monitoring.md | 50 ++---- src/csi/metrics.cpp | 100 ++---------- src/csi/metrics.hpp | 10 +- src/csi/service_manager.cpp | 10 +- src/csi/v0_volume_manager.cpp | 10 +- .../storage_local_resource_provider_tests.cpp | 176 +++++---------------- 6 files changed, 72 insertions(+), 284 deletions(-) diff --git a/docs/monitoring.md b/docs/monitoring.md index 239077b..027580f 100644 --- a/docs/monitoring.md +++ b/docs/monitoring.md @@ -2220,9 +2220,7 @@ Storage resource providers in Mesos are backed by [standalone containers](standalone-container.md). To monitor the health of these CSI plugins for a storage resource provider with _type_ and _name_, the following metrics provide information about plugin terminations and ongoing and -completed CSI calls made to the plugin. In the following metrics, the _rpc_ -placeholder refers to the name of a particular CSI call, which is described in -the list of [supported CSI calls](#supported-csi-calls). +completed CSI calls made to the plugin. <table class="table table-striped"> <thead> @@ -2237,60 +2235,30 @@ the list of [supported CSI calls](#supported-csi-calls). </tr> <tr> <td> - <code>resource_providers/<i><type></i>.<i><name></i>/csi_plugin/rpcs/<i><rpc></i>/pending</code> + <code>resource_providers/<i><type></i>.<i><name></i>/csi_plugin/rpcs_pending</code> </td> - <td>Number of ongoing <i>rpc</i> calls</td> + <td>Number of ongoing CSI calls</td> <td>Gauge</td> </tr> <tr> <td> - <code>resource_providers/<i><type></i>.<i><name></i>/csi_plugin/rpcs/<i><rpc></i>/successes</code> + <code>resource_providers/<i><type></i>.<i><name></i>/csi_plugin/rpcs_finished</code> </td> - <td>Number of successful <i>rpc</i> calls</td> + <td>Number of successful CSI calls</td> <td>Counter</td> </tr> <tr> <td> - <code>resource_providers/<i><type></i>.<i><name></i>/csi_plugin/rpcs/<i><rpc></i>/errors</code> + <code>resource_providers/<i><type></i>.<i><name></i>/csi_plugin/rpcs_failed</code> </td> - <td>Number of erroneous <i>rpc</i> calls</td> + <td>Number of failed CSI calls</td> <td>Counter</td> </tr> <tr> <td> - <code>resource_providers/<i><type></i>.<i><name></i>/csi_plugin/rpcs/<i><rpc></i>/cancelled</code> + <code>resource_providers/<i><type></i>.<i><name></i>/csi_plugin/rpcs_cancelled</code> </td> - <td>Number of cancelled <i>rpc</i> calls</td> + <td>Number of cancelled CSI calls</td> <td>Counter</td> </tr> </table> - -##### Supported CSI Calls - -The following is a comprehensive list of CSI calls that are used in storage -resource providers. These names are used to replace the _rpc_ placeholder in the -above metrics. - -* [`csi.v0.Identity.GetPluginInfo`](https://github.com/container-storage-interface/spec/blob/v0.2.0/spec.md#getplugininfo) -* [`csi.v0.Identity.GetPluginCapabilities`](https://github.com/container-storage-interface/spec/blob/v0.2.0/spec.md#getplugincapabilities) -* [`csi.v0.Identity.Probe`](https://github.com/container-storage-interface/spec/blob/v0.2.0/spec.md#probe) -* [`csi.v0.Controller.CreateVolume`](https://github.com/container-storage-interface/spec/blob/v0.2.0/spec.md#createvolume) -* [`csi.v0.Controller.DeleteVolume`](https://github.com/container-storage-interface/spec/blob/v0.2.0/spec.md#deletevolume) -* [`csi.v0.Controller.ControllerPublishVolume`](https://github.com/container-storage-interface/spec/blob/v0.2.0/spec.md#controllerpublishvolume) -* [`csi.v0.Controller.ControllerUnpublishVolume`](https://github.com/container-storage-interface/spec/blob/v0.2.0/spec.md#controllerunpublishvolume) -* [`csi.v0.Controller.ValidateVolumeCapabilities`](https://github.com/container-storage-interface/spec/blob/v0.2.0/spec.md#validatevolumecapabilities) -* [`csi.v0.Controller.ListVolumes`](https://github.com/container-storage-interface/spec/blob/v0.2.0/spec.md#listvolumes) -* [`csi.v0.Controller.GetCapacity`](https://github.com/container-storage-interface/spec/blob/v0.2.0/spec.md#getcapacity) -* [`csi.v0.Controller.ControllerGetCapabilities`](https://github.com/container-storage-interface/spec/blob/v0.2.0/spec.md#controllergetcapabilities) -* [`csi.v0.Node.NodeStageVolume`](https://github.com/container-storage-interface/spec/blob/v0.2.0/spec.md#node-service-rpc) -* [`csi.v0.Node.NodeUnstageVolume`](https://github.com/container-storage-interface/spec/blob/v0.2.0/spec.md#nodeunstagevolume) -* [`csi.v0.Node.NodePublishVolume`](https://github.com/container-storage-interface/spec/blob/v0.2.0/spec.md#nodepublishvolume) -* [`csi.v0.Node.NodeUnpublishVolume`](https://github.com/container-storage-interface/spec/blob/v0.2.0/spec.md#nodeunpublishvolume) -* [`csi.v0.Node.NodeGetId`](https://github.com/container-storage-interface/spec/blob/v0.2.0/spec.md#nodegetid) -* [`csi.v0.Node.NodeGetCapabilities`](https://github.com/container-storage-interface/spec/blob/v0.2.0/spec.md#nodegetcapabilities) - -For example, cluster operators can monitor the number of successful -`csi.v0.Controller.CreateVolume` calls that are made by the resource provider -with type `org.apache.mesos.rp.local.storage` and name `lvm` through the -`resource_providers/org.apache.mesos.rp.local.storage.lvm/csi_plugin/rpcs/csi.v0.Controller.CreateVolume/successes` -metric. diff --git a/src/csi/metrics.cpp b/src/csi/metrics.cpp index 1ef591e..e2859f6 100644 --- a/src/csi/metrics.cpp +++ b/src/csi/metrics.cpp @@ -16,110 +16,36 @@ #include "csi/metrics.hpp" -#include <vector> - #include <process/metrics/metrics.hpp> -#include <stout/foreach.hpp> -#include <stout/stringify.hpp> - using std::string; -using std::vector; - -using process::metrics::Counter; -using process::metrics::PushGauge; namespace mesos { namespace csi { Metrics::Metrics(const string& prefix) : csi_plugin_container_terminations( - prefix + "csi_plugin/container_terminations") + prefix + "csi_plugin/container_terminations"), + csi_plugin_rpcs_pending(prefix + "csi_plugin/rpcs_pending"), + csi_plugin_rpcs_finished(prefix + "csi_plugin/rpcs_finished"), + csi_plugin_rpcs_failed(prefix + "csi_plugin/rpcs_failed"), + csi_plugin_rpcs_cancelled(prefix + "csi_plugin/rpcs_cancelled") { process::metrics::add(csi_plugin_container_terminations); - - vector<csi::v0::RPC> rpcs; - - // NOTE: We use a switch statement here as a compile-time sanity check so we - // won't forget to add metrics for new RPCs in the future. Since each case - // falls through intentionally, every RPC will be added. - csi::v0::RPC firstRpc = csi::v0::GET_PLUGIN_INFO; - switch (firstRpc) { - case csi::v0::GET_PLUGIN_INFO: - rpcs.push_back(csi::v0::GET_PLUGIN_INFO); - case csi::v0::GET_PLUGIN_CAPABILITIES: - rpcs.push_back(csi::v0::GET_PLUGIN_CAPABILITIES); - case csi::v0::PROBE: - rpcs.push_back(csi::v0::PROBE); - case csi::v0::CREATE_VOLUME: - rpcs.push_back(csi::v0::CREATE_VOLUME); - case csi::v0::DELETE_VOLUME: - rpcs.push_back(csi::v0::DELETE_VOLUME); - case csi::v0::CONTROLLER_PUBLISH_VOLUME: - rpcs.push_back(csi::v0::CONTROLLER_PUBLISH_VOLUME); - case csi::v0::CONTROLLER_UNPUBLISH_VOLUME: - rpcs.push_back(csi::v0::CONTROLLER_UNPUBLISH_VOLUME); - case csi::v0::VALIDATE_VOLUME_CAPABILITIES: - rpcs.push_back(csi::v0::VALIDATE_VOLUME_CAPABILITIES); - case csi::v0::LIST_VOLUMES: - rpcs.push_back(csi::v0::LIST_VOLUMES); - case csi::v0::GET_CAPACITY: - rpcs.push_back(csi::v0::GET_CAPACITY); - case csi::v0::CONTROLLER_GET_CAPABILITIES: - rpcs.push_back(csi::v0::CONTROLLER_GET_CAPABILITIES); - case csi::v0::NODE_STAGE_VOLUME: - rpcs.push_back(csi::v0::NODE_STAGE_VOLUME); - case csi::v0::NODE_UNSTAGE_VOLUME: - rpcs.push_back(csi::v0::NODE_UNSTAGE_VOLUME); - case csi::v0::NODE_PUBLISH_VOLUME: - rpcs.push_back(csi::v0::NODE_PUBLISH_VOLUME); - case csi::v0::NODE_UNPUBLISH_VOLUME: - rpcs.push_back(csi::v0::NODE_UNPUBLISH_VOLUME); - case csi::v0::NODE_GET_ID: - rpcs.push_back(csi::v0::NODE_GET_ID); - case csi::v0::NODE_GET_CAPABILITIES: - rpcs.push_back(csi::v0::NODE_GET_CAPABILITIES); - } - - foreach (const csi::v0::RPC& rpc, rpcs) { - const string name = stringify(rpc); - - csi_plugin_rpcs_pending.put( - rpc, PushGauge(prefix + "csi_plugin/rpcs/" + name + "/pending")); - csi_plugin_rpcs_successes.put( - rpc, Counter(prefix + "csi_plugin/rpcs/" + name + "/successes")); - csi_plugin_rpcs_errors.put( - rpc, Counter(prefix + "csi_plugin/rpcs/" + name + "/errors")); - csi_plugin_rpcs_cancelled.put( - rpc, Counter(prefix + "csi_plugin/rpcs/" + name + "/cancelled")); - - process::metrics::add(csi_plugin_rpcs_pending.at(rpc)); - process::metrics::add(csi_plugin_rpcs_successes.at(rpc)); - process::metrics::add(csi_plugin_rpcs_errors.at(rpc)); - process::metrics::add(csi_plugin_rpcs_cancelled.at(rpc)); - } + process::metrics::add(csi_plugin_rpcs_pending); + process::metrics::add(csi_plugin_rpcs_finished); + process::metrics::add(csi_plugin_rpcs_failed); + process::metrics::add(csi_plugin_rpcs_cancelled); } Metrics::~Metrics() { process::metrics::remove(csi_plugin_container_terminations); - - foreachvalue (const PushGauge& gauge, csi_plugin_rpcs_pending) { - process::metrics::remove(gauge); - } - - foreachvalue (const Counter& counter, csi_plugin_rpcs_successes) { - process::metrics::remove(counter); - } - - foreachvalue (const Counter& counter, csi_plugin_rpcs_errors) { - process::metrics::remove(counter); - } - - foreachvalue (const Counter& counter, csi_plugin_rpcs_cancelled) { - process::metrics::remove(counter); - } + process::metrics::remove(csi_plugin_rpcs_pending); + process::metrics::remove(csi_plugin_rpcs_finished); + process::metrics::remove(csi_plugin_rpcs_failed); + process::metrics::remove(csi_plugin_rpcs_cancelled); } } // namespace csi { diff --git a/src/csi/metrics.hpp b/src/csi/metrics.hpp index 1892cdd..2b228bf 100644 --- a/src/csi/metrics.hpp +++ b/src/csi/metrics.hpp @@ -22,8 +22,6 @@ #include <process/metrics/counter.hpp> #include <process/metrics/push_gauge.hpp> -#include "csi/rpc.hpp" - namespace mesos { namespace csi { @@ -34,10 +32,10 @@ struct Metrics ~Metrics(); process::metrics::Counter csi_plugin_container_terminations; - hashmap<csi::v0::RPC, process::metrics::PushGauge> csi_plugin_rpcs_pending; - hashmap<csi::v0::RPC, process::metrics::Counter> csi_plugin_rpcs_successes; - hashmap<csi::v0::RPC, process::metrics::Counter> csi_plugin_rpcs_errors; - hashmap<csi::v0::RPC, process::metrics::Counter> csi_plugin_rpcs_cancelled; + process::metrics::PushGauge csi_plugin_rpcs_pending; + process::metrics::Counter csi_plugin_rpcs_finished; + process::metrics::Counter csi_plugin_rpcs_failed; + process::metrics::Counter csi_plugin_rpcs_cancelled; }; } // namespace csi { diff --git a/src/csi/service_manager.cpp b/src/csi/service_manager.cpp index 27de0c9..9f715d6 100644 --- a/src/csi/service_manager.cpp +++ b/src/csi/service_manager.cpp @@ -491,7 +491,7 @@ Future<Nothing> ServiceManagerProcess::waitEndpoint(const string& endpoint) .then(process::defer(self(), [=]() -> Future<Nothing> { // TODO(chhsiao): Detect which CSI version to use through versioned // `Probe` calls to support CSI v1 in a backward compatible way. - ++metrics->csi_plugin_rpcs_pending.at(v0::PROBE); + ++metrics->csi_plugin_rpcs_pending; return v0::Client(endpoint, runtime) .call<v0::PROBE>(v0::ProbeRequest()) @@ -507,13 +507,13 @@ Future<Nothing> ServiceManagerProcess::waitEndpoint(const string& endpoint) return Nothing(); })) .onAny(process::defer(self(), [this](const Future<Nothing>& future) { - --metrics->csi_plugin_rpcs_pending.at(v0::PROBE); + --metrics->csi_plugin_rpcs_pending; if (future.isReady()) { - ++metrics->csi_plugin_rpcs_successes.at(v0::PROBE); + ++metrics->csi_plugin_rpcs_finished; } else if (future.isDiscarded()) { - ++metrics->csi_plugin_rpcs_cancelled.at(v0::PROBE); + ++metrics->csi_plugin_rpcs_cancelled; } else { - ++metrics->csi_plugin_rpcs_errors.at(v0::PROBE); + ++metrics->csi_plugin_rpcs_failed; } })); })); diff --git a/src/csi/v0_volume_manager.cpp b/src/csi/v0_volume_manager.cpp index 3a5aee7..aeddb5d 100644 --- a/src/csi/v0_volume_manager.cpp +++ b/src/csi/v0_volume_manager.cpp @@ -529,18 +529,18 @@ template <RPC Rpc> Future<Try<Response<Rpc>, StatusError>> VolumeManagerProcess::_call( const string& endpoint, const Request<Rpc>& request) { - ++metrics->csi_plugin_rpcs_pending.at(Rpc); + ++metrics->csi_plugin_rpcs_pending; return Client(endpoint, runtime).call<Rpc>(request) .onAny(defer(self(), [=]( const Future<Try<Response<Rpc>, StatusError>>& future) { - --metrics->csi_plugin_rpcs_pending.at(Rpc); + --metrics->csi_plugin_rpcs_pending; if (future.isReady() && future->isSome()) { - ++metrics->csi_plugin_rpcs_successes.at(Rpc); + ++metrics->csi_plugin_rpcs_finished; } else if (future.isDiscarded()) { - ++metrics->csi_plugin_rpcs_cancelled.at(Rpc); + ++metrics->csi_plugin_rpcs_cancelled; } else { - ++metrics->csi_plugin_rpcs_errors.at(Rpc); + ++metrics->csi_plugin_rpcs_failed; } })); } diff --git a/src/tests/storage_local_resource_provider_tests.cpp b/src/tests/storage_local_resource_provider_tests.cpp index bb71935..8243ff3 100644 --- a/src/tests/storage_local_resource_provider_tests.cpp +++ b/src/tests/storage_local_resource_provider_tests.cpp @@ -4128,48 +4128,17 @@ TEST_F(StorageLocalResourceProviderTest, CsiPluginRpcMetrics) ASSERT_SOME(source); - JSON::Object snapshot = Metrics(); + // We expect that the following RPC calls are made during startup: `Probe`, + // `GetPluginInfo` (2), `GetPluginCapabilities, `ControllerGetCapabilities`, + // `ListVolumes`, `GetCapacity`, `NodeGetCapabilities`, `NodeGetId`. + // + // TODO(chhsiao): As these are implementation details, we should count the + // calls processed by a mock CSI plugin and check the metrics against that. + const int numFinishedStartupRpcs = 9; - ASSERT_NE(0u, snapshot.values.count(metricName( - "csi_plugin/rpcs/csi.v0.Identity.Probe/successes"))); - EXPECT_EQ(1, snapshot.values.at( metricName( - "csi_plugin/rpcs/csi.v0.Identity.Probe/successes"))); - ASSERT_NE(0u, snapshot.values.count(metricName( - "csi_plugin/rpcs/csi.v0.Identity.GetPluginInfo/successes"))); - EXPECT_EQ(2, snapshot.values.at(metricName( - "csi_plugin/rpcs/csi.v0.Identity.GetPluginInfo/successes"))); - ASSERT_NE(0u, snapshot.values.count(metricName( - "csi_plugin/rpcs/csi.v0.Identity.GetPluginCapabilities/successes"))); - EXPECT_EQ(1, snapshot.values.at(metricName( - "csi_plugin/rpcs/csi.v0.Identity.GetPluginCapabilities/successes"))); - ASSERT_NE(0u, snapshot.values.count(metricName( - "csi_plugin/rpcs/csi.v0.Controller.ControllerGetCapabilities/successes"))); // NOLINT(whitespace/line_length) - EXPECT_EQ(1, snapshot.values.at(metricName( - "csi_plugin/rpcs/csi.v0.Controller.ControllerGetCapabilities/successes"))); // NOLINT(whitespace/line_length) - ASSERT_NE(0u, snapshot.values.count(metricName( - "csi_plugin/rpcs/csi.v0.Controller.ListVolumes/successes"))); - EXPECT_EQ(1, snapshot.values.at(metricName( - "csi_plugin/rpcs/csi.v0.Controller.ListVolumes/successes"))); - ASSERT_NE(0u, snapshot.values.count(metricName( - "csi_plugin/rpcs/csi.v0.Controller.GetCapacity/successes"))); - EXPECT_EQ(1, snapshot.values.at(metricName( - "csi_plugin/rpcs/csi.v0.Controller.GetCapacity/successes"))); - ASSERT_NE(0u, snapshot.values.count(metricName( - "csi_plugin/rpcs/csi.v0.Controller.CreateVolume/successes"))); - EXPECT_EQ(0, snapshot.values.at(metricName( - "csi_plugin/rpcs/csi.v0.Controller.CreateVolume/successes"))); - ASSERT_NE(0u, snapshot.values.count(metricName( - "csi_plugin/rpcs/csi.v0.Controller.DeleteVolume/errors"))); - EXPECT_EQ(0, snapshot.values.at(metricName( - "csi_plugin/rpcs/csi.v0.Controller.DeleteVolume/errors"))); - ASSERT_NE(0u, snapshot.values.count(metricName( - "csi_plugin/rpcs/csi.v0.Node.NodeGetCapabilities/successes"))); - EXPECT_EQ(1, snapshot.values.at(metricName( - "csi_plugin/rpcs/csi.v0.Node.NodeGetCapabilities/successes"))); - ASSERT_NE(0u, snapshot.values.count(metricName( - "csi_plugin/rpcs/csi.v0.Node.NodeGetId/successes"))); - EXPECT_EQ(1, snapshot.values.at(metricName( - "csi_plugin/rpcs/csi.v0.Node.NodeGetId/successes"))); + EXPECT_TRUE(metricEquals( + metricName("csi_plugin/rpcs_finished"), numFinishedStartupRpcs)); + EXPECT_TRUE(metricEquals(metricName("csi_plugin/rpcs_failed"), 0)); // Create a volume. EXPECT_CALL(sched, resourceOffers(&driver, OffersHaveAnyResource( @@ -4202,48 +4171,10 @@ TEST_F(StorageLocalResourceProviderTest, CsiPluginRpcMetrics) ASSERT_TRUE(volume->disk().source().mount().has_root()); EXPECT_FALSE(path::absolute(volume->disk().source().mount().root())); - snapshot = Metrics(); - - ASSERT_NE(0u, snapshot.values.count(metricName( - "csi_plugin/rpcs/csi.v0.Identity.Probe/successes"))); - EXPECT_EQ(1, snapshot.values.at( metricName( - "csi_plugin/rpcs/csi.v0.Identity.Probe/successes"))); - ASSERT_NE(0u, snapshot.values.count(metricName( - "csi_plugin/rpcs/csi.v0.Identity.GetPluginInfo/successes"))); - EXPECT_EQ(2, snapshot.values.at(metricName( - "csi_plugin/rpcs/csi.v0.Identity.GetPluginInfo/successes"))); - ASSERT_NE(0u, snapshot.values.count(metricName( - "csi_plugin/rpcs/csi.v0.Identity.GetPluginCapabilities/successes"))); - EXPECT_EQ(1, snapshot.values.at(metricName( - "csi_plugin/rpcs/csi.v0.Identity.GetPluginCapabilities/successes"))); - ASSERT_NE(0u, snapshot.values.count(metricName( - "csi_plugin/rpcs/csi.v0.Controller.ControllerGetCapabilities/successes"))); // NOLINT(whitespace/line_length) - EXPECT_EQ(1, snapshot.values.at(metricName( - "csi_plugin/rpcs/csi.v0.Controller.ControllerGetCapabilities/successes"))); // NOLINT(whitespace/line_length) - ASSERT_NE(0u, snapshot.values.count(metricName( - "csi_plugin/rpcs/csi.v0.Controller.ListVolumes/successes"))); - EXPECT_EQ(1, snapshot.values.at(metricName( - "csi_plugin/rpcs/csi.v0.Controller.ListVolumes/successes"))); - ASSERT_NE(0u, snapshot.values.count(metricName( - "csi_plugin/rpcs/csi.v0.Controller.GetCapacity/successes"))); - EXPECT_EQ(1, snapshot.values.at(metricName( - "csi_plugin/rpcs/csi.v0.Controller.GetCapacity/successes"))); - ASSERT_NE(0u, snapshot.values.count(metricName( - "csi_plugin/rpcs/csi.v0.Controller.CreateVolume/successes"))); - EXPECT_EQ(1, snapshot.values.at(metricName( - "csi_plugin/rpcs/csi.v0.Controller.CreateVolume/successes"))); - ASSERT_NE(0u, snapshot.values.count(metricName( - "csi_plugin/rpcs/csi.v0.Controller.DeleteVolume/errors"))); - EXPECT_EQ(0, snapshot.values.at(metricName( - "csi_plugin/rpcs/csi.v0.Controller.DeleteVolume/errors"))); - ASSERT_NE(0u, snapshot.values.count(metricName( - "csi_plugin/rpcs/csi.v0.Node.NodeGetCapabilities/successes"))); - EXPECT_EQ(1, snapshot.values.at(metricName( - "csi_plugin/rpcs/csi.v0.Node.NodeGetCapabilities/successes"))); - ASSERT_NE(0u, snapshot.values.count(metricName( - "csi_plugin/rpcs/csi.v0.Node.NodeGetId/successes"))); - EXPECT_EQ(1, snapshot.values.at(metricName( - "csi_plugin/rpcs/csi.v0.Node.NodeGetId/successes"))); + // An additional `CreateVolume` RPC call is now finished. + EXPECT_TRUE(metricEquals( + metricName("csi_plugin/rpcs_finished"), numFinishedStartupRpcs + 1)); + EXPECT_TRUE(metricEquals(metricName("csi_plugin/rpcs_failed"), 0)); // Remove the volume out of band to fail `DESTROY_DISK`. Option<string> volumePath; @@ -4270,48 +4201,10 @@ TEST_F(StorageLocalResourceProviderTest, CsiPluginRpcMetrics) AWAIT_READY(operationFailedOffers); ASSERT_FALSE(operationFailedOffers->empty()); - snapshot = Metrics(); - - ASSERT_NE(0u, snapshot.values.count(metricName( - "csi_plugin/rpcs/csi.v0.Identity.Probe/successes"))); - EXPECT_EQ(1, snapshot.values.at( metricName( - "csi_plugin/rpcs/csi.v0.Identity.Probe/successes"))); - ASSERT_NE(0u, snapshot.values.count(metricName( - "csi_plugin/rpcs/csi.v0.Identity.GetPluginInfo/successes"))); - EXPECT_EQ(2, snapshot.values.at(metricName( - "csi_plugin/rpcs/csi.v0.Identity.GetPluginInfo/successes"))); - ASSERT_NE(0u, snapshot.values.count(metricName( - "csi_plugin/rpcs/csi.v0.Identity.GetPluginCapabilities/successes"))); - EXPECT_EQ(1, snapshot.values.at(metricName( - "csi_plugin/rpcs/csi.v0.Identity.GetPluginCapabilities/successes"))); - ASSERT_NE(0u, snapshot.values.count(metricName( - "csi_plugin/rpcs/csi.v0.Controller.ControllerGetCapabilities/successes"))); // NOLINT(whitespace/line_length) - EXPECT_EQ(1, snapshot.values.at(metricName( - "csi_plugin/rpcs/csi.v0.Controller.ControllerGetCapabilities/successes"))); // NOLINT(whitespace/line_length) - ASSERT_NE(0u, snapshot.values.count(metricName( - "csi_plugin/rpcs/csi.v0.Controller.ListVolumes/successes"))); - EXPECT_EQ(1, snapshot.values.at(metricName( - "csi_plugin/rpcs/csi.v0.Controller.ListVolumes/successes"))); - ASSERT_NE(0u, snapshot.values.count(metricName( - "csi_plugin/rpcs/csi.v0.Controller.GetCapacity/successes"))); - EXPECT_EQ(1, snapshot.values.at(metricName( - "csi_plugin/rpcs/csi.v0.Controller.GetCapacity/successes"))); - ASSERT_NE(0u, snapshot.values.count(metricName( - "csi_plugin/rpcs/csi.v0.Controller.CreateVolume/successes"))); - EXPECT_EQ(1, snapshot.values.at(metricName( - "csi_plugin/rpcs/csi.v0.Controller.CreateVolume/successes"))); - ASSERT_NE(0u, snapshot.values.count(metricName( - "csi_plugin/rpcs/csi.v0.Controller.DeleteVolume/errors"))); - EXPECT_EQ(1, snapshot.values.at(metricName( - "csi_plugin/rpcs/csi.v0.Controller.DeleteVolume/errors"))); - ASSERT_NE(0u, snapshot.values.count(metricName( - "csi_plugin/rpcs/csi.v0.Node.NodeGetCapabilities/successes"))); - EXPECT_EQ(1, snapshot.values.at(metricName( - "csi_plugin/rpcs/csi.v0.Node.NodeGetCapabilities/successes"))); - ASSERT_NE(0u, snapshot.values.count(metricName( - "csi_plugin/rpcs/csi.v0.Node.NodeGetId/successes"))); - EXPECT_EQ(1, snapshot.values.at(metricName( - "csi_plugin/rpcs/csi.v0.Node.NodeGetId/successes"))); + // An additional `DeleteVolume` RPC call is now failed. + EXPECT_TRUE(metricEquals( + metricName("csi_plugin/rpcs_finished"), numFinishedStartupRpcs + 1)); + EXPECT_TRUE(metricEquals(metricName("csi_plugin/rpcs_failed"), 1)); } @@ -5037,6 +4930,18 @@ TEST_F(StorageLocalResourceProviderTest, RetryRpcWithExponentialBackoff) .filter(std::bind(isStoragePool<Resource>, lambda::_1, "test")) .begin(); + // We expect that the following RPC calls are made during startup: `Probe`, + // `GetPluginInfo` (2), `GetPluginCapabilities, `ControllerGetCapabilities`, + // `ListVolumes`, `GetCapacity`, `NodeGetCapabilities`, `NodeGetId`. + // + // TODO(chhsiao): As these are implementation details, we should count the + // calls processed by a mock CSI plugin and check the metrics against that. + const int numFinishedStartupRpcs = 9; + + EXPECT_TRUE(metricEquals( + metricName("csi_plugin/rpcs_finished"), numFinishedStartupRpcs)); + EXPECT_TRUE(metricEquals(metricName("csi_plugin/rpcs_failed"), 0)); + // Create a MOUNT disk. Future<UpdateOperationStatusMessage> updateOperationStatus = FUTURE_PROTOBUF(UpdateOperationStatusMessage(), _, _); @@ -5201,21 +5106,12 @@ TEST_F(StorageLocalResourceProviderTest, RetryRpcWithExponentialBackoff) EXPECT_TRUE(metricEquals("master/operations/failed", 1)); EXPECT_TRUE(metricEquals("master/operations/finished", 1)); - EXPECT_TRUE(metricEquals(metricName( - "csi_plugin/rpcs/csi.v0.Controller.CreateVolume/successes"), - 1)); - - EXPECT_TRUE(metricEquals(metricName( - "csi_plugin/rpcs/csi.v0.Controller.CreateVolume/errors"), - numRetryableErrors)); - - EXPECT_TRUE(metricEquals(metricName( - "csi_plugin/rpcs/csi.v0.Controller.DeleteVolume/successes"), - 0)); - - EXPECT_TRUE(metricEquals(metricName( - "csi_plugin/rpcs/csi.v0.Controller.DeleteVolume/errors"), - numRetryableErrors + 1)); + // There should be 1 finished and 10 failed `CreateVolume` calls, and 11 + // failed `DeleteVolume` calls. + EXPECT_TRUE(metricEquals( + metricName("csi_plugin/rpcs_finished"), numFinishedStartupRpcs + 1)); + EXPECT_TRUE(metricEquals( + metricName("csi_plugin/rpcs_failed"), numRetryableErrors * 2 + 1)); }
