This is an automated email from the ASF dual-hosted git repository. zwoop pushed a commit to branch 9.2.x in repository https://gitbox.apache.org/repos/asf/trafficserver.git
commit 8ea2f4c003549034f4d32593763881ab67b0119c Author: Jeff Elsloo <[email protected]> AuthorDate: Tue Oct 26 08:48:10 2021 -0600 Added metrics to the rate limit plugin and document the new options (#8395) * Added metrics to the rate limit plugin and documented the new options. * * Addressed feedback in PR review * Fixed calculation for metric length that was counting too many things due to a prior implementation and lack of cleanup of this specific line of code (cherry picked from commit a3f04cb6dc12c0d192f4abd981b5ff09647484f4) --- doc/admin-guide/plugins/rate_limit.en.rst | 134 ++++++++++++++++++++++++ plugins/experimental/rate_limit/limiter.h | 81 ++++++++++++++ plugins/experimental/rate_limit/rate_limit.cc | 5 +- plugins/experimental/rate_limit/sni_limiter.cc | 10 ++ plugins/experimental/rate_limit/sni_limiter.h | 2 + plugins/experimental/rate_limit/sni_selector.cc | 4 + plugins/experimental/rate_limit/txn_limiter.cc | 14 +++ plugins/experimental/rate_limit/utilities.cc | 46 ++++++++ plugins/experimental/rate_limit/utilities.h | 1 + 9 files changed, 296 insertions(+), 1 deletion(-) diff --git a/doc/admin-guide/plugins/rate_limit.en.rst b/doc/admin-guide/plugins/rate_limit.en.rst index f50377223..ef3f3246f 100644 --- a/doc/admin-guide/plugins/rate_limit.en.rst +++ b/doc/admin-guide/plugins/rate_limit.en.rst @@ -80,6 +80,18 @@ are available: An optional `max-age` for how long a transaction can sit in the delay queue. The value (default 0) is the age in milliseconds. +.. option:: --prefix + + An optional metric prefix to use instead of the default (plugin.rate_limiter). + +.. option:: --tag + + An optional metric tag to use instead of the default. When a tag is not specified + the plugin will use the scheme, FQDN, and port when it is non-standard. For example + a default plugin tag might be "https.example.com" or "http.example.com:8080" + noting that in the latter example, the non-standard scheme and port led to + ":8080" being appended to the string. 
+ Global Plugin ------------- @@ -122,6 +134,61 @@ The following options are available: An optional `max-age` for how long a transaction can sit in the delay queue. The value (default 0) is the age in milliseconds. +.. option:: --prefix + + An optional metric prefix to use instead of the default (plugin.rate_limiter). + +.. option:: --tag + + An optional metric tag to use instead of the default. When a tag is not specified + the plugin will use the FQDN of the SNI associated with each rate limiter instance + created during plugin initialization. + +Metrics +------- +Metric names are generated either using defaults or user-supplied values. In either +case, the format of the metric names is as follows: + + ``prefix.type.tag.metric`` + +A user can specify their own prefixes and tags, but not types or metrics. + +``prefix`` + The default prefix for all metrics is `plugin.rate_limiter`. + +``type`` + There are two types of metrics: `sni` and `remap`. Each type corresponds with the + type of configuration used to generate the metric. The global configuration is for + rate limiting requests during TLS negotiation, hence, the type of ``sni``. Similarly + ``remap`` connotes a remap configuration. + +``tag`` + By default the metric tag is derived from a description that is set conditionally. + When configured in global mode, the ``SNI`` argument allows a comma separated list + of FQDNs that require rate limiting. Each FQDN is associated with an instance of + the rate limiter, and the description of each limiter is set to the FQDN. + + When configured on a remap, the plugin will generate a description based on the + configuration. When the scheme and port number are standard, the port is omitted + from the generated description, however, when the scheme and port combination are + non-standard, the port is appended. 
For example, a standard scheme and port would + lead to a description of ``http.example.com`` or ``https.example.com`` but if a + non-standard port was used, a description might be ``https.example.com:8443`` or + ``http.example.com:8080``. This approach allows each limiter to increment metrics + for the correct remaps. + +``metric`` + There are four metrics that may be incremented, depending on which action the plugin takes: + + ============== =================================================================== + Metric Definition + ============== =================================================================== + ``queued`` Request queued due to being at the limit but under the queue limit. + ``rejected`` Request rejected due to being over the defined limits. + ``expired`` Queued connection is too old to be resumed and is rejected. + ``resumed`` Queued connection is resumed. + ============== =================================================================== + Examples -------- @@ -158,3 +225,70 @@ In this case, the response would look like this when the queue is full: :: Content-Language: en Retry-After: 3600 Content-Length: 207 + +Metric Examples +--------------- +The following examples show the metric names that result from various settings +using a hypothetical domain of example.com with both global and remap configurations. +Note that in this example the remap configuration contains both TLS and non-TLS +remap rules. 
+ +Defaults: +:: + + plugin.rate_limiter.sni.example.com.queued + plugin.rate_limiter.sni.example.com.rejected + plugin.rate_limiter.sni.example.com.expired + plugin.rate_limiter.sni.example.com.resumed + + plugin.rate_limiter.remap.https.example.com.queued + plugin.rate_limiter.remap.https.example.com.rejected + plugin.rate_limiter.remap.https.example.com.expired + plugin.rate_limiter.remap.https.example.com.resumed + + plugin.rate_limiter.remap.http.example.com.queued + plugin.rate_limiter.remap.http.example.com.rejected + plugin.rate_limiter.remap.http.example.com.expired + plugin.rate_limiter.remap.http.example.com.resumed + +Defaults with non-standard scheme+port combinations in the remap rules: +:: + + plugin.rate_limiter.sni.example.com.queued + plugin.rate_limiter.sni.example.com.rejected + plugin.rate_limiter.sni.example.com.expired + plugin.rate_limiter.sni.example.com.resumed + + plugin.rate_limiter.remap.https.example.com:8443.queued + plugin.rate_limiter.remap.https.example.com:8443.rejected + plugin.rate_limiter.remap.https.example.com:8443.expired + plugin.rate_limiter.remap.https.example.com:8443.resumed + + plugin.rate_limiter.remap.http.example.com:8080.queued + plugin.rate_limiter.remap.http.example.com:8080.rejected + plugin.rate_limiter.remap.http.example.com:8080.expired + plugin.rate_limiter.remap.http.example.com:8080.resumed + +With: + * ``--prefix=limiter`` on the global configuration + * ``--tag=tls.example.com`` on the global configuration + * ``@pparam=--prefix=limiter`` on the remap configurations + * ``@pparam=--tag=secure.example.com`` on the TLS-enabled remap configuration + * ``@pparam=--tag=insecure.example.com`` on the non-TLS-enabled remap configuration + +:: + + limiter.sni.tls.example.com.queued + limiter.sni.tls.example.com.rejected + limiter.sni.tls.example.com.expired + limiter.sni.tls.example.com.resumed + + limiter.remap.secure.example.com.queued + limiter.remap.secure.example.com.rejected + limiter.remap.secure.example.com.expired + 
limiter.remap.secure.example.com.resumed + + limiter.remap.insecure.example.com.queued + limiter.remap.insecure.example.com.rejected + limiter.remap.insecure.example.com.expired + limiter.remap.insecure.example.com.resumed diff --git a/plugins/experimental/rate_limit/limiter.h b/plugins/experimental/rate_limit/limiter.h index 4d56ffc95..9c4f4b0cd 100644 --- a/plugins/experimental/rate_limit/limiter.h +++ b/plugins/experimental/rate_limit/limiter.h @@ -31,6 +31,39 @@ constexpr auto QUEUE_DELAY_TIME = std::chrono::milliseconds{200}; // Examine the queue every 200ms using QueueTime = std::chrono::time_point<std::chrono::system_clock>; +enum { + RATE_LIMITER_TYPE_SNI = 0, + RATE_LIMITER_TYPE_REMAP, + + RATE_LIMITER_TYPE_MAX +}; + +// order must align with the above +static const char *types[] = { + "sni", + "remap", +}; + +// no metric for requests we accept; accepted requests should be counted under their usual metrics +enum { + RATE_LIMITER_METRIC_QUEUED = 0, + RATE_LIMITER_METRIC_REJECTED, + RATE_LIMITER_METRIC_EXPIRED, + RATE_LIMITER_METRIC_RESUMED, + + RATE_LIMITER_METRIC_MAX +}; + +// order must align with the above +static const char *suffixes[] = { + "queued", + "rejected", + "expired", + "resumed", +}; + +static const char *RATE_LIMITER_METRIC_PREFIX = "plugin.rate_limiter"; + /////////////////////////////////////////////////////////////////////////////// // Base class for all limiters // @@ -139,6 +172,50 @@ public: } } + void + initializeMetrics(uint type) + { + TSReleaseAssert(type < RATE_LIMITER_TYPE_MAX); + memset(_metrics, 0, sizeof(_metrics)); + + std::string metric_prefix = prefix; + metric_prefix.append("." + std::string(types[type])); + + if (!tag.empty()) { + metric_prefix.append("." + tag); + } else if (!description.empty()) { + metric_prefix.append("." 
+ description); + } + + for (int i = 0; i < RATE_LIMITER_METRIC_MAX; i++) { + size_t const metricsz = metric_prefix.length() + strlen(suffixes[i]) + 2; // padding for dot+terminator + char *const metric = (char *)TSmalloc(metricsz); + snprintf(metric, metricsz, "%s.%s", metric_prefix.data(), suffixes[i]); + + _metrics[i] = TS_ERROR; + + if (TSStatFindName(metric, &_metrics[i]) == TS_ERROR) { + _metrics[i] = TSStatCreate(metric, TS_RECORDDATATYPE_INT, TS_STAT_NON_PERSISTENT, TS_STAT_SYNC_SUM); + } + + if (_metrics[i] != TS_ERROR) { + TSDebug(PLUGIN_NAME, "established metric '%s' as ID %d", metric, _metrics[i]); + } else { + TSError("failed to create metric '%s'", metric); + } + + TSfree(metric); + } + } + + void + incrementMetric(uint metric) + { + if (_metrics[metric] != TS_ERROR) { + TSStatIntIncrement(_metrics[metric], 1); + } + } + // Initialize a new instance of this rate limiter bool initialize(int argc, const char *argv[]); @@ -147,6 +224,8 @@ public: unsigned max_queue = UINT_MAX; // No queue limit, but if sets will give an immediate error if at max std::chrono::milliseconds max_age = std::chrono::milliseconds::zero(); // Max age (ms) in the queue std::string description = ""; + std::string prefix = RATE_LIMITER_METRIC_PREFIX; // metric prefix, i.e.: plugin.rate_limiter + std::string tag = ""; // optional tag to append to the prefix (prefix.tag) private: std::atomic<unsigned> _active = 0; // Current active number of txns. This has to always stay <= limit above @@ -154,4 +233,6 @@ private: TSMutex _queue_lock, _active_lock; // Resource locks std::deque<QueueItem> _queue; // Queue for the pending TXN's. 
ToDo: Should also move (see below) + + int _metrics[RATE_LIMITER_METRIC_MAX]; }; diff --git a/plugins/experimental/rate_limit/rate_limit.cc b/plugins/experimental/rate_limit/rate_limit.cc index 8220f55d7..a3c94d094 100644 --- a/plugins/experimental/rate_limit/rate_limit.cc +++ b/plugins/experimental/rate_limit/rate_limit.cc @@ -29,7 +29,7 @@ #include "sni_limiter.h" /////////////////////////////////////////////////////////////////////////////// -// As a global plugin, things works a little difference since we don't setup +// As a global plugin, things works a little different since we don't setup // per transaction or via remap.config. extern int gVCIdx; @@ -113,6 +113,9 @@ TSRemapNewInstance(int argc, char *argv[], void **ih, char * /* errbuf ATS_UNUSE { TxnRateLimiter *limiter = new TxnRateLimiter(); + // set the description based on the pristine remap URL prior to advancing the pointer below + limiter->description = getDescriptionFromUrl(argv[0]); + // argv contains the "to" and "from" URLs. Skip the first so that the // second one poses as the program name. 
--argc; diff --git a/plugins/experimental/rate_limit/sni_limiter.cc b/plugins/experimental/rate_limit/sni_limiter.cc index d1a5e0586..b63c50b1d 100644 --- a/plugins/experimental/rate_limit/sni_limiter.cc +++ b/plugins/experimental/rate_limit/sni_limiter.cc @@ -54,12 +54,14 @@ sni_limit_cont(TSCont contp, TSEvent event, void *edata) TSVConnReenableEx(vc, TS_EVENT_ERROR); TSDebug(PLUGIN_NAME, "Rejecting connection, we're at capacity and queue is full"); TSUserArgSet(vc, gVCIdx, nullptr); + limiter->incrementMetric(RATE_LIMITER_METRIC_REJECTED); return TS_ERROR; } else { TSUserArgSet(vc, gVCIdx, reinterpret_cast<void *>(limiter)); limiter->push(vc, contp); TSDebug(PLUGIN_NAME, "Queueing the VC, we are at capacity"); + limiter->incrementMetric(RATE_LIMITER_METRIC_QUEUED); } } else { // Not at limit on the handshake, we can re-enable @@ -103,6 +105,8 @@ SniRateLimiter::initialize(int argc, const char *argv[]) {const_cast<char *>("limit"), required_argument, nullptr, 'l'}, {const_cast<char *>("queue"), required_argument, nullptr, 'q'}, {const_cast<char *>("maxage"), required_argument, nullptr, 'm'}, + {const_cast<char *>("prefix"), required_argument, nullptr, 'p'}, + {const_cast<char *>("tag"), required_argument, nullptr, 't'}, // EOF {nullptr, no_argument, nullptr, '\0'}, }; @@ -120,6 +124,12 @@ SniRateLimiter::initialize(int argc, const char *argv[]) case 'm': this->max_age = std::chrono::milliseconds(strtol(optarg, nullptr, 10)); break; + case 'p': + this->prefix = std::string(optarg); + break; + case 't': + this->tag = std::string(optarg); + break; } if (opt == -1) { break; diff --git a/plugins/experimental/rate_limit/sni_limiter.h b/plugins/experimental/rate_limit/sni_limiter.h index ea3581b9b..3889a0819 100644 --- a/plugins/experimental/rate_limit/sni_limiter.h +++ b/plugins/experimental/rate_limit/sni_limiter.h @@ -35,6 +35,8 @@ public: limit = src.limit; max_queue = src.max_queue; max_age = src.max_age; + prefix = src.prefix; + tag = src.tag; } bool 
initialize(int argc, const char *argv[]); diff --git a/plugins/experimental/rate_limit/sni_selector.cc b/plugins/experimental/rate_limit/sni_selector.cc index 60fc2ee85..d41b4df06 100644 --- a/plugins/experimental/rate_limit/sni_selector.cc +++ b/plugins/experimental/rate_limit/sni_selector.cc @@ -41,6 +41,7 @@ sni_queue_cont(TSCont cont, TSEvent event, void *edata) (void)contp; // Ugly, but silences some compilers. TSDebug(PLUGIN_NAME, "SNI=%s: Enabling queued VC after %ldms", key.data(), static_cast<long>(delay.count())); TSVConnReenable(vc); + limiter->incrementMetric(RATE_LIMITER_METRIC_RESUMED); } // Kill any queued VCs if they are too old @@ -55,6 +56,7 @@ sni_queue_cont(TSCont cont, TSEvent event, void *edata) (void)contp; TSDebug(PLUGIN_NAME, "Queued VC is too old (%ldms), erroring out", static_cast<long>(age.count())); TSVConnReenableEx(vc, TS_EVENT_ERROR); + limiter->incrementMetric(RATE_LIMITER_METRIC_EXPIRED); } } } @@ -73,6 +75,8 @@ SniSelector::insert(std::string_view sni, SniRateLimiter *limiter) TSDebug(PLUGIN_NAME, "Added global limiter for SNI=%s (limit=%u, queue=%u, max_age=%ldms)", sni.data(), limiter->limit, limiter->max_queue, static_cast<long>(limiter->max_age.count())); + limiter->initializeMetrics(RATE_LIMITER_TYPE_SNI); + return true; } diff --git a/plugins/experimental/rate_limit/txn_limiter.cc b/plugins/experimental/rate_limit/txn_limiter.cc index f5b0951e0..6e3366588 100644 --- a/plugins/experimental/rate_limit/txn_limiter.cc +++ b/plugins/experimental/rate_limit/txn_limiter.cc @@ -40,6 +40,7 @@ txn_limit_cont(TSCont cont, TSEvent event, void *edata) case TS_EVENT_HTTP_POST_REMAP: limiter->push(static_cast<TSHttpTxn>(edata), cont); + limiter->incrementMetric(RATE_LIMITER_METRIC_QUEUED); return TS_EVENT_NONE; break; @@ -47,6 +48,7 @@ txn_limit_cont(TSCont cont, TSEvent event, void *edata) retryAfter(static_cast<TSHttpTxn>(edata), limiter->retry); TSContDestroy(cont); // We are done with this continuation now 
TSHttpTxnReenable(static_cast<TSHttpTxn>(edata), TS_EVENT_HTTP_CONTINUE); + limiter->incrementMetric(RATE_LIMITER_METRIC_REJECTED); return TS_EVENT_CONTINUE; break; @@ -74,6 +76,7 @@ txn_queue_cont(TSCont cont, TSEvent event, void *edata) // Since this was a delayed transaction, we need to add the TXN_CLOSE hook to free the slot when done TSHttpTxnHookAdd(txnp, TS_HTTP_TXN_CLOSE_HOOK, contp); TSHttpTxnReenable(txnp, TS_EVENT_HTTP_CONTINUE); + limiter->incrementMetric(RATE_LIMITER_METRIC_RESUMED); } // Kill any queued txns if they are too old @@ -90,6 +93,7 @@ txn_queue_cont(TSCont cont, TSEvent event, void *edata) TSHttpTxnStatusSet(txnp, static_cast<TSHttpStatus>(limiter->error)); TSHttpTxnHookAdd(txnp, TS_HTTP_SEND_RESPONSE_HDR_HOOK, contp); TSHttpTxnReenable(txnp, TS_EVENT_HTTP_ERROR); + limiter->incrementMetric(RATE_LIMITER_METRIC_EXPIRED); } } @@ -109,6 +113,8 @@ TxnRateLimiter::initialize(int argc, const char *argv[]) {const_cast<char *>("retry"), required_argument, nullptr, 'r'}, {const_cast<char *>("header"), required_argument, nullptr, 'h'}, {const_cast<char *>("maxage"), required_argument, nullptr, 'm'}, + {const_cast<char *>("prefix"), required_argument, nullptr, 'p'}, + {const_cast<char *>("tag"), required_argument, nullptr, 't'}, // EOF {nullptr, no_argument, nullptr, '\0'}, }; @@ -135,6 +141,12 @@ TxnRateLimiter::initialize(int argc, const char *argv[]) case 'h': this->header = optarg; break; + case 'p': + this->prefix = std::string(optarg); + break; + case 't': + this->tag = std::string(optarg); + break; } if (opt == -1) { break; @@ -148,6 +160,8 @@ TxnRateLimiter::initialize(int argc, const char *argv[]) _action = TSContScheduleEveryOnPool(_queue_cont, QUEUE_DELAY_TIME.count(), TS_THREAD_POOL_TASK); } + this->initializeMetrics(RATE_LIMITER_TYPE_REMAP); + return true; } diff --git a/plugins/experimental/rate_limit/utilities.cc b/plugins/experimental/rate_limit/utilities.cc index c648d98c1..0838689c0 100644 --- 
a/plugins/experimental/rate_limit/utilities.cc +++ b/plugins/experimental/rate_limit/utilities.cc @@ -70,3 +70,49 @@ retryAfter(TSHttpTxn txnp, unsigned retry) } } } + +/////////////////////////////////////////////////////////////////////////////// +// Parse a URL to obtain a description for use with metrics when no user +// provided tag is available. This is used by the remap side of the plugin, +// while the SNI side uses the FQDN associated with each limiter instance +// which is obtained from the list of SNIs in the global plugin configuration. +// +std::string +getDescriptionFromUrl(const char *url) +{ + TSMBuffer const buf = TSMBufferCreate(); + TSMLoc url_loc = nullptr; + + const int url_len = strlen(url); + std::string description; + + if (TS_SUCCESS == TSUrlCreate(buf, &url_loc) && TS_PARSE_DONE == TSUrlParse(buf, url_loc, &url, url + url_len)) { + int host_len, scheme_len = 0; + const char *s = TSUrlSchemeGet(buf, url_loc, &scheme_len); + const char *h = TSUrlHostGet(buf, url_loc, &host_len); + const int port = TSUrlPortGet(buf, url_loc); + + const std::string hostname = std::string(h, host_len); + const std::string scheme = std::string(s, scheme_len); + + TSDebug(PLUGIN_NAME, "scheme = %s, host = %s, port = %d", scheme.c_str(), hostname.c_str(), port); + + description = scheme; + description.append("."); + description.append(hostname); + + // only append the port when it is non-standard + if (!(strncmp(s, TS_URL_SCHEME_HTTP, scheme_len) == 0 && port == 80) && + !(strncmp(s, TS_URL_SCHEME_HTTPS, scheme_len) == 0 && port == 443)) { + description.append(":" + std::to_string(port)); + } + } + + if (url_loc != nullptr) { + TSHandleMLocRelease(buf, nullptr, url_loc); + } + + TSMBufferDestroy(buf); + + return description; +} diff --git a/plugins/experimental/rate_limit/utilities.h b/plugins/experimental/rate_limit/utilities.h index 0ff58bff3..e936912e6 100644 --- a/plugins/experimental/rate_limit/utilities.h +++ b/plugins/experimental/rate_limit/utilities.h @@ 
-26,3 +26,4 @@ constexpr char const PLUGIN_NAME[] = "rate_limit"; void delayHeader(TSHttpTxn txnp, std::string &header, std::chrono::milliseconds delay); void retryAfter(TSHttpTxn txnp, unsigned retry); +std::string getDescriptionFromUrl(const char *url);
