This is an automated email from the ASF dual-hosted git repository. alexey pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/kudu.git
commit 20be4ede1e0dc2f74d583373a3a0e3062529c6fe Author: Alexey Serbin <[email protected]> AuthorDate: Tue Oct 18 15:17:44 2022 -0700 [tserver] validate scanner TTL vs RPC connection timeout This patch adds a group validator for --scanner_ttl_ms and --rpc_default_keepalive_time_ms flags. The validator outputs a warning (not an error though) if the TTL for an idle scanner is greater than the timeout for an idle RPC connection. Even if an idle scanner is kept alive at the server side for some time, Kudu servers periodically close connections that have been idle at least for --rpc_default_keepalive_time_ms time interval. So, setting the --rpc_default_keepalive_time_ms flag to a greater or equal value than --scanner_ttl_ms helps keeping yet-to-be-used connections to idle scanners open, avoiding inadvertent closure and re-opening connections to scanners that might yet be sent continuation scan requests. The new constraint also helps to work around one particular bug [1] in the Kudu Java client. I didn't add a test for the newly added validator, but I manually verified that the warning is output as expected when necessary. [1] https://issues.apache.org/jira/browse/KUDU-3169 Change-Id: If1439dfb6eb82ba2be0472547b04e5a692879535 Reviewed-on: http://gerrit.cloudera.org:8080/19152 Tested-by: Alexey Serbin <[email protected]> Reviewed-by: Yuqi Du <[email protected]> Reviewed-by: Yingchun Lai <[email protected]> --- src/kudu/tserver/scanners.cc | 32 +++++++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/src/kudu/tserver/scanners.cc b/src/kudu/tserver/scanners.cc index f07340f3d..30bf4f7be 100644 --- a/src/kudu/tserver/scanners.cc +++ b/src/kudu/tserver/scanners.cc @@ -23,6 +23,7 @@ #include <mutex> #include <numeric> #include <ostream> +#include <type_traits> #include <gflags/gflags.h> @@ -37,11 +38,11 @@ #include "kudu/gutil/stl_util.h" #include "kudu/gutil/strings/substitute.h" #include "kudu/rpc/remote_user.h" -#include "kudu/tablet/tablet.h" #include "kudu/tablet/tablet_metadata.h" #include "kudu/tablet/tablet_metrics.h" #include "kudu/tserver/scanner_metrics.h" #include "kudu/util/flag_tags.h" +#include "kudu/util/flag_validators.h" #include "kudu/util/locks.h" #include "kudu/util/logging.h" #include "kudu/util/metrics.h" @@ -50,7 +51,7 @@ #include "kudu/util/thread.h" DEFINE_int32(scanner_ttl_ms, 60000, - "Number of milliseconds of inactivity allowed for a scanner" + "Number of milliseconds of inactivity allowed for a scanner " "before it may be expired"); TAG_FLAG(scanner_ttl_ms, advanced); TAG_FLAG(scanner_ttl_ms, runtime); @@ -64,23 +65,44 @@ DEFINE_int32(scan_history_count, 20, "scans will be shown on the tablet server's scans dashboard."); TAG_FLAG(scan_history_count, experimental); +DECLARE_int32(rpc_default_keepalive_time_ms); + METRIC_DEFINE_gauge_size(server, active_scanners, "Active Scanners", kudu::MetricUnit::kScanners, "Number of scanners that are currently active", kudu::MetricLevel::kInfo); +using kudu::rpc::RemoteUser; +using kudu::tablet::TabletReplica; using std::string; using std::unique_ptr; using std::unordered_map; using std::vector; using strings::Substitute; -namespace kudu { +namespace { + +// Issue a warning if --rpc_default_keepalive_time_ms and --scanner_ttl_ms flags +// are set improperly. That's just a warning, not an error: the client should be +// able to re-establish the connection even if it's been closed by the server. +// However, it takes extra roundtrips and adds extra latency to the scan time. +bool ValidateScannerAndRpcConnectionIdleTimes() { + if (FLAGS_rpc_default_keepalive_time_ms >= 0 && + FLAGS_rpc_default_keepalive_time_ms < FLAGS_scanner_ttl_ms) { + LOG(WARNING) << Substitute( + "--rpc_default_keepalive_time_ms (currently $0) should be set at least " + "as high as --scanner_ttl_ms (currently $1) to prevent servers closing " + "idle client connections to not-yet-expired scanners", + FLAGS_rpc_default_keepalive_time_ms, FLAGS_scanner_ttl_ms); + } + return true; +} -using rpc::RemoteUser; -using tablet::TabletReplica; +GROUP_FLAG_VALIDATOR(idle_intervals, ValidateScannerAndRpcConnectionIdleTimes); +} // anonymous namespace +namespace kudu { namespace tserver { ScannerManager::ScannerManager(const scoped_refptr<MetricEntity>& metric_entity)
