This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit 20be4ede1e0dc2f74d583373a3a0e3062529c6fe
Author: Alexey Serbin <[email protected]>
AuthorDate: Tue Oct 18 15:17:44 2022 -0700

    [tserver] validate scanner TTL vs RPC connection timeout
    
    This patch adds a group validator for --scanner_ttl_ms and
    --rpc_default_keepalive_time_ms flags.  The validator outputs a warning
    (not an error though) if the TTL for an idle scanner is greater than the
    timeout for an idle RPC connection.
    
    Even if an idle scanner is kept alive at the server side for some time,
    Kudu servers periodically close connections that have been idle at least
    for --rpc_default_keepalive_time_ms time interval.  So, setting the
    --rpc_default_keepalive_time_ms flag to a greater or equal value than
    --scanner_ttl_ms helps keeping yet-to-be-used connections to idle
    scanners open, avoiding inadvertent closure and re-opening connections
    to scanners that might yet be sent continuation scan requests.  The new
    constraint also helps to work around one particular bug [1] in the Kudu
    Java client.
    
    I didn't add a test for the newly added validator, but I manually
    verified that the warning is output as expected when necessary.
    
    [1] https://issues.apache.org/jira/browse/KUDU-3169
    
    Change-Id: If1439dfb6eb82ba2be0472547b04e5a692879535
    Reviewed-on: http://gerrit.cloudera.org:8080/19152
    Tested-by: Alexey Serbin <[email protected]>
    Reviewed-by: Yuqi Du <[email protected]>
    Reviewed-by: Yingchun Lai <[email protected]>
---
 src/kudu/tserver/scanners.cc | 32 +++++++++++++++++++++++++++-----
 1 file changed, 27 insertions(+), 5 deletions(-)

diff --git a/src/kudu/tserver/scanners.cc b/src/kudu/tserver/scanners.cc
index f07340f3d..30bf4f7be 100644
--- a/src/kudu/tserver/scanners.cc
+++ b/src/kudu/tserver/scanners.cc
@@ -23,6 +23,7 @@
 #include <mutex>
 #include <numeric>
 #include <ostream>
+#include <type_traits>
 
 #include <gflags/gflags.h>
 
@@ -37,11 +38,11 @@
 #include "kudu/gutil/stl_util.h"
 #include "kudu/gutil/strings/substitute.h"
 #include "kudu/rpc/remote_user.h"
-#include "kudu/tablet/tablet.h"
 #include "kudu/tablet/tablet_metadata.h"
 #include "kudu/tablet/tablet_metrics.h"
 #include "kudu/tserver/scanner_metrics.h"
 #include "kudu/util/flag_tags.h"
+#include "kudu/util/flag_validators.h"
 #include "kudu/util/locks.h"
 #include "kudu/util/logging.h"
 #include "kudu/util/metrics.h"
@@ -50,7 +51,7 @@
 #include "kudu/util/thread.h"
 
 DEFINE_int32(scanner_ttl_ms, 60000,
-             "Number of milliseconds of inactivity allowed for a scanner"
+             "Number of milliseconds of inactivity allowed for a scanner "
              "before it may be expired");
 TAG_FLAG(scanner_ttl_ms, advanced);
 TAG_FLAG(scanner_ttl_ms, runtime);
@@ -64,23 +65,44 @@ DEFINE_int32(scan_history_count, 20,
              "scans will be shown on the tablet server's scans dashboard.");
 TAG_FLAG(scan_history_count, experimental);
 
+DECLARE_int32(rpc_default_keepalive_time_ms);
+
 METRIC_DEFINE_gauge_size(server, active_scanners,
                          "Active Scanners",
                          kudu::MetricUnit::kScanners,
                          "Number of scanners that are currently active",
                          kudu::MetricLevel::kInfo);
 
+using kudu::rpc::RemoteUser;
+using kudu::tablet::TabletReplica;
 using std::string;
 using std::unique_ptr;
 using std::unordered_map;
 using std::vector;
 using strings::Substitute;
 
-namespace kudu {
+namespace {
+
+// Issue a warning if --rpc_default_keepalive_time_ms and --scanner_ttl_ms 
flags
+// are set improperly. That's just a warning, not an error: the client should 
be
+// able to re-establish the connection even if it's been closed by the server.
+// However, it takes extra roundtrips and adds extra latency to the scan time.
+bool ValidateScannerAndRpcConnectionIdleTimes() {
+  if (FLAGS_rpc_default_keepalive_time_ms >= 0 &&
+      FLAGS_rpc_default_keepalive_time_ms < FLAGS_scanner_ttl_ms) {
+    LOG(WARNING) << Substitute(
+        "--rpc_default_keepalive_time_ms (currently $0) should be set at least 
"
+        "as high as --scanner_ttl_ms (currently $1) to prevent servers closing 
"
+        "idle client connections to not-yet-expired scanners",
+        FLAGS_rpc_default_keepalive_time_ms, FLAGS_scanner_ttl_ms);
+  }
+  return true;
+}
 
-using rpc::RemoteUser;
-using tablet::TabletReplica;
+GROUP_FLAG_VALIDATOR(idle_intervals, ValidateScannerAndRpcConnectionIdleTimes);
+} // anonymous namespace
 
+namespace kudu {
 namespace tserver {
 
 ScannerManager::ScannerManager(const scoped_refptr<MetricEntity>& 
metric_entity)

Reply via email to