This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
     new 101ed5b  [master] set RPC queue length to 100 by default
101ed5b is described below

commit 101ed5b938af31b69f99a187c894261891459e09
Author: Alexey Serbin <[email protected]>
AuthorDate: Wed Jul 8 12:07:44 2020 -0700

    [master] set RPC queue length to 100 by default
    
    There is some evidence from the field that even in the case of small
    (20-30 nodes) and mid-size (50-80 nodes) Kudu clusters, master's RPC
    queue sometimes overflows due to spikes of incoming client and
    TSHeartbeat requests.  Since those request floods are attributed to
    batches of lightweight requests such as GetTableSchema,
    TSHeartbeatRequestPB, etc., it makes sense to increase the default
    setting for the RPC queue size from 50 to 100 for Kudu masters.
    
    Additionally, I did a minor code clean-up in master_runner.cc and
    tablet_server_runner.cc files.
    
    Change-Id: Ia90bc157d8a0d52b6d1320cf67bc533a51faf101
    Reviewed-on: http://gerrit.cloudera.org:8080/16154
    Tested-by: Kudu Jenkins
    Reviewed-by: Bankim Bhavsar <[email protected]>
    Reviewed-by: Grant Henke <[email protected]>
---
 src/kudu/master/master_runner.cc         | 39 ++++++++++++++++++++------------
 src/kudu/tserver/tablet_server.h         |  1 -
 src/kudu/tserver/tablet_server_runner.cc | 32 ++++++++++++++++----------
 3 files changed, 45 insertions(+), 27 deletions(-)

diff --git a/src/kudu/master/master_runner.cc b/src/kudu/master/master_runner.cc
index eac8097..8e9e0f9 100644
--- a/src/kudu/master/master_runner.cc
+++ b/src/kudu/master/master_runner.cc
@@ -17,6 +17,7 @@
 
 #include "kudu/master/master_runner.h"
 
+#include <cstdint>
 #include <iostream>
 #include <string>
 
@@ -29,8 +30,9 @@
 #include "kudu/util/monotime.h"
 #include "kudu/util/version_info.h"
 
-using kudu::master::Master;
+using gflags::SET_FLAGS_DEFAULT;
 using std::string;
+using std::to_string;
 
 DECLARE_bool(evict_failed_followers);
 
@@ -58,30 +60,39 @@ static Status ValidateHiveMetastoreSaslEnabled() {
 }
 
 void SetMasterFlagDefaults() {
-  // Reset some default values before parsing gflags.
-  CHECK_NE("", google::SetCommandLineOptionWithMode("rpc_bind_addresses",
-                                                    strings::Substitute(
-                                                        "0.0.0.0:$0",
-                                                        Master::kDefaultPort).c_str(),
-                                                    google::FlagSettingMode::SET_FLAGS_DEFAULT));
-  CHECK_NE("", google::SetCommandLineOptionWithMode("webserver_port",
-                                                    std::to_string(
-                                                        Master::kDefaultWebPort).c_str(),
-                                                    google::FlagSettingMode::SET_FLAGS_DEFAULT));
+  constexpr int32_t kDefaultRpcServiceQueueLength = 100;
 
+  // Reset some default values before parsing gflags.
+  CHECK_NE("", SetCommandLineOptionWithMode(
+      "rpc_bind_addresses",
+      strings::Substitute("0.0.0.0:$0", Master::kDefaultPort).c_str(),
+      SET_FLAGS_DEFAULT));
+  CHECK_NE("", SetCommandLineOptionWithMode(
+      "webserver_port",
+      to_string(Master::kDefaultWebPort).c_str(),
+      SET_FLAGS_DEFAULT));
+  // Even in a small Kudu cluster, masters might be flooded with requests coming
+  // from many clients (those like GetTableSchema are rather small and can be
+  // processed fast, but it might be a bunch of them coming at once).
+  // In addition, TSHeartbeatRequestPB from tablet servers are put into the same
+  // RPC queue (see KUDU-2955). So, it makes sense to increase the default
+  // setting for the RPC service queue length.
+  CHECK_NE("", SetCommandLineOptionWithMode(
+      "rpc_service_queue_length",
+      to_string(kDefaultRpcServiceQueueLength).c_str(),
+      SET_FLAGS_DEFAULT));
   // Setting the default value of the 'force_block_cache_capacity' flag to
   // 'false' makes the corresponding group validator enforce proper settings
   // for the memory limit and the cfile cache capacity.
   CHECK_NE("", SetCommandLineOptionWithMode("force_block_cache_capacity",
                                             "false",
-                                            gflags::SET_FLAGS_DEFAULT));
-
+                                            SET_FLAGS_DEFAULT));
   // A multi-node Master leader should not evict failed Master followers
   // because there is no-one to assign replacement servers in order to maintain
   // the desired replication factor. (It's not turtles all the way down!)
   CHECK_NE("", SetCommandLineOptionWithMode("evict_failed_followers",
                                             "false",
-                                            gflags::SET_FLAGS_DEFAULT));
+                                            SET_FLAGS_DEFAULT));
   // SET_FLAGS_DEFAULT won't reset the flag value if it has previously been
   // set, instead it will only change the default. Because we want to ensure
   // evict_failed_followers is always false, we explicitly set the flag.
diff --git a/src/kudu/tserver/tablet_server.h b/src/kudu/tserver/tablet_server.h
index 71d0923..ad0e2e4 100644
--- a/src/kudu/tserver/tablet_server.h
+++ b/src/kudu/tserver/tablet_server.h
@@ -44,7 +44,6 @@ class TabletServer : public kserver::KuduServer {
   // this constant as well.
   static const uint16_t kDefaultPort = 7050;
   static const uint16_t kDefaultWebPort = 8050;
-  static const uint16_t kDefaultNumServiceThreads = 20;
 
   explicit TabletServer(const TabletServerOptions& opts);
   ~TabletServer();
diff --git a/src/kudu/tserver/tablet_server_runner.cc b/src/kudu/tserver/tablet_server_runner.cc
index feb7b80..b4a7205 100644
--- a/src/kudu/tserver/tablet_server_runner.cc
+++ b/src/kudu/tserver/tablet_server_runner.cc
@@ -17,6 +17,7 @@
 
 #include "kudu/tserver/tablet_server_runner.h"
 
+#include <cstdint>
 #include <iostream>
 #include <string>
 
@@ -32,8 +33,9 @@
 #include "kudu/util/monotime.h"
 #include "kudu/util/version_info.h"
 
-using kudu::tserver::TabletServer;
+using gflags::SET_FLAGS_DEFAULT;
 using std::string;
+using std::to_string;
 
 DEFINE_double(fault_before_start, 0.0,
               "Fake fault flag that always causes a crash on startup. "
@@ -45,22 +47,28 @@ namespace kudu {
 namespace tserver {
 
 void SetTabletServerFlagDefaults() {
+  constexpr int32_t kDefaultNumServiceThreads = 20;
+
   // Reset some default values before parsing gflags.
-  CHECK_NE("", google::SetCommandLineOptionWithMode("rpc_bind_addresses",
+  CHECK_NE("", SetCommandLineOptionWithMode(
+      "rpc_bind_addresses",
       strings::Substitute("0.0.0.0:$0", TabletServer::kDefaultPort).c_str(),
-      google::FlagSettingMode::SET_FLAGS_DEFAULT));
-  CHECK_NE("", google::SetCommandLineOptionWithMode("rpc_num_service_threads",
-      std::to_string(TabletServer::kDefaultNumServiceThreads).c_str(),
-      google::FlagSettingMode::SET_FLAGS_DEFAULT));
-  CHECK_NE("", google::SetCommandLineOptionWithMode("webserver_port",
-      std::to_string(TabletServer::kDefaultWebPort).c_str(),
-      google::FlagSettingMode::SET_FLAGS_DEFAULT));
-
+      SET_FLAGS_DEFAULT));
+  CHECK_NE("", SetCommandLineOptionWithMode(
+      "rpc_num_service_threads",
+      to_string(kDefaultNumServiceThreads).c_str(),
+      SET_FLAGS_DEFAULT));
+  CHECK_NE("", SetCommandLineOptionWithMode(
+      "webserver_port",
+      to_string(TabletServer::kDefaultWebPort).c_str(),
+      SET_FLAGS_DEFAULT));
   // Setting the default value of the 'force_block_cache_capacity' flag to
   // 'false' makes the corresponding group validator enforce proper settings
   // for the memory limit and the cfile cache capacity.
-  CHECK_NE("", SetCommandLineOptionWithMode("force_block_cache_capacity",
-                                            "false", gflags::SET_FLAGS_DEFAULT));
+  CHECK_NE("", SetCommandLineOptionWithMode(
+      "force_block_cache_capacity",
+      "false",
+       SET_FLAGS_DEFAULT));
 }
 
 Status RunTabletServer() {

Reply via email to