This is an automated email from the ASF dual-hosted git repository.

bankim pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
     new ac8e228  [loadgen] Separate flags to insert random values for PK and 
non-PK cols
ac8e228 is described below

commit ac8e2281419a5aab975478595cf0f7c1c206ece0
Author: Bankim Bhavsar <[email protected]>
AuthorDate: Wed Jul 29 11:59:54 2020 -0700

    [loadgen] Separate flags to insert random values for PK and non-PK cols
    
    With the --use_random option in the loadgen tool, both primary key and
    non-primary key columns are populated with random values.
    This increases the likelihood of collisions among primary key values
    when inserting a large number of rows.
    
    This change adds two new options:
    --use_random_non_pk, which uses random numbers only for non-primary key
    columns, allowing creation of a large number of rows with random values,
    and --use_random_pk, which uses random numbers for primary key columns.
    
    --use_random is marked as deprecated in its description; if it is
    specified along with --use_random_non_pk or --use_random_pk,
    --use_random is ignored.
    
    Change-Id: I35bb33a192baa9e6e67e85bcbd5ca7164ba154e4
    Reviewed-on: http://gerrit.cloudera.org:8080/16253
    Reviewed-by: Alexey Serbin <[email protected]>
    Tested-by: Kudu Jenkins
---
 src/kudu/tools/kudu-tool-test.cc   | 80 +++++++++++++++++++++++++++++++-------
 src/kudu/tools/tool_action_perf.cc | 38 ++++++++++++++----
 2 files changed, 98 insertions(+), 20 deletions(-)

diff --git a/src/kudu/tools/kudu-tool-test.cc b/src/kudu/tools/kudu-tool-test.cc
index 9061209..b51160c 100644
--- a/src/kudu/tools/kudu-tool-test.cc
+++ b/src/kudu/tools/kudu-tool-test.cc
@@ -638,6 +638,8 @@ class ToolTest : public KuduTest {
   }
 
  protected:
+  // Note: Each test case must have a single invocation of RunLoadgen() 
otherwise it leads to
+  //       memory leaks.
   void RunLoadgen(int num_tservers = 1,
                   const vector<string>& tool_args = {},
                   const string& table_name = "",
@@ -2225,21 +2227,73 @@ TEST_F(ToolTest, 
TestLoadgenAutoFlushBackgroundSequential) {
       "bench_auto_flush_background_sequential"));
 }
 
-// Run loadgen benchmark in AUTO_FLUSH_BACKGROUND mode, randomized values.
-TEST_F(ToolTest, TestLoadgenAutoFlushBackgroundRandom) {
-  NO_FATALS(RunLoadgen(5,
+// Run loadgen benchmark in AUTO_FLUSH_BACKGROUND mode with randomized keys 
and values
+// using the deprecated --use_random option.
+TEST_F(ToolTest, TestLoadgenAutoFlushBackgroundRandomKeysValuesDeprecated) {
+  NO_FATALS(RunLoadgen(
+      5,
       {
-        "--buffer_flush_watermark_pct=0.125",
-        "--buffer_size_bytes=65536",
-        "--buffers_num=8",
-        // small number of rows to avoid collisions: it's random generation 
mode
-        "--num_rows_per_thread=16",
-        "--num_threads=1",
-        "--run_scan",
-        "--string_len=8",
-        "--use_random",
+          // Small number of rows to avoid collisions: it's random generation 
mode
+          "--num_rows_per_thread=16",
+          "--num_threads=1",
+          "--run_scan",
+          "--string_len=8",
+          "--use_random",
+      },
+      "bench_auto_flush_background_random_keys_values_deprecated"));
+}
+
+// Run loadgen benchmark in AUTO_FLUSH_BACKGROUND mode with randomized keys
+// using the --use_random_pk option.
+TEST_F(ToolTest, TestLoadgenAutoFlushBackgroundRandomKeys) {
+  NO_FATALS(RunLoadgen(
+      5,
+      {
+          // Small number of rows to avoid collisions: it's random generation 
mode
+          "--num_rows_per_thread=16",
+          "--num_threads=1",
+          "--run_scan",
+          "--string_len=8",
+          "--use_random_pk",
+      },
+      "bench_auto_flush_background_random_keys"));
+}
+
+// Run loadgen benchmark in AUTO_FLUSH_BACKGROUND mode with randomized values
+// using the --use_random_non_pk option.
+TEST_F(ToolTest, TestLoadgenAutoFlushBackgroundRandomValues) {
+  NO_FATALS(RunLoadgen(
+      5,
+      {
+          // Large number of rows are okay since only non-pk columns will use 
random values.
+          "--num_rows_per_thread=4096",
+          "--num_threads=2",
+          "--run_scan",
+          "--string_len=8",
+          "--use_random_non_pk",
+      },
+      "bench_auto_flush_background_random_values"));
+}
+
+// Run loadgen benchmark in AUTO_FLUSH_BACKGROUND mode with randomized values
+// using the --use_random_non_pk option combined with the deprecated 
--use_random option.
+TEST_F(ToolTest, TestLoadgenAutoFlushBackgroundRandomValuesIgnoreDeprecated) {
+  // Test to ensure if both '--use_random' and 
'--use_random_non_pk'/'--use_random_pk' are
+  // specified then '--use_random' is ignored.
+  NO_FATALS(RunLoadgen(
+      5,
+      {
+          // Large number of rows are okay since only non-pk columns will use 
random values and
+          // the deprecated '--use_random' will be ignored when used with 
'--use_random_non_pk'.
+          "--num_rows_per_thread=4096",
+          "--num_threads=2",
+          "--run_scan",
+          "--string_len=8",
+          // Combining deprecated --use_random option with new 
--use_random_non_pk option.
+          "--use_random",
+          "--use_random_non_pk",
       },
-      "bench_auto_flush_background_random"));
+      "bench_auto_flush_background_random_values_ignore_deprecated"));
 }
 
 // Run the loadgen benchmark in MANUAL_FLUSH mode.
diff --git a/src/kudu/tools/tool_action_perf.cc 
b/src/kudu/tools/tool_action_perf.cc
index 8685253..5815f69 100644
--- a/src/kudu/tools/tool_action_perf.cc
+++ b/src/kudu/tools/tool_action_perf.cc
@@ -355,9 +355,18 @@ DEFINE_int32(table_num_replicas, 1,
              "The number of replicas for the auto-created table; "
              "0 means 'use server-side default'.");
 DEFINE_bool(use_random, false,
-            "Whether to use random numbers instead of sequential ones. "
-            "In case of using random numbers collisions are possible over "
-            "the data for columns with unique constraint (e.g. primary key).");
+            "Whether to use random numbers instead of sequential ones for both 
primary keys and "
+            "non-primary key columns. In case of using random numbers 
collisions are "
+            "possible over the data for columns with unique constraint (e.g. 
primary key). "
+            "This option has been deprecated, use '--use_random_pk' and/or 
'--use_random_non_pk' "
+            "instead. If either '--use_random_pk' or '--use_random_non_pk' is 
specified with "
+            "'--use_random' then this option will be ignored.");
+DEFINE_bool(use_random_pk, false,
+            "Whether to use random numbers instead of sequential ones for 
primary key "
+            "columns. Using random numbers may cause collisions over primary 
key columns.");
+DEFINE_bool(use_random_non_pk, false,
+            "Whether to use random numbers instead of sequential ones for 
non-primary key "
+            "columns.");
 
 namespace kudu {
 namespace tools {
@@ -549,8 +558,21 @@ WriteResults GeneratorThread(const 
client::sp::shared_ptr<KuduClient>& client,
                              const string& table_name,
                              size_t gen_idx,
                              KuduWriteOperation::Type op_type) {
-  const Generator::Mode gen_mode = FLAGS_use_random ? Generator::MODE_RAND
-                                                    : Generator::MODE_SEQ;
+  Generator::Mode key_gen_mode = Generator::MODE_SEQ;
+  Generator::Mode value_gen_mode = Generator::MODE_SEQ;
+
+  if (FLAGS_use_random_pk || FLAGS_use_random_non_pk) {
+    // Honor the non-default values for new flags ignoring the old deprecated 
FLAGS_use_random.
+    if (FLAGS_use_random_pk) {
+      key_gen_mode = Generator::MODE_RAND;
+    }
+    if (FLAGS_use_random_non_pk) {
+      value_gen_mode = Generator::MODE_RAND;
+    }
+  } else if (FLAGS_use_random) {
+    key_gen_mode = value_gen_mode = Generator::MODE_RAND;
+  }
+
   const size_t flush_per_n_rows = FLAGS_flush_per_n_rows;
   const uint64_t gen_seq_start = FLAGS_seq_start;
   client::sp::shared_ptr<KuduSession> session(client->NewSession());
@@ -578,8 +600,8 @@ WriteResults GeneratorThread(const 
client::sp::shared_ptr<KuduClient>& client,
     // in sequential generation mode.
     const int64_t gen_span = 
SpanPerThread(KuduSchema::ToSchema(table->schema()).num_key_columns());
     const int64_t gen_seed = gen_idx * gen_span + gen_seq_start;
-    Generator key_gen(gen_mode, gen_seed, FLAGS_string_len);
-    Generator value_gen(gen_mode, gen_seed, FLAGS_string_len);
+    Generator key_gen(key_gen_mode, gen_seed, FLAGS_string_len);
+    Generator value_gen(value_gen_mode, gen_seed, FLAGS_string_len);
     for (; num_rows_per_gen < 0 || idx < num_rows_per_gen; ++idx) {
       switch (op_type) {
         case KuduWriteOperation::Type::INSERT: {
@@ -924,6 +946,8 @@ unique_ptr<Mode> BuildPerfMode() {
           .AddOptionalParameter("table_num_replicas")
           .AddOptionalParameter("use_client_per_thread")
           .AddOptionalParameter("use_random")
+          .AddOptionalParameter("use_random_pk")
+          .AddOptionalParameter("use_random_non_pk")
           .Build();
 
   unique_ptr<Action> table_scan =

Reply via email to