This is an automated email from the ASF dual-hosted git repository.
bankim pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git
The following commit(s) were added to refs/heads/master by this push:
new ac8e228 [loadgen] Separate flags to insert random values for PK and
non-PK cols
ac8e228 is described below
commit ac8e2281419a5aab975478595cf0f7c1c206ece0
Author: Bankim Bhavsar <[email protected]>
AuthorDate: Wed Jul 29 11:59:54 2020 -0700
[loadgen] Separate flags to insert random values for PK and non-PK cols
With the --use_random option in the loadgen tool, both primary key and
non-primary key columns are populated with random values.
This increases the possibility of collisions for primary key columns
when inserting a large number of rows.
This change adds two new options:
--use_random_non_pk, which uses random numbers only for non-primary key
columns, allowing creation of a large number of rows with random values,
and --use_random_pk, which uses random numbers for primary key columns.
--use_random is marked as deprecated in the description, and if this
option is specified along with --use_random_non_pk or --use_random_pk,
then --use_random will be ignored.
Change-Id: I35bb33a192baa9e6e67e85bcbd5ca7164ba154e4
Reviewed-on: http://gerrit.cloudera.org:8080/16253
Reviewed-by: Alexey Serbin <[email protected]>
Tested-by: Kudu Jenkins
---
src/kudu/tools/kudu-tool-test.cc | 80 +++++++++++++++++++++++++++++++-------
src/kudu/tools/tool_action_perf.cc | 38 ++++++++++++++----
2 files changed, 98 insertions(+), 20 deletions(-)
diff --git a/src/kudu/tools/kudu-tool-test.cc b/src/kudu/tools/kudu-tool-test.cc
index 9061209..b51160c 100644
--- a/src/kudu/tools/kudu-tool-test.cc
+++ b/src/kudu/tools/kudu-tool-test.cc
@@ -638,6 +638,8 @@ class ToolTest : public KuduTest {
}
protected:
+ // Note: Each test case must have a single invocation of RunLoadgen()
otherwise it leads to
+ // memory leaks.
void RunLoadgen(int num_tservers = 1,
const vector<string>& tool_args = {},
const string& table_name = "",
@@ -2225,21 +2227,73 @@ TEST_F(ToolTest,
TestLoadgenAutoFlushBackgroundSequential) {
"bench_auto_flush_background_sequential"));
}
-// Run loadgen benchmark in AUTO_FLUSH_BACKGROUND mode, randomized values.
-TEST_F(ToolTest, TestLoadgenAutoFlushBackgroundRandom) {
- NO_FATALS(RunLoadgen(5,
+// Run loadgen benchmark in AUTO_FLUSH_BACKGROUND mode with randomized keys
and values
+// using the deprecated --use_random option.
+TEST_F(ToolTest, TestLoadgenAutoFlushBackgroundRandomKeysValuesDeprecated) {
+ NO_FATALS(RunLoadgen(
+ 5,
{
- "--buffer_flush_watermark_pct=0.125",
- "--buffer_size_bytes=65536",
- "--buffers_num=8",
- // small number of rows to avoid collisions: it's random generation
mode
- "--num_rows_per_thread=16",
- "--num_threads=1",
- "--run_scan",
- "--string_len=8",
- "--use_random",
+ // Small number of rows to avoid collisions: it's random generation
mode
+ "--num_rows_per_thread=16",
+ "--num_threads=1",
+ "--run_scan",
+ "--string_len=8",
+ "--use_random",
+ },
+ "bench_auto_flush_background_random_keys_values_deprecated"));
+}
+
+// Run loadgen benchmark in AUTO_FLUSH_BACKGROUND mode with randomized keys
+// using the --use_random_pk option.
+TEST_F(ToolTest, TestLoadgenAutoFlushBackgroundRandomKeys) {
+ NO_FATALS(RunLoadgen(
+ 5,
+ {
+ // Small number of rows to avoid collisions: it's random generation
mode
+ "--num_rows_per_thread=16",
+ "--num_threads=1",
+ "--run_scan",
+ "--string_len=8",
+ "--use_random_pk",
+ },
+ "bench_auto_flush_background_random_keys"));
+}
+
+// Run loadgen benchmark in AUTO_FLUSH_BACKGROUND mode with randomized values
+// using the --use_random_non_pk option.
+TEST_F(ToolTest, TestLoadgenAutoFlushBackgroundRandomValues) {
+ NO_FATALS(RunLoadgen(
+ 5,
+ {
+ // Large number of rows are okay since only non-pk columns will use
random values.
+ "--num_rows_per_thread=4096",
+ "--num_threads=2",
+ "--run_scan",
+ "--string_len=8",
+ "--use_random_non_pk",
+ },
+ "bench_auto_flush_background_random_values"));
+}
+
+// Run loadgen benchmark in AUTO_FLUSH_BACKGROUND mode with randomized values
+// using the --use_random_non_pk option combined with the deprecated
--use_random option.
+TEST_F(ToolTest, TestLoadgenAutoFlushBackgroundRandomValuesIgnoreDeprecated) {
+ // Test to ensure if both '--use_random' and
'--use_random_non_pk'/'--use_random_pk' are
+ // specified then '--use_random' is ignored.
+ NO_FATALS(RunLoadgen(
+ 5,
+ {
+ // Large number of rows are okay since only non-pk columns will use
random values and
+ // the deprecated '--use_random' will be ignored when used with
'--use_random_non_pk'.
+ "--num_rows_per_thread=4096",
+ "--num_threads=2",
+ "--run_scan",
+ "--string_len=8",
+ // Combining deprecated --use_random option with new
--use_random_non_pk option.
+ "--use_random",
+ "--use_random_non_pk",
},
- "bench_auto_flush_background_random"));
+ "bench_auto_flush_background_random_values_ignore_deprecated"));
}
// Run the loadgen benchmark in MANUAL_FLUSH mode.
diff --git a/src/kudu/tools/tool_action_perf.cc
b/src/kudu/tools/tool_action_perf.cc
index 8685253..5815f69 100644
--- a/src/kudu/tools/tool_action_perf.cc
+++ b/src/kudu/tools/tool_action_perf.cc
@@ -355,9 +355,18 @@ DEFINE_int32(table_num_replicas, 1,
"The number of replicas for the auto-created table; "
"0 means 'use server-side default'.");
DEFINE_bool(use_random, false,
- "Whether to use random numbers instead of sequential ones. "
- "In case of using random numbers collisions are possible over "
- "the data for columns with unique constraint (e.g. primary key).");
+ "Whether to use random numbers instead of sequential ones for both
primary keys and "
+ "non-primary key columns. In case of using random numbers
collisions are "
+ "possible over the data for columns with unique constraint (e.g.
primary key). "
+ "This option has been deprecated, use '--use_random_pk' and/or
'--use_random_non_pk' "
+ "instead. If either '--use_random_pk' or '--use_random_non_pk' is
specified with "
+ "'--use_random' then this option will be ignored.");
+DEFINE_bool(use_random_pk, false,
+ "Whether to use random numbers instead of sequential ones for
primary key "
+ "columns. Using random numbers may cause collisions over primary
key columns.");
+DEFINE_bool(use_random_non_pk, false,
+ "Whether to use random numbers instead of sequential ones for
non-primary key "
+ "columns.");
namespace kudu {
namespace tools {
@@ -549,8 +558,21 @@ WriteResults GeneratorThread(const
client::sp::shared_ptr<KuduClient>& client,
const string& table_name,
size_t gen_idx,
KuduWriteOperation::Type op_type) {
- const Generator::Mode gen_mode = FLAGS_use_random ? Generator::MODE_RAND
- : Generator::MODE_SEQ;
+ Generator::Mode key_gen_mode = Generator::MODE_SEQ;
+ Generator::Mode value_gen_mode = Generator::MODE_SEQ;
+
+ if (FLAGS_use_random_pk || FLAGS_use_random_non_pk) {
+ // Honor the non-default values for new flags ignoring the old deprecated
FLAGS_use_random.
+ if (FLAGS_use_random_pk) {
+ key_gen_mode = Generator::MODE_RAND;
+ }
+ if (FLAGS_use_random_non_pk) {
+ value_gen_mode = Generator::MODE_RAND;
+ }
+ } else if (FLAGS_use_random) {
+ key_gen_mode = value_gen_mode = Generator::MODE_RAND;
+ }
+
const size_t flush_per_n_rows = FLAGS_flush_per_n_rows;
const uint64_t gen_seq_start = FLAGS_seq_start;
client::sp::shared_ptr<KuduSession> session(client->NewSession());
@@ -578,8 +600,8 @@ WriteResults GeneratorThread(const
client::sp::shared_ptr<KuduClient>& client,
// in sequential generation mode.
const int64_t gen_span =
SpanPerThread(KuduSchema::ToSchema(table->schema()).num_key_columns());
const int64_t gen_seed = gen_idx * gen_span + gen_seq_start;
- Generator key_gen(gen_mode, gen_seed, FLAGS_string_len);
- Generator value_gen(gen_mode, gen_seed, FLAGS_string_len);
+ Generator key_gen(key_gen_mode, gen_seed, FLAGS_string_len);
+ Generator value_gen(value_gen_mode, gen_seed, FLAGS_string_len);
for (; num_rows_per_gen < 0 || idx < num_rows_per_gen; ++idx) {
switch (op_type) {
case KuduWriteOperation::Type::INSERT: {
@@ -924,6 +946,8 @@ unique_ptr<Mode> BuildPerfMode() {
.AddOptionalParameter("table_num_replicas")
.AddOptionalParameter("use_client_per_thread")
.AddOptionalParameter("use_random")
+ .AddOptionalParameter("use_random_pk")
+ .AddOptionalParameter("use_random_non_pk")
.Build();
unique_ptr<Action> table_scan =