This is an automated email from the ASF dual-hosted git repository.
alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git
The following commit(s) were added to refs/heads/master by this push:
new 1520269ab KUDU-1261 'array_max_elem_num' flag for 'kudu perf loadgen'
1520269ab is described below
commit 1520269ab2fe898e6bf4a05ca5ffdae174a0c5fe
Author: Alexey Serbin <[email protected]>
AuthorDate: Mon Nov 10 15:00:34 2025 -0800
KUDU-1261 'array_max_elem_num' flag for 'kudu perf loadgen'
Prior to this patch, the `kudu perf loadgen` CLI tool used 256 as the
hard-coded limit for the maximum possible number of elements in an array
cell to generate. With this changelist, it's now configurable using
the newly introduced flag --array_max_elem_num which is set to 256
by default.
Change-Id: I831c692b3a997091de17b34fe3e9c092c3386f6d
Reviewed-on: http://gerrit.cloudera.org:8080/23661
Reviewed-by: Abhishek Chennaka <[email protected]>
Tested-by: Alexey Serbin <[email protected]>
---
src/kudu/tools/kudu-tool-test.cc | 24 ++++++++++++++++++++++++
src/kudu/tools/tool_action_perf.cc | 15 +++++++++++----
2 files changed, 35 insertions(+), 4 deletions(-)
diff --git a/src/kudu/tools/kudu-tool-test.cc b/src/kudu/tools/kudu-tool-test.cc
index 421f0653e..b8f040a92 100644
--- a/src/kudu/tools/kudu-tool-test.cc
+++ b/src/kudu/tools/kudu-tool-test.cc
@@ -1593,6 +1593,7 @@ TEST_F(ToolTest, TestModeHelp) {
NO_FATALS(RunTestHelpRpcFlags(kCmd, {"loadgen", "table_scan"}));
const vector<string> kLoadgenHelpRegexes = {
+ "-array_max_elem_num.*Maximum number of elements to generate",
"-auto_database.*The database in which to create the automatically generated",
"-buffer_flush_watermark_pct.*Mutation buffer flush watermark",
"-buffer_size_bytes.*Size of the mutation buffer, per session",
@@ -3926,6 +3927,29 @@ TEST_F(ToolTest, LoadgenEnableArrayColumn) {
NO_FATALS(RunLoadgen(1, { "--enable_array_columns", "--run_scan" }));
}
+// Customize the setting for --array_max_elem_num flag with higher than the
+// default setting, but still lower than the default value for the
+// --array_cell_max_elem_num flag at the server side.
+TEST_F(ToolTest, LoadgenMaxNumberElementsInArrayCustom) {
+ NO_FATALS(RunLoadgen(1, { "--enable_array_columns",
+ "--array_max_elem_num=512",
+ "--run_scan" }));
+}
+
+// Generate empty arrays (there might be a NULL array cells as well).
+TEST_F(ToolTest, LoadgenEmptyArrays) {
+ NO_FATALS(RunLoadgen(1, { "--enable_array_columns",
+ "--array_max_elem_num=0",
+ "--run_scan" }));
+}
+
+TEST_F(ToolTest, LoadgenAtMostOneElementInArray) {
+ // Generate at most one element per array cell.
+ NO_FATALS(RunLoadgen(1, { "--enable_array_columns",
+ "--array_max_elem_num=1",
+ "--run_scan" }));
+}
+
TEST_F(ToolTest, TestLoadgenDatabaseName) {
NO_FATALS(RunLoadgen(1, { "--auto_database=foo", "--keep_auto_table=true" }));
string out;
diff --git a/src/kudu/tools/tool_action_perf.cc b/src/kudu/tools/tool_action_perf.cc
index 1a8829f13..a4e9c1f2f 100644
--- a/src/kudu/tools/tool_action_perf.cc
+++ b/src/kudu/tools/tool_action_perf.cc
@@ -394,6 +394,10 @@ DEFINE_bool(enable_array_columns, false,
"If array columns are present in already existing table "
"specified by the --table_name flag, the tool populates such "
"columns regardless of this flag's setting.");
+DEFINE_uint32(array_max_elem_num, 256,
+ "Maximum number of elements to generate for a single array cell. "
+ "The tool generates random number of elements per array cell: "
+ "from 0 up to this number minus one.");
DECLARE_bool(show_values);
DECLARE_int32(num_threads);
@@ -592,9 +596,10 @@ Status PopulateArrayCell(int col_idx,
const TypeInfo& elem_tinfo,
const string& fixed_string,
Generator* gen,
- KuduPartialRow* row) {
- // Up to 256 array elements in one cell.
- const size_t elem_num = gen->Next<uint8_t>();
+ KuduPartialRow* row,
+ uint32_t max_elem_num) {
+ const size_t elem_num =
+ (max_elem_num == 0) ? 0 : gen->Next<uint32_t>() % max_elem_num;
const auto& column_schema = row->schema()->column(col_idx);
if (column_schema.is_nullable() && elem_num != 0 && elem_num % 16 == 0) {
// OK, let it be a null array cell.
@@ -733,7 +738,8 @@ Status GenerateRowData(Generator* key_gen,
case NESTED:
if (const auto* elem_tinfo = GetArrayElementTypeInfo(*tinfo);
PREDICT_TRUE(elem_tinfo)) {
- RETURN_NOT_OK(PopulateArrayCell(idx, *elem_tinfo, fixed_string, gen, row));
+ RETURN_NOT_OK(PopulateArrayCell(
+ idx, *elem_tinfo, fixed_string, gen, row, FLAGS_array_max_elem_num));
} else {
return Status::NotSupported("non-array NESTED columns not supported");
}
@@ -1173,6 +1179,7 @@ unique_ptr<Mode> BuildPerfMode() {
"an existing or auto-created table as fast as possible. "
"If requested, also scan the inserted rows to check whether the "
"actual count of inserted rows matches the expected one.")
+ .AddOptionalParameter("array_max_elem_num")
.AddOptionalParameter("auto_database")
.AddOptionalParameter("buffer_flush_watermark_pct")
.AddOptionalParameter("buffer_size_bytes")