This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
     new 1520269ab KUDU-1261 'array_max_elem_num' flag for 'kudu perf loadgen'
1520269ab is described below

commit 1520269ab2fe898e6bf4a05ca5ffdae174a0c5fe
Author: Alexey Serbin <[email protected]>
AuthorDate: Mon Nov 10 15:00:34 2025 -0800

    KUDU-1261 'array_max_elem_num' flag for 'kudu perf loadgen'
    
    Prior to this patch, the `kudu perf loadgen` CLI tool used 256 as the
    hard-coded limit for the maximum possible number of elements in an array
    cell to generate.  With this changelist, it's now configurable using
    the newly introduced flag --array_max_elem_num which is set to 256
    by default.
    
    Change-Id: I831c692b3a997091de17b34fe3e9c092c3386f6d
    Reviewed-on: http://gerrit.cloudera.org:8080/23661
    Reviewed-by: Abhishek Chennaka <[email protected]>
    Tested-by: Alexey Serbin <[email protected]>
---
 src/kudu/tools/kudu-tool-test.cc   | 24 ++++++++++++++++++++++++
 src/kudu/tools/tool_action_perf.cc | 15 +++++++++++----
 2 files changed, 35 insertions(+), 4 deletions(-)

diff --git a/src/kudu/tools/kudu-tool-test.cc b/src/kudu/tools/kudu-tool-test.cc
index 421f0653e..b8f040a92 100644
--- a/src/kudu/tools/kudu-tool-test.cc
+++ b/src/kudu/tools/kudu-tool-test.cc
@@ -1593,6 +1593,7 @@ TEST_F(ToolTest, TestModeHelp) {
     NO_FATALS(RunTestHelpRpcFlags(kCmd, {"loadgen", "table_scan"}));
 
     const vector<string> kLoadgenHelpRegexes = {
+      "-array_max_elem_num.*Maximum number of elements to generate",
       "-auto_database.*The database in which to create the automatically generated",
       "-buffer_flush_watermark_pct.*Mutation buffer flush watermark",
       "-buffer_size_bytes.*Size of the mutation buffer, per session",
@@ -3926,6 +3927,29 @@ TEST_F(ToolTest, LoadgenEnableArrayColumn) {
   NO_FATALS(RunLoadgen(1, { "--enable_array_columns", "--run_scan" }));
 }
 
+// Set the --array_max_elem_num flag to a value higher than its default, but
+// still lower than the default value of the --array_cell_max_elem_num flag
+// at the server side.
+TEST_F(ToolTest, LoadgenMaxNumberElementsInArrayCustom) {
+  NO_FATALS(RunLoadgen(1, { "--enable_array_columns",
+                            "--array_max_elem_num=512",
+                            "--run_scan" }));
+}
+
+// Generate empty arrays (there might be NULL array cells as well).
+TEST_F(ToolTest, LoadgenEmptyArrays) {
+  NO_FATALS(RunLoadgen(1, { "--enable_array_columns",
+                            "--array_max_elem_num=0",
+                            "--run_scan" }));
+}
+
+TEST_F(ToolTest, LoadgenAtMostOneElementInArray) {
+  // Generate at most one element per array cell.
+  NO_FATALS(RunLoadgen(1, { "--enable_array_columns",
+                            "--array_max_elem_num=1",
+                            "--run_scan" }));
+}
+
 TEST_F(ToolTest, TestLoadgenDatabaseName) {
   NO_FATALS(RunLoadgen(1, { "--auto_database=foo", "--keep_auto_table=true" }));
   string out;
diff --git a/src/kudu/tools/tool_action_perf.cc b/src/kudu/tools/tool_action_perf.cc
index 1a8829f13..a4e9c1f2f 100644
--- a/src/kudu/tools/tool_action_perf.cc
+++ b/src/kudu/tools/tool_action_perf.cc
@@ -394,6 +394,10 @@ DEFINE_bool(enable_array_columns, false,
             "If array columns are present in already existing table "
             "specified by the --table_name flag, the tool populates such "
             "columns regardless of this flag's setting.");
+DEFINE_uint32(array_max_elem_num, 256,
+              "Maximum number of elements to generate for a single array cell. "
+              "The tool generates random number of elements per array cell: "
+              "from 0 up to this number minus one.");
 
 DECLARE_bool(show_values);
 DECLARE_int32(num_threads);
@@ -592,9 +596,10 @@ Status PopulateArrayCell(int col_idx,
                          const TypeInfo& elem_tinfo,
                          const string& fixed_string,
                          Generator* gen,
-                         KuduPartialRow* row) {
-  // Up to 256 array elements in one cell.
-  const size_t elem_num = gen->Next<uint8_t>();
+                         KuduPartialRow* row,
+                         uint32_t max_elem_num) {
+  const size_t elem_num =
+      (max_elem_num == 0) ? 0 : gen->Next<uint32_t>() % max_elem_num;
   const auto& column_schema = row->schema()->column(col_idx);
   if (column_schema.is_nullable() && elem_num != 0 && elem_num % 16 == 0) {
     // OK, let it be a null array cell.
@@ -733,7 +738,8 @@ Status GenerateRowData(Generator* key_gen,
       case NESTED:
         if (const auto* elem_tinfo = GetArrayElementTypeInfo(*tinfo);
             PREDICT_TRUE(elem_tinfo)) {
-          RETURN_NOT_OK(PopulateArrayCell(idx, *elem_tinfo, fixed_string, gen, row));
+          RETURN_NOT_OK(PopulateArrayCell(
+              idx, *elem_tinfo, fixed_string, gen, row, FLAGS_array_max_elem_num));
         } else {
           return Status::NotSupported("non-array NESTED columns not supported");
         }
@@ -1173,6 +1179,7 @@ unique_ptr<Mode> BuildPerfMode() {
           "an existing or auto-created table as fast as possible. "
           "If requested, also scan the inserted rows to check whether the "
           "actual count of inserted rows matches the expected one.")
+      .AddOptionalParameter("array_max_elem_num")
       .AddOptionalParameter("auto_database")
       .AddOptionalParameter("buffer_flush_watermark_pct")
       .AddOptionalParameter("buffer_size_bytes")

Reply via email to