This is an automated email from the ASF dual-hosted git repository.
alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git
The following commit(s) were added to refs/heads/master by this push:
new 50b1cc45f [tools] Add '--create_table_replication_factor' flag for
'kudu table copy'
50b1cc45f is described below
commit 50b1cc45f9fde3deac5aa0fef216f4950246b2c9
Author: Yingchun Lai <[email protected]>
AuthorDate: Mon Apr 25 16:43:59 2022 +0800
[tools] Add '--create_table_replication_factor' flag for 'kudu table copy'
Now it's possible to specify the replication factor for the
destination table when copying a table.
Example usage scenario: when copying a table with RF=3 from a cluster
with multiple tservers to a cluster with only one tserver, set
--create_table_replication_factor=1.
Change-Id: I9a4eebdcf85b5ec3666e023194b8c06d66b0a683
Reviewed-on: http://gerrit.cloudera.org:8080/18446
Tested-by: Kudu Jenkins
Reviewed-by: Alexey Serbin <[email protected]>
---
src/kudu/tools/kudu-tool-test.cc | 50 +++++++++++++++++++++++++++++--------
src/kudu/tools/table_scanner.cc | 9 +++++--
src/kudu/tools/tool_action_table.cc | 15 +++++++++++
3 files changed, 62 insertions(+), 12 deletions(-)
diff --git a/src/kudu/tools/kudu-tool-test.cc b/src/kudu/tools/kudu-tool-test.cc
index c87d39c55..94ed6dbfe 100644
--- a/src/kudu/tools/kudu-tool-test.cc
+++ b/src/kudu/tools/kudu-tool-test.cc
@@ -517,6 +517,7 @@ class ToolTest : public KuduTest {
int64_t max_value;
string columns;
TableCopyMode mode;
+ int32_t create_table_replication_factor;
};
void RunCopyTableCheck(const RunCopyTableCheckArgs& args) {
@@ -566,14 +567,15 @@ class ToolTest : public KuduTest {
string stdout;
NO_FATALS(RunActionStdoutString(
Substitute("table copy $0 $1 $2 -dst_table=$3 -predicates=$4
-write_type=$5 "
- "-create_table=$6",
+ "-create_table=$6
-create_table_replication_factor=$7",
cluster_->master()->bound_rpc_addr().ToString(),
args.src_table_name,
cluster_->master()->bound_rpc_addr().ToString(),
kDstTableName,
args.predicates_json,
write_type,
- create_table),
+ create_table,
+ args.create_table_replication_factor),
&stdout));
// Check total count.
@@ -599,10 +601,15 @@ class ToolTest : public KuduTest {
cluster_->master()->bound_rpc_addr().ToString(),
kDstTableName), &dst_schema));
- // Remove the first lines, which are the different table names.
- src_schema.erase(src_schema.begin());
- dst_schema.erase(dst_schema.begin());
- ASSERT_EQ(src_schema, dst_schema);
+ ASSERT_EQ(src_schema.size(), dst_schema.size());
+ for (int i = 0; i < src_schema.size(); ++i) {
+ // Table name is different.
+ if (HasPrefixString(src_schema[i], "TABLE ")) continue;
+ // The replication factor differs when it is explicitly set to 3
(default 1).
+ if (args.create_table_replication_factor == 3 &&
+ HasPrefixString(src_schema[i], "REPLICAS ")) continue;
+ ASSERT_EQ(src_schema[i], dst_schema[i]);
+ }
}
// Check all values.
@@ -719,7 +726,13 @@ class ToolTestCopyTableParameterized :
public:
void SetUp() override {
test_case_ = GetParam();
- NO_FATALS(StartExternalMiniCluster());
+ ExternalMiniClusterOptions opts;
+ if (test_case_ == kTestCopyTableSchemaOnly) {
+ // In the kTestCopyTableSchemaOnly case, a table may be created with RF=3,
+ // which requires at least 3 tservers.
+ opts.num_tablet_servers = 3;
+ }
+ NO_FATALS(StartExternalMiniCluster(opts));
// Create the src table and write some data to it.
TestWorkload ww(cluster_.get());
@@ -757,7 +770,8 @@ class ToolTestCopyTableParameterized :
1,
total_rows_,
kSimpleSchemaColumns,
- TableCopyMode::INSERT_TO_EXIST_TABLE };
+ TableCopyMode::INSERT_TO_EXIST_TABLE,
+ -1 };
switch (test_case_) {
case kTestCopyTableDstTableExist:
return { args };
@@ -767,9 +781,25 @@ class ToolTestCopyTableParameterized :
case kTestCopyTableUpsert:
args.mode = TableCopyMode::UPSERT_TO_EXIST_TABLE;
return { args };
- case kTestCopyTableSchemaOnly:
+ case kTestCopyTableSchemaOnly: {
args.mode = TableCopyMode::COPY_SCHEMA_ONLY;
- return { args };
+ vector<RunCopyTableCheckArgs> multi_args;
+ {
+ auto args_temp = args;
+ multi_args.emplace_back(std::move(args_temp));
+ }
+ {
+ auto args_temp = args;
+ args_temp.create_table_replication_factor = 1;
+ multi_args.emplace_back(std::move(args_temp));
+ }
+ {
+ auto args_temp = args;
+ args_temp.create_table_replication_factor = 3;
+ multi_args.emplace_back(std::move(args_temp));
+ }
+ return multi_args;
+ }
case kTestCopyTableComplexSchema:
args.columns = kComplexSchemaColumns;
args.mode = TableCopyMode::INSERT_TO_NOT_EXIST_TABLE;
diff --git a/src/kudu/tools/table_scanner.cc b/src/kudu/tools/table_scanner.cc
index 8be3b4c15..f3e8beb73 100644
--- a/src/kudu/tools/table_scanner.cc
+++ b/src/kudu/tools/table_scanner.cc
@@ -88,6 +88,9 @@ using strings::Substitute;
DEFINE_bool(create_table, true,
"Whether to create the destination table if it doesn't exist.");
+DEFINE_int32(create_table_replication_factor, -1,
+ "The replication factor of the destination table if the table
will be created. "
+ "By default, the replication factor of source table will be
used.");
DEFINE_bool(fill_cache, true,
"Whether to fill block cache when scanning.");
DEFINE_string(predicates, "",
@@ -116,7 +119,7 @@ DEFINE_bool(show_values, false,
DEFINE_string(write_type, "insert",
"How data should be copied to the destination table. Valid
values are 'insert', "
"'upsert' or the empty string. If the empty string, data will
not be copied "
- "(useful when create_table is 'true').");
+ "(useful when --create_table=true).");
DEFINE_string(replica_selection, "CLOSEST",
"Replica selection for scan operations. Acceptable values are: "
"CLOSEST, LEADER (maps into KuduClient::CLOSEST_REPLICA and "
@@ -415,10 +418,12 @@ Status CreateDstTableIfNeeded(const
client::sp::shared_ptr<KuduTable>& src_table
};
// Table schema and replica number.
+ int num_replicas = FLAGS_create_table_replication_factor == -1 ?
+ src_table->num_replicas() : FLAGS_create_table_replication_factor;
unique_ptr<KuduTableCreator> table_creator(dst_client->NewTableCreator());
table_creator->table_name(dst_table_name)
.schema(&dst_table_schema)
- .num_replicas(src_table->num_replicas());
+ .num_replicas(num_replicas);
// Add hash partition schema.
for (const auto& hash_dimension : partition_schema.hash_schema()) {
diff --git a/src/kudu/tools/tool_action_table.cc
b/src/kudu/tools/tool_action_table.cc
index b9e6627dd..87ee2a5f1 100644
--- a/src/kudu/tools/tool_action_table.cc
+++ b/src/kudu/tools/tool_action_table.cc
@@ -58,6 +58,7 @@
#include "kudu/tools/tool.pb.h"
#include "kudu/tools/tool_action.h"
#include "kudu/tools/tool_action_common.h"
+#include "kudu/util/flag_validators.h"
#include "kudu/util/jsonreader.h"
#include "kudu/util/jsonwriter.h"
#include "kudu/util/status.h"
@@ -130,6 +131,8 @@ DEFINE_bool(show_avro_format_schema, false,
"table schema in Avro format without any other information like "
"partition/owner/comments. It cannot be used in conjunction with
other flags");
+DECLARE_bool(create_table);
+DECLARE_int32(create_table_replication_factor);
DECLARE_bool(row_count_only);
DECLARE_bool(show_scanner_stats);
@@ -146,6 +149,17 @@ DECLARE_bool(show_values);
DECLARE_string(replica_selection);
DECLARE_string(tables);
+bool ValidateCreateTable() {
+ if (!FLAGS_create_table && FLAGS_create_table_replication_factor != -1) {
+ LOG(ERROR) << Substitute("--create_table_replication_factor is meaningless
"
+ "when --create_table=false");
+ return false;
+ }
+ return true;
+}
+
+GROUP_FLAG_VALIDATOR(create_table, ValidateCreateTable);
+
namespace kudu {
namespace tools {
@@ -1553,6 +1567,7 @@ unique_ptr<Mode> BuildTableMode() {
.AddRequiredParameter({ kTableNameArg, "Name of the source table" })
.AddRequiredParameter({ kDestMasterAddressesArg,
kDestMasterAddressesArgDesc })
.AddOptionalParameter("create_table")
+ .AddOptionalParameter("create_table_replication_factor")
.AddOptionalParameter("dst_table")
.AddOptionalParameter("num_threads")
.AddOptionalParameter("predicates")