This is an automated email from the ASF dual-hosted git repository.
laiyingchun pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git
The following commit(s) were added to refs/heads/master by this push:
new 247535e5e Allow creating a table with empty range partition
247535e5e is described below
commit 247535e5ee93f6219c7ba00a1051524b2079411e
Author: xinghuayu007 <[email protected]>
AuthorDate: Thu Apr 18 17:11:58 2024 +0800
Allow creating a table with empty range partition
Currently, when creating a table without setting a range partition,
Kudu creates an unbounded partition by default. In some cases,
this behavior is not suitable. For example, a user may want to create
a table with no partitions and add a new partition daily to
import data into Kudu. Besides, copying a table with no partitions
to another cluster creates a table with an unbounded partition when
using the CLI command 'kudu table copy', which changes the original
table structure.
Therefore, it is better to support creating a table with an empty
set of range partitions, that is, without any partitions.
Change-Id: I480072a693e5b6f1a13f86cb1475cff0452472e7
Reviewed-on: http://gerrit.cloudera.org:8080/21328
Reviewed-by: Yingchun Lai <[email protected]>
Tested-by: Yingchun Lai <[email protected]>
---
src/kudu/client/client.cc | 11 ++++++-
src/kudu/client/client.h | 8 ++++++
src/kudu/client/table_creator-internal.h | 2 ++
src/kudu/common/partition.cc | 13 ++++++---
src/kudu/common/partition.h | 10 +++++--
src/kudu/master/catalog_manager.cc | 3 +-
src/kudu/master/master.proto | 3 ++
src/kudu/tools/kudu-tool-test.cc | 49 ++++++++++++++++++++++++++++++++
8 files changed, 90 insertions(+), 9 deletions(-)
diff --git a/src/kudu/client/client.cc b/src/kudu/client/client.cc
index d5b5740cc..eab9b5bb4 100644
--- a/src/kudu/client/client.cc
+++ b/src/kudu/client/client.cc
@@ -935,6 +935,12 @@ KuduTableCreator& KuduTableCreator::set_comment(const
string& comment) {
return *this;
}
+// Records whether the table may be created without any range partition.
+// Create() copies the value into the request only if this setter was called.
+KuduTableCreator& KuduTableCreator::set_allow_empty_partition(
+ bool allow_empty_partition) {
+ data_->allow_empty_partition_ = allow_empty_partition;
+ return *this;
+}
+
KuduTableCreator& KuduTableCreator::split_rows(const vector<const
KuduPartialRow*>& rows) {
for (const KuduPartialRow* row : rows) {
data_->range_partition_splits_.emplace_back(const_cast<KuduPartialRow*>(row));
@@ -1021,6 +1027,10 @@ Status KuduTableCreator::Create() {
if (data_->comment_) {
req.set_comment(*data_->comment_);
}
+ if (data_->allow_empty_partition_) {
+ req.set_allow_empty_partition(*data_->allow_empty_partition_);
+ }
+
RETURN_NOT_OK_PREPEND(SchemaToPB(*data_->schema_->schema_,
req.mutable_schema(),
SCHEMA_PB_WITHOUT_WRITE_DEFAULT),
"Invalid schema");
@@ -1053,7 +1063,6 @@ Status KuduTableCreator::Create() {
auto* partition_schema = req.mutable_partition_schema();
partition_schema->CopyFrom(data_->partition_schema_);
-
for (const auto& p : data_->range_partitions_) {
const auto* range = p->data_;
if (!range->lower_bound_ || !range->upper_bound_) {
diff --git a/src/kudu/client/client.h b/src/kudu/client/client.h
index 1e28d29a8..953c6b90e 100644
--- a/src/kudu/client/client.h
+++ b/src/kudu/client/client.h
@@ -1393,6 +1393,14 @@ class KUDU_EXPORT KuduTableCreator {
/// @return Reference to the modified table creator.
KuduTableCreator& set_comment(const std::string& comment);
+
+ /// Set whether the table may be created without any range partition.
+ ///
+ /// @param [in] allow_empty_partition
+ /// If @c true, allow creating a table that has no range partitions.
+ /// @return Reference to the modified table creator.
+ KuduTableCreator& set_allow_empty_partition(bool allow_empty_partition);
+
/// @deprecated Use @c add_range_partition_split() instead.
///
/// @param [in] split_rows
diff --git a/src/kudu/client/table_creator-internal.h
b/src/kudu/client/table_creator-internal.h
index 0380d6d00..399b978b2 100644
--- a/src/kudu/client/table_creator-internal.h
+++ b/src/kudu/client/table_creator-internal.h
@@ -75,6 +75,8 @@ class KuduTableCreator::Data {
std::optional<std::string> comment_;
+ std::optional<bool> allow_empty_partition_;
+
std::optional<int> num_replicas_;
std::optional<std::string> dimension_label_;
diff --git a/src/kudu/common/partition.cc b/src/kudu/common/partition.cc
index 3b5af8a5c..0367dbd01 100644
--- a/src/kudu/common/partition.cc
+++ b/src/kudu/common/partition.cc
@@ -503,12 +503,15 @@ Status PartitionSchema::EncodeRangeBounds(
const vector<pair<KuduPartialRow, KuduPartialRow>>& range_bounds,
const vector<HashSchema>& range_hash_schemas,
const Schema& schema,
- RangesWithHashSchemas* bounds_with_hash_schemas) const {
+ RangesWithHashSchemas* bounds_with_hash_schemas,
+ bool allow_create_partition) const {
DCHECK(bounds_with_hash_schemas);
auto& bounds_whs = *bounds_with_hash_schemas;
DCHECK(bounds_whs.empty());
if (range_bounds.empty()) {
- bounds_whs.emplace_back(RangeWithHashSchema{"", "", hash_schema_});
+ if (!allow_create_partition) {
+ bounds_whs.emplace_back(RangeWithHashSchema{"", "", hash_schema_});
+ }
return Status::OK();
}
@@ -621,14 +624,16 @@ Status PartitionSchema::CreatePartitions(
const vector<KuduPartialRow>& split_rows,
const vector<pair<KuduPartialRow, KuduPartialRow>>& range_bounds,
const Schema& schema,
- vector<Partition>* partitions) const {
+ vector<Partition>* partitions,
+ bool allow_empty_partition) const {
DCHECK(partitions);
RETURN_NOT_OK(CheckRangeSchema(schema));
RangesWithHashSchemas bounds_with_hash_schemas;
RETURN_NOT_OK(EncodeRangeBounds(range_bounds, {}, schema,
- &bounds_with_hash_schemas));
+ &bounds_with_hash_schemas,
+ allow_empty_partition));
vector<string> splits;
RETURN_NOT_OK(EncodeRangeSplits(split_rows, schema, &splits));
RETURN_NOT_OK(SplitRangeBounds(schema, splits, &bounds_with_hash_schemas));
diff --git a/src/kudu/common/partition.h b/src/kudu/common/partition.h
index cd0353f70..20465f21d 100644
--- a/src/kudu/common/partition.h
+++ b/src/kudu/common/partition.h
@@ -358,11 +358,14 @@ class PartitionSchema {
// of resulting partitions is the product of the number of hash buckets for
// each hash bucket component, multiplied by
// (split_rows.size() + max(1, range_bounds.size())).
+ // Parameter 'allow_empty_partition' indicates whether to allow creating
+ // a table with no range partitions.
Status CreatePartitions(
const std::vector<KuduPartialRow>& split_rows,
const std::vector<std::pair<KuduPartialRow, KuduPartialRow>>&
range_bounds,
const Schema& schema,
- std::vector<Partition>* partitions) const WARN_UNUSED_RESULT;
+ std::vector<Partition>* partitions,
+ bool allow_empty_partition = false) const WARN_UNUSED_RESULT;
// Create the set of partitions given the specified ranges with per-range
// hash schemas. The 'partitions' output parameter must be non-null.
@@ -689,11 +692,14 @@ class PartitionSchema {
// inserts them into 'bounds_with_hash_schemas' in sorted order. The hash
schemas
// per range are stored within 'range_hash_schemas'. If 'range_hash_schemas'
is empty,
// it indicates that the table wide hash schema will be used per range.
+ // Parameter 'allow_empty_partition' indicates whether to allow creating
+ // a table with no range partitions.
Status EncodeRangeBounds(
const std::vector<std::pair<KuduPartialRow, KuduPartialRow>>&
range_bounds,
const std::vector<HashSchema>& range_hash_schemas,
const Schema& schema,
- RangesWithHashSchemas* bounds_with_hash_schemas) const;
+ RangesWithHashSchemas* bounds_with_hash_schemas,
+ bool allow_empty_partition = false) const;
// Splits the encoded range bounds by the split points. The splits and
bounds within
// 'bounds_with_hash_schemas' must be sorted. If `bounds_with_hash_schemas`
is empty,
diff --git a/src/kudu/master/catalog_manager.cc
b/src/kudu/master/catalog_manager.cc
index 98af0f0b3..1ab7bc85c 100644
--- a/src/kudu/master/catalog_manager.cc
+++ b/src/kudu/master/catalog_manager.cc
@@ -2037,7 +2037,7 @@ Status CatalogManager::CreateTable(const
CreateTableRequestPB* orig_req,
} else {
// Create partitions based on specified partition schema and split rows.
RETURN_NOT_OK(partition_schema.CreatePartitions(
- split_rows, range_bounds, schema, &partitions));
+ split_rows, range_bounds, schema, &partitions,
req.allow_empty_partition()));
}
// Check the restriction on the same number of hash dimensions across all the
@@ -2047,7 +2047,6 @@ Status CatalogManager::CreateTable(const
CreateTableRequestPB* orig_req,
// TODO(aserbin): remove the restriction once the rest of the code is ready
// to handle range partitions with arbitrary number of hash
// dimensions in hash schemas
- CHECK(!partitions.empty());
const auto hash_dimensions_num = partition_schema.hash_schema().size();
for (const auto& p : partitions) {
if (p.hash_buckets().size() != hash_dimensions_num) {
diff --git a/src/kudu/master/master.proto b/src/kudu/master/master.proto
index a096da60e..f8ff85d3e 100644
--- a/src/kudu/master/master.proto
+++ b/src/kudu/master/master.proto
@@ -573,6 +573,9 @@ message CreateTableRequestPB {
// The comment on the table.
optional string comment = 13;
+
+ // Whether to allow creating a table with no range partitions.
+ optional bool allow_empty_partition = 14 [ default = false ];
}
message CreateTableResponsePB {
diff --git a/src/kudu/tools/kudu-tool-test.cc b/src/kudu/tools/kudu-tool-test.cc
index 39a0f3baf..df689edf3 100644
--- a/src/kudu/tools/kudu-tool-test.cc
+++ b/src/kudu/tools/kudu-tool-test.cc
@@ -1728,6 +1728,55 @@ TEST_F(ToolTest, TestActionMissingRequiredArg) {
"peers", /* variadic */ true));
}
+
+// Fixture for ToolTest-based tests that take a single bool parameter; the test
+// using it passes the value to KuduTableCreator::set_allow_empty_partition().
+class CreateEmptyPartitionTableTest :
+ public ToolTest,
+ public ::testing::WithParamInterface<bool> {
+};
+
+// Instantiate the suite for both bool values (no instantiation name prefix).
+INSTANTIATE_TEST_SUITE_P(, CreateEmptyPartitionTableTest, ::testing::Bool());
+// Creates a table with range partition columns but no range bounds, then
+// checks the output of 'kudu table describe':
+//   * parameter false: the table has the default unbounded range partition;
+//   * parameter true:  the table has no range partitions, and a range
+//                      partition [0, 1) can be added to it afterwards.
+TEST_P(CreateEmptyPartitionTableTest, TestCreateEmptyPartitionTable) {
+  const string kTableName = "test";
+  const bool allow_empty_partition = GetParam();
+  NO_FATALS(StartExternalMiniCluster());
+  shared_ptr<KuduClient> client;
+  ASSERT_OK(cluster_->CreateClient(nullptr, &client));
+
+  KuduSchema schema = KuduSchema::FromSchema(GetSimpleTestSchema());
+  unique_ptr<KuduTableCreator> table_creator(client->NewTableCreator());
+  // Fail right away if the table cannot be created, instead of letting the
+  // 'describe' checks below fail with a less obvious message.
+  ASSERT_OK(table_creator->table_name(kTableName)
+                .schema(&schema)
+                .set_range_partition_columns({"key"})
+                .num_replicas(1)
+                .set_allow_empty_partition(allow_empty_partition)
+                .Create());
+
+  // The same command is run before and after altering the table.
+  const string describe_cmd = Substitute(
+      "table describe $0 $1",
+      cluster_->master()->bound_rpc_addr().ToString(),
+      kTableName);
+  string stdout;
+  NO_FATALS(RunActionStdoutString(describe_cmd, &stdout));
+  if (!allow_empty_partition) {
+    // Without the flag, the default unbounded range partition is created.
+    ASSERT_STR_CONTAINS(stdout, "PARTITION UNBOUNDED");
+    return;
+  }
+  ASSERT_STR_CONTAINS(stdout, "RANGE (key) ()");
+
+  // Add a range partition to the table that was created without any.
+  unique_ptr<KuduPartialRow> lower_bound(schema.NewRow());
+  ASSERT_OK(lower_bound->SetInt32("key", 0));
+  unique_ptr<KuduPartialRow> upper_bound(schema.NewRow());
+  ASSERT_OK(upper_bound->SetInt32("key", 1));
+  unique_ptr<KuduTableAlterer> table_alterer(client->NewTableAlterer(kTableName));
+  table_alterer->AddRangePartition(lower_bound.release(), upper_bound.release());
+  ASSERT_OK(table_alterer->Alter());
+  NO_FATALS(RunActionStdoutString(describe_cmd, &stdout));
+  ASSERT_STR_CONTAINS(stdout, "PARTITION 0 <= VALUES < 1");
+}
+
TEST_F(ToolTest, TestFsCheck) {
const string kTestDir = GetTestPath("test");
const string kTabletId = "ffffffffffffffffffffffffffffffff";