This is an automated email from the ASF dual-hosted git repository.
alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git
The following commit(s) were added to refs/heads/master by this push:
new cad295f98 KUDU-3723 fix interpretation of new range partition specs
cad295f98 is described below
commit cad295f98a9eb2b390eb096d34e445fb27b8cca2
Author: Alexey Serbin <[email protected]>
AuthorDate: Tue Dec 9 17:55:55 2025 -0800
KUDU-3723 fix interpretation of new range partition specs
In the context of KUDU-2671, a new notation for specifying hash schema
of a range partition has been added. With the introduction of
range-specific hash schemas, the client side is free to send in
information on the newly added range partition as if it had a custom
hash schema, even if the new range's hash schema is the same as the
table-wide one. However, the system catalog should have normalized
the provided information, and stored the information on ranges with
the table-wide and custom hash schemas differently. The normalization
part was missing, and that was the reason for the reported issue.
This changelist addresses the problem and adds a new test scenario that
triggers an assertion without the fix.
Change-Id: Icceb138a919cd7afb572c6dd74695a3fcaaac99e
Reviewed-on: http://gerrit.cloudera.org:8080/23778
Tested-by: Alexey Serbin <[email protected]>
Reviewed-by: Alexey Serbin <[email protected]>
---
src/kudu/integration-tests/alter_table-test.cc | 128 +++++++++++++++++++++++++
src/kudu/master/catalog_manager.cc | 53 ++++++----
2 files changed, 163 insertions(+), 18 deletions(-)
diff --git a/src/kudu/integration-tests/alter_table-test.cc b/src/kudu/integration-tests/alter_table-test.cc
index 9933da9fc..f7c27f60b 100644
--- a/src/kudu/integration-tests/alter_table-test.cc
+++ b/src/kudu/integration-tests/alter_table-test.cc
@@ -2881,4 +2881,132 @@ TEST_F(ReplicatedAlterTableTest, CheckTableStateAfterReplicatedAlter) {
ASSERT_FALSE(is_altering);
}
+// Test scenario for KUDU-3723.
+TEST_F(AlterTableTest, DropAndAddBackSameRangePartition) {
+ constexpr const int32_t kBucketNum = 5;
+ constexpr const int64_t kRangeLowerBound = 1765000000000000;
+ constexpr const int64_t kRangeUpperBound = 1766000000000000;
+
+ KuduSchemaBuilder b;
+ b.AddColumn("c0")->Type(KuduColumnSchema::STRING)->NotNull();
+ b.AddColumn("c1")->Type(KuduColumnSchema::STRING)->NotNull();
+ b.AddColumn("c2")->Type(KuduColumnSchema::UNIXTIME_MICROS)->NotNull();
+ b.AddColumn("c3")->Type(KuduColumnSchema::STRING)->NotNull();
+ b.AddColumn("c4")->Type(KuduColumnSchema::STRING)->NotNull();
+ b.AddColumn("c5")->Type(KuduColumnSchema::STRING);
+ b.SetPrimaryKey({"c0", "c1", "c2", "c3", "c4"});
+
+ KuduSchema schema;
+ ASSERT_OK(b.Build(&schema));
+
+ constexpr const char* const table_name = "test-dup-ranges";
+
+ // Create a table with one range partition, using the table-wide hash schema.
+ unique_ptr<KuduTableCreator> table_creator(client_->NewTableCreator());
+ unique_ptr<KuduPartialRow> lower(schema.NewRow());
+ unique_ptr<KuduPartialRow> upper(schema.NewRow());
+ ASSERT_OK(lower->SetUnixTimeMicros("c2", 1764000000000000));
+ ASSERT_OK(upper->SetUnixTimeMicros("c2", 1765000000000000));
+ ASSERT_OK(table_creator->table_name(table_name)
+ .schema(&schema)
+ .set_range_partition_columns({ "c2" })
+ .add_hash_partitions({ "c0", "c4" }, kBucketNum)
+                .add_range_partition(lower.release(), upper.release())
+ .num_replicas(1)
+ .Create());
+ // Make sure it's possible to open the newly created table.
+ {
+ shared_ptr<KuduTable> table;
+ ASSERT_OK(client_->OpenTable(table_name, &table));
+ }
+
+ // Add a new range partition.
+ {
+ unique_ptr<KuduPartialRow> lower(schema.NewRow());
+ ASSERT_OK(lower->SetUnixTimeMicros("c2", kRangeLowerBound));
+ unique_ptr<KuduPartialRow> upper(schema.NewRow());
+ ASSERT_OK(upper->SetUnixTimeMicros("c2", kRangeUpperBound));
+
+ // Use the custom hash schema notation for the new range even if the hash
+ // schema is the same as the table-wide hash schema.
+ auto p = std::make_unique<KuduRangePartition>(lower.release(),
+ upper.release());
+ vector<string> columns{ "c0", "c4" };
+ ASSERT_OK(p->add_hash_partitions(columns, kBucketNum, 0));
+ unique_ptr<KuduTableAlterer> alterer(client_->NewTableAlterer(table_name));
+ alterer->AddRangePartition(p.release());
+ ASSERT_OK(alterer->Alter());
+ }
+
+ // Drop the range partition.
+ {
+ unique_ptr<KuduPartialRow> lower(schema.NewRow());
+ ASSERT_OK(lower->SetUnixTimeMicros("c2", kRangeLowerBound));
+ unique_ptr<KuduPartialRow> upper(schema.NewRow());
+ ASSERT_OK(upper->SetUnixTimeMicros("c2", kRangeUpperBound));
+
+ unique_ptr<KuduTableAlterer> alterer(client_->NewTableAlterer(table_name));
+ alterer->DropRangePartition(lower.release(), upper.release());
+ ASSERT_OK(alterer->Alter());
+ }
+
+ // Add the same range partition again.
+ {
+ unique_ptr<KuduPartialRow> lower(schema.NewRow());
+ ASSERT_OK(lower->SetUnixTimeMicros("c2", kRangeLowerBound));
+ unique_ptr<KuduPartialRow> upper(schema.NewRow());
+ ASSERT_OK(upper->SetUnixTimeMicros("c2", kRangeUpperBound));
+
+ // Use the custom hash schema notation for the new range even if the hash
+ // schema is the same as the table-wide hash schema.
+ auto p = std::make_unique<KuduRangePartition>(lower.release(),
+ upper.release());
+ vector<string> columns{ "c0", "c4" };
+ ASSERT_OK(p->add_hash_partitions(columns, kBucketNum, 0));
+ unique_ptr<KuduTableAlterer> alterer(client_->NewTableAlterer(table_name));
+ alterer->AddRangePartition(p.release());
+ ASSERT_OK(alterer->Alter());
+ }
+
+ {
+ // Make sure it's possible to open the table.
+ shared_ptr<KuduTable> table;
+ ASSERT_OK(client_->OpenTable(table_name, &table));
+ }
+
+ // Repeat the drill of adding the same range partition, but now use
+ // the notation of table-wide hash schema for the newly added range.
+ {
+ unique_ptr<KuduPartialRow> lower(schema.NewRow());
+ ASSERT_OK(lower->SetUnixTimeMicros("c2", kRangeLowerBound));
+ unique_ptr<KuduPartialRow> upper(schema.NewRow());
+ ASSERT_OK(upper->SetUnixTimeMicros("c2", kRangeUpperBound));
+
+ unique_ptr<KuduTableAlterer> alterer(client_->NewTableAlterer(table_name));
+ alterer->DropRangePartition(lower.release(), upper.release());
+ ASSERT_OK(alterer->Alter());
+ }
+
+ {
+ unique_ptr<KuduPartialRow> lower(schema.NewRow());
+ ASSERT_OK(lower->SetUnixTimeMicros("c2", kRangeLowerBound));
+ unique_ptr<KuduPartialRow> upper(schema.NewRow());
+ ASSERT_OK(upper->SetUnixTimeMicros("c2", kRangeUpperBound));
+
+ unique_ptr<KuduTableAlterer> alterer(client_->NewTableAlterer(table_name));
+    alterer->AddRangePartition(lower.release(), upper.release(),
+                               KuduTableCreator::EXCLUSIVE_BOUND,
+                               KuduTableCreator::INCLUSIVE_BOUND);
+    ASSERT_OK(alterer->Alter());
+  }
+
+ {
+ // Make sure it's possible to open the table after all the manipulations.
+ shared_ptr<KuduTable> table;
+ ASSERT_OK(client_->OpenTable(table_name, &table));
+ }
+
+ // Drop the table.
+ ASSERT_OK(client_->DeleteTable(table_name));
+}
+
} // namespace kudu
diff --git a/src/kudu/master/catalog_manager.cc b/src/kudu/master/catalog_manager.cc
index 72242a657..eebe8629f 100644
--- a/src/kudu/master/catalog_manager.cc
+++ b/src/kudu/master/catalog_manager.cc
@@ -3166,25 +3166,42 @@ Status CatalogManager::ApplyAlterPartitioningSteps(
vector<Partition> partitions;
const pair<KuduPartialRow, KuduPartialRow> range_bound =
{ *ops[0].split_row, *ops[1].split_row };
+ const auto& table_wide_hash_schema = partition_schema.hash_schema();
if (step.type() == AlterTableRequestPB::ADD_RANGE_PARTITION) {
- if (!FLAGS_enable_per_range_hash_schemas ||
- !step.add_range_partition().has_custom_hash_schema()) {
- RETURN_NOT_OK(partition_schema.CreatePartitions(
- {}, { range_bound }, schema, &partitions));
- } else {
+ // Clients are free to send in information on a new partition to add
+ // in a format that's used to add partitions with custom hash schemas
+      // even if the hash schema for the new range is the same as the table-wide
+ // hash schema. At this point it's necessary to have a clear separation
+ // between ranges with custom hash schemas and ranges with the table-wide
+ // hash schemas since the code below adds different records into
+ // the system catalog table depending on the actual hash schema for the
+ // newly added range.
+ //
+ // So, let's figure out whether the partition being added has a hash
+ // schema that differs from the table-wide hash schema.
+ bool has_custom_hash_schema = false;
+ PartitionSchema::HashSchema range_hash_schema;
+ if (FLAGS_enable_per_range_hash_schemas &&
+ step.add_range_partition().has_custom_hash_schema()) {
const auto& custom_hash_schema_pb =
step.add_range_partition().custom_hash_schema().hash_schema();
- const Schema schema = client_schema.CopyWithColumnIds();
- PartitionSchema::HashSchema hash_schema;
RETURN_NOT_OK(PartitionSchema::ExtractHashSchemaFromPB(
- schema, custom_hash_schema_pb, &hash_schema));
- if (partition_schema.hash_schema().size() != hash_schema.size()) {
+ schema, custom_hash_schema_pb, &range_hash_schema));
+ has_custom_hash_schema = table_wide_hash_schema != range_hash_schema;
+ }
+
+ if (!has_custom_hash_schema) {
+ RETURN_NOT_OK(partition_schema.CreatePartitions(
+ {}, { range_bound }, schema, &partitions));
+ } else {
+ if (table_wide_hash_schema.size() != range_hash_schema.size()) {
return Status::NotSupported(
            "varying number of hash dimensions per range is not yet supported");
}
-      RETURN_NOT_OK(PartitionSchema::ValidateHashSchema(schema, hash_schema));
+ const Schema schema = client_schema.CopyWithColumnIds();
+      RETURN_NOT_OK(PartitionSchema::ValidateHashSchema(schema, range_hash_schema));
RETURN_NOT_OK(partition_schema.CreatePartitionsForRange(
- range_bound, hash_schema, schema, &partitions));
+ range_bound, range_hash_schema, schema, &partitions));
// Add information on the new range with custom hash schema into the
// PartitionSchema for the table stored in the system catalog.
@@ -3193,7 +3210,7 @@ Status CatalogManager::ApplyAlterPartitioningSteps(
RowOperationsPBEncoder encoder(range->mutable_range_bounds());
encoder.Add(RowOperationsPB::RANGE_LOWER_BOUND, range_bound.first);
encoder.Add(RowOperationsPB::RANGE_UPPER_BOUND, range_bound.second);
- for (const auto& hash_dimension : hash_schema) {
+ for (const auto& hash_dimension : range_hash_schema) {
auto* hash_dimension_pb = range->add_hash_schema();
hash_dimension_pb->set_num_buckets(hash_dimension.num_buckets);
hash_dimension_pb->set_seed(hash_dimension.seed);
@@ -3217,12 +3234,12 @@ Status CatalogManager::ApplyAlterPartitioningSteps(
RETURN_NOT_OK(partition_schema.CreatePartitionsForRange(
range_bound, range_hash_schema, schema, &partitions));
- // Update the partition schema information to be stored in the system
- // catalog table. The information on a range with the table-wide hash
- // schema must not be present in the PartitionSchemaPB that the system
- // catalog stores, so this is necessary only if the range has custom
- // (i.e. other than the table-wide) hash schema.
- if (range_hash_schema != partition_schema.hash_schema()) {
+ // The information on a range with the table-wide hash schema is not
+ // present in the PartitionSchemaPB that the system catalog stores.
+ // Only information on ranges with custom hash schemas is stored there.
+ if (range_hash_schema != table_wide_hash_schema) {
+ // If the range being dropped has a custom hash schema, update the
+ // partition schema information stored in the system catalog table.
RETURN_NOT_OK(partition_schema.DropRange(
range_bound.first, range_bound.second, schema));
PartitionSchemaPB ps_pb;