This is an automated email from the ASF dual-hosted git repository.
alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git
The following commit(s) were added to refs/heads/master by this push:
new 817c395 [common] extra test PartitionSchema and a small clean-up
817c395 is described below
commit 817c395d4db8ab5dc5709b05765563d68249a682
Author: Alexey Serbin <[email protected]>
AuthorDate: Mon Jul 12 18:48:19 2021 -0700
[common] extra test PartitionSchema and a small clean-up
While working on adding support for custom hash bucket schemas in
range partitions for the Kudu C++ client, I found that a test scenario
for custom range partitions with a non-existent table-wide hash bucket
schema was missing. This patch adds a simple one to cover such a case.
In addition, I updated the code in partition.cc to use std::move
semantics in PartitionSchema::GenerateHashPartitions() and other
related methods, and made a few other style-related changes.
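
For illustration only (not a part of this patch), below is a minimal,
self-contained sketch of the bucket cross-product pattern that
GenerateHashPartitions() implements, using simplified stand-in types
(Partition, HashDimension) rather than the actual Kudu classes; the
real method also encodes each bucket number into the partition
start/end keys via the hash key encoder:

  #include <cstdint>
  #include <string>
  #include <utility>
  #include <vector>

  struct Partition {
    std::vector<int32_t> hash_buckets;
    std::string partition_key_start;
    std::string partition_key_end;
  };

  // One hash bucketing dimension; only the bucket count matters here.
  struct HashDimension {
    int32_t num_buckets;
  };

  std::vector<Partition> GenerateHashPartitions(
      const std::vector<HashDimension>& dimensions) {
    std::vector<Partition> partitions(1);  // start from a single empty partition
    for (const auto& dim : dimensions) {
      std::vector<Partition> next;
      next.reserve(partitions.size() * dim.num_buckets);
      // Replicate every partition built so far once per bucket
      // in this dimension.
      for (const Partition& base : partitions) {
        for (int32_t bucket = 0; bucket < dim.num_buckets; ++bucket) {
          Partition p = base;
          p.hash_buckets.push_back(bucket);
          // The real code appends the encoded bucket index to the
          // partition keys at this point.
          next.emplace_back(std::move(p));  // move instead of copying
        }
      }
      partitions = std::move(next);  // reuse the buffer, no deep copy
    }
    return partitions;
  }

  int main() {
    // Two dimensions with 4 and 3 buckets yield 4 * 3 = 12 partitions.
    const auto partitions = GenerateHashPartitions({{4}, {3}});
    return partitions.size() == 12 ? 0 : 1;
  }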
Change-Id: I66ff63488f34173e2f7f48db5aae73b2a612be8c
Reviewed-on: http://gerrit.cloudera.org:8080/17680
Tested-by: Alexey Serbin <[email protected]>
Reviewed-by: Andrew Wong <[email protected]>
---
src/kudu/common/partition-test.cc | 357 +++++++++++++++++++++++++-------------
src/kudu/common/partition.cc | 166 +++++++++---------
src/kudu/common/partition.h | 17 +-
3 files changed, 337 insertions(+), 203 deletions(-)
diff --git a/src/kudu/common/partition-test.cc b/src/kudu/common/partition-test.cc
index 5b63260..f26e5ce 100644
--- a/src/kudu/common/partition-test.cc
+++ b/src/kudu/common/partition-test.cc
@@ -867,117 +867,6 @@ TEST_F(PartitionTest, TestVarcharRangePartitions) {
}
namespace {
-void CheckPartitions(const vector<Partition>& partitions) {
- ASSERT_EQ(16, partitions.size());
-
- EXPECT_EQ(0, partitions[0].hash_buckets()[0]);
- EXPECT_EQ(string("a1\0\0\0\0c1", 8), partitions[0].range_key_start());
- EXPECT_EQ(string("a2\0\0\0\0c2", 8), partitions[0].range_key_end());
- EXPECT_EQ(string("\0\0\0\0" "a1\0\0\0\0c1", 12),partitions[0].partition_key_start());
- EXPECT_EQ(string("\0\0\0\0" "a2\0\0\0\0c2", 12),partitions[0].partition_key_end());
-
- EXPECT_EQ(1, partitions[1].hash_buckets()[0]);
- EXPECT_EQ(string("a1\0\0\0\0c1", 8), partitions[1].range_key_start());
- EXPECT_EQ(string("a2\0\0\0\0c2", 8), partitions[1].range_key_end());
- EXPECT_EQ(string("\0\0\0\1" "a1\0\0\0\0c1", 12),partitions[1].partition_key_start());
- EXPECT_EQ(string("\0\0\0\1" "a2\0\0\0\0c2", 12),partitions[1].partition_key_end());
-
- EXPECT_EQ(2, partitions[2].hash_buckets()[0]);
- EXPECT_EQ(string("a1\0\0\0\0c1", 8), partitions[2].range_key_start());
- EXPECT_EQ(string("a2\0\0\0\0c2", 8), partitions[2].range_key_end());
- EXPECT_EQ(string("\0\0\0\2" "a1\0\0\0\0c1", 12),partitions[2].partition_key_start());
- EXPECT_EQ(string("\0\0\0\2" "a2\0\0\0\0c2", 12),partitions[2].partition_key_end());
-
- EXPECT_EQ(3, partitions[3].hash_buckets()[0]);
- EXPECT_EQ(string("a1\0\0\0\0c1", 8), partitions[3].range_key_start());
- EXPECT_EQ(string("a2\0\0\0\0c2", 8), partitions[3].range_key_end());
- EXPECT_EQ(string("\0\0\0\3" "a1\0\0\0\0c1", 12),partitions[3].partition_key_start());
- EXPECT_EQ(string("\0\0\0\3" "a2\0\0\0\0c2", 12),partitions[3].partition_key_end());
-
- EXPECT_EQ(0, partitions[4].hash_buckets()[0]);
- EXPECT_EQ(0, partitions[4].hash_buckets()[1]);
- EXPECT_EQ(string("a3\0\0b3\0\0", 8),partitions[4].range_key_start());
- EXPECT_EQ(string("a4\0\0b4\0\0", 8),partitions[4].range_key_end());
- EXPECT_EQ(string("\0\0\0\0" "\0\0\0\0" "a3\0\0b3\0\0", 16),partitions[4].partition_key_start());
- EXPECT_EQ(string("\0\0\0\0" "\0\0\0\0" "a4\0\0b4\0\0", 16),partitions[4].partition_key_end());
-
- EXPECT_EQ(0, partitions[5].hash_buckets()[0]);
- EXPECT_EQ(1, partitions[5].hash_buckets()[1]);
- EXPECT_EQ(string("a3\0\0b3\0\0", 8),partitions[5].range_key_start());
- EXPECT_EQ(string("a4\0\0b4\0\0", 8),partitions[5].range_key_end());
- EXPECT_EQ(string("\0\0\0\0" "\0\0\0\1" "a3\0\0b3\0\0", 16),partitions[5].partition_key_start());
- EXPECT_EQ(string("\0\0\0\0" "\0\0\0\1" "a4\0\0b4\0\0", 16),partitions[5].partition_key_end());
-
- EXPECT_EQ(1, partitions[6].hash_buckets()[0]);
- EXPECT_EQ(0, partitions[6].hash_buckets()[1]);
- EXPECT_EQ(string("a3\0\0b3\0\0", 8),partitions[6].range_key_start());
- EXPECT_EQ(string("a4\0\0b4\0\0", 8),partitions[6].range_key_end());
- EXPECT_EQ(string("\0\0\0\1" "\0\0\0\0" "a3\0\0b3\0\0", 16),partitions[6].partition_key_start());
- EXPECT_EQ(string("\0\0\0\1" "\0\0\0\0" "a4\0\0b4\0\0", 16),partitions[6].partition_key_end());
-
- EXPECT_EQ(1, partitions[7].hash_buckets()[0]);
- EXPECT_EQ(1, partitions[7].hash_buckets()[1]);
- EXPECT_EQ(string("a3\0\0b3\0\0", 8),partitions[7].range_key_start());
- EXPECT_EQ(string("a4\0\0b4\0\0", 8),partitions[7].range_key_end());
- EXPECT_EQ(string("\0\0\0\1" "\0\0\0\1" "a3\0\0b3\0\0", 16),partitions[7].partition_key_start());
- EXPECT_EQ(string("\0\0\0\1" "\0\0\0\1" "a4\0\0b4\0\0", 16),partitions[7].partition_key_end());
-
- EXPECT_EQ(2, partitions[8].hash_buckets()[0]);
- EXPECT_EQ(0, partitions[8].hash_buckets()[1]);
- EXPECT_EQ(string("a3\0\0b3\0\0", 8),partitions[8].range_key_start());
- EXPECT_EQ(string("a4\0\0b4\0\0", 8),partitions[8].range_key_end());
- EXPECT_EQ(string("\0\0\0\2" "\0\0\0\0" "a3\0\0b3\0\0", 16),partitions[8].partition_key_start());
- EXPECT_EQ(string("\0\0\0\2" "\0\0\0\0" "a4\0\0b4\0\0", 16),partitions[8].partition_key_end());
-
- EXPECT_EQ(2, partitions[9].hash_buckets()[0]);
- EXPECT_EQ(1, partitions[9].hash_buckets()[1]);
- EXPECT_EQ(string("a3\0\0b3\0\0", 8),partitions[9].range_key_start());
- EXPECT_EQ(string("a4\0\0b4\0\0", 8),partitions[9].range_key_end());
- EXPECT_EQ(string("\0\0\0\2" "\0\0\0\1" "a3\0\0b3\0\0", 16),partitions[9].partition_key_start());
- EXPECT_EQ(string("\0\0\0\2" "\0\0\0\1" "a4\0\0b4\0\0", 16),partitions[9].partition_key_end());
-
- EXPECT_EQ(0, partitions[10].hash_buckets()[0]);
- EXPECT_EQ(0, partitions[10].hash_buckets()[1]);
- EXPECT_EQ(string("a5\0\0b5\0\0", 8),partitions[10].range_key_start());
- EXPECT_EQ(string("a6\0\0\0\0c6", 8),partitions[10].range_key_end());
- EXPECT_EQ(string("\0\0\0\0" "\0\0\0\0" "a5\0\0b5\0\0", 16),partitions[10].partition_key_start());
- EXPECT_EQ(string("\0\0\0\0" "\0\0\0\0" "a6\0\0\0\0c6", 16),partitions[10].partition_key_end());
-
- EXPECT_EQ(0, partitions[11].hash_buckets()[0]);
- EXPECT_EQ(1, partitions[11].hash_buckets()[1]);
- EXPECT_EQ(string("a5\0\0b5\0\0", 8),partitions[11].range_key_start());
- EXPECT_EQ(string("a6\0\0\0\0c6", 8),partitions[11].range_key_end());
- EXPECT_EQ(string("\0\0\0\0" "\0\0\0\1" "a5\0\0b5\0\0", 16),partitions[11].partition_key_start());
- EXPECT_EQ(string("\0\0\0\0" "\0\0\0\1" "a6\0\0\0\0c6", 16),partitions[11].partition_key_end());
-
- EXPECT_EQ(0, partitions[12].hash_buckets()[0]);
- EXPECT_EQ(2, partitions[12].hash_buckets()[1]);
- EXPECT_EQ(string("a5\0\0b5\0\0", 8),partitions[12].range_key_start());
- EXPECT_EQ(string("a6\0\0\0\0c6", 8),partitions[12].range_key_end());
- EXPECT_EQ(string("\0\0\0\0" "\0\0\0\2" "a5\0\0b5\0\0", 16),partitions[12].partition_key_start());
- EXPECT_EQ(string("\0\0\0\0" "\0\0\0\2" "a6\0\0\0\0c6", 16),partitions[12].partition_key_end());
-
- EXPECT_EQ(1, partitions[13].hash_buckets()[0]);
- EXPECT_EQ(0, partitions[13].hash_buckets()[1]);
- EXPECT_EQ(string("a5\0\0b5\0\0", 8),partitions[13].range_key_start());
- EXPECT_EQ(string("a6\0\0\0\0c6", 8),partitions[13].range_key_end());
- EXPECT_EQ(string("\0\0\0\1" "\0\0\0\0" "a5\0\0b5\0\0", 16),partitions[13].partition_key_start());
- EXPECT_EQ(string("\0\0\0\1" "\0\0\0\0" "a6\0\0\0\0c6", 16),partitions[13].partition_key_end());
-
- EXPECT_EQ(1, partitions[14].hash_buckets()[0]);
- EXPECT_EQ(1, partitions[14].hash_buckets()[1]);
- EXPECT_EQ(string("a5\0\0b5\0\0", 8),partitions[14].range_key_start());
- EXPECT_EQ(string("a6\0\0\0\0c6", 8),partitions[14].range_key_end());
- EXPECT_EQ(string("\0\0\0\1" "\0\0\0\1" "a5\0\0b5\0\0", 16),partitions[14].partition_key_start());
- EXPECT_EQ(string("\0\0\0\1" "\0\0\0\1" "a6\0\0\0\0c6", 16),partitions[14].partition_key_end());
-
- EXPECT_EQ(1, partitions[15].hash_buckets()[0]);
- EXPECT_EQ(2, partitions[15].hash_buckets()[1]);
- EXPECT_EQ(string("a5\0\0b5\0\0", 8),partitions[15].range_key_start());
- EXPECT_EQ(string("a6\0\0\0\0c6", 8),partitions[15].range_key_end());
- EXPECT_EQ(string("\0\0\0\1" "\0\0\0\2" "a5\0\0b5\0\0", 16),partitions[15].partition_key_start());
- EXPECT_EQ(string("\0\0\0\1" "\0\0\0\2" "a6\0\0\0\0c6", 16),partitions[15].partition_key_end());
-}
void CheckSerializationFunctions(const PartitionSchemaPB& pb,
const PartitionSchema& partition_schema,
@@ -993,7 +882,7 @@ void CheckSerializationFunctions(const PartitionSchemaPB& pb,
ASSERT_EQ(partition_schema, partition_schema1);
}
-} // namespace
+} // anonymous namespace
TEST_F(PartitionTest, TestVaryingHashSchemasPerRange) {
// CREATE TABLE t (a VARCHAR, b VARCHAR, c VARCHAR, PRIMARY KEY (a, b, c))
@@ -1063,9 +952,174 @@ TEST_F(PartitionTest, TestVaryingHashSchemasPerRange) {
std::move(hash_schema_2_buckets_by_3));
}
+ const auto check_partitions = [](const vector<Partition>& partitions) {
+ ASSERT_EQ(16, partitions.size());
+
+ ASSERT_EQ(1, partitions[0].hash_buckets().size());
+ EXPECT_EQ(0, partitions[0].hash_buckets()[0]);
+ EXPECT_EQ(string("a1\0\0\0\0c1", 8), partitions[0].range_key_start());
+ EXPECT_EQ(string("a2\0\0\0\0c2", 8), partitions[0].range_key_end());
+ EXPECT_EQ(string("\0\0\0\0" "a1\0\0\0\0c1", 12),
+ partitions[0].partition_key_start());
+ EXPECT_EQ(string("\0\0\0\0" "a2\0\0\0\0c2", 12),
+ partitions[0].partition_key_end());
+
+ ASSERT_EQ(1, partitions[1].hash_buckets().size());
+ EXPECT_EQ(1, partitions[1].hash_buckets()[0]);
+ EXPECT_EQ(string("a1\0\0\0\0c1", 8), partitions[1].range_key_start());
+ EXPECT_EQ(string("a2\0\0\0\0c2", 8), partitions[1].range_key_end());
+ EXPECT_EQ(string("\0\0\0\1" "a1\0\0\0\0c1", 12),
+ partitions[1].partition_key_start());
+ EXPECT_EQ(string("\0\0\0\1" "a2\0\0\0\0c2", 12),
+ partitions[1].partition_key_end());
+
+ ASSERT_EQ(1, partitions[2].hash_buckets().size());
+ EXPECT_EQ(2, partitions[2].hash_buckets()[0]);
+ EXPECT_EQ(string("a1\0\0\0\0c1", 8), partitions[2].range_key_start());
+ EXPECT_EQ(string("a2\0\0\0\0c2", 8), partitions[2].range_key_end());
+ EXPECT_EQ(string("\0\0\0\2" "a1\0\0\0\0c1", 12),
+ partitions[2].partition_key_start());
+ EXPECT_EQ(string("\0\0\0\2" "a2\0\0\0\0c2", 12),
+ partitions[2].partition_key_end());
+
+ ASSERT_EQ(1, partitions[3].hash_buckets().size());
+ EXPECT_EQ(3, partitions[3].hash_buckets()[0]);
+ EXPECT_EQ(string("a1\0\0\0\0c1", 8), partitions[3].range_key_start());
+ EXPECT_EQ(string("a2\0\0\0\0c2", 8), partitions[3].range_key_end());
+ EXPECT_EQ(string("\0\0\0\3" "a1\0\0\0\0c1", 12),
+ partitions[3].partition_key_start());
+ EXPECT_EQ(string("\0\0\0\3" "a2\0\0\0\0c2", 12),
+ partitions[3].partition_key_end());
+
+ ASSERT_EQ(2, partitions[4].hash_buckets().size());
+ EXPECT_EQ(0, partitions[4].hash_buckets()[0]);
+ EXPECT_EQ(0, partitions[4].hash_buckets()[1]);
+ EXPECT_EQ(string("a3\0\0b3\0\0", 8),
+ partitions[4].range_key_start());
+ EXPECT_EQ(string("a4\0\0b4\0\0", 8),
+ partitions[4].range_key_end());
+ EXPECT_EQ(string("\0\0\0\0" "\0\0\0\0" "a3\0\0b3\0\0", 16),
+ partitions[4].partition_key_start());
+ EXPECT_EQ(string("\0\0\0\0" "\0\0\0\0" "a4\0\0b4\0\0", 16),
+ partitions[4].partition_key_end());
+
+ ASSERT_EQ(2, partitions[5].hash_buckets().size());
+ EXPECT_EQ(0, partitions[5].hash_buckets()[0]);
+ EXPECT_EQ(1, partitions[5].hash_buckets()[1]);
+ EXPECT_EQ(string("a3\0\0b3\0\0", 8), partitions[5].range_key_start());
+ EXPECT_EQ(string("a4\0\0b4\0\0", 8), partitions[5].range_key_end());
+ EXPECT_EQ(string("\0\0\0\0" "\0\0\0\1" "a3\0\0b3\0\0", 16),
+ partitions[5].partition_key_start());
+ EXPECT_EQ(string("\0\0\0\0" "\0\0\0\1" "a4\0\0b4\0\0", 16),
+ partitions[5].partition_key_end());
+
+ ASSERT_EQ(2, partitions[6].hash_buckets().size());
+ EXPECT_EQ(1, partitions[6].hash_buckets()[0]);
+ EXPECT_EQ(0, partitions[6].hash_buckets()[1]);
+ EXPECT_EQ(string("a3\0\0b3\0\0", 8), partitions[6].range_key_start());
+ EXPECT_EQ(string("a4\0\0b4\0\0", 8), partitions[6].range_key_end());
+ EXPECT_EQ(string("\0\0\0\1" "\0\0\0\0" "a3\0\0b3\0\0", 16),
+ partitions[6].partition_key_start());
+ EXPECT_EQ(string("\0\0\0\1" "\0\0\0\0" "a4\0\0b4\0\0", 16),
+ partitions[6].partition_key_end());
+
+ ASSERT_EQ(2, partitions[7].hash_buckets().size());
+ EXPECT_EQ(1, partitions[7].hash_buckets()[0]);
+ EXPECT_EQ(1, partitions[7].hash_buckets()[1]);
+ EXPECT_EQ(string("a3\0\0b3\0\0", 8), partitions[7].range_key_start());
+ EXPECT_EQ(string("a4\0\0b4\0\0", 8), partitions[7].range_key_end());
+ EXPECT_EQ(string("\0\0\0\1" "\0\0\0\1" "a3\0\0b3\0\0", 16),
+ partitions[7].partition_key_start());
+ EXPECT_EQ(string("\0\0\0\1" "\0\0\0\1" "a4\0\0b4\0\0", 16),
+ partitions[7].partition_key_end());
+
+ ASSERT_EQ(2, partitions[8].hash_buckets().size());
+ EXPECT_EQ(2, partitions[8].hash_buckets()[0]);
+ EXPECT_EQ(0, partitions[8].hash_buckets()[1]);
+ EXPECT_EQ(string("a3\0\0b3\0\0", 8), partitions[8].range_key_start());
+ EXPECT_EQ(string("a4\0\0b4\0\0", 8), partitions[8].range_key_end());
+ EXPECT_EQ(string("\0\0\0\2" "\0\0\0\0" "a3\0\0b3\0\0", 16),
+ partitions[8].partition_key_start());
+ EXPECT_EQ(string("\0\0\0\2" "\0\0\0\0" "a4\0\0b4\0\0", 16),
+ partitions[8].partition_key_end());
+
+ ASSERT_EQ(2, partitions[9].hash_buckets().size());
+ EXPECT_EQ(2, partitions[9].hash_buckets()[0]);
+ EXPECT_EQ(1, partitions[9].hash_buckets()[1]);
+ EXPECT_EQ(string("a3\0\0b3\0\0", 8), partitions[9].range_key_start());
+ EXPECT_EQ(string("a4\0\0b4\0\0", 8), partitions[9].range_key_end());
+ EXPECT_EQ(string("\0\0\0\2" "\0\0\0\1" "a3\0\0b3\0\0", 16),
+ partitions[9].partition_key_start());
+ EXPECT_EQ(string("\0\0\0\2" "\0\0\0\1" "a4\0\0b4\0\0", 16),
+ partitions[9].partition_key_end());
+
+ ASSERT_EQ(2, partitions[10].hash_buckets().size());
+ EXPECT_EQ(0, partitions[10].hash_buckets()[0]);
+ EXPECT_EQ(0, partitions[10].hash_buckets()[1]);
+ EXPECT_EQ(string("a5\0\0b5\0\0", 8),
+ partitions[10].range_key_start());
+ EXPECT_EQ(string("a6\0\0\0\0c6", 8),
+ partitions[10].range_key_end());
+ EXPECT_EQ(string("\0\0\0\0" "\0\0\0\0" "a5\0\0b5\0\0", 16),
+ partitions[10].partition_key_start());
+ EXPECT_EQ(string("\0\0\0\0" "\0\0\0\0" "a6\0\0\0\0c6", 16),
+ partitions[10].partition_key_end());
+
+ ASSERT_EQ(2, partitions[11].hash_buckets().size());
+ EXPECT_EQ(0, partitions[11].hash_buckets()[0]);
+ EXPECT_EQ(1, partitions[11].hash_buckets()[1]);
+ EXPECT_EQ(string("a5\0\0b5\0\0", 8), partitions[11].range_key_start());
+ EXPECT_EQ(string("a6\0\0\0\0c6", 8), partitions[11].range_key_end());
+ EXPECT_EQ(string("\0\0\0\0" "\0\0\0\1" "a5\0\0b5\0\0", 16),
+ partitions[11].partition_key_start());
+ EXPECT_EQ(string("\0\0\0\0" "\0\0\0\1" "a6\0\0\0\0c6", 16),
+ partitions[11].partition_key_end());
+
+ ASSERT_EQ(2, partitions[12].hash_buckets().size());
+ EXPECT_EQ(0, partitions[12].hash_buckets()[0]);
+ EXPECT_EQ(2, partitions[12].hash_buckets()[1]);
+ EXPECT_EQ(string("a5\0\0b5\0\0", 8), partitions[12].range_key_start());
+ EXPECT_EQ(string("a6\0\0\0\0c6", 8), partitions[12].range_key_end());
+ EXPECT_EQ(string("\0\0\0\0" "\0\0\0\2" "a5\0\0b5\0\0", 16),
+ partitions[12].partition_key_start());
+ EXPECT_EQ(string("\0\0\0\0" "\0\0\0\2" "a6\0\0\0\0c6", 16),
+ partitions[12].partition_key_end());
+
+ ASSERT_EQ(2, partitions[13].hash_buckets().size());
+ EXPECT_EQ(1, partitions[13].hash_buckets()[0]);
+ EXPECT_EQ(0, partitions[13].hash_buckets()[1]);
+ EXPECT_EQ(string("a5\0\0b5\0\0", 8), partitions[13].range_key_start());
+ EXPECT_EQ(string("a6\0\0\0\0c6", 8), partitions[13].range_key_end());
+ EXPECT_EQ(string("\0\0\0\1" "\0\0\0\0" "a5\0\0b5\0\0", 16),
+ partitions[13].partition_key_start());
+ EXPECT_EQ(string("\0\0\0\1" "\0\0\0\0" "a6\0\0\0\0c6", 16),
+ partitions[13].partition_key_end());
+
+ ASSERT_EQ(2, partitions[14].hash_buckets().size());
+ EXPECT_EQ(1, partitions[14].hash_buckets()[0]);
+ EXPECT_EQ(1, partitions[14].hash_buckets()[1]);
+ EXPECT_EQ(string("a5\0\0b5\0\0", 8), partitions[14].range_key_start());
+ EXPECT_EQ(string("a6\0\0\0\0c6", 8), partitions[14].range_key_end());
+ EXPECT_EQ(string("\0\0\0\1" "\0\0\0\1" "a5\0\0b5\0\0", 16),
+ partitions[14].partition_key_start());
+ EXPECT_EQ(string("\0\0\0\1" "\0\0\0\1" "a6\0\0\0\0c6", 16),
+ partitions[14].partition_key_end());
+
+ ASSERT_EQ(2, partitions[15].hash_buckets().size());
+ EXPECT_EQ(1, partitions[15].hash_buckets()[0]);
+ EXPECT_EQ(2, partitions[15].hash_buckets()[1]);
+ EXPECT_EQ(string("a5\0\0b5\0\0", 8), partitions[15].range_key_start());
+ EXPECT_EQ(string("a6\0\0\0\0c6", 8), partitions[15].range_key_end());
+ EXPECT_EQ(string("\0\0\0\1" "\0\0\0\2" "a5\0\0b5\0\0", 16),
+ partitions[15].partition_key_start());
+ EXPECT_EQ(string("\0\0\0\1" "\0\0\0\2" "a6\0\0\0\0c6", 16),
+ partitions[15].partition_key_end());
+ };
+
vector<Partition> partitions;
- ASSERT_OK(partition_schema.CreatePartitions({}, bounds, range_hash_schemas, schema, &partitions));
- CheckPartitions(partitions);
+ ASSERT_OK(partition_schema.CreatePartitions(
+ {}, bounds, range_hash_schemas, schema, &partitions));
+ NO_FATALS(check_partitions(partitions));
bounds.clear();
range_hash_schemas.clear();
@@ -1081,8 +1135,9 @@ TEST_F(PartitionTest, TestVaryingHashSchemasPerRange) {
range_hash_schemas.emplace_back(bounds_and_schema.second);
}
- ASSERT_OK(partition_schema.CreatePartitions({}, bounds, range_hash_schemas, schema, &partitions));
- CheckPartitions(partitions);
+ ASSERT_OK(partition_schema.CreatePartitions(
+ {}, bounds, range_hash_schemas, schema, &partitions));
+ NO_FATALS(check_partitions(partitions));
// not clearing bounds or range_hash_schemas, adding a split row to test incompatibility
vector<KuduPartialRow> splits;
@@ -1107,7 +1162,65 @@ TEST_F(PartitionTest, TestVaryingHashSchemasPerRange) {
schema, &partitions);
ASSERT_EQ("Invalid argument: The number of range bounds does not match the number of per "
"range hash schemas.", s1.ToString());
+}
+
+TEST_F(PartitionTest, CustomHashSchemasPerRangeOnly) {
+ // CREATE TABLE t (a STRING, b STRING, PRIMARY KEY (a, b)) RANGE (a, b)
+ Schema schema({ ColumnSchema("a", STRING), ColumnSchema("b", STRING) },
+ { ColumnId(0), ColumnId(1) }, 2);
+
+ // No table-wide hash bucket schema.
+ PartitionSchemaPB schema_builder;
+ PartitionSchema partition_schema;
+ ASSERT_OK(PartitionSchema::FromPB(schema_builder, schema, &partition_schema));
+ CheckSerializationFunctions(schema_builder, partition_schema, schema);
+
+ ASSERT_EQ("RANGE (a, b)", partition_schema.DebugString(schema));
+ typedef pair<KuduPartialRow, KuduPartialRow> RangeBound;
+ vector<RangeBound> bounds;
+ PartitionSchema::RangeHashSchema range_hash_schemas;
+ vector<pair<RangeBound, PartitionSchema::HashBucketSchemas>>
+ bounds_with_hash_schemas;
+
+ // [(a1, b1), (a2, b2))
+ {
+ KuduPartialRow lower(&schema);
+ KuduPartialRow upper(&schema);
+ ASSERT_OK(lower.SetStringNoCopy("a", "a1"));
+ ASSERT_OK(lower.SetStringNoCopy("b", "b1"));
+ ASSERT_OK(upper.SetStringNoCopy("a", "a2"));
+ ASSERT_OK(upper.SetStringNoCopy("b", "b2"));
+ PartitionSchema::HashBucketSchemas hash_schema_2_buckets =
+ { { { ColumnId(0) }, 2, 0 } };
+ bounds.emplace_back(lower, upper);
+ range_hash_schemas.emplace_back(hash_schema_2_buckets);
+ bounds_with_hash_schemas.emplace_back(
+ make_pair(std::move(lower), std::move(upper)),
+ std::move(hash_schema_2_buckets));
+ }
+
+ vector<Partition> partitions;
+ ASSERT_OK(partition_schema.CreatePartitions(
+ {}, bounds, range_hash_schemas, schema, &partitions));
+
+ ASSERT_EQ(2, partitions.size());
+
+ {
+ const auto& p = partitions[0];
+ ASSERT_EQ(1, p.hash_buckets().size());
+ EXPECT_EQ(0, p.hash_buckets()[0]);
+ EXPECT_EQ(string("a1\0\0b1", 6), p.range_key_start());
+ EXPECT_EQ(string("a2\0\0b2", 6), p.range_key_end());
+ }
+
+ {
+ const auto& p = partitions[1];
+ ASSERT_EQ(1, p.hash_buckets().size());
+ EXPECT_EQ(1, p.hash_buckets()[0]);
+ EXPECT_EQ(string("a1\0\0b1", 6), p.range_key_start());
+ EXPECT_EQ(string("a2\0\0b2", 6), p.range_key_end());
+ }
}
TEST_F(PartitionTest, TestVaryingHashSchemasPerUnboundedRanges) {
@@ -1171,42 +1284,49 @@ TEST_F(PartitionTest, TestVaryingHashSchemasPerUnboundedRanges) {
// Partitions below sorted by range, can verify that the partition keyspace is filled by checking
// that the start key of the first partition and the end key of the last partition is cleared.
+ ASSERT_EQ(1, partitions[0].hash_buckets().size());
EXPECT_EQ(0, partitions[0].hash_buckets()[0]);
EXPECT_EQ("", partitions[0].range_key_start());
EXPECT_EQ(string("a1\0\0\0\0c1", 8), partitions[0].range_key_end());
EXPECT_EQ("", partitions[0].partition_key_start());
EXPECT_EQ(string("\0\0\0\0" "a1\0\0\0\0c1", 12), partitions[0].partition_key_end());
+ ASSERT_EQ(1, partitions[1].hash_buckets().size());
EXPECT_EQ(1, partitions[1].hash_buckets()[0]);
EXPECT_EQ("", partitions[1].range_key_start());
EXPECT_EQ(string("a1\0\0\0\0c1", 8), partitions[1].range_key_end());
EXPECT_EQ(string("\0\0\0\1", 4),partitions[1].partition_key_start());
EXPECT_EQ(string("\0\0\0\1" "a1\0\0\0\0c1", 12), partitions[1].partition_key_end());
+ ASSERT_EQ(1, partitions[2].hash_buckets().size());
EXPECT_EQ(2, partitions[2].hash_buckets()[0]);
EXPECT_EQ("", partitions[2].range_key_start());
EXPECT_EQ(string("a1\0\0\0\0c1", 8), partitions[2].range_key_end());
EXPECT_EQ(string("\0\0\0\2", 4), partitions[2].partition_key_start());
EXPECT_EQ(string("\0\0\0\2" "a1\0\0\0\0c1", 12), partitions[2].partition_key_end());
+ ASSERT_EQ(1, partitions[3].hash_buckets().size());
EXPECT_EQ(3, partitions[3].hash_buckets()[0]);
EXPECT_EQ("", partitions[3].range_key_start());
EXPECT_EQ(string("a1\0\0\0\0c1", 8), partitions[3].range_key_end());
EXPECT_EQ(string("\0\0\0\3", 4), partitions[3].partition_key_start());
EXPECT_EQ(string("\0\0\0\3" "a1\0\0\0\0c1", 12), partitions[3].partition_key_end());
+ ASSERT_EQ(1, partitions[4].hash_buckets().size());
EXPECT_EQ(0, partitions[4].hash_buckets()[0]);
EXPECT_EQ(string("a2\0\0b2\0\0", 8), partitions[4].range_key_start());
EXPECT_EQ(string("a3\0\0b3\0\0", 8), partitions[4].range_key_end());
EXPECT_EQ(string("\0\0\0\0" "a2\0\0b2\0\0", 12), partitions[4].partition_key_start());
EXPECT_EQ(string("\0\0\0\0" "a3\0\0b3\0\0", 12), partitions[4].partition_key_end());
+ ASSERT_EQ(1, partitions[5].hash_buckets().size());
EXPECT_EQ(1, partitions[5].hash_buckets()[0]);
EXPECT_EQ(string("a2\0\0b2\0\0", 8), partitions[5].range_key_start());
EXPECT_EQ(string("a3\0\0b3\0\0", 8), partitions[5].range_key_end());
EXPECT_EQ(string("\0\0\0\1" "a2\0\0b2\0\0", 12), partitions[5].partition_key_start());
EXPECT_EQ(string("\0\0\0\1" "a3\0\0b3\0\0", 12), partitions[5].partition_key_end());
+ ASSERT_EQ(2, partitions[6].hash_buckets().size());
EXPECT_EQ(0, partitions[6].hash_buckets()[0]);
EXPECT_EQ(0, partitions[6].hash_buckets()[1]);
EXPECT_EQ(string("a4\0\0b4\0\0", 8), partitions[6].range_key_start());
@@ -1214,6 +1334,7 @@ TEST_F(PartitionTest, TestVaryingHashSchemasPerUnboundedRanges) {
EXPECT_EQ(string("\0\0\0\0" "\0\0\0\0" "a4\0\0b4\0\0", 16), partitions[6].partition_key_start());
EXPECT_EQ(string("\0\0\0\0" "\0\0\0\1", 8), partitions[6].partition_key_end());
+ ASSERT_EQ(2, partitions[7].hash_buckets().size());
EXPECT_EQ(0, partitions[7].hash_buckets()[0]);
EXPECT_EQ(1, partitions[7].hash_buckets()[1]);
EXPECT_EQ(string("a4\0\0b4\0\0", 8), partitions[7].range_key_start());
@@ -1221,6 +1342,7 @@ TEST_F(PartitionTest, TestVaryingHashSchemasPerUnboundedRanges) {
EXPECT_EQ(string("\0\0\0\0" "\0\0\0\1" "a4\0\0b4\0\0", 16),partitions[7].partition_key_start());
EXPECT_EQ(string("\0\0\0\0" "\0\0\0\2", 8), partitions[7].partition_key_end());
+ ASSERT_EQ(2, partitions[8].hash_buckets().size());
EXPECT_EQ(0, partitions[8].hash_buckets()[0]);
EXPECT_EQ(2, partitions[8].hash_buckets()[1]);
EXPECT_EQ(string("a4\0\0b4\0\0", 8), partitions[8].range_key_start());
@@ -1228,6 +1350,7 @@ TEST_F(PartitionTest, TestVaryingHashSchemasPerUnboundedRanges) {
EXPECT_EQ(string("\0\0\0\0" "\0\0\0\2" "a4\0\0b4\0\0", 16), partitions[8].partition_key_start());
EXPECT_EQ(string("\0\0\0\1", 4), partitions[8].partition_key_end());
+ ASSERT_EQ(2, partitions[9].hash_buckets().size());
EXPECT_EQ(1, partitions[9].hash_buckets()[0]);
EXPECT_EQ(0, partitions[9].hash_buckets()[1]);
EXPECT_EQ(string("a4\0\0b4\0\0", 8), partitions[9].range_key_start());
@@ -1235,6 +1358,7 @@ TEST_F(PartitionTest, TestVaryingHashSchemasPerUnboundedRanges) {
EXPECT_EQ(string("\0\0\0\1" "\0\0\0\0" "a4\0\0b4\0\0", 16), partitions[9].partition_key_start());
EXPECT_EQ(string("\0\0\0\1" "\0\0\0\1", 8), partitions[9].partition_key_end());
+ ASSERT_EQ(2, partitions[10].hash_buckets().size());
EXPECT_EQ(1, partitions[10].hash_buckets()[0]);
EXPECT_EQ(1, partitions[10].hash_buckets()[1]);
EXPECT_EQ(string("a4\0\0b4\0\0", 8), partitions[10].range_key_start());
@@ -1242,6 +1366,7 @@ TEST_F(PartitionTest, TestVaryingHashSchemasPerUnboundedRanges) {
EXPECT_EQ(string("\0\0\0\1" "\0\0\0\1" "a4\0\0b4\0\0", 16),partitions[10].partition_key_start());
EXPECT_EQ(string("\0\0\0\1" "\0\0\0\2", 8), partitions[10].partition_key_end());
+ ASSERT_EQ(2, partitions[11].hash_buckets().size());
EXPECT_EQ(1, partitions[11].hash_buckets()[0]);
EXPECT_EQ(2, partitions[11].hash_buckets()[1]);
EXPECT_EQ(string("a4\0\0b4\0\0", 8), partitions[11].range_key_start());
diff --git a/src/kudu/common/partition.cc b/src/kudu/common/partition.cc
index 6aa5cb5..d8f578d 100644
--- a/src/kudu/common/partition.cc
+++ b/src/kudu/common/partition.cc
@@ -477,30 +477,6 @@ Status PartitionSchema::SplitRangeBounds(const Schema& schema,
return Status::OK();
}
-vector<Partition> PartitionSchema::GenerateHashPartitions(const HashBucketSchemas& hash_schemas,
- const KeyEncoder<string>& hash_encoder) {
- vector<Partition> hash_partitions(1);
- // Create a partition for each hash bucket combination.
- for (const HashBucketSchema& bucket_schema : hash_schemas) {
- auto expected_partitions = hash_partitions.size() * bucket_schema.num_buckets;
- vector<Partition> new_partitions;
- new_partitions.reserve(expected_partitions);
- // For each of the partitions created so far, replicate it
- // by the number of buckets in the next hash bucketing component.
- for (const Partition& base_partition : hash_partitions) {
- for (int32_t bucket = 0; bucket < bucket_schema.num_buckets; bucket++) {
- Partition partition = base_partition;
- partition.hash_buckets_.push_back(bucket);
- hash_encoder.Encode(&bucket, &partition.partition_key_start_);
- hash_encoder.Encode(&bucket, &partition.partition_key_end_);
- new_partitions.push_back(partition);
- }
- }
- hash_partitions = std::move(new_partitions);
- }
- return hash_partitions;
-}
-
Status PartitionSchema::CreatePartitions(const vector<KuduPartialRow>& split_rows,
const vector<pair<KuduPartialRow, KuduPartialRow>>& range_bounds,
@@ -520,8 +496,8 @@ Status PartitionSchema::CreatePartitions(const vector<KuduPartialRow>& split_row
}
}
- vector<Partition> base_hash_partitions = GenerateHashPartitions(hash_bucket_schemas_,
- hash_encoder);
+ vector<Partition> base_hash_partitions = GenerateHashPartitions(
+ hash_bucket_schemas_, hash_encoder);
std::unordered_set<int> range_column_idxs;
for (const ColumnId& column_id : range_schema_.column_ids) {
@@ -548,7 +524,27 @@ Status PartitionSchema::CreatePartitions(const vector<KuduPartialRow>& split_row
// empty if no per range hash schemas are used.
vector<int> partition_idx_to_hash_schemas_idx;
- if (!range_hash_schemas.empty()) {
+ // Even if no hash partitioning for a table is specified, there must be at
+ // least one element in 'base_hash_partitions': it's used to build the result
+ // set of range partitions.
+ DCHECK_GE(base_hash_partitions.size(), 1);
+ // The case of a single element in base_hash_partitions is a special case.
+ DCHECK(base_hash_partitions.size() > 1 ||
+ base_hash_partitions.front().hash_buckets().empty());
+
+ if (range_hash_schemas.empty()) {
+ // Create a partition per range bound and hash bucket combination.
+ vector<Partition> new_partitions;
+ for (const Partition& base_partition : base_hash_partitions) {
+ for (const auto& bound : bounds_with_hash_schemas) {
+ Partition partition = base_partition;
+ partition.partition_key_start_.append(bound.lower);
+ partition.partition_key_end_.append(bound.upper);
+ new_partitions.emplace_back(std::move(partition));
+ }
+ }
+ *partitions = std::move(new_partitions);
+ } else {
// The number of ranges should match the size of range_hash_schemas.
DCHECK_EQ(range_hash_schemas.size(), bounds_with_hash_schemas.size());
// No split rows should be defined if range_hash_schemas is populated.
@@ -558,15 +554,14 @@ Status PartitionSchema::CreatePartitions(const vector<KuduPartialRow>& split_row
for (int i = 0; i < bounds_with_hash_schemas.size(); i++) {
const auto& bound = bounds_with_hash_schemas[i];
const auto& current_range_hash_schemas = bound.hash_schemas;
- vector<Partition> current_bound_hash_partitions;
- // If current bound's HashBucketSchema is empty, implies use of default table-wide schema.
- // If not empty, generate hash partitions for all the provided hash schemas in this range.
- if (current_range_hash_schemas.empty()) {
- current_bound_hash_partitions = base_hash_partitions;
- } else {
- current_bound_hash_partitions = GenerateHashPartitions(current_range_hash_schemas,
- hash_encoder);
- }
+ // If current bound's HashBucketSchema is empty, implies use of default
+ // table-wide schema. If not empty, generate hash partitions for all the
+ // provided hash schemas in this range.
+ vector<Partition> current_bound_hash_partitions =
+ current_range_hash_schemas.empty() ? base_hash_partitions
+ : GenerateHashPartitions(
+ current_range_hash_schemas,
+ hash_encoder);
// Add range part to partition key.
for (Partition& partition : current_bound_hash_partitions) {
partition.partition_key_start_.append(bound.lower);
@@ -580,18 +575,6 @@ Status PartitionSchema::CreatePartitions(const vector<KuduPartialRow>& split_row
}
DCHECK_EQ(partition_idx_to_hash_schemas_idx.size(), result_partitions.size());
*partitions = std::move(result_partitions);
- } else {
- // Create a partition per range bound and hash bucket combination.
- vector<Partition> new_partitions;
- for (const Partition& base_partition : base_hash_partitions) {
- for (const auto& bound : bounds_with_hash_schemas) {
- Partition partition = base_partition;
- partition.partition_key_start_.append(bound.lower);
- partition.partition_key_end_.append(bound.upper);
- new_partitions.push_back(partition);
- }
- }
- *partitions = std::move(new_partitions);
}
// Note: the following discussion and logic only takes effect when the table's
// partition schema includes at least one hash bucket component, and the
@@ -1013,7 +996,7 @@ string PartitionSchema::RangeKeyDebugString(const ConstContiguousRow& key) const
break;
}
key.schema()->column(column_idx).DebugCellAppend(key.cell(column_idx), &column);
- components.push_back(column);
+ components.emplace_back(std::move(column));
}
if (components.size() == 1) {
@@ -1180,6 +1163,61 @@ int32_t PartitionSchema::BucketForEncodedColumns(const string& encoded_hash_colu
return hash % static_cast<uint64_t>(hash_bucket_schema.num_buckets);
}
+vector<Partition> PartitionSchema::GenerateHashPartitions(
+ const HashBucketSchemas& hash_schemas,
+ const KeyEncoder<string>& hash_encoder) {
+ vector<Partition> hash_partitions(1);
+ // Create a partition for each hash bucket combination.
+ for (const HashBucketSchema& bucket_schema : hash_schemas) {
+ vector<Partition> new_partitions;
+ new_partitions.reserve(hash_partitions.size() * bucket_schema.num_buckets);
+ // For each of the partitions created so far, replicate it
+ // by the number of buckets in the next hash bucketing component.
+ for (const Partition& base_partition : hash_partitions) {
+ for (auto bucket = 0; bucket < bucket_schema.num_buckets; ++bucket) {
+ Partition partition = base_partition;
+ partition.hash_buckets_.push_back(bucket);
+ hash_encoder.Encode(&bucket, &partition.partition_key_start_);
+ hash_encoder.Encode(&bucket, &partition.partition_key_end_);
+ new_partitions.emplace_back(std::move(partition));
+ }
+ }
+ hash_partitions = std::move(new_partitions);
+ }
+ return hash_partitions;
+}
+
+Status PartitionSchema::ValidateHashBucketSchemas(const Schema& schema,
+ const HashBucketSchemas& hash_schemas) {
+ set<ColumnId> hash_columns;
+ for (const PartitionSchema::HashBucketSchema& hash_schema : hash_schemas) {
+ if (hash_schema.num_buckets < 2) {
+ return Status::InvalidArgument("must have at least two hash buckets");
+ }
+
+ if (hash_schema.column_ids.empty()) {
+ return Status::InvalidArgument("must have at least one hash column");
+ }
+
+ for (const ColumnId& hash_column : hash_schema.column_ids) {
+ if (!hash_columns.insert(hash_column).second) {
+ return Status::InvalidArgument("hash bucket schema components must not "
+ "contain columns in common");
+ }
+ int32_t column_idx = schema.find_column_by_id(hash_column);
+ if (column_idx == Schema::kColumnNotFound) {
+ return Status::InvalidArgument("must specify existing columns for hash "
+ "bucket partition components");
+ }
+ if (column_idx >= schema.num_key_columns()) {
+ return Status::InvalidArgument("must specify only primary key columns for "
+ "hash bucket partition components");
+ }
+ }
+ }
+ return Status::OK();
+}
+
template<typename Row>
Status PartitionSchema::BucketForRow(const Row& row,
const HashBucketSchema& hash_bucket_schema,
@@ -1213,36 +1251,6 @@ void PartitionSchema::Clear() {
}
-Status PartitionSchema::ValidateHashBucketSchemas(const Schema& schema,
- const HashBucketSchemas& hash_schemas) {
- set<ColumnId> hash_columns;
- for (const PartitionSchema::HashBucketSchema& hash_schema : hash_schemas) {
- if (hash_schema.num_buckets < 2) {
- return Status::InvalidArgument("must have at least two hash buckets");
- }
-
- if (hash_schema.column_ids.size() < 1) {
- return Status::InvalidArgument("must have at least one hash column");
- }
-
- for (const ColumnId& hash_column : hash_schema.column_ids) {
- if (!hash_columns.insert(hash_column).second) {
- return Status::InvalidArgument("hash bucket schema components must not "
- "contain columns in common");
- }
- int32_t column_idx = schema.find_column_by_id(hash_column);
- if (column_idx == Schema::kColumnNotFound) {
- return Status::InvalidArgument("must specify existing columns for hash "
- "bucket partition components");
- } else if (column_idx >= schema.num_key_columns()) {
- return Status::InvalidArgument("must specify only primary key columns for "
- "hash bucket partition components");
- }
- }
- }
- return Status::OK();
-}
-
Status PartitionSchema::Validate(const Schema& schema) const {
RETURN_NOT_OK(ValidateHashBucketSchemas(schema, hash_bucket_schemas_));
diff --git a/src/kudu/common/partition.h b/src/kudu/common/partition.h
index c37c7c5..ff58fb0 100644
--- a/src/kudu/common/partition.h
+++ b/src/kudu/common/partition.h
@@ -357,6 +357,15 @@ class PartitionSchema {
static int32_t BucketForEncodedColumns(const std::string& encoded_hash_columns,
const HashBucketSchema& hash_bucket_schema);
+ // Helper function that validates the hash bucket schemas.
+ static Status ValidateHashBucketSchemas(const Schema& schema,
+ const HashBucketSchemas& hash_schemas);
+
+ // Generates hash partitions for each combination of hash buckets in hash_schemas.
+ static std::vector<Partition> GenerateHashPartitions(
+ const HashBucketSchemas& hash_schemas,
+ const KeyEncoder<std::string>& hash_encoder);
+
// Assigns the row to a hash bucket according to the hash schema.
template<typename Row>
static Status BucketForRow(const Row& row,
@@ -426,18 +435,10 @@ class PartitionSchema {
// Clears the state of this partition schema.
void Clear();
- // Helper function that validates the hash bucket schemas.
- static Status ValidateHashBucketSchemas(const Schema& schema,
- const HashBucketSchemas& hash_schemas);
-
// Validates that this partition schema is valid. Returns OK, or an
// appropriate error code for an invalid partition schema.
Status Validate(const Schema& schema) const;
- // Generates hash partitions for each combination of hash buckets in hash_schemas.
- static std::vector<Partition> GenerateHashPartitions(const HashBucketSchemas& hash_schemas,
- const KeyEncoder<std::string>& hash_encoder);
-
// Validates the split rows, converts them to partition key form, and inserts
// them into splits in sorted order.
Status EncodeRangeSplits(const std::vector<KuduPartialRow>& split_rows,