This is an automated email from the ASF dual-hosted git repository. granthenke pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/kudu.git
commit 83b8caf4f1d60c021ef1e5a5f930847fb96c5ff4 Author: ningw <[email protected]> AuthorDate: Thu Dec 24 13:13:32 2020 +0800 KUDU-1644 use range partition info for pruning When pruning in-list predicate values, range partition can also be taken into consideration. Change-Id: I3f14f543cffd44f090026f344c9b06af13ea2e10 Reviewed-on: http://gerrit.cloudera.org:8080/16903 Tested-by: Kudu Jenkins Reviewed-by: Andrew Wong <[email protected]> --- src/kudu/common/partition.cc | 46 +- src/kudu/common/partition.h | 19 +- src/kudu/common/scan_spec-test.cc | 847 +++++++++++++++++++++++++------------ src/kudu/common/scan_spec.cc | 37 +- src/kudu/common/scan_spec.h | 18 +- src/kudu/tools/kudu-admin-test.cc | 2 +- src/kudu/tserver/tablet_service.cc | 6 +- 7 files changed, 680 insertions(+), 295 deletions(-) diff --git a/src/kudu/common/partition.cc b/src/kudu/common/partition.cc index 6414e0c..6b2018e 100644 --- a/src/kudu/common/partition.cc +++ b/src/kudu/common/partition.cc @@ -544,15 +544,7 @@ Status PartitionSchema::PartitionContainsRowImpl(const Partition& partition, } } - string range_partition_key; - RETURN_NOT_OK(EncodeColumns(row, range_schema_.column_ids, &range_partition_key)); - - // If all of the hash buckets match, then the row is contained in the - // partition if the row is gte the lower bound; and if there is no upper - // bound, or the row is lt the upper bound. 
- *contains = (Slice(range_partition_key).compare(partition.range_key_start()) >= 0) - && (partition.range_key_end().empty() - || Slice(range_partition_key).compare(partition.range_key_end()) < 0); + RETURN_NOT_OK(RangePartitionContainsRowImpl(partition, row, contains)); return Status::OK(); } @@ -570,6 +562,24 @@ Status PartitionSchema::HashPartitionContainsRowImpl(const Partition& partition, return Status::OK(); } +template<typename Row> +Status PartitionSchema::RangePartitionContainsRowImpl(const Partition& partition, + const Row& row, + bool* contains) const { + string range_partition_key; + // If range partition is not used, column_ids would be empty and + // EncodeColumns() would return immediately. + RETURN_NOT_OK(EncodeColumns(row, range_schema_.column_ids, &range_partition_key)); + + // If all of the hash buckets match, then the row is contained in the + // partition if the row is gte the lower bound; and if there is no upper + // bound, or the row is lt the upper bound. + *contains = (Slice(range_partition_key).compare(partition.range_key_start()) >= 0) + && (partition.range_key_end().empty() + || Slice(range_partition_key).compare(partition.range_key_end()) < 0); + return Status::OK(); +} + Status PartitionSchema::PartitionContainsRow(const Partition& partition, const KuduPartialRow& row, bool* contains) const { @@ -596,6 +606,18 @@ Status PartitionSchema::HashPartitionContainsRow(const Partition& partition, return HashPartitionContainsRowImpl(partition, row, hash_idx, contains); } +Status PartitionSchema::RangePartitionContainsRow(const Partition& partition, + const KuduPartialRow& row, + bool* contains) const { + return RangePartitionContainsRowImpl(partition, row, contains); +} + +Status PartitionSchema::RangePartitionContainsRow(const Partition& partition, + const ConstContiguousRow& row, + bool* contains) const { + return RangePartitionContainsRowImpl(partition, row, contains); +} + Status PartitionSchema::DecodeRangeKey(Slice* encoded_key, 
KuduPartialRow* partial_row, Arena* arena) const { @@ -1388,4 +1410,10 @@ int32_t PartitionSchema::TryGetSingleColumnHashPartitionIndex(const Schema& sche return -1; } +bool PartitionSchema::IsColumnSingleRangeSchema(const Schema& schema, int32_t col_idx) const { + const ColumnId column_id = schema.column_id(col_idx); + return range_partition_schema().column_ids.size() == 1 && + range_partition_schema().column_ids[0] == column_id; +} + } // namespace kudu diff --git a/src/kudu/common/partition.h b/src/kudu/common/partition.h index d76802e..85c58e9 100644 --- a/src/kudu/common/partition.h +++ b/src/kudu/common/partition.h @@ -211,7 +211,7 @@ class PartitionSchema { const ConstContiguousRow& row, bool* contains) const WARN_UNUSED_RESULT; - // Tests if the hash partition contians the row with given hash_idx. + // Tests if the hash partition contains the row with given hash_idx. Status HashPartitionContainsRow(const Partition& partition, const KuduPartialRow& row, int hash_idx, @@ -221,6 +221,14 @@ class PartitionSchema { int hash_idx, bool* contains) const WARN_UNUSED_RESULT; + // Tests if the range partition contains the row. + Status RangePartitionContainsRow(const Partition& partition, + const KuduPartialRow& row, + bool* contains) const WARN_UNUSED_RESULT; + Status RangePartitionContainsRow(const Partition& partition, + const ConstContiguousRow& row, + bool* contains) const WARN_UNUSED_RESULT; + // Returns a text description of the partition suitable for debug printing. // // Partitions are considered metadata, so no redaction will happen on the hash @@ -306,6 +314,9 @@ class PartitionSchema { // contains only one column, otherwise returns -1. int32_t TryGetSingleColumnHashPartitionIndex(const Schema& schema, int32_t col_idx) const; + // Given a column idx, verify that it is the only column of the range partition. 
+ bool IsColumnSingleRangeSchema(const Schema& schema, int32_t col_idx) const; + private: friend class PartitionPruner; FRIEND_TEST(PartitionTest, TestIncrementRangePartitionBounds); @@ -357,6 +368,12 @@ class PartitionSchema { int hash_idx, bool* contains) const; + // Private templated helper for RangePartitionContainsRow. + template<typename Row> + Status RangePartitionContainsRowImpl(const Partition& partition, + const Row& row, + bool* contains) const; + // Private templated helper for EncodeKey. template<typename Row> Status EncodeKeyImpl(const Row& row, std::string* buf) const; diff --git a/src/kudu/common/scan_spec-test.cc b/src/kudu/common/scan_spec-test.cc index 2f51824..7ebb096 100644 --- a/src/kudu/common/scan_spec-test.cc +++ b/src/kudu/common/scan_spec-test.cc @@ -44,17 +44,19 @@ #include "kudu/util/test_macros.h" #include "kudu/util/test_util.h" -using std::vector; using std::pair; +using std::string; +using std::vector; namespace kudu { namespace { -// Generate partition schema of a table with given hash_partitions. +// Generate partition schema of a table with given hash_partitions and range partition keys. // E.g. GeneratePartitionSchema(schema, {make_pair({a, b}, 3), make_pair({c}, 5) }) // Returns 'partition by hash(a, b) partitions 3, hash(c) partitions 5'. 
void GeneratePartitionSchema(const Schema& schema, - const vector<std::pair<vector<std::string>, int>>& hash_partitions, + const vector<pair<vector<string>, int>>& hash_partitions, + const vector<string>& range_partition_columns, PartitionSchema* partition_schema) { PartitionSchemaPB partition_schema_pb; for (const auto& col_names_and_num_buckets : hash_partitions) { @@ -68,12 +70,33 @@ void GeneratePartitionSchema(const Schema& schema, column_pb->set_name(col_name); } } + if (!range_partition_columns.empty()) { + auto* range_schema = partition_schema_pb.mutable_range_schema(); + for (const auto& range_column : range_partition_columns) { + range_schema->add_columns()->set_name(range_column); + } + } CHECK_OK(PartitionSchema::FromPB(partition_schema_pb, schema, partition_schema)); } + +// Copy a spec and return the pruned spec string. +string PruneInlistValuesAndGetSchemaString(const ScanSpec& spec, + const Schema& schema, + const Partition& partition, + const PartitionSchema& partition_schema, + Arena* arena) { + ScanSpec copy_spec = spec; + + copy_spec.PruneInlistValuesIfPossible(schema, partition, partition_schema); + copy_spec.OptimizeScan(schema, arena, true); + + return copy_spec.ToString(schema); +} + } // anonymous namespace -static std::string ToString(const vector<ColumnSchema>& columns) { - std::string str; +static string ToString(const vector<ColumnSchema>& columns) { + string str; for (const auto& column : columns) { str += column.ToString(); str += "\n"; @@ -409,45 +432,373 @@ TEST_F(CompositeIntKeysTest, TestOneHashKeyInListHashPruning) { PartitionSchema partition_schema; GeneratePartitionSchema(schema, - { pair<vector<std::string>, int>({ "a" }, 3) }, + { pair<vector<string>, int>({ "a" }, 3) }, + {}, &partition_schema); vector<Partition> partitions; ASSERT_OK(partition_schema.CreatePartitions({}, {}, {}, schema, &partitions)); ASSERT_EQ(3, partitions.size()); - // clone scan_spec for different partition. 
- ScanSpec spec_p1 = spec; - ScanSpec spec_p2 = spec; - ScanSpec spec_p3 = spec; + // Verify the splitted values can merge into original set without overlapping. + ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec, + schema, + partitions[0], + partition_schema, + &arena_), + "PK >= (int8 a=4, int8 b=50, int8 c=-128) AND " + "PK < (int8 a=8, int8 b=101, int8 c=-128) AND " + "a IN (4, 7, 8) AND b IN (50, 100)"); + + ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec, + schema, + partitions[1], + partition_schema, + &arena_), + "PK >= (int8 a=0, int8 b=50, int8 c=-128) AND " + "PK < (int8 a=9, int8 b=101, int8 c=-128) AND " + "a IN (0, 2, 5, 9) AND b IN (50, 100)"); + + ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec, + schema, + partitions[2], + partition_schema, + &arena_), + "PK >= (int8 a=1, int8 b=50, int8 c=-128) AND " + "PK < (int8 a=6, int8 b=101, int8 c=-128) AND " + "a IN (1, 3, 6) AND b IN (50, 100)"); +} - spec_p1.PruneHashForInlistIfPossible(schema, partitions[0], partition_schema); - spec_p1.OptimizeScan(schema, &arena_, true); +// Test that hash(a), range(a) IN list predicates prune would happen on +// both hash and range aspects. +TEST_F(CompositeIntKeysTest, TestOneHashKeyOneRangeKeyInListHashPruning) { + ScanSpec spec; + AddInPredicate<int8_t>(&spec, "a", { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 }); + AddInPredicate<int8_t>(&spec, "b", { 50, 100 }); - // Verify the splitted values can merge into originl set without overlapping. 
- SCOPED_TRACE(spec_p1.ToString(schema)); - EXPECT_EQ("PK >= (int8 a=4, int8 b=50, int8 c=-128) AND " - "PK < (int8 a=8, int8 b=101, int8 c=-128) AND " - "a IN (4, 7, 8) AND b IN (50, 100)", - spec_p1.ToString(schema)); + Schema schema = schema_.CopyWithColumnIds(); - spec_p2.PruneHashForInlistIfPossible(schema, partitions[1], partition_schema); - spec_p2.OptimizeScan(schema, &arena_, true); + PartitionSchema partition_schema; + GeneratePartitionSchema(schema, + { pair<vector<string>, int>({ "a" }, 3) }, + { "a" }, + &partition_schema); - SCOPED_TRACE(spec_p2.ToString(schema)); - EXPECT_EQ("PK >= (int8 a=0, int8 b=50, int8 c=-128) AND " - "PK < (int8 a=9, int8 b=101, int8 c=-128) AND " - "a IN (0, 2, 5, 9) AND b IN (50, 100)", - spec_p2.ToString(schema)); + KuduPartialRow split1(&schema); + KuduPartialRow split2(&schema); + ASSERT_OK(split1.SetInt8("a", 3)); + ASSERT_OK(split2.SetInt8("a", 6)); - spec_p3.PruneHashForInlistIfPossible(schema, partitions[2], partition_schema); - spec_p3.OptimizeScan(schema, &arena_, true); + vector<Partition> partitions; + ASSERT_OK(partition_schema.CreatePartitions({ split1, split2 }, + {}, + {}, + schema, + &partitions)); + ASSERT_EQ(9, partitions.size()); - SCOPED_TRACE(spec_p3.ToString(schema)); - EXPECT_EQ("PK >= (int8 a=1, int8 b=50, int8 c=-128) AND " + ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec, + schema, + partitions[0], + partition_schema, + &arena_), + "a IN () AND b IN (50, 100)"); + + ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec, + schema, + partitions[1], + partition_schema, + &arena_), + "PK >= (int8 a=4, int8 b=50, int8 c=-128) AND " + "PK < (int8 a=4, int8 b=101, int8 c=-128) AND " + "a IN (4) AND b IN (50, 100)"); + + ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec, + schema, + partitions[2], + partition_schema, + &arena_), + "PK >= (int8 a=7, int8 b=50, int8 c=-128) AND " + "PK < (int8 a=8, int8 b=101, int8 c=-128) AND " + "a IN (7, 8) AND b IN (50, 100)"); + + 
ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec, + schema, + partitions[3], + partition_schema, + &arena_), + "PK >= (int8 a=0, int8 b=50, int8 c=-128) AND " + "PK < (int8 a=2, int8 b=101, int8 c=-128) AND " + "a IN (0, 2) AND b IN (50, 100)"); + + ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec, + schema, + partitions[4], + partition_schema, + &arena_), + "PK >= (int8 a=5, int8 b=50, int8 c=-128) AND " + "PK < (int8 a=5, int8 b=101, int8 c=-128) AND " + "a IN (5) AND b IN (50, 100)"); + + ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec, + schema, + partitions[5], + partition_schema, + &arena_), + "PK >= (int8 a=9, int8 b=50, int8 c=-128) AND " + "PK < (int8 a=9, int8 b=101, int8 c=-128) AND " + "a IN (9) AND b IN (50, 100)"); + + ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec, + schema, + partitions[6], + partition_schema, + &arena_), + "PK >= (int8 a=1, int8 b=50, int8 c=-128) AND " + "PK < (int8 a=1, int8 b=101, int8 c=-128) AND " + "a IN (1) AND b IN (50, 100)"); + + ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec, + schema, + partitions[7], + partition_schema, + &arena_), + "PK >= (int8 a=3, int8 b=50, int8 c=-128) AND " + "PK < (int8 a=3, int8 b=101, int8 c=-128) AND " + "a IN (3) AND b IN (50, 100)"); + + ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec, + schema, + partitions[8], + partition_schema, + &arena_), + "PK >= (int8 a=6, int8 b=50, int8 c=-128) AND " "PK < (int8 a=6, int8 b=101, int8 c=-128) AND " - "a IN (1, 3, 6) AND b IN (50, 100)", - spec_p3.ToString(schema)); + "a IN (6) AND b IN (50, 100)"); +} + +// Test that hash(a), range(a, b) IN list predicates prune would happen +// on hash-key but not on range key. 
+TEST_F(CompositeIntKeysTest, TestOneHashKeyMultiRangeKeyInListHashPruning) { + ScanSpec spec; + AddInPredicate<int8_t>(&spec, "a", { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 }); + AddInPredicate<int8_t>(&spec, "b", { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 }); + + Schema schema = schema_.CopyWithColumnIds(); + + PartitionSchema partition_schema; + GeneratePartitionSchema(schema, + { pair<vector<string>, int>({ "a" }, 3) }, + { "a", "b" }, + &partition_schema); + + KuduPartialRow split1(&schema); + KuduPartialRow split2(&schema); + ASSERT_OK(split1.SetInt8("a", 2)); + ASSERT_OK(split1.SetInt8("b", 3)); + + ASSERT_OK(split2.SetInt8("a", 6)); + ASSERT_OK(split2.SetInt8("b", 6)); + + vector<Partition> partitions; + ASSERT_OK(partition_schema.CreatePartitions({ split1, split2 }, + {}, + {}, + schema, + &partitions)); + ASSERT_EQ(9, partitions.size()); + + ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec, + schema, + partitions[0], + partition_schema, + &arena_), + "PK >= (int8 a=4, int8 b=0, int8 c=-128) AND " + "PK < (int8 a=8, int8 b=10, int8 c=-128) AND " + "a IN (4, 7, 8) AND b IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9)"); + + ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec, + schema, + partitions[1], + partition_schema, + &arena_), + "PK >= (int8 a=4, int8 b=0, int8 c=-128) AND " + "PK < (int8 a=8, int8 b=10, int8 c=-128) AND " + "a IN (4, 7, 8) AND b IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9)"); + + ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec, + schema, + partitions[2], + partition_schema, + &arena_), + "PK >= (int8 a=4, int8 b=0, int8 c=-128) AND " + "PK < (int8 a=8, int8 b=10, int8 c=-128) AND " + "a IN (4, 7, 8) AND b IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9)"); + + ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec, + schema, + partitions[3], + partition_schema, + &arena_), + "PK >= (int8 a=0, int8 b=0, int8 c=-128) AND " + "PK < (int8 a=9, int8 b=10, int8 c=-128) AND " + "a IN (0, 2, 5, 9) AND b IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9)"); + + ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec, + 
schema, + partitions[4], + partition_schema, + &arena_), + "PK >= (int8 a=0, int8 b=0, int8 c=-128) AND " + "PK < (int8 a=9, int8 b=10, int8 c=-128) AND " + "a IN (0, 2, 5, 9) AND b IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9)"); + + ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec, + schema, + partitions[5], + partition_schema, + &arena_), + "PK >= (int8 a=0, int8 b=0, int8 c=-128) AND " + "PK < (int8 a=9, int8 b=10, int8 c=-128) AND " + "a IN (0, 2, 5, 9) AND b IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9)"); + + ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec, + schema, + partitions[6], + partition_schema, + &arena_), + "PK >= (int8 a=1, int8 b=0, int8 c=-128) AND " + "PK < (int8 a=6, int8 b=10, int8 c=-128) AND " + "a IN (1, 3, 6) AND b IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9)"); + + ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec, + schema, + partitions[7], + partition_schema, + &arena_), + "PK >= (int8 a=1, int8 b=0, int8 c=-128) AND " + "PK < (int8 a=6, int8 b=10, int8 c=-128) AND " + "a IN (1, 3, 6) AND b IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9)"); + + ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec, + schema, + partitions[8], + partition_schema, + &arena_), + "PK >= (int8 a=1, int8 b=0, int8 c=-128) AND " + "PK < (int8 a=6, int8 b=10, int8 c=-128) AND " + "a IN (1, 3, 6) AND b IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9)"); +} + +// Test that hash(a), range(b) IN list predicates prune would happen +// on both hash and range aspects. 
+TEST_F(CompositeIntKeysTest, TestDifferentHashRangeKeyInListHashPruning) { + ScanSpec spec; + AddInPredicate<int8_t>(&spec, "a", { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 }); + AddInPredicate<int8_t>(&spec, "b", { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 }); + + Schema schema = schema_.CopyWithColumnIds(); + + PartitionSchema partition_schema; + GeneratePartitionSchema(schema, + { pair<vector<string>, int>({ "a" }, 3) }, + { "b" }, + &partition_schema); + + KuduPartialRow split1(&schema); + KuduPartialRow split2(&schema); + + ASSERT_OK(split1.SetInt8("b", 3)); + ASSERT_OK(split2.SetInt8("b", 6)); + + vector<Partition> partitions; + ASSERT_OK(partition_schema.CreatePartitions({ split1, split2 }, + {}, + {}, + schema, + &partitions)); + ASSERT_EQ(9, partitions.size()); + + ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec, + schema, + partitions[0], + partition_schema, + &arena_), + "PK >= (int8 a=4, int8 b=0, int8 c=-128) AND " + "PK < (int8 a=8, int8 b=3, int8 c=-128) AND " + "a IN (4, 7, 8) AND b IN (0, 1, 2)"); + + ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec, + schema, + partitions[1], + partition_schema, + &arena_), + "PK >= (int8 a=4, int8 b=3, int8 c=-128) AND " + "PK < (int8 a=8, int8 b=6, int8 c=-128) AND " + "a IN (4, 7, 8) AND b IN (3, 4, 5)"); + + ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec, + schema, + partitions[2], + partition_schema, + &arena_), + "PK >= (int8 a=4, int8 b=6, int8 c=-128) AND " + "PK < (int8 a=8, int8 b=10, int8 c=-128) AND " + "a IN (4, 7, 8) AND b IN (6, 7, 8, 9)"); + + ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec, + schema, + partitions[3], + partition_schema, + &arena_), + "PK >= (int8 a=0, int8 b=0, int8 c=-128) AND " + "PK < (int8 a=9, int8 b=3, int8 c=-128) AND " + "a IN (0, 2, 5, 9) AND b IN (0, 1, 2)"); + + ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec, + schema, + partitions[4], + partition_schema, + &arena_), + "PK >= (int8 a=0, int8 b=3, int8 c=-128) AND " + "PK < (int8 a=9, int8 b=6, int8 c=-128) AND " + "a IN (0, 2, 
5, 9) AND b IN (3, 4, 5)"); + + ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec, + schema, + partitions[5], + partition_schema, + &arena_), + "PK >= (int8 a=0, int8 b=6, int8 c=-128) AND " + "PK < (int8 a=9, int8 b=10, int8 c=-128) AND " + "a IN (0, 2, 5, 9) AND b IN (6, 7, 8, 9)"); + + ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec, + schema, + partitions[6], + partition_schema, + &arena_), + "PK >= (int8 a=1, int8 b=0, int8 c=-128) AND " + "PK < (int8 a=6, int8 b=3, int8 c=-128) AND " + "a IN (1, 3, 6) AND b IN (0, 1, 2)"); + + ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec, + schema, + partitions[7], + partition_schema, + &arena_), + "PK >= (int8 a=1, int8 b=3, int8 c=-128) AND " + "PK < (int8 a=6, int8 b=6, int8 c=-128) AND " + "a IN (1, 3, 6) AND b IN (3, 4, 5)"); + + ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec, + schema, + partitions[8], + partition_schema, + &arena_), + "PK >= (int8 a=1, int8 b=6, int8 c=-128) AND " + "PK < (int8 a=6, int8 b=10, int8 c=-128) AND " + "a IN (1, 3, 6) AND b IN (6, 7, 8, 9)"); } // Test that in case hash(a) prune all predicate values, the rest predicate values for @@ -462,46 +813,38 @@ TEST_F(CompositeIntKeysTest, TestHashKeyInListHashPruningEmptyDetect) { PartitionSchema partition_schema; GeneratePartitionSchema(schema, - { pair<vector<std::string>, int>({ "a" }, 3) }, + { pair<vector<string>, int>({ "a" }, 3) }, + {}, &partition_schema); vector<Partition> partitions; ASSERT_OK(partition_schema.CreatePartitions({}, {}, {}, schema, &partitions)); ASSERT_EQ(3, partitions.size()); - // clone scan_spec for different partition. - ScanSpec spec_p1 = spec; - ScanSpec spec_p2 = spec; - ScanSpec spec_p3 = spec; - - spec_p1.PruneHashForInlistIfPossible(schema, partitions[0], partition_schema); - // Guarantee OptimizeScan can be call without fatal. - NO_FATALS(spec_p1.OptimizeScan(schema, &arena_, true)); - - // Verify the splitted values can merge into originl set without overlapping. 
- SCOPED_TRACE(spec_p1.ToString(schema)); - EXPECT_EQ("PK >= (int8 a=4, int8 b=50, int8 c=-128) AND " + ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec, + schema, + partitions[0], + partition_schema, + &arena_), + "PK >= (int8 a=4, int8 b=50, int8 c=-128) AND " "PK < (int8 a=8, int8 b=101, int8 c=-128) AND " - "a IN (4, 7, 8) AND b IN (50, 100)", - spec_p1.ToString(schema)); - ASSERT_FALSE(spec_p1.CanShortCircuit()); - - spec_p2.PruneHashForInlistIfPossible(schema, partitions[1], partition_schema); - // Guarantee OptimizeScan can be call without fatal. - NO_FATALS(spec_p2.OptimizeScan(schema, &arena_, true)); - - SCOPED_TRACE(spec_p2.ToString(schema)); - EXPECT_EQ("PK >= (int8 a=0, int8 b=50, int8 c=-128) AND " + "a IN (4, 7, 8) AND b IN (50, 100)"); + + ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec, + schema, + partitions[1], + partition_schema, + &arena_), + "PK >= (int8 a=0, int8 b=50, int8 c=-128) AND " "PK < (int8 a=9, int8 b=101, int8 c=-128) AND " - "a IN (0, 2, 5, 9) AND b IN (50, 100)", - spec_p2.ToString(schema)); - ASSERT_FALSE(spec_p2.CanShortCircuit()); - - // There should be no predicate values after prune. - spec_p3.PruneHashForInlistIfPossible(schema, partitions[2], partition_schema); - // Guarantee OptimizeScan can be call without fatal. - NO_FATALS(spec_p3.OptimizeScan(schema, &arena_, true)); - ASSERT_TRUE(spec_p3.CanShortCircuit()); + "a IN (0, 2, 5, 9) AND b IN (50, 100)"); + + ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec, + schema, + partitions[2], + partition_schema, + &arena_), + "a IN () AND b IN (50, 100)"); } // Test that hash(a), hash(b) IN list predicates should be pruned. 
@@ -514,106 +857,98 @@ TEST_F(CompositeIntKeysTest, TestMultiHashKeyOneColumnInListHashPruning) { PartitionSchema partition_schema; GeneratePartitionSchema(schema, - { pair<vector<std::string>, int>({ "a" }, 3), - pair<vector<std::string>, int>({ "b" }, 3) }, + { pair<vector<string>, int>({ "a" }, 3), + pair<vector<string>, int>({ "b" }, 3) }, + {}, &partition_schema); vector<Partition> partitions; ASSERT_OK(partition_schema.CreatePartitions({}, {}, {}, schema, &partitions)); ASSERT_EQ(9, partitions.size()); - // clone scan_spec for different partition. - ScanSpec spec_p1 = spec; - ScanSpec spec_p2 = spec; - ScanSpec spec_p3 = spec; - ScanSpec spec_p4 = spec; - ScanSpec spec_p5 = spec; - ScanSpec spec_p6 = spec; - ScanSpec spec_p7 = spec; - ScanSpec spec_p8 = spec; - ScanSpec spec_p9 = spec; - // p1, p2, p3 should have the same predicate values to be pushed on hash(a). // p1, p4, p7 should have the same predicate values to be pushed on hash(b). - spec_p1.PruneHashForInlistIfPossible(schema, partitions[0], partition_schema); - spec_p1.OptimizeScan(schema, &arena_, true); - SCOPED_TRACE(spec_p1.ToString(schema)); - EXPECT_EQ("PK >= (int8 a=4, int8 b=40, int8 c=-128) AND " + // pi refer to partitions[i-1], e.g. 
p1 = partitions[0] + ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec, + schema, + partitions[0], + partition_schema, + &arena_), + "PK >= (int8 a=4, int8 b=40, int8 c=-128) AND " "PK < (int8 a=8, int8 b=71, int8 c=-128) AND " - "a IN (4, 7, 8) AND b IN (40, 60, 70)", - spec_p1.ToString(schema)); - - spec_p2.PruneHashForInlistIfPossible(schema, partitions[1], partition_schema); - spec_p2.OptimizeScan(schema, &arena_, true); - - SCOPED_TRACE(spec_p2.ToString(schema)); - EXPECT_EQ("PK >= (int8 a=4, int8 b=20, int8 c=-128) AND " + "a IN (4, 7, 8) AND b IN (40, 60, 70)"); + + ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec, + schema, + partitions[1], + partition_schema, + &arena_), + "PK >= (int8 a=4, int8 b=20, int8 c=-128) AND " "PK < (int8 a=8, int8 b=51, int8 c=-128) AND " - "a IN (4, 7, 8) AND b IN (20, 30, 50)", - spec_p2.ToString(schema)); - - spec_p3.PruneHashForInlistIfPossible(schema, partitions[2], partition_schema); - spec_p3.OptimizeScan(schema, &arena_, true); - - SCOPED_TRACE(spec_p3.ToString(schema)); - EXPECT_EQ("PK >= (int8 a=4, int8 b=10, int8 c=-128) AND " + "a IN (4, 7, 8) AND b IN (20, 30, 50)"); + + ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec, + schema, + partitions[2], + partition_schema, + &arena_), + "PK >= (int8 a=4, int8 b=10, int8 c=-128) AND " "PK < (int8 a=8, int8 b=81, int8 c=-128) AND " - "a IN (4, 7, 8) AND b IN (10, 80)", - spec_p3.ToString(schema)); - - spec_p4.PruneHashForInlistIfPossible(schema, partitions[3], partition_schema); - spec_p4.OptimizeScan(schema, &arena_, true); - - SCOPED_TRACE(spec_p4.ToString(schema)); - EXPECT_EQ("PK >= (int8 a=0, int8 b=40, int8 c=-128) AND " + "a IN (4, 7, 8) AND b IN (10, 80)"); + + ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec, + schema, + partitions[3], + partition_schema, + &arena_), + "PK >= (int8 a=0, int8 b=40, int8 c=-128) AND " "PK < (int8 a=9, int8 b=71, int8 c=-128) AND " - "a IN (0, 2, 5, 9) AND b IN (40, 60, 70)", - spec_p4.ToString(schema)); - - 
spec_p5.PruneHashForInlistIfPossible(schema, partitions[4], partition_schema); - spec_p5.OptimizeScan(schema, &arena_, true); - - SCOPED_TRACE(spec_p5.ToString(schema)); - EXPECT_EQ("PK >= (int8 a=0, int8 b=20, int8 c=-128) AND " + "a IN (0, 2, 5, 9) AND b IN (40, 60, 70)"); + + ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec, + schema, + partitions[4], + partition_schema, + &arena_), + "PK >= (int8 a=0, int8 b=20, int8 c=-128) AND " "PK < (int8 a=9, int8 b=51, int8 c=-128) AND " - "a IN (0, 2, 5, 9) AND b IN (20, 30, 50)", - spec_p5.ToString(schema)); - - spec_p6.PruneHashForInlistIfPossible(schema, partitions[5], partition_schema); - spec_p6.OptimizeScan(schema, &arena_, true); - - SCOPED_TRACE(spec_p6.ToString(schema)); - EXPECT_EQ("PK >= (int8 a=0, int8 b=10, int8 c=-128) AND " + "a IN (0, 2, 5, 9) AND b IN (20, 30, 50)"); + + ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec, + schema, + partitions[5], + partition_schema, + &arena_), + "PK >= (int8 a=0, int8 b=10, int8 c=-128) AND " "PK < (int8 a=9, int8 b=81, int8 c=-128) AND " - "a IN (0, 2, 5, 9) AND b IN (10, 80)", - spec_p6.ToString(schema)); - - spec_p7.PruneHashForInlistIfPossible(schema, partitions[6], partition_schema); - spec_p7.OptimizeScan(schema, &arena_, true); - - SCOPED_TRACE(spec_p7.ToString(schema)); - EXPECT_EQ("PK >= (int8 a=1, int8 b=40, int8 c=-128) AND " + "a IN (0, 2, 5, 9) AND b IN (10, 80)"); + + ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec, + schema, + partitions[6], + partition_schema, + &arena_), + "PK >= (int8 a=1, int8 b=40, int8 c=-128) AND " "PK < (int8 a=6, int8 b=71, int8 c=-128) AND " - "a IN (1, 3, 6) AND b IN (40, 60, 70)", - spec_p7.ToString(schema)); - - spec_p8.PruneHashForInlistIfPossible(schema, partitions[7], partition_schema); - spec_p8.OptimizeScan(schema, &arena_, true); - - SCOPED_TRACE(spec_p8.ToString(schema)); - EXPECT_EQ("PK >= (int8 a=1, int8 b=20, int8 c=-128) AND " + "a IN (1, 3, 6) AND b IN (40, 60, 70)"); + + 
ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec, + schema, + partitions[7], + partition_schema, + &arena_), + "PK >= (int8 a=1, int8 b=20, int8 c=-128) AND " "PK < (int8 a=6, int8 b=51, int8 c=-128) AND " - "a IN (1, 3, 6) AND b IN (20, 30, 50)", - spec_p8.ToString(schema)); - - spec_p9.PruneHashForInlistIfPossible(schema, partitions[8], partition_schema); - spec_p9.OptimizeScan(schema, &arena_, true); - - SCOPED_TRACE(spec_p9.ToString(schema)); - EXPECT_EQ("PK >= (int8 a=1, int8 b=10, int8 c=-128) AND " + "a IN (1, 3, 6) AND b IN (20, 30, 50)"); + + ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec, + schema, + partitions[8], + partition_schema, + &arena_), + "PK >= (int8 a=1, int8 b=10, int8 c=-128) AND " "PK < (int8 a=6, int8 b=81, int8 c=-128) AND " - "a IN (1, 3, 6) AND b IN (10, 80)", - spec_p9.ToString(schema)); + "a IN (1, 3, 6) AND b IN (10, 80)"); } // Test that hash(a, b) IN list predicates should not be pruned. @@ -626,46 +961,40 @@ TEST_F(CompositeIntKeysTest, TesMultiHashColumnsInListHashPruning) { PartitionSchema partition_schema; GeneratePartitionSchema(schema, - { pair<vector<std::string>, int>({ "a", "b" }, 3) }, + { pair<vector<string>, int>({ "a", "b" }, 3) }, + {}, &partition_schema); vector<Partition> partitions; ASSERT_OK(partition_schema.CreatePartitions({}, {}, {}, schema, &partitions)); ASSERT_EQ(3, partitions.size()); - // clone scan_spec for different partition. - ScanSpec spec_p1 = spec; - ScanSpec spec_p2 = spec; - ScanSpec spec_p3 = spec; - - spec_p1.PruneHashForInlistIfPossible(schema, partitions[0], partition_schema); - spec_p1.OptimizeScan(schema, &arena_, true); - - // Verify that the predicates to be pushed to different partition should be - // the same when no hash prune happened. 
- SCOPED_TRACE(spec_p1.ToString(schema)); - EXPECT_EQ("PK >= (int8 a=0, int8 b=50, int8 c=-128) AND " + ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec, + schema, + partitions[0], + partition_schema, + &arena_), + "PK >= (int8 a=0, int8 b=50, int8 c=-128) AND " "PK < (int8 a=9, int8 b=101, int8 c=-128) AND " - "a IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9) AND b IN (50, 100)", - spec_p1.ToString(schema)); - - spec_p2.PruneHashForInlistIfPossible(schema, partitions[1], partition_schema); - spec_p2.OptimizeScan(schema, &arena_, true); - - SCOPED_TRACE(spec_p2.ToString(schema)); - EXPECT_EQ("PK >= (int8 a=0, int8 b=50, int8 c=-128) AND " + "a IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9) AND b IN (50, 100)"); + + ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec, + schema, + partitions[1], + partition_schema, + &arena_), + "PK >= (int8 a=0, int8 b=50, int8 c=-128) AND " "PK < (int8 a=9, int8 b=101, int8 c=-128) AND " - "a IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9) AND b IN (50, 100)", - spec_p2.ToString(schema)); - - spec_p3.PruneHashForInlistIfPossible(schema, partitions[2], partition_schema); - spec_p3.OptimizeScan(schema, &arena_, true); - - SCOPED_TRACE(spec_p3.ToString(schema)); - EXPECT_EQ("PK >= (int8 a=0, int8 b=50, int8 c=-128) AND " + "a IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9) AND b IN (50, 100)"); + + ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec, + schema, + partitions[2], + partition_schema, + &arena_), + "PK >= (int8 a=0, int8 b=50, int8 c=-128) AND " "PK < (int8 a=9, int8 b=101, int8 c=-128) AND " - "a IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9) AND b IN (50, 100)", - spec_p3.ToString(schema)); + "a IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9) AND b IN (50, 100)"); } // Test that hash(a, b), hash(c) InList predicates. 
@@ -681,107 +1010,95 @@ TEST_F(CompositeIntKeysTest, TesMultiHashKeyMultiHashInListHashPruning) { PartitionSchema partition_schema; GeneratePartitionSchema(schema, - { pair<vector<std::string>, int>({ "a", "b" }, 3), - pair<vector<std::string>, int>({ "c" }, 3) }, + { pair<vector<string>, int>({ "a", "b" }, 3), + pair<vector<string>, int>({ "c" }, 3) }, + {}, &partition_schema); vector<Partition> partitions; ASSERT_OK(partition_schema.CreatePartitions({}, {}, {}, schema, &partitions)); ASSERT_EQ(9, partitions.size()); - // clone scan_spec for different partition. - ScanSpec spec_p1 = spec; - ScanSpec spec_p2 = spec; - ScanSpec spec_p3 = spec; - ScanSpec spec_p4 = spec; - ScanSpec spec_p5 = spec; - ScanSpec spec_p6 = spec; - ScanSpec spec_p7 = spec; - ScanSpec spec_p8 = spec; - ScanSpec spec_p9 = spec; - - spec_p1.PruneHashForInlistIfPossible(schema, partitions[0], partition_schema); - spec_p1.OptimizeScan(schema, &arena_, true); - - // hash(a, b) should not be pruned, hash(c) should be pruned. - // p1, p4, p7 should have the same values to be pushed. 
- SCOPED_TRACE(spec_p1.ToString(schema)); - EXPECT_EQ("PK >= (int8 a=0, int8 b=50, int8 c=40) AND " + ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec, + schema, + partitions[0], + partition_schema, + &arena_), + "PK >= (int8 a=0, int8 b=50, int8 c=40) AND " "PK < (int8 a=9, int8 b=100, int8 c=71) AND " - "a IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9) AND b IN (50, 100) AND c IN (40, 60, 70)", - spec_p1.ToString(schema)); - - spec_p2.PruneHashForInlistIfPossible(schema, partitions[1], partition_schema); - spec_p2.OptimizeScan(schema, &arena_, true); - - SCOPED_TRACE(spec_p2.ToString(schema)); - EXPECT_EQ("PK >= (int8 a=0, int8 b=50, int8 c=20) AND " + "a IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9) AND b IN (50, 100) AND c IN (40, 60, 70)"); + + ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec, + schema, + partitions[1], + partition_schema, + &arena_), + "PK >= (int8 a=0, int8 b=50, int8 c=20) AND " "PK < (int8 a=9, int8 b=100, int8 c=51) AND " - "a IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9) AND b IN (50, 100) AND c IN (20, 30, 50)", - spec_p2.ToString(schema)); - - spec_p3.PruneHashForInlistIfPossible(schema, partitions[2], partition_schema); - spec_p3.OptimizeScan(schema, &arena_, true); - - SCOPED_TRACE(spec_p3.ToString(schema)); - EXPECT_EQ("PK >= (int8 a=0, int8 b=50, int8 c=80) AND " + "a IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9) AND b IN (50, 100) AND c IN (20, 30, 50)"); + + ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec, + schema, + partitions[2], + partition_schema, + &arena_), + "PK >= (int8 a=0, int8 b=50, int8 c=80) AND " "PK < (int8 a=9, int8 b=100, int8 c=91) AND " - "a IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9) AND b IN (50, 100) AND c IN (80, 90)", - spec_p3.ToString(schema)); - - spec_p4.PruneHashForInlistIfPossible(schema, partitions[3], partition_schema); - spec_p4.OptimizeScan(schema, &arena_, true); - - SCOPED_TRACE(spec_p4.ToString(schema)); - EXPECT_EQ("PK >= (int8 a=0, int8 b=50, int8 c=40) AND " + "a IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9) AND b IN (50, 100) AND c IN (80, 90)"); + + 
ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec, + schema, + partitions[3], + partition_schema, + &arena_), + "PK >= (int8 a=0, int8 b=50, int8 c=40) AND " "PK < (int8 a=9, int8 b=100, int8 c=71) AND " - "a IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9) AND b IN (50, 100) AND c IN (40, 60, 70)", - spec_p4.ToString(schema)); - - spec_p5.PruneHashForInlistIfPossible(schema, partitions[4], partition_schema); - spec_p5.OptimizeScan(schema, &arena_, true); - - SCOPED_TRACE(spec_p5.ToString(schema)); - EXPECT_EQ("PK >= (int8 a=0, int8 b=50, int8 c=20) AND " + "a IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9) AND b IN (50, 100) AND c IN (40, 60, 70)"); + + ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec, + schema, + partitions[4], + partition_schema, + &arena_), + "PK >= (int8 a=0, int8 b=50, int8 c=20) AND " "PK < (int8 a=9, int8 b=100, int8 c=51) AND " - "a IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9) AND b IN (50, 100) AND c IN (20, 30, 50)", - spec_p5.ToString(schema)); - - spec_p6.PruneHashForInlistIfPossible(schema, partitions[5], partition_schema); - spec_p6.OptimizeScan(schema, &arena_, true); - - SCOPED_TRACE(spec_p6.ToString(schema)); - EXPECT_EQ("PK >= (int8 a=0, int8 b=50, int8 c=80) AND " + "a IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9) AND b IN (50, 100) AND c IN (20, 30, 50)"); + + ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec, + schema, + partitions[5], + partition_schema, + &arena_), + "PK >= (int8 a=0, int8 b=50, int8 c=80) AND " "PK < (int8 a=9, int8 b=100, int8 c=91) AND " - "a IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9) AND b IN (50, 100) AND c IN (80, 90)", - spec_p6.ToString(schema)); - - spec_p7.PruneHashForInlistIfPossible(schema, partitions[6], partition_schema); - spec_p7.OptimizeScan(schema, &arena_, true); - - SCOPED_TRACE(spec_p7.ToString(schema)); - EXPECT_EQ("PK >= (int8 a=0, int8 b=50, int8 c=40) AND " + "a IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9) AND b IN (50, 100) AND c IN (80, 90)"); + + ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec, + schema, + partitions[6], + partition_schema, + 
&arena_), + "PK >= (int8 a=0, int8 b=50, int8 c=40) AND " "PK < (int8 a=9, int8 b=100, int8 c=71) AND " - "a IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9) AND b IN (50, 100) AND c IN (40, 60, 70)", - spec_p7.ToString(schema)); - - spec_p8.PruneHashForInlistIfPossible(schema, partitions[7], partition_schema); - spec_p8.OptimizeScan(schema, &arena_, true); - - SCOPED_TRACE(spec_p8.ToString(schema)); - EXPECT_EQ("PK >= (int8 a=0, int8 b=50, int8 c=20) AND " + "a IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9) AND b IN (50, 100) AND c IN (40, 60, 70)"); + + ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec, + schema, + partitions[7], + partition_schema, + &arena_), + "PK >= (int8 a=0, int8 b=50, int8 c=20) AND " "PK < (int8 a=9, int8 b=100, int8 c=51) AND " - "a IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9) AND b IN (50, 100) AND c IN (20, 30, 50)", - spec_p8.ToString(schema)); - - spec_p9.PruneHashForInlistIfPossible(schema, partitions[8], partition_schema); - spec_p9.OptimizeScan(schema, &arena_, true); - - SCOPED_TRACE(spec_p9.ToString(schema)); - EXPECT_EQ("PK >= (int8 a=0, int8 b=50, int8 c=80) AND " + "a IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9) AND b IN (50, 100) AND c IN (20, 30, 50)"); + + ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec, + schema, + partitions[8], + partition_schema, + &arena_), + "PK >= (int8 a=0, int8 b=50, int8 c=80) AND " "PK < (int8 a=9, int8 b=100, int8 c=91) AND " - "a IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9) AND b IN (50, 100) AND c IN (80, 90)", - spec_p9.ToString(schema)); + "a IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9) AND b IN (50, 100) AND c IN (80, 90)"); } // Test that IN list mixed with range predicates get pushed into the primary key @@ -1139,7 +1456,7 @@ TEST_F(CompositeIntKeysTest, TestGetMissingColumns) { Schema projection({ ColumnSchema("e", INT8) }, 0); vector<ColumnSchema> missing_cols = spec.GetMissingColumns(projection); EXPECT_EQ(2, missing_cols.size()); - std::string missing_cols_str = ToString(missing_cols); + string missing_cols_str = ToString(missing_cols); 
EXPECT_STR_CONTAINS(missing_cols_str, "b INT8"); EXPECT_STR_CONTAINS(missing_cols_str, "d INT8"); } @@ -1149,7 +1466,7 @@ TEST_F(CompositeIntKeysTest, TestGetMissingColumns) { ColumnSchema("e", INT8) }, 0); vector<ColumnSchema> missing_cols = spec.GetMissingColumns(projection); EXPECT_EQ(1, missing_cols.size()); - std::string missing_cols_str = ToString(missing_cols); + string missing_cols_str = ToString(missing_cols); EXPECT_STR_CONTAINS(missing_cols_str, "b INT8"); } diff --git a/src/kudu/common/scan_spec.cc b/src/kudu/common/scan_spec.cc index fc519d2..e82714f 100644 --- a/src/kudu/common/scan_spec.cc +++ b/src/kudu/common/scan_spec.cc @@ -171,9 +171,9 @@ void ScanSpec::OptimizeScan(const Schema& schema, } } -void ScanSpec::PruneHashForInlistIfPossible(const Schema& schema, - const Partition& partition, - const PartitionSchema& partition_schema) { +void ScanSpec::PruneInlistValuesIfPossible(const Schema& schema, + const Partition& partition, + const PartitionSchema& partition_schema) { for (auto& predicate_pair : predicates_) { auto& predicate = predicate_pair.second; if (predicate.predicate_type() != PredicateType::InList) continue; @@ -184,19 +184,36 @@ void ScanSpec::PruneHashForInlistIfPossible(const Schema& schema, if (!s.ok() || !schema.is_key_column(idx)) continue; int hash_idx = partition_schema.TryGetSingleColumnHashPartitionIndex(schema, idx); - if (hash_idx == -1) continue; + bool is_col_single_range_schema = partition_schema.IsColumnSingleRangeSchema(schema, idx); + if (hash_idx == -1 && !is_col_single_range_schema) continue; auto* predicate_values = predicate.mutable_raw_values(); + predicate_values->erase(std::remove_if(predicate_values->begin(), predicate_values->end(), - [idx, hash_idx, &schema, &partition, &partition_schema](const void* value) { + [idx, hash_idx, is_col_single_range_schema, + &schema, &partition, &partition_schema](const void* value) { + // Returning true indicates that this value will be removed from the predicate values. 
KuduPartialRow partial_row(&schema); Status s = partial_row.Set(idx, reinterpret_cast<const uint8_t*>(value)); if (!s.ok()) return false; - bool is_value_in; - s = partition_schema.HashPartitionContainsRow(partition, partial_row, - hash_idx, &is_value_in); - if (!s.ok()) return false; - return !is_value_in; + + // If value is not in given hash partition, remove this value from predicate values. + if (hash_idx != -1) { + bool is_value_in; + s = partition_schema.HashPartitionContainsRow(partition, partial_row, + hash_idx, &is_value_in); + if (!s.ok()) return false; + if (!is_value_in) return true; + } + + // If value is not in given range partition, remove this value from predicate values. + if (is_col_single_range_schema) { + bool is_value_in; + s = partition_schema.RangePartitionContainsRow(partition, partial_row, &is_value_in); + if (!s.ok()) return false; + if (!is_value_in) return true; + } + return false; }), predicate_values->end()); } } diff --git a/src/kudu/common/scan_spec.h b/src/kudu/common/scan_spec.h index 3963f17..9843dae 100644 --- a/src/kudu/common/scan_spec.h +++ b/src/kudu/common/scan_spec.h @@ -74,18 +74,24 @@ class ScanSpec { bool remove_pushed_predicates); // Filter in-list predicate values with given hash partition schema. - // Only supports pruning for single-column hash schemas. + // If range partition is introduced when creating table, in-list predicate + // can also benefit from this pruning. + // + // Only supports pruning for single-column hash schemas or single-column range schema. // Now support hash prune on: // hash(onekey), # support. + // range(onekey), # support. // hash(onekey), hash(anotherkey) # support either. - // hash(key_one, key_two), hash(anotherkey) # support only prune on anotherkey. + // hash(onekey), range(anotherkey) # support either. + // hash(key_one, key_two), hash(anotherkey) # only support prune on anotherkey. + // range(key_one, key_two) # not support. 
// - // TODO(ningw) For IN list predicate on hash(key_one, key_two) or more columns, + // TODO(ningw) For IN list predicate on hash/range(key_one, key_two) or more columns, // if one predicate is IN list, and the rest predicate(s) are EQUAL, could // have IN list predicate values prune as well. - void PruneHashForInlistIfPossible(const Schema& schema, - const Partition& partition, - const PartitionSchema& partition_schema); + void PruneInlistValuesIfPossible(const Schema& schema, + const Partition& partition, + const PartitionSchema& partition_schema); // Get columns that are present in the predicates but not in the projection std::vector<ColumnSchema> GetMissingColumns(const Schema& projection); diff --git a/src/kudu/tools/kudu-admin-test.cc b/src/kudu/tools/kudu-admin-test.cc index f549e74..cbd5a68 100644 --- a/src/kudu/tools/kudu-admin-test.cc +++ b/src/kudu/tools/kudu-admin-test.cc @@ -2458,7 +2458,7 @@ TEST_F(AdminCliTest, TestAddAndDropUnboundedPartition) { }); // Since the unbounded partition has been dropped, now we can add a new unbounded - // range parititon for the table. + // range partition for the table. s = RunKuduTool({ "table", "add_range_partition", diff --git a/src/kudu/tserver/tablet_service.cc b/src/kudu/tserver/tablet_service.cc index aa0b905..7224aad 100644 --- a/src/kudu/tserver/tablet_service.cc +++ b/src/kudu/tserver/tablet_service.cc @@ -2698,9 +2698,9 @@ Status TabletServiceImpl::HandleNewScanRequest(TabletReplica* replica, } VLOG(3) << "Before optimizing scan spec: " << spec.ToString(tablet_schema); - spec.PruneHashForInlistIfPossible(tablet_schema, - replica->tablet_metadata()->partition(), - replica->tablet_metadata()->partition_schema()); + spec.PruneInlistValuesIfPossible(tablet_schema, + replica->tablet_metadata()->partition(), + replica->tablet_metadata()->partition_schema()); spec.OptimizeScan(tablet_schema, scanner->arena(), true); VLOG(3) << "After optimizing scan spec: " << spec.ToString(tablet_schema);
