This is an automated email from the ASF dual-hosted git repository.
alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git
The following commit(s) were added to refs/heads/master by this push:
new 607d9d0 [common] more generic API for IN list predicate pruning
607d9d0 is described below
commit 607d9d0a7e95e220864f43b88a64644bb6402163
Author: Alexey Serbin <[email protected]>
AuthorDate: Mon Aug 9 21:18:29 2021 -0700
[common] more generic API for IN list predicate pruning
While working on KUDU-2671, I found that the exposed internals of the
PartitionSchema class don't allow for updating the implementation of
the partition-related code to include per-range custom hash bucket
schemas in a consistent manner.
This patch introduces a bit more generic interface for pruning values of
IN list predicates by adding a new PartitionMayContainRow() method and
removes the following methods from the public API of the PartitionSchema
class:
* HashPartitionContainsRow()
* RangePartitionContainsRow()
* IsColumnSingleRangeSchema()
* TryGetSingleColumnHashPartitionIndex()
I also added one extra test scenario and updated existing ones to
increase readability of the assertion messages if they are triggered.
This is a follow-up to 6a7cadc7e and 83b8caf4f.
Change-Id: I2e2390cc4747864fdac71656dd7125ac3b15bf9d
Reviewed-on: http://gerrit.cloudera.org:8080/17764
Tested-by: Kudu Jenkins
Reviewed-by: Mahesh Reddy <[email protected]>
Reviewed-by: Andrew Wong <[email protected]>
---
src/kudu/common/partition.cc | 86 ++--
src/kudu/common/partition.h | 59 +--
src/kudu/common/scan_spec-test.cc | 833 +++++++++++++++-----------------------
src/kudu/common/scan_spec.cc | 59 ++-
src/kudu/common/scan_spec.h | 27 +-
5 files changed, 462 insertions(+), 602 deletions(-)
diff --git a/src/kudu/common/partition.cc b/src/kudu/common/partition.cc
index 8a5a846..9cb06af 100644
--- a/src/kudu/common/partition.cc
+++ b/src/kudu/common/partition.cc
@@ -709,14 +709,54 @@ bool PartitionSchema::PartitionContainsRow(const
Partition& partition,
return PartitionContainsRowImpl(partition, row);
}
-bool PartitionSchema::HashPartitionContainsRow(const Partition& partition,
- const KuduPartialRow& row,
- int hash_idx) const {
- return HashPartitionContainsRowImpl(partition, row, hash_idx);
+bool PartitionSchema::PartitionMayContainRow(const Partition& partition,
+ const KuduPartialRow& row) const {
+ // It's the fast and 100% sure path when the row has the primary key set.
+ if (row.IsKeySet()) {
+ return PartitionContainsRow(partition, row);
+ }
+
+ const Schema* schema = row.schema();
+ vector<ColumnId> set_column_ids;
+ set_column_ids.reserve(schema->num_key_columns());
+ for (size_t idx = 0; idx < schema->num_key_columns(); ++idx) {
+ DCHECK(schema->is_key_column(idx));
+ if (row.IsColumnSet(idx) && !row.IsNull(idx)) {
+ set_column_ids.emplace_back(schema->column_id(idx));
+ if (set_column_ids.size() > 1) {
+ // No further optimizations in this case for now.
+ // NOTE: This might be a false positive.
+ return true;
+ }
+ }
+ }
+ if (set_column_ids.empty()) {
+ return false;
+ }
+
+ DCHECK_EQ(1, set_column_ids.size());
+ const auto& single_column_id = set_column_ids[0];
+
+ if (range_schema_.column_ids.size() == 1 &&
+ range_schema_.column_ids[0] == single_column_id &&
+ !RangePartitionContainsRow(partition, row)) {
+ return false;
+ }
+ for (size_t i = 0; i < hash_bucket_schemas_.size(); ++i) {
+ const auto& hash_partition = hash_bucket_schemas_[i];
+ if (hash_partition.column_ids.size() == 1 &&
+ hash_partition.column_ids[0] == single_column_id &&
+ !HashPartitionContainsRow(partition, row, i)) {
+ return false;
+ }
+ }
+
+ // NOTE: This might be a false positive.
+ return true;
}
bool PartitionSchema::HashPartitionContainsRow(const Partition& partition,
- const ConstContiguousRow& row,
+ const KuduPartialRow& row,
int hash_idx) const {
return HashPartitionContainsRowImpl(partition, row, hash_idx);
}
@@ -726,11 +766,6 @@ bool PartitionSchema::RangePartitionContainsRow(
return RangePartitionContainsRowImpl(partition, row);
}
-bool PartitionSchema::RangePartitionContainsRow(
- const Partition& partition, const ConstContiguousRow& row) const {
- return RangePartitionContainsRowImpl(partition, row);
-}
-
Status PartitionSchema::DecodeRangeKey(Slice* encoded_key,
KuduPartialRow* partial_row,
Arena* arena) const {
@@ -1559,36 +1594,19 @@ Status
PartitionSchema::MakeUpperBoundRangePartitionKeyExclusive(KuduPartialRow*
return Status::OK();
}
-Status PartitionSchema::GetRangeSchemaColumnIndexes(const Schema& schema,
- vector<int32_t>*
range_column_idxs) const {
+Status PartitionSchema::GetRangeSchemaColumnIndexes(
+ const Schema& schema,
+ vector<int32_t>* range_column_indexes) const {
+ DCHECK(range_column_indexes);
for (const ColumnId& column_id : range_schema_.column_ids) {
int32_t idx = schema.find_column_by_id(column_id);
if (idx == Schema::kColumnNotFound) {
- return Status::InvalidArgument(Substitute("range partition column ID $0 "
- "not found in range partition
key schema.",
- column_id));
+ return Status::InvalidArgument(Substitute(
+ "range partition column ID $0 not found in table schema",
column_id));
}
- range_column_idxs->push_back(idx);
+ range_column_indexes->push_back(idx);
}
return Status::OK();
}
-int32_t PartitionSchema::TryGetSingleColumnHashPartitionIndex(const Schema&
schema,
- int32_t col_idx)
const {
- const ColumnId column_id = schema.column_id(col_idx);
- for (int i = 0; i < hash_bucket_schemas_.size(); ++i) {
- const auto& hash_partition = hash_bucket_schemas_[i];
- if (hash_partition.column_ids.size() == 1 && hash_partition.column_ids[0]
== column_id) {
- return i;
- }
- }
- return -1;
-}
-
-bool PartitionSchema::IsColumnSingleRangeSchema(const Schema& schema, int32_t
col_idx) const {
- const ColumnId column_id = schema.column_id(col_idx);
- return range_schema_.column_ids.size() == 1 &&
- range_schema_.column_ids[0] == column_id;
-}
-
} // namespace kudu
diff --git a/src/kudu/common/partition.h b/src/kudu/common/partition.h
index bb3e773..fd72b69 100644
--- a/src/kudu/common/partition.h
+++ b/src/kudu/common/partition.h
@@ -225,25 +225,32 @@ class PartitionSchema {
const Schema& schema,
std::vector<Partition>* partitions) const WARN_UNUSED_RESULT;
- // Tests if the partition contains the row.
+ // Check if the given partition contains the specified row. The row must have
+ // all the columns participating in the table's partitioning schema
+ // set to particular values.
bool PartitionContainsRow(const Partition& partition,
const KuduPartialRow& row) const;
bool PartitionContainsRow(const Partition& partition,
const ConstContiguousRow& row) const;
- // Tests if the hash partition contains the row with given hash_idx.
- bool HashPartitionContainsRow(const Partition& partition,
- const KuduPartialRow& row,
- int hash_idx) const;
- bool HashPartitionContainsRow(const Partition& partition,
- const ConstContiguousRow& row,
- int hash_idx) const;
-
- // Tests if the range partition contains the row.
- bool RangePartitionContainsRow(const Partition& partition,
- const KuduPartialRow& row) const;
- bool RangePartitionContainsRow(const Partition& partition,
- const ConstContiguousRow& row) const;
+ // Check if the specified row is probably in the given partition.
+ // The collection of columns set to particular values in the row can be a
+ // subset of all the columns participating in the table's partitioning
schema.
+ // This method can be used to optimize the collection of values for IN list
+ // predicates. As of now, this method is effectively implemented only for
+ // single-column hash and single-column range partitioning schemas, meaning
+ // that it can return false positives in case of other than single-column range
+ // and hash partitioning schemas.
+ //
+ // NOTE: this method returns false positives in some cases (see above)
+ //
+ // TODO(aserbin): implement this for multi-column range schemas as well,
+ // substituting non-specified columns in the row with values
+ // from the partition's start key and return logically
inverted
+ // result of calling PartitionContainsRow() with the
+ // artificially constructed row
+ bool PartitionMayContainRow(const Partition& partition,
+ const KuduPartialRow& row) const;
// Returns a text description of the partition suitable for debug printing.
//
@@ -327,18 +334,13 @@ class PartitionSchema {
return ranges_with_hash_schemas_;
}
- // Gets the vector containing the column indexes of the range partition keys.
+ // Given the specified table schema, populate the 'range_column_indexes'
+ // container with column indexes of the range partition keys.
// If any of the columns is not in the key range columns then an
// InvalidArgument status is returned.
- Status GetRangeSchemaColumnIndexes(const Schema& schema,
- std::vector<int>* range_column_idxs)
const;
-
- // Returns index of given column idx, if it is one of hash key and this hash
schema
- // contains only one column, otherwise returns -1.
- int32_t TryGetSingleColumnHashPartitionIndex(const Schema& schema, int32_t
col_idx) const;
-
- // Given a column idx, verify that it is the only column of the range
partition.
- bool IsColumnSingleRangeSchema(const Schema& schema, int32_t col_idx) const;
+ Status GetRangeSchemaColumnIndexes(
+ const Schema& schema,
+ std::vector<int>* range_column_indexes) const;
private:
friend class PartitionPruner;
@@ -346,6 +348,15 @@ class PartitionSchema {
FRIEND_TEST(PartitionTest, TestIncrementRangePartitionStringBounds);
FRIEND_TEST(PartitionTest, TestVarcharRangePartitions);
+ // Tests if the hash partition contains the row with given hash_idx.
+ bool HashPartitionContainsRow(const Partition& partition,
+ const KuduPartialRow& row,
+ int hash_idx) const;
+
+ // Tests if the range partition contains the row.
+ bool RangePartitionContainsRow(const Partition& partition,
+ const KuduPartialRow& row) const;
+
// Returns a text description of the encoded range key suitable for debug
printing.
std::string RangeKeyDebugString(Slice range_key, const Schema& schema) const;
std::string RangeKeyDebugString(const KuduPartialRow& key) const;
diff --git a/src/kudu/common/scan_spec-test.cc
b/src/kudu/common/scan_spec-test.cc
index 7ebb096..f43151e 100644
--- a/src/kudu/common/scan_spec-test.cc
+++ b/src/kudu/common/scan_spec-test.cc
@@ -86,7 +86,6 @@ string PruneInlistValuesAndGetSchemaString(const ScanSpec&
spec,
const PartitionSchema&
partition_schema,
Arena* arena) {
ScanSpec copy_spec = spec;
-
copy_spec.PruneInlistValuesIfPossible(schema, partition, partition_schema);
copy_spec.OptimizeScan(schema, arena, true);
@@ -423,66 +422,50 @@ TEST_F(CompositeIntKeysTest, TestInListPushdown) {
}
// Test that hash(a) IN list predicates prune with right values.
-TEST_F(CompositeIntKeysTest, TestOneHashKeyInListHashPruning) {
+TEST_F(CompositeIntKeysTest, OneHashKeyInListHashPruning) {
ScanSpec spec;
AddInPredicate<int8_t>(&spec, "a", { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 });
AddInPredicate<int8_t>(&spec, "b", { 50, 100 });
- Schema schema = schema_.CopyWithColumnIds();
-
+ const auto schema = schema_.CopyWithColumnIds();
PartitionSchema partition_schema;
- GeneratePartitionSchema(schema,
- { pair<vector<string>, int>({ "a" }, 3) },
- {},
- &partition_schema);
+ GeneratePartitionSchema(schema, { { { "a" }, 3 } }, {}, &partition_schema);
vector<Partition> partitions;
ASSERT_OK(partition_schema.CreatePartitions({}, {}, {}, schema,
&partitions));
ASSERT_EQ(3, partitions.size());
// Verify the splitted values can merge into original set without
overlapping.
- ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
- schema,
- partitions[0],
- partition_schema,
- &arena_),
- "PK >= (int8 a=4, int8 b=50, int8 c=-128) AND "
+ ASSERT_EQ("PK >= (int8 a=4, int8 b=50, int8 c=-128) AND "
"PK < (int8 a=8, int8 b=101, int8 c=-128) AND "
- "a IN (4, 7, 8) AND b IN (50, 100)");
-
- ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
- schema,
- partitions[1],
- partition_schema,
- &arena_),
- "PK >= (int8 a=0, int8 b=50, int8 c=-128) AND "
+ "a IN (4, 7, 8) AND b IN (50, 100)",
+ PruneInlistValuesAndGetSchemaString(
+ spec, schema, partitions[0], partition_schema, &arena_));
+
+ ASSERT_EQ("PK >= (int8 a=0, int8 b=50, int8 c=-128) AND "
"PK < (int8 a=9, int8 b=101, int8 c=-128) AND "
- "a IN (0, 2, 5, 9) AND b IN (50, 100)");
-
- ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
- schema,
- partitions[2],
- partition_schema,
- &arena_),
- "PK >= (int8 a=1, int8 b=50, int8 c=-128) AND "
+ "a IN (0, 2, 5, 9) AND b IN (50, 100)",
+ PruneInlistValuesAndGetSchemaString(
+ spec, schema, partitions[1], partition_schema, &arena_));
+
+ ASSERT_EQ("PK >= (int8 a=1, int8 b=50, int8 c=-128) AND "
"PK < (int8 a=6, int8 b=101, int8 c=-128) AND "
- "a IN (1, 3, 6) AND b IN (50, 100)");
+ "a IN (1, 3, 6) AND b IN (50, 100)",
+ PruneInlistValuesAndGetSchemaString(
+ spec, schema, partitions[2], partition_schema, &arena_));
}
// Test that hash(a), range(a) IN list predicates prune would happen on
// both hash and range aspects.
-TEST_F(CompositeIntKeysTest, TestOneHashKeyOneRangeKeyInListHashPruning) {
+TEST_F(CompositeIntKeysTest, OneHashKeyOneRangeKeyInListHashPruning) {
ScanSpec spec;
AddInPredicate<int8_t>(&spec, "a", { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 });
AddInPredicate<int8_t>(&spec, "b", { 50, 100 });
- Schema schema = schema_.CopyWithColumnIds();
-
+ const auto schema = schema_.CopyWithColumnIds();
PartitionSchema partition_schema;
- GeneratePartitionSchema(schema,
- { pair<vector<string>, int>({ "a" }, 3) },
- { "a" },
- &partition_schema);
+ GeneratePartitionSchema(
+ schema, { { { "a" }, 3 } }, { "a" }, &partition_schema);
KuduPartialRow split1(&schema);
KuduPartialRow split2(&schema);
@@ -490,107 +473,74 @@ TEST_F(CompositeIntKeysTest,
TestOneHashKeyOneRangeKeyInListHashPruning) {
ASSERT_OK(split2.SetInt8("a", 6));
vector<Partition> partitions;
- ASSERT_OK(partition_schema.CreatePartitions({ split1, split2 },
- {},
- {},
- schema,
- &partitions));
+ ASSERT_OK(partition_schema.CreatePartitions(
+ { split1, split2 }, {}, {}, schema, &partitions));
ASSERT_EQ(9, partitions.size());
- ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
- schema,
- partitions[0],
- partition_schema,
- &arena_),
- "a IN () AND b IN (50, 100)");
-
- ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
- schema,
- partitions[1],
- partition_schema,
- &arena_),
- "PK >= (int8 a=4, int8 b=50, int8 c=-128) AND "
+ ASSERT_EQ("a IN () AND b IN (50, 100)",
+ PruneInlistValuesAndGetSchemaString(
+ spec, schema, partitions[0], partition_schema, &arena_));
+
+ ASSERT_EQ("PK >= (int8 a=4, int8 b=50, int8 c=-128) AND "
"PK < (int8 a=4, int8 b=101, int8 c=-128) AND "
- "a IN (4) AND b IN (50, 100)");
-
- ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
- schema,
- partitions[2],
- partition_schema,
- &arena_),
- "PK >= (int8 a=7, int8 b=50, int8 c=-128) AND "
+ "a IN (4) AND b IN (50, 100)",
+ PruneInlistValuesAndGetSchemaString(
+ spec, schema, partitions[1], partition_schema, &arena_));
+
+ ASSERT_EQ("PK >= (int8 a=7, int8 b=50, int8 c=-128) AND "
"PK < (int8 a=8, int8 b=101, int8 c=-128) AND "
- "a IN (7, 8) AND b IN (50, 100)");
-
- ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
- schema,
- partitions[3],
- partition_schema,
- &arena_),
- "PK >= (int8 a=0, int8 b=50, int8 c=-128) AND "
+ "a IN (7, 8) AND b IN (50, 100)",
+ PruneInlistValuesAndGetSchemaString(
+ spec, schema, partitions[2], partition_schema, &arena_));
+
+ ASSERT_EQ("PK >= (int8 a=0, int8 b=50, int8 c=-128) AND "
"PK < (int8 a=2, int8 b=101, int8 c=-128) AND "
- "a IN (0, 2) AND b IN (50, 100)");
-
- ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
- schema,
- partitions[4],
- partition_schema,
- &arena_),
- "PK >= (int8 a=5, int8 b=50, int8 c=-128) AND "
+ "a IN (0, 2) AND b IN (50, 100)",
+ PruneInlistValuesAndGetSchemaString(
+ spec, schema, partitions[3], partition_schema, &arena_));
+
+ ASSERT_EQ("PK >= (int8 a=5, int8 b=50, int8 c=-128) AND "
"PK < (int8 a=5, int8 b=101, int8 c=-128) AND "
- "a IN (5) AND b IN (50, 100)");
-
- ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
- schema,
- partitions[5],
- partition_schema,
- &arena_),
- "PK >= (int8 a=9, int8 b=50, int8 c=-128) AND "
+ "a IN (5) AND b IN (50, 100)",
+ PruneInlistValuesAndGetSchemaString(
+ spec, schema, partitions[4], partition_schema, &arena_));
+
+ ASSERT_EQ("PK >= (int8 a=9, int8 b=50, int8 c=-128) AND "
"PK < (int8 a=9, int8 b=101, int8 c=-128) AND "
- "a IN (9) AND b IN (50, 100)");
-
- ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
- schema,
- partitions[6],
- partition_schema,
- &arena_),
- "PK >= (int8 a=1, int8 b=50, int8 c=-128) AND "
+ "a IN (9) AND b IN (50, 100)",
+ PruneInlistValuesAndGetSchemaString(
+ spec, schema, partitions[5], partition_schema, &arena_));
+
+ ASSERT_EQ("PK >= (int8 a=1, int8 b=50, int8 c=-128) AND "
"PK < (int8 a=1, int8 b=101, int8 c=-128) AND "
- "a IN (1) AND b IN (50, 100)");
-
- ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
- schema,
- partitions[7],
- partition_schema,
- &arena_),
- "PK >= (int8 a=3, int8 b=50, int8 c=-128) AND "
+ "a IN (1) AND b IN (50, 100)",
+ PruneInlistValuesAndGetSchemaString(
+ spec, schema, partitions[6], partition_schema, &arena_));
+
+ ASSERT_EQ("PK >= (int8 a=3, int8 b=50, int8 c=-128) AND "
"PK < (int8 a=3, int8 b=101, int8 c=-128) AND "
- "a IN (3) AND b IN (50, 100)");
-
- ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
- schema,
- partitions[8],
- partition_schema,
- &arena_),
- "PK >= (int8 a=6, int8 b=50, int8 c=-128) AND "
+ "a IN (3) AND b IN (50, 100)",
+ PruneInlistValuesAndGetSchemaString(
+ spec, schema, partitions[7], partition_schema, &arena_));
+
+ ASSERT_EQ("PK >= (int8 a=6, int8 b=50, int8 c=-128) AND "
"PK < (int8 a=6, int8 b=101, int8 c=-128) AND "
- "a IN (6) AND b IN (50, 100)");
+ "a IN (6) AND b IN (50, 100)",
+ PruneInlistValuesAndGetSchemaString(
+ spec, schema, partitions[8], partition_schema, &arena_));
}
// Test that hash(a), range(a, b) IN list predicates prune would happen
// on hash-key but not on range key.
-TEST_F(CompositeIntKeysTest, TestOneHashKeyMultiRangeKeyInListHashPruning) {
+TEST_F(CompositeIntKeysTest, OneHashKeyMultiRangeKeyInListHashPruning) {
ScanSpec spec;
AddInPredicate<int8_t>(&spec, "a", { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 });
AddInPredicate<int8_t>(&spec, "b", { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 });
- Schema schema = schema_.CopyWithColumnIds();
-
+ const auto schema = schema_.CopyWithColumnIds();
PartitionSchema partition_schema;
- GeneratePartitionSchema(schema,
- { pair<vector<string>, int>({ "a" }, 3) },
- { "a", "b" },
- &partition_schema);
+ GeneratePartitionSchema(
+ schema, { { { "a" }, 3 } }, { "a", "b" }, &partition_schema);
KuduPartialRow split1(&schema);
KuduPartialRow split2(&schema);
@@ -601,109 +551,76 @@ TEST_F(CompositeIntKeysTest,
TestOneHashKeyMultiRangeKeyInListHashPruning) {
ASSERT_OK(split2.SetInt8("b", 6));
vector<Partition> partitions;
- ASSERT_OK(partition_schema.CreatePartitions({ split1, split2 },
- {},
- {},
- schema,
- &partitions));
+ ASSERT_OK(partition_schema.CreatePartitions(
+ { split1, split2 }, {}, {}, schema, &partitions));
ASSERT_EQ(9, partitions.size());
- ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
- schema,
- partitions[0],
- partition_schema,
- &arena_),
- "PK >= (int8 a=4, int8 b=0, int8 c=-128) AND "
+ ASSERT_EQ("PK >= (int8 a=4, int8 b=0, int8 c=-128) AND "
"PK < (int8 a=8, int8 b=10, int8 c=-128) AND "
- "a IN (4, 7, 8) AND b IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9)");
-
- ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
- schema,
- partitions[1],
- partition_schema,
- &arena_),
- "PK >= (int8 a=4, int8 b=0, int8 c=-128) AND "
+ "a IN (4, 7, 8) AND b IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9)",
+ PruneInlistValuesAndGetSchemaString(
+ spec, schema, partitions[0], partition_schema, &arena_));
+
+ ASSERT_EQ("PK >= (int8 a=4, int8 b=0, int8 c=-128) AND "
"PK < (int8 a=8, int8 b=10, int8 c=-128) AND "
- "a IN (4, 7, 8) AND b IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9)");
-
- ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
- schema,
- partitions[2],
- partition_schema,
- &arena_),
- "PK >= (int8 a=4, int8 b=0, int8 c=-128) AND "
+ "a IN (4, 7, 8) AND b IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9)",
+ PruneInlistValuesAndGetSchemaString(
+ spec, schema, partitions[1], partition_schema, &arena_));
+
+ ASSERT_EQ("PK >= (int8 a=4, int8 b=0, int8 c=-128) AND "
"PK < (int8 a=8, int8 b=10, int8 c=-128) AND "
- "a IN (4, 7, 8) AND b IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9)");
-
- ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
- schema,
- partitions[3],
- partition_schema,
- &arena_),
- "PK >= (int8 a=0, int8 b=0, int8 c=-128) AND "
+ "a IN (4, 7, 8) AND b IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9)",
+ PruneInlistValuesAndGetSchemaString(
+ spec, schema, partitions[2], partition_schema, &arena_));
+
+ ASSERT_EQ("PK >= (int8 a=0, int8 b=0, int8 c=-128) AND "
"PK < (int8 a=9, int8 b=10, int8 c=-128) AND "
- "a IN (0, 2, 5, 9) AND b IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9)");
-
- ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
- schema,
- partitions[4],
- partition_schema,
- &arena_),
- "PK >= (int8 a=0, int8 b=0, int8 c=-128) AND "
+ "a IN (0, 2, 5, 9) AND b IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9)",
+ PruneInlistValuesAndGetSchemaString(
+ spec, schema, partitions[3], partition_schema, &arena_));
+
+ ASSERT_EQ("PK >= (int8 a=0, int8 b=0, int8 c=-128) AND "
"PK < (int8 a=9, int8 b=10, int8 c=-128) AND "
- "a IN (0, 2, 5, 9) AND b IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9)");
-
- ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
- schema,
- partitions[5],
- partition_schema,
- &arena_),
- "PK >= (int8 a=0, int8 b=0, int8 c=-128) AND "
+ "a IN (0, 2, 5, 9) AND b IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9)",
+ PruneInlistValuesAndGetSchemaString(
+ spec, schema, partitions[4], partition_schema, &arena_));
+
+ ASSERT_EQ("PK >= (int8 a=0, int8 b=0, int8 c=-128) AND "
"PK < (int8 a=9, int8 b=10, int8 c=-128) AND "
- "a IN (0, 2, 5, 9) AND b IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9)");
-
- ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
- schema,
- partitions[6],
- partition_schema,
- &arena_),
- "PK >= (int8 a=1, int8 b=0, int8 c=-128) AND "
+ "a IN (0, 2, 5, 9) AND b IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9)",
+ PruneInlistValuesAndGetSchemaString(
+ spec, schema, partitions[5], partition_schema, &arena_));
+
+ ASSERT_EQ("PK >= (int8 a=1, int8 b=0, int8 c=-128) AND "
"PK < (int8 a=6, int8 b=10, int8 c=-128) AND "
- "a IN (1, 3, 6) AND b IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9)");
-
- ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
- schema,
- partitions[7],
- partition_schema,
- &arena_),
- "PK >= (int8 a=1, int8 b=0, int8 c=-128) AND "
+ "a IN (1, 3, 6) AND b IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9)",
+ PruneInlistValuesAndGetSchemaString(
+ spec, schema, partitions[6], partition_schema, &arena_));
+
+ ASSERT_EQ("PK >= (int8 a=1, int8 b=0, int8 c=-128) AND "
"PK < (int8 a=6, int8 b=10, int8 c=-128) AND "
- "a IN (1, 3, 6) AND b IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9)");
-
- ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
- schema,
- partitions[8],
- partition_schema,
- &arena_),
- "PK >= (int8 a=1, int8 b=0, int8 c=-128) AND "
+ "a IN (1, 3, 6) AND b IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9)",
+ PruneInlistValuesAndGetSchemaString(
+ spec, schema, partitions[7], partition_schema, &arena_));
+
+ ASSERT_EQ("PK >= (int8 a=1, int8 b=0, int8 c=-128) AND "
"PK < (int8 a=6, int8 b=10, int8 c=-128) AND "
- "a IN (1, 3, 6) AND b IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9)");
+ "a IN (1, 3, 6) AND b IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9)",
+ PruneInlistValuesAndGetSchemaString(
+ spec, schema, partitions[8], partition_schema, &arena_));
}
// Test that hash(a), range(b) IN list predicates prune would happen
// on both hash and range aspects.
-TEST_F(CompositeIntKeysTest, TestDifferentHashRangeKeyInListHashPruning) {
+TEST_F(CompositeIntKeysTest, DifferentHashRangeKeyInListHashPruning) {
ScanSpec spec;
AddInPredicate<int8_t>(&spec, "a", { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 });
AddInPredicate<int8_t>(&spec, "b", { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 });
- Schema schema = schema_.CopyWithColumnIds();
-
+ const auto schema = schema_.CopyWithColumnIds();
PartitionSchema partition_schema;
- GeneratePartitionSchema(schema,
- { pair<vector<string>, int>({ "a" }, 3) },
- { "b" },
- &partition_schema);
+ GeneratePartitionSchema(
+ schema, { { { "a" }, 3 } }, { "b" }, &partition_schema);
KuduPartialRow split1(&schema);
KuduPartialRow split2(&schema);
@@ -712,155 +629,109 @@ TEST_F(CompositeIntKeysTest,
TestDifferentHashRangeKeyInListHashPruning) {
ASSERT_OK(split2.SetInt8("b", 6));
vector<Partition> partitions;
- ASSERT_OK(partition_schema.CreatePartitions({ split1, split2 },
- {},
- {},
- schema,
- &partitions));
+ ASSERT_OK(partition_schema.CreatePartitions(
+ { split1, split2 }, {}, {}, schema, &partitions));
ASSERT_EQ(9, partitions.size());
- ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
- schema,
- partitions[0],
- partition_schema,
- &arena_),
- "PK >= (int8 a=4, int8 b=0, int8 c=-128) AND "
+ ASSERT_EQ("PK >= (int8 a=4, int8 b=0, int8 c=-128) AND "
"PK < (int8 a=8, int8 b=3, int8 c=-128) AND "
- "a IN (4, 7, 8) AND b IN (0, 1, 2)");
-
- ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
- schema,
- partitions[1],
- partition_schema,
- &arena_),
- "PK >= (int8 a=4, int8 b=3, int8 c=-128) AND "
+ "a IN (4, 7, 8) AND b IN (0, 1, 2)",
+ PruneInlistValuesAndGetSchemaString(
+ spec, schema, partitions[0], partition_schema, &arena_));
+
+ ASSERT_EQ("PK >= (int8 a=4, int8 b=3, int8 c=-128) AND "
"PK < (int8 a=8, int8 b=6, int8 c=-128) AND "
- "a IN (4, 7, 8) AND b IN (3, 4, 5)");
-
- ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
- schema,
- partitions[2],
- partition_schema,
- &arena_),
- "PK >= (int8 a=4, int8 b=6, int8 c=-128) AND "
+ "a IN (4, 7, 8) AND b IN (3, 4, 5)",
+ PruneInlistValuesAndGetSchemaString(
+ spec, schema, partitions[1], partition_schema, &arena_));
+
+ ASSERT_EQ("PK >= (int8 a=4, int8 b=6, int8 c=-128) AND "
"PK < (int8 a=8, int8 b=10, int8 c=-128) AND "
- "a IN (4, 7, 8) AND b IN (6, 7, 8, 9)");
-
- ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
- schema,
- partitions[3],
- partition_schema,
- &arena_),
- "PK >= (int8 a=0, int8 b=0, int8 c=-128) AND "
+ "a IN (4, 7, 8) AND b IN (6, 7, 8, 9)",
+ PruneInlistValuesAndGetSchemaString(
+ spec, schema, partitions[2], partition_schema, &arena_));
+
+ ASSERT_EQ("PK >= (int8 a=0, int8 b=0, int8 c=-128) AND "
"PK < (int8 a=9, int8 b=3, int8 c=-128) AND "
- "a IN (0, 2, 5, 9) AND b IN (0, 1, 2)");
-
- ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
- schema,
- partitions[4],
- partition_schema,
- &arena_),
- "PK >= (int8 a=0, int8 b=3, int8 c=-128) AND "
+ "a IN (0, 2, 5, 9) AND b IN (0, 1, 2)",
+ PruneInlistValuesAndGetSchemaString(
+ spec, schema, partitions[3], partition_schema, &arena_));
+
+ ASSERT_EQ("PK >= (int8 a=0, int8 b=3, int8 c=-128) AND "
"PK < (int8 a=9, int8 b=6, int8 c=-128) AND "
- "a IN (0, 2, 5, 9) AND b IN (3, 4, 5)");
-
- ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
- schema,
- partitions[5],
- partition_schema,
- &arena_),
- "PK >= (int8 a=0, int8 b=6, int8 c=-128) AND "
+ "a IN (0, 2, 5, 9) AND b IN (3, 4, 5)",
+ PruneInlistValuesAndGetSchemaString(
+ spec, schema, partitions[4], partition_schema, &arena_));
+
+ ASSERT_EQ("PK >= (int8 a=0, int8 b=6, int8 c=-128) AND "
"PK < (int8 a=9, int8 b=10, int8 c=-128) AND "
- "a IN (0, 2, 5, 9) AND b IN (6, 7, 8, 9)");
-
- ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
- schema,
- partitions[6],
- partition_schema,
- &arena_),
- "PK >= (int8 a=1, int8 b=0, int8 c=-128) AND "
+ "a IN (0, 2, 5, 9) AND b IN (6, 7, 8, 9)",
+ PruneInlistValuesAndGetSchemaString(
+ spec, schema, partitions[5], partition_schema, &arena_));
+
+ ASSERT_EQ("PK >= (int8 a=1, int8 b=0, int8 c=-128) AND "
"PK < (int8 a=6, int8 b=3, int8 c=-128) AND "
- "a IN (1, 3, 6) AND b IN (0, 1, 2)");
-
- ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
- schema,
- partitions[7],
- partition_schema,
- &arena_),
- "PK >= (int8 a=1, int8 b=3, int8 c=-128) AND "
+ "a IN (1, 3, 6) AND b IN (0, 1, 2)",
+ PruneInlistValuesAndGetSchemaString(
+ spec, schema, partitions[6], partition_schema, &arena_));
+
+ ASSERT_EQ("PK >= (int8 a=1, int8 b=3, int8 c=-128) AND "
"PK < (int8 a=6, int8 b=6, int8 c=-128) AND "
- "a IN (1, 3, 6) AND b IN (3, 4, 5)");
-
- ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
- schema,
- partitions[8],
- partition_schema,
- &arena_),
- "PK >= (int8 a=1, int8 b=6, int8 c=-128) AND "
+ "a IN (1, 3, 6) AND b IN (3, 4, 5)",
+ PruneInlistValuesAndGetSchemaString(
+ spec, schema, partitions[7], partition_schema, &arena_));
+
+ ASSERT_EQ("PK >= (int8 a=1, int8 b=6, int8 c=-128) AND "
"PK < (int8 a=6, int8 b=10, int8 c=-128) AND "
- "a IN (1, 3, 6) AND b IN (6, 7, 8, 9)");
+ "a IN (1, 3, 6) AND b IN (6, 7, 8, 9)",
+ PruneInlistValuesAndGetSchemaString(
+ spec, schema, partitions[8], partition_schema, &arena_));
}
// Test that in case hash(a) prune all predicate values, the rest predicate
values for
// pruned in-list predicate should be detect correctly by CanShortCircuit().
// BTW, empty IN list predicates wouldn't result in the crash of
OptimizeScan().
-TEST_F(CompositeIntKeysTest, TestHashKeyInListHashPruningEmptyDetect) {
+TEST_F(CompositeIntKeysTest, HashKeyInListHashPruningEmptyDetect) {
ScanSpec spec;
AddInPredicate<int8_t>(&spec, "a", { 0, 2, 4, 5, 7, 8, 9 });
AddInPredicate<int8_t>(&spec, "b", { 50, 100 });
- Schema schema = schema_.CopyWithColumnIds();
-
+ const auto schema = schema_.CopyWithColumnIds();
PartitionSchema partition_schema;
- GeneratePartitionSchema(schema,
- { pair<vector<string>, int>({ "a" }, 3) },
- {},
- &partition_schema);
+ GeneratePartitionSchema(
+ schema, { { { "a" }, 3 } }, {}, &partition_schema);
vector<Partition> partitions;
ASSERT_OK(partition_schema.CreatePartitions({}, {}, {}, schema,
&partitions));
ASSERT_EQ(3, partitions.size());
- ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
- schema,
- partitions[0],
- partition_schema,
- &arena_),
- "PK >= (int8 a=4, int8 b=50, int8 c=-128) AND "
+ ASSERT_EQ("PK >= (int8 a=4, int8 b=50, int8 c=-128) AND "
"PK < (int8 a=8, int8 b=101, int8 c=-128) AND "
- "a IN (4, 7, 8) AND b IN (50, 100)");
-
- ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
- schema,
- partitions[1],
- partition_schema,
- &arena_),
- "PK >= (int8 a=0, int8 b=50, int8 c=-128) AND "
+ "a IN (4, 7, 8) AND b IN (50, 100)",
+ PruneInlistValuesAndGetSchemaString(
+ spec, schema, partitions[0], partition_schema, &arena_));
+
+ ASSERT_EQ("PK >= (int8 a=0, int8 b=50, int8 c=-128) AND "
"PK < (int8 a=9, int8 b=101, int8 c=-128) AND "
- "a IN (0, 2, 5, 9) AND b IN (50, 100)");
-
- ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
- schema,
- partitions[2],
- partition_schema,
- &arena_),
- "a IN () AND b IN (50, 100)");
+ "a IN (0, 2, 5, 9) AND b IN (50, 100)",
+ PruneInlistValuesAndGetSchemaString(
+ spec, schema, partitions[1], partition_schema, &arena_));
+
+ ASSERT_EQ("a IN () AND b IN (50, 100)",
+ PruneInlistValuesAndGetSchemaString(
+ spec, schema, partitions[2], partition_schema, &arena_));
}
// Test that hash(a), hash(b) IN list predicates should be pruned.
-TEST_F(CompositeIntKeysTest, TestMultiHashKeyOneColumnInListHashPruning) {
+TEST_F(CompositeIntKeysTest, MultiHashKeyOneColumnInListHashPruning) {
ScanSpec spec;
AddInPredicate<int8_t>(&spec, "a", { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 });
AddInPredicate<int8_t>(&spec, "b", { 10, 20, 30, 40, 50, 60, 70, 80 });
- Schema schema = schema_.CopyWithColumnIds();
-
+ const auto schema = schema_.CopyWithColumnIds();
PartitionSchema partition_schema;
- GeneratePartitionSchema(schema,
- { pair<vector<string>, int>({ "a" }, 3),
- pair<vector<string>, int>({ "b" }, 3) },
- {},
- &partition_schema);
+ GeneratePartitionSchema(
+ schema, { { { "a" }, 3 }, { { "b" }, 3 }, }, {}, &partition_schema);
vector<Partition> partitions;
ASSERT_OK(partition_schema.CreatePartitions({}, {}, {}, schema,
&partitions));
@@ -869,236 +740,202 @@ TEST_F(CompositeIntKeysTest,
TestMultiHashKeyOneColumnInListHashPruning) {
// p1, p2, p3 should have the same predicate values to be pushed on hash(a).
// p1, p4, p7 should have the same predicate values to be pushed on hash(b).
// pi refer to partitions[i-1], e.g. p1 = partitions[0]
- ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
- schema,
- partitions[0],
- partition_schema,
- &arena_),
- "PK >= (int8 a=4, int8 b=40, int8 c=-128) AND "
+ ASSERT_EQ("PK >= (int8 a=4, int8 b=40, int8 c=-128) AND "
"PK < (int8 a=8, int8 b=71, int8 c=-128) AND "
- "a IN (4, 7, 8) AND b IN (40, 60, 70)");
-
- ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
- schema,
- partitions[1],
- partition_schema,
- &arena_),
- "PK >= (int8 a=4, int8 b=20, int8 c=-128) AND "
+ "a IN (4, 7, 8) AND b IN (40, 60, 70)",
+ PruneInlistValuesAndGetSchemaString(
+ spec, schema, partitions[0], partition_schema, &arena_));
+
+ ASSERT_EQ("PK >= (int8 a=4, int8 b=20, int8 c=-128) AND "
"PK < (int8 a=8, int8 b=51, int8 c=-128) AND "
- "a IN (4, 7, 8) AND b IN (20, 30, 50)");
-
- ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
- schema,
- partitions[2],
- partition_schema,
- &arena_),
- "PK >= (int8 a=4, int8 b=10, int8 c=-128) AND "
+ "a IN (4, 7, 8) AND b IN (20, 30, 50)",
+ PruneInlistValuesAndGetSchemaString(
+ spec, schema, partitions[1], partition_schema, &arena_));
+
+ ASSERT_EQ("PK >= (int8 a=4, int8 b=10, int8 c=-128) AND "
"PK < (int8 a=8, int8 b=81, int8 c=-128) AND "
- "a IN (4, 7, 8) AND b IN (10, 80)");
-
- ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
- schema,
- partitions[3],
- partition_schema,
- &arena_),
- "PK >= (int8 a=0, int8 b=40, int8 c=-128) AND "
+ "a IN (4, 7, 8) AND b IN (10, 80)",
+ PruneInlistValuesAndGetSchemaString(
+ spec, schema, partitions[2], partition_schema, &arena_));
+
+ ASSERT_EQ("PK >= (int8 a=0, int8 b=40, int8 c=-128) AND "
"PK < (int8 a=9, int8 b=71, int8 c=-128) AND "
- "a IN (0, 2, 5, 9) AND b IN (40, 60, 70)");
-
- ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
- schema,
- partitions[4],
- partition_schema,
- &arena_),
- "PK >= (int8 a=0, int8 b=20, int8 c=-128) AND "
+ "a IN (0, 2, 5, 9) AND b IN (40, 60, 70)",
+ PruneInlistValuesAndGetSchemaString(
+ spec, schema, partitions[3], partition_schema, &arena_));
+
+ ASSERT_EQ("PK >= (int8 a=0, int8 b=20, int8 c=-128) AND "
"PK < (int8 a=9, int8 b=51, int8 c=-128) AND "
- "a IN (0, 2, 5, 9) AND b IN (20, 30, 50)");
-
- ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
- schema,
- partitions[5],
- partition_schema,
- &arena_),
- "PK >= (int8 a=0, int8 b=10, int8 c=-128) AND "
+ "a IN (0, 2, 5, 9) AND b IN (20, 30, 50)",
+ PruneInlistValuesAndGetSchemaString(
+ spec, schema, partitions[4], partition_schema, &arena_));
+
+ ASSERT_EQ("PK >= (int8 a=0, int8 b=10, int8 c=-128) AND "
"PK < (int8 a=9, int8 b=81, int8 c=-128) AND "
- "a IN (0, 2, 5, 9) AND b IN (10, 80)");
-
- ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
- schema,
- partitions[6],
- partition_schema,
- &arena_),
- "PK >= (int8 a=1, int8 b=40, int8 c=-128) AND "
+ "a IN (0, 2, 5, 9) AND b IN (10, 80)",
+ PruneInlistValuesAndGetSchemaString(
+ spec, schema, partitions[5], partition_schema, &arena_));
+
+ ASSERT_EQ("PK >= (int8 a=1, int8 b=40, int8 c=-128) AND "
"PK < (int8 a=6, int8 b=71, int8 c=-128) AND "
- "a IN (1, 3, 6) AND b IN (40, 60, 70)");
-
- ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
- schema,
- partitions[7],
- partition_schema,
- &arena_),
- "PK >= (int8 a=1, int8 b=20, int8 c=-128) AND "
+ "a IN (1, 3, 6) AND b IN (40, 60, 70)",
+ PruneInlistValuesAndGetSchemaString(
+ spec, schema, partitions[6], partition_schema, &arena_));
+
+ ASSERT_EQ("PK >= (int8 a=1, int8 b=20, int8 c=-128) AND "
"PK < (int8 a=6, int8 b=51, int8 c=-128) AND "
- "a IN (1, 3, 6) AND b IN (20, 30, 50)");
-
- ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
- schema,
- partitions[8],
- partition_schema,
- &arena_),
- "PK >= (int8 a=1, int8 b=10, int8 c=-128) AND "
+ "a IN (1, 3, 6) AND b IN (20, 30, 50)",
+ PruneInlistValuesAndGetSchemaString(
+ spec, schema, partitions[7], partition_schema, &arena_));
+
+ ASSERT_EQ("PK >= (int8 a=1, int8 b=10, int8 c=-128) AND "
"PK < (int8 a=6, int8 b=81, int8 c=-128) AND "
- "a IN (1, 3, 6) AND b IN (10, 80)");
+ "a IN (1, 3, 6) AND b IN (10, 80)",
+ PruneInlistValuesAndGetSchemaString(
+ spec, schema, partitions[8], partition_schema, &arena_));
}
// Test that hash(a, b) IN list predicates should not be pruned.
-TEST_F(CompositeIntKeysTest, TesMultiHashColumnsInListHashPruning) {
+TEST_F(CompositeIntKeysTest, MultiHashColumnsInListHashPruning) {
ScanSpec spec;
AddInPredicate<int8_t>(&spec, "a", { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 });
AddInPredicate<int8_t>(&spec, "b", { 50, 100 });
- Schema schema = schema_.CopyWithColumnIds();
-
+ const auto schema = schema_.CopyWithColumnIds();
PartitionSchema partition_schema;
- GeneratePartitionSchema(schema,
- { pair<vector<string>, int>({ "a", "b" }, 3) },
- {},
- &partition_schema);
+ GeneratePartitionSchema(
+ schema, { { { "a", "b" }, 3 } }, {}, &partition_schema);
vector<Partition> partitions;
ASSERT_OK(partition_schema.CreatePartitions({}, {}, {}, schema,
&partitions));
ASSERT_EQ(3, partitions.size());
- ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
- schema,
- partitions[0],
- partition_schema,
- &arena_),
- "PK >= (int8 a=0, int8 b=50, int8 c=-128) AND "
+ ASSERT_EQ("PK >= (int8 a=0, int8 b=50, int8 c=-128) AND "
"PK < (int8 a=9, int8 b=101, int8 c=-128) AND "
- "a IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9) AND b IN (50, 100)");
-
- ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
- schema,
- partitions[1],
- partition_schema,
- &arena_),
- "PK >= (int8 a=0, int8 b=50, int8 c=-128) AND "
+ "a IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9) AND b IN (50, 100)",
+ PruneInlistValuesAndGetSchemaString(
+ spec, schema, partitions[0], partition_schema, &arena_));
+
+ ASSERT_EQ("PK >= (int8 a=0, int8 b=50, int8 c=-128) AND "
"PK < (int8 a=9, int8 b=101, int8 c=-128) AND "
- "a IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9) AND b IN (50, 100)");
-
- ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
- schema,
- partitions[2],
- partition_schema,
- &arena_),
- "PK >= (int8 a=0, int8 b=50, int8 c=-128) AND "
+ "a IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9) AND b IN (50, 100)",
+ PruneInlistValuesAndGetSchemaString(
+ spec, schema, partitions[1], partition_schema, &arena_));
+
+ ASSERT_EQ("PK >= (int8 a=0, int8 b=50, int8 c=-128) AND "
"PK < (int8 a=9, int8 b=101, int8 c=-128) AND "
- "a IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9) AND b IN (50, 100)");
+ "a IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9) AND b IN (50, 100)",
+ PruneInlistValuesAndGetSchemaString(
+ spec, schema, partitions[2], partition_schema, &arena_));
}
// Test that hash(a, b), hash(c) InList predicates.
// Neither a or b IN list can be pruned.
// c IN list should be pruned.
-TEST_F(CompositeIntKeysTest, TesMultiHashKeyMultiHashInListHashPruning) {
+TEST_F(CompositeIntKeysTest, MultiHashKeyMultiHashInListHashPruning) {
ScanSpec spec;
AddInPredicate<int8_t>(&spec, "a", { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 });
AddInPredicate<int8_t>(&spec, "b", { 50, 100 });
- AddInPredicate<int8_t>(&spec, "c", { 20, 30, 40, 50, 60, 70, 80, 90});
-
- Schema schema = schema_.CopyWithColumnIds();
+ AddInPredicate<int8_t>(&spec, "c", { 20, 30, 40, 50, 60, 70, 80, 90 });
+ const auto schema = schema_.CopyWithColumnIds();
PartitionSchema partition_schema;
- GeneratePartitionSchema(schema,
- { pair<vector<string>, int>({ "a", "b" }, 3),
- pair<vector<string>, int>({ "c" }, 3) },
- {},
- &partition_schema);
+ GeneratePartitionSchema(
+ schema, { { { "a", "b" }, 3 }, { { "c" }, 3 } }, {}, &partition_schema);
vector<Partition> partitions;
ASSERT_OK(partition_schema.CreatePartitions({}, {}, {}, schema,
&partitions));
ASSERT_EQ(9, partitions.size());
- ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
- schema,
- partitions[0],
- partition_schema,
- &arena_),
- "PK >= (int8 a=0, int8 b=50, int8 c=40) AND "
+ ASSERT_EQ("PK >= (int8 a=0, int8 b=50, int8 c=40) AND "
"PK < (int8 a=9, int8 b=100, int8 c=71) AND "
- "a IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9) AND b IN (50, 100) AND c IN
(40, 60, 70)");
-
- ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
- schema,
- partitions[1],
- partition_schema,
- &arena_),
- "PK >= (int8 a=0, int8 b=50, int8 c=20) AND "
+ "a IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9) AND "
+ "b IN (50, 100) AND c IN (40, 60, 70)",
+ PruneInlistValuesAndGetSchemaString(
+ spec, schema, partitions[0], partition_schema, &arena_));
+
+ ASSERT_EQ("PK >= (int8 a=0, int8 b=50, int8 c=20) AND "
"PK < (int8 a=9, int8 b=100, int8 c=51) AND "
- "a IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9) AND b IN (50, 100) AND c IN
(20, 30, 50)");
-
- ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
- schema,
- partitions[2],
- partition_schema,
- &arena_),
- "PK >= (int8 a=0, int8 b=50, int8 c=80) AND "
+ "a IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9) AND "
+ "b IN (50, 100) AND c IN (20, 30, 50)",
+ PruneInlistValuesAndGetSchemaString(
+ spec, schema, partitions[1], partition_schema, &arena_));
+
+ ASSERT_EQ("PK >= (int8 a=0, int8 b=50, int8 c=80) AND "
"PK < (int8 a=9, int8 b=100, int8 c=91) AND "
- "a IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9) AND b IN (50, 100) AND c IN
(80, 90)");
-
- ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
- schema,
- partitions[3],
- partition_schema,
- &arena_),
- "PK >= (int8 a=0, int8 b=50, int8 c=40) AND "
+ "a IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9) AND "
+ "b IN (50, 100) AND c IN (80, 90)",
+ PruneInlistValuesAndGetSchemaString(
+ spec, schema, partitions[2], partition_schema, &arena_));
+
+ ASSERT_EQ("PK >= (int8 a=0, int8 b=50, int8 c=40) AND "
"PK < (int8 a=9, int8 b=100, int8 c=71) AND "
- "a IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9) AND b IN (50, 100) AND c IN
(40, 60, 70)");
-
- ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
- schema,
- partitions[4],
- partition_schema,
- &arena_),
- "PK >= (int8 a=0, int8 b=50, int8 c=20) AND "
+ "a IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9) AND "
+ "b IN (50, 100) AND c IN (40, 60, 70)",
+ PruneInlistValuesAndGetSchemaString(
+ spec, schema, partitions[3], partition_schema, &arena_));
+
+ ASSERT_EQ("PK >= (int8 a=0, int8 b=50, int8 c=20) AND "
"PK < (int8 a=9, int8 b=100, int8 c=51) AND "
- "a IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9) AND b IN (50, 100) AND c IN
(20, 30, 50)");
-
- ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
- schema,
- partitions[5],
- partition_schema,
- &arena_),
- "PK >= (int8 a=0, int8 b=50, int8 c=80) AND "
+ "a IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9) AND "
+ "b IN (50, 100) AND c IN (20, 30, 50)",
+ PruneInlistValuesAndGetSchemaString(
+ spec, schema, partitions[4], partition_schema, &arena_));
+
+ ASSERT_EQ("PK >= (int8 a=0, int8 b=50, int8 c=80) AND "
"PK < (int8 a=9, int8 b=100, int8 c=91) AND "
- "a IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9) AND b IN (50, 100) AND c IN
(80, 90)");
-
- ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
- schema,
- partitions[6],
- partition_schema,
- &arena_),
- "PK >= (int8 a=0, int8 b=50, int8 c=40) AND "
+ "a IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9) AND "
+ "b IN (50, 100) AND c IN (80, 90)",
+ PruneInlistValuesAndGetSchemaString(
+ spec, schema, partitions[5], partition_schema, &arena_));
+
+ ASSERT_EQ("PK >= (int8 a=0, int8 b=50, int8 c=40) AND "
"PK < (int8 a=9, int8 b=100, int8 c=71) AND "
- "a IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9) AND b IN (50, 100) AND c IN
(40, 60, 70)");
-
- ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
- schema,
- partitions[7],
- partition_schema,
- &arena_),
- "PK >= (int8 a=0, int8 b=50, int8 c=20) AND "
+ "a IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9) AND "
+ "b IN (50, 100) AND c IN (40, 60, 70)",
+ PruneInlistValuesAndGetSchemaString(
+ spec, schema, partitions[6], partition_schema, &arena_));
+
+ ASSERT_EQ("PK >= (int8 a=0, int8 b=50, int8 c=20) AND "
"PK < (int8 a=9, int8 b=100, int8 c=51) AND "
- "a IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9) AND b IN (50, 100) AND c IN
(20, 30, 50)");
-
- ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
- schema,
- partitions[8],
- partition_schema,
- &arena_),
- "PK >= (int8 a=0, int8 b=50, int8 c=80) AND "
+ "a IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9) AND "
+ "b IN (50, 100) AND c IN (20, 30, 50)",
+ PruneInlistValuesAndGetSchemaString(
+ spec, schema, partitions[7], partition_schema, &arena_));
+
+ ASSERT_EQ("PK >= (int8 a=0, int8 b=50, int8 c=80) AND "
"PK < (int8 a=9, int8 b=100, int8 c=91) AND "
- "a IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9) AND b IN (50, 100) AND c IN
(80, 90)");
+ "a IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9) AND "
+ "b IN (50, 100) AND c IN (80, 90)",
+ PruneInlistValuesAndGetSchemaString(
+ spec, schema, partitions[8], partition_schema, &arena_));
+}
+
+// Of course, no pruning of IN list predicate's values for non-key columns.
+TEST_F(CompositeIntKeysTest, NonKeyValuesInListHashPruning) {
+ ScanSpec spec;
+ AddInPredicate<int8_t>(&spec, "d", { 1, 2, 3, 4, 5 });
+
+ const auto schema = schema_.CopyWithColumnIds();
+ PartitionSchema partition_schema;
+ GeneratePartitionSchema(
+ schema, { { { "a" }, 3 } }, {}, &partition_schema);
+
+ vector<Partition> partitions;
+ ASSERT_OK(partition_schema.CreatePartitions({}, {}, {}, schema,
&partitions));
+ ASSERT_EQ(3, partitions.size());
+
+ ASSERT_EQ("d IN (1, 2, 3, 4, 5)",
+ PruneInlistValuesAndGetSchemaString(
+ spec, schema, partitions[0], partition_schema, &arena_));
+
+ ASSERT_EQ("d IN (1, 2, 3, 4, 5)",
+ PruneInlistValuesAndGetSchemaString(
+ spec, schema, partitions[1], partition_schema, &arena_));
+
+ ASSERT_EQ("d IN (1, 2, 3, 4, 5)",
+ PruneInlistValuesAndGetSchemaString(
+ spec, schema, partitions[2], partition_schema, &arena_));
}
// Test that IN list mixed with range predicates get pushed into the primary
key
@@ -1128,7 +965,7 @@ TEST_F(CompositeIntKeysTest, TestInListPushdownWithRange) {
TEST_F(CompositeIntKeysTest, TestLiftPrimaryKeyBounds_NoBounds) {
ScanSpec spec;
spec.OptimizeScan(schema_, &arena_, false);
- ASSERT_EQ(0, spec.predicates().size());
+ ASSERT_TRUE(spec.predicates().empty());
}
// Test that implicit constraints specified in the lower primary key bound are
diff --git a/src/kudu/common/scan_spec.cc b/src/kudu/common/scan_spec.cc
index e421829..87b3d5b 100644
--- a/src/kudu/common/scan_spec.cc
+++ b/src/kudu/common/scan_spec.cc
@@ -174,42 +174,37 @@ void ScanSpec::OptimizeScan(const Schema& schema,
void ScanSpec::PruneInlistValuesIfPossible(const Schema& schema,
const Partition& partition,
const PartitionSchema&
partition_schema) {
- for (auto& predicate_pair : predicates_) {
- auto& predicate = predicate_pair.second;
- if (predicate.predicate_type() != PredicateType::InList) continue;
+ for (auto& [column_name, predicate] : predicates_) {
+ if (predicate.predicate_type() != PredicateType::InList) {
+ continue;
+ }
- const string& col_name = predicate_pair.first;
int32_t idx;
- Status s = schema.FindColumn(col_name, &idx);
- if (!s.ok() || !schema.is_key_column(idx)) continue;
-
- int hash_idx =
partition_schema.TryGetSingleColumnHashPartitionIndex(schema, idx);
- bool is_col_single_range_schema =
partition_schema.IsColumnSingleRangeSchema(schema, idx);
- if (hash_idx == -1 && !is_col_single_range_schema) continue;
+ if (auto s = schema.FindColumn(column_name, &idx); !s.ok()) {
+ LOG(DFATAL) << s.ToString();
+ continue;
+ }
+ if (!schema.is_key_column(idx)) {
+ continue;
+ }
auto* predicate_values = predicate.mutable_raw_values();
-
- predicate_values->erase(std::remove_if(predicate_values->begin(),
predicate_values->end(),
- [idx, hash_idx, is_col_single_range_schema,
- &schema, &partition, &partition_schema](const void* value) {
- // Returns true indicates this value is going to be removed from the
predicate values.
- KuduPartialRow partial_row(&schema);
- Status s = partial_row.Set(idx, reinterpret_cast<const
uint8_t*>(value));
- if (!s.ok()) return false;
-
- // If value is not in given hash partition, remove this value from
predicate values.
- if (hash_idx != -1 && !partition_schema.HashPartitionContainsRow(
- partition, partial_row, hash_idx)) {
- return true;
- }
-
- // If value is not in given range partition, remove this value from
predicate values.
- if (is_col_single_range_schema &&
- !partition_schema.RangePartitionContainsRow(partition,
partial_row)) {
- return true;
- }
- return false;
- }), predicate_values->end());
+ predicate_values->erase(std::remove_if(
+ predicate_values->begin(),
+ predicate_values->end(),
+ [idx, &schema, &partition, &partition_schema](const void* value) {
+ // If the target partition cannot contain the row, there is no sense
+ // in searching for the value: return 'true' if the value is to be
+ // removed from the IN(...) predicate.
+ KuduPartialRow row(&schema);
+ if (auto s = row.Set(idx, reinterpret_cast<const uint8_t*>(value));
+ !s.ok()) {
+ LOG(DFATAL) << s.ToString();
+ return false;
+ }
+ return !partition_schema.PartitionMayContainRow(partition, row);
+ }),
+ predicate_values->end());
}
}
diff --git a/src/kudu/common/scan_spec.h b/src/kudu/common/scan_spec.h
index 9843dae..a660502 100644
--- a/src/kudu/common/scan_spec.h
+++ b/src/kudu/common/scan_spec.h
@@ -73,22 +73,21 @@ class ScanSpec {
Arena* arena,
bool remove_pushed_predicates);
- // Filter in-list predicate values with given hash partition schema.
- // If range partition is introduced when creating table, in-list predicate
- // can also benefit from this pruning.
+ // Filter in-list predicate values with a given partition schema.
//
- // Only supports pruning for single-column hash schemas or single-column
range schema.
- // Now support hash prune on:
- // hash(onekey), # support.
- // range(onekey), # support.
- // hash(onekey), hash(anotherkey) # support either.
- // hash(onekey), range(anotherkey) # support either.
- // hash(key_one, key_two), hash(anotherkey) # only support prune on
anotherkey.
- // range(key_one, key_two) # not support.
+ // Supports pruning only for single-column hash and range schemas. The pruning
+ // of IN list predicate's values is enabled for the following partitioning
+ // patterns:
//
- // TODO(ningw) For IN list predicate on hash/range(key_one, key_two) or more
columns,
- // if one predicate is IN list, and the rest predicate(s) are EQUAL, could
- // have IN list predicate values prune as well.
+ // hash(onekey), # pruning on 'onekey'
+ // range(onekey), # pruning on 'onekey'
+ // hash(onekey), hash(anotherkey) # pruning on either key
+ // hash(onekey), range(anotherkey) # pruning on either key
+ // hash(key_one, key_two), hash(anotherkey) # pruning on 'anotherkey'
+ //
+ // TODO(ningw) For IN list predicate on hash/range(key_one, key_two) or more
+ // columns, if one predicate is IN list, and the rest predicate(s)
+ // are EQUAL, could have IN list predicate values prune as well.
void PruneInlistValuesIfPossible(const Schema& schema,
const Partition& partition,
const PartitionSchema& partition_schema);