[kudu] 01/02: KUDU-1644 use range partition info for pruning

granthenke Mon, 11 Jan 2021 07:13:26 -0800

This is an automated email from the ASF dual-hosted git repository.

granthenke pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


commit 83b8caf4f1d60c021ef1e5a5f930847fb96c5ff4
Author: ningw <[email protected]>
AuthorDate: Thu Dec 24 13:13:32 2020 +0800

    KUDU-1644 use range partition info for pruning
    
    When pruning in-list predicate values, range partition can also be taken
    into consideration.
    
    Change-Id: I3f14f543cffd44f090026f344c9b06af13ea2e10
    Reviewed-on: http://gerrit.cloudera.org:8080/16903
    Tested-by: Kudu Jenkins
    Reviewed-by: Andrew Wong <[email protected]>
---
 src/kudu/common/partition.cc       |  46 +-
 src/kudu/common/partition.h        |  19 +-
 src/kudu/common/scan_spec-test.cc  | 847 +++++++++++++++++++++++++------------
 src/kudu/common/scan_spec.cc       |  37 +-
 src/kudu/common/scan_spec.h        |  18 +-
 src/kudu/tools/kudu-admin-test.cc  |   2 +-
 src/kudu/tserver/tablet_service.cc |   6 +-
 7 files changed, 680 insertions(+), 295 deletions(-)

diff --git a/src/kudu/common/partition.cc b/src/kudu/common/partition.cc
index 6414e0c..6b2018e 100644
--- a/src/kudu/common/partition.cc
+++ b/src/kudu/common/partition.cc
@@ -544,15 +544,7 @@ Status PartitionSchema::PartitionContainsRowImpl(const 
Partition& partition,
     }
   }
 
-  string range_partition_key;
-  RETURN_NOT_OK(EncodeColumns(row, range_schema_.column_ids, 
&range_partition_key));
-
-  // If all of the hash buckets match, then the row is contained in the
-  // partition if the row is gte the lower bound; and if there is no upper
-  // bound, or the row is lt the upper bound.
-  *contains = (Slice(range_partition_key).compare(partition.range_key_start()) 
>= 0)
-           && (partition.range_key_end().empty()
-                || 
Slice(range_partition_key).compare(partition.range_key_end()) < 0);
+  RETURN_NOT_OK(RangePartitionContainsRowImpl(partition, row, contains));
 
   return Status::OK();
 }
@@ -570,6 +562,24 @@ Status PartitionSchema::HashPartitionContainsRowImpl(const 
Partition& partition,
   return Status::OK();
 }
 
+template<typename Row>
+Status PartitionSchema::RangePartitionContainsRowImpl(const Partition& 
partition,
+                                                      const Row& row,
+                                                      bool* contains) const {
+  string range_partition_key;
+  // If range partition is not used, column_ids would be empty and
+  // EncodeColumns() would return immediately.
+  RETURN_NOT_OK(EncodeColumns(row, range_schema_.column_ids, 
&range_partition_key));
+
+  // If all of the hash buckets match, then the row is contained in the
+  // partition if the row is gte the lower bound; and if there is no upper
+  // bound, or the row is lt the upper bound.
+  *contains = (Slice(range_partition_key).compare(partition.range_key_start()) 
>= 0)
+           && (partition.range_key_end().empty()
+                || 
Slice(range_partition_key).compare(partition.range_key_end()) < 0);
+  return Status::OK();
+}
+
 Status PartitionSchema::PartitionContainsRow(const Partition& partition,
                                              const KuduPartialRow& row,
                                              bool* contains) const {
@@ -596,6 +606,18 @@ Status PartitionSchema::HashPartitionContainsRow(const 
Partition& partition,
   return HashPartitionContainsRowImpl(partition, row, hash_idx, contains);
 }
 
+Status PartitionSchema::RangePartitionContainsRow(const Partition& partition,
+                                                  const KuduPartialRow& row,
+                                                  bool* contains) const {
+  return RangePartitionContainsRowImpl(partition, row, contains);
+}
+
+Status PartitionSchema::RangePartitionContainsRow(const Partition& partition,
+                                                  const ConstContiguousRow& 
row,
+                                                  bool* contains) const {
+  return RangePartitionContainsRowImpl(partition, row, contains);
+}
+
 Status PartitionSchema::DecodeRangeKey(Slice* encoded_key,
                                        KuduPartialRow* partial_row,
                                        Arena* arena) const {
@@ -1388,4 +1410,10 @@ int32_t 
PartitionSchema::TryGetSingleColumnHashPartitionIndex(const Schema& sche
   return -1;
 }
 
+bool PartitionSchema::IsColumnSingleRangeSchema(const Schema& schema, int32_t 
col_idx) const {
+  const ColumnId column_id = schema.column_id(col_idx);
+  return range_partition_schema().column_ids.size() == 1 &&
+         range_partition_schema().column_ids[0] == column_id;
+}
+
 } // namespace kudu
diff --git a/src/kudu/common/partition.h b/src/kudu/common/partition.h
index d76802e..85c58e9 100644
--- a/src/kudu/common/partition.h
+++ b/src/kudu/common/partition.h
@@ -211,7 +211,7 @@ class PartitionSchema {
                               const ConstContiguousRow& row,
                               bool* contains) const WARN_UNUSED_RESULT;
 
-  // Tests if the hash partition contians the row with given hash_idx.
+  // Tests if the hash partition contains the row with given hash_idx.
   Status HashPartitionContainsRow(const Partition& partition,
                                   const KuduPartialRow& row,
                                   int hash_idx,
@@ -221,6 +221,14 @@ class PartitionSchema {
                                   int hash_idx,
                                   bool* contains) const WARN_UNUSED_RESULT;
 
+  // Tests if the range partition contains the row.
+  Status RangePartitionContainsRow(const Partition& partition,
+                                   const KuduPartialRow& row,
+                                   bool* contains) const WARN_UNUSED_RESULT;
+  Status RangePartitionContainsRow(const Partition& partition,
+                                   const ConstContiguousRow& row,
+                                   bool* contains) const WARN_UNUSED_RESULT;
+
   // Returns a text description of the partition suitable for debug printing.
   //
   // Partitions are considered metadata, so no redaction will happen on the 
hash
@@ -306,6 +314,9 @@ class PartitionSchema {
   // contains only one column, otherwise returns -1.
   int32_t TryGetSingleColumnHashPartitionIndex(const Schema& schema, int32_t 
col_idx) const;
 
+  // Given a column idx, returns true if it is the only column of the range 
partition schema.
+  bool IsColumnSingleRangeSchema(const Schema& schema, int32_t col_idx) const;
+
  private:
   friend class PartitionPruner;
   FRIEND_TEST(PartitionTest, TestIncrementRangePartitionBounds);
@@ -357,6 +368,12 @@ class PartitionSchema {
                                       int hash_idx,
                                       bool* contains) const;
 
+  // Private templated helper for RangePartitionContainsRow.
+  template<typename Row>
+  Status RangePartitionContainsRowImpl(const Partition& partition,
+                                       const Row& row,
+                                       bool* contains) const;
+
   // Private templated helper for EncodeKey.
   template<typename Row>
   Status EncodeKeyImpl(const Row& row, std::string* buf) const;
diff --git a/src/kudu/common/scan_spec-test.cc 
b/src/kudu/common/scan_spec-test.cc
index 2f51824..7ebb096 100644
--- a/src/kudu/common/scan_spec-test.cc
+++ b/src/kudu/common/scan_spec-test.cc
@@ -44,17 +44,19 @@
 #include "kudu/util/test_macros.h"
 #include "kudu/util/test_util.h"
 
-using std::vector;
 using std::pair;
+using std::string;
+using std::vector;
 
 namespace kudu {
 
 namespace {
-// Generate partition schema of a table with given hash_partitions.
+// Generate partition schema of a table with given hash_partitions and range 
partition keys.
 // E.g. GeneratePartitionSchema(schema, {make_pair({a, b}, 3), make_pair({c}, 
5) })
 // Returns 'partition by hash(a, b) partitions 3, hash(c) partitions 5'.
 void GeneratePartitionSchema(const Schema& schema,
-                             const vector<std::pair<vector<std::string>, 
int>>& hash_partitions,
+                             const vector<pair<vector<string>, int>>& 
hash_partitions,
+                             const vector<string>& range_partition_columns,
                              PartitionSchema* partition_schema) {
   PartitionSchemaPB partition_schema_pb;
   for (const auto& col_names_and_num_buckets : hash_partitions) {
@@ -68,12 +70,33 @@ void GeneratePartitionSchema(const Schema& schema,
       column_pb->set_name(col_name);
     }
   }
+  if (!range_partition_columns.empty()) {
+    auto* range_schema = partition_schema_pb.mutable_range_schema();
+    for (const auto& range_column : range_partition_columns) {
+      range_schema->add_columns()->set_name(range_column);
+    }
+  }
   CHECK_OK(PartitionSchema::FromPB(partition_schema_pb, schema, 
partition_schema));
 }
+
+// Copy a spec and return the pruned spec string.
+string PruneInlistValuesAndGetSchemaString(const ScanSpec& spec,
+                                           const Schema& schema,
+                                           const Partition& partition,
+                                           const PartitionSchema& 
partition_schema,
+                                           Arena* arena) {
+  ScanSpec copy_spec = spec;
+
+  copy_spec.PruneInlistValuesIfPossible(schema, partition, partition_schema);
+  copy_spec.OptimizeScan(schema, arena, true);
+
+  return copy_spec.ToString(schema);
+}
+
 } // anonymous namespace
 
-static std::string ToString(const vector<ColumnSchema>& columns) {
-  std::string str;
+static string ToString(const vector<ColumnSchema>& columns) {
+  string str;
   for (const auto& column : columns) {
     str += column.ToString();
     str += "\n";
@@ -409,45 +432,373 @@ TEST_F(CompositeIntKeysTest, 
TestOneHashKeyInListHashPruning) {
 
   PartitionSchema partition_schema;
   GeneratePartitionSchema(schema,
-                          { pair<vector<std::string>, int>({ "a" }, 3) },
+                          { pair<vector<string>, int>({ "a" }, 3) },
+                          {},
                           &partition_schema);
 
   vector<Partition> partitions;
   ASSERT_OK(partition_schema.CreatePartitions({}, {}, {}, schema, 
&partitions));
   ASSERT_EQ(3, partitions.size());
 
-  // clone scan_spec for different partition.
-  ScanSpec spec_p1 = spec;
-  ScanSpec spec_p2 = spec;
-  ScanSpec spec_p3 = spec;
+  // Verify the split values can merge into the original set without 
overlapping.
+  ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
+                                                schema,
+                                                partitions[0],
+                                                partition_schema,
+                                                &arena_),
+            "PK >= (int8 a=4, int8 b=50, int8 c=-128) AND "
+            "PK < (int8 a=8, int8 b=101, int8 c=-128) AND "
+            "a IN (4, 7, 8) AND b IN (50, 100)");
+
+  ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
+                                                schema,
+                                                partitions[1],
+                                                partition_schema,
+                                                &arena_),
+            "PK >= (int8 a=0, int8 b=50, int8 c=-128) AND "
+            "PK < (int8 a=9, int8 b=101, int8 c=-128) AND "
+            "a IN (0, 2, 5, 9) AND b IN (50, 100)");
+
+  ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
+                                                schema,
+                                                partitions[2],
+                                                partition_schema,
+                                                &arena_),
+            "PK >= (int8 a=1, int8 b=50, int8 c=-128) AND "
+            "PK < (int8 a=6, int8 b=101, int8 c=-128) AND "
+            "a IN (1, 3, 6) AND b IN (50, 100)");
+}
 
-  spec_p1.PruneHashForInlistIfPossible(schema, partitions[0], 
partition_schema);
-  spec_p1.OptimizeScan(schema, &arena_, true);
+// Test that with hash(a), range(a) partitioning, IN-list predicate values
+// are pruned using both the hash and the range partition information.
+TEST_F(CompositeIntKeysTest, TestOneHashKeyOneRangeKeyInListHashPruning) {
+  ScanSpec spec;
+  AddInPredicate<int8_t>(&spec, "a", { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 });
+  AddInPredicate<int8_t>(&spec, "b", { 50, 100 });
 
-  // Verify the splitted values can merge into originl set without overlapping.
-  SCOPED_TRACE(spec_p1.ToString(schema));
-  EXPECT_EQ("PK >= (int8 a=4, int8 b=50, int8 c=-128) AND "
-            "PK < (int8 a=8, int8 b=101, int8 c=-128) AND "
-            "a IN (4, 7, 8) AND b IN (50, 100)",
-            spec_p1.ToString(schema));
+  Schema schema = schema_.CopyWithColumnIds();
 
-  spec_p2.PruneHashForInlistIfPossible(schema, partitions[1], 
partition_schema);
-  spec_p2.OptimizeScan(schema, &arena_, true);
+  PartitionSchema partition_schema;
+  GeneratePartitionSchema(schema,
+                          { pair<vector<string>, int>({ "a" }, 3) },
+                          { "a" },
+                          &partition_schema);
 
-  SCOPED_TRACE(spec_p2.ToString(schema));
-  EXPECT_EQ("PK >= (int8 a=0, int8 b=50, int8 c=-128) AND "
-            "PK < (int8 a=9, int8 b=101, int8 c=-128) AND "
-            "a IN (0, 2, 5, 9) AND b IN (50, 100)",
-            spec_p2.ToString(schema));
+  KuduPartialRow split1(&schema);
+  KuduPartialRow split2(&schema);
+  ASSERT_OK(split1.SetInt8("a", 3));
+  ASSERT_OK(split2.SetInt8("a", 6));
 
-  spec_p3.PruneHashForInlistIfPossible(schema, partitions[2], 
partition_schema);
-  spec_p3.OptimizeScan(schema, &arena_, true);
+  vector<Partition> partitions;
+  ASSERT_OK(partition_schema.CreatePartitions({ split1, split2 },
+                                              {},
+                                              {},
+                                              schema,
+                                              &partitions));
+  ASSERT_EQ(9, partitions.size());
 
-  SCOPED_TRACE(spec_p3.ToString(schema));
-  EXPECT_EQ("PK >= (int8 a=1, int8 b=50, int8 c=-128) AND "
+  ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
+                                                schema,
+                                                partitions[0],
+                                                partition_schema,
+                                                &arena_),
+            "a IN () AND b IN (50, 100)");
+
+  ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
+                                                schema,
+                                                partitions[1],
+                                                partition_schema,
+                                                &arena_),
+            "PK >= (int8 a=4, int8 b=50, int8 c=-128) AND "
+            "PK < (int8 a=4, int8 b=101, int8 c=-128) AND "
+            "a IN (4) AND b IN (50, 100)");
+
+  ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
+                                                schema,
+                                                partitions[2],
+                                                partition_schema,
+                                                &arena_),
+            "PK >= (int8 a=7, int8 b=50, int8 c=-128) AND "
+            "PK < (int8 a=8, int8 b=101, int8 c=-128) AND "
+            "a IN (7, 8) AND b IN (50, 100)");
+
+  ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
+                                                schema,
+                                                partitions[3],
+                                                partition_schema,
+                                                &arena_),
+            "PK >= (int8 a=0, int8 b=50, int8 c=-128) AND "
+            "PK < (int8 a=2, int8 b=101, int8 c=-128) AND "
+            "a IN (0, 2) AND b IN (50, 100)");
+
+  ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
+                                                schema,
+                                                partitions[4],
+                                                partition_schema,
+                                                &arena_),
+            "PK >= (int8 a=5, int8 b=50, int8 c=-128) AND "
+            "PK < (int8 a=5, int8 b=101, int8 c=-128) AND "
+            "a IN (5) AND b IN (50, 100)");
+
+  ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
+                                                schema,
+                                                partitions[5],
+                                                partition_schema,
+                                                &arena_),
+            "PK >= (int8 a=9, int8 b=50, int8 c=-128) AND "
+            "PK < (int8 a=9, int8 b=101, int8 c=-128) AND "
+            "a IN (9) AND b IN (50, 100)");
+
+  ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
+                                                schema,
+                                                partitions[6],
+                                                partition_schema,
+                                                &arena_),
+            "PK >= (int8 a=1, int8 b=50, int8 c=-128) AND "
+            "PK < (int8 a=1, int8 b=101, int8 c=-128) AND "
+            "a IN (1) AND b IN (50, 100)");
+
+  ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
+                                                schema,
+                                                partitions[7],
+                                                partition_schema,
+                                                &arena_),
+            "PK >= (int8 a=3, int8 b=50, int8 c=-128) AND "
+            "PK < (int8 a=3, int8 b=101, int8 c=-128) AND "
+            "a IN (3) AND b IN (50, 100)");
+
+  ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
+                                                schema,
+                                                partitions[8],
+                                                partition_schema,
+                                                &arena_),
+            "PK >= (int8 a=6, int8 b=50, int8 c=-128) AND "
             "PK < (int8 a=6, int8 b=101, int8 c=-128) AND "
-            "a IN (1, 3, 6) AND b IN (50, 100)",
-            spec_p3.ToString(schema));
+            "a IN (6) AND b IN (50, 100)");
+}
+
+// Test that with hash(a), range(a, b) partitioning, IN-list predicate values
+// are pruned using the hash key but not the multi-column range key.
+TEST_F(CompositeIntKeysTest, TestOneHashKeyMultiRangeKeyInListHashPruning) {
+  ScanSpec spec;
+  AddInPredicate<int8_t>(&spec, "a", { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 });
+  AddInPredicate<int8_t>(&spec, "b", { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 });
+
+  Schema schema = schema_.CopyWithColumnIds();
+
+  PartitionSchema partition_schema;
+  GeneratePartitionSchema(schema,
+                          { pair<vector<string>, int>({ "a" }, 3) },
+                          { "a", "b" },
+                          &partition_schema);
+
+  KuduPartialRow split1(&schema);
+  KuduPartialRow split2(&schema);
+  ASSERT_OK(split1.SetInt8("a", 2));
+  ASSERT_OK(split1.SetInt8("b", 3));
+
+  ASSERT_OK(split2.SetInt8("a", 6));
+  ASSERT_OK(split2.SetInt8("b", 6));
+
+  vector<Partition> partitions;
+  ASSERT_OK(partition_schema.CreatePartitions({ split1, split2 },
+                                              {},
+                                              {},
+                                              schema,
+                                              &partitions));
+  ASSERT_EQ(9, partitions.size());
+
+  ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
+                                                schema,
+                                                partitions[0],
+                                                partition_schema,
+                                                &arena_),
+            "PK >= (int8 a=4, int8 b=0, int8 c=-128) AND "
+            "PK < (int8 a=8, int8 b=10, int8 c=-128) AND "
+            "a IN (4, 7, 8) AND b IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9)");
+
+  ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
+                                                schema,
+                                                partitions[1],
+                                                partition_schema,
+                                                &arena_),
+            "PK >= (int8 a=4, int8 b=0, int8 c=-128) AND "
+            "PK < (int8 a=8, int8 b=10, int8 c=-128) AND "
+            "a IN (4, 7, 8) AND b IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9)");
+
+  ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
+                                                schema,
+                                                partitions[2],
+                                                partition_schema,
+                                                &arena_),
+            "PK >= (int8 a=4, int8 b=0, int8 c=-128) AND "
+            "PK < (int8 a=8, int8 b=10, int8 c=-128) AND "
+            "a IN (4, 7, 8) AND b IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9)");
+
+  ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
+                                                schema,
+                                                partitions[3],
+                                                partition_schema,
+                                                &arena_),
+            "PK >= (int8 a=0, int8 b=0, int8 c=-128) AND "
+            "PK < (int8 a=9, int8 b=10, int8 c=-128) AND "
+            "a IN (0, 2, 5, 9) AND b IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9)");
+
+  ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
+                                                schema,
+                                                partitions[4],
+                                                partition_schema,
+                                                &arena_),
+            "PK >= (int8 a=0, int8 b=0, int8 c=-128) AND "
+            "PK < (int8 a=9, int8 b=10, int8 c=-128) AND "
+            "a IN (0, 2, 5, 9) AND b IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9)");
+
+  ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
+                                                schema,
+                                                partitions[5],
+                                                partition_schema,
+                                                &arena_),
+            "PK >= (int8 a=0, int8 b=0, int8 c=-128) AND "
+            "PK < (int8 a=9, int8 b=10, int8 c=-128) AND "
+            "a IN (0, 2, 5, 9) AND b IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9)");
+
+  ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
+                                                schema,
+                                                partitions[6],
+                                                partition_schema,
+                                                &arena_),
+            "PK >= (int8 a=1, int8 b=0, int8 c=-128) AND "
+            "PK < (int8 a=6, int8 b=10, int8 c=-128) AND "
+            "a IN (1, 3, 6) AND b IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9)");
+
+  ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
+                                                schema,
+                                                partitions[7],
+                                                partition_schema,
+                                                &arena_),
+            "PK >= (int8 a=1, int8 b=0, int8 c=-128) AND "
+            "PK < (int8 a=6, int8 b=10, int8 c=-128) AND "
+            "a IN (1, 3, 6) AND b IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9)");
+
+  ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
+                                                schema,
+                                                partitions[8],
+                                                partition_schema,
+                                                &arena_),
+            "PK >= (int8 a=1, int8 b=0, int8 c=-128) AND "
+            "PK < (int8 a=6, int8 b=10, int8 c=-128) AND "
+            "a IN (1, 3, 6) AND b IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9)");
+}
+
+// Test that with hash(a), range(b) partitioning, IN-list predicate values
+// are pruned using both the hash and the range partition information.
+TEST_F(CompositeIntKeysTest, TestDifferentHashRangeKeyInListHashPruning) {
+  ScanSpec spec;
+  AddInPredicate<int8_t>(&spec, "a", { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 });
+  AddInPredicate<int8_t>(&spec, "b", { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 });
+
+  Schema schema = schema_.CopyWithColumnIds();
+
+  PartitionSchema partition_schema;
+  GeneratePartitionSchema(schema,
+                          { pair<vector<string>, int>({ "a" }, 3) },
+                          { "b" },
+                          &partition_schema);
+
+  KuduPartialRow split1(&schema);
+  KuduPartialRow split2(&schema);
+
+  ASSERT_OK(split1.SetInt8("b", 3));
+  ASSERT_OK(split2.SetInt8("b", 6));
+
+  vector<Partition> partitions;
+  ASSERT_OK(partition_schema.CreatePartitions({ split1, split2 },
+                                              {},
+                                              {},
+                                              schema,
+                                              &partitions));
+  ASSERT_EQ(9, partitions.size());
+
+  ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
+                                                schema,
+                                                partitions[0],
+                                                partition_schema,
+                                                &arena_),
+            "PK >= (int8 a=4, int8 b=0, int8 c=-128) AND "
+            "PK < (int8 a=8, int8 b=3, int8 c=-128) AND "
+            "a IN (4, 7, 8) AND b IN (0, 1, 2)");
+
+  ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
+                                                schema,
+                                                partitions[1],
+                                                partition_schema,
+                                                &arena_),
+            "PK >= (int8 a=4, int8 b=3, int8 c=-128) AND "
+            "PK < (int8 a=8, int8 b=6, int8 c=-128) AND "
+            "a IN (4, 7, 8) AND b IN (3, 4, 5)");
+
+  ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
+                                                schema,
+                                                partitions[2],
+                                                partition_schema,
+                                                &arena_),
+            "PK >= (int8 a=4, int8 b=6, int8 c=-128) AND "
+            "PK < (int8 a=8, int8 b=10, int8 c=-128) AND "
+            "a IN (4, 7, 8) AND b IN (6, 7, 8, 9)");
+
+  ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
+                                                schema,
+                                                partitions[3],
+                                                partition_schema,
+                                                &arena_),
+            "PK >= (int8 a=0, int8 b=0, int8 c=-128) AND "
+            "PK < (int8 a=9, int8 b=3, int8 c=-128) AND "
+            "a IN (0, 2, 5, 9) AND b IN (0, 1, 2)");
+
+  ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
+                                                schema,
+                                                partitions[4],
+                                                partition_schema,
+                                                &arena_),
+            "PK >= (int8 a=0, int8 b=3, int8 c=-128) AND "
+            "PK < (int8 a=9, int8 b=6, int8 c=-128) AND "
+            "a IN (0, 2, 5, 9) AND b IN (3, 4, 5)");
+
+  ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
+                                                schema,
+                                                partitions[5],
+                                                partition_schema,
+                                                &arena_),
+            "PK >= (int8 a=0, int8 b=6, int8 c=-128) AND "
+            "PK < (int8 a=9, int8 b=10, int8 c=-128) AND "
+            "a IN (0, 2, 5, 9) AND b IN (6, 7, 8, 9)");
+
+  ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
+                                                schema,
+                                                partitions[6],
+                                                partition_schema,
+                                                &arena_),
+            "PK >= (int8 a=1, int8 b=0, int8 c=-128) AND "
+            "PK < (int8 a=6, int8 b=3, int8 c=-128) AND "
+            "a IN (1, 3, 6) AND b IN (0, 1, 2)");
+
+  ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
+                                                schema,
+                                                partitions[7],
+                                                partition_schema,
+                                                &arena_),
+            "PK >= (int8 a=1, int8 b=3, int8 c=-128) AND "
+            "PK < (int8 a=6, int8 b=6, int8 c=-128) AND "
+            "a IN (1, 3, 6) AND b IN (3, 4, 5)");
+
+  ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
+                                                schema,
+                                                partitions[8],
+                                                partition_schema,
+                                                &arena_),
+            "PK >= (int8 a=1, int8 b=6, int8 c=-128) AND "
+            "PK < (int8 a=6, int8 b=10, int8 c=-128) AND "
+            "a IN (1, 3, 6) AND b IN (6, 7, 8, 9)");
 }
 
 // Test that in case hash(a) prune all predicate values, the rest predicate 
values for
@@ -462,46 +813,38 @@ TEST_F(CompositeIntKeysTest, 
TestHashKeyInListHashPruningEmptyDetect) {
 
   PartitionSchema partition_schema;
   GeneratePartitionSchema(schema,
-                          { pair<vector<std::string>, int>({ "a" }, 3) },
+                          { pair<vector<string>, int>({ "a" }, 3) },
+                          {},
                           &partition_schema);
 
   vector<Partition> partitions;
   ASSERT_OK(partition_schema.CreatePartitions({}, {}, {}, schema, 
&partitions));
   ASSERT_EQ(3, partitions.size());
 
-  // clone scan_spec for different partition.
-  ScanSpec spec_p1 = spec;
-  ScanSpec spec_p2 = spec;
-  ScanSpec spec_p3 = spec;
-
-  spec_p1.PruneHashForInlistIfPossible(schema, partitions[0], 
partition_schema);
-  // Guarantee OptimizeScan can be call without fatal.
-  NO_FATALS(spec_p1.OptimizeScan(schema, &arena_, true));
-
-  // Verify the splitted values can merge into originl set without overlapping.
-  SCOPED_TRACE(spec_p1.ToString(schema));
-  EXPECT_EQ("PK >= (int8 a=4, int8 b=50, int8 c=-128) AND "
+  ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
+                                                schema,
+                                                partitions[0],
+                                                partition_schema,
+                                                &arena_),
+            "PK >= (int8 a=4, int8 b=50, int8 c=-128) AND "
             "PK < (int8 a=8, int8 b=101, int8 c=-128) AND "
-            "a IN (4, 7, 8) AND b IN (50, 100)",
-            spec_p1.ToString(schema));
-  ASSERT_FALSE(spec_p1.CanShortCircuit());
-
-  spec_p2.PruneHashForInlistIfPossible(schema, partitions[1], 
partition_schema);
-  // Guarantee OptimizeScan can be call without fatal.
-  NO_FATALS(spec_p2.OptimizeScan(schema, &arena_, true));
-
-  SCOPED_TRACE(spec_p2.ToString(schema));
-  EXPECT_EQ("PK >= (int8 a=0, int8 b=50, int8 c=-128) AND "
+            "a IN (4, 7, 8) AND b IN (50, 100)");
+
+  ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
+                                                schema,
+                                                partitions[1],
+                                                partition_schema,
+                                                &arena_),
+            "PK >= (int8 a=0, int8 b=50, int8 c=-128) AND "
             "PK < (int8 a=9, int8 b=101, int8 c=-128) AND "
-            "a IN (0, 2, 5, 9) AND b IN (50, 100)",
-            spec_p2.ToString(schema));
-  ASSERT_FALSE(spec_p2.CanShortCircuit());
-
-  // There should be no predicate values after prune.
-  spec_p3.PruneHashForInlistIfPossible(schema, partitions[2], 
partition_schema);
-  // Guarantee OptimizeScan can be call without fatal.
-  NO_FATALS(spec_p3.OptimizeScan(schema, &arena_, true));
-  ASSERT_TRUE(spec_p3.CanShortCircuit());
+            "a IN (0, 2, 5, 9) AND b IN (50, 100)");
+
+  ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
+                                                schema,
+                                                partitions[2],
+                                                partition_schema,
+                                                &arena_),
+            "a IN () AND b IN (50, 100)");
 }
 
 // Test that hash(a), hash(b) IN list predicates should be pruned.
@@ -514,106 +857,98 @@ TEST_F(CompositeIntKeysTest, 
TestMultiHashKeyOneColumnInListHashPruning) {
 
   PartitionSchema partition_schema;
   GeneratePartitionSchema(schema,
-                          { pair<vector<std::string>, int>({ "a" }, 3),
-                            pair<vector<std::string>, int>({ "b" }, 3) },
+                          { pair<vector<string>, int>({ "a" }, 3),
+                            pair<vector<string>, int>({ "b" }, 3) },
+                          {},
                           &partition_schema);
 
   vector<Partition> partitions;
   ASSERT_OK(partition_schema.CreatePartitions({}, {}, {}, schema, 
&partitions));
   ASSERT_EQ(9, partitions.size());
 
-  // clone scan_spec for different partition.
-  ScanSpec spec_p1 = spec;
-  ScanSpec spec_p2 = spec;
-  ScanSpec spec_p3 = spec;
-  ScanSpec spec_p4 = spec;
-  ScanSpec spec_p5 = spec;
-  ScanSpec spec_p6 = spec;
-  ScanSpec spec_p7 = spec;
-  ScanSpec spec_p8 = spec;
-  ScanSpec spec_p9 = spec;
-
   // p1, p2, p3 should have the same predicate values to be pushed on hash(a).
   // p1, p4, p7 should have the same predicate values to be pushed on hash(b).
-  spec_p1.PruneHashForInlistIfPossible(schema, partitions[0], 
partition_schema);
-  spec_p1.OptimizeScan(schema, &arena_, true);
-  SCOPED_TRACE(spec_p1.ToString(schema));
-  EXPECT_EQ("PK >= (int8 a=4, int8 b=40, int8 c=-128) AND "
+  // pi refers to partitions[i-1], e.g. p1 = partitions[0].
+  ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
+                                                schema,
+                                                partitions[0],
+                                                partition_schema,
+                                                &arena_),
+            "PK >= (int8 a=4, int8 b=40, int8 c=-128) AND "
             "PK < (int8 a=8, int8 b=71, int8 c=-128) AND "
-            "a IN (4, 7, 8) AND b IN (40, 60, 70)",
-            spec_p1.ToString(schema));
-
-  spec_p2.PruneHashForInlistIfPossible(schema, partitions[1], 
partition_schema);
-  spec_p2.OptimizeScan(schema, &arena_, true);
-
-  SCOPED_TRACE(spec_p2.ToString(schema));
-  EXPECT_EQ("PK >= (int8 a=4, int8 b=20, int8 c=-128) AND "
+            "a IN (4, 7, 8) AND b IN (40, 60, 70)");
+
+  ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
+                                                schema,
+                                                partitions[1],
+                                                partition_schema,
+                                                &arena_),
+            "PK >= (int8 a=4, int8 b=20, int8 c=-128) AND "
             "PK < (int8 a=8, int8 b=51, int8 c=-128) AND "
-            "a IN (4, 7, 8) AND b IN (20, 30, 50)",
-            spec_p2.ToString(schema));
-
-  spec_p3.PruneHashForInlistIfPossible(schema, partitions[2], 
partition_schema);
-  spec_p3.OptimizeScan(schema, &arena_, true);
-
-  SCOPED_TRACE(spec_p3.ToString(schema));
-  EXPECT_EQ("PK >= (int8 a=4, int8 b=10, int8 c=-128) AND "
+            "a IN (4, 7, 8) AND b IN (20, 30, 50)");
+
+  ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
+                                                schema,
+                                                partitions[2],
+                                                partition_schema,
+                                                &arena_),
+            "PK >= (int8 a=4, int8 b=10, int8 c=-128) AND "
             "PK < (int8 a=8, int8 b=81, int8 c=-128) AND "
-            "a IN (4, 7, 8) AND b IN (10, 80)",
-            spec_p3.ToString(schema));
-
-  spec_p4.PruneHashForInlistIfPossible(schema, partitions[3], 
partition_schema);
-  spec_p4.OptimizeScan(schema, &arena_, true);
-
-  SCOPED_TRACE(spec_p4.ToString(schema));
-  EXPECT_EQ("PK >= (int8 a=0, int8 b=40, int8 c=-128) AND "
+            "a IN (4, 7, 8) AND b IN (10, 80)");
+
+  ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
+                                                schema,
+                                                partitions[3],
+                                                partition_schema,
+                                                &arena_),
+            "PK >= (int8 a=0, int8 b=40, int8 c=-128) AND "
             "PK < (int8 a=9, int8 b=71, int8 c=-128) AND "
-            "a IN (0, 2, 5, 9) AND b IN (40, 60, 70)",
-            spec_p4.ToString(schema));
-
-  spec_p5.PruneHashForInlistIfPossible(schema, partitions[4], 
partition_schema);
-  spec_p5.OptimizeScan(schema, &arena_, true);
-
-  SCOPED_TRACE(spec_p5.ToString(schema));
-  EXPECT_EQ("PK >= (int8 a=0, int8 b=20, int8 c=-128) AND "
+            "a IN (0, 2, 5, 9) AND b IN (40, 60, 70)");
+
+  ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
+                                                schema,
+                                                partitions[4],
+                                                partition_schema,
+                                                &arena_),
+            "PK >= (int8 a=0, int8 b=20, int8 c=-128) AND "
             "PK < (int8 a=9, int8 b=51, int8 c=-128) AND "
-            "a IN (0, 2, 5, 9) AND b IN (20, 30, 50)",
-            spec_p5.ToString(schema));
-
-  spec_p6.PruneHashForInlistIfPossible(schema, partitions[5], 
partition_schema);
-  spec_p6.OptimizeScan(schema, &arena_, true);
-
-  SCOPED_TRACE(spec_p6.ToString(schema));
-  EXPECT_EQ("PK >= (int8 a=0, int8 b=10, int8 c=-128) AND "
+            "a IN (0, 2, 5, 9) AND b IN (20, 30, 50)");
+
+  ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
+                                                schema,
+                                                partitions[5],
+                                                partition_schema,
+                                                &arena_),
+            "PK >= (int8 a=0, int8 b=10, int8 c=-128) AND "
             "PK < (int8 a=9, int8 b=81, int8 c=-128) AND "
-            "a IN (0, 2, 5, 9) AND b IN (10, 80)",
-            spec_p6.ToString(schema));
-
-  spec_p7.PruneHashForInlistIfPossible(schema, partitions[6], 
partition_schema);
-  spec_p7.OptimizeScan(schema, &arena_, true);
-
-  SCOPED_TRACE(spec_p7.ToString(schema));
-  EXPECT_EQ("PK >= (int8 a=1, int8 b=40, int8 c=-128) AND "
+            "a IN (0, 2, 5, 9) AND b IN (10, 80)");
+
+  ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
+                                                schema,
+                                                partitions[6],
+                                                partition_schema,
+                                                &arena_),
+            "PK >= (int8 a=1, int8 b=40, int8 c=-128) AND "
             "PK < (int8 a=6, int8 b=71, int8 c=-128) AND "
-            "a IN (1, 3, 6) AND b IN (40, 60, 70)",
-            spec_p7.ToString(schema));
-
-  spec_p8.PruneHashForInlistIfPossible(schema, partitions[7], 
partition_schema);
-  spec_p8.OptimizeScan(schema, &arena_, true);
-
-  SCOPED_TRACE(spec_p8.ToString(schema));
-  EXPECT_EQ("PK >= (int8 a=1, int8 b=20, int8 c=-128) AND "
+            "a IN (1, 3, 6) AND b IN (40, 60, 70)");
+
+  ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
+                                                schema,
+                                                partitions[7],
+                                                partition_schema,
+                                                &arena_),
+            "PK >= (int8 a=1, int8 b=20, int8 c=-128) AND "
             "PK < (int8 a=6, int8 b=51, int8 c=-128) AND "
-            "a IN (1, 3, 6) AND b IN (20, 30, 50)",
-            spec_p8.ToString(schema));
-
-  spec_p9.PruneHashForInlistIfPossible(schema, partitions[8], 
partition_schema);
-  spec_p9.OptimizeScan(schema, &arena_, true);
-
-  SCOPED_TRACE(spec_p9.ToString(schema));
-  EXPECT_EQ("PK >= (int8 a=1, int8 b=10, int8 c=-128) AND "
+            "a IN (1, 3, 6) AND b IN (20, 30, 50)");
+
+  ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
+                                                schema,
+                                                partitions[8],
+                                                partition_schema,
+                                                &arena_),
+            "PK >= (int8 a=1, int8 b=10, int8 c=-128) AND "
             "PK < (int8 a=6, int8 b=81, int8 c=-128) AND "
-            "a IN (1, 3, 6) AND b IN (10, 80)",
-            spec_p9.ToString(schema));
+            "a IN (1, 3, 6) AND b IN (10, 80)");
 }
 
 // Test that hash(a, b) IN list predicates should not be pruned.
@@ -626,46 +961,40 @@ TEST_F(CompositeIntKeysTest, 
TesMultiHashColumnsInListHashPruning) {
 
   PartitionSchema partition_schema;
   GeneratePartitionSchema(schema,
-                          { pair<vector<std::string>, int>({ "a", "b" }, 3) },
+                          { pair<vector<string>, int>({ "a", "b" }, 3) },
+                          {},
                           &partition_schema);
 
   vector<Partition> partitions;
   ASSERT_OK(partition_schema.CreatePartitions({}, {}, {}, schema, 
&partitions));
   ASSERT_EQ(3, partitions.size());
 
-  // clone scan_spec for different partition.
-  ScanSpec spec_p1 = spec;
-  ScanSpec spec_p2 = spec;
-  ScanSpec spec_p3 = spec;
-
-  spec_p1.PruneHashForInlistIfPossible(schema, partitions[0], 
partition_schema);
-  spec_p1.OptimizeScan(schema, &arena_, true);
-
-  // Verify that the predicates to be pushed to different partition should be
-  // the same when no hash prune happened.
-  SCOPED_TRACE(spec_p1.ToString(schema));
-  EXPECT_EQ("PK >= (int8 a=0, int8 b=50, int8 c=-128) AND "
+  ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
+                                                schema,
+                                                partitions[0],
+                                                partition_schema,
+                                                &arena_),
+            "PK >= (int8 a=0, int8 b=50, int8 c=-128) AND "
             "PK < (int8 a=9, int8 b=101, int8 c=-128) AND "
-            "a IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9) AND b IN (50, 100)",
-            spec_p1.ToString(schema));
-
-  spec_p2.PruneHashForInlistIfPossible(schema, partitions[1], 
partition_schema);
-  spec_p2.OptimizeScan(schema, &arena_, true);
-
-  SCOPED_TRACE(spec_p2.ToString(schema));
-  EXPECT_EQ("PK >= (int8 a=0, int8 b=50, int8 c=-128) AND "
+            "a IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9) AND b IN (50, 100)");
+
+  ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
+                                                schema,
+                                                partitions[1],
+                                                partition_schema,
+                                                &arena_),
+            "PK >= (int8 a=0, int8 b=50, int8 c=-128) AND "
             "PK < (int8 a=9, int8 b=101, int8 c=-128) AND "
-            "a IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9) AND b IN (50, 100)",
-            spec_p2.ToString(schema));
-
-  spec_p3.PruneHashForInlistIfPossible(schema, partitions[2], 
partition_schema);
-  spec_p3.OptimizeScan(schema, &arena_, true);
-
-  SCOPED_TRACE(spec_p3.ToString(schema));
-  EXPECT_EQ("PK >= (int8 a=0, int8 b=50, int8 c=-128) AND "
+            "a IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9) AND b IN (50, 100)");
+
+  ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
+                                                schema,
+                                                partitions[2],
+                                                partition_schema,
+                                                &arena_),
+            "PK >= (int8 a=0, int8 b=50, int8 c=-128) AND "
             "PK < (int8 a=9, int8 b=101, int8 c=-128) AND "
-            "a IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9) AND b IN (50, 100)",
-            spec_p3.ToString(schema));
+            "a IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9) AND b IN (50, 100)");
 }
 
 // Test that hash(a, b), hash(c) InList predicates.
@@ -681,107 +1010,95 @@ TEST_F(CompositeIntKeysTest, 
TesMultiHashKeyMultiHashInListHashPruning) {
 
   PartitionSchema partition_schema;
   GeneratePartitionSchema(schema,
-                          { pair<vector<std::string>, int>({ "a", "b" }, 3),
-                            pair<vector<std::string>, int>({ "c" }, 3) },
+                          { pair<vector<string>, int>({ "a", "b" }, 3),
+                            pair<vector<string>, int>({ "c" }, 3) },
+                          {},
                           &partition_schema);
 
   vector<Partition> partitions;
   ASSERT_OK(partition_schema.CreatePartitions({}, {}, {}, schema, 
&partitions));
   ASSERT_EQ(9, partitions.size());
 
-  // clone scan_spec for different partition.
-  ScanSpec spec_p1 = spec;
-  ScanSpec spec_p2 = spec;
-  ScanSpec spec_p3 = spec;
-  ScanSpec spec_p4 = spec;
-  ScanSpec spec_p5 = spec;
-  ScanSpec spec_p6 = spec;
-  ScanSpec spec_p7 = spec;
-  ScanSpec spec_p8 = spec;
-  ScanSpec spec_p9 = spec;
-
-  spec_p1.PruneHashForInlistIfPossible(schema, partitions[0], 
partition_schema);
-  spec_p1.OptimizeScan(schema, &arena_, true);
-
-  // hash(a, b) should not be pruned, hash(c) should be pruned.
-  // p1, p4, p7 should have the same values to be pushed.
-  SCOPED_TRACE(spec_p1.ToString(schema));
-  EXPECT_EQ("PK >= (int8 a=0, int8 b=50, int8 c=40) AND "
+  ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
+                                                schema,
+                                                partitions[0],
+                                                partition_schema,
+                                                &arena_),
+            "PK >= (int8 a=0, int8 b=50, int8 c=40) AND "
             "PK < (int8 a=9, int8 b=100, int8 c=71) AND "
-            "a IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9) AND b IN (50, 100) AND c IN 
(40, 60, 70)",
-            spec_p1.ToString(schema));
-
-  spec_p2.PruneHashForInlistIfPossible(schema, partitions[1], 
partition_schema);
-  spec_p2.OptimizeScan(schema, &arena_, true);
-
-  SCOPED_TRACE(spec_p2.ToString(schema));
-  EXPECT_EQ("PK >= (int8 a=0, int8 b=50, int8 c=20) AND "
+            "a IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9) AND b IN (50, 100) AND c IN 
(40, 60, 70)");
+
+  ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
+                                                schema,
+                                                partitions[1],
+                                                partition_schema,
+                                                &arena_),
+            "PK >= (int8 a=0, int8 b=50, int8 c=20) AND "
             "PK < (int8 a=9, int8 b=100, int8 c=51) AND "
-            "a IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9) AND b IN (50, 100) AND c IN 
(20, 30, 50)",
-            spec_p2.ToString(schema));
-
-  spec_p3.PruneHashForInlistIfPossible(schema, partitions[2], 
partition_schema);
-  spec_p3.OptimizeScan(schema, &arena_, true);
-
-  SCOPED_TRACE(spec_p3.ToString(schema));
-  EXPECT_EQ("PK >= (int8 a=0, int8 b=50, int8 c=80) AND "
+            "a IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9) AND b IN (50, 100) AND c IN 
(20, 30, 50)");
+
+  ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
+                                                schema,
+                                                partitions[2],
+                                                partition_schema,
+                                                &arena_),
+            "PK >= (int8 a=0, int8 b=50, int8 c=80) AND "
             "PK < (int8 a=9, int8 b=100, int8 c=91) AND "
-            "a IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9) AND b IN (50, 100) AND c IN 
(80, 90)",
-            spec_p3.ToString(schema));
-
-  spec_p4.PruneHashForInlistIfPossible(schema, partitions[3], 
partition_schema);
-  spec_p4.OptimizeScan(schema, &arena_, true);
-
-  SCOPED_TRACE(spec_p4.ToString(schema));
-  EXPECT_EQ("PK >= (int8 a=0, int8 b=50, int8 c=40) AND "
+            "a IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9) AND b IN (50, 100) AND c IN 
(80, 90)");
+
+  ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
+                                                schema,
+                                                partitions[3],
+                                                partition_schema,
+                                                &arena_),
+            "PK >= (int8 a=0, int8 b=50, int8 c=40) AND "
             "PK < (int8 a=9, int8 b=100, int8 c=71) AND "
-            "a IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9) AND b IN (50, 100) AND c IN 
(40, 60, 70)",
-            spec_p4.ToString(schema));
-
-  spec_p5.PruneHashForInlistIfPossible(schema, partitions[4], 
partition_schema);
-  spec_p5.OptimizeScan(schema, &arena_, true);
-
-  SCOPED_TRACE(spec_p5.ToString(schema));
-  EXPECT_EQ("PK >= (int8 a=0, int8 b=50, int8 c=20) AND "
+            "a IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9) AND b IN (50, 100) AND c IN 
(40, 60, 70)");
+
+  ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
+                                                schema,
+                                                partitions[4],
+                                                partition_schema,
+                                                &arena_),
+            "PK >= (int8 a=0, int8 b=50, int8 c=20) AND "
             "PK < (int8 a=9, int8 b=100, int8 c=51) AND "
-            "a IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9) AND b IN (50, 100) AND c IN 
(20, 30, 50)",
-            spec_p5.ToString(schema));
-
-  spec_p6.PruneHashForInlistIfPossible(schema, partitions[5], 
partition_schema);
-  spec_p6.OptimizeScan(schema, &arena_, true);
-
-  SCOPED_TRACE(spec_p6.ToString(schema));
-  EXPECT_EQ("PK >= (int8 a=0, int8 b=50, int8 c=80) AND "
+            "a IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9) AND b IN (50, 100) AND c IN 
(20, 30, 50)");
+
+  ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
+                                                schema,
+                                                partitions[5],
+                                                partition_schema,
+                                                &arena_),
+            "PK >= (int8 a=0, int8 b=50, int8 c=80) AND "
             "PK < (int8 a=9, int8 b=100, int8 c=91) AND "
-            "a IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9) AND b IN (50, 100) AND c IN 
(80, 90)",
-            spec_p6.ToString(schema));
-
-  spec_p7.PruneHashForInlistIfPossible(schema, partitions[6], 
partition_schema);
-  spec_p7.OptimizeScan(schema, &arena_, true);
-
-  SCOPED_TRACE(spec_p7.ToString(schema));
-  EXPECT_EQ("PK >= (int8 a=0, int8 b=50, int8 c=40) AND "
+            "a IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9) AND b IN (50, 100) AND c IN 
(80, 90)");
+
+  ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
+                                                schema,
+                                                partitions[6],
+                                                partition_schema,
+                                                &arena_),
+            "PK >= (int8 a=0, int8 b=50, int8 c=40) AND "
             "PK < (int8 a=9, int8 b=100, int8 c=71) AND "
-            "a IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9) AND b IN (50, 100) AND c IN 
(40, 60, 70)",
-            spec_p7.ToString(schema));
-
-  spec_p8.PruneHashForInlistIfPossible(schema, partitions[7], 
partition_schema);
-  spec_p8.OptimizeScan(schema, &arena_, true);
-
-  SCOPED_TRACE(spec_p8.ToString(schema));
-  EXPECT_EQ("PK >= (int8 a=0, int8 b=50, int8 c=20) AND "
+            "a IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9) AND b IN (50, 100) AND c IN 
(40, 60, 70)");
+
+  ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
+                                                schema,
+                                                partitions[7],
+                                                partition_schema,
+                                                &arena_),
+            "PK >= (int8 a=0, int8 b=50, int8 c=20) AND "
             "PK < (int8 a=9, int8 b=100, int8 c=51) AND "
-            "a IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9) AND b IN (50, 100) AND c IN 
(20, 30, 50)",
-            spec_p8.ToString(schema));
-
-  spec_p9.PruneHashForInlistIfPossible(schema, partitions[8], 
partition_schema);
-  spec_p9.OptimizeScan(schema, &arena_, true);
-
-  SCOPED_TRACE(spec_p9.ToString(schema));
-  EXPECT_EQ("PK >= (int8 a=0, int8 b=50, int8 c=80) AND "
+            "a IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9) AND b IN (50, 100) AND c IN 
(20, 30, 50)");
+
+  ASSERT_EQ(PruneInlistValuesAndGetSchemaString(spec,
+                                                schema,
+                                                partitions[8],
+                                                partition_schema,
+                                                &arena_),
+            "PK >= (int8 a=0, int8 b=50, int8 c=80) AND "
             "PK < (int8 a=9, int8 b=100, int8 c=91) AND "
-            "a IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9) AND b IN (50, 100) AND c IN 
(80, 90)",
-            spec_p9.ToString(schema));
+            "a IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9) AND b IN (50, 100) AND c IN 
(80, 90)");
 }
 
 // Test that IN list mixed with range predicates get pushed into the primary 
key
@@ -1139,7 +1456,7 @@ TEST_F(CompositeIntKeysTest, TestGetMissingColumns) {
     Schema projection({ ColumnSchema("e", INT8) }, 0);
     vector<ColumnSchema> missing_cols = spec.GetMissingColumns(projection);
     EXPECT_EQ(2, missing_cols.size());
-    std::string missing_cols_str = ToString(missing_cols);
+    string missing_cols_str = ToString(missing_cols);
     EXPECT_STR_CONTAINS(missing_cols_str, "b INT8");
     EXPECT_STR_CONTAINS(missing_cols_str, "d INT8");
   }
@@ -1149,7 +1466,7 @@ TEST_F(CompositeIntKeysTest, TestGetMissingColumns) {
                         ColumnSchema("e", INT8) }, 0);
     vector<ColumnSchema> missing_cols = spec.GetMissingColumns(projection);
     EXPECT_EQ(1, missing_cols.size());
-    std::string missing_cols_str = ToString(missing_cols);
+    string missing_cols_str = ToString(missing_cols);
     EXPECT_STR_CONTAINS(missing_cols_str, "b INT8");
   }
 
diff --git a/src/kudu/common/scan_spec.cc b/src/kudu/common/scan_spec.cc
index fc519d2..e82714f 100644
--- a/src/kudu/common/scan_spec.cc
+++ b/src/kudu/common/scan_spec.cc
@@ -171,9 +171,9 @@ void ScanSpec::OptimizeScan(const Schema& schema,
   }
 }
 
-void ScanSpec::PruneHashForInlistIfPossible(const Schema& schema,
-                                            const Partition& partition,
-                                            const PartitionSchema& 
partition_schema) {
+void ScanSpec::PruneInlistValuesIfPossible(const Schema& schema,
+                                           const Partition& partition,
+                                           const PartitionSchema& 
partition_schema) {
   for (auto& predicate_pair : predicates_) {
     auto& predicate = predicate_pair.second;
     if (predicate.predicate_type() != PredicateType::InList) continue;
@@ -184,19 +184,36 @@ void ScanSpec::PruneHashForInlistIfPossible(const Schema& 
schema,
     if (!s.ok() || !schema.is_key_column(idx)) continue;
 
     int hash_idx = 
partition_schema.TryGetSingleColumnHashPartitionIndex(schema, idx);
-    if (hash_idx == -1) continue;
+    bool is_col_single_range_schema = 
partition_schema.IsColumnSingleRangeSchema(schema, idx);
+    if (hash_idx == -1 && !is_col_single_range_schema) continue;
 
     auto* predicate_values = predicate.mutable_raw_values();
+
     predicate_values->erase(std::remove_if(predicate_values->begin(), 
predicate_values->end(),
-          [idx, hash_idx, &schema, &partition, &partition_schema](const void* 
value) {
+          [idx, hash_idx, is_col_single_range_schema,
+           &schema, &partition, &partition_schema](const void* value) {
+        // Returning true indicates that this value will be removed from the 
predicate values.
         KuduPartialRow partial_row(&schema);
         Status s = partial_row.Set(idx, reinterpret_cast<const 
uint8_t*>(value));
         if (!s.ok()) return false;
-        bool is_value_in;
-        s = partition_schema.HashPartitionContainsRow(partition, partial_row,
-                                                      hash_idx, &is_value_in);
-        if (!s.ok()) return false;
-        return !is_value_in;
+
+        // If the value is not in the given hash partition, remove it from the 
predicate values.
+        if (hash_idx != -1) {
+          bool is_value_in;
+          s = partition_schema.HashPartitionContainsRow(partition, partial_row,
+                                                        hash_idx, 
&is_value_in);
+          if (!s.ok()) return false;
+          if (!is_value_in) return true;
+        }
+
+        // If value is not in given range partition, remove this value from 
predicate values.
+        if (is_col_single_range_schema) {
+          bool is_value_in;
+          s = partition_schema.RangePartitionContainsRow(partition, 
partial_row, &is_value_in);
+          if (!s.ok()) return false;
+          if (!is_value_in) return true;
+        }
+        return false;
       }), predicate_values->end());
   }
 }
diff --git a/src/kudu/common/scan_spec.h b/src/kudu/common/scan_spec.h
index 3963f17..9843dae 100644
--- a/src/kudu/common/scan_spec.h
+++ b/src/kudu/common/scan_spec.h
@@ -74,18 +74,24 @@ class ScanSpec {
                     bool remove_pushed_predicates);
 
   // Filter in-list predicate values with given hash partition schema.
-  // Only supports pruning for single-column hash schemas.
+  // If range partition is introduced when creating table, in-list predicate
+  // can also benefit from this pruning.
+  //
+  // Only supports pruning for single-column hash schemas or single-column 
range schema.
   // Now support hash prune on:
   //     hash(onekey), # support.
+  //     range(onekey), # support.
   //     hash(onekey), hash(anotherkey) # support either.
-  //     hash(key_one, key_two), hash(anotherkey) # support only prune on 
anotherkey.
+  //     hash(onekey), range(anotherkey) # support either.
+  //     hash(key_one, key_two), hash(anotherkey) # only support prune on 
anotherkey.
+  //     range(key_one, key_two) # not supported.
   //
-  // TODO(ningw) For IN list predicate on hash(key_one, key_two) or more 
columns,
+  // TODO(ningw) For IN list predicate on hash/range(key_one, key_two) or more 
columns,
   // if one predicate is IN list, and the rest predicate(s) are EQUAL, could
   // have IN list predicate values prune as well.
-  void PruneHashForInlistIfPossible(const Schema& schema,
-                                    const Partition& partition,
-                                    const PartitionSchema& partition_schema);
+  void PruneInlistValuesIfPossible(const Schema& schema,
+                                   const Partition& partition,
+                                   const PartitionSchema& partition_schema);
 
   // Get columns that are present in the predicates but not in the projection
   std::vector<ColumnSchema> GetMissingColumns(const Schema& projection);
diff --git a/src/kudu/tools/kudu-admin-test.cc 
b/src/kudu/tools/kudu-admin-test.cc
index f549e74..cbd5a68 100644
--- a/src/kudu/tools/kudu-admin-test.cc
+++ b/src/kudu/tools/kudu-admin-test.cc
@@ -2458,7 +2458,7 @@ TEST_F(AdminCliTest, TestAddAndDropUnboundedPartition) {
   });
 
   // Since the unbounded partition has been dropped, now we can add a new 
unbounded
-  // range parititon for the table.
+  // range partition for the table.
   s = RunKuduTool({
     "table",
     "add_range_partition",
diff --git a/src/kudu/tserver/tablet_service.cc 
b/src/kudu/tserver/tablet_service.cc
index aa0b905..7224aad 100644
--- a/src/kudu/tserver/tablet_service.cc
+++ b/src/kudu/tserver/tablet_service.cc
@@ -2698,9 +2698,9 @@ Status 
TabletServiceImpl::HandleNewScanRequest(TabletReplica* replica,
   }
 
   VLOG(3) << "Before optimizing scan spec: " << spec.ToString(tablet_schema);
-  spec.PruneHashForInlistIfPossible(tablet_schema,
-                                    replica->tablet_metadata()->partition(),
-                                    
replica->tablet_metadata()->partition_schema());
+  spec.PruneInlistValuesIfPossible(tablet_schema,
+                                   replica->tablet_metadata()->partition(),
+                                   
replica->tablet_metadata()->partition_schema());
   spec.OptimizeScan(tablet_schema, scanner->arena(), true);
   VLOG(3) << "After optimizing scan spec: " << spec.ToString(tablet_schema);

[kudu] 01/02: KUDU-1644 use range partition info for pruning

Reply via email to