This is an automated email from the ASF dual-hosted git repository.
adar pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git
The following commit(s) were added to refs/heads/master by this push:
new 88ebbea [client] Remove range bounds in Bloom filter predicate
88ebbea is described below
commit 88ebbea77cf41d2082aaee30c9f8d4b88a1859c6
Author: Bankim Bhavsar <[email protected]>
AuthorDate: Wed Feb 19 07:06:19 2020 -0800
[client] Remove range bounds in Bloom filter predicate
Lower and upper range bounds are required to allow merging
a range predicate with Bloom filter predicate on the same column.
Since the merging is done on the client side, the range bounds
are required in the wire protocol but we don't need them in
the public client APIs as client allows combining multiple predicates.
Change-Id: I60fa19e69ececa97e96eb6ed5f082406b0aab890
Reviewed-on: http://gerrit.cloudera.org:8080/15247
Tested-by: Kudu Jenkins
Reviewed-by: Adar Dembo <[email protected]>
---
src/kudu/client/client.cc | 21 ++++++--------------
src/kudu/client/client.h | 12 ++---------
src/kudu/client/predicate-test.cc | 22 ++++++++++++++++++++-
src/kudu/client/scan_predicate-internal.h | 12 ++---------
src/kudu/client/scan_predicate.cc | 33 ++-----------------------------
src/kudu/common/column_predicate.h | 5 +++--
src/kudu/common/common.proto | 8 ++++----
7 files changed, 40 insertions(+), 73 deletions(-)
diff --git a/src/kudu/client/client.cc b/src/kudu/client/client.cc
index 28cee2a..a859a1a 100644
--- a/src/kudu/client/client.cc
+++ b/src/kudu/client/client.cc
@@ -971,24 +971,17 @@ KuduPredicate* KuduTable::NewComparisonPredicate(const
Slice& col_name,
});
}
-KuduPredicate* KuduTable::NewInBloomFilterPredicate(
- const Slice& col_name,
- vector<KuduBloomFilter*>* bloom_filters,
- KuduValue* lower_bound_inclusive,
- KuduValue* upper_bound_exclusive) {
+KuduPredicate* KuduTable::NewInBloomFilterPredicate(const Slice& col_name,
+
std::vector<KuduBloomFilter*>* bloom_filters) {
// We always take ownership of values; this ensures cleanup if the predicate
is invalid.
auto cleanup = MakeScopedCleanup([&]() {
STLDeleteElements(bloom_filters);
});
- unique_ptr<KuduValue> lower_bound_inclusive_uniq_ptr(lower_bound_inclusive);
- unique_ptr<KuduValue> upper_bound_exclusive_uniq_ptr(upper_bound_exclusive);
-
- // Empty vector of bloom filters along with no upper or lower bound will
select
- // all rows. Hence disallowed.
- if (bloom_filters->empty() && !lower_bound_inclusive &&
!upper_bound_exclusive) {
+ // Empty vector of bloom filters will select all rows. Hence disallowed.
+ if (bloom_filters->empty()) {
return new KuduPredicate(
- new ErrorPredicateData(Status::InvalidArgument("No predicates
supplied")));
+ new ErrorPredicateData(Status::InvalidArgument("No Bloom filters
supplied")));
}
// Transfer the Bloom filter raw ptrs over to vector of unique ptrs.
@@ -1008,9 +1001,7 @@ KuduPredicate* KuduTable::NewInBloomFilterPredicate(
// not only deletes pointers contained in the vector but also clears the
vector
// and we want the vector be cleared as expected by the caller.
return new KuduPredicate(
- new InBloomFilterPredicateData(col_schema,
std::move(bloom_filters_owned),
-
std::move(lower_bound_inclusive_uniq_ptr),
-
std::move(upper_bound_exclusive_uniq_ptr)));
+ new InBloomFilterPredicateData(col_schema,
std::move(bloom_filters_owned)));
});
}
diff --git a/src/kudu/client/client.h b/src/kudu/client/client.h
index 90cdb8a..dc4ad95 100644
--- a/src/kudu/client/client.h
+++ b/src/kudu/client/client.h
@@ -1105,21 +1105,13 @@ class KUDU_EXPORT KuduTable : public
sp::enable_shared_from_this<KuduTable> {
/// supplied Bloom filters to be returned by the scanner. On return,
/// regardless of success or error, the returned predicate will take
/// ownership of the pointers contained in @c bloom_filters.
- /// @param [in] lower_bound_inclusive
- /// @param [in] upper_bound_exclusive
- /// Optional [lower_bound, upper_bound) filters where type of the bound
- /// must correspond to the value of the column to which the predicate is
- /// to be applied.
/// @return Raw pointer to an IN Bloom filter predicate. The caller owns the
/// predicate until it is passed into KuduScanner::AddConjunctPredicate().
/// In the case of an error (e.g. an invalid column name),
/// a non-NULL value is still returned. The error will be returned when
/// attempting to add this predicate to a KuduScanner.
- KuduPredicate* NewInBloomFilterPredicate(
- const Slice& col_name,
- std::vector<KuduBloomFilter*>* bloom_filters,
- KuduValue* lower_bound_inclusive = NULL, // NOLINT(modernize-use-nullptr)
- KuduValue* upper_bound_exclusive = NULL); //
NOLINT(modernize-use-nullptr)
+ KuduPredicate* NewInBloomFilterPredicate(const Slice& col_name,
+ std::vector<KuduBloomFilter*>*
bloom_filters);
/// Create a new IN list predicate which can be used for scanners on this
/// table.
diff --git a/src/kudu/client/predicate-test.cc
b/src/kudu/client/predicate-test.cc
index 78138c1..2db9828 100644
--- a/src/kudu/client/predicate-test.cc
+++ b/src/kudu/client/predicate-test.cc
@@ -147,6 +147,7 @@ class PredicateTest : public KuduTest {
const vector<KuduPredicate*>& predicates) {
vector<KuduPredicate*> cloned_predicates;
+ cloned_predicates.reserve(predicates.size());
for (KuduPredicate* pred : predicates) {
cloned_predicates.push_back(pred->Clone());
}
@@ -699,7 +700,7 @@ TEST_F(PredicateTest, TestBoolPredicates) {
KuduScanner scanner(table.get());
Status s = scanner.AddConjunctPredicate(bf_predicate);
ASSERT_TRUE(s.IsInvalidArgument());
- ASSERT_STR_CONTAINS(s.ToString(), "No predicates supplied");
+ ASSERT_STR_CONTAINS(s.ToString(), "No Bloom filters supplied");
}
{ // BloomFilter with (true)
@@ -1291,6 +1292,7 @@ TEST_F(BloomFilterPredicateTest,
TestBloomFilterPredicate) {
auto all_values = CreateRandomUniqueIntegers<int32_t>(kNumAllValues,
empty_set, &rand);
vector<int32_t> inclusive_values;
ReservoirSample(all_values, kNumInclusiveValues, empty_set, &rand,
&inclusive_values);
+ auto min_max_pair = std::minmax_element(inclusive_values.begin(),
inclusive_values.end());
auto* inclusive_bf = CreateBloomFilterWithValues(inclusive_values);
auto exclusive_values =
CreateRandomUniqueIntegers<int32_t>(kNumExclusiveValues, all_values,
&rand);
@@ -1308,6 +1310,9 @@ TEST_F(BloomFilterPredicateTest,
TestBloomFilterPredicate) {
vector<KuduBloomFilter*> inclusive_bf_vec = { inclusive_bf };
auto* inclusive_predicate =
table->NewInBloomFilterPredicate("value", &inclusive_bf_vec);
+ auto* inclusive_predicate_clone1 = inclusive_predicate->Clone();
+ auto* inclusive_predicate_clone2 = inclusive_predicate->Clone();
+
ASSERT_TRUE(inclusive_bf_vec.empty());
int actual_count_inclusive = CountRows(table, { inclusive_predicate });
EXPECT_LE(inclusive_values.size(), actual_count_inclusive);
@@ -1320,6 +1325,21 @@ TEST_F(BloomFilterPredicateTest,
TestBloomFilterPredicate) {
int actual_count_exclusive = CountRows(table, { exclusive_predicate });
EXPECT_LE(0, actual_count_exclusive);
EXPECT_GE(kFalsePositives, actual_count_exclusive);
+
+ // Combine Range predicate with Bloom filter predicate.
+ auto* less_predicate = table->NewComparisonPredicate("value",
KuduPredicate::LESS,
+
KuduValue::FromInt(*min_max_pair.first));
+ int actual_count_less = CountRows(table, {inclusive_predicate_clone1,
less_predicate });
+ EXPECT_EQ(0, actual_count_less);
+
+ auto* ge_predicate = table->NewComparisonPredicate("value",
KuduPredicate::GREATER_EQUAL,
+
KuduValue::FromInt(*min_max_pair.first));
+ auto* le_predicate = table->NewComparisonPredicate("value",
KuduPredicate::LESS_EQUAL,
+
KuduValue::FromInt(*min_max_pair.second));
+ int actual_count_range = CountRows(table,
+ { inclusive_predicate_clone2,
ge_predicate, le_predicate });
+ EXPECT_LE(inclusive_values.size(), actual_count_range);
+ EXPECT_GE(inclusive_values.size() + kFalsePositives, actual_count_range);
}
class ParameterizedPredicateTest : public PredicateTest,
diff --git a/src/kudu/client/scan_predicate-internal.h
b/src/kudu/client/scan_predicate-internal.h
index 41359f1..8ba7a8e 100644
--- a/src/kudu/client/scan_predicate-internal.h
+++ b/src/kudu/client/scan_predicate-internal.h
@@ -95,13 +95,9 @@ class ComparisonPredicateData : public KuduPredicate::Data {
class InBloomFilterPredicateData : public KuduPredicate::Data {
public:
InBloomFilterPredicateData(ColumnSchema col,
- std::vector<std::unique_ptr<KuduBloomFilter>>
bloom_filters,
- std::unique_ptr<KuduValue> lower = nullptr,
- std::unique_ptr<KuduValue> upper = nullptr)
+ std::vector<std::unique_ptr<KuduBloomFilter>>
bloom_filters)
: col_(std::move(col)),
- bloom_filters_(std::move(bloom_filters)),
- lower_(std::move(lower)),
- upper_(std::move(upper)) {
+ bloom_filters_(std::move(bloom_filters)) {
}
Status AddToScanSpec(ScanSpec* spec, Arena* arena) override;
@@ -109,12 +105,8 @@ class InBloomFilterPredicateData : public
KuduPredicate::Data {
InBloomFilterPredicateData* Clone() const override;
private:
- Status CheckTypeAndGetPointer(KuduValue* val_in, void** val_out) const;
-
ColumnSchema col_;
std::vector<std::unique_ptr<KuduBloomFilter>> bloom_filters_;
- std::unique_ptr<KuduValue> lower_;
- std::unique_ptr<KuduValue> upper_;
};
// A list predicate for a column and a list of constant values.
diff --git a/src/kudu/client/scan_predicate.cc
b/src/kudu/client/scan_predicate.cc
index 05657dd..de32e99 100644
--- a/src/kudu/client/scan_predicate.cc
+++ b/src/kudu/client/scan_predicate.cc
@@ -154,25 +154,7 @@ Status InListPredicateData::AddToScanSpec(ScanSpec* spec,
Arena* /*arena*/) {
return Status::OK();
}
-Status InBloomFilterPredicateData::CheckTypeAndGetPointer(KuduValue* val_in,
- void** val_out)
const {
- return val_in->data_->CheckTypeAndGetPointer(col_.name(),
- col_.type_info()->type(),
- col_.type_attributes(),
- val_out);
-}
-
Status InBloomFilterPredicateData::AddToScanSpec(ScanSpec* spec, Arena*
/*arena*/) {
- void* lower_void = nullptr;
- if (lower_) {
- RETURN_NOT_OK(CheckTypeAndGetPointer(lower_.get(), &lower_void));
- }
-
- void* upper_void = nullptr;
- if (upper_) {
- RETURN_NOT_OK(CheckTypeAndGetPointer(upper_.get(), &upper_void));
- }
-
// Extract the BlockBloomFilters.
vector<BlockBloomFilter*> block_bloom_filters;
block_bloom_filters.reserve(bloom_filters_.size());
@@ -180,29 +162,18 @@ Status
InBloomFilterPredicateData::AddToScanSpec(ScanSpec* spec, Arena* /*arena*
block_bloom_filters.push_back(bf->data_->bloom_filter_.get());
}
- spec->AddPredicate(ColumnPredicate::InBloomFilter(col_,
std::move(block_bloom_filters),
- lower_void, upper_void));
+ spec->AddPredicate(ColumnPredicate::InBloomFilter(col_,
std::move(block_bloom_filters)));
return Status::OK();
}
InBloomFilterPredicateData* client::InBloomFilterPredicateData::Clone() const {
- unique_ptr<KuduValue> lower_clone;
- if (lower_) {
- lower_clone.reset(lower_->Clone());
- }
- unique_ptr<KuduValue> upper_clone;
- if (upper_) {
- upper_clone.reset(upper_->Clone());
- }
-
vector<unique_ptr<KuduBloomFilter>> bloom_filter_clones;
bloom_filter_clones.reserve(bloom_filters_.size());
for (const auto& bf : bloom_filters_) {
bloom_filter_clones.emplace_back(bf->Clone());
}
- return new InBloomFilterPredicateData(col_, std::move(bloom_filter_clones),
- std::move(lower_clone),
std::move(upper_clone));
+ return new InBloomFilterPredicateData(col_, std::move(bloom_filter_clones));
}
KuduBloomFilter::KuduBloomFilter() {
diff --git a/src/kudu/common/column_predicate.h
b/src/kudu/common/column_predicate.h
index e94b492..ce5aa78 100644
--- a/src/kudu/common/column_predicate.h
+++ b/src/kudu/common/column_predicate.h
@@ -141,10 +141,11 @@ class ColumnPredicate {
// Create a new BloomFilter predicate for the column.
//
// The values are not copied, and must outlive the returned predicate.
+ // Optional "lower" and "upper" help with merging a Range predicate.
static ColumnPredicate InBloomFilter(ColumnSchema column,
std::vector<BlockBloomFilter*> bfs,
- const void* lower,
- const void* upper);
+ const void* lower = nullptr,
+ const void* upper = nullptr);
// Creates a new predicate which matches no values.
static ColumnPredicate None(ColumnSchema column);
diff --git a/src/kudu/common/common.proto b/src/kudu/common/common.proto
index 89afee2..559cd28 100644
--- a/src/kudu/common/common.proto
+++ b/src/kudu/common/common.proto
@@ -404,10 +404,10 @@ message ColumnPredicatePB {
// A list of bloom filters for the field.
repeated BlockBloomFilterPB bloom_filters = 1;
- // Lower and Upper is optional for InBloomFilter.
- // When use both InBloomFilter and Range predicate for the same column the
- // merge result can be InBloomFilter whith range bound inside. And the
lower
- // and upper works just like they in Range predicate.
+ // lower and upper are optional for InBloomFilter.
+ // When using both InBloomFilter and Range predicate for the same column
the
+ // merged result can be InBloomFilter within specified range.
+ //
// The inclusive lower bound.
optional bytes lower = 2 [(kudu.REDACT) = true];