This is an automated email from the ASF dual-hosted git repository.
alexey pushed a commit to branch branch-1.18.x
in repository https://gitbox.apache.org/repos/asf/kudu.git
The following commit(s) were added to refs/heads/branch-1.18.x by this push:
new 6bda3eac3 [common] ColumnPredicate clean-up
6bda3eac3 is described below
commit 6bda3eac351cac646636bb8051571c84030fc0d3
Author: Alexey Serbin <[email protected]>
AuthorDate: Tue Apr 22 17:37:47 2025 -0700
[common] ColumnPredicate clean-up
Since I'm planning to update the code in common/column_predicate.cc
in follow-up changelists, in this separate patch I cleaned up
the code a bit:
* use 'if constexpr' where possible for cleaner code
* clean up CHECK/DCHECK assertions
* make it C++ style guide-compliant
* address other minor issues reported by linters
I verified that the new code's benchmark numbers didn't degrade
by running and comparing output of
KUDU_ALLOW_SLOW_TESTS=1 ./bin/column_predicate-test
Specifically, by running NullPredicateBenchmark.TestIsNotNull multiple
times before and after this patch at the same machine, I observed better
numbers in average, with typical numbers like below:
Before:
int32 NOT NULL (c IS NOT NULL) 91937.5M evals/sec 0.03 cycles/eval
int32 NULL (c IS NOT NULL) 93090.9M evals/sec 0.03 cycles/eval
NullPredicateBenchmark.TestIsNotNull (23 ms)
After:
int32 NOT NULL (c IS NOT NULL) 104682.1M evals/sec 0.02 cycles/eval
int32 NULL (c IS NOT NULL) 114375.1M evals/sec 0.02 cycles/eval
NullPredicateBenchmark.TestIsNotNull (20 ms)
This patch doesn't have any functional modifications.
Change-Id: I5d238a86d301b09247d8ed1d7deeb738eb20b11f
Reviewed-on: http://gerrit.cloudera.org:8080/22805
Tested-by: Alexey Serbin <[email protected]>
Reviewed-by: Yifan Zhang <[email protected]>
(cherry picked from commit d09bf07b3162bccba3805b9db3c7a17ac44197b6)
Reviewed-on: http://gerrit.cloudera.org:8080/22808
---
src/kudu/common/column_predicate.cc | 207 ++++++++++++++++++------------------
1 file changed, 103 insertions(+), 104 deletions(-)
diff --git a/src/kudu/common/column_predicate.cc
b/src/kudu/common/column_predicate.cc
index b0626708f..94d9d5f46 100644
--- a/src/kudu/common/column_predicate.cc
+++ b/src/kudu/common/column_predicate.cc
@@ -72,14 +72,14 @@ ColumnPredicate::ColumnPredicate(PredicateType
predicate_type,
bloom_filters_(std::move(bfs)) {}
ColumnPredicate ColumnPredicate::Equality(ColumnSchema column, const void*
value) {
- CHECK(value != nullptr);
+ DCHECK(value != nullptr);
return ColumnPredicate(PredicateType::Equality, std::move(column), value,
nullptr);
}
ColumnPredicate ColumnPredicate::Range(ColumnSchema column,
const void* lower,
const void* upper) {
- CHECK(lower != nullptr || upper != nullptr);
+ DCHECK(lower != nullptr || upper != nullptr);
ColumnPredicate pred(PredicateType::Range, std::move(column), lower, upper);
pred.Simplify();
return pred;
@@ -87,7 +87,7 @@ ColumnPredicate ColumnPredicate::Range(ColumnSchema column,
ColumnPredicate ColumnPredicate::InList(ColumnSchema column,
vector<const void*>* values) {
- CHECK(values != nullptr);
+ DCHECK(values != nullptr);
// Sort values and remove duplicates.
std::sort(values->begin(), values->end(),
@@ -109,7 +109,7 @@ ColumnPredicate ColumnPredicate::InBloomFilter(ColumnSchema
column,
std::vector<BlockBloomFilter*>
bfs,
const void* lower,
const void* upper) {
- CHECK(!bfs.empty());
+ DCHECK(!bfs.empty());
ColumnPredicate pred(
PredicateType::InBloomFilter, std::move(column), std::move(bfs), lower,
upper);
pred.Simplify();
@@ -120,13 +120,13 @@ std::optional<ColumnPredicate>
ColumnPredicate::InclusiveRange(ColumnSchema colu
const void*
lower,
const void*
upper,
Arena* arena) {
- CHECK(lower != nullptr || upper != nullptr);
+ DCHECK(lower != nullptr || upper != nullptr);
if (upper != nullptr) {
// Transform the upper bound to exclusive by incrementing it.
// Make a copy of the value before incrementing in case it's aliased.
size_t size = column.type_info()->size();
- void* buf = CHECK_NOTNULL(arena->AllocateBytes(size));
+ void* buf = CHECK_NOTNULL(arena->AllocateBytes(size));
memcpy(buf, upper, size);
if (!key_util::IncrementCell(column, buf, arena)) {
if (lower == nullptr) {
@@ -150,7 +150,7 @@ ColumnPredicate
ColumnPredicate::ExclusiveRange(ColumnSchema column,
const void* lower,
const void* upper,
Arena* arena) {
- CHECK(lower != nullptr || upper != nullptr);
+ DCHECK(lower != nullptr || upper != nullptr);
if (lower != nullptr) {
// Transform the lower bound to inclusive by incrementing it.
@@ -194,8 +194,9 @@ void ColumnPredicate::Simplify() {
switch (predicate_type_) {
case PredicateType::None:
case PredicateType::Equality:
- case PredicateType::IsNotNull: return;
- case PredicateType::IsNull: return;
+ case PredicateType::IsNotNull:
+ case PredicateType::IsNull:
+ return;
case PredicateType::Range: {
DCHECK(lower_ != nullptr || upper_ != nullptr);
if (lower_ != nullptr && upper_ != nullptr) {
@@ -223,7 +224,7 @@ void ColumnPredicate::Simplify() {
}
}
return;
- };
+ }
case PredicateType::InList: {
if (values_.empty()) {
// If the list is empty, no results can be returned.
@@ -244,7 +245,7 @@ void ColumnPredicate::Simplify() {
values_.clear();
}
return;
- };
+ }
case PredicateType::InBloomFilter: {
if (lower_ == nullptr && upper_ == nullptr) {
return;
@@ -280,7 +281,7 @@ void ColumnPredicate::Simplify() {
}
}
return;
- };
+ }
}
LOG(FATAL) << "unknown predicate type";
}
@@ -288,37 +289,26 @@ void ColumnPredicate::Simplify() {
void ColumnPredicate::Merge(const ColumnPredicate& other) {
CHECK(column_.Equals(other.column_, ColumnSchema::COMPARE_NAME_AND_TYPE));
switch (predicate_type_) {
- case PredicateType::None: return;
- case PredicateType::Range: {
- MergeIntoRange(other);
- return;
- };
- case PredicateType::Equality: {
- MergeIntoEquality(other);
- return;
- };
- case PredicateType::IsNotNull: {
- MergeIntoIsNotNull(other);
- return;
- };
- case PredicateType::IsNull: {
- MergeIntoIsNull(other);
- return;
- }
- case PredicateType::InList: {
- MergeIntoInList(other);
- return;
- };
- case PredicateType::InBloomFilter: {
- MergeIntoBloomFilter(other);
+ case PredicateType::None:
return;
- };
+ case PredicateType::Range:
+ return MergeIntoRange(other);
+ case PredicateType::Equality:
+ return MergeIntoEquality(other);
+ case PredicateType::IsNotNull:
+ return MergeIntoIsNotNull(other);
+ case PredicateType::IsNull:
+ return MergeIntoIsNull(other);
+ case PredicateType::InList:
+ return MergeIntoInList(other);
+ case PredicateType::InBloomFilter:
+ return MergeIntoBloomFilter(other);
}
LOG(FATAL) << "unknown predicate type";
}
void ColumnPredicate::MergeIntoRange(const ColumnPredicate& other) {
- CHECK(predicate_type_ == PredicateType::Range);
+ DCHECK(predicate_type_ == PredicateType::Range);
switch (other.predicate_type()) {
case PredicateType::None:
@@ -382,57 +372,51 @@ void ColumnPredicate::MergeIntoRange(const
ColumnPredicate& other) {
}
void ColumnPredicate::MergeIntoEquality(const ColumnPredicate& other) {
- CHECK(predicate_type_ == PredicateType::Equality);
+ DCHECK(predicate_type_ == PredicateType::Equality);
switch (other.predicate_type()) {
- case PredicateType::None: {
+ case PredicateType::None:
SetToNone();
return;
- }
- case PredicateType::Range: {
+ case PredicateType::Range:
if (!other.CheckValueInRange(lower_)) {
// This equality value does not fall in the other range.
SetToNone();
}
return;
- };
- case PredicateType::Equality: {
+ case PredicateType::Equality:
if (column_.type_info()->Compare(lower_, other.lower_) != 0) {
SetToNone();
}
return;
- };
- case PredicateType::IsNotNull: return;
- case PredicateType::IsNull: {
+ case PredicateType::IsNotNull:
+ return;
+ case PredicateType::IsNull:
SetToNone();
return;
- }
- case PredicateType::InList : {
+ case PredicateType::InList :
// The equality value needs to be a member of the InList
if (!other.CheckValueInList(lower_)) {
SetToNone();
}
return;
- };
- case PredicateType::InBloomFilter: {
+ case PredicateType::InBloomFilter:
if (!other.CheckValueInBloomFilter(lower_)) {
SetToNone();
}
return;
- };
}
LOG(FATAL) << "unknown predicate type";
}
-void ColumnPredicate::MergeIntoIsNotNull(const ColumnPredicate &other) {
- CHECK(predicate_type_ == PredicateType::IsNotNull);
+void ColumnPredicate::MergeIntoIsNotNull(const ColumnPredicate& other) {
+ DCHECK(predicate_type_ == PredicateType::IsNotNull);
switch (other.predicate_type()) {
// The intersection of NULL and IS NOT NULL is None.
- case PredicateType::IsNull: {
+ case PredicateType::IsNull:
SetToNone();
return;
- }
- default: {
+ default:
// Otherwise, the intersection is the other predicate.
predicate_type_ = other.predicate_type_;
lower_ = other.lower_;
@@ -440,35 +424,32 @@ void ColumnPredicate::MergeIntoIsNotNull(const
ColumnPredicate &other) {
values_ = other.values_;
bloom_filters_ = other.bloom_filters_;
return;
- }
}
}
-void ColumnPredicate::MergeIntoIsNull(const ColumnPredicate &other) {
- CHECK(predicate_type_ == PredicateType::IsNull);
+void ColumnPredicate::MergeIntoIsNull(const ColumnPredicate& other) {
+ DCHECK(predicate_type_ == PredicateType::IsNull);
switch (other.predicate_type()) {
// The intersection of IS NULL and IS NULL is IS NULL.
- case PredicateType::IsNull: {
+ case PredicateType::IsNull:
return;
- }
- default: {
+ default:
// Otherwise, the intersection is None.
// NB: This will not be true if NULL is allowed in an InList predicate.
SetToNone();
return;
- }
}
}
-void ColumnPredicate::MergeIntoInList(const ColumnPredicate &other) {
- CHECK(predicate_type_ == PredicateType::InList);
- DCHECK(values_.size() >= 1);
+void ColumnPredicate::MergeIntoInList(const ColumnPredicate& other) {
+ DCHECK(predicate_type_ == PredicateType::InList);
+ DCHECK(!values_.empty());
switch (other.predicate_type()) {
case PredicateType::None: {
SetToNone();
return;
- };
+ }
case PredicateType::Range: {
// Only values within the range should be retained.
auto search_by = [&] (const void* lhs, const void* rhs) {
@@ -559,7 +540,7 @@ void ColumnPredicate::MergeIntoInList(const ColumnPredicate
&other) {
values_.erase(std::remove_if(values_.begin(), values_.end(),
other_absent), values_.end());
Simplify();
return;
- };
+ }
case PredicateType::InBloomFilter: {
std::vector<const void*> new_values;
std::copy_if(values_.begin(), values_.end(),
std::back_inserter(new_values),
@@ -569,13 +550,13 @@ void ColumnPredicate::MergeIntoInList(const
ColumnPredicate &other) {
values_.swap(new_values);
Simplify();
return;
- };
+ }
}
LOG(FATAL) << "unknown predicate type";
}
-void ColumnPredicate::MergeIntoBloomFilter(const ColumnPredicate &other) {
- CHECK(predicate_type_ == PredicateType::InBloomFilter);
+void ColumnPredicate::MergeIntoBloomFilter(const ColumnPredicate& other) {
+ DCHECK(predicate_type_ == PredicateType::InBloomFilter);
DCHECK(!bloom_filters_.empty());
switch (other.predicate_type()) {
@@ -649,12 +630,12 @@ namespace {
// only processes multiples of 8, so if cb.nrows() is not a multiple of 8, the
// last few elements may need to be processed by the caller.
template <DataType PhysicalType, typename P>
-ATTRIBUTE_NOINLINE
-int ApplyPredicatePrimitive(const ColumnBlock& block, uint8_t* __restrict__
sel_bitmap, P p) {
+ATTRIBUTE_NOINLINE size_t ApplyPredicatePrimitive(
+ const ColumnBlock& block, uint8_t* __restrict__ sel_bitmap, P p) {
using cpp_type = typename DataTypeTraits<PhysicalType>::cpp_type;
const cpp_type* data = reinterpret_cast<const cpp_type*>(block.data());
- const int n_chunks = block.nrows() / 8;
- for (int i = 0; i < n_chunks; i++) {
+ const size_t n_chunks = block.nrows() / 8;
+ for (size_t i = 0; i < n_chunks; ++i) {
uint8_t res_8 = 0;
for (int j = 0; j < 8; j++) {
res_8 |= p(data++) << j;
@@ -662,8 +643,9 @@ int ApplyPredicatePrimitive(const ColumnBlock& block,
uint8_t* __restrict__ sel_
sel_bitmap[i] &= res_8;
}
if (block.is_nullable()) {
- for (int i = 0; i < n_chunks; i++) {
- sel_bitmap[i] &= block.non_null_bitmap()[i];
+ const uint8_t* const non_null_bitmap = block.non_null_bitmap();
+ for (size_t i = 0; i < n_chunks; ++i) {
+ sel_bitmap[i] &= non_null_bitmap[i];
}
}
return n_chunks * 8;
@@ -673,10 +655,12 @@ int ApplyPredicatePrimitive(const ColumnBlock& block,
uint8_t* __restrict__ sel_
template <DataType PhysicalType, typename P>
void ApplyPredicate(const ColumnBlock& block, SelectionVector* sel, P p) {
using cpp_type = typename DataTypeTraits<PhysicalType>::cpp_type;
- int start_idx = 0;
- if (std::is_fundamental<cpp_type>::value) {
+ size_t start_idx = 0;
+ if constexpr (std::is_fundamental<cpp_type>::value) {
start_idx = ApplyPredicatePrimitive<PhysicalType>(block,
sel->mutable_bitmap(), p);
- if (PREDICT_TRUE(start_idx == block.nrows())) return;
+ if (PREDICT_TRUE(start_idx == block.nrows())) {
+ return;
+ }
// If we couldn't process the whole block unrolled by 8, fall through to
the
// remainder.
}
@@ -684,7 +668,9 @@ void ApplyPredicate(const ColumnBlock& block,
SelectionVector* sel, P p) {
const cpp_type* data = reinterpret_cast<const cpp_type*>(block.data());
if (block.is_nullable()) {
for (size_t i = start_idx; i < block.nrows(); i++) {
- if (!sel->IsRowSelected(i)) continue;
+ if (!sel->IsRowSelected(i)) {
+ continue;
+ }
const cpp_type* cell = block.is_null(i) ? nullptr : &data[i];
if (cell == nullptr || !p(cell)) {
BitmapClear(sel->mutable_bitmap(), i);
@@ -692,7 +678,9 @@ void ApplyPredicate(const ColumnBlock& block,
SelectionVector* sel, P p) {
}
} else {
for (size_t i = start_idx; i < block.nrows(); i++) {
- if (!sel->IsRowSelected(i)) continue;
+ if (!sel->IsRowSelected(i)) {
+ continue;
+ }
const cpp_type* cell = &data[i];
if (!p(cell)) {
BitmapClear(sel->mutable_bitmap(), i);
@@ -703,11 +691,14 @@ void ApplyPredicate(const ColumnBlock& block,
SelectionVector* sel, P p) {
template<bool IS_NOT_NULL>
void ApplyNullPredicate(const ColumnBlock& block, uint8_t* __restrict__
sel_vec) {
- int n_bytes = KUDU_ALIGN_UP(block.nrows(), 8) / 8;
- for (int i = 0; i < n_bytes; i++) {
- uint8_t non_null_byte = block.non_null_bitmap()[i];
- if (!IS_NOT_NULL) non_null_byte = ~non_null_byte;
- sel_vec[i] &= non_null_byte;
+ const size_t n_bytes = KUDU_ALIGN_UP(block.nrows(), 8) / 8;
+ const uint8_t* const non_null_bitmap = block.non_null_bitmap();
+ for (size_t i = 0; i < n_bytes; ++i) {
+ if constexpr (IS_NOT_NULL) {
+ sel_vec[i] &= non_null_bitmap[i];
+ } else {
+ sel_vec[i] &= ~non_null_bitmap[i];
+ }
}
}
} // anonymous namespace
@@ -738,19 +729,21 @@ void ColumnPredicate::EvaluateForPhysicalType(const
ColumnBlock& block,
});
}
return;
- };
+ }
case PredicateType::Equality: {
cpp_type local_lower = lower_ ? *static_cast<const cpp_type*>(lower_) :
cpp_type();
ApplyPredicate<PhysicalType>(block, sel, [local_lower] (const void*
cell) {
return traits::Compare(cell, &local_lower) == 0;
});
return;
- };
+ }
case PredicateType::IsNotNull: {
- if (!block.is_nullable()) return;
+ if (!block.is_nullable()) {
+ return;
+ }
ApplyNullPredicate<true>(block, sel->mutable_bitmap());
return;
- };
+ }
case PredicateType::IsNull: {
if (!block.is_nullable()) {
BitmapChangeBits(sel->mutable_bitmap(), 0, block.nrows(), false);
@@ -767,14 +760,15 @@ void ColumnPredicate::EvaluateForPhysicalType(const
ColumnBlock& block,
});
});
return;
- };
- case PredicateType::None: LOG(FATAL) << "NONE predicate evaluation";
+ }
case PredicateType::InBloomFilter: {
ApplyPredicate<PhysicalType>(block, sel, [this] (const void* cell) {
return EvaluateCell<PhysicalType>(cell);
});
return;
- };
+ }
+ case PredicateType::None:
+ LOG(FATAL) << "NONE predicate evaluation";
}
LOG(FATAL) << "unknown predicate type";
}
@@ -820,7 +814,8 @@ void ColumnPredicate::Evaluate(const ColumnBlock& block,
SelectionVector* sel) c
string ColumnPredicate::ToString() const {
switch (predicate_type()) {
- case PredicateType::None: return strings::Substitute("$0 NONE",
column_.name());
+ case PredicateType::None:
+ return strings::Substitute("$0 NONE", column_.name());
case PredicateType::Range: {
if (lower_ == nullptr) {
return strings::Substitute("$0 < $1", column_.name(),
column_.Stringify(upper_));
@@ -833,16 +828,16 @@ string ColumnPredicate::ToString() const {
column_.Stringify(lower_),
column_.Stringify(upper_));
- };
+ }
case PredicateType::Equality: {
return strings::Substitute("$0 = $1", column_.name(),
column_.Stringify(lower_));
- };
+ }
case PredicateType::IsNotNull: {
return strings::Substitute("$0 IS NOT NULL", column_.name());
- };
+ }
case PredicateType::IsNull: {
return strings::Substitute("$0 IS NULL", column_.name());
- };
+ }
case PredicateType::InList: {
string ss;
ss.append(column_.name());
@@ -852,20 +847,22 @@ string ColumnPredicate::ToString() const {
", ")));
ss.append(")");
return ss;
- };
+ }
case PredicateType::InBloomFilter: {
return strings::Substitute("`$0` IS InBloomFilter", column_.name());
- };
+ }
default:
LOG(FATAL) << "unknown predicate type";
}
}
bool ColumnPredicate::operator==(const ColumnPredicate& other) const {
- if (!column_.Equals(other.column_, ColumnSchema::COMPARE_NAME_AND_TYPE)) {
return false; }
if (predicate_type_ != other.predicate_type_) {
return false;
}
+ if (!column_.Equals(other.column_, ColumnSchema::COMPARE_NAME_AND_TYPE)) {
+ return false;
+ }
switch (predicate_type_) {
case PredicateType::Equality:
return column_.type_info()->Compare(lower_, other.lower_) == 0;
@@ -894,7 +891,9 @@ bool ColumnPredicate::operator==(const ColumnPredicate&
other) const {
return false;
}
for (int i = 0; i < values_.size(); i++) {
- if (column_.type_info()->Compare(values_[i], other.values_[i]) != 0)
return false;
+ if (column_.type_info()->Compare(values_[i], other.values_[i]) != 0) {
+ return false;
+ }
}
return true;
case PredicateType::None:
@@ -906,7 +905,7 @@ bool ColumnPredicate::operator==(const ColumnPredicate&
other) const {
}
bool ColumnPredicate::CheckValueInRange(const void* value) const {
- CHECK(predicate_type_ == PredicateType::Range);
+ DCHECK(predicate_type_ == PredicateType::Range);
return ((lower_ == nullptr || column_.type_info()->Compare(lower_, value) <=
0) &&
(upper_ == nullptr || column_.type_info()->Compare(upper_, value) >
0));
}