This is an automated email from the ASF dual-hosted git repository.
BiteTheDDDDt pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new f7b7d9ce005 [feature](runtime-filter) Support bloom pruning for list
partitions (#64318)
f7b7d9ce005 is described below
commit f7b7d9ce005c176673a6ea3c83af4379029c4277
Author: Pxl <[email protected]>
AuthorDate: Thu Jun 18 14:18:05 2026 +0800
[feature](runtime-filter) Support bloom pruning for list partitions (#64318)
The RANGE-partition script provided by the reviewer was used as a shape
reference, but the measured case is intentionally LIST partitioned. Pure
Bloom runtime-filter pruning for RANGE partitions remains disabled by design
in this PR; using `IN_OR_BLOOM_FILTER` on a tiny build side can measure the
pre-existing IN-set RANGE pruning path instead of the new Bloom LIST path.
---
.../runtime_filter_partition_pruner.cpp | 86 ++++++++++++++++-
.../runtime_filter_partition_pruner.h | 4 +
.../runtime_filter_partition_pruner_test.cpp | 107 +++++++++++++++++++++
.../RuntimeFilterPartitionPruneClassifier.java | 7 +-
.../glue/translator/RuntimeFilterTranslator.java | 4 +-
.../RuntimeFilterPartitionPruneClassifierTest.java | 78 +++++++++++++++
.../runtime_filter/rf_partition_pruning.out | 15 +++
.../runtime_filter/rf_partition_pruning.groovy | 46 +++++++++
8 files changed, 340 insertions(+), 7 deletions(-)
diff --git a/be/src/exec/runtime_filter/runtime_filter_partition_pruner.cpp
b/be/src/exec/runtime_filter/runtime_filter_partition_pruner.cpp
index a25059be8b7..6e6d43472c5 100644
--- a/be/src/exec/runtime_filter/runtime_filter_partition_pruner.cpp
+++ b/be/src/exec/runtime_filter/runtime_filter_partition_pruner.cpp
@@ -23,10 +23,14 @@
#include <optional>
#include <unordered_set>
#include <utility>
+#include <vector>
#include "core/block/block.h"
#include "core/column/column.h"
+#include "core/column/column_decimal.h"
#include "core/column/column_nullable.h"
+#include "core/column/column_string.h"
+#include "core/column/column_vector.h"
#include "core/data_type/data_type_nullable.h"
#include "core/field.h"
#include "exprs/bloom_filter_func.h"
@@ -40,6 +44,39 @@
namespace doris {
+namespace {
+
+template <PrimitiveType PT>
+bool bloom_may_match_fixed_values(const ColumnValueRange<PT>& cvr,
+ BloomFilterFuncBase* bloom_filter) {
+ if (cvr.get_fixed_value_size() == 0) {
+ return false;
+ }
+
+ using CppType = typename PrimitiveTypeTraits<PT>::CppType;
+ using ColumnType = typename PrimitiveTypeTraits<PT>::ColumnType;
+
+ MutableColumnPtr values_column;
+ if constexpr (IsDecimalNumber<CppType>) {
+ values_column = ColumnType::create(0, cvr.scale());
+ } else {
+ values_column = ColumnType::create();
+ }
+ auto* typed_column = static_cast<ColumnType*>(values_column.get());
+ for (const auto& value : cvr.get_fixed_value_set()) {
+ typed_column->insert_value(value);
+ }
+ const size_t row_count = values_column->size();
+
+ std::vector<uint8_t> results(row_count, 0);
+ ColumnPtr values_column_ptr = std::move(values_column);
+ bloom_filter->find_fixed_len(values_column_ptr, results.data());
+ return std::any_of(results.begin(), results.end(),
+ [](uint8_t matched) { return matched != 0; });
+}
+
+} // namespace
+
// NOLINTBEGIN(readability-function-cognitive-complexity,readability-function-size)
// Complexity is inflated by macro expansion for each PrimitiveType case.
Status ParsedPartitionBoundaries::parse(
@@ -79,6 +116,7 @@ Status ParsedPartitionBoundaries::parse(
bool is_list = tb.__isset.list_values && !tb.list_values.empty();
\
bool is_range = tb.__isset.range_start || tb.__isset.range_end;
\
DORIS_CHECK(is_list || is_range);
\
+ boundary.is_list_boundary = is_list;
\
ColumnValueRange<TYPE_##NAME> cvr(slot->col_name(), is_nullable,
precision, scale); \
/* Returns nullopt if `node` is a NULL literal; the caller then sets
contain_null */ \
/* on the CVR instead of trying to extract a typed value (which would
dereference */ \
@@ -381,8 +419,9 @@ Status
ParsedPartitionBoundaries::get_or_compute_projected_boundaries(
DCHECK(cvr != nullptr);
\
DCHECK(!boundary.only_null);
\
DCHECK(!boundary.contains_null);
\
- boundary_is_list[i] = cvr->is_fixed_value_range();
\
+ boundary_is_list[i] = boundary.is_list_boundary;
\
if (boundary_is_list[i]) {
\
+ DORIS_CHECK(cvr->is_fixed_value_range());
\
list_result_begin[i] = list_row_count;
\
for (const auto& value : cvr->get_fixed_value_set()) {
\
list_inner->insert_value(value);
\
@@ -516,6 +555,7 @@ Status
ParsedPartitionBoundaries::get_or_compute_projected_boundaries(
projected_boundary.partition_id = orig_boundary.partition_id;
\
projected_boundary.slot_id = leaf_slot_id;
\
projected_boundary.is_nullable = out_nullable;
\
+ projected_boundary.is_list_boundary = true;
\
projected_boundary.contains_null = list_has_null;
\
projected_boundary.only_null = list_has_null &&
!list_has_value; \
projected_boundary.boundary_cvr = std::move(cvr);
\
@@ -552,6 +592,7 @@ Status
ParsedPartitionBoundaries::get_or_compute_projected_boundaries(
projected_boundary.partition_id = orig_boundary.partition_id;
\
projected_boundary.slot_id = leaf_slot_id;
\
projected_boundary.is_nullable = out_nullable;
\
+ projected_boundary.is_list_boundary = false;
\
projected_boundary.only_null = orig_boundary.only_null;
\
projected_boundary.contains_null = orig_boundary.contains_null;
\
projected_boundary.boundary_cvr = std::move(cvr);
\
@@ -627,12 +668,49 @@ void
RuntimeFilterPartitionPruner::_try_prune_by_single_rf(
// FilterBase::contain_null() already folds in `_null_aware`, so we only
// get a true result when the build side is actually null-aware AND
// produced a NULL value.
+ if (impl->node_type() == TExprNodeType::BLOOM_PRED) {
+ auto bloom = impl->get_bloom_filter_func();
+ DORIS_CHECK(bloom != nullptr);
+ bool rf_contains_null = bloom->contain_null();
+
+ for (const auto& pb : boundaries) {
+ if (_pruned_partition_ids.contains(pb.partition_id) ||
+ newly_pruned.contains(pb.partition_id)) {
+ continue;
+ }
+
+ if (pb.only_null) {
+ if (!rf_contains_null) {
+ newly_pruned.insert(pb.partition_id);
+ }
+ continue;
+ }
+ if (pb.contains_null && rf_contains_null) {
+ continue;
+ }
+ if (!pb.is_list_boundary) {
+ continue;
+ }
+
+ bool may_match = true;
+ std::visit(
+ [&](const auto& boundary_cvr) {
+ if (!boundary_cvr.is_fixed_value_range()) {
+ return;
+ }
+ may_match = bloom_may_match_fixed_values(boundary_cvr,
bloom.get());
+ },
+ pb.boundary_cvr);
+ if (!may_match) {
+ newly_pruned.insert(pb.partition_id);
+ }
+ }
+ return;
+ }
+
bool rf_contains_null = false;
if (auto hybrid_set = impl->get_set_func()) {
rf_contains_null = hybrid_set->contain_null();
- } else if (impl->node_type() == TExprNodeType::BLOOM_PRED) {
- auto bloom = impl->get_bloom_filter_func();
- rf_contains_null = bloom && bloom->contain_null();
} else if (impl->node_type() == TExprNodeType::NULL_AWARE_BINARY_PRED) {
// Min/Max RF built on a null-safe equal join. The literal child holds
// the min or max bound; the NULL semantic is conveyed by the node
diff --git a/be/src/exec/runtime_filter/runtime_filter_partition_pruner.h
b/be/src/exec/runtime_filter/runtime_filter_partition_pruner.h
index f718fe7e418..9e3d0150db1 100644
--- a/be/src/exec/runtime_filter/runtime_filter_partition_pruner.h
+++ b/be/src/exec/runtime_filter/runtime_filter_partition_pruner.h
@@ -43,6 +43,10 @@ struct ParsedBoundary {
int64_t partition_id = 0;
SlotId slot_id = 0;
bool is_nullable = false;
+ // True only when the original/projection boundary is a finite LIST value set.
+ // Bloom RF pruning relies on complete value enumeration and must not use RANGE
+ // boundaries, even when a RANGE projection degenerates to a single point.
+ bool is_list_boundary = false;
ColumnValueRangeType boundary_cvr;
// True if the partition's value set is exactly {NULL} (e.g. LIST
// partition whose only key is NULL). The CVR alone cannot encode
diff --git
a/be/test/exec/runtime_filter/runtime_filter_partition_pruner_test.cpp
b/be/test/exec/runtime_filter/runtime_filter_partition_pruner_test.cpp
index c4193061f0c..6267c172c98 100644
--- a/be/test/exec/runtime_filter/runtime_filter_partition_pruner_test.cpp
+++ b/be/test/exec/runtime_filter/runtime_filter_partition_pruner_test.cpp
@@ -28,9 +28,11 @@
#include "core/data_type/data_type_factory.hpp"
#include "core/string_ref.h"
#include "core/types.h"
+#include "exec/runtime_filter/runtime_filter_definitions.h"
#include "exec/runtime_filter/utils.h"
#include "exprs/create_predicate_function.h"
#include "exprs/runtime_filter_expr.h"
+#include "exprs/vbloom_predicate.h"
#include "exprs/vdirect_in_predicate.h"
#include "exprs/vexpr.h"
#include "exprs/vexpr_context.h"
@@ -164,6 +166,42 @@ protected:
return VDirectInPredicate::create_shared(node, in_filter<PT>(value,
contain_null));
}
+ template <PrimitiveType PT>
+ VExprSPtr bloom_predicate(const std::vector<CppType<PT>>& values, bool
contain_null = false) {
+ std::shared_ptr<BloomFilterFuncBase> filter(create_bloom_filter(PT,
contain_null));
+ RuntimeFilterParams params;
+ params.filter_type = RuntimeFilterType::BLOOM_FILTER;
+ params.column_return_type = PT;
+ params.null_aware = contain_null;
+ params.bloom_filter_size = 1024;
+ filter->init_params(¶ms);
+ EXPECT_TRUE(filter->init_with_fixed_length(1024).ok());
+
+ using ColumnType = typename PrimitiveTypeTraits<PT>::ColumnType;
+ MutableColumnPtr values_column = ColumnType::create();
+ auto* typed_column = assert_cast<ColumnType*>(values_column.get());
+ for (const auto& value : values) {
+ typed_column->insert_value(value);
+ }
+ ColumnPtr values_column_ptr = std::move(values_column);
+ filter->insert_fixed_len(values_column_ptr, 0);
+
+ if (contain_null) {
+ std::shared_ptr<HybridSetBase> null_set(create_set(PT,
contain_null));
+ null_set->insert(static_cast<const void*>(nullptr));
+ filter->insert_set(null_set);
+ }
+
+ TExprNode node;
+ node.__set_type(create_type_desc(PrimitiveType::TYPE_BOOLEAN));
+ node.__set_node_type(TExprNodeType::BLOOM_PRED);
+ node.__set_opcode(TExprOpcode::RT_FILTER);
+ node.__set_is_nullable(false);
+ auto bloom_pred = VBloomPredicate::create_shared(node);
+ bloom_pred->set_filter(filter);
+ return bloom_pred;
+ }
+
template <PrimitiveType PT>
VExprSPtr minmax_predicate_le(const CppType<PT>& value, const DataTypePtr&
type) {
VExprSPtr pred;
@@ -226,6 +264,7 @@ protected:
ASSERT_FALSE(parsed->empty());
ASSERT_EQ(parsed->total_partitions(), 2);
const auto& parsed_boundaries =
parsed->slot_to_boundaries().at(SLOT_ID);
+ EXPECT_TRUE(parsed_boundaries[0].is_list_boundary);
RuntimeFilterPartitionPruner in_pruner;
phmap::flat_hash_set<int64_t> in_pruned;
@@ -247,6 +286,7 @@ protected:
auto parsed_range = parse_boundaries(PT, range_boundaries, false,
precision, scale);
EXPECT_FALSE(parsed_range->empty());
EXPECT_EQ(parsed_range->total_partitions(), 1);
+
EXPECT_FALSE(parsed_range->slot_to_boundaries().at(SLOT_ID)[0].is_list_boundary);
}
DateV2Value<DateV2ValueType> date_v2(uint16_t year, uint8_t month, uint8_t
day) {
@@ -346,6 +386,8 @@ TEST_F(RuntimeFilterPartitionPrunerTest,
ProjectedBoundariesSupportListValues) {
.ok());
ASSERT_EQ(projected->size(), 2);
+ EXPECT_TRUE(projected->at(0).is_list_boundary);
+ EXPECT_TRUE(projected->at(1).is_list_boundary);
const auto& first =
std::get<ColumnValueRange<TYPE_INT>>(projected->at(0).boundary_cvr);
EXPECT_TRUE(first.is_fixed_value_range());
EXPECT_TRUE(first.get_fixed_value_set().contains(one));
@@ -361,6 +403,71 @@ TEST_F(RuntimeFilterPartitionPrunerTest,
ProjectedBoundariesSupportListValues) {
EXPECT_TRUE(pruned.contains(2));
}
+TEST_F(RuntimeFilterPartitionPrunerTest, BloomPrunesListPartitionFixedValues) {
+ int32_t one = 1;
+ int32_t two = 2;
+ int32_t three = 3;
+ int32_t four = 4;
+ int32_t five = 5;
+ std::vector<TPartitionBoundary> boundaries {
+ list_boundary<TYPE_INT>(1, {literal_node<TYPE_INT>(one)}),
+ list_boundary<TYPE_INT>(2,
+ {literal_node<TYPE_INT>(two),
literal_node<TYPE_INT>(three)}),
+ list_boundary<TYPE_INT>(3,
+ {literal_node<TYPE_INT>(four),
literal_node<TYPE_INT>(five)})};
+ auto parsed = parse_boundaries(TYPE_INT, boundaries);
+ const auto& parsed_boundaries = parsed->slot_to_boundaries().at(SLOT_ID);
+
+ RuntimeFilterPartitionPruner pruner;
+ phmap::flat_hash_set<int64_t> pruned;
+ pruner._try_prune_by_single_rf(parsed_boundaries,
bloom_predicate<TYPE_INT>({two}), pruned);
+ EXPECT_TRUE(pruned.contains(1));
+ EXPECT_FALSE(pruned.contains(2));
+ EXPECT_TRUE(pruned.contains(3));
+}
+
+TEST_F(RuntimeFilterPartitionPrunerTest, BloomPreservesListNullSemantics) {
+ int32_t one = 1;
+ int32_t two = 2;
+ std::vector<TPartitionBoundary> boundaries {
+ list_boundary<TYPE_INT>(1, {null_node(TYPE_INT)}),
+ list_boundary<TYPE_INT>(2, {null_node(TYPE_INT),
literal_node<TYPE_INT>(one)}),
+ list_boundary<TYPE_INT>(3, {literal_node<TYPE_INT>(two)})};
+ auto parsed = parse_boundaries(TYPE_INT, boundaries, true);
+ const auto& parsed_boundaries = parsed->slot_to_boundaries().at(SLOT_ID);
+
+ RuntimeFilterPartitionPruner non_null_pruner;
+ phmap::flat_hash_set<int64_t> non_null_pruned;
+ non_null_pruner._try_prune_by_single_rf(parsed_boundaries,
bloom_predicate<TYPE_INT>({one}),
+ non_null_pruned);
+ EXPECT_TRUE(non_null_pruned.contains(1));
+ EXPECT_FALSE(non_null_pruned.contains(2));
+ EXPECT_TRUE(non_null_pruned.contains(3));
+
+ RuntimeFilterPartitionPruner null_aware_pruner;
+ phmap::flat_hash_set<int64_t> null_aware_pruned;
+ null_aware_pruner._try_prune_by_single_rf(
+ parsed_boundaries, bloom_predicate<TYPE_INT>({one}, true),
null_aware_pruned);
+ EXPECT_FALSE(null_aware_pruned.contains(1));
+ EXPECT_FALSE(null_aware_pruned.contains(2));
+ EXPECT_TRUE(null_aware_pruned.contains(3));
+}
+
+TEST_F(RuntimeFilterPartitionPrunerTest, BloomDoesNotPruneRangePartition) {
+ int32_t one = 1;
+ int32_t two = 2;
+ int32_t miss = 100;
+ std::vector<TPartitionBoundary> boundaries {range_boundary<TYPE_INT>(1,
one, two)};
+ auto parsed = parse_boundaries(TYPE_INT, boundaries);
+ const auto& parsed_boundaries = parsed->slot_to_boundaries().at(SLOT_ID);
+ ASSERT_FALSE(parsed_boundaries[0].is_list_boundary);
+
+ RuntimeFilterPartitionPruner pruner;
+ phmap::flat_hash_set<int64_t> pruned;
+ pruner._try_prune_by_single_rf(parsed_boundaries,
bloom_predicate<TYPE_INT>({miss}), pruned);
+ EXPECT_TRUE(pruned.empty());
+}
+
TEST_F(RuntimeFilterPartitionPrunerTest, InvalidPartitionBoundaryRejected) {
TPartitionBoundary boundary;
boundary.__set_partition_id(1);
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/RuntimeFilterPartitionPruneClassifier.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/RuntimeFilterPartitionPruneClassifier.java
index da2aa99092f..b1dd098324e 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/RuntimeFilterPartitionPruneClassifier.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/RuntimeFilterPartitionPruneClassifier.java
@@ -36,6 +36,7 @@ import
org.apache.doris.nereids.trees.expressions.functions.Monotonic;
import org.apache.doris.nereids.trees.expressions.literal.Literal;
import org.apache.doris.planner.OlapScanNode;
import org.apache.doris.planner.PlanNode;
+import org.apache.doris.thrift.TRuntimeFilterType;
import org.apache.doris.thrift.TTargetExprMonotonicity;
import com.google.common.collect.Range;
@@ -55,7 +56,8 @@ final class RuntimeFilterPartitionPruneClassifier {
private RuntimeFilterPartitionPruneClassifier() {
}
- static Classification classify(Expr targetExpr, Expression
nereidsTargetExpr, PlanNode scanNode) {
+ static Classification classify(TRuntimeFilterType filterType, Expr
targetExpr,
+ Expression nereidsTargetExpr, PlanNode scanNode) {
if (!(scanNode instanceof OlapScanNode)) {
return Classification.unsupported("target scan is not an
OlapScanNode");
}
@@ -71,6 +73,9 @@ final class RuntimeFilterPartitionPruneClassifier {
if (partType != PartitionType.RANGE && partType != PartitionType.LIST)
{
return Classification.unsupported("partition type is not RANGE or
LIST");
}
+ if (filterType == TRuntimeFilterType.BLOOM && partType ==
PartitionType.RANGE) {
+ return Classification.unsupported("BLOOM runtime filter does not
support RANGE partition pruning");
+ }
if (hasUnsupportedAutomaticPartitionExpression(partitionInfo)) {
return Classification.unsupported("automatic partition expression
boundary is not modeled");
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/RuntimeFilterTranslator.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/RuntimeFilterTranslator.java
index b0be2b9ec0e..286330d79ea 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/RuntimeFilterTranslator.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/RuntimeFilterTranslator.java
@@ -246,7 +246,7 @@ public class RuntimeFilterTranslator {
}
RuntimeFilterPartitionPruneClassifier.Classification
classification =
RuntimeFilterPartitionPruneClassifier.classify(
- targetExpr, nereidsTargetExprList.get(i),
scanNode);
+ head.getType(), targetExpr,
nereidsTargetExprList.get(i), scanNode);
if (classification.canPrunePartitions()) {
origFilter.markTargetCanPrunePartitions(scanNode.getId());
}
@@ -353,7 +353,7 @@ public class RuntimeFilterTranslator {
}
RuntimeFilterPartitionPruneClassifier.Classification
classification =
RuntimeFilterPartitionPruneClassifier.classify(
- targetExpr,
filter.getTargetExpressions().get(i), scanNode);
+ filter.getType(), targetExpr,
filter.getTargetExpressions().get(i), scanNode);
if (classification.canPrunePartitions()) {
origFilter.markTargetCanPrunePartitions(scanNode.getId());
}
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/nereids/glue/translator/RuntimeFilterPartitionPruneClassifierTest.java
b/fe/fe-core/src/test/java/org/apache/doris/nereids/glue/translator/RuntimeFilterPartitionPruneClassifierTest.java
index d8982dbd147..34de78c7a76 100644
---
a/fe/fe-core/src/test/java/org/apache/doris/nereids/glue/translator/RuntimeFilterPartitionPruneClassifierTest.java
+++
b/fe/fe-core/src/test/java/org/apache/doris/nereids/glue/translator/RuntimeFilterPartitionPruneClassifierTest.java
@@ -17,14 +17,34 @@
package org.apache.doris.nereids.glue.translator;
+import org.apache.doris.analysis.SlotDescriptor;
+import org.apache.doris.analysis.SlotId;
+import org.apache.doris.analysis.SlotRef;
+import org.apache.doris.analysis.TupleId;
+import org.apache.doris.catalog.Column;
+import org.apache.doris.catalog.ListPartitionItem;
+import org.apache.doris.catalog.OlapTable;
+import org.apache.doris.catalog.PartitionInfo;
+import org.apache.doris.catalog.PartitionItem;
+import org.apache.doris.catalog.PartitionType;
+import org.apache.doris.catalog.PrimitiveType;
+import org.apache.doris.catalog.RangePartitionItem;
import org.apache.doris.nereids.trees.expressions.SlotReference;
import org.apache.doris.nereids.trees.expressions.functions.Monotonic;
import org.apache.doris.nereids.trees.expressions.functions.scalar.DateTrunc;
import org.apache.doris.nereids.trees.expressions.literal.VarcharLiteral;
import org.apache.doris.nereids.types.DateTimeV2Type;
+import org.apache.doris.nereids.types.IntegerType;
+import org.apache.doris.planner.OlapScanNode;
+import org.apache.doris.thrift.TRuntimeFilterType;
+import org.apache.doris.thrift.TTargetExprMonotonicity;
+import com.google.common.collect.ImmutableList;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
+import org.mockito.Mockito;
+
+import java.util.Map;
class RuntimeFilterPartitionPruneClassifierTest {
@Test
@@ -47,4 +67,62 @@ class RuntimeFilterPartitionPruneClassifierTest {
Assertions.assertTrue(RuntimeFilterPartitionPruneClassifier.hasInputSlotOnlyInMonotonicChild(
dateTrunc, monotonic.getMonotonicFunctionChildIndex()));
}
+
+ @Test
+ void testBloomRangePartitionUnsupported() {
+ RuntimeFilterPartitionPruneClassifier.Classification classification =
classify(
+ TRuntimeFilterType.BLOOM, PartitionType.RANGE,
RangePartitionItem.DUMMY_ITEM);
+
+ Assertions.assertFalse(classification.canPrunePartitions());
+
Assertions.assertTrue(classification.getUnsupportedReason().contains("BLOOM"));
+
Assertions.assertTrue(classification.getPartitionMonotonicity().isEmpty());
+ }
+
+ @Test
+ void testBloomListPartitionSupported() {
+ RuntimeFilterPartitionPruneClassifier.Classification classification =
classify(
+ TRuntimeFilterType.BLOOM, PartitionType.LIST,
ListPartitionItem.DUMMY_ITEM);
+
+ assertSupportedIncreasingPartitions(classification);
+ }
+
+ @Test
+ void testInOrBloomRangePartitionStillSupported() {
+ RuntimeFilterPartitionPruneClassifier.Classification classification =
classify(
+ TRuntimeFilterType.IN_OR_BLOOM, PartitionType.RANGE,
RangePartitionItem.DUMMY_ITEM);
+
+ assertSupportedIncreasingPartitions(classification);
+ }
+
+ private RuntimeFilterPartitionPruneClassifier.Classification classify(
+ TRuntimeFilterType filterType, PartitionType partitionType,
PartitionItem partitionItem) {
+ Column partitionColumn = new Column("part_col", PrimitiveType.INT);
+ SlotDescriptor slotDescriptor = new SlotDescriptor(new SlotId(1), new
TupleId(1));
+ slotDescriptor.setColumn(partitionColumn);
+ slotDescriptor.setType(partitionColumn.getType());
+ SlotRef targetSlot = new SlotRef(slotDescriptor);
+ SlotReference nereidsTarget = new SlotReference("part_col",
IntegerType.INSTANCE);
+
+ OlapTable table = Mockito.mock(OlapTable.class);
+ PartitionInfo partitionInfo = Mockito.mock(PartitionInfo.class);
+ OlapScanNode scanNode = Mockito.mock(OlapScanNode.class);
+ Mockito.when(scanNode.getOlapTable()).thenReturn(table);
+
Mockito.when(scanNode.getSelectedPartitionIds()).thenReturn(ImmutableList.of(1L,
2L));
+ Mockito.when(table.getPartitionInfo()).thenReturn(partitionInfo);
+ Mockito.when(partitionInfo.getType()).thenReturn(partitionType);
+
Mockito.when(partitionInfo.getPartitionColumns()).thenReturn(ImmutableList.of(partitionColumn));
+ Mockito.when(partitionInfo.getItem(1L)).thenReturn(partitionItem);
+ Mockito.when(partitionInfo.getItem(2L)).thenReturn(partitionItem);
+
+ return RuntimeFilterPartitionPruneClassifier.classify(filterType,
targetSlot, nereidsTarget, scanNode);
+ }
+
+ private void assertSupportedIncreasingPartitions(
+ RuntimeFilterPartitionPruneClassifier.Classification
classification) {
+ Assertions.assertTrue(classification.canPrunePartitions());
+ Map<Long, TTargetExprMonotonicity> monotonicity =
classification.getPartitionMonotonicity();
+ Assertions.assertEquals(2, monotonicity.size());
+ Assertions.assertEquals(TTargetExprMonotonicity.MONOTONIC_INCREASING,
monotonicity.get(1L));
+ Assertions.assertEquals(TTargetExprMonotonicity.MONOTONIC_INCREASING,
monotonicity.get(2L));
+ }
}
diff --git
a/regression-test/data/query_p0/runtime_filter/rf_partition_pruning.out
b/regression-test/data/query_p0/runtime_filter/rf_partition_pruning.out
index 09a100d695b..44295ef30a1 100644
--- a/regression-test/data/query_p0/runtime_filter/rf_partition_pruning.out
+++ b/regression-test/data/query_p0/runtime_filter/rf_partition_pruning.out
@@ -30,6 +30,12 @@
5 3 e
6 3 f
+-- !list_int_bloom --
+1 1 a
+2 1 b
+5 3 e
+6 3 f
+
-- !no_pruning --
13 250 m
18 350 r
@@ -123,3 +129,12 @@ Beijing 3
2 1 4 b
3 1 5 c
4 1 9 d
+
+-- !list_expr_bloom --
+1 1 a
+2 1 b
+5 3 e
+6 3 f
+
+-- !list_str_bloom --
+3 c
diff --git
a/regression-test/suites/query_p0/runtime_filter/rf_partition_pruning.groovy
b/regression-test/suites/query_p0/runtime_filter/rf_partition_pruning.groovy
index 37453ecfbd7..1b76f964aac 100644
--- a/regression-test/suites/query_p0/runtime_filter/rf_partition_pruning.groovy
+++ b/regression-test/suites/query_p0/runtime_filter/rf_partition_pruning.groovy
@@ -442,6 +442,24 @@ suite("rf_partition_pruning", "nonConcurrent") {
"* FROM rf_prune_list_int f JOIN rf_prune_dim_region d ON f.region_id
= d.dim_region",
"IN_OR_BLOOM_FILTER", 5, 3)
+ // Test 6b: List partition (INT) - Bloom filter prune.
+ // Regions {1, 3} keep two LIST partitions and prune the other three.
+ order_qt_list_int_bloom """
+ SELECT /*+ SET_VAR(runtime_filter_type='BLOOM_FILTER') */
+ f.id, f.region_id, f.value
+ FROM rf_prune_list_int f
+ JOIN rf_prune_dim_region d ON f.region_id = d.dim_region
+ """
+ assertPruningProfile(
+ "* FROM rf_prune_list_int f JOIN rf_prune_dim_region d ON f.region_id
= d.dim_region",
+ "BLOOM_FILTER", 5, 3)
+
+ // Test 6c: Range partition (INT) - Bloom filter must not register RF
partition pruning.
+ // A Bloom filter can disprove individual values, not an arbitrary [a, b)
range.
+ assertNoPartitionPruningProfile(
+ "* FROM rf_prune_range_int f JOIN rf_prune_dim_int d ON f.part_col =
d.dim_key",
+ "BLOOM_FILTER")
+
// Test 7: No pruning - dim matches all partitions
sql "drop table if exists rf_prune_dim_all"
sql """
@@ -1070,6 +1088,9 @@ suite("rf_partition_pruning", "nonConcurrent") {
assertPruningProfile(
"* FROM rf_prune_list_mixed f JOIN rf_prune_dim_five d ON f.part_col =
d.dim_key",
"IN_OR_BLOOM_FILTER", 3, 2)
+ assertPruningProfile(
+ "* FROM rf_prune_list_mixed f JOIN rf_prune_dim_five d ON f.part_col =
d.dim_key",
+ "BLOOM_FILTER", 3, 2)
// Test 31: Mixed partition {NULL,5} + RF {7} (no value match, RF
non-null-aware)
// p_a still pruned (NULL row can't match non-null RF; concrete 5 != 7)
@@ -1096,6 +1117,9 @@ suite("rf_partition_pruning", "nonConcurrent") {
assertPruningProfile(
"* FROM rf_prune_list_mixed f JOIN rf_prune_dim_seven d ON f.part_col
= d.dim_key",
"IN_OR_BLOOM_FILTER", 3, 3)
+ assertPruningProfile(
+ "* FROM rf_prune_list_mixed f JOIN rf_prune_dim_seven d ON f.part_col
= d.dim_key",
+ "BLOOM_FILTER", 3, 3)
// Test 32: Null-safe equal join (<=>) on mixed partition. RF is
null_aware AND
// contains NULL (build side has NULL key), so p_a (which contains NULL)
MUST
@@ -1117,6 +1141,9 @@ suite("rf_partition_pruning", "nonConcurrent") {
FROM rf_prune_list_mixed f
JOIN rf_prune_dim_null d ON f.part_col <=> d.dim_key
"""
+ assertPruningProfile(
+ "* FROM rf_prune_list_mixed f JOIN rf_prune_dim_null d ON f.part_col
<=> d.dim_key",
+ "BLOOM_FILTER", 3, 2)
// Test 32b: Nullable RANGE partition columns can store NULL rows in the
// MINVALUE-side first partition. A null-aware RF containing only NULL must
@@ -1441,6 +1468,16 @@ suite("rf_partition_pruning", "nonConcurrent") {
"count(*) FROM rf_prune_list_int f JOIN rf_prune_dim_region_twice d "
+ "ON f.region_id + f.region_id = d.dim_region",
"IN_OR_BLOOM_FILTER", 5, 3)
+ order_qt_list_expr_bloom """
+ SELECT /*+ SET_VAR(runtime_filter_type='BLOOM_FILTER') */
+ f.id, f.region_id, f.value
+ FROM rf_prune_list_int f
+ JOIN rf_prune_dim_region_twice d ON f.region_id + f.region_id =
d.dim_region
+ """
+ assertPruningProfile(
+ "count(*) FROM rf_prune_list_int f JOIN rf_prune_dim_region_twice d "
+ + "ON f.region_id + f.region_id = d.dim_region",
+ "BLOOM_FILTER", 5, 3)
// ============================================================
// Test 51: String partition column (LIST partition on VARCHAR).
@@ -1485,6 +1522,15 @@ suite("rf_partition_pruning", "nonConcurrent") {
assertPruningProfile(
"* FROM rf_prune_list_str f JOIN rf_prune_dim_str d ON f.part_col =
d.dim_key",
"IN_OR_BLOOM_FILTER", 4, 3)
+ order_qt_list_str_bloom """
+ SELECT /*+ SET_VAR(runtime_filter_type='BLOOM_FILTER') */
+ f.id, f.part_col
+ FROM rf_prune_list_str f
+ JOIN rf_prune_dim_str d ON f.part_col = d.dim_key
+ """
+ assertPruningProfile(
+ "* FROM rf_prune_list_str f JOIN rf_prune_dim_str d ON f.part_col =
d.dim_key",
+ "BLOOM_FILTER", 4, 3)
// ============================================================
// Test 52: Grouped RF with multiple targets.
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]