gfphoenix78 commented on code in PR #1333:
URL: https://github.com/apache/cloudberry/pull/1333#discussion_r2496681379
##########
contrib/pax_storage/src/cpp/storage/micro_partition_row_filter_reader.cc:
##########
@@ -73,6 +75,109 @@ MicroPartitionRowFilterReader::GetNextGroup(TupleDesc desc)
{
return group_;
}
+void MicroPartitionRowFilterReader::LoadExprFilterColumns(
+    MicroPartitionReader::Group *group, TupleDesc desc,
+    const ExecutionFilterContext *ctx, size_t row_index, TupleTableSlot *slot) {
+  // There will not be duplicate attnos here because the attnos in ctx come from
+  // qual expressions. For each column index, there is at most one corresponding
+  // attno in ctx->attnos, so no attno appears more than once. As a result, this
+  // loop does not load the same column multiple times.
+  for (int i = 0; i < ctx->size; i++) {
+    auto attno = ctx->attnos[i];
+    Assert(attno > 0);
+    std::tie(slot->tts_values[attno - 1], slot->tts_isnull[attno - 1]) =
+        group->GetColumnValue(desc, attno - 1, row_index);
+  }
+}
+
+bool MicroPartitionRowFilterReader::EvalBloomNode(
+    const ExecutionFilterContext *ctx, MicroPartitionReader::Group *group,
+    TupleDesc desc, size_t row_index, int bloom_index) {
+  Assert(bloom_index >= 0 &&
+         (size_t)bloom_index < ctx->runtime_bloom_keys.size());
+  const auto &skd = ctx->runtime_bloom_keys[bloom_index];
+  const ScanKey sk = const_cast<ScanKeyData *>(&skd);
+  bool isnull = false;
+  Datum val;
+  std::tie(val, isnull) =
+      group->GetColumnValue(desc, sk->sk_attno - 1, row_index);
+  if (isnull) return true;
+  bloom_filter *bf = (bloom_filter *)DatumGetPointer(sk->sk_argument);
+  return !bloom_lacks_element(bf, (unsigned char *)&val, sizeof(Datum));
+}
+
+bool MicroPartitionRowFilterReader::EvalExprNode(
+    const ExecutionFilterContext *ctx, TupleTableSlot *slot, int expr_index) {
+  return TestRowScanInternal(slot, ctx->estates[expr_index],
+                             ctx->attnos[expr_index]);
+}
+
+// Execute a filter node.
+// During the sampling phase, updates the filter's pass rate statistics.
+bool MicroPartitionRowFilterReader::EvalFilterNode(
+    ExecutionFilterContext *ctx, MicroPartitionReader::Group *group,
+    TupleDesc desc, size_t row_index, TupleTableSlot *slot,
+    ExecutionFilterContext::FilterNode &node, bool update_stats) {
+  bool pass = true;
+  if (node.kind == ExecutionFilterContext::FilterKind::kBloom) {
+    pass = EvalBloomNode(ctx, group, desc, row_index, node.index);
+    if (ctx->ps->instrument && !pass) ctx->ps->instrument->nfilteredPRF += 1;
+  } else {
+    pass = EvalExprNode(ctx, slot, node.index);
+  }
+  if (update_stats) {
+    node.tested++;
+    node.passed += pass ? 1 : 0;
+  }
+  return pass;
+}
+
+// Applies the row filter nodes to the current tuple in two phases: Sampling and
+// Filtering.
+// In the sampling phase, pass rates for each filter expression are collected on
+// the first 64k rows, then the filters are sorted by effectiveness (lower pass
+// rate first) for optimal filtering order.
+// In the filtering phase, filters are applied in the determined order with
+// short-circuit evaluation; a failure in any filter causes immediate rejection
+// of the tuple.
+bool MicroPartitionRowFilterReader::ApplyFiltersWithSampling(
+    ExecutionFilterContext *ctx, MicroPartitionReader::Group *group,
+    TupleDesc desc, size_t row_index, TupleTableSlot *slot) {
+  if (!ctx->sampling) {
+    for (auto &node : ctx->filter_nodes) {
+      if (!EvalFilterNode(ctx, group, desc, row_index, slot, node, false)) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  ctx->sample_rows++;
+  bool all_pass = true;
+  // In the sampling phase, we need to evaluate all filter nodes; if any node
+  // fails, the tuple is rejected.
+  for (auto &node : ctx->filter_nodes) {
+    if (!EvalFilterNode(ctx, group, desc, row_index, slot, node, true)) {
+      all_pass = false;
Review Comment:
Is it meaningful to evaluate the next filter if all_pass is false?
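
For reference, a minimal, self-contained sketch of the two-phase sampling-then-ordering scheme described in the diff's comments. The names here (FilterNodeStats, ApplyWithSampling, kSampleRows) are hypothetical and do not appear in the PR; the real code operates on ExecutionFilterContext and tuple slots instead of plain lambdas.

```cpp
// Toy model of sample-then-sort row filtering; all names are illustrative.
#include <algorithm>
#include <cstddef>
#include <cstdio>
#include <functional>
#include <vector>

struct FilterNodeStats {
  std::function<bool(std::size_t row)> eval;  // stand-in for EvalFilterNode
  std::size_t tested = 0;
  std::size_t passed = 0;
  double PassRate() const {
    return tested ? static_cast<double>(passed) / tested : 1.0;
  }
};

constexpr std::size_t kSampleRows = 64 * 1024;  // "first 64k rows"

bool ApplyWithSampling(std::vector<FilterNodeStats> &nodes, std::size_t row,
                       std::size_t &sampled_rows) {
  if (sampled_rows >= kSampleRows) {
    // Filtering phase: nodes are already ordered by pass rate; short-circuit.
    for (auto &n : nodes)
      if (!n.eval(row)) return false;
    return true;
  }
  // Sampling phase: evaluate every node so each pass rate is measured over
  // the same set of rows, even after the tuple is already known to fail.
  ++sampled_rows;
  bool all_pass = true;
  for (auto &n : nodes) {
    bool pass = n.eval(row);
    ++n.tested;
    n.passed += pass ? 1 : 0;
    all_pass = all_pass && pass;
  }
  if (sampled_rows == kSampleRows) {
    // End of sampling: lower pass rate first, so cheap rejections come early.
    std::sort(nodes.begin(), nodes.end(),
              [](const FilterNodeStats &a, const FilterNodeStats &b) {
                return a.PassRate() < b.PassRate();
              });
  }
  return all_pass;
}

int main() {
  std::vector<FilterNodeStats> nodes = {
      {[](std::size_t row) { return row % 2 == 0; }},   // ~50% pass rate
      {[](std::size_t row) { return row % 10 == 0; }},  // ~10% pass rate
  };
  std::size_t sampled = 0, kept = 0;
  for (std::size_t row = 0; row < 100 * 1024; ++row)
    kept += ApplyWithSampling(nodes, row, sampled) ? 1 : 0;
  std::printf("kept %zu rows\n", kept);
}
```

In this toy version, the sampling loop keeps evaluating the remaining nodes after all_pass turns false so that every node's pass rate is measured independently of the others; the filtering phase then short-circuits in pass-rate order.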