This is an automated email from the ASF dual-hosted git repository.
comphead pushed a commit to branch branch-53
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/branch-53 by this push:
new ed25cc2707 [branch-53] Fix FilterExec converting Absent column stats
to Exact(NULL) (#20391) (#20892)
ed25cc2707 is described below
commit ed25cc27076f2d96b217673feaf6bbb7ee0f72f1
Author: Andrew Lamb <[email protected]>
AuthorDate: Thu Mar 12 14:57:07 2026 -0400
[branch-53] Fix FilterExec converting Absent column stats to Exact(NULL)
(#20391) (#20892)
- Part of https://github.com/apache/datafusion/issues/19692
- Closes https://github.com/apache/datafusion/issues/20388 on branch-53
This PR:
- Backports https://github.com/apache/datafusion/pull/20391 from
@fwojciec to the branch-53 line
Co-authored-by: Filip Wojciechowski <[email protected]>
Co-authored-by: Claude Opus 4.6 <[email protected]>
---
datafusion/physical-plan/src/filter.rs | 59 +++++++++++++++++++++++++++++++---
1 file changed, 54 insertions(+), 5 deletions(-)
diff --git a/datafusion/physical-plan/src/filter.rs b/datafusion/physical-plan/src/filter.rs
index fac6fa1e7c..7bc5d346cd 100644
--- a/datafusion/physical-plan/src/filter.rs
+++ b/datafusion/physical-plan/src/filter.rs
@@ -757,6 +757,21 @@ impl EmbeddedProjection for FilterExec {
}
}
+/// Converts an interval bound to a [`Precision`] value. NULL bounds (which
+/// represent "unbounded" in the interval type) map to [`Precision::Absent`].
+fn interval_bound_to_precision(
+ bound: ScalarValue,
+ is_exact: bool,
+) -> Precision<ScalarValue> {
+ if bound.is_null() {
+ Precision::Absent
+ } else if is_exact {
+ Precision::Exact(bound)
+ } else {
+ Precision::Inexact(bound)
+ }
+}
+
/// This function ensures that all bounds in the `ExprBoundaries` vector are
/// converted to closed bounds. If a lower/upper bound is initially open, it
/// is adjusted by using the next/previous value for its data type to convert
@@ -795,11 +810,9 @@ fn collect_new_statistics(
};
};
let (lower, upper) = interval.into_bounds();
- let (min_value, max_value) = if lower.eq(&upper) {
- (Precision::Exact(lower), Precision::Exact(upper))
- } else {
- (Precision::Inexact(lower), Precision::Inexact(upper))
- };
+ let is_exact = !lower.is_null() && !upper.is_null() && lower == upper;
+ let min_value = interval_bound_to_precision(lower, is_exact);
+ let max_value = interval_bound_to_precision(upper, is_exact);
ColumnStatistics {
 null_count: input_column_stats[idx].null_count.to_inexact(),
max_value,
@@ -2141,4 +2154,40 @@ mod tests {
Ok(())
}
+
+ /// Columns with Absent min/max statistics should remain Absent after
+ /// FilterExec.
+ #[tokio::test]
+ async fn test_filter_statistics_absent_columns_stay_absent() -> Result<()> {
+ let schema = Schema::new(vec![
+ Field::new("a", DataType::Int32, false),
+ Field::new("b", DataType::Int32, false),
+ ]);
+ let input = Arc::new(StatisticsExec::new(
+ Statistics {
+ num_rows: Precision::Inexact(1000),
+ total_byte_size: Precision::Absent,
+ column_statistics: vec![
+ ColumnStatistics::default(),
+ ColumnStatistics::default(),
+ ],
+ },
+ schema.clone(),
+ ));
+
+ let predicate = Arc::new(BinaryExpr::new(
+ Arc::new(Column::new("a", 0)),
+ Operator::Eq,
+ Arc::new(Literal::new(ScalarValue::Int32(Some(42)))),
+ ));
+ let filter: Arc<dyn ExecutionPlan> =
+ Arc::new(FilterExec::try_new(predicate, input)?);
+
+ let statistics = filter.partition_statistics(None)?;
+ let col_b_stats = &statistics.column_statistics[1];
+ assert_eq!(col_b_stats.min_value, Precision::Absent);
+ assert_eq!(col_b_stats.max_value, Precision::Absent);
+
+ Ok(())
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]