isidentical commented on code in PR #4162:
URL: https://github.com/apache/arrow-datafusion/pull/4162#discussion_r1020612588
##########
datafusion/core/src/physical_plan/filter.rs:
##########
@@ -380,4 +403,108 @@ mod tests {
Ok(())
}
+
+ #[tokio::test]
+ async fn test_filter_statistics_basic_expr() -> Result<()> {
+ // Table:
+ // a: min=1, max=100
+ let schema = Schema::new(vec![Field::new("a", DataType::Int32,
false)]);
+ let input = Arc::new(StatisticsExec::new(
+ Statistics {
+ num_rows: Some(100),
+ column_statistics: Some(vec![ColumnStatistics {
+ min_value: Some(ScalarValue::Int32(Some(1))),
+ max_value: Some(ScalarValue::Int32(Some(100))),
+ ..Default::default()
+ }]),
+ ..Default::default()
+ },
+ schema.clone(),
+ ));
+
+ // a <= 25
+ let predicate: Arc<dyn PhysicalExpr> =
+ binary(col("a", &schema)?, Operator::LtEq, lit(25i32), &schema)?;
+
+ // WHERE a <= 25
+ let filter: Arc<dyn ExecutionPlan> =
+ Arc::new(FilterExec::try_new(predicate, input)?);
+
+ let statistics = filter.statistics();
+ assert_eq!(statistics.num_rows, Some(25));
+
+ Ok(())
+ }
+
+ #[tokio::test]
+ #[ignore]
+ // This test requires propagation of column boundaries from the comparison
analysis
+ // to the analysis context. This is not yet implemented.
+ async fn test_filter_statistics_column_level_basic_expr() -> Result<()> {
Review Comment:
I made a PoC (https://github.com/isidentical/arrow-datafusion/pull/5) that
essentially implements a much simpler version of the `apply()` API from the
previous iteration without adding any new methods. I'd be really interested in
your feedback on this.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]