This is an automated email from the ASF dual-hosted git repository. alamb pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push: new 0b7186ebaf Pass the input schema to stats_projection for ProjectionExpr (#17123) 0b7186ebaf is described below commit 0b7186ebaf01ce7dc0e6eacc026bd48b0f534be6 Author: Haresh Khanna <kh.har...@gmail.com> AuthorDate: Wed Aug 13 13:45:07 2025 +0100 Pass the input schema to stats_projection for ProjectionExpr (#17123) * Pass the input schema to stats_projection for ProjectionExpr * Adds a test * fmt * clippy --------- Co-authored-by: Andrew Lamb <and...@nerdnetworks.org> --- datafusion/physical-plan/src/projection.rs | 88 +++++++++++++++++++++++++++++- 1 file changed, 86 insertions(+), 2 deletions(-) diff --git a/datafusion/physical-plan/src/projection.rs b/datafusion/physical-plan/src/projection.rs index a29f4aeb40..4c0c103a37 100644 --- a/datafusion/physical-plan/src/projection.rs +++ b/datafusion/physical-plan/src/projection.rs @@ -249,7 +249,7 @@ impl ExecutionPlan for ProjectionExec { Ok(stats_projection( input_stats, self.expr.iter().map(|(e, _)| Arc::clone(e)), - Arc::clone(&self.schema), + Arc::clone(&self.input.schema()), )) } @@ -1030,8 +1030,10 @@ mod tests { use crate::common::collect; use crate::test; + use crate::test::exec::StatisticsExec; - use arrow::datatypes::DataType; + use arrow::datatypes::{DataType, Field, Schema}; + use datafusion_common::stats::{ColumnStatistics, Precision, Statistics}; use datafusion_common::ScalarValue; use datafusion_expr::Operator; @@ -1230,4 +1232,86 @@ mod tests { assert_eq!(result, expected); } + + #[test] + fn test_projection_statistics_uses_input_schema() { + let input_schema = Schema::new(vec![ + Field::new("a", DataType::Int32, false), + Field::new("b", DataType::Int32, false), + Field::new("c", DataType::Int32, false), + Field::new("d", DataType::Int32, false), + Field::new("e", DataType::Int32, false), + Field::new("f", DataType::Int32, false), + ]); + + let input_statistics = Statistics { + num_rows: Precision::Exact(10), + column_statistics: vec![ + ColumnStatistics { + min_value: Precision::Exact(ScalarValue::Int32(Some(1))), + max_value: Precision::Exact(ScalarValue::Int32(Some(100))), + ..Default::default() + }, + ColumnStatistics { + min_value: Precision::Exact(ScalarValue::Int32(Some(5))), + max_value: Precision::Exact(ScalarValue::Int32(Some(50))), + ..Default::default() + }, + ColumnStatistics { + min_value: Precision::Exact(ScalarValue::Int32(Some(10))), + max_value: Precision::Exact(ScalarValue::Int32(Some(40))), + ..Default::default() + }, + ColumnStatistics { + min_value: Precision::Exact(ScalarValue::Int32(Some(20))), + max_value: Precision::Exact(ScalarValue::Int32(Some(30))), + ..Default::default() + }, + ColumnStatistics { + min_value: Precision::Exact(ScalarValue::Int32(Some(21))), + max_value: Precision::Exact(ScalarValue::Int32(Some(29))), + ..Default::default() + }, + ColumnStatistics { + min_value: Precision::Exact(ScalarValue::Int32(Some(24))), + max_value: Precision::Exact(ScalarValue::Int32(Some(26))), + ..Default::default() + }, + ], + ..Default::default() + }; + + let input = Arc::new(StatisticsExec::new(input_statistics, input_schema)); + + // Create projection expressions that reference columns from the input schema and the length + // of output schema columns < input schema columns and hence if we use the last few columns + // from the input schema in the expressions here, bounds_check would fail on them if output + // schema is supplied to the partitions_statistics method. + let exprs: Vec<(Arc<dyn PhysicalExpr>, String)> = vec![ + ( + Arc::new(Column::new("c", 2)) as Arc<dyn PhysicalExpr>, + "c_renamed".to_string(), + ), + ( + Arc::new(BinaryExpr::new( + Arc::new(Column::new("e", 4)), + Operator::Plus, + Arc::new(Column::new("f", 5)), + )) as Arc<dyn PhysicalExpr>, + "e_plus_f".to_string(), + ), + ]; + + let projection = ProjectionExec::try_new(exprs, input).unwrap(); + + let stats = projection.partition_statistics(None).unwrap(); + + assert_eq!(stats.num_rows, Precision::Exact(10)); + assert_eq!( + stats.column_statistics.len(), + 2, + "Expected 2 columns in projection statistics" + ); + assert!(stats.total_byte_size.is_exact().unwrap_or(false)); + } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@datafusion.apache.org For additional commands, e-mail: commits-h...@datafusion.apache.org