alamb commented on code in PR #17123:
URL: https://github.com/apache/datafusion/pull/17123#discussion_r2270601725


##########
datafusion/physical-plan/src/projection.rs:
##########
@@ -1230,4 +1232,83 @@ mod tests {
 
         assert_eq!(result, expected);
     }
+
+
+    #[test]
+    fn test_projection_statistics_uses_input_schema() {
+        let input_schema = Schema::new(vec![
+            Field::new("a", DataType::Int32, false),
+            Field::new("b", DataType::Int32, false),
+            Field::new("c", DataType::Int32, false),
+            Field::new("d", DataType::Int32, false),
+            Field::new("e", DataType::Int32, false),
+            Field::new("f", DataType::Int32, false),
+        ]);
+
+        let input_statistics = Statistics {
+            num_rows: Precision::Exact(10),
+            column_statistics: vec![
+                ColumnStatistics {
+                    min_value: Precision::Exact(ScalarValue::Int32(Some(1))),
+                    max_value: Precision::Exact(ScalarValue::Int32(Some(100))),
+                    ..Default::default()
+                },
+                ColumnStatistics {
+                    min_value: Precision::Exact(ScalarValue::Int32(Some(5))),
+                    max_value: Precision::Exact(ScalarValue::Int32(Some(50))),
+                    ..Default::default()
+                },
+                ColumnStatistics {
+                    min_value: Precision::Exact(ScalarValue::Int32(Some(10))),
+                    max_value: Precision::Exact(ScalarValue::Int32(Some(40))),
+                    ..Default::default()
+                },
+                ColumnStatistics {
+                    min_value: Precision::Exact(ScalarValue::Int32(Some(20))),
+                    max_value: Precision::Exact(ScalarValue::Int32(Some(30))),
+                    ..Default::default()
+                },
+                ColumnStatistics {
+                    min_value: Precision::Exact(ScalarValue::Int32(Some(21))),
+                    max_value: Precision::Exact(ScalarValue::Int32(Some(29))),
+                    ..Default::default()
+                },
+                ColumnStatistics {
+                    min_value: Precision::Exact(ScalarValue::Int32(Some(24))),
+                    max_value: Precision::Exact(ScalarValue::Int32(Some(26))),
+                    ..Default::default()
+                },
+            ],
+            ..Default::default()
+        };
+
+        let input = Arc::new(StatisticsExec::new(input_statistics, 
input_schema));
+
+        // Create projection expressions that reference columns from the input 
schema and the length
+        // of output schema columns < input schema columns and hence if we use 
the last few columns
+        // from the input schema in the expressions here, bounds_check would 
fail on them if output
+        // schema is supplied to the partitions_statistics method.
+        let exprs: Vec<(Arc<dyn PhysicalExpr>, String)> = vec![
+            (
+                Arc::new(Column::new("c", 2)) as Arc<dyn PhysicalExpr>,
+                "c_renamed".to_string(),
+            ),
+            (
+                Arc::new(BinaryExpr::new(
+                    Arc::new(Column::new("e", 4)),
+                    Operator::Plus,
+                    Arc::new(Column::new("f", 5)),
+                )) as Arc<dyn PhysicalExpr>,
+                "e_plus_f".to_string(),
+            ),
+        ];
+
+        let projection = ProjectionExec::try_new(exprs, input).unwrap();
+
+        let stats = projection.partition_statistics(None).unwrap();
+
+        assert_eq!(stats.num_rows, Precision::Exact(10));
+        assert_eq!(stats.column_statistics.len(), 2, "Expected 2 columns in 
projection statistics");
+        assert_eq!(stats.total_byte_size.is_exact().unwrap_or(false), true);

Review Comment:
   I verified this test covers the change by running the test without the code 
change and it fails like this
   
   ```
   assertion `left == right` failed
     left: false
    right: true
   
   Left:  false
   Right: true
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org
For additional commands, e-mail: github-h...@datafusion.apache.org

Reply via email to