discord9 commented on code in PR #19404:
URL: https://github.com/apache/datafusion/pull/19404#discussion_r2658652793
##########
datafusion/physical-plan/src/projection.rs:
##########
@@ -1270,4 +1302,357 @@ mod tests {
);
assert!(stats.total_byte_size.is_exact().unwrap_or(false));
}
+
+ #[test]
+ fn test_filter_pushdown_with_alias() -> Result<()> {
+ let input_schema = Schema::new(vec![Field::new("a", DataType::Int32,
false)]);
+ let input = Arc::new(StatisticsExec::new(
+ Statistics::new_unknown(&input_schema),
+ input_schema.clone(),
+ ));
+
+ // project "a" as "b"
+ let projection = ProjectionExec::try_new(
+ vec![ProjectionExpr {
+ expr: Arc::new(Column::new("a", 0)),
+ alias: "b".to_string(),
+ }],
+ input,
+ )?;
+
+ // filter "b > 5"
+ let filter = Arc::new(BinaryExpr::new(
+ Arc::new(Column::new("b", 0)),
+ Operator::Gt,
+ Arc::new(Literal::new(ScalarValue::Int32(Some(5)))),
+ )) as Arc<dyn PhysicalExpr>;
+
+ let description = projection.gather_filters_for_pushdown(
+ FilterPushdownPhase::Post,
+ vec![filter],
+ &ConfigOptions::default(),
+ )?;
+
+ // Should be converted to "a > 5"
+ // "a" is index 0 in input
+ let expected_filter = Arc::new(BinaryExpr::new(
+ Arc::new(Column::new("a", 0)),
+ Operator::Gt,
+ Arc::new(Literal::new(ScalarValue::Int32(Some(5)))),
+ )) as Arc<dyn PhysicalExpr>;
+
+ assert_eq!(description.self_filters(), vec![vec![]]);
+ let pushed_filters = &description.parent_filters()[0];
+ assert_eq!(
+ format!("{}", pushed_filters[0].predicate),
+ format!("{}", expected_filter)
+ );
+
+ Ok(())
+ }
+
+ #[test]
+ fn test_filter_pushdown_with_multiple_aliases() -> Result<()> {
+ let input_schema = Schema::new(vec![
+ Field::new("a", DataType::Int32, false),
+ Field::new("b", DataType::Int32, false),
+ ]);
+ let input = Arc::new(StatisticsExec::new(
+ Statistics {
+ column_statistics: vec![Default::default();
input_schema.fields().len()],
+ ..Default::default()
+ },
+ input_schema.clone(),
+ ));
+
+ // project "a" as "x", "b" as "y"
+ let projection = ProjectionExec::try_new(
+ vec![
+ ProjectionExpr {
+ expr: Arc::new(Column::new("a", 0)),
+ alias: "x".to_string(),
+ },
+ ProjectionExpr {
+ expr: Arc::new(Column::new("b", 1)),
+ alias: "y".to_string(),
+ },
+ ],
+ input,
+ )?;
+
+ // filter "x > 5"
+ let filter1 = Arc::new(BinaryExpr::new(
+ Arc::new(Column::new("x", 0)),
+ Operator::Gt,
+ Arc::new(Literal::new(ScalarValue::Int32(Some(5)))),
+ )) as Arc<dyn PhysicalExpr>;
+
+ // filter "y < 10"
+ let filter2 = Arc::new(BinaryExpr::new(
+ Arc::new(Column::new("y", 1)),
+ Operator::Lt,
+ Arc::new(Literal::new(ScalarValue::Int32(Some(10)))),
+ )) as Arc<dyn PhysicalExpr>;
+
+ let description = projection.gather_filters_for_pushdown(
+ FilterPushdownPhase::Post,
+ vec![filter1, filter2],
+ &ConfigOptions::default(),
+ )?;
+
+ // Should be converted to "a > 5" and "b < 10"
+ let expected_filter1 = Arc::new(BinaryExpr::new(
+ Arc::new(Column::new("a", 0)),
+ Operator::Gt,
+ Arc::new(Literal::new(ScalarValue::Int32(Some(5)))),
+ )) as Arc<dyn PhysicalExpr>;
+
+ let expected_filter2 = Arc::new(BinaryExpr::new(
+ Arc::new(Column::new("b", 1)),
+ Operator::Lt,
+ Arc::new(Literal::new(ScalarValue::Int32(Some(10)))),
+ )) as Arc<dyn PhysicalExpr>;
+
+ let pushed_filters = &description.parent_filters()[0];
+ assert_eq!(pushed_filters.len(), 2);
+ // Note: The order of filters is preserved
+ assert_eq!(
+ format!("{}", pushed_filters[0].predicate),
+ format!("{}", expected_filter1)
+ );
+ assert_eq!(
+ format!("{}", pushed_filters[1].predicate),
+ format!("{}", expected_filter2)
+ );
+
+ Ok(())
+ }
+
+ #[test]
+ fn test_filter_pushdown_with_mixed_columns() -> Result<()> {
+ let input_schema = Schema::new(vec![
+ Field::new("a", DataType::Int32, false),
+ Field::new("b", DataType::Int32, false),
+ ]);
+ let input = Arc::new(StatisticsExec::new(
+ Statistics {
+ column_statistics: vec![Default::default();
input_schema.fields().len()],
+ ..Default::default()
+ },
+ input_schema.clone(),
+ ));
+
+ // project "a" as "x", "b" as "b" (pass through)
+ let projection = ProjectionExec::try_new(
+ vec![
+ ProjectionExpr {
+ expr: Arc::new(Column::new("a", 0)),
+ alias: "x".to_string(),
+ },
+ ProjectionExpr {
+ expr: Arc::new(Column::new("b", 1)),
+ alias: "b".to_string(),
+ },
+ ],
+ input,
+ )?;
+
+ // filter "x > 5"
+ let filter1 = Arc::new(BinaryExpr::new(
+ Arc::new(Column::new("x", 0)),
+ Operator::Gt,
+ Arc::new(Literal::new(ScalarValue::Int32(Some(5)))),
+ )) as Arc<dyn PhysicalExpr>;
+
+ // filter "b < 10" (using output index 1 which corresponds to 'b')
+ let filter2 = Arc::new(BinaryExpr::new(
+ Arc::new(Column::new("b", 1)),
+ Operator::Lt,
+ Arc::new(Literal::new(ScalarValue::Int32(Some(10)))),
+ )) as Arc<dyn PhysicalExpr>;
+
+ let description = projection.gather_filters_for_pushdown(
+ FilterPushdownPhase::Post,
+ vec![filter1, filter2],
+ &ConfigOptions::default(),
+ )?;
+
+ let pushed_filters = &description.parent_filters()[0];
+ assert_eq!(pushed_filters.len(), 2);
+ // "x" -> "a" (index 0)
+ let expected_filter1 = "a@0 > 5";
+ // "b" -> "b" (index 1)
+ let expected_filter2 = "b@1 < 10";
+
+ assert_eq!(format!("{}", pushed_filters[0].predicate),
expected_filter1);
+ assert_eq!(format!("{}", pushed_filters[1].predicate),
expected_filter2);
+
+ Ok(())
+ }
+
+ #[test]
+ fn test_filter_pushdown_with_complex_expression() -> Result<()> {
+ let input_schema = Schema::new(vec![Field::new("a", DataType::Int32,
false)]);
+ let input = Arc::new(StatisticsExec::new(
+ Statistics {
+ column_statistics: vec![Default::default();
input_schema.fields().len()],
+ ..Default::default()
+ },
+ input_schema.clone(),
+ ));
+
+ // project "a + 1" as "z"
+ let projection = ProjectionExec::try_new(
+ vec![ProjectionExpr {
+ expr: Arc::new(BinaryExpr::new(
+ Arc::new(Column::new("a", 0)),
+ Operator::Plus,
+ Arc::new(Literal::new(ScalarValue::Int32(Some(1)))),
+ )),
+ alias: "z".to_string(),
+ }],
+ input,
+ )?;
+
+ // filter "z > 10"
+ let filter = Arc::new(BinaryExpr::new(
+ Arc::new(Column::new("z", 0)),
+ Operator::Gt,
+ Arc::new(Literal::new(ScalarValue::Int32(Some(10)))),
+ )) as Arc<dyn PhysicalExpr>;
+
+ let description = projection.gather_filters_for_pushdown(
+ FilterPushdownPhase::Post,
+ vec![filter],
+ &ConfigOptions::default(),
+ )?;
+
+ // expand to `a + 1 > 10`
+ let pushed_filters = &description.parent_filters()[0];
+ assert!(matches!(pushed_filters[0].discriminant, PushedDown::Yes));
+ assert_eq!(format!("{}", pushed_filters[0].predicate), "a@0 + 1 > 10");
+
+ Ok(())
+ }
+
+ #[test]
+ fn test_filter_pushdown_with_unknown_column() -> Result<()> {
Review Comment:
> Can you help me understand how an unknown column fits into the picture?
> How do they get created? Why do we need special handling here?
An unknown column seems right when we encounter a column that can't be found
in the input schema, but maybe a better way to handle this is to simply not
collect said filter if an unknown column is encountered?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]