alamb commented on code in PR #13632:
URL: https://github.com/apache/datafusion/pull/13632#discussion_r1868525331
##########
datafusion/core/tests/dataframe/mod.rs:
##########
@@ -2050,6 +2050,139 @@ async fn
test_dataframe_placeholder_missing_param_values() -> Result<()> {
Ok(())
}
+#[tokio::test]
+async fn test_dataframe_placeholder_column_parameter() -> Result<()> {
+ let ctx = SessionContext::new();
+
+ let df = ctx.read_empty().unwrap().select_exprs(&["$1"]).unwrap();
+
+ let logical_plan = df.logical_plan();
+ let formatted = logical_plan.display_indent_schema().to_string();
+ let actual: Vec<&str> = formatted.trim().lines().collect();
+ let expected = vec!["Projection: $1 [$1:Null;N]", " EmptyRelation []"];
+ assert_eq!(
+ expected, actual,
+ "\n\nexpected:\n\n{expected:#?}\nactual:\n\n{actual:#?}\n\n"
+ );
+
+ // The placeholder is not replaced with a value,
+ // so the filter data type is not know, i.e. a = $0.
+ // Therefore, the optimization fails.
+ let optimized_plan = ctx.state().optimize(logical_plan);
+ assert!(optimized_plan.is_err());
Review Comment:
I think this line is redundant -- as `unwrap_err()` will panic if
optimize_plan is not an err
##########
datafusion/expr/src/expr_schema.rs:
##########
@@ -215,13 +215,11 @@ impl ExprSchemable for Expr {
}) => get_result_type(&left.get_type(schema)?, op,
&right.get_type(schema)?),
Expr::Like { .. } | Expr::SimilarTo { .. } =>
Ok(DataType::Boolean),
Expr::Placeholder(Placeholder { data_type, .. }) => {
- data_type.clone().ok_or_else(|| {
- plan_datafusion_err!(
- "Placeholder type could not be resolved. Make sure
that the \
- placeholder is bound to a concrete type, e.g. by
providing \
- parameter values."
- )
- })
+ if let Some(dtype) = data_type {
+ Ok(dtype.clone())
+ } else {
+ Ok(DataType::Null)
Review Comment:
I agree this is a reasonable change (it effectively defers erroring on
unspecified parameters)
Could you add a comment explaining the rationale for future readers?
Something like this perhaps:
```suggestion
// If the placeholder's type hasn't been specified,
treat it as
// null (unspecified placeholders generate an error
during planning)
Ok(DataType::Null)
```
##########
datafusion/core/tests/dataframe/mod.rs:
##########
@@ -2014,7 +2014,7 @@ async fn
test_dataframe_placeholder_missing_param_values() -> Result<()> {
assert!(optimized_plan
.unwrap_err()
.to_string()
- .contains("Placeholder type could not be resolved. Make sure that the
placeholder is bound to a concrete type, e.g. by providing parameter values."));
+ .contains("Placeholder type for '$0' could not be resolved. Make sure
that the placeholder is bound to a concrete type, e.g. by providing parameter
values."));
Review Comment:
👍
##########
datafusion/core/tests/dataframe/mod.rs:
##########
@@ -2050,6 +2050,139 @@ async fn
test_dataframe_placeholder_missing_param_values() -> Result<()> {
Ok(())
}
+#[tokio::test]
+async fn test_dataframe_placeholder_column_parameter() -> Result<()> {
+ let ctx = SessionContext::new();
+
+ let df = ctx.read_empty().unwrap().select_exprs(&["$1"]).unwrap();
+
+ let logical_plan = df.logical_plan();
+ let formatted = logical_plan.display_indent_schema().to_string();
+ let actual: Vec<&str> = formatted.trim().lines().collect();
+ let expected = vec!["Projection: $1 [$1:Null;N]", " EmptyRelation []"];
+ assert_eq!(
+ expected, actual,
+ "\n\nexpected:\n\n{expected:#?}\nactual:\n\n{actual:#?}\n\n"
+ );
+
+ // The placeholder is not replaced with a value,
+ // so the filter data type is not know, i.e. a = $0.
+ // Therefore, the optimization fails.
+ let optimized_plan = ctx.state().optimize(logical_plan);
+ assert!(optimized_plan.is_err());
+ assert!(optimized_plan
+ .unwrap_err()
+ .to_string()
+ .contains("Placeholder type for '$1' could not be resolved. Make sure
that the placeholder is bound to a concrete type, e.g. by providing parameter
values."));
+
+ // Prodiving a parameter value should resolve the error
+ let df = ctx
+ .read_empty()
+ .unwrap()
+ .select_exprs(&["$1"])
+ .unwrap()
+ .with_param_values(vec![("1", ScalarValue::from(3i32))])
+ .unwrap();
+
+ let logical_plan = df.logical_plan();
+ let formatted = logical_plan.display_indent_schema().to_string();
+ let actual: Vec<&str> = formatted.trim().lines().collect();
+ let expected = vec![
+ "Projection: Int32(3) AS $1 [$1:Null;N]",
+ " EmptyRelation []",
+ ];
+ assert_eq!(
+ expected, actual,
+ "\n\nexpected:\n\n{expected:#?}\nactual:\n\n{actual:#?}\n\n"
+ );
+
+ let optimized_plan = ctx.state().optimize(logical_plan);
+ assert!(optimized_plan.is_ok());
+
+ let formatted =
optimized_plan.unwrap().display_indent_schema().to_string();
+ let actual: Vec<&str> = formatted.trim().lines().collect();
+ let expected = vec![
+ "Projection: Int32(3) AS $1 [$1:Int32]",
+ " EmptyRelation []",
+ ];
+ assert_eq!(expected, actual);
+
+ Ok(())
+}
+
+#[tokio::test]
+async fn test_dataframe_placeholder_like_expression() -> Result<()> {
+ let ctx = SessionContext::new();
+
+ let df = ctx
+ .read_empty()
+ .unwrap()
+ .with_column("a", lit("foo"))
+ .unwrap()
+ .filter(col("a").like(placeholder("$1")))
+ .unwrap();
+
+ let logical_plan = df.logical_plan();
+ let formatted = logical_plan.display_indent_schema().to_string();
+ let actual: Vec<&str> = formatted.trim().lines().collect();
+ let expected = vec![
+ "Filter: a LIKE $1 [a:Utf8]",
+ " Projection: Utf8(\"foo\") AS a [a:Utf8]",
+ " EmptyRelation []",
+ ];
+ assert_eq!(
+ expected, actual,
+ "\n\nexpected:\n\n{expected:#?}\nactual:\n\n{actual:#?}\n\n"
+ );
+
+ // The placeholder is not replaced with a value,
+ // so the filter data type is not know, i.e. a = $0.
Review Comment:
```suggestion
// so the filter data type is not known, i.e. a = $0.
```
##########
datafusion/optimizer/src/optimizer.rs:
##########
@@ -357,6 +357,7 @@ impl Optimizer {
{
let start_time = Instant::now();
let options = config.options();
+ plan.validate_parameter_types()?;
Review Comment:
Checking parameter types doesn't seem to logically belong to the optimizer
which is supposed to preserve the semantics of each plan but potentially make
them faster.
https://docs.rs/datafusion/latest/datafusion/optimizer/trait.OptimizerRule.html
> OptimizerRules transforms one
[LogicalPlan](https://docs.rs/datafusion/latest/datafusion/logical_expr/enum.LogicalPlan.html)
into another which computes the same results, but in a potentially more
efficient way. If there are no suitable transformations for the input plan, the
optimizer should simply return it unmodified.
I think the `Analyzer` might be a more appropriate place to put this check:
https://docs.rs/datafusion/latest/datafusion/optimizer/struct.Analyzer.html#method.execute_and_check
##########
datafusion/sql/tests/sql_integration.rs:
##########
@@ -571,10 +571,6 @@ Dml: op=[Insert Into] table=[test_decimal]
"INSERT INTO person (id, first_name, last_name) VALUES ($1, $2, $3, $4)",
"Error during planning: Placeholder $4 refers to a non existent column"
)]
-#[case::placeholder_type_unresolved(
Review Comment:
What happens now in this case? Does it pass successfully?
##########
datafusion/optimizer/src/optimizer.rs:
##########
@@ -357,6 +357,7 @@ impl Optimizer {
{
let start_time = Instant::now();
let options = config.options();
+ plan.validate_parameter_types()?;
Review Comment:
Although thinking about it some more, if we are going to defer the error
anyways, I think the cleanest solution would be to move the error to when it is
needed
When I removed the specific check in the Optimizer and tried to run the
queries, it failed with this error:
```
This feature is not implemented: Physical plan does not support logical
expression Placeholder(Placeholder { id: "$1", data_type: None })
```
Maybe rather than a special check we could simply make this error better
##########
datafusion/core/tests/dataframe/mod.rs:
##########
@@ -2050,6 +2050,139 @@ async fn
test_dataframe_placeholder_missing_param_values() -> Result<()> {
Ok(())
}
+#[tokio::test]
+async fn test_dataframe_placeholder_column_parameter() -> Result<()> {
+ let ctx = SessionContext::new();
+
+ let df = ctx.read_empty().unwrap().select_exprs(&["$1"]).unwrap();
+
+ let logical_plan = df.logical_plan();
+ let formatted = logical_plan.display_indent_schema().to_string();
+ let actual: Vec<&str> = formatted.trim().lines().collect();
+ let expected = vec!["Projection: $1 [$1:Null;N]", " EmptyRelation []"];
+ assert_eq!(
+ expected, actual,
+ "\n\nexpected:\n\n{expected:#?}\nactual:\n\n{actual:#?}\n\n"
+ );
+
+ // The placeholder is not replaced with a value,
+ // so the filter data type is not know, i.e. a = $0.
Review Comment:
```suggestion
// so the filter data type is not known, i.e. a = $0.
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]