alamb commented on code in PR #13632:
URL: https://github.com/apache/datafusion/pull/13632#discussion_r1868525331


##########
datafusion/core/tests/dataframe/mod.rs:
##########
@@ -2050,6 +2050,139 @@ async fn 
test_dataframe_placeholder_missing_param_values() -> Result<()> {
     Ok(())
 }
 
+#[tokio::test]
+async fn test_dataframe_placeholder_column_parameter() -> Result<()> {
+    let ctx = SessionContext::new();
+
+    let df = ctx.read_empty().unwrap().select_exprs(&["$1"]).unwrap();
+
+    let logical_plan = df.logical_plan();
+    let formatted = logical_plan.display_indent_schema().to_string();
+    let actual: Vec<&str> = formatted.trim().lines().collect();
+    let expected = vec!["Projection: $1 [$1:Null;N]", "  EmptyRelation []"];
+    assert_eq!(
+        expected, actual,
+        "\n\nexpected:\n\n{expected:#?}\nactual:\n\n{actual:#?}\n\n"
+    );
+
+    // The placeholder is not replaced with a value,
+    // so the filter data type is not know, i.e. a = $0.
+    // Therefore, the optimization fails.
+    let optimized_plan = ctx.state().optimize(logical_plan);
+    assert!(optimized_plan.is_err());

Review Comment:
   I think this line is redundant -- as `unwrap_err()` will panic if 
optimize_plan is not an err



##########
datafusion/expr/src/expr_schema.rs:
##########
@@ -215,13 +215,11 @@ impl ExprSchemable for Expr {
             }) => get_result_type(&left.get_type(schema)?, op, 
&right.get_type(schema)?),
             Expr::Like { .. } | Expr::SimilarTo { .. } => 
Ok(DataType::Boolean),
             Expr::Placeholder(Placeholder { data_type, .. }) => {
-                data_type.clone().ok_or_else(|| {
-                    plan_datafusion_err!(
-                        "Placeholder type could not be resolved. Make sure 
that the \
-                         placeholder is bound to a concrete type, e.g. by 
providing \
-                         parameter values."
-                    )
-                })
+                if let Some(dtype) = data_type {
+                    Ok(dtype.clone())
+                } else {
+                    Ok(DataType::Null)

Review Comment:
   I agree this is a reasonable change (it effectively defers erroring on 
unspecified parameters)
   
   Could you add a comment explaining the rationale for future readers? 
Something like this perhaps:
   
   ```suggestion
                       // If the placeholder's type hasn't been specified, 
treat it as
                       // null (unspecified placeholders generate an error 
during planning)
                       Ok(DataType::Null)
   ```



##########
datafusion/core/tests/dataframe/mod.rs:
##########
@@ -2014,7 +2014,7 @@ async fn 
test_dataframe_placeholder_missing_param_values() -> Result<()> {
     assert!(optimized_plan
         .unwrap_err()
         .to_string()
-        .contains("Placeholder type could not be resolved. Make sure that the 
placeholder is bound to a concrete type, e.g. by providing parameter values."));
+        .contains("Placeholder type for '$0' could not be resolved. Make sure 
that the placeholder is bound to a concrete type, e.g. by providing parameter 
values."));

Review Comment:
   👍 



##########
datafusion/core/tests/dataframe/mod.rs:
##########
@@ -2050,6 +2050,139 @@ async fn 
test_dataframe_placeholder_missing_param_values() -> Result<()> {
     Ok(())
 }
 
+#[tokio::test]
+async fn test_dataframe_placeholder_column_parameter() -> Result<()> {
+    let ctx = SessionContext::new();
+
+    let df = ctx.read_empty().unwrap().select_exprs(&["$1"]).unwrap();
+
+    let logical_plan = df.logical_plan();
+    let formatted = logical_plan.display_indent_schema().to_string();
+    let actual: Vec<&str> = formatted.trim().lines().collect();
+    let expected = vec!["Projection: $1 [$1:Null;N]", "  EmptyRelation []"];
+    assert_eq!(
+        expected, actual,
+        "\n\nexpected:\n\n{expected:#?}\nactual:\n\n{actual:#?}\n\n"
+    );
+
+    // The placeholder is not replaced with a value,
+    // so the filter data type is not know, i.e. a = $0.
+    // Therefore, the optimization fails.
+    let optimized_plan = ctx.state().optimize(logical_plan);
+    assert!(optimized_plan.is_err());
+    assert!(optimized_plan
+        .unwrap_err()
+        .to_string()
+        .contains("Placeholder type for '$1' could not be resolved. Make sure 
that the placeholder is bound to a concrete type, e.g. by providing parameter 
values."));
+
+    // Prodiving a parameter value should resolve the error
+    let df = ctx
+        .read_empty()
+        .unwrap()
+        .select_exprs(&["$1"])
+        .unwrap()
+        .with_param_values(vec![("1", ScalarValue::from(3i32))])
+        .unwrap();
+
+    let logical_plan = df.logical_plan();
+    let formatted = logical_plan.display_indent_schema().to_string();
+    let actual: Vec<&str> = formatted.trim().lines().collect();
+    let expected = vec![
+        "Projection: Int32(3) AS $1 [$1:Null;N]",
+        "  EmptyRelation []",
+    ];
+    assert_eq!(
+        expected, actual,
+        "\n\nexpected:\n\n{expected:#?}\nactual:\n\n{actual:#?}\n\n"
+    );
+
+    let optimized_plan = ctx.state().optimize(logical_plan);
+    assert!(optimized_plan.is_ok());
+
+    let formatted = 
optimized_plan.unwrap().display_indent_schema().to_string();
+    let actual: Vec<&str> = formatted.trim().lines().collect();
+    let expected = vec![
+        "Projection: Int32(3) AS $1 [$1:Int32]",
+        "  EmptyRelation []",
+    ];
+    assert_eq!(expected, actual);
+
+    Ok(())
+}
+
+#[tokio::test]
+async fn test_dataframe_placeholder_like_expression() -> Result<()> {
+    let ctx = SessionContext::new();
+
+    let df = ctx
+        .read_empty()
+        .unwrap()
+        .with_column("a", lit("foo"))
+        .unwrap()
+        .filter(col("a").like(placeholder("$1")))
+        .unwrap();
+
+    let logical_plan = df.logical_plan();
+    let formatted = logical_plan.display_indent_schema().to_string();
+    let actual: Vec<&str> = formatted.trim().lines().collect();
+    let expected = vec![
+        "Filter: a LIKE $1 [a:Utf8]",
+        "  Projection: Utf8(\"foo\") AS a [a:Utf8]",
+        "    EmptyRelation []",
+    ];
+    assert_eq!(
+        expected, actual,
+        "\n\nexpected:\n\n{expected:#?}\nactual:\n\n{actual:#?}\n\n"
+    );
+
+    // The placeholder is not replaced with a value,
+    // so the filter data type is not know, i.e. a = $0.

Review Comment:
   ```suggestion
       // so the filter data type is not known, i.e. a = $0.
   ```



##########
datafusion/optimizer/src/optimizer.rs:
##########
@@ -357,6 +357,7 @@ impl Optimizer {
     {
         let start_time = Instant::now();
         let options = config.options();
+        plan.validate_parameter_types()?;

Review Comment:
   Checking parameter types doesn't seem to logically belong to the optimizer 
which is supposed to preserve the semantics of each plan but potentially make 
them faster.
   
   
https://docs.rs/datafusion/latest/datafusion/optimizer/trait.OptimizerRule.html
   
   > OptimizerRules transforms one 
[LogicalPlan](https://docs.rs/datafusion/latest/datafusion/logical_expr/enum.LogicalPlan.html)
 into another which computes the same results, but in a potentially more 
efficient way. If there are no suitable transformations for the input plan, the 
optimizer should simply return it unmodified.
   
   I think the `Analyzer` might be a more appropriate place to put this check:
   
   
https://docs.rs/datafusion/latest/datafusion/optimizer/struct.Analyzer.html#method.execute_and_check



##########
datafusion/sql/tests/sql_integration.rs:
##########
@@ -571,10 +571,6 @@ Dml: op=[Insert Into] table=[test_decimal]
     "INSERT INTO person (id, first_name, last_name) VALUES ($1, $2, $3, $4)",
     "Error during planning: Placeholder $4 refers to a non existent column"
 )]
-#[case::placeholder_type_unresolved(

Review Comment:
   What happens now in this case? Does it pass successfully?



##########
datafusion/optimizer/src/optimizer.rs:
##########
@@ -357,6 +357,7 @@ impl Optimizer {
     {
         let start_time = Instant::now();
         let options = config.options();
+        plan.validate_parameter_types()?;

Review Comment:
   Although thinking about it some more, if we are going to defer the error 
anyways, I think the cleanest solution would be to move the error to when it is 
needed
   
   When I removed the specific check in the Optimizer and tried to run the 
queries, it failed with this error:
   
   ```
   This feature is not implemented: Physical plan does not support logical 
expression Placeholder(Placeholder { id: "$1", data_type: None })
   ```
   
   Maybe rather than a special check we could simply make this error better



##########
datafusion/core/tests/dataframe/mod.rs:
##########
@@ -2050,6 +2050,139 @@ async fn 
test_dataframe_placeholder_missing_param_values() -> Result<()> {
     Ok(())
 }
 
+#[tokio::test]
+async fn test_dataframe_placeholder_column_parameter() -> Result<()> {
+    let ctx = SessionContext::new();
+
+    let df = ctx.read_empty().unwrap().select_exprs(&["$1"]).unwrap();
+
+    let logical_plan = df.logical_plan();
+    let formatted = logical_plan.display_indent_schema().to_string();
+    let actual: Vec<&str> = formatted.trim().lines().collect();
+    let expected = vec!["Projection: $1 [$1:Null;N]", "  EmptyRelation []"];
+    assert_eq!(
+        expected, actual,
+        "\n\nexpected:\n\n{expected:#?}\nactual:\n\n{actual:#?}\n\n"
+    );
+
+    // The placeholder is not replaced with a value,
+    // so the filter data type is not know, i.e. a = $0.

Review Comment:
   ```suggestion
       // so the filter data type is not known, i.e. a = $0.
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to