This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new b30c2007c2 Allow place holders like `$1` in more types of queries.  
(#13632)
b30c2007c2 is described below

commit b30c2007c2209bca90f2c285a775f09d9a56be03
Author: Paul J. Davis <[email protected]>
AuthorDate: Wed Dec 4 21:46:25 2024 -0600

    Allow place holders like `$1` in more types of queries.  (#13632)
    
    * Allow place holders in the column list
    
    Previously, a query like `SELECT $1;` would fail to generate a
    LogicalPlan. With this change these queries are now workable. This
    required creating a new LogicalPlan::validate_parametere_types that is
    called from Optimizer::optimize to assert that all parameter types have
    been inferred correctly.
    
    * Remove redundant asserts
    
    * Fix typo in comments
    
    * Add comment explaining DataType::Null
    
    * Move unbound placeholder error to physical-expr
    
    Previously, these errors would occurr during optimization. Now that we
    allow unbound placeholders through the optimizer they fail when creating
    the physical plan instead.
    
    * Fix expected error message
---
 datafusion/core/tests/dataframe/mod.rs         | 172 ++++++++++++++++++++++---
 datafusion/core/tests/sql/select.rs            |  92 +++++++++++++
 datafusion/expr/src/expr_schema.rs             |  14 +-
 datafusion/physical-expr/src/planner.rs        |   5 +-
 datafusion/sql/tests/sql_integration.rs        |   4 -
 datafusion/sqllogictest/test_files/prepare.slt |  38 +++++-
 6 files changed, 293 insertions(+), 32 deletions(-)

diff --git a/datafusion/core/tests/dataframe/mod.rs 
b/datafusion/core/tests/dataframe/mod.rs
index 439aa6147e..7c3c96025b 100644
--- a/datafusion/core/tests/dataframe/mod.rs
+++ b/datafusion/core/tests/dataframe/mod.rs
@@ -1985,6 +1985,7 @@ async fn test_array_agg() -> Result<()> {
 async fn test_dataframe_placeholder_missing_param_values() -> Result<()> {
     let ctx = SessionContext::new();
 
+    // Creating LogicalPlans with placeholders should work.
     let df = ctx
         .read_empty()
         .unwrap()
@@ -2006,17 +2007,16 @@ async fn 
test_dataframe_placeholder_missing_param_values() -> Result<()> {
         "\n\nexpected:\n\n{expected:#?}\nactual:\n\n{actual:#?}\n\n"
     );
 
-    // The placeholder is not replaced with a value,
-    // so the filter data type is not know, i.e. a = $0.
-    // Therefore, the optimization fails.
-    let optimized_plan = ctx.state().optimize(logical_plan);
-    assert!(optimized_plan.is_err());
-    assert!(optimized_plan
-        .unwrap_err()
-        .to_string()
-        .contains("Placeholder type could not be resolved. Make sure that the 
placeholder is bound to a concrete type, e.g. by providing parameter values."));
-
-    // Prodiving a parameter value should resolve the error
+    // Executing LogicalPlans with placeholders that don't have bound values
+    // should fail.
+    let results = df.collect().await;
+    let err_mesg = results.unwrap_err().strip_backtrace();
+    assert_eq!(
+        err_mesg,
+        "Execution error: Placeholder '$0' was not provided a value for 
execution."
+    );
+
+    // Providing a parameter value should resolve the error
     let df = ctx
         .read_empty()
         .unwrap()
@@ -2040,12 +2040,152 @@ async fn 
test_dataframe_placeholder_missing_param_values() -> Result<()> {
         "\n\nexpected:\n\n{expected:#?}\nactual:\n\n{actual:#?}\n\n"
     );
 
-    let optimized_plan = ctx.state().optimize(logical_plan);
-    assert!(optimized_plan.is_ok());
+    // N.B., the test is basically `SELECT 1 as a WHERE a = 3;` which returns 
no results.
+    #[rustfmt::skip]
+    let expected = [
+        "++",
+        "++"
+    ];
+
+    assert_batches_eq!(expected, &df.collect().await.unwrap());
+
+    Ok(())
+}
+
+#[tokio::test]
+async fn test_dataframe_placeholder_column_parameter() -> Result<()> {
+    let ctx = SessionContext::new();
+
+    // Creating LogicalPlans with placeholders should work
+    let df = ctx.read_empty().unwrap().select_exprs(&["$1"]).unwrap();
+    let logical_plan = df.logical_plan();
+    let formatted = logical_plan.display_indent_schema().to_string();
+    let actual: Vec<&str> = formatted.trim().lines().collect();
+
+    #[rustfmt::skip]
+    let expected = vec![
+        "Projection: $1 [$1:Null;N]",
+        "  EmptyRelation []"
+    ];
+
+    assert_eq!(
+        expected, actual,
+        "\n\nexpected:\n\n{expected:#?}\nactual:\n\n{actual:#?}\n\n"
+    );
+
+    // Executing LogicalPlans with placeholders that don't have bound values
+    // should fail.
+    let results = df.collect().await;
+    let err_mesg = results.unwrap_err().strip_backtrace();
+    assert_eq!(
+        err_mesg,
+        "Execution error: Placeholder '$1' was not provided a value for 
execution."
+    );
+
+    // Providing a parameter value should resolve the error
+    let df = ctx
+        .read_empty()
+        .unwrap()
+        .select_exprs(&["$1"])
+        .unwrap()
+        .with_param_values(vec![("1", ScalarValue::from(3i32))])
+        .unwrap();
+
+    let logical_plan = df.logical_plan();
+    let formatted = logical_plan.display_indent_schema().to_string();
+    let actual: Vec<&str> = formatted.trim().lines().collect();
+    let expected = vec![
+        "Projection: Int32(3) AS $1 [$1:Null;N]",
+        "  EmptyRelation []",
+    ];
+    assert_eq!(
+        expected, actual,
+        "\n\nexpected:\n\n{expected:#?}\nactual:\n\n{actual:#?}\n\n"
+    );
+
+    #[rustfmt::skip]
+    let expected = [
+        "+----+",
+        "| $1 |",
+        "+----+",
+        "| 3  |",
+        "+----+"
+    ];
+
+    assert_batches_eq!(expected, &df.collect().await.unwrap());
+
+    Ok(())
+}
+
+#[tokio::test]
+async fn test_dataframe_placeholder_like_expression() -> Result<()> {
+    let ctx = SessionContext::new();
+
+    // Creating LogicalPlans with placeholders should work
+    let df = ctx
+        .read_empty()
+        .unwrap()
+        .with_column("a", lit("foo"))
+        .unwrap()
+        .filter(col("a").like(placeholder("$1")))
+        .unwrap();
+
+    let logical_plan = df.logical_plan();
+    let formatted = logical_plan.display_indent_schema().to_string();
+    let actual: Vec<&str> = formatted.trim().lines().collect();
+    let expected = vec![
+        "Filter: a LIKE $1 [a:Utf8]",
+        "  Projection: Utf8(\"foo\") AS a [a:Utf8]",
+        "    EmptyRelation []",
+    ];
+    assert_eq!(
+        expected, actual,
+        "\n\nexpected:\n\n{expected:#?}\nactual:\n\n{actual:#?}\n\n"
+    );
+
+    // Executing LogicalPlans with placeholders that don't have bound values
+    // should fail.
+    let results = df.collect().await;
+    let err_mesg = results.unwrap_err().strip_backtrace();
+    assert_eq!(
+        err_mesg,
+        "Execution error: Placeholder '$1' was not provided a value for 
execution."
+    );
+
+    // Providing a parameter value should resolve the error
+    let df = ctx
+        .read_empty()
+        .unwrap()
+        .with_column("a", lit("foo"))
+        .unwrap()
+        .filter(col("a").like(placeholder("$1")))
+        .unwrap()
+        .with_param_values(vec![("1", ScalarValue::from("f%"))])
+        .unwrap();
+
+    let logical_plan = df.logical_plan();
+    let formatted = logical_plan.display_indent_schema().to_string();
+    let actual: Vec<&str> = formatted.trim().lines().collect();
+    let expected = vec![
+        "Filter: a LIKE Utf8(\"f%\") [a:Utf8]",
+        "  Projection: Utf8(\"foo\") AS a [a:Utf8]",
+        "    EmptyRelation []",
+    ];
+    assert_eq!(
+        expected, actual,
+        "\n\nexpected:\n\n{expected:#?}\nactual:\n\n{actual:#?}\n\n"
+    );
+
+    #[rustfmt::skip]
+    let expected = [
+        "+-----+",
+        "| a   |",
+        "+-----+",
+        "| foo |",
+        "+-----+"
+    ];
 
-    let actual = optimized_plan.unwrap().display_indent_schema().to_string();
-    let expected = "EmptyRelation [a:Int32]";
-    assert_eq!(expected, actual);
+    assert_batches_eq!(expected, &df.collect().await.unwrap());
 
     Ok(())
 }
diff --git a/datafusion/core/tests/sql/select.rs 
b/datafusion/core/tests/sql/select.rs
index 2e815303e3..6e81bf6410 100644
--- a/datafusion/core/tests/sql/select.rs
+++ b/datafusion/core/tests/sql/select.rs
@@ -229,6 +229,98 @@ async fn test_parameter_invalid_types() -> Result<()> {
     Ok(())
 }
 
+#[tokio::test]
+async fn test_positional_parameter_not_bound() -> Result<()> {
+    let ctx = SessionContext::new();
+    let signed_ints: Int32Array = vec![-1, 0, 1].into();
+    let unsigned_ints: UInt64Array = vec![1, 2, 3].into();
+    let batch = RecordBatch::try_from_iter(vec![
+        ("signed", Arc::new(signed_ints) as ArrayRef),
+        ("unsigned", Arc::new(unsigned_ints) as ArrayRef),
+    ])?;
+    ctx.register_batch("test", batch)?;
+
+    let query = "SELECT signed, unsigned FROM test \
+            WHERE $1 >= signed AND signed <= $2 \
+            AND unsigned <= $3 AND unsigned = $4";
+
+    let results = ctx.sql(query).await?.collect().await;
+
+    assert_eq!(
+        results.unwrap_err().strip_backtrace(),
+        "Execution error: Placeholder '$1' was not provided a value for 
execution."
+    );
+
+    let results = ctx
+        .sql(query)
+        .await?
+        .with_param_values(vec![
+            ScalarValue::from(4_i32),
+            ScalarValue::from(-1_i64),
+            ScalarValue::from(2_i32),
+            ScalarValue::from("1"),
+        ])?
+        .collect()
+        .await?;
+
+    let expected = [
+        "+--------+----------+",
+        "| signed | unsigned |",
+        "+--------+----------+",
+        "| -1     | 1        |",
+        "+--------+----------+",
+    ];
+    assert_batches_sorted_eq!(expected, &results);
+
+    Ok(())
+}
+
+#[tokio::test]
+async fn test_named_parameter_not_bound() -> Result<()> {
+    let ctx = SessionContext::new();
+    let signed_ints: Int32Array = vec![-1, 0, 1].into();
+    let unsigned_ints: UInt64Array = vec![1, 2, 3].into();
+    let batch = RecordBatch::try_from_iter(vec![
+        ("signed", Arc::new(signed_ints) as ArrayRef),
+        ("unsigned", Arc::new(unsigned_ints) as ArrayRef),
+    ])?;
+    ctx.register_batch("test", batch)?;
+
+    let query = "SELECT signed, unsigned FROM test \
+            WHERE $foo >= signed AND signed <= $bar \
+            AND unsigned <= $baz AND unsigned = $str";
+
+    let results = ctx.sql(query).await?.collect().await;
+
+    assert_eq!(
+        results.unwrap_err().strip_backtrace(),
+        "Execution error: Placeholder '$foo' was not provided a value for 
execution."
+    );
+
+    let results = ctx
+        .sql(query)
+        .await?
+        .with_param_values(vec![
+            ("foo", ScalarValue::from(4_i32)),
+            ("bar", ScalarValue::from(-1_i64)),
+            ("baz", ScalarValue::from(2_i32)),
+            ("str", ScalarValue::from("1")),
+        ])?
+        .collect()
+        .await?;
+
+    let expected = [
+        "+--------+----------+",
+        "| signed | unsigned |",
+        "+--------+----------+",
+        "| -1     | 1        |",
+        "+--------+----------+",
+    ];
+    assert_batches_sorted_eq!(expected, &results);
+
+    Ok(())
+}
+
 #[tokio::test]
 async fn test_version_function() {
     let expected_version = format!(
diff --git a/datafusion/expr/src/expr_schema.rs 
b/datafusion/expr/src/expr_schema.rs
index b1a461eca4..3317deafbd 100644
--- a/datafusion/expr/src/expr_schema.rs
+++ b/datafusion/expr/src/expr_schema.rs
@@ -215,13 +215,13 @@ impl ExprSchemable for Expr {
             }) => get_result_type(&left.get_type(schema)?, op, 
&right.get_type(schema)?),
             Expr::Like { .. } | Expr::SimilarTo { .. } => 
Ok(DataType::Boolean),
             Expr::Placeholder(Placeholder { data_type, .. }) => {
-                data_type.clone().ok_or_else(|| {
-                    plan_datafusion_err!(
-                        "Placeholder type could not be resolved. Make sure 
that the \
-                         placeholder is bound to a concrete type, e.g. by 
providing \
-                         parameter values."
-                    )
-                })
+                if let Some(dtype) = data_type {
+                    Ok(dtype.clone())
+                } else {
+                    // If the placeholder's type hasn't been specified, treat 
it as
+                    // null (unspecified placeholders generate an error during 
planning)
+                    Ok(DataType::Null)
+                }
             }
             Expr::Wildcard { .. } => Ok(DataType::Null),
             Expr::GroupingSet(_) => {
diff --git a/datafusion/physical-expr/src/planner.rs 
b/datafusion/physical-expr/src/planner.rs
index add6c18b32..906ca9fd10 100644
--- a/datafusion/physical-expr/src/planner.rs
+++ b/datafusion/physical-expr/src/planner.rs
@@ -28,7 +28,7 @@ use datafusion_common::{
     exec_err, not_impl_err, plan_err, DFSchema, Result, ScalarValue, 
ToDFSchema,
 };
 use datafusion_expr::execution_props::ExecutionProps;
-use datafusion_expr::expr::{Alias, Cast, InList, ScalarFunction};
+use datafusion_expr::expr::{Alias, Cast, InList, Placeholder, ScalarFunction};
 use datafusion_expr::var_provider::is_system_variables;
 use datafusion_expr::var_provider::VarType;
 use datafusion_expr::{
@@ -361,6 +361,9 @@ pub fn create_physical_expr(
                 expressions::in_list(value_expr, list_exprs, negated, 
input_schema)
             }
         },
+        Expr::Placeholder(Placeholder { id, .. }) => {
+            exec_err!("Placeholder '{id}' was not provided a value for 
execution.")
+        }
         other => {
             not_impl_err!("Physical plan does not support logical expression 
{other:?}")
         }
diff --git a/datafusion/sql/tests/sql_integration.rs 
b/datafusion/sql/tests/sql_integration.rs
index 8f2325fa2d..8c2d8ebad4 100644
--- a/datafusion/sql/tests/sql_integration.rs
+++ b/datafusion/sql/tests/sql_integration.rs
@@ -571,10 +571,6 @@ Dml: op=[Insert Into] table=[test_decimal]
     "INSERT INTO person (id, first_name, last_name) VALUES ($1, $2, $3, $4)",
     "Error during planning: Placeholder $4 refers to a non existent column"
 )]
-#[case::placeholder_type_unresolved(
-    "INSERT INTO person (id, first_name, last_name) VALUES ($2, $4, $6)",
-    "Error during planning: Placeholder type could not be resolved. Make sure 
that the placeholder is bound to a concrete type, e.g. by providing parameter 
values."
-)]
 #[case::placeholder_type_unresolved(
     "INSERT INTO person (id, first_name, last_name) VALUES ($id, $first_name, 
$last_name)",
     "Error during planning: Can't parse placeholder: $id"
diff --git a/datafusion/sqllogictest/test_files/prepare.slt 
b/datafusion/sqllogictest/test_files/prepare.slt
index c3bd15c086..5d0f417640 100644
--- a/datafusion/sqllogictest/test_files/prepare.slt
+++ b/datafusion/sqllogictest/test_files/prepare.slt
@@ -53,10 +53,6 @@ PREPARE my_plan(INT, INT) AS SELECT 1 + $1;
 statement error SQL error: ParserError
 PREPARE my_plan(INT) AS SELECT id, age  FROM person WHERE age is $1;
 
-# TODO: allow prepare without specifying data types
-statement error Placeholder type could not be resolved
-PREPARE my_plan AS SELECT $1;
-
 # #######################
 # Test prepare and execute statements
 
@@ -68,6 +64,40 @@ EXECUTE my_plan('Foo', 'Bar');
 statement error Prepared statement \'my_plan\' does not exist
 DEALLOCATE my_plan;
 
+# Allow prepare without specifying data types
+statement ok
+PREPARE my_plan AS SELECT $1;
+
+query T
+EXECUTE my_plan('Foo');
+----
+Foo
+
+statement ok
+DEALLOCATE my_plan
+
+# Allow prepare col LIKE $1
+statement ok
+PREPARE my_plan AS SELECT * FROM person WHERE first_name LIKE $1;
+
+query ITTITRPI rowsort
+EXECUTE my_plan('j%');
+----
+1 jane smith 20 MA 100000.45 2000-11-12T00:00:00 99
+
+statement ok
+DEALLOCATE my_plan
+
+# Check for missing parameters
+statement ok
+PREPARE my_plan AS SELECT * FROM person WHERE id < $1;
+
+statement error No value found for placeholder with id \$1
+EXECUTE my_plan
+
+statement ok
+DEALLOCATE my_plan
+
 statement ok
 PREPARE my_plan(STRING, STRING) AS SELECT * FROM (VALUES(1, $1), (2, $2)) AS t 
(num, letter);
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to