dqkqd commented on code in PR #18286:
URL: https://github.com/apache/datafusion/pull/18286#discussion_r2484228630


##########
datafusion/core/tests/sql/select.rs:
##########
@@ -343,26 +342,53 @@ async fn test_query_parameters_with_metadata() -> 
Result<()> {
         ]))
         .unwrap();
 
-    // df_with_params_replaced.schema() is not correct here
-    // https://github.com/apache/datafusion/issues/18102
-    let batches = df_with_params_replaced.clone().collect().await.unwrap();
-    let schema = batches[0].schema();
-
+    let schema = df_with_params_replaced.schema();
     assert_eq!(schema.field(0).data_type(), &DataType::UInt32);
     assert_eq!(schema.field(0).metadata(), &metadata1);
     assert_eq!(schema.field(1).data_type(), &DataType::Utf8);
     assert_eq!(schema.field(1).metadata(), &metadata2);
 
-    assert_batches_eq!(
-        [
-            "+----+-----+",
-            "| $1 | $2  |",
-            "+----+-----+",
-            "| 1  | two |",
-            "+----+-----+",
-        ],
-        &batches
-    );
+    let batches = df_with_params_replaced.collect().await.unwrap();
+    assert_snapshot!(batches_to_sort_string(&batches), @r"
+    +----+-----+
+    | $1 | $2  |
+    +----+-----+
+    | 1  | two |
+    +----+-----+
+    ");
+
+    Ok(())
+}
+
+/// Test for https://github.com/apache/datafusion/issues/18102
+#[tokio::test]
+async fn test_query_parameters_in_values_list_relation() -> Result<()> {

Review Comment:
   The issue mentioned two queries.
   This tests the first one: `SELECT a, b FROM (VALUES ($1, $2)) AS t(a, b)`
   The second one is verified in the above test: `SELECT $1, $2`



##########
datafusion/expr/src/logical_plan/plan.rs:
##########
@@ -1471,7 +1509,10 @@ impl LogicalPlan {
                     // Preserve name to avoid breaking column references to 
this expression
                     Ok(transformed_expr.update_data(|expr| 
original_name.restore(expr)))
                 }
-            })
+            })?
+            // always recompute the schema to ensure the changed in the 
schema's field should be
+            // poplulated to the plan's parent
+            .map_data(|plan| plan.recompute_schema())

Review Comment:
   This needs to computed even though plan isn't transformed.
   Otherwise changes in a child's schema cannot be populated to
   its ancestors.



##########
datafusion/expr/src/logical_plan/plan.rs:
##########
@@ -4825,6 +4867,82 @@ mod tests {
             .expect_err("prepared field metadata mismatch unexpectedly 
succeeded");
     }
 
+    #[test]
+    fn test_replace_placeholder_values_list_relation_valid_schema() {
+        // SELECT a, b, c FROM (VALUES (1, $1, $2 + $3) AS t(a, b, c);
+        let plan = LogicalPlanBuilder::values(vec![vec![
+            lit(1),
+            placeholder("$1"),
+            binary_expr(placeholder("$2"), Operator::Plus, placeholder("$3")),
+        ]])
+        .unwrap()
+        .project(vec![
+            col("column1").alias("a"),
+            col("column2").alias("b"),
+            col("column3").alias("c"),
+        ])
+        .unwrap()
+        .alias("t")
+        .unwrap()
+        .project(vec![col("a"), col("b"), col("c")])
+        .unwrap()
+        .build()
+        .unwrap();
+
+        // original
+        assert_snapshot!(plan.display_indent_schema(), @r"
+        Projection: t.a, t.b, t.c [a:Int32;N, b:Null;N, c:Int64;N]
+          SubqueryAlias: t [a:Int32;N, b:Null;N, c:Int64;N]
+            Projection: column1 AS a, column2 AS b, column3 AS c [a:Int32;N, 
b:Null;N, c:Int64;N]
+              Values: (Int32(1), $1, $2 + $3) [column1:Int32;N, 
column2:Null;N, column3:Int64;N]
+        ");
+
+        let plan = plan
+            .with_param_values(vec![
+                ScalarValue::from(1i32),
+                ScalarValue::from(2i32),
+                ScalarValue::from(3i32),
+            ])
+            .unwrap();
+
+        // replaced
+        assert_snapshot!(plan.display_indent_schema(), @r"
+        Projection: t.a, t.b, t.c [a:Int32;N, b:Int32;N, c:Int64;N]

Review Comment:
   Without the code change, this line would be:
   ```
           Projection: t.a, t.b, t.c [a:Int32;N, b:Null;N, c:Int64;N]
   ```
   



##########
datafusion/expr/src/logical_plan/plan.rs:
##########
@@ -633,8 +633,46 @@ impl LogicalPlan {
             LogicalPlan::Dml(_) => Ok(self),
             LogicalPlan::Copy(_) => Ok(self),
             LogicalPlan::Values(Values { schema, values }) => {
-                // todo it isn't clear why the schema is not recomputed here
-                Ok(LogicalPlan::Values(Values { schema, values }))
+                // We cannot compute the correct schema if we only use values.
+                //
+                // For example, given the following plan:
+                //   Projection: col_1, col_2
+                //     Values: (Float32(1), Float32(10)), (Float32(100), 
Float32(10))
+                //
+                // We wouldn't know about `col_1`, and `col_2` if we only 
relied on `values`.
+                // To correctly recompute the new schema, we also need to 
retain some information
+                // from the original schema.
+                let new_plan = 
LogicalPlanBuilder::values(values.clone())?.build()?;
+
+                let qualified_fields = schema
+                    .iter()
+                    .zip(new_plan.schema().fields())
+                    .map(|((table_ref, old_field), new_field)| {
+                        // `old_field`'s data type is unknown but 
`new_field`'s is known
+                        if old_field.data_type().is_null()
+                            && !new_field.data_type().is_null()
+                        {
+                            let field = old_field
+                                .as_ref()
+                                .clone()
+                                .with_data_type(new_field.data_type().clone());
+                            (table_ref.cloned(), Arc::new(field))
+                        } else {
+                            (table_ref.cloned(), Arc::clone(old_field))
+                        }
+                    })
+                    .collect::<Vec<_>>();
+
+                let schema = DFSchema::new_with_metadata(
+                    qualified_fields,
+                    schema.metadata().clone(),
+                )?
+                
.with_functional_dependencies(schema.functional_dependencies().clone())?;

Review Comment:
   I created a new schema here because I didn't know how to modify fields in a 
schema.
   
   Please tell me if we have APIs for modifying schema's fields (then some 
`clone` can be avoided).



##########
datafusion/expr/src/logical_plan/builder.rs:
##########
@@ -216,10 +216,9 @@ impl LogicalPlanBuilder {
         if values.is_empty() {
             return plan_err!("Values list cannot be empty");
         }
+
+        // values list can have no columns, see: 
https://github.com/apache/datafusion/pull/12339
         let n_cols = values[0].len();
-        if n_cols == 0 {
-            return plan_err!("Values list cannot be zero length");
-        }

Review Comment:
   I use `LogicalPlanBuilder::values` to recompute the schema for 
`LogicalPlan::Values`
   causing this line to fail the test from #12339.
   
   Since we allow values without columns, I think this line should be removed.



##########
datafusion/expr/src/logical_plan/plan.rs:
##########
@@ -633,8 +633,46 @@ impl LogicalPlan {
             LogicalPlan::Dml(_) => Ok(self),
             LogicalPlan::Copy(_) => Ok(self),
             LogicalPlan::Values(Values { schema, values }) => {
-                // todo it isn't clear why the schema is not recomputed here
-                Ok(LogicalPlan::Values(Values { schema, values }))
+                // We cannot compute the correct schema if we only use values.
+                //
+                // For example, given the following plan:
+                //   Projection: col_1, col_2
+                //     Values: (Float32(1), Float32(10)), (Float32(100), 
Float32(10))
+                //
+                // We wouldn't know about `col_1`, and `col_2` if we only 
relied on `values`.
+                // To correctly recompute the new schema, we also need to 
retain some information
+                // from the original schema.
+                let new_plan = 
LogicalPlanBuilder::values(values.clone())?.build()?;
+
+                let qualified_fields = schema
+                    .iter()
+                    .zip(new_plan.schema().fields())
+                    .map(|((table_ref, old_field), new_field)| {
+                        // `old_field`'s data type is unknown but 
`new_field`'s is known
+                        if old_field.data_type().is_null()
+                            && !new_field.data_type().is_null()
+                        {
+                            let field = old_field
+                                .as_ref()
+                                .clone()
+                                .with_data_type(new_field.data_type().clone());
+                            (table_ref.cloned(), Arc::new(field))

Review Comment:
   This is the main change, the new schema needs `new_field`'s data type.
   
   What about other attributes?
   
   I tried to apply `new_field`'s nullability, but the test in `insert.slt:305` 
failed,
   because its schema require `NOT NULL` when the `new_field` is nullable.
   ```sql
   CREATE TABLE table_without_values(field1 BIGINT NOT NULL, field2 BIGINT 
NULL);
   
   -- statement error Invalid argument error: Column 'column1' is declared as 
non-nullable but contains null values
   insert into table_without_values values(NULL, 300);
   ```
   
   So the new schema shouldn't use `new_field`'s nullability.
   However, I wasn't quite sure whether other attributes (i.e. `metadata`) need 
to be populated.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to