alamb commented on code in PR #11961: URL: https://github.com/apache/datafusion/pull/11961#discussion_r1715818843
########## datafusion/sqllogictest/test_files/type_coercion.slt: ########## @@ -49,3 +49,179 @@ select interval '1 month' - '2023-05-01'::date; # interval - timestamp query error DataFusion error: Error during planning: Cannot coerce arithmetic expression Interval\(MonthDayNano\) \- Timestamp\(Nanosecond, None\) to valid types SELECT interval '1 month' - '2023-05-01 12:30:00'::timestamp; + + +#################################### +## Test type coercion with UNIONs ## +#################################### + +# Disable optimizer to test only the analyzer with type coercion +statement ok +set datafusion.optimizer.max_passes = 0; + +statement ok +set datafusion.explain.logical_plan_only = true; + +# Create test table +statement ok +CREATE TABLE orders( + order_id INT UNSIGNED NOT NULL, + customer_id INT UNSIGNED NOT NULL, + o_item_id VARCHAR NOT NULL, + qty INT NOT NULL, + price DOUBLE NOT NULL, + delivered BOOLEAN NOT NULL +); + +# union_different_num_columns_error() / UNION +query error Error during planning: Union schemas have different number of fields: query 1 has 1 fields whereas query 2 has 2 fields +SELECT order_id FROM orders UNION SELECT customer_id, o_item_id FROM orders + +# union_different_num_columns_error() / UNION ALL +query error Error during planning: Union schemas have different number of fields: query 1 has 1 fields whereas query 2 has 2 fields +SELECT order_id FROM orders UNION ALL SELECT customer_id, o_item_id FROM orders + +# union_with_different_column_names() +query TT +EXPLAIN SELECT order_id from orders UNION ALL SELECT customer_id FROM orders +---- +logical_plan +01)Union +02)--Projection: orders.order_id +03)----TableScan: orders +04)--Projection: orders.customer_id AS order_id +05)----TableScan: orders + +# union_values_with_no_alias() +query TT +EXPLAIN SELECT 1, 2 UNION ALL SELECT 3, 4 +---- +logical_plan +01)Union +02)--Projection: Int64(1) AS Int64(1), Int64(2) AS Int64(2) +03)----EmptyRelation +04)--Projection: Int64(3) AS Int64(1), Int64(4) AS Int64(2) +05)----EmptyRelation + +# union_with_incompatible_data_type() +query error Error during planning: UNION Column 'Int64\(1\)' \(type: Int64\) is not compatible with other type: Interval\(MonthDayNano\) Review Comment: I noticed that the error refers to the types in reverse order than they appear in the query The error message might be better if it were something more like ``` Incompatible inputs for Union. Previous inputs were of type Interval\(MonthDayNano\), got incomaptible type 'Int64\(1\)' \(type: Int64\) ``` ########## datafusion/expr/src/logical_plan/builder.rs: ########## @@ -1339,95 +1338,14 @@ pub(crate) fn validate_unique_names<'a>( }) } -pub fn project_with_column_index( - expr: Vec<Expr>, - input: Arc<LogicalPlan>, - schema: DFSchemaRef, -) -> Result<LogicalPlan> { - let alias_expr = expr - .into_iter() - .enumerate() - .map(|(i, e)| match e { - Expr::Alias(Alias { ref name, .. }) if name != schema.field(i).name() => { - e.unalias().alias(schema.field(i).name()) - } - Expr::Column(Column { - relation: _, - ref name, - }) if name != schema.field(i).name() => e.alias(schema.field(i).name()), - Expr::Alias { .. } | Expr::Column { .. } => e, - _ => e.alias(schema.field(i).name()), - }) - .collect::<Vec<_>>(); - - Projection::try_new_with_schema(alias_expr, input, schema) - .map(LogicalPlan::Projection) -} - /// Union two logical plans. pub fn union(left_plan: LogicalPlan, right_plan: LogicalPlan) -> Result<LogicalPlan> { - let left_col_num = left_plan.schema().fields().len(); - - // check union plan length same. - let right_col_num = right_plan.schema().fields().len(); - if right_col_num != left_col_num { - return plan_err!( - "Union queries must have the same number of columns, (left is {left_col_num}, right is {right_col_num})"); - } - - // create union schema - let union_qualified_fields = - zip(left_plan.schema().iter(), right_plan.schema().iter()) - .map( - |((left_qualifier, left_field), (_right_qualifier, right_field))| { - let nullable = left_field.is_nullable() || right_field.is_nullable(); - let data_type = comparison_coercion( - left_field.data_type(), - right_field.data_type(), - ) - .ok_or_else(|| { - plan_datafusion_err!( - "UNION Column {} (type: {}) is not compatible with column {} (type: {})", - right_field.name(), - right_field.data_type(), - left_field.name(), - left_field.data_type() - ) - })?; - Ok(( - left_qualifier.cloned(), - Arc::new(Field::new(left_field.name(), data_type, nullable)), - )) - }, - ) - .collect::<Result<Vec<_>>>()?; - let union_schema = - DFSchema::new_with_metadata(union_qualified_fields, HashMap::new())?; - - let inputs = vec![left_plan, right_plan] - .into_iter() - .map(|p| { - let plan = coerce_plan_expr_for_schema(&p, &union_schema)?; - match plan { - LogicalPlan::Projection(Projection { expr, input, .. }) => { - Ok(Arc::new(project_with_column_index( - expr, - input, - Arc::new(union_schema.clone()), - )?)) - } - other_plan => Ok(Arc::new(other_plan)), - } - }) - .collect::<Result<Vec<_>>>()?; - - if inputs.is_empty() { - return plan_err!("Empty UNION"); - } - + // Temporarily use the schema from the left input and later rely on the analyzer to Review Comment: 👍 -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org