alamb commented on code in PR #11961:
URL: https://github.com/apache/datafusion/pull/11961#discussion_r1715818843


##########
datafusion/sqllogictest/test_files/type_coercion.slt:
##########
@@ -49,3 +49,179 @@ select interval '1 month' - '2023-05-01'::date;
 # interval - timestamp
 query error DataFusion error: Error during planning: Cannot coerce arithmetic 
expression Interval\(MonthDayNano\) \- Timestamp\(Nanosecond, None\) to valid 
types
 SELECT interval '1 month' - '2023-05-01 12:30:00'::timestamp;
+
+
+####################################
+## Test type coercion with UNIONs ##
+####################################
+
+# Disable optimizer to test only the analyzer with type coercion
+statement ok
+set datafusion.optimizer.max_passes = 0;
+
+statement ok
+set datafusion.explain.logical_plan_only = true;
+
+# Create test table
+statement ok
+CREATE TABLE orders(
+    order_id INT UNSIGNED NOT NULL,
+    customer_id INT UNSIGNED NOT NULL,
+    o_item_id VARCHAR NOT NULL,
+    qty INT NOT NULL,
+    price DOUBLE NOT NULL,
+    delivered BOOLEAN NOT NULL
+);
+
+# union_different_num_columns_error() / UNION
+query error Error during planning: Union schemas have different number of 
fields: query 1 has 1 fields whereas query 2 has 2 fields
+SELECT order_id FROM orders UNION SELECT customer_id, o_item_id FROM orders
+
+# union_different_num_columns_error() / UNION ALL
+query error Error during planning: Union schemas have different number of 
fields: query 1 has 1 fields whereas query 2 has 2 fields
+SELECT order_id FROM orders UNION ALL SELECT customer_id, o_item_id FROM orders
+
+# union_with_different_column_names()
+query TT
+EXPLAIN SELECT order_id from orders UNION ALL SELECT customer_id FROM orders
+----
+logical_plan
+01)Union
+02)--Projection: orders.order_id
+03)----TableScan: orders
+04)--Projection: orders.customer_id AS order_id
+05)----TableScan: orders
+
+# union_values_with_no_alias()
+query TT
+EXPLAIN SELECT 1, 2 UNION ALL SELECT 3, 4
+----
+logical_plan
+01)Union
+02)--Projection: Int64(1) AS Int64(1), Int64(2) AS Int64(2)
+03)----EmptyRelation
+04)--Projection: Int64(3) AS Int64(1), Int64(4) AS Int64(2)
+05)----EmptyRelation
+
+# union_with_incompatible_data_type()
+query error Error during planning: UNION Column 'Int64\(1\)' \(type: Int64\) 
is not compatible with other type: Interval\(MonthDayNano\)

Review Comment:
   I noticed that the error refers to the types in reverse order than they 
appear in the query
   
   The error message might be better if it were something more like
   
   ```
    Incompatible inputs for Union. Previous inputs were of type 
Interval\(MonthDayNano\), got incomaptible type 'Int64\(1\)' \(type: Int64\)
   ```



##########
datafusion/expr/src/logical_plan/builder.rs:
##########
@@ -1339,95 +1338,14 @@ pub(crate) fn validate_unique_names<'a>(
     })
 }
 
-pub fn project_with_column_index(
-    expr: Vec<Expr>,
-    input: Arc<LogicalPlan>,
-    schema: DFSchemaRef,
-) -> Result<LogicalPlan> {
-    let alias_expr = expr
-        .into_iter()
-        .enumerate()
-        .map(|(i, e)| match e {
-            Expr::Alias(Alias { ref name, .. }) if name != 
schema.field(i).name() => {
-                e.unalias().alias(schema.field(i).name())
-            }
-            Expr::Column(Column {
-                relation: _,
-                ref name,
-            }) if name != schema.field(i).name() => 
e.alias(schema.field(i).name()),
-            Expr::Alias { .. } | Expr::Column { .. } => e,
-            _ => e.alias(schema.field(i).name()),
-        })
-        .collect::<Vec<_>>();
-
-    Projection::try_new_with_schema(alias_expr, input, schema)
-        .map(LogicalPlan::Projection)
-}
-
 /// Union two logical plans.
 pub fn union(left_plan: LogicalPlan, right_plan: LogicalPlan) -> 
Result<LogicalPlan> {
-    let left_col_num = left_plan.schema().fields().len();
-
-    // check union plan length same.
-    let right_col_num = right_plan.schema().fields().len();
-    if right_col_num != left_col_num {
-        return plan_err!(
-            "Union queries must have the same number of columns, (left is 
{left_col_num}, right is {right_col_num})");
-    }
-
-    // create union schema
-    let union_qualified_fields =
-        zip(left_plan.schema().iter(), right_plan.schema().iter())
-            .map(
-                |((left_qualifier, left_field), (_right_qualifier, 
right_field))| {
-                    let nullable = left_field.is_nullable() || 
right_field.is_nullable();
-                    let data_type = comparison_coercion(
-                        left_field.data_type(),
-                        right_field.data_type(),
-                    )
-                    .ok_or_else(|| {
-                        plan_datafusion_err!(
-                "UNION Column {} (type: {}) is not compatible with column {} 
(type: {})",
-                right_field.name(),
-                right_field.data_type(),
-                left_field.name(),
-                left_field.data_type()
-                )
-                    })?;
-                    Ok((
-                        left_qualifier.cloned(),
-                        Arc::new(Field::new(left_field.name(), data_type, 
nullable)),
-                    ))
-                },
-            )
-            .collect::<Result<Vec<_>>>()?;
-    let union_schema =
-        DFSchema::new_with_metadata(union_qualified_fields, HashMap::new())?;
-
-    let inputs = vec![left_plan, right_plan]
-        .into_iter()
-        .map(|p| {
-            let plan = coerce_plan_expr_for_schema(&p, &union_schema)?;
-            match plan {
-                LogicalPlan::Projection(Projection { expr, input, .. }) => {
-                    Ok(Arc::new(project_with_column_index(
-                        expr,
-                        input,
-                        Arc::new(union_schema.clone()),
-                    )?))
-                }
-                other_plan => Ok(Arc::new(other_plan)),
-            }
-        })
-        .collect::<Result<Vec<_>>>()?;
-
-    if inputs.is_empty() {
-        return plan_err!("Empty UNION");
-    }
-
+    // Temporarily use the schema from the left input and later rely on the 
analyzer to

Review Comment:
   👍 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org
For additional commands, e-mail: github-h...@datafusion.apache.org

Reply via email to