This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 75caa9ceb3 fix: ensure mutual compatibility of the two input schemas from recursive CTEs (#9795)
75caa9ceb3 is described below

commit 75caa9ceb38418e3b222be0a9189c878135f978a
Author: Jonah Gao <[email protected]>
AuthorDate: Wed Mar 27 21:18:29 2024 +0800

    fix: ensure mutual compatibility of the two input schemas from recursive CTEs (#9795)
    
    * fix: Ensure mutual compatibility of the two input schemas from recursive CTEs
    
    * fix typo
---
 datafusion/expr/src/logical_plan/builder.rs | 26 ++++++++++++++++++-------
 datafusion/sqllogictest/test_files/cte.slt  | 30 +++++++++++++++++++++++++++++
 2 files changed, 49 insertions(+), 7 deletions(-)

diff --git a/datafusion/expr/src/logical_plan/builder.rs b/datafusion/expr/src/logical_plan/builder.rs
index 01e6af9487..f47249d76d 100644
--- a/datafusion/expr/src/logical_plan/builder.rs
+++ b/datafusion/expr/src/logical_plan/builder.rs
@@ -51,9 +51,9 @@ use arrow::datatypes::{DataType, Schema, SchemaRef};
 use datafusion_common::config::FormatOptions;
 use datafusion_common::display::ToStringifiedPlan;
 use datafusion_common::{
-    get_target_functional_dependencies, plan_datafusion_err, plan_err, Column, DFField,
-    DFSchema, DFSchemaRef, DataFusionError, OwnedTableReference, Result, ScalarValue,
-    TableReference, ToDFSchema, UnnestOptions,
+    get_target_functional_dependencies, not_impl_err, plan_datafusion_err, plan_err,
+    Column, DFField, DFSchema, DFSchemaRef, DataFusionError, OwnedTableReference, Result,
+    ScalarValue, TableReference, ToDFSchema, UnnestOptions,
 };
 
 /// Default table name for unnamed table
@@ -132,14 +132,26 @@ impl LogicalPlanBuilder {
     ) -> Result<Self> {
         // TODO: we need to do a bunch of validation here. Maybe more.
         if is_distinct {
-            return Err(DataFusionError::NotImplemented(
-                "Recursive queries with a distinct 'UNION' (in which the previous iteration's results will be de-duplicated) is not supported".to_string(),
-            ));
+            return not_impl_err!(
+                "Recursive queries with a distinct 'UNION' (in which the previous iteration's results will be de-duplicated) is not supported"
+            );
+        }
+        // Ensure that the static term and the recursive term have the same number of fields
+        let static_fields_len = self.plan.schema().fields().len();
+        let recurive_fields_len = recursive_term.schema().fields().len();
+        if static_fields_len != recurive_fields_len {
+            return plan_err!(
+                "Non-recursive term and recursive term must have the same number of columns ({} != {})",
+                static_fields_len, recurive_fields_len
+            );
         }
+        // Ensure that the recursive term has the same field types as the static term
+        let coerced_recursive_term =
+            coerce_plan_expr_for_schema(&recursive_term, self.plan.schema())?;
         Ok(Self::from(LogicalPlan::RecursiveQuery(RecursiveQuery {
             name,
             static_term: Arc::new(self.plan.clone()),
-            recursive_term: Arc::new(recursive_term),
+            recursive_term: Arc::new(coerced_recursive_term),
             is_distinct,
         })))
     }
diff --git a/datafusion/sqllogictest/test_files/cte.slt b/datafusion/sqllogictest/test_files/cte.slt
index 50c88e4195..e33dfabaf2 100644
--- a/datafusion/sqllogictest/test_files/cte.slt
+++ b/datafusion/sqllogictest/test_files/cte.slt
@@ -714,3 +714,33 @@ RecursiveQueryExec: name=recursive_cte, is_distinct=false
 --------------WorkTableExec: name=recursive_cte
 ------ProjectionExec: expr=[2 as val]
 --------PlaceholderRowExec
+
+# Test issue: https://github.com/apache/arrow-datafusion/issues/9794
+# Non-recursive term and recursive term have different types
+query IT
+WITH RECURSIVE my_cte AS(
+    SELECT 1::int AS a
+    UNION ALL
+    SELECT a::bigint+2 FROM my_cte WHERE a<3
+) SELECT *, arrow_typeof(a) FROM my_cte;
+----
+1 Int32
+3 Int32
+
+# Test issue: https://github.com/apache/arrow-datafusion/issues/9794
+# Non-recursive term and recursive term have different number of columns
+query error DataFusion error: Error during planning: Non\-recursive term and recursive term must have the same number of columns \(1 != 3\)
+WITH RECURSIVE my_cte AS (
+    SELECT 1::bigint AS a
+    UNION ALL
+    SELECT a+2, 'a','c' FROM my_cte WHERE a<3
+) SELECT * FROM my_cte;
+
+# Test issue: https://github.com/apache/arrow-datafusion/issues/9794
+# Non-recursive term and recursive term have different types, and cannot be casted
+query error DataFusion error: Arrow error: Cast error: Cannot cast string 'abc' to value of Int64 type
+WITH RECURSIVE my_cte AS (
+    SELECT 1 AS a
+    UNION ALL
+    SELECT 'abc' FROM my_cte WHERE CAST(a AS text) !='abc'
+) SELECT * FROM my_cte;

Reply via email to