This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 75caa9ceb3 fix: ensure mutual compatibility of the two input schemas
from recursive CTEs (#9795)
75caa9ceb3 is described below
commit 75caa9ceb38418e3b222be0a9189c878135f978a
Author: Jonah Gao <[email protected]>
AuthorDate: Wed Mar 27 21:18:29 2024 +0800
fix: ensure mutual compatibility of the two input schemas from recursive
CTEs (#9795)
* fix: Ensure mutual compatibility of the two input schemas from recursive
CTEs
* fix typo
---
datafusion/expr/src/logical_plan/builder.rs | 26 ++++++++++++++++++-------
datafusion/sqllogictest/test_files/cte.slt | 30 +++++++++++++++++++++++++++++
2 files changed, 49 insertions(+), 7 deletions(-)
diff --git a/datafusion/expr/src/logical_plan/builder.rs b/datafusion/expr/src/logical_plan/builder.rs
index 01e6af9487..f47249d76d 100644
--- a/datafusion/expr/src/logical_plan/builder.rs
+++ b/datafusion/expr/src/logical_plan/builder.rs
@@ -51,9 +51,9 @@ use arrow::datatypes::{DataType, Schema, SchemaRef};
use datafusion_common::config::FormatOptions;
use datafusion_common::display::ToStringifiedPlan;
use datafusion_common::{
- get_target_functional_dependencies, plan_datafusion_err, plan_err, Column,
DFField,
- DFSchema, DFSchemaRef, DataFusionError, OwnedTableReference, Result,
ScalarValue,
- TableReference, ToDFSchema, UnnestOptions,
+ get_target_functional_dependencies, not_impl_err, plan_datafusion_err,
plan_err,
+ Column, DFField, DFSchema, DFSchemaRef, DataFusionError,
OwnedTableReference, Result,
+ ScalarValue, TableReference, ToDFSchema, UnnestOptions,
};
/// Default table name for unnamed table
@@ -132,14 +132,26 @@ impl LogicalPlanBuilder {
) -> Result<Self> {
// TODO: we need to do a bunch of validation here. Maybe more.
if is_distinct {
- return Err(DataFusionError::NotImplemented(
- "Recursive queries with a distinct 'UNION' (in which the
previous iteration's results will be de-duplicated) is not
supported".to_string(),
- ));
+ return not_impl_err!(
+ "Recursive queries with a distinct 'UNION' (in which the
previous iteration's results will be de-duplicated) is not supported"
+ );
+ }
+ // Ensure that the static term and the recursive term have the same number of fields
+ let static_fields_len = self.plan.schema().fields().len();
+ let recurive_fields_len = recursive_term.schema().fields().len();
+ if static_fields_len != recurive_fields_len {
+ return plan_err!(
+ "Non-recursive term and recursive term must have the same
number of columns ({} != {})",
+ static_fields_len, recurive_fields_len
+ );
}
+ // Ensure that the recursive term has the same field types as the static term
+ let coerced_recursive_term =
+ coerce_plan_expr_for_schema(&recursive_term, self.plan.schema())?;
Ok(Self::from(LogicalPlan::RecursiveQuery(RecursiveQuery {
name,
static_term: Arc::new(self.plan.clone()),
- recursive_term: Arc::new(recursive_term),
+ recursive_term: Arc::new(coerced_recursive_term),
is_distinct,
})))
}
diff --git a/datafusion/sqllogictest/test_files/cte.slt b/datafusion/sqllogictest/test_files/cte.slt
index 50c88e4195..e33dfabaf2 100644
--- a/datafusion/sqllogictest/test_files/cte.slt
+++ b/datafusion/sqllogictest/test_files/cte.slt
@@ -714,3 +714,33 @@ RecursiveQueryExec: name=recursive_cte, is_distinct=false
--------------WorkTableExec: name=recursive_cte
------ProjectionExec: expr=[2 as val]
--------PlaceholderRowExec
+
+# Test issue: https://github.com/apache/arrow-datafusion/issues/9794
+# Non-recursive term and recursive term have different types
+query IT
+WITH RECURSIVE my_cte AS(
+ SELECT 1::int AS a
+ UNION ALL
+ SELECT a::bigint+2 FROM my_cte WHERE a<3
+) SELECT *, arrow_typeof(a) FROM my_cte;
+----
+1 Int32
+3 Int32
+
+# Test issue: https://github.com/apache/arrow-datafusion/issues/9794
+# Non-recursive term and recursive term have different number of columns
+query error DataFusion error: Error during planning: Non\-recursive term and recursive term must have the same number of columns \(1 != 3\)
+WITH RECURSIVE my_cte AS (
+ SELECT 1::bigint AS a
+ UNION ALL
+ SELECT a+2, 'a','c' FROM my_cte WHERE a<3
+) SELECT * FROM my_cte;
+
+# Test issue: https://github.com/apache/arrow-datafusion/issues/9794
+# Non-recursive term and recursive term have different types, and cannot be casted
+query error DataFusion error: Arrow error: Cast error: Cannot cast string 'abc' to value of Int64 type
+WITH RECURSIVE my_cte AS (
+ SELECT 1 AS a
+ UNION ALL
+ SELECT 'abc' FROM my_cte WHERE CAST(a AS text) !='abc'
+) SELECT * FROM my_cte;