This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 08fa444aaa fix: make sure JOIN ON expression is boolean type (#11423)
08fa444aaa is described below

commit 08fa444aaa8513a60ede5c57d92f29e6156b91a8
Author: Jonah Gao <[email protected]>
AuthorDate: Sat Jul 13 17:34:45 2024 +0800

    fix: make sure JOIN ON expression is boolean type (#11423)
    
    * fix: make sure JOIN ON expression is boolean type
    
    * Applied to DataFrame
    
    * Update datafusion/optimizer/src/analyzer/type_coercion.rs
    
    Co-authored-by: Andrew Lamb <[email protected]>
    
    ---------
    
    Co-authored-by: Andrew Lamb <[email protected]>
---
 datafusion/core/src/dataframe/mod.rs               | 31 +++++++++++++++++++---
 datafusion/optimizer/src/analyzer/type_coercion.rs | 17 +++++++++++-
 datafusion/sqllogictest/test_files/join.slt        | 12 ++++++++-
 3 files changed, 55 insertions(+), 5 deletions(-)

diff --git a/datafusion/core/src/dataframe/mod.rs 
b/datafusion/core/src/dataframe/mod.rs
index 05a08a6378..c55b7c7527 100644
--- a/datafusion/core/src/dataframe/mod.rs
+++ b/datafusion/core/src/dataframe/mod.rs
@@ -896,9 +896,8 @@ impl DataFrame {
         join_type: JoinType,
         on_exprs: impl IntoIterator<Item = Expr>,
     ) -> Result<DataFrame> {
-        let expr = on_exprs.into_iter().reduce(Expr::and);
         let plan = LogicalPlanBuilder::from(self.plan)
-            .join_on(right.plan, join_type, expr)?
+            .join_on(right.plan, join_type, on_exprs)?
             .build()?;
         Ok(DataFrame {
             session_state: self.session_state,
@@ -1694,7 +1693,7 @@ mod tests {
     use crate::test_util::{register_aggregate_csv, test_table, 
test_table_with_name};
 
     use arrow::array::{self, Int32Array};
-    use datafusion_common::{Constraint, Constraints};
+    use datafusion_common::{Constraint, Constraints, ScalarValue};
     use datafusion_common_runtime::SpawnedTask;
     use datafusion_expr::{
         array_agg, cast, create_udf, expr, lit, BuiltInWindowFunction,
@@ -2555,6 +2554,32 @@ mod tests {
         Ok(())
     }
 
+    #[tokio::test]
+    async fn join_on_filter_datatype() -> Result<()> {
+        let left = test_table_with_name("a").await?.select_columns(&["c1"])?;
+        let right = test_table_with_name("b").await?.select_columns(&["c1"])?;
+
+        // JOIN ON untyped NULL
+        let join = left.clone().join_on(
+            right.clone(),
+            JoinType::Inner,
+            Some(Expr::Literal(ScalarValue::Null)),
+        )?;
+        let expected_plan = "CrossJoin:\
+        \n  TableScan: a projection=[c1], full_filters=[Boolean(NULL)]\
+        \n  TableScan: b projection=[c1]";
+        assert_eq!(expected_plan, format!("{:?}", 
join.into_optimized_plan()?));
+
+        // JOIN ON expression must be boolean type
+        let join = left.join_on(right, JoinType::Inner, Some(lit("TRUE")))?;
+        let expected = join.into_optimized_plan().unwrap_err();
+        assert_eq!(
+            expected.strip_backtrace(),
+            "type_coercion\ncaused by\nError during planning: Join condition 
must be boolean type, but got Utf8"
+        );
+        Ok(())
+    }
+
     #[tokio::test]
     async fn join_ambiguous_filter() -> Result<()> {
         let left = test_table_with_name("a")
diff --git a/datafusion/optimizer/src/analyzer/type_coercion.rs 
b/datafusion/optimizer/src/analyzer/type_coercion.rs
index 3cab474df8..80a8c864e4 100644
--- a/datafusion/optimizer/src/analyzer/type_coercion.rs
+++ b/datafusion/optimizer/src/analyzer/type_coercion.rs
@@ -127,7 +127,7 @@ impl<'a> TypeCoercionRewriter<'a> {
         Self { schema }
     }
 
-    /// Coerce join equality expressions
+    /// Coerce join equality expressions and join filter
     ///
     /// Joins must be treated specially as their equality expressions are 
stored
     /// as a parallel list of left and right expressions, rather than a single
@@ -151,9 +151,24 @@ impl<'a> TypeCoercionRewriter<'a> {
             })
             .collect::<Result<Vec<_>>>()?;
 
+        // Join filter must be boolean
+        join.filter = join
+            .filter
+            .map(|expr| self.coerce_join_filter(expr))
+            .transpose()?;
+
         Ok(LogicalPlan::Join(join))
     }
 
+    fn coerce_join_filter(&self, expr: Expr) -> Result<Expr> {
+        let expr_type = expr.get_type(self.schema)?;
+        match expr_type {
+            DataType::Boolean => Ok(expr),
+            DataType::Null => expr.cast_to(&DataType::Boolean, self.schema),
+            other => plan_err!("Join condition must be boolean type, but got 
{other:?}"),
+        }
+    }
+
     fn coerce_binary_op(
         &self,
         left: Expr,
diff --git a/datafusion/sqllogictest/test_files/join.slt 
b/datafusion/sqllogictest/test_files/join.slt
index 12cb8b3985..efebba1779 100644
--- a/datafusion/sqllogictest/test_files/join.slt
+++ b/datafusion/sqllogictest/test_files/join.slt
@@ -988,7 +988,6 @@ statement ok
 DROP TABLE department
 
 
-# Test issue: https://github.com/apache/datafusion/issues/11269
 statement ok
 CREATE TABLE t1 (v0 BIGINT) AS VALUES (-503661263);
 
@@ -998,11 +997,22 @@ CREATE TABLE t2 (v0 DOUBLE) AS VALUES (-1.663563947387);
 statement ok
 CREATE TABLE t3 (v0 DOUBLE) AS VALUES (0.05112015193508901);
 
+# Test issue: https://github.com/apache/datafusion/issues/11269
 query RR
 SELECT t3.v0, t2.v0 FROM t1,t2,t3 WHERE t3.v0 >= t1.v0;
 ----
 0.051120151935 -1.663563947387
 
+# Test issue: https://github.com/apache/datafusion/issues/11414
+query IRR
+SELECT * FROM t1 INNER JOIN t2 ON NULL RIGHT JOIN t3 ON TRUE;
+----
+NULL NULL 0.051120151935
+
+# ON expression must be boolean type
+query error DataFusion error: type_coercion\ncaused by\nError during planning: 
Join condition must be boolean type, but got Utf8
+SELECT * FROM t1 INNER JOIN t2 ON 'TRUE'
+
 statement ok
 DROP TABLE t1;
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to