jackwener commented on code in PR #4826:
URL: https://github.com/apache/arrow-datafusion/pull/4826#discussion_r1064277025


##########
datafusion/core/tests/sql/joins.rs:
##########
@@ -2810,3 +2810,137 @@ async fn type_coercion_join_with_filter_and_equi_expr() 
-> Result<()> {
 
     Ok(())
 }
+
+#[tokio::test]
+async fn subquery_to_join_with_both_side_expr() -> Result<()> {
+    let ctx = create_join_context("t1_id", "t2_id", false)?;
+
+    let sql = "select t1.t1_id, t1.t1_name, t1.t1_int from t1 where t1.t1_id + 
12 in (select t2.t2_id + 1 from t2)";
+
+    // assert logical plan
+    let msg = format!("Creating logical plan for '{sql}'");
+    let dataframe = ctx.sql(&("explain ".to_owned() + sql)).await.expect(&msg);
+    let plan = dataframe.into_optimized_plan().unwrap();
+
+    let expected = vec![
+        "Explain [plan_type:Utf8, plan:Utf8]",
+        "  Projection: t1.t1_id, t1.t1_name, t1.t1_int [t1_id:UInt32;N, 
t1_name:Utf8;N, t1_int:UInt32;N]",
+        "    LeftSemi Join: CAST(t1.t1_id AS Int64) + Int64(12) = 
__correlated_sq_1.CAST(t2_id AS Int64) + Int64(1) [t1_id:UInt32;N, 
t1_name:Utf8;N, t1_int:UInt32;N]",
+        "      TableScan: t1 projection=[t1_id, t1_name, t1_int] 
[t1_id:UInt32;N, t1_name:Utf8;N, t1_int:UInt32;N]",
+        "      SubqueryAlias: __correlated_sq_1 [CAST(t2_id AS Int64) + 
Int64(1):Int64;N]",
+        "        Projection: CAST(t2.t2_id AS Int64) + Int64(1) AS CAST(t2_id 
AS Int64) + Int64(1) [CAST(t2_id AS Int64) + Int64(1):Int64;N]",
+        "          TableScan: t2 projection=[t2_id] [t2_id:UInt32;N]",
+    ];
+
+    let formatted = plan.display_indent_schema().to_string();
+    let actual: Vec<&str> = formatted.trim().lines().collect();
+    assert_eq!(
+        expected, actual,
+        "\n\nexpected:\n\n{expected:#?}\nactual:\n\n{actual:#?}\n\n"
+    );
+
+    let expected = vec![
+        "+-------+---------+--------+",
+        "| t1_id | t1_name | t1_int |",
+        "+-------+---------+--------+",
+        "| 11    | a       | 1      |",
+        "| 33    | c       | 3      |",
+        "| 44    | d       | 4      |",
+        "+-------+---------+--------+",
+    ];
+
+    let results = execute_to_batches(&ctx, sql).await;
+    assert_batches_sorted_eq!(expected, &results);

Review Comment:
   Now we already use `sqllogicaltest` to support `Data Driven Tests` #4460. 
   I suggest add a file `join.slt` | `subquery.slt` to cover some case of 
`join` and `subquery`.
   
   In UT and integration-test, we just focus on correctness of `Plan`, 
data-test is derived by `sqllogicaltest`



##########
datafusion/optimizer/src/decorrelate_where_in.rs:
##########
@@ -721,11 +815,17 @@ mod tests {
             .project(vec![col("customer.c_custkey")])?
             .build()?;
 
-        // TODO: support join on expression
-        assert_optimizer_err(
+        let expected = "Projection: customer.c_custkey [c_custkey:Int64]\
+        \n  LeftSemi Join:  Filter: customer.c_custkey + Int32(1) = 
__correlated_sq_1.o_custkey AND customer.c_custkey = 
__correlated_sq_1.o_custkey [c_custkey:Int64, c_name:Utf8]\
+        \n    TableScan: customer [c_custkey:Int64, c_name:Utf8]\
+        \n    SubqueryAlias: __correlated_sq_1 [o_custkey:Int64]\
+        \n      Projection: orders.o_custkey AS o_custkey [o_custkey:Int64]\
+        \n        TableScan: orders [o_orderkey:Int64, o_custkey:Int64, 
o_orderstatus:Utf8, o_totalprice:Float64;N]";
+
+        assert_optimized_plan_eq_display_indent(
             Arc::new(DecorrelateWhereIn::new()),
             &plan,
-            "column comparison required",
+            expected,

Review Comment:
   👍



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to