avantgardnerio commented on code in PR #3334:
URL: https://github.com/apache/arrow-datafusion/pull/3334#discussion_r961112809


##########
datafusion/core/tests/sql/subqueries.rs:
##########
@@ -288,6 +288,136 @@ async fn tpch_q17_correlated() -> Result<()> {
     Ok(())
 }
 
+// Fix for issue#78 join predicates from inside of OR expr also pulled up 
properly.
+#[tokio::test]
+async fn tpch_q19_pull_predicates_to_innerjoin() -> Result<()> {
+    let ctx = SessionContext::new();
+
+    register_tpch_csv(&ctx, "part").await?;
+    register_tpch_csv(&ctx, "lineitem").await?;
+
+    let sql = r#"
+select
+    sum(l_extendedprice* (1 - l_discount)) as revenue
+from
+    lineitem,
+    part
+where
+    (
+                p_partkey = l_partkey
+            and p_brand = 'Brand#12'
+            and p_container in ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG')
+            and l_quantity >= 1 and l_quantity <= 1 + 10
+            and p_size between 1 and 5
+            and l_shipmode in ('AIR', 'AIR REG')
+            and l_shipinstruct = 'DELIVER IN PERSON'
+        )
+   or
+    (
+                p_partkey = l_partkey
+            and p_brand = 'Brand#23'
+            and p_container in ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK')
+            and l_quantity >= 10 and l_quantity <= 10 + 10
+            and p_size between 1 and 10
+            and l_shipmode in ('AIR', 'AIR REG')
+            and l_shipinstruct = 'DELIVER IN PERSON'
+        )
+   or
+    (
+                p_partkey = l_partkey
+            and p_brand = 'Brand#34'
+            and p_container in ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG')
+            and l_quantity >= 20 and l_quantity <= 20 + 10
+            and p_size between 1 and 15
+            and l_shipmode in ('AIR', 'AIR REG')
+            and l_shipinstruct = 'DELIVER IN PERSON'
+        );"#;
+
+    // assert plan
+    let plan = ctx.create_logical_plan(sql).unwrap();
+    debug!("input:\n{}", plan.display_indent());
+
+    let plan = ctx.optimize(&plan).unwrap();
+    let actual = format!("{}", plan.display_indent());
+    let expected = r#"Projection: #SUM(lineitem.l_extendedprice * Int64(1) - 
lineitem.l_discount) AS revenue
+  Aggregate: groupBy=[[]], aggr=[[SUM(#lineitem.l_extendedprice * Int64(1) - 
#lineitem.l_discount)]]
+    Projection: #lineitem.l_shipmode IN ([Utf8("AIR"), Utf8("AIR REG")]) AS 
InList-falseLiteralAIR REGLiteralAIRColumn-lineitem.l_shipmode, 
#lineitem.l_shipinstruct = Utf8("DELIVER IN PERSON") AS 
BinaryExpr-=LiteralDELIVER IN PERSONColumn-lineitem.l_shipinstruct, 
#lineitem.l_quantity, #lineitem.l_extendedprice, #lineitem.l_discount, 
#part.p_brand, #part.p_size, #part.p_container
+      Filter: #part.p_brand = Utf8("Brand#12") AND #part.p_container IN 
([Utf8("SM CASE"), Utf8("SM BOX"), Utf8("SM PACK"), Utf8("SM PKG")]) AND 
#lineitem.l_quantity >= Int64(1) AND #lineitem.l_quantity <= Int64(11) AND 
#part.p_size BETWEEN Int64(1) AND Int64(5) OR #part.p_brand = Utf8("Brand#23") 
AND #part.p_container IN ([Utf8("MED BAG"), Utf8("MED BOX"), Utf8("MED PKG"), 
Utf8("MED PACK")]) AND #lineitem.l_quantity >= Int64(10) AND 
#lineitem.l_quantity <= Int64(20) AND #part.p_size BETWEEN Int64(1) AND 
Int64(10) OR #part.p_brand = Utf8("Brand#34") AND #part.p_container IN 
([Utf8("LG CASE"), Utf8("LG BOX"), Utf8("LG PACK"), Utf8("LG PKG")]) AND 
#lineitem.l_quantity >= Int64(20) AND #lineitem.l_quantity <= Int64(30) AND 
#part.p_size BETWEEN Int64(1) AND Int64(15)

Review Comment:
   This line is concerning to me, but maybe it is a bug in how `Display` works 
for filters:
   
   ```
   #part.p_brand = Utf8("Brand#12") AND #part.p_container IN ([Utf8("SM CASE"), 
Utf8("SM BOX"), Utf8("SM PACK"), Utf8("SM PKG")]) AND #lineitem.l_quantity >= 
Int64(1) AND #lineitem.l_quantity <= Int64(11) AND #part.p_size BETWEEN 
Int64(1) AND Int64(5)
   ```
   
   Should have parenthesis around it - each of the conjunctions should before 
they are joined into a disjunction. Does anyone know if that a problem with the 
plan or the formatter? 
   
   @DhamoPS did you compare the results with postgres or spark to verify they 
were correct?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to