This is an automated email from the ASF dual-hosted git repository.

dheres pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/master by this push:
     new 15db291  Fix Predicate Pushdown: split_members should be able to split aliased predicate (#1368)
15db291 is described below

commit 15db29153aefc9b759ec03e6eeab8cb9ebf87781
Author: Liang-Chi Hsieh <[email protected]>
AuthorDate: Sun Nov 28 10:40:25 2021 -0800

    Fix Predicate Pushdown: split_members should be able to split aliased predicate (#1368)
    
    * Skip Filter.
    
    * Add tpch q10 query plan test.
    
    * split_members should be able to split aliased predicate.
    
    * Revert "Skip Filter."
    
    This reverts commit f07cbdd94f46449e4537e5bd333140e346bb2879.
    
    * Fix clippy.
---
 datafusion/src/optimizer/filter_push_down.rs |   4 +
 datafusion/tests/sql.rs                      | 129 +++++++++++++++++++++++++++
 datafusion/tests/tpch-csv/customer.csv       |  10 +++
 datafusion/tests/tpch-csv/lineitem.csv       |  10 +++
 datafusion/tests/tpch-csv/nation.csv         |  11 +++
 datafusion/tests/tpch-csv/orders.csv         |  11 +++
 6 files changed, 175 insertions(+)

diff --git a/datafusion/src/optimizer/filter_push_down.rs b/datafusion/src/optimizer/filter_push_down.rs
index a11a159..c55a5cd 100644
--- a/datafusion/src/optimizer/filter_push_down.rs
+++ b/datafusion/src/optimizer/filter_push_down.rs
@@ -245,6 +245,9 @@ fn split_members<'a>(predicate: &'a Expr, predicates: &mut Vec<&'a Expr>) {
             split_members(left, predicates);
             split_members(right, predicates);
         }
+        Expr::Alias(expr, _) => {
+            split_members(expr, predicates);
+        }
         other => predicates.push(other),
     }
 }
@@ -308,6 +311,7 @@ fn optimize(plan: &LogicalPlan, mut state: State) -> Result<LogicalPlan> {
                     }
                     Ok(())
                 })?;
+
             // Predicates without columns will not be pushed down.
             // As those contain only literals, they could be optimized using constant folding
             // and removal of WHERE TRUE / WHERE FALSE
diff --git a/datafusion/tests/sql.rs b/datafusion/tests/sql.rs
index 9216bdb..640556c 100644
--- a/datafusion/tests/sql.rs
+++ b/datafusion/tests/sql.rs
@@ -3543,6 +3543,135 @@ async fn explain_analyze_runs_optimizers() {
     assert_contains!(actual, expected);
 }
 
+#[tokio::test]
+async fn tpch_explain_q10() -> Result<()> {
+    let mut ctx = ExecutionContext::new();
+
+    register_tpch_csv(&mut ctx, "customer").await?;
+    register_tpch_csv(&mut ctx, "orders").await?;
+    register_tpch_csv(&mut ctx, "lineitem").await?;
+    register_tpch_csv(&mut ctx, "nation").await?;
+
+    let sql = "select
+    c_custkey,
+    c_name,
+    sum(l_extendedprice * (1 - l_discount)) as revenue,
+    c_acctbal,
+    n_name,
+    c_address,
+    c_phone,
+    c_comment
+from
+    customer,
+    orders,
+    lineitem,
+    nation
+where
+        c_custkey = o_custkey
+  and l_orderkey = o_orderkey
+  and o_orderdate >= date '1993-10-01'
+  and o_orderdate < date '1994-01-01'
+  and l_returnflag = 'R'
+  and c_nationkey = n_nationkey
+group by
+    c_custkey,
+    c_name,
+    c_acctbal,
+    c_phone,
+    n_name,
+    c_address,
+    c_comment
+order by
+    revenue desc;";
+
+    let mut plan = ctx.create_logical_plan(sql);
+    plan = ctx.optimize(&plan.unwrap());
+
+    let expected = "\
+    Sort: #revenue DESC NULLS FIRST\
+    \n  Projection: #customer.c_custkey, #customer.c_name, #SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount) AS revenue, #customer.c_acctbal, #nation.n_name, #customer.c_address, #customer.c_phone, #customer.c_comment\
+    \n    Aggregate: groupBy=[[#customer.c_custkey, #customer.c_name, #customer.c_acctbal, #customer.c_phone, #nation.n_name, #customer.c_address, #customer.c_comment]], aggr=[[SUM(#lineitem.l_extendedprice * Int64(1) - #lineitem.l_discount)]]\
+    \n      Join: #customer.c_nationkey = #nation.n_nationkey\
+    \n        Join: #orders.o_orderkey = #lineitem.l_orderkey\
+    \n          Join: #customer.c_custkey = #orders.o_custkey\
+    \n            TableScan: customer projection=Some([0, 1, 2, 3, 4, 5, 7])\
+    \n            Filter: #orders.o_orderdate >= Date32(\"8674\") AND #orders.o_orderdate < Date32(\"8766\")\
+    \n              TableScan: orders projection=Some([0, 1, 4]), filters=[#orders.o_orderdate >= Date32(\"8674\"), #orders.o_orderdate < Date32(\"8766\")]\
+    \n          Filter: #lineitem.l_returnflag = Utf8(\"R\")\
+    \n            TableScan: lineitem projection=Some([0, 5, 6, 8]), filters=[#lineitem.l_returnflag = Utf8(\"R\")]\
+    \n        TableScan: nation projection=Some([0, 1])";
+    assert_eq!(format!("{:?}", plan.unwrap()), expected);
+
+    Ok(())
+}
+
+fn get_tpch_table_schema(table: &str) -> Schema {
+    match table {
+        "customer" => Schema::new(vec![
+            Field::new("c_custkey", DataType::Int64, false),
+            Field::new("c_name", DataType::Utf8, false),
+            Field::new("c_address", DataType::Utf8, false),
+            Field::new("c_nationkey", DataType::Int64, false),
+            Field::new("c_phone", DataType::Utf8, false),
+            Field::new("c_acctbal", DataType::Float64, false),
+            Field::new("c_mktsegment", DataType::Utf8, false),
+            Field::new("c_comment", DataType::Utf8, false),
+        ]),
+
+        "orders" => Schema::new(vec![
+            Field::new("o_orderkey", DataType::Int64, false),
+            Field::new("o_custkey", DataType::Int64, false),
+            Field::new("o_orderstatus", DataType::Utf8, false),
+            Field::new("o_totalprice", DataType::Float64, false),
+            Field::new("o_orderdate", DataType::Date32, false),
+            Field::new("o_orderpriority", DataType::Utf8, false),
+            Field::new("o_clerk", DataType::Utf8, false),
+            Field::new("o_shippriority", DataType::Int32, false),
+            Field::new("o_comment", DataType::Utf8, false),
+        ]),
+
+        "lineitem" => Schema::new(vec![
+            Field::new("l_orderkey", DataType::Int64, false),
+            Field::new("l_partkey", DataType::Int64, false),
+            Field::new("l_suppkey", DataType::Int64, false),
+            Field::new("l_linenumber", DataType::Int32, false),
+            Field::new("l_quantity", DataType::Float64, false),
+            Field::new("l_extendedprice", DataType::Float64, false),
+            Field::new("l_discount", DataType::Float64, false),
+            Field::new("l_tax", DataType::Float64, false),
+            Field::new("l_returnflag", DataType::Utf8, false),
+            Field::new("l_linestatus", DataType::Utf8, false),
+            Field::new("l_shipdate", DataType::Date32, false),
+            Field::new("l_commitdate", DataType::Date32, false),
+            Field::new("l_receiptdate", DataType::Date32, false),
+            Field::new("l_shipinstruct", DataType::Utf8, false),
+            Field::new("l_shipmode", DataType::Utf8, false),
+            Field::new("l_comment", DataType::Utf8, false),
+        ]),
+
+        "nation" => Schema::new(vec![
+            Field::new("n_nationkey", DataType::Int64, false),
+            Field::new("n_name", DataType::Utf8, false),
+            Field::new("n_regionkey", DataType::Int64, false),
+            Field::new("n_comment", DataType::Utf8, false),
+        ]),
+
+        _ => unimplemented!(),
+    }
+}
+
+async fn register_tpch_csv(ctx: &mut ExecutionContext, table: &str) -> Result<()> {
+    let schema = get_tpch_table_schema(table);
+
+    ctx.register_csv(
+        table,
+        format!("tests/tpch-csv/{}.csv", table).as_str(),
+        CsvReadOptions::new().schema(&schema),
+    )
+    .await?;
+    Ok(())
+}
+
 async fn register_aggregate_csv_by_sql(ctx: &mut ExecutionContext) {
     let testdata = datafusion::test_util::arrow_test_data();
 
diff --git a/datafusion/tests/tpch-csv/customer.csv b/datafusion/tests/tpch-csv/customer.csv
new file mode 100644
index 0000000..30f538e
--- /dev/null
+++ b/datafusion/tests/tpch-csv/customer.csv
@@ -0,0 +1,10 @@
+c_custkey,c_name,c_address,c_nationkey,c_phone,c_acctbal,c_mktsegment,c_comment
+2,Customer#000000002,"XSTf4,NCwDVaWNe6tEgvwfmRchLXak",13,23-768-687-3665,121.65,AUTOMOBILE,l
 accounts. blithely ironic theodolites integrate boldly: caref
+3,Customer#000000003,MG9kdTD2WBHm,1,11-719-748-3364,7498.12,AUTOMOBILE," 
deposits eat slyly ironic, even instructions. express foxes detect slyly. 
blithely even accounts abov"
+4,Customer#000000004,XxVSJsLAGtn,4,14-128-190-5944,2866.83,MACHINERY," 
requests. final, regular ideas sleep final accou"
+5,Customer#000000005,KvpyuHCplrB84WgAiGV6sYpZq7Tj,3,13-750-942-6364,794.47,HOUSEHOLD,n
 accounts will have to unwind. foxes cajole accor
+6,Customer#000000006,"sKZz0CsnMD7mp4Xd0YrBvx,LREYKUWAh 
yVn",20,30-114-968-4951,7638.57,AUTOMOBILE,tions. even deposits boost according 
to the slyly bold packages. final accounts cajole requests. furious
+7,Customer#000000007,TcGe5gaZNgVePxU5kRrvXBfkasDTea,18,28-190-982-9759,9561.95,AUTOMOBILE,"ainst
 the ironic, express theodolites. express, even pinto beans among the exp"
+8,Customer#000000008,"I0B10bB0AymmC, 
0PrRYBCP1yGJ8xcBPmWhl5",17,27-147-574-9335,6819.74,BUILDING,among the slyly 
regular theodolites kindle blithely courts. carefully even theodolites haggle 
slyly along the ide
+9,Customer#000000009,xKiAFTjUsCuxfeleNqefumTrjS,8,18-338-906-3675,8324.07,FURNITURE,r
 theodolites according to the requests wake thinly excuses: pending requests 
haggle furiousl
+10,Customer#000000010,6LrEaV6KR6PLVcgl2ArL Q3rqzLzcT1 
v2,5,15-741-346-9870,2753.54,HOUSEHOLD,es regular deposits haggle. fur
diff --git a/datafusion/tests/tpch-csv/lineitem.csv b/datafusion/tests/tpch-csv/lineitem.csv
new file mode 100644
index 0000000..47f0871
--- /dev/null
+++ b/datafusion/tests/tpch-csv/lineitem.csv
@@ -0,0 +1,10 @@
+l_orderkey,l_partkey,l_suppkey,l_linenumber,l_quantity,l_extendedprice,l_discount,l_tax,l_returnflag,l_linestatus,l_shipdate,l_commitdate,l_receiptdate,l_shipinstruct,l_shipmode,l_comment
+1,67310,7311,2,36.0,45983.16,0.09,0.06,N,O,1996-04-12,1996-02-28,1996-04-20,TAKE
 BACK RETURN,MAIL,ly final dependencies: slyly bold 
+1,63700,3701,3,8.0,13309.6,0.1,0.02,N,O,1996-01-29,1996-03-05,1996-01-31,TAKE 
BACK RETURN,REG AIR,"riously. regular, express dep"
+1,2132,4633,4,28.0,28955.64,0.09,0.06,N,O,1996-04-21,1996-03-30,1996-05-16,NONE,AIR,lites.
 fluffily even de
+1,24027,1534,5,24.0,22824.48,0.1,0.04,N,O,1996-03-30,1996-03-14,1996-04-01,NONE,FOB,
 pending foxes. slyly re
+1,15635,638,6,32.0,49620.16,0.07,0.02,N,O,1996-01-30,1996-02-07,1996-02-03,DELIVER
 IN PERSON,MAIL,arefully slyly ex
+2,106170,1191,1,38.0,44694.46,0.0,0.05,N,O,1997-01-28,1997-01-14,1997-02-02,TAKE
 BACK RETURN,RAIL,ven requests. deposits breach a
+3,4297,1798,1,45.0,54058.05,0.06,0.0,R,F,1994-02-02,1994-01-04,1994-02-23,NONE,AIR,ongside
 of the furiously brave acco
+3,19036,6540,2,49.0,46796.47,0.1,0.0,R,F,1993-11-09,1993-12-20,1993-11-24,TAKE 
BACK RETURN,RAIL, unusual accounts. eve
+3,128449,3474,3,27.0,39890.88,0.06,0.07,A,F,1994-01-16,1993-11-22,1994-01-23,DELIVER
 IN PERSON,SHIP,nal foxes wake. 
diff --git a/datafusion/tests/tpch-csv/nation.csv b/datafusion/tests/tpch-csv/nation.csv
new file mode 100644
index 0000000..e37130f
--- /dev/null
+++ b/datafusion/tests/tpch-csv/nation.csv
@@ -0,0 +1,11 @@
+n_nationkey,n_name,n_regionkey,n_comment
+1,ARGENTINA,1,al foxes promise slyly according to the regular accounts. bold 
requests alon
+2,BRAZIL,1,y alongside of the pending deposits. carefully special packages are 
about the ironic forges. slyly special 
+3,CANADA,1,"eas hang ironic, silent packages. slyly regular packages are 
furiously over the tithes. fluffily bold"
+4,EGYPT,4,y above the carefully unusual theodolites. final dugouts are quickly 
across the furiously regular d
+5,ETHIOPIA,0,ven packages wake quickly. regu
+6,FRANCE,3,"refully final requests. regular, ironi"
+7,GERMANY,3,"l platelets. regular accounts x-ray: unusual, regular acco"
+8,INDIA,2,ss excuses cajole slyly across the packages. deposits print aroun
+9,INDONESIA,2, slyly express asymptotes. regular deposits haggle slyly. 
carefully ironic hockey players sleep blithely. carefull
+10,IRAN,4,efully alongside of the slyly final dependencies. 
diff --git a/datafusion/tests/tpch-csv/orders.csv b/datafusion/tests/tpch-csv/orders.csv
new file mode 100644
index 0000000..1fc2e4e
--- /dev/null
+++ b/datafusion/tests/tpch-csv/orders.csv
@@ -0,0 +1,11 @@
+o_orderkey,o_custkey,o_orderstatus,o_totalprice,o_orderdate,o_orderpriority,o_clerk,o_shippriority,o_comment
+2,78002,O,46929.18,1996-12-01,1-URGENT,Clerk#000000880,0," foxes. pending 
accounts at the pending, silent asymptot"
+3,123314,F,193846.25,1993-10-14,5-LOW,Clerk#000000955,0,sly final accounts 
boost. carefully regular ideas cajole carefully. depos
+4,136777,O,32151.78,1995-10-11,5-LOW,Clerk#000000124,0,"sits. slyly regular 
warthogs cajole. regular, regular theodolites acro"
+5,44485,F,144659.2,1994-07-30,5-LOW,Clerk#000000925,0,quickly. bold deposits 
sleep slyly. packages use slyly
+6,55624,F,58749.59,1992-02-21,4-NOT SPECIFIED,Clerk#000000058,0,"ggle. 
special, final requests are against the furiously specia"
+7,39136,O,252004.18,1996-01-10,2-HIGH,Clerk#000000470,0,ly special requests 
+32,130057,O,208660.75,1995-07-16,2-HIGH,Clerk#000000616,0,"ise blithely bold, 
regular requests. quickly unusual dep"
+33,66958,F,163243.98,1993-10-27,3-MEDIUM,Clerk#000000409,0,uriously. furiously 
final request
+34,61001,O,58949.67,1998-07-21,3-MEDIUM,Clerk#000000223,0,ly final packages. 
fluffily final deposits wake blithely ideas. spe
+35,127588,O,253724.56,1995-10-23,4-NOT SPECIFIED,Clerk#000000259,0,zzle. 
carefully enticing deposits nag furio

Reply via email to