alamb commented on code in PR #9435:
URL: https://github.com/apache/arrow-datafusion/pull/9435#discussion_r1513583674


##########
datafusion/functions/src/datetime/date_bin.rs:
##########
@@ -0,0 +1,765 @@
+// Licensed to the Apache Software Foundation (ASF) under one

Review Comment:
   This is so nice that the `date_bin` function has been moved into its own 
module 



##########
datafusion/sqllogictest/test_files/tpch/q8.slt.part:
##########
@@ -61,34 +61,35 @@ Sort: all_nations.o_year ASC NULLS LAST
 ----Aggregate: groupBy=[[all_nations.o_year]], aggr=[[SUM(CASE WHEN 
all_nations.nation = Utf8("BRAZIL") THEN all_nations.volume ELSE 
Decimal128(Some(0),38,4) END) AS SUM(CASE WHEN all_nations.nation = 
Utf8("BRAZIL") THEN all_nations.volume ELSE Int64(0) END), 
SUM(all_nations.volume)]]
 ------SubqueryAlias: all_nations
 --------Projection: date_part(Utf8("YEAR"), orders.o_orderdate) AS o_year, 
lineitem.l_extendedprice * (Decimal128(Some(1),20,0) - lineitem.l_discount) AS 
volume, n2.n_name AS nation
-----------Inner Join: n1.n_regionkey = region.r_regionkey
-------------Projection: lineitem.l_extendedprice, lineitem.l_discount, 
orders.o_orderdate, n1.n_regionkey, n2.n_name
---------------Inner Join: supplier.s_nationkey = n2.n_nationkey
-----------------Projection: lineitem.l_extendedprice, lineitem.l_discount, 
supplier.s_nationkey, orders.o_orderdate, n1.n_regionkey
-------------------Inner Join: customer.c_nationkey = n1.n_nationkey
---------------------Projection: lineitem.l_extendedprice, lineitem.l_discount, 
supplier.s_nationkey, orders.o_orderdate, customer.c_nationkey
-----------------------Inner Join: orders.o_custkey = customer.c_custkey
-------------------------Projection: lineitem.l_extendedprice, 
lineitem.l_discount, supplier.s_nationkey, orders.o_custkey, orders.o_orderdate
---------------------------Inner Join: lineitem.l_orderkey = orders.o_orderkey
-----------------------------Projection: lineitem.l_orderkey, 
lineitem.l_extendedprice, lineitem.l_discount, supplier.s_nationkey
-------------------------------Inner Join: lineitem.l_suppkey = 
supplier.s_suppkey
---------------------------------Projection: lineitem.l_orderkey, 
lineitem.l_suppkey, lineitem.l_extendedprice, lineitem.l_discount
-----------------------------------Inner Join: part.p_partkey = 
lineitem.l_partkey
-------------------------------------Projection: part.p_partkey
---------------------------------------Filter: part.p_type = Utf8("ECONOMY 
ANODIZED STEEL")
-----------------------------------------TableScan: part projection=[p_partkey, 
p_type], partial_filters=[part.p_type = Utf8("ECONOMY ANODIZED STEEL")]
-------------------------------------TableScan: lineitem 
projection=[l_orderkey, l_partkey, l_suppkey, l_extendedprice, l_discount]
---------------------------------TableScan: supplier projection=[s_suppkey, 
s_nationkey]
-----------------------------Filter: orders.o_orderdate >= Date32("9131") AND 
orders.o_orderdate <= Date32("9861")
-------------------------------TableScan: orders projection=[o_orderkey, 
o_custkey, o_orderdate], partial_filters=[orders.o_orderdate >= Date32("9131"), 
orders.o_orderdate <= Date32("9861")]
-------------------------TableScan: customer projection=[c_custkey, c_nationkey]
---------------------SubqueryAlias: n1
-----------------------TableScan: nation projection=[n_nationkey, n_regionkey]
-----------------SubqueryAlias: n2
-------------------TableScan: nation projection=[n_nationkey, n_name]
-------------Projection: region.r_regionkey
---------------Filter: region.r_name = Utf8("AMERICA")
-----------------TableScan: region projection=[r_regionkey, r_name], 
partial_filters=[region.r_name = Utf8("AMERICA")]
+----------Projection: lineitem.l_extendedprice, lineitem.l_discount, 
orders.o_orderdate, n2.n_name

Review Comment:
   🤔  I am surprised to see the plan change here. I don't expect anything to 
change.
   
   I will try and study what changed later and figure out what we might be able 
to change



##########
datafusion/physical-expr/src/equivalence/projection.rs:
##########
@@ -761,10 +735,7 @@ mod tests {
                 // expected
                 vec![
                     // [a_new ASC, date_bin_res ASC]
-                    // Please note that result is not [a_new ASC, date_bin_res 
ASC, b_new ASC]

Review Comment:
   In general I think this is important test coverage (it ensures that 
`date_bin` is properly being treated as monotonic). Indeed perhaps that 
`montonic` marking was not brought over to the the UDF 🤔 
   
   Though on the other hand, I think this case is already covered by .slt test 
(a plan test, specifically). @mustafasrepo  do you remember offhand? Otherwise 
I will dig around



##########
datafusion/functions/src/datetime/mod.rs:
##########
@@ -55,6 +61,21 @@ make_udf_function!(
 pub mod expr_fn {
     use datafusion_expr::Expr;
 
+    #[doc = "coerces an arbitrary timestamp to the start of the nearest 
specified interval"]
+    pub fn date_bin(args: Vec<Expr>) -> Expr {
+        super::date_bin().call(args)
+    }
+
+    #[doc = "extracts a subfield from the date"]
+    pub fn date_part(args: Vec<Expr>) -> Expr {
+        super::date_part().call(args)
+    }
+
+    #[doc = "truncates the date to a specified level of precision"]
+    pub fn date_trunc(args: Vec<Expr>) -> Expr {
+        super::date_trunc().call(args)
+    }

Review Comment:
   I think the original function definitions use multiple arguments (not a vec)
   
   ```rust
   scalar_expr!(DatePart, date_part, part date, "extracts a subfield from the 
date");
   scalar_expr!(DateTrunc, date_trunc, part date, "truncates the date to a 
specified level of precision");
   scalar_expr!(DateBin, date_bin, stride source origin, "coerces an arbitrary 
timestamp to the start of the nearest specified interval");
   ```
   
   So this maybe would look more like
   
   ```suggestion
       #[doc = "coerces an arbitrary timestamp to the start of the nearest 
specified interval"]
       pub fn date_bin(part: Expr, date: Expr) -> Expr {
           super::date_bin().call(vec![part, date])
       }
   
       #[doc = "extracts a subfield from the date"]
       pub fn date_part(part: Expr, date: Expr) -> Expr {
           super::date_part().call(vec![part, date])
       }
   
       #[doc = "truncates the date to a specified level of precision"]
       pub fn date_trunc(stride: Expr, source: Expr, origin: Expr) -> Expr {
           super::date_trunc().call(vec![stride, source, origin])
       }
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to