alamb commented on code in PR #9435:
URL: https://github.com/apache/arrow-datafusion/pull/9435#discussion_r1513583674
##########
datafusion/functions/src/datetime/date_bin.rs:
##########
@@ -0,0 +1,765 @@
+// Licensed to the Apache Software Foundation (ASF) under one
Review Comment:
This is so nice that the `date_bin` function has been moved into its own
module
##########
datafusion/sqllogictest/test_files/tpch/q8.slt.part:
##########
@@ -61,34 +61,35 @@ Sort: all_nations.o_year ASC NULLS LAST
----Aggregate: groupBy=[[all_nations.o_year]], aggr=[[SUM(CASE WHEN
all_nations.nation = Utf8("BRAZIL") THEN all_nations.volume ELSE
Decimal128(Some(0),38,4) END) AS SUM(CASE WHEN all_nations.nation =
Utf8("BRAZIL") THEN all_nations.volume ELSE Int64(0) END),
SUM(all_nations.volume)]]
------SubqueryAlias: all_nations
--------Projection: date_part(Utf8("YEAR"), orders.o_orderdate) AS o_year,
lineitem.l_extendedprice * (Decimal128(Some(1),20,0) - lineitem.l_discount) AS
volume, n2.n_name AS nation
-----------Inner Join: n1.n_regionkey = region.r_regionkey
-------------Projection: lineitem.l_extendedprice, lineitem.l_discount,
orders.o_orderdate, n1.n_regionkey, n2.n_name
---------------Inner Join: supplier.s_nationkey = n2.n_nationkey
-----------------Projection: lineitem.l_extendedprice, lineitem.l_discount,
supplier.s_nationkey, orders.o_orderdate, n1.n_regionkey
-------------------Inner Join: customer.c_nationkey = n1.n_nationkey
---------------------Projection: lineitem.l_extendedprice, lineitem.l_discount,
supplier.s_nationkey, orders.o_orderdate, customer.c_nationkey
-----------------------Inner Join: orders.o_custkey = customer.c_custkey
-------------------------Projection: lineitem.l_extendedprice,
lineitem.l_discount, supplier.s_nationkey, orders.o_custkey, orders.o_orderdate
---------------------------Inner Join: lineitem.l_orderkey = orders.o_orderkey
-----------------------------Projection: lineitem.l_orderkey,
lineitem.l_extendedprice, lineitem.l_discount, supplier.s_nationkey
-------------------------------Inner Join: lineitem.l_suppkey =
supplier.s_suppkey
---------------------------------Projection: lineitem.l_orderkey,
lineitem.l_suppkey, lineitem.l_extendedprice, lineitem.l_discount
-----------------------------------Inner Join: part.p_partkey =
lineitem.l_partkey
-------------------------------------Projection: part.p_partkey
---------------------------------------Filter: part.p_type = Utf8("ECONOMY
ANODIZED STEEL")
-----------------------------------------TableScan: part projection=[p_partkey,
p_type], partial_filters=[part.p_type = Utf8("ECONOMY ANODIZED STEEL")]
-------------------------------------TableScan: lineitem
projection=[l_orderkey, l_partkey, l_suppkey, l_extendedprice, l_discount]
---------------------------------TableScan: supplier projection=[s_suppkey,
s_nationkey]
-----------------------------Filter: orders.o_orderdate >= Date32("9131") AND
orders.o_orderdate <= Date32("9861")
-------------------------------TableScan: orders projection=[o_orderkey,
o_custkey, o_orderdate], partial_filters=[orders.o_orderdate >= Date32("9131"),
orders.o_orderdate <= Date32("9861")]
-------------------------TableScan: customer projection=[c_custkey, c_nationkey]
---------------------SubqueryAlias: n1
-----------------------TableScan: nation projection=[n_nationkey, n_regionkey]
-----------------SubqueryAlias: n2
-------------------TableScan: nation projection=[n_nationkey, n_name]
-------------Projection: region.r_regionkey
---------------Filter: region.r_name = Utf8("AMERICA")
-----------------TableScan: region projection=[r_regionkey, r_name],
partial_filters=[region.r_name = Utf8("AMERICA")]
+----------Projection: lineitem.l_extendedprice, lineitem.l_discount,
orders.o_orderdate, n2.n_name
Review Comment:
🤔 I am surprised to see the plan change here. I don't expect anything to
change.
I will try and study what changed later and figure out what we might be able
to change
##########
datafusion/physical-expr/src/equivalence/projection.rs:
##########
@@ -761,10 +735,7 @@ mod tests {
// expected
vec![
// [a_new ASC, date_bin_res ASC]
- // Please note that result is not [a_new ASC, date_bin_res
ASC, b_new ASC]
Review Comment:
In general I think this is important test coverage (it ensures that
`date_bin` is properly being treated as monotonic). Indeed perhaps that
`montonic` marking was not brought over to the the UDF 🤔
Though on the other hand, I think this case is already covered by .slt test
(a plan test, specifically). @mustafasrepo do you remember offhand? Otherwise
I will dig around
##########
datafusion/functions/src/datetime/mod.rs:
##########
@@ -55,6 +61,21 @@ make_udf_function!(
pub mod expr_fn {
use datafusion_expr::Expr;
+ #[doc = "coerces an arbitrary timestamp to the start of the nearest
specified interval"]
+ pub fn date_bin(args: Vec<Expr>) -> Expr {
+ super::date_bin().call(args)
+ }
+
+ #[doc = "extracts a subfield from the date"]
+ pub fn date_part(args: Vec<Expr>) -> Expr {
+ super::date_part().call(args)
+ }
+
+ #[doc = "truncates the date to a specified level of precision"]
+ pub fn date_trunc(args: Vec<Expr>) -> Expr {
+ super::date_trunc().call(args)
+ }
Review Comment:
I think the original function definitions use multiple arguments (not a vec)
```rust
scalar_expr!(DatePart, date_part, part date, "extracts a subfield from the
date");
scalar_expr!(DateTrunc, date_trunc, part date, "truncates the date to a
specified level of precision");
scalar_expr!(DateBin, date_bin, stride source origin, "coerces an arbitrary
timestamp to the start of the nearest specified interval");
```
So this maybe would look more like
```suggestion
#[doc = "coerces an arbitrary timestamp to the start of the nearest
specified interval"]
pub fn date_bin(part: Expr, date: Expr) -> Expr {
super::date_bin().call(vec![part, date])
}
#[doc = "extracts a subfield from the date"]
pub fn date_part(part: Expr, date: Expr) -> Expr {
super::date_part().call(vec![part, date])
}
#[doc = "truncates the date to a specified level of precision"]
pub fn date_trunc(stride: Expr, source: Expr, origin: Expr) -> Expr {
super::date_trunc().call(vec![stride, source, origin])
}
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]