alamb commented on code in PR #9388:
URL: https://github.com/apache/arrow-datafusion/pull/9388#discussion_r1508206742


##########
datafusion/core/tests/simplification.rs:
##########
@@ -108,3 +224,191 @@ fn fold_and_simplify() {
     let simplified = simplifier.simplify(expr).unwrap();
     assert_eq!(simplified, lit(true))
 }
+
+#[test]
+/// Ensure that timestamp expressions are folded so they aren't invoked on 
each row
+fn to_timestamp_expr_folded() -> Result<()> {
+    let table_scan = test_table_scan();
+    let proj = vec![to_timestamp_expr("2020-09-08T12:00:00+00:00")];
+
+    let plan = LogicalPlanBuilder::from(table_scan)
+        .project(proj)?
+        .build()?;
+
+    let expected = "Projection: TimestampNanosecond(1599566400000000000, None) 
AS to_timestamp(Utf8(\"2020-09-08T12:00:00+00:00\"))\
+            \n  TableScan: test"
+        .to_string();
+    let actual = get_optimized_plan_formatted(&plan, &Utc::now());
+    assert_eq!(expected, actual);
+    Ok(())
+}
+
+#[test]
+fn now_less_than_timestamp() -> Result<()> {
+    let table_scan = test_table_scan();
+
+    let ts_string = "2020-09-08T12:05:00+00:00";
+    let time = Utc.timestamp_nanos(1599566400000000000i64);
+
+    //  cast(now() as int) < cast(to_timestamp(...) as int) + 50000_i64
+    let plan = LogicalPlanBuilder::from(table_scan)
+        .filter(
+            cast_to_int64_expr(now_expr())
+                .lt(cast_to_int64_expr(to_timestamp_expr(ts_string)) + 
lit(50000_i64)),
+        )?
+        .build()?;
+
+    // Note that constant folder runs and folds the entire
+    // expression down to a single constant (true)
+    let expected = "Filter: Boolean(true)\
+                        \n  TableScan: test";
+    let actual = get_optimized_plan_formatted(&plan, &time);
+
+    assert_eq!(expected, actual);
+    Ok(())
+}
+
+#[test]
+fn select_date_plus_interval() -> Result<()> {
+    let table_scan = test_table_scan();
+
+    let ts_string = "2020-09-08T12:05:00+00:00";
+    let time = Utc.timestamp_nanos(1599566400000000000i64);
+
+    //  now() < cast(to_timestamp(...) as int) + 5000000000
+    let schema = table_scan.schema();
+
+    let date_plus_interval_expr = to_timestamp_expr(ts_string)
+        .cast_to(&DataType::Date32, schema)?
+        + Expr::Literal(ScalarValue::IntervalDayTime(Some(123i64 << 32)));
+
+    let plan = LogicalPlanBuilder::from(table_scan.clone())
+        .project(vec![date_plus_interval_expr])?
+        .build()?;
+
+    // Note that constant folder runs and folds the entire
+    // expression down to a single constant (true)
+    let expected = r#"Projection: Date32("18636") AS 
to_timestamp(Utf8("2020-09-08T12:05:00+00:00")) + 
IntervalDayTime("528280977408")
+  TableScan: test"#;
+    let actual = get_optimized_plan_formatted(&plan, &time);
+
+    assert_eq!(expected, actual);
+    Ok(())
+}
+
+#[test]
+fn test_const_evaluator() {
+    // true --> true
+    test_evaluate(lit(true), lit(true));
+    // true or true --> true
+    test_evaluate(lit(true).or(lit(true)), lit(true));
+    // true or false --> true
+    test_evaluate(lit(true).or(lit(false)), lit(true));
+
+    // "foo" == "foo" --> true
+    test_evaluate(lit("foo").eq(lit("foo")), lit(true));
+    // "foo" != "foo" --> false
+    test_evaluate(lit("foo").not_eq(lit("foo")), lit(false));
+
+    // c = 1 --> c = 1
+    test_evaluate(col("c").eq(lit(1)), col("c").eq(lit(1)));
+    // c = 1 + 2 --> c + 3
+    test_evaluate(col("c").eq(lit(1) + lit(2)), col("c").eq(lit(3)));
+    // (foo != foo) OR (c = 1) --> false OR (c = 1)
+    test_evaluate(
+        (lit("foo").not_eq(lit("foo"))).or(col("c").eq(lit(1))),
+        // lit(false).or(col("c").eq(lit(1))),

Review Comment:
   ```suggestion
   ```



##########
datafusion/core/tests/parquet/row_group_pruning.rs:
##########
@@ -126,7 +126,7 @@ async fn prune_timestamps_micros() {
     RowGroupPruningTest::new()
         .with_scenario(Scenario::Timestamps)
         .with_query(
-            "SELECT * FROM t where micros < to_timestamp_micros('2020-01-02 
01:01:11Z')",
+            "SELECT * FROM t where micros < arrow_cast('2020-01-02T01:01:11', 
'Timestamp(Microsecond, None)')",

Review Comment:
   ```suggestion
               "SELECT * FROM t where micros < to_timestamp_micros('2020-01-02 
01:01:11Z')",
   ```



##########
datafusion/core/tests/parquet/row_group_pruning.rs:
##########
@@ -141,7 +141,7 @@ async fn prune_timestamps_millis() {
     RowGroupPruningTest::new()
         .with_scenario(Scenario::Timestamps)
         .with_query(
-            "SELECT * FROM t where micros < to_timestamp_millis('2020-01-02 
01:01:11Z')",
+            "SELECT * FROM t where millis < arrow_cast('2020-01-02T01:01:11', 
'Timestamp(Millisecond, None)')",

Review Comment:
   This test also seems to refer to the wrong column
   
   ```suggestion
               "SELECT * FROM t where micros < to_timestamp_millis('2020-01-02 
01:01:11Z')",
   ```



##########
datafusion/core/tests/parquet/row_group_pruning.rs:
##########
@@ -156,7 +156,7 @@ async fn prune_timestamps_seconds() {
     RowGroupPruningTest::new()
         .with_scenario(Scenario::Timestamps)
         .with_query(
-            "SELECT * FROM t where seconds < to_timestamp_seconds('2020-01-02 
01:01:11Z')",
+            "SELECT * FROM t where seconds < arrow_cast('2020-01-02T01:01:11', 
'Timestamp(Second, None)')",

Review Comment:
   ```suggestion
               "SELECT * FROM t where seconds < 
to_timestamp_seconds('2020-01-02 01:01:11Z')",
   ```



##########
datafusion/core/tests/parquet/row_group_pruning.rs:
##########
@@ -209,7 +209,7 @@ async fn prune_date64() {
 async fn prune_disabled() {
     RowGroupPruningTest::new()
         .with_scenario(Scenario::Timestamps)
-        .with_query("SELECT * FROM t where nanos < to_timestamp('2020-01-02 
01:01:11Z')")
+        .with_query("SELECT * FROM t where nanos < 
arrow_cast('2020-01-02T01:01:11', 'Timestamp(Nanosecond, None)')")

Review Comment:
   ```suggestion
           .with_query("SELECT * FROM t where nanos < to_timestamp('2020-01-02 
01:01:11Z')")
   ```



##########
datafusion/core/tests/parquet/row_group_pruning.rs:
##########
@@ -218,7 +218,7 @@ async fn prune_disabled() {
         .await;
 
     // test without pruning
-    let query = "SELECT * FROM t where nanos < to_timestamp('2020-01-02 
01:01:11Z')";
+    let query = "SELECT * FROM t where nanos < 
arrow_cast('2020-01-02T01:01:11', 'Timestamp(Nanosecond, None)')";

Review Comment:
   ```suggestion
       let query = "SELECT * FROM t where nanos < to_timestamp('2020-01-02 
01:01:11Z')";
   ```



##########
datafusion/functions/benches/to_timestamp.rs:
##########
@@ -0,0 +1,116 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+extern crate criterion;
+
+use criterion::{black_box, criterion_group, criterion_main, Criterion};
+
+use datafusion_expr::lit;
+use datafusion_functions::expr_fn::to_timestamp;
+
+fn criterion_benchmark(c: &mut Criterion) {
+    c.bench_function("to_timestamp_no_formats", |b| {
+        let inputs = vec![
+            lit("1997-01-31T09:26:56.123Z"),
+            lit("1997-01-31T09:26:56.123-05:00"),
+            lit("1997-01-31 09:26:56.123-05:00"),
+            lit("2023-01-01 04:05:06.789 -08"),
+            lit("1997-01-31T09:26:56.123"),
+            lit("1997-01-31 09:26:56.123"),
+            lit("1997-01-31 09:26:56"),
+            lit("1997-01-31 13:26:56"),
+            lit("1997-01-31 13:26:56+04:00"),
+            lit("1997-01-31"),
+        ];
+        b.iter(|| {
+            for i in inputs.iter() {
+                black_box(to_timestamp(vec![i.clone()]));

Review Comment:
   I think testing the public API is actually a more real-world comparison (so 
this seems like an improvement to me)



##########
datafusion/core/tests/simplification.rs:
##########
@@ -108,3 +224,191 @@ fn fold_and_simplify() {
     let simplified = simplifier.simplify(expr).unwrap();
     assert_eq!(simplified, lit(true))
 }
+
+#[test]
+/// Ensure that timestamp expressions are folded so they aren't invoked on 
each row
+fn to_timestamp_expr_folded() -> Result<()> {
+    let table_scan = test_table_scan();
+    let proj = vec![to_timestamp_expr("2020-09-08T12:00:00+00:00")];
+
+    let plan = LogicalPlanBuilder::from(table_scan)
+        .project(proj)?
+        .build()?;
+
+    let expected = "Projection: TimestampNanosecond(1599566400000000000, None) 
AS to_timestamp(Utf8(\"2020-09-08T12:00:00+00:00\"))\
+            \n  TableScan: test"
+        .to_string();
+    let actual = get_optimized_plan_formatted(&plan, &Utc::now());
+    assert_eq!(expected, actual);
+    Ok(())
+}
+
+#[test]
+fn now_less_than_timestamp() -> Result<()> {
+    let table_scan = test_table_scan();
+
+    let ts_string = "2020-09-08T12:05:00+00:00";
+    let time = Utc.timestamp_nanos(1599566400000000000i64);
+
+    //  cast(now() as int) < cast(to_timestamp(...) as int) + 50000_i64
+    let plan = LogicalPlanBuilder::from(table_scan)
+        .filter(
+            cast_to_int64_expr(now_expr())
+                .lt(cast_to_int64_expr(to_timestamp_expr(ts_string)) + 
lit(50000_i64)),
+        )?
+        .build()?;
+
+    // Note that constant folder runs and folds the entire
+    // expression down to a single constant (true)
+    let expected = "Filter: Boolean(true)\
+                        \n  TableScan: test";
+    let actual = get_optimized_plan_formatted(&plan, &time);
+
+    assert_eq!(expected, actual);
+    Ok(())
+}
+
+#[test]
+fn select_date_plus_interval() -> Result<()> {
+    let table_scan = test_table_scan();
+
+    let ts_string = "2020-09-08T12:05:00+00:00";
+    let time = Utc.timestamp_nanos(1599566400000000000i64);
+
+    //  now() < cast(to_timestamp(...) as int) + 5000000000
+    let schema = table_scan.schema();
+
+    let date_plus_interval_expr = to_timestamp_expr(ts_string)
+        .cast_to(&DataType::Date32, schema)?
+        + Expr::Literal(ScalarValue::IntervalDayTime(Some(123i64 << 32)));
+
+    let plan = LogicalPlanBuilder::from(table_scan.clone())
+        .project(vec![date_plus_interval_expr])?
+        .build()?;
+
+    // Note that constant folder runs and folds the entire
+    // expression down to a single constant (true)
+    let expected = r#"Projection: Date32("18636") AS 
to_timestamp(Utf8("2020-09-08T12:05:00+00:00")) + 
IntervalDayTime("528280977408")
+  TableScan: test"#;
+    let actual = get_optimized_plan_formatted(&plan, &time);
+
+    assert_eq!(expected, actual);
+    Ok(())
+}
+
+#[test]
+fn test_const_evaluator() {
+    // true --> true
+    test_evaluate(lit(true), lit(true));
+    // true or true --> true
+    test_evaluate(lit(true).or(lit(true)), lit(true));
+    // true or false --> true
+    test_evaluate(lit(true).or(lit(false)), lit(true));
+
+    // "foo" == "foo" --> true
+    test_evaluate(lit("foo").eq(lit("foo")), lit(true));
+    // "foo" != "foo" --> false
+    test_evaluate(lit("foo").not_eq(lit("foo")), lit(false));
+
+    // c = 1 --> c = 1
+    test_evaluate(col("c").eq(lit(1)), col("c").eq(lit(1)));
+    // c = 1 + 2 --> c + 3
+    test_evaluate(col("c").eq(lit(1) + lit(2)), col("c").eq(lit(3)));
+    // (foo != foo) OR (c = 1) --> false OR (c = 1)
+    test_evaluate(
+        (lit("foo").not_eq(lit("foo"))).or(col("c").eq(lit(1))),
+        // lit(false).or(col("c").eq(lit(1))),
+        col("c").eq(lit(1)),

Review Comment:
   This change looks better to me (false OR ...) is always (...). 
   
   The reason that the test changed is that previously it was only testing the 
constant propagation, but now it is also testing the simplifier code as well. I 
think this looks good



##########
datafusion/core/tests/parquet/row_group_pruning.rs:
##########
@@ -112,7 +112,7 @@ impl RowGroupPruningTest {
 async fn prune_timestamps_nanos() {
     RowGroupPruningTest::new()
         .with_scenario(Scenario::Timestamps)
-        .with_query("SELECT * FROM t where nanos < to_timestamp('2020-01-02 
01:01:11Z')")
+        .with_query("SELECT * FROM t where nanos < 
arrow_cast('2020-01-02T01:01:11', 'Timestamp(Nanosecond, None)')")

Review Comment:
   🤔  the tests still seem to be changed
   
   ```suggestion
           .with_query("SELECT * FROM t where nanos < to_timestamp('2020-01-02 
01:01:11Z')")
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to