This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/master by this push:
     new c27b56f09 Add datafusion example of expression apis (#3741)
c27b56f09 is described below

commit c27b56f09a437bbe296ad5782142e8fe3e700e4e
Author: Andrew Lamb <[email protected]>
AuthorDate: Wed Oct 12 13:29:35 2022 -0400

    Add datafusion example of expression apis (#3741)
---
 datafusion-examples/examples/expr_api.rs    | 136 ++++++++++++++++++++++++++++
 datafusion/core/src/prelude.rs              |   2 +-
 datafusion/optimizer/src/expr_simplifier.rs |   3 +-
 3 files changed, 139 insertions(+), 2 deletions(-)

diff --git a/datafusion-examples/examples/expr_api.rs b/datafusion-examples/examples/expr_api.rs
new file mode 100644
index 000000000..35a508b0f
--- /dev/null
+++ b/datafusion-examples/examples/expr_api.rs
@@ -0,0 +1,136 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use datafusion::arrow::datatypes::{DataType, Field, Schema, TimeUnit};
+
+use datafusion::error::Result;
+use datafusion::logical_plan::ToDFSchema;
+use datafusion::optimizer::expr_simplifier::{ExprSimplifier, SimplifyContext};
+use datafusion::physical_expr::execution_props::ExecutionProps;
+use datafusion::prelude::*;
+use datafusion::{logical_plan::Operator, scalar::ScalarValue};
+
+/// Demonstrates the DataFusion [`Expr`] API.
+///
+/// DataFusion ships a powerful and extensive system for representing
+/// and manipulating expressions such as `A + 5`, `X IN ('foo', 'bar',
+/// 'baz')`, and many other constructs.
+
+#[tokio::main]
+async fn main() -> Result<()> {
+    // The easiest way to create expressions is the "fluent"-style
+    // API, like this:
+    let fluent = col("a") + lit(5);
+
+    // which builds exactly the same expression as the explicit form
+    // below, with much less code.
+    let explicit = Expr::BinaryExpr {
+        left: Box::new(col("a")),
+        op: Operator::Plus,
+        right: Box::new(Expr::Literal(ScalarValue::Int32(Some(5)))),
+    };
+    assert_eq!(fluent, explicit);
+
+    simplify_demo()?;
+
+    Ok(())
+}
+
+/// In addition to easy construction, DataFusion exposes APIs for
+/// working with and simplifying such expressions that call into the
+/// same powerful and extensive implementation used for the query
+/// engine.
+fn simplify_demo() -> Result<()> {
+    // For example, let's say you have created an expression such as
+    // ts = to_timestamp("2020-09-08T12:00:00+00:00")
+    let expr = col("ts").eq(call_fn(
+        "to_timestamp",
+        vec![lit("2020-09-08T12:00:00+00:00")],
+    )?);
+
+    // Naively evaluating such an expression against a large number of
+    // rows would involve re-converting "2020-09-08T12:00:00+00:00" to a
+    // timestamp for each row, which gets expensive.
+    //
+    // However, DataFusion's simplification logic can do this for you
+
+    // you need to tell DataFusion the type of column "ts":
+    let schema = Schema::new(vec![make_ts_field("ts")]).to_dfschema_ref()?;
+
+    // And then build a simplifier
+    // the ExecutionProps carries information needed to simplify
+    // expressions, such as the current time (to evaluate `now()`
+    // correctly)
+    let props = ExecutionProps::new();
+    let context = SimplifyContext::new(&props).with_schema(schema);
+    let simplifier = ExprSimplifier::new(context);
+
+    // And then call the `simplify` method:
+    let expr = simplifier.simplify(expr)?;
+
+    // DataFusion has simplified the expression to a comparison with a constant
+    // ts = 1599566400000000000; Tada!
+    assert_eq!(
+        expr,
+        col("ts").eq(lit_timestamp_nano(1599566400000000000i64))
+    );
+
+    // here are some other examples of what DataFusion is capable of
+    let schema = Schema::new(vec![
+        make_field("i", DataType::Int64),
+        make_field("b", DataType::Boolean),
+    ])
+    .to_dfschema_ref()?;
+    let context = SimplifyContext::new(&props).with_schema(schema);
+    let simplifier = ExprSimplifier::new(context);
+
+    // basic arithmetic simplification
+    // i + (1 + 2) => i + 3
+    // (note this is not done if the expr is ((col("i") + lit(1)) + lit(2)))
+    assert_eq!(
+        simplifier.simplify(col("i") + (lit(1) + lit(2)))?,
+        col("i") + lit(3)
+    );
+
+    // TODO uncomment when https://github.com/apache/arrow-datafusion/issues/1160 is done
+    // (i * 0) > 5 --> false (only if `i` cannot be null)
+    // assert_eq!(
+    //     simplifier.simplify((col("i") * lit(0)).gt(lit(5)))?,
+    //     lit(false)
+    // );
+
+    // Logical simplification
+
+    // ((i > 5) AND FALSE) OR (i < 10)   --> i < 10
+    assert_eq!(
+        simplifier
+            .simplify(col("i").gt(lit(5)).and(lit(false)).or(col("i").lt(lit(10))))?,
+        col("i").lt(lit(10))
+    );
+
+    Ok(())
+}
+
+fn make_field(name: &str, data_type: DataType) -> Field {
+    // every column in these example schemas is declared non-nullable
+    Field::new(name, data_type, false)
+}
+
+fn make_ts_field(name: &str) -> Field {
+    // nanosecond-precision timestamp with no timezone attached
+    make_field(name, DataType::Timestamp(TimeUnit::Nanosecond, None))
+}
diff --git a/datafusion/core/src/prelude.rs b/datafusion/core/src/prelude.rs
index 44869c86a..ed2c81a69 100644
--- a/datafusion/core/src/prelude.rs
+++ b/datafusion/core/src/prelude.rs
@@ -34,7 +34,7 @@ pub use crate::execution::options::{
 pub use datafusion_common::Column;
 pub use datafusion_expr::{
     expr_fn::*,
-    lit,
+    lit, lit_timestamp_nano,
     logical_plan::{JoinType, Partitioning},
     Expr,
 };
diff --git a/datafusion/optimizer/src/expr_simplifier.rs b/datafusion/optimizer/src/expr_simplifier.rs
index b75e86081..6e56ff715 100644
--- a/datafusion/optimizer/src/expr_simplifier.rs
+++ b/datafusion/optimizer/src/expr_simplifier.rs
@@ -50,7 +50,8 @@ pub struct ExprSimplifier<S> {
 }
 
 impl<S: SimplifyInfo> ExprSimplifier<S> {
-    /// Create a new `ExprSimplifier` with the given `info`. See
+    /// Create a new `ExprSimplifier` with the given `info` such as an
+    /// instance of [`SimplifyContext`]. See
     /// [`simplify`](Self::simplify) for an example.
     pub fn new(info: S) -> Self {
         Self { info }

Reply via email to