This is an automated email from the ASF dual-hosted git repository.

jonah pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 09054263df Minor: improve Expr documentation (#10685)
09054263df is described below

commit 09054263df1d8de06b6e77ab0cbd99027bb7ceb6
Author: Andrew Lamb <[email protected]>
AuthorDate: Wed May 29 03:26:16 2024 -0400

    Minor: improve Expr documentation (#10685)
    
    * Minor: improve Expr documentation
    
    * Update datafusion/expr/src/expr.rs
    
    Co-authored-by: Oleks V <[email protected]>
    
    * Refine words
    
    ---------
    
    Co-authored-by: Oleks V <[email protected]>
---
 datafusion/expr/src/expr.rs         | 96 +++++++++++++++++++++++++++++--------
 datafusion/sql/src/unparser/expr.rs |  8 ++--
 2 files changed, 82 insertions(+), 22 deletions(-)

diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs
index 0c05355cde..3542e2d985 100644
--- a/datafusion/expr/src/expr.rs
+++ b/datafusion/expr/src/expr.rs
@@ -39,17 +39,39 @@ use datafusion_common::{
 };
 use sqlparser::ast::NullTreatment;
 
-/// `Expr` is a central struct of DataFusion's query API, and
-/// represent logical expressions such as `A + 1`, or `CAST(c1 AS
-/// int)`.
+/// Represents logical expressions such as `A + 1`, or `CAST(c1 AS int)`.
 ///
-/// An `Expr` can compute its [DataType]
-/// and nullability, and has functions for building up complex
-/// expressions.
+/// For example the expression `A + 1` will be represented as
+///
+/// ```text
+///  BinaryExpr {
+///    left: Expr::Column("A"),
+///    op: Operator::Plus,
+///    right: Expr::Literal(ScalarValue::Int32(Some(1)))
+///  }
+/// ```
+///
+/// # Creating Expressions
+///
+/// `Expr`s can be created directly, but it is often easier and less verbose to
+/// use the fluent APIs in [`crate::expr_fn`] such as [`col`] and [`lit`], or
+/// methods such as [`Expr::alias`], [`Expr::cast_to`], and [`Expr::like`].
+///
+/// # Schema Access
+///
+/// See [`ExprSchemable::get_type`] to access the [`DataType`] and nullability
+/// of an `Expr`.
 ///
 /// # Examples
 ///
-/// ## Create an expression `c1` referring to column named "c1"
+/// ## Column references and literals
+///
+/// [`Expr::Column`] refers to the values of columns and is often created with
+/// the [`col`] function. For example, to create an expression `c1` referring to
+/// the column named "c1":
+///
+/// [`col`]: crate::expr_fn::col
+///
 /// ```
 /// # use datafusion_common::Column;
 /// # use datafusion_expr::{lit, col, Expr};
@@ -57,11 +79,33 @@ use sqlparser::ast::NullTreatment;
 /// assert_eq!(expr, Expr::Column(Column::from_name("c1")));
 /// ```
 ///
-/// ## Create the expression `c1 + c2` to add columns "c1" and "c2" together
+/// [`Expr::Literal`] refers to literal, or constant, values. These are created
+/// with the [`lit`] function. For example, to create an expression `42`:
+///
+/// [`lit`]: crate::lit
+///
+/// ```
+/// # use datafusion_common::{Column, ScalarValue};
+/// # use datafusion_expr::{lit, col, Expr};
+/// // All literals are strongly typed in DataFusion. To make an `i64` 42:
+/// let expr = lit(42i64);
+/// assert_eq!(expr, Expr::Literal(ScalarValue::Int64(Some(42))));
+/// // To make a (typed) NULL:
+/// let expr = Expr::Literal(ScalarValue::Int64(None));
+/// // To make an (untyped) NULL (the optimizer will coerce this to the correct type):
+/// let expr = lit(ScalarValue::Null);
+/// ```
+///
+/// ## Binary Expressions
+///
+/// Exprs implement traits that allow easy-to-understand construction of more
+/// complex expressions. For example, to create `c1 + c2` to add columns "c1" and
+/// "c2" together:
+///
 /// ```
 /// # use datafusion_expr::{lit, col, Operator, Expr};
+/// // Use the `+` operator to add two columns together
 /// let expr = col("c1") + col("c2");
-///
 /// assert!(matches!(expr, Expr::BinaryExpr { ..} ));
 /// if let Expr::BinaryExpr(binary_expr) = expr {
 ///   assert_eq!(*binary_expr.left, col("c1"));
@@ -70,12 +114,13 @@ use sqlparser::ast::NullTreatment;
 /// }
 /// ```
 ///
-/// ## Create expression `c1 = 42` to compare the value in column "c1" to the 
literal value `42`
+/// The expression `c1 = 42` compares the value in column "c1" to the
+/// literal value `42`:
+///
 /// ```
 /// # use datafusion_common::ScalarValue;
 /// # use datafusion_expr::{lit, col, Operator, Expr};
 /// let expr = col("c1").eq(lit(42_i32));
-///
 /// assert!(matches!(expr, Expr::BinaryExpr { .. } ));
 /// if let Expr::BinaryExpr(binary_expr) = expr {
 ///   assert_eq!(*binary_expr.left, col("c1"));
@@ -85,19 +130,23 @@ use sqlparser::ast::NullTreatment;
 /// }
 /// ```
 ///
-/// ## Return a list of [`Expr::Column`] from a schema's columns
+/// Here is how to implement the equivalent of `SELECT *` to select all
+/// [`Expr::Column`] from a [`DFSchema`]'s columns:
+///
 /// ```
 /// # use arrow::datatypes::{DataType, Field, Schema};
 /// # use datafusion_common::{DFSchema, Column};
 /// # use datafusion_expr::Expr;
-///
+/// // Create a schema (c1 int, c2 float)
 /// let arrow_schema = Schema::new(vec![
 ///    Field::new("c1", DataType::Int32, false),
 ///    Field::new("c2", DataType::Float64, false),
 /// ]);
-/// let df_schema = DFSchema::try_from_qualified_schema("t1", 
&arrow_schema).unwrap();
+/// // DFSchema is an Arrow schema with an optional relation name
+/// let df_schema = DFSchema::try_from_qualified_schema("t1", &arrow_schema)
+///   .unwrap();
 ///
-/// // Form a list of expressions for each item in the schema
+/// // Form Vec<Expr> with an expression for each column in the schema
 /// let exprs: Vec<_> = df_schema.iter()
 ///   .map(Expr::from)
 ///   .collect();
@@ -227,6 +276,7 @@ impl<'a> From<(Option<&'a TableReference>, &'a FieldRef)> 
for Expr {
     }
 }
 
+/// UNNEST expression.
 #[derive(Clone, PartialEq, Eq, Hash, Debug)]
 pub struct Unnest {
     pub expr: Box<Expr>,
@@ -434,9 +484,13 @@ pub enum GetFieldAccess {
     },
 }
 
-/// Returns the field of a [`arrow::array::ListArray`] or
-/// [`arrow::array::StructArray`] by `key`. See [`GetFieldAccess`] for
-/// details.
+/// Returns the field of a [`ListArray`] or
+/// [`StructArray`] by `key`.
+///
+/// See [`GetFieldAccess`] for details.
+///
+/// [`ListArray`]: arrow::array::ListArray
+/// [`StructArray`]: arrow::array::StructArray
 #[derive(Clone, PartialEq, Eq, Hash, Debug)]
 pub struct GetIndexedField {
     /// The expression to take the field from
@@ -712,7 +766,7 @@ pub fn find_df_window_func(name: &str) -> 
Option<WindowFunctionDefinition> {
     }
 }
 
-// Exists expression.
+/// EXISTS expression
 #[derive(Clone, PartialEq, Eq, Hash, Debug)]
 pub struct Exists {
     /// subquery that will produce a single column of data
@@ -728,6 +782,9 @@ impl Exists {
     }
 }
 
+/// User Defined Aggregate Function
+///
+/// See [`udaf::AggregateUDF`] for more information.
 #[derive(Clone, PartialEq, Eq, Hash, Debug)]
 pub struct AggregateUDF {
     /// The function
@@ -821,6 +878,7 @@ impl Placeholder {
 }
 
 /// Grouping sets
+///
 /// See 
<https://www.postgresql.org/docs/current/queries-table-expressions.html#QUERIES-GROUPING-SETS>
 /// for Postgres definition.
 /// See 
<https://spark.apache.org/docs/latest/sql-ref-syntax-qry-select-groupby.html>
diff --git a/datafusion/sql/src/unparser/expr.rs 
b/datafusion/sql/src/unparser/expr.rs
index ea991102df..df390ce6ea 100644
--- a/datafusion/sql/src/unparser/expr.rs
+++ b/datafusion/sql/src/unparser/expr.rs
@@ -20,6 +20,10 @@ use std::{fmt::Display, vec};
 
 use arrow_array::{Date32Array, Date64Array};
 use arrow_schema::DataType;
+use sqlparser::ast::{
+    self, Expr as AstExpr, Function, FunctionArg, Ident, UnaryOperator,
+};
+
 use datafusion_common::{
     internal_datafusion_err, internal_err, not_impl_err, plan_err, Column, 
Result,
     ScalarValue,
@@ -28,9 +32,6 @@ use datafusion_expr::{
     expr::{Alias, Exists, InList, ScalarFunction, Sort, WindowFunction},
     Between, BinaryExpr, Case, Cast, Expr, GroupingSet, Like, Operator, 
TryCast,
 };
-use sqlparser::ast::{
-    self, Expr as AstExpr, Function, FunctionArg, Ident, UnaryOperator,
-};
 
 use super::Unparser;
 
@@ -931,6 +932,7 @@ mod tests {
 
     use arrow::datatypes::{Field, Schema};
     use arrow_schema::DataType::Int8;
+
     use datafusion_common::TableReference;
     use datafusion_expr::{
         case, col, cube, exists,


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to