This is an automated email from the ASF dual-hosted git repository.

comphead pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new befac37584 Improve PhysicalExpr and Column documentation (#12457)
befac37584 is described below

commit befac37584101afba2c41037f11b6e6dfb2fe910
Author: Andrew Lamb <[email protected]>
AuthorDate: Fri Sep 13 20:26:54 2024 -0400

    Improve PhysicalExpr and Column documentation (#12457)
    
    * Improve PhysicalExpr and Column documentation
    
    * Apply suggestions from code review
    
    Co-authored-by: Chunchun Ye 
<[email protected]>
    
    ---------
    
    Co-authored-by: Chunchun Ye 
<[email protected]>
---
 .../physical-expr-common/src/physical_expr.rs      | 23 ++++++++++-
 datafusion/physical-expr/src/expressions/column.rs | 45 +++++++++++++++++++---
 2 files changed, 61 insertions(+), 7 deletions(-)

diff --git a/datafusion/physical-expr-common/src/physical_expr.rs 
b/datafusion/physical-expr-common/src/physical_expr.rs
index 75d300dd01..a443a65eaa 100644
--- a/datafusion/physical-expr-common/src/physical_expr.rs
+++ b/datafusion/physical-expr-common/src/physical_expr.rs
@@ -31,8 +31,27 @@ use datafusion_expr_common::columnar_value::ColumnarValue;
 use datafusion_expr_common::interval_arithmetic::Interval;
 use datafusion_expr_common::sort_properties::ExprProperties;
 
-/// See 
[create_physical_expr](https://docs.rs/datafusion/latest/datafusion/physical_expr/fn.create_physical_expr.html)
-/// for examples of creating `PhysicalExpr` from `Expr`
+/// [`PhysicalExpr`]s represent expressions such as `A + 1` or `CAST(c1 AS int)`.
+///
+/// `PhysicalExpr` knows its type, nullability and can be evaluated directly on
+/// a [`RecordBatch`] (see [`Self::evaluate`]).
+///
+/// `PhysicalExpr` are the physical counterpart to [`Expr`] used in logical
+/// planning. They are typically created from [`Expr`] by a [`PhysicalPlanner`]
+/// invoked from a higher level API.
+///
+/// Some important examples of `PhysicalExpr` are:
+/// * [`Column`]: Represents a column at a given index in a RecordBatch
+///
+/// To create `PhysicalExpr` from `Expr`, see
+/// * [`SessionContext::create_physical_expr`]: A high level API
+/// * [`create_physical_expr`]: A low level API
+///
+/// [`SessionContext::create_physical_expr`]: https://docs.rs/datafusion/latest/datafusion/execution/context/struct.SessionContext.html#method.create_physical_expr
+/// [`PhysicalPlanner`]: https://docs.rs/datafusion/latest/datafusion/physical_planner/trait.PhysicalPlanner.html
+/// [`Expr`]: https://docs.rs/datafusion/latest/datafusion/logical_expr/enum.Expr.html
+/// [`create_physical_expr`]: https://docs.rs/datafusion/latest/datafusion/physical_expr/fn.create_physical_expr.html
+/// [`Column`]: https://docs.rs/datafusion/latest/datafusion/physical_expr/expressions/struct.Column.html
 pub trait PhysicalExpr: Send + Sync + Display + Debug + PartialEq<dyn Any> {
     /// Returns the physical expression as [`Any`] so that it can be
     /// downcast to a specific implementation.
diff --git a/datafusion/physical-expr/src/expressions/column.rs 
b/datafusion/physical-expr/src/expressions/column.rs
index 79d15fdb02..bf15821bca 100644
--- a/datafusion/physical-expr/src/expressions/column.rs
+++ b/datafusion/physical-expr/src/expressions/column.rs
@@ -15,7 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
-//! Column expression
+//! Physical column reference: [`Column`]
 
 use std::any::Any;
 use std::hash::{Hash, Hasher};
@@ -33,14 +33,48 @@ use datafusion_expr::ColumnarValue;
 use crate::physical_expr::{down_cast_any_ref, PhysicalExpr};
 
 /// Represents the column at a given index in a RecordBatch
+///
+/// This is a physical expression that represents a column at a given index in an
+/// arrow [`Schema`] / [`RecordBatch`].
+///
+/// Unlike the [logical `Expr::Column`], this expression is always resolved by schema index,
+/// even though it does have a name. This is because the physical plan is always
+/// resolved to a specific schema and there is no concept of "relation".
+///
+/// # Example:
+///  If the schema is `a`, `b`, `c` the `Column` for `b` would be represented by
+///  index 1, since `b` is the second column in the schema.
+///
+/// ```
+/// # use datafusion_physical_expr::expressions::Column;
+/// # use arrow::datatypes::{DataType, Field, Schema};
+/// // Schema with columns a, b, c
+/// let schema = Schema::new(vec![
+///    Field::new("a", DataType::Int32, false),
+///    Field::new("b", DataType::Int32, false),
+///    Field::new("c", DataType::Int32, false),
+/// ]);
+///
+/// // reference to column b is index 1
+/// let column_b = Column::new_with_schema("b", &schema).unwrap();
+/// assert_eq!(column_b.index(), 1);
+///
+/// // reference to column c is index 2
+/// let column_c = Column::new_with_schema("c", &schema).unwrap();
+/// assert_eq!(column_c.index(), 2);
+/// ```
+/// [logical `Expr::Column`]: https://docs.rs/datafusion/latest/datafusion/logical_expr/enum.Expr.html#variant.Column
 #[derive(Debug, Hash, PartialEq, Eq, Clone)]
 pub struct Column {
+    /// The name of the column (used for debugging and display purposes)
     name: String,
+    /// The index of the column in its schema
     index: usize,
 }
 
 impl Column {
-    /// Create a new column expression
+    /// Create a new column expression which references the
+    /// column with the given index in the schema.
     pub fn new(name: &str, index: usize) -> Self {
         Self {
             name: name.to_owned(),
@@ -48,17 +82,18 @@ impl Column {
         }
     }
 
-    /// Create a new column expression based on column name and schema
+    /// Create a new column expression which references the
+    /// column with the given name in the schema
     pub fn new_with_schema(name: &str, schema: &Schema) -> Result<Self> {
         Ok(Column::new(name, schema.index_of(name)?))
     }
 
-    /// Get the column name
+    /// Get the column's name
     pub fn name(&self) -> &str {
         &self.name
     }
 
-    /// Get the column index
+    /// Get the column's schema index
     pub fn index(&self) -> usize {
         self.index
     }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to