Jefffrey commented on code in PR #17250: URL: https://github.com/apache/datafusion/pull/17250#discussion_r2342968747
########## datafusion/expr/src/logical_plan/plan.rs: ########## @@ -2184,14 +2184,22 @@ impl Projection { /// will be computed. /// * `exprs`: A slice of `Expr` expressions representing the projection operation to apply. /// +/// # Metadata Handling +/// +/// - **Schema-level metadata**: Passed through unchanged from the input schema +/// - **Field-level metadata**: Determined by each expression via `exprlist_to_fields`, which +/// calls `Expr::to_field` to handle expression-specific metadata (literals, aliases, etc.) Review Comment: ```suggestion /// - **Field-level metadata**: Determined by each expression via [`exprlist_to_fields`], which /// calls [`Expr::to_field`] to handle expression-specific metadata (literals, aliases, etc.) ``` ########## datafusion/expr/src/expr_schema.rs: ########## @@ -371,6 +371,49 @@ impl ExprSchemable for Expr { /// Returns a [arrow::datatypes::Field] compatible with this expression. /// + /// This function converts an expression into a field with appropriate metadata + /// and nullability based on the expression type and context. It is the primary + /// mechanism for determining field-level schemas. + /// + /// # Field Property Resolution + /// + /// For each expression, the following properties are determined: + /// + /// ## Data Type Resolution + /// - **Column references**: Data type from input schema field + /// - **Literals**: Data type inferred from literal value + /// - **Aliases**: Data type inherited from the underlying expression (the aliased expression) + /// - **Binary expressions**: Result type from type coercion rules + /// - **Boolean expressions**: Always a boolean type + /// - **Cast expressions**: Target data type from cast operation + /// - **Function calls**: Return type based on function signature and argument types + /// + /// ## Nullability Determination + /// - **Column references**: Inherit nullability from input schema field + /// - **Literals**: Nullable only if literal value is NULL + /// - **Aliases**: Inherit nullability from the underlying expression (the aliased expression) + /// - **Binary expressions**: Nullable if either operand is nullable + /// - **Boolean expressions**: Always non-nullable (IS NULL, EXISTS, etc.) + /// - **Cast expressions**: determined by the input expression's nullability rules + /// - **Function calls**: Based on function nullability rules and input nullability + /// + /// ## Metadata Handling + /// - **Column references**: Preserve original field metadata from input schema + /// - **Literals**: Use explicitly provided metadata, otherwise empty + /// - **Aliases**: Merge underlying expr metadata with alias-specific metadata, preferring the alias metadata + /// - **Binary expressions**: field metadata is empty + /// - **Boolean expressions**: field metadata is empty + /// - **Cast expressions**: determined by the input expression's field metadata handling + /// - **Scalar functions**: Generate metadata via function's `return_field_from_args` method, + /// with the default implementation returning empty field metadata + /// - **Aggregate functions**: Generate metadata via function's `return_field` method, Review Comment: ```suggestion /// - **Scalar functions**: Generate metadata via function's [`return_field_from_args`] method, /// with the default implementation returning empty field metadata /// - **Aggregate functions**: Generate metadata via function's [`return_field`] method, ``` And add at the bottom of the doc: ```rust /// [`return_field_from_args`]: crate::ScalarUDF::return_field_from_args /// [`return_field`]: crate::AggregateUDF::return_field ``` ########## datafusion/expr/src/utils.rs: ########## @@ -690,7 +690,23 @@ where err } -/// Create field meta-data from an expression, for use in a result set schema +/// Create schema fields from an expression list, for use in result set schema construction +/// +/// This function converts a list of expressions into a list of complete schema fields, +/// making comprehensive determinations about each field's properties including: +/// - **Data type**: Resolved based on expression type and input schema context +/// - **Nullability**: Determined by expression-specific nullability rules +/// - **Metadata**: Computed based on expression type (preserving, merging, or generating new metadata) +/// - **Table reference scoping**: Establishing proper qualified field references +/// +/// Each expression is converted to a field by calling `Expr::to_field`, which performs Review Comment: ```suggestion /// Each expression is converted to a field by calling [`Expr::to_field`], which performs ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org