alamb commented on code in PR #11487: URL: https://github.com/apache/datafusion/pull/11487#discussion_r1683393268
########## datafusion/expr/src/planner.rs: ########## @@ -173,6 +173,24 @@ pub trait ExprPlanner: Send + Sync { fn plan_overlay(&self, args: Vec<Expr>) -> Result<PlannerResult<Vec<Expr>>> { Ok(PlannerResult::Original(args)) } + + fn plan_get_field(&self, args: Vec<Expr>) -> Result<PlannerResult<Vec<Expr>>> { + Ok(PlannerResult::Original(args)) + } + /// Plans compound identifier eg `db.schema.table`. + /// + /// Note: + /// Currently compound identifier for outer query schema is not supported. + /// + /// Returns empty expression arguments if not possible + fn plan_compound_identifier( + &self, + _filed: &Field, Review Comment: ```suggestion _field: &Field, ``` ########## datafusion/expr/src/planner.rs: ########## @@ -173,6 +173,24 @@ pub trait ExprPlanner: Send + Sync { fn plan_overlay(&self, args: Vec<Expr>) -> Result<PlannerResult<Vec<Expr>>> { Ok(PlannerResult::Original(args)) } + + fn plan_get_field(&self, args: Vec<Expr>) -> Result<PlannerResult<Vec<Expr>>> { Review Comment: Can we please add comments to this function so implementers know what it is used for? ########## datafusion/expr/src/planner.rs: ########## @@ -173,6 +173,24 @@ pub trait ExprPlanner: Send + Sync { fn plan_overlay(&self, args: Vec<Expr>) -> Result<PlannerResult<Vec<Expr>>> { Ok(PlannerResult::Original(args)) } + + fn plan_get_field(&self, args: Vec<Expr>) -> Result<PlannerResult<Vec<Expr>>> { + Ok(PlannerResult::Original(args)) + } + /// Plans compound identifier eg `db.schema.table`. + /// + /// Note: + /// Currently compound identifier for outer query schema is not supported. + /// + /// Returns empty expression arguments if not possible Review Comment: Since this doesn't have to pass back the arguments, how about simply returning an Error if it is not possible to plan Also, I feel like it would be nice to be able to plan basic SQL like column references without having to implement `plan_compound_indentifier` So maybe we could update the docs to say it is only called when `nested_names` is non empty? ########## datafusion-cli/Cargo.lock: ########## @@ -1342,6 +1342,7 @@ dependencies = [ "chrono", "datafusion-common", "datafusion-expr", + "datafusion-functions", Review Comment: I think we are trying hard to avoid adding this dependency (otherwise the optimizer depends on the built in functions -- where it is supposed to be general purpose) ########## datafusion/sql/src/expr/identifier.rs: ########## @@ -124,31 +127,20 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { let search_result = search_dfschema(&ids, schema); match search_result { // found matching field with spare identifier(s) for nested field(s) in structure - Some((field, qualifier, nested_names)) if !nested_names.is_empty() => { - // TODO: remove when can support multiple nested identifiers - if nested_names.len() > 1 { - return not_impl_err!( - "Nested identifiers not yet supported for column {}", - Column::from((qualifier, field)).quoted_flat_name() - ); - } - let nested_name = nested_names[0].to_string(); - - let col = Expr::Column(Column::from((qualifier, field))); - if let Some(udf) = - self.context_provider.get_function_meta("get_field") - { - Ok(Expr::ScalarFunction(ScalarFunction::new_udf( - udf, - vec![col, lit(ScalarValue::from(nested_name))], - ))) - } else { - internal_err!("get_field not found") + Some((field, qualifier, nested_names)) => { + for planner in self.context_provider.get_expr_planners() { + match planner.plan_compound_identifier( + field, + qualifier, + nested_names, + )? { + PlannerResult::Planned(expr) => return Ok(expr), + PlannerResult::Original(_args) => {} + } } - } - // found matching field with no spare identifier(s) - Some((field, qualifier, _nested_names)) => { Review Comment: I think it would be nice to leave this in the base planner rather than requiring users to override it -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org