jayzhan211 commented on code in PR #11681: URL: https://github.com/apache/datafusion/pull/11681#discussion_r1714506171
########## datafusion/expr/src/expr.rs: ########## @@ -970,6 +976,60 @@ impl GroupingSet { } } +#[derive(Clone, PartialEq, Eq, Hash, Debug, Default)] +pub struct WildcardOptions { + pub opt_ilike: Option<IlikeSelectItem>, + pub opt_exclude: Option<ExcludeSelectItem>, + pub opt_except: Option<ExceptSelectItem>, + pub opt_replace: Option<PlannedReplaceSelectItem>, + pub opt_rename: Option<RenameSelectItem>, +} + +impl Display for WildcardOptions { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + if let Some(ilike) = &self.opt_ilike { + write!(f, " {ilike}")?; + } + if let Some(exclude) = &self.opt_exclude { + write!(f, " {exclude}")?; + } + if let Some(except) = &self.opt_except { + write!(f, " {except}")?; + } + if let Some(replace) = &self.opt_replace { + write!(f, " {replace}")?; + } + if let Some(rename) = &self.opt_rename { + write!(f, " {rename}")?; + } + Ok(()) + } +} + +#[derive(Clone, PartialEq, Eq, Hash, Debug, Default)] +pub struct PlannedReplaceSelectItem { + pub items: Vec<Box<ReplaceSelectElement>>, Review Comment: I don't think we need Box here. `Vec<ReplaceSelectElement>` ########## datafusion/core/src/datasource/view.rs: ########## @@ -61,6 +64,15 @@ impl ViewTable { Ok(view) } + fn apply_required_rule(logical_plan: LogicalPlan) -> Result<LogicalPlan> { Review Comment: 👍 ########## datafusion/optimizer/src/analyzer/expand_wildcard_rule.rs: ########## @@ -0,0 +1,308 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::sync::Arc; + +use crate::AnalyzerRule; +use datafusion_common::config::ConfigOptions; +use datafusion_common::tree_node::{Transformed, TransformedResult}; +use datafusion_common::{Column, Result}; +use datafusion_expr::builder::validate_unique_names; +use datafusion_expr::expr::{Alias, PlannedReplaceSelectItem}; +use datafusion_expr::utils::{ + expand_qualified_wildcard, expand_wildcard, find_base_plan, +}; +use datafusion_expr::{Expr, LogicalPlan, Projection, SubqueryAlias}; + +#[derive(Default)] +pub struct ExpandWildcardRule {} + +impl ExpandWildcardRule { + pub fn new() -> Self { + Self {} + } +} + +impl AnalyzerRule for ExpandWildcardRule { + fn analyze(&self, plan: LogicalPlan, _: &ConfigOptions) -> Result<LogicalPlan> { + // Because the wildcard expansion is based on the schema of the input plan, + // using `transform_up_with_subqueries` here. + plan.transform_up_with_subqueries(expand_internal).data() + } + + fn name(&self) -> &str { + "expand_wildcard_rule" + } +} + +fn expand_internal(plan: LogicalPlan) -> Result<Transformed<LogicalPlan>> { + match plan { + LogicalPlan::Projection(Projection { expr, input, .. }) => { + let projected_expr = expand_exprlist(&input, expr)?; + validate_unique_names("Projections", projected_expr.iter())?; + Ok(Transformed::yes( + Projection::try_new(projected_expr, Arc::clone(&input)) + .map(LogicalPlan::Projection)?, + )) + } + // The schema of the plan should also be updated if the child plan is transformed. + LogicalPlan::SubqueryAlias(SubqueryAlias { input, alias, .. 
}) => { + Ok(Transformed::yes( + SubqueryAlias::try_new(input, alias).map(LogicalPlan::SubqueryAlias)?, + )) + } + _ => Ok(Transformed::no(plan)), + } +} + +fn expand_exprlist(input: &LogicalPlan, expr: Vec<Expr>) -> Result<Vec<Expr>> { + let mut projected_expr = vec![]; + let input = find_base_plan(input); + for e in expr { + match e { + Expr::Wildcard { qualifier, options } => { + if let Some(qualifier) = qualifier { + let expanded = expand_qualified_wildcard( + &qualifier, + input.schema(), + Some(&options), + )?; + // If there is a REPLACE statement, replace that column with the given + // replace expression. Column name remains the same. + let replaced = if let Some(replace) = options.opt_replace { + replace_columns(expanded, replace)? + } else { + expanded + }; + projected_expr.extend(replaced); + } else { + let expanded = + expand_wildcard(input.schema(), input, Some(&options))?; + // If there is a REPLACE statement, replace that column with the given + // replace expression. Column name remains the same. + let replaced = if let Some(replace) = options.opt_replace { + replace_columns(expanded, replace)? + } else { + expanded + }; + projected_expr.extend(replaced); + } + } + // A workaround to handle the case when the column name is "*". + // We transform the expression to an Expr::Column through [Column::from_name] in many places. + // It would also convert the wildcard expression to a column expression with name "*". 
+ Expr::Column(Column { + ref relation, + ref name, + }) => { + if name.eq("*") { + if let Some(qualifier) = relation { + projected_expr.extend(expand_qualified_wildcard( + qualifier, + input.schema(), + None, + )?); + } else { + projected_expr.extend(expand_wildcard( + input.schema(), + input, + None, + )?); + } + } else { + projected_expr.push(e.clone()); + } + } + _ => projected_expr.push(e), + } + } + Ok(projected_expr) +} + +/// If there is a REPLACE statement in the projected expression in the form of +/// "REPLACE (some_column_within_an_expr AS some_column)", this function replaces +/// that column with the given replace expression. Column name remains the same. +/// Multiple REPLACEs are also possible with comma separations. +fn replace_columns( + mut exprs: Vec<Expr>, + replace: PlannedReplaceSelectItem, +) -> Result<Vec<Expr>> { + for expr in exprs.iter_mut() { + if let Expr::Column(Column { name, .. }) = expr { + if let Some((_, new_expr)) = replace + .items() + .iter() + .zip(replace.expressions().iter()) + .find(|(item, _)| item.column_name.value == *name) + { + *expr = Expr::Alias(Alias { Review Comment: You can try `new_expr.alias(name)` ########## datafusion/expr/src/expr.rs: ########## @@ -970,6 +976,60 @@ impl GroupingSet { } } +#[derive(Clone, PartialEq, Eq, Hash, Debug, Default)] +pub struct WildcardOptions { + pub opt_ilike: Option<IlikeSelectItem>, Review Comment: nit: I think `opt_xxx` is redundant ########## datafusion/expr/src/utils.rs: ########## @@ -731,11 +731,133 @@ pub fn exprlist_to_fields<'a>( plan: &LogicalPlan, ) -> Result<Vec<(Option<TableReference>, Arc<Field>)>> { // look for exact match in plan's output schema - let input_schema = &plan.schema(); - exprs + let wildcard_schema = find_base_plan(plan).schema(); + let input_schema = plan.schema(); + let result = exprs .into_iter() - .map(|e| e.to_field(input_schema)) - .collect() + .map(|e| match e { + Expr::Wildcard { qualifier, options } => match qualifier { + None => { + let 
excluded: Vec<String> = get_excluded_columns( + options.opt_exclude.as_ref(), + options.opt_except.as_ref(), + wildcard_schema, + None, + )? + .into_iter() + .map(|c| c.flat_name()) + .collect(); + Ok::<_, DataFusionError>( + (0..wildcard_schema.fields().len()) Review Comment: I think you can use `wildcard_schema.field_names()` ########## datafusion/sql/src/select.rs: ########## @@ -637,40 +616,49 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { } /// If there is a REPLACE statement in the projected expression in the form of - /// "REPLACE (some_column_within_an_expr AS some_column)", this function replaces - /// that column with the given replace expression. Column name remains the same. - /// Multiple REPLACEs are also possible with comma separations. - fn replace_columns( + /// "REPLACE (some_column_within_an_expr AS some_column)", we should plan the + /// replace expressions first. + fn plan_wildcard_options( &self, plan: &LogicalPlan, empty_from: bool, planner_context: &mut PlannerContext, - mut exprs: Vec<Expr>, - replace: ReplaceSelectItem, - ) -> Result<Vec<Expr>> { - for expr in exprs.iter_mut() { - if let Expr::Column(Column { name, .. 
}) = expr { - if let Some(item) = replace - .items - .iter() - .find(|item| item.column_name.value == *name) - { - let new_expr = self.sql_select_to_rex( + options: WildcardAdditionalOptions, + ) -> Result<WildcardOptions> { + if let Some(replace) = options.opt_replace { + let replace_expr = replace + .items + .iter() + .map(|item| { + Ok(self.sql_select_to_rex( SelectItem::UnnamedExpr(item.expr.clone()), plan, empty_from, planner_context, )?[0] - .clone(); - *expr = Expr::Alias(Alias { - expr: Box::new(new_expr), - relation: None, - name: name.clone(), - }); - } - } + .clone()) + }) + .collect::<Result<Vec<_>>>()?; + let planned_option = PlannedReplaceSelectItem { + items: replace.items, + planned_expressions: replace_expr, + }; + Ok(WildcardOptions { Review Comment: Maybe it is worth having a `with_replace` function, so that we just need `options.with_replace(planned_option)` ########## datafusion/sqllogictest/test_files/window.slt: ########## @@ -3929,7 +3929,11 @@ b 1 3 a 1 4 b 5 5 -statement error DataFusion error: Error during planning: Projection references non-aggregate values: Expression aggregate_test_100.c1 could not be resolved from available columns: rn +# expected failed message: +# DataFusion error: expand_wildcard_rule +# caused by +# Schema error: No field named aggregate_test_100.c1. Valid fields are rn. +statement error Review Comment: Is this a TODO? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org