phillipleblanc commented on code in PR #13824: URL: https://github.com/apache/datafusion/pull/13824#discussion_r1893430912
########## datafusion/sql/src/unparser/rewrite.rs: ########## @@ -257,6 +267,43 @@ pub(super) fn subquery_alias_inner_query_and_columns( (outer_projections.input.as_ref(), columns) } +/// Try to find the column alias for UNNEST in the inner projection. +/// For example: +/// ```sql +/// SELECT * FROM t1 CROSS JOIN UNNEST(t1.c1) AS u(c1) +/// ``` +/// The above query will be parsed into the following plan: +/// ```text +/// Projection: * +/// Cross Join: +/// SubqueryAlias: t1 +/// TableScan: t +/// SubqueryAlias: u +/// Subquery: +/// Projection: UNNEST(outer_ref(t1.c1)) AS c1 +/// Projection: __unnest_placeholder(outer_ref(t1.c1),depth=1) AS UNNEST(outer_ref(t1.c1)) +/// Unnest: lists[__unnest_placeholder(outer_ref(t1.c1))|depth=1] structs[] +/// Projection: outer_ref(t1.c1) AS __unnest_placeholder(outer_ref(t1.c1)) +/// EmptyRelation +/// ``` +/// The function will return the inner projection and the column alias `c1` if the column name +/// starts with `UNNEST(` (the `Display` result of [Expr::Unnest]) in the inner projection. +pub(super) fn find_unnest_column_alias( + plan: &LogicalPlan, +) -> (&LogicalPlan, Option<String>) { + if let LogicalPlan::Projection(projection) = plan { + if projection.expr.len() != 1 { + return (plan, None); + } + if let Some(Expr::Alias(alias)) = projection.expr.first() { + if alias.expr.schema_name().to_string().starts_with("UNNEST(") { Review Comment: Is this not an expression of type `Unnest`? I don't think we need to do this string allocation here just to check that right? ########## datafusion/sql/src/unparser/plan.rs: ########## @@ -723,19 +733,48 @@ impl Unparser<'_> { internal_err!("Unnest input is not a Projection: {unnest:?}") } } - _ => not_impl_err!("Unsupported operator: {plan:?}"), + LogicalPlan::Subquery(subquery) + if find_unnest_node_until_relation(subquery.subquery.as_ref()) + .is_some() => + { + if self.dialect.unnest_as_table_factor() { + self.select_to_sql_recursively( + subquery.subquery.as_ref(), + query, + select, + relation, + ) + } else { + self.derive_with_dialect_alias( + "derived_unnest", + subquery.subquery.as_ref(), + relation, + true, + ) + } + } + _ => { + not_impl_err!("Unsupported operator: {plan:?}") + } } } /// Try to find the placeholder column name generated by `RecursiveUnnestRewriter` - /// Only match the pattern `Expr::Alias(Expr::Column("__unnest_placeholder(...)"))` - fn is_unnest_placeholder(expr: &Expr) -> bool { + /// The first return value is a boolean indicating if the column is a placeholder column: + /// Try to match the pattern `Expr::Alias(Expr::Column("__unnest_placeholder(...)"))` + /// The second return value is a boolean indicating if the column uses an outer reference: + /// Try to match the pattern `Expr::Alias(Expr::Column("__unnest_placeholder(outer_ref(...)))")` + /// + /// `outer_ref` is the display result of [Expr::OuterReferenceColumn] + fn is_unnest_placeholder_with_outer_ref(expr: &Expr) -> (bool, bool) { if let Expr::Alias(Alias { expr, .. }) = expr { if let Expr::Column(Column { name, .. }) = expr.as_ref() { - return name.starts_with(UNNEST_PLACEHOLDER); + if let Some(prefix) = name.strip_prefix(UNNEST_PLACEHOLDER) { + return (true, prefix.starts_with("(outer_ref(")); Review Comment: Not a huge fan of this string matching. At least the UNNEST_PLACEHOLDER is a shared const, but the `outer_ref` could potentially change and we would miss it here. ########## datafusion/sql/src/unparser/plan.rs: ########## @@ -723,19 +733,48 @@ impl Unparser<'_> { internal_err!("Unnest input is not a Projection: {unnest:?}") } } - _ => not_impl_err!("Unsupported operator: {plan:?}"), + LogicalPlan::Subquery(subquery) + if find_unnest_node_until_relation(subquery.subquery.as_ref()) + .is_some() => + { + if self.dialect.unnest_as_table_factor() { + self.select_to_sql_recursively( + subquery.subquery.as_ref(), + query, + select, + relation, + ) + } else { + self.derive_with_dialect_alias( + "derived_unnest", + subquery.subquery.as_ref(), + relation, + true, + ) + } + } + _ => { + not_impl_err!("Unsupported operator: {plan:?}") + } } } /// Try to find the placeholder column name generated by `RecursiveUnnestRewriter` - /// Only match the pattern `Expr::Alias(Expr::Column("__unnest_placeholder(...)"))` - fn is_unnest_placeholder(expr: &Expr) -> bool { + /// The first return value is a boolean indicating if the column is a placeholder column: + /// Try to match the pattern `Expr::Alias(Expr::Column("__unnest_placeholder(...)"))` + /// The second return value is a boolean indicating if the column uses an outer reference: + /// Try to match the pattern `Expr::Alias(Expr::Column("__unnest_placeholder(outer_ref(...)))")` + /// + /// `outer_ref` is the display result of [Expr::OuterReferenceColumn] + fn is_unnest_placeholder_with_outer_ref(expr: &Expr) -> (bool, bool) { if let Expr::Alias(Alias { expr, .. }) = expr { if let Expr::Column(Column { name, .. }) = expr.as_ref() { - return name.starts_with(UNNEST_PLACEHOLDER); + if let Some(prefix) = name.strip_prefix(UNNEST_PLACEHOLDER) { + return (true, prefix.starts_with("(outer_ref(")); Review Comment: I don't feel very strongly about this since it does seem unlikely to change though. ########## datafusion/sql/src/unparser/plan.rs: ########## @@ -723,19 +733,48 @@ impl Unparser<'_> { internal_err!("Unnest input is not a Projection: {unnest:?}") } } - _ => not_impl_err!("Unsupported operator: {plan:?}"), + LogicalPlan::Subquery(subquery) + if find_unnest_node_until_relation(subquery.subquery.as_ref()) + .is_some() => + { + if self.dialect.unnest_as_table_factor() { + self.select_to_sql_recursively( + subquery.subquery.as_ref(), + query, + select, + relation, + ) + } else { + self.derive_with_dialect_alias( + "derived_unnest", + subquery.subquery.as_ref(), + relation, + true, + ) + } + } + _ => { + not_impl_err!("Unsupported operator: {plan:?}") + } } } /// Try to find the placeholder column name generated by `RecursiveUnnestRewriter` - /// Only match the pattern `Expr::Alias(Expr::Column("__unnest_placeholder(...)"))` - fn is_unnest_placeholder(expr: &Expr) -> bool { + /// The first return value is a boolean indicating if the column is a placeholder column: + /// Try to match the pattern `Expr::Alias(Expr::Column("__unnest_placeholder(...)"))` + /// The second return value is a boolean indicating if the column uses an outer reference: + /// Try to match the pattern `Expr::Alias(Expr::Column("__unnest_placeholder(outer_ref(...)))")` + /// + /// `outer_ref` is the display result of [Expr::OuterReferenceColumn] + fn is_unnest_placeholder_with_outer_ref(expr: &Expr) -> (bool, bool) { Review Comment: Could we define an enum that we return here? That is clearer to read than two booleans and we can limit it to reference only the possible states (i.e. it doesn't look like `(false, true)` can happen, but its something a caller would need to handle from the type system) -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org