phillipleblanc commented on code in PR #13824:
URL: https://github.com/apache/datafusion/pull/13824#discussion_r1893430912
##########
datafusion/sql/src/unparser/rewrite.rs:
##########
@@ -257,6 +267,43 @@ pub(super) fn subquery_alias_inner_query_and_columns(
(outer_projections.input.as_ref(), columns)
}
+/// Try to find the column alias for UNNEST in the inner projection.
+/// For example:
+/// ```sql
+/// SELECT * FROM t1 CROSS JOIN UNNEST(t1.c1) AS u(c1)
+/// ```
+/// The above query will be parsed into the following plan:
+/// ```text
+/// Projection: *
+/// Cross Join:
+/// SubqueryAlias: t1
+/// TableScan: t
+/// SubqueryAlias: u
+/// Subquery:
+/// Projection: UNNEST(outer_ref(t1.c1)) AS c1
+/// Projection: __unnest_placeholder(outer_ref(t1.c1),depth=1) AS
UNNEST(outer_ref(t1.c1))
+/// Unnest: lists[__unnest_placeholder(outer_ref(t1.c1))|depth=1]
structs[]
+/// Projection: outer_ref(t1.c1) AS
__unnest_placeholder(outer_ref(t1.c1))
+/// EmptyRelation
+/// ```
+/// The function will return the inner projection and the column alias `c1` if
the column name
+/// starts with `UNNEST(` (the `Display` result of [Expr::Unnest]) in the
inner projection.
+pub(super) fn find_unnest_column_alias(
+ plan: &LogicalPlan,
+) -> (&LogicalPlan, Option<String>) {
+ if let LogicalPlan::Projection(projection) = plan {
+ if projection.expr.len() != 1 {
+ return (plan, None);
+ }
+ if let Some(Expr::Alias(alias)) = projection.expr.first() {
+ if alias.expr.schema_name().to_string().starts_with("UNNEST(") {
Review Comment:
Is this not an expression of type `Unnest`? I don't think we need to do this
string allocation here just to check that, right?
##########
datafusion/sql/src/unparser/plan.rs:
##########
@@ -723,19 +733,48 @@ impl Unparser<'_> {
internal_err!("Unnest input is not a Projection:
{unnest:?}")
}
}
- _ => not_impl_err!("Unsupported operator: {plan:?}"),
+ LogicalPlan::Subquery(subquery)
+ if find_unnest_node_until_relation(subquery.subquery.as_ref())
+ .is_some() =>
+ {
+ if self.dialect.unnest_as_table_factor() {
+ self.select_to_sql_recursively(
+ subquery.subquery.as_ref(),
+ query,
+ select,
+ relation,
+ )
+ } else {
+ self.derive_with_dialect_alias(
+ "derived_unnest",
+ subquery.subquery.as_ref(),
+ relation,
+ true,
+ )
+ }
+ }
+ _ => {
+ not_impl_err!("Unsupported operator: {plan:?}")
+ }
}
}
/// Try to find the placeholder column name generated by
`RecursiveUnnestRewriter`
- /// Only match the pattern
`Expr::Alias(Expr::Column("__unnest_placeholder(...)"))`
- fn is_unnest_placeholder(expr: &Expr) -> bool {
+ /// The first return value is a boolean indicating if the column is a
placeholder column:
+ /// Try to match the pattern
`Expr::Alias(Expr::Column("__unnest_placeholder(...)"))`
+ /// The second return value is a boolean indicating if the column uses an
outer reference:
+ /// Try to match the pattern
`Expr::Alias(Expr::Column("__unnest_placeholder(outer_ref(...)))")`
+ ///
+ /// `outer_ref` is the display result of [Expr::OuterReferenceColumn]
+ fn is_unnest_placeholder_with_outer_ref(expr: &Expr) -> (bool, bool) {
if let Expr::Alias(Alias { expr, .. }) = expr {
if let Expr::Column(Column { name, .. }) = expr.as_ref() {
- return name.starts_with(UNNEST_PLACEHOLDER);
+ if let Some(prefix) = name.strip_prefix(UNNEST_PLACEHOLDER) {
+ return (true, prefix.starts_with("(outer_ref("));
Review Comment:
I'm not a huge fan of this string matching. At least UNNEST_PLACEHOLDER is a
shared const, but the `outer_ref` string could potentially change and we would
miss it here.
##########
datafusion/sql/src/unparser/plan.rs:
##########
@@ -723,19 +733,48 @@ impl Unparser<'_> {
internal_err!("Unnest input is not a Projection:
{unnest:?}")
}
}
- _ => not_impl_err!("Unsupported operator: {plan:?}"),
+ LogicalPlan::Subquery(subquery)
+ if find_unnest_node_until_relation(subquery.subquery.as_ref())
+ .is_some() =>
+ {
+ if self.dialect.unnest_as_table_factor() {
+ self.select_to_sql_recursively(
+ subquery.subquery.as_ref(),
+ query,
+ select,
+ relation,
+ )
+ } else {
+ self.derive_with_dialect_alias(
+ "derived_unnest",
+ subquery.subquery.as_ref(),
+ relation,
+ true,
+ )
+ }
+ }
+ _ => {
+ not_impl_err!("Unsupported operator: {plan:?}")
+ }
}
}
/// Try to find the placeholder column name generated by
`RecursiveUnnestRewriter`
- /// Only match the pattern
`Expr::Alias(Expr::Column("__unnest_placeholder(...)"))`
- fn is_unnest_placeholder(expr: &Expr) -> bool {
+ /// The first return value is a boolean indicating if the column is a
placeholder column:
+ /// Try to match the pattern
`Expr::Alias(Expr::Column("__unnest_placeholder(...)"))`
+ /// The second return value is a boolean indicating if the column uses an
outer reference:
+ /// Try to match the pattern
`Expr::Alias(Expr::Column("__unnest_placeholder(outer_ref(...)))")`
+ ///
+ /// `outer_ref` is the display result of [Expr::OuterReferenceColumn]
+ fn is_unnest_placeholder_with_outer_ref(expr: &Expr) -> (bool, bool) {
if let Expr::Alias(Alias { expr, .. }) = expr {
if let Expr::Column(Column { name, .. }) = expr.as_ref() {
- return name.starts_with(UNNEST_PLACEHOLDER);
+ if let Some(prefix) = name.strip_prefix(UNNEST_PLACEHOLDER) {
+ return (true, prefix.starts_with("(outer_ref("));
Review Comment:
I don't feel very strongly about this, though, since it does seem unlikely to
change.
##########
datafusion/sql/src/unparser/plan.rs:
##########
@@ -723,19 +733,48 @@ impl Unparser<'_> {
internal_err!("Unnest input is not a Projection:
{unnest:?}")
}
}
- _ => not_impl_err!("Unsupported operator: {plan:?}"),
+ LogicalPlan::Subquery(subquery)
+ if find_unnest_node_until_relation(subquery.subquery.as_ref())
+ .is_some() =>
+ {
+ if self.dialect.unnest_as_table_factor() {
+ self.select_to_sql_recursively(
+ subquery.subquery.as_ref(),
+ query,
+ select,
+ relation,
+ )
+ } else {
+ self.derive_with_dialect_alias(
+ "derived_unnest",
+ subquery.subquery.as_ref(),
+ relation,
+ true,
+ )
+ }
+ }
+ _ => {
+ not_impl_err!("Unsupported operator: {plan:?}")
+ }
}
}
/// Try to find the placeholder column name generated by
`RecursiveUnnestRewriter`
- /// Only match the pattern
`Expr::Alias(Expr::Column("__unnest_placeholder(...)"))`
- fn is_unnest_placeholder(expr: &Expr) -> bool {
+ /// The first return value is a boolean indicating if the column is a
placeholder column:
+ /// Try to match the pattern
`Expr::Alias(Expr::Column("__unnest_placeholder(...)"))`
+ /// The second return value is a boolean indicating if the column uses an
outer reference:
+ /// Try to match the pattern
`Expr::Alias(Expr::Column("__unnest_placeholder(outer_ref(...)))")`
+ ///
+ /// `outer_ref` is the display result of [Expr::OuterReferenceColumn]
+ fn is_unnest_placeholder_with_outer_ref(expr: &Expr) -> (bool, bool) {
Review Comment:
Could we define an enum that we return here? That is clearer to read than
two booleans, and we can limit it to represent only the possible states (i.e. it
doesn't look like `(false, true)` can happen, but it's something a caller would
need to handle as far as the type system is concerned).
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]