This is an automated email from the ASF dual-hosted git repository.

jayzhan pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 8b4a8e6b15 support unnest as subexpression  (#9592)
8b4a8e6b15 is described below

commit 8b4a8e6b157c007e7988f715cb4b693578438f8b
Author: YjyJeff <[email protected]>
AuthorDate: Thu Mar 14 14:22:42 2024 +0800

    support unnest as subexpression  (#9592)
    
    * support unnest as subexpression and multiple unnests in projection
    
    * update alias unnest
    
    * remove unnest multiple columns
    
    * rename mulitple unnest columns to multiple unnest functions in selection
    
    * fix: move column name to if branch
    
    ---------
    
    Co-authored-by: jefffffyang <[email protected]>
---
 datafusion/expr/src/expr_schema.rs            | 20 +++++++-
 datafusion/sql/src/select.rs                  | 52 ++++++++++++++++-----
 datafusion/sqllogictest/test_files/unnest.slt | 67 ++++++++++++++++++++++++++-
 3 files changed, 123 insertions(+), 16 deletions(-)

diff --git a/datafusion/expr/src/expr_schema.rs 
b/datafusion/expr/src/expr_schema.rs
index 70ffa5064a..1d83fbe8c0 100644
--- a/datafusion/expr/src/expr_schema.rs
+++ b/datafusion/expr/src/expr_schema.rs
@@ -28,7 +28,8 @@ use crate::{utils, LogicalPlan, Projection, Subquery};
 use arrow::compute::can_cast_types;
 use arrow::datatypes::{DataType, Field};
 use datafusion_common::{
-    internal_err, plan_datafusion_err, plan_err, Column, DFField, ExprSchema, 
Result,
+    internal_err, not_impl_err, plan_datafusion_err, plan_err, Column, DFField,
+    ExprSchema, Result,
 };
 use std::collections::HashMap;
 use std::sync::Arc;
@@ -113,7 +114,22 @@ impl ExprSchemable for Expr {
                     .iter()
                     .map(|e| e.get_type(schema))
                     .collect::<Result<Vec<_>>>()?;
-                Ok(arg_data_types[0].clone())
+                let arg_data_type = arg_data_types[0].clone();
+                // Unnest's output type is the inner type of the list
+                match arg_data_type{
+                    DataType::List(field) | DataType::LargeList(field) | 
DataType::FixedSizeList(field, _) =>{
+                        Ok(field.data_type().clone())
+                    }
+                    DataType::Struct(_) => {
+                        not_impl_err!("unnest() does not support struct yet")
+                    }
+                    DataType::Null => {
+                        not_impl_err!("unnest() does not support null yet")
+                    }
+                    _ => {
+                        plan_err!("unnest() can only be applied to array, 
struct and null")
+                    }
+                }
             }
             Expr::ScalarFunction(ScalarFunction { func_def, args }) => {
                 let arg_data_types = args
diff --git a/datafusion/sql/src/select.rs b/datafusion/sql/src/select.rs
index c5c80bd6ac..1bfd60a8ce 100644
--- a/datafusion/sql/src/select.rs
+++ b/datafusion/sql/src/select.rs
@@ -24,6 +24,7 @@ use crate::utils::{
     resolve_columns, resolve_positions_to_exprs,
 };
 
+use datafusion_common::tree_node::{Transformed, TreeNode};
 use datafusion_common::{not_impl_err, plan_err, DataFusionError, Result};
 use datafusion_common::{Column, UnnestOptions};
 use datafusion_expr::expr::{Alias, Unnest};
@@ -276,25 +277,51 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
         Ok(plan)
     }
 
-    // Try converting Expr::Unnest to LogicalPlan::Unnest if possible, 
otherwise do the final projection
+    /// Try converting Expr(Unnest(Expr)) to Projection/Unnest/Projection
     pub(super) fn try_process_unnest(
         &self,
         input: LogicalPlan,
         select_exprs: Vec<Expr>,
     ) -> Result<LogicalPlan> {
         let mut unnest_columns = vec![];
-        // Map unnest expressions to their argument
-        let projection_exprs = select_exprs
+        let mut inner_projection_exprs = vec![];
+
+        let outer_projection_exprs = select_exprs
             .into_iter()
             .map(|expr| {
-                if let Expr::Unnest(Unnest { ref exprs }) = expr {
-                    let column_name = expr.display_name()?;
-                    unnest_columns.push(column_name.clone());
-                    // Add alias for the argument expression, to avoid naming 
conflicts with other expressions
-                    // in the select list. For example: `select unnest(col1), 
col1 from t`.
-                    Ok(exprs[0].clone().alias(column_name))
+                let Transformed {
+                    data: transformed_expr,
+                    transformed,
+                    tnr: _,
+                } = expr.transform_up_mut(&mut |expr: Expr| {
+                    if let Expr::Unnest(Unnest { ref exprs }) = expr {
+                        let column_name = expr.display_name()?;
+                        unnest_columns.push(column_name.clone());
+                        // Add alias for the argument expression, to avoid 
naming conflicts with other expressions
+                        // in the select list. For example: `select 
unnest(col1), col1 from t`.
+                        inner_projection_exprs
+                            .push(exprs[0].clone().alias(column_name.clone()));
+                        Ok(Transformed::yes(Expr::Column(Column::from_name(
+                            column_name,
+                        ))))
+                    } else {
+                        Ok(Transformed::no(expr))
+                    }
+                })?;
+
+                if !transformed {
+                    if matches!(&transformed_expr, Expr::Column(_)) {
+                        inner_projection_exprs.push(transformed_expr.clone());
+                        Ok(transformed_expr)
+                    } else {
+                        // We need to evaluate the expr in the inner 
projection,
+                        // outer projection just select its name
+                        let column_name = transformed_expr.display_name()?;
+                        inner_projection_exprs.push(transformed_expr);
+                        Ok(Expr::Column(Column::from_name(column_name)))
+                    }
                 } else {
-                    Ok(expr)
+                    Ok(transformed_expr)
                 }
             })
             .collect::<Result<Vec<_>>>()?;
@@ -302,7 +329,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
         // Do the final projection
         if unnest_columns.is_empty() {
             LogicalPlanBuilder::from(input)
-                .project(projection_exprs)?
+                .project(inner_projection_exprs)?
                 .build()
         } else {
             if unnest_columns.len() > 1 {
@@ -312,8 +339,9 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
             // Set preserve_nulls to false to ensure compatibility with DuckDB 
and PostgreSQL
             let unnest_options = 
UnnestOptions::new().with_preserve_nulls(false);
             LogicalPlanBuilder::from(input)
-                .project(projection_exprs)?
+                .project(inner_projection_exprs)?
                 .unnest_column_with_options(unnest_column, unnest_options)?
+                .project(outer_projection_exprs)?
                 .build()
         }
     }
diff --git a/datafusion/sqllogictest/test_files/unnest.slt 
b/datafusion/sqllogictest/test_files/unnest.slt
index f60f715242..5c178bb392 100644
--- a/datafusion/sqllogictest/test_files/unnest.slt
+++ b/datafusion/sqllogictest/test_files/unnest.slt
@@ -135,11 +135,10 @@ select array_remove(column1, 4), unnest(column2), column3 
* 10 from unnest_table
 query error DataFusion error: Error during planning: unnest\(\) can only be 
applied to array, struct and null
 select unnest(column3) from unnest_table;
 
-## Unnest multiple columns
+## Multiple unnest functions in selection
 query error DataFusion error: This feature is not implemented: Only support 
single unnest expression for now
 select unnest(column1), unnest(column2) from unnest_table;
 
-
 ## Unnest scalar in select list
 query error DataFusion error: Error during planning: unnest\(\) can only be 
applied to array, struct and null
 select unnest(1);
@@ -254,5 +253,69 @@ select * from unnest([1,2,(select sum(column3) from 
unnest_table)]);
 2
 10
 
+## Unnest is the sub-expression of other expression
+query II
+select unnest(column1) as a, column3 from unnest_table;
+----
+1 1
+2 1
+3 1
+4 2
+5 2
+6 3
+12 NULL
+
+query BI
+select unnest(column1) is not null, column3 from unnest_table;
+----
+true 1
+true 1
+true 1
+true 2
+true 2
+true 3
+true NULL
+
+query II
+select -unnest(column1) as a, column3 from unnest_table;
+----
+-1 1
+-2 1
+-3 1
+-4 2
+-5 2
+-6 3
+-12 NULL
+
+query II
+select unnest(array_remove(column1, 3)) as a, column3 from unnest_table;
+----
+1 1
+2 1
+4 2
+5 2
+6 3
+12 NULL
+
+query II
+select unnest(array_remove(column1, 3)) as c1, column3 from unnest_table order 
by c1 desc, column3;
+----
+12 NULL
+6 3
+5 2
+4 2
+2 1
+1 1
+
+query II
+select unnest(array_remove(column1, 3)) - 1 as c1, column3 from unnest_table;
+----
+0 1
+1 1
+3 2
+4 2
+5 3
+11 NULL
+
 statement ok
 drop table unnest_table;

Reply via email to