alamb commented on code in PR #12836:
URL: https://github.com/apache/datafusion/pull/12836#discussion_r1806957606


##########
datafusion/common/src/unnest.rs:
##########
@@ -64,13 +66,24 @@
 pub struct UnnestOptions {
     /// Should nulls in the input be preserved? Defaults to true
     pub preserve_nulls: bool,
+    /// Without explicit recursions, all
+    /// Datafusion will infer the unesting type with depth = 1
+    pub recursions: Option<Vec<RecursionUnnestOption>>,
+}
+
+#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd)]
+pub struct RecursionUnnestOption {

Review Comment:
   Can we please add documentation to this structure as it is public?



##########
datafusion/expr/src/logical_plan/builder.rs:
##########
@@ -2263,24 +2238,20 @@ mod tests {
 
         // Simultaneously unnesting a list (with different depth) and a struct 
column
         let plan = nested_table_scan("test_table")?
-            .unnest_columns_recursive_with_options(
-                vec![
-                    (
-                        "stringss".into(),
-                        ColumnUnnestType::List(vec![
-                            ColumnUnnestList {
-                                output_column: 
Column::from_name("stringss_depth_1"),
-                                depth: 1,
-                            },
-                            ColumnUnnestList {
-                                output_column: 
Column::from_name("stringss_depth_2"),
-                                depth: 2,
-                            },
-                        ]),
-                    ),
-                    ("struct_singular".into(), ColumnUnnestType::Inferred),
-                ],
-                UnnestOptions::default(),
+            .unnest_columns_with_options(
+                vec!["stringss".into(), "struct_singular".into()],
+                UnnestOptions::default().with_recursions(vec![
+                    RecursionUnnestOption {
+                        input_column: "stringss".into(),
+                        output_column: "stringss_depth_1".into(),
+                        depth: 1,
+                    },
+                    RecursionUnnestOption {
+                        input_column: "stringss".into(),
+                        output_column: "stringss_depth_2".into(),
+                        depth: 2,
+                    },
+                ]),

Review Comment:
   Here is another potential way for this API to look would be to take 
individual options rather than a single Vec 
   
   I don't feel strongly about this
   
   ```suggestion
                   UnnestOptions::new()
                    .with_recursion(
                       RecursionUnnestOption {
                           input_column: "stringss".into(),
                           output_column: "stringss_depth_1".into(),
                           depth: 1,
                       })
                     .with_recusion(
                       RecursionUnnestOption {
                           input_column: "stringss".into(),
                           output_column: "stringss_depth_2".into(),
                           depth: 2,
                       },
                   ),
   ```



##########
datafusion/sql/src/select.rs:
##########
@@ -440,11 +460,31 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
                 };
                 projection_exprs.extend(inner_projection_exprs);
 
+                let mut list_recursions = vec![];
+                let unnest_col_vec = unnest_columns
+                    .into_iter()
+                    .map(|(col, maybe_list_unnest)| {
+                        match maybe_list_unnest {
+                            None => {}
+                            Some(list_unnest) => {
+                                
list_recursions.extend(list_unnest.into_iter().map(
+                                    |unnest_list| RecursionUnnestOption {
+                                        input_column: col.clone(),
+                                        output_column: 
unnest_list.output_column,
+                                        depth: unnest_list.depth,
+                                    },
+                                ));
+                            }
+                        }

Review Comment:
   I think you could simplify this code (reduce recursion level) with `if let` 
like
   
   ```suggestion
                           if let Some(list_unnest) = maybe_list_unnest {
                                   
list_recursions.extend(list_unnest.into_iter().map(
                                       |unnest_list| RecursionUnnestOption {
                                           input_column: col.clone(),
                                           output_column: 
unnest_list.output_column,
                                           depth: unnest_list.depth,
                                       },
                                   ));
                               }
                           }
   ```
   
   Also I wonder if it would be possible / easier for 
`rewrite_recursive_unnests_bottom_up` to simply return a `HashSet<Column, 
RecursionUnnestOption>` directly rather than having to convert the results



##########
datafusion/sql/src/select.rs:
##########
@@ -301,7 +301,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
         // The transformation happen bottom up, one at a time for each 
iteration
         // Only exhaust the loop if no more unnest transformation is found
         for i in 0.. {
-            let mut unnest_columns = vec![];
+            let mut unnest_columns = HashMap::new();

Review Comment:
   It might be better to use an IndexMap here (to retain the same order as was 
specified in the query) -- a HashMap can mix up the order



##########
datafusion/physical-plan/src/unnest.rs:
##########
@@ -1092,38 +1090,37 @@ mod tests {
             &HashSet::default(),
             &UnnestOptions {
                 preserve_nulls: true,
+                recursions: None,
             },
         )?;
-        let actual =
-            format!("{}", 
pretty_format_batches(vec![ret].as_ref())?).to_lowercase();
-        let expected = r#"
-+---------------------------------+---------------------------------+---------------------------------+
-| col1_unnest_placeholder_depth_1 | col1_unnest_placeholder_depth_2 | 
col2_unnest_placeholder_depth_1 |
-+---------------------------------+---------------------------------+---------------------------------+
-| [1, 2, 3]                       | 1                               | a        
                       |
-|                                 | 2                               | b        
                       |
-| [4, 5]                          | 3                               |          
                       |
-| [1, 2, 3]                       |                                 | a        
                       |
-|                                 |                                 | b        
                       |
-| [4, 5]                          |                                 |          
                       |
-| [1, 2, 3]                       | 4                               | a        
                       |
-|                                 | 5                               | b        
                       |
-| [4, 5]                          |                                 |          
                       |
-| [7, 8, 9, 10]                   | 7                               | c        
                       |
-|                                 | 8                               | d        
                       |
-| [11, 12, 13]                    | 9                               |          
                       |
-|                                 | 10                              |          
                       |
-| [7, 8, 9, 10]                   |                                 | c        
                       |
-|                                 |                                 | d        
                       |
-| [11, 12, 13]                    |                                 |          
                       |
-| [7, 8, 9, 10]                   | 11                              | c        
                       |
-|                                 | 12                              | d        
                       |
-| [11, 12, 13]                    | 13                              |          
                       |
-|                                 |                                 | e        
                       |
-+---------------------------------+---------------------------------+---------------------------------+
-        "#
-        .trim();
-        assert_eq!(actual, expected);
+
+        let expected = &[
+"+---------------------------------+---------------------------------+---------------------------------+",
+"| col1_unnest_placeholder_depth_1 | col1_unnest_placeholder_depth_2 | 
col2_unnest_placeholder_depth_1 |",
+"+---------------------------------+---------------------------------+---------------------------------+",
+"| [1, 2, 3]                       | 1                               | a       
                        |",
+"|                                 | 2                               | b       
                        |",
+"| [4, 5]                          | 3                               |         
                        |",
+"| [1, 2, 3]                       |                                 | a       
                        |",
+"|                                 |                                 | b       
                        |",
+"| [4, 5]                          |                                 |         
                        |",
+"| [1, 2, 3]                       | 4                               | a       
                        |",
+"|                                 | 5                               | b       
                        |",
+"| [4, 5]                          |                                 |         
                        |",
+"| [7, 8, 9, 10]                   | 7                               | c       
                        |",
+"|                                 | 8                               | d       
                        |",
+"| [11, 12, 13]                    | 9                               |         
                        |",
+"|                                 | 10                              |         
                        |",
+"| [7, 8, 9, 10]                   |                                 | c       
                        |",
+"|                                 |                                 | d       
                        |",
+"| [11, 12, 13]                    |                                 |         
                        |",
+"| [7, 8, 9, 10]                   | 11                              | c       
                        |",
+"|                                 | 12                              | d       
                        |",
+"| [11, 12, 13]                    | 13                              |         
                        |",
+"|                                 |                                 | e       
                        |",
+"+---------------------------------+---------------------------------+---------------------------------+",
+        ];
+        assert_batches_eq!(expected, &[ret]);

Review Comment:
   👍 



##########
datafusion/common/src/unnest.rs:
##########
@@ -64,13 +66,24 @@
 pub struct UnnestOptions {
     /// Should nulls in the input be preserved? Defaults to true
     pub preserve_nulls: bool,
+    /// Without explicit recursions, all

Review Comment:
   Can you please document what the `Vec` means in this context? Is the option 
for each of the columns in the schema? If so can you say it is expected to have 
the same number of elements?
   
   If possible it would also be great to update the documentation on this 
struct to explain what is going on with recursive options



##########
datafusion/expr/src/logical_plan/builder.rs:
##########
@@ -1192,26 +1191,6 @@ impl LogicalPlanBuilder {
         self,
         columns: Vec<Column>,
         options: UnnestOptions,
-    ) -> Result<Self> {
-        unnest_with_options(
-            Arc::unwrap_or_clone(self.plan),
-            columns
-                .into_iter()
-                .map(|c| (c, ColumnUnnestType::Inferred))
-                .collect(),
-            options,
-        )
-        .map(Self::new)
-    }
-
-    /// Unnest the given columns with the given [`UnnestOptions`]
-    /// if one column is a list type, it can be recursively and simultaneously
-    /// unnested into the desired recursion levels
-    /// e.g select unnest(list_col,depth=1), unnest(list_col,depth=2)
-    pub fn unnest_columns_recursive_with_options(

Review Comment:
   👍 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org
For additional commands, e-mail: github-h...@datafusion.apache.org

Reply via email to