efredine commented on code in PR #11289:
URL: https://github.com/apache/datafusion/pull/11289#discussion_r1676961201


##########
datafusion/core/src/datasource/physical_plan/parquet/statistics.rs:
##########
@@ -1234,7 +1250,87 @@ impl<'a> StatisticsConverter<'a> {
             arrow_field,
         })
     }
+    /// recursively get the corresponding statistics for all the column data, used for
+    /// DataType::Struct
+    pub(crate) fn get_statistics_min_max_recursive(
+        metadata: &[&RowGroupMetaData],
+        index: &mut usize,
+        is_min: bool,
+        data_type: &DataType,
+    ) -> Result<ArrayRef> {
+        match data_type.is_nested() {
+            false => {
+                let iterator = metadata.iter().map(|meta| {
+                    let stat = meta.column(*index).statistics();
+                    stat
+                });
+                let stat = if is_min {
+                    min_statistics(data_type, iterator)
+                } else {
+                    max_statistics(data_type, iterator)
+                };
+                *index += 1;
+                stat
+            }
+            true => {
+                if let DataType::Struct(fields) = data_type {
+                    let field_arrays: Vec<_> = fields
+                        .iter()
+                        .map(|field| {
+                            let array = Self::get_statistics_min_max_recursive(
+                                metadata,
+                                index,
+                                is_min,
+                                field.data_type(),
+                            )?;
+                            Ok((field.clone(), array))
+                        })
+                        .collect::<Result<Vec<_>>>()?;
+                    Ok(Arc::new(StructArray::from(field_arrays)) as ArrayRef)
+                } else {
+                    plan_err!("unsupported nested data type for extracting statistics")
+                }
+            }
+        }
+    }
+    /// recursively get the corresponding statistics for all the column data, used for
+    /// DataType::Struct
+    pub(crate) fn get_null_counts_recursive(
+        metadata: &[&RowGroupMetaData],
+        index: usize,
+        data_type: &DataType,
+    ) -> Vec<u64> {

Review Comment:
   If you prefer, this could also be expressed as a fold:
   ```Rust
               let num_row_groups = metadata.len();
               fields.iter().fold(vec![0; num_row_groups], |mut acc, field| {
                   let field_null_counts = Self::get_null_counts_recursive(
                       metadata,
                       index + 1,
                       field.data_type(),
                   );
                   acc.iter_mut().zip(field_null_counts.iter()).for_each(|(a, b)| *a += b);
                   acc
               })
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org
For additional commands, e-mail: github-h...@datafusion.apache.org

Reply via email to