Lordworms commented on code in PR #11289: URL: https://github.com/apache/datafusion/pull/11289#discussion_r1678201647
########## datafusion/core/src/datasource/physical_plan/parquet/statistics.rs: ########## @@ -1234,7 +1250,87 @@ impl<'a> StatisticsConverter<'a> { arrow_field, }) } + /// recursively get the corresponding statistics for all the column data, used for + /// DataType::Struct + pub(crate) fn get_statistics_min_max_recursive( + metadata: &[&RowGroupMetaData], + index: &mut usize, + is_min: bool, + data_type: &DataType, + ) -> Result<ArrayRef> { + match data_type.is_nested() { + false => { + let iterator = metadata.iter().map(|meta| { + let stat = meta.column(*index).statistics(); + stat + }); + let stat = if is_min { + min_statistics(data_type, iterator) + } else { + max_statistics(data_type, iterator) + }; + *index += 1; + stat + } + true => { + if let DataType::Struct(fields) = data_type { + let field_arrays: Vec<_> = fields + .iter() + .map(|field| { + let array = Self::get_statistics_min_max_recursive( + metadata, + index, + is_min, + field.data_type(), + )?; + Ok((field.clone(), array)) + }) + .collect::<Result<Vec<_>>>()?; + Ok(Arc::new(StructArray::from(field_arrays)) as ArrayRef) + } else { + plan_err!("unsupported nested data type for extracting statistics") + } + } + } + } + /// recursively get the corresponding statistics for all the column data, used for + /// DataType::Struct + pub(crate) fn get_null_counts_recursive( + metadata: &[&RowGroupMetaData], + index: usize, + data_type: &DataType, + ) -> Vec<u64> { Review Comment: Sure, sorry for the late response -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org