alamb commented on code in PR #10946: URL: https://github.com/apache/datafusion/pull/10946#discussion_r1643333559
########## datafusion/core/src/datasource/physical_plan/parquet/statistics.rs: ########## @@ -1041,21 +1043,20 @@ impl<'a> StatisticsConverter<'a> { pub fn data_page_row_counts<I>( &self, column_offset_index: &ParquetOffsetIndex, - row_group_metadatas: &[RowGroupMetaData], + row_group_metadatas: &'a [RowGroupMetaData], row_group_indices: I, - ) -> Result<ArrayRef> + ) -> Result<UInt64Array> where I: IntoIterator<Item = &'a usize>, { - let data_type = self.arrow_field.data_type(); - let Some(parquet_index) = self.parquet_index else { - return Ok(self.make_null_array(data_type, row_group_indices)); + // no matching column found in parquet_index; + // thus we cannot extract page_locations in order to determine + // the row count on a per DataPage basis. + // We use `row_group_row_counts` instead. + return Self::row_group_row_counts(row_group_metadatas); Review Comment: Filed https://github.com/apache/datafusion/issues/10965 to track the changes to row_group_row_counts -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org