alamb commented on code in PR #10973:
URL: https://github.com/apache/datafusion/pull/10973#discussion_r1644578233
##########
datafusion/core/tests/parquet/arrow_statistics.rs:
##########
@@ -2027,7 +2010,7 @@ async fn test_column_non_existent() {
// row counts are [5, 5, 5, 5]
expected_row_counts: None,
column_name: "i_do_not_exist",
- check: Check::DataPage,
+ check: Check::Both,
Review Comment:
👍
##########
datafusion/core/src/datasource/physical_plan/parquet/statistics.rs:
##########
@@ -718,21 +718,33 @@ impl<'a> StatisticsConverter<'a> {
///
/// # Example
/// ```no_run
+ /// # use arrow::datatypes::Schema;
+ /// # use arrow_array::ArrayRef;
/// # use parquet::file::metadata::ParquetMetaData;
/// # use datafusion::datasource::physical_plan::parquet::StatisticsConverter;
/// # fn get_parquet_metadata() -> ParquetMetaData { unimplemented!() }
- /// // Given the metadata for a parquet file
+ /// # fn get_arrow_schema() -> Schema { unimplemented!() }
+ /// // Given the metadata for a parquet file and the arrow schema
/// let metadata: ParquetMetaData = get_parquet_metadata();
+ /// let arrow_schema: Schema = get_arrow_schema();
+ /// let parquet_schema = metadata.file_metadata().schema_descr();
+ /// // create a converter
+ /// let converter = StatisticsConverter::try_new("foo", &arrow_schema, parquet_schema)
+ /// .unwrap();
/// // get the row counts for each row group
- /// let row_counts = StatisticsConverter::row_group_row_counts(metadata
+ /// let row_counts = converter.row_group_row_counts(metadata
/// .row_groups()
/// .iter()
/// );
/// ```
- pub fn row_group_row_counts<I>(metadatas: I) -> Result<UInt64Array>
+ pub fn row_group_row_counts<I>(&self, metadatas: I) -> Result<Option<UInt64Array>>
Review Comment:
I think this is a cleaner and more consistent interface
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]