alamb commented on code in PR #11558: URL: https://github.com/apache/datafusion/pull/11558#discussion_r1686431340
##########
datafusion/common/src/file_options/parquet_writer.rs:
##########
@@ -712,35 +662,13 @@ mod tests {
             "should see the extern parquet's default over-riding datafusion's None",
         );

-        // datafusion's `None` for Option<usize> => becomes parquet's 4096
-        // TODO: should this be changed?
-        // refer to https://github.com/apache/datafusion/issues/11367
-        assert_eq!(
-            default_writer_props.max_statistics_size(&"default".into()),
-            4096,
-            "extern parquet's default is 4096"
-        );
-        assert_eq!(
-            default_table_writer_opts.global.max_statistics_size, None,
-            "datafusion's has no default"
-        );
-        assert_eq!(
-            default_writer_props.max_statistics_size(&"default".into()),
-            4096,
-            "should see the extern parquet's default over-riding datafusion's None",
-        );
-
         // Confirm all other settings are equal.
         // First resolve the known discrepancies, (set as the same).
         // TODO: once we fix the above mis-matches, we should be able to remove this.
         let mut from_extern_parquet =
             session_config_from_writer_props(&default_writer_props);
         from_extern_parquet.global.compression = Some("zstd(3)".into());
-        from_extern_parquet.global.data_page_row_count_limit = usize::MAX;
-        from_extern_parquet.global.column_index_truncate_length = None;
-        from_extern_parquet.global.dictionary_enabled = None;
         from_extern_parquet.global.statistics_enabled = None;

Review Comment:
   Is there any rationale for not setting the statistics value to the same as in the arrow-rs writer?

--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

---------------------------------------------------------------------
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org
For additional commands, e-mail: github-h...@datafusion.apache.org