wiedld commented on code in PR #13866: URL: https://github.com/apache/datafusion/pull/13866#discussion_r1898612752
########## datafusion/common/src/file_options/parquet_writer.rs: ########## @@ -51,38 +58,53 @@ impl ParquetWriterOptions { } } -impl TryFrom<&TableParquetOptions> for ParquetWriterOptions { - type Error = DataFusionError; - - fn try_from(parquet_table_options: &TableParquetOptions) -> Result<Self> { +impl TableParquetOptions { + #[deprecated( + since = "44.0.0", + note = "Please use `TableParquetOptions::into_writer_properties_builder` and `TableParquetOptions::into_writer_properties_builder_with_arrow_schema`" + )] + pub fn try_from(table_opts: &TableParquetOptions) -> Result<ParquetWriterOptions> { // ParquetWriterOptions will have defaults for the remaining fields (e.g. sorting_columns) Ok(ParquetWriterOptions { - writer_options: WriterPropertiesBuilder::try_from(parquet_table_options)? - .build(), + writer_options: table_opts.into_writer_properties_builder()?.build(), }) } -} - -impl TryFrom<&TableParquetOptions> for WriterPropertiesBuilder { - type Error = DataFusionError; /// Convert the session's [`TableParquetOptions`] into a single write action's [`WriterPropertiesBuilder`]. /// /// The returned [`WriterPropertiesBuilder`] includes customizations applicable per column. - fn try_from(table_parquet_options: &TableParquetOptions) -> Result<Self> { + pub fn into_writer_properties_builder(&self) -> Result<WriterPropertiesBuilder> { + self.into_writer_properties_builder_with_arrow_schema(None) + } + + /// Convert the session's [`TableParquetOptions`] into a single write action's [`WriterPropertiesBuilder`]. + /// + /// The returned [`WriterPropertiesBuilder`] includes customizations applicable per column, + /// as well as the arrow schema encoded into the kv_meta at [`ARROW_SCHEMA_META_KEY`]. + pub fn into_writer_properties_builder_with_arrow_schema( Review Comment: > So for example I would expect that ParquetOptions::into_writer_properties_builder would always set the arrow metadata to be consistent with how WriterProperties works 🤔 The [ParquetOptions](https://docs.rs/datafusion/latest/datafusion/config/struct.ParquetOptions.html) does not have kv_metadata. That is held within the parent structure [TableParquetOptions](https://docs.rs/datafusion/latest/datafusion/common/config/struct.TableParquetOptions.html). That is why the existing [TryFrom code only constructs the writer properties from the TableParquetOptions](https://github.com/apache/datafusion/blob/a08dc0af8f798afe73a2fdf170ab36e72ad7e782/datafusion/common/src/file_options/parquet_writer.rs#L66). Not the ParquetOptions. I like the idea of making the metadata APIs on the `TableParquetOptions`, altho I'll keep the config on the ParquetOptions. I'll push up the change shortly. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org