Jefffrey commented on code in PR #22467:
URL: https://github.com/apache/datafusion/pull/22467#discussion_r3296125976


##########
datafusion/proto/src/logical_plan/file_formats.rs:
##########
@@ -500,81 +500,144 @@ mod parquet {
         }
     }
 
-    impl FromProto<&ParquetOptionsProto> for ParquetOptions {

Review Comment:
   Dunno how much we care about maintaining compatibility here; I guess we 
could keep `FromProto` impl and just defer to the try version with an unwrap 🤔 



##########
datafusion/proto/src/logical_plan/file_formats.rs:
##########
@@ -500,81 +500,144 @@ mod parquet {
         }
     }
 
-    impl FromProto<&ParquetOptionsProto> for ParquetOptions {
-        fn from_proto(proto: &ParquetOptionsProto) -> Self {
-            ParquetOptions {
-            enable_page_index: proto.enable_page_index,
-            pruning: proto.pruning,
-            skip_metadata: proto.skip_metadata,
-            metadata_size_hint: 
proto.metadata_size_hint_opt.as_ref().map(|opt| match opt {
-                parquet_options::MetadataSizeHintOpt::MetadataSizeHint(size) 
=> *size as usize,
-            }),
-            pushdown_filters: proto.pushdown_filters,
-            reorder_filters: proto.reorder_filters,
-            force_filter_selections: proto.force_filter_selections,
-            data_pagesize_limit: proto.data_pagesize_limit as usize,
-            write_batch_size: proto.write_batch_size as usize,
-                   // TODO: Consider changing to TryFrom to avoid panic on 
invalid proto data
-            writer_version: proto.writer_version.parse().expect("
-                Invalid parquet writer version in proto, expected '1.0' or 
'2.0'
-            "),
-            compression: proto.compression_opt.as_ref().map(|opt| match opt {
-                parquet_options::CompressionOpt::Compression(compression) => 
compression.clone(),
-            }),
-            dictionary_enabled: 
proto.dictionary_enabled_opt.as_ref().map(|opt| match opt {
-                
parquet_options::DictionaryEnabledOpt::DictionaryEnabled(enabled) => *enabled,
-            }),
-            dictionary_page_size_limit: proto.dictionary_page_size_limit as 
usize,
-            statistics_enabled: 
proto.statistics_enabled_opt.as_ref().map(|opt| match opt {
-                
parquet_options::StatisticsEnabledOpt::StatisticsEnabled(statistics) => 
statistics.clone(),
-            }),
-            max_row_group_size: proto.max_row_group_size as usize,
-            created_by: proto.created_by.clone(),
-            column_index_truncate_length: 
proto.column_index_truncate_length_opt.as_ref().map(|opt| match opt {
-                
parquet_options::ColumnIndexTruncateLengthOpt::ColumnIndexTruncateLength(length)
 => *length as usize,
-            }),
-            statistics_truncate_length: 
proto.statistics_truncate_length_opt.as_ref().map(|opt| match opt {
-                
parquet_options::StatisticsTruncateLengthOpt::StatisticsTruncateLength(length) 
=> *length as usize,
-            }),
-            data_page_row_count_limit: proto.data_page_row_count_limit as 
usize,
-            encoding: proto.encoding_opt.as_ref().map(|opt| match opt {
-                parquet_options::EncodingOpt::Encoding(encoding) => 
encoding.clone(),
-            }),
-            bloom_filter_on_read: proto.bloom_filter_on_read,
-            bloom_filter_on_write: proto.bloom_filter_on_write,
-            bloom_filter_fpp: proto.bloom_filter_fpp_opt.as_ref().map(|opt| 
match opt {
-                parquet_options::BloomFilterFppOpt::BloomFilterFpp(fpp) => 
*fpp,
-            }),
-            bloom_filter_ndv: proto.bloom_filter_ndv_opt.as_ref().map(|opt| 
match opt {
-                parquet_options::BloomFilterNdvOpt::BloomFilterNdv(ndv) => 
*ndv,
-            }),
-            allow_single_file_parallelism: proto.allow_single_file_parallelism,
-            maximum_parallel_row_group_writers: 
proto.maximum_parallel_row_group_writers as usize,
-            maximum_buffered_record_batches_per_stream: 
proto.maximum_buffered_record_batches_per_stream as usize,
-            schema_force_view_types: proto.schema_force_view_types,
-            binary_as_string: proto.binary_as_string,
-            skip_arrow_metadata: proto.skip_arrow_metadata,
-            coerce_int96: proto.coerce_int96_opt.as_ref().map(|opt| match opt {
-                parquet_options::CoerceInt96Opt::CoerceInt96(coerce_int96) => 
coerce_int96.clone(),
-            }),
-            coerce_int96_tz: proto.coerce_int96_tz_opt.as_ref().map(|opt| 
match opt {
-                parquet_options::CoerceInt96TzOpt::CoerceInt96Tz(tz) => 
tz.clone(),
-            }),
-            max_predicate_cache_size: 
proto.max_predicate_cache_size_opt.as_ref().map(|opt| match opt {
-                
parquet_options::MaxPredicateCacheSizeOpt::MaxPredicateCacheSize(size) => *size 
as usize,
-            }),
-            use_content_defined_chunking: 
proto.content_defined_chunking.map(|cdc| {
-                let defaults = CdcOptions::default();
-                CdcOptions {
-                    // proto3 uses 0 as the wire default for uint64; a zero 
chunk size is
-                    // invalid, so treat it as "field not set" and fall back 
to the default.
-                    min_chunk_size: if cdc.min_chunk_size != 0 { 
cdc.min_chunk_size as usize } else { defaults.min_chunk_size },
-                    max_chunk_size: if cdc.max_chunk_size != 0 { 
cdc.max_chunk_size as usize } else { defaults.max_chunk_size },
-                    // norm_level = 0 is a valid value (and the default), so 
pass it through directly.
-                    norm_level: cdc.norm_level,
-                }
-            }),
-        }
+    impl TryFromProto<&ParquetOptionsProto> for ParquetOptions {
+        type Error = datafusion_common::DataFusionError;
+
+        fn try_from_proto(
+            proto: &ParquetOptionsProto,
+        ) -> datafusion_common::Result<Self, Self::Error> {
+            let default_options = ParquetOptions::default();
+            let writer_version = if proto.writer_version.is_empty() {
+                default_options.writer_version

Review Comment:
   Not sure about this default behaviour here, given for other options we don't 
really do this? Happy to hear more thoughts though



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to