etseidl commented on code in PR #8574:
URL: https://github.com/apache/arrow-rs/pull/8574#discussion_r2417901222


##########
parquet/src/file/metadata/thrift_gen.rs:
##########
@@ -837,62 +697,578 @@ fn get_file_decryptor(
     }
 }
 
-/// Create ParquetMetaData from thrift input. Note that this only decodes the 
file metadata in
-/// the Parquet footer. Page indexes will need to be added later.
-impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for 
ParquetMetaData {
-    fn read_thrift(prot: &mut R) -> Result<Self> {
-        let file_meta = FileMetaData::read_thrift(prot)?;
-
-        let version = file_meta.version;
-        let num_rows = file_meta.num_rows;
-        let row_groups = file_meta.row_groups;
-        let created_by = file_meta.created_by.map(|c| c.to_owned());
-        let key_value_metadata = file_meta.key_value_metadata;
-
-        let val = parquet_schema_from_array(file_meta.schema)?;
-        let schema_descr = Arc::new(SchemaDescriptor::new(val));
-
-        // need schema_descr to get final RowGroupMetaData
-        let row_groups = convert_row_groups(row_groups, schema_descr.clone())?;
-
-        // need to map read column orders to actual values based on the schema
-        if file_meta
-            .column_orders
-            .as_ref()
-            .is_some_and(|cos| cos.len() != schema_descr.num_columns())
-        {
-            return Err(general_err!("Column order length mismatch"));
+// using ThriftSliceInputProtocol rather than ThriftCompactInputProtocol trait 
because
+// these are all internal and operate on slices.
+fn read_column_chunk<'a>(

Review Comment:
   I realize this is quite ugly, but it is necessary both for performance and for 
stats skipping and other optimizations. I do have a simplified version of this 
function in the queue, so it will get better with time.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to