etseidl commented on code in PR #8111: URL: https://github.com/apache/arrow-rs/pull/8111#discussion_r2277735445
########## parquet/src/file/metadata/reader.rs: ########## @@ -1040,6 +1055,107 @@ impl ParquetMetaDataReader { Ok(ParquetMetaData::new(file_metadata, row_groups)) } + /// create meta data from thrift encoded bytes + pub fn decode_file_metadata(buf: &[u8]) -> Result<ParquetMetaData> { + let mut prot = ThriftCompactInputProtocol::new(buf); + + // components of the FileMetaData + let mut version: Option<i32> = None; + let mut schema_descr: Option<Arc<SchemaDescriptor>> = None; + let mut num_rows: Option<i64> = None; + let mut row_groups: Option<Vec<RowGroup>> = None; + let mut key_value_metadata: Option<Vec<KeyValue>> = None; + let mut created_by: Option<String> = None; + let mut column_orders: Option<Vec<ColumnOrder>> = None; + + // begin decoding to intermediates + prot.read_struct_begin()?; + loop { + let field_ident = prot.read_field_begin()?; + if field_ident.field_type == FieldType::Stop { + break; + } + let prot = &mut prot; + + match field_ident.id { + 1 => { Review Comment: This is now moot... I wound up rolling back to using a temp `FileMetaData` as well. Since no string allocations are done, decoding to the temp structures and then pretty much just taking ownership of their members during conversion winds up being acceptably fast. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org