nevi-me commented on a change in pull request #7917:
URL: https://github.com/apache/arrow/pull/7917#discussion_r470650350



##########
File path: rust/parquet/src/arrow/schema.rs
##########
@@ -34,16 +34,26 @@ use crate::schema::types::{ColumnDescriptor, 
SchemaDescriptor, Type, TypePtr};
 use arrow::datatypes::TimeUnit;
 use arrow::datatypes::{DataType, DateUnit, Field, Schema};
 
-/// Convert parquet schema to arrow schema including optional metadata.
+/// Convert Parquet schema to Arrow schema including optional metadata.
+/// Attempts to decode any existing Arrow shcema metadata, falling back
+/// to converting the Parquet schema column-wise
 pub fn parquet_to_arrow_schema(
     parquet_schema: &SchemaDescriptor,
-    metadata: &Option<Vec<KeyValue>>,
+    key_value_metadata: &Option<Vec<KeyValue>>,
 ) -> Result<Schema> {
-    parquet_to_arrow_schema_by_columns(
-        parquet_schema,
-        0..parquet_schema.columns().len(),
-        metadata,
-    )
+    let mut metadata = 
parse_key_value_metadata(key_value_metadata).unwrap_or_default();
+    let arrow_schema_metadata = metadata
+        .remove(super::ARROW_SCHEMA_META_KEY)
+        .map(|encoded| get_arrow_schema_from_metadata(&encoded));
+
+    match arrow_schema_metadata {
+        Some(Some(schema)) => Ok(schema),
+        _ => parquet_to_arrow_schema_by_columns(
+            parquet_schema,
+            0..parquet_schema.columns().len(),
+            key_value_metadata,
+        ),
+    }
 }
 
 /// Convert parquet schema to arrow schema including optional metadata, only 
preserving some leaf columns.

Review comment:
       @sunchao @liurenjie1024 I think I'll need help with correctly filtering 
an Arrow schema if the user supplies column indices. Not sure if there's a 1:1 
mapping between Arrow's fields and Parquet leaves.
   
   We can either do it as part of this PR or as a follow-up




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to