tustvold commented on code in PR #4307:
URL: https://github.com/apache/arrow-rs/pull/4307#discussion_r1209565643
##########
parquet/src/arrow/schema/mod.rs:
##########
@@ -88,15 +89,50 @@ pub(crate) fn parquet_to_array_schema_and_fields(
});
}
- match complex::convert_schema(parquet_schema, mask,
maybe_schema.as_ref())? {
+ let hint = maybe_schema.as_ref().map(|s| s.fields());
+ let field_levels = parquet_to_arrow_field_levels(parquet_schema, mask,
hint)?;
+ let schema = Schema::new_with_metadata(field_levels.fields, metadata);
+ Ok((schema, field_levels.levels))
+}
+
+/// Stores the parquet level information for a set of arrow [`Fields`]
+#[derive(Debug, Clone)]
+pub struct FieldLevels {
+ pub(crate) fields: Fields,
+ pub(crate) levels: Option<ParquetField>,
+}
+
+/// Convert a parquet [`SchemaDescriptor`] to its corresponding arrow
representation
+///
+/// Columns not included within [`ProjectionMask`] will be ignored.
+///
+/// Where a field type in `hint` is compatible with the corresponding parquet
type in `schema`, it
+/// will be used, otherwise the default arrow type for the given parquet
column type will be used.
+///
+/// This is to accommodate arrow types that cannot be round-tripped through
parquet natively.
+/// Depending on the parquet writer, this can lead to a mismatch between a
file's parquet schema
+/// and its embedded arrow schema. The parquet `schema` must be treated as
authoritative in such
+/// an event. See [#1663](https://github.com/apache/arrow-rs/issues/1663) for
more information
+///
+/// Note: this is a low-level API, most users will want to make use of the
higher-level
+/// [`parquet_to_arrow_schema`] for decoding metadata from a parquet file.
+pub fn parquet_to_arrow_field_levels(
Review Comment:
This is the alternative to making `parquet_to_array_schema_and_fields`
public; it avoids exposing the details of what `ParquetField` is.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]