codephage2020 commented on code in PR #9598:
URL: https://github.com/apache/arrow-rs/pull/9598#discussion_r3015264846
##########
parquet-variant-compute/src/variant_get.rs:
##########
@@ -216,22 +223,63 @@ fn shredded_get_path(
// Structs are special. Recurse into each field separately, hoping to
follow the shredding even
// further, and build up the final struct from those individually shredded
results.
if let DataType::Struct(fields) = as_field.data_type() {
- let children = fields
+ let mut updated_fields = Vec::with_capacity(fields.len());
+ let children: Result<Vec<_>> = fields
.iter()
.map(|field| {
- shredded_get_path(
+ // If the field has VariantType extension metadata, extract it
as a
+ // VariantArray instead of casting to the declared data type.
This allows
+ // callers to request structs where some fields remain as
variants.
+ // See test_struct_extraction_with_variant_fields for usage
example.
+ let is_variant_field =
field.try_extension_type::<VariantType>().is_ok();
+ let field_as_type: Option<&Field> = if is_variant_field {
+ None
+ } else {
+ Some(field.as_ref())
+ };
+ let child = shredded_get_path(
&target,
&[VariantPathElement::from(field.name().as_str())],
- Some(field),
+ field_as_type,
cast_options,
- )
+ )?;
+
+ // Update field type if it was a Variant marker (extracted as
VariantArray).
+ // The actual data type will be the internal structure of
VariantArray.
+ // Preserve VariantType extension metadata so downstream
consumers
+ // can recognize this field as a Variant column.
+ //
+ // When the field is entirely absent in the data,
shredded_get_path
+ // returns a NullArray (DataType::Null). VariantType only
supports
+ // Struct storage, so we must skip the extension in that case.
+ let updated_field =
+ if is_variant_field && matches!(child.data_type(),
DataType::Struct(_)) {
+ field
+ .as_ref()
+ .clone()
+ .with_data_type(child.data_type().clone())
Review Comment:
For example, a VariantArray's internal StructArray has concrete fields like
`metadata` and `value`, not empty fields. So we need `with_data_type` to align
the field's declared type with the actual child array's type; otherwise,
`StructArray::try_new` will reject the mismatch.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]