friendlymatthew commented on code in PR #8673:
URL: https://github.com/apache/arrow-rs/pull/8673#discussion_r2535172156


##########
parquet-variant-compute/src/variant_array.rs:
##########
@@ -1632,4 +1774,63 @@ mod test {
         ),]),
         "Cast error: Cast failed at index 0 (array type: Decimal128(38, 10)): 
Invalid argument error: 123456789012345678901234567890123456789 is wider than 
max precision 38"
     );
+
+    #[test]
+    fn test_fixed_size_binary_without_uuid_extension() {

Review Comment:
   Here's a test that will return an error when trying to access a 
`FixedSizeBinary(16)` without a `Uuid` extension type.



##########
parquet-variant-compute/src/variant_array.rs:
##########
@@ -733,6 +736,91 @@ impl From<ShreddedVariantFieldArray> for StructArray {
     }
 }
 
+/// A typed array reference that pairs an [`ArrayRef`] with its [`Field`] 
metadata.
+///
+/// This struct is used to represent the `typed_value` field in shredded 
variant arrays,
+/// where we need to preserve both the array data and its field metadata (such 
as field
+/// name, data type, nullability, and extension type information).
+///
+/// The separation of array data and field metadata allows for proper handling 
of:
+/// - Field names when working with struct fields
+/// - Nullability information for proper null handling
+/// - Extension type metadata (e.g., UUID extension on FixedSizeBinary)
+/// - Data type information for casting and validation
+#[derive(Debug, Clone)]
+pub struct TypedArrayRef {
+    inner: ArrayRef,
+    field: FieldRef,
+}
+
+impl TypedArrayRef {
+    pub fn inner(&self) -> &ArrayRef {
+        &self.inner
+    }
+
+    pub fn into_inner(self) -> ArrayRef {
+        self.inner
+    }
+
+    pub fn field(&self) -> &FieldRef {
+        &self.field
+    }
+
+    // note: these methods below make me want to impl Array for 
TypedArrayRef...
+    pub fn slice(&self, offset: usize, length: usize) -> Self {
+        let Self { inner, field } = self;
+
+        Self {
+            inner: inner.slice(offset, length),
+            field: Arc::clone(field),
+        }
+    }
+
+    pub fn is_valid(&self, index: usize) -> bool {
+        self.inner.is_valid(index)
+    }

Review Comment:
   I also wonder if we want to find a better home for this struct. I suspect a 
`TypedArrayRef` would be useful in various parts of the library, not just the 
variant-related work 🤔 



##########
parquet-variant-compute/src/variant_array.rs:
##########
@@ -733,6 +736,91 @@ impl From<ShreddedVariantFieldArray> for StructArray {
     }
 }
 
+/// A typed array reference that pairs an [`ArrayRef`] with its [`Field`] 
metadata.
+///
+/// This struct is used to represent the `typed_value` field in shredded 
variant arrays,
+/// where we need to preserve both the array data and its field metadata (such 
as field
+/// name, data type, nullability, and extension type information).
+///
+/// The separation of array data and field metadata allows for proper handling 
of:
+/// - Field names when working with struct fields
+/// - Nullability information for proper null handling
+/// - Extension type metadata (e.g., UUID extension on FixedSizeBinary)
+/// - Data type information for casting and validation
+#[derive(Debug, Clone)]
+pub struct TypedArrayRef {
+    inner: ArrayRef,
+    field: FieldRef,
+}
+
+impl TypedArrayRef {
+    pub fn inner(&self) -> &ArrayRef {
+        &self.inner
+    }
+
+    pub fn into_inner(self) -> ArrayRef {
+        self.inner
+    }
+
+    pub fn field(&self) -> &FieldRef {
+        &self.field
+    }
+
+    // note: these methods below make me want to impl Array for 
TypedArrayRef...
+    pub fn slice(&self, offset: usize, length: usize) -> Self {
+        let Self { inner, field } = self;
+
+        Self {
+            inner: inner.slice(offset, length),
+            field: Arc::clone(field),
+        }
+    }
+
+    pub fn is_valid(&self, index: usize) -> bool {
+        self.inner.is_valid(index)
+    }

Review Comment:
   I'm curious what people think. Since `TypedArrayRef` owns the inner 
`ArrayRef`, it'll be pretty easy to get `TypedArrayRef` to also `impl Array`. 
   
   I don't have a strong opinion, but it feels weird to rewrite methods that 
already exist on a trait.



##########
parquet-variant-compute/src/variant_array.rs:
##########
@@ -1632,4 +1774,63 @@ mod test {
         ),]),
         "Cast error: Cast failed at index 0 (array type: Decimal128(38, 10)): 
Invalid argument error: 123456789012345678901234567890123456789 is wider than 
max precision 38"
     );
+
+    #[test]
+    fn test_fixed_size_binary_without_uuid_extension() {
+        let uuid_bytes = vec![
+            0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0x01, 0x23, 0x45, 
0x67, 0x89, 0xab,
+            0xcd, 0xef,
+        ];
+        let fixed_size_binary = 
FixedSizeBinaryArray::from(vec![uuid_bytes.as_slice()]);
+
+        let metadata =
+            
BinaryViewArray::from_iter_values(std::iter::repeat_n(EMPTY_VARIANT_METADATA_BYTES,
 1));
+
+        // create a field without the uuid extension type
+        let field = Field::new("typed_value", DataType::FixedSizeBinary(16), 
true);
+
+        let struct_array = StructArrayBuilder::new()
+            .with_column_name("metadata", Arc::new(metadata), false)
+            .with_field_ref(Arc::new(field), Arc::new(fixed_size_binary))
+            .build();
+
+        let variant_array =
+            VariantArray::try_new(&struct_array).expect("should create variant 
array");
+
+        let res = variant_array.try_value(0);
+        assert!(res.is_err())
+    }
+
+    #[test]
+    fn test_fixed_size_binary_with_uuid_extension() {

Review Comment:
   Here's a test case that will pass, since we properly annotated the field 
metadata with the `Uuid` extension type.



##########
parquet-variant-compute/src/variant_array.rs:
##########
@@ -733,6 +736,91 @@ impl From<ShreddedVariantFieldArray> for StructArray {
     }
 }
 
+/// A typed array reference that pairs an [`ArrayRef`] with its [`Field`] 
metadata.
+///
+/// This struct is used to represent the `typed_value` field in shredded 
variant arrays,
+/// where we need to preserve both the array data and its field metadata (such 
as field
+/// name, data type, nullability, and extension type information).
+///
+/// The separation of array data and field metadata allows for proper handling 
of:
+/// - Field names when working with struct fields
+/// - Nullability information for proper null handling
+/// - Extension type metadata (e.g., UUID extension on FixedSizeBinary)
+/// - Data type information for casting and validation
+#[derive(Debug, Clone)]
+pub struct TypedArrayRef {
+    inner: ArrayRef,
+    field: FieldRef,
+}
+
+impl TypedArrayRef {
+    pub fn inner(&self) -> &ArrayRef {
+        &self.inner
+    }
+
+    pub fn into_inner(self) -> ArrayRef {
+        self.inner
+    }
+
+    pub fn field(&self) -> &FieldRef {
+        &self.field
+    }
+
+    // note: these methods below make me want to impl Array for 
TypedArrayRef...
+    pub fn slice(&self, offset: usize, length: usize) -> Self {
+        let Self { inner, field } = self;
+
+        Self {
+            inner: inner.slice(offset, length),
+            field: Arc::clone(field),
+        }
+    }
+
+    pub fn is_valid(&self, index: usize) -> bool {
+        self.inner.is_valid(index)
+    }
+}
+
+impl From<ArrayRef> for TypedArrayRef {
+    fn from(inner: ArrayRef) -> Self {
+        let data_type = inner.data_type().clone();
+
+        Self {
+            inner,
+            field: Arc::new(Field::new("typed_value", data_type, true)),
+        }
+    }
+}
+
+impl PartialEq for TypedArrayRef {
+    #[allow(clippy::op_ref)]
+    fn eq(&self, other: &Self) -> bool {
+        &self.inner == &other.inner && self.field == other.field
+    }
+}

Review Comment:
   I was forced to add references to both operands (removing them fails to 
compile). I spent a lot of time this morning trying to fix/investigate this issue.
   
   Weirdly, using a tuple like I did in a prior iteration of this PR would 
compile 🤔 
   
   For those curious:
   
   Here's a repro: 
https://play.rust-lang.org/?version=stable&mode=debug&edition=2024&gist=c868bf8f2f8e2b699f9a84b55a89cd71
   
   And here's a more reduced version: 
https://play.rust-lang.org/?version=stable&mode=debug&edition=2024&gist=224ffe1076c24e1c8db5d4a8f53ca2fc
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to