scovich commented on code in PR #7666:
URL: https://github.com/apache/arrow-rs/pull/7666#discussion_r2150781494


##########
parquet-variant/src/variant.rs:
##########
@@ -300,62 +303,218 @@ impl<'m> VariantMetadata<'m> {
     }
 }
 
-#[derive(Clone, Copy, Debug, PartialEq)]
-pub struct VariantObject<'m, 'v> {
-    pub metadata: &'m VariantMetadata<'m>,
-    pub value: &'v [u8],
+#[derive(Clone, Debug, PartialEq)]
+pub struct VariantObjectHeader {
+    field_offset_size: OffsetSizeBytes,
+    field_id_size: OffsetSizeBytes,
+    num_elements: usize,
+    field_ids_start_byte: usize,
+    field_offsets_start_byte: usize,
+    values_start_byte: usize,
 }
-impl<'m, 'v> VariantObject<'m, 'v> {
-    pub fn fields(&self) -> Result<impl Iterator<Item = (&'m str, Variant<'m, 
'v>)>, ArrowError> {
-        todo!();
-        #[allow(unreachable_code)] // Just to infer the return type
-        Ok(vec![].into_iter())
+
+impl VariantObjectHeader {
+    pub fn try_new(value: &[u8]) -> Result<Self, ArrowError> {
+        // Parse the header byte to get object parameters
+        let header = first_byte_from_slice(value)?;
+        let value_header = header >> 2;
+
+        let field_offset_size_minus_one = value_header & 0x03; // Last 2 bits
+        let field_id_size_minus_one = (value_header >> 2) & 0x03; // Next 2 
bits
+        let is_large = value_header & 0x10; // 5th bit
+
+        let field_offset_size = 
OffsetSizeBytes::try_new(field_offset_size_minus_one)?;
+        let field_id_size = OffsetSizeBytes::try_new(field_id_size_minus_one)?;
+
+        // Determine num_elements size based on is_large flag
+        let num_elements_size = if is_large != 0 {
+            OffsetSizeBytes::Four
+        } else {
+            OffsetSizeBytes::One
+        };
+
+        // Parse num_elements
+        let num_elements = num_elements_size.unpack_usize(value, 1, 0)?;
+
+        // Calculate byte offsets for different sections
+        let field_ids_start_byte = 1 + num_elements_size as usize;
+        let field_offsets_start_byte = field_ids_start_byte + num_elements * 
field_id_size as usize;
+        let values_start_byte =
+            field_offsets_start_byte + (num_elements + 1) * field_offset_size 
as usize;
+
+        // Verify that the last field offset array entry is inside the value 
slice
+        let last_field_offset_byte =
+            field_offsets_start_byte + (num_elements + 1) * field_offset_size 
as usize;
+        if last_field_offset_byte > value.len() {
+            return Err(ArrowError::InvalidArgumentError(format!(
+                "Last field offset array entry at offset {} with length {} is 
outside the value slice of length {}",
+                last_field_offset_byte,
+                field_offset_size as usize,
+                value.len()
+            )));
+        }
+
+        // Verify that the value of the last field offset array entry fits 
inside the value slice
+        let last_field_offset =
+            field_offset_size.unpack_usize(value, field_offsets_start_byte, 
num_elements)?;
+        if values_start_byte + last_field_offset > value.len() {
+            return Err(ArrowError::InvalidArgumentError(format!(
+                "Last field offset value {} at offset {} is outside the value 
slice of length {}",
+                last_field_offset,
+                values_start_byte,
+                value.len()
+            )));
+        }
+        Ok(Self {
+            field_offset_size,
+            field_id_size,
+            num_elements,
+            field_ids_start_byte,
+            field_offsets_start_byte,
+            values_start_byte,
+        })
     }
-    pub fn field(&self, _name: &'m str) -> Result<Variant<'m, 'v>, ArrowError> 
{
-        todo!()
+
+    /// Returns the number of key-value pairs in this object
+    pub fn num_elements(&self) -> usize {
+        self.num_elements
     }
 }
 
-#[derive(Clone, Copy, Debug, PartialEq)]
-pub struct VariantArray<'m, 'v> {
+#[derive(Clone, Debug, PartialEq)]
+pub struct VariantObject<'m, 'v> {
     pub metadata: &'m VariantMetadata<'m>,
     pub value: &'v [u8],
+    header: VariantObjectHeader,
 }
 
-impl<'m, 'v> VariantArray<'m, 'v> {
-    /// Return the length of this array
+impl<'m, 'v> VariantObject<'m, 'v> {
+    pub fn try_new(metadata: &'m VariantMetadata<'m>, value: &'v [u8]) -> 
Result<Self, ArrowError> {
+        Ok(Self {
+            metadata,
+            value,
+            header: VariantObjectHeader::try_new(value)?,
+        })
+    }
+
+    /// Returns the number of key-value pairs in this object
     pub fn len(&self) -> usize {
-        todo!()
+        self.header.num_elements()
     }
 
-    /// Is the array of zero length
+    /// Returns true if the object contains no key-value pairs
     pub fn is_empty(&self) -> bool {
         self.len() == 0
     }
 
-    pub fn values(&self) -> Result<impl Iterator<Item = Variant<'m, 'v>>, 
ArrowError> {
-        todo!();
-        #[allow(unreachable_code)] // Just to infer the return type
-        Ok(vec![].into_iter())
+    pub fn fields(&self) -> Result<impl Iterator<Item = (&'m str, Variant<'m, 
'v>)>, ArrowError> {
+        let field_list = self.parse_field_list()?;
+        Ok(field_list.into_iter())
     }
 
-    pub fn get(&self, index: usize) -> Result<Variant<'m, 'v>, ArrowError> {
+    pub fn field(&self, name: &str) -> Result<Option<Variant<'m, 'v>>, 
ArrowError> {
+        // Binary search through the sorted field IDs to find the field
+        let (field_ids, field_offsets) = self.parse_field_arrays()?;
+        let search_result = try_binary_search_by(&field_ids, &name, 
|&field_id| {

Review Comment:
   (Basically, if the requested field name actually exists, it must match the 
name referenced by one of the object's field ids, and we can binary search 
those ids because they are stored in lexical order of their backing 
dictionary entries.)



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to