scovich commented on code in PR #7666:
URL: https://github.com/apache/arrow-rs/pull/7666#discussion_r2151054705
##########
parquet-variant/src/variant.rs:
##########
@@ -300,62 +303,218 @@ impl<'m> VariantMetadata<'m> {
}
}
-#[derive(Clone, Copy, Debug, PartialEq)]
-pub struct VariantObject<'m, 'v> {
- pub metadata: &'m VariantMetadata<'m>,
- pub value: &'v [u8],
+#[derive(Clone, Debug, PartialEq)]
+pub struct VariantObjectHeader {
+ field_offset_size: OffsetSizeBytes,
+ field_id_size: OffsetSizeBytes,
+ num_elements: usize,
+ field_ids_start_byte: usize,
+ field_offsets_start_byte: usize,
+ values_start_byte: usize,
}
-impl<'m, 'v> VariantObject<'m, 'v> {
- pub fn fields(&self) -> Result<impl Iterator<Item = (&'m str, Variant<'m,
'v>)>, ArrowError> {
- todo!();
- #[allow(unreachable_code)] // Just to infer the return type
- Ok(vec![].into_iter())
+
+impl VariantObjectHeader {
+ pub fn try_new(value: &[u8]) -> Result<Self, ArrowError> {
+ // Parse the header byte to get object parameters
+ let header = first_byte_from_slice(value)?;
+ let value_header = header >> 2;
+
+ let field_offset_size_minus_one = value_header & 0x03; // Last 2 bits
+ let field_id_size_minus_one = (value_header >> 2) & 0x03; // Next 2
bits
+ let is_large = value_header & 0x10; // 5th bit
+
+ let field_offset_size =
OffsetSizeBytes::try_new(field_offset_size_minus_one)?;
+ let field_id_size = OffsetSizeBytes::try_new(field_id_size_minus_one)?;
+
+ // Determine num_elements size based on is_large flag
+ let num_elements_size = if is_large != 0 {
+ OffsetSizeBytes::Four
+ } else {
+ OffsetSizeBytes::One
+ };
+
+ // Parse num_elements
+ let num_elements = num_elements_size.unpack_usize(value, 1, 0)?;
+
+ // Calculate byte offsets for different sections
+ let field_ids_start_byte = 1 + num_elements_size as usize;
+ let field_offsets_start_byte = field_ids_start_byte + num_elements *
field_id_size as usize;
+ let values_start_byte =
+ field_offsets_start_byte + (num_elements + 1) * field_offset_size
as usize;
+
+ // Verify that the last field offset array entry is inside the value
slice
+ let last_field_offset_byte =
+ field_offsets_start_byte + (num_elements + 1) * field_offset_size
as usize;
+ if last_field_offset_byte > value.len() {
+ return Err(ArrowError::InvalidArgumentError(format!(
Review Comment:
Filed as https://github.com/apache/arrow-rs/issues/7681
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]