scovich commented on code in PR #7452: URL: https://github.com/apache/arrow-rs/pull/7452#discussion_r2098455874
########## arrow-variant/src/variant.rs: ########## @@ -0,0 +1,418 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Core Variant data type for working with the Arrow Variant binary format. + +use crate::decoder; +use arrow_schema::ArrowError; +use std::fmt; + +/// A Variant value in the Arrow binary format +#[derive(Debug, Clone, PartialEq)] +pub struct Variant<'a> { + /// Raw metadata bytes + metadata: &'a [u8], + /// Raw value bytes + value: &'a [u8], +} + +impl<'a> Variant<'a> { + /// Creates a new Variant with metadata and value bytes + pub fn new(metadata: &'a [u8], value: &'a [u8]) -> Self { + Self { metadata, value } + } + + /// Creates a Variant by parsing binary metadata and value + pub fn try_new(metadata: &'a [u8], value: &'a [u8]) -> Result<Self, ArrowError> { + // Validate that the binary data is a valid Variant + decoder::validate_variant(value, metadata)?; + + Ok(Self { metadata, value }) + } + + /// Returns the raw metadata bytes + pub fn metadata(&self) -> &'a [u8] { + self.metadata + } + + /// Returns the raw value bytes + pub fn value(&self) -> &'a [u8] { + self.value + } + + /// Gets a value by key from an object Variant + /// + /// Returns: + /// - `Ok(Some(Variant))` if the key exists + 
/// - `Ok(None)` if the key doesn't exist or the Variant is not an object + /// - `Err` if there was an error parsing the Variant + pub fn get(&self, key: &str) -> Result<Option<Variant<'a>>, ArrowError> { + let result = decoder::get_field_value_range(self.value, self.metadata, key)?; + Ok(result.map(|(start, end)| Variant { + metadata: self.metadata, // Share the same metadata reference + value: &self.value[start..end], // Use a slice of the original value buffer + })) + } + + /// Gets a value by index from an array Variant + /// + /// Returns: + /// - `Ok(Some(Variant))` if the index is valid + /// - `Ok(None)` if the index is out of bounds or the Variant is not an array + /// - `Err` if there was an error parsing the Variant + pub fn get_index(&self, index: usize) -> Result<Option<Variant<'a>>, ArrowError> { + let result = decoder::get_array_element_range(self.value, index)?; + Ok(result.map(|(start, end)| Variant { + metadata: self.metadata, // Share the same metadata reference + value: &self.value[start..end], // Use a slice of the original value buffer + })) + } + + /// Checks if this Variant is an object + pub fn is_object(&self) -> Result<bool, ArrowError> { + decoder::is_object(self.value) + } + + /// Checks if this Variant is an array + pub fn is_array(&self) -> Result<bool, ArrowError> { + decoder::is_array(self.value) + } + + /// Converts the variant value to a serde_json::Value + pub fn as_value(&self) -> Result<serde_json::Value, ArrowError> { + let keys = crate::decoder::parse_metadata_keys(self.metadata)?; + crate::decoder::decode_value(self.value, &keys) + } + + /// Converts the variant value to a string. + pub fn as_string(&self) -> Result<String, ArrowError> { + match self.as_value()? 
{ + serde_json::Value::String(s) => Ok(s), + serde_json::Value::Number(n) => Ok(n.to_string()), + serde_json::Value::Bool(b) => Ok(b.to_string()), + serde_json::Value::Null => Ok("null".to_string()), + _ => Err(ArrowError::InvalidArgumentError( + "Cannot convert value to string".to_string(), + )), + } + } + + /// Converts the variant value to a i32. + pub fn as_i32(&self) -> Result<i32, ArrowError> { + match self.as_value()? { + serde_json::Value::Number(n) => { + if let Some(i) = n.as_i64() { + if i >= i32::MIN as i64 && i <= i32::MAX as i64 { + return Ok(i as i32); + } + } + Err(ArrowError::InvalidArgumentError( + "Number outside i32 range".to_string(), + )) + } + _ => Err(ArrowError::InvalidArgumentError( + "Cannot convert value to i32".to_string(), + )), + } + } + + /// Converts the variant value to a i64. + pub fn as_i64(&self) -> Result<i64, ArrowError> {

Review Comment:

A couple of corrections to the above:

* We _can't_ actually widen `Decimal16` to `Decimal16` of a different scale, without also knowing the precision (which variant doesn't track).
* The suggested `VariantDecimal16::try_new` had a bug:

```rust
let exponent = u32::from(scale - current_scale);
let (unscaled_value, false) = unscaled_value.overflowing_pow(exponent) else {
    return Err(...);
};
```

should be something like:

```rust
let exponent = u32::from(scale - current_scale);
let (exponent, false) = unscaled_value.overflowing_pow(exponent) else {
    return Err(...);
};
unscaled_value *= exponent;
```

-- 
This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org