scovich commented on code in PR #8166: URL: https://github.com/apache/arrow-rs/pull/8166#discussion_r2313762359
########## parquet-variant-compute/src/variant_get/output/row_builder.rs: ########## @@ -0,0 +1,211 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use arrow::array::ArrayRef; +use arrow::datatypes; +use arrow::datatypes::ArrowPrimitiveType; +use arrow::error::{ArrowError, Result}; +use parquet_variant::{Variant, VariantPath}; + +use crate::VariantArrayBuilder; + +use std::sync::Arc; + +pub(crate) fn make_shredding_row_builder<'a>( + //metadata: &BinaryViewArray, + path: VariantPath<'a>, + data_type: Option<&'a datatypes::DataType>, +) -> Result<Box<dyn VariantShreddingRowBuilder + 'a>> { + use arrow::array::PrimitiveBuilder; + use datatypes::Int32Type; + + // support non-empty paths (field access) and some empty path cases + if path.is_empty() { + return match data_type { + Some(datatypes::DataType::Int32) => { + // Return PrimitiveInt32Builder for type conversion + let builder = PrimitiveVariantShreddingRowBuilder { + builder: PrimitiveBuilder::<Int32Type>::new(), + }; + Ok(Box::new(builder)) + } + None => { + // Return VariantArrayBuilder for VariantArray output + let builder = VariantArrayShreddingRowBuilder::new(16); + Ok(Box::new(builder)) + } + _ => { + // only Int32 supported for empty paths + Err(ArrowError::NotYetImplemented(format!( + "variant_get with empty path and data_type={:?} not yet implemented", + data_type + ))) + } + }; + } + + // Non-empty paths: field access functionality + // Helper macro to reduce duplication when wrapping builders with path functionality + macro_rules! wrap_with_path { + ($inner_builder:expr) => { + Ok(Box::new(VariantPathRowBuilder { + builder: $inner_builder, + path, + }) as Box<dyn VariantShreddingRowBuilder + 'a>) + }; + } + + match data_type { + Some(datatypes::DataType::Int32) => { + // Create a primitive builder and wrap it with path functionality + let inner_builder = PrimitiveVariantShreddingRowBuilder { + builder: PrimitiveBuilder::<Int32Type>::new(), + }; + wrap_with_path!(inner_builder) + } + None => { + // Create a variant array builder and wrap it with path functionality + let inner_builder = VariantArrayShreddingRowBuilder::new(16); + wrap_with_path!(inner_builder) + } + _ => { + // only Int32 and VariantArray supported + Err(ArrowError::NotYetImplemented(format!( + "variant_get with path={:?} and data_type={:?} not yet implemented", + path, data_type + ))) + } + } +} + +/// Builder for shredding variant values into strongly typed Arrow arrays. +/// +/// Useful for variant_get kernels that need to extract specific paths from variant values, possibly +/// with casting of leaf values to specific types. +pub(crate) trait VariantShreddingRowBuilder { + fn append_null(&mut self) -> Result<()>; + + fn append_value(&mut self, value: &Variant<'_, '_>) -> Result<bool>; + + fn finish(&mut self) -> Result<ArrayRef>; +} + +/// A thin wrapper whose only job is to extract a specific path from a variant value and pass the +/// result to a nested builder. +struct VariantPathRowBuilder<'a, T: VariantShreddingRowBuilder> { Review Comment: Not sure I understand? The caller already followed shredded path steps as far as possible before creating _any_ builder. This path builder is used to extract the remaining path steps, on a row-by-row basis, from the values of a binary variant column the caller encountered before the path was exhausted. Not sure how that could be moved to the caller? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
