scovich commented on code in PR #8122: URL: https://github.com/apache/arrow-rs/pull/8122#discussion_r2277284754
########## parquet-variant-compute/src/variant_get/output/struct_output.rs: ########## @@ -0,0 +1,371 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use arrow::array::{ArrayRef, AsArray as _, NullBufferBuilder}; +use arrow::datatypes; +use arrow::datatypes::{ArrowPrimitiveType, FieldRef}; +use arrow::error::{ArrowError, Result}; +use parquet_variant::{Variant, VariantObject, VariantPath}; + +use std::sync::Arc; + +#[allow(unused)] +pub(crate) fn make_shredding_row_builder( + //metadata: &BinaryViewArray, + path: VariantPath<'_>, + data_type: Option<&datatypes::DataType>, +) -> Result<Box<dyn VariantShreddingRowBuilder>> { + todo!() // wire it all up! +} + +/// Builder for shredding variant values into strongly typed Arrow arrays. +/// +/// Useful for variant_get kernels that need to extract specific paths from variant values, possibly +/// with casting of leaf values to specific types. 
+#[allow(unused)] +pub(crate) trait VariantShreddingRowBuilder { + fn append_null(&mut self) -> Result<()>; + + fn append_value(&mut self, value: &Variant<'_, '_>) -> Result<bool>; + + fn finish(&mut self) -> Result<ArrayRef>; +} + +/// A thin wrapper whose only job is to extract a specific path from a variant value and pass the +/// result to a nested builder. +#[allow(unused)] +struct VariantPathRowBuilder<'a, T: VariantShreddingRowBuilder> { + builder: T, + path: VariantPath<'a>, +} + +impl<T: VariantShreddingRowBuilder> VariantShreddingRowBuilder for VariantPathRowBuilder<'_, T> { + fn append_null(&mut self) -> Result<()> { + self.builder.append_null() + } + + fn append_value(&mut self, value: &Variant<'_, '_>) -> Result<bool> { + if let Some(v) = value.get_path(&self.path) { + self.builder.append_value(&v) + } else { + self.builder.append_null()?; + Ok(false) + } + } + fn finish(&mut self) -> Result<ArrayRef> { + self.builder.finish() + } +} + +/// Helper trait for converting `Variant` values to arrow primitive values. 
+#[allow(unused)] +trait VariantAsPrimitive<T: ArrowPrimitiveType> { + fn as_primitive(&self) -> Option<T::Native>; +} +impl VariantAsPrimitive<datatypes::Int32Type> for Variant<'_, '_> { + fn as_primitive(&self) -> Option<i32> { + self.as_int32() + } +} +impl VariantAsPrimitive<datatypes::Float64Type> for Variant<'_, '_> { + fn as_primitive(&self) -> Option<f64> { + self.as_f64() + } +} + +/// Builder for shredding variant values to primitive values +#[allow(unused)] +struct PrimitiveVariantShreddingRowBuilder<T: ArrowPrimitiveType> { + builder: arrow::array::PrimitiveBuilder<T>, +} + +impl<T> VariantShreddingRowBuilder for PrimitiveVariantShreddingRowBuilder<T> +where + T: ArrowPrimitiveType, + for<'m, 'v> Variant<'m, 'v>: VariantAsPrimitive<T>, +{ + fn append_null(&mut self) -> Result<()> { + self.builder.append_null(); + Ok(()) + } + + fn append_value(&mut self, value: &Variant<'_, '_>) -> Result<bool> { + if let Some(v) = value.as_primitive() { + self.builder.append_value(v); + Ok(true) + } else { + self.builder.append_null(); // TODO: handle casting failure + Ok(false) + } + } + + fn finish(&mut self) -> Result<ArrayRef> { + Ok(Arc::new(self.builder.finish())) + } +} + +/// Builder for appending raw binary variant values to a BinaryViewArray. It copies the bytes +/// as-is, without any decoding. +#[allow(unused)] +struct BinaryVariantRowBuilder { + nulls: NullBufferBuilder, +} + +impl VariantShreddingRowBuilder for BinaryVariantRowBuilder { + fn append_null(&mut self) -> Result<()> { + self.nulls.append_null(); + Ok(()) + } + fn append_value(&mut self, _value: &Variant<'_, '_>) -> Result<bool> { + // We need a way to convert a Variant directly to bytes. In particular, we want to just copy + // across the underlying value byte slice of a `Variant::Object` or `Variant::List`, without + // any interaction with a `VariantMetadata` (because we will just reuse the existing one). 
+        //
+        // One could _probably_ emulate this with parquet_variant::VariantBuilder, but it would do a
+        // lot of unnecessary work and would also create a new metadata column we don't need.

Review Comment:
Actually, I think it can be easier, PTAL?
* https://github.com/apache/arrow-rs/pull/8141

If we like it, I can add tests and get it merged. We would still need some kind of dummy `MetadataBuilder`, just to keep the builder interface happy.

However, when unshredding partially shredded objects, we would need the read-only builder I proposed earlier, so maybe we just use that?
* https://github.com/apache/arrow-rs/pull/7915

--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at: us...@infra.apache.org