This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new 6407c7e381 [Variant] Rename VariantShreddingRowBuilder to 
VariantToArrowRowBuilder (#8344)
6407c7e381 is described below

commit 6407c7e3817aeed8201722cf6c617ae1f2cbb678
Author: Ryan Johnson <[email protected]>
AuthorDate: Sun Sep 14 05:34:28 2025 -0600

    [Variant] Rename VariantShreddingRowBuilder to VariantToArrowRowBuilder 
(#8344)
    
    # Which issue does this PR close?
    
    We generally require a GitHub issue to be filed for all bug fixes and
    enhancements and this helps us generate change logs for our releases.
    You can link an issue to this PR using the GitHub syntax.
    
    - Closes #NNN.
    
    # Rationale for this change
    
    The "shredding row builder" name is a bit hard to understand. And now
    that we have `ArrowToVariantRowBuilder` (used by `cast_to_variant`), it
    makes sense to rename these as `VariantToArrowRowBuilder` since that's
    actually what they do.
    
    While we're at it, rename `variant_get/mod.rs` as just `variant_get.rs`,
    since it was the only file in its directory.
    
    # What changes are included in this PR?
    
    Just the rename (which includes a couple of file renames).
    
    # Are these changes tested?
    
    Pure code movement/rename -- compilation suffices
    
    # Are there any user-facing changes?
    
    No
---
 parquet-variant-compute/src/lib.rs                 |  1 +
 .../src/{variant_get/mod.rs => variant_get.rs}     | 10 ++--
 .../src/variant_get/output/mod.rs                  | 18 -------
 .../output/row_builder.rs => variant_to_arrow.rs}  | 57 ++++++++++++----------
 4 files changed, 37 insertions(+), 49 deletions(-)

diff --git a/parquet-variant-compute/src/lib.rs 
b/parquet-variant-compute/src/lib.rs
index 43d642d745..999e118367 100644
--- a/parquet-variant-compute/src/lib.rs
+++ b/parquet-variant-compute/src/lib.rs
@@ -43,6 +43,7 @@ mod type_conversion;
 mod variant_array;
 mod variant_array_builder;
 pub mod variant_get;
+mod variant_to_arrow;
 
 pub use variant_array::{ShreddingState, VariantArray};
 pub use variant_array_builder::VariantArrayBuilder;
diff --git a/parquet-variant-compute/src/variant_get/mod.rs 
b/parquet-variant-compute/src/variant_get.rs
similarity index 99%
rename from parquet-variant-compute/src/variant_get/mod.rs
rename to parquet-variant-compute/src/variant_get.rs
index 10403b1369..7774d13670 100644
--- a/parquet-variant-compute/src/variant_get/mod.rs
+++ b/parquet-variant-compute/src/variant_get.rs
@@ -23,13 +23,12 @@ use arrow::{
 use arrow_schema::{ArrowError, DataType, FieldRef};
 use parquet_variant::{VariantPath, VariantPathElement};
 
-use crate::variant_array::ShreddingState;
-use crate::{variant_array::ShreddedVariantFieldArray, VariantArray};
+use crate::variant_array::{ShreddedVariantFieldArray, ShreddingState};
+use crate::variant_to_arrow::make_variant_to_arrow_row_builder;
+use crate::VariantArray;
 
 use std::sync::Arc;
 
-mod output;
-
 pub(crate) enum ShreddedPathStep<'a> {
     /// Path step succeeded, return the new shredding state
     Success(&'a ShreddingState),
@@ -136,8 +135,7 @@ fn shredded_get_path(
     let shred_basic_variant =
         |target: VariantArray, path: VariantPath<'_>, as_field: 
Option<&Field>| {
             let as_type = as_field.map(|f| f.data_type());
-            let mut builder =
-                output::row_builder::make_shredding_row_builder(path, as_type, 
cast_options)?;
+            let mut builder = make_variant_to_arrow_row_builder(path, as_type, 
cast_options)?;
             for i in 0..target.len() {
                 if target.is_null(i) {
                     builder.append_null()?;
diff --git a/parquet-variant-compute/src/variant_get/output/mod.rs 
b/parquet-variant-compute/src/variant_get/output/mod.rs
deleted file mode 100644
index c3df183ec8..0000000000
--- a/parquet-variant-compute/src/variant_get/output/mod.rs
+++ /dev/null
@@ -1,18 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-pub(crate) mod row_builder;
diff --git a/parquet-variant-compute/src/variant_get/output/row_builder.rs 
b/parquet-variant-compute/src/variant_to_arrow.rs
similarity index 73%
rename from parquet-variant-compute/src/variant_get/output/row_builder.rs
rename to parquet-variant-compute/src/variant_to_arrow.rs
index 066f207f78..7cb3c4e281 100644
--- a/parquet-variant-compute/src/variant_get/output/row_builder.rs
+++ b/parquet-variant-compute/src/variant_to_arrow.rs
@@ -27,39 +27,39 @@ use crate::VariantArrayBuilder;
 
 use std::sync::Arc;
 
-pub(crate) fn make_shredding_row_builder<'a>(
+pub(crate) fn make_variant_to_arrow_row_builder<'a>(
     //metadata: &BinaryViewArray,
     path: VariantPath<'a>,
     data_type: Option<&'a datatypes::DataType>,
     cast_options: &'a CastOptions,
-) -> Result<Box<dyn VariantShreddingRowBuilder + 'a>> {
+) -> Result<Box<dyn VariantToArrowRowBuilder + 'a>> {
     use datatypes::{
         Float16Type, Float32Type, Float64Type, Int16Type, Int32Type, 
Int64Type, Int8Type,
     };
 
     let builder = match data_type {
         // If no data type was requested, build an unshredded VariantArray.
-        None => VariantArrayShreddingRowBuilder::new(16).with_path(path),
+        None => VariantToBinaryVariantArrowRowBuilder::new(16).with_path(path),
         Some(datatypes::DataType::Int8) => {
-            
PrimitiveVariantShreddingRowBuilder::<Int8Type>::new(cast_options).with_path(path)
+            
VariantToPrimitiveArrowRowBuilder::<Int8Type>::new(cast_options).with_path(path)
         }
         Some(datatypes::DataType::Int16) => {
-            
PrimitiveVariantShreddingRowBuilder::<Int16Type>::new(cast_options).with_path(path)
+            
VariantToPrimitiveArrowRowBuilder::<Int16Type>::new(cast_options).with_path(path)
         }
         Some(datatypes::DataType::Int32) => {
-            
PrimitiveVariantShreddingRowBuilder::<Int32Type>::new(cast_options).with_path(path)
+            
VariantToPrimitiveArrowRowBuilder::<Int32Type>::new(cast_options).with_path(path)
         }
         Some(datatypes::DataType::Int64) => {
-            
PrimitiveVariantShreddingRowBuilder::<Int64Type>::new(cast_options).with_path(path)
+            
VariantToPrimitiveArrowRowBuilder::<Int64Type>::new(cast_options).with_path(path)
         }
         Some(datatypes::DataType::Float16) => {
-            
PrimitiveVariantShreddingRowBuilder::<Float16Type>::new(cast_options).with_path(path)
+            
VariantToPrimitiveArrowRowBuilder::<Float16Type>::new(cast_options).with_path(path)
         }
         Some(datatypes::DataType::Float32) => {
-            
PrimitiveVariantShreddingRowBuilder::<Float32Type>::new(cast_options).with_path(path)
+            
VariantToPrimitiveArrowRowBuilder::<Float32Type>::new(cast_options).with_path(path)
         }
         Some(datatypes::DataType::Float64) => {
-            
PrimitiveVariantShreddingRowBuilder::<Float64Type>::new(cast_options).with_path(path)
+            
VariantToPrimitiveArrowRowBuilder::<Float64Type>::new(cast_options).with_path(path)
         }
         _ => {
             return Err(ArrowError::NotYetImplemented(format!(
@@ -71,11 +71,11 @@ pub(crate) fn make_shredding_row_builder<'a>(
     Ok(builder)
 }
 
-/// Builder for shredding variant values into strongly typed Arrow arrays.
+/// Builder for converting variant values into strongly typed Arrow arrays.
 ///
 /// Useful for variant_get kernels that need to extract specific paths from 
variant values, possibly
 /// with casting of leaf values to specific types.
-pub(crate) trait VariantShreddingRowBuilder {
+pub(crate) trait VariantToArrowRowBuilder {
     fn append_null(&mut self) -> Result<()>;
 
     fn append_value(&mut self, value: &Variant<'_, '_>) -> Result<bool>;
@@ -85,17 +85,17 @@ pub(crate) trait VariantShreddingRowBuilder {
 
 /// A thin wrapper whose only job is to extract a specific path from a variant 
value and pass the
 /// result to a nested builder.
-struct VariantPathRowBuilder<'a, T: VariantShreddingRowBuilder> {
+struct VariantPathRowBuilder<'a, T: VariantToArrowRowBuilder> {
     builder: T,
     path: VariantPath<'a>,
 }
 
-trait VariantShreddingRowBuilderWithPath<'a>: VariantShreddingRowBuilder {
-    fn with_path(self, path: VariantPath<'a>) -> Box<dyn 
VariantShreddingRowBuilder + 'a>;
+trait VariantToArrowRowBuilderWithPath<'a>: VariantToArrowRowBuilder {
+    fn with_path(self, path: VariantPath<'a>) -> Box<dyn 
VariantToArrowRowBuilder + 'a>;
 }
 
-impl<'a, T: VariantShreddingRowBuilder + 'a> 
VariantShreddingRowBuilderWithPath<'a> for T {
-    fn with_path(self, path: VariantPath<'a>) -> Box<dyn 
VariantShreddingRowBuilder + 'a> {
+impl<'a, T: VariantToArrowRowBuilder + 'a> 
VariantToArrowRowBuilderWithPath<'a> for T {
+    fn with_path(self, path: VariantPath<'a>) -> Box<dyn 
VariantToArrowRowBuilder + 'a> {
         if path.is_empty() {
             Box::new(self)
         } else {
@@ -107,7 +107,7 @@ impl<'a, T: VariantShreddingRowBuilder + 'a> 
VariantShreddingRowBuilderWithPath<
     }
 }
 
-impl<T: VariantShreddingRowBuilder> VariantShreddingRowBuilder for 
VariantPathRowBuilder<'_, T> {
+impl<T: VariantToArrowRowBuilder> VariantToArrowRowBuilder for 
VariantPathRowBuilder<'_, T> {
     fn append_null(&mut self) -> Result<()> {
         self.builder.append_null()
     }
@@ -143,13 +143,13 @@ fn get_type_name<T: ArrowPrimitiveType>() -> &'static str 
{
     }
 }
 
-/// Builder for shredding variant values to primitive values
-struct PrimitiveVariantShreddingRowBuilder<'a, T: ArrowPrimitiveType> {
+/// Builder for converting variant values to primitive values
+struct VariantToPrimitiveArrowRowBuilder<'a, T: ArrowPrimitiveType> {
     builder: arrow::array::PrimitiveBuilder<T>,
     cast_options: &'a CastOptions<'a>,
 }
 
-impl<'a, T: ArrowPrimitiveType> PrimitiveVariantShreddingRowBuilder<'a, T> {
+impl<'a, T: ArrowPrimitiveType> VariantToPrimitiveArrowRowBuilder<'a, T> {
     fn new(cast_options: &'a CastOptions<'a>) -> Self {
         Self {
             builder: PrimitiveBuilder::<T>::new(),
@@ -158,7 +158,7 @@ impl<'a, T: ArrowPrimitiveType> 
PrimitiveVariantShreddingRowBuilder<'a, T> {
     }
 }
 
-impl<'a, T> VariantShreddingRowBuilder for 
PrimitiveVariantShreddingRowBuilder<'a, T>
+impl<'a, T> VariantToArrowRowBuilder for VariantToPrimitiveArrowRowBuilder<'a, 
T>
 where
     T: ArrowPrimitiveType,
     for<'m, 'v> Variant<'m, 'v>: VariantAsPrimitive<T>,
@@ -193,11 +193,11 @@ where
 }
 
 /// Builder for creating VariantArray output (for path extraction without type 
conversion)
-struct VariantArrayShreddingRowBuilder {
+struct VariantToBinaryVariantArrowRowBuilder {
     builder: VariantArrayBuilder,
 }
 
-impl VariantArrayShreddingRowBuilder {
+impl VariantToBinaryVariantArrowRowBuilder {
     fn new(capacity: usize) -> Self {
         Self {
             builder: VariantArrayBuilder::new(capacity),
@@ -205,13 +205,20 @@ impl VariantArrayShreddingRowBuilder {
     }
 }
 
-impl VariantShreddingRowBuilder for VariantArrayShreddingRowBuilder {
+impl VariantToArrowRowBuilder for VariantToBinaryVariantArrowRowBuilder {
     fn append_null(&mut self) -> Result<()> {
         self.builder.append_null();
         Ok(())
     }
 
     fn append_value(&mut self, value: &Variant<'_, '_>) -> Result<bool> {
+        // TODO: We need a way to convert a Variant directly to bytes. In 
particular, we want to
+        // just copy across the underlying value byte slice of a 
`Variant::Object` or
+        // `Variant::List`, without any interaction with a `VariantMetadata` 
(because the shredding
+        // spec requires us to reuse the existing metadata when unshredding).
+        //
+        // One could _probably_ emulate this with 
parquet_variant::VariantBuilder, but it would do a
+        // lot of unnecessary work and would also create a new metadata column 
we don't need.
         self.builder.append_variant(value.clone());
         Ok(true)
     }

Reply via email to