This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push:
new 1e03a1bc05 [Variant] Add variant to arrow primitive support for
boolean/timestamp/time (#8516)
1e03a1bc05 is described below
commit 1e03a1bc053c1d9d9e4c61f97895ce1915eb27fe
Author: Congxian Qiu <[email protected]>
AuthorDate: Thu Oct 9 04:35:48 2025 +0800
[Variant] Add variant to arrow primitive support for boolean/timestamp/time
(#8516)
# Which issue does this PR close?
- Closes #8515.
# What changes are included in this PR?
- Add a `macro_rules!` macro, `define_variant_to_primitive_builder`, used to
construct the variant-to-Arrow row builders
- Implement
`VariantToBooleanArrowRowBuilder`/`VariantToPrimitiveArrowRowBuilder`/`VariantToTimestampArrowRowBuilder`
using the macro `define_variant_to_primitive_builder`
- Add type access for `Variant::Timestamp/Time` (timestamps will automatically
widen micros to nanos)
- Add tests to cover `Variant::{int8/float32/float64}` for existing code
# Are these changes tested?
Added tests
# Are there any user-facing changes?
If there are user-facing changes then we may require documentation to be
updated before approving the PR.
If there are any breaking changes to public APIs, please call them out.
---
parquet-variant-compute/src/type_conversion.rs | 54 ++++++-
parquet-variant-compute/src/variant_get.rs | 207 +++++++++++++++++++++---
parquet-variant-compute/src/variant_to_arrow.rs | 167 ++++++++++++++-----
parquet-variant/src/variant.rs | 134 ++++++++++++---
4 files changed, 475 insertions(+), 87 deletions(-)
diff --git a/parquet-variant-compute/src/type_conversion.rs
b/parquet-variant-compute/src/type_conversion.rs
index 5dda185529..5afebb1bfa 100644
--- a/parquet-variant-compute/src/type_conversion.rs
+++ b/parquet-variant-compute/src/type_conversion.rs
@@ -17,7 +17,7 @@
//! Module for transforming a typed arrow `Array` to `VariantArray`.
-use arrow::datatypes::{self, ArrowPrimitiveType};
+use arrow::datatypes::{self, ArrowPrimitiveType, ArrowTimestampType,
Date32Type};
use parquet_variant::Variant;
/// Options for controlling the behavior of `cast_to_variant_with_options`.
@@ -38,12 +38,31 @@ pub(crate) trait PrimitiveFromVariant: ArrowPrimitiveType {
fn from_variant(variant: &Variant<'_, '_>) -> Option<Self::Native>;
}
+/// Extension trait for Arrow timestamp types that can extract their native
value from a Variant.
+/// We can't use [`PrimitiveFromVariant`] directly because we need _two_
implementations for each
+/// timestamp type -- the `NTZ` param here.
+pub(crate) trait TimestampFromVariant<const NTZ: bool>: ArrowTimestampType {
+ fn from_variant(variant: &Variant<'_, '_>) -> Option<Self::Native>;
+}
+
/// Macro to generate PrimitiveFromVariant implementations for Arrow primitive
types
macro_rules! impl_primitive_from_variant {
- ($arrow_type:ty, $variant_method:ident) => {
+ ($arrow_type:ty, $variant_method:ident $(, $cast_fn:expr)?) => {
impl PrimitiveFromVariant for $arrow_type {
fn from_variant(variant: &Variant<'_, '_>) -> Option<Self::Native>
{
- variant.$variant_method()
+ let value = variant.$variant_method();
+ $( let value = value.map($cast_fn); )?
+ value
+ }
+ }
+ };
+}
+
+macro_rules! impl_timestamp_from_variant {
+ ($timestamp_type:ty, $variant_method:ident, ntz=$ntz:ident, $cast_fn:expr
$(,)?) => {
+ impl TimestampFromVariant<{ $ntz }> for $timestamp_type {
+ fn from_variant(variant: &Variant<'_, '_>) -> Option<Self::Native>
{
+ variant.$variant_method().and_then($cast_fn)
}
}
};
@@ -60,6 +79,35 @@ impl_primitive_from_variant!(datatypes::UInt64Type, as_u64);
impl_primitive_from_variant!(datatypes::Float16Type, as_f16);
impl_primitive_from_variant!(datatypes::Float32Type, as_f32);
impl_primitive_from_variant!(datatypes::Float64Type, as_f64);
+impl_primitive_from_variant!(
+ datatypes::Date32Type,
+ as_naive_date,
+ Date32Type::from_naive_date
+);
+impl_timestamp_from_variant!(
+ datatypes::TimestampMicrosecondType,
+ as_timestamp_ntz_micros,
+ ntz = true,
+ Self::make_value,
+);
+impl_timestamp_from_variant!(
+ datatypes::TimestampMicrosecondType,
+ as_timestamp_micros,
+ ntz = false,
+ |timestamp| Self::make_value(timestamp.naive_utc())
+);
+impl_timestamp_from_variant!(
+ datatypes::TimestampNanosecondType,
+ as_timestamp_ntz_nanos,
+ ntz = true,
+ Self::make_value
+);
+impl_timestamp_from_variant!(
+ datatypes::TimestampNanosecondType,
+ as_timestamp_nanos,
+ ntz = false,
+ |timestamp| Self::make_value(timestamp.naive_utc())
+);
/// Convert the value at a specific index in the given array into a `Variant`.
macro_rules! non_generic_conversion_single_value {
diff --git a/parquet-variant-compute/src/variant_get.rs
b/parquet-variant-compute/src/variant_get.rs
index 4859abe8aa..8ee489cfe5 100644
--- a/parquet-variant-compute/src/variant_get.rs
+++ b/parquet-variant-compute/src/variant_get.rs
@@ -300,13 +300,14 @@ mod test {
use crate::json_to_variant;
use crate::variant_array::{ShreddedVariantFieldArray, StructArrayBuilder};
use arrow::array::{
- Array, ArrayRef, AsArray, BinaryViewArray, Date32Array, Float32Array,
Float64Array,
- Int8Array, Int16Array, Int32Array, Int64Array, StringArray,
StructArray,
+ Array, ArrayRef, AsArray, BinaryViewArray, BooleanArray, Date32Array,
Float32Array,
+ Float64Array, Int8Array, Int16Array, Int32Array, Int64Array,
StringArray, StructArray,
};
use arrow::buffer::NullBuffer;
use arrow::compute::CastOptions;
use arrow::datatypes::DataType::{Int16, Int32, Int64};
- use arrow_schema::{DataType, Field, FieldRef, Fields};
+ use arrow_schema::DataType::{Boolean, Float32, Float64, Int8};
+ use arrow_schema::{DataType, Field, FieldRef, Fields, TimeUnit};
use chrono::DateTime;
use parquet_variant::{EMPTY_VARIANT_METADATA_BYTES, Variant, VariantPath};
@@ -700,7 +701,7 @@ mod test {
}
macro_rules! perfectly_shredded_to_arrow_primitive_test {
- ($name:ident, $primitive_type:ident,
$perfectly_shredded_array_gen_fun:ident, $expected_array:expr) => {
+ ($name:ident, $primitive_type:expr,
$perfectly_shredded_array_gen_fun:ident, $expected_array:expr) => {
#[test]
fn $name() {
let array = $perfectly_shredded_array_gen_fun();
@@ -713,6 +714,13 @@ mod test {
};
}
+ perfectly_shredded_to_arrow_primitive_test!(
+ get_variant_perfectly_shredded_int18_as_int8,
+ Int8,
+ perfectly_shredded_int8_variant_array,
+ Int8Array::from(vec![Some(1), Some(2), Some(3)])
+ );
+
perfectly_shredded_to_arrow_primitive_test!(
get_variant_perfectly_shredded_int16_as_int16,
Int16,
@@ -734,19 +742,29 @@ mod test {
Int64Array::from(vec![Some(1), Some(2), Some(3)])
);
- /// Return a VariantArray that represents a perfectly "shredded" variant
- /// for the given typed value.
- ///
- /// The schema of the corresponding `StructArray` would look like this:
- ///
- /// ```text
- /// StructArray {
- /// metadata: BinaryViewArray,
- /// typed_value: Int32Array,
- /// }
- /// ```
- macro_rules! numeric_perfectly_shredded_variant_array_fn {
- ($func:ident, $array_type:ident, $primitive_type:ty) => {
+ perfectly_shredded_to_arrow_primitive_test!(
+ get_variant_perfectly_shredded_float32_as_float32,
+ Float32,
+ perfectly_shredded_float32_variant_array,
+ Float32Array::from(vec![Some(1.0), Some(2.0), Some(3.0)])
+ );
+
+ perfectly_shredded_to_arrow_primitive_test!(
+ get_variant_perfectly_shredded_float64_as_float64,
+ Float64,
+ perfectly_shredded_float64_variant_array,
+ Float64Array::from(vec![Some(1.0), Some(2.0), Some(3.0)])
+ );
+
+ perfectly_shredded_to_arrow_primitive_test!(
+ get_variant_perfectly_shredded_boolean_as_boolean,
+ Boolean,
+ perfectly_shredded_bool_variant_array,
+ BooleanArray::from(vec![Some(true), Some(false), Some(true)])
+ );
+
+ macro_rules! perfectly_shredded_variant_array_fn {
+ ($func:ident, $typed_value_gen:expr) => {
fn $func() -> ArrayRef {
// At the time of writing, the `VariantArrayBuilder` does not
support shredding.
// so we must construct the array manually. see
https://github.com/apache/arrow-rs/issues/7895
@@ -754,11 +772,7 @@ mod test {
EMPTY_VARIANT_METADATA_BYTES,
3,
));
- let typed_value = $array_type::from(vec![
- Some(<$primitive_type>::try_from(1u8).unwrap()),
- Some(<$primitive_type>::try_from(2u8).unwrap()),
- Some(<$primitive_type>::try_from(3u8).unwrap()),
- ]);
+ let typed_value = $typed_value_gen();
let struct_array = StructArrayBuilder::new()
.with_field("metadata", Arc::new(metadata), false)
@@ -772,6 +786,33 @@ mod test {
};
}
+
perfectly_shredded_variant_array_fn!(perfectly_shredded_bool_variant_array, || {
+ BooleanArray::from(vec![Some(true), Some(false), Some(true)])
+ });
+
+ /// Return a VariantArray that represents a perfectly "shredded" variant
+ /// for the given typed value.
+ ///
+ /// The schema of the corresponding `StructArray` would look like this:
+ ///
+ /// ```text
+ /// StructArray {
+ /// metadata: BinaryViewArray,
+ /// typed_value: Int32Array,
+ /// }
+ /// ```
+ macro_rules! numeric_perfectly_shredded_variant_array_fn {
+ ($func:ident, $array_type:ident, $primitive_type:ty) => {
+ perfectly_shredded_variant_array_fn!($func, || {
+ $array_type::from(vec![
+ Some(<$primitive_type>::try_from(1u8).unwrap()),
+ Some(<$primitive_type>::try_from(2u8).unwrap()),
+ Some(<$primitive_type>::try_from(3u8).unwrap()),
+ ])
+ });
+ };
+ }
+
numeric_perfectly_shredded_variant_array_fn!(
perfectly_shredded_int8_variant_array,
Int8Array,
@@ -803,6 +844,128 @@ mod test {
f64
);
+ perfectly_shredded_variant_array_fn!(
+ perfectly_shredded_timestamp_micro_ntz_variant_array,
+ || {
+ arrow::array::TimestampMicrosecondArray::from(vec![
+ Some(-456000),
+ Some(1758602096000001),
+ Some(1758602096000002),
+ ])
+ }
+ );
+
+ perfectly_shredded_to_arrow_primitive_test!(
+
get_variant_perfectly_shredded_timestamp_micro_ntz_as_timestamp_micro_ntz,
+ DataType::Timestamp(TimeUnit::Microsecond, None),
+ perfectly_shredded_timestamp_micro_ntz_variant_array,
+ arrow::array::TimestampMicrosecondArray::from(vec![
+ Some(-456000),
+ Some(1758602096000001),
+ Some(1758602096000002),
+ ])
+ );
+
+ // test converting micro to nano
+ perfectly_shredded_to_arrow_primitive_test!(
+ get_variant_perfectly_shredded_timestamp_micro_ntz_as_nano_ntz,
+ DataType::Timestamp(TimeUnit::Nanosecond, None),
+ perfectly_shredded_timestamp_micro_ntz_variant_array,
+ arrow::array::TimestampNanosecondArray::from(vec![
+ Some(-456000000),
+ Some(1758602096000001000),
+ Some(1758602096000002000)
+ ])
+ );
+
+
perfectly_shredded_variant_array_fn!(perfectly_shredded_timestamp_micro_variant_array,
|| {
+ arrow::array::TimestampMicrosecondArray::from(vec![
+ Some(-456000),
+ Some(1758602096000001),
+ Some(1758602096000002),
+ ])
+ .with_timezone("+00:00")
+ });
+
+ perfectly_shredded_to_arrow_primitive_test!(
+ get_variant_perfectly_shredded_timestamp_micro_as_timestamp_micro,
+ DataType::Timestamp(TimeUnit::Microsecond, Some(Arc::from("+00:00"))),
+ perfectly_shredded_timestamp_micro_variant_array,
+ arrow::array::TimestampMicrosecondArray::from(vec![
+ Some(-456000),
+ Some(1758602096000001),
+ Some(1758602096000002),
+ ])
+ .with_timezone("+00:00")
+ );
+
+ // test converting micro to nano
+ perfectly_shredded_to_arrow_primitive_test!(
+ get_variant_perfectly_shredded_timestamp_micro_as_nano,
+ DataType::Timestamp(TimeUnit::Nanosecond, Some(Arc::from("+00:00"))),
+ perfectly_shredded_timestamp_micro_variant_array,
+ arrow::array::TimestampNanosecondArray::from(vec![
+ Some(-456000000),
+ Some(1758602096000001000),
+ Some(1758602096000002000)
+ ])
+ .with_timezone("+00:00")
+ );
+
+ perfectly_shredded_variant_array_fn!(
+ perfectly_shredded_timestamp_nano_ntz_variant_array,
+ || {
+ arrow::array::TimestampNanosecondArray::from(vec![
+ Some(-4999999561),
+ Some(1758602096000000001),
+ Some(1758602096000000002),
+ ])
+ }
+ );
+
+ perfectly_shredded_to_arrow_primitive_test!(
+
get_variant_perfectly_shredded_timestamp_nano_ntz_as_timestamp_nano_ntz,
+ DataType::Timestamp(TimeUnit::Nanosecond, None),
+ perfectly_shredded_timestamp_nano_ntz_variant_array,
+ arrow::array::TimestampNanosecondArray::from(vec![
+ Some(-4999999561),
+ Some(1758602096000000001),
+ Some(1758602096000000002),
+ ])
+ );
+
+
perfectly_shredded_variant_array_fn!(perfectly_shredded_timestamp_nano_variant_array,
|| {
+ arrow::array::TimestampNanosecondArray::from(vec![
+ Some(-4999999561),
+ Some(1758602096000000001),
+ Some(1758602096000000002),
+ ])
+ .with_timezone("+00:00")
+ });
+
+ perfectly_shredded_to_arrow_primitive_test!(
+ get_variant_perfectly_shredded_timestamp_nano_as_timestamp_nano,
+ DataType::Timestamp(TimeUnit::Nanosecond, Some(Arc::from("+00:00"))),
+ perfectly_shredded_timestamp_nano_variant_array,
+ arrow::array::TimestampNanosecondArray::from(vec![
+ Some(-4999999561),
+ Some(1758602096000000001),
+ Some(1758602096000000002),
+ ])
+ .with_timezone("+00:00")
+ );
+
+
perfectly_shredded_variant_array_fn!(perfectly_shredded_date_variant_array, || {
+ Date32Array::from(vec![Some(-12345), Some(17586), Some(20000)])
+ });
+
+ perfectly_shredded_to_arrow_primitive_test!(
+ get_variant_perfectly_shredded_date_as_date,
+ DataType::Date32,
+ perfectly_shredded_date_variant_array,
+ Date32Array::from(vec![Some(-12345), Some(17586), Some(20000)])
+ );
+
macro_rules! assert_variant_get_as_variant_array_with_default_option {
($variant_array: expr, $array_expected: expr) => {{
let options = GetOptions::new();
diff --git a/parquet-variant-compute/src/variant_to_arrow.rs
b/parquet-variant-compute/src/variant_to_arrow.rs
index 50249aa63d..d60a4eea05 100644
--- a/parquet-variant-compute/src/variant_to_arrow.rs
+++ b/parquet-variant-compute/src/variant_to_arrow.rs
@@ -15,21 +15,25 @@
// specific language governing permissions and limitations
// under the License.
-use arrow::array::{ArrayRef, BinaryViewArray, NullBufferBuilder,
PrimitiveBuilder};
+use arrow::array::{
+ ArrayRef, BinaryViewArray, NullBufferBuilder, PrimitiveBuilder,
builder::BooleanBuilder,
+};
use arrow::compute::CastOptions;
use arrow::datatypes::{self, ArrowPrimitiveType, DataType};
use arrow::error::{ArrowError, Result};
use parquet_variant::{Variant, VariantPath};
-use crate::type_conversion::PrimitiveFromVariant;
+use crate::type_conversion::{PrimitiveFromVariant, TimestampFromVariant};
use crate::{VariantArray, VariantValueArrayBuilder};
+use arrow_schema::TimeUnit;
use std::sync::Arc;
/// Builder for converting variant values to primitive Arrow arrays. It is
used by both
/// `VariantToArrowRowBuilder` (below) and
`VariantToShreddedPrimitiveVariantRowBuilder` (in
/// `shred_variant.rs`).
pub(crate) enum PrimitiveVariantToArrowRowBuilder<'a> {
+ Boolean(VariantToBooleanArrowRowBuilder<'a>),
Int8(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Int8Type>),
Int16(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Int16Type>),
Int32(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Int32Type>),
@@ -41,6 +45,13 @@ pub(crate) enum PrimitiveVariantToArrowRowBuilder<'a> {
Float16(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Float16Type>),
Float32(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Float32Type>),
Float64(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Float64Type>),
+ TimestampMicro(VariantToTimestampArrowRowBuilder<'a,
datatypes::TimestampMicrosecondType>),
+ TimestampMicroNtz(
+ VariantToTimestampNtzArrowRowBuilder<'a,
datatypes::TimestampMicrosecondType>,
+ ),
+ TimestampNano(VariantToTimestampArrowRowBuilder<'a,
datatypes::TimestampNanosecondType>),
+ TimestampNanoNtz(VariantToTimestampNtzArrowRowBuilder<'a,
datatypes::TimestampNanosecondType>),
+ Date(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Date32Type>),
}
/// Builder for converting variant values into strongly typed Arrow arrays.
@@ -59,6 +70,7 @@ impl<'a> PrimitiveVariantToArrowRowBuilder<'a> {
pub fn append_null(&mut self) -> Result<()> {
use PrimitiveVariantToArrowRowBuilder::*;
match self {
+ Boolean(b) => b.append_null(),
Int8(b) => b.append_null(),
Int16(b) => b.append_null(),
Int32(b) => b.append_null(),
@@ -70,12 +82,18 @@ impl<'a> PrimitiveVariantToArrowRowBuilder<'a> {
Float16(b) => b.append_null(),
Float32(b) => b.append_null(),
Float64(b) => b.append_null(),
+ TimestampMicro(b) => b.append_null(),
+ TimestampMicroNtz(b) => b.append_null(),
+ TimestampNano(b) => b.append_null(),
+ TimestampNanoNtz(b) => b.append_null(),
+ Date(b) => b.append_null(),
}
}
pub fn append_value(&mut self, value: &Variant<'_, '_>) -> Result<bool> {
use PrimitiveVariantToArrowRowBuilder::*;
match self {
+ Boolean(b) => b.append_value(value),
Int8(b) => b.append_value(value),
Int16(b) => b.append_value(value),
Int32(b) => b.append_value(value),
@@ -87,12 +105,18 @@ impl<'a> PrimitiveVariantToArrowRowBuilder<'a> {
Float16(b) => b.append_value(value),
Float32(b) => b.append_value(value),
Float64(b) => b.append_value(value),
+ TimestampMicro(b) => b.append_value(value),
+ TimestampMicroNtz(b) => b.append_value(value),
+ TimestampNano(b) => b.append_value(value),
+ TimestampNanoNtz(b) => b.append_value(value),
+ Date(b) => b.append_value(value),
}
}
pub fn finish(self) -> Result<ArrayRef> {
use PrimitiveVariantToArrowRowBuilder::*;
match self {
+ Boolean(b) => b.finish(),
Int8(b) => b.finish(),
Int16(b) => b.finish(),
Int32(b) => b.finish(),
@@ -104,6 +128,11 @@ impl<'a> PrimitiveVariantToArrowRowBuilder<'a> {
Float16(b) => b.finish(),
Float32(b) => b.finish(),
Float64(b) => b.finish(),
+ TimestampMicro(b) => b.finish(),
+ TimestampMicroNtz(b) => b.finish(),
+ TimestampNano(b) => b.finish(),
+ TimestampNanoNtz(b) => b.finish(),
+ Date(b) => b.finish(),
}
}
}
@@ -146,6 +175,7 @@ pub(crate) fn
make_primitive_variant_to_arrow_row_builder<'a>(
use PrimitiveVariantToArrowRowBuilder::*;
let builder = match data_type {
+ DataType::Boolean =>
Boolean(VariantToBooleanArrowRowBuilder::new(cast_options, capacity)),
DataType::Int8 => Int8(VariantToPrimitiveArrowRowBuilder::new(
cast_options,
capacity,
@@ -190,6 +220,22 @@ pub(crate) fn
make_primitive_variant_to_arrow_row_builder<'a>(
cast_options,
capacity,
)),
+ DataType::Timestamp(TimeUnit::Microsecond, None) => TimestampMicroNtz(
+ VariantToTimestampNtzArrowRowBuilder::new(cast_options, capacity),
+ ),
+ DataType::Timestamp(TimeUnit::Microsecond, tz) => TimestampMicro(
+ VariantToTimestampArrowRowBuilder::new(cast_options, capacity,
tz.clone()),
+ ),
+ DataType::Timestamp(TimeUnit::Nanosecond, None) => TimestampNanoNtz(
+ VariantToTimestampNtzArrowRowBuilder::new(cast_options, capacity),
+ ),
+ DataType::Timestamp(TimeUnit::Nanosecond, tz) => TimestampNano(
+ VariantToTimestampArrowRowBuilder::new(cast_options, capacity,
tz.clone()),
+ ),
+ DataType::Date32 => Date(VariantToPrimitiveArrowRowBuilder::new(
+ cast_options,
+ capacity,
+ )),
_ if data_type.is_primitive() => {
return Err(ArrowError::NotYetImplemented(format!(
"Primitive data_type {data_type:?} not yet implemented"
@@ -293,55 +339,98 @@ fn get_type_name<T: ArrowPrimitiveType>() -> &'static str
{
"arrow_array::types::Float32Type" => "Float32",
"arrow_array::types::Float64Type" => "Float64",
"arrow_array::types::Float16Type" => "Float16",
+ "arrow_array::types::TimestampMicrosecondType" =>
"Timestamp(Microsecond)",
+ "arrow_array::types::TimestampNanosecondType" =>
"Timestamp(Nanosecond)",
+ "arrow_array::types::Date32Type" => "Date32",
_ => "Unknown",
}
}
-/// Builder for converting variant values to primitive values
-pub(crate) struct VariantToPrimitiveArrowRowBuilder<'a, T:
PrimitiveFromVariant> {
- builder: arrow::array::PrimitiveBuilder<T>,
- cast_options: &'a CastOptions<'a>,
-}
-
-impl<'a, T: PrimitiveFromVariant> VariantToPrimitiveArrowRowBuilder<'a, T> {
- fn new(cast_options: &'a CastOptions<'a>, capacity: usize) -> Self {
- Self {
- builder: PrimitiveBuilder::<T>::with_capacity(capacity),
- cast_options,
+macro_rules! define_variant_to_primitive_builder {
+ (struct $name:ident<$lifetime:lifetime $(, $generic:ident: $bound:path )?>
+ |$array_param:ident $(, $field:ident: $field_type:ty)?| ->
$builder_name:ident $(< $array_type:ty >)? { $init_expr: expr },
+ |$value: ident| $value_transform:expr,
+ type_name: $type_name:expr) => {
+ pub(crate) struct $name<$lifetime $(, $generic : $bound )?>
+ {
+ builder: $builder_name $(<$array_type>)?,
+ cast_options: &$lifetime CastOptions<$lifetime>,
}
- }
-}
-impl<'a, T: PrimitiveFromVariant> VariantToPrimitiveArrowRowBuilder<'a, T> {
- fn append_null(&mut self) -> Result<()> {
- self.builder.append_null();
- Ok(())
- }
+ impl<$lifetime $(, $generic: $bound+ )?> $name<$lifetime $(, $generic
)?> {
+ fn new(
+ cast_options: &$lifetime CastOptions<$lifetime>,
+ $array_param: usize,
+ // add this so that $init_expr can use it
+ $( $field: $field_type, )?
+ ) -> Self {
+ Self {
+ builder: $init_expr,
+ cast_options,
+ }
+ }
- fn append_value(&mut self, value: &Variant<'_, '_>) -> Result<bool> {
- if let Some(v) = T::from_variant(value) {
- self.builder.append_value(v);
- Ok(true)
- } else {
- if !self.cast_options.safe {
- // Unsafe casting: return error on conversion failure
- return Err(ArrowError::CastError(format!(
- "Failed to extract primitive of type {} from variant {:?}
at path VariantPath([])",
- get_type_name::<T>(),
- value
- )));
+ fn append_null(&mut self) -> Result<()> {
+ self.builder.append_null();
+ Ok(())
}
- // Safe casting: append null on conversion failure
- self.builder.append_null();
- Ok(false)
- }
- }
- fn finish(mut self) -> Result<ArrayRef> {
- Ok(Arc::new(self.builder.finish()))
+ fn append_value(&mut self, $value: &Variant<'_, '_>) ->
Result<bool> {
+ if let Some(v) = $value_transform {
+ self.builder.append_value(v);
+ Ok(true)
+ } else {
+ if !self.cast_options.safe {
+ // Unsafe casting: return error on conversion failure
+ return Err(ArrowError::CastError(format!(
+ "Failed to extract primitive of type {} from
variant {:?} at path VariantPath([])",
+ $type_name,
+ $value
+ )));
+ }
+ // Safe casting: append null on conversion failure
+ self.builder.append_null();
+ Ok(false)
+ }
+ }
+
+ fn finish(mut self) -> Result<ArrayRef> {
+ Ok(Arc::new(self.builder.finish()))
+ }
+ }
}
}
+define_variant_to_primitive_builder!(
+ struct VariantToBooleanArrowRowBuilder<'a>
+ |capacity| -> BooleanBuilder { BooleanBuilder::with_capacity(capacity) },
+ |value| value.as_boolean(),
+ type_name: "Boolean"
+);
+
+define_variant_to_primitive_builder!(
+ struct VariantToPrimitiveArrowRowBuilder<'a, T:PrimitiveFromVariant>
+ |capacity| -> PrimitiveBuilder<T> {
PrimitiveBuilder::<T>::with_capacity(capacity) },
+ |value| T::from_variant(value),
+ type_name: get_type_name::<T>()
+);
+
+define_variant_to_primitive_builder!(
+ struct VariantToTimestampNtzArrowRowBuilder<'a,
T:TimestampFromVariant<true>>
+ |capacity| -> PrimitiveBuilder<T> {
PrimitiveBuilder::<T>::with_capacity(capacity) },
+ |value| T::from_variant(value),
+ type_name: get_type_name::<T>()
+);
+
+define_variant_to_primitive_builder!(
+ struct VariantToTimestampArrowRowBuilder<'a, T:TimestampFromVariant<false>>
+ |capacity, tz: Option<Arc<str>> | -> PrimitiveBuilder<T> {
+ PrimitiveBuilder::<T>::with_capacity(capacity).with_timezone_opt(tz)
+ },
+ |value| T::from_variant(value),
+ type_name: get_type_name::<T>()
+);
+
/// Builder for creating VariantArray output (for path extraction without type
conversion)
pub(crate) struct VariantToBinaryVariantArrowRowBuilder {
metadata: BinaryViewArray,
diff --git a/parquet-variant/src/variant.rs b/parquet-variant/src/variant.rs
index 5663ab155c..aa3eb51ed3 100644
--- a/parquet-variant/src/variant.rs
+++ b/parquet-variant/src/variant.rs
@@ -533,8 +533,8 @@ impl<'m, 'v> Variant<'m, 'v> {
/// Converts this variant to a `DateTime<Utc>` if possible.
///
- /// Returns `Some(DateTime<Utc>)` for timestamp variants,
- /// `None` for non-timestamp variants.
+ /// Returns `Some(DateTime<Utc>)` for [`Variant::TimestampMicros`]
variants,
+ /// `None` for other variants.
///
/// # Examples
///
@@ -543,20 +543,101 @@ impl<'m, 'v> Variant<'m, 'v> {
/// use chrono::NaiveDate;
///
/// // you can extract a DateTime<Utc> from a UTC-adjusted variant
- /// let datetime = NaiveDate::from_ymd_opt(2025, 4,
16).unwrap().and_hms_milli_opt(12, 34, 56, 780).unwrap().and_utc();
+ /// let datetime = NaiveDate::from_ymd_opt(2025, 4, 16)
+ /// .unwrap()
+ /// .and_hms_milli_opt(12, 34, 56, 780)
+ /// .unwrap()
+ /// .and_utc();
/// let v1 = Variant::from(datetime);
- /// assert_eq!(v1.as_datetime_utc(), Some(datetime));
- /// let datetime_nanos = NaiveDate::from_ymd_opt(2025, 8,
14).unwrap().and_hms_nano_opt(12, 33, 54, 123456789).unwrap().and_utc();
+ /// assert_eq!(v1.as_timestamp_micros(), Some(datetime));
+ ///
+ /// // but not for other variants.
+ /// let datetime_nanos = NaiveDate::from_ymd_opt(2025, 8, 14)
+ /// .unwrap()
+ /// .and_hms_nano_opt(12, 33, 54, 123456789)
+ /// .unwrap()
+ /// .and_utc();
/// let v2 = Variant::from(datetime_nanos);
- /// assert_eq!(v2.as_datetime_utc(), Some(datetime_nanos));
+ /// assert_eq!(v2.as_timestamp_micros(), None);
+ /// ```
+ pub fn as_timestamp_micros(&self) -> Option<DateTime<Utc>> {
+ match *self {
+ Variant::TimestampMicros(d) => Some(d),
+ _ => None,
+ }
+ }
+
+ /// Converts this variant to a `NaiveDateTime` if possible.
///
- /// // but not from other variants
+ /// Returns `Some(NaiveDateTime)` for [`Variant::TimestampNtzMicros`]
variants,
+ /// `None` for other variants.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use parquet_variant::Variant;
+ /// use chrono::NaiveDate;
+ ///
+ /// // you can extract a NaiveDateTime from a non-UTC-adjusted variant
+ /// let datetime = NaiveDate::from_ymd_opt(2025, 4, 16)
+ /// .unwrap()
+ /// .and_hms_milli_opt(12, 34, 56, 780)
+ /// .unwrap();
+ /// let v1 = Variant::from(datetime);
+ /// assert_eq!(v1.as_timestamp_ntz_micros(), Some(datetime));
+ ///
+ /// // but not for other variants.
+ /// let datetime_nanos = NaiveDate::from_ymd_opt(2025, 8, 14)
+ /// .unwrap()
+ /// .and_hms_nano_opt(12, 33, 54, 123456789)
+ /// .unwrap();
+ /// let v2 = Variant::from(datetime_nanos);
+ /// assert_eq!(v2.as_timestamp_ntz_micros(), None);
+ /// ```
+ pub fn as_timestamp_ntz_micros(&self) -> Option<NaiveDateTime> {
+ match *self {
+ Variant::TimestampNtzMicros(d) => Some(d),
+ _ => None,
+ }
+ }
+
+ /// Converts this variant to a `DateTime<Utc>` if possible.
+ ///
+ /// Returns `Some(DateTime<Utc>)` for [`Variant::TimestampNanos`] and
+ /// [`Variant::TimestampMicros`] variants, `None` for other variants.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use parquet_variant::Variant;
+ /// use chrono::NaiveDate;
+ ///
+ /// // you can extract a DateTime<Utc> from a UTC-adjusted
nanosecond-precision variant
+ /// let datetime = NaiveDate::from_ymd_opt(2025, 4, 16)
+ /// .unwrap()
+ /// .and_hms_nano_opt(12, 34, 56, 789123456)
+ /// .unwrap()
+ /// .and_utc();
+ /// let v1 = Variant::from(datetime);
+ /// assert_eq!(v1.as_timestamp_nanos(), Some(datetime));
+ ///
+ /// // or from UTC-adjusted microsecond-precision variant
+ /// let datetime_micros = NaiveDate::from_ymd_opt(2025, 8, 14)
+ /// .unwrap()
+ /// .and_hms_milli_opt(12, 33, 54, 123)
+ /// .unwrap()
+ /// .and_utc();
+ /// // this will convert to `Variant::TimestampMicros`.
+ /// let v2 = Variant::from(datetime_micros);
+ /// assert_eq!(v2.as_timestamp_nanos(), Some(datetime_micros));
+ ///
+ /// // but not for other variants.
/// let v3 = Variant::from("hello!");
- /// assert_eq!(v3.as_datetime_utc(), None);
+ /// assert_eq!(v3.as_timestamp_nanos(), None);
/// ```
- pub fn as_datetime_utc(&self) -> Option<DateTime<Utc>> {
+ pub fn as_timestamp_nanos(&self) -> Option<DateTime<Utc>> {
match *self {
- Variant::TimestampMicros(d) | Variant::TimestampNanos(d) =>
Some(d),
+ Variant::TimestampNanos(d) | Variant::TimestampMicros(d) =>
Some(d),
_ => None,
}
}
@@ -564,7 +645,7 @@ impl<'m, 'v> Variant<'m, 'v> {
/// Converts this variant to a `NaiveDateTime` if possible.
///
/// Returns `Some(NaiveDateTime)` for timestamp variants,
- /// `None` for non-timestamp variants.
+ /// `None` for other variants.
///
/// # Examples
///
@@ -573,22 +654,29 @@ impl<'m, 'v> Variant<'m, 'v> {
/// use chrono::NaiveDate;
///
/// // you can extract a NaiveDateTime from a non-UTC-adjusted variant
- /// let datetime = NaiveDate::from_ymd_opt(2025, 4,
16).unwrap().and_hms_milli_opt(12, 34, 56, 780).unwrap();
+ /// let datetime = NaiveDate::from_ymd_opt(2025, 4, 16)
+ /// .unwrap()
+ /// .and_hms_nano_opt(12, 34, 56, 789123456)
+ /// .unwrap();
/// let v1 = Variant::from(datetime);
- /// assert_eq!(v1.as_naive_datetime(), Some(datetime));
- ///
- /// // or a UTC-adjusted variant
- /// let datetime = NaiveDate::from_ymd_opt(2025, 4,
16).unwrap().and_hms_nano_opt(12, 34, 56, 123456789).unwrap();
- /// let v2 = Variant::from(datetime);
- /// assert_eq!(v2.as_naive_datetime(), Some(datetime));
- ///
- /// // but not from other variants
+ /// assert_eq!(v1.as_timestamp_ntz_nanos(), Some(datetime));
+ ///
+ /// // or from a microsecond-precision non-UTC-adjusted variant
+ /// let datetime_micros = NaiveDate::from_ymd_opt(2025, 8, 14)
+ /// .unwrap()
+ /// .and_hms_milli_opt(12, 33, 54, 123)
+ /// .unwrap();
+ /// // this will convert to `Variant::TimestampNtzMicros`.
+ /// let v2 = Variant::from(datetime_micros);
+ /// assert_eq!(v2.as_timestamp_ntz_nanos(), Some(datetime_micros));
+ ///
+ /// // but not for other variants.
/// let v3 = Variant::from("hello!");
- /// assert_eq!(v3.as_naive_datetime(), None);
+ /// assert_eq!(v3.as_timestamp_ntz_nanos(), None);
/// ```
- pub fn as_naive_datetime(&self) -> Option<NaiveDateTime> {
+ pub fn as_timestamp_ntz_nanos(&self) -> Option<NaiveDateTime> {
match *self {
- Variant::TimestampNtzMicros(d) | Variant::TimestampNtzNanos(d) =>
Some(d),
+ Variant::TimestampNtzNanos(d) | Variant::TimestampNtzMicros(d) =>
Some(d),
_ => None,
}
}