scovich commented on code in PR #8516:
URL: https://github.com/apache/arrow-rs/pull/8516#discussion_r2406764267
##########
parquet-variant/src/variant.rs:
##########
@@ -575,20 +573,74 @@ impl<'m, 'v> Variant<'m, 'v> {
/// // you can extract a NaiveDateTime from a non-UTC-adjusted variant
/// let datetime = NaiveDate::from_ymd_opt(2025, 4,
16).unwrap().and_hms_milli_opt(12, 34, 56, 780).unwrap();
/// let v1 = Variant::from(datetime);
- /// assert_eq!(v1.as_naive_datetime(), Some(datetime));
+ /// assert_eq!(v1.as_timestamp_ntz_micros(), Some(datetime));
///
- /// // or a UTC-adjusted variant
- /// let datetime = NaiveDate::from_ymd_opt(2025, 4,
16).unwrap().and_hms_nano_opt(12, 34, 56, 123456789).unwrap();
- /// let v2 = Variant::from(datetime);
- /// assert_eq!(v2.as_naive_datetime(), Some(datetime));
+ /// // but not for other variants.
+ /// let datetime_nanos = NaiveDate::from_ymd_opt(2025, 8,
14).unwrap().and_hms_nano_opt(12, 33, 54, 123456789).unwrap();
+ /// let v2 = Variant::from(datetime_nanos);
+ /// assert_eq!(v2.as_timestamp_micros(), None);
+ /// ```
+ pub fn as_timestamp_ntz_micros(&self) -> Option<NaiveDateTime> {
+ match *self {
+ Variant::TimestampNtzMicros(d) => Some(d),
+ _ => None,
+ }
+ }
+
+ /// Converts this variant to a `DateTime<Utc>` if possible.
///
- /// // but not from other variants
- /// let v3 = Variant::from("hello!");
- /// assert_eq!(v3.as_naive_datetime(), None);
+ /// Returns `Some(DateTime<Utc>)` for [`Variant::TimestampNanos`] variants,
+ /// `None` for other variants.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use parquet_variant::Variant;
+ /// use chrono::NaiveDate;
+ ///
+ /// // you can extract a DateTime<Utc> from a UTC-adjusted variant
+ /// let datetime = NaiveDate::from_ymd_opt(2025, 4,
16).unwrap().and_hms_nano_opt(12, 34, 56, 789123456).unwrap().and_utc();
+ /// let v1 = Variant::from(datetime);
+ /// assert_eq!(v1.as_timestamp_nanos(), Some(datetime));
+ ///
+ /// // but not for other variants.
+ /// let datetime_micros = NaiveDate::from_ymd_opt(2025, 8,
14).unwrap().and_hms_milli_opt(12, 33, 54, 123).unwrap().and_utc();
+ /// // this will convert to `Variant::TimestampMicros`.
+ /// let v2 = Variant::from(datetime_micros);
+ /// assert_eq!(v2.as_timestamp_nanos(), None);
+ /// ```
+ pub fn as_timestamp_nanos(&self) -> Option<DateTime<Utc>> {
+ match *self {
+ Variant::TimestampNanos(d) => Some(d),
Review Comment:
```suggestion
Variant::TimestampNanos(d) | Variant::TimestampMicros(d) =>
Some(d),
```
(they both store `DateTime<Utc>`)
(again below for ntz nanos)
##########
parquet-variant-compute/src/type_conversion.rs:
##########
@@ -38,12 +39,40 @@ pub(crate) trait PrimitiveFromVariant: ArrowPrimitiveType {
fn from_variant(variant: &Variant<'_, '_>) -> Option<Self::Native>;
}
+/// Extension trait for Arrow timestamp types that can extract their native
value from a Variant
+/// We can't use [`PrimitiveFromVariant`] directly because we might need to
use methods that
+/// are only available on [`ArrowTimestampType`] (such as with_timezone_opt)
+pub(crate) trait TimestampFromVariant<const NTZ: bool>: ArrowTimestampType {
+ fn from_variant(variant: &Variant<'_, '_>) -> Option<Self::Native>;
+}
+
+/// Extension trait that `ArrowTimestampType` handle `DateTime<Utc>` like
`NaiveDateTime`
+trait MakeValueTz: ArrowTimestampType {
+ fn make_value_tz(timestamp: DateTime<Utc>) -> Option<i64> {
Review Comment:
note: this name is fine, but because it's a different trait we should also
be able to "overload" `make_value` if you want.
##########
parquet-variant-compute/src/type_conversion.rs:
##########
@@ -38,12 +39,40 @@ pub(crate) trait PrimitiveFromVariant: ArrowPrimitiveType {
fn from_variant(variant: &Variant<'_, '_>) -> Option<Self::Native>;
}
+/// Extension trait for Arrow timestamp types that can extract their native
value from a Variant
+/// We can't use [`PrimitiveFromVariant`] directly because we might need to
use methods that
+/// are only available on [`ArrowTimestampType`] (such as with_timezone_opt)
+pub(crate) trait TimestampFromVariant<const NTZ: bool>: ArrowTimestampType {
+ fn from_variant(variant: &Variant<'_, '_>) -> Option<Self::Native>;
+}
+
+/// Extension trait that [`ArrowTimestampType`] handle [`DateTime<Utc>`] like
[`NaiveDateTime`]
+trait MakeValueTz: ArrowTimestampType {
+ fn make_value_tz(timestamp: DateTime<Utc>) -> Option<i64> {
+ Self::make_value(timestamp.naive_utc())
+ }
+}
+
+impl<T: ArrowTimestampType> MakeValueTz for T {}
+
/// Macro to generate PrimitiveFromVariant implementations for Arrow primitive
types
macro_rules! impl_primitive_from_variant {
- ($arrow_type:ty, $variant_method:ident) => {
+ ($arrow_type:ty, $variant_method:ident $(, $cast_fn:expr)?) => {
impl PrimitiveFromVariant for $arrow_type {
fn from_variant(variant: &Variant<'_, '_>) -> Option<Self::Native>
{
- variant.$variant_method()
+ let value = variant.$variant_method();
+ $( let value = value.map($cast_fn); )?
+ value
+ }
+ }
+ };
+}
+
+macro_rules! impl_timestamp_from_variant {
+ ($timestamp_type:ty, $variant_method:ident, ntz=$ntz:ident, $cast_fn:expr
$(,)?) => {
Review Comment:
The micro -> nano conversion actually happens inside
`as_timestamp_[ntz_]nanos`, I left a comment there about it.
##########
parquet-variant/src/variant.rs:
##########
@@ -575,20 +573,74 @@ impl<'m, 'v> Variant<'m, 'v> {
/// // you can extract a NaiveDateTime from a non-UTC-adjusted variant
/// let datetime = NaiveDate::from_ymd_opt(2025, 4,
16).unwrap().and_hms_milli_opt(12, 34, 56, 780).unwrap();
/// let v1 = Variant::from(datetime);
- /// assert_eq!(v1.as_naive_datetime(), Some(datetime));
+ /// assert_eq!(v1.as_timestamp_ntz_micros(), Some(datetime));
///
- /// // or a UTC-adjusted variant
- /// let datetime = NaiveDate::from_ymd_opt(2025, 4,
16).unwrap().and_hms_nano_opt(12, 34, 56, 123456789).unwrap();
- /// let v2 = Variant::from(datetime);
- /// assert_eq!(v2.as_naive_datetime(), Some(datetime));
+ /// // but not for other variants.
+ /// let datetime_nanos = NaiveDate::from_ymd_opt(2025, 8,
14).unwrap().and_hms_nano_opt(12, 33, 54, 123456789).unwrap();
+ /// let v2 = Variant::from(datetime_nanos);
+ /// assert_eq!(v2.as_timestamp_micros(), None);
+ /// ```
+ pub fn as_timestamp_ntz_micros(&self) -> Option<NaiveDateTime> {
+ match *self {
+ Variant::TimestampNtzMicros(d) => Some(d),
+ _ => None,
+ }
+ }
+
+ /// Converts this variant to a `DateTime<Utc>` if possible.
///
- /// // but not from other variants
- /// let v3 = Variant::from("hello!");
- /// assert_eq!(v3.as_naive_datetime(), None);
+ /// Returns `Some(DateTime<Utc>)` for [`Variant::TimestampNanos`] variants,
+ /// `None` for other variants.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use parquet_variant::Variant;
+ /// use chrono::NaiveDate;
+ ///
+ /// // you can extract a DateTime<Utc> from a UTC-adjusted variant
+ /// let datetime = NaiveDate::from_ymd_opt(2025, 4,
16).unwrap().and_hms_nano_opt(12, 34, 56, 789123456).unwrap().and_utc();
Review Comment:
nit: really long line...
```suggestion
/// let datetime = NaiveDate::from_ymd_opt(2025, 4, 16)
/// .unwrap()
/// .and_hms_nano_opt(12, 34, 56, 789123456)
/// .unwrap()
/// .and_utc();
```
(several others in the doc tests)
##########
parquet-variant-compute/src/type_conversion.rs:
##########
@@ -38,12 +39,40 @@ pub(crate) trait PrimitiveFromVariant: ArrowPrimitiveType {
fn from_variant(variant: &Variant<'_, '_>) -> Option<Self::Native>;
}
+/// Extension trait for Arrow timestamp types that can extract their native
value from a Variant
+/// We can't use [`PrimitiveFromVariant`] directly because we might need to
use methods that
+/// are only available on [`ArrowTimestampType`] (such as with_timezone_opt)
+pub(crate) trait TimestampFromVariant<const NTZ: bool>: ArrowTimestampType {
Review Comment:
nit: AFAIK, `with_timezone_opt` isn't the reason we need this trait -- we
could always use the trait bound `PrimitiveFromVariant + ArrowTimestampType` to
get that. The problem is, we need _two_ implementations for each type --
captured by the `NTZ` param here -- which the other trait cannot express
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]