This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 14544fb95 Support for casting `Utf8` and `LargeUtf8` -->  `Interval` 
(#3762)
14544fb95 is described below

commit 14544fb959fd8bf8a733e137302a567ca9381b95
Author: Jie Han <[email protected]>
AuthorDate: Tue Mar 7 18:37:21 2023 +0800

    Support for casting `Utf8` and `LargeUtf8` -->  `Interval` (#3762)
    
    * cast string to interval
    
    * cast string to interval
    
    * unit tests
    
    * fix
    
    * update
    
    * code clean
    
    * update unit tests and align_interval_parts
    
    * fix ut
    
    * make clippy happy
    
    * Update arrow-cast/src/parse.rs
    
    Co-authored-by: Raphael Taylor-Davies 
<[email protected]>
    
    * change return types of calculate_from_part and fix bug of 
align_interval_parts
    
    * make clippy happy
    
    * remove useless overflow check
    
    * remove the "convert to higher units" logic
    
    ---------
    
    Co-authored-by: Raphael Taylor-Davies 
<[email protected]>
---
 arrow-cast/src/cast.rs  | 293 +++++++++++++++++++++++++++++++++++++++-
 arrow-cast/src/parse.rs | 349 ++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 641 insertions(+), 1 deletion(-)

diff --git a/arrow-cast/src/cast.rs b/arrow-cast/src/cast.rs
index af192fdd5..ae9016654 100644
--- a/arrow-cast/src/cast.rs
+++ b/arrow-cast/src/cast.rs
@@ -40,7 +40,10 @@ use std::cmp::Ordering;
 use std::sync::Arc;
 
 use crate::display::{array_value_to_string, ArrayFormatter, FormatOptions};
-use crate::parse::string_to_timestamp_nanos;
+use crate::parse::{
+    parse_interval_day_time, parse_interval_month_day_nano, 
parse_interval_year_month,
+    string_to_timestamp_nanos,
+};
 use arrow_array::{
     builder::*, cast::*, temporal_conversions::*, timezone::Tz, types::*, *,
 };
@@ -170,6 +173,7 @@ pub fn can_cast_types(from_type: &DataType, to_type: 
&DataType) -> bool {
             | Timestamp(TimeUnit::Millisecond, _)
             | Timestamp(TimeUnit::Microsecond, _)
             | Timestamp(TimeUnit::Nanosecond, _)
+            | Interval(_)
         ) => true,
         (Utf8, _) => to_type.is_numeric() && to_type != &Float16,
         (LargeUtf8,
@@ -186,6 +190,7 @@ pub fn can_cast_types(from_type: &DataType, to_type: 
&DataType) -> bool {
             | Timestamp(TimeUnit::Millisecond, _)
             | Timestamp(TimeUnit::Microsecond, _)
             | Timestamp(TimeUnit::Nanosecond, _)
+            | Interval(_)
         ) => true,
         (LargeUtf8, _) => to_type.is_numeric() && to_type != &Float16,
         (_, Utf8 | LargeUtf8) => from_type.is_primitive(),
@@ -1158,6 +1163,15 @@ pub fn cast_with_options(
             Timestamp(TimeUnit::Nanosecond, to_tz) => {
                 cast_string_to_timestamp::<i32, 
TimestampNanosecondType>(array, to_tz,cast_options)
             }
+            Interval(IntervalUnit::YearMonth) => {
+                cast_string_to_year_month_interval::<i32>(array, cast_options)
+            }
+            Interval(IntervalUnit::DayTime) => {
+                cast_string_to_day_time_interval::<i32>(array, cast_options)
+            }
+            Interval(IntervalUnit::MonthDayNano) => {
+                cast_string_to_month_day_nano_interval::<i32>(array, 
cast_options)
+            }
             _ => Err(ArrowError::CastError(format!(
                 "Casting from {from_type:?} to {to_type:?} not supported",
             ))),
@@ -1208,6 +1222,15 @@ pub fn cast_with_options(
             Timestamp(TimeUnit::Nanosecond, to_tz) => {
                 cast_string_to_timestamp::<i64, 
TimestampNanosecondType>(array, to_tz,cast_options)
             }
+            Interval(IntervalUnit::YearMonth) => {
+                cast_string_to_year_month_interval::<i64>(array, cast_options)
+            }
+            Interval(IntervalUnit::DayTime) => {
+                cast_string_to_day_time_interval::<i64>(array, cast_options)
+            }
+            Interval(IntervalUnit::MonthDayNano) => {
+                cast_string_to_month_day_nano_interval::<i64>(array, 
cast_options)
+            }
             _ => Err(ArrowError::CastError(format!(
                 "Casting from {from_type:?} to {to_type:?} not supported",
             ))),
@@ -2631,6 +2654,105 @@ fn cast_string_to_timestamp<
     Ok(Arc::new(array) as ArrayRef)
 }
 
+fn cast_string_to_year_month_interval<Offset: OffsetSizeTrait>(
+    array: &dyn Array,
+    cast_options: &CastOptions,
+) -> Result<ArrayRef, ArrowError> {
+    let string_array = array
+        .as_any()
+        .downcast_ref::<GenericStringArray<Offset>>()
+        .unwrap();
+    let interval_array = if cast_options.safe {
+        let iter = string_array
+            .iter()
+            .map(|v| v.and_then(|v| parse_interval_year_month(v).ok()));
+
+        // Benefit:
+        //     20% performance improvement
+        // Soundness:
+        //     The iterator is TrustedLen because it comes from a `StringArray`.
+        unsafe { IntervalYearMonthArray::from_trusted_len_iter(iter) }
+    } else {
+        let vec = string_array
+            .iter()
+            .map(|v| v.map(parse_interval_year_month).transpose())
+            .collect::<Result<Vec<_>, ArrowError>>()?;
+
+        // Benefit:
+        //     20% performance improvement
+        // Soundness:
+        //     The iterator is TrustedLen because it comes from a `Vec`.
+        unsafe { IntervalYearMonthArray::from_trusted_len_iter(vec) }
+    };
+    Ok(Arc::new(interval_array) as ArrayRef)
+}
+
+fn cast_string_to_day_time_interval<Offset: OffsetSizeTrait>(
+    array: &dyn Array,
+    cast_options: &CastOptions,
+) -> Result<ArrayRef, ArrowError> {
+    let string_array = array
+        .as_any()
+        .downcast_ref::<GenericStringArray<Offset>>()
+        .unwrap();
+    let interval_array = if cast_options.safe {
+        let iter = string_array
+            .iter()
+            .map(|v| v.and_then(|v| parse_interval_day_time(v).ok()));
+
+        // Benefit:
+        //     20% performance improvement
+        // Soundness:
+        //     The iterator is TrustedLen because it comes from a `StringArray`.
+        unsafe { IntervalDayTimeArray::from_trusted_len_iter(iter) }
+    } else {
+        let vec = string_array
+            .iter()
+            .map(|v| v.map(parse_interval_day_time).transpose())
+            .collect::<Result<Vec<_>, ArrowError>>()?;
+
+        // Benefit:
+        //     20% performance improvement
+        // Soundness:
+        //     The iterator is TrustedLen because it comes from a `Vec`.
+        unsafe { IntervalDayTimeArray::from_trusted_len_iter(vec) }
+    };
+    Ok(Arc::new(interval_array) as ArrayRef)
+}
+
+fn cast_string_to_month_day_nano_interval<Offset: OffsetSizeTrait>(
+    array: &dyn Array,
+    cast_options: &CastOptions,
+) -> Result<ArrayRef, ArrowError> {
+    let string_array = array
+        .as_any()
+        .downcast_ref::<GenericStringArray<Offset>>()
+        .unwrap();
+    let interval_array = if cast_options.safe {
+        let iter = string_array
+            .iter()
+            .map(|v| v.and_then(|v| parse_interval_month_day_nano(v).ok()));
+
+        // Benefit:
+        //     20% performance improvement
+        // Soundness:
+        //     The iterator is TrustedLen because it comes from a `StringArray`.
+        unsafe { IntervalMonthDayNanoArray::from_trusted_len_iter(iter) }
+    } else {
+        let vec = string_array
+            .iter()
+            .map(|v| v.map(parse_interval_month_day_nano).transpose())
+            .collect::<Result<Vec<_>, ArrowError>>()?;
+
+        // Benefit:
+        //     20% performance improvement
+        // Soundness:
+        //     The iterator is TrustedLen because it comes from a `Vec`.
+        unsafe { IntervalMonthDayNanoArray::from_trusted_len_iter(vec) }
+    };
+    Ok(Arc::new(interval_array) as ArrayRef)
+}
+
 /// Casts Utf8 to Boolean
 fn cast_utf8_to_boolean<OffsetSize>(
     from: &dyn Array,
@@ -4966,6 +5088,175 @@ mod tests {
         }
     }
 
+    macro_rules! test_safe_string_to_interval {
+        ($data_vec:expr, $interval_unit:expr, $array_ty:ty, $expect_vec:expr) 
=> {
+            let source_string_array =
+                Arc::new(StringArray::from($data_vec.clone())) as ArrayRef;
+
+            let options = CastOptions { safe: true };
+
+            let target_interval_array = cast_with_options(
+                &source_string_array.clone(),
+                &DataType::Interval($interval_unit),
+                &options,
+            )
+            .unwrap()
+            .as_any()
+            .downcast_ref::<$array_ty>()
+            .unwrap()
+            .clone() as $array_ty;
+
+            let target_string_array =
+                cast_with_options(&target_interval_array, &DataType::Utf8, 
&options)
+                    .unwrap()
+                    .as_any()
+                    .downcast_ref::<StringArray>()
+                    .unwrap()
+                    .clone();
+
+            let expect_string_array = StringArray::from($expect_vec);
+
+            assert_eq!(target_string_array, expect_string_array);
+
+            let target_large_string_array =
+                cast_with_options(&target_interval_array, 
&DataType::LargeUtf8, &options)
+                    .unwrap()
+                    .as_any()
+                    .downcast_ref::<LargeStringArray>()
+                    .unwrap()
+                    .clone();
+
+            let expect_large_string_array = 
LargeStringArray::from($expect_vec);
+
+            assert_eq!(target_large_string_array, expect_large_string_array);
+        };
+    }
+
+    #[test]
+    fn test_cast_string_to_interval_year_month() {
+        test_safe_string_to_interval!(
+            vec![
+                Some("1 year 1 month"),
+                Some("1.5 years 13 month"),
+                Some("30 days"),
+                Some("31 days"),
+                Some("2 months 31 days"),
+                Some("2 months 31 days 1 second"),
+                Some("foobar"),
+            ],
+            IntervalUnit::YearMonth,
+            IntervalYearMonthArray,
+            vec![
+                Some("1 years 1 mons 0 days 0 hours 0 mins 0.00 secs"),
+                Some("2 years 7 mons 0 days 0 hours 0 mins 0.00 secs"),
+                None,
+                None,
+                None,
+                None,
+                None,
+            ]
+        );
+    }
+
+    #[test]
+    fn test_cast_string_to_interval_day_time() {
+        test_safe_string_to_interval!(
+            vec![
+                Some("1 year 1 month"),
+                Some("1.5 years 13 month"),
+                Some("30 days"),
+                Some("1 day 2 second 3.5 milliseconds"),
+                Some("foobar"),
+            ],
+            IntervalUnit::DayTime,
+            IntervalDayTimeArray,
+            vec![
+                Some("0 years 0 mons 390 days 0 hours 0 mins 0.000 secs"),
+                Some("0 years 0 mons 930 days 0 hours 0 mins 0.000 secs"),
+                Some("0 years 0 mons 30 days 0 hours 0 mins 0.000 secs"),
+                None,
+                None,
+            ]
+        );
+    }
+
+    #[test]
+    fn test_cast_string_to_interval_month_day_nano() {
+        test_safe_string_to_interval!(
+            vec![
+                Some("1 year 1 month 1 day"),
+                None,
+                Some("1.5 years 13 month 35 days 1.4 milliseconds"),
+                Some("3 days"),
+                Some("8 seconds"),
+                None,
+                Some("1 day 29800 milliseconds"),
+                Some("3 months 1 second"),
+                Some("6 minutes 120 second"),
+                Some("2 years 39 months 9 days 19 hours 1 minute 83 seconds 
399222 milliseconds"),
+                Some("foobar"),
+            ],
+            IntervalUnit::MonthDayNano,
+            IntervalMonthDayNanoArray,
+            vec![
+                Some("0 years 13 mons 1 days 0 hours 0 mins 0.000000000 secs"),
+                None,
+                Some("0 years 31 mons 35 days 0 hours 0 mins 0.001400000 
secs"),
+                Some("0 years 0 mons 3 days 0 hours 0 mins 0.000000000 secs"),
+                Some("0 years 0 mons 0 days 0 hours 0 mins 8.000000000 secs"),
+                None,
+                Some("0 years 0 mons 1 days 0 hours 0 mins 29.800000000 secs"),
+                Some("0 years 3 mons 0 days 0 hours 0 mins 1.000000000 secs"),
+                Some("0 years 0 mons 0 days 0 hours 8 mins 0.000000000 secs"),
+                Some("0 years 63 mons 9 days 19 hours 9 mins 2.222000000 
secs"),
+                None,
+            ]
+        );
+    }
+
+    macro_rules! test_unsafe_string_to_interval_err {
+        ($data_vec:expr, $interval_unit:expr, $error_msg:expr) => {
+            let string_array = Arc::new(StringArray::from($data_vec.clone())) 
as ArrayRef;
+            let options = CastOptions { safe: false };
+            let arrow_err = cast_with_options(
+                &string_array.clone(),
+                &DataType::Interval($interval_unit),
+                &options,
+            )
+            .unwrap_err();
+            assert_eq!($error_msg, arrow_err.to_string());
+        };
+    }
+
+    #[test]
+    fn test_cast_string_to_interval_err() {
+        test_unsafe_string_to_interval_err!(
+            vec![Some("foobar")],
+            IntervalUnit::YearMonth,
+            r#"Not yet implemented: Unsupported Interval Expression with value 
"foobar""#
+        );
+        test_unsafe_string_to_interval_err!(
+            vec![Some("foobar")],
+            IntervalUnit::DayTime,
+            r#"Not yet implemented: Unsupported Interval Expression with value 
"foobar""#
+        );
+        test_unsafe_string_to_interval_err!(
+            vec![Some("foobar")],
+            IntervalUnit::MonthDayNano,
+            r#"Not yet implemented: Unsupported Interval Expression with value 
"foobar""#
+        );
+        test_unsafe_string_to_interval_err!(
+            vec![Some("2 months 31 days 1 second")],
+            IntervalUnit::YearMonth,
+            r#"Cast error: Cannot cast 2 months 31 days 1 second to 
IntervalYearMonth. Only year and month fields are allowed."#
+        );
+        test_unsafe_string_to_interval_err!(
+            vec![Some("1 day 1.5 milliseconds")],
+            IntervalUnit::DayTime,
+            r#"Cast error: Cannot cast 1 day 1.5 milliseconds to 
IntervalDayTime because the nanos part isn't multiple of milliseconds"#
+        );
+    }
+
     #[test]
     fn test_cast_string_to_binary() {
         let string_1 = "Hi";
diff --git a/arrow-cast/src/parse.rs b/arrow-cast/src/parse.rs
index f498bf142..7f6ca742d 100644
--- a/arrow-cast/src/parse.rs
+++ b/arrow-cast/src/parse.rs
@@ -20,6 +20,7 @@ use arrow_array::{ArrowNativeTypeOp, ArrowPrimitiveType};
 use arrow_buffer::ArrowNativeType;
 use arrow_schema::ArrowError;
 use chrono::prelude::*;
+use std::str::FromStr;
 
 /// Accepts a string and parses it relative to the provided `timezone`
 ///
@@ -563,6 +564,233 @@ fn is_valid_decimal(s: &str) -> bool {
     seen_digit
 }
 
+pub fn parse_interval_year_month(
+    value: &str,
+) -> Result<<IntervalYearMonthType as ArrowPrimitiveType>::Native, ArrowError> 
{
+    let (result_months, result_days, result_nanos) = parse_interval("years", 
value)?;
+    if result_days != 0 || result_nanos != 0 {
+        return Err(ArrowError::CastError(format!(
+            "Cannot cast {value} to IntervalYearMonth. Only year and month 
fields are allowed."
+        )));
+    }
+    Ok(IntervalYearMonthType::make_value(0, result_months))
+}
+
+pub fn parse_interval_day_time(
+    value: &str,
+) -> Result<<IntervalDayTimeType as ArrowPrimitiveType>::Native, ArrowError> {
+    let (result_months, mut result_days, result_nanos) = 
parse_interval("days", value)?;
+    if result_nanos % 1_000_000 != 0 {
+        return Err(ArrowError::CastError(format!(
+            "Cannot cast {value} to IntervalDayTime because the nanos part 
isn't multiple of milliseconds"
+        )));
+    }
+    result_days += result_months * 30;
+    Ok(IntervalDayTimeType::make_value(
+        result_days,
+        (result_nanos / 1_000_000) as i32,
+    ))
+}
+
+pub fn parse_interval_month_day_nano(
+    value: &str,
+) -> Result<<IntervalMonthDayNanoType as ArrowPrimitiveType>::Native, 
ArrowError> {
+    let (result_months, result_days, result_nanos) = parse_interval("months", 
value)?;
+    Ok(IntervalMonthDayNanoType::make_value(
+        result_months,
+        result_days,
+        result_nanos,
+    ))
+}
+
+const SECONDS_PER_HOUR: f64 = 3_600_f64;
+const NANOS_PER_MILLIS: f64 = 1_000_000_f64;
+const NANOS_PER_SECOND: f64 = 1_000_f64 * NANOS_PER_MILLIS;
+#[cfg(test)]
+const NANOS_PER_MINUTE: f64 = 60_f64 * NANOS_PER_SECOND;
+#[cfg(test)]
+const NANOS_PER_HOUR: f64 = 60_f64 * NANOS_PER_MINUTE;
+#[cfg(test)]
+const NANOS_PER_DAY: f64 = 24_f64 * NANOS_PER_HOUR;
+
+#[derive(Clone, Copy)]
+#[repr(u16)]
+enum IntervalType {
+    Century = 0b_00_0000_0001,
+    Decade = 0b_00_0000_0010,
+    Year = 0b_00_0000_0100,
+    Month = 0b_00_0000_1000,
+    Week = 0b_00_0001_0000,
+    Day = 0b_00_0010_0000,
+    Hour = 0b_00_0100_0000,
+    Minute = 0b_00_1000_0000,
+    Second = 0b_01_0000_0000,
+    Millisecond = 0b_10_0000_0000,
+}
+
+impl FromStr for IntervalType {
+    type Err = ArrowError;
+
+    fn from_str(s: &str) -> Result<Self, ArrowError> {
+        match s.to_lowercase().as_str() {
+            "century" | "centuries" => Ok(Self::Century),
+            "decade" | "decades" => Ok(Self::Decade),
+            "year" | "years" => Ok(Self::Year),
+            "month" | "months" => Ok(Self::Month),
+            "week" | "weeks" => Ok(Self::Week),
+            "day" | "days" => Ok(Self::Day),
+            "hour" | "hours" => Ok(Self::Hour),
+            "minute" | "minutes" => Ok(Self::Minute),
+            "second" | "seconds" => Ok(Self::Second),
+            "millisecond" | "milliseconds" => Ok(Self::Millisecond),
+            _ => Err(ArrowError::NotYetImplemented(format!(
+                "Unknown interval type: {s}"
+            ))),
+        }
+    }
+}
+
+pub type MonthDayNano = (i32, i32, i64);
+
+/// Parses a string value into a triple of aligned months, days and nanos.
+/// `leading_field` is the default unit applied to a value with no unit, e.g.
+/// `INTERVAL 1` represents `INTERVAL 1 SECOND` when leading_field = 'second'.
+fn parse_interval(leading_field: &str, value: &str) -> Result<MonthDayNano, 
ArrowError> {
+    let mut used_interval_types = 0;
+
+    let mut calculate_from_part = |interval_period_str: &str,
+                                   interval_type: &str|
+     -> Result<(i32, i32, i64), ArrowError> {
+        // TODO: Use fixed-point arithmetic to avoid truncation and rounding 
errors (#3809)
+        let interval_period = match f64::from_str(interval_period_str) {
+            Ok(n) => n,
+            Err(_) => {
+                return Err(ArrowError::NotYetImplemented(format!(
+                    "Unsupported Interval Expression with value {value:?}"
+                )));
+            }
+        };
+
+        if interval_period > (i64::MAX as f64) {
+            return Err(ArrowError::ParseError(format!(
+                "Interval field value out of range: {value:?}"
+            )));
+        }
+
+        let it = IntervalType::from_str(interval_type).map_err(|_| {
+            ArrowError::ParseError(format!(
+                "Invalid input syntax for type interval: {value:?}"
+            ))
+        })?;
+
+        // Disallow duplicate interval types
+        if used_interval_types & (it as u16) != 0 {
+            return Err(ArrowError::ParseError(format!(
+                "Invalid input syntax for type interval: {value:?}. Repeated 
type '{interval_type}'"
+            )));
+        } else {
+            used_interval_types |= it as u16;
+        }
+
+        match it {
+            IntervalType::Century => {
+                align_interval_parts(interval_period * 1200_f64, 0.0, 0.0)
+            }
+            IntervalType::Decade => {
+                align_interval_parts(interval_period * 120_f64, 0.0, 0.0)
+            }
+            IntervalType::Year => {
+                align_interval_parts(interval_period * 12_f64, 0.0, 0.0)
+            }
+            IntervalType::Month => align_interval_parts(interval_period, 0.0, 
0.0),
+            IntervalType::Week => align_interval_parts(0.0, interval_period * 
7_f64, 0.0),
+            IntervalType::Day => align_interval_parts(0.0, interval_period, 
0.0),
+            IntervalType::Hour => Ok((
+                0,
+                0,
+                (interval_period * SECONDS_PER_HOUR * NANOS_PER_SECOND) as i64,
+            )),
+            IntervalType::Minute => {
+                Ok((0, 0, (interval_period * 60_f64 * NANOS_PER_SECOND) as 
i64))
+            }
+            IntervalType::Second => {
+                Ok((0, 0, (interval_period * NANOS_PER_SECOND) as i64))
+            }
+            IntervalType::Millisecond => {
+                Ok((0, 0, (interval_period * 1_000_000f64) as i64))
+            }
+        }
+    };
+
+    let mut result_month: i32 = 0;
+    let mut result_days: i32 = 0;
+    let mut result_nanos: i64 = 0;
+
+    let mut parts = value.split_whitespace();
+
+    while let Some(interval_period_str) = parts.next() {
+        let unit = parts.next().unwrap_or(leading_field);
+
+        let (diff_month, diff_days, diff_nanos) =
+            calculate_from_part(interval_period_str, unit)?;
+
+        result_month =
+            result_month
+                .checked_add(diff_month)
+                .ok_or(ArrowError::ParseError(format!(
+                    "Interval field value out of range: {value:?}"
+                )))?;
+
+        result_days =
+            result_days
+                .checked_add(diff_days)
+                .ok_or(ArrowError::ParseError(format!(
+                    "Interval field value out of range: {value:?}"
+                )))?;
+
+        result_nanos =
+            result_nanos
+                .checked_add(diff_nanos)
+                .ok_or(ArrowError::ParseError(format!(
+                    "Interval field value out of range: {value:?}"
+                )))?;
+    }
+
+    Ok((result_month, result_days, result_nanos))
+}
+
+/// The fractional units must be spilled to smaller units.
+/// [reference Postgresql 
doc](https://www.postgresql.org/docs/15/datatype-datetime.html#DATATYPE-INTERVAL-INPUT:~:text=Field%20values%20can,fractional%20on%20output.)
+/// INTERVAL '0.5 MONTH' = 15 days, INTERVAL '1.5 MONTH' = 1 month 15 days
+/// INTERVAL '0.5 DAY' = 12 hours, INTERVAL '1.5 DAY' = 1 day 12 hours
+fn align_interval_parts(
+    month_part: f64,
+    mut day_part: f64,
+    mut nanos_part: f64,
+) -> Result<(i32, i32, i64), ArrowError> {
+    // Convert fractional months to days. This is not supported by Arrow
+    // types, but we do it anyway.
+    day_part += (month_part - (month_part as i64) as f64) * 30_f64;
+
+    // Convert fractional days to hours
+    nanos_part += (day_part - ((day_part as i64) as f64))
+        * 24_f64
+        * SECONDS_PER_HOUR
+        * NANOS_PER_SECOND;
+
+    if month_part > i32::MAX as f64
+        || month_part < i32::MIN as f64
+        || day_part > i32::MAX as f64
+        || day_part < i32::MIN as f64
+        || nanos_part > i64::MAX as f64
+        || nanos_part < i64::MIN as f64
+    {
+        return Err(ArrowError::ParseError(format!(
+            "Parsed interval field value out of range: {month_part} months 
{day_part} days {nanos_part} nanos"
+        )));
+    }
+
+    Ok((month_part as i32, day_part as i32, nanos_part as i64))
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -1019,6 +1247,127 @@ mod tests {
         );
     }
 
+    #[test]
+    fn test_parse_interval() {
+        assert_eq!(
+            (1i32, 0i32, 0i64),
+            parse_interval("months", "1 month").unwrap(),
+        );
+
+        assert_eq!(
+            (2i32, 0i32, 0i64),
+            parse_interval("months", "2 month").unwrap(),
+        );
+
+        assert_eq!(
+            (-1i32, -18i32, (-0.2 * NANOS_PER_DAY) as i64),
+            parse_interval("months", "-1.5 months -3.2 days").unwrap(),
+        );
+
+        assert_eq!(
+            (2i32, 10i32, (9.0 * NANOS_PER_HOUR) as i64),
+            parse_interval("months", "2.1 months 7.25 days 3 hours").unwrap(),
+        );
+
+        assert_eq!(
+            parse_interval("months", "1 centurys 1 month")
+                .unwrap_err()
+                .to_string(),
+            r#"Parser error: Invalid input syntax for type interval: "1 
centurys 1 month""#
+        );
+
+        assert_eq!(
+            (37i32, 0i32, 0i64),
+            parse_interval("months", "3 year 1 month").unwrap(),
+        );
+
+        assert_eq!(
+            (35i32, 0i32, 0i64),
+            parse_interval("months", "3 year -1 month").unwrap(),
+        );
+
+        assert_eq!(
+            (-37i32, 0i32, 0i64),
+            parse_interval("months", "-3 year -1 month").unwrap(),
+        );
+
+        assert_eq!(
+            (-35i32, 0i32, 0i64),
+            parse_interval("months", "-3 year 1 month").unwrap(),
+        );
+
+        assert_eq!(
+            (0i32, 5i32, 0i64),
+            parse_interval("months", "5 days").unwrap(),
+        );
+
+        assert_eq!(
+            (0i32, 7i32, (3f64 * NANOS_PER_HOUR) as i64),
+            parse_interval("months", "7 days 3 hours").unwrap(),
+        );
+
+        assert_eq!(
+            (0i32, 7i32, (5f64 * NANOS_PER_MINUTE) as i64),
+            parse_interval("months", "7 days 5 minutes").unwrap(),
+        );
+
+        assert_eq!(
+            (0i32, 7i32, (-5f64 * NANOS_PER_MINUTE) as i64),
+            parse_interval("months", "7 days -5 minutes").unwrap(),
+        );
+
+        assert_eq!(
+            (0i32, -7i32, (5f64 * NANOS_PER_HOUR) as i64),
+            parse_interval("months", "-7 days 5 hours").unwrap(),
+        );
+
+        assert_eq!(
+            (
+                0i32,
+                -7i32,
+                (-5f64 * NANOS_PER_HOUR
+                    - 5f64 * NANOS_PER_MINUTE
+                    - 5f64 * NANOS_PER_SECOND) as i64
+            ),
+            parse_interval("months", "-7 days -5 hours -5 minutes -5 
seconds").unwrap(),
+        );
+
+        assert_eq!(
+            (12i32, 0i32, (25f64 * NANOS_PER_MILLIS) as i64),
+            parse_interval("months", "1 year 25 millisecond").unwrap(),
+        );
+
+        assert_eq!(
+            (12i32, 1i32, (0.000000001 * NANOS_PER_SECOND) as i64),
+            parse_interval("months", "1 year 1 day 0.000000001 
seconds").unwrap(),
+        );
+
+        assert_eq!(
+            (12i32, 1i32, (0.1 * NANOS_PER_MILLIS) as i64),
+            parse_interval("months", "1 year 1 day 0.1 milliseconds").unwrap(),
+        );
+
+        assert_eq!(
+            (1i32, 0i32, (-NANOS_PER_SECOND) as i64),
+            parse_interval("months", "1 month -1 second").unwrap(),
+        );
+
+        assert_eq!(
+            (-13i32, -8i32, (- NANOS_PER_HOUR - NANOS_PER_MINUTE - 
NANOS_PER_SECOND - 1.11 * NANOS_PER_MILLIS) as i64),
+            parse_interval("months", "-1 year -1 month -1 week -1 day -1 hour 
-1 minute -1 second -1.11 millisecond").unwrap(),
+        );
+    }
+
+    #[test]
+    fn test_duplicate_interval_type() {
+        let err = parse_interval("months", "1 month 1 second 1 second")
+            .expect_err("parsing interval should have failed");
+        assert_eq!(
+            r#"ParseError("Invalid input syntax for type interval: \"1 month 1 
second 1 second\". Repeated type 'second'")"#,
+            format!("{err:?}")
+        );
+    }
+
     #[test]
     fn string_to_timestamp_old() {
         parse_timestamp("1677-06-14T07:29:01.256")

Reply via email to