doki23 commented on code in PR #3762:
URL: https://github.com/apache/arrow-rs/pull/3762#discussion_r1127332823


##########
arrow-cast/src/parse.rs:
##########
@@ -445,6 +446,237 @@ impl Parser for Date64Type {
     }
 }
 
+pub fn parse_interval_year_month(
+    value: &str,
+) -> Result<<IntervalYearMonthType as ArrowPrimitiveType>::Native, ArrowError> 
{
+    let (result_months, result_days, result_nanos) = parse_interval("years", 
value)?;
+    if result_days != 0 || result_nanos != 0 {
+        return Err(ArrowError::CastError(format!(
+            "Cannot cast {value} to IntervalYearMonth because the value isn't 
multiple of months"
+        )));
+    }
+    Ok(IntervalYearMonthType::make_value(0, result_months))
+}
+
+pub fn parse_interval_day_time(
+    value: &str,
+) -> Result<<IntervalDayTimeType as ArrowPrimitiveType>::Native, ArrowError> {
+    let (result_months, mut result_days, result_nanos) = 
parse_interval("days", value)?;
+    if result_nanos % 1_000_000 != 0 {
+        return Err(ArrowError::CastError(format!(
+            "Cannot cast {value} to IntervalDayTime because the nanos part 
isn't multiple of milliseconds"
+        )));
+    }
+    result_days += result_months * 30;
+    Ok(IntervalDayTimeType::make_value(
+        result_days,
+        (result_nanos / 1_000_000) as i32,
+    ))
+}
+
+pub fn parse_interval_month_day_nano(
+    value: &str,
+) -> Result<<IntervalMonthDayNanoType as ArrowPrimitiveType>::Native, 
ArrowError> {
+    let (result_months, result_days, result_nanos) = parse_interval("months", 
value)?;
+    Ok(IntervalMonthDayNanoType::make_value(
+        result_months,
+        result_days,
+        result_nanos,
+    ))
+}
+
+const SECONDS_PER_HOUR: f64 = 3_600_f64;
+const NANOS_PER_MILLIS: f64 = 1_000_000_f64;
+const NANOS_PER_SECOND: f64 = 1_000_f64 * NANOS_PER_MILLIS;
+const NANOS_PER_MINUTE: f64 = 60_f64 * NANOS_PER_SECOND;
+const NANOS_PER_HOUR: f64 = 60_f64 * NANOS_PER_MINUTE;
+const NANOS_PER_DAY: f64 = 24_f64 * NANOS_PER_HOUR;
+
+#[derive(Clone, Copy)]
+#[repr(u16)]
+enum IntervalType {
+    Century = 0b_00_0000_0001,
+    Decade = 0b_00_0000_0010,
+    Year = 0b_00_0000_0100,
+    Month = 0b_00_0000_1000,
+    Week = 0b_00_0001_0000,
+    Day = 0b_00_0010_0000,
+    Hour = 0b_00_0100_0000,
+    Minute = 0b_00_1000_0000,
+    Second = 0b_01_0000_0000,
+    Millisecond = 0b_10_0000_0000,
+}
+
+impl FromStr for IntervalType {
+    type Err = ArrowError;
+
+    fn from_str(s: &str) -> Result<Self, ArrowError> {
+        match s.to_lowercase().as_str() {
+            "century" | "centuries" => Ok(Self::Century),
+            "decade" | "decades" => Ok(Self::Decade),
+            "year" | "years" => Ok(Self::Year),
+            "month" | "months" => Ok(Self::Month),
+            "week" | "weeks" => Ok(Self::Week),
+            "day" | "days" => Ok(Self::Day),
+            "hour" | "hours" => Ok(Self::Hour),
+            "minute" | "minutes" => Ok(Self::Minute),
+            "second" | "seconds" => Ok(Self::Second),
+            "millisecond" | "milliseconds" => Ok(Self::Millisecond),
+            _ => Err(ArrowError::NotYetImplemented(format!(
+                "Unknown interval type: {s}"
+            ))),
+        }
+    }
+}
+
+pub type MonthDayNano = (i32, i32, i64);
+
+/// parse string value to a triple of aligned months, days, nanos.
+/// Fractional units must be spilled to smaller units.
+/// Fractional parts of units greater than months are rounded to be an integer 
number of months,
+/// e.g. '1.5 years' becomes '12 mons + 6 mons', returns (18, 0, 0)
+/// Fractional parts of months, weeks, days, hours, minutes, seconds and 
milliseconds are computed
+/// to be an integer number of days and nanoseconds, assuming 30 days per 
month and 24 hours per day,
+/// e.g., '1.75 months' becomes '1 mon + 22 days + 12 hours', returns (1, 22, 
12 * `NANOS_PER_HOUR`)
+/// leading field is the default unit. e.g. leading field is `second`, `1` = 
`1 second`
+fn parse_interval(leading_field: &str, value: &str) -> Result<MonthDayNano, 
ArrowError> {
+    let mut used_interval_types = 0;
+
+    let mut calculate_from_part = |interval_period_str: &str,
+                                   interval_type: &str|
+     -> Result<(i32, i32, i64), ArrowError> {
+        // @todo It's better to use Decimal in order to protect rounding errors
+        // Wait https://github.com/apache/arrow/pull/9232
+        let interval_period = match f64::from_str(interval_period_str) {
+            Ok(n) => n,
+            Err(_) => {
+                return Err(ArrowError::NotYetImplemented(format!(
+                    "Unsupported Interval Expression with value {value:?}"
+                )));
+            }
+        };
+
+        if interval_period > (i64::MAX as f64) {
+            return Err(ArrowError::ParseError(format!(
+                "Interval field value out of range: {value:?}"
+            )));
+        }
+
+        let it = IntervalType::from_str(interval_type).map_err(|_| {
+            ArrowError::ParseError(format!(
+                "Invalid input syntax for type interval: {value:?}"
+            ))
+        })?;
+
+        // Disallow duplicate interval types
+        if used_interval_types & (it as u16) != 0 {
+            return Err(ArrowError::ParseError(format!(
+                "Invalid input syntax for type interval: {value:?}. Repeated 
type '{interval_type}'"
+            )));
+        } else {
+            used_interval_types |= it as u16;
+        }
+
+        match it {
+            IntervalType::Century => {
+                align_interval_parts(interval_period * 1200_f64, 0.0, 0.0)
+            }
+            IntervalType::Decade => {
+                align_interval_parts(interval_period * 120_f64, 0.0, 0.0)
+            }
+            IntervalType::Year => {
+                align_interval_parts(interval_period * 12_f64, 0.0, 0.0)
+            }
+            IntervalType::Month => align_interval_parts(interval_period, 0.0, 
0.0),
+            IntervalType::Week => align_interval_parts(0.0, interval_period * 
7_f64, 0.0),
+            IntervalType::Day => align_interval_parts(0.0, interval_period, 
0.0),
+            IntervalType::Hour => Ok((
+                0,
+                0,
+                (interval_period * SECONDS_PER_HOUR * NANOS_PER_SECOND) as i64,
+            )),
+            IntervalType::Minute => {
+                Ok((0, 0, (interval_period * 60_f64 * NANOS_PER_SECOND) as 
i64))
+            }
+            IntervalType::Second => {
+                Ok((0, 0, (interval_period * NANOS_PER_SECOND) as i64))
+            }
+            IntervalType::Millisecond => {
+                Ok((0, 0, (interval_period * 1_000_000f64) as i64))
+            }
+        }
+    };
+
+    let mut result_month: i32 = 0;
+    let mut result_days: i32 = 0;
+    let mut result_nanos: i64 = 0;
+
+    let mut parts = value.split_whitespace();
+
+    while let Some(interval_period_str) = parts.next() {
+        let unit = parts.next().unwrap_or(leading_field);
+
+        let (diff_month, diff_days, diff_nanos) =
+            calculate_from_part(interval_period_str, unit)?;
+
+        result_month =
+            result_month
+                .checked_add(diff_month)
+                .ok_or(ArrowError::ParseError(format!(
+                    "Interval field value out of range: {value:?}"
+                )))?;
+
+        result_days =
+            result_days
+                .checked_add(diff_days)
+                .ok_or(ArrowError::ParseError(format!(
+                    "Interval field value out of range: {value:?}"
+                )))?;
+
+        result_nanos =
+            result_nanos
+                .checked_add(diff_nanos)
+                .ok_or(ArrowError::ParseError(format!(
+                    "Interval field value out of range: {value:?}"
+                )))?;
+    }
+
+    Ok((result_month, result_days, result_nanos))
+}
+
+/// We are storing parts as integers, it's why we need to align parts 
fractional
+/// INTERVAL '0.5 MONTH' = 15 days, INTERVAL '1.5 MONTH' = 1 month 15 days
+/// INTERVAL '0.5 DAY' = 12 hours, INTERVAL '1.5 DAY' = 1 day 12 hours
+/// INTERVAL '30 DAYS' = 1 MONTH
+fn align_interval_parts(
+    mut month_part: f64,
+    mut day_part: f64,
+    mut nanos_part: f64,
+) -> Result<(i32, i32, i64), ArrowError> {
+    // Convert fractional month to days, It's not supported by Arrow types, 
but anyway
+    day_part += (month_part - (month_part as i64) as f64) * 30_f64;
+
+    // Convert fractional days to hours
+    nanos_part += (day_part - ((day_part as i64) as f64))
+        * 24_f64
+        * SECONDS_PER_HOUR
+        * NANOS_PER_SECOND;
+
+    // Convert to higher units as much as possible
+    day_part += ((nanos_part as i64) / (NANOS_PER_DAY as i64)) as f64;
+    month_part += ((day_part as i64) / 30_i64) as f64;
+    nanos_part %= NANOS_PER_DAY;
+    day_part %= 30_f64;

Review Comment:
   I agree. However, it would also affect `parse_interval_year_month`: we would
   then be unable to parse `30 days` into an `IntervalYearMonthType`.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to