alamb commented on a change in pull request #9373:
URL: https://github.com/apache/arrow/pull/9373#discussion_r570403114



##########
File path: rust/datafusion/src/sql/planner.rs
##########
@@ -982,6 +996,171 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
             ))),
         }
     }
+
+    fn sql_interval_to_literal(
+        &self,
+        value: &str,
+        leading_field: &Option<DateTimeField>,
+        leading_precision: &Option<u64>,
+        last_field: &Option<DateTimeField>,
+        fractional_seconds_precision: &Option<u64>,
+    ) -> Result<Expr> {
+        if leading_field.is_some() {
+            return Err(DataFusionError::NotImplemented(format!(
+                "Unsupported Interval Expression with leading_field {:?}",
+                leading_field
+            )));
+        }
+
+        if leading_precision.is_some() {
+            return Err(DataFusionError::NotImplemented(format!(
+                "Unsupported Interval Expression with leading_precision {:?}",
+                leading_precision
+            )));
+        }
+
+        if last_field.is_some() {
+            return Err(DataFusionError::NotImplemented(format!(
+                "Unsupported Interval Expression with last_field {:?}",
+                last_field
+            )));
+        }
+
+        if fractional_seconds_precision.is_some() {
+            return Err(DataFusionError::NotImplemented(format!(
+                "Unsupported Interval Expression with 
fractional_seconds_precision {:?}",
+                fractional_seconds_precision
+            )));
+        }
+
+        const SECONDS_PER_HOUR: f32 = 3_600_f32;
+        const MILLIS_PER_SECOND: f32 = 1_000_f32;
+
+        // We are storing parts as integers, it's why we need to align parts 
fractional
+        // INTERVAL '0.5 MONTH' = 15 days, INTERVAL '1.5 MONTH' = 1 month 15 
days

Review comment:
       I found it strange that the example of`1.5 months` is not supported 
whereas `0.5` and `1` are supported
   
   ```
   >  select (interval '0.5 month');
   +-------------------------------------------------+
   | IntervalDayTime("64424509440")                  |
   +-------------------------------------------------+
   | 0 years 0 mons 15 days 0 hours 0 mins 0.00 secs |
   +-------------------------------------------------+
   1 rows in set. Query took 0 seconds.
   >  select (interval '1 month');
   +------------------------------------------------+
   | IntervalYearMonth("1")                         |
   +------------------------------------------------+
   | 0 years 1 mons 0 days 0 hours 0 mins 0.00 secs |
   +------------------------------------------------+
   1 rows in set. Query took 0 seconds.
   >  select (interval '1.5 month');
   NotImplemented("DF doesnt support complex intervals: \"1.1 month\"")
   
   ```

##########
File path: rust/arrow/src/util/display.rs
##########
@@ -44,6 +44,66 @@ macro_rules! make_string {
     }};
 }
 
+macro_rules! make_string_interval_year_month {
+    ($column: ident, $row: ident) => {{
+        let array = $column
+            .as_any()
+            .downcast_ref::<array::IntervalYearMonthArray>()
+            .unwrap();
+
+        let s = if array.is_null($row) {
+            "0 years 0 mons 0 days 0 hours 0 mins 0.00 secs".to_string()
+        } else {
+            let interval = array.value($row) as f64;
+            let years = (interval / 12_f64).floor();
+            let month = interval - (years * 12_f64);
+
+            format!(
+                "{} years {} mons 0 days 0 hours 0 mins 0.00 secs",
+                years, month,
+            )
+        };
+
+        Ok(s)
+    }};
+}
+
+macro_rules! make_string_interval_day_time {
+    ($column: ident, $row: ident) => {{
+        let array = $column
+            .as_any()
+            .downcast_ref::<array::IntervalDayTimeArray>()
+            .unwrap();
+
+        let s = if array.is_null($row) {
+            "0 years 0 mons 0 days 0 hours 0 mins 0.00 secs".to_string()

Review comment:
       I wonder if `""` would be a more appropriate representation for `NULL`, 
to conform with the handling of null strings:
   
   
https://github.com/apache/arrow/pull/9373/files#diff-821be3a8a9239cdd12ab2b3c979e174f7b936aa6bfad4b9bc76489b9923cf747R164
   
   I also think `"0 years 0 mons 0 days 0 hours 0 mins 0.00 secs"` might be the 
same representation if `value = 0` (so you couldn't distinguish between `0` and 
`NULL`(

##########
File path: rust/arrow/src/util/display.rs
##########
@@ -44,6 +44,66 @@ macro_rules! make_string {
     }};
 }
 
+macro_rules! make_string_interval_year_month {
+    ($column: ident, $row: ident) => {{
+        let array = $column
+            .as_any()
+            .downcast_ref::<array::IntervalYearMonthArray>()
+            .unwrap();
+
+        let s = if array.is_null($row) {
+            "0 years 0 mons 0 days 0 hours 0 mins 0.00 secs".to_string()

Review comment:
       same comment as below about NULL vs 0 handling

##########
File path: rust/datafusion/src/sql/planner.rs
##########
@@ -982,6 +996,171 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
             ))),
         }
     }
+
+    fn sql_interval_to_literal(
+        &self,
+        value: &str,
+        leading_field: &Option<DateTimeField>,
+        leading_precision: &Option<u64>,
+        last_field: &Option<DateTimeField>,
+        fractional_seconds_precision: &Option<u64>,
+    ) -> Result<Expr> {
+        if leading_field.is_some() {
+            return Err(DataFusionError::NotImplemented(format!(
+                "Unsupported Interval Expression with leading_field {:?}",
+                leading_field
+            )));
+        }
+
+        if leading_precision.is_some() {
+            return Err(DataFusionError::NotImplemented(format!(
+                "Unsupported Interval Expression with leading_precision {:?}",
+                leading_precision
+            )));
+        }
+
+        if last_field.is_some() {
+            return Err(DataFusionError::NotImplemented(format!(
+                "Unsupported Interval Expression with last_field {:?}",
+                last_field
+            )));
+        }
+
+        if fractional_seconds_precision.is_some() {
+            return Err(DataFusionError::NotImplemented(format!(
+                "Unsupported Interval Expression with 
fractional_seconds_precision {:?}",
+                fractional_seconds_precision
+            )));
+        }
+
+        const SECONDS_PER_HOUR: f32 = 3_600_f32;
+        const MILLIS_PER_SECOND: f32 = 1_000_f32;
+
+        // We are storing parts as integers, it's why we need to align parts 
fractional
+        // INTERVAL '0.5 MONTH' = 15 days, INTERVAL '1.5 MONTH' = 1 month 15 
days

Review comment:
       I can understand why we might not support fractional months (as 
depending on the month the length of a month can not necessarily be expressed 
as an integral number of days or months).
   
   But it seems inconsitent to me to say `0.5 month` is `15 days` but `1.5 
month` is not `45 days`
   
   I think the easiest thing for me to reason about is if they both error. 

##########
File path: rust/datafusion/src/sql/planner.rs
##########
@@ -982,6 +996,171 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
             ))),
         }
     }
+
+    fn sql_interval_to_literal(
+        &self,
+        value: &str,
+        leading_field: &Option<DateTimeField>,
+        leading_precision: &Option<u64>,
+        last_field: &Option<DateTimeField>,
+        fractional_seconds_precision: &Option<u64>,
+    ) -> Result<Expr> {
+        if leading_field.is_some() {
+            return Err(DataFusionError::NotImplemented(format!(
+                "Unsupported Interval Expression with leading_field {:?}",
+                leading_field
+            )));
+        }
+
+        if leading_precision.is_some() {
+            return Err(DataFusionError::NotImplemented(format!(
+                "Unsupported Interval Expression with leading_precision {:?}",
+                leading_precision
+            )));
+        }
+
+        if last_field.is_some() {
+            return Err(DataFusionError::NotImplemented(format!(
+                "Unsupported Interval Expression with last_field {:?}",
+                last_field
+            )));
+        }
+
+        if fractional_seconds_precision.is_some() {
+            return Err(DataFusionError::NotImplemented(format!(
+                "Unsupported Interval Expression with 
fractional_seconds_precision {:?}",
+                fractional_seconds_precision
+            )));
+        }
+
+        const SECONDS_PER_HOUR: f32 = 3_600_f32;
+        const MILLIS_PER_SECOND: f32 = 1_000_f32;
+
+        // We are storing parts as integers, it's why we need to align parts 
fractional
+        // INTERVAL '0.5 MONTH' = 15 days, INTERVAL '1.5 MONTH' = 1 month 15 
days
+        // INTERVAL '0.5 DAY' = 12 hours, INTERVAL '1.5 DAY' = 1 day 12 hours
+        let align_interval_parts = |month_part: f32,
+                                    mut day_part: f32,
+                                    mut milles_part: f32|
+         -> (i32, i32, f32) {
+            // Convert fractional month to days, It's not supported by Arrow 
types, but anyway
+            day_part += (month_part - (month_part as i32) as f32) * 30_f32;
+
+            // Convert fractional days to hours
+            milles_part += (day_part - ((day_part as i32) as f32))
+                * 24_f32
+                * SECONDS_PER_HOUR
+                * MILLIS_PER_SECOND;
+
+            (month_part as i32, day_part as i32, milles_part)
+        };
+
+        let calculate_from_part = |interval_period_str: &str,
+                                   interval_type: &str|
+         -> Result<(i32, i32, f32)> {
+            let interval_period = match f32::from_str(interval_period_str) {
+                Ok(n) => n,
+                Err(_) => {
+                    return Err(DataFusionError::SQL(ParserError(format!(
+                        "Unsupported Interval Expression with value {:?}",
+                        value
+                    ))))
+                }
+            };
+
+            if interval_period > (i32::MAX as f32) {
+                return Err(DataFusionError::NotImplemented(format!(
+                    "Interval field value out of range: {:?}",
+                    value
+                )));
+            }
+
+            match interval_type.to_lowercase().as_str() {
+                "year" => Ok(align_interval_parts(interval_period * 12_f32, 
0.0, 0.0)),
+                "month" => Ok(align_interval_parts(interval_period, 0.0, 0.0)),
+                "day" | "days" => Ok(align_interval_parts(0.0, 
interval_period, 0.0)),
+                "hour" | "hours" => {
+                    Ok((0, 0, interval_period * SECONDS_PER_HOUR * 
MILLIS_PER_SECOND))
+                }
+                "minutes" | "minute" => {
+                    Ok((0, 0, interval_period * 60_f32 * MILLIS_PER_SECOND))
+                }
+                "seconds" | "second" => Ok((0, 0, interval_period * 
MILLIS_PER_SECOND)),
+                "milliseconds" | "millisecond" => Ok((0, 0, interval_period)),
+                _ => Err(DataFusionError::NotImplemented(format!(
+                    "Invalid input syntax for type interval: {:?}",
+                    value
+                ))),
+            }
+        };
+
+        let mut result_month: i64 = 0;
+        let mut result_days: i64 = 0;
+        let mut result_millis: i64 = 0;
+
+        let mut parts = value.split_whitespace();
+
+        loop {
+            let interval_period_str = parts.next();
+            if interval_period_str.is_none() {
+                break;
+            }
+
+            let (diff_month, diff_days, diff_millis) = calculate_from_part(
+                interval_period_str.unwrap(),
+                parts.next().unwrap_or("second"),
+            )?;
+
+            result_month += diff_month as i64;
+
+            if result_month > (i32::MAX as i64) {
+                return Err(DataFusionError::NotImplemented(format!(
+                    "Interval field value out of range: {:?}",
+                    value
+                )));
+            }
+
+            result_days += diff_days as i64;
+
+            if result_days > (i32::MAX as i64) {
+                return Err(DataFusionError::NotImplemented(format!(
+                    "Interval field value out of range: {:?}",
+                    value
+                )));
+            }
+
+            result_millis += diff_millis as i64;
+
+            if result_millis > (i32::MAX as i64) {
+                return Err(DataFusionError::NotImplemented(format!(
+                    "Interval field value out of range: {:?}",
+                    value
+                )));
+            }
+        }
+
+        // Interval is tricky thing
+        // 1 day is not 24 hours because timezones, 1 year != 365/364! 30 days 
!= 1 month
+        // The true way to store and calculate intervals is to store it as it 
defined
+        // Due the fact that Arrow supports only two types YearMonth (month) 
and DayTime (day, time)
+        // It's not possible to store complex intervals
+        // It's possible to do select (NOW() + INTERVAL '1 year') + INTERVAL 
'1 day'; as workaround
+        if result_month != 0 && (result_days != 0 || result_millis != 0) {
+            return Err(DataFusionError::NotImplemented(format!(
+                "DF doesnt support complex intervals: {:?}",

Review comment:
       ```suggestion
                   "DF does not support intervals that have both a Year/Month 
part as well as Days/Hours/Mins/Seconds: {:?}. Hint: try breaking the interval 
into two parts, one with Year/Month and the other with Days/Hours/Mins/Seconds 
- e.g. (NOW() + INTERVAL '1 year') + INTERVAL '1 day'",
   ```
   
   Maybe too much in an error message but as I user of DataFusion I would 
appreciate having this context :)

##########
File path: rust/datafusion/src/sql/planner.rs
##########
@@ -982,6 +996,171 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
             ))),
         }
     }
+
+    fn sql_interval_to_literal(
+        &self,
+        value: &str,
+        leading_field: &Option<DateTimeField>,
+        leading_precision: &Option<u64>,
+        last_field: &Option<DateTimeField>,
+        fractional_seconds_precision: &Option<u64>,
+    ) -> Result<Expr> {
+        if leading_field.is_some() {
+            return Err(DataFusionError::NotImplemented(format!(
+                "Unsupported Interval Expression with leading_field {:?}",
+                leading_field
+            )));
+        }
+
+        if leading_precision.is_some() {
+            return Err(DataFusionError::NotImplemented(format!(
+                "Unsupported Interval Expression with leading_precision {:?}",
+                leading_precision
+            )));
+        }
+
+        if last_field.is_some() {
+            return Err(DataFusionError::NotImplemented(format!(
+                "Unsupported Interval Expression with last_field {:?}",
+                last_field
+            )));
+        }
+
+        if fractional_seconds_precision.is_some() {
+            return Err(DataFusionError::NotImplemented(format!(
+                "Unsupported Interval Expression with 
fractional_seconds_precision {:?}",
+                fractional_seconds_precision
+            )));
+        }
+
+        const SECONDS_PER_HOUR: f32 = 3_600_f32;
+        const MILLIS_PER_SECOND: f32 = 1_000_f32;
+
+        // We are storing parts as integers, it's why we need to align parts 
fractional
+        // INTERVAL '0.5 MONTH' = 15 days, INTERVAL '1.5 MONTH' = 1 month 15 
days
+        // INTERVAL '0.5 DAY' = 12 hours, INTERVAL '1.5 DAY' = 1 day 12 hours
+        let align_interval_parts = |month_part: f32,
+                                    mut day_part: f32,
+                                    mut milles_part: f32|
+         -> (i32, i32, f32) {
+            // Convert fractional month to days, It's not supported by Arrow 
types, but anyway
+            day_part += (month_part - (month_part as i32) as f32) * 30_f32;
+
+            // Convert fractional days to hours
+            milles_part += (day_part - ((day_part as i32) as f32))
+                * 24_f32
+                * SECONDS_PER_HOUR
+                * MILLIS_PER_SECOND;
+
+            (month_part as i32, day_part as i32, milles_part)
+        };
+
+        let calculate_from_part = |interval_period_str: &str,
+                                   interval_type: &str|
+         -> Result<(i32, i32, f32)> {
+            let interval_period = match f32::from_str(interval_period_str) {
+                Ok(n) => n,
+                Err(_) => {
+                    return Err(DataFusionError::SQL(ParserError(format!(
+                        "Unsupported Interval Expression with value {:?}",
+                        value
+                    ))))
+                }
+            };
+
+            if interval_period > (i32::MAX as f32) {
+                return Err(DataFusionError::NotImplemented(format!(
+                    "Interval field value out of range: {:?}",
+                    value
+                )));
+            }
+
+            match interval_type.to_lowercase().as_str() {
+                "year" => Ok(align_interval_parts(interval_period * 12_f32, 
0.0, 0.0)),
+                "month" => Ok(align_interval_parts(interval_period, 0.0, 0.0)),
+                "day" | "days" => Ok(align_interval_parts(0.0, 
interval_period, 0.0)),
+                "hour" | "hours" => {
+                    Ok((0, 0, interval_period * SECONDS_PER_HOUR * 
MILLIS_PER_SECOND))
+                }
+                "minutes" | "minute" => {
+                    Ok((0, 0, interval_period * 60_f32 * MILLIS_PER_SECOND))
+                }
+                "seconds" | "second" => Ok((0, 0, interval_period * 
MILLIS_PER_SECOND)),
+                "milliseconds" | "millisecond" => Ok((0, 0, interval_period)),
+                _ => Err(DataFusionError::NotImplemented(format!(
+                    "Invalid input syntax for type interval: {:?}",
+                    value
+                ))),
+            }
+        };
+
+        let mut result_month: i64 = 0;
+        let mut result_days: i64 = 0;
+        let mut result_millis: i64 = 0;
+
+        let mut parts = value.split_whitespace();
+
+        loop {
+            let interval_period_str = parts.next();
+            if interval_period_str.is_none() {
+                break;
+            }
+
+            let (diff_month, diff_days, diff_millis) = calculate_from_part(
+                interval_period_str.unwrap(),
+                parts.next().unwrap_or("second"),
+            )?;
+
+            result_month += diff_month as i64;
+
+            if result_month > (i32::MAX as i64) {
+                return Err(DataFusionError::NotImplemented(format!(
+                    "Interval field value out of range: {:?}",
+                    value
+                )));
+            }
+
+            result_days += diff_days as i64;
+
+            if result_days > (i32::MAX as i64) {
+                return Err(DataFusionError::NotImplemented(format!(
+                    "Interval field value out of range: {:?}",
+                    value
+                )));
+            }
+
+            result_millis += diff_millis as i64;
+
+            if result_millis > (i32::MAX as i64) {
+                return Err(DataFusionError::NotImplemented(format!(
+                    "Interval field value out of range: {:?}",
+                    value
+                )));
+            }
+        }
+
+        // Interval is tricky thing
+        // 1 day is not 24 hours because timezones, 1 year != 365/364! 30 days 
!= 1 month
+        // The true way to store and calculate intervals is to store it as it 
defined

Review comment:
       💯  agree with this comment.
   
   I think an additional restriction we should perhaps consider is 'only 
support integral numbers of months + years' as the `YearMonthInterval` interval 
type can not capture precisely fractional parts 
   
   https://github.com/apache/arrow/blob/master/format/Schema.fbs#L253-L254




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to