alamb commented on code in PR #12400:
URL: https://github.com/apache/datafusion/pull/12400#discussion_r1754716549
##########
datafusion/functions-nested/src/range.rs:
##########
@@ -394,3 +412,136 @@ fn gen_range_date(args: &[ArrayRef], include_upper: bool)
-> Result<ArrayRef> {
Ok(arr)
}
+
+fn gen_range_timestamp(args: &[ArrayRef], include_upper_bound: bool) ->
Result<ArrayRef> {
+ if args.len() != 3 {
+ return exec_err!(
+ "Arguments length must be 3 for {}",
+ if include_upper_bound {
+ "GENERATE_SERIES"
+ } else {
+ "RANGE"
+ }
+ );
+ }
+
+ // coerce_types fn should coerce all types to Timestamp(Nanosecond, tz)
+ let (start_arr, start_tz_opt) = cast_timestamp_arg(&args[0],
include_upper_bound)?;
+ let (stop_arr, stop_tz_opt) = cast_timestamp_arg(&args[1],
include_upper_bound)?;
+ let step_arr = as_interval_mdn_array(&args[2])?;
+ let start_tz = parse_tz(start_tz_opt)?;
+ let stop_tz = parse_tz(stop_tz_opt)?;
+
+ // values are timestamps
+ let values_builder = start_tz_opt
+ .clone()
+ .map_or_else(TimestampNanosecondBuilder::new, |start_tz_str| {
+ TimestampNanosecondBuilder::new().with_timezone(start_tz_str)
+ });
+ let mut list_builder = ListBuilder::new(values_builder);
+
+ for idx in 0..start_arr.len() {
+ if start_arr.is_null(idx) || stop_arr.is_null(idx) ||
step_arr.is_null(idx) {
+ list_builder.append_null();
+ continue;
+ }
+
+ let start = start_arr.value(idx);
+ let stop = stop_arr.value(idx);
+ let step = step_arr.value(idx);
+
+ let (months, days, ns) = IntervalMonthDayNanoType::to_parts(step);
+ if months == 0 && days == 0 && ns == 0 {
+ return exec_err!(
+ "Interval argument to {} must not be 0",
+ if include_upper_bound {
+ "GENERATE_SERIES"
+ } else {
+ "RANGE"
+ }
+ );
+ }
+
+ let neg = TSNT::add_month_day_nano(start, step, start_tz)
+ .ok_or(exec_datafusion_err!(
+ "Cannot generate timestamp range where start + step overflows"
+ ))?
+ .cmp(&start)
+ == Ordering::Less;
+
+ let stop_dt = as_datetime_with_timezone::<TSNT>(stop, stop_tz).ok_or(
+ exec_datafusion_err!(
+ "Cannot generate timestamp for stop: {}: {:?}",
+ stop,
+ stop_tz
+ ),
+ )?;
+
+ let mut current = start;
+ let mut current_dt = as_datetime_with_timezone::<TSNT>(current,
start_tz).ok_or(
+ exec_datafusion_err!(
+ "Cannot generate timestamp for start: {}: {:?}",
+ current,
+ start_tz
+ ),
+ )?;
+
+ let values = from_fn(|| {
+ if (include_upper_bound
+ && ((neg && current_dt < stop_dt) || (!neg && current_dt >
stop_dt)))
+ || (!include_upper_bound
+ && ((neg && current_dt <= stop_dt)
+ || (!neg && current_dt >= stop_dt)))
+ {
+ return None;
+ }
+
+ let prev_current = current;
+
+ if let Some(ts) = TSNT::add_month_day_nano(current, step,
start_tz) {
+ current = ts;
+ current_dt = as_datetime_with_timezone::<TSNT>(current,
start_tz)?;
+
+ Some(Some(prev_current))
+ } else {
+ // we failed to parse the timestamp here so terminate the
series
+ None
+ }
+ });
+
+ list_builder.append_value(values);
+ }
+
+ let arr = Arc::new(list_builder.finish());
+
+ Ok(arr)
+}
+
+fn cast_timestamp_arg(
+ arg: &ArrayRef,
+ include_upper: bool,
+) -> Result<(&TimestampNanosecondArray, &Option<Arc<str>>)> {
+ match arg.data_type() {
+ Timestamp(Nanosecond, tz_opt) => {
+ Ok((as_timestamp_nanosecond_array(arg)?, tz_opt))
+ }
+ _ => {
+ internal_err!(
+ "Unexpected argument type for {} : {}",
+ if include_upper {
+ "GENERATE_SERIES"
+ } else {
+ "RANGE"
+ },
+ arg.data_type()
+ )
+ }
+ }
+}
+
+fn parse_tz(tz: &Option<Arc<str>>) -> Result<Tz> {
+ let tz = tz.as_ref().map_or_else(|| "+00", |s| s);
+
+ Tz::from_str(tz)
+ .map_err(|op| exec_datafusion_err!("failed on timezone {tz}: {:?}",
op))
Review Comment:
```suggestion
.map_err(|op| exec_datafusion_err!("failed to parse timezone {tz}:
{:?}", op))
```
##########
datafusion/functions-nested/src/range.rs:
##########
@@ -394,3 +412,136 @@ fn gen_range_date(args: &[ArrayRef], include_upper: bool)
-> Result<ArrayRef> {
Ok(arr)
}
+
+fn gen_range_timestamp(args: &[ArrayRef], include_upper_bound: bool) ->
Result<ArrayRef> {
+ if args.len() != 3 {
+ return exec_err!(
+ "Arguments length must be 3 for {}",
+ if include_upper_bound {
+ "GENERATE_SERIES"
+ } else {
+ "RANGE"
+ }
+ );
+ }
+
+ // coerce_types fn should coerce all types to Timestamp(Nanosecond, tz)
+ let (start_arr, start_tz_opt) = cast_timestamp_arg(&args[0],
include_upper_bound)?;
+ let (stop_arr, stop_tz_opt) = cast_timestamp_arg(&args[1],
include_upper_bound)?;
+ let step_arr = as_interval_mdn_array(&args[2])?;
+ let start_tz = parse_tz(start_tz_opt)?;
+ let stop_tz = parse_tz(stop_tz_opt)?;
+
+ // values are timestamps
+ let values_builder = start_tz_opt
+ .clone()
+ .map_or_else(TimestampNanosecondBuilder::new, |start_tz_str| {
+ TimestampNanosecondBuilder::new().with_timezone(start_tz_str)
+ });
+ let mut list_builder = ListBuilder::new(values_builder);
+
+ for idx in 0..start_arr.len() {
+ if start_arr.is_null(idx) || stop_arr.is_null(idx) ||
step_arr.is_null(idx) {
+ list_builder.append_null();
+ continue;
+ }
+
+ let start = start_arr.value(idx);
+ let stop = stop_arr.value(idx);
+ let step = step_arr.value(idx);
+
+ let (months, days, ns) = IntervalMonthDayNanoType::to_parts(step);
+ if months == 0 && days == 0 && ns == 0 {
+ return exec_err!(
+ "Interval argument to {} must not be 0",
+ if include_upper_bound {
+ "GENERATE_SERIES"
+ } else {
+ "RANGE"
+ }
+ );
+ }
+
+ let neg = TSNT::add_month_day_nano(start, step, start_tz)
+ .ok_or(exec_datafusion_err!(
+ "Cannot generate timestamp range where start + step overflows"
+ ))?
+ .cmp(&start)
+ == Ordering::Less;
+
+ let stop_dt = as_datetime_with_timezone::<TSNT>(stop, stop_tz).ok_or(
+ exec_datafusion_err!(
+ "Cannot generate timestamp for stop: {}: {:?}",
+ stop,
+ stop_tz
+ ),
+ )?;
+
+ let mut current = start;
+ let mut current_dt = as_datetime_with_timezone::<TSNT>(current,
start_tz).ok_or(
+ exec_datafusion_err!(
+ "Cannot generate timestamp for start: {}: {:?}",
+ current,
+ start_tz
+ ),
+ )?;
+
+ let values = from_fn(|| {
+ if (include_upper_bound
+ && ((neg && current_dt < stop_dt) || (!neg && current_dt >
stop_dt)))
+ || (!include_upper_bound
+ && ((neg && current_dt <= stop_dt)
+ || (!neg && current_dt >= stop_dt)))
+ {
+ return None;
+ }
+
+ let prev_current = current;
+
+ if let Some(ts) = TSNT::add_month_day_nano(current, step,
start_tz) {
+ current = ts;
+ current_dt = as_datetime_with_timezone::<TSNT>(current,
start_tz)?;
+
+ Some(Some(prev_current))
+ } else {
+ // we failed to parse the timestamp here so terminate the
series
+ None
+ }
+ });
+
+ list_builder.append_value(values);
+ }
+
+ let arr = Arc::new(list_builder.finish());
+
+ Ok(arr)
+}
+
+fn cast_timestamp_arg(
Review Comment:
nit:
You could potentially use
[`as_timestamp_nanosecond_array`](https://docs.rs/datafusion/latest/datafusion/common/cast/fn.as_timestamp_nanosecond_array.html)
(the datafusion version) from:
https://docs.rs/datafusion/latest/datafusion/common/cast/index.html
And then call
[`timezone`](https://docs.rs/arrow/latest/arrow/array/struct.PrimitiveArray.html#method.timezone)
to get the timezone from the array
I personally found the use of the word `cast` somewhat confusing, as the
function doesn't actually call the arrow `cast` kernel, but instead does a Rust
`as_any` downcast (which is like a cast in C or Java).
##########
datafusion/functions-nested/src/range.rs:
##########
@@ -109,8 +121,11 @@ impl ScalarUDFImpl for Range {
match args[0].data_type() {
Int64 => make_scalar_function(|args| gen_range_inner(args,
false))(args),
Date32 => make_scalar_function(|args| gen_range_date(args,
false))(args),
- _ => {
- exec_err!("unsupported type for range")
+ Timestamp(_, _) => {
+ make_scalar_function(|args| gen_range_timestamp(args,
false))(args)
+ }
+ dt => {
+ exec_err!("unsupported type for RANGE. Expected Int64, Date32
or Timestamp, got: {dt}")
Review Comment:
❤️ for a much better error
##########
datafusion/sqllogictest/test_files/array.slt:
##########
@@ -5866,50 +5902,124 @@ select generate_series(5),
----
[0, 1, 2, 3, 4, 5] [2, 3, 4, 5] [2, 5, 8] [1, 2, 3, 4, 5] [5, 4, 3, 2, 1] [10,
7, 4] [1992-09-01, 1992-10-01, 1992-11-01, 1992-12-01, 1993-01-01, 1993-02-01,
1993-03-01] [1993-02-01, 1993-01-31, 1993-01-30, 1993-01-29, 1993-01-28,
1993-01-27, 1993-01-26, 1993-01-25, 1993-01-24, 1993-01-23, 1993-01-22,
1993-01-21, 1993-01-20, 1993-01-19, 1993-01-18, 1993-01-17, 1993-01-16,
1993-01-15, 1993-01-14, 1993-01-13, 1993-01-12, 1993-01-11, 1993-01-10,
1993-01-09, 1993-01-08, 1993-01-07, 1993-01-06, 1993-01-05, 1993-01-04,
1993-01-03, 1993-01-02, 1993-01-01] [1989-04-01, 1990-04-01, 1991-04-01,
1992-04-01]
-query error DataFusion error: Execution error: Cannot generate date range less
than 1 day\.
-select generate_series('2021-01-01'::timestamp, '2021-01-02'::timestamp,
INTERVAL '1' HOUR);
+query ?
+select generate_series('2021-01-01'::timestamp,
'2021-01-01T15:00:00'::timestamp, INTERVAL '1' HOUR);
+----
+[2021-01-01T00:00:00, 2021-01-01T01:00:00, 2021-01-01T02:00:00,
2021-01-01T03:00:00, 2021-01-01T04:00:00, 2021-01-01T05:00:00,
2021-01-01T06:00:00, 2021-01-01T07:00:00, 2021-01-01T08:00:00,
2021-01-01T09:00:00, 2021-01-01T10:00:00, 2021-01-01T11:00:00,
2021-01-01T12:00:00, 2021-01-01T13:00:00, 2021-01-01T14:00:00,
2021-01-01T15:00:00]
+
+query ?
+select generate_series('2021-01-01T00:00:00EST'::timestamp,
'2021-01-01T15:00:00-12:00'::timestamp, INTERVAL '1' HOUR);
Review Comment:
Is it worth also testing the case where the end timestamp is not an exact
increment, i.e. cannot be reached from the start by a whole number of steps?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]