kosiew commented on code in PR #19078: URL: https://github.com/apache/datafusion/pull/19078#discussion_r2633783034
########## datafusion/functions/src/datetime/to_timestamp.rs: ########## @@ -199,80 +270,161 @@ Additional examples can be found [here](https://github.com/apache/datafusion/blo ), argument( name = "format_n", - description = "Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned." + description = r#" +Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. +Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully +parse the expression an error will be returned. Note: parsing of named timezones (e.g. 'America/New_York') using %Z is +only supported at the end of the string preceded by a space. +"# ) )] #[derive(Debug, PartialEq, Eq, Hash)] pub struct ToTimestampNanosFunc { signature: Signature, + timezone: Option<Arc<str>>, } impl Default for ToTimestampFunc { fn default() -> Self { - Self::new() + Self::new_with_config(&ConfigOptions::default()) } } impl ToTimestampFunc { + #[deprecated(since = "52.0.0", note = "use `new_with_config` instead")] + /// Deprecated constructor retained for backwards compatibility. + /// + /// Prefer [`ToTimestampFunc::new_with_config`] which allows specifying the + /// timezone via [`ConfigOptions`]. This helper now mirrors the + /// canonical default offset (None) provided by `ConfigOptions::default()`. 
pub fn new() -> Self { + Self::new_with_config(&ConfigOptions::default()) + } + + pub fn new_with_config(config: &ConfigOptions) -> Self { Review Comment: What do you think about creating a macro so that we could write ``` impl_timestamp_func_with_config!(ToTimestampFunc); impl_timestamp_func_with_config!(ToTimestampSecondsFunc); impl_timestamp_func_with_config!(ToTimestampMillisFunc); impl_timestamp_func_with_config!(ToTimestampMicrosFunc); impl_timestamp_func_with_config!(ToTimestampNanosFunc); ``` and have it generate the boilerplate code, namely - `pub fn new_with_config(config: &ConfigOptions) ...` - `fn with_updated_config(&self, config: &ConfigOptions) -> Option<ScalarUDF> ...` for `ToTimestampFunc`, `ToTimestampSecondsFunc`, `ToTimestampMillisFunc`, `ToTimestampMicrosFunc`, and `ToTimestampNanosFunc`? ########## datafusion/functions/src/datetime/common.rs: ########## @@ -176,14 +234,50 @@ pub(crate) fn string_to_timestamp_millis_formatted(s: &str, format: &str) -> Res .timestamp_millis()) } -pub(crate) fn handle<O, F, S>( +pub(crate) struct ScalarDataType<T: PrimInt> { + data_type: DataType, + _marker: PhantomData<T>, +} + +impl<T: PrimInt> ScalarDataType<T> { + pub(crate) fn new(dt: DataType) -> Self { + Self { + data_type: dt, + _marker: PhantomData, + } + } + + fn scalar(&self, r: Option<i64>) -> Result<ScalarValue> { + match &self.data_type { + DataType::Date32 => Ok(ScalarValue::Date32(r.and_then(|v| v.to_i32()))), + DataType::Timestamp(u, tz) => match u { + TimeUnit::Second => Ok(ScalarValue::TimestampSecond(r, tz.clone())), + TimeUnit::Millisecond => { + Ok(ScalarValue::TimestampMillisecond(r, tz.clone())) + } + TimeUnit::Microsecond => { + Ok(ScalarValue::TimestampMicrosecond(r, tz.clone())) + } + TimeUnit::Nanosecond => { + Ok(ScalarValue::TimestampNanosecond(r, tz.clone())) + } + }, + t => Err(internal_datafusion_err!( + "Unsupported data type for ScalarDataType<T>: {t:?}" + )), Review Comment: I think this can be simplified to a function ``` pub(crate) fn scalar_value(dt:
&DataType, r: Option<i64>) -> Result<ScalarValue> { match dt { DataType::Date32 => Ok(ScalarValue::Date32(r.and_then(|v| v.to_i32()))), DataType::Timestamp(u, tz) => match u { TimeUnit::Second => Ok(ScalarValue::TimestampSecond(r, tz.clone())), TimeUnit::Millisecond => Ok(ScalarValue::TimestampMillisecond(r, tz.clone())), TimeUnit::Microsecond => Ok(ScalarValue::TimestampMicrosecond(r, tz.clone())), TimeUnit::Nanosecond => Ok(ScalarValue::TimestampNanosecond(r, tz.clone())), }, t => Err(internal_datafusion_err!("Unsupported data type: {t:?}")), } } ``` without the ScalarDataType struct and PhantomData. ########## datafusion/functions/src/datetime/common.rs: ########## @@ -15,31 +15,44 @@ // specific language governing permissions and limitations // under the License. -use std::sync::Arc; +use std::marker::PhantomData; +use std::sync::{Arc, LazyLock}; +use arrow::array::timezone::Tz; use arrow::array::{ Array, ArrowPrimitiveType, AsArray, GenericStringArray, PrimitiveArray, StringArrayType, StringViewArray, }; -use arrow::compute::kernels::cast_utils::string_to_timestamp_nanos; -use arrow::datatypes::DataType; +use arrow::compute::kernels::cast_utils::string_to_datetime; +use arrow::datatypes::{DataType, TimeUnit}; +use arrow_buffer::ArrowNativeType; use chrono::LocalResult::Single; use chrono::format::{Parsed, StrftimeItems, parse}; use chrono::{DateTime, TimeZone, Utc}; - use datafusion_common::cast::as_generic_string_array; use datafusion_common::{ - DataFusionError, Result, ScalarType, ScalarValue, exec_datafusion_err, exec_err, - unwrap_or_internal_err, + DataFusionError, Result, ScalarValue, exec_datafusion_err, exec_err, + internal_datafusion_err, unwrap_or_internal_err, }; use datafusion_expr::ColumnarValue; +use num_traits::{PrimInt, ToPrimitive}; /// Error message if nanosecond conversion request beyond supported interval const ERR_NANOSECONDS_NOT_SUPPORTED: &str = "The dates that can be represented as nanoseconds have to be between 1677-09-21T00:12:44.0 
and 2262-04-11T23:47:16.854775804"; -/// Calls string_to_timestamp_nanos and converts the error type -pub(crate) fn string_to_timestamp_nanos_shim(s: &str) -> Result<i64> { - string_to_timestamp_nanos(s).map_err(|e| e.into()) +static UTC: LazyLock<Tz> = LazyLock::new(|| "UTC".parse().expect("UTC is always valid")); + +pub(crate) fn string_to_timestamp_nanos_with_timezone( + timezone: &Option<Tz>, Review Comment: I think documentation would be helpful for a pub(crate) fn -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
