kosiew commented on code in PR #19078:
URL: https://github.com/apache/datafusion/pull/19078#discussion_r2633783034


##########
datafusion/functions/src/datetime/to_timestamp.rs:
##########
@@ -199,80 +270,161 @@ Additional examples can be found 
[here](https://github.com/apache/datafusion/blo
     ),
     argument(
         name = "format_n",
-        description = "Optional [Chrono 
format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) 
strings to use to parse the expression. Formats will be tried in the order they 
appear with the first successful one being returned. If none of the formats 
successfully parse the expression an error will be returned."
+        description = r#"
+Optional [Chrono 
format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) 
strings to use to parse the expression.
+Formats will be tried in the order they appear with the first successful one 
being returned. If none of the formats successfully
+parse the expression an error will be returned. Note: parsing of named 
timezones (e.g. 'America/New_York') using %Z is
+only supported at the end of the string preceded by a space.
+"#
     )
 )]
 #[derive(Debug, PartialEq, Eq, Hash)]
 pub struct ToTimestampNanosFunc {
     signature: Signature,
+    timezone: Option<Arc<str>>,
 }
 
 impl Default for ToTimestampFunc {
     fn default() -> Self {
-        Self::new()
+        Self::new_with_config(&ConfigOptions::default())
     }
 }
 
 impl ToTimestampFunc {
+    #[deprecated(since = "52.0.0", note = "use `new_with_config` instead")]
+    /// Deprecated constructor retained for backwards compatibility.
+    ///
+    /// Prefer [`ToTimestampFunc::new_with_config`] which allows specifying the
+    /// timezone via [`ConfigOptions`]. This helper now mirrors the
+    /// canonical default offset (None) provided by `ConfigOptions::default()`.
     pub fn new() -> Self {
+        Self::new_with_config(&ConfigOptions::default())
+    }
+
+    pub fn new_with_config(config: &ConfigOptions) -> Self {

Review Comment:
   What do you think about creating a macro so that we could write
   
   ```
   impl_timestamp_func_with_config!(ToTimestampFunc);
   impl_timestamp_func_with_config!(ToTimestampSecondsFunc);
   impl_timestamp_func_with_config!(ToTimestampMillisFunc);
   impl_timestamp_func_with_config!(ToTimestampMicrosFunc);
   impl_timestamp_func_with_config!(ToTimestampNanosFunc);
   ```
   
   and have it generate the boilerplate code
    
   - `pub fn new_with_config(config: &ConfigOptions) ...`
   - `fn with_updated_config(&self, config: &ConfigOptions) -> Option<ScalarUDF> ...`
   
   for 
   `ToTimestampFunc`, `ToTimestampSecondsFunc`, `ToTimestampMillisFunc`, 
   `ToTimestampMicrosFunc`, and `ToTimestampNanosFunc`
   ?
   



##########
datafusion/functions/src/datetime/common.rs:
##########
@@ -176,14 +234,50 @@ pub(crate) fn string_to_timestamp_millis_formatted(s: 
&str, format: &str) -> Res
         .timestamp_millis())
 }
 
-pub(crate) fn handle<O, F, S>(
+pub(crate) struct ScalarDataType<T: PrimInt> {
+    data_type: DataType,
+    _marker: PhantomData<T>,
+}
+
+impl<T: PrimInt> ScalarDataType<T> {
+    pub(crate) fn new(dt: DataType) -> Self {
+        Self {
+            data_type: dt,
+            _marker: PhantomData,
+        }
+    }
+
+    fn scalar(&self, r: Option<i64>) -> Result<ScalarValue> {
+        match &self.data_type {
+            DataType::Date32 => Ok(ScalarValue::Date32(r.and_then(|v| 
v.to_i32()))),
+            DataType::Timestamp(u, tz) => match u {
+                TimeUnit::Second => Ok(ScalarValue::TimestampSecond(r, 
tz.clone())),
+                TimeUnit::Millisecond => {
+                    Ok(ScalarValue::TimestampMillisecond(r, tz.clone()))
+                }
+                TimeUnit::Microsecond => {
+                    Ok(ScalarValue::TimestampMicrosecond(r, tz.clone()))
+                }
+                TimeUnit::Nanosecond => {
+                    Ok(ScalarValue::TimestampNanosecond(r, tz.clone()))
+                }
+            },
+            t => Err(internal_datafusion_err!(
+                "Unsupported data type for ScalarDataType<T>: {t:?}"
+            )),

Review Comment:
   I think this can be simplified to a free function:
   ```
   pub(crate) fn scalar_value(dt: &DataType, r: Option<i64>) -> 
Result<ScalarValue> {
       match dt {
           DataType::Date32 => Ok(ScalarValue::Date32(r.and_then(|v| 
v.to_i32()))),
           DataType::Timestamp(u, tz) => match u {
               TimeUnit::Second => Ok(ScalarValue::TimestampSecond(r, 
tz.clone())),
               TimeUnit::Millisecond => Ok(ScalarValue::TimestampMillisecond(r, 
tz.clone())),
               TimeUnit::Microsecond => Ok(ScalarValue::TimestampMicrosecond(r, 
tz.clone())),
               TimeUnit::Nanosecond => Ok(ScalarValue::TimestampNanosecond(r, 
tz.clone())),
           },
           t => Err(internal_datafusion_err!("Unsupported data type: {t:?}")),
       }
   }
   ```
   without the `ScalarDataType` struct and `PhantomData`.



##########
datafusion/functions/src/datetime/common.rs:
##########
@@ -15,31 +15,44 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use std::sync::Arc;
+use std::marker::PhantomData;
+use std::sync::{Arc, LazyLock};
 
+use arrow::array::timezone::Tz;
 use arrow::array::{
     Array, ArrowPrimitiveType, AsArray, GenericStringArray, PrimitiveArray,
     StringArrayType, StringViewArray,
 };
-use arrow::compute::kernels::cast_utils::string_to_timestamp_nanos;
-use arrow::datatypes::DataType;
+use arrow::compute::kernels::cast_utils::string_to_datetime;
+use arrow::datatypes::{DataType, TimeUnit};
+use arrow_buffer::ArrowNativeType;
 use chrono::LocalResult::Single;
 use chrono::format::{Parsed, StrftimeItems, parse};
 use chrono::{DateTime, TimeZone, Utc};
-
 use datafusion_common::cast::as_generic_string_array;
 use datafusion_common::{
-    DataFusionError, Result, ScalarType, ScalarValue, exec_datafusion_err, 
exec_err,
-    unwrap_or_internal_err,
+    DataFusionError, Result, ScalarValue, exec_datafusion_err, exec_err,
+    internal_datafusion_err, unwrap_or_internal_err,
 };
 use datafusion_expr::ColumnarValue;
+use num_traits::{PrimInt, ToPrimitive};
 
 /// Error message if nanosecond conversion request beyond supported interval
 const ERR_NANOSECONDS_NOT_SUPPORTED: &str = "The dates that can be represented 
as nanoseconds have to be between 1677-09-21T00:12:44.0 and 
2262-04-11T23:47:16.854775804";
 
-/// Calls string_to_timestamp_nanos and converts the error type
-pub(crate) fn string_to_timestamp_nanos_shim(s: &str) -> Result<i64> {
-    string_to_timestamp_nanos(s).map_err(|e| e.into())
+static UTC: LazyLock<Tz> = LazyLock::new(|| "UTC".parse().expect("UTC is 
always valid"));
+
+pub(crate) fn string_to_timestamp_nanos_with_timezone(
+    timezone: &Option<Tz>,

Review Comment:
   I think a doc comment describing the arguments and the timezone handling would be helpful for this `pub(crate)` fn.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to