alamb commented on code in PR #8886:
URL: https://github.com/apache/arrow-datafusion/pull/8886#discussion_r1457493231


##########
datafusion/expr/src/expr_fn.rs:
##########
@@ -885,28 +885,29 @@ nary_scalar_expr!(
 scalar_expr!(DatePart, date_part, part date, "extracts a subfield from the 
date");
 scalar_expr!(DateTrunc, date_trunc, part date, "truncates the date to a 
specified level of precision");
 scalar_expr!(DateBin, date_bin, stride source origin, "coerces an arbitrary 
timestamp to the start of the nearest specified interval");
-scalar_expr!(
+nary_scalar_expr!(
+    ToTimestamp,
+    to_timestamp,
+    "converts a string to a `Timestamp(Nanoseconds, None)`"

Review Comment:
   Is this accurate? If so, maybe we can update the other doc comments as well
   
   ```suggestion
       "converts a string and optional formats to a `Timestamp(Nanoseconds, 
None)`"
   ```
   
   



##########
datafusion/sqllogictest/test_files/timestamps.slt:
##########
@@ -1922,6 +1951,65 @@ true true true true true true
 #----
 #0001-04-25T00:00:00 +63022-07-16T12:59:37 0001-04-25T00:00:00 
+63022-07-16T12:59:37 0001-04-25T00:00:00 +63022-07-16T12:59:37
 
+# verify timestamp data with formatting options
+query PPPPPP
+SELECT to_timestamp(null, '%+'), to_timestamp(0, '%s'), 
to_timestamp(1926632005, '%s'), to_timestamp(1, '%+', '%s'), to_timestamp(-1, 
'%c', '%+', '%s'), to_timestamp(0-1, '%c', '%+', '%s')
+----
+NULL 1970-01-01T00:00:00 2031-01-19T23:33:25 1970-01-01T00:00:01 
1969-12-31T23:59:59 1969-12-31T23:59:59
+
+# verify timestamp data with formatting options
+query PPPPPP
+SELECT to_timestamp(null, '%+'), to_timestamp(0, '%s'), 
to_timestamp(1926632005, '%s'), to_timestamp(1, '%+', '%s'), to_timestamp(-1, 
'%c', '%+', '%s'), to_timestamp(0-1, '%c', '%+', '%s')

Review Comment:
   I think the key observation here is that this PR implements different 
semantics than any existing `to_timestamp` (it isn't postgres format strings, 
nor is it  spark format strings, it is something datafusion specific based on 
the rust chrono format strings)



##########
datafusion/expr/src/built_in_function.rs:
##########
@@ -1053,67 +1053,13 @@ impl BuiltinScalarFunction {
                 vec![Exact(vec![Utf8, Int64]), Exact(vec![LargeUtf8, Int64])],
                 self.volatility(),
             ),
-            BuiltinScalarFunction::ToTimestamp => Signature::uniform(
-                1,
-                vec![
-                    Int64,
-                    Float64,
-                    Timestamp(Nanosecond, None),
-                    Timestamp(Microsecond, None),
-                    Timestamp(Millisecond, None),
-                    Timestamp(Second, None),
-                    Utf8,
-                ],
-                self.volatility(),
-            ),
-            BuiltinScalarFunction::ToTimestampMillis => Signature::uniform(
-                1,
-                vec![
-                    Int64,
-                    Timestamp(Nanosecond, None),
-                    Timestamp(Microsecond, None),
-                    Timestamp(Millisecond, None),
-                    Timestamp(Second, None),
-                    Utf8,
-                ],
-                self.volatility(),
-            ),
-            BuiltinScalarFunction::ToTimestampMicros => Signature::uniform(
-                1,
-                vec![
-                    Int64,
-                    Timestamp(Nanosecond, None),
-                    Timestamp(Microsecond, None),
-                    Timestamp(Millisecond, None),
-                    Timestamp(Second, None),
-                    Utf8,
-                ],
-                self.volatility(),
-            ),
-            BuiltinScalarFunction::ToTimestampNanos => Signature::uniform(
-                1,
-                vec![
-                    Int64,
-                    Timestamp(Nanosecond, None),
-                    Timestamp(Microsecond, None),
-                    Timestamp(Millisecond, None),
-                    Timestamp(Second, None),
-                    Utf8,
-                ],
-                self.volatility(),
-            ),
-            BuiltinScalarFunction::ToTimestampSeconds => Signature::uniform(
-                1,
-                vec![
-                    Int64,
-                    Timestamp(Nanosecond, None),
-                    Timestamp(Microsecond, None),
-                    Timestamp(Millisecond, None),
-                    Timestamp(Second, None),
-                    Utf8,
-                ],
-                self.volatility(),
-            ),
+            BuiltinScalarFunction::ToTimestamp
+            | BuiltinScalarFunction::ToTimestampSeconds
+            | BuiltinScalarFunction::ToTimestampMillis
+            | BuiltinScalarFunction::ToTimestampMicros
+            | BuiltinScalarFunction::ToTimestampNanos => {
+                Signature::variadic_any(self.volatility())

Review Comment:
   Yeah, it is unfortunate there is no way to express "first argument is one of 
X, Y, Z and the remaining arguments have any type"
   
   



##########
datafusion/sqllogictest/test_files/timestamps.slt:
##########
@@ -331,6 +331,35 @@ SELECT COUNT(*) FROM ts_data_secs where ts > 
to_timestamp_seconds('2020-09-08T12
 ----
 2
 
+# to_timestamp with formatting
+query I
+SELECT COUNT(*) FROM ts_data_nanos where ts > 
to_timestamp('2020-09-08T12:00:00+00:00', '2020-09-08 12/00/00+00:00', '%c', 
'%+', '%Y-%m-%d %H/%M/%s%#z')

Review Comment:
   Can we also please add some tests that show
   
   1.  `to_timestamp` with formatting being invoked on a table with a string 
column (not just constant values)
   2. error case for bad format strings
   3. error cases where some of the values in a column can be parsed with the 
format string, but some can not



##########
datafusion-examples/examples/dataframe_to_timestamp.rs:
##########
@@ -0,0 +1,109 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::sync::Arc;
+
+use datafusion::arrow::array::StringArray;
+use datafusion::arrow::datatypes::{DataType, Field, Schema};
+use datafusion::arrow::record_batch::RecordBatch;
+use datafusion::error::Result;
+use datafusion::prelude::*;
+use datafusion_common::assert_contains;
+
+/// This example demonstrates how to use the to_timestamp function in the 
DataFrame API as well as via sql.

Review Comment:
   This is really nice 👌 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to