This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 666f7a5221 Optimize to_timestamp (with format) (#9090) (#9833)
666f7a5221 is described below
commit 666f7a5221ac9b4d5232cef7b8008ca71d2c1be9
Author: Vojtěch Toman <[email protected]>
AuthorDate: Thu Mar 28 17:57:16 2024 +0100
Optimize to_timestamp (with format) (#9090) (#9833)
Eliminate duplicate parsing of the input and format strings in some cases
Co-authored-by: Vojtech Toman <[email protected]>
---
datafusion/functions/src/datetime/common.rs | 10 +++++++---
1 file changed, 7 insertions(+), 3 deletions(-)
diff --git a/datafusion/functions/src/datetime/common.rs b/datafusion/functions/src/datetime/common.rs
index 007ffd35ca..f0689ffd64 100644
--- a/datafusion/functions/src/datetime/common.rs
+++ b/datafusion/functions/src/datetime/common.rs
@@ -22,8 +22,9 @@ use arrow::array::{
};
use arrow::compute::kernels::cast_utils::string_to_timestamp_nanos;
use arrow::datatypes::DataType;
+use chrono::format::{parse, Parsed, StrftimeItems};
use chrono::LocalResult::Single;
-use chrono::{DateTime, NaiveDateTime, TimeZone, Utc};
+use chrono::{DateTime, TimeZone, Utc};
use itertools::Either;
use datafusion_common::cast::as_generic_string_array;
@@ -84,12 +85,15 @@ pub(crate) fn string_to_datetime_formatted<T: TimeZone>(
))
};
+ let mut parsed = Parsed::new();
+ parse(&mut parsed, s, StrftimeItems::new(format)).map_err(|e| err(&e.to_string()))?;
+
// attempt to parse the string assuming it has a timezone
- let dt = DateTime::parse_from_str(s, format);
+ let dt = parsed.to_datetime();
if let Err(e) = &dt {
// no timezone or other failure, try without a timezone
- let ndt = NaiveDateTime::parse_from_str(s, format);
+ let ndt = parsed.to_naive_datetime_with_offset(0);
if let Err(e) = &ndt {
return Err(err(&e.to_string()));
}