This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new d4bf0b6 Add support for parsing timezone using chrono-tz (#824)
d4bf0b6 is described below
commit d4bf0b64f4f8d072094f2f89256fced471c43ad8
Author: Sumit <[email protected]>
AuthorDate: Wed Oct 13 14:59:33 2021 +0200
Add support for parsing timezone using chrono-tz (#824)
- add chrono-tz as an optional dependency
- first try parsing the timezone with chrono as a numeric offset
- if that fails, try parsing it with chrono-tz when the feature is enabled
- return an error if neither attempt yields a FixedOffset
---
arrow/Cargo.toml | 1 +
arrow/README.md | 1 +
arrow/src/compute/kernels/temporal.rs | 76 +++++++++++++++++++++++++++--------
3 files changed, 62 insertions(+), 16 deletions(-)
diff --git a/arrow/Cargo.toml b/arrow/Cargo.toml
index f597c22..085f99c 100644
--- a/arrow/Cargo.toml
+++ b/arrow/Cargo.toml
@@ -47,6 +47,7 @@ regex = "1.3"
lazy_static = "1.4"
packed_simd = { version = "0.3", optional = true, package = "packed_simd_2" }
chrono = "0.4"
+chrono-tz = {version = "0.4", optional = true}
flatbuffers = { version = "=2.0.0", optional = true }
hex = "0.4"
comfy-table = { version = "4.0", optional = true, default-features = false }
diff --git a/arrow/README.md b/arrow/README.md
index c20752e..053cb0c 100644
--- a/arrow/README.md
+++ b/arrow/README.md
@@ -42,6 +42,7 @@ The arrow crate provides the following features which may be
enabled:
- `simd` - (_Requires Nightly Rust_) alternate optimized
implementations of some
[compute](https://github.com/apache/arrow/tree/master/rust/arrow/src/compute)
kernels using explicit SIMD processor intrinsics.
+- `chrono-tz` - support of parsing timezone using
[chrono-tz](https://docs.rs/chrono-tz/0.6.0/chrono_tz/)
## Safety
diff --git a/arrow/src/compute/kernels/temporal.rs
b/arrow/src/compute/kernels/temporal.rs
index f461b51..24559b0 100644
--- a/arrow/src/compute/kernels/temporal.rs
+++ b/arrow/src/compute/kernels/temporal.rs
@@ -25,6 +25,7 @@ use crate::error::{ArrowError, Result};
use chrono::format::strftime::StrftimeItems;
use chrono::format::{parse, Parsed};
+use chrono::FixedOffset;
macro_rules! extract_component_from_array {
($array:ident, $builder:ident, $extract_fn:ident, $using:ident) => {
@@ -46,25 +47,25 @@ macro_rules! extract_component_from_array {
"Expected format [+-]XX:XX".to_string()
)
} else {
- match parse(&mut $parsed, $tz, StrftimeItems::new("%z")) {
+ let fixed_offset = match parse(&mut $parsed, $tz,
StrftimeItems::new("%z")) {
Ok(_) => match $parsed.to_fixed_offset() {
- Ok(fixed_offset) => {
- for i in 0..$array.len() {
- if $array.is_null(i) {
- $builder.append_null()?;
- } else {
- match $array.$using(i, fixed_offset) {
- Some(dt) => {
- $builder.append_value(dt.$extract_fn()
as i32)?
- }
- None => $builder.append_null()?,
- }
- }
- }
- }
+ Ok(fo) => fo,
err => return_compute_error_with!("Invalid timezone", err),
},
- err => return_compute_error_with!("Unable to parse timezone",
err),
+ _ => match using_chrono_tz($tz) {
+ Some(fo) => fo,
+ err => return_compute_error_with!("Unable to parse
timezone", err),
+ },
+ };
+ for i in 0..$array.len() {
+ if $array.is_null(i) {
+ $builder.append_null()?;
+ } else {
+ match $array.$using(i, fixed_offset) {
+ Some(dt) => $builder.append_value(dt.$extract_fn() as
i32)?,
+ None => $builder.append_null()?,
+ }
+ }
}
}
};
@@ -76,6 +77,24 @@ macro_rules! return_compute_error_with {
};
}
+/// Parse the given string into a string representing fixed-offset
+#[cfg(not(feature = "chrono-tz"))]
+pub fn using_chrono_tz(_: &str) -> Option<FixedOffset> {
+ None
+}
+
+/// Parse the given string into a string representing fixed-offset
+#[cfg(feature = "chrono-tz")]
+pub fn using_chrono_tz(tz: &str) -> Option<FixedOffset> {
+ use chrono::{Offset, TimeZone};
+ tz.parse::<chrono_tz::Tz>()
+ .map(|tz| {
+
tz.offset_from_utc_datetime(&chrono::NaiveDateTime::from_timestamp(0, 0))
+ .fix()
+ })
+ .ok()
+}
+
/// Extracts the hours of a given temporal array as an array of integers
pub fn hour<T>(array: &PrimitiveArray<T>) -> Result<Int32Array>
where
@@ -391,4 +410,29 @@ mod tests {
));
assert!(matches!(hour(&a), Err(ArrowError::ComputeError(_))))
}
+
+ #[cfg(feature = "chrono-tz")]
+ #[test]
+ fn test_temporal_array_timestamp_hour_with_timezone_using_chrono_tz() {
+ use std::sync::Arc;
+
+ let a = Arc::new(TimestampSecondArray::from_vec(
+ vec![60 * 60 * 10],
+ Some("Asia/Kolkata".to_string()),
+ ));
+ let b = hour(&a).unwrap();
+ assert_eq!(15, b.value(0));
+ }
+
+ #[cfg(not(feature = "chrono-tz"))]
+ #[test]
+ fn test_temporal_array_timestamp_hour_with_timezone_using_chrono_tz() {
+ use std::sync::Arc;
+
+ let a = Arc::new(TimestampSecondArray::from_vec(
+ vec![60 * 60 * 10],
+ Some("Asia/Kolkatta".to_string()),
+ ));
+ assert!(matches!(hour(&a), Err(ArrowError::ComputeError(_))))
+ }
}