This is an automated email from the ASF dual-hosted git repository.
sunchao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 1c1bfe8 ARROW-4386: [Rust] Temporal array support
1c1bfe8 is described below
commit 1c1bfe8ffc01e877713ae3aeadcd32ea3e597398
Author: Neville Dipale <[email protected]>
AuthorDate: Thu Mar 7 16:05:24 2019 -0800
ARROW-4386: [Rust] Temporal array support
This adds support for date and time arrays
Author: Neville Dipale <[email protected]>
Closes #3726 from nevi-me/arrow-4386 and squashes the following commits:
f63b0a51 <Neville Dipale> extract constants, cleanups
bce7d15b <Neville Dipale> reuse some macros
f2977fab <Neville Dipale> add simd mask for temporal arrays
69df3846 <Neville Dipale> format temporal values, rename functions
cbe657af <Neville Dipale> fix temporal unit test
5fba10e5 <Neville Dipale> add date and time conversions, nano precision
af527df6 <Neville Dipale> clean up arrays and types, add builders
60f975f9 <Neville Dipale> : Temporal array support
---
rust/arrow/Cargo.toml | 1 +
rust/arrow/src/array.rs | 322 +++++++++++++++++++++++++++-
rust/arrow/src/builder.rs | 56 +++++
rust/arrow/src/compute/{ => kernels}/mod.rs | 12 +-
rust/arrow/src/compute/kernels/temporal.rs | 81 +++++++
rust/arrow/src/compute/mod.rs | 2 +
rust/arrow/src/datatypes.rs | 144 +++++++++++--
7 files changed, 593 insertions(+), 25 deletions(-)
diff --git a/rust/arrow/Cargo.toml b/rust/arrow/Cargo.toml
index c825316..fbc9be0 100644
--- a/rust/arrow/Cargo.toml
+++ b/rust/arrow/Cargo.toml
@@ -46,6 +46,7 @@ num = "0.2"
regex = "1.1"
lazy_static = "1.2"
packed_simd = "0.3.1"
+chrono = "0.4"
[dev-dependencies]
criterion = "0.2"
diff --git a/rust/arrow/src/array.rs b/rust/arrow/src/array.rs
index 09a8dd3..0d702ee 100644
--- a/rust/arrow/src/array.rs
+++ b/rust/arrow/src/array.rs
@@ -61,6 +61,8 @@ use std::io::Write;
use std::mem;
use std::sync::Arc;
+use chrono::prelude::*;
+
use crate::array_data::{ArrayData, ArrayDataRef};
use crate::buffer::{Buffer, MutableBuffer};
use crate::builder::*;
@@ -68,6 +70,15 @@ use crate::datatypes::*;
use crate::memory;
use crate::util::bit_util;
+/// Number of seconds in a day
+const SECONDS_IN_DAY: i64 = 86_400;
+/// Number of milliseconds in a second
+const MILLISECONDS: i64 = 1_000;
+/// Number of microseconds in a second
+const MICROSECONDS: i64 = 1_000_000;
+/// Number of nanoseconds in a second
+const NANOSECONDS: i64 = 1_000_000_000;
+
/// Trait for dealing with different types of array at runtime when the type
of the
/// array is not known in advance
pub trait Array: Send + Sync {
@@ -180,6 +191,18 @@ pub type UInt64Array = PrimitiveArray<UInt64Type>;
pub type Float32Array = PrimitiveArray<Float32Type>;
pub type Float64Array = PrimitiveArray<Float64Type>;
+pub type TimestampSecondArray = PrimitiveArray<TimestampSecondType>;
+pub type TimestampMillisecondArray = PrimitiveArray<TimestampMillisecondType>;
+pub type TimestampMicrosecondArray = PrimitiveArray<TimestampMicrosecondType>;
+pub type TimestampNanosecondArray = PrimitiveArray<TimestampNanosecondType>;
+pub type Date32Array = PrimitiveArray<Date32Type>;
+pub type Date64Array = PrimitiveArray<Date64Type>;
+pub type Time32SecondArray = PrimitiveArray<Time32SecondType>;
+pub type Time32MillisecondArray = PrimitiveArray<Time32MillisecondType>;
+pub type Time64MicrosecondArray = PrimitiveArray<Time64MicrosecondType>;
+pub type Time64NanosecondArray = PrimitiveArray<Time64NanosecondType>;
+// TODO add interval
+
impl<T: ArrowPrimitiveType> Array for PrimitiveArray<T> {
fn as_any(&self) -> &Any {
self
@@ -249,8 +272,128 @@ impl<T: ArrowNumericType> PrimitiveArray<T> {
}
}
+impl<T: ArrowTemporalType + ArrowNumericType> PrimitiveArray<T>
+where
+ i64: std::convert::From<T::Native>,
+{
+ /// Returns value as a chrono `NaiveDateTime`, handling time resolution
+ ///
+ /// If a data type cannot be converted to `NaiveDateTime`, a `None` is
returned.
+ /// A valid value is expected, thus the user should first check for
validity.
+ /// Conversion factors are the `SECONDS_IN_DAY`, `MILLISECONDS`, `MICROSECONDS` and `NANOSECONDS` constants
+ pub fn value_as_datetime(&self, i: usize) -> Option<NaiveDateTime> {
+ let v = i64::from(self.value(i));
+ match self.data_type() {
+ DataType::Date32(_) => {
+ // convert days into seconds
+ Some(NaiveDateTime::from_timestamp(v as i64 * SECONDS_IN_DAY,
0))
+ }
+ DataType::Date64(_) => Some(NaiveDateTime::from_timestamp(
+ // extract seconds from milliseconds
+ v / MILLISECONDS,
+ // discard extracted seconds and convert milliseconds to
nanoseconds
+ (v % MILLISECONDS * MICROSECONDS) as u32,
+ )),
+ DataType::Time32(_) | DataType::Time64(_) => None,
+ DataType::Timestamp(unit) => match unit {
+ TimeUnit::Second => Some(NaiveDateTime::from_timestamp(v, 0)),
+ TimeUnit::Millisecond => Some(NaiveDateTime::from_timestamp(
+ // extract seconds from milliseconds
+ v / MILLISECONDS,
+ // discard extracted seconds and convert milliseconds to
nanoseconds
+ (v % MILLISECONDS * MICROSECONDS) as u32,
+ )),
+ TimeUnit::Microsecond => Some(NaiveDateTime::from_timestamp(
+ // extract seconds from microseconds
+ v / MICROSECONDS,
+ // discard extracted seconds and convert microseconds to
nanoseconds
+ (v % MICROSECONDS * MILLISECONDS) as u32,
+ )),
+ TimeUnit::Nanosecond => Some(NaiveDateTime::from_timestamp(
+ // extract seconds from nanoseconds
+ v / NANOSECONDS,
+ // discard extracted seconds
+ (v % NANOSECONDS) as u32,
+ )),
+ },
+ // interval is not yet fully documented [ARROW-3097]
+ DataType::Interval(_) => None,
+ _ => None,
+ }
+ }
+
+ /// Returns value as a chrono `NaiveDate` by using `Self::value_as_datetime()`
+ ///
+ /// If a data type cannot be converted to `NaiveDate`, a `None` is returned
+ pub fn value_as_date(&self, i: usize) -> Option<NaiveDate> {
+ match self.value_as_datetime(i) {
+ Some(datetime) => Some(datetime.date()),
+ None => None,
+ }
+ }
+
+ /// Returns a value as a chrono `NaiveTime`
+ ///
+ /// `Date32` and `Date64` return UTC midnight as they do not have time
resolution
+ pub fn value_as_time(&self, i: usize) -> Option<NaiveTime> {
+ match self.data_type() {
+ DataType::Time32(unit) => {
+ // safe to immediately cast to u32 as `self.value(i)` is expected to be a
non-negative i32
+ let v = i64::from(self.value(i)) as u32;
+ match unit {
+ TimeUnit::Second => {
+ Some(NaiveTime::from_num_seconds_from_midnight(v, 0))
+ }
+ TimeUnit::Millisecond => {
+ Some(NaiveTime::from_num_seconds_from_midnight(
+ // extract seconds from milliseconds
+ v / MILLISECONDS as u32,
+ // discard extracted seconds and convert
milliseconds to
+ // nanoseconds
+ v % MILLISECONDS as u32 * MICROSECONDS as u32,
+ ))
+ }
+ _ => None,
+ }
+ }
+ DataType::Time64(unit) => {
+ let v = i64::from(self.value(i));
+ match unit {
+ TimeUnit::Microsecond => {
+ Some(NaiveTime::from_num_seconds_from_midnight(
+ // extract seconds from microseconds
+ (v / MICROSECONDS) as u32,
+ // discard extracted seconds and convert
microseconds to
+ // nanoseconds
+ (v % MICROSECONDS * MILLISECONDS) as u32,
+ ))
+ }
+ TimeUnit::Nanosecond => {
+ Some(NaiveTime::from_num_seconds_from_midnight(
+ // extract seconds from nanoseconds
+ (v / NANOSECONDS) as u32,
+ // discard extracted seconds
+ (v % NANOSECONDS) as u32,
+ ))
+ }
+ _ => None,
+ }
+ }
+ DataType::Timestamp(_) => match self.value_as_datetime(i) {
+ Some(datetime) => Some(datetime.time()),
+ None => None,
+ },
+ DataType::Date32(_) | DataType::Date64(_) => {
+ Some(NaiveTime::from_hms(0, 0, 0))
+ }
+ DataType::Interval(_) => None,
+ _ => None,
+ }
+ }
+}
+
impl<T: ArrowNumericType> fmt::Debug for PrimitiveArray<T> {
- fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ default fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "PrimitiveArray<{:?}>\n[\n", T::get_data_type())?;
for i in 0..self.len() {
if self.is_null(i) {
@@ -263,6 +406,41 @@ impl<T: ArrowNumericType> fmt::Debug for PrimitiveArray<T>
{
}
}
+impl<T: ArrowNumericType + ArrowTemporalType> fmt::Debug for PrimitiveArray<T>
+where
+ i64: std::convert::From<T::Native>,
+{
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ write!(f, "PrimitiveArray<{:?}>\n[\n", T::get_data_type())?;
+ for i in 0..self.len() {
+ if self.is_null(i) {
+ write!(f, " null,\n")?;
+ } else {
+ match T::get_data_type() {
+ DataType::Date32(_) | DataType::Date64(_) => {
+ match self.value_as_date(i) {
+ Some(date) => write!(f, " {:?},\n", date)?,
+ None => write!(f, " null,\n")?,
+ }
+ }
+ DataType::Time32(_) | DataType::Time64(_) => {
+ match self.value_as_time(i) {
+ Some(time) => write!(f, " {:?},\n", time)?,
+ None => write!(f, " null,\n")?,
+ }
+ }
+ DataType::Timestamp(_) => match self.value_as_datetime(i) {
+ Some(datetime) => write!(f, " {:?},\n", datetime)?,
+ None => write!(f, " null,\n")?,
+ },
+ _ => write!(f, " {:?},\n", "null,\n")?,
+ }
+ }
+ }
+ write!(f, "]")
+ }
+}
+
/// Specific implementation for Boolean arrays due to bit-packing
impl PrimitiveArray<BooleanType> {
pub fn new(length: usize, values: Buffer, null_count: usize, offset:
usize) -> Self {
@@ -313,7 +491,7 @@ impl fmt::Debug for PrimitiveArray<BooleanType> {
// otherwise with both `From<Vec<T::Native>>` and
`From<Vec<Option<T::Native>>>`.
// We should revisit this in future.
macro_rules! def_numeric_from_vec {
- ( $ty:ident, $native_ty:ident, $ty_id:path ) => {
+ ( $ty:ident, $native_ty:ident, $ty_id:expr ) => {
impl From<Vec<$native_ty>> for PrimitiveArray<$ty> {
fn from(data: Vec<$native_ty>) -> Self {
let array_data = ArrayData::builder($ty_id)
@@ -370,6 +548,46 @@ def_numeric_from_vec!(UInt32Type, u32, DataType::UInt32);
def_numeric_from_vec!(UInt64Type, u64, DataType::UInt64);
def_numeric_from_vec!(Float32Type, f32, DataType::Float32);
def_numeric_from_vec!(Float64Type, f64, DataType::Float64);
+// Temporal arrays
+
+def_numeric_from_vec!(
+ TimestampSecondType,
+ i64,
+ DataType::Timestamp(TimeUnit::Second)
+);
+def_numeric_from_vec!(
+ TimestampMillisecondType,
+ i64,
+ DataType::Timestamp(TimeUnit::Millisecond)
+);
+def_numeric_from_vec!(
+ TimestampMicrosecondType,
+ i64,
+ DataType::Timestamp(TimeUnit::Microsecond)
+);
+def_numeric_from_vec!(
+ TimestampNanosecondType,
+ i64,
+ DataType::Timestamp(TimeUnit::Nanosecond)
+);
+def_numeric_from_vec!(Date32Type, i32, DataType::Date32(DateUnit::Day));
+def_numeric_from_vec!(Date64Type, i64,
DataType::Date64(DateUnit::Millisecond));
+def_numeric_from_vec!(Time32SecondType, i32,
DataType::Time32(TimeUnit::Second));
+def_numeric_from_vec!(
+ Time32MillisecondType,
+ i32,
+ DataType::Time32(TimeUnit::Millisecond)
+);
+def_numeric_from_vec!(
+ Time64MicrosecondType,
+ i64,
+ DataType::Time64(TimeUnit::Microsecond)
+);
+def_numeric_from_vec!(
+ Time64NanosecondType,
+ i64,
+ DataType::Time64(TimeUnit::Nanosecond)
+);
/// Constructs a boolean array from a vector. Should only be used for testing.
impl From<Vec<bool>> for BooleanArray {
@@ -805,6 +1023,78 @@ mod tests {
}
#[test]
+ fn test_date64_array_from_vec_option() {
+ // Test building a primitive array with null values
+ // we use Int32 and Int64 as a backing array, so all Int32 and Int64
conventions
+ // work
+ let arr: PrimitiveArray<Date64Type> =
+ vec![Some(1550902545147), None, Some(1550902545147)].into();
+ assert_eq!(3, arr.len());
+ assert_eq!(0, arr.offset());
+ assert_eq!(1, arr.null_count());
+ for i in 0..3 {
+ if i % 2 == 0 {
+ assert!(!arr.is_null(i));
+ assert!(arr.is_valid(i));
+ assert_eq!(1550902545147, arr.value(i));
+ // roundtrip to and from datetime
+ assert_eq!(
+ 1550902545147,
+ arr.value_as_datetime(i).unwrap().timestamp_millis()
+ );
+ } else {
+ assert!(arr.is_null(i));
+ assert!(!arr.is_valid(i));
+ }
+ }
+ }
+
+ #[test]
+ fn test_time32_millisecond_array_from_vec() {
+ // 1: 00:00:00.001
+ // 37800005: 10:30:00.005
+ // 86399210: 23:59:59.210
+ let arr: PrimitiveArray<Time32MillisecondType> =
+ vec![1, 37_800_005, 86_399_210].into();
+ assert_eq!(3, arr.len());
+ assert_eq!(0, arr.offset());
+ assert_eq!(0, arr.null_count());
+ let formatted = vec!["00:00:00.001", "10:30:00.005", "23:59:59.210"];
+ for i in 0..3 {
+ // check that we can't create dates or datetimes from time
instances
+ assert_eq!(None, arr.value_as_datetime(i));
+ assert_eq!(None, arr.value_as_date(i));
+ let time = arr.value_as_time(i).unwrap();
+ assert_eq!(formatted[i], time.format("%H:%M:%S%.3f").to_string());
+ }
+ }
+
+ #[test]
+ fn test_time64_nanosecond_array_from_vec() {
+ // Test building a primitive array from a vector without null values
+ // we use Int32 and Int64 as a backing array, so all Int32 and Int64
conventions
+ // work
+
+ // 1e6: 00:00:00.001
+ // 37800005e6: 10:30:00.005
+ // 86399210e6: 23:59:59.210
+ let arr: PrimitiveArray<Time64NanosecondType> =
+ vec![1_000_000, 37_800_005_000_000, 86_399_210_000_000].into();
+ assert_eq!(3, arr.len());
+ assert_eq!(0, arr.offset());
+ assert_eq!(0, arr.null_count());
+ let formatted = vec!["00:00:00.001", "10:30:00.005", "23:59:59.210"];
+ for i in 0..3 {
+ // check that we can't create dates or datetimes from time
instances
+ assert_eq!(None, arr.value_as_datetime(i));
+ assert_eq!(None, arr.value_as_date(i));
+ let time = arr.value_as_time(i).unwrap();
+ dbg!(time);
+ assert_eq!(formatted[i], time.format("%H:%M:%S%.3f").to_string());
+ }
+ }
+
+ #[test]
fn test_value_slice_no_bounds_check() {
let arr = Int32Array::from(vec![2, 3, 4]);
let _slice = arr.value_slice(0, 4);
@@ -857,6 +1147,34 @@ mod tests {
}
#[test]
+ fn test_timestamp_fmt_debug() {
+ let arr: PrimitiveArray<TimestampMillisecondType> =
+ vec![1546214400000, 1546214400000].into();
+ assert_eq!(
+ "PrimitiveArray<Timestamp(Millisecond)>\n[\n
2018-12-31T00:00:00,\n 2018-12-31T00:00:00,\n]",
+ format!("{:?}", arr)
+ );
+ }
+
+ #[test]
+ fn test_date32_fmt_debug() {
+ let arr: PrimitiveArray<Date32Type> = vec![12356, 13548].into();
+ assert_eq!(
+ "PrimitiveArray<Date32(Day)>\n[\n 2003-10-31,\n 2007-02-04,\n]",
+ format!("{:?}", arr)
+ );
+ }
+
+ #[test]
+ fn test_time32second_fmt_debug() {
+ let arr: PrimitiveArray<Time32SecondType> = vec![7201, 60054].into();
+ assert_eq!(
+ "PrimitiveArray<Time32(Second)>\n[\n 02:00:01,\n 16:40:54,\n]",
+ format!("{:?}", arr)
+ );
+ }
+
+ #[test]
fn test_primitive_array_builder() {
// Test building a primitive array with ArrayData builder and offset
let buf = Buffer::from(&[0, 1, 2, 3, 4].to_byte_slice());
diff --git a/rust/arrow/src/builder.rs b/rust/arrow/src/builder.rs
index 77dcc24..0cbb5e1 100644
--- a/rust/arrow/src/builder.rs
+++ b/rust/arrow/src/builder.rs
@@ -50,6 +50,17 @@ pub type UInt64BufferBuilder = BufferBuilder<UInt64Type>;
pub type Float32BufferBuilder = BufferBuilder<Float32Type>;
pub type Float64BufferBuilder = BufferBuilder<Float64Type>;
+pub type TimestampSecondBufferBuilder = BufferBuilder<TimestampSecondType>;
+pub type TimestampMillisecondBufferBuilder =
BufferBuilder<TimestampMillisecondType>;
+pub type TimestampMicrosecondBufferBuilder =
BufferBuilder<TimestampMicrosecondType>;
+pub type TimestampNanosecondBufferBuilder =
BufferBuilder<TimestampNanosecondType>;
+pub type Date32BufferBuilder = BufferBuilder<Date32Type>;
+pub type Date64BufferBuilder = BufferBuilder<Date64Type>;
+pub type Time32SecondBufferBuilder = BufferBuilder<Time32SecondType>;
+pub type Time32MillisecondBufferBuilder = BufferBuilder<Time32MillisecondType>;
+pub type Time64MicrosecondBufferBuilder = BufferBuilder<Time64MicrosecondType>;
+pub type Time64NanosecondBufferBuilder = BufferBuilder<Time64NanosecondType>;
+
// Trait for buffer builder. This is used mainly to offer separate
implementations for
// numeric types and boolean types, while still being able to call methods on
buffer builder
// with generic primitive type.
@@ -256,6 +267,17 @@ pub type UInt64Builder = PrimitiveBuilder<UInt64Type>;
pub type Float32Builder = PrimitiveBuilder<Float32Type>;
pub type Float64Builder = PrimitiveBuilder<Float64Type>;
+pub type TimestampSecondBuilder = PrimitiveBuilder<TimestampSecondType>;
+pub type TimestampMillisecondBuilder =
PrimitiveBuilder<TimestampMillisecondType>;
+pub type TimestampMicrosecondBuilder =
PrimitiveBuilder<TimestampMicrosecondType>;
+pub type TimestampNanosecondBuilder =
PrimitiveBuilder<TimestampNanosecondType>;
+pub type Date32Builder = PrimitiveBuilder<Date32Type>;
+pub type Date64Builder = PrimitiveBuilder<Date64Type>;
+pub type Time32SecondBuilder = PrimitiveBuilder<Time32SecondType>;
+pub type Time32MillisecondBuilder = PrimitiveBuilder<Time32MillisecondType>;
+pub type Time64MicrosecondBuilder = PrimitiveBuilder<Time64MicrosecondType>;
+pub type Time64NanosecondBuilder = PrimitiveBuilder<Time64NanosecondType>;
+
impl<T: ArrowPrimitiveType> ArrayBuilder for PrimitiveBuilder<T> {
/// Returns the builder as a non-mutable `Any` reference.
fn as_any(&self) -> &Any {
@@ -861,6 +883,40 @@ mod tests {
}
#[test]
+ fn test_primitive_array_builder_date32() {
+ let mut builder = Date32Array::builder(5);
+ for i in 0..5 {
+ builder.append_value(i).unwrap();
+ }
+ let arr = builder.finish();
+ assert_eq!(5, arr.len());
+ assert_eq!(0, arr.offset());
+ assert_eq!(0, arr.null_count());
+ for i in 0..5 {
+ assert!(!arr.is_null(i));
+ assert!(arr.is_valid(i));
+ assert_eq!(i as i32, arr.value(i));
+ }
+ }
+
+ #[test]
+ fn test_primitive_array_builder_timestamp_second() {
+ let mut builder = TimestampSecondArray::builder(5);
+ for i in 0..5 {
+ builder.append_value(i).unwrap();
+ }
+ let arr = builder.finish();
+ assert_eq!(5, arr.len());
+ assert_eq!(0, arr.offset());
+ assert_eq!(0, arr.null_count());
+ for i in 0..5 {
+ assert!(!arr.is_null(i));
+ assert!(arr.is_valid(i));
+ assert_eq!(i as i64, arr.value(i));
+ }
+ }
+
+ #[test]
fn test_primitive_array_builder_bool() {
// 00000010 01001000
let buf = Buffer::from([72_u8, 2_u8]);
diff --git a/rust/arrow/src/compute/mod.rs
b/rust/arrow/src/compute/kernels/mod.rs
similarity index 77%
copy from rust/arrow/src/compute/mod.rs
copy to rust/arrow/src/compute/kernels/mod.rs
index d718a7a..fbff1ab 100644
--- a/rust/arrow/src/compute/mod.rs
+++ b/rust/arrow/src/compute/kernels/mod.rs
@@ -17,14 +17,4 @@
//! Computation kernels on Arrow Arrays
-pub mod arithmetic_kernels;
-pub mod array_ops;
-pub mod boolean_kernels;
-pub mod comparison_kernels;
-
-mod util;
-
-pub use self::arithmetic_kernels::*;
-pub use self::array_ops::*;
-pub use self::boolean_kernels::*;
-pub use self::comparison_kernels::*;
+pub mod temporal;
diff --git a/rust/arrow/src/compute/kernels/temporal.rs
b/rust/arrow/src/compute/kernels/temporal.rs
new file mode 100644
index 0000000..6e68975
--- /dev/null
+++ b/rust/arrow/src/compute/kernels/temporal.rs
@@ -0,0 +1,81 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Defines temporal kernels for time and date related functions.
+
+use chrono::Timelike;
+
+use crate::array::*;
+use crate::builder::Int32Builder;
+use crate::datatypes::*;
+use crate::error::Result;
+
+/// Extracts the hours of a given temporal array as an array of integers
+pub fn hour<T>(array: &PrimitiveArray<T>) -> Result<Int32Array>
+where
+ T: ArrowTemporalType + ArrowNumericType,
+ i64: std::convert::From<T::Native>,
+{
+ let mut b = Int32Builder::new(array.len());
+ for i in 0..array.len() {
+ if array.is_null(i) {
+ b.append_null()?;
+ } else {
+ match array.data_type() {
+ &DataType::Time32(_) | &DataType::Time64(_) => {
+ match array.value_as_time(i) {
+ Some(time) => b.append_value(time.hour() as i32)?,
+ None => b.append_null()?,
+ }
+ }
+ _ => match array.value_as_datetime(i) {
+ Some(dt) => b.append_value(dt.hour() as i32)?,
+ None => b.append_null()?,
+ },
+ }
+ }
+ }
+
+ Ok(b.finish())
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn test_temporal_array_date64_hour() {
+ let a: PrimitiveArray<Date64Type> =
+ vec![Some(1514764800000), None, Some(1550636625000)].into();
+
+ // get hour from temporal
+ let b = hour(&a).unwrap();
+ assert_eq!(0, b.value(0));
+ assert_eq!(false, b.is_valid(1));
+ assert_eq!(4, b.value(2));
+ }
+
+ #[test]
+ fn test_temporal_array_time32_second_hour() {
+ let a: PrimitiveArray<Time32SecondType> = vec![37800, 86339].into();
+
+ // get hour from temporal
+ let b = hour(&a).unwrap();
+ assert_eq!(10, b.value(0));
+ assert_eq!(23, b.value(1));
+ }
+}
diff --git a/rust/arrow/src/compute/mod.rs b/rust/arrow/src/compute/mod.rs
index d718a7a..8a4d2e3 100644
--- a/rust/arrow/src/compute/mod.rs
+++ b/rust/arrow/src/compute/mod.rs
@@ -21,6 +21,7 @@ pub mod arithmetic_kernels;
pub mod array_ops;
pub mod boolean_kernels;
pub mod comparison_kernels;
+pub mod kernels;
mod util;
@@ -28,3 +29,4 @@ pub use self::arithmetic_kernels::*;
pub use self::array_ops::*;
pub use self::boolean_kernels::*;
pub use self::comparison_kernels::*;
+pub use self::kernels::temporal::*;
diff --git a/rust/arrow/src/datatypes.rs b/rust/arrow/src/datatypes.rs
index 92c91d2..e0b6d70 100644
--- a/rust/arrow/src/datatypes.rs
+++ b/rust/arrow/src/datatypes.rs
@@ -59,7 +59,8 @@ pub enum DataType {
Float32,
Float64,
Timestamp(TimeUnit),
- Date(DateUnit),
+ Date32(DateUnit),
+ Date64(DateUnit),
Time32(TimeUnit),
Time64(TimeUnit),
Interval(IntervalUnit),
@@ -120,10 +121,20 @@ pub trait ArrowPrimitiveType: 'static {
fn default_value() -> Self::Native;
}
-macro_rules! make_type {
- ($name:ident, $native_ty:ty, $data_ty:path, $bit_width:expr,
$default_val:expr) => {
- impl ArrowNativeType for $native_ty {}
+impl ArrowNativeType for bool {}
+impl ArrowNativeType for i8 {}
+impl ArrowNativeType for i16 {}
+impl ArrowNativeType for i32 {}
+impl ArrowNativeType for i64 {}
+impl ArrowNativeType for u8 {}
+impl ArrowNativeType for u16 {}
+impl ArrowNativeType for u32 {}
+impl ArrowNativeType for u64 {}
+impl ArrowNativeType for f32 {}
+impl ArrowNativeType for f64 {}
+macro_rules! make_type {
+ ($name:ident, $native_ty:ty, $data_ty:expr, $bit_width:expr,
$default_val:expr) => {
pub struct $name {}
impl ArrowPrimitiveType for $name {
@@ -155,6 +166,84 @@ make_type!(UInt32Type, u32, DataType::UInt32, 32, 0u32);
make_type!(UInt64Type, u64, DataType::UInt64, 64, 0u64);
make_type!(Float32Type, f32, DataType::Float32, 32, 0.0f32);
make_type!(Float64Type, f64, DataType::Float64, 64, 0.0f64);
+make_type!(
+ TimestampSecondType,
+ i64,
+ DataType::Timestamp(TimeUnit::Second),
+ 64,
+ 0i64
+);
+make_type!(
+ TimestampMillisecondType,
+ i64,
+ DataType::Timestamp(TimeUnit::Millisecond),
+ 64,
+ 0i64
+);
+make_type!(
+ TimestampMicrosecondType,
+ i64,
+ DataType::Timestamp(TimeUnit::Microsecond),
+ 64,
+ 0i64
+);
+make_type!(
+ TimestampNanosecondType,
+ i64,
+ DataType::Timestamp(TimeUnit::Nanosecond),
+ 64,
+ 0i64
+);
+make_type!(Date32Type, i32, DataType::Date32(DateUnit::Day), 32, 0i32);
+make_type!(
+ Date64Type,
+ i64,
+ DataType::Date64(DateUnit::Millisecond),
+ 64,
+ 0i64
+);
+make_type!(
+ Time32SecondType,
+ i32,
+ DataType::Time32(TimeUnit::Second),
+ 32,
+ 0i32
+);
+make_type!(
+ Time32MillisecondType,
+ i32,
+ DataType::Time32(TimeUnit::Millisecond),
+ 32,
+ 0i32
+);
+make_type!(
+ Time64MicrosecondType,
+ i64,
+ DataType::Time64(TimeUnit::Microsecond),
+ 64,
+ 0i64
+);
+make_type!(
+ Time64NanosecondType,
+ i64,
+ DataType::Time64(TimeUnit::Nanosecond),
+ 64,
+ 0i64
+);
+make_type!(
+ IntervalYearMonthType,
+ i64,
+ DataType::Interval(IntervalUnit::YearMonth),
+ 64,
+ 0i64
+);
+make_type!(
+ IntervalDayTimeType,
+ i64,
+ DataType::Interval(IntervalUnit::DayTime),
+ 64,
+ 0i64
+);
/// A subtype of primitive type that represents numeric values.
///
@@ -286,6 +375,35 @@ make_numeric_type!(UInt64Type, u64, u64x8, m64x8);
make_numeric_type!(Float32Type, f32, f32x16, m32x16);
make_numeric_type!(Float64Type, f64, f64x8, m64x8);
+make_numeric_type!(TimestampSecondType, i64, i64x8, m64x8);
+make_numeric_type!(TimestampMillisecondType, i64, i64x8, m64x8);
+make_numeric_type!(TimestampMicrosecondType, i64, i64x8, m64x8);
+make_numeric_type!(TimestampNanosecondType, i64, i64x8, m64x8);
+make_numeric_type!(Date32Type, i32, i32x16, m32x16);
+make_numeric_type!(Date64Type, i64, i64x8, m64x8);
+make_numeric_type!(Time32SecondType, i32, i32x16, m32x16);
+make_numeric_type!(Time32MillisecondType, i32, i32x16, m32x16);
+make_numeric_type!(Time64MicrosecondType, i64, i64x8, m64x8);
+make_numeric_type!(Time64NanosecondType, i64, i64x8, m64x8);
+make_numeric_type!(IntervalYearMonthType, i64, i64x8, m64x8);
+make_numeric_type!(IntervalDayTimeType, i64, i64x8, m64x8);
+
+/// A subtype of primitive type that represents temporal values.
+pub trait ArrowTemporalType: ArrowPrimitiveType {}
+
+impl ArrowTemporalType for TimestampSecondType {}
+impl ArrowTemporalType for TimestampMillisecondType {}
+impl ArrowTemporalType for TimestampMicrosecondType {}
+impl ArrowTemporalType for TimestampNanosecondType {}
+impl ArrowTemporalType for Date32Type {}
+impl ArrowTemporalType for Date64Type {}
+impl ArrowTemporalType for Time32SecondType {}
+impl ArrowTemporalType for Time32MillisecondType {}
+impl ArrowTemporalType for Time64MicrosecondType {}
+impl ArrowTemporalType for Time64NanosecondType {}
+impl ArrowTemporalType for IntervalYearMonthType {}
+impl ArrowTemporalType for IntervalDayTimeType {}
+
/// Allows conversion from supported Arrow types to a byte slice.
pub trait ToByteSlice {
/// Converts this instance into a byte slice
@@ -337,9 +455,9 @@ impl DataType {
)),
},
Some(s) if s == "date" => match map.get("unit") {
- Some(p) if p == "DAY" => Ok(DataType::Date(DateUnit::Day)),
+ Some(p) if p == "DAY" =>
Ok(DataType::Date32(DateUnit::Day)),
Some(p) if p == "MILLISECOND" => {
- Ok(DataType::Date(DateUnit::Millisecond))
+ Ok(DataType::Date64(DateUnit::Millisecond))
}
_ => Err(ArrowError::ParseError(
"date unit missing or invalid".to_string(),
@@ -470,10 +588,12 @@ impl DataType {
TimeUnit::Nanosecond => "NANOSECOND",
}})
}
- DataType::Date(unit) => json!({"name": "date", "unit": match unit {
- DateUnit::Day => "DAY",
- DateUnit::Millisecond => "MILLISECOND",
- }}),
+ DataType::Date32(unit) | DataType::Date64(unit) => {
+ json!({"name": "date", "unit": match unit {
+ DateUnit::Day => "DAY",
+ DateUnit::Millisecond => "MILLISECOND",
+ }})
+ }
DataType::Timestamp(unit) => json!({"name": "timestamp", "unit":
match unit {
TimeUnit::Second => "SECOND",
TimeUnit::Millisecond => "MILLISECOND",
@@ -769,8 +889,8 @@ mod tests {
fn schema_json() {
let schema = Schema::new(vec![
Field::new("c1", DataType::Utf8, false),
- Field::new("c2", DataType::Date(DateUnit::Day), false),
- Field::new("c3", DataType::Date(DateUnit::Millisecond), false),
+ Field::new("c2", DataType::Date32(DateUnit::Day), false),
+ Field::new("c3", DataType::Date64(DateUnit::Millisecond), false),
Field::new("c7", DataType::Time32(TimeUnit::Second), false),
Field::new("c8", DataType::Time32(TimeUnit::Millisecond), false),
Field::new("c9", DataType::Time32(TimeUnit::Microsecond), false),