This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 61c4f12e8 Support timestamp/time and date json decoding (#3835)
61c4f12e8 is described below

commit 61c4f12e84330db243789fc98375512d67628e57
Author: bold <[email protected]>
AuthorDate: Fri Mar 10 12:30:54 2023 +0100

    Support timestamp/time and date json decoding (#3835)
    
    * Support timestamp/time and date json decoding
    
    * Don't support timezones for now
    
    Co-authored-by: Raphael Taylor-Davies 
<[email protected]>
    
    ---------
    
    Co-authored-by: Raphael Taylor-Davies 
<[email protected]>
---
 arrow-json/src/raw/mod.rs | 191 +++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 187 insertions(+), 4 deletions(-)

diff --git a/arrow-json/src/raw/mod.rs b/arrow-json/src/raw/mod.rs
index 5b699b1d5..1ab879d20 100644
--- a/arrow-json/src/raw/mod.rs
+++ b/arrow-json/src/raw/mod.rs
@@ -30,7 +30,7 @@ use crate::raw::tape::{Tape, TapeDecoder, TapeElement};
 use arrow_array::types::*;
 use arrow_array::{downcast_integer, make_array, RecordBatch, 
RecordBatchReader};
 use arrow_data::ArrayData;
-use arrow_schema::{ArrowError, DataType, SchemaRef};
+use arrow_schema::{ArrowError, DataType, SchemaRef, TimeUnit};
 use std::io::BufRead;
 
 mod boolean_array;
@@ -293,6 +293,16 @@ fn make_decoder(
         data_type => (primitive_decoder, data_type),
         DataType::Float32 => primitive_decoder!(Float32Type, data_type),
         DataType::Float64 => primitive_decoder!(Float64Type, data_type),
+        DataType::Timestamp(TimeUnit::Second, None) => 
primitive_decoder!(TimestampSecondType, data_type),
+        DataType::Timestamp(TimeUnit::Millisecond, None) => 
primitive_decoder!(TimestampMillisecondType, data_type),
+        DataType::Timestamp(TimeUnit::Microsecond, None) => 
primitive_decoder!(TimestampMicrosecondType, data_type),
+        DataType::Timestamp(TimeUnit::Nanosecond, None) => 
primitive_decoder!(TimestampNanosecondType, data_type),
+        DataType::Date32 => primitive_decoder!(Date32Type, data_type),
+        DataType::Date64 => primitive_decoder!(Date64Type, data_type),
+        DataType::Time32(TimeUnit::Second) => 
primitive_decoder!(Time32SecondType, data_type),
+        DataType::Time32(TimeUnit::Millisecond) => 
primitive_decoder!(Time32MillisecondType, data_type),
+        DataType::Time64(TimeUnit::Microsecond) => 
primitive_decoder!(Time64MicrosecondType, data_type),
+        DataType::Time64(TimeUnit::Nanosecond) => 
primitive_decoder!(Time64NanosecondType, data_type),
         DataType::Decimal128(p, s) => 
Ok(Box::new(DecimalArrayDecoder::<Decimal128Type>::new(p, s))),
         DataType::Decimal256(p, s) => 
Ok(Box::new(DecimalArrayDecoder::<Decimal256Type>::new(p, s))),
         DataType::Boolean => Ok(Box::<BooleanArrayDecoder>::default()),
@@ -373,10 +383,10 @@ mod tests {
     #[test]
     fn test_basic() {
         let buf = r#"
-        {"a": 1, "b": 2, "c": true}
-        {"a": 2E0, "b": 4, "c": false}
+        {"a": 1, "b": 2, "c": true, "d": 1}
+        {"a": 2E0, "b": 4, "c": false, "d": 2, "e": 254}
 
-        {"b": 6, "a": 2.0}
+        {"b": 6, "a": 2.0, "d": 45}
         {"b": "5", "a": 2}
         {"b": 4e0}
         {"b": 7, "a": null}
@@ -386,6 +396,8 @@ mod tests {
             Field::new("a", DataType::Int64, true),
             Field::new("b", DataType::Int32, true),
             Field::new("c", DataType::Boolean, true),
+            Field::new("d", DataType::Date32, true),
+            Field::new("e", DataType::Date64, true),
         ]));
 
         let batches = do_read(buf, 1024, false, schema);
@@ -407,6 +419,18 @@ mod tests {
         assert!(!col3.is_null(0));
         assert!(!col3.value(1));
         assert!(!col3.is_null(1));
+
+        let col4 = as_primitive_array::<Date32Type>(batches[0].column(3));
+        assert_eq!(col4.null_count(), 3);
+        assert!(col4.is_null(3));
+        assert_eq!(col4.values(), &[1, 2, 45, 0, 0, 0]);
+
+        let col5 = as_primitive_array::<Date64Type>(batches[0].column(4));
+        assert_eq!(col5.null_count(), 5);
+        assert!(col5.is_null(0));
+        assert!(col5.is_null(2));
+        assert!(col5.is_null(3));
+        assert_eq!(col5.values(), &[0, 254, 0, 0, 0, 0]);
     }
 
     #[test]
@@ -782,4 +806,163 @@ mod tests {
         test_decimal::<Decimal128Type>(DataType::Decimal128(10, 2));
         test_decimal::<Decimal256Type>(DataType::Decimal256(10, 2));
     }
+
+    fn test_timestamp<T: ArrowTimestampType>() {
+        let buf = r#"
+        {"a": 1, "b": "2020-09-08T13:42:29.190855+00:00", "c": 38.30}
+        {"a": 2, "b": "2020-09-08T13:42:29.190855Z", "c": 123.456}
+
+        {"b": 1337, "b": "2020-09-08T13:42:29Z", "c": 
"1997-01-31T09:26:56.123"}
+        {"b": 40, "c": "2020-09-08T13:42:29.190855+00:00"}
+        {"b": 1234, "a": null, "c": "1997-01-31 09:26:56.123Z"}
+        {"c": "1997-01-31T14:26:56.123-05:00"}
+        "#;
+
+        let schema = Arc::new(Schema::new(vec![
+            Field::new("a", T::DATA_TYPE, true),
+            Field::new("b", T::DATA_TYPE, true),
+            Field::new("c", T::DATA_TYPE, true),
+        ]));
+
+        let batches = do_read(buf, 1024, true, schema);
+        assert_eq!(batches.len(), 1);
+
+        let unit = match T::DATA_TYPE {
+            DataType::Timestamp(unit, _) => unit,
+            _ => unreachable!(),
+        };
+        let unit_in_nanos = match unit {
+            TimeUnit::Second => 1_000_000_000,
+            TimeUnit::Millisecond => 1_000_000,
+            TimeUnit::Microsecond => 1_000,
+            TimeUnit::Nanosecond => 1,
+        };
+
+        let col1 = as_primitive_array::<T>(batches[0].column(0));
+        assert_eq!(col1.null_count(), 4);
+        assert!(col1.is_null(2));
+        assert!(col1.is_null(3));
+        assert!(col1.is_null(4));
+        assert!(col1.is_null(5));
+        assert_eq!(col1.values(), &[1, 2, 0, 0, 0, 
0].map(T::Native::usize_as));
+
+        let col2 = as_primitive_array::<T>(batches[0].column(1));
+        assert_eq!(col2.null_count(), 1);
+        assert!(col2.is_null(5));
+        assert_eq!(
+            col2.values(),
+            &[
+                1599572549190855000 / unit_in_nanos,
+                1599572549190855000 / unit_in_nanos,
+                1599572549000000000 / unit_in_nanos,
+                40,
+                1234,
+                0
+            ]
+            .map(T::Native::usize_as)
+        );
+
+        let col3 = as_primitive_array::<T>(batches[0].column(2));
+        assert_eq!(col3.null_count(), 0);
+        assert_eq!(
+            col3.values(),
+            &[
+                38,
+                123,
+                854702816123000000 / unit_in_nanos,
+                1599572549190855000 / unit_in_nanos,
+                854702816123000000 / unit_in_nanos,
+                854738816123000000 / unit_in_nanos
+            ]
+            .map(T::Native::usize_as)
+        );
+    }
+
+    #[test]
+    fn test_timestamps() {
+        test_timestamp::<TimestampSecondType>();
+        test_timestamp::<TimestampMillisecondType>();
+        test_timestamp::<TimestampMicrosecondType>();
+        test_timestamp::<TimestampNanosecondType>();
+    }
+
+    fn test_time<T: ArrowTemporalType>() {
+        let buf = r#"
+        {"a": 1, "b": "09:26:56.123 AM", "c": 38.30}
+        {"a": 2, "b": "23:59:59", "c": 123.456}
+
+        {"b": 1337, "b": "6:00 pm", "c": "09:26:56.123"}
+        {"b": 40, "c": "13:42:29.190855"}
+        {"b": 1234, "a": null, "c": "09:26:56.123"}
+        {"c": "14:26:56.123"}
+        "#;
+
+        let unit = match T::DATA_TYPE {
+            DataType::Time32(unit) | DataType::Time64(unit) => unit,
+            _ => unreachable!(),
+        };
+
+        let unit_in_nanos = match unit {
+            TimeUnit::Second => 1_000_000_000,
+            TimeUnit::Millisecond => 1_000_000,
+            TimeUnit::Microsecond => 1_000,
+            TimeUnit::Nanosecond => 1,
+        };
+
+        let schema = Arc::new(Schema::new(vec![
+            Field::new("a", T::DATA_TYPE, true),
+            Field::new("b", T::DATA_TYPE, true),
+            Field::new("c", T::DATA_TYPE, true),
+        ]));
+
+        let batches = do_read(buf, 1024, true, schema);
+        assert_eq!(batches.len(), 1);
+
+        let col1 = as_primitive_array::<T>(batches[0].column(0));
+        assert_eq!(col1.null_count(), 4);
+        assert!(col1.is_null(2));
+        assert!(col1.is_null(3));
+        assert!(col1.is_null(4));
+        assert!(col1.is_null(5));
+        assert_eq!(col1.values(), &[1, 2, 0, 0, 0, 
0].map(T::Native::usize_as));
+
+        let col2 = as_primitive_array::<T>(batches[0].column(1));
+        assert_eq!(col2.null_count(), 1);
+        assert!(col2.is_null(5));
+        assert_eq!(
+            col2.values(),
+            &[
+                34016123000000 / unit_in_nanos,
+                86399000000000 / unit_in_nanos,
+                64800000000000 / unit_in_nanos,
+                40,
+                1234,
+                0
+            ]
+            .map(T::Native::usize_as)
+        );
+
+        let col3 = as_primitive_array::<T>(batches[0].column(2));
+        assert_eq!(col3.null_count(), 0);
+        assert_eq!(
+            col3.values(),
+            &[
+                38,
+                123,
+                34016123000000 / unit_in_nanos,
+                49349190855000 / unit_in_nanos,
+                34016123000000 / unit_in_nanos,
+                52016123000000 / unit_in_nanos
+            ]
+            .map(T::Native::usize_as)
+        );
+    }
+
+    #[test]
+    fn test_times() {
+        test_time::<Time32MillisecondType>();
+        test_time::<Time32SecondType>();
+        test_time::<Time64MicrosecondType>();
+        test_time::<Time64NanosecondType>();
+    }
 }

Reply via email to