alamb commented on a change in pull request #849:
URL: https://github.com/apache/arrow-rs/pull/849#discussion_r746624516



##########
File path: arrow/src/compute/kernels/temporal.rs
##########
@@ -435,4 +452,78 @@ mod tests {
         ));
         assert!(matches!(hour(&a), Err(ArrowError::ComputeError(_))))
     }
+
+    #[cfg(feature = "chrono-tz")]
+    #[test]
+    fn test_using_chrono_tz() {
+        let sydney_offset = FixedOffset::east(10 * 60 * 60);
+        assert_eq!(
+            using_chrono_tz(&"Australia/Sydney".to_string()),
+            Some(sydney_offset)
+        );
+
+        let nyc_offset = FixedOffset::west(5 * 60 * 60);
+        assert_eq!(
+            using_chrono_tz(&"America/New_York".to_string()),
+            Some(nyc_offset)
+        );
+    }
+
+    #[cfg(feature = "chrono-tz")]
+    #[test]
+    fn test_using_chrono_tz_and_utc_naive_date_time() {
+        let sydney_tz = "Australia/Sydney".to_string();
+        let sydney_offset_without_dst = FixedOffset::east(10 * 60 * 60);
+        let sydney_offset_with_dst = FixedOffset::east(11 * 60 * 60);
+        // Daylight savings ends
+        // When local daylight time was about to reach
+        // Sunday, 4 April 2021, 3:00:00 am clocks were turned backward 1 hour 
to
+        // Sunday, 4 April 2021, 2:00:00 am local standard time instead.
+
+        // Daylight savings starts
+        // When local standard time was about to reach
+        // Sunday, 3 October 2021, 2:00:00 am clocks were turned forward 1 
hour to
+        // Sunday, 3 October 2021, 3:00:00 am local daylight time instead.
+
+        // Sydney 2021-04-04T02:30:00+11:00 is 2021-04-03T15:30:00Z

Review comment:
       I really like the names of the variables and comments in this test 👍 . 
It makes them very easy to understand

##########
File path: arrow/src/csv/writer.rs
##########
@@ -626,6 +787,134 @@ sed do eiusmod 
tempor,-556132.25,1,,2019-04-18T02:45:55.555000000,23:46:03,\n";
         assert_eq!(Some(left.to_string()), right.ok());
     }
 
+    #[cfg(feature = "chrono-tz")]
+    #[test]
+    fn test_export_csv_timestamps() {
+        let schema = Schema::new(vec![
+            Field::new(
+                "c1",
+                DataType::Timestamp(
+                    TimeUnit::Millisecond,
+                    Some("Australia/Sydney".to_string()),
+                ),
+                true,
+            ),
+            Field::new("c2", DataType::Timestamp(TimeUnit::Millisecond, None), 
true),
+        ]);
+
+        let c1 = TimestampMillisecondArray::from_opt_vec(
+            // 1555584887 converts to 2019-04-18, 20:54:47 in time zone 
Australia/Sydney (AEST).
+            // The offset (difference to UTC) is +10:00.
+            // 1635577147 converts to 2021-10-30 17:59:07 in time zone 
Australia/Sydney (AEDT)

Review comment:
       👍 

##########
File path: arrow/src/csv/writer.rs
##########
@@ -675,4 +964,76 @@ sed do eiusmod 
tempor,-556132.25,1,,2019-04-18T02:45:55.555000000,23:46:03,\n";
         let expected = vec![Some(3), Some(2), Some(1)];
         assert_eq!(actual, expected);
     }
+
+    #[cfg(feature = "chrono-tz")]
+    #[test]
+    fn test_conversion_consistency() {
+        // test if we can serialize and deserialize whilst retaining the same 
type information/ precision

Review comment:
       👍 

##########
File path: arrow/src/csv/writer.rs
##########
@@ -626,6 +787,134 @@ sed do eiusmod 
tempor,-556132.25,1,,2019-04-18T02:45:55.555000000,23:46:03,\n";
         assert_eq!(Some(left.to_string()), right.ok());
     }
 
+    #[cfg(feature = "chrono-tz")]
+    #[test]
+    fn test_export_csv_timestamps() {
+        let schema = Schema::new(vec![
+            Field::new(
+                "c1",
+                DataType::Timestamp(
+                    TimeUnit::Millisecond,
+                    Some("Australia/Sydney".to_string()),
+                ),
+                true,
+            ),
+            Field::new("c2", DataType::Timestamp(TimeUnit::Millisecond, None), 
true),
+        ]);
+
+        let c1 = TimestampMillisecondArray::from_opt_vec(
+            // 1555584887 converts to 2019-04-18, 20:54:47 in time zone 
Australia/Sydney (AEST).
+            // The offset (difference to UTC) is +10:00.
+            // 1635577147 converts to 2021-10-30 17:59:07 in time zone 
Australia/Sydney (AEDT)
+            // The offset (difference to UTC) is +11:00. Note that daylight 
savings is in effect on 2021-10-30.
+            //
+            vec![Some(1555584887378), Some(1635577147000)],
+            Some("Australia/Sydney".to_string()),
+        );
+        let c2 = TimestampMillisecondArray::from_opt_vec(
+            vec![Some(1555584887378), Some(1635577147000)],
+            None,
+        );
+        let batch =
+            RecordBatch::try_new(Arc::new(schema), vec![Arc::new(c1), 
Arc::new(c2)])
+                .unwrap();
+
+        let sw = StringWriter::new();
+        let mut writer = Writer::new(sw);
+        let batches = vec![&batch];
+        for batch in batches {
+            writer.write(batch).unwrap();
+        }
+
+        let left = "c1,c2
+2019-04-18T20:54:47.378000000+10:00,2019-04-18T10:54:47.378000000Z
+2021-10-30T17:59:07.000000000+11:00,2021-10-30T06:59:07.000000000Z\n";
+        let right = writer.writer.into_inner().map(|s| s.to_string());
+        assert_eq!(Some(left.to_string()), right.ok());
+    }
+
+    #[cfg(feature = "chrono-tz")]
+    #[test]
+    fn test_export_csv_string() {

Review comment:
       is this testing anything that is not covered in `test_csv_writer()`?

##########
File path: arrow/src/csv/writer.rs
##########
@@ -499,6 +582,83 @@ sed do eiusmod 
tempor,-556132.25,1,,2019-04-18T02:45:55.555000000,23:46:03,foo
         );
     }
 
+    #[cfg(feature = "chrono-tz")]
+    #[test]
+    fn test_write_csv() {

Review comment:
       What do you think about reducing the replication in the test by keeping 
a single `test_write_csv` function and instead conditionalize the expected 
output. This would not only reduce the replication, it would also make it 
clearer what was the expected difference
   
   Something like (untested)
   ```rust
       #[test]
       fn test_write_csv() {
   ...
       #[cfg(not(feature = "chrono-tz"))]
       let expected = r#"c1,c2,c3,c4,c5,c6,c7
   Lorem ipsum dolor sit amet,123.564532,3,true,,00:20:34,cupcakes
   consectetur adipiscing 
elit,,2,false,2019-04-18T10:54:47.378000000,06:51:20,cupcakes
   sed do eiusmod 
tempor,-556132.25,1,,2019-04-18T02:45:55.555000000,23:46:03,foo
   Lorem ipsum dolor sit amet,123.564532,3,true,,00:20:34,cupcakes
   consectetur adipiscing 
elit,,2,false,2019-04-18T10:54:47.378000000,06:51:20,cupcakes
   sed do eiu
   "#;
   
       #[cfg(feature = "chrono-tz")]
       let expected = r#"c1,c2,c3,c4,c5,c6,c7
   Lorem ipsum dolor sit amet,123.564532,3,true,,00:20:34,cupcakes
   consectetur adipiscing 
elit,,2,false,2019-04-18T10:54:47.378000000Z,06:51:20,cupcakes
   sed do eiusmod 
tempor,-556132.25,1,,2019-04-18T02:45:55.555000000Z,23:46:03,foo
   Lorem ipsum dolor sit amet,123.564532,3,true,,00:20:34,cupcakes
   consectetur adipiscing 
elit,,2,false,2019-04-18T10:54:47.378000000Z,06:51:20,cupcakes
   sed do eiusmod 
tempor,-556132.25,1,,2019-04-18T02:45:55.555000000Z,23:46:03,foo
   "#;
   
   assert_eq!(expected.to_string(), String::from_utf8(buffer).unwrap());
   ```
   

##########
File path: arrow/src/csv/writer.rs
##########
@@ -258,6 +240,100 @@ impl<W: Write> Writer<W> {
         Ok(())
     }
 
+    #[cfg(not(feature = "chrono-tz"))]
+    fn handle_timestamp(
+        &self,
+        time_unit: &TimeUnit,
+        _time_zone: &Option<String>,
+        row_index: usize,
+        col: &ArrayRef,
+    ) -> Result<String> {
+        use TimeUnit::*;
+        let datetime = match time_unit {
+            Second => col
+                .as_any()
+                .downcast_ref::<TimestampSecondArray>()
+                .unwrap()
+                .value_as_datetime(row_index)
+                .unwrap(),
+            Millisecond => col
+                .as_any()
+                .downcast_ref::<TimestampMillisecondArray>()
+                .unwrap()
+                .value_as_datetime(row_index)
+                .unwrap(),
+            Microsecond => col
+                .as_any()
+                .downcast_ref::<TimestampMicrosecondArray>()
+                .unwrap()
+                .value_as_datetime(row_index)
+                .unwrap(),
+            Nanosecond => col
+                .as_any()
+                .downcast_ref::<TimestampNanosecondArray>()
+                .unwrap()
+                .value_as_datetime(row_index)
+                .unwrap(),
+        };
+        Ok(format!("{}", datetime.format(&self.timestamp_format)))
+    }
+
+    #[cfg(feature = "chrono-tz")]
+    fn handle_timestamp(
+        &self,
+        time_unit: &TimeUnit,
+        time_zone: &Option<String>,
+        row_index: usize,
+        col: &ArrayRef,
+    ) -> Result<String> {
+        use TimeUnit::*;
+
+        let datetime = match time_unit {
+            Second => col
+                .as_any()
+                .downcast_ref::<TimestampSecondArray>()
+                .unwrap()
+                .value_as_datetime(row_index)
+                .unwrap(),
+            Millisecond => col
+                .as_any()
+                .downcast_ref::<TimestampMillisecondArray>()
+                .unwrap()
+                .value_as_datetime(row_index)
+                .unwrap(),
+            Microsecond => col
+                .as_any()
+                .downcast_ref::<TimestampMicrosecondArray>()
+                .unwrap()
+                .value_as_datetime(row_index)
+                .unwrap(),
+            Nanosecond => col
+                .as_any()
+                .downcast_ref::<TimestampNanosecondArray>()
+                .unwrap()
+                .value_as_datetime(row_index)
+                .unwrap(),
+        };
+        match time_zone {
+            None => Ok(format!("{}Z", 
datetime.format(&self.timestamp_format))),
+            Some(tzs) => match using_chrono_tz_and_utc_naive_date_time(tzs, 
datetime) {

Review comment:
       I think we can avoid some of this code duplication by following the 
model of
   
   What would you think about following the model of `using_chrono_tz` -- and 
providing an implementation of `using_chrono_tz_and_utc_naive_date_time` that 
doesn't do any conversion of `chrono-tz` isn't available. 
   
   Something like the following (not tested)
   
   ```rust
   /// Ignore timezones if chrono-tz is not enabled and as utc
   #[not(cfg(feature = "chrono-tz")]
   pub fn using_chrono_tz_and_utc_naive_date_time(
       tz: &str,
       utc: chrono::NaiveDateTime,
   ) -> Option<FixedOffset> {
     Some(FixedOffset(0))
   }
   ```
   
   The code in `arrow/src/csv/writer.rs` could thenbe simpler and always call 
`using_chrono_tz_and_utc_naive_date_time` but if `chrono-tz` wasn't enabled 
then it would retain the current behavior
   
   

##########
File path: arrow/src/csv/writer.rs
##########
@@ -675,4 +964,76 @@ sed do eiusmod 
tempor,-556132.25,1,,2019-04-18T02:45:55.555000000,23:46:03,\n";
         let expected = vec![Some(3), Some(2), Some(1)];
         assert_eq!(actual, expected);
     }
+
+    #[cfg(feature = "chrono-tz")]
+    #[test]
+    fn test_conversion_consistency() {
+        // test if we can serialize and deserialize whilst retaining the same 
type information/ precision
+
+        let schema = Schema::new(vec![
+            Field::new("c1", DataType::Date32, false),
+            Field::new("c2", DataType::Date64, false),
+            Field::new("c3", DataType::Timestamp(TimeUnit::Nanosecond, None), 
false),
+        ]);
+
+        let c1 = Date32Array::from(vec![3, 2, 1]);
+        let c2 = Date64Array::from(vec![3, 2, 1]);
+        let c3 = TimestampNanosecondArray::from_vec(
+            vec![
+                1599566300000000000,
+                1599566200000000000,
+                1599566100000000000,
+            ],
+            None,
+        );
+
+        let batch = RecordBatch::try_new(
+            Arc::new(schema.clone()),
+            vec![Arc::new(c1), Arc::new(c2), Arc::new(c3)],
+        )
+        .unwrap();
+
+        let builder = WriterBuilder::new().has_headers(false);
+
+        let mut buf: Cursor<Vec<u8>> = Default::default();
+        // drop the writer early to release the borrow.
+        {
+            let mut writer = builder.build(&mut buf);
+            writer.write(&batch).unwrap();
+        }
+        buf.set_position(0);
+
+        let mut reader = Reader::new(
+            buf,
+            Arc::new(schema),
+            false,
+            None,
+            3,
+            // starting at row 2 and up to row 6.
+            None,
+            None,
+        );
+        let rb = reader.next().unwrap().unwrap();
+        let c1 = rb.column(0).as_any().downcast_ref::<Date32Array>().unwrap();
+        let c2 = rb.column(1).as_any().downcast_ref::<Date64Array>().unwrap();
+        let c3 = rb
+            .column(2)
+            .as_any()
+            .downcast_ref::<TimestampNanosecondArray>()
+            .unwrap();
+
+        let actual = c1.into_iter().collect::<Vec<_>>();
+        let expected = vec![Some(3), Some(2), Some(1)];
+        assert_eq!(actual, expected);
+        let actual = c2.into_iter().collect::<Vec<_>>();
+        let expected = vec![Some(3), Some(2), Some(1)];
+        assert_eq!(actual, expected);
+        let actual = c3.into_iter().collect::<Vec<_>>();
+        let expected = vec![

Review comment:
       I wonder if it would be possible to use the same vec in the creation of 
the input and the expected output which would make it explicit they were 
supposed to be the same
   
   Something like 
   
   ```rust
           let c3 = TimestampNanosecondArray::from_vec(expected.clone(), None);
   ```
   
   above, perhaps




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to