This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new d8a4f9847 Treat legacy TIMESTAMP_X converted types as UTC (#4309)
d8a4f9847 is described below

commit d8a4f984783653462d324530ccbf95400a703a54
Author: Sergii Mikhtoniuk <[email protected]>
AuthorDate: Wed May 31 03:04:43 2023 -0700

    Treat legacy TIMESTAMP_X converted types as UTC (#4309)
---
 parquet/src/arrow/array_reader/primitive_array.rs | 24 ++++++++++++++++++-----
 parquet/src/arrow/schema/mod.rs                   |  4 ++--
 parquet/src/arrow/schema/primitive.rs             | 14 +++++++------
 3 files changed, 29 insertions(+), 13 deletions(-)

diff --git a/parquet/src/arrow/array_reader/primitive_array.rs b/parquet/src/arrow/array_reader/primitive_array.rs
index bef27dc7a..ec0d29e8b 100644
--- a/parquet/src/arrow/array_reader/primitive_array.rs
+++ b/parquet/src/arrow/array_reader/primitive_array.rs
@@ -438,7 +438,15 @@ mod tests {
     }
 
     macro_rules! test_primitive_array_reader_one_type {
-        ($arrow_parquet_type:ty, $physical_type:expr, 
$converted_type_str:expr, $result_arrow_type:ty, $result_arrow_cast_type:ty, 
$result_primitive_type:ty) => {{
+        (
+            $arrow_parquet_type:ty,
+            $physical_type:expr,
+            $converted_type_str:expr,
+            $result_arrow_type:ty,
+            $result_arrow_cast_type:ty,
+            $result_primitive_type:ty
+            $(, $timezone:expr)?
+        ) => {{
             let message_type = format!(
                 "
             message test_schema {{
@@ -493,7 +501,9 @@ mod tests {
                             result_data_type
                         )
                         .as_str(),
-                    );
+                    )
+                    $(.with_timezone($timezone))?
+                    ;
 
                 // create expected array as primitive, and cast to result type
                 let expected = PrimitiveArray::<$result_arrow_cast_type>::from(
@@ -516,7 +526,9 @@ mod tests {
                             result_data_type
                         )
                         .as_str(),
-                    );
+                    )
+                    $(.with_timezone($timezone))?
+                    ;
                 assert_eq!(expected, array);
             }
         }};
@@ -554,7 +566,8 @@ mod tests {
             "TIMESTAMP_MILLIS",
             arrow::datatypes::TimestampMillisecondType,
             arrow::datatypes::Int64Type,
-            i64
+            i64,
+            "UTC"
         );
         test_primitive_array_reader_one_type!(
             crate::data_type::Int64Type,
@@ -562,7 +575,8 @@ mod tests {
             "TIMESTAMP_MICROS",
             arrow::datatypes::TimestampMicrosecondType,
             arrow::datatypes::Int64Type,
-            i64
+            i64,
+            "UTC"
         );
     }
 
diff --git a/parquet/src/arrow/schema/mod.rs b/parquet/src/arrow/schema/mod.rs
index ffae1eae5..a80d4add3 100644
--- a/parquet/src/arrow/schema/mod.rs
+++ b/parquet/src/arrow/schema/mod.rs
@@ -1278,12 +1278,12 @@ mod tests {
             Field::new("time_nano", DataType::Time64(TimeUnit::Nanosecond), 
true),
             Field::new(
                 "ts_milli",
-                DataType::Timestamp(TimeUnit::Millisecond, None),
+                DataType::Timestamp(TimeUnit::Millisecond, Some("UTC".into())),
                 true,
             ),
             Field::new(
                 "ts_micro",
-                DataType::Timestamp(TimeUnit::Microsecond, None),
+                DataType::Timestamp(TimeUnit::Microsecond, Some("UTC".into())),
                 false,
             ),
             Field::new(
diff --git a/parquet/src/arrow/schema/primitive.rs b/parquet/src/arrow/schema/primitive.rs
index d4db28915..62133f157 100644
--- a/parquet/src/arrow/schema/primitive.rs
+++ b/parquet/src/arrow/schema/primitive.rs
@@ -218,12 +218,14 @@ fn from_int64(info: &BasicTypeInfo, scale: i32, precision: i32) -> Result<DataTy
         (None, ConvertedType::INT_64) => Ok(DataType::Int64),
         (None, ConvertedType::UINT_64) => Ok(DataType::UInt64),
         (None, ConvertedType::TIME_MICROS) => 
Ok(DataType::Time64(TimeUnit::Microsecond)),
-        (None, ConvertedType::TIMESTAMP_MILLIS) => {
-            Ok(DataType::Timestamp(TimeUnit::Millisecond, None))
-        }
-        (None, ConvertedType::TIMESTAMP_MICROS) => {
-            Ok(DataType::Timestamp(TimeUnit::Microsecond, None))
-        }
+        (None, ConvertedType::TIMESTAMP_MILLIS) => Ok(DataType::Timestamp(
+            TimeUnit::Millisecond,
+            Some("UTC".into()),
+        )),
+        (None, ConvertedType::TIMESTAMP_MICROS) => Ok(DataType::Timestamp(
+            TimeUnit::Microsecond,
+            Some("UTC".into()),
+        )),
         (Some(LogicalType::Decimal { scale, precision }), _) => {
             decimal_128_type(scale, precision)
         }

Reply via email to