This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new d8a4f9847 Treat legacy TIMESTAMP_X converted types as UTC (#4309)
d8a4f9847 is described below
commit d8a4f984783653462d324530ccbf95400a703a54
Author: Sergii Mikhtoniuk <[email protected]>
AuthorDate: Wed May 31 03:04:43 2023 -0700
Treat legacy TIMESTAMP_X converted types as UTC (#4309)
---
parquet/src/arrow/array_reader/primitive_array.rs | 24 ++++++++++++++++++-----
parquet/src/arrow/schema/mod.rs | 4 ++--
parquet/src/arrow/schema/primitive.rs | 14 +++++++------
3 files changed, 29 insertions(+), 13 deletions(-)
diff --git a/parquet/src/arrow/array_reader/primitive_array.rs b/parquet/src/arrow/array_reader/primitive_array.rs
index bef27dc7a..ec0d29e8b 100644
--- a/parquet/src/arrow/array_reader/primitive_array.rs
+++ b/parquet/src/arrow/array_reader/primitive_array.rs
@@ -438,7 +438,15 @@ mod tests {
}
macro_rules! test_primitive_array_reader_one_type {
- ($arrow_parquet_type:ty, $physical_type:expr, $converted_type_str:expr, $result_arrow_type:ty, $result_arrow_cast_type:ty, $result_primitive_type:ty) => {{
+ (
+ $arrow_parquet_type:ty,
+ $physical_type:expr,
+ $converted_type_str:expr,
+ $result_arrow_type:ty,
+ $result_arrow_cast_type:ty,
+ $result_primitive_type:ty
+ $(, $timezone:expr)?
+ ) => {{
let message_type = format!(
"
message test_schema {{
@@ -493,7 +501,9 @@ mod tests {
result_data_type
)
.as_str(),
- );
+ )
+ $(.with_timezone($timezone))?
+ ;
// create expected array as primitive, and cast to result type
let expected = PrimitiveArray::<$result_arrow_cast_type>::from(
@@ -516,7 +526,9 @@ mod tests {
result_data_type
)
.as_str(),
- );
+ )
+ $(.with_timezone($timezone))?
+ ;
assert_eq!(expected, array);
}
}};
@@ -554,7 +566,8 @@ mod tests {
"TIMESTAMP_MILLIS",
arrow::datatypes::TimestampMillisecondType,
arrow::datatypes::Int64Type,
- i64
+ i64,
+ "UTC"
);
test_primitive_array_reader_one_type!(
crate::data_type::Int64Type,
@@ -562,7 +575,8 @@ mod tests {
"TIMESTAMP_MICROS",
arrow::datatypes::TimestampMicrosecondType,
arrow::datatypes::Int64Type,
- i64
+ i64,
+ "UTC"
);
}
diff --git a/parquet/src/arrow/schema/mod.rs b/parquet/src/arrow/schema/mod.rs
index ffae1eae5..a80d4add3 100644
--- a/parquet/src/arrow/schema/mod.rs
+++ b/parquet/src/arrow/schema/mod.rs
@@ -1278,12 +1278,12 @@ mod tests {
Field::new("time_nano", DataType::Time64(TimeUnit::Nanosecond),
true),
Field::new(
"ts_milli",
- DataType::Timestamp(TimeUnit::Millisecond, None),
+ DataType::Timestamp(TimeUnit::Millisecond, Some("UTC".into())),
true,
),
Field::new(
"ts_micro",
- DataType::Timestamp(TimeUnit::Microsecond, None),
+ DataType::Timestamp(TimeUnit::Microsecond, Some("UTC".into())),
false,
),
Field::new(
diff --git a/parquet/src/arrow/schema/primitive.rs b/parquet/src/arrow/schema/primitive.rs
index d4db28915..62133f157 100644
--- a/parquet/src/arrow/schema/primitive.rs
+++ b/parquet/src/arrow/schema/primitive.rs
@@ -218,12 +218,14 @@ fn from_int64(info: &BasicTypeInfo, scale: i32, precision: i32) -> Result<DataTy
(None, ConvertedType::INT_64) => Ok(DataType::Int64),
(None, ConvertedType::UINT_64) => Ok(DataType::UInt64),
(None, ConvertedType::TIME_MICROS) =>
Ok(DataType::Time64(TimeUnit::Microsecond)),
- (None, ConvertedType::TIMESTAMP_MILLIS) => {
- Ok(DataType::Timestamp(TimeUnit::Millisecond, None))
- }
- (None, ConvertedType::TIMESTAMP_MICROS) => {
- Ok(DataType::Timestamp(TimeUnit::Microsecond, None))
- }
+ (None, ConvertedType::TIMESTAMP_MILLIS) => Ok(DataType::Timestamp(
+ TimeUnit::Millisecond,
+ Some("UTC".into()),
+ )),
+ (None, ConvertedType::TIMESTAMP_MICROS) => Ok(DataType::Timestamp(
+ TimeUnit::Microsecond,
+ Some("UTC".into()),
+ )),
(Some(LogicalType::Decimal { scale, precision }), _) => {
decimal_128_type(scale, precision)
}