This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push:
new e2f274930c Improve Display formatting of DataType::Timestamp (#8425)
e2f274930c is described below
commit e2f274930cb0830ba0fd1df91526e90b2dbb7e68
Author: Emil Ernerfeldt <[email protected]>
AuthorDate: Wed Sep 24 20:47:16 2025 +0200
Improve Display formatting of DataType::Timestamp (#8425)
# Which issue does this PR close?
* Part of https://github.com/apache/arrow-rs/issues/8351
# Rationale for this change
`DataType`s end up in a lot of error messages, and we want them easy and
readable, without any Rust-stuff in them like `Some` and `None`.
# What changes are included in this PR?
Before:
> `Timestamp(Millisecond, None)`
> `Timestamp(Nanosecond, Some("UTC"))`
After:
> `Timestamp(ms)`
> `Timestamp(ns, "UTC")`
# Are these changes tested?
Yes
# Are there any user-facing changes?
Yes, this is a **breaking change**
---
arrow-arith/src/temporal.rs | 2 +-
arrow-array/src/array/primitive_array.rs | 30 ++++++------
arrow-array/src/builder/struct_builder.rs | 2 +-
arrow-cast/src/cast/decimal.rs | 3 +-
arrow-cast/src/cast/mod.rs | 22 +++++++--
arrow-cast/src/cast/string.rs | 3 +-
arrow-schema/src/datatype.rs | 11 +++++
arrow-schema/src/datatype_display.rs | 24 ++++++----
arrow-schema/src/datatype_parse.rs | 77 +++++++++++++++----------------
parquet/tests/variant_integration.rs | 42 ++++-------------
10 files changed, 107 insertions(+), 109 deletions(-)
diff --git a/arrow-arith/src/temporal.rs b/arrow-arith/src/temporal.rs
index a9682742bb..83e1e7f1b5 100644
--- a/arrow-arith/src/temporal.rs
+++ b/arrow-arith/src/temporal.rs
@@ -649,7 +649,7 @@ impl ExtractDatePartExt for
PrimitiveArray<DurationNanosecondType> {
macro_rules! return_compute_error_with {
($msg:expr, $param:expr) => {
- return { Err(ArrowError::ComputeError(format!("{}: {:?}", $msg,
$param))) }
+ return { Err(ArrowError::ComputeError(format!("{}: {}", $msg,
$param))) }
};
}
diff --git a/arrow-array/src/array/primitive_array.rs
b/arrow-array/src/array/primitive_array.rs
index 9551c121e8..ec121e5805 100644
--- a/arrow-array/src/array/primitive_array.rs
+++ b/arrow-array/src/array/primitive_array.rs
@@ -2099,7 +2099,7 @@ mod tests {
let arr: PrimitiveArray<TimestampMillisecondType> =
TimestampMillisecondArray::from(vec![1546214400000, 1546214400000,
-1546214400000]);
assert_eq!(
- "PrimitiveArray<Timestamp(Millisecond, None)>\n[\n
2018-12-31T00:00:00,\n 2018-12-31T00:00:00,\n 1921-01-02T00:00:00,\n]",
+ "PrimitiveArray<Timestamp(ms)>\n[\n 2018-12-31T00:00:00,\n
2018-12-31T00:00:00,\n 1921-01-02T00:00:00,\n]",
format!("{arr:?}")
);
}
@@ -2110,7 +2110,7 @@ mod tests {
TimestampMillisecondArray::from(vec![1546214400000, 1546214400000,
-1546214400000])
.with_timezone_utc();
assert_eq!(
- "PrimitiveArray<Timestamp(Millisecond, Some(\"+00:00\"))>\n[\n
2018-12-31T00:00:00+00:00,\n 2018-12-31T00:00:00+00:00,\n
1921-01-02T00:00:00+00:00,\n]",
+ "PrimitiveArray<Timestamp(ms, \"+00:00\")>\n[\n
2018-12-31T00:00:00+00:00,\n 2018-12-31T00:00:00+00:00,\n
1921-01-02T00:00:00+00:00,\n]",
format!("{arr:?}")
);
}
@@ -2122,7 +2122,7 @@ mod tests {
TimestampMillisecondArray::from(vec![1546214400000, 1546214400000,
-1546214400000])
.with_timezone("Asia/Taipei".to_string());
assert_eq!(
- "PrimitiveArray<Timestamp(Millisecond,
Some(\"Asia/Taipei\"))>\n[\n 2018-12-31T08:00:00+08:00,\n
2018-12-31T08:00:00+08:00,\n 1921-01-02T08:00:00+08:00,\n]",
+ "PrimitiveArray<Timestamp(ms, \"Asia/Taipei\")>\n[\n
2018-12-31T08:00:00+08:00,\n 2018-12-31T08:00:00+08:00,\n
1921-01-02T08:00:00+08:00,\n]",
format!("{arr:?}")
);
}
@@ -2137,7 +2137,7 @@ mod tests {
println!("{arr:?}");
assert_eq!(
- "PrimitiveArray<Timestamp(Millisecond,
Some(\"Asia/Taipei\"))>\n[\n 2018-12-31T00:00:00 (Unknown Time Zone
'Asia/Taipei'),\n 2018-12-31T00:00:00 (Unknown Time Zone 'Asia/Taipei'),\n
1921-01-02T00:00:00 (Unknown Time Zone 'Asia/Taipei'),\n]",
+ "PrimitiveArray<Timestamp(ms, \"Asia/Taipei\")>\n[\n
2018-12-31T00:00:00 (Unknown Time Zone 'Asia/Taipei'),\n 2018-12-31T00:00:00
(Unknown Time Zone 'Asia/Taipei'),\n 1921-01-02T00:00:00 (Unknown Time Zone
'Asia/Taipei'),\n]",
format!("{arr:?}")
);
}
@@ -2148,7 +2148,7 @@ mod tests {
TimestampMillisecondArray::from(vec![1546214400000, 1546214400000,
-1546214400000])
.with_timezone("+08:00".to_string());
assert_eq!(
- "PrimitiveArray<Timestamp(Millisecond, Some(\"+08:00\"))>\n[\n
2018-12-31T08:00:00+08:00,\n 2018-12-31T08:00:00+08:00,\n
1921-01-02T08:00:00+08:00,\n]",
+ "PrimitiveArray<Timestamp(ms, \"+08:00\")>\n[\n
2018-12-31T08:00:00+08:00,\n 2018-12-31T08:00:00+08:00,\n
1921-01-02T08:00:00+08:00,\n]",
format!("{arr:?}")
);
}
@@ -2159,7 +2159,7 @@ mod tests {
TimestampMillisecondArray::from(vec![1546214400000, 1546214400000,
-1546214400000])
.with_timezone("xxx".to_string());
assert_eq!(
- "PrimitiveArray<Timestamp(Millisecond, Some(\"xxx\"))>\n[\n
2018-12-31T00:00:00 (Unknown Time Zone 'xxx'),\n 2018-12-31T00:00:00 (Unknown
Time Zone 'xxx'),\n 1921-01-02T00:00:00 (Unknown Time Zone 'xxx'),\n]",
+ "PrimitiveArray<Timestamp(ms, \"xxx\")>\n[\n 2018-12-31T00:00:00
(Unknown Time Zone 'xxx'),\n 2018-12-31T00:00:00 (Unknown Time Zone 'xxx'),\n
1921-01-02T00:00:00 (Unknown Time Zone 'xxx'),\n]",
format!("{arr:?}")
);
}
@@ -2175,7 +2175,7 @@ mod tests {
])
.with_timezone("America/Denver".to_string());
assert_eq!(
- "PrimitiveArray<Timestamp(Millisecond,
Some(\"America/Denver\"))>\n[\n 2022-03-13T01:59:59-07:00,\n
2022-03-13T03:00:00-06:00,\n 2022-11-06T00:59:59-06:00,\n
2022-11-06T01:00:00-06:00,\n]",
+ "PrimitiveArray<Timestamp(ms, \"America/Denver\")>\n[\n
2022-03-13T01:59:59-07:00,\n 2022-03-13T03:00:00-06:00,\n
2022-11-06T00:59:59-06:00,\n 2022-11-06T01:00:00-06:00,\n]",
format!("{arr:?}")
);
}
@@ -2193,7 +2193,7 @@ mod tests {
fn test_time32second_fmt_debug() {
let arr: PrimitiveArray<Time32SecondType> = vec![7201, 60054].into();
assert_eq!(
- "PrimitiveArray<Time32(Second)>\n[\n 02:00:01,\n 16:40:54,\n]",
+ "PrimitiveArray<Time32(s)>\n[\n 02:00:01,\n 16:40:54,\n]",
format!("{arr:?}")
);
}
@@ -2203,8 +2203,8 @@ mod tests {
// chrono::NaiveDatetime::from_timestamp_opt returns None while input
is invalid
let arr: PrimitiveArray<Time32SecondType> = vec![-7201, -60054].into();
assert_eq!(
- "PrimitiveArray<Time32(Second)>\n[\n Cast error: Failed to convert
-7201 to temporal for Time32(Second),\n Cast error: Failed to convert -60054
to temporal for Time32(Second),\n]",
- // "PrimitiveArray<Time32(Second)>\n[\n null,\n null,\n]",
+ "PrimitiveArray<Time32(s)>\n[\n Cast error: Failed to convert -7201
to temporal for Time32(s),\n Cast error: Failed to convert -60054 to temporal
for Time32(s),\n]",
+ // "PrimitiveArray<Time32(s)>\n[\n null,\n null,\n]",
format!("{arr:?}")
)
}
@@ -2214,7 +2214,7 @@ mod tests {
// replicate the issue from
https://github.com/apache/arrow-datafusion/issues/3832
let arr: PrimitiveArray<TimestampMicrosecondType> =
vec![9065525203050843594].into();
assert_eq!(
- "PrimitiveArray<Timestamp(Microsecond, None)>\n[\n null,\n]",
+ "PrimitiveArray<Timestamp(µs)>\n[\n null,\n]",
format!("{arr:?}")
)
}
@@ -2855,7 +2855,7 @@ mod tests {
]
.into();
let debug_str = format!("{array:?}");
- assert_eq!("PrimitiveArray<Time32(Second)>\n[\n Cast error: Failed to
convert -1 to temporal for Time32(Second),\n 00:00:00,\n 23:59:59,\n Cast
error: Failed to convert 86400 to temporal for Time32(Second),\n Cast error:
Failed to convert 86401 to temporal for Time32(Second),\n null,\n]",
+ assert_eq!("PrimitiveArray<Time32(s)>\n[\n Cast error: Failed to
convert -1 to temporal for Time32(s),\n 00:00:00,\n 23:59:59,\n Cast error:
Failed to convert 86400 to temporal for Time32(s),\n Cast error: Failed to
convert 86401 to temporal for Time32(s),\n null,\n]",
debug_str
);
}
@@ -2872,7 +2872,7 @@ mod tests {
]
.into();
let debug_str = format!("{array:?}");
- assert_eq!("PrimitiveArray<Time32(Millisecond)>\n[\n Cast error:
Failed to convert -1 to temporal for Time32(Millisecond),\n 00:00:00,\n
23:59:59,\n Cast error: Failed to convert 86400000 to temporal for
Time32(Millisecond),\n Cast error: Failed to convert 86401000 to temporal for
Time32(Millisecond),\n null,\n]",
+ assert_eq!("PrimitiveArray<Time32(ms)>\n[\n Cast error: Failed to
convert -1 to temporal for Time32(ms),\n 00:00:00,\n 23:59:59,\n Cast error:
Failed to convert 86400000 to temporal for Time32(ms),\n Cast error: Failed to
convert 86401000 to temporal for Time32(ms),\n null,\n]",
debug_str
);
}
@@ -2890,7 +2890,7 @@ mod tests {
.into();
let debug_str = format!("{array:?}");
assert_eq!(
- "PrimitiveArray<Time64(Nanosecond)>\n[\n Cast error: Failed to
convert -1 to temporal for Time64(Nanosecond),\n 00:00:00,\n 23:59:59,\n
Cast error: Failed to convert 86400000000000 to temporal for
Time64(Nanosecond),\n Cast error: Failed to convert 86401000000000 to temporal
for Time64(Nanosecond),\n null,\n]",
+ "PrimitiveArray<Time64(ns)>\n[\n Cast error: Failed to convert -1 to
temporal for Time64(ns),\n 00:00:00,\n 23:59:59,\n Cast error: Failed to
convert 86400000000000 to temporal for Time64(ns),\n Cast error: Failed to
convert 86401000000000 to temporal for Time64(ns),\n null,\n]",
debug_str
);
}
@@ -2907,7 +2907,7 @@ mod tests {
]
.into();
let debug_str = format!("{array:?}");
- assert_eq!("PrimitiveArray<Time64(Microsecond)>\n[\n Cast error:
Failed to convert -1 to temporal for Time64(Microsecond),\n 00:00:00,\n
23:59:59,\n Cast error: Failed to convert 86400000000 to temporal for
Time64(Microsecond),\n Cast error: Failed to convert 86401000000 to temporal
for Time64(Microsecond),\n null,\n]", debug_str);
+ assert_eq!("PrimitiveArray<Time64(µs)>\n[\n Cast error: Failed to
convert -1 to temporal for Time64(µs),\n 00:00:00,\n 23:59:59,\n Cast error:
Failed to convert 86400000000 to temporal for Time64(µs),\n Cast error: Failed
to convert 86401000000 to temporal for Time64(µs),\n null,\n]", debug_str);
}
#[test]
diff --git a/arrow-array/src/builder/struct_builder.rs
b/arrow-array/src/builder/struct_builder.rs
index d5109ec192..7f9400b52c 100644
--- a/arrow-array/src/builder/struct_builder.rs
+++ b/arrow-array/src/builder/struct_builder.rs
@@ -690,7 +690,7 @@ mod tests {
#[test]
#[should_panic(
- expected = "Incorrect datatype for StructArray field
\\\"timestamp\\\", expected Timestamp(Nanosecond, Some(\\\"UTC\\\")) got
Timestamp(Nanosecond, None)"
+ expected = "Incorrect datatype for StructArray field
\\\"timestamp\\\", expected Timestamp(ns, \\\"UTC\\\") got Timestamp(ns)"
)]
fn test_struct_array_mismatch_builder() {
let fields = vec![Field::new(
diff --git a/arrow-cast/src/cast/decimal.rs b/arrow-cast/src/cast/decimal.rs
index 6c2b6f388e..a73b593491 100644
--- a/arrow-cast/src/cast/decimal.rs
+++ b/arrow-cast/src/cast/decimal.rs
@@ -488,8 +488,7 @@ where
parse_string_to_decimal_native::<T>(v, scale as usize)
.map_err(|_| {
ArrowError::CastError(format!(
- "Cannot cast string '{}' to value of {:?}
type",
- v,
+ "Cannot cast string '{v}' to value of {} type",
T::DATA_TYPE,
))
})
diff --git a/arrow-cast/src/cast/mod.rs b/arrow-cast/src/cast/mod.rs
index 43ad4b0c6f..0330ce9138 100644
--- a/arrow-cast/src/cast/mod.rs
+++ b/arrow-cast/src/cast/mod.rs
@@ -4905,7 +4905,10 @@ mod tests {
format_options: FormatOptions::default(),
};
let err = cast_with_options(array, &to_type,
&options).unwrap_err();
- assert_eq!(err.to_string(), "Cast error: Cannot cast string
'08:08:61.091323414' to value of Time32(Second) type");
+ assert_eq!(
+ err.to_string(),
+ "Cast error: Cannot cast string '08:08:61.091323414' to value
of Time32(s) type"
+ );
}
}
@@ -4947,7 +4950,10 @@ mod tests {
format_options: FormatOptions::default(),
};
let err = cast_with_options(array, &to_type,
&options).unwrap_err();
- assert_eq!(err.to_string(), "Cast error: Cannot cast string
'08:08:61.091323414' to value of Time32(Millisecond) type");
+ assert_eq!(
+ err.to_string(),
+ "Cast error: Cannot cast string '08:08:61.091323414' to value
of Time32(ms) type"
+ );
}
}
@@ -4981,7 +4987,10 @@ mod tests {
format_options: FormatOptions::default(),
};
let err = cast_with_options(array, &to_type,
&options).unwrap_err();
- assert_eq!(err.to_string(), "Cast error: Cannot cast string 'Not a
valid time' to value of Time64(Microsecond) type");
+ assert_eq!(
+ err.to_string(),
+ "Cast error: Cannot cast string 'Not a valid time' to value of
Time64(µs) type"
+ );
}
}
@@ -5015,7 +5024,10 @@ mod tests {
format_options: FormatOptions::default(),
};
let err = cast_with_options(array, &to_type,
&options).unwrap_err();
- assert_eq!(err.to_string(), "Cast error: Cannot cast string 'Not a
valid time' to value of Time64(Nanosecond) type");
+ assert_eq!(
+ err.to_string(),
+ "Cast error: Cannot cast string 'Not a valid time' to value of
Time64(ns) type"
+ );
}
}
@@ -8704,7 +8716,7 @@ mod tests {
};
assert_eq!(
t,
- r#"Casting from Map(Field { "entries": Struct(key Utf8, value
nullable Interval(DayTime)) }, false) to Map(Field { "entries": Struct(key
Utf8, value Duration(Second)) }, true) not supported"#
+ r#"Casting from Map(Field { "entries": Struct(key Utf8, value
nullable Interval(DayTime)) }, false) to Map(Field { "entries": Struct(key
Utf8, value Duration(s)) }, true) not supported"#
);
}
diff --git a/arrow-cast/src/cast/string.rs b/arrow-cast/src/cast/string.rs
index 09a9978ff7..7cc42450f4 100644
--- a/arrow-cast/src/cast/string.rs
+++ b/arrow-cast/src/cast/string.rs
@@ -107,8 +107,7 @@ fn parse_string_iter<
.map(|x| match x {
Some(v) => P::parse(v).ok_or_else(|| {
ArrowError::CastError(format!(
- "Cannot cast string '{}' to value of {:?} type",
- v,
+ "Cannot cast string '{v}' to value of {} type",
P::DATA_TYPE
))
}),
diff --git a/arrow-schema/src/datatype.rs b/arrow-schema/src/datatype.rs
index 32bce33474..e4c676543a 100644
--- a/arrow-schema/src/datatype.rs
+++ b/arrow-schema/src/datatype.rs
@@ -454,6 +454,17 @@ pub enum TimeUnit {
Nanosecond,
}
+impl std::fmt::Display for TimeUnit {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ match self {
+ TimeUnit::Second => write!(f, "s"),
+ TimeUnit::Millisecond => write!(f, "ms"),
+ TimeUnit::Microsecond => write!(f, "µs"),
+ TimeUnit::Nanosecond => write!(f, "ns"),
+ }
+ }
+}
+
/// YEAR_MONTH, DAY_TIME, MONTH_DAY_NANO interval in SQL style.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
diff --git a/arrow-schema/src/datatype_display.rs
b/arrow-schema/src/datatype_display.rs
index e1bd86cba0..f23beb489d 100644
--- a/arrow-schema/src/datatype_display.rs
+++ b/arrow-schema/src/datatype_display.rs
@@ -50,13 +50,17 @@ impl fmt::Display for DataType {
Self::Float32 => write!(f, "Float32"),
Self::Float64 => write!(f, "Float64"),
Self::Timestamp(time_unit, timezone) => {
- write!(f, "Timestamp({time_unit:?}, {timezone:?})")
+ if let Some(timezone) = timezone {
+ write!(f, "Timestamp({time_unit}, {timezone:?})")
+ } else {
+ write!(f, "Timestamp({time_unit})")
+ }
}
Self::Date32 => write!(f, "Date32"),
Self::Date64 => write!(f, "Date64"),
- Self::Time32(time_unit) => write!(f, "Time32({time_unit:?})"),
- Self::Time64(time_unit) => write!(f, "Time64({time_unit:?})"),
- Self::Duration(time_unit) => write!(f, "Duration({time_unit:?})"),
+ Self::Time32(time_unit) => write!(f, "Time32({time_unit})"),
+ Self::Time64(time_unit) => write!(f, "Time64({time_unit})"),
+ Self::Duration(time_unit) => write!(f, "Duration({time_unit})"),
Self::Interval(interval_unit) => write!(f,
"Interval({interval_unit:?})"),
Self::Binary => write!(f, "Binary"),
Self::FixedSizeBinary(bytes_per_value) => {
@@ -131,13 +135,13 @@ impl fmt::Display for DataType {
write!(f, "Union({union_fields:?}, {union_mode:?})")
}
Self::Dictionary(data_type, data_type1) => {
- write!(f, "Dictionary({data_type}, {data_type1:?})")
+ write!(f, "Dictionary({data_type}, {data_type1})")
}
- Self::Decimal32(precision, scale) => write!(f,
"Decimal32({precision:?}, {scale:?})"),
- Self::Decimal64(precision, scale) => write!(f,
"Decimal64({precision:?}, {scale:?})"),
- Self::Decimal128(precision, scale) => write!(f,
"Decimal128({precision:?}, {scale:?})"),
- Self::Decimal256(precision, scale) => write!(f,
"Decimal256({precision:?}, {scale:?})"),
- Self::Map(field, keys_are_sorted) => write!(f, "Map({field},
{keys_are_sorted:?})"),
+ Self::Decimal32(precision, scale) => write!(f,
"Decimal32({precision}, {scale})"),
+ Self::Decimal64(precision, scale) => write!(f,
"Decimal64({precision}, {scale})"),
+ Self::Decimal128(precision, scale) => write!(f,
"Decimal128({precision}, {scale})"),
+ Self::Decimal256(precision, scale) => write!(f,
"Decimal256({precision}, {scale})"),
+ Self::Map(field, keys_are_sorted) => write!(f, "Map({field},
{keys_are_sorted})"),
Self::RunEndEncoded(run_ends_field, values_field) => {
write!(f, "RunEndEncoded({run_ends_field}, {values_field})")
}
diff --git a/arrow-schema/src/datatype_parse.rs
b/arrow-schema/src/datatype_parse.rs
index 8b48ecd17f..f465871ad0 100644
--- a/arrow-schema/src/datatype_parse.rs
+++ b/arrow-schema/src/datatype_parse.rs
@@ -26,7 +26,7 @@ pub(crate) fn parse_data_type(val: &str) ->
ArrowResult<DataType> {
type ArrowResult<T> = Result<T, ArrowError>;
fn make_error(val: &str, msg: &str) -> ArrowError {
- let msg = format!("Unsupported type '{val}'. Must be a supported arrow
type name such as 'Int32' or 'Timestamp(Nanosecond, None)'. Error {msg}" );
+ let msg = format!("Unsupported type '{val}'. Must be a supported arrow
type name such as 'Int32' or 'Timestamp(ns)'. Error {msg}" );
ArrowError::ParseError(msg)
}
@@ -135,23 +135,6 @@ impl<'a> Parser<'a> {
}
}
- /// Parses the next timezone
- fn parse_timezone(&mut self, context: &str) -> ArrowResult<Option<String>>
{
- match self.next_token()? {
- Token::None => Ok(None),
- Token::Some => {
- self.expect_token(Token::LParen)?;
- let timezone = self.parse_double_quoted_string("Timezone")?;
- self.expect_token(Token::RParen)?;
- Ok(Some(timezone))
- }
- tok => Err(make_error(
- self.val,
- &format!("finding Timezone for {context}, got {tok}"),
- )),
- }
- }
-
/// Parses the next double quoted string
fn parse_double_quoted_string(&mut self, context: &str) ->
ArrowResult<String> {
match self.next_token()? {
@@ -214,9 +197,23 @@ impl<'a> Parser<'a> {
fn parse_timestamp(&mut self) -> ArrowResult<DataType> {
self.expect_token(Token::LParen)?;
let time_unit = self.parse_time_unit("Timestamp")?;
- self.expect_token(Token::Comma)?;
- let timezone = self.parse_timezone("Timestamp")?;
- self.expect_token(Token::RParen)?;
+
+ let timezone;
+ match self.next_token()? {
+ Token::Comma => {
+ timezone = Some(self.parse_double_quoted_string("Timezone")?);
+ self.expect_token(Token::RParen)?;
+ }
+ Token::RParen => {
+ timezone = None;
+ }
+ next_token => {
+ return Err(make_error(
+ self.val,
+ &format!("Expected comma followed by a timezone, or an ),
got {next_token:?}"),
+ ));
+ }
+ }
Ok(DataType::Timestamp(time_unit, timezone.map(Into::into)))
}
@@ -392,13 +389,11 @@ fn is_separator(c: char) -> bool {
#[derive(Debug)]
/// Splits a strings like Dictionary(Int32, Int64) into tokens sutable for
parsing
///
-/// For example the string "Timestamp(Nanosecond, None)" would be parsed into:
+/// For example the string "Timestamp(ns)" would be parsed into:
///
/// * Token::Timestamp
/// * Token::Lparen
/// * Token::IntervalUnit(IntervalUnit::Nanosecond)
-/// * Token::Comma,
-/// * Token::None,
/// * Token::Rparen,
struct Tokenizer<'a> {
val: &'a str,
@@ -529,10 +524,10 @@ impl<'a> Tokenizer<'a> {
"LargeList" => Token::LargeList,
"FixedSizeList" => Token::FixedSizeList,
- "Second" => Token::TimeUnit(TimeUnit::Second),
- "Millisecond" => Token::TimeUnit(TimeUnit::Millisecond),
- "Microsecond" => Token::TimeUnit(TimeUnit::Microsecond),
- "Nanosecond" => Token::TimeUnit(TimeUnit::Nanosecond),
+ "s" | "Second" => Token::TimeUnit(TimeUnit::Second),
+ "ms" | "Millisecond" => Token::TimeUnit(TimeUnit::Millisecond),
+ "µs" | "us" | "Microsecond" =>
Token::TimeUnit(TimeUnit::Microsecond),
+ "ns" | "Nanosecond" => Token::TimeUnit(TimeUnit::Nanosecond),
"Timestamp" => Token::Timestamp,
"Time32" => Token::Time32,
@@ -679,7 +674,7 @@ mod test {
/// verifying it is the same
fn round_trip(data_type: DataType) {
let data_type_string = data_type.to_string();
- println!("Input '{data_type_string}' ({data_type})");
+ println!("Input '{data_type_string}' ({data_type:?})");
let parsed_type = parse_data_type(&data_type_string).unwrap();
assert_eq!(
data_type, parsed_type,
@@ -808,19 +803,19 @@ mod test {
let cases = [
("Int8", DataType::Int8),
(
- "Timestamp (Nanosecond, None)",
+ "Timestamp (ns)",
DataType::Timestamp(TimeUnit::Nanosecond, None),
),
(
- "Timestamp (Nanosecond, None) ",
+ "Timestamp (ns) ",
DataType::Timestamp(TimeUnit::Nanosecond, None),
),
(
- " Timestamp (Nanosecond, None
)",
+ " Timestamp (ns )",
DataType::Timestamp(TimeUnit::Nanosecond, None),
),
(
- "Timestamp (Nanosecond, None ) ",
+ "Timestamp (ns ) ",
DataType::Timestamp(TimeUnit::Nanosecond, None),
),
];
@@ -841,22 +836,22 @@ mod test {
("null", "Unsupported type 'null'"),
("Nu", "Unsupported type 'Nu'"),
(
- r#"Timestamp(Nanosecond, Some(+00:00))"#,
+ r#"Timestamp(ns, +00:00)"#,
"Error unrecognized word: +00:00",
),
(
- r#"Timestamp(Nanosecond, Some("+00:00))"#,
+ r#"Timestamp(ns, "+00:00)"#,
r#"parsing "+00:00 as double quoted string: last char must be
""#,
),
(
- r#"Timestamp(Nanosecond, Some(""))"#,
+ r#"Timestamp(ns, "")"#,
r#"parsing "" as double quoted string: empty string isn't
supported"#,
),
(
- r#"Timestamp(Nanosecond, Some("+00:00""))"#,
+ r#"Timestamp(ns, "+00:00"")"#,
r#"parsing "+00:00"" as double quoted string: escaped double
quote isn't supported"#,
),
- ("Timestamp(Nanosecond, ", "Error finding next token"),
+ ("Timestamp(ns, ", "Error finding next token"),
(
"Float32 Float32",
"trailing content after parsing 'Float32'",
@@ -892,7 +887,9 @@ mod test {
"\n\ndid not find expected in actual.\n\nexpected:
{expected_message}\nactual:{message}\n"
);
// errors should also contain a help message
- assert!(message.contains("Must be a supported arrow type
name such as 'Int32' or 'Timestamp(Nanosecond, None)'"));
+ assert!(message.contains(
+ "Must be a supported arrow type name such as 'Int32'
or 'Timestamp(ns)'"
+ ));
}
}
}
@@ -902,6 +899,6 @@ mod test {
fn parse_error_type() {
let err = parse_data_type("foobar").unwrap_err();
assert!(matches!(err, ArrowError::ParseError(_)));
- assert_eq!(err.to_string(), "Parser error: Unsupported type 'foobar'.
Must be a supported arrow type name such as 'Int32' or 'Timestamp(Nanosecond,
None)'. Error unrecognized word: foobar");
+ assert_eq!(err.to_string(), "Parser error: Unsupported type 'foobar'.
Must be a supported arrow type name such as 'Int32' or 'Timestamp(ns)'. Error
unrecognized word: foobar");
}
}
diff --git a/parquet/tests/variant_integration.rs
b/parquet/tests/variant_integration.rs
index dcab658bcd..01ae4175c4 100644
--- a/parquet/tests/variant_integration.rs
+++ b/parquet/tests/variant_integration.rs
@@ -92,22 +92,10 @@ variant_test_case!(17);
variant_test_case!(18);
variant_test_case!(19);
// https://github.com/apache/arrow-rs/issues/8331
-variant_test_case!(
- 20,
- "Unsupported typed_value type: Timestamp(Microsecond, Some(\"UTC\"))"
-);
-variant_test_case!(
- 21,
- "Unsupported typed_value type: Timestamp(Microsecond, Some(\"UTC\"))"
-);
-variant_test_case!(
- 22,
- "Unsupported typed_value type: Timestamp(Microsecond, None)"
-);
-variant_test_case!(
- 23,
- "Unsupported typed_value type: Timestamp(Microsecond, None)"
-);
+variant_test_case!(20, "Unsupported typed_value type: Timestamp(µs, \"UTC\")");
+variant_test_case!(21, "Unsupported typed_value type: Timestamp(µs, \"UTC\")");
+variant_test_case!(22, "Unsupported typed_value type: Timestamp(µs)");
+variant_test_case!(23, "Unsupported typed_value type: Timestamp(µs)");
// https://github.com/apache/arrow-rs/issues/8332
variant_test_case!(24, "Unsupported typed_value type: Decimal128(9, 4)");
variant_test_case!(25, "Unsupported typed_value type: Decimal128(9, 4)");
@@ -118,24 +106,12 @@ variant_test_case!(29, "Unsupported typed_value type:
Decimal128(38, 9)");
variant_test_case!(30);
variant_test_case!(31);
// https://github.com/apache/arrow-rs/issues/8334
-variant_test_case!(32, "Unsupported typed_value type: Time64(Microsecond)");
+variant_test_case!(32, "Unsupported typed_value type: Time64(µs)");
// https://github.com/apache/arrow-rs/issues/8331
-variant_test_case!(
- 33,
- "Unsupported typed_value type: Timestamp(Nanosecond, Some(\"UTC\"))"
-);
-variant_test_case!(
- 34,
- "Unsupported typed_value type: Timestamp(Nanosecond, Some(\"UTC\"))"
-);
-variant_test_case!(
- 35,
- "Unsupported typed_value type: Timestamp(Nanosecond, None)"
-);
-variant_test_case!(
- 36,
- "Unsupported typed_value type: Timestamp(Nanosecond, None)"
-);
+variant_test_case!(33, "Unsupported typed_value type: Timestamp(ns, \"UTC\")");
+variant_test_case!(34, "Unsupported typed_value type: Timestamp(ns, \"UTC\")");
+variant_test_case!(35, "Unsupported typed_value type: Timestamp(ns)");
+variant_test_case!(36, "Unsupported typed_value type: Timestamp(ns)");
variant_test_case!(37);
// https://github.com/apache/arrow-rs/issues/8336
variant_test_case!(38, "Unsupported typed_value type: Struct(");