This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new fb926a4ff9 Configurable Duration Display (#4581)
fb926a4ff9 is described below
commit fb926a4ff9f84fcfb5c853b6f3cb5d2d11bdf916
Author: Raphael Taylor-Davies <[email protected]>
AuthorDate: Sun Jul 30 16:20:19 2023 +0100
Configurable Duration Display (#4581)
* Make FormatOptions const (#4580)
* Add non-ISO duration display (#4554)
* Review feedback
---
arrow-cast/src/cast.rs | 10 ++
arrow-cast/src/display.rs | 260 +++++++++++++++++++++++++++++++++++++++-------
2 files changed, 234 insertions(+), 36 deletions(-)
diff --git a/arrow-cast/src/cast.rs b/arrow-cast/src/cast.rs
index 2ee8c51b0a..e7ca2d0ed4 100644
--- a/arrow-cast/src/cast.rs
+++ b/arrow-cast/src/cast.rs
@@ -8959,4 +8959,14 @@ mod tests {
assert_eq!(formatted.value(0).to_string(), "[[1], [2], [3]]");
assert_eq!(formatted.value(1).to_string(), "[[4], [null], [6]]");
}
+
+ const CAST_OPTIONS: CastOptions<'static> = CastOptions {
+ safe: true,
+ format_options: FormatOptions::new(),
+ };
+
+ #[test]
+ fn test_const_options() {
+ assert!(CAST_OPTIONS.safe)
+ }
}
diff --git a/arrow-cast/src/display.rs b/arrow-cast/src/display.rs
index 07e78f8984..b373891ecb 100644
--- a/arrow-cast/src/display.rs
+++ b/arrow-cast/src/display.rs
@@ -34,6 +34,16 @@ use lexical_core::FormattedSize;
type TimeFormat<'a> = Option<&'a str>;
+/// Format for displaying durations
+#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
+#[non_exhaustive]
+pub enum DurationFormat {
+ /// ISO 8601 - `P198DT72932.972880S`
+ ISO8601,
+ /// A human readable representation - `198 days 16 hours 34 mins
15.407810000 secs`
+ Pretty,
+}
+
/// Options for formatting arrays
///
/// By default nulls are formatted as `""` and temporal types formatted
@@ -56,10 +66,18 @@ pub struct FormatOptions<'a> {
timestamp_tz_format: TimeFormat<'a>,
/// Time format for time arrays
time_format: TimeFormat<'a>,
+ /// Duration format
+ duration_format: DurationFormat,
}
impl<'a> Default for FormatOptions<'a> {
fn default() -> Self {
+ Self::new()
+ }
+}
+
+impl<'a> FormatOptions<'a> {
+ pub const fn new() -> Self {
Self {
safe: true,
null: "",
@@ -68,14 +86,13 @@ impl<'a> Default for FormatOptions<'a> {
timestamp_format: None,
timestamp_tz_format: None,
time_format: None,
+ duration_format: DurationFormat::ISO8601,
}
}
-}
-impl<'a> FormatOptions<'a> {
/// If set to `true` any formatting errors will be written to the output
/// instead of being converted into a [`std::fmt::Error`]
- pub fn with_display_error(mut self, safe: bool) -> Self {
+ pub const fn with_display_error(mut self, safe: bool) -> Self {
self.safe = safe;
self
}
@@ -83,12 +100,12 @@ impl<'a> FormatOptions<'a> {
/// Overrides the string used to represent a null
///
/// Defaults to `""`
- pub fn with_null(self, null: &'a str) -> Self {
+ pub const fn with_null(self, null: &'a str) -> Self {
Self { null, ..self }
}
/// Overrides the format used for [`DataType::Date32`] columns
- pub fn with_date_format(self, date_format: Option<&'a str>) -> Self {
+ pub const fn with_date_format(self, date_format: Option<&'a str>) -> Self {
Self {
date_format,
..self
@@ -96,7 +113,7 @@ impl<'a> FormatOptions<'a> {
}
/// Overrides the format used for [`DataType::Date64`] columns
- pub fn with_datetime_format(self, datetime_format: Option<&'a str>) ->
Self {
+ pub const fn with_datetime_format(self, datetime_format: Option<&'a str>)
-> Self {
Self {
datetime_format,
..self
@@ -104,7 +121,7 @@ impl<'a> FormatOptions<'a> {
}
/// Overrides the format used for [`DataType::Timestamp`] columns without
a timezone
- pub fn with_timestamp_format(self, timestamp_format: Option<&'a str>) ->
Self {
+ pub const fn with_timestamp_format(self, timestamp_format: Option<&'a
str>) -> Self {
Self {
timestamp_format,
..self
@@ -112,7 +129,10 @@ impl<'a> FormatOptions<'a> {
}
/// Overrides the format used for [`DataType::Timestamp`] columns with a
timezone
- pub fn with_timestamp_tz_format(self, timestamp_tz_format: Option<&'a
str>) -> Self {
+ pub const fn with_timestamp_tz_format(
+ self,
+ timestamp_tz_format: Option<&'a str>,
+ ) -> Self {
Self {
timestamp_tz_format,
..self
@@ -120,12 +140,22 @@ impl<'a> FormatOptions<'a> {
}
/// Overrides the format used for [`DataType::Time32`] and
[`DataType::Time64`] columns
- pub fn with_time_format(self, time_format: Option<&'a str>) -> Self {
+ pub const fn with_time_format(self, time_format: Option<&'a str>) -> Self {
Self {
time_format,
..self
}
}
+
+ /// Overrides the format used for duration columns
+ ///
+ /// Defaults to [`DurationFormat::ISO8601`]
+ pub const fn with_duration_format(self, duration_format: DurationFormat)
-> Self {
+ Self {
+ duration_format,
+ ..self
+ }
+ }
}
/// Implements [`Display`] for a specific array value
@@ -534,20 +564,82 @@ temporal_display!(time64us_to_time, time_format,
Time64MicrosecondType);
temporal_display!(time64ns_to_time, time_format, Time64NanosecondType);
macro_rules! duration_display {
- ($convert:ident, $t:ty) => {
- impl<'a> DisplayIndex for &'a PrimitiveArray<$t> {
- fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
- write!(f, "{}", $convert(self.value(idx)))?;
+ ($convert:ident, $t:ty, $scale:tt) => {
+ impl<'a> DisplayIndexState<'a> for &'a PrimitiveArray<$t> {
+ type State = DurationFormat;
+
+ fn prepare(
+ &self,
+ options: &FormatOptions<'a>,
+ ) -> Result<Self::State, ArrowError> {
+ Ok(options.duration_format)
+ }
+
+ fn write(
+ &self,
+ fmt: &Self::State,
+ idx: usize,
+ f: &mut dyn Write,
+ ) -> FormatResult {
+ let v = self.value(idx);
+ match fmt {
+ DurationFormat::ISO8601 => write!(f, "{}", $convert(v))?,
+ DurationFormat::Pretty => duration_fmt!(f, v, $scale)?,
+ }
Ok(())
}
}
};
}
-duration_display!(duration_s_to_duration, DurationSecondType);
-duration_display!(duration_ms_to_duration, DurationMillisecondType);
-duration_display!(duration_us_to_duration, DurationMicrosecondType);
-duration_display!(duration_ns_to_duration, DurationNanosecondType);
+macro_rules! duration_fmt {
+ ($f:ident, $v:expr, 0) => {{
+ let secs = $v;
+ let mins = secs / 60;
+ let hours = mins / 60;
+ let days = hours / 24;
+
+ let secs = secs - (mins * 60);
+ let mins = mins - (hours * 60);
+ write!($f, "{days} days {hours} hours {mins} mins {secs} secs")
+ }};
+ ($f:ident, $v:expr, $scale:tt) => {{
+ let subsec = $v;
+ let secs = subsec / 10_i64.pow($scale);
+ let mins = secs / 60;
+ let hours = mins / 60;
+ let days = hours / 24;
+
+ let subsec = subsec - (secs * 10_i64.pow($scale));
+ let secs = secs - (mins * 60);
+ let mins = mins - (hours * 60);
+ match subsec.is_negative() {
+ true => {
+ write!(
+ $f,
+ concat!("{} days {} hours {} mins -{}.{:0", $scale, "}
secs"),
+ days,
+ hours,
+ mins,
+ secs.abs(),
+ subsec.abs()
+ )
+ }
+ false => {
+ write!(
+ $f,
+ concat!("{} days {} hours {} mins {}.{:0", $scale, "}
secs"),
+ days, hours, mins, secs, subsec
+ )
+ }
+ }
+ }};
+}
+
+duration_display!(duration_s_to_duration, DurationSecondType, 0);
+duration_display!(duration_ms_to_duration, DurationMillisecondType, 3);
+duration_display!(duration_us_to_duration, DurationMicrosecondType, 6);
+duration_display!(duration_ns_to_duration, DurationNanosecondType, 9);
impl<'a> DisplayIndex for &'a PrimitiveArray<IntervalYearMonthType> {
fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
@@ -866,8 +958,18 @@ pub fn lexical_to_string<N: lexical_core::ToLexical>(n: N)
-> String {
mod tests {
use super::*;
+ /// Test to verify options can be constant. See #4580
+ const TEST_CONST_OPTIONS: FormatOptions<'static> = FormatOptions::new()
+ .with_date_format(Some("foo"))
+ .with_timestamp_format(Some("404"));
+
+ #[test]
+ fn test_const_options() {
+ assert_eq!(TEST_CONST_OPTIONS.date_format, Some("foo"));
+ }
+
#[test]
- fn test_map_arry_to_string() {
+ fn test_map_array_to_string() {
let keys = vec!["a", "b", "c", "d", "e", "f", "g", "h"];
let values_data = UInt32Array::from(vec![0u32, 10, 20, 30, 40, 50, 60,
70]);
@@ -887,25 +989,111 @@ mod tests {
);
}
+ fn format_array(array: &dyn Array, fmt: &FormatOptions) -> Vec<String> {
+ let fmt = ArrayFormatter::try_new(array, fmt).unwrap();
+ (0..array.len()).map(|x| fmt.value(x).to_string()).collect()
+ }
+
#[test]
fn test_array_value_to_string_duration() {
- let ns_array = DurationNanosecondArray::from(vec![Some(1), None]);
- assert_eq!(
- array_value_to_string(&ns_array, 0).unwrap(),
- "PT0.000000001S"
- );
- assert_eq!(array_value_to_string(&ns_array, 1).unwrap(), "");
-
- let us_array = DurationMicrosecondArray::from(vec![Some(1), None]);
- assert_eq!(array_value_to_string(&us_array, 0).unwrap(),
"PT0.000001S");
- assert_eq!(array_value_to_string(&us_array, 1).unwrap(), "");
-
- let ms_array = DurationMillisecondArray::from(vec![Some(1), None]);
- assert_eq!(array_value_to_string(&ms_array, 0).unwrap(), "PT0.001S");
- assert_eq!(array_value_to_string(&ms_array, 1).unwrap(), "");
-
- let s_array = DurationSecondArray::from(vec![Some(1), None]);
- assert_eq!(array_value_to_string(&s_array, 0).unwrap(), "PT1S");
- assert_eq!(array_value_to_string(&s_array, 1).unwrap(), "");
+ let iso_fmt = FormatOptions::new();
+ let pretty_fmt =
+ FormatOptions::new().with_duration_format(DurationFormat::Pretty);
+
+ let array = DurationNanosecondArray::from(vec![
+ 1,
+ -1,
+ 1000,
+ -1000,
+ (45 * 60 * 60 * 24 + 14 * 60 * 60 + 2 * 60 + 34) * 1_000_000_000 +
123456789,
+ -(45 * 60 * 60 * 24 + 14 * 60 * 60 + 2 * 60 + 34) * 1_000_000_000
- 123456789,
+ ]);
+ let iso = format_array(&array, &iso_fmt);
+ let pretty = format_array(&array, &pretty_fmt);
+
+ assert_eq!(iso[0], "PT0.000000001S");
+ assert_eq!(pretty[0], "0 days 0 hours 0 mins 0.000000001 secs");
+ assert_eq!(iso[1], "-PT0.000000001S");
+ assert_eq!(pretty[1], "0 days 0 hours 0 mins -0.000000001 secs");
+ assert_eq!(iso[2], "PT0.000001S");
+ assert_eq!(pretty[2], "0 days 0 hours 0 mins 0.000001000 secs");
+ assert_eq!(iso[3], "-PT0.000001S");
+ assert_eq!(pretty[3], "0 days 0 hours 0 mins -0.000001000 secs");
+ assert_eq!(iso[4], "P45DT50554.123456789S");
+ assert_eq!(pretty[4], "45 days 1094 hours 2 mins 34.123456789 secs");
+ assert_eq!(iso[5], "-P45DT50554.123456789S");
+ assert_eq!(pretty[5], "-45 days -1094 hours -2 mins -34.123456789
secs");
+
+ let array = DurationMicrosecondArray::from(vec![
+ 1,
+ -1,
+ 1000,
+ -1000,
+ (45 * 60 * 60 * 24 + 14 * 60 * 60 + 2 * 60 + 34) * 1_000_000 +
123456,
+ -(45 * 60 * 60 * 24 + 14 * 60 * 60 + 2 * 60 + 34) * 1_000_000 -
123456,
+ ]);
+ let iso = format_array(&array, &iso_fmt);
+ let pretty = format_array(&array, &pretty_fmt);
+
+ assert_eq!(iso[0], "PT0.000001S");
+ assert_eq!(pretty[0], "0 days 0 hours 0 mins 0.000001 secs");
+ assert_eq!(iso[1], "-PT0.000001S");
+ assert_eq!(pretty[1], "0 days 0 hours 0 mins -0.000001 secs");
+ assert_eq!(iso[2], "PT0.001S");
+ assert_eq!(pretty[2], "0 days 0 hours 0 mins 0.001000 secs");
+ assert_eq!(iso[3], "-PT0.001S");
+ assert_eq!(pretty[3], "0 days 0 hours 0 mins -0.001000 secs");
+ assert_eq!(iso[4], "P45DT50554.123456S");
+ assert_eq!(pretty[4], "45 days 1094 hours 2 mins 34.123456 secs");
+ assert_eq!(iso[5], "-P45DT50554.123456S");
+ assert_eq!(pretty[5], "-45 days -1094 hours -2 mins -34.123456 secs");
+
+ let array = DurationMillisecondArray::from(vec![
+ 1,
+ -1,
+ 1000,
+ -1000,
+ (45 * 60 * 60 * 24 + 14 * 60 * 60 + 2 * 60 + 34) * 1_000 + 123,
+ -(45 * 60 * 60 * 24 + 14 * 60 * 60 + 2 * 60 + 34) * 1_000 - 123,
+ ]);
+ let iso = format_array(&array, &iso_fmt);
+ let pretty = format_array(&array, &pretty_fmt);
+
+ assert_eq!(iso[0], "PT0.001S");
+ assert_eq!(pretty[0], "0 days 0 hours 0 mins 0.001 secs");
+ assert_eq!(iso[1], "-PT0.001S");
+ assert_eq!(pretty[1], "0 days 0 hours 0 mins -0.001 secs");
+ assert_eq!(iso[2], "PT1S");
+ assert_eq!(pretty[2], "0 days 0 hours 0 mins 1.000 secs");
+ assert_eq!(iso[3], "-PT1S");
+ assert_eq!(pretty[3], "0 days 0 hours 0 mins -1.000 secs");
+ assert_eq!(iso[4], "P45DT50554.123S");
+ assert_eq!(pretty[4], "45 days 1094 hours 2 mins 34.123 secs");
+ assert_eq!(iso[5], "-P45DT50554.123S");
+ assert_eq!(pretty[5], "-45 days -1094 hours -2 mins -34.123 secs");
+
+ let array = DurationSecondArray::from(vec![
+ 1,
+ -1,
+ 1000,
+ -1000,
+ 45 * 60 * 60 * 24 + 14 * 60 * 60 + 2 * 60 + 34,
+ -45 * 60 * 60 * 24 - 14 * 60 * 60 - 2 * 60 - 34,
+ ]);
+ let iso = format_array(&array, &iso_fmt);
+ let pretty = format_array(&array, &pretty_fmt);
+
+ assert_eq!(iso[0], "PT1S");
+ assert_eq!(pretty[0], "0 days 0 hours 0 mins 1 secs");
+ assert_eq!(iso[1], "-PT1S");
+ assert_eq!(pretty[1], "0 days 0 hours 0 mins -1 secs");
+ assert_eq!(iso[2], "PT1000S");
+ assert_eq!(pretty[2], "0 days 0 hours 16 mins 40 secs");
+ assert_eq!(iso[3], "-PT1000S");
+ assert_eq!(pretty[3], "0 days 0 hours -16 mins -40 secs");
+ assert_eq!(iso[4], "P45DT50554S");
+ assert_eq!(pretty[4], "45 days 1094 hours 2 mins 34 secs");
+ assert_eq!(iso[5], "-P45DT50554S");
+ assert_eq!(pretty[5], "-45 days -1094 hours -2 mins -34 secs");
}
}