This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch cherry_pick_fb451125
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
commit d812a5ab14225b124f33e0389d4f6c9bd92601c8 Author: Ádám Lippai <[email protected]> AuthorDate: Sun Jun 13 02:20:08 2021 +0200 Add Decimal to CsvWriter and improve debug display (#406) * Add Decimal to CsvWriter and improve debug display * Measure CSV writer instead of file and data creation * Re-use decimal formatting --- arrow/src/array/array_binary.rs | 36 ++++++++++++++++++++++++------------ arrow/src/csv/writer.rs | 23 ++++++++++++++++------- arrow/src/util/display.rs | 27 ++++++++++++--------------- 3 files changed, 52 insertions(+), 34 deletions(-) diff --git a/arrow/src/array/array_binary.rs b/arrow/src/array/array_binary.rs index 0cb4db4..0b374db 100644 --- a/arrow/src/array/array_binary.rs +++ b/arrow/src/array/array_binary.rs @@ -666,6 +666,17 @@ impl DecimalArray { self.length * i as i32 } + #[inline] + pub fn value_as_string(&self, row: usize) -> String { + let decimal_string = self.value(row).to_string(); + if self.scale == 0 { + decimal_string + } else { + let splits = decimal_string.split_at(decimal_string.len() - self.scale); + format!("{}.{}", splits.0, splits.1) + } + } + pub fn from_fixed_size_list_array( v: FixedSizeListArray, precision: usize, @@ -729,7 +740,9 @@ impl fmt::Debug for DecimalArray { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "DecimalArray<{}, {}>\n[\n", self.precision, self.scale)?; print_long_array(self, f, |array, index, f| { - fmt::Debug::fmt(&array.value(index), f) + let formatted_decimal = array.value_as_string(index); + + write!(f, "{}", formatted_decimal) })?; write!(f, "]") } @@ -758,7 +771,7 @@ impl Array for DecimalArray { #[cfg(test)] mod tests { use crate::{ - array::{LargeListArray, ListArray}, + array::{DecimalBuilder, LargeListArray, ListArray}, datatypes::Field, }; @@ -1163,17 +1176,16 @@ mod tests { #[test] fn test_decimal_array_fmt_debug() { - let values: [u8; 32] = [ - 192, 219, 180, 17, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 36, 75, 238, 253, - 255, 255, 255, 255, 255, 255, 255, 
255, 255, 255, 255, - ]; - let array_data = ArrayData::builder(DataType::Decimal(23, 6)) - .len(2) - .add_buffer(Buffer::from(&values[..])) - .build(); - let arr = DecimalArray::from(array_data); + let values: Vec<i128> = vec![8887000000, -8887000000]; + let mut decimal_builder = DecimalBuilder::new(3, 23, 6); + + values.iter().for_each(|&value| { + decimal_builder.append_value(value).unwrap(); + }); + decimal_builder.append_null().unwrap(); + let arr = decimal_builder.finish(); assert_eq!( - "DecimalArray<23, 6>\n[\n 8887000000,\n -8887000000,\n]", + "DecimalArray<23, 6>\n[\n 8887.000000,\n -8887.000000,\n null,\n]", format!("{:?}", arr) ); } diff --git a/arrow/src/csv/writer.rs b/arrow/src/csv/writer.rs index f2f4ce8..b94036c 100644 --- a/arrow/src/csv/writer.rs +++ b/arrow/src/csv/writer.rs @@ -72,6 +72,7 @@ use std::io::Write; use crate::datatypes::*; use crate::error::{ArrowError, Result}; use crate::record_batch::RecordBatch; +use crate::util::display::make_string_from_decimal; use crate::{array::*, util::serialization::lexical_to_string}; const DEFAULT_DATE_FORMAT: &str = "%F"; const DEFAULT_TIME_FORMAT: &str = "%T"; @@ -244,6 +245,7 @@ impl<W: Write> Writer<W> { }; format!("{}", datetime.format(&self.timestamp_format)) } + DataType::Decimal(..) 
=> make_string_from_decimal(col, row_index)?, t => { // List and Struct arrays not supported by the writer, any // other type needs to be implemented @@ -568,6 +570,7 @@ sed do eiusmod tempor,-556132.25,1,,2019-04-18T02:45:55.555000000,23:46:03,foo Field::new("c4", DataType::Boolean, true), Field::new("c5", DataType::Timestamp(TimeUnit::Millisecond, None), true), Field::new("c6", DataType::Time32(TimeUnit::Second), false), + Field::new("c7", DataType::Decimal(6, 2), false), ]); let c1 = StringArray::from(vec![ @@ -587,6 +590,11 @@ sed do eiusmod tempor,-556132.25,1,,2019-04-18T02:45:55.555000000,23:46:03,foo None, ); let c6 = Time32SecondArray::from(vec![1234, 24680, 85563]); + let mut c7_builder = DecimalBuilder::new(5, 6, 2); + c7_builder.append_value(12345_i128).unwrap(); + c7_builder.append_value(-12345_i128).unwrap(); + c7_builder.append_null().unwrap(); + let c7 = c7_builder.finish(); let batch = RecordBatch::try_new( Arc::new(schema), @@ -597,6 +605,7 @@ sed do eiusmod tempor,-556132.25,1,,2019-04-18T02:45:55.555000000,23:46:03,foo Arc::new(c4), Arc::new(c5), Arc::new(c6), + Arc::new(c7), ], ) .unwrap(); @@ -608,13 +617,13 @@ sed do eiusmod tempor,-556132.25,1,,2019-04-18T02:45:55.555000000,23:46:03,foo writer.write(batch).unwrap(); } - let left = "c1,c2,c3,c4,c5,c6 -Lorem ipsum dolor sit amet,123.564532,3,true,,00:20:34 -consectetur adipiscing elit,,2,false,2019-04-18T10:54:47.378000000,06:51:20 -sed do eiusmod tempor,-556132.25,1,,2019-04-18T02:45:55.555000000,23:46:03 -Lorem ipsum dolor sit amet,123.564532,3,true,,00:20:34 -consectetur adipiscing elit,,2,false,2019-04-18T10:54:47.378000000,06:51:20 -sed do eiusmod tempor,-556132.25,1,,2019-04-18T02:45:55.555000000,23:46:03\n"; + let left = "c1,c2,c3,c4,c5,c6,c7 +Lorem ipsum dolor sit amet,123.564532,3,true,,00:20:34,123.45 +consectetur adipiscing elit,,2,false,2019-04-18T10:54:47.378000000,06:51:20,-123.45 +sed do eiusmod tempor,-556132.25,1,,2019-04-18T02:45:55.555000000,23:46:03, +Lorem ipsum dolor sit 
amet,123.564532,3,true,,00:20:34,123.45 +consectetur adipiscing elit,,2,false,2019-04-18T10:54:47.378000000,06:51:20,-123.45 +sed do eiusmod tempor,-556132.25,1,,2019-04-18T02:45:55.555000000,23:46:03,\n"; let right = writer.writer.into_inner().map(|s| s.to_string()); assert_eq!(Some(left.to_string()), right.ok()); } diff --git a/arrow/src/util/display.rs b/arrow/src/util/display.rs index 61f549a..999e424 100644 --- a/arrow/src/util/display.rs +++ b/arrow/src/util/display.rs @@ -19,6 +19,8 @@ //! purposes. See the `pretty` crate for additional functions for //! record batch pretty printing. +use std::sync::Arc; + use crate::array::Array; use crate::datatypes::{ ArrowNativeType, ArrowPrimitiveType, DataType, Int16Type, Int32Type, Int64Type, @@ -192,18 +194,15 @@ macro_rules! make_string_from_list { }}; } -macro_rules! make_string_from_decimal { - ($array_type: ty, $column: ident, $row: ident, $scale: ident) => {{ - let array = $column.as_any().downcast_ref::<$array_type>().unwrap(); - let decimal_string = array.value($row).to_string(); - let formatted_decimal = if *$scale == 0 { - decimal_string - } else { - let splits = decimal_string.split_at(decimal_string.len() - *$scale); - format!("{}.{}", splits.0, splits.1) - }; - Ok(formatted_decimal) - }}; +#[inline(always)] +pub fn make_string_from_decimal(column: &Arc<dyn Array>, row: usize) -> Result<String> { + let array = column + .as_any() + .downcast_ref::<array::DecimalArray>() + .unwrap(); + + let formatted_decimal = array.value_as_string(row); + Ok(formatted_decimal) } /// Get the value at the given row in an array as a String. 
@@ -231,9 +230,7 @@ pub fn array_value_to_string(column: &array::ArrayRef, row: usize) -> Result<Str DataType::Float16 => make_string!(array::Float32Array, column, row), DataType::Float32 => make_string!(array::Float32Array, column, row), DataType::Float64 => make_string!(array::Float64Array, column, row), - DataType::Decimal(_, scale) => { - make_string_from_decimal!(array::DecimalArray, column, row, scale) - } + DataType::Decimal(..) => make_string_from_decimal(column, row), DataType::Timestamp(unit, _) if *unit == TimeUnit::Second => { make_string_datetime!(array::TimestampSecondArray, column, row) }
