This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new cefb8c1bbb Use ArrayFormatter in cast kernel (#4668)
cefb8c1bbb is described below
commit cefb8c1bbb2807fbb420e62f108676eeb80ec198
Author: Raphael Taylor-Davies <[email protected]>
AuthorDate: Wed Aug 9 22:30:12 2023 +0100
Use ArrayFormatter in cast kernel (#4668)
* Use ArrayFormatter in cast kernel
* Add test
* Clippy
---
arrow-cast/src/cast.rs | 86 ++++++++++++++++++--------------------------------
1 file changed, 31 insertions(+), 55 deletions(-)
diff --git a/arrow-cast/src/cast.rs b/arrow-cast/src/cast.rs
index c730452a8d..c7fd082de2 100644
--- a/arrow-cast/src/cast.rs
+++ b/arrow-cast/src/cast.rs
@@ -41,7 +41,7 @@ use chrono::{NaiveTime, Offset, TimeZone, Utc};
use std::cmp::Ordering;
use std::sync::Arc;
-use crate::display::{array_value_to_string, ArrayFormatter, FormatOptions};
+use crate::display::{ArrayFormatter, FormatOptions};
use crate::parse::{
parse_interval_day_time, parse_interval_month_day_nano,
parse_interval_year_month,
string_to_datetime, Parser,
@@ -622,21 +622,6 @@ where
Ok(Arc::new(array))
}
-// cast the List array to Utf8 array
-macro_rules! cast_list_to_string {
- ($ARRAY:expr, $SIZE:ident) => {{
- let mut value_builder: GenericStringBuilder<$SIZE> = GenericStringBuilder::new();
- for i in 0..$ARRAY.len() {
- if $ARRAY.is_null(i) {
- value_builder.append_null();
- } else {
- value_builder.append_value(array_value_to_string($ARRAY, i)?);
- }
- }
- Ok(Arc::new(value_builder.finish()))
- }};
-}
-
fn make_timestamp_array(
array: &PrimitiveArray<Int64Type>,
unit: TimeUnit,
@@ -800,8 +785,8 @@ pub fn cast_with_options(
}
}
(List(_) | LargeList(_), _) => match to_type {
- Utf8 => cast_list_to_string!(array, i32),
- LargeUtf8 => cast_list_to_string!(array, i64),
+ Utf8 => value_to_string::<i32>(array, cast_options),
+ LargeUtf8 => value_to_string::<i64>(array, cast_options),
_ => Err(ArrowError::CastError(
"Cannot cast list to non-list data types".to_string(),
)),
@@ -924,8 +909,8 @@ pub fn cast_with_options(
x as f64 / 10_f64.powi(*scale as i32)
})
}
- Utf8 => value_to_string::<i32>(array, Some(&cast_options.format_options)),
- LargeUtf8 => value_to_string::<i64>(array, Some(&cast_options.format_options)),
+ Utf8 => value_to_string::<i32>(array, cast_options),
+ LargeUtf8 => value_to_string::<i64>(array, cast_options),
Null => Ok(new_null_array(to_type, array.len())),
_ => Err(ArrowError::CastError(format!(
"Casting from {from_type:?} to {to_type:?} not supported"
@@ -993,8 +978,8 @@ pub fn cast_with_options(
x.to_f64().unwrap() / 10_f64.powi(*scale as i32)
})
}
- Utf8 => value_to_string::<i32>(array, Some(&cast_options.format_options)),
- LargeUtf8 => value_to_string::<i64>(array, Some(&cast_options.format_options)),
+ Utf8 => value_to_string::<i32>(array, cast_options),
+ LargeUtf8 => value_to_string::<i64>(array, cast_options),
Null => Ok(new_null_array(to_type, array.len())),
_ => Err(ArrowError::CastError(format!(
"Casting from {from_type:?} to {to_type:?} not supported"
@@ -1215,24 +1200,8 @@ pub fn cast_with_options(
Float16 => cast_bool_to_numeric::<Float16Type>(array, cast_options),
Float32 => cast_bool_to_numeric::<Float32Type>(array, cast_options),
Float64 => cast_bool_to_numeric::<Float64Type>(array, cast_options),
- Utf8 => {
- let array = array.as_any().downcast_ref::<BooleanArray>().unwrap();
- Ok(Arc::new(
- array
- .iter()
- .map(|value| value.map(|value| if value { "true" } else { "false" }))
- .collect::<StringArray>(),
- ))
- }
- LargeUtf8 => {
- let array = array.as_any().downcast_ref::<BooleanArray>().unwrap();
- Ok(Arc::new(
- array
- .iter()
- .map(|value| value.map(|value| if value { "true" } else { "false" }))
- .collect::<LargeStringArray>(),
- ))
- }
+ Utf8 => value_to_string::<i32>(array, cast_options),
+ LargeUtf8 => value_to_string::<i64>(array, cast_options),
_ => Err(ArrowError::CastError(format!(
"Casting from {from_type:?} to {to_type:?} not supported",
))),
@@ -1374,8 +1343,8 @@ pub fn cast_with_options(
"Casting from {from_type:?} to {to_type:?} not supported",
))),
},
- (from_type, LargeUtf8) if from_type.is_primitive() => value_to_string::<i64>(array, Some(&cast_options.format_options)),
- (from_type, Utf8) if from_type.is_primitive() => value_to_string::<i32>(array, Some(&cast_options.format_options)),
+ (from_type, LargeUtf8) if from_type.is_primitive() => value_to_string::<i64>(array, cast_options),
+ (from_type, Utf8) if from_type.is_primitive() => value_to_string::<i32>(array, cast_options),
// start numeric casts
(UInt8, UInt16) => {
cast_numeric_arrays::<UInt8Type, UInt16Type>(array, cast_options)
@@ -2461,14 +2430,10 @@ where
fn value_to_string<O: OffsetSizeTrait>(
array: &dyn Array,
- options: Option<&FormatOptions>,
+ options: &CastOptions,
) -> Result<ArrayRef, ArrowError> {
let mut builder = GenericStringBuilder::<O>::new();
- let mut fmt_options = &FormatOptions::default();
- if let Some(fmt_opts) = options {
- fmt_options = fmt_opts;
- };
- let formatter = ArrayFormatter::try_new(array, fmt_options)?;
+ let formatter = ArrayFormatter::try_new(array, &options.format_options)?;
let nulls = array.nulls();
for i in 0..array.len() {
match nulls.map(|x| x.is_null(i)).unwrap_or_default() {
@@ -7369,14 +7334,10 @@ mod tests {
/// Print the `DictionaryArray` `array` as a vector of strings
fn array_to_strings(array: &ArrayRef) -> Vec<String> {
+ let options = FormatOptions::new().with_null("null");
+ let formatter = ArrayFormatter::try_new(array.as_ref(), &options).unwrap();
(0..array.len())
- .map(|i| {
- if array.is_null(i) {
- "null".to_string()
- } else {
- array_value_to_string(array, i).expect("Convert array to String")
- }
- })
+ .map(|i| formatter.value(i).to_string())
.collect()
}
@@ -8989,4 +8950,19 @@ mod tests {
fn test_const_options() {
assert!(CAST_OPTIONS.safe)
}
+
+ #[test]
+ fn test_list_format_options() {
+ let options = CastOptions {
+ safe: false,
+ format_options: FormatOptions::default().with_null("null"),
+ };
+ let array = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
+ Some(vec![Some(0), Some(1), Some(2)]),
+ Some(vec![Some(0), None, Some(2)]),
+ ]);
+ let a = cast_with_options(&array, &DataType::Utf8, &options).unwrap();
+ let r: Vec<_> = a.as_string::<i32>().iter().map(|x| x.unwrap()).collect();
+ assert_eq!(r, &["[0, 1, 2]", "[0, null, 2]"]);
+ }
}