This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new ae85263a97 feat: support casting FixedSizeList with new child type (#5360)
ae85263a97 is described below
commit ae85263a9715b873ccc399cbd5b4adef01987a79
Author: Will Jones <[email protected]>
AuthorDate: Tue Feb 6 10:05:27 2024 -0800
feat: support casting FixedSizeList with new child type (#5360)
---
arrow-cast/src/cast.rs | 185 ++++++++++++++++++++++++++++++++++---------------
1 file changed, 131 insertions(+), 54 deletions(-)
diff --git a/arrow-cast/src/cast.rs b/arrow-cast/src/cast.rs
index e321ae1b1b..a813c5f6c8 100644
--- a/arrow-cast/src/cast.rs
+++ b/arrow-cast/src/cast.rs
@@ -133,11 +133,12 @@ pub fn can_cast_types(from_type: &DataType, to_type:
&DataType) -> bool {
can_cast_types(list_from.data_type(), list_to.data_type())
}
(List(_), _) => false,
- (FixedSizeList(list_from,_), List(list_to)) => {
- list_from.data_type() == list_to.data_type()
- }
+ (FixedSizeList(list_from,_), List(list_to)) |
(FixedSizeList(list_from,_), LargeList(list_to)) => {
- list_from.data_type() == list_to.data_type()
+ can_cast_types(list_from.data_type(), list_to.data_type())
+ }
+ (FixedSizeList(inner, size), FixedSizeList(inner_to, size_to)) if size
== size_to => {
+ can_cast_types(inner.data_type(), inner_to.data_type())
}
(_, List(list_to)) => can_cast_types(from_type, list_to.data_type()),
(_, LargeList(list_to)) => can_cast_types(from_type,
list_to.data_type()),
@@ -784,24 +785,41 @@ pub fn cast_with_options(
"Cannot cast list to non-list data types".to_string(),
)),
},
- (FixedSizeList(list_from, _), List(list_to)) => {
+ (FixedSizeList(list_from, size), List(list_to)) => {
if list_to.data_type() != list_from.data_type() {
- Err(ArrowError::CastError(
- "cannot cast fixed-size-list to list with different child
data".into(),
- ))
+ // To transform inner type, can first cast to FSL with new
inner type.
+ let fsl_to = DataType::FixedSizeList(list_to.clone(), *size);
+ let array = cast_with_options(array, &fsl_to, cast_options)?;
+ cast_fixed_size_list_to_list::<i32>(array.as_ref())
} else {
cast_fixed_size_list_to_list::<i32>(array)
}
}
- (FixedSizeList(list_from, _), LargeList(list_to)) => {
+ (FixedSizeList(list_from, size), LargeList(list_to)) => {
if list_to.data_type() != list_from.data_type() {
- Err(ArrowError::CastError(
- "cannot cast fixed-size-list to largelist with different
child data".into(),
- ))
+ // To transform inner type, can first cast to FSL with new
inner type.
+ let fsl_to = DataType::FixedSizeList(list_to.clone(), *size);
+ let array = cast_with_options(array, &fsl_to, cast_options)?;
+ cast_fixed_size_list_to_list::<i64>(array.as_ref())
} else {
cast_fixed_size_list_to_list::<i64>(array)
}
}
+ (FixedSizeList(_, size_from), FixedSizeList(list_to, size_to)) => {
+ if size_from != size_to {
+ return Err(ArrowError::CastError(
+ "cannot cast fixed-size-list to fixed-size-list with
different size".into(),
+ ));
+ }
+ let array =
array.as_any().downcast_ref::<FixedSizeListArray>().unwrap();
+ let values = cast_with_options(array.values(),
list_to.data_type(), cast_options)?;
+ Ok(Arc::new(FixedSizeListArray::try_new(
+ list_to.clone(),
+ *size_from,
+ values,
+ array.nulls().cloned(),
+ )?))
+ }
(_, List(ref to)) => cast_values_to_list::<i32>(array, to,
cast_options),
(_, LargeList(ref to)) => cast_values_to_list::<i64>(array, to,
cast_options),
(_, FixedSizeList(ref to, size)) if *size == 1 => {
@@ -7561,6 +7579,37 @@ mod tests {
assert_eq!(946684800000, c.value(5));
}
+ #[test]
+ fn test_can_cast_fsl_to_fsl() {
+ let from_array = Arc::new(
+ FixedSizeListArray::from_iter_primitive::<Float32Type, _, _>(
+ [Some([Some(1.0), Some(2.0)]), None],
+ 2,
+ ),
+ ) as ArrayRef;
+ let to_array = Arc::new(
+ FixedSizeListArray::from_iter_primitive::<Float16Type, _, _>(
+ [
+ Some([Some(f16::from_f32(1.0)), Some(f16::from_f32(2.0))]),
+ None,
+ ],
+ 2,
+ ),
+ ) as ArrayRef;
+
+ assert!(can_cast_types(from_array.data_type(), to_array.data_type()));
+ let actual = cast(&from_array, to_array.data_type()).unwrap();
+ assert_eq!(actual.data_type(), to_array.data_type());
+
+ let invalid_target =
+ DataType::FixedSizeList(Arc::new(Field::new("item",
DataType::Binary, true)), 2);
+ assert!(!can_cast_types(from_array.data_type(), &invalid_target));
+
+ let invalid_size =
+ DataType::FixedSizeList(Arc::new(Field::new("item",
DataType::Float16, true)), 5);
+ assert!(!can_cast_types(from_array.data_type(), &invalid_size));
+ }
+
#[test]
fn test_can_cast_types_fixed_size_list_to_list() {
// DataType::List
@@ -7580,50 +7629,78 @@ mod tests {
#[test]
fn test_cast_fixed_size_list_to_list() {
- // DataType::List
- let array1 = Arc::new(make_fixed_size_list_array()) as ArrayRef;
- let list_array1 = cast(
- &array1,
- &DataType::List(Arc::new(Field::new("", DataType::Int32, false))),
- )
- .unwrap();
- let actual = list_array1.as_any().downcast_ref::<ListArray>().unwrap();
- let expected = array1
- .as_any()
- .downcast_ref::<FixedSizeListArray>()
- .unwrap();
+ // Important cases:
+ // 1. With/without nulls
+ // 2. LargeList and List
+ // 3. With and without inner casts
+
+ let cases = [
+ // fixed_size_list<i32, 2> => list<i32>
+ (
+ Arc::new(FixedSizeListArray::from_iter_primitive::<Int32Type,
_, _>(
+ [[1, 1].map(Some), [2, 2].map(Some)].map(Some),
+ 2,
+ )) as ArrayRef,
+ Arc::new(ListArray::from_iter_primitive::<Int32Type, _, _>([
+ Some([Some(1), Some(1)]),
+ Some([Some(2), Some(2)]),
+ ])) as ArrayRef,
+ ),
+ // fixed_size_list<i32, 2> => list<i32> (nullable)
+ (
+ Arc::new(FixedSizeListArray::from_iter_primitive::<Int32Type,
_, _>(
+ [None, Some([Some(2), Some(2)])],
+ 2,
+ )) as ArrayRef,
+ Arc::new(ListArray::from_iter_primitive::<Int32Type, _, _>([
+ None,
+ Some([Some(2), Some(2)]),
+ ])) as ArrayRef,
+ ),
+ // fixed_size_list<i32, 2> => large_list<i64>
+ (
+ Arc::new(FixedSizeListArray::from_iter_primitive::<Int32Type,
_, _>(
+ [[1, 1].map(Some), [2, 2].map(Some)].map(Some),
+ 2,
+ )) as ArrayRef,
+ Arc::new(LargeListArray::from_iter_primitive::<Int64Type, _,
_>([
+ Some([Some(1), Some(1)]),
+ Some([Some(2), Some(2)]),
+ ])) as ArrayRef,
+ ),
+ // fixed_size_list<i32, 2> => large_list<i64> (nullable)
+ (
+ Arc::new(FixedSizeListArray::from_iter_primitive::<Int32Type,
_, _>(
+ [None, Some([Some(2), Some(2)])],
+ 2,
+ )) as ArrayRef,
+ Arc::new(LargeListArray::from_iter_primitive::<Int64Type, _,
_>([
+ None,
+ Some([Some(2), Some(2)]),
+ ])) as ArrayRef,
+ ),
+ ];
- assert_eq!(expected.values(), actual.values());
- assert_eq!(expected.len(), actual.len());
+ for (array, expected) in cases {
+ let array = Arc::new(array) as ArrayRef;
- // DataType::LargeList
- let array2 = Arc::new(make_fixed_size_list_array_for_large_list()) as
ArrayRef;
- let list_array2 = cast(
- &array2,
- &DataType::LargeList(Arc::new(Field::new("", DataType::Int64,
false))),
- )
- .unwrap();
- let actual = list_array2
- .as_any()
- .downcast_ref::<LargeListArray>()
- .unwrap();
- let expected = array2
- .as_any()
- .downcast_ref::<FixedSizeListArray>()
- .unwrap();
- assert_eq!(expected.values(), actual.values());
- assert_eq!(expected.len(), actual.len());
-
- // Cast previous LargeList to List
- let array3 = Arc::new(actual.clone()) as ArrayRef;
- let list_array3 = cast(
- &array3,
- &DataType::List(Arc::new(Field::new("", DataType::Int64, false))),
- )
- .unwrap();
- let actual = list_array3.as_any().downcast_ref::<ListArray>().unwrap();
- let expected =
array3.as_any().downcast_ref::<LargeListArray>().unwrap();
- assert_eq!(expected.values(), actual.values());
+ assert!(
+ can_cast_types(array.data_type(), expected.data_type()),
+ "can_cast_types claims we cannot cast {:?} to {:?}",
+ array.data_type(),
+ expected.data_type()
+ );
+
+ let list_array = cast(&array, expected.data_type())
+ .unwrap_or_else(|_| panic!("Failed to cast {:?} to {:?}",
array, expected));
+ assert_eq!(
+ list_array.as_ref(),
+ &expected,
+ "Incorrect result from casting {:?} to {:?}",
+ array,
+ expected
+ );
+ }
}
#[test]