This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push:
new 0bbfc03e4c fix: first none/empty list in `ListArray` panics in
`cast_with_options` (#7065)
0bbfc03e4c is described below
commit 0bbfc03e4c0eab10354c0a76ab36e7d8eb5ad2ad
Author: irenjj <[email protected]>
AuthorDate: Sun Feb 9 00:29:25 2025 +0800
fix: first none/empty list in `ListArray` panics in `cast_with_options`
(#7065)
* fix: first none in `ListArray` panics in `cast_with_options`
* simplify
* fix
* Update arrow-cast/src/cast/list.rs
Co-authored-by: Jeffrey Vo <[email protected]>
---------
Co-authored-by: Jeffrey Vo <[email protected]>
---
arrow-cast/src/cast/list.rs | 13 ++++++++++++-
arrow-cast/src/cast/mod.rs | 37 +++++++++++++++++++++++++++++++++++++
2 files changed, 49 insertions(+), 1 deletion(-)
diff --git a/arrow-cast/src/cast/list.rs b/arrow-cast/src/cast/list.rs
index ec7a5c57d5..ddcbca361b 100644
--- a/arrow-cast/src/cast/list.rs
+++ b/arrow-cast/src/cast/list.rs
@@ -88,6 +88,17 @@ where
let mut mutable = MutableArrayData::new(vec![&values], nullable, cap);
// The end position in values of the last incorrectly-sized list slice
let mut last_pos = 0;
+
+ // Need to flag when previous vector(s) are empty/None to distinguish from 'All slices were correct length' cases.
+ let is_prev_empty = if array.offsets().len() < 2 {
+ false
+ } else {
+ let first_offset = array.offsets()[0].as_usize();
+ let second_offset = array.offsets()[1].as_usize();
+
+ first_offset == 0 && second_offset == 0
+ };
+
for (idx, w) in array.offsets().windows(2).enumerate() {
let start_pos = w[0].as_usize();
let end_pos = w[1].as_usize();
@@ -113,7 +124,7 @@ where
}
let values = match last_pos {
- 0 => array.values().slice(0, cap), // All slices were the correct length
+ 0 if !is_prev_empty => array.values().slice(0, cap), // All slices were the correct length
_ => {
if mutable.len() != cap {
// Remaining slices were all correct length
diff --git a/arrow-cast/src/cast/mod.rs b/arrow-cast/src/cast/mod.rs
index 53069c1a16..4bb4fb3e79 100644
--- a/arrow-cast/src/cast/mod.rs
+++ b/arrow-cast/src/cast/mod.rs
@@ -9952,4 +9952,41 @@ mod tests {
assert_eq!(result.unwrap_err().to_string(),
"Invalid argument error: 123456789 is too large to store in a Decimal256 of precision 6. Max is 999999");
}
+
+ #[test]
+ fn test_first_none() {
+ let array = Arc::new(ListArray::from_iter_primitive::<Int64Type, _, _>(vec![
+ None,
+ Some(vec![Some(1), Some(2)]),
+ ])) as ArrayRef;
+ let data_type =
+ DataType::FixedSizeList(FieldRef::new(Field::new("item", DataType::Int64, true)), 2);
+ let opt = CastOptions::default();
+ let r = cast_with_options(&array, &data_type, &opt).unwrap();
+
+ let fixed_array = Arc::new(FixedSizeListArray::from_iter_primitive::<Int64Type, _, _>(
+ vec![None, Some(vec![Some(1), Some(2)])],
+ 2,
+ )) as ArrayRef;
+ assert_eq!(*fixed_array, *r);
+ }
+
+ #[test]
+ fn test_first_last_none() {
+ let array = Arc::new(ListArray::from_iter_primitive::<Int64Type, _, _>(vec![
+ None,
+ Some(vec![Some(1), Some(2)]),
+ None,
+ ])) as ArrayRef;
+ let data_type =
+ DataType::FixedSizeList(FieldRef::new(Field::new("item", DataType::Int64, true)), 2);
+ let opt = CastOptions::default();
+ let r = cast_with_options(&array, &data_type, &opt).unwrap();
+
+ let fixed_array = Arc::new(FixedSizeListArray::from_iter_primitive::<Int64Type, _, _>(
+ vec![None, Some(vec![Some(1), Some(2)]), None],
+ 2,
+ )) as ArrayRef;
+ assert_eq!(*fixed_array, *r);
+ }
}