alamb commented on code in PR #7596:
URL: https://github.com/apache/arrow-rs/pull/7596#discussion_r2132466026
##########
arrow-array/src/array/struct_array.rs:
##########
@@ -852,6 +855,67 @@ mod tests {
assert!(sliced_c1.is_null(2));
}
+ #[test]
+ fn test_struct_array_data_slice() {
+ let x = Int32Array::from(vec![Some(0), Some(1), Some(2), Some(3),
None, Some(5)]);
+ let struct_array = StructArray::new(
+ Fields::from(vec![Field::new("x", DataType::Int32, true)]),
+ vec![Arc::new(x.clone())],
+ Some(NullBuffer::from(vec![true, true, true, false, true, true])),
+ )
+ .into_data();
+ let sliced = struct_array.slice(1, 4);
+
+ let arr = make_array(sliced);
+ assert_eq!(
+ arr.as_struct().column(0).as_primitive::<Int32Type>(),
+ &x.slice(1, 4)
+ );
+
+ // This next test case is a regression test of a bug detected when
converting a sliced
+ // struct from C++
+ //
+ // In arrow-rs struct slicing is done by setting the offset and length
of
+ // child arrays.
+ //
+ // In the C++ implementation of Arrow struct slicing is done by
setting the offset
+ // and length of the struct array.
+ //
+ // arrow-rs should be able to parse data received from C++ correctly.
+ let x = Int32Array::from(vec![Some(0), Some(1), Some(2), Some(3),
None, Some(5)]);
+ let y = Int32Array::from(vec![Some(5), Some(6), None, Some(8),
Some(9), Some(10)]);
+ let struct_array = StructArray::new(
+ Fields::from(vec![
+ Field::new("x", DataType::Int32, true),
+ Field::new("y", DataType::Int32, true),
+ ]),
+ vec![Arc::new(x), Arc::new(y)],
+ Some(NullBuffer::from(vec![true, true, true, false, true, true])),
+ );
+ let struct_array = StructArray::new(
+ Fields::from(vec![Field::new(
+ "inner",
+ struct_array.data_type().clone(),
+ true,
+ )]),
+ vec![Arc::new(struct_array)],
+ Some(NullBuffer::from(vec![true, false, true, true, true, true])),
+ );
+
+ let cpp_sliced_array = make_array(
Review Comment:
I wonder if there is some way to add a reproducer with actual data that came
from C/C++. I am not sure how to verify that this reproducer accurately
reflects the C/C++ code (not that I doubt you, I just don't know how to
double-check it).
Maybe we can add something to
https://github.com/apache/arrow-testing/tree/master/data?
Or does it only happen when passing across IPC boundaries? If so, maybe we
can add a test to the integration tests?
##########
arrow-array/src/array/struct_array.rs:
##########
@@ -852,6 +855,67 @@ mod tests {
assert!(sliced_c1.is_null(2));
}
+ #[test]
+ fn test_struct_array_data_slice() {
Review Comment:
I did verify that this test fails without the change in this PR:
```
assertion failed: (offset + length) <= self.len()
thread 'array::struct_array::tests::test_struct_array_data_slice' panicked
at arrow-data/src/data.rs:550:9:
assertion failed: (offset + length) <= self.len()
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]