westonpace commented on code in PR #7596:
URL: https://github.com/apache/arrow-rs/pull/7596#discussion_r2132968264
##########
arrow-array/src/array/struct_array.rs:
##########
@@ -852,6 +855,67 @@ mod tests {
        assert!(sliced_c1.is_null(2));
    }
+    #[test]
+    fn test_struct_array_data_slice() {
+        let x = Int32Array::from(vec![Some(0), Some(1), Some(2), Some(3), None, Some(5)]);
+        let struct_array = StructArray::new(
+            Fields::from(vec![Field::new("x", DataType::Int32, true)]),
+            vec![Arc::new(x.clone())],
+            Some(NullBuffer::from(vec![true, true, true, false, true, true])),
+        )
+        .into_data();
+        let sliced = struct_array.slice(1, 4);
+
+        let arr = make_array(sliced);
+        assert_eq!(
+            arr.as_struct().column(0).as_primitive::<Int32Type>(),
+            &x.slice(1, 4)
+        );
+
+        // This next test case is a regression test of a bug detected when converting a sliced
+        // struct from C++.
+        //
+        // In arrow-rs, struct slicing is done by setting the offset and length of the
+        // child arrays.
+        //
+        // In the C++ implementation of Arrow, struct slicing is done by setting the offset
+        // and length of the struct array itself.
+        //
+        // arrow-rs should be able to parse data received from C++ correctly.
+        let x = Int32Array::from(vec![Some(0), Some(1), Some(2), Some(3), None, Some(5)]);
+        let y = Int32Array::from(vec![Some(5), Some(6), None, Some(8), Some(9), Some(10)]);
+        let struct_array = StructArray::new(
+            Fields::from(vec![
+                Field::new("x", DataType::Int32, true),
+                Field::new("y", DataType::Int32, true),
+            ]),
+            vec![Arc::new(x), Arc::new(y)],
+            Some(NullBuffer::from(vec![true, true, true, false, true, true])),
+        );
+        let struct_array = StructArray::new(
+            Fields::from(vec![Field::new(
+                "inner",
+                struct_array.data_type().clone(),
+                true,
+            )]),
+            vec![Arc::new(struct_array)],
+            Some(NullBuffer::from(vec![true, false, true, true, true, true])),
+        );
+
+        let cpp_sliced_array = make_array(
Review Comment:
Hmm, good question. I believe (but am not 100% sure) that the IPC format does not have
an equivalent of array offsets. However, I agree that some kind of C data integration
test would be a good idea. I'll think about how best to add this. Are there any
integration tests today that can use datafusion-python? If so, `pyarrow` makes it
pretty easy to create this kind of integration test.
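
For what it's worth, here is a minimal sketch (not part of this PR) of what exercising the C++-style representation through arrow-rs's own C Data Interface plumbing could look like, assuming the `arrow` crate's `ffi` feature and its `to_ffi`/`from_ffi` helpers. The slice is expressed as an offset/length on the parent `ArrayData`, the way C++ produces it; since both producer and consumer here are arrow-rs, this is only a sketch of the plumbing, not a substitute for a real cross-implementation test with `pyarrow` on the producing side:

```rust
use arrow::array::{make_array, Array, ArrayData, AsArray, Int32Array};
use arrow::datatypes::{DataType, Field, Fields, Int32Type};
use arrow::error::ArrowError;
use arrow::ffi::{from_ffi, to_ffi};

fn main() -> Result<(), ArrowError> {
    // Full-length child; the slice lives only on the parent, mirroring how the
    // C++ implementation represents a sliced StructArray.
    let x = Int32Array::from(vec![Some(0), Some(1), Some(2), Some(3), None, Some(5)]);
    let fields = Fields::from(vec![Field::new("x", DataType::Int32, true)]);
    let cpp_style = ArrayData::builder(DataType::Struct(fields))
        .len(4)
        .offset(1) // parent-level offset instead of sliced children
        .add_child_data(x.to_data())
        .build()?;

    // Round trip through the C Data Interface helpers.
    let (ffi_array, ffi_schema) = to_ffi(&cpp_style)?;
    let imported = make_array(unsafe { from_ffi(ffi_array, &ffi_schema)? });

    // The imported struct's child should be logically equal to the arrow-rs slice.
    assert_eq!(
        imported.as_struct().column(0).as_primitive::<Int32Type>(),
        &x.slice(1, 4)
    );
    Ok(())
}
```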
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]