This is an automated email from the ASF dual-hosted git repository.
viirya pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push:
new 3b90fc96ab bugfix: correct offsets when serializing a list of fixed
sized list and non-zero start offset (#7318)
3b90fc96ab is described below
commit 3b90fc96abd56796e79d293814460032d4df9829
Author: Tim Saucer <[email protected]>
AuthorDate: Mon Mar 24 12:53:59 2025 -0400
bugfix: correct offsets when serializing a list of fixed sized list and
non-zero start offset (#7318)
* When serializing fixed length arrays, adjust the offsets for writing out
* Add unit test
* clippy warnings
* Add unit test for nulls
* Update unit test to account for which schema had nulls
---
arrow-ipc/src/writer.rs | 231 ++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 231 insertions(+)
diff --git a/arrow-ipc/src/writer.rs b/arrow-ipc/src/writer.rs
index f5ba6b77a8..9bba420574 100644
--- a/arrow-ipc/src/writer.rs
+++ b/arrow-ipc/src/writer.rs
@@ -1727,6 +1727,26 @@ fn write_array_data(
write_options,
)?;
return Ok(offset);
+ } else if let DataType::FixedSizeList(_, fixed_size) = data_type {
+ assert_eq!(array_data.child_data().len(), 1);
+ let fixed_size = *fixed_size as usize;
+
+ let child_offset = array_data.offset() * fixed_size;
+ let child_length = array_data.len() * fixed_size;
+ let child_data = array_data.child_data()[0].slice(child_offset,
child_length);
+
+ offset = write_array_data(
+ &child_data,
+ buffers,
+ arrow_data,
+ nodes,
+ offset,
+ child_data.len(),
+ child_data.null_count(),
+ compression_codec,
+ write_options,
+ )?;
+ return Ok(offset);
} else {
for buffer in array_data.buffers() {
offset = write_buffer(
@@ -1837,6 +1857,9 @@ mod tests {
use std::io::Cursor;
use std::io::Seek;
+ use arrow_array::builder::FixedSizeListBuilder;
+ use arrow_array::builder::Float32Builder;
+ use arrow_array::builder::Int64Builder;
use arrow_array::builder::MapBuilder;
use arrow_array::builder::UnionBuilder;
use arrow_array::builder::{GenericListBuilder, ListBuilder, StringBuilder};
@@ -3075,4 +3098,212 @@ mod tests {
assert_eq!(stream_bytes_written_on_flush,
expected_stream_flushed_bytes);
assert_eq!(file_bytes_written_on_flush, expected_file_flushed_bytes);
}
+
+ #[test]
+ fn test_roundtrip_list_of_fixed_list() -> Result<(), ArrowError> {
+ let l1_type =
+ DataType::FixedSizeList(Arc::new(Field::new("item",
DataType::Float32, false)), 3);
+ let l2_type = DataType::List(Arc::new(Field::new("item",
l1_type.clone(), false)));
+
+ let l0_builder = Float32Builder::new();
+ let l1_builder = FixedSizeListBuilder::new(l0_builder,
3).with_field(Arc::new(Field::new(
+ "item",
+ DataType::Float32,
+ false,
+ )));
+ let mut l2_builder =
+
ListBuilder::new(l1_builder).with_field(Arc::new(Field::new("item", l1_type,
false)));
+
+ for point in [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]] {
+ l2_builder.values().values().append_value(point[0]);
+ l2_builder.values().values().append_value(point[1]);
+ l2_builder.values().values().append_value(point[2]);
+
+ l2_builder.values().append(true);
+ }
+ l2_builder.append(true);
+
+ let point = [10., 11., 12.];
+ l2_builder.values().values().append_value(point[0]);
+ l2_builder.values().values().append_value(point[1]);
+ l2_builder.values().values().append_value(point[2]);
+
+ l2_builder.values().append(true);
+ l2_builder.append(true);
+
+ let array = Arc::new(l2_builder.finish()) as ArrayRef;
+
+ let schema = Arc::new(Schema::new_with_metadata(
+ vec![Field::new("points", l2_type, false)],
+ HashMap::default(),
+ ));
+
+ // Test a variety of combinations that include 0 and non-zero offsets
+ // and also portions or the rest of the array
+ test_slices(&array, &schema, 0, 1)?;
+ test_slices(&array, &schema, 0, 2)?;
+ test_slices(&array, &schema, 1, 1)?;
+
+ Ok(())
+ }
+
+ #[test]
+ fn test_roundtrip_list_of_fixed_list_w_nulls() -> Result<(), ArrowError> {
+ let l0_builder = Float32Builder::new();
+ let l1_builder = FixedSizeListBuilder::new(l0_builder, 3);
+ let mut l2_builder = ListBuilder::new(l1_builder);
+
+ for point in [
+ [Some(1.0), Some(2.0), None],
+ [Some(4.0), Some(5.0), Some(6.0)],
+ [None, Some(8.0), Some(9.0)],
+ ] {
+ for p in point {
+ match p {
+ Some(p) => l2_builder.values().values().append_value(p),
+ None => l2_builder.values().values().append_null(),
+ }
+ }
+
+ l2_builder.values().append(true);
+ }
+ l2_builder.append(true);
+
+ let point = [Some(10.), None, None];
+ for p in point {
+ match p {
+ Some(p) => l2_builder.values().values().append_value(p),
+ None => l2_builder.values().values().append_null(),
+ }
+ }
+
+ l2_builder.values().append(true);
+ l2_builder.append(true);
+
+ let array = Arc::new(l2_builder.finish()) as ArrayRef;
+
+ let schema = Arc::new(Schema::new_with_metadata(
+ vec![Field::new(
+ "points",
+ DataType::List(Arc::new(Field::new(
+ "item",
+ DataType::FixedSizeList(
+ Arc::new(Field::new("item", DataType::Float32, true)),
+ 3,
+ ),
+ true,
+ ))),
+ true,
+ )],
+ HashMap::default(),
+ ));
+
+ // Test a variety of combinations that include 0 and non-zero offsets
+ // and also portions or the rest of the array
+ test_slices(&array, &schema, 0, 1)?;
+ test_slices(&array, &schema, 0, 2)?;
+ test_slices(&array, &schema, 1, 1)?;
+
+ Ok(())
+ }
+
+ fn test_slices(
+ parent_array: &ArrayRef,
+ schema: &SchemaRef,
+ offset: usize,
+ length: usize,
+ ) -> Result<(), ArrowError> {
+ let subarray = parent_array.slice(offset, length);
+ let original_batch = RecordBatch::try_new(schema.clone(),
vec![subarray])?;
+
+ let mut bytes = Vec::new();
+ let mut writer = StreamWriter::try_new(&mut bytes, schema)?;
+ writer.write(&original_batch)?;
+ writer.finish()?;
+
+ let mut cursor = std::io::Cursor::new(bytes);
+ let mut reader = StreamReader::try_new(&mut cursor, None)?;
+ let returned_batch = reader.next().unwrap()?;
+
+ assert_eq!(original_batch, returned_batch);
+
+ Ok(())
+ }
+
+ #[test]
+ fn test_roundtrip_fixed_list() -> Result<(), ArrowError> {
+ let int_builder = Int64Builder::new();
+ let mut fixed_list_builder = FixedSizeListBuilder::new(int_builder, 3)
+ .with_field(Arc::new(Field::new("item", DataType::Int64, false)));
+
+ for point in [[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]] {
+ fixed_list_builder.values().append_value(point[0]);
+ fixed_list_builder.values().append_value(point[1]);
+ fixed_list_builder.values().append_value(point[2]);
+
+ fixed_list_builder.append(true);
+ }
+
+ let array = Arc::new(fixed_list_builder.finish()) as ArrayRef;
+
+ let schema = Arc::new(Schema::new_with_metadata(
+ vec![Field::new(
+ "points",
+ DataType::FixedSizeList(Arc::new(Field::new("item",
DataType::Int64, false)), 3),
+ false,
+ )],
+ HashMap::default(),
+ ));
+
+ // Test a variety of combinations that include 0 and non-zero offsets
+ // and also portions or the rest of the array
+ test_slices(&array, &schema, 0, 4)?;
+ test_slices(&array, &schema, 0, 2)?;
+ test_slices(&array, &schema, 1, 3)?;
+ test_slices(&array, &schema, 2, 1)?;
+
+ Ok(())
+ }
+
+ #[test]
+ fn test_roundtrip_fixed_list_w_nulls() -> Result<(), ArrowError> {
+ let int_builder = Int64Builder::new();
+ let mut fixed_list_builder = FixedSizeListBuilder::new(int_builder, 3);
+
+ for point in [
+ [Some(1), Some(2), None],
+ [Some(4), Some(5), Some(6)],
+ [None, Some(8), Some(9)],
+ [Some(10), None, None],
+ ] {
+ for p in point {
+ match p {
+ Some(p) => fixed_list_builder.values().append_value(p),
+ None => fixed_list_builder.values().append_null(),
+ }
+ }
+
+ fixed_list_builder.append(true);
+ }
+
+ let array = Arc::new(fixed_list_builder.finish()) as ArrayRef;
+
+ let schema = Arc::new(Schema::new_with_metadata(
+ vec![Field::new(
+ "points",
+ DataType::FixedSizeList(Arc::new(Field::new("item",
DataType::Int64, true)), 3),
+ true,
+ )],
+ HashMap::default(),
+ ));
+
+ // Test a variety of combinations that include 0 and non-zero offsets
+ // and also portions or the rest of the array
+ test_slices(&array, &schema, 0, 4)?;
+ test_slices(&array, &schema, 0, 2)?;
+ test_slices(&array, &schema, 1, 3)?;
+ test_slices(&array, &schema, 2, 1)?;
+
+ Ok(())
+ }
}