This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new 1ef55f1dde arrow-ipc: Add tests for nested dicts for Map and Union 
arrays (#9146)
1ef55f1dde is described below

commit 1ef55f1ddeedd7b9d30762270ba81a57f5bf10a6
Author: Frederic Branczyk <[email protected]>
AuthorDate: Tue Jan 13 13:14:50 2026 +0100

    arrow-ipc: Add tests for nested dicts for Map and Union arrays (#9146)
    
    # Which issue does this PR close?
    
    No known issue, but this confirms that nested dicts do indeed work in
    Map and Union arrays. It was brought up here:
    https://github.com/apache/arrow-rs/pull/9126#discussion_r2678500873
    
    # Rationale for this change
    
    Ensure that IPC roundtripping nested dicts works in Map and Union
    arrays.
    
    # What changes are included in this PR?
    
    Unit tests testing the functionality.
    
    # Are these changes tested?
    
    The whole PR consists of tests only.
    
    # Are there any user-facing changes?
    
    No
    
    @alamb @Jefffrey
---
 arrow-ipc/src/writer.rs | 223 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 223 insertions(+)

diff --git a/arrow-ipc/src/writer.rs b/arrow-ipc/src/writer.rs
index 8afc3c8ed2..86376c8e5e 100644
--- a/arrow-ipc/src/writer.rs
+++ b/arrow-ipc/src/writer.rs
@@ -3538,6 +3538,229 @@ mod tests {
         assert_eq!(sliced_batch, output_batch);
     }
 
+    #[test]
+    fn test_roundtrip_dense_union_of_dict() {
+        let values = StringArray::from(vec![Some("alpha"), None, Some("beta"), 
Some("gamma")]);
+        let keys = Int32Array::from_iter_values([0, 0, 1, 2, 3, 0, 2]);
+        let dict_array = DictionaryArray::new(keys, Arc::new(values));
+
+        #[allow(deprecated)]
+        let dict_field = Arc::new(Field::new_dict(
+            "dict",
+            DataType::Dictionary(Box::new(DataType::Int32), 
Box::new(DataType::Utf8)),
+            true,
+            1,
+            false,
+        ));
+        let int_field = Arc::new(Field::new("int", DataType::Int32, false));
+        let union_fields = UnionFields::try_new(vec![0, 1], vec![dict_field, 
int_field]).unwrap();
+
+        let types = ScalarBuffer::from(vec![0i8, 0, 1, 0, 1, 0, 0]);
+        let offsets = ScalarBuffer::from(vec![0i32, 1, 0, 2, 1, 3, 4]);
+
+        let int_array = Int32Array::from(vec![100, 200]);
+
+        let union = UnionArray::try_new(
+            union_fields.clone(),
+            types,
+            Some(offsets),
+            vec![Arc::new(dict_array), Arc::new(int_array)],
+        )
+        .unwrap();
+
+        let schema = Arc::new(Schema::new(vec![Field::new(
+            "union",
+            DataType::Union(union_fields, UnionMode::Dense),
+            false,
+        )]));
+        let input_batch = RecordBatch::try_new(schema, 
vec![Arc::new(union)]).unwrap();
+
+        let output_batch = deserialize_file(serialize_file(&input_batch));
+        assert_eq!(input_batch, output_batch);
+
+        let output_batch = deserialize_stream(serialize_stream(&input_batch));
+        assert_eq!(input_batch, output_batch);
+    }
+
+    #[test]
+    fn test_roundtrip_sparse_union_of_dict() {
+        let values = StringArray::from(vec![Some("alpha"), None, Some("beta"), 
Some("gamma")]);
+        let keys = Int32Array::from_iter_values([0, 0, 1, 2, 3, 0, 2]);
+        let dict_array = DictionaryArray::new(keys, Arc::new(values));
+
+        #[allow(deprecated)]
+        let dict_field = Arc::new(Field::new_dict(
+            "dict",
+            DataType::Dictionary(Box::new(DataType::Int32), 
Box::new(DataType::Utf8)),
+            true,
+            2,
+            false,
+        ));
+        let int_field = Arc::new(Field::new("int", DataType::Int32, false));
+        let union_fields = UnionFields::try_new(vec![0, 1], vec![dict_field, 
int_field]).unwrap();
+
+        let types = ScalarBuffer::from(vec![0i8, 0, 1, 0, 1, 0, 0]);
+
+        let int_array = Int32Array::from(vec![0, 0, 100, 0, 200, 0, 0]);
+
+        let union = UnionArray::try_new(
+            union_fields.clone(),
+            types,
+            None,
+            vec![Arc::new(dict_array), Arc::new(int_array)],
+        )
+        .unwrap();
+
+        let schema = Arc::new(Schema::new(vec![Field::new(
+            "union",
+            DataType::Union(union_fields, UnionMode::Sparse),
+            false,
+        )]));
+        let input_batch = RecordBatch::try_new(schema, 
vec![Arc::new(union)]).unwrap();
+
+        let output_batch = deserialize_file(serialize_file(&input_batch));
+        assert_eq!(input_batch, output_batch);
+
+        let output_batch = deserialize_stream(serialize_stream(&input_batch));
+        assert_eq!(input_batch, output_batch);
+    }
+
+    #[test]
+    fn test_roundtrip_map_with_dict_keys() {
+        // Building a map array is a bit involved. We first build a struct 
array that has a key and
+        // value field and then use that to build the actual map array.
+        let key_values = StringArray::from(vec!["key_a", "key_b", "key_c"]);
+        let keys = Int32Array::from_iter_values([0, 1, 2, 0, 1, 0]);
+        let dict_keys = DictionaryArray::new(keys, Arc::new(key_values));
+
+        let values = Int32Array::from(vec![1, 2, 3, 4, 5, 6]);
+
+        #[allow(deprecated)]
+        let entries_field = Arc::new(Field::new(
+            "entries",
+            DataType::Struct(
+                vec![
+                    Field::new_dict(
+                        "key",
+                        DataType::Dictionary(Box::new(DataType::Int32), 
Box::new(DataType::Utf8)),
+                        false,
+                        1,
+                        false,
+                    ),
+                    Field::new("value", DataType::Int32, true),
+                ]
+                .into(),
+            ),
+            false,
+        ));
+
+        let entries = StructArray::from(vec![
+            (
+                Arc::new(Field::new(
+                    "key",
+                    DataType::Dictionary(Box::new(DataType::Int32), 
Box::new(DataType::Utf8)),
+                    false,
+                )),
+                Arc::new(dict_keys) as ArrayRef,
+            ),
+            (
+                Arc::new(Field::new("value", DataType::Int32, true)),
+                Arc::new(values) as ArrayRef,
+            ),
+        ]);
+
+        let offsets = Buffer::from_slice_ref([0i32, 2, 4, 6]);
+
+        let map_data = ArrayData::builder(DataType::Map(entries_field, false))
+            .len(3)
+            .add_buffer(offsets)
+            .add_child_data(entries.into_data())
+            .build()
+            .unwrap();
+        let map_array = MapArray::from(map_data);
+
+        let schema = Arc::new(Schema::new(vec![Field::new(
+            "map",
+            map_array.data_type().clone(),
+            false,
+        )]));
+        let input_batch = RecordBatch::try_new(schema, 
vec![Arc::new(map_array)]).unwrap();
+
+        let output_batch = deserialize_file(serialize_file(&input_batch));
+        assert_eq!(input_batch, output_batch);
+
+        let output_batch = deserialize_stream(serialize_stream(&input_batch));
+        assert_eq!(input_batch, output_batch);
+    }
+
+    #[test]
+    fn test_roundtrip_map_with_dict_values() {
+        // Building a map array is a bit involved. We first build a struct 
array that has a key and
+        // value field and then use that to build the actual map array.
+        let keys = StringArray::from(vec!["a", "b", "c", "d", "e", "f"]);
+
+        let value_values = StringArray::from(vec!["val_x", "val_y", "val_z"]);
+        let value_keys = Int32Array::from_iter_values([0, 1, 2, 0, 1, 0]);
+        let dict_values = DictionaryArray::new(value_keys, 
Arc::new(value_values));
+
+        #[allow(deprecated)]
+        let entries_field = Arc::new(Field::new(
+            "entries",
+            DataType::Struct(
+                vec![
+                    Field::new("key", DataType::Utf8, false),
+                    Field::new_dict(
+                        "value",
+                        DataType::Dictionary(Box::new(DataType::Int32), 
Box::new(DataType::Utf8)),
+                        true,
+                        2,
+                        false,
+                    ),
+                ]
+                .into(),
+            ),
+            false,
+        ));
+
+        let entries = StructArray::from(vec![
+            (
+                Arc::new(Field::new("key", DataType::Utf8, false)),
+                Arc::new(keys) as ArrayRef,
+            ),
+            (
+                Arc::new(Field::new(
+                    "value",
+                    DataType::Dictionary(Box::new(DataType::Int32), 
Box::new(DataType::Utf8)),
+                    true,
+                )),
+                Arc::new(dict_values) as ArrayRef,
+            ),
+        ]);
+
+        let offsets = Buffer::from_slice_ref([0i32, 2, 4, 6]);
+
+        let map_data = ArrayData::builder(DataType::Map(entries_field, false))
+            .len(3)
+            .add_buffer(offsets)
+            .add_child_data(entries.into_data())
+            .build()
+            .unwrap();
+        let map_array = MapArray::from(map_data);
+
+        let schema = Arc::new(Schema::new(vec![Field::new(
+            "map",
+            map_array.data_type().clone(),
+            false,
+        )]));
+        let input_batch = RecordBatch::try_new(schema, 
vec![Arc::new(map_array)]).unwrap();
+
+        let output_batch = deserialize_file(serialize_file(&input_batch));
+        assert_eq!(input_batch, output_batch);
+
+        let output_batch = deserialize_stream(serialize_stream(&input_batch));
+        assert_eq!(input_batch, output_batch);
+    }
+
     #[test]
     fn test_decimal128_alignment16_is_sufficient() {
         const IPC_ALIGNMENT: usize = 16;

Reply via email to