This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new c663d88327 fix(data): map type ID to child index before indexing a union child array (#4598)
c663d88327 is described below

commit c663d88327dfd6958b102ec7b1ca310cc20b40c4
Author: Tomoaki Kawada <[email protected]>
AuthorDate: Mon Jul 31 22:53:41 2023 +0900

    fix(data): map type ID to child index before indexing a union child array (#4598)
    
    * test: add a test for `MutableArrayData` and dense union
    
    * fix(data): map type ID to child index before indexing union child array
---
 arrow-data/src/transform/union.rs | 13 +++++++--
 arrow/tests/array_transform.rs    | 59 ++++++++++++++++++++++++++++++++++++++-
 2 files changed, 68 insertions(+), 4 deletions(-)

diff --git a/arrow-data/src/transform/union.rs b/arrow-data/src/transform/union.rs
index 8d1ea34c31..d7083588d7 100644
--- a/arrow-data/src/transform/union.rs
+++ b/arrow-data/src/transform/union.rs
@@ -39,6 +39,9 @@ pub(super) fn build_extend_sparse(array: &ArrayData) -> Extend {
 pub(super) fn build_extend_dense(array: &ArrayData) -> Extend {
     let type_ids = array.buffer::<i8>(0);
     let offsets = array.buffer::<i32>(1);
+    let arrow_schema::DataType::Union(src_fields, _) = array.data_type() else {
+        unreachable!();
+    };
 
     Box::new(
         move |mutable: &mut _MutableArrayData, index: usize, start: usize, len: usize| {
@@ -48,14 +51,18 @@ pub(super) fn build_extend_dense(array: &ArrayData) -> Extend {
                 .extend_from_slice(&type_ids[start..start + len]);
 
             (start..start + len).for_each(|i| {
-                let type_id = type_ids[i] as usize;
+                let type_id = type_ids[i];
+                let child_index = src_fields
+                    .iter()
+                    .position(|(r, _)| r == type_id)
+                    .expect("invalid union type ID");
                 let src_offset = offsets[i] as usize;
-                let child_data = &mut mutable.child_data[type_id];
+                let child_data = &mut mutable.child_data[child_index];
                 let dst_offset = child_data.len();
 
                 // Extend offsets
                 mutable.buffer2.push(dst_offset as i32);
-                mutable.child_data[type_id].extend(index, src_offset, src_offset + 1)
+                mutable.child_data[child_index].extend(index, src_offset, src_offset + 1)
             })
         },
     )
diff --git a/arrow/tests/array_transform.rs b/arrow/tests/array_transform.rs
index ebbadc00ae..15141eb208 100644
--- a/arrow/tests/array_transform.rs
+++ b/arrow/tests/array_transform.rs
@@ -19,7 +19,7 @@ use arrow::array::{
     Array, ArrayRef, BooleanArray, Decimal128Array, DictionaryArray,
     FixedSizeBinaryArray, Int16Array, Int32Array, Int64Array, Int64Builder, ListArray,
     ListBuilder, MapBuilder, NullArray, StringArray, StringBuilder,
-    StringDictionaryBuilder, StructArray, UInt8Array,
+    StringDictionaryBuilder, StructArray, UInt8Array, UnionArray,
 };
 use arrow::datatypes::Int16Type;
 use arrow_buffer::Buffer;
@@ -488,6 +488,63 @@ fn test_struct_many() {
     assert_eq!(array, expected)
 }
 
+#[test]
+fn test_union_dense() {
+    // Input data
+    let strings: ArrayRef = Arc::new(StringArray::from(vec![
+        Some("joe"),
+        Some("mark"),
+        Some("doe"),
+    ]));
+    let ints: ArrayRef = Arc::new(Int32Array::from(vec![
+        Some(1),
+        Some(2),
+        Some(3),
+        Some(4),
+        Some(5),
+    ]));
+    let offsets = Buffer::from_slice_ref([0, 0, 1, 1, 2, 2, 3, 4i32]);
+    let type_ids = Buffer::from_slice_ref([42, 84, 42, 84, 84, 42, 84, 84i8]);
+
+    let array = UnionArray::try_new(
+        &[84, 42],
+        type_ids,
+        Some(offsets),
+        vec![
+            (Field::new("int", DataType::Int32, false), ints),
+            (Field::new("string", DataType::Utf8, false), strings),
+        ],
+    )
+    .unwrap()
+    .into_data();
+    let arrays = vec![&array];
+    let mut mutable = MutableArrayData::new(arrays, false, 0);
+
+    // Slice it by `MutableArrayData`
+    mutable.extend(0, 4, 7);
+    let data = mutable.freeze();
+    let array = UnionArray::from(data);
+
+    // Expected data
+    let strings: ArrayRef = Arc::new(StringArray::from(vec![Some("doe")]));
+    let ints: ArrayRef = Arc::new(Int32Array::from(vec![Some(3), Some(4)]));
+    let offsets = Buffer::from_slice_ref([0, 0, 1i32]);
+    let type_ids = Buffer::from_slice_ref([84, 42, 84i8]);
+
+    let expected = UnionArray::try_new(
+        &[84, 42],
+        type_ids,
+        Some(offsets),
+        vec![
+            (Field::new("int", DataType::Int32, false), ints),
+            (Field::new("string", DataType::Utf8, false), strings),
+        ],
+    )
+    .unwrap();
+
+    assert_eq!(array.to_data(), expected.to_data());
+}
+
 #[test]
 fn test_binary_fixed_sized_offsets() {
     let array = FixedSizeBinaryArray::try_from_iter(

Reply via email to