This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new c663d88327 fix(data): map type ID to child index before indexing a
union child array (#4598)
c663d88327 is described below
commit c663d88327dfd6958b102ec7b1ca310cc20b40c4
Author: Tomoaki Kawada <[email protected]>
AuthorDate: Mon Jul 31 22:53:41 2023 +0900
fix(data): map type ID to child index before indexing a union child array
(#4598)
* test: add a test for `MutableArrayData` and dense union
* fix(data): map type ID to child index before indexing a union child array
---
arrow-data/src/transform/union.rs | 13 +++++++--
arrow/tests/array_transform.rs | 59 ++++++++++++++++++++++++++++++++++++++-
2 files changed, 68 insertions(+), 4 deletions(-)
diff --git a/arrow-data/src/transform/union.rs
b/arrow-data/src/transform/union.rs
index 8d1ea34c31..d7083588d7 100644
--- a/arrow-data/src/transform/union.rs
+++ b/arrow-data/src/transform/union.rs
@@ -39,6 +39,9 @@ pub(super) fn build_extend_sparse(array: &ArrayData) ->
Extend {
pub(super) fn build_extend_dense(array: &ArrayData) -> Extend {
let type_ids = array.buffer::<i8>(0);
let offsets = array.buffer::<i32>(1);
+ let arrow_schema::DataType::Union(src_fields, _) = array.data_type() else {
+ unreachable!();
+ };
Box::new(
move |mutable: &mut _MutableArrayData, index: usize, start: usize,
len: usize| {
@@ -48,14 +51,18 @@ pub(super) fn build_extend_dense(array: &ArrayData) ->
Extend {
.extend_from_slice(&type_ids[start..start + len]);
(start..start + len).for_each(|i| {
- let type_id = type_ids[i] as usize;
+ let type_id = type_ids[i];
+ let child_index = src_fields
+ .iter()
+ .position(|(r, _)| r == type_id)
+ .expect("invalid union type ID");
let src_offset = offsets[i] as usize;
- let child_data = &mut mutable.child_data[type_id];
+ let child_data = &mut mutable.child_data[child_index];
let dst_offset = child_data.len();
// Extend offsets
mutable.buffer2.push(dst_offset as i32);
- mutable.child_data[type_id].extend(index, src_offset,
src_offset + 1)
+ mutable.child_data[child_index].extend(index, src_offset,
src_offset + 1)
})
},
)
diff --git a/arrow/tests/array_transform.rs b/arrow/tests/array_transform.rs
index ebbadc00ae..15141eb208 100644
--- a/arrow/tests/array_transform.rs
+++ b/arrow/tests/array_transform.rs
@@ -19,7 +19,7 @@ use arrow::array::{
Array, ArrayRef, BooleanArray, Decimal128Array, DictionaryArray,
FixedSizeBinaryArray, Int16Array, Int32Array, Int64Array, Int64Builder,
ListArray,
ListBuilder, MapBuilder, NullArray, StringArray, StringBuilder,
- StringDictionaryBuilder, StructArray, UInt8Array,
+ StringDictionaryBuilder, StructArray, UInt8Array, UnionArray,
};
use arrow::datatypes::Int16Type;
use arrow_buffer::Buffer;
@@ -488,6 +488,63 @@ fn test_struct_many() {
assert_eq!(array, expected)
}
+#[test]
+fn test_union_dense() {
+ // Input data
+ let strings: ArrayRef = Arc::new(StringArray::from(vec![
+ Some("joe"),
+ Some("mark"),
+ Some("doe"),
+ ]));
+ let ints: ArrayRef = Arc::new(Int32Array::from(vec![
+ Some(1),
+ Some(2),
+ Some(3),
+ Some(4),
+ Some(5),
+ ]));
+ let offsets = Buffer::from_slice_ref([0, 0, 1, 1, 2, 2, 3, 4i32]);
+ let type_ids = Buffer::from_slice_ref([42, 84, 42, 84, 84, 42, 84, 84i8]);
+
+ let array = UnionArray::try_new(
+ &[84, 42],
+ type_ids,
+ Some(offsets),
+ vec![
+ (Field::new("int", DataType::Int32, false), ints),
+ (Field::new("string", DataType::Utf8, false), strings),
+ ],
+ )
+ .unwrap()
+ .into_data();
+ let arrays = vec![&array];
+ let mut mutable = MutableArrayData::new(arrays, false, 0);
+
+ // Slice it by `MutableArrayData`
+ mutable.extend(0, 4, 7);
+ let data = mutable.freeze();
+ let array = UnionArray::from(data);
+
+ // Expected data
+ let strings: ArrayRef = Arc::new(StringArray::from(vec![Some("doe")]));
+ let ints: ArrayRef = Arc::new(Int32Array::from(vec![Some(3), Some(4)]));
+ let offsets = Buffer::from_slice_ref([0, 0, 1i32]);
+ let type_ids = Buffer::from_slice_ref([84, 42, 84i8]);
+
+ let expected = UnionArray::try_new(
+ &[84, 42],
+ type_ids,
+ Some(offsets),
+ vec![
+ (Field::new("int", DataType::Int32, false), ints),
+ (Field::new("string", DataType::Utf8, false), strings),
+ ],
+ )
+ .unwrap();
+
+ assert_eq!(array.to_data(), expected.to_data());
+}
+
#[test]
fn test_binary_fixed_sized_offsets() {
let array = FixedSizeBinaryArray::try_from_iter(