This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 05eb63f fix: fix a bug in offset calculation for unions (#863)
05eb63f is described below
commit 05eb63f81ddbcfd9f597f893d200c3cda742262d
Author: Helgi Kristvin Sigurbjarnarson <[email protected]>
AuthorDate: Tue Oct 26 13:29:27 2021 -0700
fix: fix a bug in offset calculation for unions (#863)
The `value_offset` function only read the least significant byte in the
offset array, causing issues with unions with more than 255 rows of any
given variant. Fix the issue by reading the entire i32 offset and add a
unit test.
---
arrow/src/array/array_union.rs | 47 ++++++++++++++++++++++++++++++++++++++++--
1 file changed, 45 insertions(+), 2 deletions(-)
diff --git a/arrow/src/array/array_union.rs b/arrow/src/array/array_union.rs
index ba563ec..6460e07 100644
--- a/arrow/src/array/array_union.rs
+++ b/arrow/src/array/array_union.rs
@@ -24,7 +24,6 @@ use crate::error::{ArrowError, Result};
use core::fmt;
use std::any::Any;
-use std::mem::size_of;
/// An Array that can represent slots of varying types.
///
@@ -177,7 +176,9 @@ impl UnionArray {
Some(b) => b.count_set_bits_offset(0, index),
None => index,
};
- self.data().buffers()[1].as_slice()[valid_slots *
size_of::<i32>()] as i32
+ // safety: reinterpreting is safe since the offset buffer contains
`i32` values and is
+ // properly aligned.
+ unsafe { self.data().buffers()[1].typed_data::<i32>()[valid_slots]
}
} else {
index as i32
}
@@ -335,6 +336,48 @@ mod tests {
}
#[test]
+ fn test_dense_i32_large() {
+ let mut builder = UnionBuilder::new_dense(1024);
+
+ let expected_type_ids = vec![0_i8; 1024];
+ let expected_value_offsets: Vec<_> = (0..1024).collect();
+ let expected_array_values: Vec<_> = (1..=1024).collect();
+
+ expected_array_values
+ .iter()
+ .for_each(|v| builder.append::<Int32Type>("a", *v).unwrap());
+
+ let union = builder.build().unwrap();
+
+ // Check type ids
+ assert_eq!(
+ union.data().buffers()[0],
+ Buffer::from_slice_ref(&expected_type_ids)
+ );
+ for (i, id) in expected_type_ids.iter().enumerate() {
+ assert_eq!(id, &union.type_id(i));
+ }
+
+ // Check offsets
+ assert_eq!(
+ union.data().buffers()[1],
+ Buffer::from_slice_ref(&expected_value_offsets)
+ );
+ for (i, id) in expected_value_offsets.iter().enumerate() {
+ assert_eq!(&union.value_offset(i), id);
+ }
+
+ for (i, expected_value) in expected_array_values.iter().enumerate() {
+ assert!(!union.is_null(i));
+ let slot = union.value(i);
+ let slot = slot.as_any().downcast_ref::<Int32Array>().unwrap();
+ assert_eq!(slot.len(), 1);
+ let value = slot.value(0);
+ assert_eq!(expected_value, &value);
+ }
+ }
+
+ #[test]
fn test_dense_mixed() {
let mut builder = UnionBuilder::new_dense(7);
builder.append::<Int32Type>("a", 1).unwrap();