This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 54e91811b type_id and value_offset are incorrect for sliced UnionArray
(#2087)
54e91811b is described below
commit 54e91811bd774b3c2dc8fdcf15c07b751256a5ed
Author: Liang-Chi Hsieh <[email protected]>
AuthorDate: Sat Jul 16 11:22:58 2022 -0700
type_id and value_offset are incorrect for sliced UnionArray (#2087)
* Fix slice UnionArray
* For review
---
arrow/src/array/array_union.rs | 80 ++++++++++++++++++++++++++++++++++++++----
1 file changed, 73 insertions(+), 7 deletions(-)
diff --git a/arrow/src/array/array_union.rs b/arrow/src/array/array_union.rs
index 639b82ae9..e7ddec01f 100644
--- a/arrow/src/array/array_union.rs
+++ b/arrow/src/array/array_union.rs
@@ -243,8 +243,8 @@ impl UnionArray {
///
/// Panics if `index` is greater than the length of the array.
pub fn type_id(&self, index: usize) -> i8 {
- assert!(index - self.offset() < self.len());
- self.data().buffers()[0].as_slice()[index] as i8
+ assert!(index < self.len());
+ self.data().buffers()[0].as_slice()[self.offset() + index] as i8
}
/// Returns the offset into the underlying values array for the array slot
at `index`.
@@ -253,11 +253,11 @@ impl UnionArray {
///
/// Panics if `index` is greater than the length of the array.
pub fn value_offset(&self, index: usize) -> i32 {
- assert!(index - self.offset() < self.len());
+ assert!(index < self.len());
if self.is_dense() {
- self.data().buffers()[1].typed_data::<i32>()[index]
+ self.data().buffers()[1].typed_data::<i32>()[self.offset() + index]
} else {
- index as i32
+ (self.offset() + index) as i32
}
}
@@ -267,8 +267,8 @@ impl UnionArray {
///
/// Panics if `index` is greater than the length of the array.
pub fn value(&self, index: usize) -> ArrayRef {
- let type_id = self.type_id(self.offset() + index);
- let value_offset = self.value_offset(self.offset() + index) as usize;
+ let type_id = self.type_id(index);
+ let value_offset = self.value_offset(index) as usize;
let child_data = self.boxed_fields[type_id as usize].clone();
child_data.slice(value_offset, 1)
}
@@ -383,6 +383,7 @@ mod tests {
use crate::array::*;
use crate::buffer::Buffer;
use crate::datatypes::{DataType, Field};
+ use crate::record_batch::RecordBatch;
#[test]
fn test_dense_i32() {
@@ -956,4 +957,69 @@ mod tests {
let err = builder.append::<Int32Type>("a", 1).unwrap_err().to_string();
assert!(err.contains("Attempt to write col \"a\" with type Int32
doesn't match existing type Float32"), "{}", err);
}
+
+ #[test]
+ fn slice_union_array() {
+ // [1, null, 3.0, null, 4]
+ fn create_union(mut builder: UnionBuilder) -> UnionArray {
+ builder.append::<Int32Type>("a", 1).unwrap();
+ builder.append_null::<Int32Type>("a").unwrap();
+ builder.append::<Float64Type>("c", 3.0).unwrap();
+ builder.append_null::<Float64Type>("c").unwrap();
+ builder.append::<Int32Type>("a", 4).unwrap();
+ builder.build().unwrap()
+ }
+
+ fn create_batch(union: UnionArray) -> RecordBatch {
+ let schema = Schema::new(vec![Field::new(
+ "struct_array",
+ union.data_type().clone(),
+ true,
+ )]);
+
+ RecordBatch::try_new(Arc::new(schema),
vec![Arc::new(union)]).unwrap()
+ }
+
+ fn test_slice_union(record_batch_slice: RecordBatch) {
+ let union_slice = record_batch_slice
+ .column(0)
+ .as_any()
+ .downcast_ref::<UnionArray>()
+ .unwrap();
+
+ assert_eq!(union_slice.type_id(0), 0);
+ assert_eq!(union_slice.type_id(1), 1);
+ assert_eq!(union_slice.type_id(2), 1);
+
+ let slot = union_slice.value(0);
+ let array = slot.as_any().downcast_ref::<Int32Array>().unwrap();
+ assert_eq!(array.len(), 1);
+ assert!(array.is_null(0));
+
+ let slot = union_slice.value(1);
+ let array = slot.as_any().downcast_ref::<Float64Array>().unwrap();
+ assert_eq!(array.len(), 1);
+ assert!(array.is_valid(0));
+ assert_eq!(array.value(0), 3.0);
+
+ let slot = union_slice.value(2);
+ let array = slot.as_any().downcast_ref::<Float64Array>().unwrap();
+ assert_eq!(array.len(), 1);
+ assert!(array.is_null(0));
+ }
+
+ // Sparse Union
+ let builder = UnionBuilder::new_sparse(5);
+ let record_batch = create_batch(create_union(builder));
+ // [null, 3.0, null]
+ let record_batch_slice = record_batch.slice(1, 3);
+ test_slice_union(record_batch_slice);
+
+ // Dense Union
+ let builder = UnionBuilder::new_dense(5);
+ let record_batch = create_batch(create_union(builder));
+ // [null, 3.0, null]
+ let record_batch_slice = record_batch.slice(1, 3);
+ test_slice_union(record_batch_slice);
+ }
}