This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 89d5273 Update Union Array to add `UnionMode`, match latest Arrow
Spec, and rename `new` -> `unsafe new_unchecked()` (#885)
89d5273 is described below
commit 89d52733bdff651f0ed27dc687e48c371d77bc85
Author: Andrew Lamb <[email protected]>
AuthorDate: Sun Jan 2 09:37:45 2022 -0500
Update Union Array to add `UnionMode`, match latest Arrow Spec, and rename
`new` -> `unsafe new_unchecked()` (#885)
* Update union array to new null handling
* Update arrow/src/array/array_union.rs
* correct comment
---
arrow/src/array/array.rs | 4 +-
arrow/src/array/array_union.rs | 53 +++++++-----
arrow/src/array/builder.rs | 14 +--
arrow/src/array/data.rs | 178 +++++++++++++++++++++++++++++++++-----
arrow/src/array/equal/mod.rs | 2 +-
arrow/src/array/equal/utils.rs | 2 +-
arrow/src/compute/kernels/cast.rs | 11 ++-
arrow/src/datatypes/datatype.rs | 11 ++-
arrow/src/datatypes/field.rs | 6 +-
arrow/src/datatypes/mod.rs | 35 +++++---
arrow/src/ipc/writer.rs | 2 +-
parquet/src/arrow/arrow_writer.rs | 2 +-
parquet/src/arrow/levels.rs | 6 +-
parquet/src/arrow/schema.rs | 2 +-
14 files changed, 247 insertions(+), 81 deletions(-)
diff --git a/arrow/src/array/array.rs b/arrow/src/array/array.rs
index 7f790ef..ce3751d 100644
--- a/arrow/src/array/array.rs
+++ b/arrow/src/array/array.rs
@@ -301,7 +301,7 @@ pub fn make_array(data: ArrayData) -> ArrayRef {
DataType::LargeList(_) => Arc::new(LargeListArray::from(data)) as
ArrayRef,
DataType::Struct(_) => Arc::new(StructArray::from(data)) as ArrayRef,
DataType::Map(_, _) => Arc::new(MapArray::from(data)) as ArrayRef,
- DataType::Union(_) => Arc::new(UnionArray::from(data)) as ArrayRef,
+ DataType::Union(_, _) => Arc::new(UnionArray::from(data)) as ArrayRef,
DataType::FixedSizeList(_, _) => {
Arc::new(FixedSizeListArray::from(data)) as ArrayRef
}
@@ -472,7 +472,7 @@ pub fn new_null_array(data_type: &DataType, length: usize)
-> ArrayRef {
DataType::Map(field, _keys_sorted) => {
new_null_list_array::<i32>(data_type, field.data_type(), length)
}
- DataType::Union(_) => {
+ DataType::Union(_, _) => {
unimplemented!("Creating null Union array not yet supported")
}
DataType::Dictionary(key, value) => {
diff --git a/arrow/src/array/array_union.rs b/arrow/src/array/array_union.rs
index 56efcfb..3657729 100644
--- a/arrow/src/array/array_union.rs
+++ b/arrow/src/array/array_union.rs
@@ -17,7 +17,7 @@
/// Contains the `UnionArray` type.
///
-use crate::array::{data::count_nulls, make_array, Array, ArrayData, ArrayRef};
+use crate::array::{make_array, Array, ArrayData, ArrayRef};
use crate::buffer::Buffer;
use crate::datatypes::*;
use crate::error::{ArrowError, Result};
@@ -48,7 +48,7 @@ impl UnionArray {
/// caller and assumes that each of the components are correct and
consistent with each other.
/// See `try_new` for an alternative that validates the data provided.
///
- /// # Data Consistency
+ /// # Safety
///
/// The `type_ids` `Buffer` should contain `i8` values. These values
should be greater than
/// zero and must be less than the number of children provided in
`child_arrays`. These values
@@ -56,8 +56,8 @@ impl UnionArray {
///
/// The `value_offsets` `Buffer` is only provided in the case of a dense
union, sparse unions
/// should use `None`. If provided the `value_offsets` `Buffer` should
contain `i32` values.
- /// These values should be greater than zero and must be less than the
length of the overall
- /// array.
+ /// The values in this array should be greater than or equal to zero and must be less
than the length of the
+ /// overall array.
///
/// In both cases above we use signed integer types to maintain
compatibility with other
/// Arrow implementations.
@@ -65,7 +65,7 @@ impl UnionArray {
/// In both of the cases above we are accepting `Buffer`'s which are
assumed to be representing
/// `i8` and `i32` values respectively. `Buffer` objects are untyped and
no attempt is made
/// to ensure that the data provided is valid.
- pub fn new(
+ pub unsafe fn new_unchecked(
type_ids: Buffer,
value_offsets: Option<Buffer>,
child_arrays: Vec<(Field, ArrayRef)>,
@@ -74,22 +74,28 @@ impl UnionArray {
let (field_types, field_values): (Vec<_>, Vec<_>) =
child_arrays.into_iter().unzip();
let len = type_ids.len();
- let mut builder = ArrayData::builder(DataType::Union(field_types))
+
+ let mode = if value_offsets.is_some() {
+ UnionMode::Dense
+ } else {
+ UnionMode::Sparse
+ };
+
+ let mut builder = ArrayData::builder(DataType::Union(field_types,
mode))
.add_buffer(type_ids)
.child_data(field_values.into_iter().map(|a|
a.data().clone()).collect())
.len(len);
if let Some(bitmap) = bitmap_data {
builder = builder.null_bit_buffer(bitmap)
}
- let data = unsafe {
- match value_offsets {
- Some(b) => builder.add_buffer(b).build_unchecked(),
- None => builder.build_unchecked(),
- }
+ let data = match value_offsets {
+ Some(b) => builder.add_buffer(b).build_unchecked(),
+ None => builder.build_unchecked(),
};
Self::from(data)
}
- /// Attempts to create a new `UnionArray` and validates the inputs
provided.
+
+ /// Attempts to create a new `UnionArray`, validating the inputs provided.
pub fn try_new(
type_ids: Buffer,
value_offsets: Option<Buffer>,
@@ -97,8 +103,7 @@ impl UnionArray {
bitmap: Option<Buffer>,
) -> Result<Self> {
if let Some(b) = &value_offsets {
- let nulls = count_nulls(bitmap.as_ref(), 0, type_ids.len());
- if ((type_ids.len() - nulls) * 4) != b.len() {
+ if ((type_ids.len()) * 4) != b.len() {
return Err(ArrowError::InvalidArgumentError(
"Type Ids and Offsets represent a different number of
array slots."
.to_string(),
@@ -137,7 +142,10 @@ impl UnionArray {
}
}
- let new_self = Self::new(type_ids, value_offsets, child_arrays,
bitmap);
+ // Unsafe Justification: arguments were validated above (and
+ // re-validated as part of data().validate() below)
+ let new_self =
+ unsafe { Self::new_unchecked(type_ids, value_offsets,
child_arrays, bitmap) };
new_self.data().validate()?;
Ok(new_self)
@@ -173,15 +181,9 @@ impl UnionArray {
pub fn value_offset(&self, index: usize) -> i32 {
assert!(index - self.offset() < self.len());
if self.is_dense() {
- // In format v4 unions had their own validity bitmap and offsets
are compressed by omitting null values
- // Starting with v5 unions don't have a validity bitmap and it's
possible to directly index into the offsets buffer
- let valid_slots = match self.data.null_buffer() {
- Some(b) => b.count_set_bits_offset(0, index),
- None => index,
- };
// safety: reinterpreting is safe since the offset buffer contains
`i32` values and is
// properly aligned.
- unsafe { self.data().buffers()[1].typed_data::<i32>()[valid_slots]
}
+ unsafe { self.data().buffers()[1].typed_data::<i32>()[index] }
} else {
index as i32
}
@@ -202,7 +204,7 @@ impl UnionArray {
/// Returns the names of the types in the union.
pub fn type_names(&self) -> Vec<&str> {
match self.data.data_type() {
- DataType::Union(fields) => fields
+ DataType::Union(fields, _) => fields
.iter()
.map(|f| f.name().as_str())
.collect::<Vec<&str>>(),
@@ -212,7 +214,10 @@ impl UnionArray {
/// Returns whether the `UnionArray` is dense (or sparse if `false`).
fn is_dense(&self) -> bool {
- self.data().buffers().len() == 2
+ match self.data.data_type() {
+ DataType::Union(_, mode) => mode == &UnionMode::Dense,
+ _ => unreachable!("Union array's data type is not a union!"),
+ }
}
}
diff --git a/arrow/src/array/builder.rs b/arrow/src/array/builder.rs
index 8a5ef6c..446967b 100644
--- a/arrow/src/array/builder.rs
+++ b/arrow/src/array/builder.rs
@@ -2143,12 +2143,16 @@ impl UnionBuilder {
self.type_id_builder.append(i8::default());
- // Handle sparse union
- if self.value_offset_builder.is_none() {
- for (_, fd) in self.fields.iter_mut() {
- fd.append_null_dynamic()?;
+ match &mut self.value_offset_builder {
+ // Handle dense union
+ Some(value_offset_builder) =>
value_offset_builder.append(i32::default()),
+ // Handle sparse union
+ None => {
+ for (_, fd) in self.fields.iter_mut() {
+ fd.append_null_dynamic()?;
+ }
}
- }
+ };
self.len += 1;
Ok(())
}
diff --git a/arrow/src/array/data.rs b/arrow/src/array/data.rs
index 684d087..ae8b49f 100644
--- a/arrow/src/array/data.rs
+++ b/arrow/src/array/data.rs
@@ -18,7 +18,7 @@
//! Contains `ArrayData`, a generic representation of Arrow array data which
encapsulates
//! common attributes and operations for Arrow array.
-use crate::datatypes::{DataType, IntervalUnit};
+use crate::datatypes::{DataType, IntervalUnit, UnionMode};
use crate::error::{ArrowError, Result};
use crate::{bitmap::Bitmap, datatypes::ArrowNativeType};
use crate::{
@@ -194,7 +194,7 @@ pub(crate) fn new_buffers(data_type: &DataType, capacity:
usize) -> [MutableBuff
MutableBuffer::new(capacity * mem::size_of::<u8>()),
empty_buffer,
],
- DataType::Union(_) => unimplemented!(),
+ DataType::Union(_, _) => unimplemented!(),
}
}
@@ -560,7 +560,7 @@ impl ArrayData {
DataType::Map(field, _) => {
vec![Self::new_empty(field.data_type())]
}
- DataType::Union(_) => unimplemented!(),
+ DataType::Union(_, _) => unimplemented!(),
DataType::Dictionary(_, data_type) => {
vec![Self::new_empty(data_type)]
}
@@ -597,11 +597,6 @@ impl ArrayData {
// Check that the data layout conforms to the spec
let layout = layout(&self.data_type);
- // Will validate Union when conforms to new spec:
- // https://github.com/apache/arrow-rs/issues/85
- if matches!(&self.data_type, DataType::Union(_)) {
- return Ok(());
- }
if self.buffers.len() != layout.buffers.len() {
return Err(ArrowError::InvalidArgumentError(format!(
"Expected {} buffers in array of type {:?}, got {}",
@@ -827,10 +822,21 @@ impl ArrayData {
}
Ok(())
}
- DataType::Union(_fields) => {
- // Validate Union Array as part of implementing new Union
semantics
- // See comments in `ArrayData::validate()`
- // https://github.com/apache/arrow-rs/issues/85
+ DataType::Union(fields, mode) => {
+ self.validate_num_child_data(fields.len())?;
+
+ for (i, field) in fields.iter().enumerate() {
+ let field_data = self.get_valid_child_data(i,
field.data_type())?;
+
+ if mode == &UnionMode::Sparse
+ && field_data.len < (self.len + self.offset)
+ {
+ return Err(ArrowError::InvalidArgumentError(format!(
+ "Sparse union child array #{} has length smaller
than expected for union array ({} < {})",
+ i, field_data.len, self.len + self.offset
+ )));
+ }
+ }
Ok(())
}
DataType::Dictionary(_key_type, value_type) => {
@@ -951,10 +957,12 @@ impl ArrayData {
let child = &self.child_data[0];
self.validate_offsets_full::<i64>(child.len + child.offset)?;
}
- DataType::Union(_) => {
+ DataType::Union(_, _) => {
// Validate Union Array as part of implementing new Union
semantics
// See comments in `ArrayData::validate()`
// https://github.com/apache/arrow-rs/issues/85
+ //
+ // TODO: file a follow-on ticket for full union validation
}
DataType::Dictionary(key_type, _value_type) => {
let dictionary_length: i64 =
self.child_data[0].len.try_into().unwrap();
@@ -1200,11 +1208,26 @@ fn layout(data_type: &DataType) -> DataTypeLayout {
DataType::FixedSizeList(_, _) => DataTypeLayout::new_empty(), // all
in child data
DataType::LargeList(_) =>
DataTypeLayout::new_fixed_width(size_of::<i32>()),
DataType::Struct(_) => DataTypeLayout::new_empty(), // all in child
data,
- DataType::Union(_) => {
- DataTypeLayout::new_fixed_width(size_of::<u8>())
- // Note sparse unions only have one buffer (u8) type_ids,
- // and dense unions have 2 (type_ids as well as offsets).
- // https://github.com/apache/arrow-rs/issues/85
+ DataType::Union(_, mode) => {
+ let type_ids = BufferSpec::FixedWidth {
+ byte_width: size_of::<i8>(),
+ };
+
+ DataTypeLayout {
+ buffers: match mode {
+ UnionMode::Sparse => {
+ vec![type_ids]
+ }
+ UnionMode::Dense => {
+ vec![
+ type_ids,
+ BufferSpec::FixedWidth {
+ byte_width: size_of::<i32>(),
+ },
+ ]
+ }
+ },
+ }
}
DataType::Dictionary(key_type, _value_type) => layout(key_type),
DataType::Decimal(_, _) => {
@@ -1389,8 +1412,8 @@ mod tests {
use super::*;
use crate::array::{
- Array, BooleanBuilder, Int32Array, Int32Builder, StringArray,
StructBuilder,
- UInt64Array,
+ Array, BooleanBuilder, Int32Array, Int32Builder, Int64Array,
StringArray,
+ StructBuilder, UInt64Array,
};
use crate::buffer::Buffer;
use crate::datatypes::Field;
@@ -2273,6 +2296,121 @@ mod tests {
}
#[test]
+ #[should_panic(expected = "Expected Int64 but child data had Int32")]
+ fn test_validate_union_different_types() {
+ let field1 = vec![Some(1),
Some(2)].into_iter().collect::<Int32Array>();
+
+ let field2 = vec![Some(1),
Some(2)].into_iter().collect::<Int32Array>();
+
+ let type_ids = Buffer::from_slice_ref(&[0i8, 1i8]);
+
+ ArrayData::try_new(
+ DataType::Union(
+ vec![
+ Field::new("field1", DataType::Int32, true),
+ Field::new("field2", DataType::Int64, true), // data is
int32
+ ],
+ UnionMode::Sparse,
+ ),
+ 2,
+ None,
+ None,
+ 0,
+ vec![type_ids],
+ vec![field1.data().clone(), field2.data().clone()],
+ )
+ .unwrap();
+ }
+
+ // sparse with wrong sized children
+ #[test]
+ #[should_panic(
+ expected = "Sparse union child array #1 has length smaller than
expected for union array (1 < 2)"
+ )]
+ fn test_validate_union_sparse_different_child_len() {
+ let field1 = vec![Some(1),
Some(2)].into_iter().collect::<Int32Array>();
+
+ // field 2 only has 1 item but array should have 2
+ let field2 = vec![Some(1)].into_iter().collect::<Int64Array>();
+
+ let type_ids = Buffer::from_slice_ref(&[0i8, 1i8]);
+
+ ArrayData::try_new(
+ DataType::Union(
+ vec![
+ Field::new("field1", DataType::Int32, true),
+ Field::new("field2", DataType::Int64, true),
+ ],
+ UnionMode::Sparse,
+ ),
+ 2,
+ None,
+ None,
+ 0,
+ vec![type_ids],
+ vec![field1.data().clone(), field2.data().clone()],
+ )
+ .unwrap();
+ }
+
+ #[test]
+ #[should_panic(expected = "Expected 2 buffers in array of type Union")]
+ fn test_validate_union_dense_without_offsets() {
+ let field1 = vec![Some(1),
Some(2)].into_iter().collect::<Int32Array>();
+
+ let field2 = vec![Some(1)].into_iter().collect::<Int64Array>();
+
+ let type_ids = Buffer::from_slice_ref(&[0i8, 1i8]);
+
+ ArrayData::try_new(
+ DataType::Union(
+ vec![
+ Field::new("field1", DataType::Int32, true),
+ Field::new("field2", DataType::Int64, true),
+ ],
+ UnionMode::Dense,
+ ),
+ 2,
+ None,
+ None,
+ 0,
+ vec![type_ids], // need offsets buffer here too
+ vec![field1.data().clone(), field2.data().clone()],
+ )
+ .unwrap();
+ }
+
+ #[test]
+ #[should_panic(
+ expected = "Need at least 8 bytes in buffers[1] in array of type Union"
+ )]
+ fn test_validate_union_dense_with_bad_len() {
+ let field1 = vec![Some(1),
Some(2)].into_iter().collect::<Int32Array>();
+
+ let field2 = vec![Some(1)].into_iter().collect::<Int64Array>();
+
+ let type_ids = Buffer::from_slice_ref(&[0i8, 1i8]);
+ let offsets = Buffer::from_slice_ref(&[0i32]); // should have 2
offsets, but only have 1
+
+ ArrayData::try_new(
+ DataType::Union(
+ vec![
+ Field::new("field1", DataType::Int32, true),
+ Field::new("field2", DataType::Int64, true),
+ ],
+ UnionMode::Dense,
+ ),
+ 2,
+ None,
+ None,
+ 0,
+ vec![type_ids, offsets],
+ vec![field1.data().clone(), field2.data().clone()],
+ )
+ .unwrap();
+ }
+
+ #[test]
fn test_try_new_sliced_struct() {
let mut builder = StructBuilder::new(
vec![
diff --git a/arrow/src/array/equal/mod.rs b/arrow/src/array/equal/mod.rs
index 9a044e6..742eeec 100644
--- a/arrow/src/array/equal/mod.rs
+++ b/arrow/src/array/equal/mod.rs
@@ -226,7 +226,7 @@ fn equal_values(
DataType::Struct(_) => {
struct_equal(lhs, rhs, lhs_nulls, rhs_nulls, lhs_start, rhs_start,
len)
}
- DataType::Union(_) => unimplemented!("See ARROW-8576"),
+ DataType::Union(_, _) => unimplemented!("See ARROW-8576"),
DataType::Dictionary(data_type, _) => match data_type.as_ref() {
DataType::Int8 => dictionary_equal::<i8>(
lhs, rhs, lhs_nulls, rhs_nulls, lhs_start, rhs_start, len,
diff --git a/arrow/src/array/equal/utils.rs b/arrow/src/array/equal/utils.rs
index 7ce8e14..819ae32 100644
--- a/arrow/src/array/equal/utils.rs
+++ b/arrow/src/array/equal/utils.rs
@@ -161,7 +161,7 @@ pub(super) fn child_logical_null_buffer(
});
Some(buffer.into())
}
- DataType::Union(_) => {
+ DataType::Union(_, _) => {
unimplemented!("Logical equality not yet implemented for union
arrays")
}
DataType::Dictionary(_, _) => {
diff --git a/arrow/src/compute/kernels/cast.rs
b/arrow/src/compute/kernels/cast.rs
index 3a3fe53..34b7810 100644
--- a/arrow/src/compute/kernels/cast.rs
+++ b/arrow/src/compute/kernels/cast.rs
@@ -4454,10 +4454,13 @@ mod tests {
Field::new("f1", DataType::Int32, false),
Field::new("f2", DataType::Utf8, true),
]),
- Union(vec![
- Field::new("f1", DataType::Int32, false),
- Field::new("f2", DataType::Utf8, true),
- ]),
+ Union(
+ vec![
+ Field::new("f1", DataType::Int32, false),
+ Field::new("f2", DataType::Utf8, true),
+ ],
+ UnionMode::Dense,
+ ),
Dictionary(Box::new(DataType::Int8), Box::new(DataType::Int32)),
Dictionary(Box::new(DataType::Int16), Box::new(DataType::Utf8)),
Dictionary(Box::new(DataType::UInt32), Box::new(DataType::Utf8)),
diff --git a/arrow/src/datatypes/datatype.rs b/arrow/src/datatypes/datatype.rs
index ae61f08..3653ebb 100644
--- a/arrow/src/datatypes/datatype.rs
+++ b/arrow/src/datatypes/datatype.rs
@@ -115,7 +115,7 @@ pub enum DataType {
/// A nested datatype that contains a number of sub-fields.
Struct(Vec<Field>),
/// A nested datatype that can represent slots of differing types.
- Union(Vec<Field>),
+ Union(Vec<Field>, UnionMode),
/// A dictionary encoded array (`key_type`, `value_type`), where
/// each array element is an index of `key_type` into an
/// associated dictionary of `value_type`.
@@ -176,6 +176,13 @@ pub enum IntervalUnit {
MonthDayNano,
}
+/// Sparse or Dense union layouts
+#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Hash,
PartialOrd, Ord)]
+pub enum UnionMode {
+ Sparse,
+ Dense,
+}
+
impl fmt::Display for DataType {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{:?}", self)
@@ -406,7 +413,7 @@ impl DataType {
json!({"name": "fixedsizebinary", "byteWidth": byte_width})
}
DataType::Struct(_) => json!({"name": "struct"}),
- DataType::Union(_) => json!({"name": "union"}),
+ DataType::Union(_, _) => json!({"name": "union"}),
DataType::List(_) => json!({ "name": "list"}),
DataType::LargeList(_) => json!({ "name": "largelist"}),
DataType::FixedSizeList(_, length) => {
diff --git a/arrow/src/datatypes/field.rs b/arrow/src/datatypes/field.rs
index 22e23fa..edf01a2 100644
--- a/arrow/src/datatypes/field.rs
+++ b/arrow/src/datatypes/field.rs
@@ -111,7 +111,7 @@ impl Field {
pub(crate) fn fields(&self) -> Vec<&Field> {
let mut collected_fields = vec![self];
match &self.data_type {
- DataType::Struct(fields) | DataType::Union(fields) => {
+ DataType::Struct(fields) | DataType::Union(fields, _) => {
collected_fields.extend(fields.iter().map(|f|
f.fields()).flatten())
}
DataType::List(field)
@@ -484,8 +484,8 @@ impl Field {
));
}
},
- DataType::Union(nested_fields) => match &from.data_type {
- DataType::Union(from_nested_fields) => {
+ DataType::Union(nested_fields, _) => match &from.data_type {
+ DataType::Union(from_nested_fields, _) => {
for from_field in from_nested_fields {
let mut is_new_field = true;
for self_field in nested_fields.iter_mut() {
diff --git a/arrow/src/datatypes/mod.rs b/arrow/src/datatypes/mod.rs
index bc866b0..bcbef58 100644
--- a/arrow/src/datatypes/mod.rs
+++ b/arrow/src/datatypes/mod.rs
@@ -1379,28 +1379,37 @@ mod tests {
Schema::try_merge(vec![
Schema::new(vec![Field::new(
"c1",
- DataType::Union(vec![
- Field::new("c11", DataType::Utf8, true),
- Field::new("c12", DataType::Utf8, true),
- ]),
+ DataType::Union(
+ vec![
+ Field::new("c11", DataType::Utf8, true),
+ Field::new("c12", DataType::Utf8, true),
+ ],
+ UnionMode::Dense
+ ),
false
),]),
Schema::new(vec![Field::new(
"c1",
- DataType::Union(vec![
- Field::new("c12", DataType::Utf8, true),
- Field::new("c13", DataType::Time64(TimeUnit::Second),
true),
- ]),
+ DataType::Union(
+ vec![
+ Field::new("c12", DataType::Utf8, true),
+ Field::new("c13",
DataType::Time64(TimeUnit::Second), true),
+ ],
+ UnionMode::Dense
+ ),
false
),])
])?,
Schema::new(vec![Field::new(
"c1",
- DataType::Union(vec![
- Field::new("c11", DataType::Utf8, true),
- Field::new("c12", DataType::Utf8, true),
- Field::new("c13", DataType::Time64(TimeUnit::Second),
true),
- ]),
+ DataType::Union(
+ vec![
+ Field::new("c11", DataType::Utf8, true),
+ Field::new("c12", DataType::Utf8, true),
+ Field::new("c13", DataType::Time64(TimeUnit::Second),
true),
+ ],
+ UnionMode::Dense
+ ),
false
),]),
);
diff --git a/arrow/src/ipc/writer.rs b/arrow/src/ipc/writer.rs
index c354eb4..7316209 100644
--- a/arrow/src/ipc/writer.rs
+++ b/arrow/src/ipc/writer.rs
@@ -159,7 +159,7 @@ impl IpcDataGenerator {
)?;
}
}
- DataType::Union(fields) => {
+ DataType::Union(fields, _) => {
let union = as_union_array(column);
for (field, ref column) in fields
.iter()
diff --git a/parquet/src/arrow/arrow_writer.rs
b/parquet/src/arrow/arrow_writer.rs
index 9f87428..82c6d03 100644
--- a/parquet/src/arrow/arrow_writer.rs
+++ b/parquet/src/arrow/arrow_writer.rs
@@ -224,7 +224,7 @@ fn write_leaves(
ArrowDataType::Float16 => Err(ParquetError::ArrowError(
"Float16 arrays not supported".to_string(),
)),
- ArrowDataType::FixedSizeList(_, _) | ArrowDataType::Union(_) => {
+ ArrowDataType::FixedSizeList(_, _) | ArrowDataType::Union(_, _) => {
Err(ParquetError::NYI(
format!(
"Attempting to write an Arrow type {:?} to parquet that is
not yet implemented",
diff --git a/parquet/src/arrow/levels.rs b/parquet/src/arrow/levels.rs
index c9b6052..601e2c0 100644
--- a/parquet/src/arrow/levels.rs
+++ b/parquet/src/arrow/levels.rs
@@ -241,7 +241,7 @@ impl LevelInfo {
list_level.calculate_array_levels(&child_array,
list_field)
}
DataType::FixedSizeList(_, _) => unimplemented!(),
- DataType::Union(_) => unimplemented!(),
+ DataType::Union(_, _) => unimplemented!(),
}
}
DataType::Map(map_field, _) => {
@@ -304,7 +304,7 @@ impl LevelInfo {
});
struct_levels
}
- DataType::Union(_) => unimplemented!(),
+ DataType::Union(_, _) => unimplemented!(),
DataType::Dictionary(_, _) => {
// Need to check for these cases not implemented in C++:
// - "Writing DictionaryArray with nested dictionary type not
yet supported"
@@ -743,7 +743,7 @@ impl LevelInfo {
array_mask,
)
}
- DataType::FixedSizeList(_, _) | DataType::Union(_) => {
+ DataType::FixedSizeList(_, _) | DataType::Union(_, _) => {
unimplemented!("Getting offsets not yet implemented")
}
}
diff --git a/parquet/src/arrow/schema.rs b/parquet/src/arrow/schema.rs
index 5fe94ce..51a7a04 100644
--- a/parquet/src/arrow/schema.rs
+++ b/parquet/src/arrow/schema.rs
@@ -536,7 +536,7 @@ fn arrow_to_parquet_type(field: &Field) -> Result<Type> {
))
}
}
- DataType::Union(_) => unimplemented!("See ARROW-8817."),
+ DataType::Union(_, _) => unimplemented!("See ARROW-8817."),
DataType::Dictionary(_, ref value) => {
// Dictionary encoding not handled at the schema level
let dict_field = Field::new(name, *value.clone(),
field.is_nullable());