This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 89d5273  Update Union Array to add `UnionMode`,  match latest Arrow 
Spec, and rename `new` -> `unsafe new_unchecked()` (#885)
89d5273 is described below

commit 89d52733bdff651f0ed27dc687e48c371d77bc85
Author: Andrew Lamb <[email protected]>
AuthorDate: Sun Jan 2 09:37:45 2022 -0500

    Update Union Array to add `UnionMode`,  match latest Arrow Spec, and rename 
`new` -> `unsafe new_unchecked()` (#885)
    
    * Update union array to new null handling
    
    * Update arrow/src/array/array_union.rs
    
    * correct comment
---
 arrow/src/array/array.rs          |   4 +-
 arrow/src/array/array_union.rs    |  53 +++++++-----
 arrow/src/array/builder.rs        |  14 +--
 arrow/src/array/data.rs           | 178 +++++++++++++++++++++++++++++++++-----
 arrow/src/array/equal/mod.rs      |   2 +-
 arrow/src/array/equal/utils.rs    |   2 +-
 arrow/src/compute/kernels/cast.rs |  11 ++-
 arrow/src/datatypes/datatype.rs   |  11 ++-
 arrow/src/datatypes/field.rs      |   6 +-
 arrow/src/datatypes/mod.rs        |  35 +++++---
 arrow/src/ipc/writer.rs           |   2 +-
 parquet/src/arrow/arrow_writer.rs |   2 +-
 parquet/src/arrow/levels.rs       |   6 +-
 parquet/src/arrow/schema.rs       |   2 +-
 14 files changed, 247 insertions(+), 81 deletions(-)

diff --git a/arrow/src/array/array.rs b/arrow/src/array/array.rs
index 7f790ef..ce3751d 100644
--- a/arrow/src/array/array.rs
+++ b/arrow/src/array/array.rs
@@ -301,7 +301,7 @@ pub fn make_array(data: ArrayData) -> ArrayRef {
         DataType::LargeList(_) => Arc::new(LargeListArray::from(data)) as 
ArrayRef,
         DataType::Struct(_) => Arc::new(StructArray::from(data)) as ArrayRef,
         DataType::Map(_, _) => Arc::new(MapArray::from(data)) as ArrayRef,
-        DataType::Union(_) => Arc::new(UnionArray::from(data)) as ArrayRef,
+        DataType::Union(_, _) => Arc::new(UnionArray::from(data)) as ArrayRef,
         DataType::FixedSizeList(_, _) => {
             Arc::new(FixedSizeListArray::from(data)) as ArrayRef
         }
@@ -472,7 +472,7 @@ pub fn new_null_array(data_type: &DataType, length: usize) 
-> ArrayRef {
         DataType::Map(field, _keys_sorted) => {
             new_null_list_array::<i32>(data_type, field.data_type(), length)
         }
-        DataType::Union(_) => {
+        DataType::Union(_, _) => {
             unimplemented!("Creating null Union array not yet supported")
         }
         DataType::Dictionary(key, value) => {
diff --git a/arrow/src/array/array_union.rs b/arrow/src/array/array_union.rs
index 56efcfb..3657729 100644
--- a/arrow/src/array/array_union.rs
+++ b/arrow/src/array/array_union.rs
@@ -17,7 +17,7 @@
 
 /// Contains the `UnionArray` type.
 ///
-use crate::array::{data::count_nulls, make_array, Array, ArrayData, ArrayRef};
+use crate::array::{make_array, Array, ArrayData, ArrayRef};
 use crate::buffer::Buffer;
 use crate::datatypes::*;
 use crate::error::{ArrowError, Result};
@@ -48,7 +48,7 @@ impl UnionArray {
     /// caller and assumes that each of the components are correct and 
consistent with each other.
     /// See `try_new` for an alternative that validates the data provided.
     ///
-    /// # Data Consistency
+    /// # Safety
     ///
     /// The `type_ids` `Buffer` should contain `i8` values.  These values 
should be greater than or equal to
     /// zero and must be less than the number of children provided in 
`child_arrays`.  These values
@@ -56,8 +56,8 @@ impl UnionArray {
     ///
     /// The `value_offsets` `Buffer` is only provided in the case of a dense 
union, sparse unions
     /// should use `None`.  If provided the `value_offsets` `Buffer` should 
contain `i32` values.
-    /// These values should be greater than zero and must be less than the 
length of the overall
-    /// array.
+    /// The values in this array should be greater than or equal to zero and must be less 
than the length of the
+    /// overall array.
     ///
     /// In both cases above we use signed integer types to maintain 
compatibility with other
     /// Arrow implementations.
@@ -65,7 +65,7 @@ impl UnionArray {
     /// In both of the cases above we are accepting `Buffer`'s which are 
assumed to be representing
     /// `i8` and `i32` values respectively.  `Buffer` objects are untyped and 
no attempt is made
     /// to ensure that the data provided is valid.
-    pub fn new(
+    pub unsafe fn new_unchecked(
         type_ids: Buffer,
         value_offsets: Option<Buffer>,
         child_arrays: Vec<(Field, ArrayRef)>,
@@ -74,22 +74,28 @@ impl UnionArray {
         let (field_types, field_values): (Vec<_>, Vec<_>) =
             child_arrays.into_iter().unzip();
         let len = type_ids.len();
-        let mut builder = ArrayData::builder(DataType::Union(field_types))
+
+        let mode = if value_offsets.is_some() {
+            UnionMode::Dense
+        } else {
+            UnionMode::Sparse
+        };
+
+        let mut builder = ArrayData::builder(DataType::Union(field_types, 
mode))
             .add_buffer(type_ids)
             .child_data(field_values.into_iter().map(|a| 
a.data().clone()).collect())
             .len(len);
         if let Some(bitmap) = bitmap_data {
             builder = builder.null_bit_buffer(bitmap)
         }
-        let data = unsafe {
-            match value_offsets {
-                Some(b) => builder.add_buffer(b).build_unchecked(),
-                None => builder.build_unchecked(),
-            }
+        let data = match value_offsets {
+            Some(b) => builder.add_buffer(b).build_unchecked(),
+            None => builder.build_unchecked(),
         };
         Self::from(data)
     }
-    /// Attempts to create a new `UnionArray` and validates the inputs 
provided.
+
+    /// Attempts to create a new `UnionArray`, validating the inputs provided.
     pub fn try_new(
         type_ids: Buffer,
         value_offsets: Option<Buffer>,
@@ -97,8 +103,7 @@ impl UnionArray {
         bitmap: Option<Buffer>,
     ) -> Result<Self> {
         if let Some(b) = &value_offsets {
-            let nulls = count_nulls(bitmap.as_ref(), 0, type_ids.len());
-            if ((type_ids.len() - nulls) * 4) != b.len() {
+            if ((type_ids.len()) * 4) != b.len() {
                 return Err(ArrowError::InvalidArgumentError(
                     "Type Ids and Offsets represent a different number of 
array slots."
                         .to_string(),
@@ -137,7 +142,10 @@ impl UnionArray {
             }
         }
 
-        let new_self = Self::new(type_ids, value_offsets, child_arrays, 
bitmap);
+        // Unsafe Justification: arguments were validated above (and
+        // revalidated as part of data().validate() below)
+        let new_self =
+            unsafe { Self::new_unchecked(type_ids, value_offsets, 
child_arrays, bitmap) };
         new_self.data().validate()?;
 
         Ok(new_self)
@@ -173,15 +181,9 @@ impl UnionArray {
     pub fn value_offset(&self, index: usize) -> i32 {
         assert!(index - self.offset() < self.len());
         if self.is_dense() {
-            // In format v4 unions had their own validity bitmap and offsets 
are compressed by omitting null values
-            // Starting with v5 unions don't have a validity bitmap and it's 
possible to directly index into the offsets buffer
-            let valid_slots = match self.data.null_buffer() {
-                Some(b) => b.count_set_bits_offset(0, index),
-                None => index,
-            };
             // safety: reinterpreting is safe since the offset buffer contains 
`i32` values and is
             // properly aligned.
-            unsafe { self.data().buffers()[1].typed_data::<i32>()[valid_slots] 
}
+            unsafe { self.data().buffers()[1].typed_data::<i32>()[index] }
         } else {
             index as i32
         }
@@ -202,7 +204,7 @@ impl UnionArray {
     /// Returns the names of the types in the union.
     pub fn type_names(&self) -> Vec<&str> {
         match self.data.data_type() {
-            DataType::Union(fields) => fields
+            DataType::Union(fields, _) => fields
                 .iter()
                 .map(|f| f.name().as_str())
                 .collect::<Vec<&str>>(),
@@ -212,7 +214,10 @@ impl UnionArray {
 
     /// Returns whether the `UnionArray` is dense (or sparse if `false`).
     fn is_dense(&self) -> bool {
-        self.data().buffers().len() == 2
+        match self.data.data_type() {
+            DataType::Union(_, mode) => mode == &UnionMode::Dense,
+            _ => unreachable!("Union array's data type is not a union!"),
+        }
     }
 }
 
diff --git a/arrow/src/array/builder.rs b/arrow/src/array/builder.rs
index 8a5ef6c..446967b 100644
--- a/arrow/src/array/builder.rs
+++ b/arrow/src/array/builder.rs
@@ -2143,12 +2143,16 @@ impl UnionBuilder {
 
         self.type_id_builder.append(i8::default());
 
-        // Handle sparse union
-        if self.value_offset_builder.is_none() {
-            for (_, fd) in self.fields.iter_mut() {
-                fd.append_null_dynamic()?;
+        match &mut self.value_offset_builder {
+            // Handle dense union
+            Some(value_offset_builder) => 
value_offset_builder.append(i32::default()),
+            // Handle sparse union
+            None => {
+                for (_, fd) in self.fields.iter_mut() {
+                    fd.append_null_dynamic()?;
+                }
             }
-        }
+        };
         self.len += 1;
         Ok(())
     }
diff --git a/arrow/src/array/data.rs b/arrow/src/array/data.rs
index 684d087..ae8b49f 100644
--- a/arrow/src/array/data.rs
+++ b/arrow/src/array/data.rs
@@ -18,7 +18,7 @@
 //! Contains `ArrayData`, a generic representation of Arrow array data which 
encapsulates
 //! common attributes and operations for Arrow array.
 
-use crate::datatypes::{DataType, IntervalUnit};
+use crate::datatypes::{DataType, IntervalUnit, UnionMode};
 use crate::error::{ArrowError, Result};
 use crate::{bitmap::Bitmap, datatypes::ArrowNativeType};
 use crate::{
@@ -194,7 +194,7 @@ pub(crate) fn new_buffers(data_type: &DataType, capacity: 
usize) -> [MutableBuff
             MutableBuffer::new(capacity * mem::size_of::<u8>()),
             empty_buffer,
         ],
-        DataType::Union(_) => unimplemented!(),
+        DataType::Union(_, _) => unimplemented!(),
     }
 }
 
@@ -560,7 +560,7 @@ impl ArrayData {
             DataType::Map(field, _) => {
                 vec![Self::new_empty(field.data_type())]
             }
-            DataType::Union(_) => unimplemented!(),
+            DataType::Union(_, _) => unimplemented!(),
             DataType::Dictionary(_, data_type) => {
                 vec![Self::new_empty(data_type)]
             }
@@ -597,11 +597,6 @@ impl ArrayData {
         // Check that the data layout conforms to the spec
         let layout = layout(&self.data_type);
 
-        // Will validate Union when conforms to new spec:
-        // https://github.com/apache/arrow-rs/issues/85
-        if matches!(&self.data_type, DataType::Union(_)) {
-            return Ok(());
-        }
         if self.buffers.len() != layout.buffers.len() {
             return Err(ArrowError::InvalidArgumentError(format!(
                 "Expected {} buffers in array of type {:?}, got {}",
@@ -827,10 +822,21 @@ impl ArrayData {
                 }
                 Ok(())
             }
-            DataType::Union(_fields) => {
-                // Validate Union Array as part of implementing new Union 
semantics
-                // See comments in `ArrayData::validate()`
-                // https://github.com/apache/arrow-rs/issues/85
+            DataType::Union(fields, mode) => {
+                self.validate_num_child_data(fields.len())?;
+
+                for (i, field) in fields.iter().enumerate() {
+                    let field_data = self.get_valid_child_data(i, 
field.data_type())?;
+
+                    if mode == &UnionMode::Sparse
+                        && field_data.len < (self.len + self.offset)
+                    {
+                        return Err(ArrowError::InvalidArgumentError(format!(
+                            "Sparse union child array #{} has length smaller 
than expected for union array ({} < {})",
+                            i, field_data.len, self.len + self.offset
+                        )));
+                    }
+                }
                 Ok(())
             }
             DataType::Dictionary(_key_type, value_type) => {
@@ -951,10 +957,12 @@ impl ArrayData {
                 let child = &self.child_data[0];
                 self.validate_offsets_full::<i64>(child.len + child.offset)?;
             }
-            DataType::Union(_) => {
+            DataType::Union(_, _) => {
                 // Validate Union Array as part of implementing new Union 
semantics
                 // See comments in `ArrayData::validate()`
                 // https://github.com/apache/arrow-rs/issues/85
+                //
+                // TODO: file a follow-on ticket for full union validation
             }
             DataType::Dictionary(key_type, _value_type) => {
                 let dictionary_length: i64 = 
self.child_data[0].len.try_into().unwrap();
@@ -1200,11 +1208,26 @@ fn layout(data_type: &DataType) -> DataTypeLayout {
         DataType::FixedSizeList(_, _) => DataTypeLayout::new_empty(), // all 
in child data
         DataType::LargeList(_) => 
DataTypeLayout::new_fixed_width(size_of::<i32>()),
         DataType::Struct(_) => DataTypeLayout::new_empty(), // all in child 
data,
-        DataType::Union(_) => {
-            DataTypeLayout::new_fixed_width(size_of::<u8>())
-            // Note sparse unions only have one buffer (u8) type_ids,
-            // and dense unions have 2 (type_ids as well as offsets).
-            // https://github.com/apache/arrow-rs/issues/85
+        DataType::Union(_, mode) => {
+            let type_ids = BufferSpec::FixedWidth {
+                byte_width: size_of::<i8>(),
+            };
+
+            DataTypeLayout {
+                buffers: match mode {
+                    UnionMode::Sparse => {
+                        vec![type_ids]
+                    }
+                    UnionMode::Dense => {
+                        vec![
+                            type_ids,
+                            BufferSpec::FixedWidth {
+                                byte_width: size_of::<i32>(),
+                            },
+                        ]
+                    }
+                },
+            }
         }
         DataType::Dictionary(key_type, _value_type) => layout(key_type),
         DataType::Decimal(_, _) => {
@@ -1389,8 +1412,8 @@ mod tests {
     use super::*;
 
     use crate::array::{
-        Array, BooleanBuilder, Int32Array, Int32Builder, StringArray, 
StructBuilder,
-        UInt64Array,
+        Array, BooleanBuilder, Int32Array, Int32Builder, Int64Array, 
StringArray,
+        StructBuilder, UInt64Array,
     };
     use crate::buffer::Buffer;
     use crate::datatypes::Field;
@@ -2273,6 +2296,121 @@ mod tests {
     }
 
     #[test]
+    #[should_panic(expected = "Expected Int64 but child data had Int32")]
+    fn test_validate_union_different_types() {
+        let field1 = vec![Some(1), 
Some(2)].into_iter().collect::<Int32Array>();
+
+        let field2 = vec![Some(1), 
Some(2)].into_iter().collect::<Int32Array>();
+
+        let type_ids = Buffer::from_slice_ref(&[0i8, 1i8]);
+
+        ArrayData::try_new(
+            DataType::Union(
+                vec![
+                    Field::new("field1", DataType::Int32, true),
+                    Field::new("field2", DataType::Int64, true), // data is 
int32
+                ],
+                UnionMode::Sparse,
+            ),
+            2,
+            None,
+            None,
+            0,
+            vec![type_ids],
+            vec![field1.data().clone(), field2.data().clone()],
+        )
+        .unwrap();
+    }
+
+    // sparse with wrong sized children
+    #[test]
+    #[should_panic(
+        expected = "Sparse union child array #1 has length smaller than 
expected for union array (1 < 2)"
+    )]
+    fn test_validate_union_sparse_different_child_len() {
+        let field1 = vec![Some(1), 
Some(2)].into_iter().collect::<Int32Array>();
+
+        // field 2 only has 1 item but array should have 2
+        let field2 = vec![Some(1)].into_iter().collect::<Int64Array>();
+
+        let type_ids = Buffer::from_slice_ref(&[0i8, 1i8]);
+
+        ArrayData::try_new(
+            DataType::Union(
+                vec![
+                    Field::new("field1", DataType::Int32, true),
+                    Field::new("field2", DataType::Int64, true),
+                ],
+                UnionMode::Sparse,
+            ),
+            2,
+            None,
+            None,
+            0,
+            vec![type_ids],
+            vec![field1.data().clone(), field2.data().clone()],
+        )
+        .unwrap();
+    }
+
+    #[test]
+    #[should_panic(expected = "Expected 2 buffers in array of type Union")]
+    fn test_validate_union_dense_without_offsets() {
+        let field1 = vec![Some(1), 
Some(2)].into_iter().collect::<Int32Array>();
+
+        let field2 = vec![Some(1)].into_iter().collect::<Int64Array>();
+
+        let type_ids = Buffer::from_slice_ref(&[0i8, 1i8]);
+
+        ArrayData::try_new(
+            DataType::Union(
+                vec![
+                    Field::new("field1", DataType::Int32, true),
+                    Field::new("field2", DataType::Int64, true),
+                ],
+                UnionMode::Dense,
+            ),
+            2,
+            None,
+            None,
+            0,
+            vec![type_ids], // need offsets buffer here too
+            vec![field1.data().clone(), field2.data().clone()],
+        )
+        .unwrap();
+    }
+
+    #[test]
+    #[should_panic(
+        expected = "Need at least 8 bytes in buffers[1] in array of type Union"
+    )]
+    fn test_validate_union_dense_with_bad_len() {
+        let field1 = vec![Some(1), 
Some(2)].into_iter().collect::<Int32Array>();
+
+        let field2 = vec![Some(1)].into_iter().collect::<Int64Array>();
+
+        let type_ids = Buffer::from_slice_ref(&[0i8, 1i8]);
+        let offsets = Buffer::from_slice_ref(&[0i32]); // should have 2 
offsets, but only have 1
+
+        ArrayData::try_new(
+            DataType::Union(
+                vec![
+                    Field::new("field1", DataType::Int32, true),
+                    Field::new("field2", DataType::Int64, true),
+                ],
+                UnionMode::Dense,
+            ),
+            2,
+            None,
+            None,
+            0,
+            vec![type_ids, offsets],
+            vec![field1.data().clone(), field2.data().clone()],
+        )
+        .unwrap();
+    }
+
+    #[test]
     fn test_try_new_sliced_struct() {
         let mut builder = StructBuilder::new(
             vec![
diff --git a/arrow/src/array/equal/mod.rs b/arrow/src/array/equal/mod.rs
index 9a044e6..742eeec 100644
--- a/arrow/src/array/equal/mod.rs
+++ b/arrow/src/array/equal/mod.rs
@@ -226,7 +226,7 @@ fn equal_values(
         DataType::Struct(_) => {
             struct_equal(lhs, rhs, lhs_nulls, rhs_nulls, lhs_start, rhs_start, 
len)
         }
-        DataType::Union(_) => unimplemented!("See ARROW-8576"),
+        DataType::Union(_, _) => unimplemented!("See ARROW-8576"),
         DataType::Dictionary(data_type, _) => match data_type.as_ref() {
             DataType::Int8 => dictionary_equal::<i8>(
                 lhs, rhs, lhs_nulls, rhs_nulls, lhs_start, rhs_start, len,
diff --git a/arrow/src/array/equal/utils.rs b/arrow/src/array/equal/utils.rs
index 7ce8e14..819ae32 100644
--- a/arrow/src/array/equal/utils.rs
+++ b/arrow/src/array/equal/utils.rs
@@ -161,7 +161,7 @@ pub(super) fn child_logical_null_buffer(
             });
             Some(buffer.into())
         }
-        DataType::Union(_) => {
+        DataType::Union(_, _) => {
             unimplemented!("Logical equality not yet implemented for union 
arrays")
         }
         DataType::Dictionary(_, _) => {
diff --git a/arrow/src/compute/kernels/cast.rs 
b/arrow/src/compute/kernels/cast.rs
index 3a3fe53..34b7810 100644
--- a/arrow/src/compute/kernels/cast.rs
+++ b/arrow/src/compute/kernels/cast.rs
@@ -4454,10 +4454,13 @@ mod tests {
                 Field::new("f1", DataType::Int32, false),
                 Field::new("f2", DataType::Utf8, true),
             ]),
-            Union(vec![
-                Field::new("f1", DataType::Int32, false),
-                Field::new("f2", DataType::Utf8, true),
-            ]),
+            Union(
+                vec![
+                    Field::new("f1", DataType::Int32, false),
+                    Field::new("f2", DataType::Utf8, true),
+                ],
+                UnionMode::Dense,
+            ),
             Dictionary(Box::new(DataType::Int8), Box::new(DataType::Int32)),
             Dictionary(Box::new(DataType::Int16), Box::new(DataType::Utf8)),
             Dictionary(Box::new(DataType::UInt32), Box::new(DataType::Utf8)),
diff --git a/arrow/src/datatypes/datatype.rs b/arrow/src/datatypes/datatype.rs
index ae61f08..3653ebb 100644
--- a/arrow/src/datatypes/datatype.rs
+++ b/arrow/src/datatypes/datatype.rs
@@ -115,7 +115,7 @@ pub enum DataType {
     /// A nested datatype that contains a number of sub-fields.
     Struct(Vec<Field>),
     /// A nested datatype that can represent slots of differing types.
-    Union(Vec<Field>),
+    Union(Vec<Field>, UnionMode),
     /// A dictionary encoded array (`key_type`, `value_type`), where
     /// each array element is an index of `key_type` into an
     /// associated dictionary of `value_type`.
@@ -176,6 +176,13 @@ pub enum IntervalUnit {
     MonthDayNano,
 }
 
+// Sparse or Dense union layouts
+#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Hash, 
PartialOrd, Ord)]
+pub enum UnionMode {
+    Sparse,
+    Dense,
+}
+
 impl fmt::Display for DataType {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
         write!(f, "{:?}", self)
@@ -406,7 +413,7 @@ impl DataType {
                 json!({"name": "fixedsizebinary", "byteWidth": byte_width})
             }
             DataType::Struct(_) => json!({"name": "struct"}),
-            DataType::Union(_) => json!({"name": "union"}),
+            DataType::Union(_, _) => json!({"name": "union"}),
             DataType::List(_) => json!({ "name": "list"}),
             DataType::LargeList(_) => json!({ "name": "largelist"}),
             DataType::FixedSizeList(_, length) => {
diff --git a/arrow/src/datatypes/field.rs b/arrow/src/datatypes/field.rs
index 22e23fa..edf01a2 100644
--- a/arrow/src/datatypes/field.rs
+++ b/arrow/src/datatypes/field.rs
@@ -111,7 +111,7 @@ impl Field {
     pub(crate) fn fields(&self) -> Vec<&Field> {
         let mut collected_fields = vec![self];
         match &self.data_type {
-            DataType::Struct(fields) | DataType::Union(fields) => {
+            DataType::Struct(fields) | DataType::Union(fields, _) => {
                 collected_fields.extend(fields.iter().map(|f| 
f.fields()).flatten())
             }
             DataType::List(field)
@@ -484,8 +484,8 @@ impl Field {
                     ));
                 }
             },
-            DataType::Union(nested_fields) => match &from.data_type {
-                DataType::Union(from_nested_fields) => {
+            DataType::Union(nested_fields, _) => match &from.data_type {
+                DataType::Union(from_nested_fields, _) => {
                     for from_field in from_nested_fields {
                         let mut is_new_field = true;
                         for self_field in nested_fields.iter_mut() {
diff --git a/arrow/src/datatypes/mod.rs b/arrow/src/datatypes/mod.rs
index bc866b0..bcbef58 100644
--- a/arrow/src/datatypes/mod.rs
+++ b/arrow/src/datatypes/mod.rs
@@ -1379,28 +1379,37 @@ mod tests {
             Schema::try_merge(vec![
                 Schema::new(vec![Field::new(
                     "c1",
-                    DataType::Union(vec![
-                        Field::new("c11", DataType::Utf8, true),
-                        Field::new("c12", DataType::Utf8, true),
-                    ]),
+                    DataType::Union(
+                        vec![
+                            Field::new("c11", DataType::Utf8, true),
+                            Field::new("c12", DataType::Utf8, true),
+                        ],
+                        UnionMode::Dense
+                    ),
                     false
                 ),]),
                 Schema::new(vec![Field::new(
                     "c1",
-                    DataType::Union(vec![
-                        Field::new("c12", DataType::Utf8, true),
-                        Field::new("c13", DataType::Time64(TimeUnit::Second), 
true),
-                    ]),
+                    DataType::Union(
+                        vec![
+                            Field::new("c12", DataType::Utf8, true),
+                            Field::new("c13", 
DataType::Time64(TimeUnit::Second), true),
+                        ],
+                        UnionMode::Dense
+                    ),
                     false
                 ),])
             ])?,
             Schema::new(vec![Field::new(
                 "c1",
-                DataType::Union(vec![
-                    Field::new("c11", DataType::Utf8, true),
-                    Field::new("c12", DataType::Utf8, true),
-                    Field::new("c13", DataType::Time64(TimeUnit::Second), 
true),
-                ]),
+                DataType::Union(
+                    vec![
+                        Field::new("c11", DataType::Utf8, true),
+                        Field::new("c12", DataType::Utf8, true),
+                        Field::new("c13", DataType::Time64(TimeUnit::Second), 
true),
+                    ],
+                    UnionMode::Dense
+                ),
                 false
             ),]),
         );
diff --git a/arrow/src/ipc/writer.rs b/arrow/src/ipc/writer.rs
index c354eb4..7316209 100644
--- a/arrow/src/ipc/writer.rs
+++ b/arrow/src/ipc/writer.rs
@@ -159,7 +159,7 @@ impl IpcDataGenerator {
                     )?;
                 }
             }
-            DataType::Union(fields) => {
+            DataType::Union(fields, _) => {
                 let union = as_union_array(column);
                 for (field, ref column) in fields
                     .iter()
diff --git a/parquet/src/arrow/arrow_writer.rs 
b/parquet/src/arrow/arrow_writer.rs
index 9f87428..82c6d03 100644
--- a/parquet/src/arrow/arrow_writer.rs
+++ b/parquet/src/arrow/arrow_writer.rs
@@ -224,7 +224,7 @@ fn write_leaves(
         ArrowDataType::Float16 => Err(ParquetError::ArrowError(
             "Float16 arrays not supported".to_string(),
         )),
-        ArrowDataType::FixedSizeList(_, _) | ArrowDataType::Union(_) => {
+        ArrowDataType::FixedSizeList(_, _) | ArrowDataType::Union(_, _) => {
             Err(ParquetError::NYI(
                 format!(
                     "Attempting to write an Arrow type {:?} to parquet that is 
not yet implemented",
diff --git a/parquet/src/arrow/levels.rs b/parquet/src/arrow/levels.rs
index c9b6052..601e2c0 100644
--- a/parquet/src/arrow/levels.rs
+++ b/parquet/src/arrow/levels.rs
@@ -241,7 +241,7 @@ impl LevelInfo {
                         list_level.calculate_array_levels(&child_array, 
list_field)
                     }
                     DataType::FixedSizeList(_, _) => unimplemented!(),
-                    DataType::Union(_) => unimplemented!(),
+                    DataType::Union(_, _) => unimplemented!(),
                 }
             }
             DataType::Map(map_field, _) => {
@@ -304,7 +304,7 @@ impl LevelInfo {
                     });
                 struct_levels
             }
-            DataType::Union(_) => unimplemented!(),
+            DataType::Union(_, _) => unimplemented!(),
             DataType::Dictionary(_, _) => {
                 // Need to check for these cases not implemented in C++:
                 // - "Writing DictionaryArray with nested dictionary type not 
yet supported"
@@ -743,7 +743,7 @@ impl LevelInfo {
                     array_mask,
                 )
             }
-            DataType::FixedSizeList(_, _) | DataType::Union(_) => {
+            DataType::FixedSizeList(_, _) | DataType::Union(_, _) => {
                 unimplemented!("Getting offsets not yet implemented")
             }
         }
diff --git a/parquet/src/arrow/schema.rs b/parquet/src/arrow/schema.rs
index 5fe94ce..51a7a04 100644
--- a/parquet/src/arrow/schema.rs
+++ b/parquet/src/arrow/schema.rs
@@ -536,7 +536,7 @@ fn arrow_to_parquet_type(field: &Field) -> Result<Type> {
                 ))
             }
         }
-        DataType::Union(_) => unimplemented!("See ARROW-8817."),
+        DataType::Union(_, _) => unimplemented!("See ARROW-8817."),
         DataType::Dictionary(_, ref value) => {
             // Dictionary encoding not handled at the schema level
             let dict_field = Field::new(name, *value.clone(), 
field.is_nullable());

Reply via email to