This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 7852e763f ArrayData Enumeration for Remaining Layouts (#3769)
7852e763f is described below

commit 7852e763fea66b33a2766b6d6421cafcf6a58c29
Author: Raphael Taylor-Davies <[email protected]>
AuthorDate: Wed Mar 1 11:54:52 2023 +0000

    ArrayData Enumeration for Remaining Layouts (#3769)
    
    * Add StructArrayData
    
    * Add ListArrayData
    
    * Add DictionaryArrayData
    
    * Format
    
    * Add FixedSizeBinaryArrayData
    
    * Add UnionArrayData
    
    * Docs
    
    * Add FixedSizeListArrayData
    
    * Derive Debug and Clone
    
    * Add RunArrayData
    
    * Review feedback
---
 arrow-data/src/data/bytes.rs      |  80 ++++++++++++-
 arrow-data/src/data/dictionary.rs | 174 +++++++++++++++++++++++++++
 arrow-data/src/data/list.rs       | 241 ++++++++++++++++++++++++++++++++++++++
 arrow-data/src/data/mod.rs        |  10 ++
 arrow-data/src/data/primitive.rs  |  22 ++--
 arrow-data/src/data/run.rs        | 149 +++++++++++++++++++++++
 arrow-data/src/data/struct.rs     |  81 +++++++++++++
 arrow-data/src/data/types.rs      |   3 +-
 arrow-data/src/data/union.rs      |  77 ++++++++++++
 9 files changed, 819 insertions(+), 18 deletions(-)

diff --git a/arrow-data/src/data/bytes.rs b/arrow-data/src/data/bytes.rs
index 86839c671..521c1959a 100644
--- a/arrow-data/src/data/bytes.rs
+++ b/arrow-data/src/data/bytes.rs
@@ -73,7 +73,7 @@ mod private {
 }
 
 /// Types backed by a variable length slice of bytes
-pub trait Bytes: private::BytesSealed {
+pub trait Bytes: private::BytesSealed + std::fmt::Debug {
     const TYPE: BytesType;
 }
 
@@ -195,6 +195,7 @@ impl private::BytesOffsetSealed for i64 {
 }
 
 /// An enumeration of the types of [`ArrayDataBytesOffset`]
+#[derive(Debug, Clone)]
 pub enum ArrayDataBytes {
     Binary(ArrayDataBytesOffset<[u8]>),
     Utf8(ArrayDataBytesOffset<str>),
@@ -217,18 +218,29 @@ impl ArrayDataBytes {
 }
 
 /// An enumeration of the types of [`BytesArrayData`]
+#[derive(Debug)]
 pub enum ArrayDataBytesOffset<B: Bytes + ?Sized> {
     Small(BytesArrayData<i32, B>),
     Large(BytesArrayData<i64, B>),
 }
 
+impl<B: Bytes + ?Sized> Clone for ArrayDataBytesOffset<B> {
+    fn clone(&self) -> Self {
+        match self {
+            Self::Small(v) => Self::Small(v.clone()),
+            Self::Large(v) => Self::Large(v.clone()),
+        }
+    }
+}
+
 impl<O: BytesOffset, B: Bytes + ?Sized> From<BytesArrayData<O, B>> for 
ArrayDataBytes {
     fn from(value: BytesArrayData<O, B>) -> Self {
         B::upcast(O::upcast(value))
     }
 }
 
-/// ArrayData for arrays of [`Bytes`]
+/// ArrayData for [variable-sized 
arrays](https://arrow.apache.org/docs/format/Columnar.html#variable-size-binary-layout)
 of [`Bytes`]
+#[derive(Debug)]
 pub struct BytesArrayData<O: BytesOffset, B: Bytes + ?Sized> {
     data_type: DataType,
     nulls: Option<NullBuffer>,
@@ -237,13 +249,25 @@ pub struct BytesArrayData<O: BytesOffset, B: Bytes + 
?Sized> {
     phantom: PhantomData<B>,
 }
 
-impl<O: BytesOffset, B: Bytes> BytesArrayData<O, B> {
+impl<O: BytesOffset, B: Bytes + ?Sized> Clone for BytesArrayData<O, B> {
+    fn clone(&self) -> Self {
+        Self {
+            data_type: self.data_type.clone(),
+            nulls: self.nulls.clone(),
+            offsets: self.offsets.clone(),
+            values: self.values.clone(),
+            phantom: Default::default(),
+        }
+    }
+}
+
+impl<O: BytesOffset, B: Bytes + ?Sized> BytesArrayData<O, B> {
     /// Creates a new [`BytesArrayData`]
     ///
     /// # Safety
     ///
     /// - Each consecutive window of `offsets` must identify a valid slice of 
`values`
-    /// - `nulls.len() == offsets.len() + 1`
+    /// - `nulls.len() == offsets.len() - 1`
     /// - `data_type` must be valid for this layout
     pub unsafe fn new_unchecked(
         data_type: DataType,
@@ -270,7 +294,7 @@ impl<O: BytesOffset, B: Bytes> BytesArrayData<O, B> {
 
     /// Returns the offsets
     #[inline]
-    pub fn value_offsets(&self) -> &[O] {
+    pub fn offsets(&self) -> &[O] {
         &self.offsets
     }
 
@@ -286,3 +310,49 @@ impl<O: BytesOffset, B: Bytes> BytesArrayData<O, B> {
         &self.data_type
     }
 }
+
+/// ArrayData for [fixed-size 
arrays](https://arrow.apache.org/docs/format/Columnar.html#fixed-size-primitive-layout)
 of bytes
+#[derive(Debug, Clone)]
+pub struct FixedSizeBinaryArrayData {
+    data_type: DataType,
+    nulls: Option<NullBuffer>,
+    values: Buffer,
+}
+
+impl FixedSizeBinaryArrayData {
+    /// Creates a new [`FixedSizeBinaryArrayData`]
+    ///
+    /// # Safety
+    ///
+    /// - `data_type` must be valid for this layout
+    /// - `nulls.len() == values.len() / element_size`
+    pub unsafe fn new_unchecked(
+        data_type: DataType,
+        values: Buffer,
+        nulls: Option<NullBuffer>,
+    ) -> Self {
+        Self {
+            data_type,
+            nulls,
+            values,
+        }
+    }
+
+    /// Returns the raw byte data
+    #[inline]
+    pub fn values(&self) -> &[u8] {
+        &self.values
+    }
+
+    /// Returns the null buffer if any
+    #[inline]
+    pub fn null_buffer(&self) -> Option<&NullBuffer> {
+        self.nulls.as_ref()
+    }
+
+    /// Returns the data type of this array
+    #[inline]
+    pub fn data_type(&self) -> &DataType {
+        &self.data_type
+    }
+}
diff --git a/arrow-data/src/data/dictionary.rs 
b/arrow-data/src/data/dictionary.rs
new file mode 100644
index 000000000..2ec4ee005
--- /dev/null
+++ b/arrow-data/src/data/dictionary.rs
@@ -0,0 +1,174 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::data::types::DictionaryKeyType;
+use crate::ArrayData;
+use arrow_buffer::buffer::{NullBuffer, ScalarBuffer};
+use arrow_buffer::ArrowNativeType;
+use arrow_schema::DataType;
+
+mod private {
+    use super::*;
+
+    pub trait DictionaryKeySealed {
+        /// Downcast [`ArrayDataDictionary`] to [`DictionaryArrayData`]
+        fn downcast_ref(data: &ArrayDataDictionary) -> 
Option<&DictionaryArrayData<Self>>
+        where
+            Self: DictionaryKey;
+
+        /// Downcast [`ArrayDataDictionary`] to [`DictionaryArrayData`]
+        fn downcast(data: ArrayDataDictionary) -> 
Option<DictionaryArrayData<Self>>
+        where
+            Self: DictionaryKey;
+
+        /// Cast [`DictionaryArrayData`] to [`ArrayDataDictionary`]
+        fn upcast(v: DictionaryArrayData<Self>) -> ArrayDataDictionary
+        where
+            Self: DictionaryKey;
+    }
+}
+
+/// Types of dictionary key used by dictionary arrays
+pub trait DictionaryKey: private::DictionaryKeySealed + ArrowNativeType {
+    const TYPE: DictionaryKeyType;
+}
+
+macro_rules! dictionary {
+    ($t:ty,$v:ident) => {
+        impl DictionaryKey for $t {
+            const TYPE: DictionaryKeyType = DictionaryKeyType::$v;
+        }
+        impl private::DictionaryKeySealed for $t {
+            fn downcast_ref(
+                data: &ArrayDataDictionary,
+            ) -> Option<&DictionaryArrayData<Self>> {
+                match data {
+                    ArrayDataDictionary::$v(v) => Some(v),
+                    _ => None,
+                }
+            }
+
+            fn downcast(data: ArrayDataDictionary) -> 
Option<DictionaryArrayData<Self>> {
+                match data {
+                    ArrayDataDictionary::$v(v) => Some(v),
+                    _ => None,
+                }
+            }
+
+            fn upcast(v: DictionaryArrayData<Self>) -> ArrayDataDictionary {
+                ArrayDataDictionary::$v(v)
+            }
+        }
+    };
+}
+
+dictionary!(i8, Int8);
+dictionary!(i16, Int16);
+dictionary!(i32, Int32);
+dictionary!(i64, Int64);
+dictionary!(u8, UInt8);
+dictionary!(u16, UInt16);
+dictionary!(u32, UInt32);
+dictionary!(u64, UInt64);
+
+/// An enumeration of the types of [`DictionaryArrayData`]
+#[derive(Debug, Clone)]
+pub enum ArrayDataDictionary {
+    Int8(DictionaryArrayData<i8>),
+    Int16(DictionaryArrayData<i16>),
+    Int32(DictionaryArrayData<i32>),
+    Int64(DictionaryArrayData<i64>),
+    UInt8(DictionaryArrayData<u8>),
+    UInt16(DictionaryArrayData<u16>),
+    UInt32(DictionaryArrayData<u32>),
+    UInt64(DictionaryArrayData<u64>),
+}
+
+impl ArrayDataDictionary {
+    /// Downcast this [`ArrayDataDictionary`] to the corresponding 
[`DictionaryArrayData`]
+    pub fn downcast_ref<K: DictionaryKey>(&self) -> 
Option<&DictionaryArrayData<K>> {
+        K::downcast_ref(self)
+    }
+
+    /// Downcast this [`ArrayDataDictionary`] to the corresponding 
[`DictionaryArrayData`]
+    pub fn downcast<K: DictionaryKey>(self) -> Option<DictionaryArrayData<K>> {
+        K::downcast(self)
+    }
+}
+
+impl<K: DictionaryKey> From<DictionaryArrayData<K>> for ArrayDataDictionary {
+    fn from(value: DictionaryArrayData<K>) -> Self {
+        K::upcast(value)
+    }
+}
+
+/// ArrayData for [dictionary 
arrays](https://arrow.apache.org/docs/format/Columnar.html#dictionary-encoded-layout)
+#[derive(Debug, Clone)]
+pub struct DictionaryArrayData<K: DictionaryKey> {
+    data_type: DataType,
+    nulls: Option<NullBuffer>,
+    keys: ScalarBuffer<K>,
+    child: Box<ArrayData>,
+}
+
+impl<K: DictionaryKey> DictionaryArrayData<K> {
+    /// Create a new [`DictionaryArrayData`]
+    ///
+    /// # Safety
+    ///
+    /// - `data_type` must be valid for this layout
+    /// - `child` must have a type matching `data_type`
+    /// - all values in `keys` must be `0 <= v < child.len()` or be a null 
according to `nulls`
+    /// - `nulls` must have the same length as `keys`
+    pub unsafe fn new_unchecked(
+        data_type: DataType,
+        keys: ScalarBuffer<K>,
+        nulls: Option<NullBuffer>,
+        child: ArrayData,
+    ) -> Self {
+        Self {
+            data_type,
+            nulls,
+            keys,
+            child: Box::new(child),
+        }
+    }
+
+    /// Returns the null buffer if any
+    #[inline]
+    pub fn nulls(&self) -> Option<&NullBuffer> {
+        self.nulls.as_ref()
+    }
+
+    /// Returns the keys
+    #[inline]
+    pub fn keys(&self) -> &[K] {
+        &self.keys
+    }
+
+    /// Returns the child data
+    #[inline]
+    pub fn child(&self) -> &ArrayData {
+        self.child.as_ref()
+    }
+
+    /// Returns the data type of this array
+    #[inline]
+    pub fn data_type(&self) -> &DataType {
+        &self.data_type
+    }
+}
diff --git a/arrow-data/src/data/list.rs b/arrow-data/src/data/list.rs
new file mode 100644
index 000000000..59909289e
--- /dev/null
+++ b/arrow-data/src/data/list.rs
@@ -0,0 +1,241 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::data::types::OffsetType;
+use crate::ArrayData;
+use arrow_buffer::buffer::{NullBuffer, ScalarBuffer};
+use arrow_buffer::{ArrowNativeType, Buffer};
+use arrow_schema::DataType;
+
+mod private {
+    use super::*;
+
+    pub trait ListOffsetSealed {
+        /// Downcast [`ArrayDataList`] to [`ListArrayData`]
+        fn downcast_ref(data: &ArrayDataList) -> Option<&ListArrayData<Self>>
+        where
+            Self: ListOffset;
+
+        /// Downcast [`ArrayDataList`] to [`ListArrayData`]
+        fn downcast(data: ArrayDataList) -> Option<ListArrayData<Self>>
+        where
+            Self: ListOffset;
+
+        /// Cast [`ListArrayData`] to [`ArrayDataList`]
+        fn upcast(v: ListArrayData<Self>) -> ArrayDataList
+        where
+            Self: ListOffset;
+    }
+}
+
+/// Types of offset used by variable length list arrays
+pub trait ListOffset: private::ListOffsetSealed + ArrowNativeType {
+    const TYPE: OffsetType;
+}
+
+impl ListOffset for i32 {
+    const TYPE: OffsetType = OffsetType::Int32;
+}
+
+impl private::ListOffsetSealed for i32 {
+    fn downcast_ref(data: &ArrayDataList) -> Option<&ListArrayData<Self>>
+    where
+        Self: ListOffset,
+    {
+        match data {
+            ArrayDataList::Small(v) => Some(v),
+            ArrayDataList::Large(_) => None,
+        }
+    }
+
+    fn downcast(data: ArrayDataList) -> Option<ListArrayData<Self>>
+    where
+        Self: ListOffset,
+    {
+        match data {
+            ArrayDataList::Small(v) => Some(v),
+            ArrayDataList::Large(_) => None,
+        }
+    }
+
+    fn upcast(v: ListArrayData<Self>) -> ArrayDataList
+    where
+        Self: ListOffset,
+    {
+        ArrayDataList::Small(v)
+    }
+}
+
+impl ListOffset for i64 {
+    const TYPE: OffsetType = OffsetType::Int64;
+}
+
+impl private::ListOffsetSealed for i64 {
+    fn downcast_ref(data: &ArrayDataList) -> Option<&ListArrayData<Self>>
+    where
+        Self: ListOffset,
+    {
+        match data {
+            ArrayDataList::Small(_) => None,
+            ArrayDataList::Large(v) => Some(v),
+        }
+    }
+
+    fn downcast(data: ArrayDataList) -> Option<ListArrayData<Self>>
+    where
+        Self: ListOffset,
+    {
+        match data {
+            ArrayDataList::Small(_) => None,
+            ArrayDataList::Large(v) => Some(v),
+        }
+    }
+
+    fn upcast(v: ListArrayData<Self>) -> ArrayDataList
+    where
+        Self: ListOffset,
+    {
+        ArrayDataList::Large(v)
+    }
+}
+
+/// An enumeration of the types of [`ListArrayData`]
+#[derive(Debug, Clone)]
+pub enum ArrayDataList {
+    Small(ListArrayData<i32>),
+    Large(ListArrayData<i64>),
+}
+
+impl ArrayDataList {
+    /// Downcast this [`ArrayDataList`] to the corresponding [`ListArrayData`]
+    pub fn downcast_ref<O: ListOffset>(&self) -> Option<&ListArrayData<O>> {
+        O::downcast_ref(self)
+    }
+
+    /// Downcast this [`ArrayDataList`] to the corresponding [`ListArrayData`]
+    pub fn downcast<O: ListOffset>(self) -> Option<ListArrayData<O>> {
+        O::downcast(self)
+    }
+}
+
+impl<O: ListOffset> From<ListArrayData<O>> for ArrayDataList {
+    fn from(value: ListArrayData<O>) -> Self {
+        O::upcast(value)
+    }
+}
+
+/// ArrayData for [variable-size list 
arrays](https://arrow.apache.org/docs/format/Columnar.html#variable-size-list-layout)
+#[derive(Debug, Clone)]
+pub struct ListArrayData<O: ListOffset> {
+    data_type: DataType,
+    nulls: Option<NullBuffer>,
+    offsets: ScalarBuffer<O>,
+    child: Box<ArrayData>,
+}
+
+impl<O: ListOffset> ListArrayData<O> {
+    /// Create a new [`ListArrayData`]
+    ///
+    /// # Safety
+    ///
+    /// - Each consecutive window of `offsets` must identify a valid slice of 
`child`
+    /// - `nulls.len() == offsets.len() - 1`
+    /// - `data_type` must be valid for this layout
+    pub unsafe fn new_unchecked(
+        data_type: DataType,
+        offsets: ScalarBuffer<O>,
+        nulls: Option<NullBuffer>,
+        child: ArrayData,
+    ) -> Self {
+        Self {
+            data_type,
+            nulls,
+            offsets,
+            child: Box::new(child),
+        }
+    }
+
+    /// Returns the null buffer if any
+    #[inline]
+    pub fn nulls(&self) -> Option<&NullBuffer> {
+        self.nulls.as_ref()
+    }
+
+    /// Returns the offsets
+    #[inline]
+    pub fn offsets(&self) -> &[O] {
+        &self.offsets
+    }
+
+    /// Returns the child data
+    #[inline]
+    pub fn child(&self) -> &ArrayData {
+        self.child.as_ref()
+    }
+
+    /// Returns the data type of this array
+    #[inline]
+    pub fn data_type(&self) -> &DataType {
+        &self.data_type
+    }
+}
+
+/// ArrayData for [fixed-size list 
arrays](https://arrow.apache.org/docs/format/Columnar.html#fixed-size-list-layout)
+#[derive(Debug, Clone)]
+pub struct FixedSizeListArrayData {
+    data_type: DataType,
+    nulls: Option<NullBuffer>,
+    child: Box<ArrayData>,
+}
+
+impl FixedSizeListArrayData {
+    /// Create a new [`FixedSizeListArrayData`]
+    ///
+    /// # Safety
+    ///
+    /// - `data_type` must be valid for this layout
+    /// - `nulls.len() == child.len() / element_size`
+    pub unsafe fn new_unchecked(
+        data_type: DataType,
+        nulls: Option<NullBuffer>,
+        child: ArrayData,
+    ) -> Self {
+        Self {
+            data_type,
+            nulls,
+            child: Box::new(child),
+        }
+    }
+
+    /// Returns the null buffer if any
+    #[inline]
+    pub fn nulls(&self) -> Option<&NullBuffer> {
+        self.nulls.as_ref()
+    }
+
+    /// Returns the child data
+    #[inline]
+    pub fn child(&self) -> &ArrayData {
+        self.child.as_ref()
+    }
+
+    /// Returns the data type of this array
+    #[inline]
+    pub fn data_type(&self) -> &DataType {
+        &self.data_type
+    }
+}
diff --git a/arrow-data/src/data/mod.rs b/arrow-data/src/data/mod.rs
index eb1fe2bcf..2f9e142b1 100644
--- a/arrow-data/src/data/mod.rs
+++ b/arrow-data/src/data/mod.rs
@@ -32,9 +32,19 @@ use crate::equal;
 #[allow(unused)] // Private until ready (#1176)
 mod bytes;
 #[allow(unused)] // Private until ready (#1176)
+mod dictionary;
+#[allow(unused)] // Private until ready (#1176)
+mod list;
+#[allow(unused)] // Private until ready (#1176)
 mod primitive;
 #[allow(unused)] // Private until ready (#1176)
+mod run;
+#[allow(unused)] // Private until ready (#1176)
+mod r#struct;
+#[allow(unused)] // Private until ready (#1176)
 mod types;
+#[allow(unused)] // Private until ready (#1176)
+mod union;
 
 #[inline]
 pub(crate) fn contains_nulls(
diff --git a/arrow-data/src/data/primitive.rs b/arrow-data/src/data/primitive.rs
index d34ef42db..058b3e822 100644
--- a/arrow-data/src/data/primitive.rs
+++ b/arrow-data/src/data/primitive.rs
@@ -43,13 +43,13 @@ mod private {
 }
 
 pub trait Primitive: private::PrimitiveSealed + ArrowNativeType {
-    const VARIANT: PrimitiveType;
+    const TYPE: PrimitiveType;
 }
 
 macro_rules! primitive {
     ($t:ty,$v:ident) => {
         impl Primitive for $t {
-            const VARIANT: PrimitiveType = PrimitiveType::$v;
+            const TYPE: PrimitiveType = PrimitiveType::$v;
         }
         impl private::PrimitiveSealed for $t {
             fn downcast_ref(
@@ -118,7 +118,13 @@ impl ArrayDataPrimitive {
     }
 }
 
-/// ArrayData for arrays of [`Primitive`]
+impl<P: Primitive> From<PrimitiveArrayData<P>> for ArrayDataPrimitive {
+    fn from(value: PrimitiveArrayData<P>) -> Self {
+        P::upcast(value)
+    }
+}
+
+/// ArrayData for [fixed size 
arrays](https://arrow.apache.org/docs/format/Columnar.html#fixed-size-primitive-layout)
 of [`Primitive`]
 #[derive(Debug, Clone)]
 pub struct PrimitiveArrayData<T: Primitive> {
     data_type: DataType,
@@ -126,12 +132,6 @@ pub struct PrimitiveArrayData<T: Primitive> {
     values: ScalarBuffer<T>,
 }
 
-impl<P: Primitive> From<PrimitiveArrayData<P>> for ArrayDataPrimitive {
-    fn from(value: PrimitiveArrayData<P>) -> Self {
-        P::upcast(value)
-    }
-}
-
 impl<T: Primitive> PrimitiveArrayData<T> {
     /// Create a new [`PrimitiveArrayData`]
     ///
@@ -147,10 +147,10 @@ impl<T: Primitive> PrimitiveArrayData<T> {
     ) -> Self {
         let physical = PhysicalType::from(&data_type);
         assert!(
-            matches!(physical, PhysicalType::Primitive(p) if p == T::VARIANT),
+            matches!(physical, PhysicalType::Primitive(p) if p == T::TYPE),
             "Illegal physical type for PrimitiveArrayData of datatype {:?}, 
expected {:?} got {:?}",
             data_type,
-            T::VARIANT,
+            T::TYPE,
             physical
         );
 
diff --git a/arrow-data/src/data/run.rs b/arrow-data/src/data/run.rs
new file mode 100644
index 000000000..cd993de1b
--- /dev/null
+++ b/arrow-data/src/data/run.rs
@@ -0,0 +1,149 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::data::types::RunEndType;
+use crate::ArrayData;
+use arrow_buffer::buffer::ScalarBuffer;
+use arrow_buffer::ArrowNativeType;
+use arrow_schema::DataType;
+use std::marker::PhantomData;
+
+mod private {
+    use super::*;
+
+    pub trait RunEndSealed {
+        /// Downcast [`ArrayDataRun`] to [`RunArrayData`]
+        fn downcast_ref(data: &ArrayDataRun) -> Option<&RunArrayData<Self>>
+        where
+            Self: RunEnd;
+
+        /// Downcast [`ArrayDataRun`] to [`RunArrayData`]
+        fn downcast(data: ArrayDataRun) -> Option<RunArrayData<Self>>
+        where
+            Self: RunEnd;
+
+        /// Cast [`RunArrayData`] to [`ArrayDataRun`]
+        fn upcast(v: RunArrayData<Self>) -> ArrayDataRun
+        where
+            Self: RunEnd;
+    }
+}
+
+pub trait RunEnd: private::RunEndSealed + ArrowNativeType {
+    const TYPE: RunEndType;
+}
+
+macro_rules! run_end {
+    ($t:ty,$v:ident) => {
+        impl RunEnd for $t {
+            const TYPE: RunEndType = RunEndType::$v;
+        }
+        impl private::RunEndSealed for $t {
+            fn downcast_ref(data: &ArrayDataRun) -> 
Option<&RunArrayData<Self>> {
+                match data {
+                    ArrayDataRun::$v(v) => Some(v),
+                    _ => None,
+                }
+            }
+
+            fn downcast(data: ArrayDataRun) -> Option<RunArrayData<Self>> {
+                match data {
+                    ArrayDataRun::$v(v) => Some(v),
+                    _ => None,
+                }
+            }
+
+            fn upcast(v: RunArrayData<Self>) -> ArrayDataRun {
+                ArrayDataRun::$v(v)
+            }
+        }
+    };
+}
+
+run_end!(i16, Int16);
+run_end!(i32, Int32);
+run_end!(i64, Int64);
+
+/// An enumeration of the types of [`RunArrayData`]
+pub enum ArrayDataRun {
+    Int16(RunArrayData<i16>),
+    Int32(RunArrayData<i32>),
+    Int64(RunArrayData<i64>),
+}
+
+impl ArrayDataRun {
+    /// Downcast this [`ArrayDataRun`] to the corresponding [`RunArrayData`]
+    pub fn downcast_ref<E: RunEnd>(&self) -> Option<&RunArrayData<E>> {
+        E::downcast_ref(self)
+    }
+
+    /// Downcast this [`ArrayDataRun`] to the corresponding [`RunArrayData`]
+    pub fn downcast<E: RunEnd>(self) -> Option<RunArrayData<E>> {
+        E::downcast(self)
+    }
+}
+
+impl<E: RunEnd> From<RunArrayData<E>> for ArrayDataRun {
+    fn from(value: RunArrayData<E>) -> Self {
+        E::upcast(value)
+    }
+}
+
+/// ArrayData for [run-end encoded 
arrays](https://arrow.apache.org/docs/format/Columnar.html#run-end-encoded-layout)
+pub struct RunArrayData<E: RunEnd> {
+    data_type: DataType,
+    run_ends: ScalarBuffer<E>,
+    child: Box<ArrayData>,
+}
+
+impl<E: RunEnd> RunArrayData<E> {
+    /// Create a new [`RunArrayData`]
+    ///
+    /// # Safety
+    ///
+    /// - `data_type` must be valid for this layout
+    /// - `run_ends` must contain monotonically increasing, positive values 
`<= child.len()`
+    pub unsafe fn new_unchecked(
+        data_type: DataType,
+        run_ends: ScalarBuffer<E>,
+        child: ArrayData,
+    ) -> Self {
+        Self {
+            data_type,
+            run_ends,
+            child: Box::new(child),
+        }
+    }
+
+    /// Returns the run ends
+    #[inline]
+    pub fn run_ends(&self) -> &[E] {
+        &self.run_ends
+    }
+
+    /// Returns the data type of this array
+    #[inline]
+    pub fn data_type(&self) -> &DataType {
+        &self.data_type
+    }
+
+    /// Returns the child data
+    #[inline]
+    pub fn child(&self) -> &ArrayData {
+        self.child.as_ref()
+    }
+}
diff --git a/arrow-data/src/data/struct.rs b/arrow-data/src/data/struct.rs
new file mode 100644
index 000000000..d99992619
--- /dev/null
+++ b/arrow-data/src/data/struct.rs
@@ -0,0 +1,81 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::ArrayData;
+use arrow_buffer::buffer::NullBuffer;
+use arrow_schema::DataType;
+
+/// ArrayData for [struct 
arrays](https://arrow.apache.org/docs/format/Columnar.html#struct-layout)
+#[derive(Debug, Clone)]
+pub struct StructArrayData {
+    data_type: DataType,
+    len: usize,
+    nulls: Option<NullBuffer>,
+    children: Vec<ArrayData>,
+}
+
+impl StructArrayData {
+    /// Create a new [`StructArrayData`]
+    ///
+    /// # Safety
+    ///
+    /// - `data_type` must be a struct type with fields matching `children`
+    /// - all `children` and `nulls` must have length matching `len`
+    pub unsafe fn new_unchecked(
+        data_type: DataType,
+        len: usize,
+        nulls: Option<NullBuffer>,
+        children: Vec<ArrayData>,
+    ) -> Self {
+        Self {
+            data_type,
+            len,
+            nulls,
+            children,
+        }
+    }
+
+    /// Returns the length of this [`StructArrayData`]
+    #[inline]
+    pub fn len(&self) -> usize {
+        self.len
+    }
+
+    /// Returns `true` if this [`StructArrayData`] has zero length
+    #[inline]
+    pub fn is_empty(&self) -> bool {
+        self.len == 0
+    }
+
+    /// Returns the null buffer if any
+    #[inline]
+    pub fn nulls(&self) -> Option<&NullBuffer> {
+        self.nulls.as_ref()
+    }
+
+    /// Returns the child data, one [`ArrayData`] per struct field
+    #[inline]
+    pub fn children(&self) -> &[ArrayData] {
+        &self.children
+    }
+
+    /// Returns the data type of this array
+    #[inline]
+    pub fn data_type(&self) -> &DataType {
+        &self.data_type
+    }
+}
diff --git a/arrow-data/src/data/types.rs b/arrow-data/src/data/types.rs
index 09e169f6a..3414e481c 100644
--- a/arrow-data/src/data/types.rs
+++ b/arrow-data/src/data/types.rs
@@ -80,7 +80,6 @@ pub enum PhysicalType {
     Bytes(OffsetType, BytesType),
     FixedSizeList,
     List(OffsetType),
-    Map,
     Struct,
     Union,
     Dictionary(DictionaryKeyType),
@@ -141,7 +140,7 @@ impl From<&DataType> for PhysicalType {
                 DataType::UInt64 => 
Self::Dictionary(DictionaryKeyType::UInt64),
                 d => panic!("illegal dictionary key data type {d}"),
             },
-            DataType::Map(_, _) => Self::Map,
+            DataType::Map(_, _) => Self::List(OffsetType::Int32),
             DataType::RunEndEncoded(f, _) => match f.data_type() {
                 DataType::Int16 => Self::Run(RunEndType::Int16),
                 DataType::Int32 => Self::Run(RunEndType::Int32),
diff --git a/arrow-data/src/data/union.rs b/arrow-data/src/data/union.rs
new file mode 100644
index 000000000..7861bd154
--- /dev/null
+++ b/arrow-data/src/data/union.rs
@@ -0,0 +1,77 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::ArrayData;
+use arrow_buffer::buffer::ScalarBuffer;
+use arrow_schema::DataType;
+
+/// ArrayData for [union 
arrays](https://arrow.apache.org/docs/format/Columnar.html#union-layout)
+#[derive(Debug, Clone)]
+pub struct UnionArrayData {
+    data_type: DataType,
+    type_ids: ScalarBuffer<i8>,
+    offsets: Option<ScalarBuffer<i32>>,
+    children: Vec<ArrayData>,
+}
+
+impl UnionArrayData {
+    /// Creates a new [`UnionArrayData`]
+    ///
+    /// # Safety
+    ///
+    /// - `data_type` must be valid for this layout
+    /// - `type_ids` must only contain values corresponding to a field in 
`data_type`
+    /// - `children` must match the field definitions in `data_type`
+    /// - For each id in `type_ids`, the corresponding offset must be in 
bounds for the child
+    pub unsafe fn new_unchecked(
+        data_type: DataType,
+        type_ids: ScalarBuffer<i8>,
+        offsets: Option<ScalarBuffer<i32>>,
+        children: Vec<ArrayData>,
+    ) -> Self {
+        Self {
+            data_type,
+            type_ids,
+            offsets,
+            children,
+        }
+    }
+
+    /// Returns the type ids for this array
+    #[inline]
+    pub fn type_ids(&self) -> &[i8] {
+        &self.type_ids
+    }
+
+    /// Returns the offsets for this array if this is a dense union
+    #[inline]
+    pub fn offsets(&self) -> Option<&[i32]> {
+        self.offsets.as_deref()
+    }
+
+    /// Returns the children of this array
+    #[inline]
+    pub fn children(&self) -> &[ArrayData] {
+        &self.children
+    }
+
+    /// Returns the data type of this array
+    #[inline]
+    pub fn data_type(&self) -> &DataType {
+        &self.data_type
+    }
+}

Reply via email to