This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new ae4db6016 Revert structured ArrayData (#1799) (#3894)
ae4db6016 is described below
commit ae4db601642c752b63a0331a4545ee71f8b6d7cd
Author: Raphael Taylor-Davies <[email protected]>
AuthorDate: Tue Mar 21 19:01:03 2023 +0000
Revert structured ArrayData (#1799) (#3894)
---
arrow-data/src/data/boolean.rs | 139 ----------
arrow-data/src/data/buffers.rs | 10 -
arrow-data/src/data/bytes.rs | 559 --------------------------------------
arrow-data/src/data/dictionary.rs | 289 --------------------
arrow-data/src/data/list.rs | 422 ----------------------------
arrow-data/src/data/mod.rs | 230 +++++-----------
arrow-data/src/data/null.rs | 104 -------
arrow-data/src/data/primitive.rs | 304 ---------------------
arrow-data/src/data/run.rs | 277 -------------------
arrow-data/src/data/struct.rs | 129 ---------
arrow-data/src/data/types.rs | 152 -----------
arrow-data/src/data/union.rs | 171 ------------
12 files changed, 72 insertions(+), 2714 deletions(-)
diff --git a/arrow-data/src/data/boolean.rs b/arrow-data/src/data/boolean.rs
deleted file mode 100644
index 258624cc1..000000000
--- a/arrow-data/src/data/boolean.rs
+++ /dev/null
@@ -1,139 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use crate::data::types::PhysicalType;
-use crate::data::ArrayDataLayout;
-use crate::{ArrayDataBuilder, Buffers};
-use arrow_buffer::buffer::{BooleanBuffer, NullBuffer};
-use arrow_schema::DataType;
-
-#[derive(Debug, Clone)]
-pub struct BooleanArrayData {
- data_type: DataType,
- values: BooleanBuffer,
- nulls: Option<NullBuffer>,
-}
-
-impl BooleanArrayData {
- /// Create a new [`BooleanArrayData`]
- ///
- /// # Panics
- ///
- /// Panics if
- /// - `nulls` and `values` are different lengths
- /// - `PhysicalType::from(&data_type) != PhysicalType::Boolean`
- pub fn new(
- data_type: DataType,
- values: BooleanBuffer,
- nulls: Option<NullBuffer>,
- ) -> Self {
- let physical = PhysicalType::from(&data_type);
- assert_eq!(
- physical, PhysicalType::Boolean,
- "Illegal physical type for BooleanArrayData of datatype {:?}, expected {:?} got {:?}",
- data_type,
- PhysicalType::Boolean,
- physical
- );
-
- if let Some(n) = nulls.as_ref() {
- assert_eq!(values.len(), n.len())
- }
- Self {
- data_type,
- values,
- nulls,
- }
- }
-
- /// Create a new [`BooleanArrayData`]
- ///
- /// # Safety
- ///
- /// - `nulls` and `values` are the same lengths
- /// - `PhysicalType::from(&data_type) == PhysicalType::Boolean`
- pub unsafe fn new_unchecked(
- data_type: DataType,
- values: BooleanBuffer,
- nulls: Option<NullBuffer>,
- ) -> Self {
- Self {
- data_type,
- values,
- nulls,
- }
- }
-
- /// Creates a new [`BooleanArrayData`] from raw buffers
- ///
- /// # Safety
- ///
- /// See [`BooleanArrayData::new_unchecked`]
- pub(crate) unsafe fn from_raw(builder: ArrayDataBuilder) -> Self {
- let values = builder.buffers.into_iter().next().unwrap();
- let values = BooleanBuffer::new(values, builder.offset, builder.len);
- Self {
- values,
- data_type: builder.data_type,
- nulls: builder.nulls,
- }
- }
-
- /// Returns the null buffer if any
- #[inline]
- pub fn nulls(&self) -> Option<&NullBuffer> {
- self.nulls.as_ref()
- }
-
- /// Returns the boolean values
- #[inline]
- pub fn values(&self) -> &BooleanBuffer {
- &self.values
- }
-
- /// Returns the data type of this array
- #[inline]
- pub fn data_type(&self) -> &DataType {
- &self.data_type
- }
-
- /// Returns the underlying parts of this [`BooleanArrayData`]
- pub fn into_parts(self) -> (DataType, BooleanBuffer, Option<NullBuffer>) {
- (self.data_type, self.values, self.nulls)
- }
-
- /// Returns a zero-copy slice of this array
- pub fn slice(&self, offset: usize, len: usize) -> Self {
- Self {
- data_type: self.data_type.clone(),
- values: self.values.slice(offset, len),
- nulls: self.nulls.as_ref().map(|x| x.slice(offset, len)),
- }
- }
-
- /// Returns an [`ArrayDataLayout`] representation of this
- pub(crate) fn layout(&self) -> ArrayDataLayout<'_> {
- ArrayDataLayout {
- data_type: &self.data_type,
- len: self.values.len(),
- offset: self.values.offset(),
- nulls: self.nulls.as_ref(),
- buffers: Buffers::one(self.values().inner()),
- child_data: &[],
- }
- }
-}
diff --git a/arrow-data/src/data/buffers.rs b/arrow-data/src/data/buffers.rs
index 8a498d319..883e92e36 100644
--- a/arrow-data/src/data/buffers.rs
+++ b/arrow-data/src/data/buffers.rs
@@ -33,16 +33,6 @@ impl<'a> Buffers<'a> {
}
}
- #[inline]
- pub(crate) fn one(b: &'a Buffer) -> Self {
- Self([Some(b), None])
- }
-
- #[inline]
- pub(crate) fn two(a: &'a Buffer, b: &'a Buffer) -> Self {
- Self([Some(a), Some(b)])
- }
-
/// Returns the number of [`Buffer`] in this collection
#[inline]
pub fn len(&self) -> usize {
diff --git a/arrow-data/src/data/bytes.rs b/arrow-data/src/data/bytes.rs
deleted file mode 100644
index 9ac267130..000000000
--- a/arrow-data/src/data/bytes.rs
+++ /dev/null
@@ -1,559 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use crate::data::types::{BytesType, OffsetType};
-use crate::data::ArrayDataLayout;
-use crate::{ArrayDataBuilder, Buffers};
-use arrow_buffer::buffer::{NullBuffer, OffsetBuffer, ScalarBuffer};
-use arrow_buffer::{ArrowNativeType, Buffer};
-use arrow_schema::DataType;
-use std::marker::PhantomData;
-
-mod private {
- use super::*;
-
- pub trait BytesSealed {
- /// Create from bytes without performing any validation
- ///
- /// # Safety
- ///
- /// If `str`, `b` must be a valid UTF-8 sequence
- unsafe fn from_bytes_unchecked(b: &[u8]) -> &Self;
-
- /// Downcast [`ArrayDataBytes`] to `[ArrayDataBytesOffset`]
- fn downcast_ref(data: &ArrayDataBytes) -> Option<&ArrayDataBytesOffset<Self>>
- where
- Self: Bytes;
-
- /// Downcast [`ArrayDataBytes`] to `[ArrayDataBytesOffset`]
- fn downcast(data: ArrayDataBytes) -> Option<ArrayDataBytesOffset<Self>>
- where
- Self: Bytes;
-
- /// Cast [`ArrayDataBytesOffset`] to [`ArrayDataBytes`]
- fn upcast(v: ArrayDataBytesOffset<Self>) -> ArrayDataBytes
- where
- Self: Bytes;
- }
-
- pub trait BytesOffsetSealed {
- /// Downcast [`ArrayDataBytesOffset`] to `[BytesArrayData`]
- fn downcast_ref<B: Bytes + ?Sized>(
- data: &ArrayDataBytesOffset<B>,
- ) -> Option<&BytesArrayData<Self, B>>
- where
- Self: BytesOffset;
-
- /// Downcast [`ArrayDataBytesOffset`] to `[BytesArrayData`]
- fn downcast<B: Bytes + ?Sized>(
- data: ArrayDataBytesOffset<B>,
- ) -> Option<BytesArrayData<Self, B>>
- where
- Self: BytesOffset;
-
- /// Cast [`BytesArrayData`] to [`ArrayDataBytesOffset`]
- fn upcast<B: Bytes + ?Sized>(
- v: BytesArrayData<Self, B>,
- ) -> ArrayDataBytesOffset<B>
- where
- Self: BytesOffset;
- }
-}
-
-/// Types backed by a variable length slice of bytes
-pub trait Bytes: private::BytesSealed + std::fmt::Debug {
- const TYPE: BytesType;
-}
-
-impl Bytes for [u8] {
- const TYPE: BytesType = BytesType::Binary;
-}
-
-impl private::BytesSealed for [u8] {
- unsafe fn from_bytes_unchecked(b: &[u8]) -> &Self {
- b
- }
-
- fn downcast_ref(data: &ArrayDataBytes) -> Option<&ArrayDataBytesOffset<Self>> {
- match data {
- ArrayDataBytes::Binary(v) => Some(v),
- ArrayDataBytes::Utf8(_) => None,
- }
- }
-
- fn downcast(data: ArrayDataBytes) -> Option<ArrayDataBytesOffset<Self>> {
- match data {
- ArrayDataBytes::Binary(v) => Some(v),
- ArrayDataBytes::Utf8(_) => None,
- }
- }
-
- fn upcast(v: ArrayDataBytesOffset<Self>) -> ArrayDataBytes {
- ArrayDataBytes::Binary(v)
- }
-}
-
-impl Bytes for str {
- const TYPE: BytesType = BytesType::Utf8;
-}
-
-impl private::BytesSealed for str {
- unsafe fn from_bytes_unchecked(b: &[u8]) -> &Self {
- std::str::from_utf8_unchecked(b)
- }
-
- fn downcast_ref(data: &ArrayDataBytes) -> Option<&ArrayDataBytesOffset<Self>> {
- match data {
- ArrayDataBytes::Binary(_) => None,
- ArrayDataBytes::Utf8(v) => Some(v),
- }
- }
-
- fn downcast(data: ArrayDataBytes) -> Option<ArrayDataBytesOffset<Self>> {
- match data {
- ArrayDataBytes::Binary(_) => None,
- ArrayDataBytes::Utf8(v) => Some(v),
- }
- }
-
- fn upcast(v: ArrayDataBytesOffset<Self>) -> ArrayDataBytes {
- ArrayDataBytes::Utf8(v)
- }
-}
-
-/// Types of offset used by variable length byte arrays
-pub trait BytesOffset: private::BytesOffsetSealed + ArrowNativeType {
- const TYPE: OffsetType;
-}
-
-impl BytesOffset for i32 {
- const TYPE: OffsetType = OffsetType::Int32;
-}
-
-impl private::BytesOffsetSealed for i32 {
- fn downcast_ref<B: Bytes + ?Sized>(
- data: &ArrayDataBytesOffset<B>,
- ) -> Option<&BytesArrayData<Self, B>> {
- match data {
- ArrayDataBytesOffset::Small(v) => Some(v),
- ArrayDataBytesOffset::Large(_) => None,
- }
- }
-
- fn downcast<B: Bytes + ?Sized>(
- data: ArrayDataBytesOffset<B>,
- ) -> Option<BytesArrayData<Self, B>> {
- match data {
- ArrayDataBytesOffset::Small(v) => Some(v),
- ArrayDataBytesOffset::Large(_) => None,
- }
- }
-
- fn upcast<B: Bytes + ?Sized>(v: BytesArrayData<Self, B>) -> ArrayDataBytesOffset<B> {
- ArrayDataBytesOffset::Small(v)
- }
-}
-
-impl BytesOffset for i64 {
- const TYPE: OffsetType = OffsetType::Int64;
-}
-
-impl private::BytesOffsetSealed for i64 {
- fn downcast_ref<B: Bytes + ?Sized>(
- data: &ArrayDataBytesOffset<B>,
- ) -> Option<&BytesArrayData<Self, B>> {
- match data {
- ArrayDataBytesOffset::Small(_) => None,
- ArrayDataBytesOffset::Large(v) => Some(v),
- }
- }
-
- fn downcast<B: Bytes + ?Sized>(
- data: ArrayDataBytesOffset<B>,
- ) -> Option<BytesArrayData<Self, B>> {
- match data {
- ArrayDataBytesOffset::Small(_) => None,
- ArrayDataBytesOffset::Large(v) => Some(v),
- }
- }
-
- fn upcast<B: Bytes + ?Sized>(v: BytesArrayData<Self, B>) -> ArrayDataBytesOffset<B> {
- ArrayDataBytesOffset::Large(v)
- }
-}
-
-/// Applies op to each variant of [`ArrayDataBytes`]
-macro_rules! bytes_op {
- ($array:ident, $op:block) => {
- match $array {
- ArrayDataBytes::Binary($array) => match $array {
- ArrayDataBytesOffset::Small($array) => $op
- ArrayDataBytesOffset::Large($array) => $op
- }
- ArrayDataBytes::Utf8($array) => match $array {
- ArrayDataBytesOffset::Small($array) => $op
- ArrayDataBytesOffset::Large($array) => $op
- }
- }
- };
-}
-
-/// An enumeration of the types of [`ArrayDataBytesOffset`]
-#[derive(Debug, Clone)]
-pub enum ArrayDataBytes {
- Binary(ArrayDataBytesOffset<[u8]>),
- Utf8(ArrayDataBytesOffset<str>),
-}
-
-impl ArrayDataBytes {
- /// Downcast this [`ArrayDataBytes`] to the corresponding [`BytesArrayData`]
- pub fn downcast_ref<O: BytesOffset, B: Bytes + ?Sized>(
- &self,
- ) -> Option<&BytesArrayData<O, B>> {
- O::downcast_ref(B::downcast_ref(self)?)
- }
-
- /// Downcast this [`ArrayDataBytes`] to the corresponding [`BytesArrayData`]
- pub fn downcast<O: BytesOffset, B: Bytes + ?Sized>(
- self,
- ) -> Option<BytesArrayData<O, B>> {
- O::downcast(B::downcast(self)?)
- }
-
- /// Returns a zero-copy slice of this array
- pub fn slice(&self, offset: usize, len: usize) -> Self {
- let s = self;
- bytes_op!(s, { s.slice(offset, len).into() })
- }
-
- /// Returns an [`ArrayDataLayout`] representation of this
- pub(crate) fn layout(&self) -> ArrayDataLayout<'_> {
- let s = self;
- bytes_op!(s, { s.layout() })
- }
-
- /// Creates a new [`ArrayDataBytes`] from raw buffers
- ///
- /// # Safety
- ///
- /// See [`BytesArrayData::new_unchecked`]
- pub(crate) unsafe fn from_raw(
- builder: ArrayDataBuilder,
- offset: OffsetType,
- bytes: BytesType,
- ) -> Self {
- match bytes {
- BytesType::Binary => Self::Binary(match offset {
- OffsetType::Int32 => {
- ArrayDataBytesOffset::Small(BytesArrayData::from_raw(builder))
- }
- OffsetType::Int64 => {
- ArrayDataBytesOffset::Large(BytesArrayData::from_raw(builder))
- }
- }),
- BytesType::Utf8 => Self::Utf8(match offset {
- OffsetType::Int32 => {
- ArrayDataBytesOffset::Small(BytesArrayData::from_raw(builder))
- }
- OffsetType::Int64 => {
- ArrayDataBytesOffset::Large(BytesArrayData::from_raw(builder))
- }
- }),
- }
- }
-}
-
-/// An enumeration of the types of [`BytesArrayData`]
-#[derive(Debug)]
-pub enum ArrayDataBytesOffset<B: Bytes + ?Sized> {
- Small(BytesArrayData<i32, B>),
- Large(BytesArrayData<i64, B>),
-}
-
-impl<B: Bytes + ?Sized> Clone for ArrayDataBytesOffset<B> {
- fn clone(&self) -> Self {
- match self {
- Self::Small(v) => Self::Small(v.clone()),
- Self::Large(v) => Self::Large(v.clone()),
- }
- }
-}
-
-impl<O: BytesOffset, B: Bytes + ?Sized> From<BytesArrayData<O, B>> for ArrayDataBytes {
- fn from(value: BytesArrayData<O, B>) -> Self {
- B::upcast(O::upcast(value))
- }
-}
-
-/// ArrayData for [variable-sized arrays](https://arrow.apache.org/docs/format/Columnar.html#variable-size-binary-layout) of [`Bytes`]
-#[derive(Debug)]
-pub struct BytesArrayData<O: BytesOffset, B: Bytes + ?Sized> {
- data_type: DataType,
- offsets: OffsetBuffer<O>,
- values: Buffer,
- nulls: Option<NullBuffer>,
- phantom: PhantomData<B>,
-}
-
-impl<O: BytesOffset, B: Bytes + ?Sized> Clone for BytesArrayData<O, B> {
- fn clone(&self) -> Self {
- Self {
- data_type: self.data_type.clone(),
- nulls: self.nulls.clone(),
- offsets: self.offsets.clone(),
- values: self.values.clone(),
- phantom: Default::default(),
- }
- }
-}
-
-impl<O: BytesOffset, B: Bytes + ?Sized> BytesArrayData<O, B> {
- /// Creates a new [`BytesArrayData`]
- ///
- /// # Safety
- ///
- /// - Each consecutive window of `offsets` must identify a valid slice of `values`
- /// - `nulls.len() == offsets.len() - 1`
- /// - `PhysicalType::from(&data_type) == PhysicalType::Bytes(O::TYPE, B::TYPE)`
- pub unsafe fn new_unchecked(
- data_type: DataType,
- offsets: OffsetBuffer<O>,
- values: Buffer,
- nulls: Option<NullBuffer>,
- ) -> Self {
- Self {
- data_type,
- nulls,
- offsets,
- values,
- phantom: Default::default(),
- }
- }
-
- /// Creates a new [`BytesArrayData`] from an [`ArrayDataBuilder`]
- ///
- /// # Safety
- ///
- /// See [`Self::new_unchecked`]
- pub(crate) unsafe fn from_raw(builder: ArrayDataBuilder) -> Self {
- let mut iter = builder.buffers.into_iter();
- let offsets = iter.next().unwrap();
- let values = iter.next().unwrap();
-
- let offsets = match builder.len {
- 0 => OffsetBuffer::new_empty(),
- _ => OffsetBuffer::new_unchecked(ScalarBuffer::new(
- offsets,
- builder.offset,
- builder.len + 1,
- )),
- };
-
- Self {
- values,
- offsets,
- data_type: builder.data_type,
- nulls: builder.nulls,
- phantom: Default::default(),
- }
- }
-
- /// Returns the length
- #[inline]
- pub fn len(&self) -> usize {
- self.offsets.len().wrapping_sub(1)
- }
-
- /// Returns true if this array is empty
- #[inline]
- pub fn is_empty(&self) -> bool {
- self.offsets.len() <= 1
- }
-
- /// Returns the raw byte data
- #[inline]
- pub fn values(&self) -> &B {
- // Safety:
- // Bytes must be valid
- unsafe { B::from_bytes_unchecked(self.values.as_slice()) }
- }
-
- /// Returns the offsets
- #[inline]
- pub fn offsets(&self) -> &OffsetBuffer<O> {
- &self.offsets
- }
-
- /// Returns the null buffer if any
- #[inline]
- pub fn nulls(&self) -> Option<&NullBuffer> {
- self.nulls.as_ref()
- }
-
- /// Returns the data type of this array
- #[inline]
- pub fn data_type(&self) -> &DataType {
- &self.data_type
- }
-
- /// Returns the underlying parts of this [`BytesArrayData`]
- pub fn into_parts(self) -> (DataType, OffsetBuffer<O>, Buffer, Option<NullBuffer>) {
- (self.data_type, self.offsets, self.values, self.nulls)
- }
-
- /// Returns a zero-copy slice of this array
- pub fn slice(&self, offset: usize, len: usize) -> Self {
- Self {
- values: self.values.clone(),
- offsets: self.offsets.slice(offset, len),
- data_type: self.data_type.clone(),
- nulls: self.nulls().as_ref().map(|x| x.slice(offset, len)),
- phantom: Default::default(),
- }
- }
-
- /// Returns an [`ArrayDataLayout`] representation of this
- pub(crate) fn layout(&self) -> ArrayDataLayout<'_> {
- ArrayDataLayout {
- data_type: &self.data_type,
- len: self.offsets.len().wrapping_sub(1),
- offset: 0,
- nulls: self.nulls.as_ref(),
- buffers: Buffers::two(self.offsets.inner().inner(), &self.values),
- child_data: &[],
- }
- }
-}
-
-/// ArrayData for [fixed-size arrays](https://arrow.apache.org/docs/format/Columnar.html#fixed-size-primitive-layout) of bytes
-#[derive(Debug, Clone)]
-pub struct FixedSizeBinaryArrayData {
- data_type: DataType,
- len: usize,
- element_size: usize,
- values: Buffer,
- nulls: Option<NullBuffer>,
-}
-
-impl FixedSizeBinaryArrayData {
- /// Creates a new [`FixedSizeBinaryArrayData`]
- ///
- /// # Safety
- ///
- /// - `PhysicalType::from(&data_type) == PhysicalType::FixedSizeBinary(element_size)`
- /// - `nulls.len() == values.len() / element_size == len`
- pub unsafe fn new_unchecked(
- data_type: DataType,
- len: usize,
- element_size: usize,
- values: Buffer,
- nulls: Option<NullBuffer>,
- ) -> Self {
- Self {
- data_type,
- nulls,
- values,
- len,
- element_size,
- }
- }
-
- /// Creates a new [`FixedSizeBinaryArrayData`] from raw buffers
- ///
- /// # Safety
- ///
- /// See [`FixedSizeBinaryArrayData::new_unchecked`]
- pub(crate) unsafe fn from_raw(builder: ArrayDataBuilder, size: usize) -> Self {
- let values = builder.buffers[0]
- .slice_with_length(builder.offset * size, builder.len * size);
- Self {
- values,
- data_type: builder.data_type,
- len: builder.len,
- element_size: size,
- nulls: builder.nulls,
- }
- }
-
- /// Returns the length
- #[inline]
- pub fn len(&self) -> usize {
- self.len
- }
-
- /// Returns true if this array is empty
- #[inline]
- pub fn is_empty(&self) -> bool {
- self.len == 0
- }
-
- /// Returns the size of each element
- #[inline]
- pub fn element_size(&self) -> usize {
- self.element_size
- }
-
- /// Returns the raw byte data
- #[inline]
- pub fn values(&self) -> &[u8] {
- &self.values
- }
-
- /// Returns the null buffer if any
- #[inline]
- pub fn nulls(&self) -> Option<&NullBuffer> {
- self.nulls.as_ref()
- }
-
- /// Returns the data type of this array
- #[inline]
- pub fn data_type(&self) -> &DataType {
- &self.data_type
- }
-
- /// Returns the underlying parts of this [`FixedSizeBinaryArrayData`]
- pub fn into_parts(self) -> (DataType, Buffer, Option<NullBuffer>) {
- (self.data_type, self.values, self.nulls)
- }
-
- /// Returns a zero-copy slice of this array
- pub fn slice(&self, offset: usize, len: usize) -> Self {
- let offset_element = offset.checked_mul(self.element_size).expect("overflow");
- let len_element = len.checked_mul(self.element_size).expect("overflow");
- let values = self.values.slice_with_length(offset_element, len_element);
-
- Self {
- len,
- values,
- data_type: self.data_type.clone(),
- element_size: self.element_size,
- nulls: self.nulls().as_ref().map(|x| x.slice(offset, len)),
- }
- }
-
- /// Returns an [`ArrayDataLayout`] representation of this
- pub(crate) fn layout(&self) -> ArrayDataLayout<'_> {
- ArrayDataLayout {
- data_type: &self.data_type,
- len: self.len,
- offset: 0,
- nulls: self.nulls.as_ref(),
- buffers: Buffers::one(&self.values),
- child_data: &[],
- }
- }
-}
diff --git a/arrow-data/src/data/dictionary.rs b/arrow-data/src/data/dictionary.rs
deleted file mode 100644
index c95ee464b..000000000
--- a/arrow-data/src/data/dictionary.rs
+++ /dev/null
@@ -1,289 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use crate::data::types::DictionaryKeyType;
-use crate::data::ArrayDataLayout;
-use crate::{ArrayData, ArrayDataBuilder, Buffers};
-use arrow_buffer::buffer::{NullBuffer, ScalarBuffer};
-use arrow_buffer::ArrowNativeType;
-use arrow_schema::DataType;
-
-mod private {
- use super::*;
-
- pub trait DictionaryKeySealed {
- /// Downcast [`ArrayDataDictionary`] to `[DictionaryArrayData`]
- fn downcast_ref(data: &ArrayDataDictionary) -> Option<&DictionaryArrayData<Self>>
- where
- Self: DictionaryKey;
-
- /// Downcast [`ArrayDataDictionary`] to `[DictionaryArrayData`]
- fn downcast(data: ArrayDataDictionary) -> Option<DictionaryArrayData<Self>>
- where
- Self: DictionaryKey;
-
- /// Cast [`DictionaryArrayData`] to [`ArrayDataDictionary`]
- fn upcast(v: DictionaryArrayData<Self>) -> ArrayDataDictionary
- where
- Self: DictionaryKey;
- }
-}
-
-/// Types of dictionary key used by dictionary arrays
-pub trait DictionaryKey: private::DictionaryKeySealed + ArrowNativeType {
- const TYPE: DictionaryKeyType;
-}
-
-macro_rules! dictionary {
- ($t:ty,$v:ident) => {
- impl DictionaryKey for $t {
- const TYPE: DictionaryKeyType = DictionaryKeyType::$v;
- }
- impl private::DictionaryKeySealed for $t {
- fn downcast_ref(
- data: &ArrayDataDictionary,
- ) -> Option<&DictionaryArrayData<Self>> {
- match data {
- ArrayDataDictionary::$v(v) => Some(v),
- _ => None,
- }
- }
-
- fn downcast(data: ArrayDataDictionary) -> Option<DictionaryArrayData<Self>> {
- match data {
- ArrayDataDictionary::$v(v) => Some(v),
- _ => None,
- }
- }
-
- fn upcast(v: DictionaryArrayData<Self>) -> ArrayDataDictionary {
- ArrayDataDictionary::$v(v)
- }
- }
- };
-}
-
-dictionary!(i8, Int8);
-dictionary!(i16, Int16);
-dictionary!(i32, Int32);
-dictionary!(i64, Int64);
-dictionary!(u8, UInt8);
-dictionary!(u16, UInt16);
-dictionary!(u32, UInt32);
-dictionary!(u64, UInt64);
-
-/// Applies op to each variant of [`ArrayDataDictionary`]
-macro_rules! dictionary_op {
- ($array:ident, $op:block) => {
- match $array {
- ArrayDataDictionary::Int8($array) => $op
- ArrayDataDictionary::Int16($array) => $op
- ArrayDataDictionary::Int32($array) => $op
- ArrayDataDictionary::Int64($array) => $op
- ArrayDataDictionary::UInt8($array) => $op
- ArrayDataDictionary::UInt16($array) => $op
- ArrayDataDictionary::UInt32($array) => $op
- ArrayDataDictionary::UInt64($array) => $op
- }
- };
-}
-
-/// An enumeration of the types of [`DictionaryArrayData`]
-#[derive(Debug, Clone)]
-pub enum ArrayDataDictionary {
- Int8(DictionaryArrayData<i8>),
- Int16(DictionaryArrayData<i16>),
- Int32(DictionaryArrayData<i32>),
- Int64(DictionaryArrayData<i64>),
- UInt8(DictionaryArrayData<u8>),
- UInt16(DictionaryArrayData<u16>),
- UInt32(DictionaryArrayData<u32>),
- UInt64(DictionaryArrayData<u64>),
-}
-
-impl ArrayDataDictionary {
- /// Downcast this [`ArrayDataDictionary`] to the corresponding [`DictionaryArrayData`]
- pub fn downcast_ref<K: DictionaryKey>(&self) -> Option<&DictionaryArrayData<K>> {
- K::downcast_ref(self)
- }
-
- /// Downcast this [`ArrayDataDictionary`] to the corresponding [`DictionaryArrayData`]
- pub fn downcast<K: DictionaryKey>(self) -> Option<DictionaryArrayData<K>> {
- K::downcast(self)
- }
-
- /// Returns the values of this dictionary
- pub fn values(&self) -> &ArrayData {
- let s = self;
- dictionary_op!(s, { s.values() })
- }
-
- /// Returns a zero-copy slice of this array
- pub fn slice(&self, offset: usize, len: usize) -> Self {
- let s = self;
- dictionary_op!(s, { s.slice(offset, len).into() })
- }
-
- /// Returns an [`ArrayDataLayout`] representation of this
- pub(crate) fn layout(&self) -> ArrayDataLayout<'_> {
- let s = self;
- dictionary_op!(s, { s.layout() })
- }
-
- /// Creates a new [`ArrayDataDictionary`] from raw buffers
- ///
- /// # Safety
- ///
- /// See [`DictionaryArrayData::new_unchecked`]
- pub(crate) unsafe fn from_raw(
- builder: ArrayDataBuilder,
- key: DictionaryKeyType,
- ) -> Self {
- use DictionaryKeyType::*;
- match key {
- Int8 => Self::Int8(DictionaryArrayData::from_raw(builder)),
- Int16 => Self::Int16(DictionaryArrayData::from_raw(builder)),
- Int32 => Self::Int32(DictionaryArrayData::from_raw(builder)),
- Int64 => Self::Int64(DictionaryArrayData::from_raw(builder)),
- UInt8 => Self::UInt8(DictionaryArrayData::from_raw(builder)),
- UInt16 => Self::UInt16(DictionaryArrayData::from_raw(builder)),
- UInt32 => Self::UInt32(DictionaryArrayData::from_raw(builder)),
- UInt64 => Self::UInt64(DictionaryArrayData::from_raw(builder)),
- }
- }
-}
-
-impl<K: DictionaryKey> From<DictionaryArrayData<K>> for ArrayDataDictionary {
- fn from(value: DictionaryArrayData<K>) -> Self {
- K::upcast(value)
- }
-}
-
-/// ArrayData for [dictionary arrays](https://arrow.apache.org/docs/format/Columnar.html#dictionary-encoded-layout)
-#[derive(Debug, Clone)]
-pub struct DictionaryArrayData<K: DictionaryKey> {
- data_type: DataType,
- nulls: Option<NullBuffer>,
- keys: ScalarBuffer<K>,
- values: Box<ArrayData>,
-}
-
-impl<K: DictionaryKey> DictionaryArrayData<K> {
- /// Create a new [`DictionaryArrayData`]
- ///
- /// # Safety
- ///
- /// - `PhysicalType::from(&data_type) == PhysicalType::Dictionary(K::TYPE)`
- /// - child must have a type matching `data_type`
- /// - all values in `keys` must be `0 < v < child.len()` or be a null according to `nulls`
- /// - `nulls` must have the same length as `child`
- pub unsafe fn new_unchecked(
- data_type: DataType,
- keys: ScalarBuffer<K>,
- nulls: Option<NullBuffer>,
- child: ArrayData,
- ) -> Self {
- Self {
- data_type,
- nulls,
- keys,
- values: Box::new(child),
- }
- }
-
- /// Creates a new [`DictionaryArrayData`] from raw buffers
- ///
- /// # Safety
- ///
- /// See [`Self::new_unchecked`]
- pub(crate) unsafe fn from_raw(builder: ArrayDataBuilder) -> Self {
- let keys = builder.buffers.into_iter().next().unwrap();
- let keys = ScalarBuffer::new(keys, builder.offset, builder.len);
- let values = builder.child_data.into_iter().next().unwrap();
- Self {
- keys,
- data_type: builder.data_type,
- nulls: builder.nulls,
- values: Box::new(values),
- }
- }
-
- /// Returns the length
- #[inline]
- pub fn len(&self) -> usize {
- self.keys.len()
- }
-
- /// Returns true if this array is empty
- #[inline]
- pub fn is_empty(&self) -> bool {
- self.keys.is_empty()
- }
-
- /// Returns the null buffer if any
- #[inline]
- pub fn nulls(&self) -> Option<&NullBuffer> {
- self.nulls.as_ref()
- }
-
- /// Returns the keys
- #[inline]
- pub fn keys(&self) -> &[K] {
- &self.keys
- }
-
- /// Returns the values data
- #[inline]
- pub fn values(&self) -> &ArrayData {
- self.values.as_ref()
- }
-
- /// Returns the data type of this array
- #[inline]
- pub fn data_type(&self) -> &DataType {
- &self.data_type
- }
-
- /// Returns the underlying parts of this [`DictionaryArrayData`]
- pub fn into_parts(
- self,
- ) -> (DataType, ScalarBuffer<K>, Option<NullBuffer>, ArrayData) {
- (self.data_type, self.keys, self.nulls, *self.values)
- }
-
- /// Returns a zero-copy slice of this array
- pub fn slice(&self, offset: usize, len: usize) -> Self {
- Self {
- keys: self.keys.slice(offset, len),
- data_type: self.data_type.clone(),
- nulls: self.nulls.as_ref().map(|x| x.slice(offset, len)),
- values: self.values.clone(),
- }
- }
-
- /// Returns an [`ArrayDataLayout`] representation of this
- pub(crate) fn layout(&self) -> ArrayDataLayout<'_> {
- ArrayDataLayout {
- data_type: &self.data_type,
- len: self.keys.len(),
- offset: 0,
- nulls: self.nulls.as_ref(),
- buffers: Buffers::one(self.keys.inner()),
- child_data: std::slice::from_ref(self.values.as_ref()),
- }
- }
-}
diff --git a/arrow-data/src/data/list.rs b/arrow-data/src/data/list.rs
deleted file mode 100644
index bcc89f8ba..000000000
--- a/arrow-data/src/data/list.rs
+++ /dev/null
@@ -1,422 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use crate::data::types::OffsetType;
-use crate::data::ArrayDataLayout;
-use crate::{ArrayData, ArrayDataBuilder, Buffers};
-use arrow_buffer::buffer::{NullBuffer, OffsetBuffer, ScalarBuffer};
-use arrow_buffer::ArrowNativeType;
-use arrow_schema::DataType;
-
-mod private {
- use super::*;
-
- pub trait ListOffsetSealed {
- /// Downcast [`ArrayDataList`] to `[ListArrayData`]
- fn downcast_ref(data: &ArrayDataList) -> Option<&ListArrayData<Self>>
- where
- Self: ListOffset;
-
- /// Downcast [`ArrayDataList`] to `[ListArrayData`]
- fn downcast(data: ArrayDataList) -> Option<ListArrayData<Self>>
- where
- Self: ListOffset;
-
- /// Cast [`ListArrayData`] to [`ArrayDataList`]
- fn upcast(v: ListArrayData<Self>) -> ArrayDataList
- where
- Self: ListOffset;
- }
-}
-
-/// Types of offset used by variable length list arrays
-pub trait ListOffset: private::ListOffsetSealed + ArrowNativeType {
- const TYPE: OffsetType;
-}
-
-impl ListOffset for i32 {
- const TYPE: OffsetType = OffsetType::Int32;
-}
-
-impl private::ListOffsetSealed for i32 {
- fn downcast_ref(data: &ArrayDataList) -> Option<&ListArrayData<Self>>
- where
- Self: ListOffset,
- {
- match data {
- ArrayDataList::Small(v) => Some(v),
- ArrayDataList::Large(_) => None,
- }
- }
-
- fn downcast(data: ArrayDataList) -> Option<ListArrayData<Self>>
- where
- Self: ListOffset,
- {
- match data {
- ArrayDataList::Small(v) => Some(v),
- ArrayDataList::Large(_) => None,
- }
- }
-
- fn upcast(v: ListArrayData<Self>) -> ArrayDataList
- where
- Self: ListOffset,
- {
- ArrayDataList::Small(v)
- }
-}
-
-impl ListOffset for i64 {
- const TYPE: OffsetType = OffsetType::Int64;
-}
-
-impl private::ListOffsetSealed for i64 {
- fn downcast_ref(data: &ArrayDataList) -> Option<&ListArrayData<Self>>
- where
- Self: ListOffset,
- {
- match data {
- ArrayDataList::Small(_) => None,
- ArrayDataList::Large(v) => Some(v),
- }
- }
-
- fn downcast(data: ArrayDataList) -> Option<ListArrayData<Self>>
- where
- Self: ListOffset,
- {
- match data {
- ArrayDataList::Small(_) => None,
- ArrayDataList::Large(v) => Some(v),
- }
- }
-
- fn upcast(v: ListArrayData<Self>) -> ArrayDataList
- where
- Self: ListOffset,
- {
- ArrayDataList::Large(v)
- }
-}
-
-/// Applies op to each variant of [`ListArrayData`]
-macro_rules! list_op {
- ($array:ident, $op:block) => {
- match $array {
- ArrayDataList::Small($array) => $op
- ArrayDataList::Large($array) => $op
- }
- };
-}
-
-/// An enumeration of the types of [`ListArrayData`]
-#[derive(Debug, Clone)]
-pub enum ArrayDataList {
- Small(ListArrayData<i32>),
- Large(ListArrayData<i64>),
-}
-
-impl ArrayDataList {
- /// Downcast this [`ArrayDataList`] to the corresponding [`ListArrayData`]
- pub fn downcast_ref<O: ListOffset>(&self) -> Option<&ListArrayData<O>> {
- O::downcast_ref(self)
- }
-
- /// Downcast this [`ArrayDataList`] to the corresponding [`ListArrayData`]
- pub fn downcast<O: ListOffset>(self) -> Option<ListArrayData<O>> {
- O::downcast(self)
- }
-
- /// Returns the values of this [`ArrayDataList`]
- pub fn values(&self) -> &ArrayData {
- let s = self;
- list_op!(s, { s.values() })
- }
-
- /// Returns a zero-copy slice of this array
- pub fn slice(&self, offset: usize, len: usize) -> Self {
- let s = self;
- list_op!(s, { s.slice(offset, len).into() })
- }
-
- /// Returns an [`ArrayDataLayout`] representation of this
- pub(crate) fn layout(&self) -> ArrayDataLayout<'_> {
- let s = self;
- list_op!(s, { s.layout() })
- }
-
- /// Creates a new [`ArrayDataList`] from raw buffers
- ///
- /// # Safety
- ///
- /// See [`ListArrayData::new_unchecked`]
- pub(crate) unsafe fn from_raw(builder: ArrayDataBuilder, offset:
OffsetType) -> Self {
- match offset {
- OffsetType::Int32 => Self::Small(ListArrayData::from_raw(builder)),
- OffsetType::Int64 => Self::Large(ListArrayData::from_raw(builder)),
- }
- }
-}
-
-impl<O: ListOffset> From<ListArrayData<O>> for ArrayDataList {
- fn from(value: ListArrayData<O>) -> Self {
- O::upcast(value)
- }
-}
-
-/// ArrayData for [variable-size list
arrays](https://arrow.apache.org/docs/format/Columnar.html#variable-size-list-layout)
-#[derive(Debug, Clone)]
-pub struct ListArrayData<O: ListOffset> {
- data_type: DataType,
- nulls: Option<NullBuffer>,
- offsets: OffsetBuffer<O>,
- values: Box<ArrayData>,
-}
-
-impl<O: ListOffset> ListArrayData<O> {
- /// Create a new [`ListArrayData`]
- ///
- /// # Safety
- ///
- /// - `PhysicalType::from(&data_type) == PhysicalType::List(O::TYPE)`
- /// - Each consecutive window of `offsets` must identify a valid slice of
`child`
- /// - `nulls.len() == offsets.len() - 1`
- pub unsafe fn new_unchecked(
- data_type: DataType,
- offsets: OffsetBuffer<O>,
- nulls: Option<NullBuffer>,
- values: ArrayData,
- ) -> Self {
- Self {
- data_type,
- nulls,
- offsets,
- values: Box::new(values),
- }
- }
-
- /// Creates a new [`ListArrayData`] from an [`ArrayDataBuilder`]
- ///
- /// # Safety
- ///
- /// See [`Self::new_unchecked`]
- pub(crate) unsafe fn from_raw(builder: ArrayDataBuilder) -> Self {
- let offsets = builder.buffers.into_iter().next().unwrap();
- let values = builder.child_data.into_iter().next().unwrap();
-
- let offsets = match builder.len {
- 0 => OffsetBuffer::new_empty(),
- _ => OffsetBuffer::new_unchecked(ScalarBuffer::new(
- offsets,
- builder.offset,
- builder.len + 1,
- )),
- };
-
- Self {
- offsets,
- data_type: builder.data_type,
- nulls: builder.nulls,
- values: Box::new(values),
- }
- }
-
- /// Returns the length
- #[inline]
- pub fn len(&self) -> usize {
- self.offsets.len().wrapping_sub(1)
- }
-
- /// Returns true if this array is empty
- #[inline]
- pub fn is_empty(&self) -> bool {
- self.offsets.len() <= 1
- }
-
- /// Returns the null buffer if any
- #[inline]
- pub fn nulls(&self) -> Option<&NullBuffer> {
- self.nulls.as_ref()
- }
-
- /// Returns the offsets
- #[inline]
- pub fn offsets(&self) -> &OffsetBuffer<O> {
- &self.offsets
- }
-
- /// Returns the values of this [`ListArrayData`]
- #[inline]
- pub fn values(&self) -> &ArrayData {
- self.values.as_ref()
- }
-
- /// Returns the data type of this array
- #[inline]
- pub fn data_type(&self) -> &DataType {
- &self.data_type
- }
-
- /// Returns the underlying parts of this [`ListArrayData`]
- pub fn into_parts(
- self,
- ) -> (DataType, OffsetBuffer<O>, Option<NullBuffer>, ArrayData) {
- (self.data_type, self.offsets, self.nulls, *self.values)
- }
-
- /// Returns a zero-copy slice of this array
- pub fn slice(&self, offset: usize, len: usize) -> Self {
- Self {
- data_type: self.data_type.clone(),
- nulls: self.nulls.as_ref().map(|x| x.slice(offset, len)),
- offsets: self.offsets.slice(offset, len),
- values: self.values.clone(),
- }
- }
-
- /// Returns an [`ArrayDataLayout`] representation of this
- pub(crate) fn layout(&self) -> ArrayDataLayout<'_> {
- ArrayDataLayout {
- data_type: &self.data_type,
- len: self.len(),
- offset: 0,
- nulls: self.nulls.as_ref(),
- buffers: Buffers::one(self.offsets.inner().inner()),
- child_data: std::slice::from_ref(self.values.as_ref()),
- }
- }
-}
-
-/// ArrayData for [fixed-size list
arrays](https://arrow.apache.org/docs/format/Columnar.html#fixed-size-list-layout)
-#[derive(Debug, Clone)]
-pub struct FixedSizeListArrayData {
- data_type: DataType,
- len: usize,
- element_size: usize,
- nulls: Option<NullBuffer>,
- child: Box<ArrayData>,
-}
-
-impl FixedSizeListArrayData {
- /// Create a new [`FixedSizeListArrayData`]
- ///
- /// # Safety
- ///
- /// - `PhysicalType::from(&data_type) ==
PhysicalType::FixedSizeList(element_size)`
- /// - `nulls.len() == values.len() / element_size == len`
- pub unsafe fn new_unchecked(
- data_type: DataType,
- len: usize,
- element_size: usize,
- nulls: Option<NullBuffer>,
- child: ArrayData,
- ) -> Self {
- Self {
- data_type,
- len,
- element_size,
- nulls,
- child: Box::new(child),
- }
- }
-
- /// Creates a new [`FixedSizeListArrayData`] from raw buffers
- ///
- /// # Safety
- ///
- /// See [`FixedSizeListArrayData::new_unchecked`]
- pub(crate) unsafe fn from_raw(builder: ArrayDataBuilder, size: usize) ->
Self {
- let child =
- builder.child_data[0].slice(builder.offset * size, builder.len *
size);
- Self {
- data_type: builder.data_type,
- len: builder.len,
- element_size: size,
- nulls: builder.nulls,
- child: Box::new(child),
- }
- }
-
- /// Returns the length
- #[inline]
- pub fn len(&self) -> usize {
- self.len
- }
-
- /// Returns true if this array is empty
- #[inline]
- pub fn is_empty(&self) -> bool {
- self.len == 0
- }
-
- /// Returns the size of each element
- #[inline]
- pub fn element_size(&self) -> usize {
- self.element_size
- }
-
- /// Returns the null buffer if any
- #[inline]
- pub fn nulls(&self) -> Option<&NullBuffer> {
- self.nulls.as_ref()
- }
-
- /// Returns the child data
- #[inline]
- pub fn child(&self) -> &ArrayData {
- self.child.as_ref()
- }
-
- /// Returns the data type of this array
- #[inline]
- pub fn data_type(&self) -> &DataType {
- &self.data_type
- }
-
- /// Returns the underlying parts of this [`FixedSizeListArrayData`]
- pub fn into_parts(self) -> (DataType, Option<NullBuffer>, ArrayData) {
- (self.data_type, self.nulls, *self.child)
- }
-
- /// Returns a zero-copy slice of this array
- pub fn slice(&self, offset: usize, len: usize) -> Self {
- let offset_element =
offset.checked_mul(self.element_size).expect("overflow");
- let len_element =
len.checked_mul(self.element_size).expect("overflow");
- let child = self.child.slice(offset_element, len_element);
-
- Self {
- len,
- data_type: self.data_type.clone(),
- element_size: self.element_size,
- nulls: self.nulls.as_ref().map(|x| x.slice(offset, len)),
- child: Box::new(child),
- }
- }
-
- /// Returns an [`ArrayDataLayout`] representation of this
- pub(crate) fn layout(&self) -> ArrayDataLayout<'_> {
- ArrayDataLayout {
- data_type: &self.data_type,
- len: self.len,
- offset: 0,
- nulls: self.nulls.as_ref(),
- buffers: Buffers::default(),
- child_data: std::slice::from_ref(self.child.as_ref()),
- }
- }
-}
diff --git a/arrow-data/src/data/mod.rs b/arrow-data/src/data/mod.rs
index 784911dc0..cc908d639 100644
--- a/arrow-data/src/data/mod.rs
+++ b/arrow-data/src/data/mod.rs
@@ -33,27 +33,6 @@ use crate::equal;
mod buffers;
pub use buffers::*;
-#[allow(unused)] // Private until ready (#1799)
-mod boolean;
-#[allow(unused)] // Private until ready (#1799)
-mod bytes;
-#[allow(unused)] // Private until ready (#1799)
-mod dictionary;
-#[allow(unused)] // Private until ready (#1799)
-mod list;
-#[allow(unused)] // Private until ready (#1799)
-mod null;
-#[allow(unused)] // Private until ready (#1799)
-mod primitive;
-#[allow(unused)] // Private until ready (#1799)
-mod run;
-#[allow(unused)] // Private until ready (#1799)
-mod r#struct;
-#[allow(unused)] // Private until ready (#1799)
-mod types;
-#[allow(unused)] // Private until ready (#1799)
-mod union;
-
#[inline]
pub(crate) fn contains_nulls(
null_bit_buffer: Option<&NullBuffer>,
@@ -351,7 +330,7 @@ impl ArrayData {
// We don't need to validate children as we can assume that the
// [`ArrayData`] in `child_data` have already been validated through
// a call to `ArrayData::try_new` or created using unsafe
- ArrayDataLayout::new(&new_self).validate_data()?;
+ new_self.validate_data()?;
Ok(new_self)
}
@@ -441,15 +420,14 @@ impl ArrayData {
/// If multiple [`ArrayData`]s refer to the same underlying
/// [`Buffer`]s they will both report the same size.
pub fn get_buffer_memory_size(&self) -> usize {
- let s = ArrayDataLayout::new(self);
let mut size = 0;
- for buffer in s.buffers {
+ for buffer in &self.buffers {
size += buffer.capacity();
}
- if let Some(bitmap) = s.nulls {
+ if let Some(bitmap) = &self.nulls {
size += bitmap.buffer().capacity()
}
- for child in s.child_data {
+ for child in &self.child_data {
size += child.get_buffer_memory_size();
}
size
@@ -468,15 +446,14 @@ impl ArrayData {
/// first `20` elements, then [`Self::get_slice_memory_size`] on the
/// sliced [`ArrayData`] would return `20 * 8 = 160`.
pub fn get_slice_memory_size(&self) -> Result<usize, ArrowError> {
- let s = ArrayDataLayout::new(self);
let mut result: usize = 0;
- let layout = layout(s.data_type);
+ let layout = layout(&self.data_type);
for spec in layout.buffers.iter() {
match spec {
BufferSpec::FixedWidth { byte_width } => {
let buffer_size =
- s.len.checked_mul(*byte_width).ok_or_else(|| {
+ self.len.checked_mul(*byte_width).ok_or_else(|| {
ArrowError::ComputeError(
"Integer overflow computing buffer
size".to_string(),
)
@@ -485,26 +462,26 @@ impl ArrayData {
}
BufferSpec::VariableWidth => {
let buffer_len: usize;
- match s.data_type {
+ match self.data_type {
DataType::Utf8 | DataType::Binary => {
- let offsets = s.typed_offsets::<i32>()?;
- buffer_len = (offsets[s.len] - offsets[0]) as
usize;
+ let offsets = self.typed_offsets::<i32>()?;
+ buffer_len = (offsets[self.len] - offsets[0] ) as
usize;
}
DataType::LargeUtf8 | DataType::LargeBinary => {
- let offsets = s.typed_offsets::<i64>()?;
- buffer_len = (offsets[s.len] - offsets[0]) as
usize;
+ let offsets = self.typed_offsets::<i64>()?;
+ buffer_len = (offsets[self.len] - offsets[0]) as
usize;
}
_ => {
return Err(ArrowError::NotYetImplemented(format!(
- "Invalid data type for VariableWidth buffer.
Expected Utf8, LargeUtf8, Binary or LargeBinary. Got {}",
- s.data_type
+ "Invalid data type for VariableWidth buffer.
Expected Utf8, LargeUtf8, Binary or LargeBinary. Got {}",
+ self.data_type
)))
}
};
result += buffer_len;
}
BufferSpec::BitMap => {
- let buffer_size = bit_util::ceil(s.len, 8);
+ let buffer_size = bit_util::ceil(self.len, 8);
result += buffer_size;
}
BufferSpec::AlwaysNull => {
@@ -513,11 +490,11 @@ impl ArrayData {
}
}
- if s.nulls.is_some() {
- result += bit_util::ceil(s.len, 8);
+ if self.nulls().is_some() {
+ result += bit_util::ceil(self.len, 8);
}
- for child in s.child_data {
+ for child in &self.child_data {
result += child.get_slice_memory_size()?;
}
Ok(result)
@@ -532,18 +509,17 @@ impl ArrayData {
/// [`Self::get_buffer_memory_size`] +
/// `size_of_val(child)` for all children
pub fn get_array_memory_size(&self) -> usize {
- let s = ArrayDataLayout::new(self);
let mut size = mem::size_of_val(self);
// Calculate rest of the fields top down which contain actual data
- for buffer in s.buffers {
+ for buffer in &self.buffers {
size += mem::size_of::<Buffer>();
size += buffer.capacity();
}
- if let Some(nulls) = s.nulls {
+ if let Some(nulls) = &self.nulls {
size += nulls.buffer().capacity();
}
- for child in s.child_data {
+ for child in &self.child_data {
size += child.get_array_memory_size();
}
@@ -730,101 +706,11 @@ impl ArrayData {
/// See [ArrayData::validate_data] to validate fully the offset content
/// and the validity of utf8 data
pub fn validate(&self) -> Result<(), ArrowError> {
- ArrayDataLayout::new(self).validate()
- }
-
- /// Validate that the data contained within this [`ArrayData`] is valid
- ///
- /// 1. Null count is correct
- /// 2. All offsets are valid
- /// 3. All String data is valid UTF-8
- /// 4. All dictionary offsets are valid
- ///
- /// Internally this calls:
- ///
- /// * [`Self::validate`]
- /// * [`Self::validate_nulls`]
- /// * [`Self::validate_values`]
- ///
- /// Note: this does not recurse into children, for a recursive variant
- /// see [`Self::validate_full`]
- pub fn validate_data(&self) -> Result<(), ArrowError> {
- ArrayDataLayout::new(self).validate_data()
- }
-
- /// Performs a full recursive validation of this [`ArrayData`] and all its
children
- ///
- /// This is equivalent to calling [`Self::validate_data`] on this
[`ArrayData`]
- /// and all its children recursively
- pub fn validate_full(&self) -> Result<(), ArrowError> {
- ArrayDataLayout::new(self).validate_full()
- }
-
- /// Validates the values stored within this [`ArrayData`] are valid
- /// without recursing into child [`ArrayData`]
- ///
- /// Does not (yet) check
- /// 1. Union type_ids are valid see
[#85](https://github.com/apache/arrow-rs/issues/85)
- /// Validates the the null count is correct and that any
- /// nullability requirements of its children are correct
- pub fn validate_nulls(&self) -> Result<(), ArrowError> {
- ArrayDataLayout::new(self).validate_nulls()
- }
-
- /// Validates the values stored within this [`ArrayData`] are valid
- /// without recursing into child [`ArrayData`]
- ///
- /// Does not (yet) check
- /// 1. Union type_ids are valid see
[#85](https://github.com/apache/arrow-rs/issues/85)
- pub fn validate_values(&self) -> Result<(), ArrowError> {
- ArrayDataLayout::new(self).validate_values()
- }
-
- /// Returns true if this `ArrayData` is equal to `other`, using pointer
comparisons
- /// to determine buffer equality. This is cheaper than `PartialEq::eq` but
may
- /// return false when the arrays are logically equal
- pub fn ptr_eq(&self, other: &Self) -> bool {
- ArrayDataLayout::new(self).ptr_eq(&ArrayDataLayout::new(other))
- }
-
- /// Converts this [`ArrayData`] into an [`ArrayDataBuilder`]
- pub fn into_builder(self) -> ArrayDataBuilder {
- self.into()
- }
-}
-
-/// A flat representation of [`ArrayData`]
-///
-/// This is temporary measure to bridge the gap between the strongly-typed
-/// ArrayData enumeration and the older-style struct representation (#1799)
-#[derive(Copy, Clone)]
-pub(crate) struct ArrayDataLayout<'a> {
- data_type: &'a DataType,
- len: usize,
- offset: usize,
- nulls: Option<&'a NullBuffer>,
- buffers: Buffers<'a>,
- child_data: &'a [ArrayData],
-}
-
-impl<'a> ArrayDataLayout<'a> {
- fn new(data: &'a ArrayData) -> Self {
- Self {
- data_type: &data.data_type,
- len: data.len,
- offset: data.offset,
- nulls: data.nulls.as_ref(),
- buffers: Buffers::from_slice(&data.buffers),
- child_data: &data.child_data,
- }
- }
-
- fn validate(&self) -> Result<(), ArrowError> {
// Need at least this mich space in each buffer
let len_plus_offset = self.len + self.offset;
// Check that the data layout conforms to the spec
- let layout = layout(self.data_type);
+ let layout = layout(&self.data_type);
if !layout.can_contain_null_mask && self.nulls.is_some() {
return Err(ArrowError::InvalidArgumentError(format!(
@@ -879,7 +765,7 @@ impl<'a> ArrayDataLayout<'a> {
}
// check null bit buffer size
- if let Some(nulls) = self.nulls {
+ if let Some(nulls) = self.nulls() {
if nulls.null_count() > self.len {
return Err(ArrowError::InvalidArgumentError(format!(
"null_count {} for an array exceeds length of {} elements",
@@ -1141,7 +1027,7 @@ impl<'a> ArrayDataLayout<'a> {
fn get_single_valid_child_data(
&self,
expected_type: &DataType,
- ) -> Result<ArrayDataLayout<'_>, ArrowError> {
+ ) -> Result<&ArrayData, ArrowError> {
self.validate_num_child_data(1)?;
self.get_valid_child_data(0, expected_type)
}
@@ -1166,7 +1052,7 @@ impl<'a> ArrayDataLayout<'a> {
&self,
i: usize,
expected_type: &DataType,
- ) -> Result<ArrayDataLayout, ArrowError> {
+ ) -> Result<&ArrayData, ArrowError> {
let values_data = self.child_data
.get(i)
.ok_or_else(|| {
@@ -1175,9 +1061,8 @@ impl<'a> ArrayDataLayout<'a> {
self.data_type, i+1, self.child_data.len()
))
})?;
- let values_data = ArrayDataLayout::new(values_data);
- if expected_type != values_data.data_type {
+ if expected_type != &values_data.data_type {
return Err(ArrowError::InvalidArgumentError(format!(
"Child type mismatch for {}. Expected {} but child data had
{}",
self.data_type, expected_type, values_data.data_type
@@ -1188,7 +1073,22 @@ impl<'a> ArrayDataLayout<'a> {
Ok(values_data)
}
- fn validate_data(&self) -> Result<(), ArrowError> {
+ /// Validate that the data contained within this [`ArrayData`] is valid
+ ///
+ /// 1. Null count is correct
+ /// 2. All offsets are valid
+ /// 3. All String data is valid UTF-8
+ /// 4. All dictionary offsets are valid
+ ///
+ /// Internally this calls:
+ ///
+ /// * [`Self::validate`]
+ /// * [`Self::validate_nulls`]
+ /// * [`Self::validate_values`]
+ ///
+ /// Note: this does not recurse into children, for a recursive variant
+ /// see [`Self::validate_full`]
+ pub fn validate_data(&self) -> Result<(), ArrowError> {
self.validate()?;
self.validate_nulls()?;
@@ -1196,7 +1096,11 @@ impl<'a> ArrayDataLayout<'a> {
Ok(())
}
- fn validate_full(&self) -> Result<(), ArrowError> {
+ /// Performs a full recursive validation of this [`ArrayData`] and all its
children
+ ///
+ /// This is equivalent to calling [`Self::validate_data`] on this
[`ArrayData`]
+ /// and all its children recursively
+ pub fn validate_full(&self) -> Result<(), ArrowError> {
self.validate_data()?;
// validate all children recursively
self.child_data
@@ -1213,7 +1117,14 @@ impl<'a> ArrayDataLayout<'a> {
Ok(())
}
- fn validate_nulls(&self) -> Result<(), ArrowError> {
+ /// Validates the values stored within this [`ArrayData`] are valid
+ /// without recursing into child [`ArrayData`]
+ ///
+ /// Does not (yet) check
+ /// 1. Union type_ids are valid see
[#85](https://github.com/apache/arrow-rs/issues/85)
+ /// Validates the the null count is correct and that any
+ /// nullability requirements of its children are correct
+ pub fn validate_nulls(&self) -> Result<(), ArrowError> {
if let Some(nulls) = &self.nulls {
let actual = nulls.len() - nulls.inner().count_set_bits();
if actual != nulls.null_count() {
@@ -1231,12 +1142,11 @@ impl<'a> ArrayDataLayout<'a> {
match &self.data_type {
DataType::List(f) | DataType::LargeList(f) | DataType::Map(f, _)
=> {
if !f.is_nullable() {
- let child = ArrayDataLayout::new(&self.child_data[0]);
- self.validate_non_nullable(None, 0, child)?
+ self.validate_non_nullable(None, 0, &self.child_data[0])?
}
}
DataType::FixedSizeList(field, len) => {
- let child = ArrayDataLayout::new(&self.child_data[0]);
+ let child = &self.child_data[0];
if !field.is_nullable() {
match &self.nulls {
Some(nulls) => {
@@ -1265,8 +1175,7 @@ impl<'a> ArrayDataLayout<'a> {
}
}
DataType::Struct(fields) => {
- for (field, child) in fields.iter().zip(self.child_data) {
- let child = ArrayDataLayout::new(child);
+ for (field, child) in fields.iter().zip(&self.child_data) {
if !field.is_nullable() {
match &self.nulls {
Some(n) => self.validate_non_nullable(
@@ -1290,11 +1199,11 @@ impl<'a> ArrayDataLayout<'a> {
&self,
mask: Option<&Buffer>,
offset: usize,
- child: ArrayDataLayout<'_>,
+ child: &ArrayData,
) -> Result<(), ArrowError> {
let mask = match mask {
Some(mask) => mask.as_ref(),
- None => return match child.nulls.map(|x|
x.null_count()).unwrap_or_default() {
+ None => return match child.null_count() {
0 => Ok(()),
_ => Err(ArrowError::InvalidArgumentError(format!(
"non-nullable child of type {} contains nulls not present
in parent {}",
@@ -1304,7 +1213,7 @@ impl<'a> ArrayDataLayout<'a> {
},
};
- match child.nulls {
+ match child.nulls() {
Some(nulls) => {
let mask = BitChunks::new(mask, offset, child.len);
let nulls = BitChunks::new(nulls.validity(), nulls.offset(),
child.len);
@@ -1333,7 +1242,7 @@ impl<'a> ArrayDataLayout<'a> {
///
/// Does not (yet) check
/// 1. Union type_ids are valid see
[#85](https://github.com/apache/arrow-rs/issues/85)
- fn validate_values(&self) -> Result<(), ArrowError> {
+ pub fn validate_values(&self) -> Result<(), ArrowError> {
match &self.data_type {
DataType::Utf8 => self.validate_utf8::<i32>(),
DataType::LargeUtf8 => self.validate_utf8::<i64>(),
@@ -1343,11 +1252,11 @@ impl<'a> ArrayDataLayout<'a> {
}
DataType::List(_) | DataType::Map(_, _) => {
let child = &self.child_data[0];
- self.validate_offsets_full::<i32>(child.len())
+ self.validate_offsets_full::<i32>(child.len)
}
DataType::LargeList(_) => {
let child = &self.child_data[0];
- self.validate_offsets_full::<i64>(child.len())
+ self.validate_offsets_full::<i64>(child.len)
}
DataType::Union(_, _, _) => {
// Validate Union Array as part of implementing new Union
semantics
@@ -1358,7 +1267,7 @@ impl<'a> ArrayDataLayout<'a> {
Ok(())
}
DataType::Dictionary(key_type, _value_type) => {
- let dictionary_length: i64 =
self.child_data[0].len().try_into().unwrap();
+ let dictionary_length: i64 =
self.child_data[0].len.try_into().unwrap();
let max_value = dictionary_length - 1;
match key_type.as_ref() {
DataType::UInt8 => self.check_bounds::<u8>(max_value),
@@ -1373,7 +1282,7 @@ impl<'a> ArrayDataLayout<'a> {
}
}
DataType::RunEndEncoded(run_ends, _values) => {
- let run_ends_data = ArrayDataLayout::new(&self.child_data[0]);
+ let run_ends_data = self.child_data()[0].clone();
match run_ends.data_type() {
DataType::Int16 => run_ends_data.check_run_ends::<i16>(),
DataType::Int32 => run_ends_data.check_run_ends::<i32>(),
@@ -1517,7 +1426,7 @@ impl<'a> ArrayDataLayout<'a> {
indexes.iter().enumerate().try_for_each(|(i, &dict_index)| {
// Do not check the value is null (value can be arbitrary)
- if self.nulls.map(|x| x.is_null(i)).unwrap_or_default() {
+ if self.is_null(i) {
return Ok(());
}
let dict_index: i64 = dict_index.try_into().map_err(|_| {
@@ -1605,6 +1514,11 @@ impl<'a> ArrayDataLayout<'a> {
.zip(other.child_data.iter())
.all(|(a, b)| a.ptr_eq(b))
}
+
+ /// Converts this [`ArrayData`] into an [`ArrayDataBuilder`]
+ pub fn into_builder(self) -> ArrayDataBuilder {
+ self.into()
+ }
}
/// Return the expected [`DataTypeLayout`] Arrays of this data
@@ -1889,7 +1803,7 @@ impl ArrayDataBuilder {
pub fn build(self) -> Result<ArrayData, ArrowError> {
let data = unsafe { self.build_unchecked() };
#[cfg(not(feature = "force_validate"))]
- ArrayDataLayout::new(&data).validate_data()?;
+ data.validate_data()?;
Ok(data)
}
}
diff --git a/arrow-data/src/data/null.rs b/arrow-data/src/data/null.rs
deleted file mode 100644
index b8a4d7270..000000000
--- a/arrow-data/src/data/null.rs
+++ /dev/null
@@ -1,104 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use crate::data::types::PhysicalType;
-use crate::data::ArrayDataLayout;
-use crate::{ArrayDataBuilder, Buffers};
-use arrow_schema::DataType;
-
-/// ArrayData for [null
arrays](https://arrow.apache.org/docs/format/Columnar.html#null-layout)
-#[derive(Debug, Clone)]
-pub struct NullArrayData {
- data_type: DataType,
- len: usize,
-}
-
-impl NullArrayData {
- /// Create a new [`NullArrayData`]
- ///
- /// # Panic
- ///
- /// - `PhysicalType::from(&data_type) != PhysicalType::Null`
- pub fn new(data_type: DataType, len: usize) -> Self {
- assert_eq!(
- PhysicalType::from(&data_type),
- PhysicalType::Null,
- "Illegal physical type for NullArrayData of datatype
{data_type:?}",
- );
- Self { data_type, len }
- }
-
- /// Create a new [`NullArrayData`]
- ///
- /// # Safety
- ///
- /// - `PhysicalType::from(&data_type) == PhysicalType::Null`
- pub unsafe fn new_unchecked(data_type: DataType, len: usize) -> Self {
- Self { data_type, len }
- }
-
- /// Creates a new [`NullArrayData`] from raw buffers
- ///
- /// # Safety
- ///
- /// See [`NullArrayData::new_unchecked`]
- pub(crate) unsafe fn from_raw(builder: ArrayDataBuilder) -> Self {
- Self {
- data_type: builder.data_type,
- len: builder.len,
- }
- }
-
- /// Returns the data type of this array
- #[inline]
- pub fn data_type(&self) -> &DataType {
- &self.data_type
- }
-
- /// Returns the length of this array
- #[inline]
- pub fn len(&self) -> usize {
- self.len
- }
-
- /// Returns the [`DataType`] and length of this [`NullArrayData`]
- pub fn into_parts(self) -> (DataType, usize) {
- (self.data_type, self.len)
- }
-
- /// Returns a zero-copy slice of this array
- pub fn slice(&self, offset: usize, len: usize) -> Self {
- let new_len = offset.saturating_add(len);
- assert!(new_len <= self.len);
- Self {
- data_type: self.data_type.clone(),
- len,
- }
- }
-
- /// Returns an [`ArrayDataLayout`] representation of this
- pub(crate) fn layout(&self) -> ArrayDataLayout<'_> {
- ArrayDataLayout {
- data_type: &self.data_type,
- len: self.len,
- offset: 0,
- nulls: None,
- buffers: Buffers::default(),
- child_data: &[],
- }
- }
-}
diff --git a/arrow-data/src/data/primitive.rs b/arrow-data/src/data/primitive.rs
deleted file mode 100644
index ed8ed8d7a..000000000
--- a/arrow-data/src/data/primitive.rs
+++ /dev/null
@@ -1,304 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use crate::data::types::{PhysicalType, PrimitiveType};
-use crate::data::ArrayDataLayout;
-use crate::{ArrayDataBuilder, Buffers};
-use arrow_buffer::buffer::{NullBuffer, ScalarBuffer};
-use arrow_buffer::{i256, ArrowNativeType};
-use arrow_schema::DataType;
-use half::f16;
-
-mod private {
- use super::*;
-
- pub trait PrimitiveSealed {
- /// Downcast [`ArrayDataPrimitive`] to `[PrimitiveArrayData`]
- fn downcast_ref(data: &ArrayDataPrimitive) ->
Option<&PrimitiveArrayData<Self>>
- where
- Self: Primitive;
-
- /// Downcast [`ArrayDataPrimitive`] to `[PrimitiveArrayData`]
- fn downcast(data: ArrayDataPrimitive) ->
Option<PrimitiveArrayData<Self>>
- where
- Self: Primitive;
-
- /// Cast [`ArrayDataPrimitive`] to [`ArrayDataPrimitive`]
- fn upcast(v: PrimitiveArrayData<Self>) -> ArrayDataPrimitive
- where
- Self: Primitive;
- }
-}
-
-pub trait Primitive: private::PrimitiveSealed + ArrowNativeType {
- const TYPE: PrimitiveType;
-}
-
-/// Applies op to each variant of [`ArrayDataPrimitive`]
-macro_rules! primitive_op {
- ($array:ident, $op:block) => {
- match $array {
- ArrayDataPrimitive::Int8($array) => $op
- ArrayDataPrimitive::Int16($array) => $op
- ArrayDataPrimitive::Int32($array) => $op
- ArrayDataPrimitive::Int64($array) => $op
- ArrayDataPrimitive::Int128($array) => $op
- ArrayDataPrimitive::Int256($array) => $op
- ArrayDataPrimitive::UInt8($array) => $op
- ArrayDataPrimitive::UInt16($array) => $op
- ArrayDataPrimitive::UInt32($array) => $op
- ArrayDataPrimitive::UInt64($array) => $op
- ArrayDataPrimitive::Float16($array) => $op
- ArrayDataPrimitive::Float32($array) => $op
- ArrayDataPrimitive::Float64($array) => $op
- }
- };
-}
-
-macro_rules! primitive {
- ($t:ty,$v:ident) => {
- impl Primitive for $t {
- const TYPE: PrimitiveType = PrimitiveType::$v;
- }
- impl private::PrimitiveSealed for $t {
- fn downcast_ref(
- data: &ArrayDataPrimitive,
- ) -> Option<&PrimitiveArrayData<Self>> {
- match data {
- ArrayDataPrimitive::$v(v) => Some(v),
- _ => None,
- }
- }
-
- fn downcast(data: ArrayDataPrimitive) ->
Option<PrimitiveArrayData<Self>> {
- match data {
- ArrayDataPrimitive::$v(v) => Some(v),
- _ => None,
- }
- }
-
- fn upcast(v: PrimitiveArrayData<Self>) -> ArrayDataPrimitive {
- ArrayDataPrimitive::$v(v)
- }
- }
- };
-}
-
-primitive!(i8, Int8);
-primitive!(i16, Int16);
-primitive!(i32, Int32);
-primitive!(i64, Int64);
-primitive!(i128, Int128);
-primitive!(i256, Int256);
-primitive!(u8, UInt8);
-primitive!(u16, UInt16);
-primitive!(u32, UInt32);
-primitive!(u64, UInt64);
-primitive!(f16, Float16);
-primitive!(f32, Float32);
-primitive!(f64, Float64);
-
-/// An enumeration of the types of [`PrimitiveArrayData`]
-#[derive(Debug, Clone)]
-pub enum ArrayDataPrimitive {
- Int8(PrimitiveArrayData<i8>),
- Int16(PrimitiveArrayData<i16>),
- Int32(PrimitiveArrayData<i32>),
- Int64(PrimitiveArrayData<i64>),
- Int128(PrimitiveArrayData<i128>),
- Int256(PrimitiveArrayData<i256>),
- UInt8(PrimitiveArrayData<u8>),
- UInt16(PrimitiveArrayData<u16>),
- UInt32(PrimitiveArrayData<u32>),
- UInt64(PrimitiveArrayData<u64>),
- Float16(PrimitiveArrayData<f16>),
- Float32(PrimitiveArrayData<f32>),
- Float64(PrimitiveArrayData<f64>),
-}
-
-impl ArrayDataPrimitive {
- /// Downcast this [`ArrayDataPrimitive`] to the corresponding
[`PrimitiveArrayData`]
- pub fn downcast_ref<P: Primitive>(&self) -> Option<&PrimitiveArrayData<P>>
{
- P::downcast_ref(self)
- }
-
- /// Downcast this [`ArrayDataPrimitive`] to the corresponding
[`PrimitiveArrayData`]
- pub fn downcast<P: Primitive>(self) -> Option<PrimitiveArrayData<P>> {
- P::downcast(self)
- }
-
- /// Returns a zero-copy slice of this array
- pub fn slice(&self, offset: usize, len: usize) -> Self {
- let s = self;
- primitive_op!(s, { s.slice(offset, len).into() })
- }
-
- /// Returns an [`ArrayDataLayout`] representation of this
- pub(crate) fn layout(&self) -> ArrayDataLayout<'_> {
- let s = self;
- primitive_op!(s, { s.layout() })
- }
-
- /// Creates a new [`ArrayDataPrimitive`] from raw buffers
- ///
- /// # Safety
- ///
- /// See [`PrimitiveArrayData::new_unchecked`]
- pub(crate) unsafe fn from_raw(
- builder: ArrayDataBuilder,
- primitive: PrimitiveType,
- ) -> Self {
- use PrimitiveType::*;
- match primitive {
- Int8 => Self::Int8(PrimitiveArrayData::from_raw(builder)),
- Int16 => Self::Int16(PrimitiveArrayData::from_raw(builder)),
- Int32 => Self::Int32(PrimitiveArrayData::from_raw(builder)),
- Int64 => Self::Int64(PrimitiveArrayData::from_raw(builder)),
- Int128 => Self::Int128(PrimitiveArrayData::from_raw(builder)),
- Int256 => Self::Int256(PrimitiveArrayData::from_raw(builder)),
- UInt8 => Self::UInt8(PrimitiveArrayData::from_raw(builder)),
- UInt16 => Self::UInt16(PrimitiveArrayData::from_raw(builder)),
- UInt32 => Self::UInt32(PrimitiveArrayData::from_raw(builder)),
- UInt64 => Self::UInt64(PrimitiveArrayData::from_raw(builder)),
- Float16 => Self::Float16(PrimitiveArrayData::from_raw(builder)),
- Float32 => Self::Float32(PrimitiveArrayData::from_raw(builder)),
- Float64 => Self::Float64(PrimitiveArrayData::from_raw(builder)),
- }
- }
-}
-
-impl<P: Primitive> From<PrimitiveArrayData<P>> for ArrayDataPrimitive {
- fn from(value: PrimitiveArrayData<P>) -> Self {
- P::upcast(value)
- }
-}
-
-/// ArrayData for [fixed size
arrays](https://arrow.apache.org/docs/format/Columnar.html#fixed-size-primitive-layout)
of [`Primitive`]
-#[derive(Debug, Clone)]
-pub struct PrimitiveArrayData<T: Primitive> {
- data_type: DataType,
- values: ScalarBuffer<T>,
- nulls: Option<NullBuffer>,
-}
-
-impl<T: Primitive> PrimitiveArrayData<T> {
- /// Create a new [`PrimitiveArrayData`]
- ///
- /// # Panics
- ///
- /// Panics if
- /// - `PhysicalType::from(&data_type) != PhysicalType::Primitive(T::TYPE)`
- /// - `nulls` and `values` are different lengths
- pub fn new(
- data_type: DataType,
- values: ScalarBuffer<T>,
- nulls: Option<NullBuffer>,
- ) -> Self {
- assert_eq!(
- PhysicalType::from(&data_type),
- PhysicalType::Primitive(T::TYPE),
- "Illegal physical type for PrimitiveArrayData of datatype
{data_type:?}",
- );
-
- if let Some(n) = nulls.as_ref() {
- assert_eq!(values.len(), n.len())
- }
-
- Self {
- data_type,
- values,
- nulls,
- }
- }
-
- /// Create a new [`PrimitiveArrayData`]
- ///
- /// # Safety
- ///
- /// - `PhysicalType::from(&data_type) == PhysicalType::Primitive(T::TYPE)`
- /// - `nulls` and `values` must be the same length
- pub unsafe fn new_unchecked(
- data_type: DataType,
- values: ScalarBuffer<T>,
- nulls: Option<NullBuffer>,
- ) -> Self {
- Self {
- data_type,
- values,
- nulls,
- }
- }
-
- /// Creates a new [`PrimitiveArrayData`] from an [`ArrayDataBuilder`]
- ///
- /// # Safety
- ///
- /// See [`PrimitiveArrayData::new_unchecked`]
- pub(crate) unsafe fn from_raw(builder: ArrayDataBuilder) -> Self {
- let values = builder.buffers.into_iter().next().unwrap();
- let values = ScalarBuffer::new(values, builder.offset, builder.len);
- Self {
- values,
- data_type: builder.data_type,
- nulls: builder.nulls,
- }
- }
-
- /// Returns the null buffer if any
- #[inline]
- pub fn nulls(&self) -> Option<&NullBuffer> {
- self.nulls.as_ref()
- }
-
- /// Returns the primitive values
- #[inline]
- pub fn values(&self) -> &ScalarBuffer<T> {
- &self.values
- }
-
- /// Returns the data type of this array
- #[inline]
- pub fn data_type(&self) -> &DataType {
- &self.data_type
- }
-
- /// Returns the underlying parts of this [`PrimitiveArrayData`]
- pub fn into_parts(self) -> (DataType, ScalarBuffer<T>, Option<NullBuffer>)
{
- (self.data_type, self.values, self.nulls)
- }
-
- /// Returns a zero-copy slice of this array
- pub fn slice(&self, offset: usize, len: usize) -> Self {
- Self {
- data_type: self.data_type.clone(),
- values: self.values.slice(offset, len),
- nulls: self.nulls.as_ref().map(|x| x.slice(offset, len)),
- }
- }
-
- /// Returns an [`ArrayDataLayout`] representation of this
- pub(crate) fn layout(&self) -> ArrayDataLayout<'_> {
- ArrayDataLayout {
- data_type: &self.data_type,
- len: self.values.len(),
- offset: 0,
- nulls: self.nulls.as_ref(),
- buffers: Buffers::one(self.values.inner()),
- child_data: &[],
- }
- }
-}
diff --git a/arrow-data/src/data/run.rs b/arrow-data/src/data/run.rs
deleted file mode 100644
index 7f80206a7..000000000
--- a/arrow-data/src/data/run.rs
+++ /dev/null
@@ -1,277 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use crate::data::primitive::{Primitive, PrimitiveArrayData};
-use crate::data::types::RunEndType;
-use crate::data::ArrayDataLayout;
-use crate::{ArrayData, ArrayDataBuilder, Buffers};
-use arrow_buffer::buffer::{RunEndBuffer, ScalarBuffer};
-use arrow_buffer::ArrowNativeType;
-use arrow_schema::DataType;
-
-mod private {
- use super::*;
-
- pub trait RunEndSealed {
- const ENDS_TYPE: DataType;
-
- /// Downcast [`ArrayDataRun`] to `[RunArrayData`]
- fn downcast_ref(data: &ArrayDataRun) -> Option<&RunArrayData<Self>>
- where
- Self: RunEnd;
-
- /// Downcast [`ArrayDataRun`] to `[RunArrayData`]
- fn downcast(data: ArrayDataRun) -> Option<RunArrayData<Self>>
- where
- Self: RunEnd;
-
- /// Cast [`RunArrayData`] to [`ArrayDataRun`]
- fn upcast(v: RunArrayData<Self>) -> ArrayDataRun
- where
- Self: RunEnd;
- }
-}
-
-pub trait RunEnd: private::RunEndSealed + ArrowNativeType + Primitive {
- const TYPE: RunEndType;
-}
-
-macro_rules! run_end {
- ($t:ty,$v:ident) => {
- impl RunEnd for $t {
- const TYPE: RunEndType = RunEndType::$v;
- }
- impl private::RunEndSealed for $t {
- const ENDS_TYPE: DataType = DataType::$v;
-
- fn downcast_ref(data: &ArrayDataRun) ->
Option<&RunArrayData<Self>> {
- match data {
- ArrayDataRun::$v(v) => Some(v),
- _ => None,
- }
- }
-
- fn downcast(data: ArrayDataRun) -> Option<RunArrayData<Self>> {
- match data {
- ArrayDataRun::$v(v) => Some(v),
- _ => None,
- }
- }
-
- fn upcast(v: RunArrayData<Self>) -> ArrayDataRun {
- ArrayDataRun::$v(v)
- }
- }
- };
-}
-
-run_end!(i16, Int16);
-run_end!(i32, Int32);
-run_end!(i64, Int64);
-
-/// Applies op to each variant of [`ArrayDataRun`]
-macro_rules! run_op {
- ($array:ident, $op:block) => {
- match $array {
- ArrayDataRun::Int16($array) => $op
- ArrayDataRun::Int32($array) => $op
- ArrayDataRun::Int64($array) => $op
- }
- };
-}
-
-/// An enumeration of the types of [`RunArrayData`]
-#[derive(Debug, Clone)]
-pub enum ArrayDataRun {
- Int16(RunArrayData<i16>),
- Int32(RunArrayData<i32>),
- Int64(RunArrayData<i64>),
-}
-
-impl ArrayDataRun {
- /// Downcast this [`ArrayDataRun`] to the corresponding [`RunArrayData`]
- pub fn downcast_ref<E: RunEnd>(&self) -> Option<&RunArrayData<E>> {
- <E as private::RunEndSealed>::downcast_ref(self)
- }
-
- /// Downcast this [`ArrayDataRun`] to the corresponding [`RunArrayData`]
- pub fn downcast<E: RunEnd>(self) -> Option<RunArrayData<E>> {
- <E as private::RunEndSealed>::downcast(self)
- }
-
- /// Returns the values of this [`ArrayDataRun`]
- #[inline]
- pub fn values(&self) -> &ArrayData {
- let s = self;
- run_op!(s, { s.values() })
- }
-
- /// Returns a zero-copy slice of this array
- pub fn slice(&self, offset: usize, len: usize) -> Self {
- let s = self;
- run_op!(s, { s.slice(offset, len).into() })
- }
-
- /// Returns an [`ArrayDataLayout`] representation of this
- pub(crate) fn layout(&self) -> ArrayDataLayout<'_> {
- let s = self;
- run_op!(s, { s.layout() })
- }
-
- /// Creates a new [`ArrayDataRun`] from raw buffers
- ///
- /// # Safety
- ///
- /// See [`RunArrayData::new_unchecked`]
- pub(crate) unsafe fn from_raw(builder: ArrayDataBuilder, run: RunEndType)
-> Self {
- use RunEndType::*;
- match run {
- Int16 => Self::Int16(RunArrayData::from_raw(builder)),
- Int32 => Self::Int32(RunArrayData::from_raw(builder)),
- Int64 => Self::Int64(RunArrayData::from_raw(builder)),
- }
- }
-}
-
-impl<E: RunEnd> From<RunArrayData<E>> for ArrayDataRun {
- fn from(value: RunArrayData<E>) -> Self {
- <E as private::RunEndSealed>::upcast(value)
- }
-}
-
-/// ArrayData for [run-end encoded
arrays](https://arrow.apache.org/docs/format/Columnar.html#run-end-encoded-layout)
-#[derive(Debug, Clone)]
-pub struct RunArrayData<E: RunEnd> {
- data_type: DataType,
- run_ends: RunEndBuffer<E>,
- /// The children of this RunArrayData:
- /// 1: the run ends
- /// 2: the values
- ///
- /// We store an array so that a slice can be returned in
[`RunArrayData::layout`]
- children: Box<[ArrayData; 2]>,
-}
-
-impl<E: RunEnd> RunArrayData<E> {
- /// Create a new [`RunArrayData`]
- ///
- /// # Safety
- ///
- /// - `PhysicalType::from(&data_type) == PhysicalType::Run(E::TYPE)`
- /// - `run_ends` must contain monotonically increasing, positive values
`<= len`
- /// - `run_ends.get_end_physical_index() < values.len()`
- pub unsafe fn new_unchecked(
- data_type: DataType,
- run_ends: RunEndBuffer<E>,
- values: ArrayData,
- ) -> Self {
- let inner = run_ends.inner();
- let child = ArrayDataBuilder::new(E::ENDS_TYPE)
- .len(inner.len())
- .buffers(vec![inner.inner().clone()])
- .build_unchecked();
-
- Self {
- data_type,
- run_ends,
- children: Box::new([child, values]),
- }
- }
-
- /// Creates a new [`RunArrayData`] from raw buffers
- ///
- /// # Safety
- ///
- /// See [`RunArrayData::new_unchecked`]
- pub(crate) unsafe fn from_raw(builder: ArrayDataBuilder) -> Self {
- let mut iter = builder.child_data.into_iter();
- let child1 = iter.next().unwrap();
- let child2 = iter.next().unwrap();
-
- let p = ScalarBuffer::new(child1.buffers[0].clone(), child1.offset,
child1.len);
- let run_ends = RunEndBuffer::new_unchecked(p, builder.offset,
builder.len);
-
- Self {
- run_ends,
- data_type: builder.data_type,
- children: Box::new([child1, child2]),
- }
- }
-
- /// Returns the length
- #[inline]
- pub fn len(&self) -> usize {
- self.run_ends.len()
- }
-
- /// Returns the offset
- #[inline]
- pub fn offset(&self) -> usize {
- self.run_ends.offset()
- }
-
- /// Returns true if this array is empty
- #[inline]
- pub fn is_empty(&self) -> bool {
- self.run_ends.is_empty()
- }
-
- /// Returns the run ends
- #[inline]
- pub fn run_ends(&self) -> &RunEndBuffer<E> {
- &self.run_ends
- }
-
- /// Returns the data type of this array
- #[inline]
- pub fn data_type(&self) -> &DataType {
- &self.data_type
- }
-
- /// Returns the child data
- #[inline]
- pub fn values(&self) -> &ArrayData {
- &self.children[1]
- }
-
- /// Returns the underlying parts of this [`RunArrayData`]
- pub fn into_parts(self) -> (DataType, RunEndBuffer<E>, ArrayData) {
- let child = self.children.into_iter().nth(1).unwrap();
- (self.data_type, self.run_ends, child)
- }
-
- /// Returns a zero-copy slice of this array
- pub fn slice(&self, offset: usize, len: usize) -> Self {
- Self {
- data_type: self.data_type.clone(),
- run_ends: self.run_ends.slice(offset, len),
- children: self.children.clone(),
- }
- }
-
- /// Returns an [`ArrayDataLayout`] representation of this
- pub(crate) fn layout(&self) -> ArrayDataLayout<'_> {
- ArrayDataLayout {
- data_type: &self.data_type,
- len: self.run_ends.len(),
- offset: self.run_ends.offset(),
- nulls: None,
- buffers: Buffers::default(),
- child_data: self.children.as_ref(),
- }
- }
-}
diff --git a/arrow-data/src/data/struct.rs b/arrow-data/src/data/struct.rs
deleted file mode 100644
index 229c10912..000000000
--- a/arrow-data/src/data/struct.rs
+++ /dev/null
@@ -1,129 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use crate::data::ArrayDataLayout;
-use crate::{ArrayData, ArrayDataBuilder, Buffers};
-use arrow_buffer::buffer::NullBuffer;
-use arrow_schema::DataType;
-
-/// ArrayData for [struct
arrays](https://arrow.apache.org/docs/format/Columnar.html#struct-layout)
-#[derive(Debug, Clone)]
-pub struct StructArrayData {
- data_type: DataType,
- len: usize,
- nulls: Option<NullBuffer>,
- children: Vec<ArrayData>,
-}
-
-impl StructArrayData {
- /// Create a new [`StructArrayData`]
- ///
- /// # Safety
- ///
- /// - `PhysicalType::from(&data_type) == PhysicalType::Struct`
- /// - all child data and nulls must have length matching `len`
- pub unsafe fn new_unchecked(
- data_type: DataType,
- len: usize,
- nulls: Option<NullBuffer>,
- children: Vec<ArrayData>,
- ) -> Self {
- Self {
- data_type,
- len,
- nulls,
- children,
- }
- }
-
- /// Creates a new [`StructArrayData`] from raw buffers
- ///
- /// # Safety
- ///
- /// See [`StructArrayData::new_unchecked`]
- pub(crate) unsafe fn from_raw(builder: ArrayDataBuilder) -> Self {
- let children = builder
- .child_data
- .into_iter()
- .map(|x| x.slice(builder.offset, builder.len))
- .collect();
-
- Self {
- data_type: builder.data_type,
- len: builder.len,
- nulls: builder.nulls,
- children,
- }
- }
-
- /// Returns the length of this [`StructArrayData`]
- #[inline]
- pub fn len(&self) -> usize {
- self.len
- }
-
- /// Returns `true` if this [`StructArrayData`] has zero length
- #[inline]
- pub fn is_empty(&self) -> bool {
- self.len == 0
- }
-
- /// Returns the null buffer if any
- #[inline]
- pub fn nulls(&self) -> Option<&NullBuffer> {
- self.nulls.as_ref()
- }
-
- /// Returns the primitive values
- #[inline]
- pub fn children(&self) -> &[ArrayData] {
- &self.children
- }
-
- /// Returns the data type of this array
- #[inline]
- pub fn data_type(&self) -> &DataType {
- &self.data_type
- }
-
- /// Returns the underlying parts of this [`StructArrayData`]
- pub fn into_parts(self) -> (DataType, Option<NullBuffer>, Vec<ArrayData>) {
- (self.data_type, self.nulls, self.children)
- }
-
- /// Returns a zero-copy slice of this array
- pub fn slice(&self, offset: usize, len: usize) -> Self {
- Self {
- len,
- data_type: self.data_type.clone(),
- nulls: self.nulls.as_ref().map(|x| x.slice(offset, len)),
- children: self.children.iter().map(|c| c.slice(offset,
len)).collect(),
- }
- }
-
- /// Returns an [`ArrayDataLayout`] representation of this
- pub(crate) fn layout(&self) -> ArrayDataLayout<'_> {
- ArrayDataLayout {
- data_type: &self.data_type,
- len: self.len,
- offset: 0,
- nulls: self.nulls.as_ref(),
- buffers: Buffers::default(),
- child_data: &self.children,
- }
- }
-}
diff --git a/arrow-data/src/data/types.rs b/arrow-data/src/data/types.rs
deleted file mode 100644
index bb65b4212..000000000
--- a/arrow-data/src/data/types.rs
+++ /dev/null
@@ -1,152 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use arrow_schema::{DataType, IntervalUnit, UnionMode};
-
-/// An enumeration of the primitive types implementing
[`ArrowNativeType`](arrow_buffer::ArrowNativeType)
-#[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq, Hash)]
-pub enum PrimitiveType {
- Int8,
- Int16,
- Int32,
- Int64,
- Int128,
- Int256,
- UInt8,
- UInt16,
- UInt32,
- UInt64,
- Float16,
- Float32,
- Float64,
-}
-
-/// An enumeration of the types of offsets for variable length encodings
-#[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq, Hash)]
-pub enum OffsetType {
- Int32,
- Int64,
-}
-
-/// An enumeration of the types of variable length byte arrays
-#[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq, Hash)]
-pub enum BytesType {
- Binary,
- Utf8,
-}
-
-/// An enumeration of the types of dictionary key
-#[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq, Hash)]
-pub enum DictionaryKeyType {
- Int8,
- Int16,
- Int32,
- Int64,
- UInt8,
- UInt16,
- UInt32,
- UInt64,
-}
-
-/// An enumeration of the types of run key
-#[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq, Hash)]
-pub enum RunEndType {
- Int16,
- Int32,
- Int64,
-}
-
-/// Describes the physical representation of a given [`DataType`]
-#[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq, Hash)]
-pub enum PhysicalType {
- Null,
- Boolean,
- Primitive(PrimitiveType),
- FixedSizeBinary(usize),
- Bytes(OffsetType, BytesType),
- FixedSizeList(usize),
- List(OffsetType),
- Struct,
- Union(UnionMode),
- Dictionary(DictionaryKeyType),
- Run(RunEndType),
-}
-
-impl From<&DataType> for PhysicalType {
- fn from(value: &DataType) -> Self {
- match value {
- DataType::Null => Self::Null,
- DataType::Boolean => Self::Boolean,
- DataType::Int8 => Self::Primitive(PrimitiveType::Int8),
- DataType::Int16 => Self::Primitive(PrimitiveType::Int16),
- DataType::Int32 => Self::Primitive(PrimitiveType::Int32),
- DataType::Int64 => Self::Primitive(PrimitiveType::Int64),
- DataType::UInt8 => Self::Primitive(PrimitiveType::UInt8),
- DataType::UInt16 => Self::Primitive(PrimitiveType::UInt16),
- DataType::UInt32 => Self::Primitive(PrimitiveType::UInt32),
- DataType::UInt64 => Self::Primitive(PrimitiveType::UInt64),
- DataType::Float16 => Self::Primitive(PrimitiveType::Float16),
- DataType::Float32 => Self::Primitive(PrimitiveType::Float32),
- DataType::Float64 => Self::Primitive(PrimitiveType::Float64),
- DataType::Timestamp(_, _) => Self::Primitive(PrimitiveType::Int64),
- DataType::Date32 => Self::Primitive(PrimitiveType::Int32),
- DataType::Date64 => Self::Primitive(PrimitiveType::Int64),
- DataType::Time32(_) => Self::Primitive(PrimitiveType::Int32),
- DataType::Time64(_) => Self::Primitive(PrimitiveType::Int64),
- DataType::Duration(_) => Self::Primitive(PrimitiveType::Int64),
- DataType::Decimal128(_, _) =>
Self::Primitive(PrimitiveType::Int128),
- DataType::Decimal256(_, _) =>
Self::Primitive(PrimitiveType::Int256),
- DataType::Interval(IntervalUnit::YearMonth) => {
- Self::Primitive(PrimitiveType::Int32)
- }
- DataType::Interval(IntervalUnit::DayTime) => {
- Self::Primitive(PrimitiveType::Int64)
- }
- DataType::Interval(IntervalUnit::MonthDayNano) => {
- Self::Primitive(PrimitiveType::Int128)
- }
- DataType::FixedSizeBinary(size) => Self::FixedSizeBinary(*size as
usize),
- DataType::Binary => Self::Bytes(OffsetType::Int32,
BytesType::Binary),
- DataType::LargeBinary => Self::Bytes(OffsetType::Int64,
BytesType::Binary),
- DataType::Utf8 => Self::Bytes(OffsetType::Int32, BytesType::Utf8),
- DataType::LargeUtf8 => Self::Bytes(OffsetType::Int64,
BytesType::Utf8),
- DataType::List(_) => Self::List(OffsetType::Int32),
- DataType::FixedSizeList(_, size) => Self::FixedSizeList(*size as
usize),
- DataType::LargeList(_) => Self::List(OffsetType::Int64),
- DataType::Struct(_) => Self::Struct,
- DataType::Union(_, _, mode) => Self::Union(*mode),
- DataType::Dictionary(k, _) => match k.as_ref() {
- DataType::Int8 => Self::Dictionary(DictionaryKeyType::Int8),
- DataType::Int16 => Self::Dictionary(DictionaryKeyType::Int16),
- DataType::Int32 => Self::Dictionary(DictionaryKeyType::Int32),
- DataType::Int64 => Self::Dictionary(DictionaryKeyType::Int64),
- DataType::UInt8 => Self::Dictionary(DictionaryKeyType::UInt8),
- DataType::UInt16 =>
Self::Dictionary(DictionaryKeyType::UInt16),
- DataType::UInt32 =>
Self::Dictionary(DictionaryKeyType::UInt32),
- DataType::UInt64 =>
Self::Dictionary(DictionaryKeyType::UInt64),
- d => panic!("illegal dictionary key data type {d}"),
- },
- DataType::Map(_, _) => Self::List(OffsetType::Int32),
- DataType::RunEndEncoded(f, _) => match f.data_type() {
- DataType::Int16 => Self::Run(RunEndType::Int16),
- DataType::Int32 => Self::Run(RunEndType::Int32),
- DataType::Int64 => Self::Run(RunEndType::Int64),
- d => panic!("illegal run end data type {d}"),
- },
- }
- }
-}
diff --git a/arrow-data/src/data/union.rs b/arrow-data/src/data/union.rs
deleted file mode 100644
index 7d53a1f18..000000000
--- a/arrow-data/src/data/union.rs
+++ /dev/null
@@ -1,171 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use crate::data::ArrayDataLayout;
-use crate::{ArrayData, ArrayDataBuilder, Buffers};
-use arrow_buffer::buffer::ScalarBuffer;
-use arrow_schema::{DataType, UnionMode};
-
-/// ArrayData for [union
arrays](https://arrow.apache.org/docs/format/Columnar.html#union-layout)
-#[derive(Debug, Clone)]
-pub struct UnionArrayData {
- data_type: DataType,
- type_ids: ScalarBuffer<i8>,
- offsets: Option<ScalarBuffer<i32>>,
- children: Vec<ArrayData>,
-}
-
-impl UnionArrayData {
- /// Creates a new [`UnionArrayData`]
- ///
- /// # Safety
- ///
- /// - `PhysicalType::from(&data_type) == PhysicalType::Union(mode)`
- /// - `offsets` is `Some` iff the above `mode == UnionMode::Sparse`
- /// - `type_ids` must only contain values corresponding to a field in
`data_type`
- /// - `children` must match the field definitions in `data_type`
- /// - For each value id in type_ids, the corresponding offset, must be in
bounds for the child
- pub unsafe fn new_unchecked(
- data_type: DataType,
- type_ids: ScalarBuffer<i8>,
- offsets: Option<ScalarBuffer<i32>>,
- children: Vec<ArrayData>,
- ) -> Self {
- Self {
- data_type,
- type_ids,
- offsets,
- children,
- }
- }
-
- /// Creates a new [`UnionArrayData`] from raw buffers
- ///
- /// # Safety
- ///
- /// See [`UnionArrayData::new_unchecked`]
- pub(crate) unsafe fn from_raw(builder: ArrayDataBuilder, mode: UnionMode)
-> Self {
- match mode {
- UnionMode::Sparse => {
- let type_ids = builder.buffers.into_iter().next().unwrap();
- let type_ids = ScalarBuffer::new(type_ids, builder.offset,
builder.len);
- let children = builder
- .child_data
- .into_iter()
- .map(|x| x.slice(builder.offset, builder.len))
- .collect();
-
- Self {
- type_ids,
- children,
- data_type: builder.data_type,
- offsets: None,
- }
- }
- UnionMode::Dense => {
- let mut iter = builder.buffers.into_iter();
- let type_ids = iter.next().unwrap();
- let offsets = iter.next().unwrap();
- let type_ids = ScalarBuffer::new(type_ids, builder.offset,
builder.len);
- let offsets = ScalarBuffer::new(offsets, builder.offset,
builder.len);
-
- Self {
- type_ids,
- data_type: builder.data_type,
- offsets: Some(offsets),
- children: builder.child_data,
- }
- }
- }
- }
-
- /// Returns the length of this array
- #[inline]
- pub fn len(&self) -> usize {
- self.type_ids.len()
- }
-
- /// Returns the type ids for this array
- #[inline]
- pub fn type_ids(&self) -> &ScalarBuffer<i8> {
- &self.type_ids
- }
-
- /// Returns the offsets for this array if this is a dense union
- #[inline]
- pub fn offsets(&self) -> Option<&ScalarBuffer<i32>> {
- self.offsets.as_ref()
- }
-
- /// Returns the children of this array
- #[inline]
- pub fn children(&self) -> &[ArrayData] {
- &self.children
- }
-
- /// Returns the data type of this array
- #[inline]
- pub fn data_type(&self) -> &DataType {
- &self.data_type
- }
-
- /// Returns the underlying parts of this [`UnionArrayData`]
- pub fn into_parts(
- self,
- ) -> (
- DataType,
- ScalarBuffer<i8>,
- Option<ScalarBuffer<i32>>,
- Vec<ArrayData>,
- ) {
- (self.data_type, self.type_ids, self.offsets, self.children)
- }
-
- /// Returns a zero-copy slice of this array
- pub fn slice(&self, offset: usize, len: usize) -> Self {
- let (offsets, children) = match &self.offsets {
- Some(offsets) => (Some(offsets.slice(offset, len)),
self.children.clone()),
- None => (
- None,
- self.children.iter().map(|c| c.slice(offset, len)).collect(),
- ),
- };
- Self {
- data_type: self.data_type.clone(),
- type_ids: self.type_ids.slice(offset, len),
- offsets,
- children,
- }
- }
-
- /// Returns an [`ArrayDataLayout`] representation of this
- pub(crate) fn layout(&self) -> ArrayDataLayout<'_> {
- let buffers = match &self.offsets {
- Some(offsets) => Buffers::two(self.type_ids.inner(),
offsets.inner()),
- None => Buffers::one(self.type_ids.inner()),
- };
-
- ArrayDataLayout {
- data_type: &self.data_type,
- len: self.type_ids.len(),
- offset: 0,
- nulls: None,
- buffers,
- child_data: &self.children,
- }
- }
-}