This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 78762dcf20d Split arrow_cast::cast::list into its own submodule
(#5537)
78762dcf20d is described below
commit 78762dcf20d6587c03b1ff20b7893be2edd1e02b
Author: Clide S <[email protected]>
AuthorDate: Wed Mar 20 21:55:01 2024 -0400
Split arrow_cast::cast::list into its own submodule (#5537)
* Split up arrow_cast::list
* Update arrow-cast/src/cast/mod.rs
Co-authored-by: Raphael Taylor-Davies
<[email protected]>
* Fix failing tests
---------
Co-authored-by: Clide Stefani <[email protected]>
Co-authored-by: Raphael Taylor-Davies
<[email protected]>
---
arrow-cast/src/cast/list.rs | 171 ++++++++++++++++++++++++++++++++
arrow-cast/src/{cast.rs => cast/mod.rs} | 154 +---------------------------
2 files changed, 174 insertions(+), 151 deletions(-)
diff --git a/arrow-cast/src/cast/list.rs b/arrow-cast/src/cast/list.rs
new file mode 100644
index 00000000000..33faacdccb9
--- /dev/null
+++ b/arrow-cast/src/cast/list.rs
@@ -0,0 +1,171 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::cast::*;
+
+/// Helper function that takes a primitive array and casts to a (generic) list
array.
+pub(crate) fn cast_values_to_list<O: OffsetSizeTrait>(
+ array: &dyn Array,
+ to: &FieldRef,
+ cast_options: &CastOptions,
+) -> Result<ArrayRef, ArrowError> {
+ let values = cast_with_options(array, to.data_type(), cast_options)?;
+ let offsets =
OffsetBuffer::from_lengths(std::iter::repeat(1).take(values.len()));
+ let list = GenericListArray::<O>::new(to.clone(), offsets, values, None);
+ Ok(Arc::new(list))
+}
+
+/// Helper function that takes a primitive array and casts to a fixed size
list array.
+pub(crate) fn cast_values_to_fixed_size_list(
+ array: &dyn Array,
+ to: &FieldRef,
+ size: i32,
+ cast_options: &CastOptions,
+) -> Result<ArrayRef, ArrowError> {
+ let values = cast_with_options(array, to.data_type(), cast_options)?;
+ let list = FixedSizeListArray::new(to.clone(), size, values, None);
+ Ok(Arc::new(list))
+}
+
+pub(crate) fn cast_fixed_size_list_to_list<OffsetSize>(
+ array: &dyn Array,
+) -> Result<ArrayRef, ArrowError>
+where
+ OffsetSize: OffsetSizeTrait,
+{
+ let fixed_size_list: &FixedSizeListArray = array.as_fixed_size_list();
+ let list: GenericListArray<OffsetSize> = fixed_size_list.clone().into();
+ Ok(Arc::new(list))
+}
+
+pub(crate) fn cast_list_to_fixed_size_list<OffsetSize>(
+ array: &GenericListArray<OffsetSize>,
+ field: &FieldRef,
+ size: i32,
+ cast_options: &CastOptions,
+) -> Result<ArrayRef, ArrowError>
+where
+ OffsetSize: OffsetSizeTrait,
+{
+ let cap = array.len() * size as usize;
+
+ let mut nulls = (cast_options.safe || array.null_count() != 0).then(|| {
+ let mut buffer = BooleanBufferBuilder::new(array.len());
+ match array.nulls() {
+ Some(n) => buffer.append_buffer(n.inner()),
+ None => buffer.append_n(array.len(), true),
+ }
+ buffer
+ });
+
+ // Nulls in FixedSizeListArray take up space and so we must pad the values
+ let values = array.values().to_data();
+ let mut mutable = MutableArrayData::new(vec![&values], cast_options.safe,
cap);
+ // The end position in values of the last incorrectly-sized list slice
+ let mut last_pos = 0;
+ for (idx, w) in array.offsets().windows(2).enumerate() {
+ let start_pos = w[0].as_usize();
+ let end_pos = w[1].as_usize();
+ let len = end_pos - start_pos;
+
+ if len != size as usize {
+ if cast_options.safe || array.is_null(idx) {
+ if last_pos != start_pos {
+ // Extend with valid slices
+ mutable.extend(0, last_pos, start_pos);
+ }
+ // Pad this slice with nulls
+ mutable.extend_nulls(size as _);
+ nulls.as_mut().unwrap().set_bit(idx, false);
+ // Set last_pos to the end of this slice's values
+ last_pos = end_pos
+ } else {
+ return Err(ArrowError::CastError(format!(
+ "Cannot cast to FixedSizeList({size}): value at index
{idx} has length {len}",
+ )));
+ }
+ }
+ }
+
+ let values = match last_pos {
+ 0 => array.values().slice(0, cap), // All slices were the correct
length
+ _ => {
+ if mutable.len() != cap {
+ // Remaining slices were all correct length
+ let remaining = cap - mutable.len();
+ mutable.extend(0, last_pos, last_pos + remaining)
+ }
+ make_array(mutable.freeze())
+ }
+ };
+
+ // Cast the inner values if necessary
+ let values = cast_with_options(values.as_ref(), field.data_type(),
cast_options)?;
+
+ // Construct the FixedSizeListArray
+ let nulls = nulls.map(|mut x| x.finish().into());
+ let array = FixedSizeListArray::new(field.clone(), size, values, nulls);
+ Ok(Arc::new(array))
+}
+
+/// Helper function that takes an Generic list container and casts the inner
datatype.
+pub(crate) fn cast_list_values<O: OffsetSizeTrait>(
+ array: &dyn Array,
+ to: &FieldRef,
+ cast_options: &CastOptions,
+) -> Result<ArrayRef, ArrowError> {
+ let list = array.as_list::<O>();
+ let values = cast_with_options(list.values(), to.data_type(),
cast_options)?;
+ Ok(Arc::new(GenericListArray::<O>::new(
+ to.clone(),
+ list.offsets().clone(),
+ values,
+ list.nulls().cloned(),
+ )))
+}
+
+/// Cast the container type of List/Largelist array along with the inner
datatype
+pub(crate) fn cast_list<I: OffsetSizeTrait, O: OffsetSizeTrait>(
+ array: &dyn Array,
+ field: &FieldRef,
+ cast_options: &CastOptions,
+) -> Result<ArrayRef, ArrowError> {
+ let list = array.as_list::<I>();
+ let values = list.values();
+ let offsets = list.offsets();
+ let nulls = list.nulls().cloned();
+
+ if !O::IS_LARGE && values.len() > i32::MAX as usize {
+ return Err(ArrowError::ComputeError(
+ "LargeList too large to cast to List".into(),
+ ));
+ }
+
+ // Recursively cast values
+ let values = cast_with_options(values, field.data_type(), cast_options)?;
+ let offsets: Vec<_> = offsets.iter().map(|x|
O::usize_as(x.as_usize())).collect();
+
+ // Safety: valid offsets and checked for overflow
+ let offsets = unsafe { OffsetBuffer::new_unchecked(offsets.into()) };
+
+ Ok(Arc::new(GenericListArray::<O>::new(
+ field.clone(),
+ offsets,
+ values,
+ nulls,
+ )))
+}
diff --git a/arrow-cast/src/cast.rs b/arrow-cast/src/cast/mod.rs
similarity index 98%
rename from arrow-cast/src/cast.rs
rename to arrow-cast/src/cast/mod.rs
index 7868946532c..2b9892aa3fb 100644
--- a/arrow-cast/src/cast.rs
+++ b/arrow-cast/src/cast/mod.rs
@@ -37,6 +37,9 @@
//! assert_eq!(7.0, c.value(2));
//! ```
+mod list;
+use crate::cast::list::*;
+
use chrono::{NaiveTime, Offset, TimeZone, Utc};
use std::cmp::Ordering;
use std::sync::Arc;
@@ -3051,30 +3054,6 @@ where
Ok(Arc::new(b.finish()))
}
-/// Helper function that takes a primitive array and casts to a (generic) list
array.
-fn cast_values_to_list<O: OffsetSizeTrait>(
- array: &dyn Array,
- to: &FieldRef,
- cast_options: &CastOptions,
-) -> Result<ArrayRef, ArrowError> {
- let values = cast_with_options(array, to.data_type(), cast_options)?;
- let offsets =
OffsetBuffer::from_lengths(std::iter::repeat(1).take(values.len()));
- let list = GenericListArray::<O>::new(to.clone(), offsets, values, None);
- Ok(Arc::new(list))
-}
-
-/// Helper function that takes a primitive array and casts to a fixed size
list array.
-fn cast_values_to_fixed_size_list(
- array: &dyn Array,
- to: &FieldRef,
- size: i32,
- cast_options: &CastOptions,
-) -> Result<ArrayRef, ArrowError> {
- let values = cast_with_options(array, to.data_type(), cast_options)?;
- let list = FixedSizeListArray::new(to.clone(), size, values, None);
- Ok(Arc::new(list))
-}
-
/// A specified helper to cast from `GenericBinaryArray` to
`GenericStringArray` when they have same
/// offset size so re-encoding offset is unnecessary.
fn cast_binary_to_string<O: OffsetSizeTrait>(
@@ -3217,133 +3196,6 @@ where
Ok(Arc::new(GenericByteArray::<TO>::from(array_data)))
}
-fn cast_fixed_size_list_to_list<OffsetSize>(array: &dyn Array) ->
Result<ArrayRef, ArrowError>
-where
- OffsetSize: OffsetSizeTrait,
-{
- let fixed_size_list: &FixedSizeListArray = array.as_fixed_size_list();
- let list: GenericListArray<OffsetSize> = fixed_size_list.clone().into();
- Ok(Arc::new(list))
-}
-
-fn cast_list_to_fixed_size_list<OffsetSize>(
- array: &GenericListArray<OffsetSize>,
- field: &FieldRef,
- size: i32,
- cast_options: &CastOptions,
-) -> Result<ArrayRef, ArrowError>
-where
- OffsetSize: OffsetSizeTrait,
-{
- let cap = array.len() * size as usize;
-
- let mut nulls = (cast_options.safe || array.null_count() != 0).then(|| {
- let mut buffer = BooleanBufferBuilder::new(array.len());
- match array.nulls() {
- Some(n) => buffer.append_buffer(n.inner()),
- None => buffer.append_n(array.len(), true),
- }
- buffer
- });
-
- // Nulls in FixedSizeListArray take up space and so we must pad the values
- let values = array.values().to_data();
- let mut mutable = MutableArrayData::new(vec![&values], cast_options.safe,
cap);
- // The end position in values of the last incorrectly-sized list slice
- let mut last_pos = 0;
- for (idx, w) in array.offsets().windows(2).enumerate() {
- let start_pos = w[0].as_usize();
- let end_pos = w[1].as_usize();
- let len = end_pos - start_pos;
-
- if len != size as usize {
- if cast_options.safe || array.is_null(idx) {
- if last_pos != start_pos {
- // Extend with valid slices
- mutable.extend(0, last_pos, start_pos);
- }
- // Pad this slice with nulls
- mutable.extend_nulls(size as _);
- nulls.as_mut().unwrap().set_bit(idx, false);
- // Set last_pos to the end of this slice's values
- last_pos = end_pos
- } else {
- return Err(ArrowError::CastError(format!(
- "Cannot cast to FixedSizeList({size}): value at index
{idx} has length {len}",
- )));
- }
- }
- }
-
- let values = match last_pos {
- 0 => array.values().slice(0, cap), // All slices were the correct
length
- _ => {
- if mutable.len() != cap {
- // Remaining slices were all correct length
- let remaining = cap - mutable.len();
- mutable.extend(0, last_pos, last_pos + remaining)
- }
- make_array(mutable.freeze())
- }
- };
-
- // Cast the inner values if necessary
- let values = cast_with_options(values.as_ref(), field.data_type(),
cast_options)?;
-
- // Construct the FixedSizeListArray
- let nulls = nulls.map(|mut x| x.finish().into());
- let array = FixedSizeListArray::new(field.clone(), size, values, nulls);
- Ok(Arc::new(array))
-}
-
-/// Helper function that takes an Generic list container and casts the inner
datatype.
-fn cast_list_values<O: OffsetSizeTrait>(
- array: &dyn Array,
- to: &FieldRef,
- cast_options: &CastOptions,
-) -> Result<ArrayRef, ArrowError> {
- let list = array.as_list::<O>();
- let values = cast_with_options(list.values(), to.data_type(),
cast_options)?;
- Ok(Arc::new(GenericListArray::<O>::new(
- to.clone(),
- list.offsets().clone(),
- values,
- list.nulls().cloned(),
- )))
-}
-
-/// Cast the container type of List/Largelist array along with the inner
datatype
-fn cast_list<I: OffsetSizeTrait, O: OffsetSizeTrait>(
- array: &dyn Array,
- field: &FieldRef,
- cast_options: &CastOptions,
-) -> Result<ArrayRef, ArrowError> {
- let list = array.as_list::<I>();
- let values = list.values();
- let offsets = list.offsets();
- let nulls = list.nulls().cloned();
-
- if !O::IS_LARGE && values.len() > i32::MAX as usize {
- return Err(ArrowError::ComputeError(
- "LargeList too large to cast to List".into(),
- ));
- }
-
- // Recursively cast values
- let values = cast_with_options(values, field.data_type(), cast_options)?;
- let offsets: Vec<_> = offsets.iter().map(|x|
O::usize_as(x.as_usize())).collect();
-
- // Safety: valid offsets and checked for overflow
- let offsets = unsafe { OffsetBuffer::new_unchecked(offsets.into()) };
-
- Ok(Arc::new(GenericListArray::<O>::new(
- field.clone(),
- offsets,
- values,
- nulls,
- )))
-}
-
#[cfg(test)]
mod tests {
use arrow_buffer::{Buffer, NullBuffer};