This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 1cc85635b Replace the `fn get_data_type` by `const DATA_TYPE` in
BinaryArray and StringArray (#2289)
1cc85635b is described below
commit 1cc85635bd921ccd1e84b04ab0b8a00e100c26b3
Author: Remzi Yang <[email protected]>
AuthorDate: Wed Aug 3 18:28:30 2022 +0800
Replace the `fn get_data_type` by `const DATA_TYPE` in BinaryArray and
StringArray (#2289)
* const fn get_data_type for binary and string
Signed-off-by: remzi <[email protected]>
* const value instead of const fn
Signed-off-by: remzi <[email protected]>
* deprecate
Signed-off-by: remzi <[email protected]>
---
arrow/src/array/array_binary.rs | 28 ++++++++++----------
arrow/src/array/array_string.rs | 31 +++++++++++------------
arrow/src/array/builder/generic_binary_builder.rs | 15 +++--------
arrow/src/compute/kernels/concat_elements.rs | 4 +--
arrow/src/compute/kernels/substring.rs | 12 ++++-----
arrow/src/compute/kernels/take.rs | 11 ++++----
6 files changed, 47 insertions(+), 54 deletions(-)
diff --git a/arrow/src/array/array_binary.rs b/arrow/src/array/array_binary.rs
index 3e49f6031..dd21e0d51 100644
--- a/arrow/src/array/array_binary.rs
+++ b/arrow/src/array/array_binary.rs
@@ -37,15 +37,17 @@ pub struct GenericBinaryArray<OffsetSize: OffsetSizeTrait> {
}
impl<OffsetSize: OffsetSizeTrait> GenericBinaryArray<OffsetSize> {
+ /// Data type of the array.
+ pub const DATA_TYPE: DataType = if OffsetSize::IS_LARGE {
+ DataType::LargeBinary
+ } else {
+ DataType::Binary
+ };
+
/// Get the data type of the array.
- // Declare this function as `pub const fn` after
- // https://github.com/rust-lang/rust/issues/93706 is merged.
- pub fn get_data_type() -> DataType {
- if OffsetSize::IS_LARGE {
- DataType::LargeBinary
- } else {
- DataType::Binary
- }
+ #[deprecated(note = "please use `Self::DATA_TYPE` instead")]
+ pub const fn get_data_type() -> DataType {
+ Self::DATA_TYPE
}
/// Returns the length for value at index `i`.
@@ -158,7 +160,7 @@ impl<OffsetSize: OffsetSizeTrait>
GenericBinaryArray<OffsetSize> {
"The child array cannot contain null values."
);
- let builder = ArrayData::builder(Self::get_data_type())
+ let builder = ArrayData::builder(Self::DATA_TYPE)
.len(v.len())
.offset(v.offset())
.add_buffer(v.data_ref().buffers()[0].clone())
@@ -197,7 +199,7 @@ impl<OffsetSize: OffsetSizeTrait>
GenericBinaryArray<OffsetSize> {
assert!(!offsets.is_empty()); // wrote at least one
let actual_len = (offsets.len() / std::mem::size_of::<OffsetSize>()) -
1;
- let array_data = ArrayData::builder(Self::get_data_type())
+ let array_data = ArrayData::builder(Self::DATA_TYPE)
.len(actual_len)
.add_buffer(offsets.into())
.add_buffer(values.into());
@@ -276,7 +278,7 @@ impl<OffsetSize: OffsetSizeTrait> From<ArrayData> for
GenericBinaryArray<OffsetS
fn from(data: ArrayData) -> Self {
assert_eq!(
data.data_type(),
- &Self::get_data_type(),
+ &Self::DATA_TYPE,
"[Large]BinaryArray expects Datatype::[Large]Binary"
);
assert_eq!(
@@ -353,7 +355,7 @@ where
// calculate actual data_len, which may be different from the
iterator's upper bound
let data_len = offsets.len() - 1;
- let array_data = ArrayData::builder(Self::get_data_type())
+ let array_data = ArrayData::builder(Self::DATA_TYPE)
.len(data_len)
.add_buffer(Buffer::from_slice_ref(&offsets))
.add_buffer(Buffer::from_slice_ref(&values))
@@ -598,7 +600,7 @@ mod tests {
let offsets = [0, 5, 5, 12].map(|n| O::from_usize(n).unwrap());
// Array data: ["hello", "", "parquet"]
- let array_data1 =
ArrayData::builder(GenericBinaryArray::<O>::get_data_type())
+ let array_data1 =
ArrayData::builder(GenericBinaryArray::<O>::DATA_TYPE)
.len(3)
.add_buffer(Buffer::from_slice_ref(&offsets))
.add_buffer(Buffer::from_slice_ref(&values))
diff --git a/arrow/src/array/array_string.rs b/arrow/src/array/array_string.rs
index c332aa197..1bb99fce7 100644
--- a/arrow/src/array/array_string.rs
+++ b/arrow/src/array/array_string.rs
@@ -39,15 +39,17 @@ pub struct GenericStringArray<OffsetSize: OffsetSizeTrait> {
}
impl<OffsetSize: OffsetSizeTrait> GenericStringArray<OffsetSize> {
+ /// Data type of the array.
+ pub const DATA_TYPE: DataType = if OffsetSize::IS_LARGE {
+ DataType::LargeUtf8
+ } else {
+ DataType::Utf8
+ };
+
/// Get the data type of the array.
- // Declare this function as `pub const fn` after
- // https://github.com/rust-lang/rust/issues/93706 is merged.
- pub fn get_data_type() -> DataType {
- if OffsetSize::IS_LARGE {
- DataType::LargeUtf8
- } else {
- DataType::Utf8
- }
+ #[deprecated(note = "please use `Self::DATA_TYPE` instead")]
+ pub const fn get_data_type() -> DataType {
+ Self::DATA_TYPE
}
/// Returns the length for the element at index `i`.
@@ -148,7 +150,7 @@ impl<OffsetSize: OffsetSizeTrait>
GenericStringArray<OffsetSize> {
"The child array cannot contain null values."
);
- let builder = ArrayData::builder(Self::get_data_type())
+ let builder = ArrayData::builder(Self::DATA_TYPE)
.len(v.len())
.offset(v.offset())
.add_buffer(v.data().buffers()[0].clone())
@@ -187,7 +189,7 @@ impl<OffsetSize: OffsetSizeTrait>
GenericStringArray<OffsetSize> {
assert!(!offsets.is_empty()); // wrote at least one
let actual_len = (offsets.len() / std::mem::size_of::<OffsetSize>()) -
1;
- let array_data = ArrayData::builder(Self::get_data_type())
+ let array_data = ArrayData::builder(Self::DATA_TYPE)
.len(actual_len)
.add_buffer(offsets.into())
.add_buffer(values.into());
@@ -264,7 +266,7 @@ where
// calculate actual data_len, which may be different from the
iterator's upper bound
let data_len = (offsets.len() / offset_size) - 1;
- let array_data = ArrayData::builder(Self::get_data_type())
+ let array_data = ArrayData::builder(Self::DATA_TYPE)
.len(data_len)
.add_buffer(offsets.into())
.add_buffer(values.into())
@@ -342,10 +344,7 @@ impl<OffsetSize: OffsetSizeTrait>
From<GenericBinaryArray<OffsetSize>>
for GenericStringArray<OffsetSize>
{
fn from(v: GenericBinaryArray<OffsetSize>) -> Self {
- let builder = v
- .into_data()
- .into_builder()
- .data_type(Self::get_data_type());
+ let builder = v.into_data().into_builder().data_type(Self::DATA_TYPE);
let data = unsafe { builder.build_unchecked() };
Self::from(data)
}
@@ -355,7 +354,7 @@ impl<OffsetSize: OffsetSizeTrait> From<ArrayData> for
GenericStringArray<OffsetS
fn from(data: ArrayData) -> Self {
assert_eq!(
data.data_type(),
- &Self::get_data_type(),
+ &Self::DATA_TYPE,
"[Large]StringArray expects Datatype::[Large]Utf8"
);
assert_eq!(
diff --git a/arrow/src/array/builder/generic_binary_builder.rs
b/arrow/src/array/builder/generic_binary_builder.rs
index 8f242243c..aca2e1d96 100644
--- a/arrow/src/array/builder/generic_binary_builder.rs
+++ b/arrow/src/array/builder/generic_binary_builder.rs
@@ -15,12 +15,9 @@
// specific language governing permissions and limitations
// under the License.
-use crate::{
- array::{
- ArrayBuilder, ArrayDataBuilder, ArrayRef, GenericBinaryArray,
OffsetSizeTrait,
- UInt8BufferBuilder,
- },
- datatypes::DataType,
+use crate::array::{
+ ArrayBuilder, ArrayDataBuilder, ArrayRef, GenericBinaryArray,
OffsetSizeTrait,
+ UInt8BufferBuilder,
};
use std::any::Any;
use std::sync::Arc;
@@ -80,11 +77,7 @@ impl<OffsetSize: OffsetSizeTrait>
GenericBinaryBuilder<OffsetSize> {
/// Builds the [`GenericBinaryArray`] and reset this builder.
pub fn finish(&mut self) -> GenericBinaryArray<OffsetSize> {
- let array_type = if OffsetSize::IS_LARGE {
- DataType::LargeBinary
- } else {
- DataType::Binary
- };
+ let array_type = GenericBinaryArray::<OffsetSize>::DATA_TYPE;
let array_builder = ArrayDataBuilder::new(array_type)
.len(self.len())
.add_buffer(self.offsets_builder.finish())
diff --git a/arrow/src/compute/kernels/concat_elements.rs
b/arrow/src/compute/kernels/concat_elements.rs
index 7d460b21c..ac365a096 100644
--- a/arrow/src/compute/kernels/concat_elements.rs
+++ b/arrow/src/compute/kernels/concat_elements.rs
@@ -75,7 +75,7 @@ pub fn concat_elements_utf8<Offset: OffsetSizeTrait>(
output_offsets.append(Offset::from_usize(output_values.len()).unwrap());
}
- let builder =
ArrayDataBuilder::new(GenericStringArray::<Offset>::get_data_type())
+ let builder =
ArrayDataBuilder::new(GenericStringArray::<Offset>::DATA_TYPE)
.len(left.len())
.add_buffer(output_offsets.finish())
.add_buffer(output_values.finish())
@@ -155,7 +155,7 @@ pub fn concat_elements_utf8_many<Offset: OffsetSizeTrait>(
output_offsets.append(Offset::from_usize(output_values.len()).unwrap());
}
- let builder =
ArrayDataBuilder::new(GenericStringArray::<Offset>::get_data_type())
+ let builder =
ArrayDataBuilder::new(GenericStringArray::<Offset>::DATA_TYPE)
.len(size)
.add_buffer(output_offsets.finish())
.add_buffer(output_values.finish())
diff --git a/arrow/src/compute/kernels/substring.rs
b/arrow/src/compute/kernels/substring.rs
index 024f5633f..5190d0bf0 100644
--- a/arrow/src/compute/kernels/substring.rs
+++ b/arrow/src/compute/kernels/substring.rs
@@ -205,7 +205,7 @@ pub fn substring_by_char<OffsetSize: OffsetSizeTrait>(
});
let data = unsafe {
ArrayData::new_unchecked(
- GenericStringArray::<OffsetSize>::get_data_type(),
+ GenericStringArray::<OffsetSize>::DATA_TYPE,
array.len(),
None,
array
@@ -294,7 +294,7 @@ fn binary_substring<OffsetSize: OffsetSizeTrait>(
let data = unsafe {
ArrayData::new_unchecked(
- GenericBinaryArray::<OffsetSize>::get_data_type(),
+ GenericBinaryArray::<OffsetSize>::DATA_TYPE,
array.len(),
None,
array
@@ -425,7 +425,7 @@ fn utf8_substring<OffsetSize: OffsetSizeTrait>(
let data = unsafe {
ArrayData::new_unchecked(
- GenericStringArray::<OffsetSize>::get_data_type(),
+ GenericStringArray::<OffsetSize>::DATA_TYPE,
array.len(),
None,
array
@@ -587,7 +587,7 @@ mod tests {
// set the first and third element to be valid
let bitmap = [0b101_u8];
- let data = ArrayData::builder(GenericBinaryArray::<O>::get_data_type())
+ let data = ArrayData::builder(GenericBinaryArray::<O>::DATA_TYPE)
.len(2)
.add_buffer(Buffer::from_slice_ref(offsets))
.add_buffer(Buffer::from_iter(values))
@@ -814,7 +814,7 @@ mod tests {
// set the first and third element to be valid
let bitmap = [0b101_u8];
- let data = ArrayData::builder(GenericStringArray::<O>::get_data_type())
+ let data = ArrayData::builder(GenericStringArray::<O>::DATA_TYPE)
.len(2)
.add_buffer(Buffer::from_slice_ref(offsets))
.add_buffer(Buffer::from(values))
@@ -939,7 +939,7 @@ mod tests {
// set the first and third element to be valid
let bitmap = [0b101_u8];
- let data = ArrayData::builder(GenericStringArray::<O>::get_data_type())
+ let data = ArrayData::builder(GenericStringArray::<O>::DATA_TYPE)
.len(2)
.add_buffer(Buffer::from_slice_ref(offsets))
.add_buffer(Buffer::from(values))
diff --git a/arrow/src/compute/kernels/take.rs
b/arrow/src/compute/kernels/take.rs
index 5bfd257fc..ab99acd2c 100644
--- a/arrow/src/compute/kernels/take.rs
+++ b/arrow/src/compute/kernels/take.rs
@@ -771,12 +771,11 @@ where
};
}
- let array_data =
- ArrayData::builder(GenericStringArray::<OffsetSize>::get_data_type())
- .len(data_len)
- .add_buffer(offsets_buffer.into())
- .add_buffer(values.into())
- .null_bit_buffer(nulls);
+ let array_data =
ArrayData::builder(GenericStringArray::<OffsetSize>::DATA_TYPE)
+ .len(data_len)
+ .add_buffer(offsets_buffer.into())
+ .add_buffer(values.into())
+ .null_bit_buffer(nulls);
let array_data = unsafe { array_data.build_unchecked() };