This is an automated email from the ASF dual-hosted git repository.
nevime pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new a6c6ef6 ARROW-10002: [Rust] Remove trait specialization from arrow
crate
a6c6ef6 is described below
commit a6c6ef6ab6a701cf5c22c8a4e366715d798e4956
Author: Jorge C. Leitao <[email protected]>
AuthorDate: Sun Oct 18 21:05:48 2020 +0200
ARROW-10002: [Rust] Remove trait specialization from arrow crate
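This PR removes the use of trait specialization (an unstable feature) from the
arrow crate. The specialized impls are replaced by plain `if` checks on the
associated constant `T::DATA_TYPE`, relying on the compiler to eliminate the
trivially constant branches.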
I verified that the assembly code was the same in a [simple
example](https://rust.godbolt.org/z/qrcW8W). I do not know if this generalizes
to our use-case, but I suspect so as LLVM is (hopefully) removing trivial
branches like `if a != a`.
The change from `get_data_type()` to `DATA_TYPE` is not strictly necessary for
removing specialization; I made it before realizing that. IMO it makes it more
explicit that this is a constant rather than a function, but we can revert it.
Closes #8485 from jorgecarleitao/simp_types
Authored-by: Jorge C. Leitao <[email protected]>
Signed-off-by: Neville Dipale <[email protected]>
---
rust/arrow/src/array/array.rs | 250 ++++++++++++---------------
rust/arrow/src/array/builder.rs | 208 +++++++++++-----------
rust/arrow/src/array/equal.rs | 39 ++---
rust/arrow/src/array/union.rs | 6 +-
rust/arrow/src/compute/kernels/arithmetic.rs | 8 +-
rust/arrow/src/compute/kernels/filter.rs | 2 +-
rust/arrow/src/compute/kernels/take.rs | 2 +-
rust/arrow/src/datatypes.rs | 14 +-
rust/arrow/src/lib.rs | 1 -
rust/arrow/src/tensor.rs | 2 +-
rust/parquet/src/arrow/converter.rs | 4 +-
11 files changed, 248 insertions(+), 288 deletions(-)
diff --git a/rust/arrow/src/array/array.rs b/rust/arrow/src/array/array.rs
index b60e523..2a181c6 100644
--- a/rust/arrow/src/array/array.rs
+++ b/rust/arrow/src/array/array.rs
@@ -463,7 +463,7 @@ impl<T: ArrowPrimitiveType> Array for PrimitiveArray<T> {
/// Boolean arrays are bit-packed and so implemented separately.
impl<T: ArrowNumericType> PrimitiveArray<T> {
pub fn new(length: usize, values: Buffer, null_count: usize, offset:
usize) -> Self {
- let array_data = ArrayData::builder(T::get_data_type())
+ let array_data = ArrayData::builder(T::DATA_TYPE)
.len(length)
.add_buffer(values)
.null_count(null_count)
@@ -502,6 +502,98 @@ impl<T: ArrowNumericType> PrimitiveArray<T> {
}
}
+fn as_datetime<T: ArrowPrimitiveType>(v: i64) -> Option<NaiveDateTime> {
+ match T::DATA_TYPE {
+ DataType::Date32(_) => {
+ // convert days into seconds
+ Some(NaiveDateTime::from_timestamp(v as i64 * SECONDS_IN_DAY, 0))
+ }
+ DataType::Date64(_) => Some(NaiveDateTime::from_timestamp(
+ // extract seconds from milliseconds
+ v / MILLISECONDS,
+ // discard extracted seconds and convert milliseconds to
nanoseconds
+ (v % MILLISECONDS * MICROSECONDS) as u32,
+ )),
+ DataType::Time32(_) | DataType::Time64(_) => None,
+ DataType::Timestamp(unit, _) => match unit {
+ TimeUnit::Second => Some(NaiveDateTime::from_timestamp(v, 0)),
+ TimeUnit::Millisecond => Some(NaiveDateTime::from_timestamp(
+ // extract seconds from milliseconds
+ v / MILLISECONDS,
+ // discard extracted seconds and convert milliseconds to
nanoseconds
+ (v % MILLISECONDS * MICROSECONDS) as u32,
+ )),
+ TimeUnit::Microsecond => Some(NaiveDateTime::from_timestamp(
+ // extract seconds from microseconds
+ v / MICROSECONDS,
+ // discard extracted seconds and convert microseconds to
nanoseconds
+ (v % MICROSECONDS * MILLISECONDS) as u32,
+ )),
+ TimeUnit::Nanosecond => Some(NaiveDateTime::from_timestamp(
+ // extract seconds from nanoseconds
+ v / NANOSECONDS,
+ // discard extracted seconds
+ (v % NANOSECONDS) as u32,
+ )),
+ },
+ // interval is not yet fully documented [ARROW-3097]
+ DataType::Interval(_) => None,
+ _ => None,
+ }
+}
+
+fn as_date<T: ArrowPrimitiveType>(v: i64) -> Option<NaiveDate> {
+ as_datetime::<T>(v).map(|datetime| datetime.date())
+}
+
+fn as_time<T: ArrowPrimitiveType>(v: i64) -> Option<NaiveTime> {
+ match T::DATA_TYPE {
+ DataType::Time32(unit) => {
+ // safe to immediately cast to u32 as `self.value(i)` is positive
i32
+ let v = v as u32;
+ match unit {
+ TimeUnit::Second =>
Some(NaiveTime::from_num_seconds_from_midnight(v, 0)),
+ TimeUnit::Millisecond => {
+ Some(NaiveTime::from_num_seconds_from_midnight(
+ // extract seconds from milliseconds
+ v / MILLISECONDS as u32,
+ // discard extracted seconds and convert milliseconds
to
+ // nanoseconds
+ v % MILLISECONDS as u32 * MICROSECONDS as u32,
+ ))
+ }
+ _ => None,
+ }
+ }
+ DataType::Time64(unit) => {
+ match unit {
+ TimeUnit::Microsecond => {
+ Some(NaiveTime::from_num_seconds_from_midnight(
+ // extract seconds from microseconds
+ (v / MICROSECONDS) as u32,
+ // discard extracted seconds and convert microseconds
to
+ // nanoseconds
+ (v % MICROSECONDS * MILLISECONDS) as u32,
+ ))
+ }
+ TimeUnit::Nanosecond => {
+ Some(NaiveTime::from_num_seconds_from_midnight(
+ // extract seconds from nanoseconds
+ (v / NANOSECONDS) as u32,
+ // discard extracted seconds
+ (v % NANOSECONDS) as u32,
+ ))
+ }
+ _ => None,
+ }
+ }
+ DataType::Timestamp(_, _) => as_datetime::<T>(v).map(|datetime|
datetime.time()),
+ DataType::Date32(_) | DataType::Date64(_) =>
Some(NaiveTime::from_hms(0, 0, 0)),
+ DataType::Interval(_) => None,
+ _ => None,
+ }
+}
+
impl<T: ArrowTemporalType + ArrowNumericType> PrimitiveArray<T>
where
i64: std::convert::From<T::Native>,
@@ -511,44 +603,7 @@ where
/// If a data type cannot be converted to `NaiveDateTime`, a `None` is
returned.
/// A valid value is expected, thus the user should first check for
validity.
pub fn value_as_datetime(&self, i: usize) -> Option<NaiveDateTime> {
- let v = i64::from(self.value(i));
- match self.data_type() {
- DataType::Date32(_) => {
- // convert days into seconds
- Some(NaiveDateTime::from_timestamp(v as i64 * SECONDS_IN_DAY,
0))
- }
- DataType::Date64(_) => Some(NaiveDateTime::from_timestamp(
- // extract seconds from milliseconds
- v / MILLISECONDS,
- // discard extracted seconds and convert milliseconds to
nanoseconds
- (v % MILLISECONDS * MICROSECONDS) as u32,
- )),
- DataType::Time32(_) | DataType::Time64(_) => None,
- DataType::Timestamp(unit, _) => match unit {
- TimeUnit::Second => Some(NaiveDateTime::from_timestamp(v, 0)),
- TimeUnit::Millisecond => Some(NaiveDateTime::from_timestamp(
- // extract seconds from milliseconds
- v / MILLISECONDS,
- // discard extracted seconds and convert milliseconds to
nanoseconds
- (v % MILLISECONDS * MICROSECONDS) as u32,
- )),
- TimeUnit::Microsecond => Some(NaiveDateTime::from_timestamp(
- // extract seconds from microseconds
- v / MICROSECONDS,
- // discard extracted seconds and convert microseconds to
nanoseconds
- (v % MICROSECONDS * MILLISECONDS) as u32,
- )),
- TimeUnit::Nanosecond => Some(NaiveDateTime::from_timestamp(
- // extract seconds from nanoseconds
- v / NANOSECONDS,
- // discard extracted seconds
- (v % NANOSECONDS) as u32,
- )),
- },
- // interval is not yet fully documented [ARROW-3097]
- DataType::Interval(_) => None,
- _ => None,
- }
+ as_datetime::<T>(i64::from(self.value(i)))
}
/// Returns value as a chrono `NaiveDate` by using `Self::datetime()`
@@ -562,105 +617,36 @@ where
///
/// `Date32` and `Date64` return UTC midnight as they do not have time
resolution
pub fn value_as_time(&self, i: usize) -> Option<NaiveTime> {
- match self.data_type() {
- DataType::Time32(unit) => {
- // safe to immediately cast to u32 as `self.value(i)` is
positive i32
- let v = i64::from(self.value(i)) as u32;
- match unit {
- TimeUnit::Second => {
- Some(NaiveTime::from_num_seconds_from_midnight(v, 0))
- }
- TimeUnit::Millisecond => {
- Some(NaiveTime::from_num_seconds_from_midnight(
- // extract seconds from milliseconds
- v / MILLISECONDS as u32,
- // discard extracted seconds and convert
milliseconds to
- // nanoseconds
- v % MILLISECONDS as u32 * MICROSECONDS as u32,
- ))
- }
- _ => None,
- }
- }
- DataType::Time64(unit) => {
- let v = i64::from(self.value(i));
- match unit {
- TimeUnit::Microsecond => {
- Some(NaiveTime::from_num_seconds_from_midnight(
- // extract seconds from microseconds
- (v / MICROSECONDS) as u32,
- // discard extracted seconds and convert
microseconds to
- // nanoseconds
- (v % MICROSECONDS * MILLISECONDS) as u32,
- ))
- }
- TimeUnit::Nanosecond => {
- Some(NaiveTime::from_num_seconds_from_midnight(
- // extract seconds from nanoseconds
- (v / NANOSECONDS) as u32,
- // discard extracted seconds
- (v % NANOSECONDS) as u32,
- ))
- }
- _ => None,
- }
- }
- DataType::Timestamp(_, _) => {
- self.value_as_datetime(i).map(|datetime| datetime.time())
- }
- DataType::Date32(_) | DataType::Date64(_) => {
- Some(NaiveTime::from_hms(0, 0, 0))
- }
- DataType::Interval(_) => None,
- _ => None,
- }
+ as_time::<T>(i64::from(self.value(i)))
}
}
impl<T: ArrowPrimitiveType> fmt::Debug for PrimitiveArray<T> {
- default fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
- write!(f, "PrimitiveArray<{:?}>\n[\n", T::get_data_type())?;
- print_long_array(self, f, |array, index, f| {
- fmt::Debug::fmt(&array.value(index), f)
- })?;
- write!(f, "]")
- }
-}
-
-impl<T: ArrowNumericType> fmt::Debug for PrimitiveArray<T> {
- default fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
- write!(f, "PrimitiveArray<{:?}>\n[\n", T::get_data_type())?;
- print_long_array(self, f, |array, index, f| {
- fmt::Debug::fmt(&array.value(index), f)
- })?;
- write!(f, "]")
- }
-}
-
-impl<T: ArrowNumericType + ArrowTemporalType> fmt::Debug for PrimitiveArray<T>
-where
- i64: std::convert::From<T::Native>,
-{
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
- write!(f, "PrimitiveArray<{:?}>\n[\n", T::get_data_type())?;
- print_long_array(self, f, |array, index, f| match T::get_data_type() {
+ write!(f, "PrimitiveArray<{:?}>\n[\n", T::DATA_TYPE)?;
+ print_long_array(self, f, |array, index, f| match T::DATA_TYPE {
DataType::Date32(_) | DataType::Date64(_) => {
- match array.value_as_date(index) {
+ let v = self.value(index).to_usize().unwrap() as i64;
+ match as_date::<T>(v) {
Some(date) => write!(f, "{:?}", date),
None => write!(f, "null"),
}
}
DataType::Time32(_) | DataType::Time64(_) => {
- match array.value_as_time(index) {
+ let v = self.value(index).to_usize().unwrap() as i64;
+ match as_time::<T>(v) {
Some(time) => write!(f, "{:?}", time),
None => write!(f, "null"),
}
}
- DataType::Timestamp(_, _) => match array.value_as_datetime(index) {
- Some(datetime) => write!(f, "{:?}", datetime),
- None => write!(f, "null"),
- },
- _ => write!(f, "null"),
+ DataType::Timestamp(_, _) => {
+ let v = self.value(index).to_usize().unwrap() as i64;
+ match as_datetime::<T>(v) {
+ Some(datetime) => write!(f, "{:?}", datetime),
+ None => write!(f, "null"),
+ }
+ }
+ _ => fmt::Debug::fmt(&array.value(index), f),
})?;
write!(f, "]")
}
@@ -684,16 +670,6 @@ impl PrimitiveArray<BooleanType> {
}
}
-impl fmt::Debug for PrimitiveArray<BooleanType> {
- fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
- write!(f, "PrimitiveArray<{:?}>\n[\n", BooleanType::get_data_type())?;
- print_long_array(self, f, |array, index, f| {
- fmt::Debug::fmt(&array.value(index), f)
- })?;
- write!(f, "]")
- }
-}
-
impl<'a, T: ArrowPrimitiveType> IntoIterator for &'a PrimitiveArray<T> {
type Item = Option<<T as ArrowPrimitiveType>::Native>;
type IntoIter = PrimitiveIter<'a, T>;
@@ -737,7 +713,7 @@ impl<T: ArrowPrimitiveType, Ptr: Borrow<Option<<T as
ArrowPrimitiveType>::Native
});
let data = ArrayData::new(
- T::get_data_type(),
+ T::DATA_TYPE,
data_len,
None,
Some(null_buf.freeze()),
@@ -756,7 +732,7 @@ macro_rules! def_numeric_from_vec {
( $ty:ident ) => {
impl From<Vec<<$ty as ArrowPrimitiveType>::Native>> for
PrimitiveArray<$ty> {
fn from(data: Vec<<$ty as ArrowPrimitiveType>::Native>) -> Self {
- let array_data = ArrayData::builder($ty::get_data_type())
+ let array_data = ArrayData::builder($ty::DATA_TYPE)
.len(data.len())
.add_buffer(Buffer::from(data.to_byte_slice()))
.build();
@@ -898,7 +874,7 @@ impl From<Vec<Option<bool>>> for BooleanArray {
/// Constructs a `PrimitiveArray` from an array data reference.
impl<T: ArrowPrimitiveType> From<ArrayDataRef> for PrimitiveArray<T> {
- default fn from(data: ArrayDataRef) -> Self {
+ fn from(data: ArrayDataRef) -> Self {
assert_eq!(
data.buffers().len(),
1,
@@ -2232,7 +2208,7 @@ impl<'a, K: ArrowPrimitiveType> DictionaryArray<K> {
pub fn keys_array(&self) -> PrimitiveArray<K> {
let data = self.data_ref();
let keys_data = ArrayData::new(
- K::get_data_type(),
+ K::DATA_TYPE,
data.len(),
Some(data.null_count()),
data.null_buffer().cloned(),
diff --git a/rust/arrow/src/array/builder.rs b/rust/arrow/src/array/builder.rs
index 8b20d80..ca45f9e 100644
--- a/rust/arrow/src/array/builder.rs
+++ b/rust/arrow/src/array/builder.rs
@@ -253,8 +253,18 @@ pub trait BufferBuilderTrait<T: ArrowPrimitiveType> {
}
impl<T: ArrowPrimitiveType> BufferBuilderTrait<T> for BufferBuilder<T> {
- default fn new(capacity: usize) -> Self {
- let buffer = MutableBuffer::new(capacity *
mem::size_of::<T::Native>());
+ #[inline]
+ fn new(capacity: usize) -> Self {
+ let buffer = if T::DATA_TYPE == DataType::Boolean {
+ let byte_capacity = bit_util::ceil(capacity, 8);
+ let actual_capacity =
bit_util::round_upto_multiple_of_64(byte_capacity);
+ let mut buffer = MutableBuffer::new(actual_capacity);
+ buffer.set_null_bits(0, actual_capacity);
+ buffer
+ } else {
+ MutableBuffer::new(capacity * mem::size_of::<T::Native>())
+ };
+
Self {
buffer,
len: 0,
@@ -275,43 +285,112 @@ impl<T: ArrowPrimitiveType> BufferBuilderTrait<T> for
BufferBuilder<T> {
bit_capacity / T::get_bit_width()
}
- default fn advance(&mut self, i: usize) -> Result<()> {
- let new_buffer_len = (self.len + i) * mem::size_of::<T::Native>();
+ #[inline]
+ fn advance(&mut self, i: usize) -> Result<()> {
+ let new_buffer_len = if T::DATA_TYPE == DataType::Boolean {
+ bit_util::ceil(self.len + i, 8)
+ } else {
+ (self.len + i) * mem::size_of::<T::Native>()
+ };
self.buffer.resize(new_buffer_len)?;
self.len += i;
Ok(())
}
- default fn reserve(&mut self, n: usize) -> Result<()> {
+ #[inline]
+ fn reserve(&mut self, n: usize) -> Result<()> {
let new_capacity = self.len + n;
- let byte_capacity = mem::size_of::<T::Native>() * new_capacity;
- self.buffer.reserve(byte_capacity)?;
+ if T::DATA_TYPE == DataType::Boolean {
+ if new_capacity > self.capacity() {
+ let new_byte_capacity = bit_util::ceil(new_capacity, 8);
+ let existing_capacity = self.buffer.capacity();
+ let new_capacity = self.buffer.reserve(new_byte_capacity)?;
+ self.buffer
+ .set_null_bits(existing_capacity, new_capacity -
existing_capacity);
+ }
+ } else {
+ let byte_capacity = mem::size_of::<T::Native>() * new_capacity;
+ self.buffer.reserve(byte_capacity)?;
+ }
Ok(())
}
- default fn append(&mut self, v: T::Native) -> Result<()> {
+ #[inline]
+ fn append(&mut self, v: T::Native) -> Result<()> {
self.reserve(1)?;
- self.write_bytes(v.to_byte_slice(), 1)
+ if T::DATA_TYPE == DataType::Boolean {
+ if v != T::default_value() {
+ unsafe {
+ bit_util::set_bit_raw(self.buffer.raw_data_mut(),
self.len);
+ }
+ }
+ self.len += 1;
+ } else {
+ self.write_bytes(v.to_byte_slice(), 1)?;
+ }
+ Ok(())
}
- default fn append_n(&mut self, n: usize, v: T::Native) -> Result<()> {
+ #[inline]
+ fn append_n(&mut self, n: usize, v: T::Native) -> Result<()> {
self.reserve(n)?;
- for _ in 0..n {
- self.write_bytes(v.to_byte_slice(), 1)?;
+ if T::DATA_TYPE == DataType::Boolean {
+ if n != 0 && v != T::default_value() {
+ unsafe {
+ bit_util::set_bits_raw(
+ self.buffer.raw_data_mut(),
+ self.len,
+ self.len + n,
+ )
+ }
+ }
+ self.len += n;
+ } else {
+ for _ in 0..n {
+ self.write_bytes(v.to_byte_slice(), 1)?;
+ }
}
Ok(())
}
- default fn append_slice(&mut self, slice: &[T::Native]) -> Result<()> {
+ #[inline]
+ fn append_slice(&mut self, slice: &[T::Native]) -> Result<()> {
let array_slots = slice.len();
self.reserve(array_slots)?;
- self.write_bytes(slice.to_byte_slice(), array_slots)
+
+ if T::DATA_TYPE == DataType::Boolean {
+ for v in slice {
+ if *v != T::default_value() {
+ // For performance the `len` of the buffer is not
+ // updated on each append but is updated in the
+ // `freeze` method instead.
+ unsafe {
+ bit_util::set_bit_raw(self.buffer.raw_data_mut(),
self.len);
+ }
+ }
+ self.len += 1;
+ }
+ Ok(())
+ } else {
+ self.write_bytes(slice.to_byte_slice(), array_slots)
+ }
}
- default fn finish(&mut self) -> Buffer {
- let buf = std::mem::replace(&mut self.buffer, MutableBuffer::new(0));
- self.len = 0;
- buf.freeze()
+ #[inline]
+ fn finish(&mut self) -> Buffer {
+ if T::DATA_TYPE == DataType::Boolean {
+ // `append` does not update the buffer's `len` so do it before
`freeze` is called.
+ let new_buffer_len = bit_util::ceil(self.len, 8);
+ debug_assert!(new_buffer_len >= self.buffer.len());
+ let mut buf = std::mem::replace(&mut self.buffer,
MutableBuffer::new(0));
+ self.len = 0;
+ buf.resize(new_buffer_len).unwrap();
+ buf.freeze()
+ } else {
+ let buf = std::mem::replace(&mut self.buffer,
MutableBuffer::new(0));
+ self.len = 0;
+ buf.freeze()
+ }
}
}
@@ -334,89 +413,6 @@ impl<T: ArrowPrimitiveType> BufferBuilder<T> {
}
}
-impl BufferBuilderTrait<BooleanType> for BufferBuilder<BooleanType> {
- fn new(capacity: usize) -> Self {
- let byte_capacity = bit_util::ceil(capacity, 8);
- let actual_capacity =
bit_util::round_upto_multiple_of_64(byte_capacity);
- let mut buffer = MutableBuffer::new(actual_capacity);
- buffer.set_null_bits(0, actual_capacity);
- Self {
- buffer,
- len: 0,
- _marker: PhantomData,
- }
- }
-
- fn advance(&mut self, i: usize) -> Result<()> {
- let new_buffer_len = bit_util::ceil(self.len + i, 8);
- self.buffer.resize(new_buffer_len)?;
- self.len += i;
- Ok(())
- }
-
- fn append(&mut self, v: bool) -> Result<()> {
- self.reserve(1)?;
- if v {
- // For performance the `len` of the buffer is not updated on each
append but
- // is updated in the `freeze` method instead.
- unsafe {
- bit_util::set_bit_raw(self.buffer.raw_data_mut(), self.len);
- }
- }
- self.len += 1;
- Ok(())
- }
-
- fn append_n(&mut self, n: usize, v: bool) -> Result<()> {
- self.reserve(n)?;
- if n != 0 && v {
- unsafe {
- bit_util::set_bits_raw(self.buffer.raw_data_mut(), self.len,
self.len + n)
- }
- }
- self.len += n;
- Ok(())
- }
-
- fn append_slice(&mut self, slice: &[bool]) -> Result<()> {
- self.reserve(slice.len())?;
- for v in slice {
- if *v {
- // For performance the `len` of the buffer is not
- // updated on each append but is updated in the
- // `freeze` method instead.
- unsafe {
- bit_util::set_bit_raw(self.buffer.raw_data_mut(),
self.len);
- }
- }
- self.len += 1;
- }
- Ok(())
- }
-
- fn reserve(&mut self, n: usize) -> Result<()> {
- let new_capacity = self.len + n;
- if new_capacity > self.capacity() {
- let new_byte_capacity = bit_util::ceil(new_capacity, 8);
- let existing_capacity = self.buffer.capacity();
- let new_capacity = self.buffer.reserve(new_byte_capacity)?;
- self.buffer
- .set_null_bits(existing_capacity, new_capacity -
existing_capacity);
- }
- Ok(())
- }
-
- fn finish(&mut self) -> Buffer {
- // `append` does not update the buffer's `len` so do it before
`freeze` is called.
- let new_buffer_len = bit_util::ceil(self.len, 8);
- debug_assert!(new_buffer_len >= self.buffer.len());
- let mut buf = std::mem::replace(&mut self.buffer,
MutableBuffer::new(0));
- self.len = 0;
- buf.resize(new_buffer_len).unwrap();
- buf.freeze()
- }
-}
-
/// Trait for dealing with different array builders at runtime
pub trait ArrayBuilder: Any {
/// Returns the number of array slots in the builder
@@ -545,7 +541,7 @@ impl<T: ArrowPrimitiveType> ArrayBuilder for
PrimitiveBuilder<T> {
///
/// This is used for validating array data types in `append_data`
fn data_type(&self) -> DataType {
- T::get_data_type()
+ T::DATA_TYPE
}
/// Builds the array and reset this builder.
@@ -618,7 +614,7 @@ impl<T: ArrowPrimitiveType> PrimitiveBuilder<T> {
let len = self.len();
let null_bit_buffer = self.bitmap_builder.finish();
let null_count = len -
bit_util::count_set_bits(null_bit_buffer.data());
- let mut builder = ArrayData::builder(T::get_data_type())
+ let mut builder = ArrayData::builder(T::DATA_TYPE)
.len(len)
.add_buffer(self.values_builder.finish());
if null_count > 0 {
@@ -636,7 +632,7 @@ impl<T: ArrowPrimitiveType> PrimitiveBuilder<T> {
let null_bit_buffer = self.bitmap_builder.finish();
let null_count = len -
bit_util::count_set_bits(null_bit_buffer.data());
let data_type = DataType::Dictionary(
- Box::new(T::get_data_type()),
+ Box::new(T::DATA_TYPE),
Box::new(values.data_type().clone()),
);
let mut builder = ArrayData::builder(data_type)
@@ -2234,7 +2230,7 @@ where
///
/// This is used for validating array data types in `append_data`
fn data_type(&self) -> DataType {
- DataType::Dictionary(Box::new(K::get_data_type()),
Box::new(V::get_data_type()))
+ DataType::Dictionary(Box::new(K::DATA_TYPE), Box::new(V::DATA_TYPE))
}
/// Builds the array and reset this builder.
@@ -2399,7 +2395,7 @@ where
///
/// This is used for validating array data types in `append_data`
fn data_type(&self) -> DataType {
- DataType::Dictionary(Box::new(K::get_data_type()),
Box::new(DataType::Utf8))
+ DataType::Dictionary(Box::new(K::DATA_TYPE), Box::new(DataType::Utf8))
}
/// Builds the array and reset this builder.
diff --git a/rust/arrow/src/array/equal.rs b/rust/arrow/src/array/equal.rs
index df480fe..f73152b 100644
--- a/rust/arrow/src/array/equal.rs
+++ b/rust/arrow/src/array/equal.rs
@@ -43,11 +43,15 @@ pub trait ArrayEqual {
}
impl<T: ArrowPrimitiveType> ArrayEqual for PrimitiveArray<T> {
- default fn equals(&self, other: &dyn Array) -> bool {
+ fn equals(&self, other: &dyn Array) -> bool {
if !base_equal(&self.data(), &other.data()) {
return false;
}
+ if T::DATA_TYPE == DataType::Boolean {
+ return bool_equal(self, other);
+ }
+
let value_buf = self.data_ref().buffers()[0].clone();
let other_value_buf = other.data_ref().buffers()[0].clone();
let byte_width = T::get_bit_width() / 8;
@@ -82,7 +86,7 @@ impl<T: ArrowPrimitiveType> ArrayEqual for PrimitiveArray<T> {
true
}
- default fn range_equals(
+ fn range_equals(
&self,
other: &dyn Array,
start_idx: usize,
@@ -106,27 +110,20 @@ impl<T: ArrowPrimitiveType> ArrayEqual for
PrimitiveArray<T> {
}
}
-impl ArrayEqual for BooleanArray {
- fn equals(&self, other: &dyn Array) -> bool {
- if !base_equal(&self.data(), &other.data()) {
- return false;
- }
-
- let values = self.data_ref().buffers()[0].data();
- let other_values = other.data_ref().buffers()[0].data();
+fn bool_equal(lhs: &Array, rhs: &Array) -> bool {
+ let values = lhs.data_ref().buffers()[0].data();
+ let other_values = rhs.data_ref().buffers()[0].data();
- // TODO: we can do this more efficiently if all values are not-null
- for i in 0..self.len() {
- if self.is_valid(i)
- && bit_util::get_bit(values, i + self.offset())
- != bit_util::get_bit(other_values, i + other.offset())
- {
- return false;
- }
+ // TODO: we can do this more efficiently if all values are not-null
+ for i in 0..lhs.len() {
+ if lhs.is_valid(i)
+ && bit_util::get_bit(values, i + lhs.offset())
+ != bit_util::get_bit(other_values, i + rhs.offset())
+ {
+ return false;
}
-
- true
}
+ true
}
impl<T: ArrowNumericType> PartialEq for PrimitiveArray<T> {
@@ -243,7 +240,7 @@ impl<T: ArrowPrimitiveType> ArrayEqual for
DictionaryArray<T> {
self.range_equals(other, 0, self.len(), 0)
}
- default fn range_equals(
+ fn range_equals(
&self,
other: &dyn Array,
start_idx: usize,
diff --git a/rust/arrow/src/array/union.rs b/rust/arrow/src/array/union.rs
index 1f8dc4e..9fd1391 100644
--- a/rust/arrow/src/array/union.rs
+++ b/rust/arrow/src/array/union.rs
@@ -541,13 +541,11 @@ impl UnionBuilder {
let mut field_data = match self.fields.remove(&type_name) {
Some(data) => data,
None => match self.value_offset_builder {
- Some(_) => {
- FieldData::new(self.fields.len() as i8,
T::get_data_type(), None)
- }
+ Some(_) => FieldData::new(self.fields.len() as i8,
T::DATA_TYPE, None),
None => {
let mut fd = FieldData::new(
self.fields.len() as i8,
- T::get_data_type(),
+ T::DATA_TYPE,
Some(BooleanBufferBuilder::new(1)),
);
for _ in 0..self.len {
diff --git a/rust/arrow/src/compute/kernels/arithmetic.rs
b/rust/arrow/src/compute/kernels/arithmetic.rs
index 9b28762..fe1bda5 100644
--- a/rust/arrow/src/compute/kernels/arithmetic.rs
+++ b/rust/arrow/src/compute/kernels/arithmetic.rs
@@ -74,7 +74,7 @@ where
.collect::<Vec<T::Native>>();
let data = ArrayData::new(
- T::get_data_type(),
+ T::DATA_TYPE,
left.len(),
None,
null_bit_buffer,
@@ -139,7 +139,7 @@ where
};
let data = ArrayData::new(
- T::get_data_type(),
+ T::DATA_TYPE,
left.len(),
None,
null_bit_buffer,
@@ -193,7 +193,7 @@ where
}
let data = ArrayData::new(
- T::get_data_type(),
+ T::DATA_TYPE,
left.len(),
None,
null_bit_buffer,
@@ -251,7 +251,7 @@ where
}
let data = ArrayData::new(
- T::get_data_type(),
+ T::DATA_TYPE,
left.len(),
None,
null_bit_buffer,
diff --git a/rust/arrow/src/compute/kernels/filter.rs
b/rust/arrow/src/compute/kernels/filter.rs
index 8286f5c6a..55ce499 100644
--- a/rust/arrow/src/compute/kernels/filter.rs
+++ b/rust/arrow/src/compute/kernels/filter.rs
@@ -731,7 +731,7 @@ impl FilterContext {
where
T: ArrowNumericType,
{
- let array_type = T::get_data_type();
+ let array_type = T::DATA_TYPE;
let value_size = mem::size_of::<T::Native>();
let array_data_builder =
filter_array_impl(self, data_array, array_type, value_size)?;
diff --git a/rust/arrow/src/compute/kernels/take.rs
b/rust/arrow/src/compute/kernels/take.rs
index a76885c..b61e393 100644
--- a/rust/arrow/src/compute/kernels/take.rs
+++ b/rust/arrow/src/compute/kernels/take.rs
@@ -211,7 +211,7 @@ where
};
let data = ArrayData::new(
- T::get_data_type(),
+ T::DATA_TYPE,
indices.len(),
None,
Some(nulls),
diff --git a/rust/arrow/src/datatypes.rs b/rust/arrow/src/datatypes.rs
index 2db4306..8110e5d 100644
--- a/rust/arrow/src/datatypes.rs
+++ b/rust/arrow/src/datatypes.rs
@@ -214,8 +214,8 @@ pub trait ArrowPrimitiveType: 'static {
/// Corresponding Rust native type for the primitive type.
type Native: ArrowNativeType;
- /// Returns the corresponding Arrow data type of this primitive type.
- fn get_data_type() -> DataType;
+ /// the corresponding Arrow data type of this primitive type.
+ const DATA_TYPE: DataType;
/// Returns the bit width of this primitive type.
fn get_bit_width() -> usize {
@@ -376,10 +376,7 @@ pub struct BooleanType {}
impl ArrowPrimitiveType for BooleanType {
type Native = bool;
-
- fn get_data_type() -> DataType {
- DataType::Boolean
- }
+ const DATA_TYPE: DataType = DataType::Boolean;
fn get_bit_width() -> usize {
1
@@ -400,10 +397,7 @@ macro_rules! make_type {
impl ArrowPrimitiveType for $name {
type Native = $native_ty;
-
- fn get_data_type() -> DataType {
- $data_ty
- }
+ const DATA_TYPE: DataType = $data_ty;
}
};
}
diff --git a/rust/arrow/src/lib.rs b/rust/arrow/src/lib.rs
index b7090f0..e7ee9d3 100644
--- a/rust/arrow/src/lib.rs
+++ b/rust/arrow/src/lib.rs
@@ -124,7 +124,6 @@
//!
//! The parquet implementation is on a [separate
crate](https://crates.io/crates/parquet)
-#![feature(specialization)]
#![allow(dead_code)]
#![allow(non_camel_case_types)]
#![allow(bare_trait_objects)]
diff --git a/rust/arrow/src/tensor.rs b/rust/arrow/src/tensor.rs
index 0fbeddd..c377636 100644
--- a/rust/arrow/src/tensor.rs
+++ b/rust/arrow/src/tensor.rs
@@ -180,7 +180,7 @@ impl<'a, T: ArrowPrimitiveType> Tensor<'a, T> {
};
Ok(Self {
- data_type: T::get_data_type(),
+ data_type: T::DATA_TYPE,
buffer,
shape,
strides: tensor_strides,
diff --git a/rust/parquet/src/arrow/converter.rs
b/rust/parquet/src/arrow/converter.rs
index 9fbfa33..da0cc6c 100644
--- a/rust/parquet/src/arrow/converter.rs
+++ b/rust/parquet/src/arrow/converter.rs
@@ -90,7 +90,7 @@ where
fn convert(&self, record_reader: &mut RecordReader<ParquetType>) ->
Result<ArrayRef> {
let record_data = record_reader.consume_record_data();
- let mut array_data =
ArrayDataBuilder::new(ArrowSourceType::get_data_type())
+ let mut array_data = ArrayDataBuilder::new(ArrowSourceType::DATA_TYPE)
.len(record_reader.num_values())
.add_buffer(record_data?);
@@ -101,7 +101,7 @@ where
let primitive_array: ArrayRef =
Arc::new(PrimitiveArray::<ArrowSourceType>::from(array_data.build()));
- Ok(cast(&primitive_array, &ArrowTargetType::get_data_type())?)
+ Ok(cast(&primitive_array, &ArrowTargetType::DATA_TYPE)?)
}
}