This is an automated email from the ASF dual-hosted git repository.

nevime pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new a6c6ef6  ARROW-10002: [Rust] Remove trait specialization from arrow 
crate
a6c6ef6 is described below

commit a6c6ef6ab6a701cf5c22c8a4e366715d798e4956
Author: Jorge C. Leitao <[email protected]>
AuthorDate: Sun Oct 18 21:05:48 2020 +0200

    ARROW-10002: [Rust] Remove trait specialization from arrow crate
    
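    This PR removes trait specialization from the arrow crate: the specialized 
impls are merged into single generic impls that branch on `T::DATA_TYPE`, 
leveraging the compiler to remove the resulting trivial `if` statements.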
    
    I verified that the assembly code was the same in a [simple 
example](https://rust.godbolt.org/z/qrcW8W). I do not know if this generalizes 
to our use-case, but I suspect so as LLVM is (hopefully) removing trivial 
branches like `if a != a`.
    
    The change from `get_data_type()` to `DATA_TYPE` is not necessary. I made it 
before realizing this. IMO it makes it more explicit that this is a constant 
rather than a function, but we can revert it.
    
    Closes #8485 from jorgecarleitao/simp_types
    
    Authored-by: Jorge C. Leitao <[email protected]>
    Signed-off-by: Neville Dipale <[email protected]>
---
 rust/arrow/src/array/array.rs                | 250 ++++++++++++---------------
 rust/arrow/src/array/builder.rs              | 208 +++++++++++-----------
 rust/arrow/src/array/equal.rs                |  39 ++---
 rust/arrow/src/array/union.rs                |   6 +-
 rust/arrow/src/compute/kernels/arithmetic.rs |   8 +-
 rust/arrow/src/compute/kernels/filter.rs     |   2 +-
 rust/arrow/src/compute/kernels/take.rs       |   2 +-
 rust/arrow/src/datatypes.rs                  |  14 +-
 rust/arrow/src/lib.rs                        |   1 -
 rust/arrow/src/tensor.rs                     |   2 +-
 rust/parquet/src/arrow/converter.rs          |   4 +-
 11 files changed, 248 insertions(+), 288 deletions(-)

diff --git a/rust/arrow/src/array/array.rs b/rust/arrow/src/array/array.rs
index b60e523..2a181c6 100644
--- a/rust/arrow/src/array/array.rs
+++ b/rust/arrow/src/array/array.rs
@@ -463,7 +463,7 @@ impl<T: ArrowPrimitiveType> Array for PrimitiveArray<T> {
 /// Boolean arrays are bit-packed and so implemented separately.
 impl<T: ArrowNumericType> PrimitiveArray<T> {
     pub fn new(length: usize, values: Buffer, null_count: usize, offset: 
usize) -> Self {
-        let array_data = ArrayData::builder(T::get_data_type())
+        let array_data = ArrayData::builder(T::DATA_TYPE)
             .len(length)
             .add_buffer(values)
             .null_count(null_count)
@@ -502,6 +502,98 @@ impl<T: ArrowNumericType> PrimitiveArray<T> {
     }
 }
 
+fn as_datetime<T: ArrowPrimitiveType>(v: i64) -> Option<NaiveDateTime> {
+    match T::DATA_TYPE {
+        DataType::Date32(_) => {
+            // convert days into seconds
+            Some(NaiveDateTime::from_timestamp(v as i64 * SECONDS_IN_DAY, 0))
+        }
+        DataType::Date64(_) => Some(NaiveDateTime::from_timestamp(
+            // extract seconds from milliseconds
+            v / MILLISECONDS,
+            // discard extracted seconds and convert milliseconds to 
nanoseconds
+            (v % MILLISECONDS * MICROSECONDS) as u32,
+        )),
+        DataType::Time32(_) | DataType::Time64(_) => None,
+        DataType::Timestamp(unit, _) => match unit {
+            TimeUnit::Second => Some(NaiveDateTime::from_timestamp(v, 0)),
+            TimeUnit::Millisecond => Some(NaiveDateTime::from_timestamp(
+                // extract seconds from milliseconds
+                v / MILLISECONDS,
+                // discard extracted seconds and convert milliseconds to 
nanoseconds
+                (v % MILLISECONDS * MICROSECONDS) as u32,
+            )),
+            TimeUnit::Microsecond => Some(NaiveDateTime::from_timestamp(
+                // extract seconds from microseconds
+                v / MICROSECONDS,
+                // discard extracted seconds and convert microseconds to 
nanoseconds
+                (v % MICROSECONDS * MILLISECONDS) as u32,
+            )),
+            TimeUnit::Nanosecond => Some(NaiveDateTime::from_timestamp(
+                // extract seconds from nanoseconds
+                v / NANOSECONDS,
+                // discard extracted seconds
+                (v % NANOSECONDS) as u32,
+            )),
+        },
+        // interval is not yet fully documented [ARROW-3097]
+        DataType::Interval(_) => None,
+        _ => None,
+    }
+}
+
+fn as_date<T: ArrowPrimitiveType>(v: i64) -> Option<NaiveDate> {
+    as_datetime::<T>(v).map(|datetime| datetime.date())
+}
+
+fn as_time<T: ArrowPrimitiveType>(v: i64) -> Option<NaiveTime> {
+    match T::DATA_TYPE {
+        DataType::Time32(unit) => {
+            // safe to immediately cast to u32 as `self.value(i)` is positive 
i32
+            let v = v as u32;
+            match unit {
+                TimeUnit::Second => 
Some(NaiveTime::from_num_seconds_from_midnight(v, 0)),
+                TimeUnit::Millisecond => {
+                    Some(NaiveTime::from_num_seconds_from_midnight(
+                        // extract seconds from milliseconds
+                        v / MILLISECONDS as u32,
+                        // discard extracted seconds and convert milliseconds 
to
+                        // nanoseconds
+                        v % MILLISECONDS as u32 * MICROSECONDS as u32,
+                    ))
+                }
+                _ => None,
+            }
+        }
+        DataType::Time64(unit) => {
+            match unit {
+                TimeUnit::Microsecond => {
+                    Some(NaiveTime::from_num_seconds_from_midnight(
+                        // extract seconds from microseconds
+                        (v / MICROSECONDS) as u32,
+                        // discard extracted seconds and convert microseconds 
to
+                        // nanoseconds
+                        (v % MICROSECONDS * MILLISECONDS) as u32,
+                    ))
+                }
+                TimeUnit::Nanosecond => {
+                    Some(NaiveTime::from_num_seconds_from_midnight(
+                        // extract seconds from nanoseconds
+                        (v / NANOSECONDS) as u32,
+                        // discard extracted seconds
+                        (v % NANOSECONDS) as u32,
+                    ))
+                }
+                _ => None,
+            }
+        }
+        DataType::Timestamp(_, _) => as_datetime::<T>(v).map(|datetime| 
datetime.time()),
+        DataType::Date32(_) | DataType::Date64(_) => 
Some(NaiveTime::from_hms(0, 0, 0)),
+        DataType::Interval(_) => None,
+        _ => None,
+    }
+}
+
 impl<T: ArrowTemporalType + ArrowNumericType> PrimitiveArray<T>
 where
     i64: std::convert::From<T::Native>,
@@ -511,44 +603,7 @@ where
     /// If a data type cannot be converted to `NaiveDateTime`, a `None` is 
returned.
     /// A valid value is expected, thus the user should first check for 
validity.
     pub fn value_as_datetime(&self, i: usize) -> Option<NaiveDateTime> {
-        let v = i64::from(self.value(i));
-        match self.data_type() {
-            DataType::Date32(_) => {
-                // convert days into seconds
-                Some(NaiveDateTime::from_timestamp(v as i64 * SECONDS_IN_DAY, 
0))
-            }
-            DataType::Date64(_) => Some(NaiveDateTime::from_timestamp(
-                // extract seconds from milliseconds
-                v / MILLISECONDS,
-                // discard extracted seconds and convert milliseconds to 
nanoseconds
-                (v % MILLISECONDS * MICROSECONDS) as u32,
-            )),
-            DataType::Time32(_) | DataType::Time64(_) => None,
-            DataType::Timestamp(unit, _) => match unit {
-                TimeUnit::Second => Some(NaiveDateTime::from_timestamp(v, 0)),
-                TimeUnit::Millisecond => Some(NaiveDateTime::from_timestamp(
-                    // extract seconds from milliseconds
-                    v / MILLISECONDS,
-                    // discard extracted seconds and convert milliseconds to 
nanoseconds
-                    (v % MILLISECONDS * MICROSECONDS) as u32,
-                )),
-                TimeUnit::Microsecond => Some(NaiveDateTime::from_timestamp(
-                    // extract seconds from microseconds
-                    v / MICROSECONDS,
-                    // discard extracted seconds and convert microseconds to 
nanoseconds
-                    (v % MICROSECONDS * MILLISECONDS) as u32,
-                )),
-                TimeUnit::Nanosecond => Some(NaiveDateTime::from_timestamp(
-                    // extract seconds from nanoseconds
-                    v / NANOSECONDS,
-                    // discard extracted seconds
-                    (v % NANOSECONDS) as u32,
-                )),
-            },
-            // interval is not yet fully documented [ARROW-3097]
-            DataType::Interval(_) => None,
-            _ => None,
-        }
+        as_datetime::<T>(i64::from(self.value(i)))
     }
 
     /// Returns value as a chrono `NaiveDate` by using `Self::datetime()`
@@ -562,105 +617,36 @@ where
     ///
     /// `Date32` and `Date64` return UTC midnight as they do not have time 
resolution
     pub fn value_as_time(&self, i: usize) -> Option<NaiveTime> {
-        match self.data_type() {
-            DataType::Time32(unit) => {
-                // safe to immediately cast to u32 as `self.value(i)` is 
positive i32
-                let v = i64::from(self.value(i)) as u32;
-                match unit {
-                    TimeUnit::Second => {
-                        Some(NaiveTime::from_num_seconds_from_midnight(v, 0))
-                    }
-                    TimeUnit::Millisecond => {
-                        Some(NaiveTime::from_num_seconds_from_midnight(
-                            // extract seconds from milliseconds
-                            v / MILLISECONDS as u32,
-                            // discard extracted seconds and convert 
milliseconds to
-                            // nanoseconds
-                            v % MILLISECONDS as u32 * MICROSECONDS as u32,
-                        ))
-                    }
-                    _ => None,
-                }
-            }
-            DataType::Time64(unit) => {
-                let v = i64::from(self.value(i));
-                match unit {
-                    TimeUnit::Microsecond => {
-                        Some(NaiveTime::from_num_seconds_from_midnight(
-                            // extract seconds from microseconds
-                            (v / MICROSECONDS) as u32,
-                            // discard extracted seconds and convert 
microseconds to
-                            // nanoseconds
-                            (v % MICROSECONDS * MILLISECONDS) as u32,
-                        ))
-                    }
-                    TimeUnit::Nanosecond => {
-                        Some(NaiveTime::from_num_seconds_from_midnight(
-                            // extract seconds from nanoseconds
-                            (v / NANOSECONDS) as u32,
-                            // discard extracted seconds
-                            (v % NANOSECONDS) as u32,
-                        ))
-                    }
-                    _ => None,
-                }
-            }
-            DataType::Timestamp(_, _) => {
-                self.value_as_datetime(i).map(|datetime| datetime.time())
-            }
-            DataType::Date32(_) | DataType::Date64(_) => {
-                Some(NaiveTime::from_hms(0, 0, 0))
-            }
-            DataType::Interval(_) => None,
-            _ => None,
-        }
+        as_time::<T>(i64::from(self.value(i)))
     }
 }
 
 impl<T: ArrowPrimitiveType> fmt::Debug for PrimitiveArray<T> {
-    default fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        write!(f, "PrimitiveArray<{:?}>\n[\n", T::get_data_type())?;
-        print_long_array(self, f, |array, index, f| {
-            fmt::Debug::fmt(&array.value(index), f)
-        })?;
-        write!(f, "]")
-    }
-}
-
-impl<T: ArrowNumericType> fmt::Debug for PrimitiveArray<T> {
-    default fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        write!(f, "PrimitiveArray<{:?}>\n[\n", T::get_data_type())?;
-        print_long_array(self, f, |array, index, f| {
-            fmt::Debug::fmt(&array.value(index), f)
-        })?;
-        write!(f, "]")
-    }
-}
-
-impl<T: ArrowNumericType + ArrowTemporalType> fmt::Debug for PrimitiveArray<T>
-where
-    i64: std::convert::From<T::Native>,
-{
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        write!(f, "PrimitiveArray<{:?}>\n[\n", T::get_data_type())?;
-        print_long_array(self, f, |array, index, f| match T::get_data_type() {
+        write!(f, "PrimitiveArray<{:?}>\n[\n", T::DATA_TYPE)?;
+        print_long_array(self, f, |array, index, f| match T::DATA_TYPE {
             DataType::Date32(_) | DataType::Date64(_) => {
-                match array.value_as_date(index) {
+                let v = self.value(index).to_usize().unwrap() as i64;
+                match as_date::<T>(v) {
                     Some(date) => write!(f, "{:?}", date),
                     None => write!(f, "null"),
                 }
             }
             DataType::Time32(_) | DataType::Time64(_) => {
-                match array.value_as_time(index) {
+                let v = self.value(index).to_usize().unwrap() as i64;
+                match as_time::<T>(v) {
                     Some(time) => write!(f, "{:?}", time),
                     None => write!(f, "null"),
                 }
             }
-            DataType::Timestamp(_, _) => match array.value_as_datetime(index) {
-                Some(datetime) => write!(f, "{:?}", datetime),
-                None => write!(f, "null"),
-            },
-            _ => write!(f, "null"),
+            DataType::Timestamp(_, _) => {
+                let v = self.value(index).to_usize().unwrap() as i64;
+                match as_datetime::<T>(v) {
+                    Some(datetime) => write!(f, "{:?}", datetime),
+                    None => write!(f, "null"),
+                }
+            }
+            _ => fmt::Debug::fmt(&array.value(index), f),
         })?;
         write!(f, "]")
     }
@@ -684,16 +670,6 @@ impl PrimitiveArray<BooleanType> {
     }
 }
 
-impl fmt::Debug for PrimitiveArray<BooleanType> {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        write!(f, "PrimitiveArray<{:?}>\n[\n", BooleanType::get_data_type())?;
-        print_long_array(self, f, |array, index, f| {
-            fmt::Debug::fmt(&array.value(index), f)
-        })?;
-        write!(f, "]")
-    }
-}
-
 impl<'a, T: ArrowPrimitiveType> IntoIterator for &'a PrimitiveArray<T> {
     type Item = Option<<T as ArrowPrimitiveType>::Native>;
     type IntoIter = PrimitiveIter<'a, T>;
@@ -737,7 +713,7 @@ impl<T: ArrowPrimitiveType, Ptr: Borrow<Option<<T as 
ArrowPrimitiveType>::Native
         });
 
         let data = ArrayData::new(
-            T::get_data_type(),
+            T::DATA_TYPE,
             data_len,
             None,
             Some(null_buf.freeze()),
@@ -756,7 +732,7 @@ macro_rules! def_numeric_from_vec {
     ( $ty:ident ) => {
         impl From<Vec<<$ty as ArrowPrimitiveType>::Native>> for 
PrimitiveArray<$ty> {
             fn from(data: Vec<<$ty as ArrowPrimitiveType>::Native>) -> Self {
-                let array_data = ArrayData::builder($ty::get_data_type())
+                let array_data = ArrayData::builder($ty::DATA_TYPE)
                     .len(data.len())
                     .add_buffer(Buffer::from(data.to_byte_slice()))
                     .build();
@@ -898,7 +874,7 @@ impl From<Vec<Option<bool>>> for BooleanArray {
 
 /// Constructs a `PrimitiveArray` from an array data reference.
 impl<T: ArrowPrimitiveType> From<ArrayDataRef> for PrimitiveArray<T> {
-    default fn from(data: ArrayDataRef) -> Self {
+    fn from(data: ArrayDataRef) -> Self {
         assert_eq!(
             data.buffers().len(),
             1,
@@ -2232,7 +2208,7 @@ impl<'a, K: ArrowPrimitiveType> DictionaryArray<K> {
     pub fn keys_array(&self) -> PrimitiveArray<K> {
         let data = self.data_ref();
         let keys_data = ArrayData::new(
-            K::get_data_type(),
+            K::DATA_TYPE,
             data.len(),
             Some(data.null_count()),
             data.null_buffer().cloned(),
diff --git a/rust/arrow/src/array/builder.rs b/rust/arrow/src/array/builder.rs
index 8b20d80..ca45f9e 100644
--- a/rust/arrow/src/array/builder.rs
+++ b/rust/arrow/src/array/builder.rs
@@ -253,8 +253,18 @@ pub trait BufferBuilderTrait<T: ArrowPrimitiveType> {
 }
 
 impl<T: ArrowPrimitiveType> BufferBuilderTrait<T> for BufferBuilder<T> {
-    default fn new(capacity: usize) -> Self {
-        let buffer = MutableBuffer::new(capacity * 
mem::size_of::<T::Native>());
+    #[inline]
+    fn new(capacity: usize) -> Self {
+        let buffer = if T::DATA_TYPE == DataType::Boolean {
+            let byte_capacity = bit_util::ceil(capacity, 8);
+            let actual_capacity = 
bit_util::round_upto_multiple_of_64(byte_capacity);
+            let mut buffer = MutableBuffer::new(actual_capacity);
+            buffer.set_null_bits(0, actual_capacity);
+            buffer
+        } else {
+            MutableBuffer::new(capacity * mem::size_of::<T::Native>())
+        };
+
         Self {
             buffer,
             len: 0,
@@ -275,43 +285,112 @@ impl<T: ArrowPrimitiveType> BufferBuilderTrait<T> for 
BufferBuilder<T> {
         bit_capacity / T::get_bit_width()
     }
 
-    default fn advance(&mut self, i: usize) -> Result<()> {
-        let new_buffer_len = (self.len + i) * mem::size_of::<T::Native>();
+    #[inline]
+    fn advance(&mut self, i: usize) -> Result<()> {
+        let new_buffer_len = if T::DATA_TYPE == DataType::Boolean {
+            bit_util::ceil(self.len + i, 8)
+        } else {
+            (self.len + i) * mem::size_of::<T::Native>()
+        };
         self.buffer.resize(new_buffer_len)?;
         self.len += i;
         Ok(())
     }
 
-    default fn reserve(&mut self, n: usize) -> Result<()> {
+    #[inline]
+    fn reserve(&mut self, n: usize) -> Result<()> {
         let new_capacity = self.len + n;
-        let byte_capacity = mem::size_of::<T::Native>() * new_capacity;
-        self.buffer.reserve(byte_capacity)?;
+        if T::DATA_TYPE == DataType::Boolean {
+            if new_capacity > self.capacity() {
+                let new_byte_capacity = bit_util::ceil(new_capacity, 8);
+                let existing_capacity = self.buffer.capacity();
+                let new_capacity = self.buffer.reserve(new_byte_capacity)?;
+                self.buffer
+                    .set_null_bits(existing_capacity, new_capacity - 
existing_capacity);
+            }
+        } else {
+            let byte_capacity = mem::size_of::<T::Native>() * new_capacity;
+            self.buffer.reserve(byte_capacity)?;
+        }
         Ok(())
     }
 
-    default fn append(&mut self, v: T::Native) -> Result<()> {
+    #[inline]
+    fn append(&mut self, v: T::Native) -> Result<()> {
         self.reserve(1)?;
-        self.write_bytes(v.to_byte_slice(), 1)
+        if T::DATA_TYPE == DataType::Boolean {
+            if v != T::default_value() {
+                unsafe {
+                    bit_util::set_bit_raw(self.buffer.raw_data_mut(), 
self.len);
+                }
+            }
+            self.len += 1;
+        } else {
+            self.write_bytes(v.to_byte_slice(), 1)?;
+        }
+        Ok(())
     }
 
-    default fn append_n(&mut self, n: usize, v: T::Native) -> Result<()> {
+    #[inline]
+    fn append_n(&mut self, n: usize, v: T::Native) -> Result<()> {
         self.reserve(n)?;
-        for _ in 0..n {
-            self.write_bytes(v.to_byte_slice(), 1)?;
+        if T::DATA_TYPE == DataType::Boolean {
+            if n != 0 && v != T::default_value() {
+                unsafe {
+                    bit_util::set_bits_raw(
+                        self.buffer.raw_data_mut(),
+                        self.len,
+                        self.len + n,
+                    )
+                }
+            }
+            self.len += n;
+        } else {
+            for _ in 0..n {
+                self.write_bytes(v.to_byte_slice(), 1)?;
+            }
         }
         Ok(())
     }
 
-    default fn append_slice(&mut self, slice: &[T::Native]) -> Result<()> {
+    #[inline]
+    fn append_slice(&mut self, slice: &[T::Native]) -> Result<()> {
         let array_slots = slice.len();
         self.reserve(array_slots)?;
-        self.write_bytes(slice.to_byte_slice(), array_slots)
+
+        if T::DATA_TYPE == DataType::Boolean {
+            for v in slice {
+                if *v != T::default_value() {
+                    // For performance the `len` of the buffer is not
+                    // updated on each append but is updated in the
+                    // `freeze` method instead.
+                    unsafe {
+                        bit_util::set_bit_raw(self.buffer.raw_data_mut(), 
self.len);
+                    }
+                }
+                self.len += 1;
+            }
+            Ok(())
+        } else {
+            self.write_bytes(slice.to_byte_slice(), array_slots)
+        }
     }
 
-    default fn finish(&mut self) -> Buffer {
-        let buf = std::mem::replace(&mut self.buffer, MutableBuffer::new(0));
-        self.len = 0;
-        buf.freeze()
+    #[inline]
+    fn finish(&mut self) -> Buffer {
+        if T::DATA_TYPE == DataType::Boolean {
+            // `append` does not update the buffer's `len` so do it before 
`freeze` is called.
+            let new_buffer_len = bit_util::ceil(self.len, 8);
+            debug_assert!(new_buffer_len >= self.buffer.len());
+            let mut buf = std::mem::replace(&mut self.buffer, 
MutableBuffer::new(0));
+            self.len = 0;
+            buf.resize(new_buffer_len).unwrap();
+            buf.freeze()
+        } else {
+            let buf = std::mem::replace(&mut self.buffer, 
MutableBuffer::new(0));
+            self.len = 0;
+            buf.freeze()
+        }
     }
 }
 
@@ -334,89 +413,6 @@ impl<T: ArrowPrimitiveType> BufferBuilder<T> {
     }
 }
 
-impl BufferBuilderTrait<BooleanType> for BufferBuilder<BooleanType> {
-    fn new(capacity: usize) -> Self {
-        let byte_capacity = bit_util::ceil(capacity, 8);
-        let actual_capacity = 
bit_util::round_upto_multiple_of_64(byte_capacity);
-        let mut buffer = MutableBuffer::new(actual_capacity);
-        buffer.set_null_bits(0, actual_capacity);
-        Self {
-            buffer,
-            len: 0,
-            _marker: PhantomData,
-        }
-    }
-
-    fn advance(&mut self, i: usize) -> Result<()> {
-        let new_buffer_len = bit_util::ceil(self.len + i, 8);
-        self.buffer.resize(new_buffer_len)?;
-        self.len += i;
-        Ok(())
-    }
-
-    fn append(&mut self, v: bool) -> Result<()> {
-        self.reserve(1)?;
-        if v {
-            // For performance the `len` of the buffer is not updated on each 
append but
-            // is updated in the `freeze` method instead.
-            unsafe {
-                bit_util::set_bit_raw(self.buffer.raw_data_mut(), self.len);
-            }
-        }
-        self.len += 1;
-        Ok(())
-    }
-
-    fn append_n(&mut self, n: usize, v: bool) -> Result<()> {
-        self.reserve(n)?;
-        if n != 0 && v {
-            unsafe {
-                bit_util::set_bits_raw(self.buffer.raw_data_mut(), self.len, 
self.len + n)
-            }
-        }
-        self.len += n;
-        Ok(())
-    }
-
-    fn append_slice(&mut self, slice: &[bool]) -> Result<()> {
-        self.reserve(slice.len())?;
-        for v in slice {
-            if *v {
-                // For performance the `len` of the buffer is not
-                // updated on each append but is updated in the
-                // `freeze` method instead.
-                unsafe {
-                    bit_util::set_bit_raw(self.buffer.raw_data_mut(), 
self.len);
-                }
-            }
-            self.len += 1;
-        }
-        Ok(())
-    }
-
-    fn reserve(&mut self, n: usize) -> Result<()> {
-        let new_capacity = self.len + n;
-        if new_capacity > self.capacity() {
-            let new_byte_capacity = bit_util::ceil(new_capacity, 8);
-            let existing_capacity = self.buffer.capacity();
-            let new_capacity = self.buffer.reserve(new_byte_capacity)?;
-            self.buffer
-                .set_null_bits(existing_capacity, new_capacity - 
existing_capacity);
-        }
-        Ok(())
-    }
-
-    fn finish(&mut self) -> Buffer {
-        // `append` does not update the buffer's `len` so do it before 
`freeze` is called.
-        let new_buffer_len = bit_util::ceil(self.len, 8);
-        debug_assert!(new_buffer_len >= self.buffer.len());
-        let mut buf = std::mem::replace(&mut self.buffer, 
MutableBuffer::new(0));
-        self.len = 0;
-        buf.resize(new_buffer_len).unwrap();
-        buf.freeze()
-    }
-}
-
 /// Trait for dealing with different array builders at runtime
 pub trait ArrayBuilder: Any {
     /// Returns the number of array slots in the builder
@@ -545,7 +541,7 @@ impl<T: ArrowPrimitiveType> ArrayBuilder for 
PrimitiveBuilder<T> {
     ///
     /// This is used for validating array data types in `append_data`
     fn data_type(&self) -> DataType {
-        T::get_data_type()
+        T::DATA_TYPE
     }
 
     /// Builds the array and reset this builder.
@@ -618,7 +614,7 @@ impl<T: ArrowPrimitiveType> PrimitiveBuilder<T> {
         let len = self.len();
         let null_bit_buffer = self.bitmap_builder.finish();
         let null_count = len - 
bit_util::count_set_bits(null_bit_buffer.data());
-        let mut builder = ArrayData::builder(T::get_data_type())
+        let mut builder = ArrayData::builder(T::DATA_TYPE)
             .len(len)
             .add_buffer(self.values_builder.finish());
         if null_count > 0 {
@@ -636,7 +632,7 @@ impl<T: ArrowPrimitiveType> PrimitiveBuilder<T> {
         let null_bit_buffer = self.bitmap_builder.finish();
         let null_count = len - 
bit_util::count_set_bits(null_bit_buffer.data());
         let data_type = DataType::Dictionary(
-            Box::new(T::get_data_type()),
+            Box::new(T::DATA_TYPE),
             Box::new(values.data_type().clone()),
         );
         let mut builder = ArrayData::builder(data_type)
@@ -2234,7 +2230,7 @@ where
     ///
     /// This is used for validating array data types in `append_data`
     fn data_type(&self) -> DataType {
-        DataType::Dictionary(Box::new(K::get_data_type()), 
Box::new(V::get_data_type()))
+        DataType::Dictionary(Box::new(K::DATA_TYPE), Box::new(V::DATA_TYPE))
     }
 
     /// Builds the array and reset this builder.
@@ -2399,7 +2395,7 @@ where
     ///
     /// This is used for validating array data types in `append_data`
     fn data_type(&self) -> DataType {
-        DataType::Dictionary(Box::new(K::get_data_type()), 
Box::new(DataType::Utf8))
+        DataType::Dictionary(Box::new(K::DATA_TYPE), Box::new(DataType::Utf8))
     }
 
     /// Builds the array and reset this builder.
diff --git a/rust/arrow/src/array/equal.rs b/rust/arrow/src/array/equal.rs
index df480fe..f73152b 100644
--- a/rust/arrow/src/array/equal.rs
+++ b/rust/arrow/src/array/equal.rs
@@ -43,11 +43,15 @@ pub trait ArrayEqual {
 }
 
 impl<T: ArrowPrimitiveType> ArrayEqual for PrimitiveArray<T> {
-    default fn equals(&self, other: &dyn Array) -> bool {
+    fn equals(&self, other: &dyn Array) -> bool {
         if !base_equal(&self.data(), &other.data()) {
             return false;
         }
 
+        if T::DATA_TYPE == DataType::Boolean {
+            return bool_equal(self, other);
+        }
+
         let value_buf = self.data_ref().buffers()[0].clone();
         let other_value_buf = other.data_ref().buffers()[0].clone();
         let byte_width = T::get_bit_width() / 8;
@@ -82,7 +86,7 @@ impl<T: ArrowPrimitiveType> ArrayEqual for PrimitiveArray<T> {
         true
     }
 
-    default fn range_equals(
+    fn range_equals(
         &self,
         other: &dyn Array,
         start_idx: usize,
@@ -106,27 +110,20 @@ impl<T: ArrowPrimitiveType> ArrayEqual for 
PrimitiveArray<T> {
     }
 }
 
-impl ArrayEqual for BooleanArray {
-    fn equals(&self, other: &dyn Array) -> bool {
-        if !base_equal(&self.data(), &other.data()) {
-            return false;
-        }
-
-        let values = self.data_ref().buffers()[0].data();
-        let other_values = other.data_ref().buffers()[0].data();
+fn bool_equal(lhs: &Array, rhs: &Array) -> bool {
+    let values = lhs.data_ref().buffers()[0].data();
+    let other_values = rhs.data_ref().buffers()[0].data();
 
-        // TODO: we can do this more efficiently if all values are not-null
-        for i in 0..self.len() {
-            if self.is_valid(i)
-                && bit_util::get_bit(values, i + self.offset())
-                    != bit_util::get_bit(other_values, i + other.offset())
-            {
-                return false;
-            }
+    // TODO: we can do this more efficiently if all values are not-null
+    for i in 0..lhs.len() {
+        if lhs.is_valid(i)
+            && bit_util::get_bit(values, i + lhs.offset())
+                != bit_util::get_bit(other_values, i + rhs.offset())
+        {
+            return false;
         }
-
-        true
     }
+    true
 }
 
 impl<T: ArrowNumericType> PartialEq for PrimitiveArray<T> {
@@ -243,7 +240,7 @@ impl<T: ArrowPrimitiveType> ArrayEqual for 
DictionaryArray<T> {
         self.range_equals(other, 0, self.len(), 0)
     }
 
-    default fn range_equals(
+    fn range_equals(
         &self,
         other: &dyn Array,
         start_idx: usize,
diff --git a/rust/arrow/src/array/union.rs b/rust/arrow/src/array/union.rs
index 1f8dc4e..9fd1391 100644
--- a/rust/arrow/src/array/union.rs
+++ b/rust/arrow/src/array/union.rs
@@ -541,13 +541,11 @@ impl UnionBuilder {
         let mut field_data = match self.fields.remove(&type_name) {
             Some(data) => data,
             None => match self.value_offset_builder {
-                Some(_) => {
-                    FieldData::new(self.fields.len() as i8, 
T::get_data_type(), None)
-                }
+                Some(_) => FieldData::new(self.fields.len() as i8, 
T::DATA_TYPE, None),
                 None => {
                     let mut fd = FieldData::new(
                         self.fields.len() as i8,
-                        T::get_data_type(),
+                        T::DATA_TYPE,
                         Some(BooleanBufferBuilder::new(1)),
                     );
                     for _ in 0..self.len {
diff --git a/rust/arrow/src/compute/kernels/arithmetic.rs 
b/rust/arrow/src/compute/kernels/arithmetic.rs
index 9b28762..fe1bda5 100644
--- a/rust/arrow/src/compute/kernels/arithmetic.rs
+++ b/rust/arrow/src/compute/kernels/arithmetic.rs
@@ -74,7 +74,7 @@ where
         .collect::<Vec<T::Native>>();
 
     let data = ArrayData::new(
-        T::get_data_type(),
+        T::DATA_TYPE,
         left.len(),
         None,
         null_bit_buffer,
@@ -139,7 +139,7 @@ where
     };
 
     let data = ArrayData::new(
-        T::get_data_type(),
+        T::DATA_TYPE,
         left.len(),
         None,
         null_bit_buffer,
@@ -193,7 +193,7 @@ where
     }
 
     let data = ArrayData::new(
-        T::get_data_type(),
+        T::DATA_TYPE,
         left.len(),
         None,
         null_bit_buffer,
@@ -251,7 +251,7 @@ where
     }
 
     let data = ArrayData::new(
-        T::get_data_type(),
+        T::DATA_TYPE,
         left.len(),
         None,
         null_bit_buffer,
diff --git a/rust/arrow/src/compute/kernels/filter.rs 
b/rust/arrow/src/compute/kernels/filter.rs
index 8286f5c6a..55ce499 100644
--- a/rust/arrow/src/compute/kernels/filter.rs
+++ b/rust/arrow/src/compute/kernels/filter.rs
@@ -731,7 +731,7 @@ impl FilterContext {
     where
         T: ArrowNumericType,
     {
-        let array_type = T::get_data_type();
+        let array_type = T::DATA_TYPE;
         let value_size = mem::size_of::<T::Native>();
         let array_data_builder =
             filter_array_impl(self, data_array, array_type, value_size)?;
diff --git a/rust/arrow/src/compute/kernels/take.rs 
b/rust/arrow/src/compute/kernels/take.rs
index a76885c..b61e393 100644
--- a/rust/arrow/src/compute/kernels/take.rs
+++ b/rust/arrow/src/compute/kernels/take.rs
@@ -211,7 +211,7 @@ where
     };
 
     let data = ArrayData::new(
-        T::get_data_type(),
+        T::DATA_TYPE,
         indices.len(),
         None,
         Some(nulls),
diff --git a/rust/arrow/src/datatypes.rs b/rust/arrow/src/datatypes.rs
index 2db4306..8110e5d 100644
--- a/rust/arrow/src/datatypes.rs
+++ b/rust/arrow/src/datatypes.rs
@@ -214,8 +214,8 @@ pub trait ArrowPrimitiveType: 'static {
     /// Corresponding Rust native type for the primitive type.
     type Native: ArrowNativeType;
 
-    /// Returns the corresponding Arrow data type of this primitive type.
-    fn get_data_type() -> DataType;
+    /// The corresponding Arrow data type of this primitive type.
+    const DATA_TYPE: DataType;
 
     /// Returns the bit width of this primitive type.
     fn get_bit_width() -> usize {
@@ -376,10 +376,7 @@ pub struct BooleanType {}
 
 impl ArrowPrimitiveType for BooleanType {
     type Native = bool;
-
-    fn get_data_type() -> DataType {
-        DataType::Boolean
-    }
+    const DATA_TYPE: DataType = DataType::Boolean;
 
     fn get_bit_width() -> usize {
         1
@@ -400,10 +397,7 @@ macro_rules! make_type {
 
         impl ArrowPrimitiveType for $name {
             type Native = $native_ty;
-
-            fn get_data_type() -> DataType {
-                $data_ty
-            }
+            const DATA_TYPE: DataType = $data_ty;
         }
     };
 }
diff --git a/rust/arrow/src/lib.rs b/rust/arrow/src/lib.rs
index b7090f0..e7ee9d3 100644
--- a/rust/arrow/src/lib.rs
+++ b/rust/arrow/src/lib.rs
@@ -124,7 +124,6 @@
 //!
 //! The parquet implementation is on a [separate 
crate](https://crates.io/crates/parquet)
 
-#![feature(specialization)]
 #![allow(dead_code)]
 #![allow(non_camel_case_types)]
 #![allow(bare_trait_objects)]
diff --git a/rust/arrow/src/tensor.rs b/rust/arrow/src/tensor.rs
index 0fbeddd..c377636 100644
--- a/rust/arrow/src/tensor.rs
+++ b/rust/arrow/src/tensor.rs
@@ -180,7 +180,7 @@ impl<'a, T: ArrowPrimitiveType> Tensor<'a, T> {
         };
 
         Ok(Self {
-            data_type: T::get_data_type(),
+            data_type: T::DATA_TYPE,
             buffer,
             shape,
             strides: tensor_strides,
diff --git a/rust/parquet/src/arrow/converter.rs 
b/rust/parquet/src/arrow/converter.rs
index 9fbfa33..da0cc6c 100644
--- a/rust/parquet/src/arrow/converter.rs
+++ b/rust/parquet/src/arrow/converter.rs
@@ -90,7 +90,7 @@ where
     fn convert(&self, record_reader: &mut RecordReader<ParquetType>) -> 
Result<ArrayRef> {
         let record_data = record_reader.consume_record_data();
 
-        let mut array_data = 
ArrayDataBuilder::new(ArrowSourceType::get_data_type())
+        let mut array_data = ArrayDataBuilder::new(ArrowSourceType::DATA_TYPE)
             .len(record_reader.num_values())
             .add_buffer(record_data?);
 
@@ -101,7 +101,7 @@ where
         let primitive_array: ArrayRef =
             
Arc::new(PrimitiveArray::<ArrowSourceType>::from(array_data.build()));
 
-        Ok(cast(&primitive_array, &ArrowTargetType::get_data_type())?)
+        Ok(cast(&primitive_array, &ArrowTargetType::DATA_TYPE)?)
     }
 }
 

Reply via email to