This is an automated email from the ASF dual-hosted git repository.

agrove pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 1f30466  ARROW-10002: [Rust] Remove `default fn` from 
`PrimitiveArrayOps`
1f30466 is described below

commit 1f30466ac7042354de35cc69fd49ced1acd54b38
Author: Kyle Strand <[email protected]>
AuthorDate: Thu Sep 17 20:32:55 2020 -0600

    ARROW-10002: [Rust] Remove `default fn` from `PrimitiveArrayOps`
    
    This is based on my 
[proof-of-concept](https://github.com/BatmanAoD/arrow-rust-specialization-alternatives/pull/1).
    
    Closes #8206 from BatmanAoD/IndexViaPrimitive
    
    Lead-authored-by: Kyle Strand <[email protected]>
    Co-authored-by: Kyle J Strand <[email protected]>
    Signed-off-by: Andy Grove <[email protected]>
---
 rust/arrow/src/array/array.rs                      |  63 ++-------
 rust/arrow/src/array/mod.rs                        |   2 +-
 rust/arrow/src/compute/kernels/boolean.rs          |   1 +
 rust/arrow/src/compute/kernels/sort.rs             |   2 +-
 rust/arrow/src/datatypes.rs                        | 146 ++++++++++-----------
 rust/arrow/src/util/pretty.rs                      |   2 +-
 rust/datafusion/examples/simple_udf.rs             |   4 +-
 rust/datafusion/src/datasource/parquet.rs          |   1 +
 rust/datafusion/src/execution/context.rs           |   4 +-
 rust/datafusion/src/physical_plan/common.rs        |   2 +-
 rust/datafusion/src/physical_plan/expressions.rs   |   2 +-
 rust/datafusion/src/physical_plan/functions.rs     |   5 +-
 .../datafusion/src/physical_plan/hash_aggregate.rs |   1 +
 .../src/physical_plan/math_expressions.rs          |   4 +-
 rust/datafusion/src/test/mod.rs                    |   1 +
 rust/datafusion/tests/user_defined_plan.rs         |   2 +-
 16 files changed, 102 insertions(+), 140 deletions(-)

diff --git a/rust/arrow/src/array/array.rs b/rust/arrow/src/array/array.rs
index 35374e4..6d56a5a 100644
--- a/rust/arrow/src/array/array.rs
+++ b/rust/arrow/src/array/array.rs
@@ -401,40 +401,25 @@ pub struct PrimitiveArray<T: ArrowPrimitiveType> {
 
 /// Common operations for primitive types, including numeric types and boolean 
type.
 pub trait PrimitiveArrayOps<T: ArrowPrimitiveType> {
+    /// Returns a `Buffer` holding all the values of this array.
+    ///
+    /// Note this doesn't take the offset of this array into account.
     fn values(&self) -> Buffer;
+
+    /// Returns the primitive value at index `i`.
+    ///
+    /// Note this doesn't do any bound checking, for performance reason.
     fn value(&self, i: usize) -> T::Native;
 }
 
-// This is necessary when caller wants to access `PrimitiveArrayOps`'s methods 
with
-// `ArrowPrimitiveType`. It doesn't have any implementation as the actual 
implementations
-// are delegated to that of `ArrowNumericType` and `BooleanType`.
 impl<T: ArrowPrimitiveType> PrimitiveArrayOps<T> for PrimitiveArray<T> {
-    default fn values(&self) -> Buffer {
-        unimplemented!()
-    }
-
-    default fn value(&self, _: usize) -> T::Native {
-        unimplemented!()
-    }
-}
-
-impl<T: ArrowNumericType> PrimitiveArrayOps<T> for PrimitiveArray<T> {
     fn values(&self) -> Buffer {
-        self.values()
+        self.data.buffers()[0].clone()
     }
 
     fn value(&self, i: usize) -> T::Native {
-        self.value(i)
-    }
-}
-
-impl PrimitiveArrayOps<BooleanType> for BooleanArray {
-    fn values(&self) -> Buffer {
-        self.values()
-    }
-
-    fn value(&self, i: usize) -> bool {
-        self.value(i)
+        let offset = i + self.offset();
+        unsafe { T::index(self.raw_values.get(), offset) }
     }
 }
 
@@ -475,13 +460,6 @@ impl<T: ArrowNumericType> PrimitiveArray<T> {
         PrimitiveArray::from(array_data)
     }
 
-    /// Returns a `Buffer` holding all the values of this array.
-    ///
-    /// Note this doesn't take the offset of this array into account.
-    pub fn values(&self) -> Buffer {
-        self.data.buffers()[0].clone()
-    }
-
     /// Returns the length of this array.
     pub fn len(&self) -> usize {
         self.data.len()
@@ -497,13 +475,6 @@ impl<T: ArrowNumericType> PrimitiveArray<T> {
         unsafe { self.raw_values.get().add(self.data.offset()) }
     }
 
-    /// Returns the primitive value at index `i`.
-    ///
-    /// Note this doesn't do any bound checking, for performance reason.
-    pub fn value(&self, i: usize) -> T::Native {
-        unsafe { *(self.raw_values().add(i)) }
-    }
-
     /// Returns a slice for the given offset and length
     ///
     /// Note this doesn't do any bound checking, for performance reason.
@@ -695,20 +666,6 @@ impl PrimitiveArray<BooleanType> {
         BooleanArray::from(array_data)
     }
 
-    /// Returns a `Buffer` holds all the values of this array.
-    ///
-    /// Note this doesn't take account into the offset of this array.
-    pub fn values(&self) -> Buffer {
-        self.data.buffers()[0].clone()
-    }
-
-    /// Returns the boolean value at index `i`.
-    pub fn value(&self, i: usize) -> bool {
-        assert!(i < self.data.len());
-        let offset = i + self.offset();
-        unsafe { bit_util::get_bit_raw(self.raw_values.get() as *const u8, 
offset) }
-    }
-
     // Returns a new primitive array builder
     pub fn builder(capacity: usize) -> BooleanBuilder {
         BooleanBuilder::new(capacity)
diff --git a/rust/arrow/src/array/mod.rs b/rust/arrow/src/array/mod.rs
index d9784b4..e37c473 100644
--- a/rust/arrow/src/array/mod.rs
+++ b/rust/arrow/src/array/mod.rs
@@ -49,7 +49,7 @@
 //! ```
 //! extern crate arrow;
 //!
-//! use arrow::array::Int16Array;
+//! use arrow::array::{Int16Array, PrimitiveArrayOps};
 //!
 //! // Create a new builder with a capacity of 100
 //! let mut builder = Int16Array::builder(100);
diff --git a/rust/arrow/src/compute/kernels/boolean.rs 
b/rust/arrow/src/compute/kernels/boolean.rs
index a9a75a0..e913409 100644
--- a/rust/arrow/src/compute/kernels/boolean.rs
+++ b/rust/arrow/src/compute/kernels/boolean.rs
@@ -126,6 +126,7 @@ pub fn not(left: &BooleanArray) -> Result<BooleanArray> {
 #[cfg(test)]
 mod tests {
     use super::*;
+    use crate::array::PrimitiveArrayOps;
 
     #[test]
     fn test_bool_array_and() {
diff --git a/rust/arrow/src/compute/kernels/sort.rs 
b/rust/arrow/src/compute/kernels/sort.rs
index ffee0d2..d881c7f 100644
--- a/rust/arrow/src/compute/kernels/sort.rs
+++ b/rust/arrow/src/compute/kernels/sort.rs
@@ -409,7 +409,7 @@ pub struct SortColumn {
 /// ```
 /// use std::convert::From;
 /// use std::sync::Arc;
-/// use arrow::array::{ArrayRef, StringArray, PrimitiveArray, 
as_primitive_array};
+/// use arrow::array::{ArrayRef, StringArray, PrimitiveArray, 
PrimitiveArrayOps, as_primitive_array};
 /// use arrow::compute::kernels::sort::{SortColumn, SortOptions, lexsort};
 /// use arrow::datatypes::Int64Type;
 ///
diff --git a/rust/arrow/src/datatypes.rs b/rust/arrow/src/datatypes.rs
index e0d7539..fcd297a 100644
--- a/rust/arrow/src/datatypes.rs
+++ b/rust/arrow/src/datatypes.rs
@@ -23,6 +23,7 @@
 //!  * [`DataType`](crate::datatypes::DataType) to describe the type of a 
field.
 
 use std::collections::HashMap;
+use std::default::Default;
 use std::fmt;
 use std::mem::size_of;
 #[cfg(feature = "simd")]
@@ -39,6 +40,7 @@ use serde_json::{
 };
 
 use crate::error::{ArrowError, Result};
+use crate::util::bit_util;
 
 /// The set of datatypes that are supported by this implementation of Apache 
Arrow.
 ///
@@ -184,7 +186,7 @@ pub struct Field {
 }
 
 pub trait ArrowNativeType:
-    fmt::Debug + Send + Sync + Copy + PartialOrd + FromStr + 'static
+    fmt::Debug + Send + Sync + Copy + PartialOrd + FromStr + Default + 'static
 {
     fn into_json_value(self) -> Option<Value>;
 
@@ -208,12 +210,25 @@ pub trait ArrowPrimitiveType: 'static {
     fn get_data_type() -> DataType;
 
     /// Returns the bit width of this primitive type.
-    fn get_bit_width() -> usize;
+    fn get_bit_width() -> usize {
+        size_of::<Self::Native>() * 8
+    }
 
     /// Returns a default value of this primitive type.
     ///
     /// This is useful for aggregate array ops like `sum()`, `mean()`.
-    fn default_value() -> Self::Native;
+    fn default_value() -> Self::Native {
+        Default::default()
+    }
+
+    /// Returns a value offset from the given pointer by the given index. The 
default
+    /// implementation (used for all non-boolean types) is simply equivalent 
to pointer-arithmetic.
+    /// # Safety
+    /// Just like array-access in C: the raw_ptr must be the start of a valid 
array, and the index
+    /// must be less than the size of the array.
+    unsafe fn index(raw_ptr: *const Self::Native, i: usize) -> Self::Native {
+        *(raw_ptr.add(i))
+    }
 }
 
 impl ArrowNativeType for bool {
@@ -346,8 +361,32 @@ impl ArrowNativeType for f64 {
     }
 }
 
+// BooleanType is special: its bit-width is not the size of the primitive 
type, and its `index`
+// operation assumes bit-packing.
+#[derive(Debug)]
+pub struct BooleanType {}
+
+impl ArrowPrimitiveType for BooleanType {
+    type Native = bool;
+
+    fn get_data_type() -> DataType {
+        DataType::Boolean
+    }
+
+    fn get_bit_width() -> usize {
+        1
+    }
+
+    /// # Safety
+    /// The pointer must be part of a bit-packed boolean array, and the index 
must be less than the
+    /// size of the array.
+    unsafe fn index(raw_ptr: *const Self::Native, i: usize) -> Self::Native {
+        bit_util::get_bit_raw(raw_ptr as *const u8, i)
+    }
+}
+
 macro_rules! make_type {
-    ($name:ident, $native_ty:ty, $data_ty:expr, $bit_width:expr, 
$default_val:expr) => {
+    ($name:ident, $native_ty:ty, $data_ty:expr) => {
         #[derive(Debug)]
         pub struct $name {}
 
@@ -357,134 +396,87 @@ macro_rules! make_type {
             fn get_data_type() -> DataType {
                 $data_ty
             }
-
-            fn get_bit_width() -> usize {
-                $bit_width
-            }
-
-            fn default_value() -> Self::Native {
-                $default_val
-            }
         }
     };
 }
 
-make_type!(BooleanType, bool, DataType::Boolean, 1, false);
-make_type!(Int8Type, i8, DataType::Int8, 8, 0i8);
-make_type!(Int16Type, i16, DataType::Int16, 16, 0i16);
-make_type!(Int32Type, i32, DataType::Int32, 32, 0i32);
-make_type!(Int64Type, i64, DataType::Int64, 64, 0i64);
-make_type!(UInt8Type, u8, DataType::UInt8, 8, 0u8);
-make_type!(UInt16Type, u16, DataType::UInt16, 16, 0u16);
-make_type!(UInt32Type, u32, DataType::UInt32, 32, 0u32);
-make_type!(UInt64Type, u64, DataType::UInt64, 64, 0u64);
-make_type!(Float32Type, f32, DataType::Float32, 32, 0.0f32);
-make_type!(Float64Type, f64, DataType::Float64, 64, 0.0f64);
+make_type!(Int8Type, i8, DataType::Int8);
+make_type!(Int16Type, i16, DataType::Int16);
+make_type!(Int32Type, i32, DataType::Int32);
+make_type!(Int64Type, i64, DataType::Int64);
+make_type!(UInt8Type, u8, DataType::UInt8);
+make_type!(UInt16Type, u16, DataType::UInt16);
+make_type!(UInt32Type, u32, DataType::UInt32);
+make_type!(UInt64Type, u64, DataType::UInt64);
+make_type!(Float32Type, f32, DataType::Float32);
+make_type!(Float64Type, f64, DataType::Float64);
 make_type!(
     TimestampSecondType,
     i64,
-    DataType::Timestamp(TimeUnit::Second, None),
-    64,
-    0i64
+    DataType::Timestamp(TimeUnit::Second, None)
 );
 make_type!(
     TimestampMillisecondType,
     i64,
-    DataType::Timestamp(TimeUnit::Millisecond, None),
-    64,
-    0i64
+    DataType::Timestamp(TimeUnit::Millisecond, None)
 );
 make_type!(
     TimestampMicrosecondType,
     i64,
-    DataType::Timestamp(TimeUnit::Microsecond, None),
-    64,
-    0i64
+    DataType::Timestamp(TimeUnit::Microsecond, None)
 );
 make_type!(
     TimestampNanosecondType,
     i64,
-    DataType::Timestamp(TimeUnit::Nanosecond, None),
-    64,
-    0i64
-);
-make_type!(Date32Type, i32, DataType::Date32(DateUnit::Day), 32, 0i32);
-make_type!(
-    Date64Type,
-    i64,
-    DataType::Date64(DateUnit::Millisecond),
-    64,
-    0i64
-);
-make_type!(
-    Time32SecondType,
-    i32,
-    DataType::Time32(TimeUnit::Second),
-    32,
-    0i32
+    DataType::Timestamp(TimeUnit::Nanosecond, None)
 );
+make_type!(Date32Type, i32, DataType::Date32(DateUnit::Day));
+make_type!(Date64Type, i64, DataType::Date64(DateUnit::Millisecond));
+make_type!(Time32SecondType, i32, DataType::Time32(TimeUnit::Second));
 make_type!(
     Time32MillisecondType,
     i32,
-    DataType::Time32(TimeUnit::Millisecond),
-    32,
-    0i32
+    DataType::Time32(TimeUnit::Millisecond)
 );
 make_type!(
     Time64MicrosecondType,
     i64,
-    DataType::Time64(TimeUnit::Microsecond),
-    64,
-    0i64
+    DataType::Time64(TimeUnit::Microsecond)
 );
 make_type!(
     Time64NanosecondType,
     i64,
-    DataType::Time64(TimeUnit::Nanosecond),
-    64,
-    0i64
+    DataType::Time64(TimeUnit::Nanosecond)
 );
 make_type!(
     IntervalYearMonthType,
     i32,
-    DataType::Interval(IntervalUnit::YearMonth),
-    32,
-    0i32
+    DataType::Interval(IntervalUnit::YearMonth)
 );
 make_type!(
     IntervalDayTimeType,
     i64,
-    DataType::Interval(IntervalUnit::DayTime),
-    64,
-    0i64
+    DataType::Interval(IntervalUnit::DayTime)
 );
 make_type!(
     DurationSecondType,
     i64,
-    DataType::Duration(TimeUnit::Second),
-    64,
-    0i64
+    DataType::Duration(TimeUnit::Second)
 );
 make_type!(
     DurationMillisecondType,
     i64,
-    DataType::Duration(TimeUnit::Millisecond),
-    64,
-    0i64
+    DataType::Duration(TimeUnit::Millisecond)
 );
 make_type!(
     DurationMicrosecondType,
     i64,
-    DataType::Duration(TimeUnit::Microsecond),
-    64,
-    0i64
+    DataType::Duration(TimeUnit::Microsecond)
 );
 make_type!(
     DurationNanosecondType,
     i64,
-    DataType::Duration(TimeUnit::Nanosecond),
-    64,
-    0i64
+    DataType::Duration(TimeUnit::Nanosecond)
 );
 
 /// A subtype of primitive type that represents legal dictionary keys.
diff --git a/rust/arrow/src/util/pretty.rs b/rust/arrow/src/util/pretty.rs
index afa4827..65ec2a8 100644
--- a/rust/arrow/src/util/pretty.rs
+++ b/rust/arrow/src/util/pretty.rs
@@ -18,7 +18,7 @@
 //! Utilities for printing record batches
 
 use crate::array;
-use crate::array::StringArrayOps;
+use crate::array::{PrimitiveArrayOps, StringArrayOps};
 use crate::datatypes::{DataType, TimeUnit};
 use crate::record_batch::RecordBatch;
 
diff --git a/rust/datafusion/examples/simple_udf.rs 
b/rust/datafusion/examples/simple_udf.rs
index 8a8be72..313d916 100644
--- a/rust/datafusion/examples/simple_udf.rs
+++ b/rust/datafusion/examples/simple_udf.rs
@@ -16,7 +16,9 @@
 // under the License.
 
 use arrow::{
-    array::{Array, ArrayRef, Float32Array, Float64Array, Float64Builder},
+    array::{
+        Array, ArrayRef, Float32Array, Float64Array, Float64Builder, 
PrimitiveArrayOps,
+    },
     datatypes::DataType,
     record_batch::RecordBatch,
     util::pretty,
diff --git a/rust/datafusion/src/datasource/parquet.rs 
b/rust/datafusion/src/datasource/parquet.rs
index 117f96d..beb450d 100644
--- a/rust/datafusion/src/datasource/parquet.rs
+++ b/rust/datafusion/src/datasource/parquet.rs
@@ -69,6 +69,7 @@ impl TableProvider for ParquetTable {
 #[cfg(test)]
 mod tests {
     use super::*;
+    use arrow::array::PrimitiveArrayOps;
     use arrow::array::{
         BinaryArray, BooleanArray, Float32Array, Float64Array, Int32Array,
         TimestampNanosecondArray,
diff --git a/rust/datafusion/src/execution/context.rs 
b/rust/datafusion/src/execution/context.rs
index 7d24e90..c5c5213 100644
--- a/rust/datafusion/src/execution/context.rs
+++ b/rust/datafusion/src/execution/context.rs
@@ -515,7 +515,9 @@ mod tests {
     use crate::physical_plan::functions::ScalarFunctionImplementation;
     use crate::test;
     use crate::variable::VarType;
-    use arrow::array::{ArrayRef, Int32Array, StringArray, StringArrayOps};
+    use arrow::array::{
+        ArrayRef, Int32Array, PrimitiveArrayOps, StringArray, StringArrayOps,
+    };
     use arrow::compute::add;
     use std::fs::File;
     use std::{io::prelude::*, sync::Mutex};
diff --git a/rust/datafusion/src/physical_plan/common.rs 
b/rust/datafusion/src/physical_plan/common.rs
index 3378c44..c076a7a 100644
--- a/rust/datafusion/src/physical_plan/common.rs
+++ b/rust/datafusion/src/physical_plan/common.rs
@@ -24,7 +24,7 @@ use std::sync::{Arc, Mutex};
 use crate::error::{ExecutionError, Result};
 
 use crate::logical_plan::ScalarValue;
-use arrow::array::{self, ArrayRef, StringArrayOps};
+use arrow::array::{self, ArrayRef, PrimitiveArrayOps, StringArrayOps};
 use arrow::datatypes::{DataType, SchemaRef};
 use arrow::error::Result as ArrowResult;
 use arrow::record_batch::{RecordBatch, RecordBatchReader};
diff --git a/rust/datafusion/src/physical_plan/expressions.rs 
b/rust/datafusion/src/physical_plan/expressions.rs
index ceb9dd1..a82114c 100644
--- a/rust/datafusion/src/physical_plan/expressions.rs
+++ b/rust/datafusion/src/physical_plan/expressions.rs
@@ -1509,7 +1509,7 @@ mod tests {
     use crate::error::Result;
     use crate::physical_plan::common::get_scalar_value;
     use arrow::array::{
-        LargeStringArray, PrimitiveArray, StringArray, StringArrayOps,
+        LargeStringArray, PrimitiveArray, PrimitiveArrayOps, StringArray, 
StringArrayOps,
         Time64NanosecondArray,
     };
     use arrow::datatypes::*;
diff --git a/rust/datafusion/src/physical_plan/functions.rs 
b/rust/datafusion/src/physical_plan/functions.rs
index 5120866..af02c6d 100644
--- a/rust/datafusion/src/physical_plan/functions.rs
+++ b/rust/datafusion/src/physical_plan/functions.rs
@@ -341,7 +341,10 @@ mod tests {
         error::Result, logical_plan::ScalarValue, 
physical_plan::expressions::lit,
     };
     use arrow::{
-        array::{ArrayRef, Float64Array, Int32Array, StringArray, 
StringArrayOps},
+        array::{
+            ArrayRef, Float64Array, Int32Array, PrimitiveArrayOps, StringArray,
+            StringArrayOps,
+        },
         datatypes::Field,
         record_batch::RecordBatch,
     };
diff --git a/rust/datafusion/src/physical_plan/hash_aggregate.rs 
b/rust/datafusion/src/physical_plan/hash_aggregate.rs
index e8a72f5..25c2f38 100644
--- a/rust/datafusion/src/physical_plan/hash_aggregate.rs
+++ b/rust/datafusion/src/physical_plan/hash_aggregate.rs
@@ -26,6 +26,7 @@ use crate::physical_plan::{
     Accumulator, AggregateExpr, Distribution, ExecutionPlan, Partitioning, 
PhysicalExpr,
 };
 
+use arrow::array::PrimitiveArrayOps;
 use arrow::array::{
     ArrayBuilder, ArrayRef, Float32Array, Float64Array, Int16Array, Int32Array,
     Int64Array, Int8Array, StringArray, StringArrayOps, UInt16Array, 
UInt32Array,
diff --git a/rust/datafusion/src/physical_plan/math_expressions.rs 
b/rust/datafusion/src/physical_plan/math_expressions.rs
index 8647699..32049d0 100644
--- a/rust/datafusion/src/physical_plan/math_expressions.rs
+++ b/rust/datafusion/src/physical_plan/math_expressions.rs
@@ -19,7 +19,9 @@
 
 use std::sync::Arc;
 
-use arrow::array::{make_array, Array, ArrayData, ArrayRef, Float32Array, 
Float64Array};
+use arrow::array::{
+    make_array, Array, ArrayData, ArrayRef, Float32Array, Float64Array, 
PrimitiveArrayOps,
+};
 use arrow::buffer::Buffer;
 use arrow::datatypes::{DataType, ToByteSlice};
 
diff --git a/rust/datafusion/src/test/mod.rs b/rust/datafusion/src/test/mod.rs
index faa7770..be46c8a 100644
--- a/rust/datafusion/src/test/mod.rs
+++ b/rust/datafusion/src/test/mod.rs
@@ -23,6 +23,7 @@ use crate::execution::context::ExecutionContext;
 use crate::logical_plan::{LogicalPlan, LogicalPlanBuilder};
 use crate::physical_plan::ExecutionPlan;
 use arrow::array;
+use arrow::array::PrimitiveArrayOps;
 use arrow::datatypes::{DataType, Field, Schema, SchemaRef};
 use arrow::record_batch::RecordBatch;
 use std::env;
diff --git a/rust/datafusion/tests/user_defined_plan.rs 
b/rust/datafusion/tests/user_defined_plan.rs
index a6b6919..1f67945 100644
--- a/rust/datafusion/tests/user_defined_plan.rs
+++ b/rust/datafusion/tests/user_defined_plan.rs
@@ -59,7 +59,7 @@
 //!
 
 use arrow::{
-    array::{Int64Array, StringArray, StringArrayOps},
+    array::{Int64Array, PrimitiveArrayOps, StringArray, StringArrayOps},
     datatypes::SchemaRef,
     error::ArrowError,
     record_batch::{RecordBatch, RecordBatchReader},

Reply via email to