This is an automated email from the ASF dual-hosted git repository.
agrove pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 1f30466 ARROW-10002: [Rust] Remove `default fn` from
`PrimitiveArrayOps`
1f30466 is described below
commit 1f30466ac7042354de35cc69fd49ced1acd54b38
Author: Kyle Strand <[email protected]>
AuthorDate: Thu Sep 17 20:32:55 2020 -0600
ARROW-10002: [Rust] Remove `default fn` from `PrimitiveArrayOps`
This is based on my
[proof-of-concept](https://github.com/BatmanAoD/arrow-rust-specialization-alternatives/pull/1).
Closes #8206 from BatmanAoD/IndexViaPrimitive
Lead-authored-by: Kyle Strand <[email protected]>
Co-authored-by: Kyle J Strand <[email protected]>
Signed-off-by: Andy Grove <[email protected]>
---
rust/arrow/src/array/array.rs | 63 ++-------
rust/arrow/src/array/mod.rs | 2 +-
rust/arrow/src/compute/kernels/boolean.rs | 1 +
rust/arrow/src/compute/kernels/sort.rs | 2 +-
rust/arrow/src/datatypes.rs | 146 ++++++++++-----------
rust/arrow/src/util/pretty.rs | 2 +-
rust/datafusion/examples/simple_udf.rs | 4 +-
rust/datafusion/src/datasource/parquet.rs | 1 +
rust/datafusion/src/execution/context.rs | 4 +-
rust/datafusion/src/physical_plan/common.rs | 2 +-
rust/datafusion/src/physical_plan/expressions.rs | 2 +-
rust/datafusion/src/physical_plan/functions.rs | 5 +-
.../datafusion/src/physical_plan/hash_aggregate.rs | 1 +
.../src/physical_plan/math_expressions.rs | 4 +-
rust/datafusion/src/test/mod.rs | 1 +
rust/datafusion/tests/user_defined_plan.rs | 2 +-
16 files changed, 102 insertions(+), 140 deletions(-)
diff --git a/rust/arrow/src/array/array.rs b/rust/arrow/src/array/array.rs
index 35374e4..6d56a5a 100644
--- a/rust/arrow/src/array/array.rs
+++ b/rust/arrow/src/array/array.rs
@@ -401,40 +401,25 @@ pub struct PrimitiveArray<T: ArrowPrimitiveType> {
/// Common operations for primitive types, including numeric types and boolean
type.
pub trait PrimitiveArrayOps<T: ArrowPrimitiveType> {
+ /// Returns a `Buffer` holding all the values of this array.
+ ///
+ /// Note this doesn't take the offset of this array into account.
fn values(&self) -> Buffer;
+
+ /// Returns the primitive value at index `i`.
+ ///
+ /// Note this doesn't do any bound checking, for performance reason.
fn value(&self, i: usize) -> T::Native;
}
-// This is necessary when caller wants to access `PrimitiveArrayOps`'s methods
with
-// `ArrowPrimitiveType`. It doesn't have any implementation as the actual
implementations
-// are delegated to that of `ArrowNumericType` and `BooleanType`.
impl<T: ArrowPrimitiveType> PrimitiveArrayOps<T> for PrimitiveArray<T> {
- default fn values(&self) -> Buffer {
- unimplemented!()
- }
-
- default fn value(&self, _: usize) -> T::Native {
- unimplemented!()
- }
-}
-
-impl<T: ArrowNumericType> PrimitiveArrayOps<T> for PrimitiveArray<T> {
fn values(&self) -> Buffer {
- self.values()
+ self.data.buffers()[0].clone()
}
fn value(&self, i: usize) -> T::Native {
- self.value(i)
- }
-}
-
-impl PrimitiveArrayOps<BooleanType> for BooleanArray {
- fn values(&self) -> Buffer {
- self.values()
- }
-
- fn value(&self, i: usize) -> bool {
- self.value(i)
+ let offset = i + self.offset();
+ unsafe { T::index(self.raw_values.get(), offset) }
}
}
@@ -475,13 +460,6 @@ impl<T: ArrowNumericType> PrimitiveArray<T> {
PrimitiveArray::from(array_data)
}
- /// Returns a `Buffer` holding all the values of this array.
- ///
- /// Note this doesn't take the offset of this array into account.
- pub fn values(&self) -> Buffer {
- self.data.buffers()[0].clone()
- }
-
/// Returns the length of this array.
pub fn len(&self) -> usize {
self.data.len()
@@ -497,13 +475,6 @@ impl<T: ArrowNumericType> PrimitiveArray<T> {
unsafe { self.raw_values.get().add(self.data.offset()) }
}
- /// Returns the primitive value at index `i`.
- ///
- /// Note this doesn't do any bound checking, for performance reason.
- pub fn value(&self, i: usize) -> T::Native {
- unsafe { *(self.raw_values().add(i)) }
- }
-
/// Returns a slice for the given offset and length
///
/// Note this doesn't do any bound checking, for performance reason.
@@ -695,20 +666,6 @@ impl PrimitiveArray<BooleanType> {
BooleanArray::from(array_data)
}
- /// Returns a `Buffer` holds all the values of this array.
- ///
- /// Note this doesn't take account into the offset of this array.
- pub fn values(&self) -> Buffer {
- self.data.buffers()[0].clone()
- }
-
- /// Returns the boolean value at index `i`.
- pub fn value(&self, i: usize) -> bool {
- assert!(i < self.data.len());
- let offset = i + self.offset();
- unsafe { bit_util::get_bit_raw(self.raw_values.get() as *const u8,
offset) }
- }
-
// Returns a new primitive array builder
pub fn builder(capacity: usize) -> BooleanBuilder {
BooleanBuilder::new(capacity)
diff --git a/rust/arrow/src/array/mod.rs b/rust/arrow/src/array/mod.rs
index d9784b4..e37c473 100644
--- a/rust/arrow/src/array/mod.rs
+++ b/rust/arrow/src/array/mod.rs
@@ -49,7 +49,7 @@
//! ```
//! extern crate arrow;
//!
-//! use arrow::array::Int16Array;
+//! use arrow::array::{Int16Array, PrimitiveArrayOps};
//!
//! // Create a new builder with a capacity of 100
//! let mut builder = Int16Array::builder(100);
diff --git a/rust/arrow/src/compute/kernels/boolean.rs
b/rust/arrow/src/compute/kernels/boolean.rs
index a9a75a0..e913409 100644
--- a/rust/arrow/src/compute/kernels/boolean.rs
+++ b/rust/arrow/src/compute/kernels/boolean.rs
@@ -126,6 +126,7 @@ pub fn not(left: &BooleanArray) -> Result<BooleanArray> {
#[cfg(test)]
mod tests {
use super::*;
+ use crate::array::PrimitiveArrayOps;
#[test]
fn test_bool_array_and() {
diff --git a/rust/arrow/src/compute/kernels/sort.rs
b/rust/arrow/src/compute/kernels/sort.rs
index ffee0d2..d881c7f 100644
--- a/rust/arrow/src/compute/kernels/sort.rs
+++ b/rust/arrow/src/compute/kernels/sort.rs
@@ -409,7 +409,7 @@ pub struct SortColumn {
/// ```
/// use std::convert::From;
/// use std::sync::Arc;
-/// use arrow::array::{ArrayRef, StringArray, PrimitiveArray,
as_primitive_array};
+/// use arrow::array::{ArrayRef, StringArray, PrimitiveArray,
PrimitiveArrayOps, as_primitive_array};
/// use arrow::compute::kernels::sort::{SortColumn, SortOptions, lexsort};
/// use arrow::datatypes::Int64Type;
///
diff --git a/rust/arrow/src/datatypes.rs b/rust/arrow/src/datatypes.rs
index e0d7539..fcd297a 100644
--- a/rust/arrow/src/datatypes.rs
+++ b/rust/arrow/src/datatypes.rs
@@ -23,6 +23,7 @@
//! * [`DataType`](crate::datatypes::DataType) to describe the type of a
field.
use std::collections::HashMap;
+use std::default::Default;
use std::fmt;
use std::mem::size_of;
#[cfg(feature = "simd")]
@@ -39,6 +40,7 @@ use serde_json::{
};
use crate::error::{ArrowError, Result};
+use crate::util::bit_util;
/// The set of datatypes that are supported by this implementation of Apache
Arrow.
///
@@ -184,7 +186,7 @@ pub struct Field {
}
pub trait ArrowNativeType:
- fmt::Debug + Send + Sync + Copy + PartialOrd + FromStr + 'static
+ fmt::Debug + Send + Sync + Copy + PartialOrd + FromStr + Default + 'static
{
fn into_json_value(self) -> Option<Value>;
@@ -208,12 +210,25 @@ pub trait ArrowPrimitiveType: 'static {
fn get_data_type() -> DataType;
/// Returns the bit width of this primitive type.
- fn get_bit_width() -> usize;
+ fn get_bit_width() -> usize {
+ size_of::<Self::Native>() * 8
+ }
/// Returns a default value of this primitive type.
///
/// This is useful for aggregate array ops like `sum()`, `mean()`.
- fn default_value() -> Self::Native;
+ fn default_value() -> Self::Native {
+ Default::default()
+ }
+
+ /// Returns a value offset from the given pointer by the given index. The
default
+ /// implementation (used for all non-boolean types) is simply equivalent
to pointer-arithmetic.
+ /// # Safety
+ /// Just like array-access in C: the raw_ptr must be the start of a valid
array, and the index
+ /// must be less than the size of the array.
+ unsafe fn index(raw_ptr: *const Self::Native, i: usize) -> Self::Native {
+ *(raw_ptr.add(i))
+ }
}
impl ArrowNativeType for bool {
@@ -346,8 +361,32 @@ impl ArrowNativeType for f64 {
}
}
+// BooleanType is special: its bit-width is not the size of the primitive
type, and its `index`
+// operation assumes bit-packing.
+#[derive(Debug)]
+pub struct BooleanType {}
+
+impl ArrowPrimitiveType for BooleanType {
+ type Native = bool;
+
+ fn get_data_type() -> DataType {
+ DataType::Boolean
+ }
+
+ fn get_bit_width() -> usize {
+ 1
+ }
+
+ /// # Safety
+ /// The pointer must be part of a bit-packed boolean array, and the index
must be less than the
+ /// size of the array.
+ unsafe fn index(raw_ptr: *const Self::Native, i: usize) -> Self::Native {
+ bit_util::get_bit_raw(raw_ptr as *const u8, i)
+ }
+}
+
macro_rules! make_type {
- ($name:ident, $native_ty:ty, $data_ty:expr, $bit_width:expr,
$default_val:expr) => {
+ ($name:ident, $native_ty:ty, $data_ty:expr) => {
#[derive(Debug)]
pub struct $name {}
@@ -357,134 +396,87 @@ macro_rules! make_type {
fn get_data_type() -> DataType {
$data_ty
}
-
- fn get_bit_width() -> usize {
- $bit_width
- }
-
- fn default_value() -> Self::Native {
- $default_val
- }
}
};
}
-make_type!(BooleanType, bool, DataType::Boolean, 1, false);
-make_type!(Int8Type, i8, DataType::Int8, 8, 0i8);
-make_type!(Int16Type, i16, DataType::Int16, 16, 0i16);
-make_type!(Int32Type, i32, DataType::Int32, 32, 0i32);
-make_type!(Int64Type, i64, DataType::Int64, 64, 0i64);
-make_type!(UInt8Type, u8, DataType::UInt8, 8, 0u8);
-make_type!(UInt16Type, u16, DataType::UInt16, 16, 0u16);
-make_type!(UInt32Type, u32, DataType::UInt32, 32, 0u32);
-make_type!(UInt64Type, u64, DataType::UInt64, 64, 0u64);
-make_type!(Float32Type, f32, DataType::Float32, 32, 0.0f32);
-make_type!(Float64Type, f64, DataType::Float64, 64, 0.0f64);
+make_type!(Int8Type, i8, DataType::Int8);
+make_type!(Int16Type, i16, DataType::Int16);
+make_type!(Int32Type, i32, DataType::Int32);
+make_type!(Int64Type, i64, DataType::Int64);
+make_type!(UInt8Type, u8, DataType::UInt8);
+make_type!(UInt16Type, u16, DataType::UInt16);
+make_type!(UInt32Type, u32, DataType::UInt32);
+make_type!(UInt64Type, u64, DataType::UInt64);
+make_type!(Float32Type, f32, DataType::Float32);
+make_type!(Float64Type, f64, DataType::Float64);
make_type!(
TimestampSecondType,
i64,
- DataType::Timestamp(TimeUnit::Second, None),
- 64,
- 0i64
+ DataType::Timestamp(TimeUnit::Second, None)
);
make_type!(
TimestampMillisecondType,
i64,
- DataType::Timestamp(TimeUnit::Millisecond, None),
- 64,
- 0i64
+ DataType::Timestamp(TimeUnit::Millisecond, None)
);
make_type!(
TimestampMicrosecondType,
i64,
- DataType::Timestamp(TimeUnit::Microsecond, None),
- 64,
- 0i64
+ DataType::Timestamp(TimeUnit::Microsecond, None)
);
make_type!(
TimestampNanosecondType,
i64,
- DataType::Timestamp(TimeUnit::Nanosecond, None),
- 64,
- 0i64
-);
-make_type!(Date32Type, i32, DataType::Date32(DateUnit::Day), 32, 0i32);
-make_type!(
- Date64Type,
- i64,
- DataType::Date64(DateUnit::Millisecond),
- 64,
- 0i64
-);
-make_type!(
- Time32SecondType,
- i32,
- DataType::Time32(TimeUnit::Second),
- 32,
- 0i32
+ DataType::Timestamp(TimeUnit::Nanosecond, None)
);
+make_type!(Date32Type, i32, DataType::Date32(DateUnit::Day));
+make_type!(Date64Type, i64, DataType::Date64(DateUnit::Millisecond));
+make_type!(Time32SecondType, i32, DataType::Time32(TimeUnit::Second));
make_type!(
Time32MillisecondType,
i32,
- DataType::Time32(TimeUnit::Millisecond),
- 32,
- 0i32
+ DataType::Time32(TimeUnit::Millisecond)
);
make_type!(
Time64MicrosecondType,
i64,
- DataType::Time64(TimeUnit::Microsecond),
- 64,
- 0i64
+ DataType::Time64(TimeUnit::Microsecond)
);
make_type!(
Time64NanosecondType,
i64,
- DataType::Time64(TimeUnit::Nanosecond),
- 64,
- 0i64
+ DataType::Time64(TimeUnit::Nanosecond)
);
make_type!(
IntervalYearMonthType,
i32,
- DataType::Interval(IntervalUnit::YearMonth),
- 32,
- 0i32
+ DataType::Interval(IntervalUnit::YearMonth)
);
make_type!(
IntervalDayTimeType,
i64,
- DataType::Interval(IntervalUnit::DayTime),
- 64,
- 0i64
+ DataType::Interval(IntervalUnit::DayTime)
);
make_type!(
DurationSecondType,
i64,
- DataType::Duration(TimeUnit::Second),
- 64,
- 0i64
+ DataType::Duration(TimeUnit::Second)
);
make_type!(
DurationMillisecondType,
i64,
- DataType::Duration(TimeUnit::Millisecond),
- 64,
- 0i64
+ DataType::Duration(TimeUnit::Millisecond)
);
make_type!(
DurationMicrosecondType,
i64,
- DataType::Duration(TimeUnit::Microsecond),
- 64,
- 0i64
+ DataType::Duration(TimeUnit::Microsecond)
);
make_type!(
DurationNanosecondType,
i64,
- DataType::Duration(TimeUnit::Nanosecond),
- 64,
- 0i64
+ DataType::Duration(TimeUnit::Nanosecond)
);
/// A subtype of primitive type that represents legal dictionary keys.
diff --git a/rust/arrow/src/util/pretty.rs b/rust/arrow/src/util/pretty.rs
index afa4827..65ec2a8 100644
--- a/rust/arrow/src/util/pretty.rs
+++ b/rust/arrow/src/util/pretty.rs
@@ -18,7 +18,7 @@
//! Utilities for printing record batches
use crate::array;
-use crate::array::StringArrayOps;
+use crate::array::{PrimitiveArrayOps, StringArrayOps};
use crate::datatypes::{DataType, TimeUnit};
use crate::record_batch::RecordBatch;
diff --git a/rust/datafusion/examples/simple_udf.rs
b/rust/datafusion/examples/simple_udf.rs
index 8a8be72..313d916 100644
--- a/rust/datafusion/examples/simple_udf.rs
+++ b/rust/datafusion/examples/simple_udf.rs
@@ -16,7 +16,9 @@
// under the License.
use arrow::{
- array::{Array, ArrayRef, Float32Array, Float64Array, Float64Builder},
+ array::{
+ Array, ArrayRef, Float32Array, Float64Array, Float64Builder,
PrimitiveArrayOps,
+ },
datatypes::DataType,
record_batch::RecordBatch,
util::pretty,
diff --git a/rust/datafusion/src/datasource/parquet.rs
b/rust/datafusion/src/datasource/parquet.rs
index 117f96d..beb450d 100644
--- a/rust/datafusion/src/datasource/parquet.rs
+++ b/rust/datafusion/src/datasource/parquet.rs
@@ -69,6 +69,7 @@ impl TableProvider for ParquetTable {
#[cfg(test)]
mod tests {
use super::*;
+ use arrow::array::PrimitiveArrayOps;
use arrow::array::{
BinaryArray, BooleanArray, Float32Array, Float64Array, Int32Array,
TimestampNanosecondArray,
diff --git a/rust/datafusion/src/execution/context.rs
b/rust/datafusion/src/execution/context.rs
index 7d24e90..c5c5213 100644
--- a/rust/datafusion/src/execution/context.rs
+++ b/rust/datafusion/src/execution/context.rs
@@ -515,7 +515,9 @@ mod tests {
use crate::physical_plan::functions::ScalarFunctionImplementation;
use crate::test;
use crate::variable::VarType;
- use arrow::array::{ArrayRef, Int32Array, StringArray, StringArrayOps};
+ use arrow::array::{
+ ArrayRef, Int32Array, PrimitiveArrayOps, StringArray, StringArrayOps,
+ };
use arrow::compute::add;
use std::fs::File;
use std::{io::prelude::*, sync::Mutex};
diff --git a/rust/datafusion/src/physical_plan/common.rs
b/rust/datafusion/src/physical_plan/common.rs
index 3378c44..c076a7a 100644
--- a/rust/datafusion/src/physical_plan/common.rs
+++ b/rust/datafusion/src/physical_plan/common.rs
@@ -24,7 +24,7 @@ use std::sync::{Arc, Mutex};
use crate::error::{ExecutionError, Result};
use crate::logical_plan::ScalarValue;
-use arrow::array::{self, ArrayRef, StringArrayOps};
+use arrow::array::{self, ArrayRef, PrimitiveArrayOps, StringArrayOps};
use arrow::datatypes::{DataType, SchemaRef};
use arrow::error::Result as ArrowResult;
use arrow::record_batch::{RecordBatch, RecordBatchReader};
diff --git a/rust/datafusion/src/physical_plan/expressions.rs
b/rust/datafusion/src/physical_plan/expressions.rs
index ceb9dd1..a82114c 100644
--- a/rust/datafusion/src/physical_plan/expressions.rs
+++ b/rust/datafusion/src/physical_plan/expressions.rs
@@ -1509,7 +1509,7 @@ mod tests {
use crate::error::Result;
use crate::physical_plan::common::get_scalar_value;
use arrow::array::{
- LargeStringArray, PrimitiveArray, StringArray, StringArrayOps,
+ LargeStringArray, PrimitiveArray, PrimitiveArrayOps, StringArray,
StringArrayOps,
Time64NanosecondArray,
};
use arrow::datatypes::*;
diff --git a/rust/datafusion/src/physical_plan/functions.rs
b/rust/datafusion/src/physical_plan/functions.rs
index 5120866..af02c6d 100644
--- a/rust/datafusion/src/physical_plan/functions.rs
+++ b/rust/datafusion/src/physical_plan/functions.rs
@@ -341,7 +341,10 @@ mod tests {
error::Result, logical_plan::ScalarValue,
physical_plan::expressions::lit,
};
use arrow::{
- array::{ArrayRef, Float64Array, Int32Array, StringArray,
StringArrayOps},
+ array::{
+ ArrayRef, Float64Array, Int32Array, PrimitiveArrayOps, StringArray,
+ StringArrayOps,
+ },
datatypes::Field,
record_batch::RecordBatch,
};
diff --git a/rust/datafusion/src/physical_plan/hash_aggregate.rs
b/rust/datafusion/src/physical_plan/hash_aggregate.rs
index e8a72f5..25c2f38 100644
--- a/rust/datafusion/src/physical_plan/hash_aggregate.rs
+++ b/rust/datafusion/src/physical_plan/hash_aggregate.rs
@@ -26,6 +26,7 @@ use crate::physical_plan::{
Accumulator, AggregateExpr, Distribution, ExecutionPlan, Partitioning,
PhysicalExpr,
};
+use arrow::array::PrimitiveArrayOps;
use arrow::array::{
ArrayBuilder, ArrayRef, Float32Array, Float64Array, Int16Array, Int32Array,
Int64Array, Int8Array, StringArray, StringArrayOps, UInt16Array,
UInt32Array,
diff --git a/rust/datafusion/src/physical_plan/math_expressions.rs
b/rust/datafusion/src/physical_plan/math_expressions.rs
index 8647699..32049d0 100644
--- a/rust/datafusion/src/physical_plan/math_expressions.rs
+++ b/rust/datafusion/src/physical_plan/math_expressions.rs
@@ -19,7 +19,9 @@
use std::sync::Arc;
-use arrow::array::{make_array, Array, ArrayData, ArrayRef, Float32Array,
Float64Array};
+use arrow::array::{
+ make_array, Array, ArrayData, ArrayRef, Float32Array, Float64Array,
PrimitiveArrayOps,
+};
use arrow::buffer::Buffer;
use arrow::datatypes::{DataType, ToByteSlice};
diff --git a/rust/datafusion/src/test/mod.rs b/rust/datafusion/src/test/mod.rs
index faa7770..be46c8a 100644
--- a/rust/datafusion/src/test/mod.rs
+++ b/rust/datafusion/src/test/mod.rs
@@ -23,6 +23,7 @@ use crate::execution::context::ExecutionContext;
use crate::logical_plan::{LogicalPlan, LogicalPlanBuilder};
use crate::physical_plan::ExecutionPlan;
use arrow::array;
+use arrow::array::PrimitiveArrayOps;
use arrow::datatypes::{DataType, Field, Schema, SchemaRef};
use arrow::record_batch::RecordBatch;
use std::env;
diff --git a/rust/datafusion/tests/user_defined_plan.rs
b/rust/datafusion/tests/user_defined_plan.rs
index a6b6919..1f67945 100644
--- a/rust/datafusion/tests/user_defined_plan.rs
+++ b/rust/datafusion/tests/user_defined_plan.rs
@@ -59,7 +59,7 @@
//!
use arrow::{
- array::{Int64Array, StringArray, StringArrayOps},
+ array::{Int64Array, PrimitiveArrayOps, StringArray, StringArrayOps},
datatypes::SchemaRef,
error::ArrowError,
record_batch::{RecordBatch, RecordBatchReader},