This is an automated email from the ASF dual-hosted git repository.
agrove pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new a374c3c ARROW-4490: [Rust] Add explicit SIMD vectorization for
boolean ops in "array_ops"
a374c3c is described below
commit a374c3c3af0a2524c51a7a1f53d6c838f7806de7
Author: Paddy Horan <[email protected]>
AuthorDate: Sat Feb 16 09:45:56 2019 -0700
ARROW-4490: [Rust] Add explicit SIMD vectorization for boolean ops in
"array_ops"
This PR adds explicit SIMD for boolean ops, `and`, `or` and `not`.
I moved `array_ops` into the new `compute` module. From the outside this
module serves the same purpose as the previous `array_ops` module (all kernels
will be accessible from this namespace) and the remaining `array_ops`
implementations are exposed via the `compute` module currently. As I add
explicit SIMD for more kernels they will migrate from `array_ops` into their
own modules under `compute`. I am keeping sub-modules under `compute` (as
apposed to compute.rs) as SIMD can get r [...]
I have included benchmarks where I re-create the old default
implementations for others to take a look at the speed improvement. It's not
clear whether we need the non-SIMD versions in the benchmarks long term but I
left them in for now to make the non-SIMD/SIMD comparison.
There are likely more optimizations possible (processing the values and
null bit buffers in a single loop for instance) but I wanted to get the
cleanest impl first and add further optimizations later if needed.
Author: Paddy Horan <[email protected]>
Closes #3641 from paddyhoran/boolean-kernels and squashes the following
commits:
89588e6 <Paddy Horan> Removed `compute` from `mod.rs`
f9ae58a <Paddy Horan> Updated benchmarks
e321cec <Paddy Horan> Updated `not` to use trait impls
da16486 <Paddy Horan> Implemented `Not` for `Buffer`
f21253d <Paddy Horan> Updated datafusion and comments
a3c01c8 <Paddy Horan> Added SIMD binary boolean kernels
---
rust/arrow/Cargo.toml | 2 +-
rust/arrow/benches/bitwise_ops.rs | 75 -------------
rust/arrow/benches/boolean_kernels.rs | 129 ++++++++++++++++++++++
rust/arrow/src/bitmap.rs | 20 ++--
rust/arrow/src/buffer.rs | 89 ++++++++++++----
rust/arrow/src/{ => compute}/array_ops.rs | 106 -------------------
rust/arrow/src/compute/boolean_kernels.rs | 159 ++++++++++++++++++++++++++++
rust/arrow/src/{lib.rs => compute/mod.rs} | 31 ++----
rust/arrow/src/compute/util.rs | 83 +++++++++++++++
rust/arrow/src/lib.rs | 2 +-
rust/datafusion/src/execution/aggregate.rs | 62 +++++------
rust/datafusion/src/execution/expression.rs | 6 +-
12 files changed, 492 insertions(+), 272 deletions(-)
diff --git a/rust/arrow/Cargo.toml b/rust/arrow/Cargo.toml
index 6e2d483..04e8ac0 100644
--- a/rust/arrow/Cargo.toml
+++ b/rust/arrow/Cargo.toml
@@ -60,5 +60,5 @@ name = "builder"
harness = false
[[bench]]
-name = "bitwise_ops"
+name = "boolean_kernels"
harness = false
diff --git a/rust/arrow/benches/bitwise_ops.rs
b/rust/arrow/benches/bitwise_ops.rs
deleted file mode 100644
index 434ff4d..0000000
--- a/rust/arrow/benches/bitwise_ops.rs
+++ /dev/null
@@ -1,75 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#[macro_use]
-extern crate criterion;
-use criterion::Criterion;
-
-extern crate arrow;
-
-use arrow::buffer::Buffer;
-use arrow::builder::{BufferBuilderTrait, UInt8BufferBuilder};
-
-fn create_buffer(size: usize) -> Buffer {
- let mut builder = UInt8BufferBuilder::new(size);
- for _i in 0..size {
- builder.append(1_u8).unwrap();
- }
- builder.finish()
-}
-
-fn bitwise_default<F>(size: usize, op: F)
-where
- F: Fn(&u8, &u8) -> u8,
-{
- let buffer_a = create_buffer(size);
- let buffer_b = create_buffer(size);
-
- criterion::black_box({
- let mut builder = UInt8BufferBuilder::new(buffer_a.len());
- for i in 0..buffer_a.len() {
- unsafe {
- builder
- .append(op(
- buffer_a.data().get_unchecked(i),
- buffer_b.data().get_unchecked(i),
- ))
- .unwrap();
- }
- }
- builder.finish()
- });
-}
-
-fn bitwise_simd<F>(size: usize, op: F)
-where
- F: Fn(&Buffer, &Buffer) -> Buffer,
-{
- let buffer_a = create_buffer(size);
- let buffer_b = create_buffer(size);
- criterion::black_box(op(&buffer_a, &buffer_b));
-}
-
-fn add_benchmark(c: &mut Criterion) {
- c.bench_function("add", |b| b.iter(|| bitwise_default(512, |a, b| a & b)));
- c.bench_function("add simd", |b| b.iter(|| bitwise_simd(512, |a, b| a &
b)));
- c.bench_function("or", |b| b.iter(|| bitwise_default(512, |a, b| a | b)));
- c.bench_function("or simd", |b| b.iter(|| bitwise_simd(512, |a, b| a |
b)));
-}
-
-criterion_group!(benches, add_benchmark);
-criterion_main!(benches);
diff --git a/rust/arrow/benches/boolean_kernels.rs
b/rust/arrow/benches/boolean_kernels.rs
new file mode 100644
index 0000000..237b1e9
--- /dev/null
+++ b/rust/arrow/benches/boolean_kernels.rs
@@ -0,0 +1,129 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#[macro_use]
+extern crate criterion;
+use criterion::Criterion;
+
+extern crate arrow;
+
+use arrow::array::*;
+use arrow::builder::*;
+use arrow::compute::boolean_kernels;
+use arrow::error::{ArrowError, Result};
+
+/// Helper function to create arrays
+fn create_boolean_array(size: usize) -> BooleanArray {
+ let mut builder = BooleanBuilder::new(size);
+ for i in 0..size {
+ if i % 2 == 0 {
+ builder.append_value(true).unwrap();
+ } else {
+ builder.append_value(false).unwrap();
+ }
+ }
+ builder.finish()
+}
+
+/// Helper function to implement `AND` and `OR` without SIMD
+pub fn bin_op_no_simd<F>(
+ left: &BooleanArray,
+ right: &BooleanArray,
+ op: F,
+) -> Result<BooleanArray>
+where
+ F: Fn(bool, bool) -> bool,
+{
+ if left.len() != right.len() {
+ return Err(ArrowError::ComputeError(
+ "Cannot perform boolean operation on arrays of different
length".to_string(),
+ ));
+ }
+ let mut b = BooleanArray::builder(left.len());
+ for i in 0..left.len() {
+ if left.is_null(i) || right.is_null(i) {
+ b.append_null()?;
+ } else {
+ b.append_value(op(left.value(i), right.value(i)))?;
+ }
+ }
+ Ok(b.finish())
+}
+
+/// Benchmark for `AND` and `OR` with no SIMD
+fn bench_bin_op_no_simd<F>(size: usize, op: F)
+where
+ F: Fn(bool, bool) -> bool,
+{
+ let array_a = create_boolean_array(size);
+ let array_b = create_boolean_array(size);
+
+ criterion::black_box(bin_op_no_simd(&array_a, &array_b, &op).unwrap());
+}
+
+/// Benchmark for `NOT` with no SIMD
+fn bench_not_no_simd(size: usize) {
+ let array = create_boolean_array(size);
+
+ criterion::black_box({
+ let mut b = BooleanArray::builder(array.len());
+ for i in 0..array.len() {
+ if array.is_null(i) {
+ b.append_null().unwrap();
+ } else {
+ b.append_value(!array.value(i)).unwrap();
+ }
+ }
+ b.finish()
+ });
+}
+
+/// Benchmark for `AND` with SIMD
+fn bench_and_simd(size: usize) {
+ let buffer_a = create_boolean_array(size);
+ let buffer_b = create_boolean_array(size);
+ criterion::black_box(boolean_kernels::and(&buffer_a, &buffer_b).unwrap());
+}
+
+/// Benchmark for `OR` with SIMD
+fn bench_or_simd(size: usize) {
+ let buffer_a = create_boolean_array(size);
+ let buffer_b = create_boolean_array(size);
+ criterion::black_box(boolean_kernels::or(&buffer_a, &buffer_b).unwrap());
+}
+
+/// Benchmark for `NOT` with SIMD
+fn bench_not_simd(size: usize) {
+ let buffer = create_boolean_array(size);
+ criterion::black_box(boolean_kernels::not(&buffer).unwrap());
+}
+
+fn add_benchmark(c: &mut Criterion) {
+ c.bench_function("and", |b| {
+ b.iter(|| bench_bin_op_no_simd(512, |a, b| a && b))
+ });
+ c.bench_function("and simd", |b| b.iter(|| bench_and_simd(512)));
+ c.bench_function("or", |b| {
+ b.iter(|| bench_bin_op_no_simd(512, |a, b| a || b))
+ });
+ c.bench_function("or simd", |b| b.iter(|| bench_or_simd(512)));
+ c.bench_function("not", |b| b.iter(|| bench_not_no_simd(512)));
+ c.bench_function("not simd", |b| b.iter(|| bench_not_simd(512)));
+}
+
+criterion_group!(benches, add_benchmark);
+criterion_main!(benches);
diff --git a/rust/arrow/src/bitmap.rs b/rust/arrow/src/bitmap.rs
index 17dc548..e42e560 100644
--- a/rust/arrow/src/bitmap.rs
+++ b/rust/arrow/src/bitmap.rs
@@ -18,8 +18,10 @@
//! Defines a bitmap, which is used to track which values in an Arrow array
are null.
//! This is called a "validity bitmap" in the Arrow documentation.
-use super::buffer::Buffer;
+use crate::buffer::Buffer;
+use crate::error::Result;
use crate::util::bit_util;
+
use std::ops::{BitAnd, BitOr};
#[derive(PartialEq, Debug)]
@@ -56,18 +58,18 @@ impl Bitmap {
}
impl<'a, 'b> BitAnd<&'b Bitmap> for &'a Bitmap {
- type Output = Bitmap;
+ type Output = Result<Bitmap>;
- fn bitand(self, rhs: &'b Bitmap) -> Bitmap {
- Bitmap::from(&self.bits & &rhs.bits)
+ fn bitand(self, rhs: &'b Bitmap) -> Result<Bitmap> {
+ Ok(Bitmap::from((&self.bits & &rhs.bits)?))
}
}
impl<'a, 'b> BitOr<&'b Bitmap> for &'a Bitmap {
- type Output = Bitmap;
+ type Output = Result<Bitmap>;
- fn bitor(self, rhs: &'b Bitmap) -> Bitmap {
- Bitmap::from(&self.bits | &rhs.bits)
+ fn bitor(self, rhs: &'b Bitmap) -> Result<Bitmap> {
+ Ok(Bitmap::from((&self.bits | &rhs.bits)?))
}
}
@@ -94,7 +96,7 @@ mod tests {
let bitmap2 = Bitmap::from(Buffer::from([0b01001110]));
assert_eq!(
Bitmap::from(Buffer::from([0b01001010])),
- &bitmap1 & &bitmap2
+ (&bitmap1 & &bitmap2).unwrap()
);
}
@@ -104,7 +106,7 @@ mod tests {
let bitmap2 = Bitmap::from(Buffer::from([0b01001110]));
assert_eq!(
Bitmap::from(Buffer::from([0b01101110])),
- &bitmap1 | &bitmap2
+ (&bitmap1 | &bitmap2).unwrap()
);
}
diff --git a/rust/arrow/src/buffer.rs b/rust/arrow/src/buffer.rs
index 447a575..ff4cae8 100644
--- a/rust/arrow/src/buffer.rs
+++ b/rust/arrow/src/buffer.rs
@@ -24,12 +24,12 @@ use packed_simd::u8x64;
use std::cmp;
use std::io::{Error as IoError, ErrorKind, Result as IoResult, Write};
use std::mem;
-use std::ops::{BitAnd, BitOr};
+use std::ops::{BitAnd, BitOr, Not};
use std::slice::{from_raw_parts, from_raw_parts_mut};
use std::sync::Arc;
use crate::builder::{BufferBuilderTrait, UInt8BufferBuilder};
-use crate::error::Result;
+use crate::error::{ArrowError, Result};
use crate::memory;
use crate::util::bit_util;
@@ -173,19 +173,19 @@ where
}
impl<'a, 'b> BitAnd<&'b Buffer> for &'a Buffer {
- type Output = Buffer;
+ type Output = Result<Buffer>;
- fn bitand(self, rhs: &'b Buffer) -> Buffer {
- assert_eq!(
- self.len(),
- rhs.len(),
- "Buffers must be the same size to apply Bitwise AND."
- );
+ fn bitand(self, rhs: &'b Buffer) -> Result<Buffer> {
+ if self.len() != rhs.len() {
+ return Err(ArrowError::ComputeError(
+ "Buffers must be the same size to apply Bitwise
AND.".to_string(),
+ ));
+ }
// SIMD implementation if available
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
{
- return bitwise_bin_op_simd_helper(&self, &rhs, |a, b| a & b);
+ return Ok(bitwise_bin_op_simd_helper(&self, &rhs, |a, b| a & b));
}
// Default implementation
@@ -201,25 +201,25 @@ impl<'a, 'b> BitAnd<&'b Buffer> for &'a Buffer {
.unwrap();
}
}
- builder.finish()
+ Ok(builder.finish())
}
}
}
impl<'a, 'b> BitOr<&'b Buffer> for &'a Buffer {
- type Output = Buffer;
+ type Output = Result<Buffer>;
- fn bitor(self, rhs: &'b Buffer) -> Buffer {
- assert_eq!(
- self.len(),
- rhs.len(),
- "Buffers must be the same size to apply Bitwise OR."
- );
+ fn bitor(self, rhs: &'b Buffer) -> Result<Buffer> {
+ if self.len() != rhs.len() {
+ return Err(ArrowError::ComputeError(
+ "Buffers must be the same size to apply Bitwise
OR.".to_string(),
+ ));
+ }
// SIMD implementation if available
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
{
- return bitwise_bin_op_simd_helper(&self, &rhs, |a, b| a | b);
+ return Ok(bitwise_bin_op_simd_helper(&self, &rhs, |a, b| a | b));
}
// Default implementation
@@ -235,6 +235,45 @@ impl<'a, 'b> BitOr<&'b Buffer> for &'a Buffer {
.unwrap();
}
}
+ Ok(builder.finish())
+ }
+ }
+}
+
+impl Not for &Buffer {
+ type Output = Buffer;
+
+ fn not(self) -> Buffer {
+ // SIMD implementation if available
+ #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+ {
+ let mut result =
+ MutableBuffer::new(self.len()).with_bitset(self.len(), false);
+ let lanes = u8x64::lanes();
+ for i in (0..self.len()).step_by(lanes) {
+ unsafe {
+ let data = from_raw_parts(self.raw_data().offset(i as
isize), lanes);
+ let data_simd =
u8x64::from_slice_unaligned_unchecked(data);
+ let simd_result = !data_simd;
+ let result_slice: &mut [u8] = from_raw_parts_mut(
+ (result.data_mut().as_mut_ptr() as *mut u8).offset(i
as isize),
+ lanes,
+ );
+
simd_result.write_to_slice_unaligned_unchecked(result_slice);
+ }
+ }
+ return result.freeze();
+ }
+
+ // Default implementation
+ #[allow(unreachable_code)]
+ {
+ let mut builder = UInt8BufferBuilder::new(self.len());
+ for i in 0..self.len() {
+ unsafe {
+ builder.append(!self.data().get_unchecked(i)).unwrap();
+ }
+ }
builder.finish()
}
}
@@ -537,14 +576,20 @@ mod tests {
fn test_bitwise_and() {
let buf1 = Buffer::from([0b01101010]);
let buf2 = Buffer::from([0b01001110]);
- assert_eq!(Buffer::from([0b01001010]), &buf1 & &buf2);
+ assert_eq!(Buffer::from([0b01001010]), (&buf1 & &buf2).unwrap());
}
#[test]
fn test_bitwise_or() {
let buf1 = Buffer::from([0b01101010]);
let buf2 = Buffer::from([0b01001110]);
- assert_eq!(Buffer::from([0b01101110]), &buf1 | &buf2);
+ assert_eq!(Buffer::from([0b01101110]), (&buf1 | &buf2).unwrap());
+ }
+
+ #[test]
+ fn test_bitwise_not() {
+ let buf = Buffer::from([0b01101010]);
+ assert_eq!(Buffer::from([0b10010101]), !&buf);
}
#[test]
@@ -552,7 +597,7 @@ mod tests {
fn test_buffer_bitand_different_sizes() {
let buf1 = Buffer::from([1_u8, 1_u8]);
let buf2 = Buffer::from([0b01001110]);
- let _buf3 = &buf1 | &buf2;
+ let _buf3 = (&buf1 | &buf2).unwrap();
}
#[test]
diff --git a/rust/arrow/src/array_ops.rs b/rust/arrow/src/compute/array_ops.rs
similarity index 82%
rename from rust/arrow/src/array_ops.rs
rename to rust/arrow/src/compute/array_ops.rs
index 6e847c8..1a2d5fa 100644
--- a/rust/arrow/src/array_ops.rs
+++ b/rust/arrow/src/compute/array_ops.rs
@@ -312,58 +312,6 @@ where
Ok(b.finish())
}
-/// Perform `AND` operation on two arrays. If either left or right value is
null then the
-/// result is also null.
-pub fn and(left: &BooleanArray, right: &BooleanArray) -> Result<BooleanArray> {
- if left.len() != right.len() {
- return Err(ArrowError::ComputeError(
- "Cannot perform boolean operation on arrays of different
length".to_string(),
- ));
- }
- let mut b = BooleanArray::builder(left.len());
- for i in 0..left.len() {
- if left.is_null(i) || right.is_null(i) {
- b.append_null()?;
- } else {
- b.append_value(left.value(i) && right.value(i))?;
- }
- }
- Ok(b.finish())
-}
-
-/// Perform `OR` operation on two arrays. If either left or right value is
null then the
-/// result is also null.
-pub fn or(left: &BooleanArray, right: &BooleanArray) -> Result<BooleanArray> {
- if left.len() != right.len() {
- return Err(ArrowError::ComputeError(
- "Cannot perform boolean operation on arrays of different
length".to_string(),
- ));
- }
- let mut b = BooleanArray::builder(left.len());
- for i in 0..left.len() {
- if left.is_null(i) || right.is_null(i) {
- b.append_null()?;
- } else {
- b.append_value(left.value(i) || right.value(i))?;
- }
- }
- Ok(b.finish())
-}
-
-/// Perform unary `NOT` operation on an arrays. If value is null then the
result is also
-/// null.
-pub fn not(left: &BooleanArray) -> Result<BooleanArray> {
- let mut b = BooleanArray::builder(left.len());
- for i in 0..left.len() {
- if left.is_null(i) {
- b.append_null()?;
- } else {
- b.append_value(!left.value(i))?;
- }
- }
- Ok(b.finish())
-}
-
#[cfg(test)]
mod tests {
use super::*;
@@ -611,58 +559,4 @@ mod tests {
assert_eq!(5, min(&a).unwrap());
assert_eq!(9, max(&a).unwrap());
}
-
- #[test]
- fn test_bool_array_and() {
- let a = BooleanArray::from(vec![false, false, true, true]);
- let b = BooleanArray::from(vec![false, true, false, true]);
- let c = and(&a, &b).unwrap();
- assert_eq!(false, c.value(0));
- assert_eq!(false, c.value(1));
- assert_eq!(false, c.value(2));
- assert_eq!(true, c.value(3));
- }
-
- #[test]
- fn test_bool_array_or() {
- let a = BooleanArray::from(vec![false, false, true, true]);
- let b = BooleanArray::from(vec![false, true, false, true]);
- let c = or(&a, &b).unwrap();
- assert_eq!(false, c.value(0));
- assert_eq!(true, c.value(1));
- assert_eq!(true, c.value(2));
- assert_eq!(true, c.value(3));
- }
-
- #[test]
- fn test_bool_array_or_nulls() {
- let a = BooleanArray::from(vec![None, Some(false), None, Some(false)]);
- let b = BooleanArray::from(vec![None, None, Some(false), Some(false)]);
- let c = or(&a, &b).unwrap();
- assert_eq!(true, c.is_null(0));
- assert_eq!(true, c.is_null(1));
- assert_eq!(true, c.is_null(2));
- assert_eq!(false, c.is_null(3));
- }
-
- #[test]
- fn test_bool_array_not() {
- let a = BooleanArray::from(vec![false, false, true, true]);
- let c = not(&a).unwrap();
- assert_eq!(true, c.value(0));
- assert_eq!(true, c.value(1));
- assert_eq!(false, c.value(2));
- assert_eq!(false, c.value(3));
- }
-
- #[test]
- fn test_bool_array_and_nulls() {
- let a = BooleanArray::from(vec![None, Some(false), None, Some(false)]);
- let b = BooleanArray::from(vec![None, None, Some(false), Some(false)]);
- let c = and(&a, &b).unwrap();
- assert_eq!(true, c.is_null(0));
- assert_eq!(true, c.is_null(1));
- assert_eq!(true, c.is_null(2));
- assert_eq!(false, c.is_null(3));
- }
}
diff --git a/rust/arrow/src/compute/boolean_kernels.rs
b/rust/arrow/src/compute/boolean_kernels.rs
new file mode 100644
index 0000000..ededea9
--- /dev/null
+++ b/rust/arrow/src/compute/boolean_kernels.rs
@@ -0,0 +1,159 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Defines boolean kernels on Arrow `BooleanArray`'s, e.g. `AND`, `OR` and
`NOT`.
+//!
+//! Kernels support SIMD using [static CPU feature
detection](https://doc.rust-lang.org/stable/std/arch/#static-cpu-feature-detection)
+//! .
+
+use std::sync::Arc;
+
+use crate::array::{Array, BooleanArray};
+use crate::array_data::ArrayData;
+use crate::buffer::Buffer;
+use crate::compute::util::apply_bin_op_to_option_bitmap;
+use crate::datatypes::DataType;
+use crate::error::{ArrowError, Result};
+
+/// Helper function to implement binary kernels
+fn binary_boolean_kernel<F>(
+ left: &BooleanArray,
+ right: &BooleanArray,
+ op: F,
+) -> Result<BooleanArray>
+where
+ F: Fn(&Buffer, &Buffer) -> Result<Buffer>,
+{
+ if left.offset() != right.offset() {
+ return Err(ArrowError::ComputeError(
+ "Cannot apply Bitwise binary op when arrays have different
offsets."
+ .to_string(),
+ ));
+ }
+
+ let left_data = left.data();
+ let right_data = right.data();
+ let null_bit_buffer = apply_bin_op_to_option_bitmap(
+ left_data.null_bitmap(),
+ right_data.null_bitmap(),
+ |a, b| a & b,
+ )?;
+ let values = op(&left_data.buffers()[0], &right_data.buffers()[0])?;
+ let data = ArrayData::new(
+ DataType::Boolean,
+ left.len(),
+ None,
+ null_bit_buffer,
+ left.offset(),
+ vec![values],
+ vec![],
+ );
+ Ok(BooleanArray::from(Arc::new(data)))
+}
+
+/// Performs `AND` operation on two arrays. If either left or right value is
null then the
+/// result is also null.
+pub fn and(left: &BooleanArray, right: &BooleanArray) -> Result<BooleanArray> {
+ binary_boolean_kernel(&left, &right, |a, b| a & b)
+}
+
+/// Performs `OR` operation on two arrays. If either left or right value is
null then the
+/// result is also null.
+pub fn or(left: &BooleanArray, right: &BooleanArray) -> Result<BooleanArray> {
+ binary_boolean_kernel(&left, &right, |a, b| a | b)
+}
+
+/// Performs unary `NOT` operation on an arrays. If value is null then the
result is also
+/// null.
+pub fn not(left: &BooleanArray) -> Result<BooleanArray> {
+ let data = left.data();
+ let null_bit_buffer = match *data.null_bitmap() {
+ None => None,
+ Some(ref b) => Some(b.bits.clone()),
+ };
+
+ let values = !&data.buffers()[0];
+ let data = ArrayData::new(
+ DataType::Boolean,
+ left.len(),
+ None,
+ null_bit_buffer,
+ left.offset(),
+ vec![values],
+ vec![],
+ );
+ Ok(BooleanArray::from(Arc::new(data)))
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn test_bool_array_and() {
+ let a = BooleanArray::from(vec![false, false, true, true]);
+ let b = BooleanArray::from(vec![false, true, false, true]);
+ let c = and(&a, &b).unwrap();
+ assert_eq!(false, c.value(0));
+ assert_eq!(false, c.value(1));
+ assert_eq!(false, c.value(2));
+ assert_eq!(true, c.value(3));
+ }
+
+ #[test]
+ fn test_bool_array_or() {
+ let a = BooleanArray::from(vec![false, false, true, true]);
+ let b = BooleanArray::from(vec![false, true, false, true]);
+ let c = or(&a, &b).unwrap();
+ assert_eq!(false, c.value(0));
+ assert_eq!(true, c.value(1));
+ assert_eq!(true, c.value(2));
+ assert_eq!(true, c.value(3));
+ }
+
+ #[test]
+ fn test_bool_array_or_nulls() {
+ let a = BooleanArray::from(vec![None, Some(false), None, Some(false)]);
+ let b = BooleanArray::from(vec![None, None, Some(false), Some(false)]);
+ let c = or(&a, &b).unwrap();
+ assert_eq!(true, c.is_null(0));
+ assert_eq!(true, c.is_null(1));
+ assert_eq!(true, c.is_null(2));
+ assert_eq!(false, c.is_null(3));
+ }
+
+ #[test]
+ fn test_bool_array_not() {
+ let a = BooleanArray::from(vec![false, false, true, true]);
+ let c = not(&a).unwrap();
+ assert_eq!(true, c.value(0));
+ assert_eq!(true, c.value(1));
+ assert_eq!(false, c.value(2));
+ assert_eq!(false, c.value(3));
+ }
+
+ #[test]
+ fn test_bool_array_and_nulls() {
+ let a = BooleanArray::from(vec![None, Some(false), None, Some(false)]);
+ let b = BooleanArray::from(vec![None, None, Some(false), Some(false)]);
+ let c = and(&a, &b).unwrap();
+ assert_eq!(true, c.is_null(0));
+ assert_eq!(true, c.is_null(1));
+ assert_eq!(true, c.is_null(2));
+ assert_eq!(false, c.is_null(3));
+ }
+}
diff --git a/rust/arrow/src/lib.rs b/rust/arrow/src/compute/mod.rs
similarity index 56%
copy from rust/arrow/src/lib.rs
copy to rust/arrow/src/compute/mod.rs
index dbac4db..5aee2b3 100644
--- a/rust/arrow/src/lib.rs
+++ b/rust/arrow/src/compute/mod.rs
@@ -15,29 +15,12 @@
// specific language governing permissions and limitations
// under the License.
-//! A native Rust implementation of [Apache Arrow](https://arrow.apache.org),
a cross-language
-//! development platform for in-memory data.
-//!
-//! Currently the project is developed and tested against nightly Rust. To
learn more
-//! about the status of Arrow in Rust, see `README.md`.
+//! Computation kernels on Arrow Arrays
-#![feature(type_ascription)]
-#![feature(rustc_private)]
-#![feature(specialization)]
-#![feature(try_from)]
-#![allow(dead_code)]
-#![allow(non_camel_case_types)]
-
-pub mod array;
-pub mod array_data;
pub mod array_ops;
-pub mod bitmap;
-pub mod buffer;
-pub mod builder;
-pub mod csv;
-pub mod datatypes;
-pub mod error;
-pub mod memory;
-pub mod record_batch;
-pub mod tensor;
-pub mod util;
+pub mod boolean_kernels;
+
+mod util;
+
+pub use self::array_ops::*;
+pub use self::boolean_kernels::*;
diff --git a/rust/arrow/src/compute/util.rs b/rust/arrow/src/compute/util.rs
new file mode 100644
index 0000000..55726b8
--- /dev/null
+++ b/rust/arrow/src/compute/util.rs
@@ -0,0 +1,83 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Common utilities for computation kernels.
+
+use crate::bitmap::Bitmap;
+use crate::buffer::Buffer;
+use crate::error::Result;
+
+/// Applies a given binary operation, `op`, to two references to
`Option<Bitmap>`'s.
+///
+/// This function is useful when implementing operations on higher level
arrays.
+pub(crate) fn apply_bin_op_to_option_bitmap<F>(
+ left: &Option<Bitmap>,
+ right: &Option<Bitmap>,
+ op: F,
+) -> Result<Option<Buffer>>
+where
+ F: Fn(&Buffer, &Buffer) -> Result<Buffer>,
+{
+ match *left {
+ None => match *right {
+ None => Ok(None),
+ Some(ref r) => Ok(Some(r.bits.clone())),
+ },
+ Some(ref l) => match *right {
+ None => Ok(Some(l.bits.clone())),
+ Some(ref r) => Ok(Some(op(&l.bits, &r.bits)?)),
+ },
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn test_apply_bin_op_to_option_bitmap() {
+ assert_eq!(
+ Ok(None),
+ apply_bin_op_to_option_bitmap(&None, &None, |a, b| a & b)
+ );
+ assert_eq!(
+ Ok(Some(Buffer::from([0b01101010]))),
+ apply_bin_op_to_option_bitmap(
+ &Some(Bitmap::from(Buffer::from([0b01101010]))),
+ &None,
+ |a, b| a & b
+ )
+ );
+ assert_eq!(
+ Ok(Some(Buffer::from([0b01001110]))),
+ apply_bin_op_to_option_bitmap(
+ &None,
+ &Some(Bitmap::from(Buffer::from([0b01001110]))),
+ |a, b| a & b
+ )
+ );
+ assert_eq!(
+ Ok(Some(Buffer::from([0b01001010]))),
+ apply_bin_op_to_option_bitmap(
+ &Some(Bitmap::from(Buffer::from([0b01101010]))),
+ &Some(Bitmap::from(Buffer::from([0b01001110]))),
+ |a, b| a & b
+ )
+ );
+ }
+
+}
diff --git a/rust/arrow/src/lib.rs b/rust/arrow/src/lib.rs
index dbac4db..ca06fc1 100644
--- a/rust/arrow/src/lib.rs
+++ b/rust/arrow/src/lib.rs
@@ -30,10 +30,10 @@
pub mod array;
pub mod array_data;
-pub mod array_ops;
pub mod bitmap;
pub mod buffer;
pub mod builder;
+pub mod compute;
pub mod csv;
pub mod datatypes;
pub mod error;
diff --git a/rust/datafusion/src/execution/aggregate.rs
b/rust/datafusion/src/execution/aggregate.rs
index 5acf5fb..d8cbfdf 100644
--- a/rust/datafusion/src/execution/aggregate.rs
+++ b/rust/datafusion/src/execution/aggregate.rs
@@ -24,8 +24,8 @@ use std::str;
use std::sync::Arc;
use arrow::array::*;
-use arrow::array_ops;
use arrow::builder::*;
+use arrow::compute;
use arrow::datatypes::{DataType, Schema};
use arrow::record_batch::RecordBatch;
@@ -346,61 +346,61 @@ fn create_accumulators(aggr_expr: &Vec<RuntimeExpr>) ->
Result<AccumulatorSet> {
fn array_min(array: ArrayRef, dt: &DataType) -> Result<Option<ScalarValue>> {
match dt {
DataType::UInt8 => {
- match
array_ops::min(array.as_any().downcast_ref::<UInt8Array>().unwrap()) {
+ match
compute::min(array.as_any().downcast_ref::<UInt8Array>().unwrap()) {
Some(n) => Ok(Some(ScalarValue::UInt8(n))),
None => Ok(None),
}
}
DataType::UInt16 => {
- match
array_ops::min(array.as_any().downcast_ref::<UInt16Array>().unwrap()) {
+ match
compute::min(array.as_any().downcast_ref::<UInt16Array>().unwrap()) {
Some(n) => Ok(Some(ScalarValue::UInt16(n))),
None => Ok(None),
}
}
DataType::UInt32 => {
- match
array_ops::min(array.as_any().downcast_ref::<UInt32Array>().unwrap()) {
+ match
compute::min(array.as_any().downcast_ref::<UInt32Array>().unwrap()) {
Some(n) => Ok(Some(ScalarValue::UInt32(n))),
None => Ok(None),
}
}
DataType::UInt64 => {
- match
array_ops::min(array.as_any().downcast_ref::<UInt64Array>().unwrap()) {
+ match
compute::min(array.as_any().downcast_ref::<UInt64Array>().unwrap()) {
Some(n) => Ok(Some(ScalarValue::UInt64(n))),
None => Ok(None),
}
}
DataType::Int8 => {
- match
array_ops::min(array.as_any().downcast_ref::<Int8Array>().unwrap()) {
+ match
compute::min(array.as_any().downcast_ref::<Int8Array>().unwrap()) {
Some(n) => Ok(Some(ScalarValue::Int8(n))),
None => Ok(None),
}
}
DataType::Int16 => {
- match
array_ops::min(array.as_any().downcast_ref::<Int16Array>().unwrap()) {
+ match
compute::min(array.as_any().downcast_ref::<Int16Array>().unwrap()) {
Some(n) => Ok(Some(ScalarValue::Int16(n))),
None => Ok(None),
}
}
DataType::Int32 => {
- match
array_ops::min(array.as_any().downcast_ref::<Int32Array>().unwrap()) {
+ match
compute::min(array.as_any().downcast_ref::<Int32Array>().unwrap()) {
Some(n) => Ok(Some(ScalarValue::Int32(n))),
None => Ok(None),
}
}
DataType::Int64 => {
- match
array_ops::min(array.as_any().downcast_ref::<Int64Array>().unwrap()) {
+ match
compute::min(array.as_any().downcast_ref::<Int64Array>().unwrap()) {
Some(n) => Ok(Some(ScalarValue::Int64(n))),
None => Ok(None),
}
}
DataType::Float32 => {
- match
array_ops::min(array.as_any().downcast_ref::<Float32Array>().unwrap()) {
+ match
compute::min(array.as_any().downcast_ref::<Float32Array>().unwrap()) {
Some(n) => Ok(Some(ScalarValue::Float32(n))),
None => Ok(None),
}
}
DataType::Float64 => {
- match
array_ops::min(array.as_any().downcast_ref::<Float64Array>().unwrap()) {
+ match
compute::min(array.as_any().downcast_ref::<Float64Array>().unwrap()) {
Some(n) => Ok(Some(ScalarValue::Float64(n))),
None => Ok(None),
}
@@ -414,61 +414,61 @@ fn array_min(array: ArrayRef, dt: &DataType) ->
Result<Option<ScalarValue>> {
fn array_max(array: ArrayRef, dt: &DataType) -> Result<Option<ScalarValue>> {
match dt {
DataType::UInt8 => {
- match
array_ops::max(array.as_any().downcast_ref::<UInt8Array>().unwrap()) {
+ match
compute::max(array.as_any().downcast_ref::<UInt8Array>().unwrap()) {
Some(n) => Ok(Some(ScalarValue::UInt8(n))),
None => Ok(None),
}
}
DataType::UInt16 => {
- match
array_ops::max(array.as_any().downcast_ref::<UInt16Array>().unwrap()) {
+ match
compute::max(array.as_any().downcast_ref::<UInt16Array>().unwrap()) {
Some(n) => Ok(Some(ScalarValue::UInt16(n))),
None => Ok(None),
}
}
DataType::UInt32 => {
- match
array_ops::max(array.as_any().downcast_ref::<UInt32Array>().unwrap()) {
+ match
compute::max(array.as_any().downcast_ref::<UInt32Array>().unwrap()) {
Some(n) => Ok(Some(ScalarValue::UInt32(n))),
None => Ok(None),
}
}
DataType::UInt64 => {
- match
array_ops::max(array.as_any().downcast_ref::<UInt64Array>().unwrap()) {
+ match
compute::max(array.as_any().downcast_ref::<UInt64Array>().unwrap()) {
Some(n) => Ok(Some(ScalarValue::UInt64(n))),
None => Ok(None),
}
}
DataType::Int8 => {
- match
array_ops::max(array.as_any().downcast_ref::<Int8Array>().unwrap()) {
+ match
compute::max(array.as_any().downcast_ref::<Int8Array>().unwrap()) {
Some(n) => Ok(Some(ScalarValue::Int8(n))),
None => Ok(None),
}
}
DataType::Int16 => {
- match
array_ops::max(array.as_any().downcast_ref::<Int16Array>().unwrap()) {
+ match
compute::max(array.as_any().downcast_ref::<Int16Array>().unwrap()) {
Some(n) => Ok(Some(ScalarValue::Int16(n))),
None => Ok(None),
}
}
DataType::Int32 => {
- match
array_ops::max(array.as_any().downcast_ref::<Int32Array>().unwrap()) {
+ match
compute::max(array.as_any().downcast_ref::<Int32Array>().unwrap()) {
Some(n) => Ok(Some(ScalarValue::Int32(n))),
None => Ok(None),
}
}
DataType::Int64 => {
- match
array_ops::max(array.as_any().downcast_ref::<Int64Array>().unwrap()) {
+ match
compute::max(array.as_any().downcast_ref::<Int64Array>().unwrap()) {
Some(n) => Ok(Some(ScalarValue::Int64(n))),
None => Ok(None),
}
}
DataType::Float32 => {
- match
array_ops::max(array.as_any().downcast_ref::<Float32Array>().unwrap()) {
+ match
compute::max(array.as_any().downcast_ref::<Float32Array>().unwrap()) {
Some(n) => Ok(Some(ScalarValue::Float32(n))),
None => Ok(None),
}
}
DataType::Float64 => {
- match
array_ops::max(array.as_any().downcast_ref::<Float64Array>().unwrap()) {
+ match
compute::max(array.as_any().downcast_ref::<Float64Array>().unwrap()) {
Some(n) => Ok(Some(ScalarValue::Float64(n))),
None => Ok(None),
}
@@ -482,61 +482,61 @@ fn array_max(array: ArrayRef, dt: &DataType) ->
Result<Option<ScalarValue>> {
fn array_sum(array: ArrayRef, dt: &DataType) -> Result<Option<ScalarValue>> {
match dt {
DataType::UInt8 => {
- match
array_ops::sum(array.as_any().downcast_ref::<UInt8Array>().unwrap()) {
+ match
compute::sum(array.as_any().downcast_ref::<UInt8Array>().unwrap()) {
Some(n) => Ok(Some(ScalarValue::UInt8(n))),
None => Ok(None),
}
}
DataType::UInt16 => {
- match
array_ops::sum(array.as_any().downcast_ref::<UInt16Array>().unwrap()) {
+ match
compute::sum(array.as_any().downcast_ref::<UInt16Array>().unwrap()) {
Some(n) => Ok(Some(ScalarValue::UInt16(n))),
None => Ok(None),
}
}
DataType::UInt32 => {
- match
array_ops::sum(array.as_any().downcast_ref::<UInt32Array>().unwrap()) {
+ match
compute::sum(array.as_any().downcast_ref::<UInt32Array>().unwrap()) {
Some(n) => Ok(Some(ScalarValue::UInt32(n))),
None => Ok(None),
}
}
DataType::UInt64 => {
- match
array_ops::sum(array.as_any().downcast_ref::<UInt64Array>().unwrap()) {
+ match
compute::sum(array.as_any().downcast_ref::<UInt64Array>().unwrap()) {
Some(n) => Ok(Some(ScalarValue::UInt64(n))),
None => Ok(None),
}
}
DataType::Int8 => {
- match
array_ops::sum(array.as_any().downcast_ref::<Int8Array>().unwrap()) {
+ match
compute::sum(array.as_any().downcast_ref::<Int8Array>().unwrap()) {
Some(n) => Ok(Some(ScalarValue::Int8(n))),
None => Ok(None),
}
}
DataType::Int16 => {
- match
array_ops::sum(array.as_any().downcast_ref::<Int16Array>().unwrap()) {
+ match
compute::sum(array.as_any().downcast_ref::<Int16Array>().unwrap()) {
Some(n) => Ok(Some(ScalarValue::Int16(n))),
None => Ok(None),
}
}
DataType::Int32 => {
- match
array_ops::sum(array.as_any().downcast_ref::<Int32Array>().unwrap()) {
+ match
compute::sum(array.as_any().downcast_ref::<Int32Array>().unwrap()) {
Some(n) => Ok(Some(ScalarValue::Int32(n))),
None => Ok(None),
}
}
DataType::Int64 => {
- match
array_ops::sum(array.as_any().downcast_ref::<Int64Array>().unwrap()) {
+ match
compute::sum(array.as_any().downcast_ref::<Int64Array>().unwrap()) {
Some(n) => Ok(Some(ScalarValue::Int64(n))),
None => Ok(None),
}
}
DataType::Float32 => {
- match
array_ops::sum(array.as_any().downcast_ref::<Float32Array>().unwrap()) {
+ match
compute::sum(array.as_any().downcast_ref::<Float32Array>().unwrap()) {
Some(n) => Ok(Some(ScalarValue::Float32(n))),
None => Ok(None),
}
}
DataType::Float64 => {
- match
array_ops::sum(array.as_any().downcast_ref::<Float64Array>().unwrap()) {
+ match
compute::sum(array.as_any().downcast_ref::<Float64Array>().unwrap()) {
Some(n) => Ok(Some(ScalarValue::Float64(n))),
None => Ok(None),
}
diff --git a/rust/datafusion/src/execution/expression.rs
b/rust/datafusion/src/execution/expression.rs
index 108a855..fa6201a 100644
--- a/rust/datafusion/src/execution/expression.rs
+++ b/rust/datafusion/src/execution/expression.rs
@@ -19,7 +19,7 @@ use std::rc::Rc;
use std::sync::Arc;
use arrow::array::*;
-use arrow::array_ops;
+use arrow::compute;
use arrow::datatypes::{DataType, Schema};
use arrow::record_batch::RecordBatch;
@@ -127,7 +127,7 @@ macro_rules! binary_op {
($LEFT:expr, $RIGHT:expr, $OP:ident, $DT:ident) => {{
let ll = $LEFT.as_any().downcast_ref::<$DT>().unwrap();
let rr = $RIGHT.as_any().downcast_ref::<$DT>().unwrap();
- Ok(Arc::new(array_ops::$OP(&ll, &rr)?))
+ Ok(Arc::new(compute::$OP(&ll, &rr)?))
}};
}
@@ -216,7 +216,7 @@ macro_rules! boolean_ops {
($LEFT:expr, $RIGHT:expr, $BATCH:expr, $OP:ident) => {{
let left_values = $LEFT.get_func()($BATCH)?;
let right_values = $RIGHT.get_func()($BATCH)?;
- Ok(Arc::new(array_ops::$OP(
+ Ok(Arc::new(compute::$OP(
left_values.as_any().downcast_ref::<BooleanArray>().unwrap(),
right_values
.as_any()