This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 9ee36b216 Add Scalar/Datum abstraction (#1047) (#4393)
9ee36b216 is described below
commit 9ee36b216c3f7dcbaae520f451194acd4f55b98e
Author: Raphael Taylor-Davies <[email protected]>
AuthorDate: Tue Jul 4 10:06:38 2023 +0100
Add Scalar/Datum abstraction (#1047) (#4393)
* Add Scalar/Datum abstraction (#1047)
* Add dyn Array
---
arrow-array/src/lib.rs | 3 ++
arrow-array/src/scalar.rs | 116 +++++++++++++++++++++++++++++++++++++++++++++
arrow-select/src/filter.rs | 10 ----
3 files changed, 119 insertions(+), 10 deletions(-)
diff --git a/arrow-array/src/lib.rs b/arrow-array/src/lib.rs
index 46de381c3..afb7ec5e6 100644
--- a/arrow-array/src/lib.rs
+++ b/arrow-array/src/lib.rs
@@ -192,6 +192,9 @@ pub use arithmetic::ArrowNativeTypeOp;
mod numeric;
pub use numeric::*;
+mod scalar;
+pub use scalar::*;
+
pub mod builder;
pub mod cast;
mod delta;
diff --git a/arrow-array/src/scalar.rs b/arrow-array/src/scalar.rs
new file mode 100644
index 000000000..e54a999f9
--- /dev/null
+++ b/arrow-array/src/scalar.rs
@@ -0,0 +1,116 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::Array;
+
+/// A possibly [`Scalar`] [`Array`]
+///
+/// This allows optimised binary kernels where one or more arguments are
constant
+///
+/// ```
+/// # use arrow_array::*;
+/// # use arrow_buffer::{BooleanBuffer, MutableBuffer, NullBuffer};
+/// # use arrow_schema::ArrowError;
+/// #
+/// fn eq_impl<T: ArrowPrimitiveType>(
+/// a: &PrimitiveArray<T>,
+/// a_scalar: bool,
+/// b: &PrimitiveArray<T>,
+/// b_scalar: bool,
+/// ) -> BooleanArray {
+/// let (array, scalar) = match (a_scalar, b_scalar) {
+/// (true, true) | (false, false) => {
+/// let len = a.len().min(b.len());
+/// let nulls = NullBuffer::union(a.nulls(), b.nulls());
+/// let buffer = BooleanBuffer::collect_bool(len, |idx|
a.value(idx) == b.value(idx));
+/// return BooleanArray::new(buffer, nulls);
+/// }
+/// (true, false) => (b, (a.null_count() == 0).then(|| a.value(0))),
+/// (false, true) => (a, (b.null_count() == 0).then(|| b.value(0))),
+/// };
+/// match scalar {
+/// Some(v) => {
+/// let len = array.len();
+/// let nulls = array.nulls().cloned();
+/// let buffer = BooleanBuffer::collect_bool(len, |idx|
array.value(idx) == v);
+/// BooleanArray::new(buffer, nulls)
+/// }
+/// None => BooleanArray::new_null(array.len()),
+/// }
+/// }
+///
+/// pub fn eq(l: &dyn Datum, r: &dyn Datum) -> Result<BooleanArray,
ArrowError> {
+/// let (l_array, l_scalar) = l.get();
+/// let (r_array, r_scalar) = r.get();
+/// downcast_primitive_array!(
+/// (l_array, r_array) => Ok(eq_impl(l_array, l_scalar, r_array,
r_scalar)),
+/// (a, b) => Err(ArrowError::NotYetImplemented(format!("{a} ==
{b}"))),
+/// )
+/// }
+///
+/// // Comparison of two arrays
+/// let a = Int32Array::from(vec![1, 2, 3, 4, 5]);
+/// let b = Int32Array::from(vec![1, 2, 4, 7, 3]);
+/// let r = eq(&a, &b).unwrap();
+/// let values: Vec<_> = r.values().iter().collect();
+/// assert_eq!(values, &[true, true, false, false, false]);
+///
+/// // Comparison of an array and a scalar
+/// let a = Int32Array::from(vec![1, 2, 3, 4, 5]);
+/// let b = Int32Array::from(vec![1]);
+/// let r = eq(&a, &Scalar::new(&b)).unwrap();
+/// let values: Vec<_> = r.values().iter().collect();
+/// assert_eq!(values, &[true, false, false, false, false]);
+pub trait Datum {
+ /// Returns the value for this [`Datum`] and a boolean indicating if the
value is scalar
+ fn get(&self) -> (&dyn Array, bool);
+}
+
+impl<T: Array> Datum for T {
+    /// Exposes `self` as a type-erased [`Array`], flagged as not scalar
+    fn get(&self) -> (&dyn Array, bool) {
+        let array: &dyn Array = self;
+        (array, false)
+    }
+}
+
+impl Datum for dyn Array {
+    /// Returns the trait object itself, flagged as not scalar
+    fn get(&self) -> (&dyn Array, bool) {
+        // A bare `dyn Array` is always reported as an array, never as a scalar
+        const IS_SCALAR: bool = false;
+        (self, IS_SCALAR)
+    }
+}
+
+/// A wrapper around a single value [`Array`] indicating kernels should treat it as a scalar value
+///
+/// Instances are created with [`Scalar::new`], which panics unless the wrapped
+/// array contains exactly one element
+///
+/// See [`Datum`] for more information
+#[derive(Debug, Clone, Copy)]
+pub struct Scalar<'a>(&'a dyn Array);
+
+impl<'a> Scalar<'a> {
+    /// Wraps a single-element [`Array`] so that it is reported as a scalar
+    ///
+    /// # Panics
+    ///
+    /// Panics if `array.len() != 1`
+    pub fn new(array: &'a dyn Array) -> Self {
+        let len = array.len();
+        assert_eq!(len, 1);
+        Scalar(array)
+    }
+}
+
+impl<'a> Datum for Scalar<'a> {
+    /// Returns the wrapped [`Array`], flagged as scalar
+    fn get(&self) -> (&dyn Array, bool) {
+        let Self(array) = self;
+        (*array, true)
+    }
+}
diff --git a/arrow-select/src/filter.rs b/arrow-select/src/filter.rs
index c89491944..94afd2df3 100644
--- a/arrow-select/src/filter.rs
+++ b/arrow-select/src/filter.rs
@@ -321,16 +321,6 @@ fn filter_array(
// actually filter
_ => downcast_primitive_array! {
values => Ok(Arc::new(filter_primitive(values, predicate))),
- DataType::Decimal128(p, s) => {
- let values =
values.as_any().downcast_ref::<Decimal128Array>().unwrap();
- let filtered = filter_primitive(values, predicate);
- Ok(Arc::new(filtered.with_precision_and_scale(*p,
*s).unwrap()))
- }
- DataType::Decimal256(p, s) => {
- let values =
values.as_any().downcast_ref::<Decimal256Array>().unwrap();
- let filtered = filter_primitive(values, predicate);
- Ok(Arc::new(filtered.with_precision_and_scale(*p,
*s).unwrap()))
- }
DataType::Boolean => {
let values =
values.as_any().downcast_ref::<BooleanArray>().unwrap();
Ok(Arc::new(filter_boolean(values, predicate)))