rluvaton commented on code in PR #8653:
URL: https://github.com/apache/arrow-rs/pull/8653#discussion_r2463751080
##########
arrow-select/src/zip.rs:
##########
@@ -166,9 +196,465 @@ pub fn zip(
Ok(make_array(data))
}
+/// Zipper for 2 scalars
+///
+/// Useful for using in `IF <expr> THEN <scalar> ELSE <scalar> END` expressions
+///
+/// # Example
+/// ```
+/// # use std::sync::Arc;
+/// # use arrow_array::{ArrayRef, BooleanArray, Int32Array, Scalar,
cast::AsArray, types::Int32Type};
+///
+/// # use arrow_select::zip::ScalarZipper;
+/// let scalar_truthy = Scalar::new(Int32Array::from_value(42, 1));
+/// let scalar_falsy = Scalar::new(Int32Array::from_value(123, 1));
+/// let zipper = ScalarZipper::try_new(&scalar_truthy, &scalar_falsy).unwrap();
+///
+/// // Later when we have a boolean mask
+/// let mask = BooleanArray::from(vec![true, false, true, false, true]);
+/// let result = zipper.zip(&mask).unwrap();
+/// let actual = result.as_primitive::<Int32Type>();
+/// let expected = Int32Array::from(vec![Some(42), Some(123), Some(42),
Some(123), Some(42)]);
+/// ```
+///
+#[derive(Debug, Clone)]
+pub struct ScalarZipper {
+ zip_impl: Arc<dyn ZipImpl>,
+}
+
+impl ScalarZipper {
+ /// Try to create a new ScalarZipper from two scalar Datum
+ ///
+ /// # Errors
+ /// returns error if:
+ /// - the two Datum have different data types
+ /// - either Datum is not a scalar (or has more than 1 element)
+ ///
+ pub fn try_new(truthy: &dyn Datum, falsy: &dyn Datum) -> Result<Self,
ArrowError> {
+ let (truthy, truthy_is_scalar) = truthy.get();
+ let (falsy, falsy_is_scalar) = falsy.get();
+
+ if truthy.data_type() != falsy.data_type() {
+ return Err(ArrowError::InvalidArgumentError(
+ "arguments need to have the same data type".into(),
+ ));
+ }
+
+ if !truthy_is_scalar {
+ return Err(ArrowError::InvalidArgumentError(
+ "only scalar arrays are supported".into(),
+ ));
+ }
+
+ if !falsy_is_scalar {
+ return Err(ArrowError::InvalidArgumentError(
+ "only scalar arrays are supported".into(),
+ ));
+ }
+
+ if truthy.len() != 1 {
+ return Err(ArrowError::InvalidArgumentError(
+ "scalar arrays must have 1 element".into(),
+ ));
+ }
+ if falsy.len() != 1 {
+ return Err(ArrowError::InvalidArgumentError(
+ "scalar arrays must have 1 element".into(),
+ ));
+ }
+
+ macro_rules! primitive_size_helper {
+ ($t:ty) => {
+ Arc::new(PrimitiveScalarImpl::<$t>::new(truthy, falsy)) as
Arc<dyn ZipImpl>
+ };
+ }
+
+ let zip_impl = downcast_primitive! {
+ truthy.data_type() => (primitive_size_helper),
+ DataType::Utf8 => {
+ Arc::new(BytesScalarImpl::<Utf8Type>::new(truthy, falsy)) as
Arc<dyn ZipImpl>
+ },
+ DataType::LargeUtf8 => {
+ Arc::new(BytesScalarImpl::<LargeUtf8Type>::new(truthy, falsy))
as Arc<dyn ZipImpl>
+ },
+ DataType::Binary => {
+ Arc::new(BytesScalarImpl::<BinaryType>::new(truthy, falsy)) as
Arc<dyn ZipImpl>
+ },
+ DataType::LargeBinary => {
+ Arc::new(BytesScalarImpl::<LargeBinaryType>::new(truthy,
falsy)) as Arc<dyn ZipImpl>
+ },
+ _ => {
+ Arc::new(FallbackImpl::new(truthy, falsy)) as Arc<dyn ZipImpl>
+ },
+ };
+
+ Ok(Self { zip_impl })
+ }
+
+ /// Creating output array based on input boolean array and the two scalar
values the zipper was created with
+ /// See struct level documentation for examples.
+ pub fn zip(&self, mask: &BooleanArray) -> Result<ArrayRef, ArrowError> {
Review Comment:
I don't see a way that `zip` would mutate the input mask, and getting a
`BooleanArray` would still not allow it to mutate without a full copy. If it
is purely for getting masks of different types
(`BooleanArray`/`BooleanBufferBuilder`), I'm down for it.
renaming
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]