andygrove commented on code in PR #3189: URL: https://github.com/apache/arrow-datafusion/pull/3189#discussion_r950519839
########## datafusion/physical-expr/src/expressions/is_true.rs: ########## @@ -0,0 +1,139 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! IS TRUE expression + +use std::{any::Any, sync::Arc}; + +use crate::PhysicalExpr; +use arrow::{ + array::{Array, BooleanArray}, + datatypes::{DataType, Schema}, + record_batch::RecordBatch, +}; +use datafusion_common::DataFusionError; +use datafusion_common::Result; +use datafusion_common::ScalarValue; +use datafusion_expr::ColumnarValue; + +/// IS TRUE expression +#[derive(Debug)] +pub struct IsTrueExpr { + /// The input expression + arg: Arc<dyn PhysicalExpr>, +} + +impl IsTrueExpr { + /// Create new not expression + pub fn new(arg: Arc<dyn PhysicalExpr>) -> Self { + Self { arg } + } + + /// Get the input expression + pub fn arg(&self) -> &Arc<dyn PhysicalExpr> { + &self.arg + } +} + +impl std::fmt::Display for IsTrueExpr { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "{} IS TRUE", self.arg) + } +} + +impl PhysicalExpr for IsTrueExpr { + /// Return a reference to Any that can be used for downcasting + fn as_any(&self) -> &dyn Any { + self + } + + fn data_type(&self, _input_schema: &Schema) -> Result<DataType> { + Ok(DataType::Boolean) + 
} + + fn nullable(&self, _input_schema: &Schema) -> Result<bool> { + Ok(false) + } + + fn evaluate(&self, batch: &RecordBatch) -> Result<ColumnarValue> { + let arg = self.arg.evaluate(batch)?; + match arg { + ColumnarValue::Array(array) => { + let array_len = array.len(); + let my_array = ColumnarValue::Array(array).into_array(array_len); + let true_array = BooleanArray::from(vec![Some(true)]); + let false_array = BooleanArray::from(vec![Some(false)]); + let null_array = BooleanArray::from(vec![None]); + let mut result_vec = vec![]; + for i in 0..array_len { + let current = (*my_array).slice(i, 1); + if (*current).eq(&true_array) { + result_vec.push(Some(true)); + } else if (*current).eq(&false_array) || (*current).eq(&null_array) { + result_vec.push(Some(false)); + } else { + return Err(DataFusionError::Execution(format!("Cannot apply 'IS TRUE' to arguments of type '<{:?}> IS TRUE'. Supported form(s): '<BOOLEAN> IS TRUE'", current.data_type()))) + } + } + + let return_array = BooleanArray::from(result_vec); + Ok(ColumnarValue::Array(Arc::new(return_array))) Review Comment: Here is my suggested approach. The key changes are: - It is more efficient to use an Arrow array builder, rather than build a vec and then convert it into an Arrow array - We can just use `array.value(i)` to get individual values now that we are downcasting to a `BooleanArray` ```suggestion match array.as_any().downcast_ref::<BooleanArray>() { Some(bool_array) => { let array_len = array.len(); let mut result_builder = BooleanBuilder::new(array_len); for i in 0..array_len { result_builder.append_value(!bool_array.is_null(i) && !bool_array.value(i)); } Ok(ColumnarValue::Array(Arc::new(result_builder.finish()))) } _ => Err(DataFusionError::Execution(format!("Cannot apply 'IS FALSE' to arguments of type '<{:?}> IS FALSE'. Supported form(s): '<BOOLEAN> IS FALSE'", array.data_type()))) } ``` -- This is an automated message from the Apache Git Service. 
To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
