alamb commented on code in PR #3189: URL: https://github.com/apache/arrow-datafusion/pull/3189#discussion_r950828871
########## datafusion/physical-expr/src/expressions/is_false.rs: ########## @@ -0,0 +1,127 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! IS FALSE expression + +use std::{any::Any, sync::Arc}; + +use crate::PhysicalExpr; +use arrow::{ + array::{Array, BooleanArray, BooleanBuilder}, + datatypes::{DataType, Schema}, + record_batch::RecordBatch, +}; +use datafusion_common::DataFusionError; +use datafusion_common::Result; +use datafusion_common::ScalarValue; +use datafusion_expr::ColumnarValue; + +/// IS FALSE expression +#[derive(Debug)] +pub struct IsFalseExpr { + /// The input expression + arg: Arc<dyn PhysicalExpr>, +} + +impl IsFalseExpr { + /// Create new not expression + pub fn new(arg: Arc<dyn PhysicalExpr>) -> Self { + Self { arg } + } + + /// Get the input expression + pub fn arg(&self) -> &Arc<dyn PhysicalExpr> { + &self.arg + } +} + +impl std::fmt::Display for IsFalseExpr { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "{} IS FALSE", self.arg) + } +} + +impl PhysicalExpr for IsFalseExpr { + /// Return a reference to Any that can be used for downcasting + fn as_any(&self) -> &dyn Any { + self + } + + fn data_type(&self, _input_schema: &Schema) -> Result<DataType> { + 
Ok(DataType::Boolean) + } + + fn nullable(&self, _input_schema: &Schema) -> Result<bool> { + Ok(false) + } + + fn evaluate(&self, batch: &RecordBatch) -> Result<ColumnarValue> { + let arg = self.arg.evaluate(batch)?; + match arg { + ColumnarValue::Array(array) => { + match array.as_any().downcast_ref::<BooleanArray>() { + Some(bool_array) => { + let array_len = array.len(); + let mut result_builder = BooleanBuilder::new(array_len); + for i in 0..array_len { + result_builder.append_value(!bool_array.is_null(i) && !bool_array.value(i)); + } + Ok(ColumnarValue::Array(Arc::new(result_builder.finish()))) Review Comment: You can probably do the same thing with less code (and more efficiently) using the `FromIterator` implementation for `BooleanArray` (i.e. `collect()`): ```suggestion let result: BooleanArray = bool_array.iter() .map(|v| v.map(|v| !v).or(Some(false))) .collect(); ``` ########## datafusion/sql/src/planner.rs: ########## @@ -1882,6 +1882,14 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { self.sql_expr_to_logical_expr(*expr, schema, ctes)?, ))), + SQLExpr::IsTrue(expr) => Ok(Expr::IsTrue(Box::new( + self.sql_expr_to_logical_expr(*expr, schema, ctes)?, + ))), Review Comment: I think you might be able to avoid having to extend `Expr` and add a `PhysicalExpr` at all if you rewrote `IS TRUE` in the SQL planner to `IS NOT DISTINCT FROM`. Something like: ```suggestion SQLExpr::IsTrue(expr) => Ok(Expr::BinaryExpr { left: Box::new(self.sql_expr_to_logical_expr(*expr, schema, ctes)?), op: Operator::IsNotDistinctFrom, right: Box::new(lit(true)), }), ``` That way a query like `SELECT x IS TRUE` becomes `SELECT x IS NOT DISTINCT FROM TRUE`. Here is an example from Postgres showing they are equivalent: ```sql alamb=# select column1 is true, column1 is not distinct from true from (values (true), (false), (null)) as sq; ?column? | ?column? 
----------+---------- t | t f | f f | f (3 rows) ``` The same type of transformation applies to `IS FALSE`. Then this PR would likely be 8 lines of code plus the SQL-level tests. ########## datafusion/physical-expr/src/expressions/is_false.rs: ########## @@ -0,0 +1,127 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! 
IS FALSE expression + +use std::{any::Any, sync::Arc}; + +use crate::PhysicalExpr; +use arrow::{ + array::{Array, BooleanArray, BooleanBuilder}, + datatypes::{DataType, Schema}, + record_batch::RecordBatch, +}; +use datafusion_common::DataFusionError; +use datafusion_common::Result; +use datafusion_common::ScalarValue; +use datafusion_expr::ColumnarValue; + +/// IS FALSE expression +#[derive(Debug)] +pub struct IsFalseExpr { + /// The input expression + arg: Arc<dyn PhysicalExpr>, +} + +impl IsFalseExpr { + /// Create new not expression + pub fn new(arg: Arc<dyn PhysicalExpr>) -> Self { + Self { arg } + } + + /// Get the input expression + pub fn arg(&self) -> &Arc<dyn PhysicalExpr> { + &self.arg + } +} + +impl std::fmt::Display for IsFalseExpr { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "{} IS FALSE", self.arg) + } +} + +impl PhysicalExpr for IsFalseExpr { + /// Return a reference to Any that can be used for downcasting + fn as_any(&self) -> &dyn Any { + self + } + + fn data_type(&self, _input_schema: &Schema) -> Result<DataType> { + Ok(DataType::Boolean) + } + + fn nullable(&self, _input_schema: &Schema) -> Result<bool> { + Ok(false) + } + + fn evaluate(&self, batch: &RecordBatch) -> Result<ColumnarValue> { + let arg = self.arg.evaluate(batch)?; + match arg { + ColumnarValue::Array(array) => { + match array.as_any().downcast_ref::<BooleanArray>() { + Some(bool_array) => { + let array_len = array.len(); + let mut result_builder = BooleanBuilder::new(array_len); + for i in 0..array_len { + result_builder.append_value(!bool_array.is_null(i) && !bool_array.value(i)); + } + Ok(ColumnarValue::Array(Arc::new(result_builder.finish()))) Review Comment: (same comment applies to is_true.rs) -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
