This is an automated email from the ASF dual-hosted git repository.

agrove pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-comet.git


The following commit(s) were added to refs/heads/main by this push:
     new a6a45fc7 feat: Move `IfExpr` to `spark-expr` crate (#653)
a6a45fc7 is described below

commit a6a45fc7d19572f5d866e8ae31cdd5ad0f415f21
Author: Andy Grove <[email protected]>
AuthorDate: Thu Jul 11 05:39:58 2024 -0600

    feat: Move `IfExpr` to `spark-expr` crate (#653)
---
 native/Cargo.lock                                  | 10 +++++
 native/Cargo.toml                                  |  5 ++-
 native/core/Cargo.toml                             |  3 +-
 .../src/execution/datafusion/expressions/mod.rs    |  1 -
 .../src/execution/datafusion/expressions/utils.rs  | 18 +--------
 native/core/src/execution/datafusion/planner.rs    |  3 +-
 native/spark-expr/Cargo.toml                       |  2 +
 .../expressions => spark-expr/src}/if_expr.rs      | 20 ++++-----
 native/spark-expr/src/lib.rs                       |  6 ++-
 native/{spark-expr => utils}/Cargo.toml            | 12 ++----
 native/utils/README.md                             | 22 ++++++++++
 .../expressions/mod.rs => utils/src/lib.rs}        | 47 +++++++---------------
 12 files changed, 77 insertions(+), 72 deletions(-)

diff --git a/native/Cargo.lock b/native/Cargo.lock
index c0f22fa1..9bf8247d 100644
--- a/native/Cargo.lock
+++ b/native/Cargo.lock
@@ -867,6 +867,7 @@ dependencies = [
  "criterion",
  "datafusion",
  "datafusion-comet-spark-expr",
+ "datafusion-comet-utils",
  "datafusion-common",
  "datafusion-expr",
  "datafusion-physical-expr",
@@ -909,8 +910,17 @@ dependencies = [
  "arrow",
  "arrow-schema",
  "datafusion",
+ "datafusion-comet-utils",
  "datafusion-common",
  "datafusion-functions",
+ "datafusion-physical-expr",
+]
+
+[[package]]
+name = "datafusion-comet-utils"
+version = "0.1.0"
+dependencies = [
+ "datafusion-physical-plan",
 ]
 
 [[package]]
diff --git a/native/Cargo.toml b/native/Cargo.toml
index 13860fbd..53afed85 100644
--- a/native/Cargo.toml
+++ b/native/Cargo.toml
@@ -16,7 +16,7 @@
 # under the License.
 
 [workspace]
-members = ["core", "spark-expr"]
+members = ["core", "spark-expr", "utils"]
 resolver = "2"
 
 [workspace.package]
@@ -43,8 +43,11 @@ datafusion-common = { git = 
"https://github.com/apache/datafusion.git", rev = "4
 datafusion = { default-features = false, git = 
"https://github.com/apache/datafusion.git", rev = "40.0.0-rc1", features = 
["unicode_expressions", "crypto_expressions"] }
 datafusion-functions = { git = "https://github.com/apache/datafusion.git", rev 
= "40.0.0-rc1", features = ["crypto_expressions"] }
 datafusion-expr = { git = "https://github.com/apache/datafusion.git", rev = 
"40.0.0-rc1", default-features = false }
+datafusion-physical-plan = { git = "https://github.com/apache/datafusion.git", 
rev = "40.0.0-rc1", default-features = false }
 datafusion-physical-expr-common = { git = 
"https://github.com/apache/datafusion.git", rev = "40.0.0-rc1", 
default-features = false }
 datafusion-physical-expr = { git = "https://github.com/apache/datafusion.git", 
rev = "40.0.0-rc1", default-features = false }
+datafusion-comet-spark-expr = { path = "spark-expr", version = "0.1.0" }
+datafusion-comet-utils = { path = "utils", version = "0.1.0" }
 
 [profile.release]
 debug = true
diff --git a/native/core/Cargo.toml b/native/core/Cargo.toml
index 6432118d..be135d4e 100644
--- a/native/core/Cargo.toml
+++ b/native/core/Cargo.toml
@@ -77,7 +77,8 @@ once_cell = "1.18.0"
 regex = "1.9.6"
 crc32fast = "1.3.2"
 simd-adler32 = "0.3.7"
-datafusion-comet-spark-expr = { path = "../spark-expr", version = "0.1.0" }
+datafusion-comet-spark-expr = { workspace = true }
+datafusion-comet-utils = { workspace = true }
 
 [build-dependencies]
 prost-build = "0.9.0"
diff --git a/native/core/src/execution/datafusion/expressions/mod.rs 
b/native/core/src/execution/datafusion/expressions/mod.rs
index 98b422dc..d573c237 100644
--- a/native/core/src/execution/datafusion/expressions/mod.rs
+++ b/native/core/src/execution/datafusion/expressions/mod.rs
@@ -20,7 +20,6 @@
 pub mod bitwise_not;
 pub mod cast;
 pub mod checkoverflow;
-pub mod if_expr;
 mod normalize_nan;
 pub mod scalar_funcs;
 pub use normalize_nan::NormalizeNaNAndZero;
diff --git a/native/core/src/execution/datafusion/expressions/utils.rs 
b/native/core/src/execution/datafusion/expressions/utils.rs
index ee8646a7..6a7ec2e1 100644
--- a/native/core/src/execution/datafusion/expressions/utils.rs
+++ b/native/core/src/execution/datafusion/expressions/utils.rs
@@ -30,24 +30,10 @@ use arrow_array::{cast::AsArray, types::ArrowPrimitiveType};
 use arrow_schema::DataType;
 use chrono::{DateTime, Offset, TimeZone};
 use datafusion_common::cast::as_generic_string_array;
-use datafusion_physical_expr::PhysicalExpr;
 use num::integer::div_floor;
-use std::{any::Any, sync::Arc};
+use std::sync::Arc;
 
-/// An utility function from DataFusion. It is not exposed by DataFusion.
-pub fn down_cast_any_ref(any: &dyn Any) -> &dyn Any {
-    if any.is::<Arc<dyn PhysicalExpr>>() {
-        any.downcast_ref::<Arc<dyn PhysicalExpr>>()
-            .unwrap()
-            .as_any()
-    } else if any.is::<Box<dyn PhysicalExpr>>() {
-        any.downcast_ref::<Box<dyn PhysicalExpr>>()
-            .unwrap()
-            .as_any()
-    } else {
-        any
-    }
-}
+pub use datafusion_comet_utils::down_cast_any_ref;
 
 /// Preprocesses input arrays to add timezone information from Spark to Arrow 
array datatype or
 /// to apply timezone offset.
diff --git a/native/core/src/execution/datafusion/planner.rs 
b/native/core/src/execution/datafusion/planner.rs
index ee208ac7..23960c30 100644
--- a/native/core/src/execution/datafusion/planner.rs
+++ b/native/core/src/execution/datafusion/planner.rs
@@ -79,7 +79,6 @@ use crate::{
                 checkoverflow::CheckOverflow,
                 correlation::Correlation,
                 covariance::Covariance,
-                if_expr::IfExpr,
                 negative,
                 scalar_funcs::create_comet_physical_fun,
                 stats::StatsType,
@@ -108,7 +107,7 @@ use crate::{
 };
 
 use super::expressions::{create_named_struct::CreateNamedStruct, EvalMode};
-use datafusion_comet_spark_expr::abs::Abs;
+use datafusion_comet_spark_expr::{Abs, IfExpr};
 
 // For clippy error on type_complexity.
 type ExecResult<T> = Result<T, ExecutionError>;
diff --git a/native/spark-expr/Cargo.toml b/native/spark-expr/Cargo.toml
index d10d0494..8bf76dff 100644
--- a/native/spark-expr/Cargo.toml
+++ b/native/spark-expr/Cargo.toml
@@ -32,6 +32,8 @@ arrow-schema = { workspace = true }
 datafusion = { workspace = true }
 datafusion-common = { workspace = true }
 datafusion-functions = { workspace = true }
+datafusion-physical-expr = { workspace = true }
+datafusion-comet-utils = { workspace = true }
 
 [lib]
 name = "datafusion_comet_spark_expr"
diff --git a/native/core/src/execution/datafusion/expressions/if_expr.rs 
b/native/spark-expr/src/if_expr.rs
similarity index 95%
rename from native/core/src/execution/datafusion/expressions/if_expr.rs
rename to native/spark-expr/src/if_expr.rs
index fa235cc6..c04494ec 100644
--- a/native/core/src/execution/datafusion/expressions/if_expr.rs
+++ b/native/spark-expr/src/if_expr.rs
@@ -31,7 +31,7 @@ use datafusion::logical_expr::ColumnarValue;
 use datafusion_common::{cast::as_boolean_array, Result};
 use datafusion_physical_expr::PhysicalExpr;
 
-use crate::execution::datafusion::expressions::utils::down_cast_any_ref;
+use datafusion_comet_utils::down_cast_any_ref;
 
 #[derive(Debug, Hash)]
 pub struct IfExpr {
@@ -147,15 +147,6 @@ impl PartialEq<dyn Any> for IfExpr {
     }
 }
 
-/// Create an If expression
-pub fn if_fn(
-    if_expr: Arc<dyn PhysicalExpr>,
-    true_expr: Arc<dyn PhysicalExpr>,
-    false_expr: Arc<dyn PhysicalExpr>,
-) -> Result<Arc<dyn PhysicalExpr>> {
-    Ok(Arc::new(IfExpr::new(if_expr, true_expr, false_expr)))
-}
-
 #[cfg(test)]
 mod tests {
     use arrow::{array::StringArray, datatypes::*};
@@ -165,6 +156,15 @@ mod tests {
 
     use super::*;
 
+    /// Create an If expression
+    fn if_fn(
+        if_expr: Arc<dyn PhysicalExpr>,
+        true_expr: Arc<dyn PhysicalExpr>,
+        false_expr: Arc<dyn PhysicalExpr>,
+    ) -> Result<Arc<dyn PhysicalExpr>> {
+        Ok(Arc::new(IfExpr::new(if_expr, true_expr, false_expr)))
+    }
+
     #[test]
     fn test_if_1() -> Result<()> {
         let schema = Schema::new(vec![Field::new("a", DataType::Utf8, true)]);
diff --git a/native/spark-expr/src/lib.rs b/native/spark-expr/src/lib.rs
index 3873754b..c36e8855 100644
--- a/native/spark-expr/src/lib.rs
+++ b/native/spark-expr/src/lib.rs
@@ -18,7 +18,11 @@
 use std::error::Error;
 use std::fmt::{Display, Formatter};
 
-pub mod abs;
+mod abs;
+mod if_expr;
+
+pub use abs::Abs;
+pub use if_expr::IfExpr;
 
 /// Spark supports three evaluation modes when evaluating expressions, which 
affect
 /// the behavior when processing input values that are invalid or would result 
in an
diff --git a/native/spark-expr/Cargo.toml b/native/utils/Cargo.toml
similarity index 76%
copy from native/spark-expr/Cargo.toml
copy to native/utils/Cargo.toml
index d10d0494..05ddd348 100644
--- a/native/spark-expr/Cargo.toml
+++ b/native/utils/Cargo.toml
@@ -16,8 +16,8 @@
 # under the License.
 
 [package]
-name = "datafusion-comet-spark-expr"
-description = "DataFusion expressions that emulate Apache Spark's behavior"
+name = "datafusion-comet-utils"
+description = "DataFusion Comet Utilities"
 version = { workspace = true }
 homepage = { workspace = true }
 repository = { workspace = true }
@@ -27,12 +27,8 @@ license = { workspace = true }
 edition = { workspace = true }
 
 [dependencies]
-arrow = { workspace = true }
-arrow-schema = { workspace = true }
-datafusion = { workspace = true }
-datafusion-common = { workspace = true }
-datafusion-functions = { workspace = true }
+datafusion-physical-plan = { workspace = true }
 
 [lib]
-name = "datafusion_comet_spark_expr"
+name = "datafusion_comet_utils"
 path = "src/lib.rs"
diff --git a/native/utils/README.md b/native/utils/README.md
new file mode 100644
index 00000000..513c6245
--- /dev/null
+++ b/native/utils/README.md
@@ -0,0 +1,22 @@
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+# datafusion-comet-utils
+
+This crate provides utilities for use in the [Apache DataFusion 
Comet](https://github.com/apache/datafusion-comet/) project.
\ No newline at end of file
diff --git a/native/core/src/execution/datafusion/expressions/mod.rs 
b/native/utils/src/lib.rs
similarity index 52%
copy from native/core/src/execution/datafusion/expressions/mod.rs
copy to native/utils/src/lib.rs
index 98b422dc..54ff55b4 100644
--- a/native/core/src/execution/datafusion/expressions/mod.rs
+++ b/native/utils/src/lib.rs
@@ -15,39 +15,22 @@
 // specific language governing permissions and limitations
 // under the License.
 
-//! Native DataFusion expressions
+use std::any::Any;
+use std::sync::Arc;
 
-pub mod bitwise_not;
-pub mod cast;
-pub mod checkoverflow;
-pub mod if_expr;
-mod normalize_nan;
-pub mod scalar_funcs;
-pub use normalize_nan::NormalizeNaNAndZero;
+use datafusion_physical_plan::PhysicalExpr;
 
-use crate::errors::CometError;
-pub mod avg;
-pub mod avg_decimal;
-pub mod bloom_filter_might_contain;
-pub mod correlation;
-pub mod covariance;
-pub mod create_named_struct;
-pub mod negative;
-pub mod stats;
-pub mod stddev;
-pub mod strings;
-pub mod subquery;
-pub mod sum_decimal;
-pub mod temporal;
-pub mod unbound;
-mod utils;
-pub mod variance;
-pub mod xxhash64;
-
-pub use datafusion_comet_spark_expr::EvalMode;
-
-fn arithmetic_overflow_error(from_type: &str) -> CometError {
-    CometError::ArithmeticOverflow {
-        from_type: from_type.to_string(),
+/// A utility function from DataFusion. It is not exposed by DataFusion.
+pub fn down_cast_any_ref(any: &dyn Any) -> &dyn Any {
+    if any.is::<Arc<dyn PhysicalExpr>>() {
+        any.downcast_ref::<Arc<dyn PhysicalExpr>>()
+            .unwrap()
+            .as_any()
+    } else if any.is::<Box<dyn PhysicalExpr>>() {
+        any.downcast_ref::<Box<dyn PhysicalExpr>>()
+            .unwrap()
+            .as_any()
+    } else {
+        any
     }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to