This is an automated email from the ASF dual-hosted git repository.
agrove pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-comet.git
The following commit(s) were added to refs/heads/main by this push:
new a6a45fc7 feat: Move `IfExpr` to `spark-expr` crate (#653)
a6a45fc7 is described below
commit a6a45fc7d19572f5d866e8ae31cdd5ad0f415f21
Author: Andy Grove <[email protected]>
AuthorDate: Thu Jul 11 05:39:58 2024 -0600
feat: Move `IfExpr` to `spark-expr` crate (#653)
---
native/Cargo.lock | 10 +++++
native/Cargo.toml | 5 ++-
native/core/Cargo.toml | 3 +-
.../src/execution/datafusion/expressions/mod.rs | 1 -
.../src/execution/datafusion/expressions/utils.rs | 18 +--------
native/core/src/execution/datafusion/planner.rs | 3 +-
native/spark-expr/Cargo.toml | 2 +
.../expressions => spark-expr/src}/if_expr.rs | 20 ++++-----
native/spark-expr/src/lib.rs | 6 ++-
native/{spark-expr => utils}/Cargo.toml | 12 ++----
native/utils/README.md | 22 ++++++++++
.../expressions/mod.rs => utils/src/lib.rs} | 47 +++++++---------------
12 files changed, 77 insertions(+), 72 deletions(-)
diff --git a/native/Cargo.lock b/native/Cargo.lock
index c0f22fa1..9bf8247d 100644
--- a/native/Cargo.lock
+++ b/native/Cargo.lock
@@ -867,6 +867,7 @@ dependencies = [
"criterion",
"datafusion",
"datafusion-comet-spark-expr",
+ "datafusion-comet-utils",
"datafusion-common",
"datafusion-expr",
"datafusion-physical-expr",
@@ -909,8 +910,17 @@ dependencies = [
"arrow",
"arrow-schema",
"datafusion",
+ "datafusion-comet-utils",
"datafusion-common",
"datafusion-functions",
+ "datafusion-physical-expr",
+]
+
+[[package]]
+name = "datafusion-comet-utils"
+version = "0.1.0"
+dependencies = [
+ "datafusion-physical-plan",
]
[[package]]
diff --git a/native/Cargo.toml b/native/Cargo.toml
index 13860fbd..53afed85 100644
--- a/native/Cargo.toml
+++ b/native/Cargo.toml
@@ -16,7 +16,7 @@
# under the License.
[workspace]
-members = ["core", "spark-expr"]
+members = ["core", "spark-expr", "utils"]
resolver = "2"
[workspace.package]
@@ -43,8 +43,11 @@ datafusion-common = { git =
"https://github.com/apache/datafusion.git", rev = "4
datafusion = { default-features = false, git = "https://github.com/apache/datafusion.git", rev = "40.0.0-rc1", features = ["unicode_expressions", "crypto_expressions"] }
datafusion-functions = { git = "https://github.com/apache/datafusion.git", rev = "40.0.0-rc1", features = ["crypto_expressions"] }
datafusion-expr = { git = "https://github.com/apache/datafusion.git", rev = "40.0.0-rc1", default-features = false }
+datafusion-physical-plan = { git = "https://github.com/apache/datafusion.git", rev = "40.0.0-rc1", default-features = false }
datafusion-physical-expr-common = { git = "https://github.com/apache/datafusion.git", rev = "40.0.0-rc1", default-features = false }
datafusion-physical-expr = { git = "https://github.com/apache/datafusion.git", rev = "40.0.0-rc1", default-features = false }
+datafusion-comet-spark-expr = { path = "spark-expr", version = "0.1.0" }
+datafusion-comet-utils = { path = "utils", version = "0.1.0" }
[profile.release]
debug = true
diff --git a/native/core/Cargo.toml b/native/core/Cargo.toml
index 6432118d..be135d4e 100644
--- a/native/core/Cargo.toml
+++ b/native/core/Cargo.toml
@@ -77,7 +77,8 @@ once_cell = "1.18.0"
regex = "1.9.6"
crc32fast = "1.3.2"
simd-adler32 = "0.3.7"
-datafusion-comet-spark-expr = { path = "../spark-expr", version = "0.1.0" }
+datafusion-comet-spark-expr = { workspace = true }
+datafusion-comet-utils = { workspace = true }
[build-dependencies]
prost-build = "0.9.0"
diff --git a/native/core/src/execution/datafusion/expressions/mod.rs b/native/core/src/execution/datafusion/expressions/mod.rs
index 98b422dc..d573c237 100644
--- a/native/core/src/execution/datafusion/expressions/mod.rs
+++ b/native/core/src/execution/datafusion/expressions/mod.rs
@@ -20,7 +20,6 @@
pub mod bitwise_not;
pub mod cast;
pub mod checkoverflow;
-pub mod if_expr;
mod normalize_nan;
pub mod scalar_funcs;
pub use normalize_nan::NormalizeNaNAndZero;
diff --git a/native/core/src/execution/datafusion/expressions/utils.rs b/native/core/src/execution/datafusion/expressions/utils.rs
index ee8646a7..6a7ec2e1 100644
--- a/native/core/src/execution/datafusion/expressions/utils.rs
+++ b/native/core/src/execution/datafusion/expressions/utils.rs
@@ -30,24 +30,10 @@ use arrow_array::{cast::AsArray, types::ArrowPrimitiveType};
use arrow_schema::DataType;
use chrono::{DateTime, Offset, TimeZone};
use datafusion_common::cast::as_generic_string_array;
-use datafusion_physical_expr::PhysicalExpr;
use num::integer::div_floor;
-use std::{any::Any, sync::Arc};
+use std::sync::Arc;
-/// An utility function from DataFusion. It is not exposed by DataFusion.
-pub fn down_cast_any_ref(any: &dyn Any) -> &dyn Any {
- if any.is::<Arc<dyn PhysicalExpr>>() {
- any.downcast_ref::<Arc<dyn PhysicalExpr>>()
- .unwrap()
- .as_any()
- } else if any.is::<Box<dyn PhysicalExpr>>() {
- any.downcast_ref::<Box<dyn PhysicalExpr>>()
- .unwrap()
- .as_any()
- } else {
- any
- }
-}
+pub use datafusion_comet_utils::down_cast_any_ref;
/// Preprocesses input arrays to add timezone information from Spark to Arrow array datatype or
/// to apply timezone offset.
diff --git a/native/core/src/execution/datafusion/planner.rs b/native/core/src/execution/datafusion/planner.rs
index ee208ac7..23960c30 100644
--- a/native/core/src/execution/datafusion/planner.rs
+++ b/native/core/src/execution/datafusion/planner.rs
@@ -79,7 +79,6 @@ use crate::{
checkoverflow::CheckOverflow,
correlation::Correlation,
covariance::Covariance,
- if_expr::IfExpr,
negative,
scalar_funcs::create_comet_physical_fun,
stats::StatsType,
@@ -108,7 +107,7 @@ use crate::{
};
use super::expressions::{create_named_struct::CreateNamedStruct, EvalMode};
-use datafusion_comet_spark_expr::abs::Abs;
+use datafusion_comet_spark_expr::{Abs, IfExpr};
// For clippy error on type_complexity.
type ExecResult<T> = Result<T, ExecutionError>;
diff --git a/native/spark-expr/Cargo.toml b/native/spark-expr/Cargo.toml
index d10d0494..8bf76dff 100644
--- a/native/spark-expr/Cargo.toml
+++ b/native/spark-expr/Cargo.toml
@@ -32,6 +32,8 @@ arrow-schema = { workspace = true }
datafusion = { workspace = true }
datafusion-common = { workspace = true }
datafusion-functions = { workspace = true }
+datafusion-physical-expr = { workspace = true }
+datafusion-comet-utils = { workspace = true }
[lib]
name = "datafusion_comet_spark_expr"
diff --git a/native/core/src/execution/datafusion/expressions/if_expr.rs b/native/spark-expr/src/if_expr.rs
similarity index 95%
rename from native/core/src/execution/datafusion/expressions/if_expr.rs
rename to native/spark-expr/src/if_expr.rs
index fa235cc6..c04494ec 100644
--- a/native/core/src/execution/datafusion/expressions/if_expr.rs
+++ b/native/spark-expr/src/if_expr.rs
@@ -31,7 +31,7 @@ use datafusion::logical_expr::ColumnarValue;
use datafusion_common::{cast::as_boolean_array, Result};
use datafusion_physical_expr::PhysicalExpr;
-use crate::execution::datafusion::expressions::utils::down_cast_any_ref;
+use datafusion_comet_utils::down_cast_any_ref;
#[derive(Debug, Hash)]
pub struct IfExpr {
@@ -147,15 +147,6 @@ impl PartialEq<dyn Any> for IfExpr {
}
}
-/// Create an If expression
-pub fn if_fn(
- if_expr: Arc<dyn PhysicalExpr>,
- true_expr: Arc<dyn PhysicalExpr>,
- false_expr: Arc<dyn PhysicalExpr>,
-) -> Result<Arc<dyn PhysicalExpr>> {
- Ok(Arc::new(IfExpr::new(if_expr, true_expr, false_expr)))
-}
-
#[cfg(test)]
mod tests {
use arrow::{array::StringArray, datatypes::*};
@@ -165,6 +156,15 @@ mod tests {
use super::*;
+ /// Create an If expression
+ fn if_fn(
+ if_expr: Arc<dyn PhysicalExpr>,
+ true_expr: Arc<dyn PhysicalExpr>,
+ false_expr: Arc<dyn PhysicalExpr>,
+ ) -> Result<Arc<dyn PhysicalExpr>> {
+ Ok(Arc::new(IfExpr::new(if_expr, true_expr, false_expr)))
+ }
+
#[test]
fn test_if_1() -> Result<()> {
let schema = Schema::new(vec![Field::new("a", DataType::Utf8, true)]);
diff --git a/native/spark-expr/src/lib.rs b/native/spark-expr/src/lib.rs
index 3873754b..c36e8855 100644
--- a/native/spark-expr/src/lib.rs
+++ b/native/spark-expr/src/lib.rs
@@ -18,7 +18,11 @@
use std::error::Error;
use std::fmt::{Display, Formatter};
-pub mod abs;
+mod abs;
+mod if_expr;
+
+pub use abs::Abs;
+pub use if_expr::IfExpr;
/// Spark supports three evaluation modes when evaluating expressions, which affect
/// the behavior when processing input values that are invalid or would result in an
diff --git a/native/spark-expr/Cargo.toml b/native/utils/Cargo.toml
similarity index 76%
copy from native/spark-expr/Cargo.toml
copy to native/utils/Cargo.toml
index d10d0494..05ddd348 100644
--- a/native/spark-expr/Cargo.toml
+++ b/native/utils/Cargo.toml
@@ -16,8 +16,8 @@
# under the License.
[package]
-name = "datafusion-comet-spark-expr"
-description = "DataFusion expressions that emulate Apache Spark's behavior"
+name = "datafusion-comet-utils"
+description = "DataFusion Comet Utilities"
version = { workspace = true }
homepage = { workspace = true }
repository = { workspace = true }
@@ -27,12 +27,8 @@ license = { workspace = true }
edition = { workspace = true }
[dependencies]
-arrow = { workspace = true }
-arrow-schema = { workspace = true }
-datafusion = { workspace = true }
-datafusion-common = { workspace = true }
-datafusion-functions = { workspace = true }
+datafusion-physical-plan = { workspace = true }
[lib]
-name = "datafusion_comet_spark_expr"
+name = "datafusion_comet_utils"
path = "src/lib.rs"
diff --git a/native/utils/README.md b/native/utils/README.md
new file mode 100644
index 00000000..513c6245
--- /dev/null
+++ b/native/utils/README.md
@@ -0,0 +1,22 @@
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied. See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+# datafusion-comet-utils
+
+This crate provides utilities for use in the [Apache DataFusion Comet](https://github.com/apache/datafusion-comet/) project.
\ No newline at end of file
diff --git a/native/core/src/execution/datafusion/expressions/mod.rs b/native/utils/src/lib.rs
similarity index 52%
copy from native/core/src/execution/datafusion/expressions/mod.rs
copy to native/utils/src/lib.rs
index 98b422dc..54ff55b4 100644
--- a/native/core/src/execution/datafusion/expressions/mod.rs
+++ b/native/utils/src/lib.rs
@@ -15,39 +15,22 @@
// specific language governing permissions and limitations
// under the License.
-//! Native DataFusion expressions
+use std::any::Any;
+use std::sync::Arc;
-pub mod bitwise_not;
-pub mod cast;
-pub mod checkoverflow;
-pub mod if_expr;
-mod normalize_nan;
-pub mod scalar_funcs;
-pub use normalize_nan::NormalizeNaNAndZero;
+use datafusion_physical_plan::PhysicalExpr;
-use crate::errors::CometError;
-pub mod avg;
-pub mod avg_decimal;
-pub mod bloom_filter_might_contain;
-pub mod correlation;
-pub mod covariance;
-pub mod create_named_struct;
-pub mod negative;
-pub mod stats;
-pub mod stddev;
-pub mod strings;
-pub mod subquery;
-pub mod sum_decimal;
-pub mod temporal;
-pub mod unbound;
-mod utils;
-pub mod variance;
-pub mod xxhash64;
-
-pub use datafusion_comet_spark_expr::EvalMode;
-
-fn arithmetic_overflow_error(from_type: &str) -> CometError {
- CometError::ArithmeticOverflow {
- from_type: from_type.to_string(),
+/// A utility function from DataFusion. It is not exposed by DataFusion.
+pub fn down_cast_any_ref(any: &dyn Any) -> &dyn Any {
+ if any.is::<Arc<dyn PhysicalExpr>>() {
+ any.downcast_ref::<Arc<dyn PhysicalExpr>>()
+ .unwrap()
+ .as_any()
+ } else if any.is::<Box<dyn PhysicalExpr>>() {
+ any.downcast_ref::<Box<dyn PhysicalExpr>>()
+ .unwrap()
+ .as_any()
+ } else {
+ any
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]