This is an automated email from the ASF dual-hosted git repository.
agrove pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion-python.git
The following commit(s) were added to refs/heads/main by this push:
new dd4ed97 Prepare 36.0.0 Release (#599)
dd4ed97 is described below
commit dd4ed977138b98749664bf63bb1e843559fd3923
Author: Andy Grove <[email protected]>
AuthorDate: Sat Mar 2 16:57:04 2024 -0700
Prepare 36.0.0 Release (#599)
---
CHANGELOG.md | 17 +++++++
Cargo.lock | 131 +++++++++++++++++++++++++++++++++--------------
Cargo.toml | 15 +++---
src/common/data_type.rs | 12 +++--
src/expr.rs | 132 +++++++++++++++++++++++++++++++-----------------
src/functions.rs | 53 +++++++++++++++----
src/udaf.rs | 4 +-
7 files changed, 258 insertions(+), 106 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index d5e3abb..0a05429 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -19,6 +19,23 @@
# DataFusion Python Changelog
+## [36.0.0](https://github.com/apache/arrow-datafusion-python/tree/36.0.0) (2024-03-02)
+
+**Implemented enhancements:**
+
+- feat: Add `flatten` array function [#562](https://github.com/apache/arrow-datafusion-python/pull/562) (mobley-trent)
+
+**Documentation updates:**
+
+- docs: Add ASF attribution [#580](https://github.com/apache/arrow-datafusion-python/pull/580) (simicd)
+
+**Merged pull requests:**
+
+- Allow PyDataFrame to be used from other projects [#582](https://github.com/apache/arrow-datafusion-python/pull/582) (andygrove)
+- docs: Add ASF attribution [#580](https://github.com/apache/arrow-datafusion-python/pull/580) (simicd)
+- Add array functions [#560](https://github.com/apache/arrow-datafusion-python/pull/560) (ongchi)
+- feat: Add `flatten` array function [#562](https://github.com/apache/arrow-datafusion-python/pull/562) (mobley-trent)
+
## [35.0.0](https://github.com/apache/arrow-datafusion-python/tree/35.0.0) (2024-01-20)
**Merged pull requests:**
diff --git a/Cargo.lock b/Cargo.lock
index e6aa605..31e7bad 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -107,8 +107,8 @@ dependencies = [
"serde",
"serde_json",
"snap",
- "strum",
- "strum_macros",
+ "strum 0.25.0",
+ "strum_macros 0.25.3",
"thiserror",
"typed-builder",
"uuid",
@@ -544,15 +544,15 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "chrono"
-version = "0.4.31"
+version = "0.4.34"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7f2c685bad3eb3d45a01354cedb7d5faa66194d1d58ba6e267a8de788f79db38"
+checksum = "5bc015644b92d5890fab7489e49d21f879d5c990186827d42ec511919404f38b"
dependencies = [
"android-tzdata",
"iana-time-zone",
"num-traits",
"serde",
- "windows-targets 0.48.5",
+ "windows-targets 0.52.0",
]
[[package]]
@@ -583,8 +583,8 @@ version = "7.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7c64043d6c7b7a4c58e39e7efccfdea7b93d885a795d0c054a69dbbf4dd52686"
dependencies = [
- "strum",
- "strum_macros",
+ "strum 0.25.0",
+ "strum_macros 0.25.3",
"unicode-width",
]
@@ -715,9 +715,9 @@ dependencies = [
[[package]]
name = "datafusion"
-version = "35.0.0"
+version = "36.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4328f5467f76d890fe3f924362dbc3a838c6a733f762b32d87f9e0b7bef5fb49"
+checksum = "b2b360b692bf6c6d6e6b6dbaf41a3be0020daeceac0f406aed54c75331e50dbb"
dependencies = [
"ahash",
"apache-avro",
@@ -734,6 +734,8 @@ dependencies = [
"datafusion-common",
"datafusion-execution",
"datafusion-expr",
+ "datafusion-functions",
+ "datafusion-functions-array",
"datafusion-optimizer",
"datafusion-physical-expr",
"datafusion-physical-plan",
@@ -765,9 +767,9 @@ dependencies = [
[[package]]
name = "datafusion-common"
-version = "35.0.0"
+version = "36.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d29a7752143b446db4a2cccd9a6517293c6b97e8c39e520ca43ccd07135a4f7e"
+checksum = "37f343ccc298f440e25aa38ff82678291a7acc24061c7370ba6c0ff5cc811412"
dependencies = [
"ahash",
"apache-avro",
@@ -787,9 +789,9 @@ dependencies = [
[[package]]
name = "datafusion-execution"
-version = "35.0.0"
+version = "36.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2d447650af16e138c31237f53ddaef6dd4f92f0e2d3f2f35d190e16c214ca496"
+checksum = "3f9c93043081487e335399a21ebf8295626367a647ac5cb87d41d18afad7d0f7"
dependencies = [
"arrow",
"chrono",
@@ -808,9 +810,9 @@ dependencies = [
[[package]]
name = "datafusion-expr"
-version = "35.0.0"
+version = "36.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d8d19598e48a498850fb79f97a9719b1f95e7deb64a7a06f93f313e8fa1d524b"
+checksum = "e204d89909e678846b6a95f156aafc1ee5b36cb6c9e37ec2e1449b078a38c818"
dependencies = [
"ahash",
"arrow",
@@ -818,15 +820,44 @@ dependencies = [
"datafusion-common",
"paste",
"sqlparser",
- "strum",
- "strum_macros",
+ "strum 0.26.1",
+ "strum_macros 0.26.1",
+]
+
+[[package]]
+name = "datafusion-functions"
+version = "36.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "98f1c73f7801b2b8ba2297b3ad78ffcf6c1fc6b8171f502987eb9ad5cb244ee7"
+dependencies = [
+ "arrow",
+ "base64",
+ "datafusion-common",
+ "datafusion-execution",
+ "datafusion-expr",
+ "hex",
+ "log",
+]
+
+[[package]]
+name = "datafusion-functions-array"
+version = "36.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "42d16a0ddf2c991526f6ffe2f47a72c6da0b7354d6c32411dd20631fe2e38937"
+dependencies = [
+ "arrow",
+ "datafusion-common",
+ "datafusion-execution",
+ "datafusion-expr",
+ "log",
+ "paste",
]
[[package]]
name = "datafusion-optimizer"
-version = "35.0.0"
+version = "36.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8b7feb0391f1fc75575acb95b74bfd276903dc37a5409fcebe160bc7ddff2010"
+checksum = "5ae27e07bf1f04d327be5c2a293470879801ab5535204dc3b16b062fda195496"
dependencies = [
"arrow",
"async-trait",
@@ -842,9 +873,9 @@ dependencies = [
[[package]]
name = "datafusion-physical-expr"
-version = "35.0.0"
+version = "36.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e911bca609c89a54e8f014777449d8290327414d3e10c57a3e3c2122e38878d0"
+checksum = "dde620cd9ef76a3bca9c754fb68854bd2349c49f55baf97e08001f9e967f6d6b"
dependencies = [
"ahash",
"arrow",
@@ -852,11 +883,13 @@ dependencies = [
"arrow-buffer",
"arrow-ord",
"arrow-schema",
+ "arrow-string",
"base64",
"blake2",
"blake3",
"chrono",
"datafusion-common",
+ "datafusion-execution",
"datafusion-expr",
"half",
"hashbrown 0.14.3",
@@ -876,9 +909,9 @@ dependencies = [
[[package]]
name = "datafusion-physical-plan"
-version = "35.0.0"
+version = "36.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e96b546b8a02e9c2ab35ac6420d511f12a4701950c1eb2e568c122b4fefb0be3"
+checksum = "9a4c75fba9ea99d64b2246cbd2fcae2e6fc973e6616b1015237a616036506dd4"
dependencies = [
"ahash",
"arrow",
@@ -907,12 +940,13 @@ dependencies = [
[[package]]
name = "datafusion-python"
-version = "35.0.0"
+version = "36.0.0"
dependencies = [
"async-trait",
"datafusion",
"datafusion-common",
"datafusion-expr",
+ "datafusion-functions-array",
"datafusion-optimizer",
"datafusion-sql",
"datafusion-substrait",
@@ -934,9 +968,9 @@ dependencies = [
[[package]]
name = "datafusion-sql"
-version = "35.0.0"
+version = "36.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2d18d36f260bbbd63aafdb55339213a23d540d3419810575850ef0a798a6b768"
+checksum = "21474a95c3a62d113599d21b439fa15091b538bac06bd20be0bb2e7d22903c09"
dependencies = [
"arrow",
"arrow-schema",
@@ -948,9 +982,9 @@ dependencies = [
[[package]]
name = "datafusion-substrait"
-version = "35.0.0"
+version = "36.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "dad6bef77af3d8a697ae63ffbcb5aa66b74cd08ea93a31e2e757da75b2f1452f"
+checksum = "aab89c01ef66a59ec92d2360db63893224b4f7e085e2ee6351e0bb77f88931f0"
dependencies = [
"async-recursion",
"chrono",
@@ -1181,9 +1215,9 @@ checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253"
[[package]]
name = "git2"
-version = "0.18.1"
+version = "0.18.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fbf97ba92db08df386e10c8ede66a2a0369bd277090afd8710e19e38de9ec0cd"
+checksum = "1b3ba52851e73b46a4c3df1d89343741112003f0f6f13beb0dfac9e457c3fdcd"
dependencies = [
"bitflags 2.4.2",
"libc",
@@ -1557,9 +1591,9 @@ dependencies = [
[[package]]
name = "libgit2-sys"
-version = "0.16.1+1.7.1"
+version = "0.16.2+1.7.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f2a2bb3680b094add03bb3732ec520ece34da31a8cd2d633d1389d0f0fb60d0c"
+checksum = "ee4126d8b4ee5c9d9ea891dd875cfdc1e9d0950437179104b183d7d8a74d24e8"
dependencies = [
"cc",
"libc",
@@ -2662,9 +2696,9 @@ checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67"
[[package]]
name = "sqlparser"
-version = "0.41.0"
+version = "0.43.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5cc2c25a6c66789625ef164b4c7d2e548d627902280c13710d33da8222169964"
+checksum = "f95c4bae5aba7cd30bd506f7140026ade63cff5afd778af8854026f9606bf5d4"
dependencies = [
"log",
"sqlparser_derive",
@@ -2692,8 +2726,14 @@ name = "strum"
version = "0.25.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "290d54ea6f91c969195bdbcd7442c8c2a2ba87da8bf60a7ee86a235d4bc1e125"
+
+[[package]]
+name = "strum"
+version = "0.26.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "723b93e8addf9aa965ebe2d11da6d7540fa2283fcea14b3371ff055f7ba13f5f"
dependencies = [
- "strum_macros",
+ "strum_macros 0.26.1",
]
[[package]]
@@ -2709,11 +2749,24 @@ dependencies = [
"syn 2.0.48",
]
+[[package]]
+name = "strum_macros"
+version = "0.26.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7a3417fc93d76740d974a01654a09777cb500428cc874ca9f45edfe0c4d4cd18"
+dependencies = [
+ "heck",
+ "proc-macro2",
+ "quote",
+ "rustversion",
+ "syn 2.0.48",
+]
+
[[package]]
name = "substrait"
-version = "0.22.1"
+version = "0.24.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5478fbd0313a9b0915a1c0e7ebf15b5fed7d7c6dd7229b4f5e32ce75b10f256a"
+checksum = "2c8ffb7a3e7505bb835513e77ebfe67d359e57d684a5972323e3bdefbecc1f25"
dependencies = [
"git2",
"heck",
@@ -2857,9 +2910,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
[[package]]
name = "tokio"
-version = "1.35.1"
+version = "1.36.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c89b4efa943be685f629b149f53829423f8f5531ea21249408e8e2f8671ec104"
+checksum = "61285f6515fa018fb2d1e46eb21223fff441ee8db5d0f1435e8ab4f5cdb80931"
dependencies = [
"backtrace",
"bytes",
diff --git a/Cargo.toml b/Cargo.toml
index 6c47484..f8c6bf9 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -17,7 +17,7 @@
[package]
name = "datafusion-python"
-version = "35.0.0"
+version = "36.0.0"
homepage = "https://github.com/apache/arrow-datafusion-python"
repository = "https://github.com/apache/arrow-datafusion-python"
authors = ["Apache Arrow <[email protected]>"]
@@ -37,12 +37,13 @@ substrait = ["dep:datafusion-substrait"]
tokio = { version = "1.35", features = ["macros", "rt", "rt-multi-thread", "sync"] }
rand = "0.8"
pyo3 = { version = "0.20", features = ["extension-module", "abi3", "abi3-py38"] }
-datafusion = { version = "35.0.0", features = ["pyarrow", "avro"] }
-datafusion-common = { version = "35.0.0", features = ["pyarrow"] }
-datafusion-expr = "35.0.0"
-datafusion-optimizer = "35.0.0"
-datafusion-sql = "35.0.0"
-datafusion-substrait = { version = "35.0.0", optional = true }
+datafusion = { version = "36.0.0", features = ["pyarrow", "avro"] }
+datafusion-common = { version = "36.0.0", features = ["pyarrow"] }
+datafusion-expr = "36.0.0"
+datafusion-functions-array = "36.0.0"
+datafusion-optimizer = "36.0.0"
+datafusion-sql = "36.0.0"
+datafusion-substrait = { version = "36.0.0", optional = true }
prost = "0.12"
prost-types = "0.12"
uuid = { version = "1.3", features = ["v4"] }
diff --git a/src/common/data_type.rs b/src/common/data_type.rs
index 6059768..d3203fd 100644
--- a/src/common/data_type.rs
+++ b/src/common/data_type.rs
@@ -290,15 +290,21 @@ impl DataTypeMap {
Ok(DataType::Interval(IntervalUnit::MonthDayNano))
}
ScalarValue::List(arr) => Ok(arr.data_type().to_owned()),
- ScalarValue::Struct(_, fields) => Ok(DataType::Struct(fields.to_owned())),
+ ScalarValue::Struct(_fields) => Err(py_datafusion_err(
+ DataFusionError::NotImplemented("ScalarValue::Struct".to_string()),
+ )),
ScalarValue::FixedSizeBinary(size, _) => Ok(DataType::FixedSizeBinary(*size)),
ScalarValue::FixedSizeList(_array_ref) => {
// The FieldRef was removed from ScalarValue::FixedSizeList in
// https://github.com/apache/arrow-datafusion/pull/8221, so we can no
// longer convert back to a DataType here
- todo!()
+ Err(py_datafusion_err(DataFusionError::NotImplemented(
+ "ScalarValue::FixedSizeList".to_string(),
+ )))
}
- ScalarValue::LargeList(_) => todo!(),
+ ScalarValue::LargeList(_) => Err(py_datafusion_err(DataFusionError::NotImplemented(
+ "ScalarValue::LargeList".to_string(),
+ ))),
ScalarValue::DurationSecond(_) => Ok(DataType::Duration(TimeUnit::Second)),
ScalarValue::DurationMillisecond(_) => Ok(DataType::Duration(TimeUnit::Millisecond)),
ScalarValue::DurationMicrosecond(_) => Ok(DataType::Duration(TimeUnit::Microsecond)),
diff --git a/src/expr.rs b/src/expr.rs
index dbb56ee..c0e7019 100644
--- a/src/expr.rs
+++ b/src/expr.rs
@@ -32,7 +32,7 @@ use datafusion_expr::{
};
use crate::common::data_type::{DataTypeMap, RexType};
-use crate::errors::{py_runtime_err, py_type_err, DataFusionError};
+use crate::errors::{py_datafusion_err, py_runtime_err, py_type_err, DataFusionError};
use crate::expr::aggregate_expr::PyAggregateFunction;
use crate::expr::binary_expr::PyBinaryExpr;
use crate::expr::column::PyColumn;
@@ -292,6 +292,7 @@ impl PyExpr {
| Expr::IsNotFalse(..)
| Expr::Placeholder { .. }
| Expr::OuterReferenceColumn(_, _)
+ | Expr::Unnest(_)
| Expr::IsNotUnknown(_) => RexType::Call,
Expr::ScalarSubquery(..) => RexType::ScalarSubquery,
})
@@ -306,49 +307,81 @@ impl PyExpr {
/// Extracts the Expr value into a PyObject that can be shared with Python
pub fn python_value(&self, py: Python) -> PyResult<PyObject> {
match &self.expr {
- Expr::Literal(scalar_value) => Ok(match scalar_value {
- ScalarValue::Null => todo!(),
- ScalarValue::Boolean(v) => v.into_py(py),
- ScalarValue::Float32(v) => v.into_py(py),
- ScalarValue::Float64(v) => v.into_py(py),
- ScalarValue::Decimal128(v, _, _) => v.into_py(py),
- ScalarValue::Decimal256(_, _, _) => todo!(),
- ScalarValue::Int8(v) => v.into_py(py),
- ScalarValue::Int16(v) => v.into_py(py),
- ScalarValue::Int32(v) => v.into_py(py),
- ScalarValue::Int64(v) => v.into_py(py),
- ScalarValue::UInt8(v) => v.into_py(py),
- ScalarValue::UInt16(v) => v.into_py(py),
- ScalarValue::UInt32(v) => v.into_py(py),
- ScalarValue::UInt64(v) => v.into_py(py),
- ScalarValue::Utf8(v) => v.clone().into_py(py),
- ScalarValue::LargeUtf8(v) => v.clone().into_py(py),
- ScalarValue::Binary(v) => v.clone().into_py(py),
- ScalarValue::FixedSizeBinary(_, _) => todo!(),
- ScalarValue::LargeBinary(v) => v.clone().into_py(py),
- ScalarValue::List(_) => todo!(),
- ScalarValue::Date32(v) => v.into_py(py),
- ScalarValue::Date64(v) => v.into_py(py),
- ScalarValue::Time32Second(v) => v.into_py(py),
- ScalarValue::Time32Millisecond(v) => v.into_py(py),
- ScalarValue::Time64Microsecond(v) => v.into_py(py),
- ScalarValue::Time64Nanosecond(v) => v.into_py(py),
- ScalarValue::TimestampSecond(v, _) => v.into_py(py),
- ScalarValue::TimestampMillisecond(v, _) => v.into_py(py),
- ScalarValue::TimestampMicrosecond(v, _) => v.into_py(py),
- ScalarValue::TimestampNanosecond(v, _) => v.into_py(py),
- ScalarValue::IntervalYearMonth(v) => v.into_py(py),
- ScalarValue::IntervalDayTime(v) => v.into_py(py),
- ScalarValue::IntervalMonthDayNano(v) => v.into_py(py),
- ScalarValue::DurationSecond(v) => v.into_py(py),
- ScalarValue::DurationMicrosecond(v) => v.into_py(py),
- ScalarValue::DurationNanosecond(v) => v.into_py(py),
- ScalarValue::DurationMillisecond(v) => v.into_py(py),
- ScalarValue::Struct(_, _) => todo!(),
- ScalarValue::Dictionary(_, _) => todo!(),
- ScalarValue::FixedSizeList(_) => todo!(),
- ScalarValue::LargeList(_) => todo!(),
- }),
+ Expr::Literal(scalar_value) => match scalar_value {
+ ScalarValue::Null => Err(py_datafusion_err(
+ datafusion_common::DataFusionError::NotImplemented(
+ "ScalarValue::Null".to_string(),
+ ),
+ )),
+ ScalarValue::Boolean(v) => Ok(v.into_py(py)),
+ ScalarValue::Float32(v) => Ok(v.into_py(py)),
+ ScalarValue::Float64(v) => Ok(v.into_py(py)),
+ ScalarValue::Decimal128(v, _, _) => Ok(v.into_py(py)),
+ ScalarValue::Decimal256(_, _, _) => Err(py_datafusion_err(
+ datafusion_common::DataFusionError::NotImplemented(
+ "ScalarValue::Decimal256".to_string(),
+ ),
+ )),
+ ScalarValue::Int8(v) => Ok(v.into_py(py)),
+ ScalarValue::Int16(v) => Ok(v.into_py(py)),
+ ScalarValue::Int32(v) => Ok(v.into_py(py)),
+ ScalarValue::Int64(v) => Ok(v.into_py(py)),
+ ScalarValue::UInt8(v) => Ok(v.into_py(py)),
+ ScalarValue::UInt16(v) => Ok(v.into_py(py)),
+ ScalarValue::UInt32(v) => Ok(v.into_py(py)),
+ ScalarValue::UInt64(v) => Ok(v.into_py(py)),
+ ScalarValue::Utf8(v) => Ok(v.clone().into_py(py)),
+ ScalarValue::LargeUtf8(v) => Ok(v.clone().into_py(py)),
+ ScalarValue::Binary(v) => Ok(v.clone().into_py(py)),
+ ScalarValue::FixedSizeBinary(_, _) => Err(py_datafusion_err(
+ datafusion_common::DataFusionError::NotImplemented(
+ "ScalarValue::FixedSizeBinary".to_string(),
+ ),
+ )),
+ ScalarValue::LargeBinary(v) => Ok(v.clone().into_py(py)),
+ ScalarValue::List(_) => Err(py_datafusion_err(
+ datafusion_common::DataFusionError::NotImplemented(
+ "ScalarValue::List".to_string(),
+ ),
+ )),
+ ScalarValue::Date32(v) => Ok(v.into_py(py)),
+ ScalarValue::Date64(v) => Ok(v.into_py(py)),
+ ScalarValue::Time32Second(v) => Ok(v.into_py(py)),
+ ScalarValue::Time32Millisecond(v) => Ok(v.into_py(py)),
+ ScalarValue::Time64Microsecond(v) => Ok(v.into_py(py)),
+ ScalarValue::Time64Nanosecond(v) => Ok(v.into_py(py)),
+ ScalarValue::TimestampSecond(v, _) => Ok(v.into_py(py)),
+ ScalarValue::TimestampMillisecond(v, _) => Ok(v.into_py(py)),
+ ScalarValue::TimestampMicrosecond(v, _) => Ok(v.into_py(py)),
+ ScalarValue::TimestampNanosecond(v, _) => Ok(v.into_py(py)),
+ ScalarValue::IntervalYearMonth(v) => Ok(v.into_py(py)),
+ ScalarValue::IntervalDayTime(v) => Ok(v.into_py(py)),
+ ScalarValue::IntervalMonthDayNano(v) => Ok(v.into_py(py)),
+ ScalarValue::DurationSecond(v) => Ok(v.into_py(py)),
+ ScalarValue::DurationMicrosecond(v) => Ok(v.into_py(py)),
+ ScalarValue::DurationNanosecond(v) => Ok(v.into_py(py)),
+ ScalarValue::DurationMillisecond(v) => Ok(v.into_py(py)),
+ ScalarValue::Struct(_) => Err(py_datafusion_err(
+ datafusion_common::DataFusionError::NotImplemented(
+ "ScalarValue::Struct".to_string(),
+ ),
+ )),
+ ScalarValue::Dictionary(_, _) => Err(py_datafusion_err(
+ datafusion_common::DataFusionError::NotImplemented(
+ "ScalarValue::Dictionary".to_string(),
+ ),
+ )),
+ ScalarValue::FixedSizeList(_) => Err(py_datafusion_err(
+ datafusion_common::DataFusionError::NotImplemented(
+ "ScalarValue::FixedSizeList".to_string(),
+ ),
+ )),
+ ScalarValue::LargeList(_) => Err(py_datafusion_err(
+ datafusion_common::DataFusionError::NotImplemented(
+ "ScalarValue::LargeList".to_string(),
+ ),
+ )),
+ },
_ => Err(py_type_err(format!(
"Non Expr::Literal encountered in types: {:?}",
&self.expr
@@ -455,6 +488,7 @@ impl PyExpr {
// Currently un-support/implemented Expr types for Rex Call operations
Expr::GroupingSet(..)
+ | Expr::Unnest(_)
| Expr::OuterReferenceColumn(_, _)
| Expr::Wildcard { .. }
| Expr::ScalarSubquery(..)
@@ -573,7 +607,11 @@ impl PyExpr {
| Operator::RegexMatch
| Operator::RegexIMatch
| Operator::RegexNotMatch
- | Operator::RegexNotIMatch => DataTypeMap::map_from_arrow_type(&DataType::Boolean),
+ | Operator::RegexNotIMatch
+ | Operator::LikeMatch
+ | Operator::ILikeMatch
+ | Operator::NotLikeMatch
+ | Operator::NotILikeMatch => DataTypeMap::map_from_arrow_type(&DataType::Boolean),
Operator::Plus | Operator::Minus | Operator::Multiply | Operator::Modulo => {
DataTypeMap::map_from_arrow_type(&DataType::Int64)
}
@@ -584,7 +622,9 @@ impl PyExpr {
| Operator::BitwiseXor
| Operator::BitwiseAnd
| Operator::BitwiseOr => DataTypeMap::map_from_arrow_type(&DataType::Binary),
- Operator::AtArrow | Operator::ArrowAt => todo!(),
+ Operator::AtArrow | Operator::ArrowAt => {
+ Err(py_type_err(format!("Unsupported expr: {op}")))
+ }
},
Expr::Cast(Cast { expr: _, data_type }) => DataTypeMap::map_from_arrow_type(data_type),
Expr::Literal(scalar_value) => DataTypeMap::map_from_scalar_value(scalar_value),
diff --git a/src/functions.rs b/src/functions.rs
index 5a558de..757fb31 100644
--- a/src/functions.rs
+++ b/src/functions.rs
@@ -23,6 +23,7 @@ use crate::expr::conditional_expr::PyCaseBuilder;
use crate::expr::window::PyWindowFrame;
use crate::expr::PyExpr;
use datafusion::execution::FunctionRegistry;
+use datafusion::functions;
use datafusion_common::{Column, TableReference};
use datafusion_expr::expr::Alias;
use datafusion_expr::{
@@ -34,6 +35,49 @@ use datafusion_expr::{
lit, BuiltinScalarFunction, Expr, WindowFunctionDefinition,
};
+#[pyfunction]
+pub fn isnan(expr: PyExpr) -> PyExpr {
+ functions::expr_fn::isnan(expr.into()).into()
+}
+
+#[pyfunction]
+pub fn nullif(expr1: PyExpr, expr2: PyExpr) -> PyExpr {
+ functions::expr_fn::nullif(expr1.into(), expr2.into()).into()
+}
+
+#[pyfunction]
+pub fn encode(input: PyExpr, encoding: PyExpr) -> PyExpr {
+ functions::expr_fn::encode(input.into(), encoding.into()).into()
+}
+
+#[pyfunction]
+pub fn decode(input: PyExpr, encoding: PyExpr) -> PyExpr {
+ functions::expr_fn::decode(input.into(), encoding.into()).into()
+}
+
+#[pyfunction]
+pub fn array_to_string(expr: PyExpr, delim: PyExpr) -> PyExpr {
+ datafusion_functions_array::expr_fn::array_to_string(expr.into(), delim.into()).into()
+}
+
+#[pyfunction]
+pub fn array_join(expr: PyExpr, delim: PyExpr) -> PyExpr {
+ // alias for array_to_string
+ array_to_string(expr, delim)
+}
+
+#[pyfunction]
+pub fn list_to_string(expr: PyExpr, delim: PyExpr) -> PyExpr {
+ // alias for array_to_string
+ array_to_string(expr, delim)
+}
+
+#[pyfunction]
+pub fn list_join(expr: PyExpr, delim: PyExpr) -> PyExpr {
+ // alias for array_to_string
+ array_to_string(expr, delim)
+}
+
#[pyfunction]
fn in_list(expr: PyExpr, value: Vec<PyExpr>, negated: bool) -> PyExpr {
datafusion_expr::in_list(
@@ -252,7 +296,6 @@ scalar_function!(factorial, Factorial);
scalar_function!(floor, Floor);
scalar_function!(gcd, Gcd);
scalar_function!(initcap, InitCap, "Converts the first letter of each word to upper case and the rest to lower case. Words are sequences of alphanumeric characters separated by non-alphanumeric characters.");
-scalar_function!(isnan, Isnan);
scalar_function!(iszero, Iszero);
scalar_function!(lcm, Lcm);
scalar_function!(left, Left, "Returns first n characters in the string, or when n is negative, returns all but last |n| characters.");
@@ -348,15 +391,11 @@ scalar_function!(trunc, Trunc);
scalar_function!(upper, Upper, "Converts the string to all upper case.");
scalar_function!(make_array, MakeArray);
scalar_function!(array, MakeArray);
-scalar_function!(nullif, NullIf);
scalar_function!(uuid, Uuid);
scalar_function!(r#struct, Struct); // Use raw identifier since struct is a keyword
scalar_function!(from_unixtime, FromUnixtime);
scalar_function!(arrow_typeof, ArrowTypeof);
scalar_function!(random, Random);
-//Binary String Functions
-scalar_function!(encode, Encode);
-scalar_function!(decode, Decode);
// Array Functions
scalar_function!(array_append, ArrayAppend);
@@ -382,10 +421,6 @@ scalar_function!(list_position, ArrayPosition);
scalar_function!(list_indexof, ArrayPosition);
scalar_function!(array_positions, ArrayPositions);
scalar_function!(list_positions, ArrayPositions);
-scalar_function!(array_to_string, ArrayToString);
-scalar_function!(array_join, ArrayToString);
-scalar_function!(list_to_string, ArrayToString);
-scalar_function!(list_join, ArrayToString);
scalar_function!(array_ndims, ArrayNdims);
scalar_function!(list_ndims, ArrayNdims);
scalar_function!(array_prepend, ArrayPrepend);
diff --git a/src/udaf.rs b/src/udaf.rs
index 0e7a8de..9aea761 100644
--- a/src/udaf.rs
+++ b/src/udaf.rs
@@ -41,12 +41,12 @@ impl RustAccumulator {
}
impl Accumulator for RustAccumulator {
- fn state(&self) -> Result<Vec<ScalarValue>> {
+ fn state(&mut self) -> Result<Vec<ScalarValue>> {
Python::with_gil(|py| self.accum.as_ref(py).call_method0("state")?.extract())
.map_err(|e| DataFusionError::Execution(format!("{e}")))
}
- fn evaluate(&self) -> Result<ScalarValue> {
+ fn evaluate(&mut self) -> Result<ScalarValue> {
Python::with_gil(|py| self.accum.as_ref(py).call_method0("evaluate")?.extract())
.map_err(|e| DataFusionError::Execution(format!("{e}")))
}