This is an automated email from the ASF dual-hosted git repository.
comphead pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 292865e38c Create `datafusion-functions-array` crate and move
`ArrayToString` function into it (#9113)
292865e38c is described below
commit 292865e38cba1275a0f863230d816ee0e08c1318
Author: Andrew Lamb <[email protected]>
AuthorDate: Mon Feb 12 13:19:10 2024 -0500
Create `datafusion-functions-array` crate and move `ArrayToString` function
into it (#9113)
* Add `datafusion-functions-array` crate
* Add test for round tripping array_to_string
---
.github/workflows/rust.yml | 3 +
Cargo.toml | 3 +-
README.md | 1 +
datafusion-cli/Cargo.lock | 68 +++---
datafusion/core/Cargo.toml | 6 +-
datafusion/core/src/execution/context/mod.rs | 5 +
datafusion/core/src/lib.rs | 6 +
datafusion/core/src/prelude.rs | 2 +
.../core/tests/dataframe/dataframe_functions.rs | 30 ++-
datafusion/expr/src/built_in_function.rs | 13 --
datafusion/expr/src/expr_fn.rs | 7 -
datafusion/functions-array/Cargo.toml | 45 ++++
datafusion/functions-array/README.md | 27 +++
datafusion/functions-array/src/kernels.rs | 254 +++++++++++++++++++++
datafusion/functions-array/src/lib.rs | 56 +++++
datafusion/functions-array/src/macros.rs | 79 +++++++
datafusion/functions-array/src/udf.rs | 85 +++++++
datafusion/physical-expr/src/array_expressions.rs | 214 -----------------
datafusion/physical-expr/src/functions.rs | 3 -
datafusion/proto/proto/datafusion.proto | 2 +-
datafusion/proto/src/generated/pbjson.rs | 3 -
datafusion/proto/src/generated/prost.rs | 4 +-
datafusion/proto/src/logical_plan/from_proto.rs | 14 +-
datafusion/proto/src/logical_plan/to_proto.rs | 1 -
.../proto/tests/cases/roundtrip_logical_plan.rs | 14 +-
25 files changed, 652 insertions(+), 293 deletions(-)
diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
index adcfcc27d3..726eb07ff6 100644
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -82,6 +82,9 @@ jobs:
- name: Check function packages (encoding_expressions)
run: cargo check --no-default-features --features=encoding_expressions
-p datafusion
+ - name: Check function packages (array_expressions)
+ run: cargo check --no-default-features --features=array_expressions -p
datafusion
+
- name: Check Cargo.lock for datafusion-cli
run: |
# If this test fails, try running `cargo update` in the
`datafusion-cli` directory
diff --git a/Cargo.toml b/Cargo.toml
index 20c3173064..6046f820c7 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -17,7 +17,7 @@
[workspace]
exclude = ["datafusion-cli"]
-members = ["datafusion/common", "datafusion/core", "datafusion/expr",
"datafusion/execution", "datafusion/functions", "datafusion/optimizer",
"datafusion/physical-expr", "datafusion/physical-plan", "datafusion/proto",
"datafusion/proto/gen", "datafusion/sql", "datafusion/sqllogictest",
"datafusion/substrait", "datafusion/wasmtest", "datafusion-examples", "docs",
"test-utils", "benchmarks",
+members = ["datafusion/common", "datafusion/core", "datafusion/expr",
"datafusion/execution", "datafusion/functions", "datafusion/functions-array",
"datafusion/optimizer", "datafusion/physical-expr", "datafusion/physical-plan",
"datafusion/proto", "datafusion/proto/gen", "datafusion/sql",
"datafusion/sqllogictest", "datafusion/substrait", "datafusion/wasmtest",
"datafusion-examples", "docs", "test-utils", "benchmarks",
]
resolver = "2"
@@ -51,6 +51,7 @@ datafusion-common = { path = "datafusion/common", version =
"35.0.0" }
datafusion-execution = { path = "datafusion/execution", version = "35.0.0" }
datafusion-expr = { path = "datafusion/expr", version = "35.0.0" }
datafusion-functions = { path = "datafusion/functions", version = "35.0.0" }
+datafusion-functions-array = { path = "datafusion/functions-array", version =
"35.0.0" }
datafusion-optimizer = { path = "datafusion/optimizer", version = "35.0.0" }
datafusion-physical-expr = { path = "datafusion/physical-expr", version =
"35.0.0" }
datafusion-physical-plan = { path = "datafusion/physical-plan", version =
"35.0.0" }
diff --git a/README.md b/README.md
index 4cbc5bb0ad..44e06e1b6a 100644
--- a/README.md
+++ b/README.md
@@ -75,6 +75,7 @@ This crate has several [features] which can be specified in
your `Cargo.toml`.
Default features:
+- `array_expressions`: functions for working with arrays such as
`array_to_string`
- `compression`: reading files compressed with `xz2`, `bzip2`, `flate2`, and
`zstd`
- `crypto_expressions`: cryptographic functions such as `md5` and `sha256`
- `encoding_expressions`: `encode` and `decode` functions
diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock
index 072898cda4..5ff41ed61d 100644
--- a/datafusion-cli/Cargo.lock
+++ b/datafusion-cli/Cargo.lock
@@ -84,9 +84,9 @@ dependencies = [
[[package]]
name = "anstyle"
-version = "1.0.5"
+version = "1.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2faccea4cc4ab4a667ce676a30e8ec13922a692c99bb8f5b11f1502c72e04220"
+checksum = "8901269c6307e8d93993578286ac0edf7f195079ffff5ebdeea6a59ffb7e36bc"
[[package]]
name = "apache-avro"
@@ -1116,6 +1116,7 @@ dependencies = [
"datafusion-execution",
"datafusion-expr",
"datafusion-functions",
+ "datafusion-functions-array",
"datafusion-optimizer",
"datafusion-physical-expr",
"datafusion-physical-plan",
@@ -1238,6 +1239,18 @@ dependencies = [
"log",
]
+[[package]]
+name = "datafusion-functions-array"
+version = "35.0.0"
+dependencies = [
+ "arrow",
+ "datafusion-common",
+ "datafusion-execution",
+ "datafusion-expr",
+ "log",
+ "paste",
+]
+
[[package]]
name = "datafusion-optimizer"
version = "35.0.0"
@@ -1738,9 +1751,9 @@ dependencies = [
[[package]]
name = "hermit-abi"
-version = "0.3.4"
+version = "0.3.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5d3d0e0f38255e7fa3cf31335b3a56f05febd18025f4db5ef7a0cfb4f8da651f"
+checksum = "d0c62115964e08cb8039170eb33c1d0e2388a256930279edca206fff675f82c3"
[[package]]
name = "hex"
@@ -1950,9 +1963,9 @@ dependencies = [
[[package]]
name = "js-sys"
-version = "0.3.67"
+version = "0.3.68"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9a1d36f1235bc969acba30b7f5990b864423a6068a10f7c90ae8f0112e3a59d1"
+checksum = "406cda4b368d531c842222cf9d2600a9a4acce8d29423695379c6868a143a9ee"
dependencies = [
"wasm-bindgen",
]
@@ -2230,9 +2243,9 @@ dependencies = [
[[package]]
name = "num-complex"
-version = "0.4.4"
+version = "0.4.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1ba157ca0885411de85d6ca030ba7e2a83a28636056c7c699b07c8b6f7383214"
+checksum = "23c6602fda94a57c990fe0df199a035d83576b496aa29f4e634a8ac6004e68a6"
dependencies = [
"num-traits",
]
@@ -2292,7 +2305,7 @@ version = "1.16.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43"
dependencies = [
- "hermit-abi 0.3.4",
+ "hermit-abi 0.3.5",
"libc",
]
@@ -3306,13 +3319,12 @@ dependencies = [
[[package]]
name = "tempfile"
-version = "3.9.0"
+version = "3.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "01ce4141aa927a6d1bd34a041795abd0db1cccba5d5f24b009f694bdf3a1f3fa"
+checksum = "a365e8cd18e44762ef95d87f284f4b5cd04107fec2ff3052bd6a3e6069669e67"
dependencies = [
"cfg-if",
"fastrand 2.0.1",
- "redox_syscall",
"rustix",
"windows-sys 0.52.0",
]
@@ -3624,9 +3636,9 @@ dependencies = [
[[package]]
name = "unicode-segmentation"
-version = "1.10.1"
+version = "1.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1dd624098567895118886609431a7c3b8f516e41d30e0643f03d94592a147e36"
+checksum = "d4c87d22b6e3f4a18d4d40ef354e97c90fcb14dd91d7dc0aa9d8a1172ebf7202"
[[package]]
name = "unicode-width"
@@ -3727,9 +3739,9 @@ checksum =
"9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
[[package]]
name = "wasm-bindgen"
-version = "0.2.90"
+version = "0.2.91"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b1223296a201415c7fad14792dbefaace9bd52b62d33453ade1c5b5f07555406"
+checksum = "c1e124130aee3fb58c5bdd6b639a0509486b0338acaaae0c84a5124b0f588b7f"
dependencies = [
"cfg-if",
"wasm-bindgen-macro",
@@ -3737,9 +3749,9 @@ dependencies = [
[[package]]
name = "wasm-bindgen-backend"
-version = "0.2.90"
+version = "0.2.91"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fcdc935b63408d58a32f8cc9738a0bffd8f05cc7c002086c6ef20b7312ad9dcd"
+checksum = "c9e7e1900c352b609c8488ad12639a311045f40a35491fb69ba8c12f758af70b"
dependencies = [
"bumpalo",
"log",
@@ -3752,9 +3764,9 @@ dependencies = [
[[package]]
name = "wasm-bindgen-futures"
-version = "0.4.40"
+version = "0.4.41"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bde2032aeb86bdfaecc8b261eef3cba735cc426c1f3a3416d1e0791be95fc461"
+checksum = "877b9c3f61ceea0e56331985743b13f3d25c406a7098d45180fb5f09bc19ed97"
dependencies = [
"cfg-if",
"js-sys",
@@ -3764,9 +3776,9 @@ dependencies = [
[[package]]
name = "wasm-bindgen-macro"
-version = "0.2.90"
+version = "0.2.91"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3e4c238561b2d428924c49815533a8b9121c664599558a5d9ec51f8a1740a999"
+checksum = "b30af9e2d358182b5c7449424f017eba305ed32a7010509ede96cdc4696c46ed"
dependencies = [
"quote",
"wasm-bindgen-macro-support",
@@ -3774,9 +3786,9 @@ dependencies = [
[[package]]
name = "wasm-bindgen-macro-support"
-version = "0.2.90"
+version = "0.2.91"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bae1abb6806dc1ad9e560ed242107c0f6c84335f1749dd4e8ddb012ebd5e25a7"
+checksum = "642f325be6301eb8107a83d12a8ac6c1e1c54345a7ef1a9261962dfefda09e66"
dependencies = [
"proc-macro2",
"quote",
@@ -3787,9 +3799,9 @@ dependencies = [
[[package]]
name = "wasm-bindgen-shared"
-version = "0.2.90"
+version = "0.2.91"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4d91413b1c31d7539ba5ef2451af3f0b833a005eb27a631cec32bc0635a8602b"
+checksum = "4f186bd2dcf04330886ce82d6f33dd75a7bfcf69ecf5763b89fcde53b6ac9838"
[[package]]
name = "wasm-streams"
@@ -3806,9 +3818,9 @@ dependencies = [
[[package]]
name = "web-sys"
-version = "0.3.67"
+version = "0.3.68"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "58cd2333b6e0be7a39605f0e255892fd7418a682d8da8fe042fe25128794d2ed"
+checksum = "96565907687f7aceb35bc5fc03770a8a0471d82e479f25832f54a0e3f4b28446"
dependencies = [
"js-sys",
"wasm-bindgen",
diff --git a/datafusion/core/Cargo.toml b/datafusion/core/Cargo.toml
index 429cf35fcd..59d568cd7f 100644
--- a/datafusion/core/Cargo.toml
+++ b/datafusion/core/Cargo.toml
@@ -38,11 +38,12 @@ path = "src/lib.rs"
[features]
# Used to enable the avro format
+array_expressions = ["datafusion-functions-array"]
avro = ["apache-avro", "num-traits", "datafusion-common/avro"]
backtrace = ["datafusion-common/backtrace"]
compression = ["xz2", "bzip2", "flate2", "zstd", "async-compression",
"tokio-util"]
crypto_expressions = ["datafusion-physical-expr/crypto_expressions",
"datafusion-optimizer/crypto_expressions"]
-default = ["crypto_expressions", "encoding_expressions", "regex_expressions",
"unicode_expressions", "compression", "parquet"]
+default = ["array_expressions", "crypto_expressions", "encoding_expressions",
"regex_expressions", "unicode_expressions", "compression", "parquet"]
encoding_expressions = ["datafusion-functions/encoding_expressions"]
# Used for testing ONLY: causes all values to hash to the same value (test for
collisions)
force_hash_collisions = []
@@ -68,7 +69,8 @@ dashmap = { workspace = true }
datafusion-common = { path = "../common", version = "35.0.0", features =
["object_store"], default-features = false }
datafusion-execution = { workspace = true }
datafusion-expr = { workspace = true }
-datafusion-functions = { path = "../functions", version = "35.0.0" }
+datafusion-functions = { workspace = true }
+datafusion-functions-array = { workspace = true, optional = true }
datafusion-optimizer = { path = "../optimizer", version = "35.0.0",
default-features = false }
datafusion-physical-expr = { path = "../physical-expr", version = "35.0.0",
default-features = false }
datafusion-physical-plan = { workspace = true }
diff --git a/datafusion/core/src/execution/context/mod.rs
b/datafusion/core/src/execution/context/mod.rs
index 0bc75720e7..c81e011b45 100644
--- a/datafusion/core/src/execution/context/mod.rs
+++ b/datafusion/core/src/execution/context/mod.rs
@@ -1348,6 +1348,11 @@ impl SessionState {
datafusion_functions::register_all(&mut new_self)
.expect("can not register built in functions");
+ // register crate of array expressions (if enabled)
+ #[cfg(feature = "array_expressions")]
+ datafusion_functions_array::register_all(&mut new_self)
+ .expect("can not register array expressions");
+
new_self
}
/// Returns new [`SessionState`] using the provided
diff --git a/datafusion/core/src/lib.rs b/datafusion/core/src/lib.rs
index 0f7292e1c3..d78d7a38a1 100644
--- a/datafusion/core/src/lib.rs
+++ b/datafusion/core/src/lib.rs
@@ -521,6 +521,12 @@ pub mod functions {
pub use datafusion_functions::*;
}
+/// Re-export of the [`datafusion_functions_array`] crate.
+///
+/// This module is empty unless the "array_expressions" feature is enabled.
+pub mod functions_array {
+ #[cfg(feature = "array_expressions")]
+ pub use datafusion_functions_array::*;
+}
+
#[cfg(test)]
pub mod test;
pub mod test_util;
diff --git a/datafusion/core/src/prelude.rs b/datafusion/core/src/prelude.rs
index 69c3335540..d82a5a2cc1 100644
--- a/datafusion/core/src/prelude.rs
+++ b/datafusion/core/src/prelude.rs
@@ -39,6 +39,8 @@ pub use datafusion_expr::{
Expr,
};
pub use datafusion_functions::expr_fn::*;
+#[cfg(feature = "array_expressions")]
+pub use datafusion_functions_array::expr_fn::*;
pub use std::ops::Not;
pub use std::ops::{Add, Div, Mul, Neg, Rem, Sub};
diff --git a/datafusion/core/tests/dataframe/dataframe_functions.rs
b/datafusion/core/tests/dataframe/dataframe_functions.rs
index 8e00366d2b..95c13fc17c 100644
--- a/datafusion/core/tests/dataframe/dataframe_functions.rs
+++ b/datafusion/core/tests/dataframe/dataframe_functions.rs
@@ -20,6 +20,8 @@ use arrow::{
array::{Int32Array, StringArray},
record_batch::RecordBatch,
};
+use arrow_array::types::Int32Type;
+use arrow_array::ListArray;
use arrow_schema::SchemaRef;
use std::sync::Arc;
@@ -40,6 +42,7 @@ fn test_schema() -> SchemaRef {
Arc::new(Schema::new(vec![
Field::new("a", DataType::Utf8, false),
Field::new("b", DataType::Int32, false),
+ Field::new("l", DataType::new_list(DataType::Int32, true), true),
]))
}
@@ -57,6 +60,12 @@ async fn create_test_table() -> Result<DataFrame> {
"123AbcDef",
])),
Arc::new(Int32Array::from(vec![1, 10, 10, 100])),
+ Arc::new(ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
+ Some(vec![Some(0), Some(1), Some(2)]),
+ None,
+ Some(vec![Some(3), None, Some(5)]),
+ Some(vec![Some(6), Some(7)]),
+ ])),
],
)?;
@@ -67,7 +76,7 @@ async fn create_test_table() -> Result<DataFrame> {
ctx.table("test").await
}
-/// Excutes an expression on the test dataframe as a select.
+/// Executes an expression on the test dataframe as a select.
/// Compares formatted output of a record batch with an expected
/// vector of strings, using the assert_batch_eq! macro
macro_rules! assert_fn_batches {
@@ -862,3 +871,22 @@ async fn test_fn_decode() -> Result<()> {
Ok(())
}
+
+#[tokio::test]
+async fn test_fn_array_to_string() -> Result<()> {
+ let expr = array_to_string(col("l"), lit("***"));
+
+ let expected = [
+ "+-------------------------------------+",
+ "| array_to_string(test.l,Utf8(\"***\")) |",
+ "+-------------------------------------+",
+ "| 0***1***2 |",
+ "| |",
+ "| 3***5 |",
+ "| 6***7 |",
+ "+-------------------------------------+",
+ ];
+ assert_fn_batches!(expr, expected);
+
+ Ok(())
+}
diff --git a/datafusion/expr/src/built_in_function.rs
b/datafusion/expr/src/built_in_function.rs
index 81c600b976..274a6fa9c2 100644
--- a/datafusion/expr/src/built_in_function.rs
+++ b/datafusion/expr/src/built_in_function.rs
@@ -175,8 +175,6 @@ pub enum BuiltinScalarFunction {
ArrayReverse,
/// array_slice
ArraySlice,
- /// array_to_string
- ArrayToString,
/// array_intersect
ArrayIntersect,
/// array_union
@@ -434,7 +432,6 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::ArrayReverse => Volatility::Immutable,
BuiltinScalarFunction::Flatten => Volatility::Immutable,
BuiltinScalarFunction::ArraySlice => Volatility::Immutable,
- BuiltinScalarFunction::ArrayToString => Volatility::Immutable,
BuiltinScalarFunction::ArrayIntersect => Volatility::Immutable,
BuiltinScalarFunction::ArrayUnion => Volatility::Immutable,
BuiltinScalarFunction::ArrayResize => Volatility::Immutable,
@@ -631,7 +628,6 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::ArrayReverse =>
Ok(input_expr_types[0].clone()),
BuiltinScalarFunction::ArraySlice =>
Ok(input_expr_types[0].clone()),
BuiltinScalarFunction::ArrayResize =>
Ok(input_expr_types[0].clone()),
- BuiltinScalarFunction::ArrayToString => Ok(Utf8),
BuiltinScalarFunction::ArrayIntersect => {
match (input_expr_types[0].clone(),
input_expr_types[1].clone()) {
(DataType::Null, DataType::Null) | (DataType::Null, _) => {
@@ -991,9 +987,6 @@ impl BuiltinScalarFunction {
Signature::variadic_any(self.volatility())
}
- BuiltinScalarFunction::ArrayToString => {
- Signature::variadic_any(self.volatility())
- }
BuiltinScalarFunction::ArrayIntersect => Signature::any(2,
self.volatility()),
BuiltinScalarFunction::ArrayUnion => Signature::any(2,
self.volatility()),
BuiltinScalarFunction::Cardinality => Signature::any(1,
self.volatility()),
@@ -1605,12 +1598,6 @@ impl BuiltinScalarFunction {
}
BuiltinScalarFunction::ArrayReverse => &["array_reverse",
"list_reverse"],
BuiltinScalarFunction::ArraySlice => &["array_slice",
"list_slice"],
- BuiltinScalarFunction::ArrayToString => &[
- "array_to_string",
- "list_to_string",
- "array_join",
- "list_join",
- ],
BuiltinScalarFunction::ArrayUnion => &["array_union",
"list_union"],
BuiltinScalarFunction::Cardinality => &["cardinality"],
BuiltinScalarFunction::ArrayResize => &["array_resize",
"list_resize"],
diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs
index 8ed4fd3de2..9c20763c89 100644
--- a/datafusion/expr/src/expr_fn.rs
+++ b/datafusion/expr/src/expr_fn.rs
@@ -740,12 +740,6 @@ scalar_expr!(
array begin end stride,
"returns a slice of the array."
);
-scalar_expr!(
- ArrayToString,
- array_to_string,
- array delimiter,
- "converts each element to its text representation."
-);
scalar_expr!(ArrayUnion, array_union, array1 array2, "returns an array of the
elements in the union of array1 and array2 without duplicates.");
scalar_expr!(
@@ -1454,7 +1448,6 @@ mod test {
test_scalar_expr!(ArrayReplace, array_replace, array, from, to);
test_scalar_expr!(ArrayReplaceN, array_replace_n, array, from, to,
max);
test_scalar_expr!(ArrayReplaceAll, array_replace_all, array, from, to);
- test_scalar_expr!(ArrayToString, array_to_string, array, delimiter);
test_unary_scalar_expr!(Cardinality, cardinality);
test_nary_scalar_expr!(MakeArray, array, input);
diff --git a/datafusion/functions-array/Cargo.toml
b/datafusion/functions-array/Cargo.toml
new file mode 100644
index 0000000000..9cf769bf29
--- /dev/null
+++ b/datafusion/functions-array/Cargo.toml
@@ -0,0 +1,45 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+[package]
+name = "datafusion-functions-array"
+description = "Array Function packages for the DataFusion query engine"
+keywords = ["datafusion", "logical", "plan", "expressions"]
+readme = "README.md"
+version = { workspace = true }
+edition = { workspace = true }
+homepage = { workspace = true }
+repository = { workspace = true }
+license = { workspace = true }
+authors = { workspace = true }
+rust-version = { workspace = true }
+
+[features]
+
+[lib]
+name = "datafusion_functions_array"
+path = "src/lib.rs"
+
+# See more keys and their definitions at
https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+arrow = { workspace = true }
+datafusion-common = { workspace = true }
+datafusion-execution = { workspace = true }
+datafusion-expr = { workspace = true }
+log = "0.4.20"
+paste = "1.0.14"
diff --git a/datafusion/functions-array/README.md
b/datafusion/functions-array/README.md
new file mode 100644
index 0000000000..25deca8e1c
--- /dev/null
+++ b/datafusion/functions-array/README.md
@@ -0,0 +1,27 @@
+<!---
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
+
+# DataFusion Array Function Library
+
+[DataFusion][df] is an extensible query execution framework, written in Rust,
that uses Apache Arrow as its in-memory format.
+
+This crate contains functions for working with arrays, such as `array_append`,
that work with
+`ListArray`, `LargeListArray` and `FixedSizeListArray` types from the `arrow`
crate.
+
+[df]: https://crates.io/crates/datafusion
diff --git a/datafusion/functions-array/src/kernels.rs
b/datafusion/functions-array/src/kernels.rs
new file mode 100644
index 0000000000..1b96e01d8b
--- /dev/null
+++ b/datafusion/functions-array/src/kernels.rs
@@ -0,0 +1,254 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! implementation kernels for array functions
+
+use arrow::array::{
+ Array, ArrayRef, BooleanArray, Float32Array, Float64Array,
GenericListArray,
+ Int16Array, Int32Array, Int64Array, Int8Array, LargeStringArray,
OffsetSizeTrait,
+ StringArray, UInt16Array, UInt32Array, UInt64Array, UInt8Array,
+};
+use arrow::datatypes::DataType;
+use datafusion_common::cast::{as_large_list_array, as_list_array,
as_string_array};
+use datafusion_common::{exec_err, DataFusionError};
+use std::any::type_name;
+use std::sync::Arc;
+
+macro_rules! downcast_arg {
+ ($ARG:expr, $ARRAY_TYPE:ident) => {{
+ $ARG.as_any().downcast_ref::<$ARRAY_TYPE>().ok_or_else(|| {
+ DataFusionError::Internal(format!(
+ "could not cast to {}",
+ type_name::<$ARRAY_TYPE>()
+ ))
+ })?
+ }};
+}
+
+macro_rules! to_string {
+ ($ARG:expr, $ARRAY:expr, $DELIMITER:expr, $NULL_STRING:expr,
$WITH_NULL_STRING:expr, $ARRAY_TYPE:ident) => {{
+ let arr = downcast_arg!($ARRAY, $ARRAY_TYPE);
+ for x in arr {
+ match x {
+ Some(x) => {
+ $ARG.push_str(&x.to_string());
+ $ARG.push_str($DELIMITER);
+ }
+ None => {
+ if $WITH_NULL_STRING {
+ $ARG.push_str($NULL_STRING);
+ $ARG.push_str($DELIMITER);
+ }
+ }
+ }
+ }
+ Ok($ARG)
+ }};
+}
+
+macro_rules! call_array_function {
+ ($DATATYPE:expr, false) => {
+ match $DATATYPE {
+ DataType::Utf8 => array_function!(StringArray),
+ DataType::LargeUtf8 => array_function!(LargeStringArray),
+ DataType::Boolean => array_function!(BooleanArray),
+ DataType::Float32 => array_function!(Float32Array),
+ DataType::Float64 => array_function!(Float64Array),
+ DataType::Int8 => array_function!(Int8Array),
+ DataType::Int16 => array_function!(Int16Array),
+ DataType::Int32 => array_function!(Int32Array),
+ DataType::Int64 => array_function!(Int64Array),
+ DataType::UInt8 => array_function!(UInt8Array),
+ DataType::UInt16 => array_function!(UInt16Array),
+ DataType::UInt32 => array_function!(UInt32Array),
+ DataType::UInt64 => array_function!(UInt64Array),
+ _ => unreachable!(),
+ }
+ };
+ ($DATATYPE:expr, $INCLUDE_LIST:expr) => {{
+ match $DATATYPE {
+ DataType::List(_) => array_function!(ListArray),
+ DataType::Utf8 => array_function!(StringArray),
+ DataType::LargeUtf8 => array_function!(LargeStringArray),
+ DataType::Boolean => array_function!(BooleanArray),
+ DataType::Float32 => array_function!(Float32Array),
+ DataType::Float64 => array_function!(Float64Array),
+ DataType::Int8 => array_function!(Int8Array),
+ DataType::Int16 => array_function!(Int16Array),
+ DataType::Int32 => array_function!(Int32Array),
+ DataType::Int64 => array_function!(Int64Array),
+ DataType::UInt8 => array_function!(UInt8Array),
+ DataType::UInt16 => array_function!(UInt16Array),
+ DataType::UInt32 => array_function!(UInt32Array),
+ DataType::UInt64 => array_function!(UInt64Array),
+ _ => unreachable!(),
+ }
+ }};
+}
+
+/// Array_to_string SQL function
+pub(super) fn array_to_string(args: &[ArrayRef]) ->
datafusion_common::Result<ArrayRef> {
+ if args.len() < 2 || args.len() > 3 {
+ return exec_err!("array_to_string expects two or three arguments");
+ }
+
+ let arr = &args[0];
+
+ let delimiters = as_string_array(&args[1])?;
+ let delimiters: Vec<Option<&str>> = delimiters.iter().collect();
+
+ let mut null_string = String::from("");
+ let mut with_null_string = false;
+ if args.len() == 3 {
+ null_string = as_string_array(&args[2])?.value(0).to_string();
+ with_null_string = true;
+ }
+
+ fn compute_array_to_string(
+ arg: &mut String,
+ arr: ArrayRef,
+ delimiter: String,
+ null_string: String,
+ with_null_string: bool,
+ ) -> datafusion_common::Result<&mut String> {
+ match arr.data_type() {
+ DataType::List(..) => {
+ let list_array = as_list_array(&arr)?;
+ for i in 0..list_array.len() {
+ compute_array_to_string(
+ arg,
+ list_array.value(i),
+ delimiter.clone(),
+ null_string.clone(),
+ with_null_string,
+ )?;
+ }
+
+ Ok(arg)
+ }
+ DataType::LargeList(..) => {
+ let list_array = as_large_list_array(&arr)?;
+ for i in 0..list_array.len() {
+ compute_array_to_string(
+ arg,
+ list_array.value(i),
+ delimiter.clone(),
+ null_string.clone(),
+ with_null_string,
+ )?;
+ }
+
+ Ok(arg)
+ }
+ DataType::Null => Ok(arg),
+ data_type => {
+ macro_rules! array_function {
+ ($ARRAY_TYPE:ident) => {
+ to_string!(
+ arg,
+ arr,
+ &delimiter,
+ &null_string,
+ with_null_string,
+ $ARRAY_TYPE
+ )
+ };
+ }
+ call_array_function!(data_type, false)
+ }
+ }
+ }
+
+ fn generate_string_array<O: OffsetSizeTrait>(
+ list_arr: &GenericListArray<O>,
+ delimiters: Vec<Option<&str>>,
+ null_string: String,
+ with_null_string: bool,
+ ) -> datafusion_common::Result<StringArray> {
+ let mut res: Vec<Option<String>> = Vec::new();
+ for (arr, &delimiter) in list_arr.iter().zip(delimiters.iter()) {
+ if let (Some(arr), Some(delimiter)) = (arr, delimiter) {
+ let mut arg = String::from("");
+ let s = compute_array_to_string(
+ &mut arg,
+ arr,
+ delimiter.to_string(),
+ null_string.clone(),
+ with_null_string,
+ )?
+ .clone();
+
+ if let Some(s) = s.strip_suffix(delimiter) {
+ res.push(Some(s.to_string()));
+ } else {
+ res.push(Some(s));
+ }
+ } else {
+ res.push(None);
+ }
+ }
+
+ Ok(StringArray::from(res))
+ }
+
+ let arr_type = arr.data_type();
+ let string_arr = match arr_type {
+ DataType::List(_) | DataType::FixedSizeList(_, _) => {
+ let list_array = as_list_array(&arr)?;
+ generate_string_array::<i32>(
+ list_array,
+ delimiters,
+ null_string,
+ with_null_string,
+ )?
+ }
+ DataType::LargeList(_) => {
+ let list_array = as_large_list_array(&arr)?;
+ generate_string_array::<i64>(
+ list_array,
+ delimiters,
+ null_string,
+ with_null_string,
+ )?
+ }
+ _ => {
+ let mut arg = String::from("");
+ let mut res: Vec<Option<String>> = Vec::new();
+ // delimiter length is 1
+ assert_eq!(delimiters.len(), 1);
+ let delimiter = delimiters[0].unwrap();
+ let s = compute_array_to_string(
+ &mut arg,
+ arr.clone(),
+ delimiter.to_string(),
+ null_string,
+ with_null_string,
+ )?
+ .clone();
+
+ if !s.is_empty() {
+ let s = s.strip_suffix(delimiter).unwrap().to_string();
+ res.push(Some(s));
+ } else {
+ res.push(Some(s));
+ }
+ StringArray::from(res)
+ }
+ };
+
+ Ok(Arc::new(string_arr))
+}
diff --git a/datafusion/functions-array/src/lib.rs
b/datafusion/functions-array/src/lib.rs
new file mode 100644
index 0000000000..84997ed10e
--- /dev/null
+++ b/datafusion/functions-array/src/lib.rs
@@ -0,0 +1,56 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Array Functions for [DataFusion].
+//!
+//! This crate contains a collection of array functions implemented using the
+//! extension API.
+//!
+//! [DataFusion]: https://crates.io/crates/datafusion
+//!
+//! You can register the functions in this crate using the [`register_all`]
function.
+//!
+
+#[macro_use]
+pub mod macros;
+
+mod kernels;
+mod udf;
+
+use datafusion_common::Result;
+use datafusion_execution::FunctionRegistry;
+use datafusion_expr::ScalarUDF;
+use log::debug;
+use std::sync::Arc;
+
+/// Fluent-style API for creating `Expr`s
+pub mod expr_fn {
+ pub use super::udf::array_to_string;
+}
+
+/// Registers all enabled packages with a [`FunctionRegistry`]
+pub fn register_all(registry: &mut dyn FunctionRegistry) -> Result<()> {
+ let functions: Vec<Arc<ScalarUDF>> = vec![udf::array_to_string_udf()];
+ functions.into_iter().try_for_each(|udf| {
+ let existing_udf = registry.register_udf(udf)?;
+ if let Some(existing_udf) = existing_udf {
+ debug!("Overwrite existing UDF: {}", existing_udf.name());
+ }
+ Ok(()) as Result<()>
+ })?;
+ Ok(())
+}
diff --git a/datafusion/functions-array/src/macros.rs
b/datafusion/functions-array/src/macros.rs
new file mode 100644
index 0000000000..c503fde05b
--- /dev/null
+++ b/datafusion/functions-array/src/macros.rs
@@ -0,0 +1,79 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+/// Creates external API functions for an array UDF. Specifically, creates
+///
+/// # 1. Single `ScalarUDF` instance
+///
+/// Creates a singleton `ScalarUDF` wrapping `$UDF`, held in a static named
+/// `STATIC_$UDF`, and a function named `$SCALAR_UDF_FN` which returns it, so
+/// that the `ScalarUDF` is only created once.
+///
+/// # 2. `expr_fn` style function
+///
+/// These are functions that create an `Expr` that invokes the UDF, used
+/// primarily to programmatically create expressions.
+///
+/// For example:
+/// ```text
+/// pub fn array_to_string(array: Expr, delimiter: Expr) -> Expr {
+/// ...
+/// }
+/// ```
+/// # Arguments
+/// * `UDF`: type implementing [`ScalarUDFImpl`], constructed via `<$UDF>::new()`
+/// * `EXPR_FN`: name of the expr_fn function to be created
+/// * `arg`: 0 or more argument names; each becomes an `Expr` parameter
+/// * `DOC`: documentation string for the function
+/// * `SCALAR_UDF_FN`: name of the function to create (just) the `ScalarUDF`
+///
+/// `Expr` and `ScalarFunction` must be in scope wherever the macro is invoked.
+///
+/// [`ScalarUDFImpl`]: datafusion_expr::ScalarUDFImpl
+macro_rules! make_udf_function {
+    ($UDF:ty, $EXPR_FN:ident, $($arg:ident)*, $DOC:expr, $SCALAR_UDF_FN:ident) => {
+        paste::paste! {
+            // "fluent expr_fn" style function
+            #[doc = $DOC]
+            pub fn $EXPR_FN($($arg: Expr),*) -> Expr {
+                Expr::ScalarFunction(ScalarFunction::new_udf(
+                    $SCALAR_UDF_FN(),
+                    vec![$($arg),*],
+                ))
+            }
+
+            /// Singleton instance of [`$UDF`]; ensures the UDF is only created once.
+            /// Named `STATIC_$UDF`, for example `STATIC_ArrayToString`.
+            #[allow(non_upper_case_globals)]
+            static [< STATIC_ $UDF >]: std::sync::OnceLock<std::sync::Arc<datafusion_expr::ScalarUDF>> =
+                std::sync::OnceLock::new();
+
+            /// Returns the shared [`ScalarUDF`] for [`$UDF`], creating it on first use
+            ///
+            /// [`ScalarUDF`]: datafusion_expr::ScalarUDF
+            pub fn $SCALAR_UDF_FN() -> std::sync::Arc<datafusion_expr::ScalarUDF> {
+                [< STATIC_ $UDF >]
+                    .get_or_init(|| {
+                        std::sync::Arc::new(datafusion_expr::ScalarUDF::new_from_impl(
+                            <$UDF>::new(),
+                        ))
+                    })
+                    .clone()
+            }
+        }
+    };
+}
diff --git a/datafusion/functions-array/src/udf.rs
b/datafusion/functions-array/src/udf.rs
new file mode 100644
index 0000000000..b7f9d2497f
--- /dev/null
+++ b/datafusion/functions-array/src/udf.rs
@@ -0,0 +1,85 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! [`ScalarUDFImpl`] definitions for array functions.
+
+use arrow::datatypes::DataType;
+use datafusion_common::{plan_err, DataFusionError};
+use datafusion_expr::expr::ScalarFunction;
+use datafusion_expr::Expr;
+use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};
+use std::any::Any;
+
+// Generates the `array_to_string` expr_fn and the `array_to_string_udf` singleton accessor
+make_udf_function!(ArrayToString,
+ array_to_string,
+ array delimiter, // arg names
+ "converts each element to its text representation.", // doc
+ array_to_string_udf // name of the function returning the `ScalarUDF`
+);
+
+/// [`ScalarUDFImpl`] backing the `array_to_string` function.
+///
+/// `aliases` is what `ScalarUDFImpl::aliases` returns; it includes `array_to_string` itself.
+#[derive(Debug)]
+pub(super) struct ArrayToString {
+    signature: Signature,
+    aliases: Vec<String>,
+}
+
+impl ArrayToString {
+    /// Builds the UDF with a `variadic_any` signature of `Immutable` volatility
+    /// and the four names under which the function is exposed.
+    pub fn new() -> Self {
+        let names = ["array_to_string", "list_to_string", "array_join", "list_join"];
+        let aliases = names.iter().map(|name| name.to_string()).collect();
+        let signature = Signature::variadic_any(Volatility::Immutable);
+        Self { signature, aliases }
+    }
+}
+
+impl ScalarUDFImpl for ArrayToString {
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+    fn name(&self) -> &str {
+        "array_to_string"
+    }
+
+    fn signature(&self) -> &Signature {
+        &self.signature
+    }
+
+    /// `Utf8` if the first argument is a list type; otherwise (or with no arguments) a plan error.
+    fn return_type(&self, arg_types: &[DataType]) -> datafusion_common::Result<DataType> {
+        use DataType::*;
+        match arg_types.first() {
+            Some(List(_) | LargeList(_) | FixedSizeList(_, _)) => Ok(Utf8),
+            _ => plan_err!("The array_to_string function can only accept List/LargeList/FixedSizeList."),
+        }
+    }
+
+    /// Converts the arguments with `ColumnarValue::values_to_arrays`, then runs the kernel.
+    fn invoke(&self, args: &[ColumnarValue]) -> datafusion_common::Result<ColumnarValue> {
+        let args = ColumnarValue::values_to_arrays(args)?;
+        crate::kernels::array_to_string(&args).map(ColumnarValue::Array)
+    }
+
+    fn aliases(&self) -> &[String] {
+        &self.aliases
+    }
+}
diff --git a/datafusion/physical-expr/src/array_expressions.rs
b/datafusion/physical-expr/src/array_expressions.rs
index 0709e66a35..0468b3e542 100644
--- a/datafusion/physical-expr/src/array_expressions.rs
+++ b/datafusion/physical-expr/src/array_expressions.rs
@@ -229,46 +229,6 @@ fn check_datatypes(name: &str, args: &[&ArrayRef]) ->
Result<()> {
Ok(())
}
-macro_rules! call_array_function {
- ($DATATYPE:expr, false) => {
- match $DATATYPE {
- DataType::Utf8 => array_function!(StringArray),
- DataType::LargeUtf8 => array_function!(LargeStringArray),
- DataType::Boolean => array_function!(BooleanArray),
- DataType::Float32 => array_function!(Float32Array),
- DataType::Float64 => array_function!(Float64Array),
- DataType::Int8 => array_function!(Int8Array),
- DataType::Int16 => array_function!(Int16Array),
- DataType::Int32 => array_function!(Int32Array),
- DataType::Int64 => array_function!(Int64Array),
- DataType::UInt8 => array_function!(UInt8Array),
- DataType::UInt16 => array_function!(UInt16Array),
- DataType::UInt32 => array_function!(UInt32Array),
- DataType::UInt64 => array_function!(UInt64Array),
- _ => unreachable!(),
- }
- };
- ($DATATYPE:expr, $INCLUDE_LIST:expr) => {{
- match $DATATYPE {
- DataType::List(_) => array_function!(ListArray),
- DataType::Utf8 => array_function!(StringArray),
- DataType::LargeUtf8 => array_function!(LargeStringArray),
- DataType::Boolean => array_function!(BooleanArray),
- DataType::Float32 => array_function!(Float32Array),
- DataType::Float64 => array_function!(Float64Array),
- DataType::Int8 => array_function!(Int8Array),
- DataType::Int16 => array_function!(Int16Array),
- DataType::Int32 => array_function!(Int32Array),
- DataType::Int64 => array_function!(Int64Array),
- DataType::UInt8 => array_function!(UInt8Array),
- DataType::UInt16 => array_function!(UInt16Array),
- DataType::UInt32 => array_function!(UInt32Array),
- DataType::UInt64 => array_function!(UInt64Array),
- _ => unreachable!(),
- }
- }};
-}
-
/// Convert one or more [`ArrayRef`] of the same type into a
/// `ListArray` or 'LargeListArray' depending on the offset size.
///
@@ -1870,27 +1830,6 @@ pub fn array_replace_all(args: &[ArrayRef]) ->
Result<ArrayRef> {
}
}
-macro_rules! to_string {
- ($ARG:expr, $ARRAY:expr, $DELIMITER:expr, $NULL_STRING:expr,
$WITH_NULL_STRING:expr, $ARRAY_TYPE:ident) => {{
- let arr = downcast_arg!($ARRAY, $ARRAY_TYPE);
- for x in arr {
- match x {
- Some(x) => {
- $ARG.push_str(&x.to_string());
- $ARG.push_str($DELIMITER);
- }
- None => {
- if $WITH_NULL_STRING {
- $ARG.push_str($NULL_STRING);
- $ARG.push_str($DELIMITER);
- }
- }
- }
- }
- Ok($ARG)
- }};
-}
-
#[derive(Debug, PartialEq)]
enum SetOp {
Union,
@@ -2058,159 +1997,6 @@ pub fn array_intersect(args: &[ArrayRef]) ->
Result<ArrayRef> {
general_set_op(array1, array2, SetOp::Intersect)
}
-/// Array_to_string SQL function
-pub fn array_to_string(args: &[ArrayRef]) -> Result<ArrayRef> {
- if args.len() < 2 || args.len() > 3 {
- return exec_err!("array_to_string expects two or three arguments");
- }
-
- let arr = &args[0];
-
- let delimiters = as_string_array(&args[1])?;
- let delimiters: Vec<Option<&str>> = delimiters.iter().collect();
-
- let mut null_string = String::from("");
- let mut with_null_string = false;
- if args.len() == 3 {
- null_string = as_string_array(&args[2])?.value(0).to_string();
- with_null_string = true;
- }
-
- fn compute_array_to_string(
- arg: &mut String,
- arr: ArrayRef,
- delimiter: String,
- null_string: String,
- with_null_string: bool,
- ) -> Result<&mut String> {
- match arr.data_type() {
- DataType::List(..) => {
- let list_array = as_list_array(&arr)?;
- for i in 0..list_array.len() {
- compute_array_to_string(
- arg,
- list_array.value(i),
- delimiter.clone(),
- null_string.clone(),
- with_null_string,
- )?;
- }
-
- Ok(arg)
- }
- DataType::LargeList(..) => {
- let list_array = as_large_list_array(&arr)?;
- for i in 0..list_array.len() {
- compute_array_to_string(
- arg,
- list_array.value(i),
- delimiter.clone(),
- null_string.clone(),
- with_null_string,
- )?;
- }
-
- Ok(arg)
- }
- DataType::Null => Ok(arg),
- data_type => {
- macro_rules! array_function {
- ($ARRAY_TYPE:ident) => {
- to_string!(
- arg,
- arr,
- &delimiter,
- &null_string,
- with_null_string,
- $ARRAY_TYPE
- )
- };
- }
- call_array_function!(data_type, false)
- }
- }
- }
-
- fn generate_string_array<O: OffsetSizeTrait>(
- list_arr: &GenericListArray<O>,
- delimiters: Vec<Option<&str>>,
- null_string: String,
- with_null_string: bool,
- ) -> Result<StringArray> {
- let mut res: Vec<Option<String>> = Vec::new();
- for (arr, &delimiter) in list_arr.iter().zip(delimiters.iter()) {
- if let (Some(arr), Some(delimiter)) = (arr, delimiter) {
- let mut arg = String::from("");
- let s = compute_array_to_string(
- &mut arg,
- arr,
- delimiter.to_string(),
- null_string.clone(),
- with_null_string,
- )?
- .clone();
-
- if let Some(s) = s.strip_suffix(delimiter) {
- res.push(Some(s.to_string()));
- } else {
- res.push(Some(s));
- }
- } else {
- res.push(None);
- }
- }
-
- Ok(StringArray::from(res))
- }
-
- let arr_type = arr.data_type();
- let string_arr = match arr_type {
- DataType::List(_) | DataType::FixedSizeList(_, _) => {
- let list_array = as_list_array(&arr)?;
- generate_string_array::<i32>(
- list_array,
- delimiters,
- null_string,
- with_null_string,
- )?
- }
- DataType::LargeList(_) => {
- let list_array = as_large_list_array(&arr)?;
- generate_string_array::<i64>(
- list_array,
- delimiters,
- null_string,
- with_null_string,
- )?
- }
- _ => {
- let mut arg = String::from("");
- let mut res: Vec<Option<String>> = Vec::new();
- // delimiter length is 1
- assert_eq!(delimiters.len(), 1);
- let delimiter = delimiters[0].unwrap();
- let s = compute_array_to_string(
- &mut arg,
- arr.clone(),
- delimiter.to_string(),
- null_string,
- with_null_string,
- )?
- .clone();
-
- if !s.is_empty() {
- let s = s.strip_suffix(delimiter).unwrap().to_string();
- res.push(Some(s));
- } else {
- res.push(Some(s));
- }
- StringArray::from(res)
- }
- };
-
- Ok(Arc::new(string_arr))
-}
-
/// Cardinality SQL function
pub fn cardinality(args: &[ArrayRef]) -> Result<ArrayRef> {
if args.len() != 1 {
diff --git a/datafusion/physical-expr/src/functions.rs
b/datafusion/physical-expr/src/functions.rs
index e09c6ab2e3..81f433611a 100644
--- a/datafusion/physical-expr/src/functions.rs
+++ b/datafusion/physical-expr/src/functions.rs
@@ -411,9 +411,6 @@ pub fn create_physical_fun(
BuiltinScalarFunction::ArraySlice => Arc::new(|args| {
make_scalar_function_inner(array_expressions::array_slice)(args)
}),
- BuiltinScalarFunction::ArrayToString => Arc::new(|args| {
-
make_scalar_function_inner(array_expressions::array_to_string)(args)
- }),
BuiltinScalarFunction::ArrayIntersect => Arc::new(|args| {
make_scalar_function_inner(array_expressions::array_intersect)(args)
}),
diff --git a/datafusion/proto/proto/datafusion.proto
b/datafusion/proto/proto/datafusion.proto
index 0781e91391..d50336bd0f 100644
--- a/datafusion/proto/proto/datafusion.proto
+++ b/datafusion/proto/proto/datafusion.proto
@@ -641,7 +641,7 @@ enum ScalarFunction {
ArrayPrepend = 94;
ArrayRemove = 95;
ArrayReplace = 96;
- ArrayToString = 97;
+ // 97 was ArrayToString
Cardinality = 98;
ArrayElement = 99;
ArraySlice = 100;
diff --git a/datafusion/proto/src/generated/pbjson.rs
b/datafusion/proto/src/generated/pbjson.rs
index f2b383b2e8..592c2609b6 100644
--- a/datafusion/proto/src/generated/pbjson.rs
+++ b/datafusion/proto/src/generated/pbjson.rs
@@ -22400,7 +22400,6 @@ impl serde::Serialize for ScalarFunction {
Self::ArrayPrepend => "ArrayPrepend",
Self::ArrayRemove => "ArrayRemove",
Self::ArrayReplace => "ArrayReplace",
- Self::ArrayToString => "ArrayToString",
Self::Cardinality => "Cardinality",
Self::ArrayElement => "ArrayElement",
Self::ArraySlice => "ArraySlice",
@@ -22545,7 +22544,6 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction {
"ArrayPrepend",
"ArrayRemove",
"ArrayReplace",
- "ArrayToString",
"Cardinality",
"ArrayElement",
"ArraySlice",
@@ -22719,7 +22717,6 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction {
"ArrayPrepend" => Ok(ScalarFunction::ArrayPrepend),
"ArrayRemove" => Ok(ScalarFunction::ArrayRemove),
"ArrayReplace" => Ok(ScalarFunction::ArrayReplace),
- "ArrayToString" => Ok(ScalarFunction::ArrayToString),
"Cardinality" => Ok(ScalarFunction::Cardinality),
"ArrayElement" => Ok(ScalarFunction::ArrayElement),
"ArraySlice" => Ok(ScalarFunction::ArraySlice),
diff --git a/datafusion/proto/src/generated/prost.rs
b/datafusion/proto/src/generated/prost.rs
index f421f75a27..a65df74bbc 100644
--- a/datafusion/proto/src/generated/prost.rs
+++ b/datafusion/proto/src/generated/prost.rs
@@ -2728,7 +2728,7 @@ pub enum ScalarFunction {
ArrayPrepend = 94,
ArrayRemove = 95,
ArrayReplace = 96,
- ArrayToString = 97,
+ /// 97 was ArrayToString
Cardinality = 98,
ArrayElement = 99,
ArraySlice = 100,
@@ -2870,7 +2870,6 @@ impl ScalarFunction {
ScalarFunction::ArrayPrepend => "ArrayPrepend",
ScalarFunction::ArrayRemove => "ArrayRemove",
ScalarFunction::ArrayReplace => "ArrayReplace",
- ScalarFunction::ArrayToString => "ArrayToString",
ScalarFunction::Cardinality => "Cardinality",
ScalarFunction::ArrayElement => "ArrayElement",
ScalarFunction::ArraySlice => "ArraySlice",
@@ -3009,7 +3008,6 @@ impl ScalarFunction {
"ArrayPrepend" => Some(Self::ArrayPrepend),
"ArrayRemove" => Some(Self::ArrayRemove),
"ArrayReplace" => Some(Self::ArrayReplace),
- "ArrayToString" => Some(Self::ArrayToString),
"Cardinality" => Some(Self::Cardinality),
"ArrayElement" => Some(Self::ArrayElement),
"ArraySlice" => Some(Self::ArraySlice),
diff --git a/datafusion/proto/src/logical_plan/from_proto.rs
b/datafusion/proto/src/logical_plan/from_proto.rs
index 4bc8d5baf1..07590e0d93 100644
--- a/datafusion/proto/src/logical_plan/from_proto.rs
+++ b/datafusion/proto/src/logical_plan/from_proto.rs
@@ -52,11 +52,10 @@ use datafusion_expr::{
array_intersect, array_length, array_ndims, array_pop_back,
array_pop_front,
array_position, array_positions, array_prepend, array_remove,
array_remove_all,
array_remove_n, array_repeat, array_replace, array_replace_all,
array_replace_n,
- array_resize, array_slice, array_sort, array_to_string, array_union,
arrow_typeof,
- ascii, asin, asinh, atan, atan2, atanh, bit_length, btrim, cardinality,
cbrt, ceil,
- character_length, chr, coalesce, concat_expr, concat_ws_expr, cos, cosh,
cot,
- current_date, current_time, date_bin, date_part, date_trunc, degrees,
digest,
- ends_with, exp,
+ array_resize, array_slice, array_sort, array_union, arrow_typeof, ascii,
asin, asinh,
+ atan, atan2, atanh, bit_length, btrim, cardinality, cbrt, ceil,
character_length,
+ chr, coalesce, concat_expr, concat_ws_expr, cos, cosh, cot, current_date,
+ current_time, date_bin, date_part, date_trunc, degrees, digest, ends_with,
exp,
expr::{self, InList, Sort, WindowFunction},
factorial, find_in_set, flatten, floor, from_unixtime, gcd, gen_range,
initcap,
instr, isnan, iszero, lcm, left, levenshtein, ln, log, log10, log2,
@@ -507,7 +506,6 @@ impl From<&protobuf::ScalarFunction> for
BuiltinScalarFunction {
ScalarFunction::ArrayReplaceAll => Self::ArrayReplaceAll,
ScalarFunction::ArrayReverse => Self::ArrayReverse,
ScalarFunction::ArraySlice => Self::ArraySlice,
- ScalarFunction::ArrayToString => Self::ArrayToString,
ScalarFunction::ArrayIntersect => Self::ArrayIntersect,
ScalarFunction::ArrayUnion => Self::ArrayUnion,
ScalarFunction::ArrayResize => Self::ArrayResize,
@@ -1463,10 +1461,6 @@ pub fn parse_expr(
parse_expr(&args[2], registry)?,
parse_expr(&args[3], registry)?,
)),
- ScalarFunction::ArrayToString => Ok(array_to_string(
- parse_expr(&args[0], registry)?,
- parse_expr(&args[1], registry)?,
- )),
ScalarFunction::Range => Ok(gen_range(
args.to_owned()
.iter()
diff --git a/datafusion/proto/src/logical_plan/to_proto.rs
b/datafusion/proto/src/logical_plan/to_proto.rs
index 9d520c7d04..7a8fbde07b 100644
--- a/datafusion/proto/src/logical_plan/to_proto.rs
+++ b/datafusion/proto/src/logical_plan/to_proto.rs
@@ -1486,7 +1486,6 @@ impl TryFrom<&BuiltinScalarFunction> for
protobuf::ScalarFunction {
BuiltinScalarFunction::ArrayReplaceAll => Self::ArrayReplaceAll,
BuiltinScalarFunction::ArrayReverse => Self::ArrayReverse,
BuiltinScalarFunction::ArraySlice => Self::ArraySlice,
- BuiltinScalarFunction::ArrayToString => Self::ArrayToString,
BuiltinScalarFunction::ArrayIntersect => Self::ArrayIntersect,
BuiltinScalarFunction::ArrayUnion => Self::ArrayUnion,
BuiltinScalarFunction::Range => Self::Range,
diff --git a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs
b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs
index 652e59672b..b6d288da2c 100644
--- a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs
+++ b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs
@@ -567,13 +567,15 @@ async fn roundtrip_expr_api() -> Result<()> {
let table = ctx.table("t1").await?;
let schema = table.schema().clone();
+ // list of expressions to round trip
+ let expr_list = vec![
+ encode(col("a").cast_to(&DataType::Utf8, &schema)?, lit("hex")),
+ decode(lit("1234"), lit("hex")),
+ array_to_string(array(vec![lit(1), lit(2), lit(3)]), lit(",")),
+ ];
+
// ensure expressions created with the expr api can be round tripped
- let plan = table
- .select(vec![
- encode(col("a").cast_to(&DataType::Utf8, &schema)?, lit("hex")),
- decode(lit("1234"), lit("hex")),
- ])?
- .into_optimized_plan()?;
+ let plan = table.select(expr_list)?.into_optimized_plan()?;
let bytes = logical_plan_to_bytes(&plan)?;
let logical_round_trip = logical_plan_from_bytes(&bytes, &ctx)?;
assert_eq!(format!("{plan:?}"), format!("{logical_round_trip:?}"));