This is an automated email from the ASF dual-hosted git repository.
mbutrovich pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-comet.git
The following commit(s) were added to refs/heads/main by this push:
new 24f520975 chore: upgrade to DataFusion 50.0.0, Arrow 56.1.0, Parquet
56.0.0 among others (#2286)
24f520975 is described below
commit 24f520975bd55c951881c06886a8e228a242a992
Author: Matt Butrovich <[email protected]>
AuthorDate: Wed Sep 17 07:30:11 2025 -0400
chore: upgrade to DataFusion 50.0.0, Arrow 56.1.0, Parquet 56.0.0 among
others (#2286)
---
native/Cargo.lock | 342 ++++++++++++---------
native/Cargo.toml | 10 +-
native/core/Cargo.toml | 4 +-
native/core/src/execution/jni_api.rs | 9 +-
native/core/src/execution/planner.rs | 42 ++-
.../core/src/execution/shuffle/shuffle_writer.rs | 2 +-
native/spark-expr/src/agg_funcs/avg.rs | 2 +-
native/spark-expr/src/agg_funcs/avg_decimal.rs | 2 +-
native/spark-expr/src/agg_funcs/correlation.rs | 2 +-
native/spark-expr/src/agg_funcs/covariance.rs | 11 +-
native/spark-expr/src/agg_funcs/stddev.rs | 11 +-
native/spark-expr/src/agg_funcs/sum_decimal.rs | 2 +-
native/spark-expr/src/agg_funcs/variance.rs | 11 +-
.../spark-expr/src/bitwise_funcs/bitwise_count.rs | 2 +-
native/spark-expr/src/bitwise_funcs/bitwise_get.rs | 2 +-
native/spark-expr/src/bitwise_funcs/bitwise_not.rs | 2 +-
.../src/bloom_filter/bloom_filter_agg.rs | 2 +-
.../src/bloom_filter/bloom_filter_might_contain.rs | 2 +-
native/spark-expr/src/comet_scalar_funcs.rs | 20 ++
native/spark-expr/src/datetime_funcs/date_trunc.rs | 2 +-
.../src/datetime_funcs/extract_date_part.rs | 2 +-
native/spark-expr/src/math_funcs/modulo_expr.rs | 2 +
native/spark-expr/src/predicate_funcs/rlike.rs | 20 +-
native/spark-expr/src/string_funcs/string_space.rs | 2 +-
24 files changed, 317 insertions(+), 191 deletions(-)
diff --git a/native/Cargo.lock b/native/Cargo.lock
index 7878fe87b..f9d14fa94 100644
--- a/native/Cargo.lock
+++ b/native/Cargo.lock
@@ -117,9 +117,9 @@ checksum =
"7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50"
[[package]]
name = "arrow"
-version = "55.2.0"
+version = "56.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f3f15b4c6b148206ff3a2b35002e08929c2462467b62b9c02036d9c34f9ef994"
+checksum = "c26b57282a08ae92f727497805122fec964c6245cfa0e13f0e75452eaf3bc41f"
dependencies = [
"arrow-arith",
"arrow-array",
@@ -138,9 +138,9 @@ dependencies = [
[[package]]
name = "arrow-arith"
-version = "55.2.0"
+version = "56.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "30feb679425110209ae35c3fbf82404a39a4c0436bb3ec36164d8bffed2a4ce4"
+checksum = "cebf38ca279120ff522f4954b81a39527425b6e9f615e6b72842f4de1ffe02b8"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -152,9 +152,9 @@ dependencies = [
[[package]]
name = "arrow-array"
-version = "55.2.0"
+version = "56.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "70732f04d285d49054a48b72c54f791bb3424abae92d27aafdf776c98af161c8"
+checksum = "744109142cdf8e7b02795e240e20756c2a782ac9180d4992802954a8f871c0de"
dependencies = [
"ahash",
"arrow-buffer",
@@ -169,9 +169,9 @@ dependencies = [
[[package]]
name = "arrow-buffer"
-version = "55.2.0"
+version = "56.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "169b1d5d6cb390dd92ce582b06b23815c7953e9dfaaea75556e89d890d19993d"
+checksum = "601bb103c4c374bcd1f62c66bcea67b42a2ee91a690486c37d4c180236f11ccc"
dependencies = [
"bytes",
"half",
@@ -180,9 +180,9 @@ dependencies = [
[[package]]
name = "arrow-cast"
-version = "55.2.0"
+version = "56.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e4f12eccc3e1c05a766cafb31f6a60a46c2f8efec9b74c6e0648766d30686af8"
+checksum = "eed61d9d73eda8df9e3014843def37af3050b5080a9acbe108f045a316d5a0be"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -201,9 +201,9 @@ dependencies = [
[[package]]
name = "arrow-csv"
-version = "55.2.0"
+version = "56.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "012c9fef3f4a11573b2c74aec53712ff9fdae4a95f4ce452d1bbf088ee00f06b"
+checksum = "fa95b96ce0c06b4d33ac958370db8c0d31e88e54f9d6e08b0353d18374d9f991"
dependencies = [
"arrow-array",
"arrow-cast",
@@ -216,9 +216,9 @@ dependencies = [
[[package]]
name = "arrow-data"
-version = "55.2.0"
+version = "56.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8de1ce212d803199684b658fc4ba55fb2d7e87b213de5af415308d2fee3619c2"
+checksum = "43407f2c6ba2367f64d85d4603d6fb9c4b92ed79d2ffd21021b37efa96523e12"
dependencies = [
"arrow-buffer",
"arrow-schema",
@@ -228,24 +228,24 @@ dependencies = [
[[package]]
name = "arrow-ipc"
-version = "55.2.0"
+version = "56.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d9ea5967e8b2af39aff5d9de2197df16e305f47f404781d3230b2dc672da5d92"
+checksum = "e4b0487c4d2ad121cbc42c4db204f1509f8618e589bc77e635e9c40b502e3b90"
dependencies = [
"arrow-array",
"arrow-buffer",
"arrow-data",
"arrow-schema",
+ "arrow-select",
"flatbuffers",
"lz4_flex",
- "zstd",
]
[[package]]
name = "arrow-json"
-version = "55.2.0"
+version = "56.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5709d974c4ea5be96d900c01576c7c0b99705f4a3eec343648cb1ca863988a9c"
+checksum = "26d747573390905905a2dc4c5a61a96163fe2750457f90a04ee2a88680758c79"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -265,9 +265,9 @@ dependencies = [
[[package]]
name = "arrow-ord"
-version = "55.2.0"
+version = "56.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6506e3a059e3be23023f587f79c82ef0bcf6d293587e3272d20f2d30b969b5a7"
+checksum = "c142a147dceb59d057bad82400f1693847c80dca870d008bf7b91caf902810ae"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -278,9 +278,9 @@ dependencies = [
[[package]]
name = "arrow-row"
-version = "55.2.0"
+version = "56.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "52bf7393166beaf79b4bed9bfdf19e97472af32ce5b6b48169d321518a08cae2"
+checksum = "dac6620667fccdab4204689ca173bd84a15de6bb6b756c3a8764d4d7d0c2fc04"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -291,9 +291,9 @@ dependencies = [
[[package]]
name = "arrow-schema"
-version = "55.2.0"
+version = "56.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "af7686986a3bf2254c9fb130c623cdcb2f8e1f15763e7c71c310f0834da3d292"
+checksum = "dfa93af9ff2bb80de539e6eb2c1c8764abd0f4b73ffb0d7c82bf1f9868785e66"
dependencies = [
"bitflags 2.9.4",
"serde",
@@ -302,9 +302,9 @@ dependencies = [
[[package]]
name = "arrow-select"
-version = "55.2.0"
+version = "56.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "dd2b45757d6a2373faa3352d02ff5b54b098f5e21dccebc45a21806bc34501e5"
+checksum = "be8b2e0052cd20d36d64f32640b68a5ab54d805d24a473baee5d52017c85536c"
dependencies = [
"ahash",
"arrow-array",
@@ -316,9 +316,9 @@ dependencies = [
[[package]]
name = "arrow-string"
-version = "55.2.0"
+version = "56.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0377d532850babb4d927a06294314b316e23311503ed580ec6ce6a0158f49d40"
+checksum = "c2155e26e17f053c8975c546fc70cf19c00542f9abf43c23a88a46ef7204204f"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -391,11 +391,11 @@ dependencies = [
[[package]]
name = "async-io"
-version = "2.5.0"
+version = "2.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "19634d6336019ef220f09fd31168ce5c184b295cbf80345437cc36094ef223ca"
+checksum = "456b8a8feb6f42d237746d4b3e9a178494627745c3c56c6ea55d92ba50d026fc"
dependencies = [
- "async-lock",
+ "autocfg",
"cfg-if",
"concurrent-queue",
"futures-io",
@@ -404,7 +404,7 @@ dependencies = [
"polling",
"rustix 1.1.2",
"slab",
- "windows-sys 0.60.2",
+ "windows-sys 0.61.0",
]
[[package]]
@@ -573,9 +573,9 @@ dependencies = [
[[package]]
name = "aws-sdk-sso"
-version = "1.83.0"
+version = "1.84.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "643cd43af212d2a1c4dedff6f044d7e1961e5d9e7cfe773d70f31d9842413886"
+checksum = "357a841807f6b52cb26123878b3326921e2a25faca412fabdd32bd35b7edd5d3"
dependencies = [
"aws-credential-types",
"aws-runtime",
@@ -595,9 +595,9 @@ dependencies = [
[[package]]
name = "aws-sdk-ssooidc"
-version = "1.84.0"
+version = "1.85.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "20ec4a95bd48e0db7a424356a161f8d87bd6a4f0af37204775f0da03d9e39fc3"
+checksum = "67e05f33b6c9026fecfe9b3b6740f34d41bc6ff641a6a32dabaab60209245b75"
dependencies = [
"aws-credential-types",
"aws-runtime",
@@ -617,9 +617,9 @@ dependencies = [
[[package]]
name = "aws-sdk-sts"
-version = "1.85.0"
+version = "1.86.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "410309ad0df4606bc721aff0d89c3407682845453247213a0ccc5ff8801ee107"
+checksum = "e7d835f123f307cafffca7b9027c14979f1d403b417d8541d67cf252e8a21e35"
dependencies = [
"aws-credential-types",
"aws-runtime",
@@ -745,9 +745,9 @@ dependencies = [
[[package]]
name = "aws-smithy-runtime"
-version = "1.9.1"
+version = "1.9.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d3946acbe1ead1301ba6862e712c7903ca9bb230bdf1fbd1b5ac54158ef2ab1f"
+checksum = "4fa63ad37685ceb7762fa4d73d06f1d5493feb88e3f27259b9ed277f4c01b185"
dependencies = [
"aws-smithy-async",
"aws-smithy-http",
@@ -1189,9 +1189,9 @@ dependencies = [
[[package]]
name = "comfy-table"
-version = "7.2.0"
+version = "7.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3f8e18d0dca9578507f13f9803add0df13362b02c501c1c17734f0dbb52eaf0b"
+checksum = "b03b7db8e0b4b2fdad6c551e634134e99ec000e5c8c3b6856c65e8bbaded7a3b"
dependencies = [
"unicode-segmentation",
"unicode-width",
@@ -1388,9 +1388,9 @@ dependencies = [
[[package]]
name = "datafusion"
-version = "49.0.2"
+version = "50.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "69dfeda1633bf8ec75b068d9f6c27cdc392ffcf5ff83128d5dbab65b73c1fd02"
+checksum = "481d0c1cad7606cee11233abcdff8eec46e43dd25abda007db6d5d26ae8483c4"
dependencies = [
"arrow",
"arrow-ipc",
@@ -1416,6 +1416,7 @@ dependencies = [
"datafusion-functions-window",
"datafusion-optimizer",
"datafusion-physical-expr",
+ "datafusion-physical-expr-adapter",
"datafusion-physical-expr-common",
"datafusion-physical-optimizer",
"datafusion-physical-plan",
@@ -1438,9 +1439,9 @@ dependencies = [
[[package]]
name = "datafusion-catalog"
-version = "49.0.2"
+version = "50.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2848fd1e85e2953116dab9cc2eb109214b0888d7bbd2230e30c07f1794f642c0"
+checksum = "d70327e81ab3a1f5832d8b372d55fa607851d7cea6d1f8e65ff0c98fcc32d222"
dependencies = [
"arrow",
"async-trait",
@@ -1464,9 +1465,9 @@ dependencies = [
[[package]]
name = "datafusion-catalog-listing"
-version = "49.0.2"
+version = "50.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "051a1634628c2d1296d4e326823e7536640d87a118966cdaff069b68821ad53b"
+checksum = "268819e6bb20ba70a664abddc20deac604f30d3267f8c91847064542a8c0720c"
dependencies = [
"arrow",
"async-trait",
@@ -1595,9 +1596,9 @@ dependencies = [
[[package]]
name = "datafusion-common"
-version = "49.0.2"
+version = "50.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "765e4ad4ef7a4500e389a3f1e738791b71ff4c29fd00912c2f541d62b25da096"
+checksum = "054873d5563f115f83ef4270b560ac2ce4de713905e825a40cac49d6ff348254"
dependencies = [
"ahash",
"arrow",
@@ -1619,9 +1620,9 @@ dependencies = [
[[package]]
name = "datafusion-common-runtime"
-version = "49.0.2"
+version = "50.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "40a2ae8393051ce25d232a6065c4558ab5a535c9637d5373bacfd464ac88ea12"
+checksum = "b8a1d1bc69aaaadb8008b65329ed890b33e845dc063225c190f77b20328fbe1d"
dependencies = [
"futures",
"log",
@@ -1630,9 +1631,9 @@ dependencies = [
[[package]]
name = "datafusion-datasource"
-version = "49.0.2"
+version = "50.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "90cd841a77f378bc1a5c4a1c37345e1885a9203b008203f9f4b3a769729bf330"
+checksum = "d855160469020982880fd9bd0962e033d2f4728f56f85a83d8c90785638b6519"
dependencies = [
"arrow",
"async-trait",
@@ -1643,6 +1644,7 @@ dependencies = [
"datafusion-execution",
"datafusion-expr",
"datafusion-physical-expr",
+ "datafusion-physical-expr-adapter",
"datafusion-physical-expr-common",
"datafusion-physical-plan",
"datafusion-session",
@@ -1660,9 +1662,9 @@ dependencies = [
[[package]]
name = "datafusion-datasource-csv"
-version = "49.0.2"
+version = "50.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "77f4a2c64939c6f0dd15b246723a699fa30d59d0133eb36a86e8ff8c6e2a8dc6"
+checksum = "9ec3aa7575378d23aae96b955b5233bea6f9d461648174f6ccc8f3c160f2b7a7"
dependencies = [
"arrow",
"async-trait",
@@ -1685,9 +1687,9 @@ dependencies = [
[[package]]
name = "datafusion-datasource-json"
-version = "49.0.2"
+version = "50.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "11387aaf931b2993ad9273c63ddca33f05aef7d02df9b70fb757429b4b71cdae"
+checksum = "00cfb8f33e2864eeb3188b6818acf5546d56a5a487d423cce9b684a554caabfa"
dependencies = [
"arrow",
"async-trait",
@@ -1710,9 +1712,9 @@ dependencies = [
[[package]]
name = "datafusion-datasource-parquet"
-version = "49.0.2"
+version = "50.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "028f430c5185120bf806347848b8d8acd9823f4038875b3820eeefa35f2bb4a2"
+checksum = "ab3bfb48fb4ff42ac1485a12ea56434eaab53f7da8f00b2443b1a3d35a0b6d10"
dependencies = [
"arrow",
"async-trait",
@@ -1725,6 +1727,7 @@ dependencies = [
"datafusion-expr",
"datafusion-functions-aggregate",
"datafusion-physical-expr",
+ "datafusion-physical-expr-adapter",
"datafusion-physical-expr-common",
"datafusion-physical-optimizer",
"datafusion-physical-plan",
@@ -1742,17 +1745,18 @@ dependencies = [
[[package]]
name = "datafusion-doc"
-version = "49.0.2"
+version = "50.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8ff336d1d755399753a9e4fbab001180e346fc8bfa063a97f1214b82274c00f8"
+checksum = "2fbf41013cf55c2369b5229594898e8108c8a1beeb49d97feb5e0cce9933eb8f"
[[package]]
name = "datafusion-execution"
-version = "49.0.2"
+version = "50.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "042ea192757d1b2d7dcf71643e7ff33f6542c7704f00228d8b85b40003fd8e0f"
+checksum = "26fd0c1ffe3885687758f985ed548184bf63b17b2a7a5ae695de422ad6432118"
dependencies = [
"arrow",
+ "async-trait",
"dashmap",
"datafusion-common",
"datafusion-expr",
@@ -1767,9 +1771,9 @@ dependencies = [
[[package]]
name = "datafusion-expr"
-version = "49.0.2"
+version = "50.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "025222545d6d7fab71e2ae2b356526a1df67a2872222cbae7535e557a42abd2e"
+checksum = "5c4fe6411218a9dab656437b1e69b00a470a7a2d7db087867a366c145eb164a7"
dependencies = [
"arrow",
"async-trait",
@@ -1788,9 +1792,9 @@ dependencies = [
[[package]]
name = "datafusion-expr-common"
-version = "49.0.2"
+version = "50.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9d5c267104849d5fa6d81cf5ba88f35ecd58727729c5eb84066c25227b644ae2"
+checksum = "4a45bee7d2606bfb41ceb1d904ba7cecf69bd5a6f8f3e6c57c3f5a83d84bdd97"
dependencies = [
"arrow",
"datafusion-common",
@@ -1801,9 +1805,9 @@ dependencies = [
[[package]]
name = "datafusion-functions"
-version = "49.0.2"
+version = "50.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c620d105aa208fcee45c588765483314eb415f5571cfd6c1bae3a59c5b4d15bb"
+checksum = "9c7e1c532ff9d14f291160bca23e55ffd4899800301dd2389786c2f02d76904a"
dependencies = [
"arrow",
"arrow-buffer",
@@ -1830,9 +1834,9 @@ dependencies = [
[[package]]
name = "datafusion-functions-aggregate"
-version = "49.0.2"
+version = "50.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "35f61d5198a35ed368bf3aacac74f0d0fa33de7a7cb0c57e9f68ab1346d2f952"
+checksum = "b05d47426645aef1e73b1a034c75ab2401bc504175feb191accbe211ec24a342"
dependencies = [
"ahash",
"arrow",
@@ -1851,9 +1855,9 @@ dependencies = [
[[package]]
name = "datafusion-functions-aggregate-common"
-version = "49.0.2"
+version = "50.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "13efdb17362be39b5024f6da0d977ffe49c0212929ec36eec550e07e2bc7812f"
+checksum = "05c99f648b2b1743de0c1c19eef07e8cc5a085237f172b2e20bf6934e0a804e4"
dependencies = [
"ahash",
"arrow",
@@ -1864,9 +1868,9 @@ dependencies = [
[[package]]
name = "datafusion-functions-nested"
-version = "49.0.2"
+version = "50.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9187678af567d7c9e004b72a0b6dc5b0a00ebf4901cb3511ed2db4effe092e66"
+checksum = "4227782023f4fb68d3d5c5eb190665212f43c9a0b437553e4b938b379aff6cf6"
dependencies = [
"arrow",
"arrow-ord",
@@ -1886,9 +1890,9 @@ dependencies = [
[[package]]
name = "datafusion-functions-table"
-version = "49.0.2"
+version = "50.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ecf156589cc21ef59fe39c7a9a841b4a97394549643bbfa88cc44e8588cf8fe5"
+checksum = "3d902b1769f69058236e89f04f3bff2cf62f24311adb7bf3c6c3e945c9451076"
dependencies = [
"arrow",
"async-trait",
@@ -1902,9 +1906,9 @@ dependencies = [
[[package]]
name = "datafusion-functions-window"
-version = "49.0.2"
+version = "50.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "edcb25e3e369f1366ec9a261456e45b5aad6ea1c0c8b4ce546587207c501ed9e"
+checksum = "4b8ee43974c92eb9920fe8e97e0fab48675e93b062abcb48bef4c1d4305b6ee4"
dependencies = [
"arrow",
"datafusion-common",
@@ -1920,9 +1924,9 @@ dependencies = [
[[package]]
name = "datafusion-functions-window-common"
-version = "49.0.2"
+version = "50.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8996a8e11174d0bd7c62dc2f316485affc6ae5ffd5b8a68b508137ace2310294"
+checksum = "a1e149d36cdd44fb425dc815c5fac55025aa9a592dd65cb3c421881096292c02"
dependencies = [
"datafusion-common",
"datafusion-physical-expr-common",
@@ -1930,9 +1934,9 @@ dependencies = [
[[package]]
name = "datafusion-macros"
-version = "49.0.2"
+version = "50.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "95ee8d1be549eb7316f437035f2cec7ec42aba8374096d807c4de006a3b5d78a"
+checksum = "07c9faa0cdefb6e6e756482b846397b5c2d84d369e30b009472b9ab9b1430fbd"
dependencies = [
"datafusion-expr",
"quote",
@@ -1941,9 +1945,9 @@ dependencies = [
[[package]]
name = "datafusion-optimizer"
-version = "49.0.2"
+version = "50.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c9fa98671458254928af854e5f6c915e66b860a8bde505baea0ff2892deab74d"
+checksum = "f16a4f7059302ad1de6e97ab0eebb5c34405917b1f80806a30a66e38ad118251"
dependencies = [
"arrow",
"chrono",
@@ -1960,9 +1964,9 @@ dependencies = [
[[package]]
name = "datafusion-physical-expr"
-version = "49.0.2"
+version = "50.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3515d51531cca5f7b5a6f3ea22742b71bb36fc378b465df124ff9a2fa349b002"
+checksum = "10bb87a605d8ce9672d5347c0293c12211b0c03923fc12fbdc665fe76e6f9e01"
dependencies = [
"ahash",
"arrow",
@@ -1976,15 +1980,31 @@ dependencies = [
"indexmap",
"itertools 0.14.0",
"log",
+ "parking_lot",
"paste",
"petgraph 0.8.2",
]
+[[package]]
+name = "datafusion-physical-expr-adapter"
+version = "50.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2da3a7429a555dd5ff0bec4d24bd5532ec43876764088da635cad55b2f178dc2"
+dependencies = [
+ "arrow",
+ "datafusion-common",
+ "datafusion-expr",
+ "datafusion-functions",
+ "datafusion-physical-expr",
+ "datafusion-physical-expr-common",
+ "itertools 0.14.0",
+]
+
[[package]]
name = "datafusion-physical-expr-common"
-version = "49.0.2"
+version = "50.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "24485475d9c618a1d33b2a3dad003d946dc7a7bbf0354d125301abc0a5a79e3e"
+checksum = "845eb44ef1e04d2a15c6d955cb146b40a41814a7be4377f0a541857d3e257d6f"
dependencies = [
"ahash",
"arrow",
@@ -1996,9 +2016,9 @@ dependencies = [
[[package]]
name = "datafusion-physical-optimizer"
-version = "49.0.2"
+version = "50.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b9da411a0a64702f941a12af2b979434d14ec5d36c6f49296966b2c7639cbb3a"
+checksum = "32b9b648ee2785722c79eae366528e52e93ece6808aef9297cf8e5521de381da"
dependencies = [
"arrow",
"datafusion-common",
@@ -2015,9 +2035,9 @@ dependencies = [
[[package]]
name = "datafusion-physical-plan"
-version = "49.0.2"
+version = "50.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a6d168282bb7b54880bb3159f89b51c047db4287f5014d60c3ef4c6e1468212b"
+checksum = "7e6688d17b78104e169d7069749832c20ff50f112be853d2c058afe46c889064"
dependencies = [
"ahash",
"arrow",
@@ -2029,6 +2049,7 @@ dependencies = [
"datafusion-common-runtime",
"datafusion-execution",
"datafusion-expr",
+ "datafusion-functions-aggregate-common",
"datafusion-functions-window-common",
"datafusion-physical-expr",
"datafusion-physical-expr-common",
@@ -2045,9 +2066,9 @@ dependencies = [
[[package]]
name = "datafusion-pruning"
-version = "49.0.0"
+version = "50.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "391a457b9d23744c53eeb89edd1027424cba100581488d89800ed841182df905"
+checksum = "8a893a46c56f5f190085e13949eb8ec163672c7ec2ac33bdb82c84572e71ca73"
dependencies = [
"arrow",
"arrow-schema",
@@ -2063,9 +2084,9 @@ dependencies = [
[[package]]
name = "datafusion-session"
-version = "49.0.2"
+version = "50.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "053201c2bb729c7938f85879034df2b5a52cfaba16f1b3b66ab8505c81b2aad3"
+checksum = "f8b62684c7a1db6121a8c83100209cffa1e664a8d9ced87e1a32f8cdc2fff3c2"
dependencies = [
"arrow",
"async-trait",
@@ -2087,11 +2108,13 @@ dependencies = [
[[package]]
name = "datafusion-spark"
-version = "49.0.2"
+version = "50.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "126ceb2436b498ddd66c0a9f58304bce6b903f622c681c0ca9e52cf229c55ea0"
+checksum = "b217679a4f1bbfae8ec97143950c42940eebba7c566b4141184fa8f81d296db1"
dependencies = [
"arrow",
+ "chrono",
+ "crc32fast",
"datafusion-catalog",
"datafusion-common",
"datafusion-execution",
@@ -2099,13 +2122,16 @@ dependencies = [
"datafusion-functions",
"datafusion-macros",
"log",
+ "sha1",
+ "url",
+ "xxhash-rust",
]
[[package]]
name = "datafusion-sql"
-version = "49.0.2"
+version = "50.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9082779be8ce4882189b229c0cff4393bd0808282a7194130c9f32159f185e25"
+checksum = "f09cff94b8242843e1da5d069e9d2cfc53807f1f00b1c0da78c297f47c21456e"
dependencies = [
"arrow",
"bigdecimal",
@@ -2500,7 +2526,7 @@ dependencies = [
"js-sys",
"libc",
"r-efi",
- "wasi 0.14.5+wasi-0.2.4",
+ "wasi 0.14.7+wasi-0.2.4",
"wasm-bindgen",
]
@@ -2703,9 +2729,9 @@ checksum =
"6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87"
[[package]]
name = "humantime"
-version = "2.2.0"
+version = "2.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9b112acc8b3adf4b107a8ec20977da0273a8c386765a3ec0229bd500a1443f9f"
+checksum = "135b12329e5e3ce057a9f972339ea52bc954fe1e9358ef27f95e89716fbc5424"
[[package]]
name = "hyper"
@@ -2749,9 +2775,9 @@ dependencies = [
[[package]]
name = "hyper-util"
-version = "0.1.16"
+version = "0.1.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8d9b05277c7e8da2c93a568989bb6207bef0112e8d17df7a6eda4a3cf143bc5e"
+checksum = "3c6995591a8f1380fcb4ba966a252a4b29188d51d2b89e3a252f5305be65aea8"
dependencies = [
"base64",
"bytes",
@@ -2773,9 +2799,9 @@ dependencies = [
[[package]]
name = "iana-time-zone"
-version = "0.1.63"
+version = "0.1.64"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b0c919e5debc312ad217002b8048a17b7d83f80703865bbfcfebb0458b0b27d8"
+checksum = "33e57f83510bb73707521ebaffa789ec8caf86f9657cad665b092b581d40e9fb"
dependencies = [
"android_system_properties",
"core-foundation-sys",
@@ -2904,9 +2930,9 @@ dependencies = [
[[package]]
name = "indexmap"
-version = "2.11.1"
+version = "2.11.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "206a8042aec68fa4a62e8d3f7aa4ceb508177d9324faf261e1959e495b7a1921"
+checksum = "92119844f513ffa41556430369ab02c295a3578af21cf945caa3e9e0c2481ac3"
dependencies = [
"equivalent",
"hashbrown 0.15.5",
@@ -3611,9 +3637,9 @@ dependencies = [
[[package]]
name = "parquet"
-version = "55.2.0"
+version = "56.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b17da4150748086bd43352bc77372efa9b6e3dbd06a04831d2a98c041c225cfa"
+checksum = "c7288a07ed5d25939a90f9cb1ca5afa6855faa08ec7700613511ae64bdb0620c"
dependencies = [
"ahash",
"arrow-array",
@@ -3783,16 +3809,16 @@ dependencies = [
[[package]]
name = "polling"
-version = "3.10.0"
+version = "3.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b5bd19146350fe804f7cb2669c851c03d69da628803dab0d98018142aaa5d829"
+checksum = "5d0e4f59085d47d8241c88ead0f274e8a0cb551f3625263c05eb8dd897c34218"
dependencies = [
"cfg-if",
"concurrent-queue",
"hermit-abi",
"pin-project-lite",
"rustix 1.1.2",
- "windows-sys 0.60.2",
+ "windows-sys 0.61.0",
]
[[package]]
@@ -4295,9 +4321,9 @@ dependencies = [
[[package]]
name = "rustls-webpki"
-version = "0.103.4"
+version = "0.103.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0a17884ae0c1b773f1ccd2bd4a8c72f16da897310a98b0e84bf349ad5ead92fc"
+checksum = "8572f3c2cb9934231157b45499fc41e1f58c589fdfb81a844ba873265e80f8eb"
dependencies = [
"aws-lc-rs",
"ring",
@@ -4366,9 +4392,9 @@ dependencies = [
[[package]]
name = "semver"
-version = "1.0.26"
+version = "1.0.27"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "56e6fa9c48d24d85fb3de5ad847117517440f6beceb7798af16b4a87d616b8d0"
+checksum = "d767eb0aabc880b29956c35734170f26ed551a859dbd361d140cdbeca61ab1e2"
[[package]]
name = "seq-macro"
@@ -4378,10 +4404,11 @@ checksum =
"1bc711410fbe7399f390ca1c3b60ad0f53f80e95c5eb935e52268a0e2cd49acc"
[[package]]
name = "serde"
-version = "1.0.219"
+version = "1.0.225"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6"
+checksum = "fd6c24dee235d0da097043389623fb913daddf92c76e9f5a1db88607a0bcbd1d"
dependencies = [
+ "serde_core",
"serde_derive",
]
@@ -4395,11 +4422,20 @@ dependencies = [
"serde",
]
+[[package]]
+name = "serde_core"
+version = "1.0.225"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "659356f9a0cb1e529b24c01e43ad2bdf520ec4ceaf83047b83ddcc2251f96383"
+dependencies = [
+ "serde_derive",
+]
+
[[package]]
name = "serde_derive"
-version = "1.0.219"
+version = "1.0.225"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00"
+checksum = "0ea936adf78b1f766949a4977b91d2f5595825bd6ec079aa9543ad2685fc4516"
dependencies = [
"proc-macro2",
"quote",
@@ -4408,14 +4444,15 @@ dependencies = [
[[package]]
name = "serde_json"
-version = "1.0.143"
+version = "1.0.145"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d401abef1d108fbd9cbaebc3e46611f4b1021f714a0597a71f41ee463f5f4a5a"
+checksum = "402a6f66d8c709116cf22f558eab210f5a50187f702eb4d7e5ef38d9a7f1c79c"
dependencies = [
"itoa",
"memchr",
"ryu",
"serde",
+ "serde_core",
]
[[package]]
@@ -4443,6 +4480,17 @@ dependencies = [
"unsafe-libyaml",
]
+[[package]]
+name = "sha1"
+version = "0.10.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba"
+dependencies = [
+ "cfg-if",
+ "cpufeatures",
+ "digest",
+]
+
[[package]]
name = "sha2"
version = "0.10.9"
@@ -4526,9 +4574,9 @@ dependencies = [
[[package]]
name = "sqlparser"
-version = "0.55.0"
+version = "0.58.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c4521174166bac1ff04fe16ef4524c70144cd29682a45978978ca3d7f4e0be11"
+checksum = "ec4b661c54b1e4b603b37873a18c59920e4c51ea8ea2cf527d925424dbd4437c"
dependencies = [
"log",
"sqlparser_derive",
@@ -5099,18 +5147,18 @@ checksum =
"ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b"
[[package]]
name = "wasi"
-version = "0.14.5+wasi-0.2.4"
+version = "0.14.7+wasi-0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a4494f6290a82f5fe584817a676a34b9d6763e8d9d18204009fb31dceca98fd4"
+checksum = "883478de20367e224c0090af9cf5f9fa85bed63a95c1abf3afc5c083ebc06e8c"
dependencies = [
"wasip2",
]
[[package]]
name = "wasip2"
-version = "1.0.0+wasi-0.2.4"
+version = "1.0.1+wasi-0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "03fa2761397e5bd52002cd7e73110c71af2109aca4e521a9f40473fe685b0a24"
+checksum = "0562428422c63773dad2c345a1882263bbf4d65cf3f42e90921f787ef5ad58e7"
dependencies = [
"wit-bindgen",
]
@@ -5274,13 +5322,13 @@ checksum =
"712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
[[package]]
name = "windows-core"
-version = "0.61.2"
+version = "0.62.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c0fdd3ddb90610c7638aa2b3a3ab2904fb9e5cdbecc643ddb3647212781c4ae3"
+checksum = "57fe7168f7de578d2d8a05b07fd61870d2e73b4020e9f49aa00da8471723497c"
dependencies = [
"windows-implement",
"windows-interface",
- "windows-link 0.1.3",
+ "windows-link 0.2.0",
"windows-result",
"windows-strings",
]
@@ -5321,20 +5369,20 @@ checksum =
"45e46c0661abb7180e7b9c281db115305d49ca1709ab8242adf09666d2173c65"
[[package]]
name = "windows-result"
-version = "0.3.4"
+version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "56f42bd332cc6c8eac5af113fc0c1fd6a8fd2aa08a0119358686e5160d0586c6"
+checksum = "7084dcc306f89883455a206237404d3eaf961e5bd7e0f312f7c91f57eb44167f"
dependencies = [
- "windows-link 0.1.3",
+ "windows-link 0.2.0",
]
[[package]]
name = "windows-strings"
-version = "0.4.2"
+version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "56e6c93f3a0c3b36176cb1327a4958a0353d5d166c2a35cb268ace15e91d3b57"
+checksum = "7218c655a553b0bed4426cf54b20d7ba363ef543b52d515b3e48d7fd55318dda"
dependencies = [
- "windows-link 0.1.3",
+ "windows-link 0.2.0",
]
[[package]]
@@ -5570,9 +5618,9 @@ checksum =
"271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486"
[[package]]
name = "wit-bindgen"
-version = "0.45.1"
+version = "0.46.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5c573471f125075647d03df72e026074b7203790d41351cd6edc96f46bcccd36"
+checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59"
[[package]]
name = "writeable"
@@ -5586,6 +5634,12 @@ version = "0.13.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "66fee0b777b0f5ac1c69bb06d361268faafa61cd4682ae064a171c16c433e9e4"
+[[package]]
+name = "xxhash-rust"
+version = "0.8.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fdd20c5420375476fbd4394763288da7eb0cc0b8c11deed431a91562af7335d3"
+
[[package]]
name = "yoke"
version = "0.8.0"
diff --git a/native/Cargo.toml b/native/Cargo.toml
index 3a18640ce..8f65f64a0 100644
--- a/native/Cargo.toml
+++ b/native/Cargo.toml
@@ -31,15 +31,15 @@ license = "Apache-2.0"
edition = "2021"
# Comet uses the same minimum Rust version as DataFusion
-rust-version = "1.85"
+rust-version = "1.86"
[workspace.dependencies]
-arrow = { version = "55.2.0", features = ["prettyprint", "ffi", "chrono-tz"] }
+arrow = { version = "56.0.0", features = ["prettyprint", "ffi", "chrono-tz"] }
async-trait = { version = "0.1" }
bytes = { version = "1.10.0" }
-parquet = { version = "55.2.0", default-features = false, features =
["experimental"] }
-datafusion = { version = "49.0.2", default-features = false, features =
["unicode_expressions", "crypto_expressions", "nested_expressions", "parquet"] }
-datafusion-spark = { version = "49.0.2" }
+parquet = { version = "=56.0.0", default-features = false, features =
["experimental"] }
+datafusion = { version = "50.0.0", default-features = false, features =
["unicode_expressions", "crypto_expressions", "nested_expressions", "parquet"] }
+datafusion-spark = { version = "50.0.0" }
datafusion-comet-spark-expr = { path = "spark-expr" }
datafusion-comet-proto = { path = "proto" }
chrono = { version = "0.4", default-features = false, features = ["clock"] }
diff --git a/native/core/Cargo.toml b/native/core/Cargo.toml
index 16a8a7316..b1d2b29ff 100644
--- a/native/core/Cargo.toml
+++ b/native/core/Cargo.toml
@@ -36,7 +36,7 @@ publish = false
[dependencies]
arrow = { workspace = true }
-parquet = { workspace = true, default-features = false, features =
["experimental"] }
+parquet = { workspace = true, default-features = false, features =
["experimental", "arrow"] }
futures = { workspace = true }
mimalloc = { version = "*", default-features = false, optional = true }
tikv-jemallocator = { version = "0.6.0", optional = true, features =
["disable_initial_exec_tls"] }
@@ -91,7 +91,7 @@ jni = { version = "0.21", features = ["invocation"] }
lazy_static = "1.4"
assertables = "9"
hex = "0.4.3"
-datafusion-functions-nested = { version = "49.0.2" }
+datafusion-functions-nested = { version = "50.0.0" }
[features]
default = []
diff --git a/native/core/src/execution/jni_api.rs
b/native/core/src/execution/jni_api.rs
index ef5435cbc..3446f42b2 100644
--- a/native/core/src/execution/jni_api.rs
+++ b/native/core/src/execution/jni_api.rs
@@ -35,14 +35,14 @@ use datafusion::execution::memory_pool::MemoryPool;
use datafusion::execution::runtime_env::RuntimeEnvBuilder;
use datafusion::logical_expr::ScalarUDF;
use datafusion::{
- execution::{disk_manager::DiskManagerBuilder, runtime_env::RuntimeEnv},
+ execution::disk_manager::DiskManagerBuilder,
physical_plan::{display::DisplayableExecutionPlan,
SendableRecordBatchStream},
prelude::{SessionConfig, SessionContext},
};
use datafusion_comet_proto::spark_operator::Operator;
use datafusion_spark::function::hash::sha2::SparkSha2;
use datafusion_spark::function::math::expm1::SparkExpm1;
-use datafusion_spark::function::string::char::SparkChar;
+use datafusion_spark::function::string::char::CharFunc;
use futures::poll;
use futures::stream::StreamExt;
use jni::objects::JByteBuffer;
@@ -291,8 +291,7 @@ fn prepare_datafusion_session_context(
&ScalarValue::Float64(Some(1.1)),
);
- #[allow(deprecated)]
- let runtime = RuntimeEnv::try_new(rt_config)?;
+ let runtime = rt_config.build()?;
let mut session_ctx = SessionContext::new_with_config_rt(session_config,
Arc::new(runtime));
@@ -301,7 +300,7 @@ fn prepare_datafusion_session_context(
// register UDFs from datafusion-spark crate
session_ctx.register_udf(ScalarUDF::new_from_impl(SparkExpm1::default()));
session_ctx.register_udf(ScalarUDF::new_from_impl(SparkSha2::default()));
- session_ctx.register_udf(ScalarUDF::new_from_impl(SparkChar::default()));
+ session_ctx.register_udf(ScalarUDF::new_from_impl(CharFunc::default()));
// Must be the last one to override existing functions with the same name
datafusion_comet_spark_expr::register_all_comet_functions(&mut
session_ctx)?;
diff --git a/native/core/src/execution/planner.rs
b/native/core/src/execution/planner.rs
index 0e832599d..e9f5885bf 100644
--- a/native/core/src/execution/planner.rs
+++ b/native/core/src/execution/planner.rs
@@ -40,6 +40,7 @@ use datafusion::physical_plan::InputOrderMode;
use datafusion::{
arrow::{compute::SortOptions, datatypes::SchemaRef},
common::DataFusionError,
+ config::ConfigOptions,
execution::FunctionRegistry,
functions_aggregate::first_last::{FirstValue, LastValue},
logical_expr::Operator as DataFusionOperator,
@@ -623,8 +624,13 @@ impl PhysicalPlanner {
let args = vec![child];
let comet_hour =
Arc::new(ScalarUDF::new_from_impl(SparkHour::new(timezone)));
let field_ref = Arc::new(Field::new("hour", DataType::Int32,
true));
- let expr: ScalarFunctionExpr =
- ScalarFunctionExpr::new("hour", comet_hour, args,
field_ref);
+ let expr: ScalarFunctionExpr = ScalarFunctionExpr::new(
+ "hour",
+ comet_hour,
+ args,
+ field_ref,
+ Arc::new(ConfigOptions::default()),
+ );
Ok(Arc::new(expr))
}
@@ -635,8 +641,13 @@ impl PhysicalPlanner {
let args = vec![child];
let comet_minute =
Arc::new(ScalarUDF::new_from_impl(SparkMinute::new(timezone)));
let field_ref = Arc::new(Field::new("minute", DataType::Int32,
true));
- let expr: ScalarFunctionExpr =
- ScalarFunctionExpr::new("minute", comet_minute, args,
field_ref);
+ let expr: ScalarFunctionExpr = ScalarFunctionExpr::new(
+ "minute",
+ comet_minute,
+ args,
+ field_ref,
+ Arc::new(ConfigOptions::default()),
+ );
Ok(Arc::new(expr))
}
@@ -647,8 +658,13 @@ impl PhysicalPlanner {
let args = vec![child];
let comet_second =
Arc::new(ScalarUDF::new_from_impl(SparkSecond::new(timezone)));
let field_ref = Arc::new(Field::new("second", DataType::Int32,
true));
- let expr: ScalarFunctionExpr =
- ScalarFunctionExpr::new("second", comet_second, args,
field_ref);
+ let expr: ScalarFunctionExpr = ScalarFunctionExpr::new(
+ "second",
+ comet_second,
+ args,
+ field_ref,
+ Arc::new(ConfigOptions::default()),
+ );
Ok(Arc::new(expr))
}
@@ -870,8 +886,13 @@ impl PhysicalPlanner {
ScalarUDF::new_from_impl(BloomFilterMightContain::try_new(bloom_filter_expr)?);
let field_ref = Arc::new(Field::new("might_contain",
DataType::Boolean, true));
- let expr: ScalarFunctionExpr =
- ScalarFunctionExpr::new("might_contain", Arc::new(udf),
args, field_ref);
+ let expr: ScalarFunctionExpr = ScalarFunctionExpr::new(
+ "might_contain",
+ Arc::new(udf),
+ args,
+ field_ref,
+ Arc::new(ConfigOptions::default()),
+ );
Ok(Arc::new(expr))
}
ExprStruct::CreateNamedStruct(expr) => {
@@ -1090,6 +1111,7 @@ impl PhysicalPlanner {
fun_expr,
vec![left, right],
Arc::new(Field::new(func_name, data_type, true)),
+ Arc::new(ConfigOptions::default()),
)))
}
_ => {
@@ -1115,6 +1137,7 @@ impl PhysicalPlanner {
fun_expr,
vec![left, right],
Arc::new(Field::new(op_str, data_type, true)),
+ Arc::new(ConfigOptions::default()),
)))
} else {
Ok(Arc::new(BinaryExpr::new(left, op, right)))
@@ -2354,6 +2377,8 @@ impl PhysicalPlanner {
window_frame.into(),
input_schema.as_ref(),
false, // TODO: Ignore nulls
+ false, // TODO: Spark does not support DISTINCT ... OVER
+ None,
)
.map_err(|e| ExecutionError::DataFusionError(e.to_string()))
}
@@ -2533,6 +2558,7 @@ impl PhysicalPlanner {
fun_expr,
args.to_vec(),
Arc::new(Field::new(fun_name, data_type, true)),
+ Arc::new(ConfigOptions::default()),
));
Ok(scalar_expr)
diff --git a/native/core/src/execution/shuffle/shuffle_writer.rs
b/native/core/src/execution/shuffle/shuffle_writer.rs
index fe303618c..e96d0574d 100644
--- a/native/core/src/execution/shuffle/shuffle_writer.rs
+++ b/native/core/src/execution/shuffle/shuffle_writer.rs
@@ -1350,7 +1350,7 @@ mod test {
#[tokio::test]
async fn shuffle_repartitioner_memory() {
let batch = create_batch(900);
- assert_eq!(8376, batch.get_array_memory_size());
+ assert_eq!(8316, batch.get_array_memory_size()); // Not stable across
Arrow versions
let memory_limit = 512 * 1024;
let num_partitions = 2;
diff --git a/native/spark-expr/src/agg_funcs/avg.rs
b/native/spark-expr/src/agg_funcs/avg.rs
index d8b810c93..e8b90b4f4 100644
--- a/native/spark-expr/src/agg_funcs/avg.rs
+++ b/native/spark-expr/src/agg_funcs/avg.rs
@@ -37,7 +37,7 @@ use datafusion::logical_expr::Volatility::Immutable;
use DataType::*;
/// AVG aggregate expression
-#[derive(Debug, Clone)]
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Avg {
name: String,
signature: Signature,
diff --git a/native/spark-expr/src/agg_funcs/avg_decimal.rs
b/native/spark-expr/src/agg_funcs/avg_decimal.rs
index 69c57783a..9f7c82641 100644
--- a/native/spark-expr/src/agg_funcs/avg_decimal.rs
+++ b/native/spark-expr/src/agg_funcs/avg_decimal.rs
@@ -40,7 +40,7 @@ use num::{integer::div_ceil, Integer};
use DataType::*;
/// AVG aggregate expression
-#[derive(Debug, Clone)]
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct AvgDecimal {
signature: Signature,
sum_data_type: DataType,
diff --git a/native/spark-expr/src/agg_funcs/correlation.rs
b/native/spark-expr/src/agg_funcs/correlation.rs
index e1ce29a57..9803855e3 100644
--- a/native/spark-expr/src/agg_funcs/correlation.rs
+++ b/native/spark-expr/src/agg_funcs/correlation.rs
@@ -38,7 +38,7 @@ use datafusion::physical_expr::expressions::StatsType;
/// we have our own implementation is that DataFusion has UInt64 for
state_field `count`,
/// while Spark has Double for count. Also we have added
`null_on_divide_by_zero`
/// to be consistent with Spark's implementation.
-#[derive(Debug)]
+#[derive(Debug, PartialEq, Eq, Hash)]
pub struct Correlation {
name: String,
signature: Signature,
diff --git a/native/spark-expr/src/agg_funcs/covariance.rs
b/native/spark-expr/src/agg_funcs/covariance.rs
index 28f038c3b..d40824809 100644
--- a/native/spark-expr/src/agg_funcs/covariance.rs
+++ b/native/spark-expr/src/agg_funcs/covariance.rs
@@ -38,7 +38,7 @@ use std::sync::Arc;
/// The implementation mostly is the same as the DataFusion's implementation.
The reason
/// we have our own implementation is that DataFusion has UInt64 for
state_field count,
/// while Spark has Double for count.
-#[derive(Debug, Clone)]
+#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Covariance {
name: String,
signature: Signature,
@@ -46,6 +46,15 @@ pub struct Covariance {
null_on_divide_by_zero: bool,
}
+impl std::hash::Hash for Covariance {
+ fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
+ self.name.hash(state);
+ self.signature.hash(state);
+ (self.stats_type as u8).hash(state);
+ self.null_on_divide_by_zero.hash(state);
+ }
+}
+
impl Covariance {
/// Create a new COVAR aggregate function
pub fn new(
diff --git a/native/spark-expr/src/agg_funcs/stddev.rs
b/native/spark-expr/src/agg_funcs/stddev.rs
index abdc641be..b231b8afa 100644
--- a/native/spark-expr/src/agg_funcs/stddev.rs
+++ b/native/spark-expr/src/agg_funcs/stddev.rs
@@ -36,7 +36,7 @@ use datafusion::physical_expr::expressions::StatsType;
/// we have our own implementation is that DataFusion has UInt64 for
state_field `count`,
/// while Spark has Double for count. Also we have added
`null_on_divide_by_zero`
/// to be consistent with Spark's implementation.
-#[derive(Debug)]
+#[derive(Debug, PartialEq, Eq)]
pub struct Stddev {
name: String,
signature: Signature,
@@ -44,6 +44,15 @@ pub struct Stddev {
null_on_divide_by_zero: bool,
}
+impl std::hash::Hash for Stddev {
+ fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
+ self.name.hash(state);
+ self.signature.hash(state);
+ (self.stats_type as u8).hash(state);
+ self.null_on_divide_by_zero.hash(state);
+ }
+}
+
impl Stddev {
/// Create a new STDDEV aggregate function
pub fn new(
diff --git a/native/spark-expr/src/agg_funcs/sum_decimal.rs
b/native/spark-expr/src/agg_funcs/sum_decimal.rs
index e33effb62..cc2585590 100644
--- a/native/spark-expr/src/agg_funcs/sum_decimal.rs
+++ b/native/spark-expr/src/agg_funcs/sum_decimal.rs
@@ -29,7 +29,7 @@ use datafusion::logical_expr::{
};
use std::{any::Any, ops::BitAnd, sync::Arc};
-#[derive(Debug)]
+#[derive(Debug, PartialEq, Eq, Hash)]
pub struct SumDecimal {
/// Aggregate function signature
signature: Signature,
diff --git a/native/spark-expr/src/agg_funcs/variance.rs
b/native/spark-expr/src/agg_funcs/variance.rs
index 074b1181d..c97e664dd 100644
--- a/native/spark-expr/src/agg_funcs/variance.rs
+++ b/native/spark-expr/src/agg_funcs/variance.rs
@@ -34,7 +34,7 @@ use std::sync::Arc;
/// we have our own implementation is that DataFusion has UInt64 for
state_field `count`,
/// while Spark has Double for count. Also we have added
`null_on_divide_by_zero`
/// to be consistent with Spark's implementation.
-#[derive(Debug)]
+#[derive(Debug, PartialEq, Eq)]
pub struct Variance {
name: String,
signature: Signature,
@@ -42,6 +42,15 @@ pub struct Variance {
null_on_divide_by_zero: bool,
}
+impl std::hash::Hash for Variance {
+ fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
+ self.name.hash(state);
+ self.signature.hash(state);
+ (self.stats_type as u8).hash(state);
+ self.null_on_divide_by_zero.hash(state);
+ }
+}
+
impl Variance {
/// Create a new VARIANCE aggregate function
pub fn new(
diff --git a/native/spark-expr/src/bitwise_funcs/bitwise_count.rs
b/native/spark-expr/src/bitwise_funcs/bitwise_count.rs
index bee7b1327..4ab63e532 100644
--- a/native/spark-expr/src/bitwise_funcs/bitwise_count.rs
+++ b/native/spark-expr/src/bitwise_funcs/bitwise_count.rs
@@ -22,7 +22,7 @@ use datafusion::{error::DataFusionError,
logical_expr::ColumnarValue};
use std::any::Any;
use std::sync::Arc;
-#[derive(Debug)]
+#[derive(Debug, PartialEq, Eq, Hash)]
pub struct SparkBitwiseCount {
signature: Signature,
aliases: Vec<String>,
diff --git a/native/spark-expr/src/bitwise_funcs/bitwise_get.rs
b/native/spark-expr/src/bitwise_funcs/bitwise_get.rs
index 18b27ef3f..c55e6dd15 100644
--- a/native/spark-expr/src/bitwise_funcs/bitwise_get.rs
+++ b/native/spark-expr/src/bitwise_funcs/bitwise_get.rs
@@ -22,7 +22,7 @@ use datafusion::logical_expr::{ScalarFunctionArgs,
ScalarUDFImpl, Signature, Vol
use std::any::Any;
use std::sync::Arc;
-#[derive(Debug)]
+#[derive(Debug, PartialEq, Eq, Hash)]
pub struct SparkBitwiseGet {
signature: Signature,
aliases: Vec<String>,
diff --git a/native/spark-expr/src/bitwise_funcs/bitwise_not.rs
b/native/spark-expr/src/bitwise_funcs/bitwise_not.rs
index d3e5d29df..45dd0b312 100644
--- a/native/spark-expr/src/bitwise_funcs/bitwise_not.rs
+++ b/native/spark-expr/src/bitwise_funcs/bitwise_not.rs
@@ -23,7 +23,7 @@ use datafusion::logical_expr::{ColumnarValue, Volatility};
use datafusion::logical_expr::{ScalarFunctionArgs, ScalarUDFImpl, Signature};
use std::{any::Any, sync::Arc};
-#[derive(Debug)]
+#[derive(Debug, PartialEq, Eq, Hash)]
pub struct SparkBitwiseNot {
signature: Signature,
aliases: Vec<String>,
diff --git a/native/spark-expr/src/bloom_filter/bloom_filter_agg.rs
b/native/spark-expr/src/bloom_filter/bloom_filter_agg.rs
index 1971efd70..3436b2920 100644
--- a/native/spark-expr/src/bloom_filter/bloom_filter_agg.rs
+++ b/native/spark-expr/src/bloom_filter/bloom_filter_agg.rs
@@ -32,7 +32,7 @@ use datafusion::physical_expr::expressions::Literal;
use datafusion::physical_expr::PhysicalExpr;
use datafusion::physical_plan::Accumulator;
-#[derive(Debug, Clone)]
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct BloomFilterAgg {
signature: Signature,
num_items: i32,
diff --git a/native/spark-expr/src/bloom_filter/bloom_filter_might_contain.rs
b/native/spark-expr/src/bloom_filter/bloom_filter_might_contain.rs
index bf0115429..ea246dfb2 100644
--- a/native/spark-expr/src/bloom_filter/bloom_filter_might_contain.rs
+++ b/native/spark-expr/src/bloom_filter/bloom_filter_might_contain.rs
@@ -27,7 +27,7 @@ use std::sync::Arc;
use crate::bloom_filter::spark_bloom_filter::SparkBloomFilter;
-#[derive(Debug)]
+#[derive(Debug, PartialEq, Eq, Hash)]
pub struct BloomFilterMightContain {
signature: Signature,
bloom_filter: Option<SparkBloomFilter>,
diff --git a/native/spark-expr/src/comet_scalar_funcs.rs
b/native/spark-expr/src/comet_scalar_funcs.rs
index 75f5689ad..93a820ba9 100644
--- a/native/spark-expr/src/comet_scalar_funcs.rs
+++ b/native/spark-expr/src/comet_scalar_funcs.rs
@@ -192,6 +192,26 @@ struct CometScalarFunction {
func: ScalarFunctionImplementation,
}
+impl PartialEq for CometScalarFunction {
+ fn eq(&self, other: &Self) -> bool {
+ self.name == other.name
+ && self.signature == other.signature
+ && self.data_type == other.data_type
+ // Note: we do not test ScalarFunctionImplementation equality, relying
on function metadata.
+ }
+}
+
+impl Eq for CometScalarFunction {}
+
+impl std::hash::Hash for CometScalarFunction {
+ fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
+ self.name.hash(state);
+ self.signature.hash(state);
+ self.data_type.hash(state);
+ // Note: we do not hash ScalarFunctionImplementation, relying on
function metadata.
+ }
+}
+
impl Debug for CometScalarFunction {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("CometScalarFunction")
diff --git a/native/spark-expr/src/datetime_funcs/date_trunc.rs
b/native/spark-expr/src/datetime_funcs/date_trunc.rs
index 861f5a2ae..6d36b0975 100644
--- a/native/spark-expr/src/datetime_funcs/date_trunc.rs
+++ b/native/spark-expr/src/datetime_funcs/date_trunc.rs
@@ -24,7 +24,7 @@ use std::any::Any;
use crate::kernels::temporal::{date_trunc_array_fmt_dyn, date_trunc_dyn};
-#[derive(Debug)]
+#[derive(Debug, PartialEq, Eq, Hash)]
pub struct SparkDateTrunc {
signature: Signature,
aliases: Vec<String>,
diff --git a/native/spark-expr/src/datetime_funcs/extract_date_part.rs
b/native/spark-expr/src/datetime_funcs/extract_date_part.rs
index 4f4debd2b..acb7d2266 100644
--- a/native/spark-expr/src/datetime_funcs/extract_date_part.rs
+++ b/native/spark-expr/src/datetime_funcs/extract_date_part.rs
@@ -26,7 +26,7 @@ use std::{any::Any, fmt::Debug};
macro_rules! extract_date_part {
($struct_name:ident, $fn_name:expr, $date_part_variant:ident) => {
- #[derive(Debug)]
+ #[derive(Debug, PartialEq, Eq, Hash)]
pub struct $struct_name {
signature: Signature,
aliases: Vec<String>,
diff --git a/native/spark-expr/src/math_funcs/modulo_expr.rs
b/native/spark-expr/src/math_funcs/modulo_expr.rs
index 16e8af414..57deed72e 100644
--- a/native/spark-expr/src/math_funcs/modulo_expr.rs
+++ b/native/spark-expr/src/math_funcs/modulo_expr.rs
@@ -20,6 +20,7 @@ use crate::{divide_by_zero_error, Cast, EvalMode,
SparkCastOptions};
use arrow::compute::kernels::numeric::rem;
use arrow::datatypes::*;
use datafusion::common::{exec_err, internal_err, DataFusionError, Result,
ScalarValue};
+use datafusion::config::ConfigOptions;
use datafusion::execution::FunctionRegistry;
use datafusion::physical_expr::expressions::{lit, BinaryExpr};
use datafusion::physical_expr::ScalarFunctionExpr;
@@ -195,6 +196,7 @@ fn create_modulo_scalar_function(
modulo_expr,
vec![left, right],
Arc::new(Field::new(func_name, data_type.clone(), true)),
+ Arc::new(ConfigOptions::default()),
)))
}
diff --git a/native/spark-expr/src/predicate_funcs/rlike.rs
b/native/spark-expr/src/predicate_funcs/rlike.rs
index 0fac45d67..a78e51f1b 100644
--- a/native/spark-expr/src/predicate_funcs/rlike.rs
+++ b/native/spark-expr/src/predicate_funcs/rlike.rs
@@ -23,7 +23,6 @@ use arrow::compute::take;
use arrow::datatypes::{DataType, Schema};
use datafusion::common::{internal_err, Result};
use datafusion::physical_expr::PhysicalExpr;
-use datafusion::physical_expr_common::physical_expr::DynEq;
use datafusion::physical_plan::ColumnarValue;
use regex::Regex;
use std::any::Any;
@@ -47,19 +46,18 @@ pub struct RLike {
pattern: Regex,
}
-impl Hash for RLike {
- fn hash<H: Hasher>(&self, state: &mut H) {
- state.write(self.pattern_str.as_bytes());
+impl PartialEq for RLike {
+ fn eq(&self, other: &Self) -> bool {
+ *(self.child) == *(other.child) && self.pattern_str ==
other.pattern_str
}
}
-impl DynEq for RLike {
- fn dyn_eq(&self, other: &dyn Any) -> bool {
- if let Some(other) = other.downcast_ref::<Self>() {
- self.pattern_str == other.pattern_str
- } else {
- false
- }
+impl Eq for RLike {}
+
+impl Hash for RLike {
+ fn hash<H: Hasher>(&self, state: &mut H) {
+ self.child.hash(state);
+ self.pattern_str.hash(state);
}
}
diff --git a/native/spark-expr/src/string_funcs/string_space.rs
b/native/spark-expr/src/string_funcs/string_space.rs
index 101328f22..00b880730 100644
--- a/native/spark-expr/src/string_funcs/string_space.rs
+++ b/native/spark-expr/src/string_funcs/string_space.rs
@@ -27,7 +27,7 @@ use datafusion::logical_expr::{
};
use std::{any::Any, sync::Arc};
-#[derive(Debug)]
+#[derive(Debug, PartialEq, Eq, Hash)]
pub struct SparkStringSpace {
signature: Signature,
aliases: Vec<String>,
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]