This is an automated email from the ASF dual-hosted git repository.
comphead pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-comet.git
The following commit(s) were added to refs/heads/main by this push:
new 59fae9402 feat: add read array support (#1456)
59fae9402 is described below
commit 59fae94027310927d5025dc2f9062f25fed2e091
Author: Oleks V <[email protected]>
AuthorDate: Tue Mar 18 15:17:52 2025 -0700
feat: add read array support (#1456)
* feat: add read array support
---
native/Cargo.lock | 397 +++++++++++++--------
native/core/Cargo.toml | 1 +
native/core/src/execution/planner.rs | 130 ++++++-
native/core/src/execution/shuffle/row.rs | 1 +
.../org/apache/comet/serde/QueryPlanSerde.scala | 16 +-
.../spark/sql/comet/CometNativeScanExec.scala | 5 +-
.../org/apache/spark/sql/comet/CometScanExec.scala | 4 +-
.../apache/comet/exec/CometNativeReaderSuite.scala | 66 ++++
8 files changed, 469 insertions(+), 151 deletions(-)
diff --git a/native/Cargo.lock b/native/Cargo.lock
index 401d7a6e4..7fbe7b68f 100644
--- a/native/Cargo.lock
+++ b/native/Cargo.lock
@@ -28,7 +28,7 @@ dependencies = [
"getrandom 0.2.15",
"once_cell",
"version_check",
- "zerocopy",
+ "zerocopy 0.7.35",
]
[[package]]
@@ -42,9 +42,9 @@ dependencies = [
[[package]]
name = "aligned-vec"
-version = "0.6.1"
+version = "0.6.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7e0966165eaf052580bd70eb1b32cb3d6245774c0104d1b2793e9650bf83b52a"
+checksum = "dc890384c8602f339876ded803c97ad529f3842aba97f6392b3dba0dd171769b"
dependencies = [
"equator",
]
@@ -99,9 +99,9 @@ checksum =
"55cc3b69f167a1ef2e161439aa98aed94e6028e5f9a59be9a6ffb47aef1651f9"
[[package]]
name = "anyhow"
-version = "1.0.96"
+version = "1.0.97"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6b964d184e89d9b6b67dd2715bc8e74cf3107fb2b529990c90cf517326150bf4"
+checksum = "dcfed56ad506cb2c684a14971b8861fdc3baaaae314b9e5f9bb532cbe3ba7a4f"
[[package]]
name = "arc-swap"
@@ -341,13 +341,13 @@ checksum =
"0c24e9d990669fbd16806bff449e4ac644fd9b1fca014760087732fe4102f131"
[[package]]
name = "async-trait"
-version = "0.1.86"
+version = "0.1.88"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "644dd749086bf3771a2fbc5f256fdb982d53f011c7d5d560304eafeecebce79d"
+checksum = "e539d3fca749fcee5236ab05e93a52867dd549cc157c8cb7f99595f3cedffdb5"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.98",
+ "syn 2.0.100",
]
[[package]]
@@ -499,9 +499,9 @@ checksum =
"1628fb46dfa0b37568d12e5edd512553eccf6a22a78e8bde00bb4aed84d5bdbf"
[[package]]
name = "bytemuck"
-version = "1.21.0"
+version = "1.22.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ef657dfab802224e671f5818e9a4935f9b1957ed18e58292690cc39e7a4092a3"
+checksum = "b6b1fc10dbac614ebc03540c9dbd60e83887fda27794998c6528f1782047d540"
[[package]]
name = "byteorder"
@@ -511,9 +511,9 @@ checksum =
"1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
[[package]]
name = "bytes"
-version = "1.10.0"
+version = "1.10.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f61dac84819c6588b558454b194026eb1f09c293b9036ae9b159e74e73ab6cf9"
+checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a"
[[package]]
name = "cast"
@@ -655,18 +655,18 @@ dependencies = [
[[package]]
name = "clap"
-version = "4.5.31"
+version = "4.5.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "027bb0d98429ae334a8698531da7077bdf906419543a35a55c2cb1b66437d767"
+checksum = "6088f3ae8c3608d19260cd7445411865a485688711b78b5be70d78cd96136f83"
dependencies = [
"clap_builder",
]
[[package]]
name = "clap_builder"
-version = "4.5.31"
+version = "4.5.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5589e0cba072e0f3d23791efac0fd8627b49c829c196a492e88168e6a669d863"
+checksum = "22a7ef7f676155edfb82daa97f99441f3ebf4a58d5e32f295a56259f1b6facc8"
dependencies = [
"anstyle",
"clap_lex",
@@ -984,6 +984,7 @@ dependencies = [
"datafusion-comet-objectstore-hdfs",
"datafusion-comet-proto",
"datafusion-comet-spark-expr",
+ "datafusion-functions-nested",
"futures",
"hex",
"itertools 0.14.0",
@@ -1308,7 +1309,7 @@ checksum =
"64030e805d3d257e3012e4378500d4ac90b1ebacd03f1110e8ec927b77f09486"
dependencies = [
"datafusion-expr",
"quote",
- "syn 2.0.98",
+ "syn 2.0.100",
]
[[package]]
@@ -1474,33 +1475,33 @@ checksum =
"97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.98",
+ "syn 2.0.100",
]
[[package]]
name = "either"
-version = "1.14.0"
+version = "1.15.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b7914353092ddf589ad78f25c5c1c21b7f80b0ff8621e7c814c3485b5306da9d"
+checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
[[package]]
name = "equator"
-version = "0.2.2"
+version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c35da53b5a021d2484a7cc49b2ac7f2d840f8236a286f84202369bd338d761ea"
+checksum = "4711b213838dfee0117e3be6ac926007d7f433d7bbe33595975d4190cb07e6fc"
dependencies = [
"equator-macro",
]
[[package]]
name = "equator-macro"
-version = "0.2.1"
+version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3bf679796c0322556351f287a51b49e48f7c4986e727b5dd78c972d30e2e16cc"
+checksum = "44f23cf4b44bfce11a86ace86f8a73ffdec849c9fd00a386a53d278bd9e81fb3"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.98",
+ "syn 2.0.100",
]
[[package]]
@@ -1668,7 +1669,7 @@ checksum =
"162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.98",
+ "syn 2.0.100",
]
[[package]]
@@ -1726,14 +1727,14 @@ dependencies = [
[[package]]
name = "getrandom"
-version = "0.3.1"
+version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "43a49c392881ce6d5c3b8cb70f98717b7c07aabbdff06687b9030dbfbe2725f8"
+checksum = "73fea8450eea4bac3940448fb7ae50d91f034f941199fcd9d909a5a07aa455f0"
dependencies = [
"cfg-if",
"libc",
- "wasi 0.13.3+wasi-0.2.2",
- "windows-targets 0.52.6",
+ "r-efi",
+ "wasi 0.14.2+wasi-0.2.4",
]
[[package]]
@@ -1769,9 +1770,9 @@ dependencies = [
[[package]]
name = "half"
-version = "2.4.1"
+version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6dd08c532ae367adf81c312a4580bc67f1d0fe8bc9c460520283f4c0ff277888"
+checksum = "7db2ff139bba50379da6aa0766b52fdcb62cb5b263009b09ed58ba604e14bbd1"
dependencies = [
"cfg-if",
"crunchy",
@@ -1811,9 +1812,9 @@ checksum =
"2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
[[package]]
name = "hermit-abi"
-version = "0.4.0"
+version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fbf6a919d6cf397374f7dfeeea91d974c7c0a7221d0d0f4f20d859d329e53fcc"
+checksum = "fbd780fe5cc30f81464441920d82ac8740e2e46b29a6fad543ddd075229ce37e"
[[package]]
name = "hex"
@@ -1832,9 +1833,9 @@ dependencies = [
[[package]]
name = "http"
-version = "1.2.0"
+version = "1.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f16ca2af56261c99fba8bac40a10251ce8188205a4c448fbb745a2e4daa76fea"
+checksum = "f4a85d31aea989eead29a3aaf9e1115a180df8282431156e533de47660892565"
dependencies = [
"bytes",
"fnv",
@@ -1853,12 +1854,12 @@ dependencies = [
[[package]]
name = "http-body-util"
-version = "0.1.2"
+version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "793429d76616a256bcb62c2a2ec2bed781c8307e797e2598c50010f2bee2544f"
+checksum = "b021d93e26becf5dc7e1b75b1bed1fd93124b374ceb73f43d4d4eafec896a64a"
dependencies = [
"bytes",
- "futures-util",
+ "futures-core",
"http",
"http-body",
"pin-project-lite",
@@ -1872,9 +1873,9 @@ checksum =
"6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87"
[[package]]
name = "humantime"
-version = "2.1.0"
+version = "2.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4"
+checksum = "9b112acc8b3adf4b107a8ec20977da0273a8c386765a3ec0229bd500a1443f9f"
[[package]]
name = "hyper"
@@ -2071,7 +2072,7 @@ checksum =
"1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.98",
+ "syn 2.0.100",
]
[[package]]
@@ -2097,9 +2098,9 @@ dependencies = [
[[package]]
name = "indexmap"
-version = "2.7.1"
+version = "2.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8c9c992b02b5b4c94ea26e32fe5bccb7aa7d9f390ab5c1221ff895bc7ea8b652"
+checksum = "3954d50fe15b02142bf25d3b8bdadb634ec3948f103d04ffe3031bc8fe9d7058"
dependencies = [
"equivalent",
"hashbrown 0.15.2",
@@ -2137,9 +2138,9 @@ checksum =
"469fb0b9cefa57e3ef31275ee7cacb78f2fdca44e4765491884a2b119d4eb130"
[[package]]
name = "is-terminal"
-version = "0.4.15"
+version = "0.4.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e19b23d53f35ce9f56aebc7d1bb4e6ac1e9c0db7ac85c8d1760c04379edced37"
+checksum = "e04d7f318608d35d4b61ddd75cbdaee86b023ebe2bd5a66ee0915f0bf93095a9"
dependencies = [
"hermit-abi",
"libc",
@@ -2184,9 +2185,9 @@ dependencies = [
[[package]]
name = "itoa"
-version = "1.0.14"
+version = "1.0.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d75a2a4b1b190afb6f5425f10f6a8f959d2ea0b9c2b1d79553551850539e4674"
+checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c"
[[package]]
name = "java-locator"
@@ -2318,9 +2319,9 @@ dependencies = [
[[package]]
name = "libc"
-version = "0.2.170"
+version = "0.2.171"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "875b3680cb2f8f71bdcf9a30f38d48282f5d3c95cbf9b3fa57269bb5d5c06828"
+checksum = "c19937216e9d3aa9956d9bb8dfc0b0c8beb6058fc4f7a4dc4d850edf86a237d6"
[[package]]
name = "libloading"
@@ -2364,6 +2365,12 @@ version = "0.4.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab"
+[[package]]
+name = "linux-raw-sys"
+version = "0.9.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fe7db12097d22ec582439daf8618b8fdd1a7bef6270e9af3b1ebcd30893cf413"
+
[[package]]
name = "litemap"
version = "0.7.5"
@@ -2652,15 +2659,15 @@ dependencies = [
[[package]]
name = "once_cell"
-version = "1.20.3"
+version = "1.21.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "945462a4b81e43c4e3ba96bd7b49d834c6f61198356aa858733bc4acf3cbe62e"
+checksum = "d75b0bedcc4fe52caa0e03d9f1151a323e4aa5e2d78ba3580400cd3c9e2bc4bc"
[[package]]
name = "oorandom"
-version = "11.1.4"
+version = "11.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b410bbe7e14ab526a0e86877eb47c6996a2bd7746f027ba551028c925390e4e9"
+checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e"
[[package]]
name = "openssl-probe"
@@ -2836,9 +2843,9 @@ checksum =
"8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
[[package]]
name = "pkg-config"
-version = "0.3.31"
+version = "0.3.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "953ec861398dccce10c670dfeaf3ec4911ca479e9c02154b3a215178c5f566f2"
+checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c"
[[package]]
name = "plotters"
@@ -2892,18 +2899,18 @@ dependencies = [
[[package]]
name = "ppv-lite86"
-version = "0.2.20"
+version = "0.2.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "77957b295656769bb8ad2b6a6b09d897d94f05c41b069aede1fcdaa675eaea04"
+checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9"
dependencies = [
- "zerocopy",
+ "zerocopy 0.8.23",
]
[[package]]
name = "proc-macro2"
-version = "1.0.93"
+version = "1.0.94"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "60946a68e5f9d28b0dc1c21bb8a97ee7d018a8b322fa57838ba31cc878e22d99"
+checksum = "a31971752e70b8b2686d7e46ec17fb38dad4051d94024c88df49b667caea9c84"
dependencies = [
"unicode-ident",
]
@@ -2971,7 +2978,7 @@ dependencies = [
"itertools 0.12.1",
"proc-macro2",
"quote",
- "syn 2.0.98",
+ "syn 2.0.100",
]
[[package]]
@@ -3066,13 +3073,19 @@ dependencies = [
[[package]]
name = "quote"
-version = "1.0.38"
+version = "1.0.40"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0e4dccaaaf89514f546c693ddc140f729f958c247918a13380cccc6078391acc"
+checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d"
dependencies = [
"proc-macro2",
]
+[[package]]
+name = "r-efi"
+version = "5.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "74765f6d916ee2faa39bc8e68e4f3ed8949b48cccdac59983d287a7cb71ce9c5"
+
[[package]]
name = "rand"
version = "0.8.5"
@@ -3140,14 +3153,14 @@ source =
"registry+https://github.com/rust-lang/crates.io-index"
checksum = "76009fbe0614077fc1a2ce255e3a1881a2e3a3527097d5dc6d8212c585e7e38b"
dependencies = [
"quote",
- "syn 2.0.98",
+ "syn 2.0.100",
]
[[package]]
name = "redox_syscall"
-version = "0.5.9"
+version = "0.5.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "82b568323e98e49e2a0899dcee453dd679fae22d69adf9b11dd508d1549b7e2f"
+checksum = "0b8c0c260b63a8219631167be35e6a988e9554dbd323f8bd08439c8ed1302bd1"
dependencies = [
"bitflags 2.9.0",
]
@@ -3183,9 +3196,9 @@ checksum =
"2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c"
[[package]]
name = "reqwest"
-version = "0.12.12"
+version = "0.12.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "43e734407157c3c2034e0258f5e4473ddb361b1e85f95a66690d67264d7cd1da"
+checksum = "989e327e510263980e231de548a33e63d34962d29ae61b467389a1a09627a254"
dependencies = [
"base64",
"bytes",
@@ -3238,9 +3251,9 @@ dependencies = [
[[package]]
name = "ring"
-version = "0.17.13"
+version = "0.17.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "70ac5d832aa16abd7d1def883a8545280c20a60f523a370aa3a9617c2b8550ee"
+checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7"
dependencies = [
"cc",
"cfg-if",
@@ -3286,15 +3299,28 @@ dependencies = [
"bitflags 2.9.0",
"errno",
"libc",
- "linux-raw-sys",
+ "linux-raw-sys 0.4.15",
+ "windows-sys 0.59.0",
+]
+
+[[package]]
+name = "rustix"
+version = "1.0.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f7178faa4b75a30e269c71e61c353ce2748cf3d76f0c44c393f4e60abf49b825"
+dependencies = [
+ "bitflags 2.9.0",
+ "errno",
+ "libc",
+ "linux-raw-sys 0.9.3",
"windows-sys 0.59.0",
]
[[package]]
name = "rustls"
-version = "0.23.23"
+version = "0.23.25"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "47796c98c480fce5406ef69d1c76378375492c3b0a0de587be0c1d9feb12f395"
+checksum = "822ee9188ac4ec04a2f0531e55d035fb2de73f18b41a63c70c2712503b6fb13c"
dependencies = [
"once_cell",
"ring",
@@ -3336,9 +3362,9 @@ dependencies = [
[[package]]
name = "rustls-webpki"
-version = "0.102.8"
+version = "0.103.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "64ca1bc8749bd4cf37b5ce386cc146580777b4e8572c7b97baf22c83f444bee9"
+checksum = "0aa4eeac2588ffff23e9d7a7e9b3f971c5fb5b7ebc9452745e0c232c64f83b2f"
dependencies = [
"ring",
"rustls-pki-types",
@@ -3347,15 +3373,15 @@ dependencies = [
[[package]]
name = "rustversion"
-version = "1.0.19"
+version = "1.0.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f7c45b9784283f1b2e7fb61b42047c2fd678ef0960d4f6f1eba131594cc369d4"
+checksum = "eded382c5f5f786b989652c49544c4877d9f015cc22e145a5ea8ea66c2921cd2"
[[package]]
name = "ryu"
-version = "1.0.19"
+version = "1.0.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6ea1a2d0a644769cc99faa24c3ad26b379b786fe7c36fd3c546254801650e6dd"
+checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f"
[[package]]
name = "same-file"
@@ -3406,21 +3432,21 @@ dependencies = [
[[package]]
name = "semver"
-version = "1.0.25"
+version = "1.0.26"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f79dfe2d285b0488816f30e700a7438c5a73d816b5b7d3ac72fbc48b0d185e03"
+checksum = "56e6fa9c48d24d85fb3de5ad847117517440f6beceb7798af16b4a87d616b8d0"
[[package]]
name = "seq-macro"
-version = "0.3.5"
+version = "0.3.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4"
+checksum = "1bc711410fbe7399f390ca1c3b60ad0f53f80e95c5eb935e52268a0e2cd49acc"
[[package]]
name = "serde"
-version = "1.0.218"
+version = "1.0.219"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e8dfc9d19bdbf6d17e22319da49161d5d0108e4188e8b680aef6299eed22df60"
+checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6"
dependencies = [
"serde_derive",
]
@@ -3437,20 +3463,20 @@ dependencies = [
[[package]]
name = "serde_derive"
-version = "1.0.218"
+version = "1.0.219"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f09503e191f4e797cb8aac08e9a4a4695c5edf6a2e70e376d961ddd5c969f82b"
+checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.98",
+ "syn 2.0.100",
]
[[package]]
name = "serde_json"
-version = "1.0.139"
+version = "1.0.140"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "44f86c3acccc9c65b153fe1b85a3be07fe5515274ec9f0653b4a0875731c72a6"
+checksum = "20068b6e96dc6c9bd23e01df8827e6c7e1f2fddd43c21810382803c136b99373"
dependencies = [
"itoa",
"memchr",
@@ -3551,7 +3577,7 @@ dependencies = [
"heck 0.5.0",
"proc-macro2",
"quote",
- "syn 2.0.98",
+ "syn 2.0.100",
]
[[package]]
@@ -3589,7 +3615,7 @@ checksum =
"da5fc6819faabb412da764b99d3b713bb55083c11e7e0c00144d386cd6a1939c"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.98",
+ "syn 2.0.100",
]
[[package]]
@@ -3631,9 +3657,9 @@ checksum =
"13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292"
[[package]]
name = "symbolic-common"
-version = "12.13.4"
+version = "12.14.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b6189977df1d6ec30c920647919d76f29fb8d8f25e8952e835b0fcda25e8f792"
+checksum = "66135c8273581acaab470356f808a1c74a707fe7ec24728af019d7247e089e71"
dependencies = [
"debugid",
"memmap2",
@@ -3643,9 +3669,9 @@ dependencies = [
[[package]]
name = "symbolic-demangle"
-version = "12.13.4"
+version = "12.14.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d234917f7986498e7f62061438cee724bafb483fe84cfbe2486f68dce48240d7"
+checksum = "42bcacd080282a72e795864660b148392af7babd75691d5ae9a3b77e29c98c77"
dependencies = [
"cpp_demangle",
"rustc-demangle",
@@ -3665,9 +3691,9 @@ dependencies = [
[[package]]
name = "syn"
-version = "2.0.98"
+version = "2.0.100"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "36147f1a48ae0ec2b5b3bc5b537d267457555a10dc06f3dbc8cb11ba3006d3b1"
+checksum = "b09a44accad81e1ba1cd74a32461ba89dee89095ba17b32f5d03683b1b1fc2a0"
dependencies = [
"proc-macro2",
"quote",
@@ -3691,20 +3717,19 @@ checksum =
"c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.98",
+ "syn 2.0.100",
]
[[package]]
name = "tempfile"
-version = "3.17.1"
+version = "3.19.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "22e5a0acb1f3f55f65cc4a866c361b2fb2a0ff6366785ae6fbb5f85df07ba230"
+checksum = "488960f40a3fd53d72c2a29a58722561dee8afdd175bd88e3db4677d7b2ba600"
dependencies = [
- "cfg-if",
"fastrand",
- "getrandom 0.3.1",
+ "getrandom 0.3.2",
"once_cell",
- "rustix",
+ "rustix 1.0.2",
"windows-sys 0.59.0",
]
@@ -3734,7 +3759,7 @@ checksum =
"4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.98",
+ "syn 2.0.100",
]
[[package]]
@@ -3745,7 +3770,7 @@ checksum =
"7f7cf42b4507d8ea322120659672cf1b9dbb93f8f2d4ecfd6e51350ff5b17a1d"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.98",
+ "syn 2.0.100",
]
[[package]]
@@ -3815,9 +3840,9 @@ checksum =
"1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
[[package]]
name = "tokio"
-version = "1.43.0"
+version = "1.44.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3d61fa4ffa3de412bfea335c6ecff681de2b609ba3c77ef3e00e521813a9ed9e"
+checksum = "f382da615b842244d4b8738c82ed1275e6c5dd90c459a30941cd07080b06c91a"
dependencies = [
"backtrace",
"bytes",
@@ -3838,7 +3863,7 @@ checksum =
"6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.98",
+ "syn 2.0.100",
]
[[package]]
@@ -3853,9 +3878,9 @@ dependencies = [
[[package]]
name = "tokio-util"
-version = "0.7.13"
+version = "0.7.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d7fcaa8d55a2bdd6b83ace262b016eca0d79ee02818c5c1bcdf0305114081078"
+checksum = "6b9590b93e6fcc1739458317cccd391ad3955e2bde8913edf6f95f9e65a8f034"
dependencies = [
"bytes",
"futures-core",
@@ -3910,7 +3935,7 @@ checksum =
"395ae124c09f9e6918a2310af6038fba074bcf474ac352496d5910dd59a2226d"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.98",
+ "syn 2.0.100",
]
[[package]]
@@ -3964,9 +3989,9 @@ checksum =
"1dccffe3ce07af9386bfd29e80c0ab1a8205a2fc34e4bcd40364df902cfa8f3f"
[[package]]
name = "unicode-ident"
-version = "1.0.17"
+version = "1.0.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "00e2473a93778eb0bad35909dff6a10d28e63f792f16ed15e404fca9d5eeedbe"
+checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512"
[[package]]
name = "unicode-segmentation"
@@ -4026,11 +4051,11 @@ checksum =
"b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be"
[[package]]
name = "uuid"
-version = "1.15.1"
+version = "1.16.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e0f540e3240398cce6128b64ba83fdbdd86129c16a3aa1a3a252efd66eb3d587"
+checksum = "458f7a779bf54acc9f347480ac654f68407d3aab21269a6e3c9f922acd9e2da9"
dependencies = [
- "getrandom 0.3.1",
+ "getrandom 0.3.2",
"js-sys",
"wasm-bindgen",
]
@@ -4068,9 +4093,9 @@ checksum =
"9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
[[package]]
name = "wasi"
-version = "0.13.3+wasi-0.2.2"
+version = "0.14.2+wasi-0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "26816d2e1a4a36a2940b96c5296ce403917633dff8f3440e9b236ed6f6bacad2"
+checksum = "9683f9a5a998d873c0d21fcbe3c083009670149a8fab228644b8bd36b2c48cb3"
dependencies = [
"wit-bindgen-rt",
]
@@ -4097,7 +4122,7 @@ dependencies = [
"log",
"proc-macro2",
"quote",
- "syn 2.0.98",
+ "syn 2.0.100",
"wasm-bindgen-shared",
]
@@ -4132,7 +4157,7 @@ checksum =
"8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.98",
+ "syn 2.0.100",
"wasm-bindgen-backend",
"wasm-bindgen-shared",
]
@@ -4188,7 +4213,7 @@ dependencies = [
"either",
"home",
"once_cell",
- "rustix",
+ "rustix 0.38.44",
]
[[package]]
@@ -4231,34 +4256,39 @@ dependencies = [
"windows-targets 0.52.6",
]
+[[package]]
+name = "windows-link"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6dccfd733ce2b1753b03b6d3c65edf020262ea35e20ccdf3e288043e6dd620e3"
+
[[package]]
name = "windows-registry"
-version = "0.2.0"
+version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e400001bb720a623c1c69032f8e3e4cf09984deec740f007dd2b03ec864804b0"
+checksum = "4286ad90ddb45071efd1a66dfa43eb02dd0dfbae1545ad6cc3c51cf34d7e8ba3"
dependencies = [
"windows-result",
"windows-strings",
- "windows-targets 0.52.6",
+ "windows-targets 0.53.0",
]
[[package]]
name = "windows-result"
-version = "0.2.0"
+version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1d1043d8214f791817bab27572aaa8af63732e11bf84aa21a45a78d6c317ae0e"
+checksum = "06374efe858fab7e4f881500e6e86ec8bc28f9462c47e5a9941a0142ad86b189"
dependencies = [
- "windows-targets 0.52.6",
+ "windows-link",
]
[[package]]
name = "windows-strings"
-version = "0.1.0"
+version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4cd9b125c486025df0eabcb585e62173c6c9eddcec5d117d3b6e8c30e2ee4d10"
+checksum = "87fa48cc5d406560701792be122a10132491cff9d0aeb23583cc2dcafc847319"
dependencies = [
- "windows-result",
- "windows-targets 0.52.6",
+ "windows-link",
]
[[package]]
@@ -4312,13 +4342,29 @@ dependencies = [
"windows_aarch64_gnullvm 0.52.6",
"windows_aarch64_msvc 0.52.6",
"windows_i686_gnu 0.52.6",
- "windows_i686_gnullvm",
+ "windows_i686_gnullvm 0.52.6",
"windows_i686_msvc 0.52.6",
"windows_x86_64_gnu 0.52.6",
"windows_x86_64_gnullvm 0.52.6",
"windows_x86_64_msvc 0.52.6",
]
+[[package]]
+name = "windows-targets"
+version = "0.53.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b1e4c7e8ceaaf9cb7d7507c974735728ab453b67ef8f18febdd7c11fe59dca8b"
+dependencies = [
+ "windows_aarch64_gnullvm 0.53.0",
+ "windows_aarch64_msvc 0.53.0",
+ "windows_i686_gnu 0.53.0",
+ "windows_i686_gnullvm 0.53.0",
+ "windows_i686_msvc 0.53.0",
+ "windows_x86_64_gnu 0.53.0",
+ "windows_x86_64_gnullvm 0.53.0",
+ "windows_x86_64_msvc 0.53.0",
+]
+
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.42.2"
@@ -4331,6 +4377,12 @@ version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
+[[package]]
+name = "windows_aarch64_gnullvm"
+version = "0.53.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "86b8d5f90ddd19cb4a147a5fa63ca848db3df085e25fee3cc10b39b6eebae764"
+
[[package]]
name = "windows_aarch64_msvc"
version = "0.42.2"
@@ -4343,6 +4395,12 @@ version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
+[[package]]
+name = "windows_aarch64_msvc"
+version = "0.53.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c7651a1f62a11b8cbd5e0d42526e55f2c99886c77e007179efff86c2b137e66c"
+
[[package]]
name = "windows_i686_gnu"
version = "0.42.2"
@@ -4355,12 +4413,24 @@ version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"
+[[package]]
+name = "windows_i686_gnu"
+version = "0.53.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c1dc67659d35f387f5f6c479dc4e28f1d4bb90ddd1a5d3da2e5d97b42d6272c3"
+
[[package]]
name = "windows_i686_gnullvm"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
+[[package]]
+name = "windows_i686_gnullvm"
+version = "0.53.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9ce6ccbdedbf6d6354471319e781c0dfef054c81fbc7cf83f338a4296c0cae11"
+
[[package]]
name = "windows_i686_msvc"
version = "0.42.2"
@@ -4373,6 +4443,12 @@ version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
+[[package]]
+name = "windows_i686_msvc"
+version = "0.53.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "581fee95406bb13382d2f65cd4a908ca7b1e4c2f1917f143ba16efe98a589b5d"
+
[[package]]
name = "windows_x86_64_gnu"
version = "0.42.2"
@@ -4385,6 +4461,12 @@ version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
+[[package]]
+name = "windows_x86_64_gnu"
+version = "0.53.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2e55b5ac9ea33f2fc1716d1742db15574fd6fc8dadc51caab1c16a3d3b4190ba"
+
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.42.2"
@@ -4397,6 +4479,12 @@ version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
+[[package]]
+name = "windows_x86_64_gnullvm"
+version = "0.53.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0a6e035dd0599267ce1ee132e51c27dd29437f63325753051e71dd9e42406c57"
+
[[package]]
name = "windows_x86_64_msvc"
version = "0.42.2"
@@ -4409,11 +4497,17 @@ version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
+[[package]]
+name = "windows_x86_64_msvc"
+version = "0.53.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486"
+
[[package]]
name = "wit-bindgen-rt"
-version = "0.33.0"
+version = "0.39.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3268f3d866458b787f390cf61f4bbb563b922d091359f9608842999eaee3943c"
+checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1"
dependencies = [
"bitflags 2.9.0",
]
@@ -4450,7 +4544,7 @@ checksum =
"2380878cad4ac9aac1e2435f3eb4020e8374b5f13c296cb75b4620ff8e229154"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.98",
+ "syn 2.0.100",
"synstructure",
]
@@ -4460,8 +4554,16 @@ version = "0.7.35"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0"
dependencies = [
- "byteorder",
- "zerocopy-derive",
+ "zerocopy-derive 0.7.35",
+]
+
+[[package]]
+name = "zerocopy"
+version = "0.8.23"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fd97444d05a4328b90e75e503a34bad781f14e28a823ad3557f0750df1ebcbc6"
+dependencies = [
+ "zerocopy-derive 0.8.23",
]
[[package]]
@@ -4472,7 +4574,18 @@ checksum =
"fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.98",
+ "syn 2.0.100",
+]
+
+[[package]]
+name = "zerocopy-derive"
+version = "0.8.23"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6352c01d0edd5db859a63e2605f4ea3183ddbd15e2c4a9e7d32184df75e4f154"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.100",
]
[[package]]
@@ -4492,7 +4605,7 @@ checksum =
"d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.98",
+ "syn 2.0.100",
"synstructure",
]
@@ -4521,7 +4634,7 @@ checksum =
"6eafa6dfb17584ea3e2bd6e76e0cc15ad7af12b09abdd1ca55961bed9b1063c6"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.98",
+ "syn 2.0.100",
]
[[package]]
diff --git a/native/core/Cargo.toml b/native/core/Cargo.toml
index 607edb2e1..bc9197223 100644
--- a/native/core/Cargo.toml
+++ b/native/core/Cargo.toml
@@ -77,6 +77,7 @@ jni = { version = "0.21", features = ["invocation"] }
lazy_static = "1.4"
assertables = "7"
hex = "0.4.3"
+datafusion-functions-nested = "46.0.0"
[features]
default = []
diff --git a/native/core/src/execution/planner.rs
b/native/core/src/execution/planner.rs
index aade88bf3..87c8fc4b2 100644
--- a/native/core/src/execution/planner.rs
+++ b/native/core/src/execution/planner.rs
@@ -2686,12 +2686,14 @@ mod tests {
use arrow::array::{DictionaryArray, Int32Array, StringArray};
use arrow::datatypes::DataType;
- use datafusion::{physical_plan::common::collect, prelude::SessionContext};
+ use datafusion::logical_expr::ScalarUDF;
+ use datafusion::{assert_batches_eq, physical_plan::common::collect,
prelude::SessionContext};
use tokio::sync::mpsc;
use crate::execution::{operators::InputBatch, planner::PhysicalPlanner};
use crate::execution::operators::ExecutionError;
+ use datafusion_comet_proto::spark_expression::expr::ExprStruct;
use datafusion_comet_proto::{
spark_expression::expr::ExprStruct::*,
spark_expression::Expr,
@@ -3004,4 +3006,130 @@ mod tests {
type_info: None,
}
}
+
+ #[test]
+ fn test_create_array() {
+ let session_ctx = SessionContext::new();
+ session_ctx.register_udf(ScalarUDF::from(
+ datafusion_functions_nested::make_array::MakeArray::new(),
+ ));
+ let task_ctx = session_ctx.task_ctx();
+ let planner = PhysicalPlanner::new(Arc::from(session_ctx));
+
+ // Create a plan for
+ // ProjectionExec: expr=[make_array(col_0@0) as col_0]
+ // ScanExec: source=[CometScan parquet (unknown)], schema=[col_0:
Int32]
+ let op_scan = Operator {
+ plan_id: 0,
+ children: vec![],
+ op_struct: Some(OpStruct::Scan(spark_operator::Scan {
+ fields: vec![
+ spark_expression::DataType {
+ type_id: 3, // Int32
+ type_info: None,
+ },
+ spark_expression::DataType {
+ type_id: 3, // Int32
+ type_info: None,
+ },
+ spark_expression::DataType {
+ type_id: 3, // Int32
+ type_info: None,
+ },
+ ],
+ source: "".to_string(),
+ })),
+ };
+
+ let array_col = spark_expression::Expr {
+ expr_struct: Some(Bound(spark_expression::BoundReference {
+ index: 0,
+ datatype: Some(spark_expression::DataType {
+ type_id: 3,
+ type_info: None,
+ }),
+ })),
+ };
+
+ let array_col_1 = spark_expression::Expr {
+ expr_struct: Some(Bound(spark_expression::BoundReference {
+ index: 1,
+ datatype: Some(spark_expression::DataType {
+ type_id: 3,
+ type_info: None,
+ }),
+ })),
+ };
+
+ let projection = Operator {
+ children: vec![op_scan],
+ plan_id: 0,
+ op_struct: Some(OpStruct::Projection(spark_operator::Projection {
+ project_list: vec![spark_expression::Expr {
+ expr_struct:
Some(ExprStruct::ScalarFunc(spark_expression::ScalarFunc {
+ func: "make_array".to_string(),
+ args: vec![array_col, array_col_1],
+ return_type: None,
+ })),
+ }],
+ })),
+ };
+
+ let a = Int32Array::from(vec![0, 3]);
+ let b = Int32Array::from(vec![1, 4]);
+ let c = Int32Array::from(vec![2, 5]);
+ let input_batch = InputBatch::Batch(vec![Arc::new(a), Arc::new(b),
Arc::new(c)], 2);
+
+ let (mut scans, datafusion_plan) =
+ planner.create_plan(&projection, &mut vec![], 1).unwrap();
+ scans[0].set_input_batch(input_batch);
+
+ let mut stream = datafusion_plan.native_plan.execute(0,
task_ctx).unwrap();
+
+ let runtime = tokio::runtime::Runtime::new().unwrap();
+ let (tx, mut rx) = mpsc::channel(1);
+
+ // Separate thread to send the EOF signal once we've processed the
only input batch
+ runtime.spawn(async move {
+ // Create a dictionary array with 100 values, and use it as input
to the execution.
+ let a = Int32Array::from(vec![0, 3]);
+ let b = Int32Array::from(vec![1, 4]);
+ let c = Int32Array::from(vec![2, 5]);
+ let input_batch1 = InputBatch::Batch(vec![Arc::new(a),
Arc::new(b), Arc::new(c)], 2);
+ let input_batch2 = InputBatch::EOF;
+
+ let batches = vec![input_batch1, input_batch2];
+
+ for batch in batches.into_iter() {
+ tx.send(batch).await.unwrap();
+ }
+ });
+
+ runtime.block_on(async move {
+ loop {
+ let batch = rx.recv().await.unwrap();
+ scans[0].set_input_batch(batch);
+ match poll!(stream.next()) {
+ Poll::Ready(Some(batch)) => {
+ assert!(batch.is_ok(), "got error {}",
batch.unwrap_err());
+ let batch = batch.unwrap();
+ assert_eq!(batch.num_rows(), 2);
+ let expected = [
+ "+--------+",
+ "| col_0 |",
+ "+--------+",
+ "| [0, 1] |",
+ "| [3, 4] |",
+ "+--------+",
+ ];
+ assert_batches_eq!(expected, &[batch]);
+ }
+ Poll::Ready(None) => {
+ break;
+ }
+ _ => {}
+ }
+ }
+ });
+ }
}
diff --git a/native/core/src/execution/shuffle/row.rs
b/native/core/src/execution/shuffle/row.rs
index caed2c3ca..f40d72092 100644
--- a/native/core/src/execution/shuffle/row.rs
+++ b/native/core/src/execution/shuffle/row.rs
@@ -3197,6 +3197,7 @@ fn make_builders(
// Disable dictionary encoding for array element
let value_builder =
make_builders(field.data_type(), NESTED_TYPE_BUILDER_CAPACITY,
1.0)?;
+
match field.data_type() {
DataType::Boolean => {
let builder = downcast_builder!(BooleanBuilder,
value_builder);
diff --git a/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala
b/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala
index ec94ffd55..3e36323a4 100644
--- a/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala
+++ b/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala
@@ -61,13 +61,16 @@ object QueryPlanSerde extends Logging with
ShimQueryPlanSerde with CometExprShim
logWarning(s"Comet native execution is disabled due to: $reason")
}
- def supportedDataType(dt: DataType, allowStruct: Boolean = false): Boolean =
dt match {
+ def supportedDataType(dt: DataType, allowComplex: Boolean = false): Boolean
= dt match {
case _: ByteType | _: ShortType | _: IntegerType | _: LongType | _:
FloatType |
_: DoubleType | _: StringType | _: BinaryType | _: TimestampType | _:
TimestampNTZType |
_: DecimalType | _: DateType | _: BooleanType | _: NullType =>
true
- case s: StructType if allowStruct =>
- s.fields.map(_.dataType).forall(supportedDataType(_, allowStruct))
+ case s: StructType if allowComplex =>
+ s.fields.map(_.dataType).forall(supportedDataType(_, allowComplex))
+ // TODO: Add nested array and iceberg compat support
+ // case a: ArrayType if allowComplex =>
+ // supportedDataType(a.elementType)
case dt =>
emitWarning(s"unsupported Spark data type: $dt")
false
@@ -763,7 +766,8 @@ object QueryPlanSerde extends Logging with
ShimQueryPlanSerde with CometExprShim
binding,
(builder, binaryExpr) => builder.setLtEq(binaryExpr))
- case Literal(value, dataType) if supportedDataType(dataType, allowStruct
= value == null) =>
+ case Literal(value, dataType)
+ if supportedDataType(dataType, allowComplex = value == null) =>
val exprBuilder = ExprOuterClass.Literal.newBuilder()
if (value == null) {
@@ -2716,7 +2720,9 @@ object QueryPlanSerde extends Logging with
ShimQueryPlanSerde with CometExprShim
withInfo(join, "SortMergeJoin is not enabled")
None
- case op if isCometSink(op) && op.output.forall(a =>
supportedDataType(a.dataType, true)) =>
+ case op
+ if isCometSink(op) && op.output.forall(a =>
+ supportedDataType(a.dataType, allowComplex = true)) =>
// These operators are source of Comet native execution chain
val scanBuilder = OperatorOuterClass.Scan.newBuilder()
val source = op.simpleStringWithNodeId()
diff --git
a/spark/src/main/scala/org/apache/spark/sql/comet/CometNativeScanExec.scala
b/spark/src/main/scala/org/apache/spark/sql/comet/CometNativeScanExec.scala
index c28fd74dd..b90dea06e 100644
--- a/spark/src/main/scala/org/apache/spark/sql/comet/CometNativeScanExec.scala
+++ b/spark/src/main/scala/org/apache/spark/sql/comet/CometNativeScanExec.scala
@@ -182,7 +182,7 @@ object CometNativeScanExec extends DataTypeSupport {
case null => null
}
- val newArgs = mapProductIterator(scanExec, transform(_))
+ val newArgs = mapProductIterator(scanExec, transform)
val wrapped = scanExec.makeCopy(newArgs).asInstanceOf[FileSourceScanExec]
val batchScanExec = CometNativeScanExec(
nativeOp,
@@ -202,9 +202,10 @@ object CometNativeScanExec extends DataTypeSupport {
}
override def isAdditionallySupported(dt: DataType): Boolean = {
- // TODO add array and map
+ // TODO add map
dt match {
case s: StructType => s.fields.map(_.dataType).forall(isTypeSupported)
+ case a: ArrayType => isTypeSupported(a.elementType)
case _ => false
}
}
diff --git
a/spark/src/main/scala/org/apache/spark/sql/comet/CometScanExec.scala
b/spark/src/main/scala/org/apache/spark/sql/comet/CometScanExec.scala
index 9e97c06d5..eeca78678 100644
--- a/spark/src/main/scala/org/apache/spark/sql/comet/CometScanExec.scala
+++ b/spark/src/main/scala/org/apache/spark/sql/comet/CometScanExec.scala
@@ -487,9 +487,11 @@ object CometScanExec extends DataTypeSupport {
override def isAdditionallySupported(dt: DataType): Boolean = {
if (CometConf.COMET_NATIVE_SCAN_IMPL.get() ==
CometConf.SCAN_NATIVE_ICEBERG_COMPAT) {
- // TODO add array and map
+ // TODO add map
dt match {
case s: StructType => s.fields.map(_.dataType).forall(isTypeSupported)
+ // TODO: Add nested array and iceberg compat support
+ // case a: ArrayType => isTypeSupported(a.elementType)
case _ => false
}
} else {
diff --git
a/spark/src/test/scala/org/apache/comet/exec/CometNativeReaderSuite.scala
b/spark/src/test/scala/org/apache/comet/exec/CometNativeReaderSuite.scala
new file mode 100644
index 000000000..18841f0e7
--- /dev/null
+++ b/spark/src/test/scala/org/apache/comet/exec/CometNativeReaderSuite.scala
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.comet.exec
+
+import org.scalactic.source.Position
+import org.scalatest.Tag
+
+import org.apache.spark.sql.CometTestBase
+import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper
+import org.apache.spark.sql.internal.SQLConf
+
+import org.apache.comet.CometConf
+
+class CometNativeReaderSuite extends CometTestBase with
AdaptiveSparkPlanHelper {
+ override protected def test(testName: String, testTags: Tag*)(testFun: =>
Any)(implicit
+ pos: Position): Unit = {
+ // TODO: Enable Iceberg compat tests
+ Seq(CometConf.SCAN_NATIVE_DATAFUSION /*,
CometConf.SCAN_NATIVE_ICEBERG_COMPAT*/ ).foreach(
+ scan =>
+ super.test(s"$testName - $scan", testTags: _*) {
+ withSQLConf(
+ CometConf.COMET_EXEC_ENABLED.key -> "true",
+ SQLConf.USE_V1_SOURCE_LIST.key -> "parquet",
+ CometConf.COMET_ENABLED.key -> "true",
+ CometConf.COMET_EXPLAIN_FALLBACK_ENABLED.key -> "false",
+ CometConf.COMET_NATIVE_SCAN_IMPL.key -> scan) {
+ testFun
+ }
+ })
+ }
+
+ test("native reader - read simple STRUCT fields") {
+ testSingleLineQuery(
+ """
+ |select named_struct('firstName', 'John', 'lastName', 'Doe', 'age',
35) as personal_info union all
+ |select named_struct('firstName', 'Jane', 'lastName', 'Doe', 'age',
40) as personal_info
+ |""".stripMargin,
+ "select personal_info.* from tbl")
+ }
+
+ test("native reader - read simple ARRAY fields") {
+ testSingleLineQuery(
+ """
+ |select array(1, 2, 3) as arr union all
+ |select array(2, 3, 4) as arr
+ |""".stripMargin,
+ "select arr from tbl")
+ }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]