This is an automated email from the ASF dual-hosted git repository.
agrove pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion-python.git
The following commit(s) were added to refs/heads/master by this push:
new 649ac96 Upgrade to DataFusion 15.0.0 (#103)
649ac96 is described below
commit 649ac96a596b4dca71c5a8b535d2d3b3b10d88d3
Author: Andy Grove <[email protected]>
AuthorDate: Wed Dec 28 22:36:51 2022 -0700
Upgrade to DataFusion 15.0.0 (#103)
* Upgrade to DataFusion 15.0.0
* refactor
* implement size
* clippy
* fmt
* fix
* remove datatype
* error handling
---
Cargo.lock | 541 +++++++++++++++++++++++++++++++++++--------
Cargo.toml | 8 +-
datafusion/tests/test_sql.py | 2 +-
src/context.rs | 40 +++-
src/dataset.rs | 4 +-
src/udaf.rs | 4 +
6 files changed, 487 insertions(+), 112 deletions(-)
diff --git a/Cargo.lock b/Cargo.lock
index 75059dc..6dccce4 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -14,17 +14,6 @@ version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aae1277d39aeec15cb388266ecc24b11c80469deae6067e17a1a7aa9e5c1f234"
-[[package]]
-name = "ahash"
-version = "0.7.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47"
-dependencies = [
- "getrandom 0.2.7",
- "once_cell",
- "version_check",
-]
-
[[package]]
name = "ahash"
version = "0.8.0"
@@ -112,83 +101,147 @@ checksum = "8da52d66c7071e2e3fa2a1e5c6d088fec47b593032b254f5e980de8ea54454d6"
[[package]]
name = "arrow"
-version = "26.0.0"
+version = "28.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e24e2bcd431a4aa0ff003fdd2dc21c78cfb42f31459c89d2312c2746fe17a5ac"
+checksum = "aed9849f86164fad5cb66ce4732782b15f1bc97f8febab04e782c20cce9d4b6c"
dependencies = [
- "ahash 0.8.0",
+ "ahash",
"arrow-array",
"arrow-buffer",
+ "arrow-cast",
+ "arrow-csv",
"arrow-data",
+ "arrow-ipc",
+ "arrow-json",
"arrow-schema",
"arrow-select",
"bitflags",
"chrono",
"comfy-table",
- "csv",
- "flatbuffers",
"half",
- "hashbrown",
- "indexmap",
- "lazy_static",
- "lexical-core",
+ "hashbrown 0.13.1",
"multiversion",
"num",
"pyo3",
"regex",
"regex-syntax",
- "serde_json",
]
[[package]]
name = "arrow-array"
-version = "26.0.0"
+version = "28.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c9044300874385f19e77cbf90911e239bd23630d8f23bb0f948f9067998a13b7"
+checksum = "6b8504cf0a6797e908eecf221a865e7d339892720587f87c8b90262863015b08"
dependencies = [
- "ahash 0.8.0",
+ "ahash",
"arrow-buffer",
"arrow-data",
"arrow-schema",
"chrono",
"half",
- "hashbrown",
+ "hashbrown 0.13.1",
"num",
]
[[package]]
name = "arrow-buffer"
-version = "26.0.0"
+version = "28.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "78476cbe9e3f808dcecab86afe42d573863c63e149c62e6e379ed2522743e626"
+checksum = "d6de64a27cea684b24784647d9608314bc80f7c4d55acb44a425e05fab39d916"
dependencies = [
"half",
"num",
]
+[[package]]
+name = "arrow-cast"
+version = "28.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bec4a54502eefe05923c385c90a005d69474fa06ca7aa2a2b123c9f9532f6178"
+dependencies = [
+ "arrow-array",
+ "arrow-buffer",
+ "arrow-data",
+ "arrow-schema",
+ "arrow-select",
+ "chrono",
+ "lexical-core",
+ "num",
+]
+
+[[package]]
+name = "arrow-csv"
+version = "28.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e7902bbf8127eac48554fe902775303377047ad49a9fd473c2b8cb399d092080"
+dependencies = [
+ "arrow-array",
+ "arrow-buffer",
+ "arrow-cast",
+ "arrow-data",
+ "arrow-schema",
+ "chrono",
+ "csv",
+ "lazy_static",
+ "lexical-core",
+ "regex",
+]
+
[[package]]
name = "arrow-data"
-version = "26.0.0"
+version = "28.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7e4882efe617002449d5c6b5de9ddb632339074b36df8a96ea7147072f1faa8a"
+dependencies = [
+ "arrow-buffer",
+ "arrow-schema",
+ "half",
+ "num",
+]
+
+[[package]]
+name = "arrow-ipc"
+version = "28.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4d916feee158c485dad4f701cba31bc9a90a8db87d9df8e2aa8adc0c20a2bbb9"
+checksum = "fa0703a6de2785828561b03a4d7793ecd333233e1b166316b4bfc7cfce55a4a7"
dependencies = [
+ "arrow-array",
"arrow-buffer",
+ "arrow-cast",
+ "arrow-data",
"arrow-schema",
+ "flatbuffers",
+]
+
+[[package]]
+name = "arrow-json"
+version = "28.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3bd23fc8c6d251f96cd63b96fece56bbb9710ce5874a627cb786e2600673595a"
+dependencies = [
+ "arrow-array",
+ "arrow-buffer",
+ "arrow-cast",
+ "arrow-data",
+ "arrow-schema",
+ "chrono",
"half",
+ "indexmap",
"num",
+ "serde_json",
]
[[package]]
name = "arrow-schema"
-version = "26.0.0"
+version = "28.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0f9406eb7834ca6bd8350d1baa515d18b9fcec487eddacfb62f5e19511f7bd37"
+checksum = "da9f143882a80be168538a60e298546314f50f11f2a288c8d73e11108da39d26"
[[package]]
name = "arrow-select"
-version = "26.0.0"
+version = "28.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6593a01586751c74498495d2f5a01fcd438102b52965c11dd98abf4ebcacef37"
+checksum = "520406331d4ad60075359524947ebd804e479816439af82bcb17f8d280d9b38c"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -210,6 +263,7 @@ dependencies = [
"memchr",
"pin-project-lite",
"tokio",
+ "xz2",
]
[[package]]
@@ -362,9 +416,9 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "chrono"
-version = "0.4.22"
+version = "0.4.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bfd4d1b31faaa3a89d7934dbded3111da0d2ef28e3ebccdb4f0179f5929d1ef1"
+checksum = "16b0a3d9ed01224b22057780a37bb8c5dbfe1be8ba48678e7bf57ec4b385411f"
dependencies = [
"iana-time-zone",
"num-integer",
@@ -373,6 +427,43 @@ dependencies = [
"winapi",
]
+[[package]]
+name = "clap"
+version = "4.0.29"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4d63b9e9c07271b9957ad22c173bae2a4d9a81127680962039296abcd2f8251d"
+dependencies = [
+ "bitflags",
+ "clap_derive",
+ "clap_lex",
+ "is-terminal",
+ "once_cell",
+ "strsim",
+ "termcolor",
+]
+
+[[package]]
+name = "clap_derive"
+version = "4.0.21"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0177313f9f02afc995627906bbd8967e2be069f5261954222dac78290c2b9014"
+dependencies = [
+ "heck",
+ "proc-macro-error",
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "clap_lex"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0d4198f73e42b4936b35b5bb248d81d2b595ecb170da0bac7655c54eedfa8da8"
+dependencies = [
+ "os_str_bytes",
+]
+
[[package]]
name = "codespan-reporting"
version = "0.11.1"
@@ -528,13 +619,26 @@ dependencies = [
"syn",
]
+[[package]]
+name = "dashmap"
+version = "5.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "907076dfda823b0b36d2a1bb5f90c96660a5bbcd7729e10727f07858f22c4edc"
+dependencies = [
+ "cfg-if",
+ "hashbrown 0.12.3",
+ "lock_api",
+ "once_cell",
+ "parking_lot_core",
+]
+
[[package]]
name = "datafusion"
-version = "14.0.0"
+version = "15.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e7a8411475928479fe57af18698626f0a44f3c29153e051dce45f7455c08a6d5"
+checksum = "b75a088adf79515b04fd3895c1a14dc249c8f7a7f27b59870a05546fe9a55542"
dependencies = [
- "ahash 0.8.0",
+ "ahash",
"apache-avro",
"arrow",
"async-compression",
@@ -542,6 +646,7 @@ dependencies = [
"bytes",
"bzip2",
"chrono",
+ "dashmap",
"datafusion-common",
"datafusion-expr",
"datafusion-optimizer",
@@ -551,14 +656,13 @@ dependencies = [
"flate2",
"futures",
"glob",
- "hashbrown",
+ "hashbrown 0.13.1",
"itertools",
"lazy_static",
"log",
"num-traits",
"num_cpus",
"object_store",
- "ordered-float 3.2.0",
"parking_lot",
"parquet",
"paste",
@@ -567,6 +671,7 @@ dependencies = [
"pyo3",
"rand 0.8.5",
"smallvec",
+ "sqllogictest",
"sqlparser",
"tempfile",
"tokio",
@@ -574,19 +679,19 @@ dependencies = [
"tokio-util",
"url",
"uuid 1.2.1",
+ "xz2",
]
[[package]]
name = "datafusion-common"
-version = "14.0.0"
+version = "15.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "15f1ffcbc1f040c9ab99f41db1c743d95aff267bb2e7286aaa010738b7402251"
+checksum = "7b17262b899f79afdf502846d1138a8b48441afe24dc6e07c922105289248137"
dependencies = [
"apache-avro",
"arrow",
"chrono",
"object_store",
- "ordered-float 3.2.0",
"parquet",
"pyo3",
"sqlparser",
@@ -594,11 +699,11 @@ dependencies = [
[[package]]
name = "datafusion-expr"
-version = "14.0.0"
+version = "15.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1883d9590d303ef38fa295567e7fdb9f8f5f511fcc167412d232844678cd295c"
+checksum = "533d2226b4636a1306d1f6f4ac02e436947c5d6e8bfc85f6d8f91a425c10a407"
dependencies = [
- "ahash 0.8.0",
+ "ahash",
"arrow",
"datafusion-common",
"log",
@@ -607,9 +712,9 @@ dependencies = [
[[package]]
name = "datafusion-optimizer"
-version = "14.0.0"
+version = "15.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2127d46d566ab3463d70da9675fc07b9d634be8d17e80d0e1ce79600709fe651"
+checksum = "ce7ba274267b6baf1714a67727249aa56d648c8814b0f4c43387fbe6d147e619"
dependencies = [
"arrow",
"async-trait",
@@ -617,17 +722,17 @@ dependencies = [
"datafusion-common",
"datafusion-expr",
"datafusion-physical-expr",
- "hashbrown",
+ "hashbrown 0.13.1",
"log",
]
[[package]]
name = "datafusion-physical-expr"
-version = "14.0.0"
+version = "15.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0d108b6fe8eeb317ecad1d74619e8758de49cccc8c771b56c97962fd52eaae23"
+checksum = "f35cb53e6c2f9c40accdf45aef2be7fde030ea3051b1145a059d96109e65b0bf"
dependencies = [
- "ahash 0.8.0",
+ "ahash",
"arrow",
"arrow-buffer",
"arrow-schema",
@@ -638,12 +743,11 @@ dependencies = [
"datafusion-expr",
"datafusion-row",
"half",
- "hashbrown",
+ "hashbrown 0.13.1",
"itertools",
"lazy_static",
"md-5",
"num-traits",
- "ordered-float 3.2.0",
"paste",
"rand 0.8.5",
"regex",
@@ -673,9 +777,9 @@ dependencies = [
[[package]]
name = "datafusion-row"
-version = "14.0.0"
+version = "15.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "43537b6377d506e4788bf21e9ed943340e076b48ca4d077e6ea4405ca5e54a1c"
+checksum = "27c77b1229ae5cf6a6e0e2ba43ed4e98131dbf1cc4a97fad17c94230b32e0812"
dependencies = [
"arrow",
"datafusion-common",
@@ -685,16 +789,22 @@ dependencies = [
[[package]]
name = "datafusion-sql"
-version = "14.0.0"
+version = "15.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "244d08d4710e1088d9c0949c9b5b8d68d9cf2cde7203134a4cc389e870fe2354"
+checksum = "569423fa8a50db39717080949e3b4f8763582b87baf393cc3fcf27cc21467ba7"
dependencies = [
- "arrow",
+ "arrow-schema",
"datafusion-common",
"datafusion-expr",
"sqlparser",
]
+[[package]]
+name = "difference"
+version = "2.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "524cbf6897b527295dff137cec09ecf3a05f4fddffd7dfcd1585403449e74198"
+
[[package]]
name = "digest"
version = "0.10.5"
@@ -727,6 +837,27 @@ dependencies = [
"cfg-if",
]
+[[package]]
+name = "errno"
+version = "0.2.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f639046355ee4f37944e44f60642c6f3a7efa3cf6b78c78a0d989a8ce6c396a1"
+dependencies = [
+ "errno-dragonfly",
+ "libc",
+ "winapi",
+]
+
+[[package]]
+name = "errno-dragonfly"
+version = "0.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "aa68f1b12764fab894d2755d2518754e71b4fd80ecfb822714a1206c2aab39bf"
+dependencies = [
+ "cc",
+ "libc",
+]
+
[[package]]
name = "fastrand"
version = "1.8.0"
@@ -932,8 +1063,14 @@ name = "hashbrown"
version = "0.12.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
+
+[[package]]
+name = "hashbrown"
+version = "0.13.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "33ff8ae62cd3a9102e5637afc8452c55acf3844001bd5374e0b0bd7b6616c038"
dependencies = [
- "ahash 0.7.6",
+ "ahash",
]
[[package]]
@@ -951,6 +1088,15 @@ dependencies = [
"libc",
]
+[[package]]
+name = "hermit-abi"
+version = "0.2.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ee512640fe35acbfb4bb779db6f0d80704c2cacfa2e39b601ef3e3f47d1ae4c7"
+dependencies = [
+ "libc",
+]
+
[[package]]
name = "http"
version = "0.2.8"
@@ -985,6 +1131,12 @@ version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c4a1e36c821dbe04574f602848a19f742f4fb3c98d40449f11bcad18d6b17421"
+[[package]]
+name = "humantime"
+version = "2.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4"
+
[[package]]
name = "hyper"
version = "0.14.20"
@@ -1063,7 +1215,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "10a35a97730320ffe8e2d410b5d3b69279b98d2c14bdb8b70ea89ecf7888d41e"
dependencies = [
"autocfg",
- "hashbrown",
+ "hashbrown 0.12.3",
]
[[package]]
@@ -1087,12 +1239,34 @@ version = "3.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8bb03732005da905c88227371639bf1ad885cc712789c011c31c5fb3ab3ccf02"
+[[package]]
+name = "io-lifetimes"
+version = "1.0.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "46112a93252b123d31a119a8d1a1ac19deac4fac6e0e8b0df58f0d4e5870e63c"
+dependencies = [
+ "libc",
+ "windows-sys 0.42.0",
+]
+
[[package]]
name = "ipnet"
version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "879d54834c8c76457ef4293a689b2a8c59b076067ad77b15efafbb05f92a592b"
+[[package]]
+name = "is-terminal"
+version = "0.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "927609f78c2913a6f6ac3c27a4fe87f43e2a35367c0c4b0f8265e8f49a104330"
+dependencies = [
+ "hermit-abi 0.2.6",
+ "io-lifetimes",
+ "rustix",
+ "windows-sys 0.42.0",
+]
+
[[package]]
name = "itertools"
version = "0.10.5"
@@ -1243,6 +1417,17 @@ dependencies = [
"cc",
]
+[[package]]
+name = "libtest-mimic"
+version = "0.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d7b603516767d1ab23d0de09d023e62966c3322f7148297c35cf3d97aa8b37fa"
+dependencies = [
+ "clap",
+ "termcolor",
+ "threadpool",
+]
+
[[package]]
name = "link-cplusplus"
version = "1.0.7"
@@ -1252,6 +1437,12 @@ dependencies = [
"cc",
]
+[[package]]
+name = "linux-raw-sys"
+version = "0.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8f9f08d8963a6c613f4b1a78f4f4a4dbfadf8e6545b2d72861731e4858b8b47f"
+
[[package]]
name = "lock_api"
version = "0.4.9"
@@ -1291,6 +1482,17 @@ dependencies = [
"libc",
]
+[[package]]
+name = "lzma-sys"
+version = "0.1.20"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5fda04ab3764e6cde78b9974eec4f779acaba7c4e84b36eca3cf77c581b85d27"
+dependencies = [
+ "cc",
+ "libc",
+ "pkg-config",
+]
+
[[package]]
name = "md-5"
version = "0.10.5"
@@ -1348,7 +1550,7 @@ dependencies = [
"libc",
"log",
"wasi 0.11.0+wasi-snapshot-preview1",
- "windows-sys",
+ "windows-sys 0.36.1",
]
[[package]]
@@ -1454,7 +1656,7 @@ version = "1.13.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "19e64526ebdee182341572e50e9ad03965aa510cd94427a4549448f285e957a1"
dependencies = [
- "hermit-abi",
+ "hermit-abi 0.1.19",
"libc",
]
@@ -1494,21 +1696,18 @@ checksum = "e82dad04139b71a90c080c8463fe0dc7902db5192d939bd0950f074d014339e1"
[[package]]
name = "ordered-float"
-version = "1.1.1"
+version = "2.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3305af35278dd29f46fcdd139e0b1fbfae2153f0e5928b39b035542dd31e37b7"
+checksum = "7940cf2ca942593318d07fcf2596cdca60a85c9e7fab408a5e21a4f9dcd40d87"
dependencies = [
"num-traits",
]
[[package]]
-name = "ordered-float"
-version = "3.2.0"
+name = "os_str_bytes"
+version = "6.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "129d36517b53c461acc6e1580aeb919c8ae6708a4b1eae61c4463a615d4f0411"
-dependencies = [
- "num-traits",
-]
+checksum = "9b7820b9daea5457c9f21c69448905d723fbd21136ccf521748f23fd49e723ee"
[[package]]
name = "parking_lot"
@@ -1530,31 +1729,39 @@ dependencies = [
"libc",
"redox_syscall",
"smallvec",
- "windows-sys",
+ "windows-sys 0.36.1",
]
[[package]]
name = "parquet"
-version = "26.0.0"
+version = "28.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3bf8fa7ab6572791325a8595f55dc532dde88b996ae10a5ca8a2db746784ecc4"
+checksum = "21433e9209111bb3720b747f2f137e0d115af1af0420a7a1c26b6e88227fa353"
dependencies = [
- "ahash 0.8.0",
- "arrow",
+ "ahash",
+ "arrow-array",
+ "arrow-buffer",
+ "arrow-cast",
+ "arrow-data",
+ "arrow-ipc",
+ "arrow-schema",
+ "arrow-select",
"base64",
"brotli",
"bytes",
"chrono",
"flate2",
"futures",
- "hashbrown",
+ "hashbrown 0.13.1",
"lz4",
"num",
"num-bigint",
+ "paste",
"seq-macro",
"snap",
"thrift",
"tokio",
+ "twox-hash",
"zstd",
]
@@ -1594,6 +1801,30 @@ version = "0.2.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eb9f9e6e233e5c4a35559a617bf40a4ec447db2e84c20b55a6f83167b7e57872"
+[[package]]
+name = "proc-macro-error"
+version = "1.0.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c"
+dependencies = [
+ "proc-macro-error-attr",
+ "proc-macro2",
+ "quote",
+ "syn",
+ "version_check",
+]
+
+[[package]]
+name = "proc-macro-error-attr"
+version = "1.0.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "version_check",
+]
+
[[package]]
name = "proc-macro-hack"
version = "0.5.19"
@@ -1776,9 +2007,9 @@ dependencies = [
[[package]]
name = "regex"
-version = "1.6.0"
+version = "1.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4c4eb3267174b8c6c2f654116623910a0fef09c4753f8dd83db29c48a0df988b"
+checksum = "e076559ef8e241f2ae3479e36f97bd5741c0330689e217ad51ce2c76808b868a"
dependencies = [
"aho-corasick",
"memchr",
@@ -1867,6 +2098,20 @@ version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3582f63211428f83597b51b2ddb88e2a91a9d52d12831f9d08f5e624e8977422"
+[[package]]
+name = "rustix"
+version = "0.36.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cb93e85278e08bb5788653183213d3a60fc242b10cb9be96586f5a73dcb67c23"
+dependencies = [
+ "bitflags",
+ "errno",
+ "io-lifetimes",
+ "libc",
+ "linux-raw-sys",
+ "windows-sys 0.42.0",
+]
+
[[package]]
name = "rustls"
version = "0.20.6"
@@ -2050,11 +2295,30 @@ version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d"
+[[package]]
+name = "sqllogictest"
+version = "0.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ba41e01d229d7725401de371e323851f82d839d68732a06162405362b60852fe"
+dependencies = [
+ "async-trait",
+ "difference",
+ "futures",
+ "glob",
+ "humantime",
+ "itertools",
+ "libtest-mimic",
+ "regex",
+ "tempfile",
+ "thiserror",
+ "tracing",
+]
+
[[package]]
name = "sqlparser"
-version = "0.26.0"
+version = "0.27.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "86be66ea0b2b22749cfa157d16e2e84bf793e626a3375f4d378dc289fa03affb"
+checksum = "aba319938d4bfe250a769ac88278b629701024fe16f34257f9563bc628081970"
dependencies = [
"log",
]
@@ -2065,6 +2329,12 @@ version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
+[[package]]
+name = "strsim"
+version = "0.10.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623"
+
[[package]]
name = "strum"
version = "0.24.1"
@@ -2150,15 +2420,24 @@ dependencies = [
"syn",
]
+[[package]]
+name = "threadpool"
+version = "1.8.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d050e60b33d41c19108b32cea32164033a9013fe3b46cbd4457559bfbf77afaa"
+dependencies = [
+ "num_cpus",
+]
+
[[package]]
name = "thrift"
-version = "0.16.0"
+version = "0.17.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "09678c4cdbb4eed72e18b7c2af1329c69825ed16fcbac62d083fc3e2b0590ff0"
+checksum = "7e54bc85fc7faa8bc175c4bab5b92ba8d9a3ce893d0e9f42cc455c8ab16a9e09"
dependencies = [
"byteorder",
"integer-encoding",
- "ordered-float 1.1.1",
+ "ordered-float",
]
[[package]]
@@ -2295,6 +2574,16 @@ version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "59547bce71d9c38b83d9c0e92b6066c4253371f15005def0c30d9657f50c7642"
+[[package]]
+name = "twox-hash"
+version = "1.6.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "97fee6b57c6a41524a810daee9286c02d7752c4253064d0b05472833a438f675"
+dependencies = [
+ "cfg-if",
+ "static_assertions",
+]
+
[[package]]
name = "typed-builder"
version = "0.10.0"
@@ -2558,43 +2847,100 @@ version = "0.36.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ea04155a16a59f9eab786fe12a4a450e75cdb175f9e0d80da1e17db09f55b8d2"
dependencies = [
- "windows_aarch64_msvc",
- "windows_i686_gnu",
- "windows_i686_msvc",
- "windows_x86_64_gnu",
- "windows_x86_64_msvc",
+ "windows_aarch64_msvc 0.36.1",
+ "windows_i686_gnu 0.36.1",
+ "windows_i686_msvc 0.36.1",
+ "windows_x86_64_gnu 0.36.1",
+ "windows_x86_64_msvc 0.36.1",
]
+[[package]]
+name = "windows-sys"
+version = "0.42.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5a3e1820f08b8513f676f7ab6c1f99ff312fb97b553d30ff4dd86f9f15728aa7"
+dependencies = [
+ "windows_aarch64_gnullvm",
+ "windows_aarch64_msvc 0.42.0",
+ "windows_i686_gnu 0.42.0",
+ "windows_i686_msvc 0.42.0",
+ "windows_x86_64_gnu 0.42.0",
+ "windows_x86_64_gnullvm",
+ "windows_x86_64_msvc 0.42.0",
+]
+
+[[package]]
+name = "windows_aarch64_gnullvm"
+version = "0.42.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "41d2aa71f6f0cbe00ae5167d90ef3cfe66527d6f613ca78ac8024c3ccab9a19e"
+
[[package]]
name = "windows_aarch64_msvc"
version = "0.36.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9bb8c3fd39ade2d67e9874ac4f3db21f0d710bee00fe7cab16949ec184eeaa47"
+[[package]]
+name = "windows_aarch64_msvc"
+version = "0.42.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dd0f252f5a35cac83d6311b2e795981f5ee6e67eb1f9a7f64eb4500fbc4dcdb4"
+
[[package]]
name = "windows_i686_gnu"
version = "0.36.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "180e6ccf01daf4c426b846dfc66db1fc518f074baa793aa7d9b9aaeffad6a3b6"
+[[package]]
+name = "windows_i686_gnu"
+version = "0.42.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fbeae19f6716841636c28d695375df17562ca208b2b7d0dc47635a50ae6c5de7"
+
[[package]]
name = "windows_i686_msvc"
version = "0.36.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2e7917148b2812d1eeafaeb22a97e4813dfa60a3f8f78ebe204bcc88f12f024"
+[[package]]
+name = "windows_i686_msvc"
+version = "0.42.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "84c12f65daa39dd2babe6e442988fc329d6243fdce47d7d2d155b8d874862246"
+
[[package]]
name = "windows_x86_64_gnu"
version = "0.36.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4dcd171b8776c41b97521e5da127a2d86ad280114807d0b2ab1e462bc764d9e1"
+[[package]]
+name = "windows_x86_64_gnu"
+version = "0.42.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bf7b1b21b5362cbc318f686150e5bcea75ecedc74dd157d874d754a2ca44b0ed"
+
+[[package]]
+name = "windows_x86_64_gnullvm"
+version = "0.42.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "09d525d2ba30eeb3297665bd434a54297e4170c7f1a44cad4ef58095b4cd2028"
+
[[package]]
name = "windows_x86_64_msvc"
version = "0.36.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c811ca4a8c853ef420abd8592ba53ddbbac90410fab6903b3e79972a631f7680"
+[[package]]
+name = "windows_x86_64_msvc"
+version = "0.42.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f40009d85759725a34da6d89a94e63d7bdc50a862acf0dbc7c8e488f1edcb6f5"
+
[[package]]
name = "winreg"
version = "0.10.1"
@@ -2604,6 +2950,15 @@ dependencies = [
"winapi",
]
+[[package]]
+name = "xz2"
+version = "0.1.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "388c44dc09d76f1536602ead6d325eb532f5c122f17782bd57fb47baeeb767e2"
+dependencies = [
+ "lzma-sys",
+]
+
[[package]]
name = "zerocopy"
version = "0.6.1"
@@ -2627,18 +2982,18 @@ dependencies = [
[[package]]
name = "zstd"
-version = "0.11.2+zstd.1.5.2"
+version = "0.12.0+zstd.1.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "20cc960326ece64f010d2d2107537f26dc589a6573a316bd5b1dba685fa5fde4"
+checksum = "b8148aa921e9d53217ab9322f8553bd130f7ae33489db68b381d76137d2e6374"
dependencies = [
"zstd-safe",
]
[[package]]
name = "zstd-safe"
-version = "5.0.2+zstd.1.5.2"
+version = "6.0.2+zstd.1.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1d2a5585e04f9eea4b2a3d1eca508c4dee9592a89ef6f450c11719da0726f4db"
+checksum = "a6cf39f730b440bab43da8fb5faf5f254574462f73f260f85f7987f32154ff17"
dependencies = [
"libc",
"zstd-sys",
diff --git a/Cargo.toml b/Cargo.toml
index 3f3eeb4..7ad7d21 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -34,10 +34,10 @@ default = ["mimalloc"]
tokio = { version = "1.0", features = ["macros", "rt", "rt-multi-thread", "sync"] }
rand = "0.7"
pyo3 = { version = "~0.17.1", features = ["extension-module", "abi3", "abi3-py37"] }
-datafusion = { version = "^14.0.0", features = ["pyarrow", "avro"] }
-datafusion-expr = { version = "^14.0.0" }
-datafusion-optimizer = { version = "^14.0.0" }
-datafusion-common = { version = "^14.0.0", features = ["pyarrow"] }
+datafusion = { version = "^15.0.0", features = ["pyarrow", "avro"] }
+datafusion-expr = { version = "^15.0.0" }
+datafusion-optimizer = { version = "^15.0.0" }
+datafusion-common = { version = "^15.0.0", features = ["pyarrow"] }
uuid = { version = "0.8", features = ["v4"] }
mimalloc = { version = "*", optional = true, default-features = false }
async-trait = "0.1"
diff --git a/datafusion/tests/test_sql.py b/datafusion/tests/test_sql.py
index f5e8fbf..19c2766 100644
--- a/datafusion/tests/test_sql.py
+++ b/datafusion/tests/test_sql.py
@@ -108,7 +108,7 @@ def test_register_parquet_partitioned(ctx, tmp_path):
ctx.register_parquet(
"datapp",
str(dir_root),
- table_partition_cols=["grp"],
+ table_partition_cols=[("grp", "string")],
parquet_pruning=True,
file_extension=".parquet",
)
diff --git a/src/context.rs b/src/context.rs
index 9f6ef30..344d5c7 100644
--- a/src/context.rs
+++ b/src/context.rs
@@ -35,7 +35,7 @@ use crate::store::StorageContexts;
use crate::udaf::PyAggregateUDF;
use crate::udf::PyScalarUDF;
use crate::utils::wait_for_future;
-use datafusion::arrow::datatypes::Schema;
+use datafusion::arrow::datatypes::{DataType, Schema};
use datafusion::arrow::pyarrow::PyArrowType;
use datafusion::arrow::record_batch::RecordBatch;
use datafusion::config::ConfigOptions;
@@ -218,13 +218,13 @@ impl PySessionContext {
&mut self,
name: &str,
path: &str,
- table_partition_cols: Vec<String>,
+ table_partition_cols: Vec<(String, String)>,
parquet_pruning: bool,
file_extension: &str,
py: Python,
) -> PyResult<()> {
let mut options = ParquetReadOptions::default()
- .table_partition_cols(table_partition_cols)
+ .table_partition_cols(convert_table_partition_cols(table_partition_cols)?)
.parquet_pruning(parquet_pruning);
options.file_extension = file_extension;
let result = self.ctx.register_parquet(name, path, options);
@@ -340,14 +340,14 @@ impl PySessionContext {
schema: Option<PyArrowType<Schema>>,
schema_infer_max_records: usize,
file_extension: &str,
- table_partition_cols: Vec<String>,
+ table_partition_cols: Vec<(String, String)>,
py: Python,
) -> PyResult<PyDataFrame> {
let path = path
.to_str()
.ok_or_else(|| PyValueError::new_err("Unable to convert path to a string"))?;
-
- let mut options = NdJsonReadOptions::default().table_partition_cols(table_partition_cols);
+ let mut options = NdJsonReadOptions::default()
+ .table_partition_cols(convert_table_partition_cols(table_partition_cols)?);
options.schema = schema.map(|s| Arc::new(s.0));
options.schema_infer_max_records = schema_infer_max_records;
options.file_extension = file_extension;
@@ -374,7 +374,7 @@ impl PySessionContext {
delimiter: &str,
schema_infer_max_records: usize,
file_extension: &str,
- table_partition_cols: Vec<String>,
+ table_partition_cols: Vec<(String, String)>,
py: Python,
) -> PyResult<PyDataFrame> {
let path = path
@@ -393,7 +393,7 @@ impl PySessionContext {
.delimiter(delimiter[0])
.schema_infer_max_records(schema_infer_max_records)
.file_extension(file_extension)
- .table_partition_cols(table_partition_cols);
+ .table_partition_cols(convert_table_partition_cols(table_partition_cols)?);
if let Some(py_schema) = schema {
options.schema = Some(&py_schema.0);
@@ -417,14 +417,14 @@ impl PySessionContext {
fn read_parquet(
&self,
path: &str,
- table_partition_cols: Vec<String>,
+ table_partition_cols: Vec<(String, String)>,
parquet_pruning: bool,
file_extension: &str,
skip_metadata: bool,
py: Python,
) -> PyResult<PyDataFrame> {
let mut options = ParquetReadOptions::default()
- .table_partition_cols(table_partition_cols)
+ .table_partition_cols(convert_table_partition_cols(table_partition_cols)?)
.parquet_pruning(parquet_pruning)
.skip_metadata(skip_metadata);
options.file_extension = file_extension;
@@ -444,11 +444,12 @@ impl PySessionContext {
&self,
path: &str,
schema: Option<PyArrowType<Schema>>,
- table_partition_cols: Vec<String>,
+ table_partition_cols: Vec<(String, String)>,
file_extension: &str,
py: Python,
) -> PyResult<PyDataFrame> {
- let mut options = AvroReadOptions::default().table_partition_cols(table_partition_cols);
+ let mut options = AvroReadOptions::default()
+ .table_partition_cols(convert_table_partition_cols(table_partition_cols)?);
options.file_extension = file_extension;
options.schema = schema.map(|s| Arc::new(s.0));
@@ -457,3 +458,18 @@ impl PySessionContext {
Ok(df)
}
}
+
+fn convert_table_partition_cols(
+ table_partition_cols: Vec<(String, String)>,
+) -> Result<Vec<(String, DataType)>, DataFusionError> {
+ table_partition_cols
+ .into_iter()
+ .map(|(name, ty)| match ty.as_str() {
+ "string" => Ok((name, DataType::Utf8)),
+ _ => Err(DataFusionError::Common(format!(
+ "Unsupported data type '{}' for partition column",
+ ty
+ ))),
+ })
+ .collect::<Result<Vec<_>, _>>()
+}
diff --git a/src/dataset.rs b/src/dataset.rs
index 8208acc..0a2c7f5 100644
--- a/src/dataset.rs
+++ b/src/dataset.rs
@@ -98,7 +98,7 @@ impl TableProvider for Dataset {
async fn scan(
&self,
_ctx: &SessionState,
- projection: &Option<Vec<usize>>,
+ projection: Option<&Vec<usize>>,
filters: &[Expr],
// limit can be used to reduce the amount scanned
// from the datasource as a performance optimization.
@@ -108,7 +108,7 @@ impl TableProvider for Dataset {
) -> DFResult<Arc<dyn ExecutionPlan>> {
Python::with_gil(|py| {
let plan: Arc<dyn ExecutionPlan> = Arc::new(
- DatasetExec::new(py, self.dataset.as_ref(py), projection.clone(), filters)
+ DatasetExec::new(py, self.dataset.as_ref(py), projection.cloned(), filters)
.map_err(|err| DataFusionError::External(Box::new(err)))?,
);
Ok(plan)
diff --git a/src/udaf.rs b/src/udaf.rs
index d985058..66dc274 100644
--- a/src/udaf.rs
+++ b/src/udaf.rs
@@ -95,6 +95,10 @@ impl Accumulator for RustAccumulator {
Ok(())
})
}
+
+ fn size(&self) -> usize {
+ std::mem::size_of_val(self)
+ }
}
pub fn to_rust_accumulator(accum: PyObject) -> AccumulatorFunctionImplementation {