This is an automated email from the ASF dual-hosted git repository.
jiekaichang pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/mahout.git
The following commit(s) were added to refs/heads/main by this push:
new d1e630e26 [QDP] Add remote URL support for cloud object storage (S3)
(#1130)
d1e630e26 is described below
commit d1e630e26745bf326fe8eb7a77f67a1c118245c4
Author: Guan-Ming (Wesley) Chiu <[email protected]>
AuthorDate: Tue Mar 10 01:26:47 2026 +0800
[QDP] Add remote URL support for cloud object storage (S3) (#1130)
* [QDP] Add remote URL support for cloud object storage (S3)
* Update tests
Signed-off-by: Guan-Ming Chiu <[email protected]>
---------
Signed-off-by: Guan-Ming Chiu <[email protected]>
Co-authored-by: Ryan Huang <[email protected]>
---
qdp/Cargo.lock | 1381 +++++++++++++++++++++-
qdp/Cargo.toml | 6 +
qdp/qdp-core/Cargo.toml | 5 +
qdp/qdp-core/src/lib.rs | 2 +
qdp/qdp-core/src/remote.rs | 255 ++++
qdp/qdp-python/Cargo.toml | 1 +
qdp/qdp-python/qumat_qdp/loader.py | 5 +-
qdp/qdp-python/src/engine.rs | 32 +-
qdp/qdp-python/tests/test_quantum_data_loader.py | 53 +
9 files changed, 1723 insertions(+), 17 deletions(-)
diff --git a/qdp/Cargo.lock b/qdp/Cargo.lock
index 43a9ad22e..fdf8cca56 100644
--- a/qdp/Cargo.lock
+++ b/qdp/Cargo.lock
@@ -102,7 +102,7 @@ version = "1.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc"
dependencies = [
- "windows-sys",
+ "windows-sys 0.61.2",
]
[[package]]
@@ -113,7 +113,7 @@ checksum =
"291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d"
dependencies = [
"anstyle",
"once_cell_polyfill",
- "windows-sys",
+ "windows-sys 0.61.2",
]
[[package]]
@@ -348,6 +348,17 @@ dependencies = [
"regex-syntax",
]
+[[package]]
+name = "async-trait"
+version = "0.1.89"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
[[package]]
name = "atoi"
version = "2.0.0"
@@ -357,6 +368,12 @@ dependencies = [
"num-traits",
]
+[[package]]
+name = "atomic-waker"
+version = "1.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0"
+
[[package]]
name = "autocfg"
version = "1.5.0"
@@ -473,6 +490,12 @@ version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
+[[package]]
+name = "cfg_aliases"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724"
+
[[package]]
name = "chrono"
version = "0.4.43"
@@ -481,6 +504,7 @@ checksum =
"fac4744fb15ae8337dc853fee7fb3f4e48c0fbaa23d0afe49c447b4fab126118"
dependencies = [
"iana-time-zone",
"num-traits",
+ "serde",
"windows-link",
]
@@ -526,6 +550,16 @@ version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "245097e9a4535ee1e3e3931fcfcd55a796a44c643e8596ff6566d68f09b87bbc"
+[[package]]
+name = "core-foundation"
+version = "0.10.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b2a6cd9ae233e7f62ba4e9353e81a88df7fc8a5987b8d445b4d90c879bd156f6"
+dependencies = [
+ "core-foundation-sys",
+ "libc",
+]
+
[[package]]
name = "core-foundation-sys"
version = "0.8.7"
@@ -705,7 +739,7 @@ source =
"registry+https://github.com/rust-lang/crates.io-index"
checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb"
dependencies = [
"libc",
- "windows-sys",
+ "windows-sys 0.61.2",
]
[[package]]
@@ -746,6 +780,109 @@ dependencies = [
"miniz_oxide",
]
+[[package]]
+name = "fnv"
+version = "1.0.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
+
+[[package]]
+name = "form_urlencoded"
+version = "1.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cb4cb245038516f5f85277875cdaa4f7d2c9a0fa0468de06ed190163b1581fcf"
+dependencies = [
+ "percent-encoding",
+]
+
+[[package]]
+name = "futures"
+version = "0.3.32"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8b147ee9d1f6d097cef9ce628cd2ee62288d963e16fb287bd9286455b241382d"
+dependencies = [
+ "futures-channel",
+ "futures-core",
+ "futures-executor",
+ "futures-io",
+ "futures-sink",
+ "futures-task",
+ "futures-util",
+]
+
+[[package]]
+name = "futures-channel"
+version = "0.3.32"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "07bbe89c50d7a535e539b8c17bc0b49bdb77747034daa8087407d655f3f7cc1d"
+dependencies = [
+ "futures-core",
+ "futures-sink",
+]
+
+[[package]]
+name = "futures-core"
+version = "0.3.32"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7e3450815272ef58cec6d564423f6e755e25379b217b0bc688e295ba24df6b1d"
+
+[[package]]
+name = "futures-executor"
+version = "0.3.32"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "baf29c38818342a3b26b5b923639e7b1f4a61fc5e76102d4b1981c6dc7a7579d"
+dependencies = [
+ "futures-core",
+ "futures-task",
+ "futures-util",
+]
+
+[[package]]
+name = "futures-io"
+version = "0.3.32"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cecba35d7ad927e23624b22ad55235f2239cfa44fd10428eecbeba6d6a717718"
+
+[[package]]
+name = "futures-macro"
+version = "0.3.32"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e835b70203e41293343137df5c0664546da5745f82ec9b84d40be8336958447b"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "futures-sink"
+version = "0.3.32"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c39754e157331b013978ec91992bde1ac089843443c49cbc7f46150b0fad0893"
+
+[[package]]
+name = "futures-task"
+version = "0.3.32"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "037711b3d59c33004d3856fbdc83b99d4ff37a24768fa1be9ce3538a1cde4393"
+
+[[package]]
+name = "futures-util"
+version = "0.3.32"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "389ca41296e6190b48053de0321d02a77f32f8a5d2461dd38762c0593805c6d6"
+dependencies = [
+ "futures-channel",
+ "futures-core",
+ "futures-io",
+ "futures-macro",
+ "futures-sink",
+ "futures-task",
+ "memchr",
+ "pin-project-lite",
+ "slab",
+]
+
[[package]]
name = "generic-array"
version = "0.14.7"
@@ -763,8 +900,10 @@ source =
"registry+https://github.com/rust-lang/crates.io-index"
checksum = "ff2abc00be7fca6ebc474524697ae276ad847ad0a6b3faa4bcb027e9a4614ad0"
dependencies = [
"cfg-if",
+ "js-sys",
"libc",
"wasi",
+ "wasm-bindgen",
]
[[package]]
@@ -774,9 +913,30 @@ source =
"registry+https://github.com/rust-lang/crates.io-index"
checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd"
dependencies = [
"cfg-if",
+ "js-sys",
"libc",
"r-efi",
"wasip2",
+ "wasm-bindgen",
+]
+
+[[package]]
+name = "h2"
+version = "0.4.13"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2f44da3a8150a6703ed5d34e164b875fd14c2cdab9af1252a9a1020bde2bdc54"
+dependencies = [
+ "atomic-waker",
+ "bytes",
+ "fnv",
+ "futures-core",
+ "futures-sink",
+ "http",
+ "indexmap",
+ "slab",
+ "tokio",
+ "tokio-util",
+ "tracing",
]
[[package]]
@@ -818,6 +978,113 @@ dependencies = [
"digest",
]
+[[package]]
+name = "http"
+version = "1.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e3ba2a386d7f85a81f119ad7498ebe444d2e22c2af0b86b069416ace48b3311a"
+dependencies = [
+ "bytes",
+ "itoa",
+]
+
+[[package]]
+name = "http-body"
+version = "1.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184"
+dependencies = [
+ "bytes",
+ "http",
+]
+
+[[package]]
+name = "http-body-util"
+version = "0.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b021d93e26becf5dc7e1b75b1bed1fd93124b374ceb73f43d4d4eafec896a64a"
+dependencies = [
+ "bytes",
+ "futures-core",
+ "http",
+ "http-body",
+ "pin-project-lite",
+]
+
+[[package]]
+name = "httparse"
+version = "1.10.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87"
+
+[[package]]
+name = "humantime"
+version = "2.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "135b12329e5e3ce057a9f972339ea52bc954fe1e9358ef27f95e89716fbc5424"
+
+[[package]]
+name = "hyper"
+version = "1.8.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2ab2d4f250c3d7b1c9fcdff1cece94ea4e2dfbec68614f7b87cb205f24ca9d11"
+dependencies = [
+ "atomic-waker",
+ "bytes",
+ "futures-channel",
+ "futures-core",
+ "h2",
+ "http",
+ "http-body",
+ "httparse",
+ "itoa",
+ "pin-project-lite",
+ "pin-utils",
+ "smallvec",
+ "tokio",
+ "want",
+]
+
+[[package]]
+name = "hyper-rustls"
+version = "0.27.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e3c93eb611681b207e1fe55d5a71ecf91572ec8a6705cdb6857f7d8d5242cf58"
+dependencies = [
+ "http",
+ "hyper",
+ "hyper-util",
+ "rustls",
+ "rustls-native-certs",
+ "rustls-pki-types",
+ "tokio",
+ "tokio-rustls",
+ "tower-service",
+]
+
+[[package]]
+name = "hyper-util"
+version = "0.1.20"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "96547c2556ec9d12fb1578c4eaf448b04993e7fb79cbaad930a656880a6bdfa0"
+dependencies = [
+ "base64",
+ "bytes",
+ "futures-channel",
+ "futures-util",
+ "http",
+ "http-body",
+ "hyper",
+ "ipnet",
+ "libc",
+ "percent-encoding",
+ "pin-project-lite",
+ "socket2",
+ "tokio",
+ "tower-service",
+ "tracing",
+]
+
[[package]]
name = "iana-time-zone"
version = "0.1.65"
@@ -842,6 +1109,108 @@ dependencies = [
"cc",
]
+[[package]]
+name = "icu_collections"
+version = "2.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4c6b649701667bbe825c3b7e6388cb521c23d88644678e83c0c4d0a621a34b43"
+dependencies = [
+ "displaydoc",
+ "potential_utf",
+ "yoke",
+ "zerofrom",
+ "zerovec",
+]
+
+[[package]]
+name = "icu_locale_core"
+version = "2.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "edba7861004dd3714265b4db54a3c390e880ab658fec5f7db895fae2046b5bb6"
+dependencies = [
+ "displaydoc",
+ "litemap",
+ "tinystr",
+ "writeable",
+ "zerovec",
+]
+
+[[package]]
+name = "icu_normalizer"
+version = "2.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5f6c8828b67bf8908d82127b2054ea1b4427ff0230ee9141c54251934ab1b599"
+dependencies = [
+ "icu_collections",
+ "icu_normalizer_data",
+ "icu_properties",
+ "icu_provider",
+ "smallvec",
+ "zerovec",
+]
+
+[[package]]
+name = "icu_normalizer_data"
+version = "2.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7aedcccd01fc5fe81e6b489c15b247b8b0690feb23304303a9e560f37efc560a"
+
+[[package]]
+name = "icu_properties"
+version = "2.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "020bfc02fe870ec3a66d93e677ccca0562506e5872c650f893269e08615d74ec"
+dependencies = [
+ "icu_collections",
+ "icu_locale_core",
+ "icu_properties_data",
+ "icu_provider",
+ "zerotrie",
+ "zerovec",
+]
+
+[[package]]
+name = "icu_properties_data"
+version = "2.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "616c294cf8d725c6afcd8f55abc17c56464ef6211f9ed59cccffe534129c77af"
+
+[[package]]
+name = "icu_provider"
+version = "2.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "85962cf0ce02e1e0a629cc34e7ca3e373ce20dda4c4d7294bbd0bf1fdb59e614"
+dependencies = [
+ "displaydoc",
+ "icu_locale_core",
+ "writeable",
+ "yoke",
+ "zerofrom",
+ "zerotrie",
+ "zerovec",
+]
+
+[[package]]
+name = "idna"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3b0875f23caa03898994f6ddc501886a45c7d3d62d04d2d90788d47be1b1e4de"
+dependencies = [
+ "idna_adapter",
+ "smallvec",
+ "utf8_iter",
+]
+
+[[package]]
+name = "idna_adapter"
+version = "1.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3acae9609540aa318d1bc588455225fb2085b9ed0c4f6bd0d9d5bcd86f1a0344"
+dependencies = [
+ "icu_normalizer",
+ "icu_properties",
+]
+
[[package]]
name = "indexmap"
version = "2.13.0"
@@ -876,6 +1245,22 @@ version = "3.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8bb03732005da905c88227371639bf1ad885cc712789c011c31c5fb3ab3ccf02"
+[[package]]
+name = "ipnet"
+version = "2.12.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d98f6fed1fde3f8c21bc40a1abb88dd75e67924f9cffc3ef95607bad8017f8e2"
+
+[[package]]
+name = "iri-string"
+version = "0.7.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c91338f0783edbd6195decb37bae672fd3b165faffb89bf7b9e6942f8b1a731a"
+dependencies = [
+ "memchr",
+ "serde",
+]
+
[[package]]
name = "is_terminal_polyfill"
version = "1.70.2"
@@ -891,6 +1276,15 @@ dependencies = [
"either",
]
+[[package]]
+name = "itertools"
+version = "0.13.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186"
+dependencies = [
+ "either",
+]
+
[[package]]
name = "itoa"
version = "1.0.17"
@@ -1032,12 +1426,33 @@ version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039"
+[[package]]
+name = "litemap"
+version = "0.8.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6373607a59f0be73a39b6fe456b8192fcc3585f602af20751600e974dd455e77"
+
+[[package]]
+name = "lock_api"
+version = "0.4.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965"
+dependencies = [
+ "scopeguard",
+]
+
[[package]]
name = "log"
version = "0.4.29"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897"
+[[package]]
+name = "lru-slab"
+version = "0.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154"
+
[[package]]
name = "lz4_flex"
version = "0.11.5"
@@ -1057,6 +1472,16 @@ dependencies = [
"rawpointer",
]
+[[package]]
+name = "md-5"
+version = "0.10.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf"
+dependencies = [
+ "cfg-if",
+ "digest",
+]
+
[[package]]
name = "memchr"
version = "2.7.6"
@@ -1082,6 +1507,17 @@ dependencies = [
"simd-adler32",
]
+[[package]]
+name = "mio"
+version = "1.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a69bcab0ad47271a0234d9422b131806bf3968021e5dc9328caf2d4cd58557fc"
+dependencies = [
+ "libc",
+ "wasi",
+ "windows-sys 0.61.2",
+]
+
[[package]]
name = "multimap"
version = "0.10.1"
@@ -1237,6 +1673,36 @@ dependencies = [
"cc",
]
+[[package]]
+name = "object_store"
+version = "0.11.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3cfccb68961a56facde1163f9319e0d15743352344e7808a11795fb99698dcaf"
+dependencies = [
+ "async-trait",
+ "base64",
+ "bytes",
+ "chrono",
+ "futures",
+ "humantime",
+ "hyper",
+ "itertools 0.13.0",
+ "md-5",
+ "parking_lot",
+ "percent-encoding",
+ "quick-xml",
+ "rand 0.8.5",
+ "reqwest",
+ "ring",
+ "serde",
+ "serde_json",
+ "snafu",
+ "tokio",
+ "tracing",
+ "url",
+ "walkdir",
+]
+
[[package]]
name = "once_cell"
version = "1.21.3"
@@ -1249,6 +1715,12 @@ version = "1.70.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe"
+[[package]]
+name = "openssl-probe"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7c87def4c32ab89d880effc9e097653c8da5d6ef28e6b539d313baaacfbafcbe"
+
[[package]]
name = "ordered-float"
version = "2.10.1"
@@ -1259,10 +1731,33 @@ dependencies = [
]
[[package]]
-name = "parquet"
-version = "54.3.1"
+name = "parking_lot"
+version = "0.12.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bfb15796ac6f56b429fd99e33ba133783ad75b27c36b4b5ce06f1f82cc97754e"
+checksum = "93857453250e3077bd71ff98b6a65ea6621a19bb0f559a85248955ac12c45a1a"
+dependencies = [
+ "lock_api",
+ "parking_lot_core",
+]
+
+[[package]]
+name = "parking_lot_core"
+version = "0.9.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1"
+dependencies = [
+ "cfg-if",
+ "libc",
+ "redox_syscall",
+ "smallvec",
+ "windows-link",
+]
+
+[[package]]
+name = "parquet"
+version = "54.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bfb15796ac6f56b429fd99e33ba133783ad75b27c36b4b5ce06f1f82cc97754e"
dependencies = [
"ahash",
"arrow-array",
@@ -1298,7 +1793,7 @@ source =
"registry+https://github.com/rust-lang/crates.io-index"
checksum = "7676374caaee8a325c9e7a2ae557f216c5563a171d6997b0ef8a65af35147700"
dependencies = [
"base64ct",
- "rand_core",
+ "rand_core 0.6.4",
"subtle",
]
@@ -1320,6 +1815,12 @@ dependencies = [
"sha2",
]
+[[package]]
+name = "percent-encoding"
+version = "2.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220"
+
[[package]]
name = "pest"
version = "2.8.5"
@@ -1373,6 +1874,18 @@ dependencies = [
"indexmap",
]
+[[package]]
+name = "pin-project-lite"
+version = "0.2.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd"
+
+[[package]]
+name = "pin-utils"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
+
[[package]]
name = "pkg-config"
version = "0.3.32"
@@ -1394,6 +1907,15 @@ dependencies = [
"portable-atomic",
]
+[[package]]
+name = "potential_utf"
+version = "0.1.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b73949432f5e2a09657003c25bca5e19a0e9c84f8058ca374f49e0ebe605af77"
+dependencies = [
+ "zerovec",
+]
+
[[package]]
name = "powerfmt"
version = "0.2.0"
@@ -1446,7 +1968,7 @@ checksum =
"22505a5c94da8e3b7c2996394d1c933236c4d743e81a410bcca4e6989fc066a4"
dependencies = [
"bytes",
"heck",
- "itertools",
+ "itertools 0.12.1",
"log",
"multimap",
"once_cell",
@@ -1466,7 +1988,7 @@ source =
"registry+https://github.com/rust-lang/crates.io-index"
checksum = "81bddcdb20abf9501610992b6759a4c888aef7d1a7247ef75e2404275ac24af1"
dependencies = [
"anyhow",
- "itertools",
+ "itertools 0.12.1",
"proc-macro2",
"quote",
"syn",
@@ -1628,10 +2150,12 @@ dependencies = [
"bytes",
"cudarc",
"env_logger",
+ "futures",
"log",
"ndarray 0.16.1",
"ndarray-npy",
"nvtx",
+ "object_store",
"parquet",
"prost",
"prost-build",
@@ -1639,7 +2163,9 @@ dependencies = [
"qdp-kernels",
"rayon",
"tch",
+ "tempfile",
"thiserror 2.0.18",
+ "tokio",
]
[[package]]
@@ -1660,6 +2186,71 @@ dependencies = [
"qdp-core",
]
+[[package]]
+name = "quick-xml"
+version = "0.37.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "331e97a1af0bf59823e6eadffe373d7b27f485be8748f71471c662c1f269b7fb"
+dependencies = [
+ "memchr",
+ "serde",
+]
+
+[[package]]
+name = "quinn"
+version = "0.11.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b9e20a958963c291dc322d98411f541009df2ced7b5a4f2bd52337638cfccf20"
+dependencies = [
+ "bytes",
+ "cfg_aliases",
+ "pin-project-lite",
+ "quinn-proto",
+ "quinn-udp",
+ "rustc-hash",
+ "rustls",
+ "socket2",
+ "thiserror 2.0.18",
+ "tokio",
+ "tracing",
+ "web-time",
+]
+
+[[package]]
+name = "quinn-proto"
+version = "0.11.13"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f1906b49b0c3bc04b5fe5d86a77925ae6524a19b816ae38ce1e426255f1d8a31"
+dependencies = [
+ "bytes",
+ "getrandom 0.3.4",
+ "lru-slab",
+ "rand 0.9.2",
+ "ring",
+ "rustc-hash",
+ "rustls",
+ "rustls-pki-types",
+ "slab",
+ "thiserror 2.0.18",
+ "tinyvec",
+ "tracing",
+ "web-time",
+]
+
+[[package]]
+name = "quinn-udp"
+version = "0.5.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "addec6a0dcad8a8d96a771f815f0eaf55f9d1805756410b39f5fa81332574cbd"
+dependencies = [
+ "cfg_aliases",
+ "libc",
+ "once_cell",
+ "socket2",
+ "tracing",
+ "windows-sys 0.60.2",
+]
+
[[package]]
name = "quote"
version = "1.0.44"
@@ -1682,8 +2273,18 @@ source =
"registry+https://github.com/rust-lang/crates.io-index"
checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
dependencies = [
"libc",
- "rand_chacha",
- "rand_core",
+ "rand_chacha 0.3.1",
+ "rand_core 0.6.4",
+]
+
+[[package]]
+name = "rand"
+version = "0.9.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1"
+dependencies = [
+ "rand_chacha 0.9.0",
+ "rand_core 0.9.5",
]
[[package]]
@@ -1693,7 +2294,17 @@ source =
"registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
dependencies = [
"ppv-lite86",
- "rand_core",
+ "rand_core 0.6.4",
+]
+
+[[package]]
+name = "rand_chacha"
+version = "0.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb"
+dependencies = [
+ "ppv-lite86",
+ "rand_core 0.9.5",
]
[[package]]
@@ -1705,6 +2316,15 @@ dependencies = [
"getrandom 0.2.17",
]
+[[package]]
+name = "rand_core"
+version = "0.9.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "76afc826de14238e6e8c374ddcc1fa19e374fd8dd986b0d2af0d02377261d83c"
+dependencies = [
+ "getrandom 0.3.4",
+]
+
[[package]]
name = "rawpointer"
version = "0.2.1"
@@ -1731,6 +2351,15 @@ dependencies = [
"crossbeam-utils",
]
+[[package]]
+name = "redox_syscall"
+version = "0.5.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d"
+dependencies = [
+ "bitflags 2.10.0",
+]
+
[[package]]
name = "regex"
version = "1.12.2"
@@ -1760,6 +2389,62 @@ version = "0.8.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58"
+[[package]]
+name = "reqwest"
+version = "0.12.28"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "eddd3ca559203180a307f12d114c268abf583f59b03cb906fd0b3ff8646c1147"
+dependencies = [
+ "base64",
+ "bytes",
+ "futures-core",
+ "futures-util",
+ "h2",
+ "http",
+ "http-body",
+ "http-body-util",
+ "hyper",
+ "hyper-rustls",
+ "hyper-util",
+ "js-sys",
+ "log",
+ "percent-encoding",
+ "pin-project-lite",
+ "quinn",
+ "rustls",
+ "rustls-native-certs",
+ "rustls-pki-types",
+ "serde",
+ "serde_json",
+ "serde_urlencoded",
+ "sync_wrapper",
+ "tokio",
+ "tokio-rustls",
+ "tokio-util",
+ "tower",
+ "tower-http",
+ "tower-service",
+ "url",
+ "wasm-bindgen",
+ "wasm-bindgen-futures",
+ "wasm-streams",
+ "web-sys",
+]
+
+[[package]]
+name = "ring"
+version = "0.17.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7"
+dependencies = [
+ "cc",
+ "cfg-if",
+ "getrandom 0.2.17",
+ "libc",
+ "untrusted",
+ "windows-sys 0.52.0",
+]
+
[[package]]
name = "rustc-hash"
version = "2.1.1"
@@ -1785,7 +2470,54 @@ dependencies = [
"errno",
"libc",
"linux-raw-sys",
- "windows-sys",
+ "windows-sys 0.61.2",
+]
+
+[[package]]
+name = "rustls"
+version = "0.23.37"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "758025cb5fccfd3bc2fd74708fd4682be41d99e5dff73c377c0646c6012c73a4"
+dependencies = [
+ "once_cell",
+ "ring",
+ "rustls-pki-types",
+ "rustls-webpki",
+ "subtle",
+ "zeroize",
+]
+
+[[package]]
+name = "rustls-native-certs"
+version = "0.8.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "612460d5f7bea540c490b2b6395d8e34a953e52b491accd6c86c8164c5932a63"
+dependencies = [
+ "openssl-probe",
+ "rustls-pki-types",
+ "schannel",
+ "security-framework",
+]
+
+[[package]]
+name = "rustls-pki-types"
+version = "1.14.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "be040f8b0a225e40375822a563fa9524378b9d63112f53e19ffff34df5d33fdd"
+dependencies = [
+ "web-time",
+ "zeroize",
+]
+
+[[package]]
+name = "rustls-webpki"
+version = "0.103.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d7df23109aa6c1567d1c575b9952556388da57401e4ace1d15f79eedad0d8f53"
+dependencies = [
+ "ring",
+ "rustls-pki-types",
+ "untrusted",
]
[[package]]
@@ -1810,6 +2542,53 @@ dependencies = [
"serde_json",
]
+[[package]]
+name = "same-file"
+version = "1.0.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502"
+dependencies = [
+ "winapi-util",
+]
+
+[[package]]
+name = "schannel"
+version = "0.1.28"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "891d81b926048e76efe18581bf793546b4c0eaf8448d72be8de2bbee5fd166e1"
+dependencies = [
+ "windows-sys 0.61.2",
+]
+
+[[package]]
+name = "scopeguard"
+version = "1.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
+
+[[package]]
+name = "security-framework"
+version = "3.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d17b898a6d6948c3a8ee4372c17cb384f90d2e6e912ef00895b14fd7ab54ec38"
+dependencies = [
+ "bitflags 2.10.0",
+ "core-foundation",
+ "core-foundation-sys",
+ "libc",
+ "security-framework-sys",
+]
+
+[[package]]
+name = "security-framework-sys"
+version = "2.17.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6ce2691df843ecc5d231c0b14ece2acc3efb62c0a398c7e1d875f3983ce020e3"
+dependencies = [
+ "core-foundation-sys",
+ "libc",
+]
+
[[package]]
name = "semver"
version = "1.0.27"
@@ -1865,6 +2644,18 @@ dependencies = [
"zmij",
]
+[[package]]
+name = "serde_urlencoded"
+version = "0.7.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd"
+dependencies = [
+ "form_urlencoded",
+ "itoa",
+ "ryu",
+ "serde",
+]
+
[[package]]
name = "sha1"
version = "0.10.6"
@@ -1905,12 +2696,61 @@ version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e"
+[[package]]
+name = "slab"
+version = "0.4.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0c790de23124f9ab44544d7ac05d60440adc586479ce501c1d6d7da3cd8c9cf5"
+
+[[package]]
+name = "smallvec"
+version = "1.15.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03"
+
+[[package]]
+name = "snafu"
+version = "0.8.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6e84b3f4eacbf3a1ce05eac6763b4d629d60cbc94d632e4092c54ade71f1e1a2"
+dependencies = [
+ "snafu-derive",
+]
+
+[[package]]
+name = "snafu-derive"
+version = "0.8.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c1c97747dbf44bb1ca44a561ece23508e99cb592e862f22222dcf42f51d1e451"
+dependencies = [
+ "heck",
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
[[package]]
name = "snap"
version = "1.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b"
+[[package]]
+name = "socket2"
+version = "0.6.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "86f4aa3ad99f2088c990dfa82d367e19cb29268ed67c574d10d0a4bfe71f07e0"
+dependencies = [
+ "libc",
+ "windows-sys 0.60.2",
+]
+
+[[package]]
+name = "stable_deref_trait"
+version = "1.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596"
+
[[package]]
name = "static_assertions"
version = "1.1.0"
@@ -1934,6 +2774,26 @@ dependencies = [
"unicode-ident",
]
+[[package]]
+name = "sync_wrapper"
+version = "1.0.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263"
+dependencies = [
+ "futures-core",
+]
+
+[[package]]
+name = "synstructure"
+version = "0.13.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
[[package]]
name = "target-lexicon"
version = "0.13.4"
@@ -1950,7 +2810,7 @@ dependencies = [
"lazy_static",
"libc",
"ndarray 0.16.1",
- "rand",
+ "rand 0.8.5",
"safetensors",
"thiserror 1.0.69",
"torch-sys",
@@ -1967,7 +2827,7 @@ dependencies = [
"getrandom 0.3.4",
"once_cell",
"rustix",
- "windows-sys",
+ "windows-sys 0.61.2",
]
[[package]]
@@ -2049,6 +2909,80 @@ dependencies = [
"crunchy",
]
+[[package]]
+name = "tinystr"
+version = "0.8.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "42d3e9c45c09de15d06dd8acf5f4e0e399e85927b7f00711024eb7ae10fa4869"
+dependencies = [
+ "displaydoc",
+ "zerovec",
+]
+
+[[package]]
+name = "tinyvec"
+version = "1.10.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bfa5fdc3bce6191a1dbc8c02d5c8bffcf557bafa17c124c5264a458f1b0613fa"
+dependencies = [
+ "tinyvec_macros",
+]
+
+[[package]]
+name = "tinyvec_macros"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
+
+[[package]]
+name = "tokio"
+version = "1.50.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "27ad5e34374e03cfffefc301becb44e9dc3c17584f414349ebe29ed26661822d"
+dependencies = [
+ "bytes",
+ "libc",
+ "mio",
+ "pin-project-lite",
+ "socket2",
+ "tokio-macros",
+ "windows-sys 0.61.2",
+]
+
+[[package]]
+name = "tokio-macros"
+version = "2.6.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5c55a2eff8b69ce66c84f85e1da1c233edc36ceb85a2058d11b0d6a3c7e7569c"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "tokio-rustls"
+version = "0.26.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1729aa945f29d91ba541258c8df89027d5792d85a8841fb65e8bf0f4ede4ef61"
+dependencies = [
+ "rustls",
+ "tokio",
+]
+
+[[package]]
+name = "tokio-util"
+version = "0.7.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9ae9cec805b01e8fc3fd2fe289f89149a9b66dd16786abd8b19cfa7b48cb0098"
+dependencies = [
+ "bytes",
+ "futures-core",
+ "futures-sink",
+ "pin-project-lite",
+ "tokio",
+]
+
[[package]]
name = "torch-sys"
version = "0.22.0"
@@ -2061,6 +2995,88 @@ dependencies = [
"zip 0.6.6",
]
+[[package]]
+name = "tower"
+version = "0.5.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ebe5ef63511595f1344e2d5cfa636d973292adc0eec1f0ad45fae9f0851ab1d4"
+dependencies = [
+ "futures-core",
+ "futures-util",
+ "pin-project-lite",
+ "sync_wrapper",
+ "tokio",
+ "tower-layer",
+ "tower-service",
+]
+
+[[package]]
+name = "tower-http"
+version = "0.6.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d4e6559d53cc268e5031cd8429d05415bc4cb4aefc4aa5d6cc35fbf5b924a1f8"
+dependencies = [
+ "bitflags 2.10.0",
+ "bytes",
+ "futures-util",
+ "http",
+ "http-body",
+ "iri-string",
+ "pin-project-lite",
+ "tower",
+ "tower-layer",
+ "tower-service",
+]
+
+[[package]]
+name = "tower-layer"
+version = "0.3.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "121c2a6cda46980bb0fcd1647ffaf6cd3fc79a013de288782836f6df9c48780e"
+
+[[package]]
+name = "tower-service"
+version = "0.3.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3"
+
+[[package]]
+name = "tracing"
+version = "0.1.44"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100"
+dependencies = [
+ "pin-project-lite",
+ "tracing-attributes",
+ "tracing-core",
+]
+
+[[package]]
+name = "tracing-attributes"
+version = "0.1.31"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "tracing-core"
+version = "0.1.36"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "db97caf9d906fbde555dd62fa95ddba9eecfd14cb388e4f491a66d74cd5fb79a"
+dependencies = [
+ "once_cell",
+]
+
+[[package]]
+name = "try-lock"
+version = "0.2.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b"
+
[[package]]
name = "twox-hash"
version = "1.6.3"
@@ -2101,6 +3117,30 @@ version = "0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7264e107f553ccae879d21fbea1d6724ac785e8c3bfc762137959b5802826ef3"
+[[package]]
+name = "untrusted"
+version = "0.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1"
+
+[[package]]
+name = "url"
+version = "2.5.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ff67a8a4397373c3ef660812acab3268222035010ab8680ec4215f38ba3d0eed"
+dependencies = [
+ "form_urlencoded",
+ "idna",
+ "percent-encoding",
+ "serde",
+]
+
+[[package]]
+name = "utf8_iter"
+version = "1.0.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be"
+
[[package]]
name = "utf8parse"
version = "0.2.2"
@@ -2113,6 +3153,25 @@ version = "0.9.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a"
+[[package]]
+name = "walkdir"
+version = "2.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b"
+dependencies = [
+ "same-file",
+ "winapi-util",
+]
+
+[[package]]
+name = "want"
+version = "0.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bfa7760aed19e106de2c7c0b581b509f2f25d3dacaf737cb82ac61bc6d760b0e"
+dependencies = [
+ "try-lock",
+]
+
[[package]]
name = "wasi"
version = "0.11.1+wasi-snapshot-preview1"
@@ -2141,6 +3200,20 @@ dependencies = [
"wasm-bindgen-shared",
]
+[[package]]
+name = "wasm-bindgen-futures"
+version = "0.4.58"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "70a6e77fd0ae8029c9ea0063f87c46fde723e7d887703d74ad2616d792e51e6f"
+dependencies = [
+ "cfg-if",
+ "futures-util",
+ "js-sys",
+ "once_cell",
+ "wasm-bindgen",
+ "web-sys",
+]
+
[[package]]
name = "wasm-bindgen-macro"
version = "0.2.108"
@@ -2173,6 +3246,48 @@ dependencies = [
"unicode-ident",
]
+[[package]]
+name = "wasm-streams"
+version = "0.4.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "15053d8d85c7eccdbefef60f06769760a563c7f0a9d6902a13d35c7800b0ad65"
+dependencies = [
+ "futures-util",
+ "js-sys",
+ "wasm-bindgen",
+ "wasm-bindgen-futures",
+ "web-sys",
+]
+
+[[package]]
+name = "web-sys"
+version = "0.3.85"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "312e32e551d92129218ea9a2452120f4aabc03529ef03e4d0d82fb2780608598"
+dependencies = [
+ "js-sys",
+ "wasm-bindgen",
+]
+
+[[package]]
+name = "web-time"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb"
+dependencies = [
+ "js-sys",
+ "wasm-bindgen",
+]
+
+[[package]]
+name = "winapi-util"
+version = "0.1.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22"
+dependencies = [
+ "windows-sys 0.61.2",
+]
+
[[package]]
name = "windows-core"
version = "0.62.2"
@@ -2232,6 +3347,24 @@ dependencies = [
"windows-link",
]
+[[package]]
+name = "windows-sys"
+version = "0.52.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d"
+dependencies = [
+ "windows-targets 0.52.6",
+]
+
+[[package]]
+name = "windows-sys"
+version = "0.60.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb"
+dependencies = [
+ "windows-targets 0.53.5",
+]
+
[[package]]
name = "windows-sys"
version = "0.61.2"
@@ -2241,12 +3374,170 @@ dependencies = [
"windows-link",
]
+[[package]]
+name = "windows-targets"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
+dependencies = [
+ "windows_aarch64_gnullvm 0.52.6",
+ "windows_aarch64_msvc 0.52.6",
+ "windows_i686_gnu 0.52.6",
+ "windows_i686_gnullvm 0.52.6",
+ "windows_i686_msvc 0.52.6",
+ "windows_x86_64_gnu 0.52.6",
+ "windows_x86_64_gnullvm 0.52.6",
+ "windows_x86_64_msvc 0.52.6",
+]
+
+[[package]]
+name = "windows-targets"
+version = "0.53.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4945f9f551b88e0d65f3db0bc25c33b8acea4d9e41163edf90dcd0b19f9069f3"
+dependencies = [
+ "windows-link",
+ "windows_aarch64_gnullvm 0.53.1",
+ "windows_aarch64_msvc 0.53.1",
+ "windows_i686_gnu 0.53.1",
+ "windows_i686_gnullvm 0.53.1",
+ "windows_i686_msvc 0.53.1",
+ "windows_x86_64_gnu 0.53.1",
+ "windows_x86_64_gnullvm 0.53.1",
+ "windows_x86_64_msvc 0.53.1",
+]
+
+[[package]]
+name = "windows_aarch64_gnullvm"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
+
+[[package]]
+name = "windows_aarch64_gnullvm"
+version = "0.53.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53"
+
+[[package]]
+name = "windows_aarch64_msvc"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
+
+[[package]]
+name = "windows_aarch64_msvc"
+version = "0.53.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006"
+
+[[package]]
+name = "windows_i686_gnu"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"
+
+[[package]]
+name = "windows_i686_gnu"
+version = "0.53.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "960e6da069d81e09becb0ca57a65220ddff016ff2d6af6a223cf372a506593a3"
+
+[[package]]
+name = "windows_i686_gnullvm"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
+
+[[package]]
+name = "windows_i686_gnullvm"
+version = "0.53.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c"
+
+[[package]]
+name = "windows_i686_msvc"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
+
+[[package]]
+name = "windows_i686_msvc"
+version = "0.53.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2"
+
+[[package]]
+name = "windows_x86_64_gnu"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
+
+[[package]]
+name = "windows_x86_64_gnu"
+version = "0.53.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499"
+
+[[package]]
+name = "windows_x86_64_gnullvm"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
+
+[[package]]
+name = "windows_x86_64_gnullvm"
+version = "0.53.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1"
+
+[[package]]
+name = "windows_x86_64_msvc"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
+
+[[package]]
+name = "windows_x86_64_msvc"
+version = "0.53.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650"
+
[[package]]
name = "wit-bindgen"
version = "0.51.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5"
+[[package]]
+name = "writeable"
+version = "0.6.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9edde0db4769d2dc68579893f2306b26c6ecfbe0ef499b013d731b7b9247e0b9"
+
+[[package]]
+name = "yoke"
+version = "0.8.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "72d6e5c6afb84d73944e5cedb052c4680d5657337201555f9f2a16b7406d4954"
+dependencies = [
+ "stable_deref_trait",
+ "yoke-derive",
+ "zerofrom",
+]
+
+[[package]]
+name = "yoke-derive"
+version = "0.8.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b659052874eb698efe5b9e8cf382204678a0086ebf46982b79d6ca3182927e5d"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+ "synstructure",
+]
+
[[package]]
name = "zerocopy"
version = "0.8.37"
@@ -2267,6 +3558,66 @@ dependencies = [
"syn",
]
+[[package]]
+name = "zerofrom"
+version = "0.1.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "50cc42e0333e05660c3587f3bf9d0478688e15d870fab3346451ce7f8c9fbea5"
+dependencies = [
+ "zerofrom-derive",
+]
+
+[[package]]
+name = "zerofrom-derive"
+version = "0.1.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+ "synstructure",
+]
+
+[[package]]
+name = "zeroize"
+version = "1.8.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b97154e67e32c85465826e8bcc1c59429aaaf107c1e4a9e53c8d8ccd5eff88d0"
+
+[[package]]
+name = "zerotrie"
+version = "0.2.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2a59c17a5562d507e4b54960e8569ebee33bee890c70aa3fe7b97e85a9fd7851"
+dependencies = [
+ "displaydoc",
+ "yoke",
+ "zerofrom",
+]
+
+[[package]]
+name = "zerovec"
+version = "0.11.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6c28719294829477f525be0186d13efa9a3c602f7ec202ca9e353d310fb9a002"
+dependencies = [
+ "yoke",
+ "zerofrom",
+ "zerovec-derive",
+]
+
+[[package]]
+name = "zerovec-derive"
+version = "0.11.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "eadce39539ca5cb3985590102671f2567e659fca9666581ad3411d59207951f3"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
[[package]]
name = "zip"
version = "0.6.6"
diff --git a/qdp/Cargo.toml b/qdp/Cargo.toml
index 900c94561..2b5b89552 100644
--- a/qdp/Cargo.toml
+++ b/qdp/Cargo.toml
@@ -39,6 +39,12 @@ bytes = "1.11"
tch = "0.22"
# Optional: vendored protoc to avoid build failures when protoc is missing
protoc-bin-vendored = "3"
+# Remote I/O (S3, GCS, Azure) via object_store
+object_store = "0.11"
+# Minimal tokio runtime for async object_store calls
+tokio = { version = "1", default-features = false }
+# Temp file for remote downloads
+tempfile = "3"
# Release profile optimizations
[profile.release]
diff --git a/qdp/qdp-core/Cargo.toml b/qdp/qdp-core/Cargo.toml
index 407ed2ba4..e5fafe1f0 100644
--- a/qdp/qdp-core/Cargo.toml
+++ b/qdp/qdp-core/Cargo.toml
@@ -16,6 +16,10 @@ ndarray-npy = { workspace = true }
prost = { workspace = true }
bytes = { workspace = true }
tch = { workspace = true, optional = true }
+object_store = { workspace = true, features = ["aws"], optional = true }
+tokio = { workspace = true, features = ["rt"], optional = true }
+tempfile = { workspace = true, optional = true }
+futures = { version = "0.3", default-features = false, optional = true }
log = "0.4"
[build-dependencies]
@@ -29,6 +33,7 @@ name = "qdp_core"
default = []
observability = ["nvtx"]
pytorch = ["tch"]
+remote-io = ["object_store", "tokio", "tempfile", "futures"]
[dev-dependencies]
approx = "0.5.1"
diff --git a/qdp/qdp-core/src/lib.rs b/qdp/qdp-core/src/lib.rs
index a58075aef..49406b3d4 100644
--- a/qdp/qdp-core/src/lib.rs
+++ b/qdp/qdp-core/src/lib.rs
@@ -28,6 +28,8 @@ mod platform;
pub mod preprocessing;
pub mod reader;
pub mod readers;
+#[cfg(feature = "remote-io")]
+pub mod remote;
pub mod tf_proto;
#[macro_use]
mod profiling;
diff --git a/qdp/qdp-core/src/remote.rs b/qdp/qdp-core/src/remote.rs
new file mode 100644
index 000000000..42b375637
--- /dev/null
+++ b/qdp/qdp-core/src/remote.rs
@@ -0,0 +1,255 @@
+//
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//! Remote I/O support for cloud object storage.
+//!
+//! When the `remote-io` feature is enabled, cloud URLs (currently `s3://`)
+//! are transparently downloaded to a local temp file before being passed to
+//! readers. Adding GCS (`gs://`) or Azure (`az://`) support requires only a
+//! new match arm in [`build_store`] and the corresponding `object_store`
+//! cargo feature.
+
+use std::io::Write;
+use std::path::{Path, PathBuf};
+use std::sync::Arc;
+
+use object_store::ObjectStore;
+use object_store::path::Path as ObjectPath;
+use tempfile::NamedTempFile;
+
+use crate::error::{MahoutError, Result};
+
+/// Recognized cloud URL schemes.
+const REMOTE_SCHEMES: &[&str] = &["s3://"];
+
+/// Returns true if `path` is a recognized remote URL.
+pub fn is_remote_path(path: &str) -> bool {
+ REMOTE_SCHEMES.iter().any(|s| path.starts_with(s))
+}
+
+/// Parse a cloud URL into (scheme, bucket, key).
+fn parse_url(url: &str) -> Result<(&str, &str, &str)> {
+ let (scheme, rest) = url
+ .split_once("://")
+ .ok_or_else(|| MahoutError::InvalidInput(format!("Not a remote URL:
{}", url)))?;
+ let (bucket, key) = rest.split_once('/').ok_or_else(|| {
+ MahoutError::InvalidInput(format!(
+ "Remote URL must have the form scheme://bucket/key, got: {}",
+ url
+ ))
+ })?;
+ if bucket.is_empty() || key.is_empty() {
+ return Err(MahoutError::InvalidInput(format!(
+ "Remote URL has empty bucket or key: {}",
+ url
+ )));
+ }
+ Ok((scheme, bucket, key))
+}
+
+/// Build an [`ObjectStore`] for the given scheme and bucket.
+/// Add new match arms here to support additional cloud providers.
+fn build_store(scheme: &str, bucket: &str) -> Result<Arc<dyn ObjectStore>> {
+ match scheme {
+ "s3" => {
+ let store = object_store::aws::AmazonS3Builder::from_env()
+ .with_bucket_name(bucket)
+ .build()
+ .map_err(|e| {
+ MahoutError::Io(format!(
+ "Failed to build S3 client for bucket '{}': {}",
+ bucket, e
+ ))
+ })?;
+ Ok(Arc::new(store))
+ }
+ // To add GCS: "gs" => { ... GoogleCloudStorageBuilder ... }
+ // To add Azure: "az" | "abfs" => { ... MicrosoftAzureBuilder ... }
+ _ => Err(MahoutError::InvalidInput(format!(
+ "Unsupported remote scheme '{}://'. Supported: {}",
+ scheme,
+ REMOTE_SCHEMES.join(", ")
+ ))),
+ }
+}
+
+/// Holds the temp file so it is not deleted while still needed.
+/// Drop this value to clean up the temp file.
+pub struct ResolvedPath {
+ /// Local path to the (possibly downloaded) file.
+ pub path: PathBuf,
+ /// Keeps the temp file alive; None for local paths.
+ _tempfile: Option<NamedTempFile>,
+}
+
+impl AsRef<Path> for ResolvedPath {
+ fn as_ref(&self) -> &Path {
+ &self.path
+ }
+}
+
+/// If `path` is a remote URL, download it to a local temp file and return
+/// the temp path. Otherwise return the path as-is. The returned
+/// [`ResolvedPath`] keeps the temp file alive; dropping it removes the file.
+pub fn resolve_path(path: &str) -> Result<ResolvedPath> {
+ if !is_remote_path(path) {
+ return Ok(ResolvedPath {
+ path: PathBuf::from(path),
+ _tempfile: None,
+ });
+ }
+ download_to_tempfile(path)
+}
+
+/// Download a remote object to a local temp file, streaming chunks to disk.
+fn download_to_tempfile(url: &str) -> Result<ResolvedPath> {
+ use futures::TryStreamExt;
+
+ let (scheme, bucket, key) = parse_url(url)?;
+ let store = build_store(scheme, bucket)?;
+ let object_path = ObjectPath::from(key);
+
+ // Preserve the original extension so downstream readers dispatch
correctly.
+ let extension = Path::new(key)
+ .extension()
+ .and_then(|e| e.to_str())
+ .unwrap_or("tmp");
+
+ let rt = tokio::runtime::Builder::new_current_thread()
+ .enable_all()
+ .build()
+ .map_err(|e| MahoutError::Io(format!("Failed to create tokio runtime:
{}", e)))?;
+
+ let mut tmpfile = tempfile::Builder::new()
+ .suffix(&format!(".{}", extension))
+ .tempfile()
+ .map_err(|e| MahoutError::Io(format!("Failed to create temp file: {}",
e)))?;
+
+ rt.block_on(async {
+ let result = store
+ .get(&object_path)
+ .await
+ .map_err(|e| MahoutError::Io(format!("Failed to download {}: {}",
url, e)))?;
+ let mut stream = result.into_stream();
+ while let Some(chunk) = stream
+ .try_next()
+ .await
+ .map_err(|e| MahoutError::Io(format!("Failed to read chunk from
{}: {}", url, e)))?
+ {
+ tmpfile
+ .write_all(&chunk)
+ .map_err(|e| MahoutError::Io(format!("Failed to write temp
file: {}", e)))?;
+ }
+ Ok::<(), MahoutError>(())
+ })?;
+
+ tmpfile
+ .flush()
+ .map_err(|e| MahoutError::Io(format!("Failed to flush temp file: {}",
e)))?;
+
+ let path = tmpfile.path().to_path_buf();
+ Ok(ResolvedPath {
+ path,
+ _tempfile: Some(tmpfile),
+ })
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn test_is_remote_path() {
+ assert!(is_remote_path("s3://bucket/key.parquet"));
+ assert!(!is_remote_path("/tmp/local.parquet"));
+ assert!(!is_remote_path("data.parquet"));
+ }
+
+ #[test]
+ fn test_parse_url_s3() {
+ let (scheme, bucket, key) =
parse_url("s3://my-bucket/path/to/data.parquet").unwrap();
+ assert_eq!(scheme, "s3");
+ assert_eq!(bucket, "my-bucket");
+ assert_eq!(key, "path/to/data.parquet");
+ }
+
+ #[test]
+ fn test_parse_url_no_key() {
+ assert!(parse_url("s3://bucket-only").is_err());
+ }
+
+ #[test]
+ fn test_parse_url_empty_parts() {
+ assert!(parse_url("s3:///key").is_err());
+ assert!(parse_url("s3://bucket/").is_err());
+ }
+
+ #[test]
+ fn test_unsupported_scheme() {
+ let err = build_store("gcs", "bucket").unwrap_err();
+ assert!(err.to_string().contains("Unsupported remote scheme"));
+ }
+
+ #[test]
+ fn test_resolve_local_path() {
+ let resolved = resolve_path("/tmp/local.parquet").unwrap();
+ assert_eq!(resolved.path, PathBuf::from("/tmp/local.parquet"));
+ }
+
+ /// Integration test: download from a real S3-compatible endpoint (MinIO).
+ ///
+ /// Requires a running MinIO with a test file uploaded. Skipped unless
+ /// `MAHOUT_TEST_S3` env var is set. Run with:
+ ///
+ /// ```sh
+ /// MAHOUT_TEST_S3=1 \
+ /// AWS_ACCESS_KEY_ID=minioadmin AWS_SECRET_ACCESS_KEY=minioadmin \
+ /// AWS_ENDPOINT=http://localhost:9123 AWS_REGION=us-east-1
AWS_ALLOW_HTTP=true \
+ /// cargo test -p qdp-core --features remote-io -- test_download_from_minio
+ /// ```
+ #[test]
+ fn test_download_from_minio() {
+ if std::env::var("MAHOUT_TEST_S3").is_err() {
+ eprintln!("skipping test_download_from_minio (set MAHOUT_TEST_S3=1
to run)");
+ return;
+ }
+ let resolved = resolve_path("s3://test-bucket/data.parquet").unwrap();
+ assert!(
+ resolved.path.exists(),
+ "temp file should exist after download"
+ );
+ assert!(
+ resolved.path.to_string_lossy().ends_with(".parquet"),
+ "temp file should preserve .parquet extension"
+ );
+ let file_len = std::fs::metadata(&resolved.path).unwrap().len();
+ assert!(file_len > 0, "downloaded file should not be empty");
+
+ // Verify it's a valid parquet that our reader can parse.
+ use crate::reader::DataReader;
+ let mut reader = crate::readers::ParquetReader::new(
+ &resolved.path,
+ None,
+ crate::reader::NullHandling::FillZero,
+ )
+ .expect("ParquetReader should open downloaded file");
+ let (data, num_samples, sample_size) =
+ reader.read_batch().expect("read_batch should succeed");
+ assert_eq!(num_samples, 8);
+ assert_eq!(sample_size, 4);
+ assert_eq!(data.len(), 32);
+ }
+}
diff --git a/qdp/qdp-python/Cargo.toml b/qdp/qdp-python/Cargo.toml
index 5c70aeabd..fbe46a019 100644
--- a/qdp/qdp-python/Cargo.toml
+++ b/qdp/qdp-python/Cargo.toml
@@ -17,3 +17,4 @@ env_logger = "0.11"
default = []
observability = ["qdp-core/observability"]
pytorch = ["qdp-core/pytorch"]
+remote-io = ["qdp-core/remote-io"]
diff --git a/qdp/qdp-python/qumat_qdp/loader.py
b/qdp/qdp-python/qumat_qdp/loader.py
index 4ed61f882..c6514f094 100644
--- a/qdp/qdp-python/qumat_qdp/loader.py
+++ b/qdp/qdp-python/qumat_qdp/loader.py
@@ -166,10 +166,13 @@ class QuantumDataLoader:
For streaming=True (Phase 2b), only .parquet is supported; data is
read in chunks to reduce memory.
For streaming=False, supports .parquet, .arrow, .feather, .ipc, .npy,
.pt, .pth, .pb.
+ Remote paths (s3://) are supported when the remote-io feature is
enabled.
"""
if not path or not isinstance(path, str):
raise ValueError(f"path must be a non-empty string, got {path!r}")
- if streaming and not (path.lower().endswith(".parquet")):
+ # For remote URLs, extract the key portion for extension checks.
+ check_path = path.split("?")[0].rsplit("/", 1)[-1] if "://" in path
else path
+ if streaming and not (check_path.lower().endswith(".parquet")):
raise ValueError(
"streaming=True supports only .parquet files; use
streaming=False for other formats."
)
diff --git a/qdp/qdp-python/src/engine.rs b/qdp/qdp-python/src/engine.rs
index b2eecc719..2c94899c5 100644
--- a/qdp/qdp-python/src/engine.rs
+++ b/qdp/qdp-python/src/engine.rs
@@ -379,13 +379,27 @@ impl QdpEngine {
})
}
- /// Internal helper to encode from file based on extension
+ /// Internal helper to encode from file based on extension.
+ /// When the `remote-io` feature is enabled, `s3://` URLs are supported.
fn encode_from_file(
&self,
path: &str,
num_qubits: usize,
encoding_method: &str,
) -> PyResult<QuantumTensor> {
+ #[cfg(feature = "remote-io")]
+ let _resolved;
+ #[cfg(feature = "remote-io")]
+ let path = {
+ _resolved = qdp_core::remote::resolve_path(path).map_err(|e| {
+ PyRuntimeError::new_err(format!("Remote path resolution
failed: {}", e))
+ })?;
+ _resolved
+ .path
+ .to_str()
+ .ok_or_else(|| PyRuntimeError::new_err("Resolved path is not
valid UTF-8"))?
+ };
+
let ptr = if path.ends_with(".parquet") {
self.engine
.encode_from_parquet(path, num_qubits, encoding_method)
@@ -653,6 +667,14 @@ impl QdpEngine {
nh,
);
let engine = self.engine.clone();
+ // Resolve S3 URLs before detaching from GIL. The _resolved guard
keeps the
+ // temp file alive until after the file is fully read inside py.detach.
+ #[cfg(feature = "remote-io")]
+ let _resolved =
qdp_core::remote::resolve_path(path_str.as_str()).map_err(|e| {
+ PyRuntimeError::new_err(format!("Remote path resolution failed:
{}", e))
+ })?;
+ #[cfg(feature = "remote-io")]
+ let path_str = _resolved.path.to_string_lossy().into_owned();
let iter = py
.detach(|| {
qdp_core::PipelineIterator::new_from_file(
@@ -693,6 +715,14 @@ impl QdpEngine {
nh,
);
let engine = self.engine.clone();
+ // Resolve S3 URLs before detaching from GIL. The _resolved guard
keeps the
+ // temp file alive; the streaming reader's open fd preserves data
after drop.
+ #[cfg(feature = "remote-io")]
+ let _resolved =
qdp_core::remote::resolve_path(path_str.as_str()).map_err(|e| {
+ PyRuntimeError::new_err(format!("Remote path resolution failed:
{}", e))
+ })?;
+ #[cfg(feature = "remote-io")]
+ let path_str = _resolved.path.to_string_lossy().into_owned();
let iter = py
.detach(|| {
qdp_core::PipelineIterator::new_from_file_streaming(
diff --git a/qdp/qdp-python/tests/test_quantum_data_loader.py
b/qdp/qdp-python/tests/test_quantum_data_loader.py
index d9aa78472..8c93c45c5 100644
--- a/qdp/qdp-python/tests/test_quantum_data_loader.py
+++ b/qdp/qdp-python/tests/test_quantum_data_loader.py
@@ -185,3 +185,56 @@ def test_null_handling_default_is_none() -> None:
"""By default, _null_handling is None (Rust will use FillZero)."""
loader = QuantumDataLoader(device_id=0)
assert loader._null_handling is None
+
+
+# --- S3 URL (source_file) builder tests ---
+
+
[email protected](not _loader_available(), reason="QuantumDataLoader not
available")
[email protected](
+ ("path", "streaming"),
+ [
+ ("s3://my-bucket/data.parquet", False),
+ ("s3://bucket/path/to/data.parquet", True),
+ ("s3://bucket/data.parquet?versionId=abc123", False),
+ ("s3://bucket/data.parquet?versionId=abc123", True),
+ ("s3://bucket/data.npy", False),
+ ],
+ ids=[
+ "parquet-no-stream",
+ "parquet-stream",
+ "parquet-query-no-stream",
+ "parquet-query-stream",
+ "npy-no-stream",
+ ],
+)
+def test_source_file_s3_accepted(path, streaming):
+ """source_file() accepts valid S3 URLs at builder level."""
+ loader = (
+ QuantumDataLoader(device_id=0)
+ .qubits(4)
+ .batches(10, size=4)
+ .source_file(path, streaming=streaming)
+ )
+ assert loader._file_path == path
+ assert loader._file_requested is True
+ assert loader._streaming_requested is streaming
+
+
[email protected](not _loader_available(), reason="QuantumDataLoader not
available")
[email protected](
+ "path",
+ [
+ "s3://bucket/data.npy",
+ "s3://bucket/data.npy?versionId=abc",
+ ],
+ ids=["npy", "npy-query"],
+)
+def test_source_file_s3_streaming_non_parquet_raises(path):
+ """source_file(s3://..., streaming=True) with non-.parquet raises
ValueError."""
+ with pytest.raises(ValueError) as exc_info:
+ QuantumDataLoader(device_id=0).qubits(4).batches(10,
size=4).source_file(
+ path, streaming=True
+ )
+ msg = str(exc_info.value).lower()
+ assert "parquet" in msg or "streaming" in msg