This is an automated email from the ASF dual-hosted git repository.
paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/sedona-db.git
The following commit(s) were added to refs/heads/main by this push:
new 03823577 feat(rust/sedona): Add Azure Blob Storage support for
GeoParquet (#479)
03823577 is described below
commit 03823577028a48f23a138f122e882c06da9bc30b
Author: Isaac Corley <[email protected]>
AuthorDate: Thu Jan 8 16:19:25 2026 -0600
feat(rust/sedona): Add Azure Blob Storage support for GeoParquet (#479)
Co-authored-by: Dewey Dunnington <[email protected]>
---
Cargo.lock | 327 +++++++++++++++++----------------
python/sedonadb/tests/test_context.py | 17 ++
rust/sedona-geoparquet/src/metadata.rs | 5 +
rust/sedona-geoparquet/src/provider.rs | 60 +++++-
rust/sedona/Cargo.toml | 3 +-
rust/sedona/src/object_storage.rs | 261 ++++++++++++++++++++++++++
6 files changed, 503 insertions(+), 170 deletions(-)
diff --git a/Cargo.lock b/Cargo.lock
index ce582692..aa9c05ec 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -262,9 +262,9 @@ checksum =
"7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50"
[[package]]
name = "arrow"
-version = "57.0.0"
+version = "57.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4df8bb5b0bd64c0b9bc61317fcc480bad0f00e56d3bc32c69a4c8dada4786bae"
+checksum = "cb372a7cbcac02a35d3fb7b3fc1f969ec078e871f9bb899bf00a2e1809bec8a3"
dependencies = [
"arrow-arith",
"arrow-array",
@@ -283,9 +283,9 @@ dependencies = [
[[package]]
name = "arrow-arith"
-version = "57.0.0"
+version = "57.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a1a640186d3bd30a24cb42264c2dafb30e236a6f50d510e56d40b708c9582491"
+checksum = "0f377dcd19e440174596d83deb49cd724886d91060c07fec4f67014ef9d54049"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -297,9 +297,9 @@ dependencies = [
[[package]]
name = "arrow-array"
-version = "57.0.0"
+version = "57.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "219fe420e6800979744c8393b687afb0252b3f8a89b91027d27887b72aa36d31"
+checksum = "a23eaff85a44e9fa914660fb0d0bb00b79c4a3d888b5334adb3ea4330c84f002"
dependencies = [
"ahash",
"arrow-buffer",
@@ -316,9 +316,9 @@ dependencies = [
[[package]]
name = "arrow-buffer"
-version = "57.0.0"
+version = "57.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "76885a2697a7edf6b59577f568b456afc94ce0e2edc15b784ce3685b6c3c5c27"
+checksum = "a2819d893750cb3380ab31ebdc8c68874dd4429f90fd09180f3c93538bd21626"
dependencies = [
"bytes",
"half",
@@ -328,13 +328,14 @@ dependencies = [
[[package]]
name = "arrow-cast"
-version = "57.0.0"
+version = "57.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9c9ebb4c987e6b3b236fb4a14b20b34835abfdd80acead3ccf1f9bf399e1f168"
+checksum = "e3d131abb183f80c450d4591dc784f8d7750c50c6e2bc3fcaad148afc8361271"
dependencies = [
"arrow-array",
"arrow-buffer",
"arrow-data",
+ "arrow-ord",
"arrow-schema",
"arrow-select",
"atoi",
@@ -349,9 +350,9 @@ dependencies = [
[[package]]
name = "arrow-csv"
-version = "57.0.0"
+version = "57.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "92386159c8d4bce96f8bd396b0642a0d544d471bdc2ef34d631aec80db40a09c"
+checksum = "2275877a0e5e7e7c76954669366c2aa1a829e340ab1f612e647507860906fb6b"
dependencies = [
"arrow-array",
"arrow-cast",
@@ -364,9 +365,9 @@ dependencies = [
[[package]]
name = "arrow-data"
-version = "57.0.0"
+version = "57.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "727681b95de313b600eddc2a37e736dcb21980a40f640314dcf360e2f36bc89b"
+checksum = "05738f3d42cb922b9096f7786f606fcb8669260c2640df8490533bb2fa38c9d3"
dependencies = [
"arrow-buffer",
"arrow-schema",
@@ -377,9 +378,9 @@ dependencies = [
[[package]]
name = "arrow-ipc"
-version = "57.0.0"
+version = "57.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "da9ba92e3de170295c98a84e5af22e2b037f0c7b32449445e6c493b5fca27f27"
+checksum = "3d09446e8076c4b3f235603d9ea7c5494e73d441b01cd61fb33d7254c11964b3"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -393,9 +394,9 @@ dependencies = [
[[package]]
name = "arrow-json"
-version = "57.0.0"
+version = "57.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b969b4a421ae83828591c6bf5450bd52e6d489584142845ad6a861f42fe35df8"
+checksum = "371ffd66fa77f71d7628c63f209c9ca5341081051aa32f9c8020feb0def787c0"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -417,9 +418,9 @@ dependencies = [
[[package]]
name = "arrow-ord"
-version = "57.0.0"
+version = "57.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "141c05298b21d03e88062317a1f1a73f5ba7b6eb041b350015b1cd6aabc0519b"
+checksum = "cbc94fc7adec5d1ba9e8cd1b1e8d6f72423b33fe978bf1f46d970fafab787521"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -430,9 +431,9 @@ dependencies = [
[[package]]
name = "arrow-row"
-version = "57.0.0"
+version = "57.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c5f3c06a6abad6164508ed283c7a02151515cef3de4b4ff2cebbcaeb85533db2"
+checksum = "169676f317157dc079cc5def6354d16db63d8861d61046d2f3883268ced6f99f"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -443,9 +444,9 @@ dependencies = [
[[package]]
name = "arrow-schema"
-version = "57.0.0"
+version = "57.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9cfa7a03d1eee2a4d061476e1840ad5c9867a544ca6c4c59256496af5d0a8be5"
+checksum = "d27609cd7dd45f006abae27995c2729ef6f4b9361cde1ddd019dc31a5aa017e0"
dependencies = [
"bitflags",
"serde_core",
@@ -454,9 +455,9 @@ dependencies = [
[[package]]
name = "arrow-select"
-version = "57.0.0"
+version = "57.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bafa595babaad59f2455f4957d0f26448fb472722c186739f4fac0823a1bdb47"
+checksum = "ae980d021879ea119dd6e2a13912d81e64abed372d53163e804dfe84639d8010"
dependencies = [
"ahash",
"arrow-array",
@@ -468,9 +469,9 @@ dependencies = [
[[package]]
name = "arrow-string"
-version = "57.0.0"
+version = "57.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "32f46457dbbb99f2650ff3ac23e46a929e0ab81db809b02aa5511c258348bef2"
+checksum = "cf35e8ef49dcf0c5f6d175edee6b8af7b45611805333129c541a8b89a0fc0534"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -541,7 +542,7 @@ checksum =
"9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.111",
+ "syn 2.0.114",
]
[[package]]
@@ -848,9 +849,9 @@ dependencies = [
[[package]]
name = "aws-smithy-runtime"
-version = "1.9.6"
+version = "1.9.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "65fda37911905ea4d3141a01364bc5509a0f32ae3f3b22d6e330c0abfb62d247"
+checksum = "a392db6c583ea4a912538afb86b7be7c5d8887d91604f50eb55c262ee1b4a5f5"
dependencies = [
"aws-smithy-async",
"aws-smithy-http",
@@ -951,9 +952,9 @@ dependencies = [
[[package]]
name = "bigdecimal"
-version = "0.4.9"
+version = "0.4.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "560f42649de9fa436b73517378a147ec21f6c997a546581df4b4b31677828934"
+checksum = "4d6867f1565b3aad85681f1015055b087fcfd840d6aeee6eee7f2da317603695"
dependencies = [
"autocfg",
"libm",
@@ -980,7 +981,7 @@ dependencies = [
"regex",
"rustc-hash",
"shlex",
- "syn 2.0.111",
+ "syn 2.0.114",
]
[[package]]
@@ -1000,15 +1001,16 @@ dependencies = [
[[package]]
name = "blake3"
-version = "1.8.2"
+version = "1.8.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3888aaa89e4b2a40fca9848e400f6a658a5a3978de7be858e209cafa8be9a4a0"
+checksum = "2468ef7d57b3fb7e16b576e8377cdbde2320c60e1491e961d11da40fc4f02a2d"
dependencies = [
"arrayref",
"arrayvec",
"cc",
"cfg-if",
"constant_time_eq",
+ "cpufeatures",
]
[[package]]
@@ -1022,9 +1024,9 @@ dependencies = [
[[package]]
name = "bon"
-version = "3.8.1"
+version = "3.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ebeb9aaf9329dff6ceb65c689ca3db33dbf15f324909c60e4e5eef5701ce31b1"
+checksum = "234655ec178edd82b891e262ea7cf71f6584bcd09eff94db786be23f1821825c"
dependencies = [
"bon-macros",
"rustversion",
@@ -1032,17 +1034,17 @@ dependencies = [
[[package]]
name = "bon-macros"
-version = "3.8.1"
+version = "3.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "77e9d642a7e3a318e37c2c9427b5a6a48aa1ad55dcd986f3034ab2239045a645"
+checksum = "89ec27229c38ed0eb3c0feee3d2c1d6a4379ae44f418a29a658890e062d8f365"
dependencies = [
- "darling 0.21.3",
+ "darling 0.23.0",
"ident_case",
"prettyplease",
"proc-macro2",
"quote",
"rustversion",
- "syn 2.0.111",
+ "syn 2.0.114",
]
[[package]]
@@ -1136,9 +1138,9 @@ checksum =
"37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"
[[package]]
name = "cc"
-version = "1.2.50"
+version = "1.2.51"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9f50d563227a1c37cc0a263f64eca3334388c01c5e4c4861a9def205c614383c"
+checksum = "7a0aeaff4ff1a90589618835a598e545176939b97874f7abc7851caa0618f203"
dependencies = [
"find-msvc-tools",
"jobserver",
@@ -1229,9 +1231,9 @@ dependencies = [
[[package]]
name = "clap"
-version = "4.5.53"
+version = "4.5.54"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c9e340e012a1bf4935f5282ed1436d1489548e8f72308207ea5df0e23d2d03f8"
+checksum = "c6e6ff9dcd79cff5cd969a17a545d79e84ab086e444102a591e288a8aa3ce394"
dependencies = [
"clap_builder",
"clap_derive",
@@ -1239,9 +1241,9 @@ dependencies = [
[[package]]
name = "clap_builder"
-version = "4.5.53"
+version = "4.5.54"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d76b5d13eaa18c901fd2f7fca939fefe3a0727a953561fefdf3b2922b8569d00"
+checksum = "fa42cf4d2b7a41bc8f663a7cab4031ebafa1bf3875705bfaf8466dc60ab52c00"
dependencies = [
"anstream",
"anstyle",
@@ -1258,7 +1260,7 @@ dependencies = [
"heck",
"proc-macro2",
"quote",
- "syn 2.0.111",
+ "syn 2.0.114",
]
[[package]]
@@ -1333,9 +1335,9 @@ dependencies = [
[[package]]
name = "constant_time_eq"
-version = "0.3.1"
+version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7c74b8349d32d297c9134b8c88677813a227df8f779daa29bfc29c183fe3dca6"
+checksum = "3d52eff69cd5e647efe296129160853a42795992097e8af39800e1060caeea9b"
[[package]]
name = "core-foundation"
@@ -1533,12 +1535,12 @@ dependencies = [
[[package]]
name = "darling"
-version = "0.21.3"
+version = "0.23.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9cdf337090841a411e2a7f3deb9187445851f91b309c0c0a29e05f74a00a48c0"
+checksum = "25ae13da2f202d56bd7f91c25fba009e7717a1e4a1cc98a76d844b65ae912e9d"
dependencies = [
- "darling_core 0.21.3",
- "darling_macro 0.21.3",
+ "darling_core 0.23.0",
+ "darling_macro 0.23.0",
]
[[package]]
@@ -1557,16 +1559,15 @@ dependencies = [
[[package]]
name = "darling_core"
-version = "0.21.3"
+version = "0.23.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1247195ecd7e3c85f83c8d2a366e4210d588e802133e1e355180a9870b517ea4"
+checksum = "9865a50f7c335f53564bb694ef660825eb8610e0a53d3e11bf1b0d3df31e03b0"
dependencies = [
- "fnv",
"ident_case",
"proc-macro2",
"quote",
"strsim 0.11.1",
- "syn 2.0.111",
+ "syn 2.0.114",
]
[[package]]
@@ -1582,13 +1583,13 @@ dependencies = [
[[package]]
name = "darling_macro"
-version = "0.21.3"
+version = "0.23.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d38308df82d1080de0afee5d069fa14b0326a88c14f15c5ccda35b4a6c414c81"
+checksum = "ac3984ec7bd6cfa798e62b4a642426a5be0e68f9401cfc2a01e3fa9ea2fcdb8d"
dependencies = [
- "darling_core 0.21.3",
+ "darling_core 0.23.0",
"quote",
- "syn 2.0.111",
+ "syn 2.0.114",
]
[[package]]
@@ -2125,7 +2126,7 @@ checksum =
"1063ad4c9e094b3f798acee16d9a47bd7372d9699be2de21b05c3bd3f34ab848"
dependencies = [
"datafusion-doc",
"quote",
- "syn 2.0.111",
+ "syn 2.0.114",
]
[[package]]
@@ -2385,7 +2386,7 @@ checksum =
"97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.111",
+ "syn 2.0.114",
]
[[package]]
@@ -2520,9 +2521,9 @@ dependencies = [
[[package]]
name = "find-msvc-tools"
-version = "0.1.5"
+version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3a3076410a55c90011c298b04d0cfa770b00fa04e1e3c97d3f6c9de105a03844"
+checksum = "645cbb3a84e60b7531617d5ae4e57f7e27308f6445f5abf653209ea76dec8dff"
[[package]]
name = "fixedbitset"
@@ -2532,9 +2533,9 @@ checksum =
"1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99"
[[package]]
name = "flatbuffers"
-version = "25.9.23"
+version = "25.12.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "09b6620799e7340ebd9968d2e0708eb82cf1971e9a16821e2091b6d6e475eed5"
+checksum = "35f6839d7b3b98adde531effaf34f0c2badc6f4735d26fe74709d8e513a96ef3"
dependencies = [
"bitflags",
"rustc_version",
@@ -2646,7 +2647,7 @@ checksum =
"162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.111",
+ "syn 2.0.114",
]
[[package]]
@@ -2860,9 +2861,9 @@ checksum =
"0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280"
[[package]]
name = "h2"
-version = "0.4.12"
+version = "0.4.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f3c0b69cfcb4e1b9f1bf2f53f95f766e4661169728ec61cd3fe5a0166f2d1386"
+checksum = "2f44da3a8150a6703ed5d34e164b875fd14c2cdab9af1252a9a1020bde2bdc54"
dependencies = [
"atomic-waker",
"bytes",
@@ -3338,9 +3339,9 @@ dependencies = [
[[package]]
name = "indexmap"
-version = "2.12.1"
+version = "2.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0ad4bb2b565bca0645f4d68c5c9af97fba094e9791da685bf83cb5f3ce74acf2"
+checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017"
dependencies = [
"equivalent",
"hashbrown 0.16.1",
@@ -3369,9 +3370,9 @@ checksum =
"469fb0b9cefa57e3ef31275ee7cacb78f2fdca44e4765491884a2b119d4eb130"
[[package]]
name = "iri-string"
-version = "0.7.9"
+version = "0.7.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4f867b9d1d896b67beb18518eda36fdb77a32ea590de864f1325b294a6d14397"
+checksum = "c91338f0783edbd6195decb37bae672fd3b165faffb89bf7b9e6942f8b1a731a"
dependencies = [
"memchr",
"serde",
@@ -3412,15 +3413,15 @@ dependencies = [
[[package]]
name = "itoa"
-version = "1.0.15"
+version = "1.0.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c"
+checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2"
[[package]]
name = "jiff"
-version = "0.2.16"
+version = "0.2.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "49cce2b81f2098e7e3efc35bc2e0a6b7abec9d34128283d7a26fa8f32a6dbb35"
+checksum = "e67e8da4c49d6d9909fe03361f9b620f58898859f5c7aded68351e85e71ecf50"
dependencies = [
"jiff-static",
"log",
@@ -3431,13 +3432,13 @@ dependencies = [
[[package]]
name = "jiff-static"
-version = "0.2.16"
+version = "0.2.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "980af8b43c3ad5d8d349ace167ec8170839f753a42d233ba19e08afe1850fa69"
+checksum = "e0c84ee7f197eca9a86c6fd6cb771e55eb991632f15f2bc3ca6ec838929e6e78"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.111",
+ "syn 2.0.114",
]
[[package]]
@@ -3525,9 +3526,9 @@ checksum =
"2c4a545a15244c7d945065b5d392b2d2d7f21526fba56ce51467b06ed445e8f7"
[[package]]
name = "libc"
-version = "0.2.178"
+version = "0.2.180"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "37c93d8daa9d8a012fd8ab92f088405fb202ea0b6ab73ee2482ae66af4f42091"
+checksum = "bcc35a38544a891a5f7c865aca548a982ccb3b8650a5b06d0fd33a10283c56fc"
[[package]]
name = "libloading"
@@ -3567,13 +3568,13 @@ dependencies = [
[[package]]
name = "libredox"
-version = "0.1.11"
+version = "0.1.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "df15f6eac291ed1cf25865b1ee60399f57e7c227e7f51bdbd4c5270396a9ed50"
+checksum = "3d0b95e02c851351f877147b7deea7b1afb1df71b63aa5f8270716e0c5720616"
dependencies = [
"bitflags",
"libc",
- "redox_syscall 0.6.0",
+ "redox_syscall 0.7.0",
]
[[package]]
@@ -3588,9 +3589,9 @@ dependencies = [
[[package]]
name = "libz-rs-sys"
-version = "0.5.4"
+version = "0.5.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "15413ef615ad868d4d65dce091cb233b229419c7c0c4bcaa746c0901c49ff39c"
+checksum = "c10501e7805cee23da17c7790e59df2870c0d4043ec6d03f67d31e2b53e77415"
dependencies = [
"zlib-rs",
]
@@ -3654,9 +3655,9 @@ checksum =
"112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154"
[[package]]
name = "lz4_flex"
-version = "0.11.5"
+version = "0.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "08ab2867e3eeeca90e844d1940eab391c9dc5228783db2ed999acbc0a9ed375a"
+checksum = "ab6473172471198271ff72e9379150e9dfd70d8e533e0752a27e515b48dd375e"
dependencies = [
"twox-hash",
]
@@ -3864,7 +3865,7 @@ dependencies = [
"proc-macro-crate",
"proc-macro2",
"quote",
- "syn 2.0.111",
+ "syn 2.0.114",
]
[[package]]
@@ -3890,6 +3891,7 @@ dependencies = [
"futures",
"http 1.4.0",
"http-body-util",
+ "httparse",
"humantime 2.3.0",
"hyper",
"itertools 0.14.0",
@@ -3933,9 +3935,9 @@ checksum =
"d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e"
[[package]]
name = "openssl-probe"
-version = "0.1.6"
+version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e"
+checksum = "9f50d9b3dabb09ecd771ad0aa242ca6894994c130308ca3d7684634df8037391"
[[package]]
name = "option-ext"
@@ -3993,9 +3995,9 @@ dependencies = [
[[package]]
name = "parquet"
-version = "57.0.0"
+version = "57.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7a0f31027ef1af7549f7cec603a9a21dce706d3f8d7c2060a68f43c1773be95a"
+checksum = "be3e4f6d320dd92bfa7d612e265d7d08bba0a240bab86af3425e1d255a511d89"
dependencies = [
"ahash",
"arrow-array",
@@ -4124,9 +4126,9 @@ dependencies = [
[[package]]
name = "portable-atomic"
-version = "1.11.1"
+version = "1.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f84267b20a16ea918e43c6a88433c2d54fa145c92a811b5b047ccbe153674483"
+checksum = "f89776e4d69bb58bc6993e99ffa1d11f228b839984854c7daeb5d37f87cbe950"
[[package]]
name = "portable-atomic-util"
@@ -4178,7 +4180,7 @@ source =
"registry+https://github.com/rust-lang/crates.io-index"
checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b"
dependencies = [
"proc-macro2",
- "syn 2.0.111",
+ "syn 2.0.114",
]
[[package]]
@@ -4192,9 +4194,9 @@ dependencies = [
[[package]]
name = "proc-macro2"
-version = "1.0.103"
+version = "1.0.105"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8"
+checksum = "535d180e0ecab6268a3e718bb9fd44db66bbbc256257165fc699dadf70d16fe7"
dependencies = [
"unicode-ident",
]
@@ -4233,7 +4235,7 @@ dependencies = [
"itertools 0.14.0",
"proc-macro2",
"quote",
- "syn 2.0.111",
+ "syn 2.0.114",
]
[[package]]
@@ -4292,7 +4294,7 @@ dependencies = [
"proc-macro2",
"pyo3-macros-backend",
"quote",
- "syn 2.0.111",
+ "syn 2.0.114",
]
[[package]]
@@ -4305,7 +4307,7 @@ dependencies = [
"proc-macro2",
"pyo3-build-config",
"quote",
- "syn 2.0.111",
+ "syn 2.0.114",
]
[[package]]
@@ -4387,9 +4389,9 @@ dependencies = [
[[package]]
name = "quote"
-version = "1.0.42"
+version = "1.0.43"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a338cc41d27e6cc6dce6cefc13a0729dfbb81c262b1f519331575dd80ef3067f"
+checksum = "dc74d9a594b72ae6656596548f56f667211f8a97b3d4c3d467150794690dc40a"
dependencies = [
"proc-macro2",
]
@@ -4486,7 +4488,7 @@ source =
"registry+https://github.com/rust-lang/crates.io-index"
checksum = "76009fbe0614077fc1a2ce255e3a1881a2e3a3527097d5dc6d8212c585e7e38b"
dependencies = [
"quote",
- "syn 2.0.111",
+ "syn 2.0.114",
]
[[package]]
@@ -4500,9 +4502,9 @@ dependencies = [
[[package]]
name = "redox_syscall"
-version = "0.6.0"
+version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ec96166dafa0886eb81fe1c0a388bece180fbef2135f97c1e2cf8302e74b43b5"
+checksum = "49f3fe0889e69e2ae9e41f4d6c4c0181701d00e4697b356fb1f74173a5e0ee27"
dependencies = [
"bitflags",
]
@@ -4570,9 +4572,9 @@ dependencies = [
[[package]]
name = "reqwest"
-version = "0.12.26"
+version = "0.12.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3b4c14b2d9afca6a60277086b0cc6a6ae0b568f6f7916c943a8cdc79f8be240f"
+checksum = "eddd3ca559203180a307f12d114c268abf583f59b03cb906fd0b3ff8646c1147"
dependencies = [
"base64",
"bytes",
@@ -4716,7 +4718,7 @@ dependencies = [
"regex",
"relative-path",
"rustc_version",
- "syn 2.0.111",
+ "syn 2.0.114",
"unicode-ident",
]
@@ -4737,9 +4739,9 @@ dependencies = [
[[package]]
name = "rustix"
-version = "1.1.2"
+version = "1.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cd15f8a2c5551a84d56efdc1cd049089e409ac19a3072d5037a17fd70719ff3e"
+checksum = "146c9e247ccc180c1f61615433868c99f3de3ae256a30a43b49f67c2d9171f34"
dependencies = [
"bitflags",
"errno",
@@ -4750,9 +4752,9 @@ dependencies = [
[[package]]
name = "rustls"
-version = "0.23.35"
+version = "0.23.36"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "533f54bc6a7d4f647e46ad909549eda97bf5afc1585190ef692b4286b198bd8f"
+checksum = "c665f33d38cea657d9614f766881e4d510e0eda4239891eea56b4cadcf01801b"
dependencies = [
"aws-lc-rs",
"once_cell",
@@ -4765,9 +4767,9 @@ dependencies = [
[[package]]
name = "rustls-native-certs"
-version = "0.8.2"
+version = "0.8.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9980d917ebb0c0536119ba501e90834767bffc3d60641457fd84a1f3fd337923"
+checksum = "612460d5f7bea540c490b2b6395d8e34a953e52b491accd6c86c8164c5932a63"
dependencies = [
"openssl-probe",
"rustls-pki-types",
@@ -4836,9 +4838,9 @@ dependencies = [
[[package]]
name = "ryu"
-version = "1.0.20"
+version = "1.0.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f"
+checksum = "a50f4cf475b65d88e057964e0e9bb1f0aa9bbb2036dc65c64596b42932536984"
[[package]]
name = "same-file"
@@ -4868,7 +4870,7 @@ checksum =
"0998bef42631c5f985ac0d14d6e3e33e95dc08822843abc582419181cbecb3a0"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.111",
+ "syn 2.0.114",
]
[[package]]
@@ -4889,7 +4891,7 @@ dependencies = [
"proc-macro2",
"quote",
"savvy-bindgen",
- "syn 2.0.111",
+ "syn 2.0.114",
]
[[package]]
@@ -5561,20 +5563,20 @@ checksum =
"d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.111",
+ "syn 2.0.114",
]
[[package]]
name = "serde_json"
-version = "1.0.145"
+version = "1.0.149"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "402a6f66d8c709116cf22f558eab210f5a50187f702eb4d7e5ef38d9a7f1c79c"
+checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86"
dependencies = [
"itoa",
"memchr",
- "ryu",
"serde",
"serde_core",
+ "zmij",
]
[[package]]
@@ -5630,10 +5632,11 @@ checksum =
"0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
[[package]]
name = "signal-hook-registry"
-version = "1.4.7"
+version = "1.4.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7664a098b8e616bdfcc2dc0e9ac44eb231eedf41db4e9fe95d8d32ec728dedad"
+checksum = "c4db69cba1110affc0e9f7bcd48bbf87b3f4fc7c61fc9155afd4c469eb3d6c1b"
dependencies = [
+ "errno",
"libc",
]
@@ -5723,7 +5726,7 @@ checksum =
"da5fc6819faabb412da764b99d3b713bb55083c11e7e0c00144d386cd6a1939c"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.111",
+ "syn 2.0.114",
]
[[package]]
@@ -5772,7 +5775,7 @@ dependencies = [
"heck",
"proc-macro2",
"quote",
- "syn 2.0.111",
+ "syn 2.0.114",
]
[[package]]
@@ -5794,9 +5797,9 @@ dependencies = [
[[package]]
name = "syn"
-version = "2.0.111"
+version = "2.0.114"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "390cc9a294ab71bdb1aa2e99d13be9c753cd2d7bd6560c77118597410c4d2e87"
+checksum = "d4d107df263a3013ef9b1879b0df87d706ff80f65a86ea879bd9c31f9b307c2a"
dependencies = [
"proc-macro2",
"quote",
@@ -5820,7 +5823,7 @@ checksum =
"728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.111",
+ "syn 2.0.114",
]
[[package]]
@@ -5842,9 +5845,9 @@ checksum =
"b1dd07eb858a2067e2f3c7155d54e929265c264e6f37efe3ee7a8d1b5a1dd0ba"
[[package]]
name = "tempfile"
-version = "3.23.0"
+version = "3.24.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2d31c77bdf42a745371d260a26ca7163f1e0924b64afa0b688e61b5a9fa02f16"
+checksum = "655da9c7eb6305c55742045d5a8d2037996d61d8de95806335c7c86ce0f82e9c"
dependencies = [
"fastrand",
"getrandom 0.3.4",
@@ -5888,7 +5891,7 @@ checksum =
"4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.111",
+ "syn 2.0.114",
]
[[package]]
@@ -5899,7 +5902,7 @@ checksum =
"3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.111",
+ "syn 2.0.114",
]
[[package]]
@@ -5989,9 +5992,9 @@ checksum =
"1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
[[package]]
name = "tokio"
-version = "1.48.0"
+version = "1.49.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ff360e02eab121e0bc37a2d3b4d4dc622e6eda3a8e5253d5435ecf5bd4c68408"
+checksum = "72a2903cd7736441aac9df9d7688bd0ce48edccaadf181c3b90be801e81d3d86"
dependencies = [
"bytes",
"libc",
@@ -6012,7 +6015,7 @@ checksum =
"af407857209536a95c8e56f8231ef2c2e2aff839b22e07a1ffcbc617e9db9fa5"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.111",
+ "syn 2.0.114",
]
[[package]]
@@ -6027,9 +6030,9 @@ dependencies = [
[[package]]
name = "tokio-util"
-version = "0.7.17"
+version = "0.7.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2efa149fe76073d6e8fd97ef4f4eca7b67f599660115591483572e406e165594"
+checksum = "9ae9cec805b01e8fc3fd2fe289f89149a9b66dd16786abd8b19cfa7b48cb0098"
dependencies = [
"bytes",
"futures-core",
@@ -6132,7 +6135,7 @@ checksum =
"7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.111",
+ "syn 2.0.114",
]
[[package]]
@@ -6221,9 +6224,9 @@ checksum =
"8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1"
[[package]]
name = "url"
-version = "2.5.7"
+version = "2.5.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "08bc136a29a3d1758e07a9cca267be308aeebf5cfd5a10f3f67ab2097683ef5b"
+checksum = "ff67a8a4397373c3ef660812acab3268222035010ab8680ec4215f38ba3d0eed"
dependencies = [
"form_urlencoded",
"idna",
@@ -6358,7 +6361,7 @@ dependencies = [
"bumpalo",
"proc-macro2",
"quote",
- "syn 2.0.111",
+ "syn 2.0.114",
"wasm-bindgen-shared",
]
@@ -6467,7 +6470,7 @@ checksum =
"053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.111",
+ "syn 2.0.114",
]
[[package]]
@@ -6478,7 +6481,7 @@ checksum =
"3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.111",
+ "syn 2.0.114",
]
[[package]]
@@ -6778,28 +6781,28 @@ checksum =
"b659052874eb698efe5b9e8cf382204678a0086ebf46982b79d6ca3182927e5d"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.111",
+ "syn 2.0.114",
"synstructure",
]
[[package]]
name = "zerocopy"
-version = "0.8.31"
+version = "0.8.33"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fd74ec98b9250adb3ca554bdde269adf631549f51d8a8f8f0a10b50f1cb298c3"
+checksum = "668f5168d10b9ee831de31933dc111a459c97ec93225beb307aed970d1372dfd"
dependencies = [
"zerocopy-derive",
]
[[package]]
name = "zerocopy-derive"
-version = "0.8.31"
+version = "0.8.33"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d8a8d209fdf45cf5138cbb5a506f6b52522a25afccc534d1475dad8e31105c6a"
+checksum = "2c7962b26b0a8685668b671ee4b54d007a67d4eaf05fda79ac0ecf41e32270f1"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.111",
+ "syn 2.0.114",
]
[[package]]
@@ -6819,7 +6822,7 @@ checksum =
"d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.111",
+ "syn 2.0.114",
"synstructure",
]
@@ -6859,14 +6862,20 @@ checksum =
"eadce39539ca5cb3985590102671f2567e659fca9666581ad3411d59207951f3"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.111",
+ "syn 2.0.114",
]
[[package]]
name = "zlib-rs"
-version = "0.5.4"
+version = "0.5.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "40990edd51aae2c2b6907af74ffb635029d5788228222c4bb811e9351c0caad3"
+
+[[package]]
+name = "zmij"
+version = "1.0.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "51f936044d677be1a1168fae1d03b583a285a5dd9d8cbf7b24c23aa1fc775235"
+checksum = "2fc5a66a20078bf1251bde995aa2fdcc4b800c70b5d92dd2c62abc5c60f679f8"
[[package]]
name = "zstd"
diff --git a/python/sedonadb/tests/test_context.py
b/python/sedonadb/tests/test_context.py
index 758b2014..d9b1d33b 100644
--- a/python/sedonadb/tests/test_context.py
+++ b/python/sedonadb/tests/test_context.py
@@ -131,3 +131,20 @@ def test_read_parquet_invalid_aws_option():
match="Unknown AWS option.*aws.unknown_option.*Valid options are",
):
con.read_parquet(url, options={"aws.unknown_option": "value"})
+
+
+def test_read_parquet_invalid_azure_option():
+ con = sedonadb.connect()
+ url = "az://container/path/file.parquet"
+
+ with pytest.raises(
+ sedonadb._lib.SedonaError,
+ match=r"Unknown Azure option 'azure\.acc_name'\..*azure\.account_name",
+ ):
+ con.read_parquet(url, options={"azure.acc_name": "test"})
+
+ with pytest.raises(
+ sedonadb._lib.SedonaError,
+ match="Unknown Azure option.*azure.unknown_option.*Valid options are",
+ ):
+ con.read_parquet(url, options={"azure.unknown_option": "value"})
diff --git a/rust/sedona-geoparquet/src/metadata.rs
b/rust/sedona-geoparquet/src/metadata.rs
index aac33cce..aee71ec2 100644
--- a/rust/sedona-geoparquet/src/metadata.rs
+++ b/rust/sedona-geoparquet/src/metadata.rs
@@ -329,6 +329,11 @@ pub struct GeoParquetColumnMetadata {
/// and multipolygons, it is not sufficient to specify `["MultiPolygon"]`,
but it is expected
/// to specify `["Polygon", "MultiPolygon"]`. Or if having 3D points, it
is not sufficient to
/// specify `["Point"]`, but it is expected to list `["Point Z"]`.
+ ///
+ /// Note: While the GeoParquet spec requires this field, some datasets in
the wild (e.g.,
+ /// Microsoft Building Footprints on Planetary Computer) omit it. We use
`#[serde(default)]`
+ /// to handle these out-of-spec files gracefully by defaulting to an empty
set.
+ #[serde(default)]
pub geometry_types: GeometryTypeAndDimensionsSet,
/// [PROJJSON](https://proj.org/specifications/projjson.html) object
representing the
diff --git a/rust/sedona-geoparquet/src/provider.rs
b/rust/sedona-geoparquet/src/provider.rs
index 84da6ea6..51b92c0e 100644
--- a/rust/sedona-geoparquet/src/provider.rs
+++ b/rust/sedona-geoparquet/src/provider.rs
@@ -54,7 +54,9 @@ pub async fn geoparquet_listing_table(
// check if the file extension matches the expected extension
for path in &table_paths {
let file_path = path.as_str();
- if !file_path.ends_with(option_extension.clone().as_str()) &&
!path.is_collection() {
+ let path_without_query =
file_path.split('?').next().unwrap_or(file_path);
+ if !path_without_query.ends_with(option_extension.clone().as_str()) &&
!path.is_collection()
+ {
return exec_err!(
"File path '{file_path}' does not match the expected
extension '{option_extension}'"
);
@@ -88,9 +90,8 @@ impl GeoParquetReadOptions<'_> {
}
/// Create GeoParquetReadOptions from table options HashMap
- /// Validates that AWS options are spelled correctly to help catch user
errors
+ /// Validates that AWS and Azure options are spelled correctly to help
catch user errors
pub fn from_table_options(options: HashMap<String, String>) ->
Result<Self, String> {
- // Validate AWS options to catch common misspellings
for key in options.keys() {
if key.starts_with("aws.") {
let common_aws_options = [
@@ -99,23 +100,19 @@ impl GeoParquetReadOptions<'_> {
"aws.region",
"aws.endpoint",
"aws.skip_signature",
- "aws.nosign", // Alternative name for skip_signature
+ "aws.nosign",
"aws.bucket_name",
"aws.use_ssl",
"aws.force_path_style",
];
if !common_aws_options.contains(&key.as_str()) {
- // Find potential matches for misspelled options
let close_matches: Vec<&str> = common_aws_options
.iter()
.filter(|&&option| {
- // Check for similar starting patterns or
abbreviations
- let key_start = &key[4..]; // Remove "aws." prefix
- let option_start = &option[4..]; // Remove "aws."
prefix
+ let key_start = &key[4..];
+ let option_start = &option[4..];
- // Check if the key is a prefix of the option
(abbreviation)
- // or if they share a common prefix of at least 4
characters
option_start.starts_with(key_start)
|| key_start.starts_with(option_start)
|| (key_start.len() >= 4
@@ -139,6 +136,49 @@ impl GeoParquetReadOptions<'_> {
));
}
}
+ } else if key.starts_with("azure.") {
+                // Option names accepted under the `azure.` prefix.
+                let common_azure_options = [
+                    "azure.account_name",
+                    "azure.account_key",
+                    "azure.sas_token",
+                    "azure.container_name",
+                    "azure.use_emulator",
+                    "azure.client_id",
+                    "azure.client_secret",
+                    "azure.tenant_id",
+                    "azure.allow_http",
+                ];
+
+                if !common_azure_options.contains(&key.as_str()) {
+                    // Keys reaching this branch start with "azure.", so cutting the
+                    // prefix off always lands on a character boundary.
+                    let key_start = &key["azure.".len()..];
+
+                    // Suggest options that extend (or are extended by) the key, or
+                    // that share its first four bytes. The four-byte comparison is
+                    // done on bytes, not on `str` slices: `key` is user input, and
+                    // slicing a `str` at a fixed byte offset panics when the offset
+                    // falls inside a multi-byte character.
+                    let close_matches: Vec<&str> = common_azure_options
+                        .iter()
+                        .copied()
+                        .filter(|&option| {
+                            let option_start = &option["azure.".len()..];
+
+                            option_start.starts_with(key_start)
+                                || key_start.starts_with(option_start)
+                                || (key_start.len() >= 4
+                                    && option_start.len() >= 4
+                                    && key_start.as_bytes()[..4]
+                                        == option_start.as_bytes()[..4])
+                        })
+                        .collect();
+
+                    return Err(if close_matches.is_empty() {
+                        format!(
+                            "Unknown Azure option '{}'. Valid options are: {}",
+                            key,
+                            common_azure_options.join(", ")
+                        )
+                    } else {
+                        format!(
+                            "Unknown Azure option '{}'. Did you mean: {}?",
+                            key,
+                            close_matches.join(", ")
+                        )
+                    });
+                }
}
}
diff --git a/rust/sedona/Cargo.toml b/rust/sedona/Cargo.toml
index aab77f2e..1172f77a 100644
--- a/rust/sedona/Cargo.toml
+++ b/rust/sedona/Cargo.toml
@@ -31,8 +31,9 @@ rust-version.workspace = true
result_large_err = "allow"
[features]
-default = ["aws", "gcp", "http", "geo", "geos", "tg", "spatial-join"]
+default = ["aws", "azure", "gcp", "http", "geo", "geos", "tg", "spatial-join"]
aws = ["dep:aws-config", "dep:aws-credential-types", "object_store/aws"]
+azure = ["object_store/azure"]
gcp = ["object_store/gcp"]
geo = ["dep:sedona-geo"]
geos = ["dep:sedona-geos"]
diff --git a/rust/sedona/src/object_storage.rs
b/rust/sedona/src/object_storage.rs
index 8de36fdf..0d57f3fb 100644
--- a/rust/sedona/src/object_storage.rs
+++ b/rust/sedona/src/object_storage.rs
@@ -45,6 +45,9 @@ use object_store::aws::{AmazonS3Builder, AwsCredential};
#[cfg(feature = "aws")]
use object_store::gcp::GoogleCloudStorageBuilder;
+#[cfg(feature = "azure")]
+use object_store::azure::MicrosoftAzureBuilder;
+
#[cfg(feature = "http")]
use object_store::http::HttpBuilder;
#[cfg(feature = "http")]
@@ -105,6 +108,22 @@ pub async fn ensure_object_store_registered_with_options(
table_options.extensions.insert(gcp_options);
}
}
+ #[cfg(feature = "azure")]
+ "az" | "abfs" | "abfss" => {
+ if let Some(table_options) = builder.table_options() {
+ let mut azure_options = AzureOptions::default();
+
+ if let Some(options) = custom_options {
+ for (key, value) in options {
+ if key.starts_with("azure.") {
+ let _ = azure_options.set(key, value);
+ }
+ }
+ }
+
+ table_options.extensions.insert(azure_options);
+ }
+ }
_ => {}
};
let new_state = builder.build();
@@ -313,6 +332,69 @@ pub fn get_gcs_object_store_builder(
Ok(builder)
}
+/// Creates a [`MicrosoftAzureBuilder`] for `url`, configured from `azure_options`.
+///
+/// The builder is seeded with `MicrosoftAzureBuilder::from_env()`; every option
+/// that is set in `azure_options` is then applied on top of it.
+///
+/// The container name is taken from `azure_options.container_name` when present
+/// and from the host component of `url` otherwise.
+///
+/// # Errors
+///
+/// Returns `DataFusionError::Execution` when no container name is configured and
+/// `url` has no host component.
+#[cfg(feature = "azure")]
+pub fn get_azure_object_store_builder(
+    url: &Url,
+    azure_options: &AzureOptions,
+) -> Result<MicrosoftAzureBuilder> {
+    let container_name = azure_options
+        .container_name
+        .as_deref()
+        .or_else(|| url.host_str())
+        .ok_or_else(|| {
+            DataFusionError::Execution(format!(
+                "Not able to parse container name from url: {}",
+                url.as_str()
+            ))
+        })?;
+
+    let mut builder = MicrosoftAzureBuilder::from_env().with_container_name(container_name);
+
+    if let Some(account_name) = &azure_options.account_name {
+        builder = builder.with_account(account_name);
+    }
+
+    if let Some(account_key) = &azure_options.account_key {
+        builder = builder.with_access_key(account_key);
+    }
+
+    if let Some(sas_token) = &azure_options.sas_token {
+        // Hand the token over verbatim and let object_store split it. SAS tokens
+        // are normally percent-encoded (e.g. `sig=...%3D`); pre-splitting them
+        // into raw pairs would get the values encoded a second time when they
+        // are appended to the request URL.
+        builder = builder.with_config(
+            object_store::azure::AzureConfigKey::SasKey,
+            sas_token.as_str(),
+        );
+    }
+
+    // Forward the value whenever it is set, so an explicit `false` is honoured
+    // as well as `true`.
+    if let Some(use_emulator) = azure_options.use_emulator {
+        builder = builder.with_use_emulator(use_emulator);
+    }
+
+    // Credential fields are forwarded one by one so that a value given here can
+    // be combined with values the builder already obtained from the environment.
+    if let Some(client_id) = &azure_options.client_id {
+        builder = builder.with_client_id(client_id);
+    }
+
+    if let Some(client_secret) = &azure_options.client_secret {
+        builder = builder.with_client_secret(client_secret);
+    }
+
+    if let Some(tenant_id) = &azure_options.tenant_id {
+        builder = builder.with_tenant_id(tenant_id);
+    }
+
+    if let Some(allow_http) = azure_options.allow_http {
+        builder = builder.with_allow_http(allow_http);
+    }
+
+    Ok(builder)
+}
+
+
fn get_bucket_name(url: &Url) -> Result<&str> {
url.host_str().ok_or_else(|| {
DataFusionError::Execution(format!(
@@ -503,6 +585,121 @@ impl ConfigExtension for GcpOptions {
const PREFIX: &'static str = "gcp";
}
+/// Azure Blob Storage settings used when setting up object storage.
+///
+/// Every field is optional and defaults to `None`. Values are assigned through
+/// the `ExtensionOptions::set` implementation for this type and read by
+/// [`get_azure_object_store_builder`].
+// NOTE(review): `Debug` is derived, so formatting this struct with `{:?}` prints
+// `account_key`, `sas_token` and `client_secret` in clear text.
+#[cfg(feature = "azure")]
+#[derive(Default, Debug, Clone)]
+pub struct AzureOptions {
+ /// Storage account name
+ pub account_name: Option<String>,
+ /// Storage account key
+ pub account_key: Option<String>,
+ /// SAS token for authentication (a leading `?` is accepted)
+ pub sas_token: Option<String>,
+ /// Container name (optional; when unset, the host part of the URL is used)
+ pub container_name: Option<String>,
+ /// Use Azure Storage Emulator (Azurite)
+ pub use_emulator: Option<bool>,
+ /// Client ID for service principal authentication
+ pub client_id: Option<String>,
+ /// Client secret for service principal authentication
+ pub client_secret: Option<String>,
+ /// Tenant ID for service principal authentication
+ pub tenant_id: Option<String>,
+ /// Allow HTTP (otherwise will always use https)
+ pub allow_http: Option<bool>,
+}
+
+
+#[cfg(feature = "azure")]
+impl ExtensionOptions for AzureOptions {
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+
+    fn as_any_mut(&mut self) -> &mut dyn Any {
+        self
+    }
+
+    fn cloned(&self) -> Box<dyn ExtensionOptions> {
+        Box::new(self.clone())
+    }
+
+    /// Stores `value` in the field named by the part of `key` that follows the
+    /// first `.`; a key without a `.` or with an unknown field name is rejected
+    /// with a configuration error.
+    fn set(&mut self, key: &str, value: &str) -> Result<()> {
+        let field = match key.split_once('.') {
+            Some((_prefix, rest)) => rest,
+            None => "",
+        };
+
+        match field {
+            "account_name" => self.account_name.set(field, value),
+            "account_key" => self.account_key.set(field, value),
+            "sas_token" => self.sas_token.set(field, value),
+            "container_name" => self.container_name.set(field, value),
+            "use_emulator" => self.use_emulator.set(field, value),
+            "client_id" => self.client_id.set(field, value),
+            "client_secret" => self.client_secret.set(field, value),
+            "tenant_id" => self.tenant_id.set(field, value),
+            "allow_http" => self.allow_http.set(field, value),
+            _ => config_err!("Config value \"{}\" not found on AzureOptions", field),
+        }
+    }
+
+    /// Lists one entry per field, in declaration order, with empty descriptions.
+    fn entries(&self) -> Vec<ConfigEntry> {
+        // Collects whatever the fields report through `Visit`.
+        struct Collector(Vec<ConfigEntry>);
+
+        impl Visit for Collector {
+            fn some<V: Display>(&mut self, key: &str, value: V, description: &'static str) {
+                let value = Some(value.to_string());
+                self.0.push(ConfigEntry {
+                    key: key.to_string(),
+                    value,
+                    description,
+                })
+            }
+
+            fn none(&mut self, key: &str, description: &'static str) {
+                self.0.push(ConfigEntry {
+                    key: key.to_string(),
+                    value: None,
+                    description,
+                })
+            }
+        }
+
+        let mut collector = Collector(Vec::new());
+        self.account_name.visit(&mut collector, "account_name", "");
+        self.account_key.visit(&mut collector, "account_key", "");
+        self.sas_token.visit(&mut collector, "sas_token", "");
+        self.container_name.visit(&mut collector, "container_name", "");
+        self.use_emulator.visit(&mut collector, "use_emulator", "");
+        self.client_id.visit(&mut collector, "client_id", "");
+        self.client_secret.visit(&mut collector, "client_secret", "");
+        self.tenant_id.visit(&mut collector, "tenant_id", "");
+        self.allow_http.visit(&mut collector, "allow_http", "");
+        collector.0
+    }
+}
+
+// Registers `AzureOptions` as a configuration extension.
+#[cfg(feature = "azure")]
+impl ConfigExtension for AzureOptions {
+ // Prefix under which these options are addressed, e.g. `azure.account_name`.
+ const PREFIX: &'static str = "azure";
+}
+
pub(crate) async fn get_object_store(
state: &SessionState,
scheme: &str,
@@ -542,6 +739,16 @@ pub(crate) async fn get_object_store(
let builder = get_gcs_object_store_builder(url, options)?;
Arc::new(builder.build()?)
}
+ #[cfg(feature = "azure")]
+ "az" | "abfs" | "abfss" => {
+ let Some(options) = table_options.extensions.get::<AzureOptions>()
else {
+ return exec_err!(
+ "Given table options incompatible with the
'az'/'abfs'/'abfss' scheme"
+ );
+ };
+ let builder = get_azure_object_store_builder(url, options)?;
+ Arc::new(builder.build()?)
+ }
#[cfg(feature = "http")]
"http" | "https" => Arc::new(
HttpBuilder::new()
@@ -576,6 +783,10 @@ pub(crate) fn
register_table_options_extension_from_scheme(ctx: &SedonaContext,
ctx.ctx
.register_table_options_extension(GcpOptions::default())
}
+ #[cfg(feature = "azure")]
+ "az" | "abfs" | "abfss" => ctx
+ .ctx
+ .register_table_options_extension(AzureOptions::default()),
// For unsupported schemes, do nothing:
_ => {}
}
@@ -815,6 +1026,56 @@ mod tests {
Ok(())
}
+    /// Options given in `CREATE EXTERNAL TABLE ... OPTIONS('azure.*' ...)` must
+    /// end up in the configuration of the Azure object store builder.
+    #[cfg(feature = "azure")]
+    #[tokio::test]
+    async fn azure_object_store_builder() -> Result<()> {
+        use object_store::azure::AzureConfigKey;
+
+        let location = "az://container/path/file.parquet";
+        let account_name = "fake_account_name";
+        let account_key = "fake_account_key";
+        let client_id = "fake_client_id";
+        let client_secret = "fake_client_secret";
+        let tenant_id = "fake_tenant_id";
+
+        let table_url = ListingTableUrl::parse(location)?;
+        let scheme = table_url.scheme();
+        let sql = format!(
+            "CREATE EXTERNAL TABLE test STORED AS PARQUET OPTIONS(\
+            'azure.account_name' '{account_name}', \
+            'azure.account_key' '{account_key}', \
+            'azure.client_id' '{client_id}', \
+            'azure.client_secret' '{client_secret}', \
+            'azure.tenant_id' '{tenant_id}'\
+            ) LOCATION '{location}'"
+        );
+
+        let ctx = SedonaContext::new();
+        let plan = ctx.ctx.state().create_logical_plan(&sql).await?;
+
+        // Anything but a CREATE EXTERNAL TABLE plan means the statement was
+        // not understood as intended.
+        let LogicalPlan::Ddl(DdlStatement::CreateExternalTable(cmd)) = &plan else {
+            return plan_err!("LogicalPlan is not a CreateExternalTable");
+        };
+
+        // The extension has to be registered before the table options can
+        // accept `azure.*` keys.
+        register_table_options_extension_from_scheme(&ctx, scheme);
+        let mut table_options = ctx.ctx.state().default_table_options();
+        table_options.alter_with_string_hash_map(&cmd.options)?;
+        let azure_options = table_options.extensions.get::<AzureOptions>().unwrap();
+        let builder = get_azure_object_store_builder(table_url.as_ref(), azure_options)?;
+
+        let expected = [
+            (AzureConfigKey::AccountName, account_name),
+            (AzureConfigKey::AccessKey, account_key),
+            (AzureConfigKey::ClientId, client_id),
+            (AzureConfigKey::ClientSecret, client_secret),
+            (AzureConfigKey::AuthorityId, tenant_id),
+        ];
+        for (key, value) in expected {
+            assert_eq!(value, builder.get_config_value(&key).unwrap());
+        }
+
+        Ok(())
+    }
+
+
#[cfg(not(target_os = "windows"))]
#[test]
fn test_substitute_tilde() {