This is an automated email from the ASF dual-hosted git repository.
mbutrovich pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-comet.git
The following commit(s) were added to refs/heads/main by this push:
new 57780bc6f perf: [iceberg] Remove IcebergFileStream, use iceberg-rust's
parallelization, bump iceberg-rust to latest, cache SchemaAdapter (#3051)
57780bc6f is described below
commit 57780bc6f8842ed8e876a381193eef0b3e458dae
Author: Matt Butrovich <[email protected]>
AuthorDate: Tue Jan 20 11:42:25 2026 -0500
perf: [iceberg] Remove IcebergFileStream, use iceberg-rust's
parallelization, bump iceberg-rust to latest, cache SchemaAdapter (#3051)
---
native/Cargo.lock | 619 ++++++++++-----------
native/Cargo.toml | 2 +-
native/core/Cargo.toml | 1 -
.../core/src/execution/operators/iceberg_scan.rs | 326 +++--------
native/core/src/execution/planner.rs | 1 +
.../sql/comet/CometIcebergNativeScanExec.scala | 15 +-
.../org/apache/comet/CometIcebergNativeSuite.scala | 4 -
7 files changed, 377 insertions(+), 591 deletions(-)
diff --git a/native/Cargo.lock b/native/Cargo.lock
index c2bfb8400..ce0eb0f2b 100644
--- a/native/Cargo.lock
+++ b/native/Cargo.lock
@@ -23,7 +23,7 @@ version = "0.7.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "891477e0c6a8957309ee5c45a6368af3ae14bb510732d2684ffa19af310920f9"
dependencies = [
- "getrandom 0.2.16",
+ "getrandom 0.2.17",
"once_cell",
"version_check",
]
@@ -128,16 +128,19 @@ dependencies = [
"serde_json",
"strum",
"strum_macros",
- "thiserror 2.0.17",
+ "thiserror 2.0.18",
"uuid",
"zstd",
]
[[package]]
name = "arc-swap"
-version = "1.7.1"
+version = "1.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "69f7f8c3906b62b754cd5326047894316021dcfe5a194c8ea52bdd94934a3457"
+checksum = "51d03449bb8ca2cc2ef70869af31463d1ae5ccc8fa3e334b307203fbf815207e"
+dependencies = [
+ "rustversion",
+]
[[package]]
name = "array-init"
@@ -301,7 +304,7 @@ dependencies = [
"arrow-schema",
"chrono",
"half",
- "indexmap 2.12.1",
+ "indexmap 2.13.0",
"itoa",
"lexical-core",
"memchr",
@@ -388,9 +391,9 @@ checksum =
"b0f477b951e452a0b6b4a10b53ccd569042d1d01729b519e02074a9c0958a063"
[[package]]
name = "assertables"
-version = "9.8.3"
+version = "9.8.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cbada39b42413d4db3d9460f6e791702490c40f72924378a1b6fc1a4181188fd"
+checksum = "4dcd1f7f2f608b9a888a851f234086946c2ca1dfeadf1431c5082fee0942eeb6"
[[package]]
name = "async-channel"
@@ -415,23 +418,6 @@ dependencies = [
"pin-project-lite",
]
-[[package]]
-name = "async-compression"
-version = "0.4.19"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "06575e6a9673580f52661c92107baabffbf41e2141373441cbcdc47cb733003c"
-dependencies = [
- "bzip2 0.5.2",
- "flate2",
- "futures-core",
- "memchr",
- "pin-project-lite",
- "tokio",
- "xz2",
- "zstd",
- "zstd-safe",
-]
-
[[package]]
name = "async-executor"
version = "1.13.3"
@@ -481,9 +467,9 @@ dependencies = [
[[package]]
name = "async-lock"
-version = "3.4.1"
+version = "3.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5fd03604047cee9b6ce9de9f70c6cd540a0520c813cbd49bae61f33ab80ed1dc"
+checksum = "290f7f2596bd5b78a9fec8088ccd89180d7f9f55b94b0576823bbbdc72ee8311"
dependencies = [
"event-listener 5.4.1",
"event-listener-strategy",
@@ -530,7 +516,7 @@ checksum =
"9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.111",
+ "syn 2.0.114",
]
[[package]]
@@ -598,9 +584,9 @@ dependencies = [
[[package]]
name = "aws-lc-rs"
-version = "1.15.2"
+version = "1.15.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6a88aab2464f1f25453baa7a07c84c5b7684e274054ba06817f382357f77a288"
+checksum = "e84ce723ab67259cfeb9877c6a639ee9eb7a27b28123abd71db7f0d5d0cc9d86"
dependencies = [
"aws-lc-sys",
"zeroize",
@@ -608,9 +594,9 @@ dependencies = [
[[package]]
name = "aws-lc-sys"
-version = "0.35.0"
+version = "0.36.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b45afffdee1e7c9126814751f88dddc747f41d91da16c9551a0f1e8a11e788a1"
+checksum = "43a442ece363113bd4bd4c8b18977a7798dd4d3c3383f34fb61936960e8f4ad8"
dependencies = [
"cc",
"cmake",
@@ -620,9 +606,9 @@ dependencies = [
[[package]]
name = "aws-runtime"
-version = "1.5.17"
+version = "1.5.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d81b5b2898f6798ad58f484856768bca817e3cd9de0974c24ae0f1113fe88f1b"
+checksum = "959dab27ce613e6c9658eb3621064d0e2027e5f2acb65bc526a43577facea557"
dependencies = [
"aws-credential-types",
"aws-sigv4",
@@ -644,15 +630,16 @@ dependencies = [
[[package]]
name = "aws-sdk-sso"
-version = "1.91.0"
+version = "1.92.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8ee6402a36f27b52fe67661c6732d684b2635152b676aa2babbfb5204f99115d"
+checksum = "b7d63bd2bdeeb49aa3f9b00c15e18583503b778b2e792fc06284d54e7d5b6566"
dependencies = [
"aws-credential-types",
"aws-runtime",
"aws-smithy-async",
"aws-smithy-http",
"aws-smithy-json",
+ "aws-smithy-observability",
"aws-smithy-runtime",
"aws-smithy-runtime-api",
"aws-smithy-types",
@@ -666,15 +653,16 @@ dependencies = [
[[package]]
name = "aws-sdk-ssooidc"
-version = "1.93.0"
+version = "1.94.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a45a7f750bbd170ee3677671ad782d90b894548f4e4ae168302c57ec9de5cb3e"
+checksum = "532d93574bf731f311bafb761366f9ece345a0416dbcc273d81d6d1a1205239b"
dependencies = [
"aws-credential-types",
"aws-runtime",
"aws-smithy-async",
"aws-smithy-http",
"aws-smithy-json",
+ "aws-smithy-observability",
"aws-smithy-runtime",
"aws-smithy-runtime-api",
"aws-smithy-types",
@@ -688,15 +676,16 @@ dependencies = [
[[package]]
name = "aws-sdk-sts"
-version = "1.95.0"
+version = "1.96.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "55542378e419558e6b1f398ca70adb0b2088077e79ad9f14eb09441f2f7b2164"
+checksum = "357e9a029c7524db6a0099cd77fbd5da165540339e7296cca603531bc783b56c"
dependencies = [
"aws-credential-types",
"aws-runtime",
"aws-smithy-async",
"aws-smithy-http",
"aws-smithy-json",
+ "aws-smithy-observability",
"aws-smithy-query",
"aws-smithy-runtime",
"aws-smithy-runtime-api",
@@ -798,9 +787,9 @@ dependencies = [
[[package]]
name = "aws-smithy-observability"
-version = "0.1.5"
+version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "17f616c3f2260612fe44cede278bafa18e73e6479c4e393e2c4518cf2a9a228a"
+checksum = "ef1fcbefc7ece1d70dcce29e490f269695dfca2d2bacdeaf9e5c3f799e4e6a42"
dependencies = [
"aws-smithy-runtime-api",
]
@@ -817,9 +806,9 @@ dependencies = [
[[package]]
name = "aws-smithy-runtime"
-version = "1.9.6"
+version = "1.9.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "65fda37911905ea4d3141a01364bc5509a0f32ae3f3b22d6e330c0abfb62d247"
+checksum = "bb5b6167fcdf47399024e81ac08e795180c576a20e4d4ce67949f9a88ae37dc1"
dependencies = [
"aws-smithy-async",
"aws-smithy-http",
@@ -841,9 +830,9 @@ dependencies = [
[[package]]
name = "aws-smithy-runtime-api"
-version = "1.9.3"
+version = "1.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ab0d43d899f9e508300e587bf582ba54c27a452dd0a9ea294690669138ae14a2"
+checksum = "efce7aaaf59ad53c5412f14fc19b2d5c6ab2c3ec688d272fd31f76ec12f44fb0"
dependencies = [
"aws-smithy-async",
"aws-smithy-types",
@@ -858,9 +847,9 @@ dependencies = [
[[package]]
name = "aws-smithy-types"
-version = "1.3.5"
+version = "1.3.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "905cb13a9895626d49cf2ced759b062d913834c7482c38e49557eac4e6193f01"
+checksum = "65f172bcb02424eb94425db8aed1b6d583b5104d4d5ddddf22402c661a320048"
dependencies = [
"base64-simd",
"bytes",
@@ -946,9 +935,9 @@ dependencies = [
[[package]]
name = "bigdecimal"
-version = "0.4.9"
+version = "0.4.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "560f42649de9fa436b73517378a147ec21f6c997a546581df4b4b31677828934"
+checksum = "4d6867f1565b3aad85681f1015055b087fcfd840d6aeee6eee7f2da317603695"
dependencies = [
"autocfg",
"libm",
@@ -1003,7 +992,7 @@ dependencies = [
"regex",
"rustc-hash 2.1.1",
"shlex",
- "syn 2.0.111",
+ "syn 2.0.114",
]
[[package]]
@@ -1041,15 +1030,16 @@ dependencies = [
[[package]]
name = "blake3"
-version = "1.8.2"
+version = "1.8.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3888aaa89e4b2a40fca9848e400f6a658a5a3978de7be858e209cafa8be9a4a0"
+checksum = "2468ef7d57b3fb7e16b576e8377cdbde2320c60e1491e961d11da40fc4f02a2d"
dependencies = [
"arrayref",
"arrayvec",
"cc",
"cfg-if",
"constant_time_eq",
+ "cpufeatures",
]
[[package]]
@@ -1076,9 +1066,9 @@ dependencies = [
[[package]]
name = "bon"
-version = "3.8.1"
+version = "3.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ebeb9aaf9329dff6ceb65c689ca3db33dbf15f324909c60e4e5eef5701ce31b1"
+checksum = "234655ec178edd82b891e262ea7cf71f6584bcd09eff94db786be23f1821825c"
dependencies = [
"bon-macros",
"rustversion",
@@ -1086,17 +1076,17 @@ dependencies = [
[[package]]
name = "bon-macros"
-version = "3.8.1"
+version = "3.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "77e9d642a7e3a318e37c2c9427b5a6a48aa1ad55dcd986f3034ab2239045a645"
+checksum = "89ec27229c38ed0eb3c0feee3d2c1d6a4379ae44f418a29a658890e062d8f365"
dependencies = [
- "darling 0.21.3",
+ "darling 0.23.0",
"ident_case",
"prettyplease",
"proc-macro2",
"quote",
"rustversion",
- "syn 2.0.111",
+ "syn 2.0.114",
]
[[package]]
@@ -1119,7 +1109,7 @@ dependencies = [
"proc-macro-crate",
"proc-macro2",
"quote",
- "syn 2.0.111",
+ "syn 2.0.114",
]
[[package]]
@@ -1199,34 +1189,6 @@ dependencies = [
"either",
]
-[[package]]
-name = "bzip2"
-version = "0.5.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "49ecfb22d906f800d4fe833b6282cf4dc1c298f5057ca0b5445e5c209735ca47"
-dependencies = [
- "bzip2-sys",
-]
-
-[[package]]
-name = "bzip2"
-version = "0.6.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f3a53fac24f34a81bc9954b5d6cfce0c21e18ec6959f44f56e8e90e4bb7c346c"
-dependencies = [
- "libbz2-rs-sys",
-]
-
-[[package]]
-name = "bzip2-sys"
-version = "0.1.13+1.0.8"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "225bff33b2141874fe80d71e07d6eec4f85c5c216453dd96388240f96e1acc14"
-dependencies = [
- "cc",
- "pkg-config",
-]
-
[[package]]
name = "cast"
version = "0.3.0"
@@ -1235,9 +1197,9 @@ checksum =
"37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"
[[package]]
name = "cc"
-version = "1.2.52"
+version = "1.2.53"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cd4932aefd12402b36c60956a4fe0035421f544799057659ff86f923657aada3"
+checksum = "755d2fce177175ffca841e9a06afdb2c4ab0f593d53b4dee48147dfaade85932"
dependencies = [
"find-msvc-tools",
"jobserver",
@@ -1274,9 +1236,9 @@ checksum =
"613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724"
[[package]]
name = "chrono"
-version = "0.4.42"
+version = "0.4.43"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "145052bdd345b87320e369255277e3fb5152762ad123a901ef5c262dd38fe8d2"
+checksum = "fac4744fb15ae8337dc853fee7fb3f4e48c0fbaa23d0afe49c447b4fab126118"
dependencies = [
"iana-time-zone",
"js-sys",
@@ -1336,18 +1298,18 @@ dependencies = [
[[package]]
name = "clap"
-version = "4.5.53"
+version = "4.5.54"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c9e340e012a1bf4935f5282ed1436d1489548e8f72308207ea5df0e23d2d03f8"
+checksum = "c6e6ff9dcd79cff5cd969a17a545d79e84ab086e444102a591e288a8aa3ce394"
dependencies = [
"clap_builder",
]
[[package]]
name = "clap_builder"
-version = "4.5.53"
+version = "4.5.54"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d76b5d13eaa18c901fd2f7fca939fefe3a0727a953561fefdf3b2922b8569d00"
+checksum = "fa42cf4d2b7a41bc8f663a7cab4031ebafa1bf3875705bfaf8466dc60ab52c00"
dependencies = [
"anstyle",
"clap_lex",
@@ -1355,9 +1317,9 @@ dependencies = [
[[package]]
name = "clap_lex"
-version = "0.7.6"
+version = "0.7.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a1d728cc89cf3aee9ff92b05e62b19ee65a02b5702cff7d5a377e32c6ae29d8d"
+checksum = "c3e64b0cc0439b12df2fa678eae89a1c56a529fd067a9115f7827f1fffd22b32"
[[package]]
name = "cmake"
@@ -1380,9 +1342,9 @@ dependencies = [
[[package]]
name = "comfy-table"
-version = "7.2.1"
+version = "7.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b03b7db8e0b4b2fdad6c551e634134e99ec000e5c8c3b6856c65e8bbaded7a3b"
+checksum = "958c5d6ecf1f214b4c2bbbbf6ab9523a864bd136dcf71a7e8904799acfe1ad47"
dependencies = [
"unicode-segmentation",
"unicode-width",
@@ -1418,16 +1380,16 @@ version = "0.1.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e"
dependencies = [
- "getrandom 0.2.16",
+ "getrandom 0.2.17",
"once_cell",
"tiny-keccak",
]
[[package]]
name = "constant_time_eq"
-version = "0.3.1"
+version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7c74b8349d32d297c9134b8c88677813a227df8f779daa29bfc29c183fe3dca6"
+checksum = "3d52eff69cd5e647efe296129160853a42795992097e8af39800e1060caeea9b"
[[package]]
name = "core-foundation"
@@ -1607,6 +1569,16 @@ dependencies = [
"darling_macro 0.21.3",
]
+[[package]]
+name = "darling"
+version = "0.23.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "25ae13da2f202d56bd7f91c25fba009e7717a1e4a1cc98a76d844b65ae912e9d"
+dependencies = [
+ "darling_core 0.23.0",
+ "darling_macro 0.23.0",
+]
+
[[package]]
name = "darling_core"
version = "0.20.11"
@@ -1618,7 +1590,7 @@ dependencies = [
"proc-macro2",
"quote",
"strsim",
- "syn 2.0.111",
+ "syn 2.0.114",
]
[[package]]
@@ -1632,7 +1604,20 @@ dependencies = [
"proc-macro2",
"quote",
"strsim",
- "syn 2.0.111",
+ "syn 2.0.114",
+]
+
+[[package]]
+name = "darling_core"
+version = "0.23.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9865a50f7c335f53564bb694ef660825eb8610e0a53d3e11bf1b0d3df31e03b0"
+dependencies = [
+ "ident_case",
+ "proc-macro2",
+ "quote",
+ "strsim",
+ "syn 2.0.114",
]
[[package]]
@@ -1643,7 +1628,7 @@ checksum =
"fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead"
dependencies = [
"darling_core 0.20.11",
"quote",
- "syn 2.0.111",
+ "syn 2.0.114",
]
[[package]]
@@ -1654,7 +1639,18 @@ checksum =
"d38308df82d1080de0afee5d069fa14b0326a88c14f15c5ccda35b4a6c414c81"
dependencies = [
"darling_core 0.21.3",
"quote",
- "syn 2.0.111",
+ "syn 2.0.114",
+]
+
+[[package]]
+name = "darling_macro"
+version = "0.23.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ac3984ec7bd6cfa798e62b4a642426a5be0e68f9401cfc2a01e3fa9ea2fcdb8d"
+dependencies = [
+ "darling_core 0.23.0",
+ "quote",
+ "syn 2.0.114",
]
[[package]]
@@ -1788,7 +1784,6 @@ dependencies = [
"datafusion-comet-objectstore-hdfs",
"datafusion-comet-proto",
"datafusion-comet-spark-expr",
- "datafusion-datasource",
"datafusion-functions-nested",
"datafusion-spark",
"futures",
@@ -1821,7 +1816,7 @@ dependencies = [
"simd-adler32",
"snap",
"tempfile",
- "thiserror 2.0.17",
+ "thiserror 2.0.18",
"tikv-jemalloc-ctl",
"tikv-jemallocator",
"tokio",
@@ -1883,7 +1878,7 @@ dependencies = [
"rand 0.9.2",
"regex",
"serde_json",
- "thiserror 2.0.17",
+ "thiserror 2.0.18",
"tokio",
"twox-hash",
]
@@ -1901,7 +1896,7 @@ dependencies = [
"half",
"hashbrown 0.14.5",
"hex",
- "indexmap 2.12.1",
+ "indexmap 2.13.0",
"libc",
"log",
"object_store",
@@ -1930,10 +1925,8 @@ source =
"registry+https://github.com/rust-lang/crates.io-index"
checksum = "fde13794244bc7581cd82f6fff217068ed79cdc344cafe4ab2c3a1c3510b38d6"
dependencies = [
"arrow",
- "async-compression",
"async-trait",
"bytes",
- "bzip2 0.6.1",
"chrono",
"datafusion-common",
"datafusion-common-runtime",
@@ -1944,7 +1937,6 @@ dependencies = [
"datafusion-physical-expr-common",
"datafusion-physical-plan",
"datafusion-session",
- "flate2",
"futures",
"glob",
"itertools 0.14.0",
@@ -1952,10 +1944,7 @@ dependencies = [
"object_store",
"rand 0.9.2",
"tokio",
- "tokio-util",
"url",
- "xz2",
- "zstd",
]
[[package]]
@@ -2099,7 +2088,7 @@ dependencies = [
"datafusion-functions-aggregate-common",
"datafusion-functions-window-common",
"datafusion-physical-expr-common",
- "indexmap 2.12.1",
+ "indexmap 2.13.0",
"itertools 0.14.0",
"paste",
"serde_json",
@@ -2114,7 +2103,7 @@ checksum =
"5ce2fb1b8c15c9ac45b0863c30b268c69dc9ee7a1ee13ecf5d067738338173dc"
dependencies = [
"arrow",
"datafusion-common",
- "indexmap 2.12.1",
+ "indexmap 2.13.0",
"itertools 0.14.0",
"paste",
]
@@ -2258,7 +2247,7 @@ checksum =
"1063ad4c9e094b3f798acee16d9a47bd7372d9699be2de21b05c3bd3f34ab848"
dependencies = [
"datafusion-doc",
"quote",
- "syn 2.0.111",
+ "syn 2.0.114",
]
[[package]]
@@ -2273,7 +2262,7 @@ dependencies = [
"datafusion-expr",
"datafusion-expr-common",
"datafusion-physical-expr",
- "indexmap 2.12.1",
+ "indexmap 2.13.0",
"itertools 0.14.0",
"log",
"regex",
@@ -2295,7 +2284,7 @@ dependencies = [
"datafusion-physical-expr-common",
"half",
"hashbrown 0.14.5",
- "indexmap 2.12.1",
+ "indexmap 2.13.0",
"itertools 0.14.0",
"parking_lot",
"paste",
@@ -2372,7 +2361,7 @@ dependencies = [
"futures",
"half",
"hashbrown 0.14.5",
- "indexmap 2.12.1",
+ "indexmap 2.13.0",
"itertools 0.14.0",
"log",
"parking_lot",
@@ -2443,7 +2432,7 @@ dependencies = [
"chrono",
"datafusion-common",
"datafusion-expr",
- "indexmap 2.12.1",
+ "indexmap 2.13.0",
"log",
"regex",
"sqlparser",
@@ -2486,7 +2475,7 @@ dependencies = [
"darling 0.20.11",
"proc-macro2",
"quote",
- "syn 2.0.111",
+ "syn 2.0.114",
]
[[package]]
@@ -2496,28 +2485,28 @@ source =
"registry+https://github.com/rust-lang/crates.io-index"
checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c"
dependencies = [
"derive_builder_core",
- "syn 2.0.111",
+ "syn 2.0.114",
]
[[package]]
name = "derive_more"
-version = "2.1.0"
+version = "2.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "10b768e943bed7bf2cab53df09f4bc34bfd217cdb57d971e769874c9a6710618"
+checksum = "d751e9e49156b02b44f9c1815bcb94b984cdcc4396ecc32521c739452808b134"
dependencies = [
"derive_more-impl",
]
[[package]]
name = "derive_more-impl"
-version = "2.1.0"
+version = "2.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6d286bfdaf75e988b4a78e013ecd79c581e06399ab53fbacd2d916c2f904f30b"
+checksum = "799a97264921d8623a957f6c3b9011f3b5492f557bbb7a5a19b7fa6d06ba8dcb"
dependencies = [
"proc-macro2",
"quote",
"rustc_version",
- "syn 2.0.111",
+ "syn 2.0.114",
"unicode-xid",
]
@@ -2547,7 +2536,7 @@ checksum =
"97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.111",
+ "syn 2.0.114",
]
[[package]]
@@ -2600,7 +2589,7 @@ checksum =
"44f23cf4b44bfce11a86ace86f8a73ffdec849c9fd00a386a53d278bd9e81fb3"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.111",
+ "syn 2.0.114",
]
[[package]]
@@ -2664,9 +2653,9 @@ checksum =
"37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"
[[package]]
name = "find-msvc-tools"
-version = "0.1.7"
+version = "0.1.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f449e6c6c08c865631d4890cfacf252b3d396c9bcc83adb6623cdb02a8336c41"
+checksum = "8591b0bcc8a98a64310a2fae1bb3e9b8564dd10e381e6e28010fde8e8e8568db"
[[package]]
name = "findshlibs"
@@ -2688,9 +2677,9 @@ checksum =
"1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99"
[[package]]
name = "flatbuffers"
-version = "25.9.23"
+version = "25.12.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "09b6620799e7340ebd9968d2e0708eb82cf1971e9a16821e2091b6d6e475eed5"
+checksum = "35f6839d7b3b98adde531effaf34f0c2badc6f4735d26fe74709d8e513a96ef3"
dependencies = [
"bitflags 2.10.0",
"rustc_version",
@@ -2698,13 +2687,13 @@ dependencies = [
[[package]]
name = "flate2"
-version = "1.1.5"
+version = "1.1.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bfe33edd8e85a12a67454e37f8c75e730830d83e313556ab9ebf9ee7fbeb3bfb"
+checksum = "b375d6465b98090a5f25b1c7703f3859783755aa9a80433b36e0379a3ec2f369"
dependencies = [
"crc32fast",
- "libz-rs-sys",
"miniz_oxide",
+ "zlib-rs",
]
[[package]]
@@ -2823,7 +2812,7 @@ checksum =
"162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.111",
+ "syn 2.0.114",
]
[[package]]
@@ -2874,9 +2863,9 @@ dependencies = [
[[package]]
name = "getrandom"
-version = "0.2.16"
+version = "0.2.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592"
+checksum = "ff2abc00be7fca6ebc474524697ae276ad847ad0a6b3faa4bcb027e9a4614ad0"
dependencies = [
"cfg-if",
"js-sys",
@@ -2925,9 +2914,9 @@ dependencies = [
[[package]]
name = "h2"
-version = "0.4.12"
+version = "0.4.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f3c0b69cfcb4e1b9f1bf2f53f95f766e4661169728ec61cd3fe5a0166f2d1386"
+checksum = "2f44da3a8150a6703ed5d34e164b875fd14c2cdab9af1252a9a1020bde2bdc54"
dependencies = [
"atomic-waker",
"bytes",
@@ -2935,7 +2924,7 @@ dependencies = [
"futures-core",
"futures-sink",
"http 1.4.0",
- "indexmap 2.12.1",
+ "indexmap 2.13.0",
"slab",
"tokio",
"tokio-util",
@@ -3205,8 +3194,8 @@ dependencies = [
[[package]]
name = "iceberg"
-version = "0.7.0"
-source =
"git+https://github.com/apache/iceberg-rust?rev=16906c1#16906c127d521395a789a9019350e467cc34d063"
+version = "0.8.0"
+source =
"git+https://github.com/apache/iceberg-rust?rev=ee21563#ee21563c2032948f636eae84870f317a0b299a05"
dependencies = [
"anyhow",
"apache-avro",
@@ -3379,9 +3368,9 @@ dependencies = [
[[package]]
name = "indexmap"
-version = "2.12.1"
+version = "2.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0ad4bb2b565bca0645f4d68c5c9af97fba094e9791da685bf83cb5f3ce74acf2"
+checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017"
dependencies = [
"equivalent",
"hashbrown 0.16.1",
@@ -3396,7 +3385,7 @@ source =
"registry+https://github.com/rust-lang/crates.io-index"
checksum = "232929e1d75fe899576a3d5c7416ad0d88dbfbb3c3d6aa00873a7408a50ddb88"
dependencies = [
"ahash 0.8.12",
- "indexmap 2.12.1",
+ "indexmap 2.13.0",
"is-terminal",
"itoa",
"log",
@@ -3421,9 +3410,9 @@ checksum =
"469fb0b9cefa57e3ef31275ee7cacb78f2fdca44e4765491884a2b119d4eb130"
[[package]]
name = "iri-string"
-version = "0.7.9"
+version = "0.7.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4f867b9d1d896b67beb18518eda36fdb77a32ea590de864f1325b294a6d14397"
+checksum = "c91338f0783edbd6195decb37bae672fd3b165faffb89bf7b9e6942f8b1a731a"
dependencies = [
"memchr",
"serde",
@@ -3460,9 +3449,9 @@ dependencies = [
[[package]]
name = "itoa"
-version = "1.0.15"
+version = "1.0.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c"
+checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2"
[[package]]
name = "java-locator"
@@ -3475,9 +3464,9 @@ dependencies = [
[[package]]
name = "jiff"
-version = "0.2.16"
+version = "0.2.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "49cce2b81f2098e7e3efc35bc2e0a6b7abec9d34128283d7a26fa8f32a6dbb35"
+checksum = "e67e8da4c49d6d9909fe03361f9b620f58898859f5c7aded68351e85e71ecf50"
dependencies = [
"jiff-static",
"jiff-tzdb-platform",
@@ -3490,13 +3479,13 @@ dependencies = [
[[package]]
name = "jiff-static"
-version = "0.2.16"
+version = "0.2.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "980af8b43c3ad5d8d349ace167ec8170839f753a42d233ba19e08afe1850fa69"
+checksum = "e0c84ee7f197eca9a86c6fd6cb771e55eb991632f15f2bc3ca6ec838929e6e78"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.111",
+ "syn 2.0.114",
]
[[package]]
@@ -3550,9 +3539,9 @@ dependencies = [
[[package]]
name = "js-sys"
-version = "0.3.83"
+version = "0.3.85"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "464a3709c7f55f1f721e5389aa6ea4e3bc6aba669353300af094b29ffbdde1d8"
+checksum = "8c942ebf8e95485ca0d52d97da7c5a2c387d0e7f0ba4c35e93bfcaee045955b3"
dependencies = [
"once_cell",
"wasm-bindgen",
@@ -3636,17 +3625,11 @@ dependencies = [
"lexical-util",
]
-[[package]]
-name = "libbz2-rs-sys"
-version = "0.2.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2c4a545a15244c7d945065b5d392b2d2d7f21526fba56ce51467b06ed445e8f7"
-
[[package]]
name = "libc"
-version = "0.2.179"
+version = "0.2.180"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c5a2d376baa530d1238d133232d15e239abad80d05838b4b59354e5268af431f"
+checksum = "bcc35a38544a891a5f7c865aca548a982ccb3b8650a5b06d0fd33a10283c56fc"
[[package]]
name = "libloading"
@@ -3684,15 +3667,6 @@ dependencies = [
"libc",
]
-[[package]]
-name = "libz-rs-sys"
-version = "0.5.4"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "15413ef615ad868d4d65dce091cb233b229419c7c0c4bcaa746c0901c49ff39c"
-dependencies = [
- "zlib-rs",
-]
-
[[package]]
name = "linux-raw-sys"
version = "0.4.15"
@@ -3758,7 +3732,7 @@ dependencies = [
"serde-value",
"serde_json",
"serde_yaml",
- "thiserror 2.0.17",
+ "thiserror 2.0.18",
"thread-id",
"typemap-ors",
"unicode-segmentation",
@@ -3780,17 +3754,6 @@ dependencies = [
"twox-hash",
]
-[[package]]
-name = "lzma-sys"
-version = "0.1.20"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5fda04ab3764e6cde78b9974eec4f779acaba7c4e84b36eca3cf77c581b85d27"
-dependencies = [
- "cc",
- "libc",
- "pkg-config",
-]
-
[[package]]
name = "md-5"
version = "0.10.6"
@@ -3860,9 +3823,9 @@ checksum =
"dce6dd36094cac388f119d2e9dc82dc730ef91c32a6222170d630e5414b956e6"
[[package]]
name = "moka"
-version = "0.12.11"
+version = "0.12.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8261cd88c312e0004c1d51baad2980c66528dfdb2bee62003e643a4d8f86b077"
+checksum = "a3dec6bd31b08944e08b58fd99373893a6c17054d6f3ea5006cc894f4f4eee2a"
dependencies = [
"async-lock",
"crossbeam-channel",
@@ -3873,7 +3836,6 @@ dependencies = [
"futures-util",
"parking_lot",
"portable-atomic",
- "rustc_version",
"smallvec",
"tagptr",
"uuid",
@@ -4014,9 +3976,9 @@ dependencies = [
[[package]]
name = "object_store"
-version = "0.12.4"
+version = "0.12.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4c1be0c6c22ec0817cdc77d3842f721a17fd30ab6965001415b5402a74e6b740"
+checksum = "fbfbfff40aeccab00ec8a910b57ca8ecf4319b335c542f2edcd19dd25a1e2a00"
dependencies = [
"async-trait",
"base64",
@@ -4041,7 +4003,7 @@ dependencies = [
"serde",
"serde_json",
"serde_urlencoded",
- "thiserror 2.0.17",
+ "thiserror 2.0.18",
"tokio",
"tracing",
"url",
@@ -4090,7 +4052,7 @@ dependencies = [
"bytes",
"crc32c",
"futures",
- "getrandom 0.2.16",
+ "getrandom 0.2.17",
"hdrs",
"http 1.4.0",
"http-body 1.0.1",
@@ -4110,9 +4072,9 @@ dependencies = [
[[package]]
name = "openssl-probe"
-version = "0.1.6"
+version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e"
+checksum = "9f50d9b3dabb09ecd771ad0aa242ca6894994c130308ca3d7684634df8037391"
[[package]]
name = "ordered-float"
@@ -4227,7 +4189,7 @@ dependencies = [
"arrow-schema",
"chrono",
"half",
- "indexmap 2.12.1",
+ "indexmap 2.13.0",
"simdutf8",
"uuid",
]
@@ -4242,7 +4204,7 @@ dependencies = [
"arrow-schema",
"chrono",
"half",
- "indexmap 2.12.1",
+ "indexmap 2.13.0",
"parquet-variant",
"parquet-variant-json",
"uuid",
@@ -4288,7 +4250,7 @@ checksum =
"8701b58ea97060d5e5b155d383a69952a60943f0e6dfe30b04c287beb0b27455"
dependencies = [
"fixedbitset",
"hashbrown 0.15.5",
- "indexmap 2.12.1",
+ "indexmap 2.13.0",
"serde",
]
@@ -4327,7 +4289,7 @@ checksum =
"6e918e4ff8c4549eb882f14b3a4bc8c8bc93de829416eacf579f1207a8fbf861"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.111",
+ "syn 2.0.114",
]
[[package]]
@@ -4403,9 +4365,9 @@ dependencies = [
[[package]]
name = "portable-atomic"
-version = "1.11.1"
+version = "1.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f84267b20a16ea918e43c6a88433c2d54fa145c92a811b5b047ccbe153674483"
+checksum = "f89776e4d69bb58bc6993e99ffa1d11f228b839984854c7daeb5d37f87cbe950"
[[package]]
name = "portable-atomic-util"
@@ -4450,7 +4412,7 @@ dependencies = [
"spin",
"symbolic-demangle",
"tempfile",
- "thiserror 2.0.17",
+ "thiserror 2.0.18",
]
[[package]]
@@ -4469,7 +4431,7 @@ source =
"registry+https://github.com/rust-lang/crates.io-index"
checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b"
dependencies = [
"proc-macro2",
- "syn 2.0.111",
+ "syn 2.0.114",
]
[[package]]
@@ -4483,9 +4445,9 @@ dependencies = [
[[package]]
name = "proc-macro2"
-version = "1.0.103"
+version = "1.0.105"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8"
+checksum = "535d180e0ecab6268a3e718bb9fd44db66bbbc256257165fc699dadf70d16fe7"
dependencies = [
"unicode-ident",
]
@@ -4539,7 +4501,7 @@ dependencies = [
"prost",
"prost-types",
"regex",
- "syn 2.0.111",
+ "syn 2.0.114",
"tempfile",
]
@@ -4553,7 +4515,7 @@ dependencies = [
"itertools 0.14.0",
"proc-macro2",
"quote",
- "syn 2.0.111",
+ "syn 2.0.114",
]
[[package]]
@@ -4634,7 +4596,7 @@ dependencies = [
"rustc-hash 2.1.1",
"rustls",
"socket2",
- "thiserror 2.0.17",
+ "thiserror 2.0.18",
"tokio",
"tracing",
"web-time",
@@ -4655,7 +4617,7 @@ dependencies = [
"rustls",
"rustls-pki-types",
"slab",
- "thiserror 2.0.17",
+ "thiserror 2.0.18",
"tinyvec",
"tracing",
"web-time",
@@ -4677,9 +4639,9 @@ dependencies = [
[[package]]
name = "quote"
-version = "1.0.42"
+version = "1.0.43"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a338cc41d27e6cc6dce6cefc13a0729dfbb81c262b1f519331575dd80ef3067f"
+checksum = "dc74d9a594b72ae6656596548f56f667211f8a97b3d4c3d467150794690dc40a"
dependencies = [
"proc-macro2",
]
@@ -4714,7 +4676,7 @@ source =
"registry+https://github.com/rust-lang/crates.io-index"
checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1"
dependencies = [
"rand_chacha 0.9.0",
- "rand_core 0.9.3",
+ "rand_core 0.9.5",
]
[[package]]
@@ -4734,7 +4696,7 @@ source =
"registry+https://github.com/rust-lang/crates.io-index"
checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb"
dependencies = [
"ppv-lite86",
- "rand_core 0.9.3",
+ "rand_core 0.9.5",
]
[[package]]
@@ -4743,14 +4705,14 @@ version = "0.6.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
dependencies = [
- "getrandom 0.2.16",
+ "getrandom 0.2.17",
]
[[package]]
name = "rand_core"
-version = "0.9.3"
+version = "0.9.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38"
+checksum = "76afc826de14238e6e8c374ddcc1fa19e374fd8dd986b0d2af0d02377261d83c"
dependencies = [
"getrandom 0.3.4",
]
@@ -4801,7 +4763,7 @@ checksum =
"b7186006dcb21920990093f30e3dea63b7d6e977bf1256be20c3563a5db070da"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.111",
+ "syn 2.0.114",
]
[[package]]
@@ -4865,7 +4827,7 @@ dependencies = [
"base64",
"chrono",
"form_urlencoded",
- "getrandom 0.2.16",
+ "getrandom 0.2.17",
"hex",
"hmac",
"home",
@@ -4943,7 +4905,7 @@ checksum =
"a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7"
dependencies = [
"cc",
"cfg-if",
- "getrandom 0.2.16",
+ "getrandom 0.2.17",
"libc",
"untrusted",
"windows-sys 0.52.0",
@@ -4951,9 +4913,9 @@ dependencies = [
[[package]]
name = "rkyv"
-version = "0.7.45"
+version = "0.7.46"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9008cd6385b9e161d8229e1f6549dd23c3d022f132a2ea37ac3a10ac4935779b"
+checksum = "2297bf9c81a3f0dc96bc9521370b88f054168c29826a75e89c55ff196e7ed6a1"
dependencies = [
"bitvec",
"bytecheck",
@@ -4969,9 +4931,9 @@ dependencies = [
[[package]]
name = "rkyv_derive"
-version = "0.7.45"
+version = "0.7.46"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "503d1d27590a2b0a3a4ca4c94755aa2875657196ecbf401a42eff41d7de532c0"
+checksum = "84d7b42d4b8d06048d3ac8db0eb31bcb942cbeb709f0b5f2b2ebde398d3038f5"
dependencies = [
"proc-macro2",
"quote",
@@ -5013,7 +4975,7 @@ dependencies = [
"regex",
"relative-path",
"rustc_version",
- "syn 2.0.111",
+ "syn 2.0.114",
"unicode-ident",
]
@@ -5029,9 +4991,9 @@ dependencies = [
[[package]]
name = "rust_decimal"
-version = "1.39.0"
+version = "1.40.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "35affe401787a9bd846712274d97654355d21b2a2c092a3139aabe31e9022282"
+checksum = "61f703d19852dbf87cbc513643fa81428361eb6940f1ac14fd58155d295a3eb0"
dependencies = [
"arrayvec",
"borsh",
@@ -5045,9 +5007,9 @@ dependencies = [
[[package]]
name = "rustc-demangle"
-version = "0.1.26"
+version = "0.1.27"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "56f7d92ca342cea22a06f2121d944b4fd82af56988c270852495420f961d4ace"
+checksum = "b50b8869d9fc858ce7266cce0194bd74df58b9d0e3f6df3a9fc8eb470d95c09d"
[[package]]
name = "rustc-hash"
@@ -5098,9 +5060,9 @@ dependencies = [
[[package]]
name = "rustls"
-version = "0.23.35"
+version = "0.23.36"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "533f54bc6a7d4f647e46ad909549eda97bf5afc1585190ef692b4286b198bd8f"
+checksum = "c665f33d38cea657d9614f766881e4d510e0eda4239891eea56b4cadcf01801b"
dependencies = [
"aws-lc-rs",
"once_cell",
@@ -5113,9 +5075,9 @@ dependencies = [
[[package]]
name = "rustls-native-certs"
-version = "0.8.2"
+version = "0.8.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9980d917ebb0c0536119ba501e90834767bffc3d60641457fd84a1f3fd337923"
+checksum = "612460d5f7bea540c490b2b6395d8e34a953e52b491accd6c86c8164c5932a63"
dependencies = [
"openssl-probe",
"rustls-pki-types",
@@ -5134,9 +5096,9 @@ dependencies = [
[[package]]
name = "rustls-pki-types"
-version = "1.13.2"
+version = "1.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "21e6f2ab2928ca4291b86736a8bd920a277a399bba1589409d72154ff87c1282"
+checksum = "be040f8b0a225e40375822a563fa9524378b9d63112f53e19ffff34df5d33fdd"
dependencies = [
"web-time",
"zeroize",
@@ -5144,9 +5106,9 @@ dependencies = [
[[package]]
name = "rustls-webpki"
-version = "0.103.8"
+version = "0.103.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2ffdfa2f5286e2247234e03f680868ac2815974dc39e00ea15adc445d0aafe52"
+checksum = "d7df23109aa6c1567d1c575b9952556388da57401e4ace1d15f79eedad0d8f53"
dependencies = [
"aws-lc-rs",
"ring",
@@ -5162,9 +5124,9 @@ checksum =
"b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d"
[[package]]
name = "ryu"
-version = "1.0.20"
+version = "1.0.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f"
+checksum = "a50f4cf475b65d88e057964e0e9bb1f0aa9bbb2036dc65c64596b42932536984"
[[package]]
name = "same-file"
@@ -5198,9 +5160,9 @@ dependencies = [
[[package]]
name = "schemars"
-version = "1.1.0"
+version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9558e172d4e8533736ba97870c4b2cd63f84b382a3d6eb063da41b91cce17289"
+checksum = "54e910108742c57a770f492731f99be216a52fadd361b06c8fb59d74ccc267d2"
dependencies = [
"dyn-clone",
"ref-cast",
@@ -5302,14 +5264,14 @@ checksum =
"d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.111",
+ "syn 2.0.114",
]
[[package]]
name = "serde_json"
-version = "1.0.148"
+version = "1.0.149"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3084b546a1dd6289475996f182a22aba973866ea8e8b02c51d9f46b1336a22da"
+checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86"
dependencies = [
"itoa",
"memchr",
@@ -5326,7 +5288,7 @@ checksum =
"175ee3e80ae9982737ca543e96133087cbd9a485eecc3bc4de9c1a37b47ea59c"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.111",
+ "syn 2.0.114",
]
[[package]]
@@ -5351,9 +5313,9 @@ dependencies = [
"chrono",
"hex",
"indexmap 1.9.3",
- "indexmap 2.12.1",
+ "indexmap 2.13.0",
"schemars 0.9.0",
- "schemars 1.1.0",
+ "schemars 1.2.0",
"serde_core",
"serde_json",
"serde_with_macros",
@@ -5369,7 +5331,7 @@ dependencies = [
"darling 0.21.3",
"proc-macro2",
"quote",
- "syn 2.0.111",
+ "syn 2.0.114",
]
[[package]]
@@ -5378,7 +5340,7 @@ version = "0.9.34+deprecated"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47"
dependencies = [
- "indexmap 2.12.1",
+ "indexmap 2.13.0",
"itoa",
"ryu",
"serde",
@@ -5415,10 +5377,11 @@ checksum =
"0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
[[package]]
name = "signal-hook-registry"
-version = "1.4.7"
+version = "1.4.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7664a098b8e616bdfcc2dc0e9ac44eb231eedf41db4e9fe95d8d32ec728dedad"
+checksum = "c4db69cba1110affc0e9f7bcd48bbf87b3f4fc7c61fc9155afd4c469eb3d6c1b"
dependencies = [
+ "errno",
"libc",
]
@@ -5495,7 +5458,7 @@ checksum =
"da5fc6819faabb412da764b99d3b713bb55083c11e7e0c00144d386cd6a1939c"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.111",
+ "syn 2.0.114",
]
[[package]]
@@ -5534,7 +5497,7 @@ dependencies = [
"heck",
"proc-macro2",
"quote",
- "syn 2.0.111",
+ "syn 2.0.114",
]
[[package]]
@@ -5545,9 +5508,9 @@ checksum =
"13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292"
[[package]]
name = "symbolic-common"
-version = "12.17.0"
+version = "12.17.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b3d8046c5674ab857104bc4559d505f4809b8060d57806e45d49737c97afeb60"
+checksum = "520cf51c674f8b93d533f80832babe413214bb766b6d7cb74ee99ad2971f8467"
dependencies = [
"debugid",
"memmap2",
@@ -5557,9 +5520,9 @@ dependencies = [
[[package]]
name = "symbolic-demangle"
-version = "12.17.0"
+version = "12.17.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1accb6e5c4b0f682de907623912e616b44be1c9e725775155546669dbff720ec"
+checksum = "9f0de2ee0ffa2641e17ba715ad51d48b9259778176517979cb38b6aa86fa7425"
dependencies = [
"cpp_demangle",
"rustc-demangle",
@@ -5579,9 +5542,9 @@ dependencies = [
[[package]]
name = "syn"
-version = "2.0.111"
+version = "2.0.114"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "390cc9a294ab71bdb1aa2e99d13be9c753cd2d7bd6560c77118597410c4d2e87"
+checksum = "d4d107df263a3013ef9b1879b0df87d706ff80f65a86ea879bd9c31f9b307c2a"
dependencies = [
"proc-macro2",
"quote",
@@ -5605,7 +5568,7 @@ checksum =
"728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.111",
+ "syn 2.0.114",
]
[[package]]
@@ -5644,11 +5607,11 @@ dependencies = [
[[package]]
name = "thiserror"
-version = "2.0.17"
+version = "2.0.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f63587ca0f12b72a0600bcba1d40081f830876000bb46dd2337a3051618f4fc8"
+checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4"
dependencies = [
- "thiserror-impl 2.0.17",
+ "thiserror-impl 2.0.18",
]
[[package]]
@@ -5659,28 +5622,28 @@ checksum =
"4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.111",
+ "syn 2.0.114",
]
[[package]]
name = "thiserror-impl"
-version = "2.0.17"
+version = "2.0.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913"
+checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.111",
+ "syn 2.0.114",
]
[[package]]
name = "thread-id"
-version = "5.0.0"
+version = "5.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "99043e46c5a15af379c06add30d9c93a6c0e8849de00d244c4a2c417da128d80"
+checksum = "2010d27add3f3240c1fef7959f46c814487b216baee662af53be645ba7831c07"
dependencies = [
"libc",
- "windows-sys 0.59.0",
+ "windows-sys 0.61.2",
]
[[package]]
@@ -5727,30 +5690,30 @@ dependencies = [
[[package]]
name = "time"
-version = "0.3.44"
+version = "0.3.45"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "91e7d9e3bb61134e77bde20dd4825b97c010155709965fedf0f49bb138e52a9d"
+checksum = "f9e442fc33d7fdb45aa9bfeb312c095964abdf596f7567261062b2a7107aaabd"
dependencies = [
"deranged",
"itoa",
"num-conv",
"powerfmt",
- "serde",
+ "serde_core",
"time-core",
"time-macros",
]
[[package]]
name = "time-core"
-version = "0.1.6"
+version = "0.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "40868e7c1d2f0b8d73e4a8c7f0ff63af4f6d19be117e90bd73eb1d62cf831c6b"
+checksum = "8b36ee98fd31ec7426d599183e8fe26932a8dc1fb76ddb6214d05493377d34ca"
[[package]]
name = "time-macros"
-version = "0.2.24"
+version = "0.2.25"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "30cfb0125f12d9c277f35663a0a33f8c30190f4e4574868a330595412d34ebf3"
+checksum = "71e552d1249bf61ac2a52db88179fd0673def1e1ad8243a00d9ec9ed71fee3dd"
dependencies = [
"num-conv",
"time-core",
@@ -5825,7 +5788,7 @@ checksum =
"af407857209536a95c8e56f8231ef2c2e2aff839b22e07a1ffcbc617e9db9fa5"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.111",
+ "syn 2.0.114",
]
[[package]]
@@ -5840,9 +5803,9 @@ dependencies = [
[[package]]
name = "tokio-util"
-version = "0.7.17"
+version = "0.7.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2efa149fe76073d6e8fd97ef4f4eca7b67f599660115591483572e406e165594"
+checksum = "9ae9cec805b01e8fc3fd2fe289f89149a9b66dd16786abd8b19cfa7b48cb0098"
dependencies = [
"bytes",
"futures-core",
@@ -5866,7 +5829,7 @@ version = "0.23.10+spec-1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "84c8b9f757e028cee9fa244aea147aab2a9ec09d5325a9b01e0a49730c2b5269"
dependencies = [
- "indexmap 2.12.1",
+ "indexmap 2.13.0",
"toml_datetime",
"toml_parser",
"winnow",
@@ -5883,9 +5846,9 @@ dependencies = [
[[package]]
name = "tower"
-version = "0.5.2"
+version = "0.5.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d039ad9159c98b70ecfd540b2573b97f7f52c3e8d9f8ad57a24b916a536975f9"
+checksum = "ebe5ef63511595f1344e2d5cfa636d973292adc0eec1f0ad45fae9f0851ab1d4"
dependencies = [
"futures-core",
"futures-util",
@@ -5945,7 +5908,7 @@ checksum =
"7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.111",
+ "syn 2.0.114",
]
[[package]]
@@ -5989,7 +5952,7 @@ checksum =
"3c36781cc0e46a83726d9879608e4cf6c2505237e263a8eb8c24502989cfdb28"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.111",
+ "syn 2.0.114",
]
[[package]]
@@ -6054,9 +6017,9 @@ checksum =
"8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1"
[[package]]
name = "url"
-version = "2.5.7"
+version = "2.5.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "08bc136a29a3d1758e07a9cca267be308aeebf5cfd5a10f3f67ab2097683ef5b"
+checksum = "ff67a8a4397373c3ef660812acab3268222035010ab8680ec4215f38ba3d0eed"
dependencies = [
"form_urlencoded",
"idna",
@@ -6133,18 +6096,18 @@ checksum =
"ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b"
[[package]]
name = "wasip2"
-version = "1.0.1+wasi-0.2.4"
+version = "1.0.2+wasi-0.2.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0562428422c63773dad2c345a1882263bbf4d65cf3f42e90921f787ef5ad58e7"
+checksum = "9517f9239f02c069db75e65f174b3da828fe5f5b945c4dd26bd25d89c03ebcf5"
dependencies = [
"wit-bindgen",
]
[[package]]
name = "wasm-bindgen"
-version = "0.2.106"
+version = "0.2.108"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0d759f433fa64a2d763d1340820e46e111a7a5ab75f993d1852d70b03dbb80fd"
+checksum = "64024a30ec1e37399cf85a7ffefebdb72205ca1c972291c51512360d90bd8566"
dependencies = [
"cfg-if",
"once_cell",
@@ -6155,11 +6118,12 @@ dependencies = [
[[package]]
name = "wasm-bindgen-futures"
-version = "0.4.56"
+version = "0.4.58"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "836d9622d604feee9e5de25ac10e3ea5f2d65b41eac0d9ce72eb5deae707ce7c"
+checksum = "70a6e77fd0ae8029c9ea0063f87c46fde723e7d887703d74ad2616d792e51e6f"
dependencies = [
"cfg-if",
+ "futures-util",
"js-sys",
"once_cell",
"wasm-bindgen",
@@ -6168,9 +6132,9 @@ dependencies = [
[[package]]
name = "wasm-bindgen-macro"
-version = "0.2.106"
+version = "0.2.108"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "48cb0d2638f8baedbc542ed444afc0644a29166f1595371af4fecf8ce1e7eeb3"
+checksum = "008b239d9c740232e71bd39e8ef6429d27097518b6b30bdf9086833bd5b6d608"
dependencies = [
"quote",
"wasm-bindgen-macro-support",
@@ -6178,22 +6142,22 @@ dependencies = [
[[package]]
name = "wasm-bindgen-macro-support"
-version = "0.2.106"
+version = "0.2.108"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cefb59d5cd5f92d9dcf80e4683949f15ca4b511f4ac0a6e14d4e1ac60c6ecd40"
+checksum = "5256bae2d58f54820e6490f9839c49780dff84c65aeab9e772f15d5f0e913a55"
dependencies = [
"bumpalo",
"proc-macro2",
"quote",
- "syn 2.0.111",
+ "syn 2.0.114",
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-shared"
-version = "0.2.106"
+version = "0.2.108"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cbc538057e648b67f72a982e708d485b2efa771e1ac05fec311f9f63e5800db4"
+checksum = "1f01b580c9ac74c8d8f0c0e4afb04eeef2acf145458e52c03845ee9cd23e3d12"
dependencies = [
"unicode-ident",
]
@@ -6213,9 +6177,9 @@ dependencies = [
[[package]]
name = "web-sys"
-version = "0.3.83"
+version = "0.3.85"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9b32828d774c412041098d182a8b38b16ea816958e07cf40eec2bc080ae137ac"
+checksum = "312e32e551d92129218ea9a2452120f4aabc03529ef03e4d0d82fb2780608598"
dependencies = [
"js-sys",
"wasm-bindgen",
@@ -6233,9 +6197,9 @@ dependencies = [
[[package]]
name = "webpki-roots"
-version = "1.0.4"
+version = "1.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b2878ef029c47c6e8cf779119f20fcf52bde7ad42a731b2a304bc221df17571e"
+checksum = "12bed680863276c63889429bfd6cab3b99943659923822de1c8a39c49e4d722c"
dependencies = [
"rustls-pki-types",
]
@@ -6304,7 +6268,7 @@ checksum =
"053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.111",
+ "syn 2.0.114",
]
[[package]]
@@ -6315,7 +6279,7 @@ checksum =
"3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.111",
+ "syn 2.0.114",
]
[[package]]
@@ -6584,9 +6548,9 @@ dependencies = [
[[package]]
name = "wit-bindgen"
-version = "0.46.0"
+version = "0.51.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59"
+checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5"
[[package]]
name = "writeable"
@@ -6609,15 +6573,6 @@ version = "0.13.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "66fee0b777b0f5ac1c69bb06d361268faafa61cd4682ae064a171c16c433e9e4"
-[[package]]
-name = "xz2"
-version = "0.1.7"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "388c44dc09d76f1536602ead6d325eb532f5c122f17782bd57fb47baeeb767e2"
-dependencies = [
- "lzma-sys",
-]
-
[[package]]
name = "yoke"
version = "0.8.1"
@@ -6637,28 +6592,28 @@ checksum =
"b659052874eb698efe5b9e8cf382204678a0086ebf46982b79d6ca3182927e5d"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.111",
+ "syn 2.0.114",
"synstructure",
]
[[package]]
name = "zerocopy"
-version = "0.8.31"
+version = "0.8.33"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fd74ec98b9250adb3ca554bdde269adf631549f51d8a8f8f0a10b50f1cb298c3"
+checksum = "668f5168d10b9ee831de31933dc111a459c97ec93225beb307aed970d1372dfd"
dependencies = [
"zerocopy-derive",
]
[[package]]
name = "zerocopy-derive"
-version = "0.8.31"
+version = "0.8.33"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d8a8d209fdf45cf5138cbb5a506f6b52522a25afccc534d1475dad8e31105c6a"
+checksum = "2c7962b26b0a8685668b671ee4b54d007a67d4eaf05fda79ac0ecf41e32270f1"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.111",
+ "syn 2.0.114",
]
[[package]]
@@ -6678,7 +6633,7 @@ checksum =
"d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.111",
+ "syn 2.0.114",
"synstructure",
]
@@ -6718,20 +6673,20 @@ checksum =
"eadce39539ca5cb3985590102671f2567e659fca9666581ad3411d59207951f3"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.111",
+ "syn 2.0.114",
]
[[package]]
name = "zlib-rs"
-version = "0.5.4"
+version = "0.5.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "51f936044d677be1a1168fae1d03b583a285a5dd9d8cbf7b24c23aa1fc775235"
+checksum = "40990edd51aae2c2b6907af74ffb635029d5788228222c4bb811e9351c0caad3"
[[package]]
name = "zmij"
-version = "1.0.2"
+version = "1.0.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0f4a4e8e9dc5c62d159f04fcdbe07f4c3fb710415aab4754bf11505501e3251d"
+checksum = "dfcd145825aace48cff44a8844de64bf75feec3080e0aa5cdbde72961ae51a65"
[[package]]
name = "zstd"
diff --git a/native/Cargo.toml b/native/Cargo.toml
index 21e580577..9d1632e78 100644
--- a/native/Cargo.toml
+++ b/native/Cargo.toml
@@ -54,7 +54,7 @@ object_store = { version = "0.12.3", features = ["gcp",
"azure", "aws", "http"]
url = "2.2"
aws-config = "1.8.10"
aws-credential-types = "1.2.9"
-iceberg = { git = "https://github.com/apache/iceberg-rust", rev = "16906c1" }
+iceberg = { git = "https://github.com/apache/iceberg-rust", rev = "ee21563" }
[profile.release]
debug = true
diff --git a/native/core/Cargo.toml b/native/core/Cargo.toml
index b13d6d54f..5e30883e3 100644
--- a/native/core/Cargo.toml
+++ b/native/core/Cargo.toml
@@ -60,7 +60,6 @@ tempfile = "3.24.0"
itertools = "0.14.0"
paste = "1.0.14"
datafusion = { workspace = true, features = ["parquet_encryption", "sql"] }
-datafusion-datasource = { workspace = true }
datafusion-spark = { workspace = true }
once_cell = "1.18.0"
regex = { workspace = true }
diff --git a/native/core/src/execution/operators/iceberg_scan.rs
b/native/core/src/execution/operators/iceberg_scan.rs
index ef2ad63e0..2f639e9f7 100644
--- a/native/core/src/execution/operators/iceberg_scan.rs
+++ b/native/core/src/execution/operators/iceberg_scan.rs
@@ -18,7 +18,7 @@
//! Native Iceberg table scan operator using iceberg-rust
use std::any::Any;
-use std::collections::{HashMap, VecDeque};
+use std::collections::HashMap;
use std::fmt;
use std::pin::Pin;
use std::sync::Arc;
@@ -36,16 +36,14 @@ use datafusion::physical_plan::metrics::{
use datafusion::physical_plan::{
DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, PlanProperties,
};
-use futures::future::BoxFuture;
-use futures::{ready, FutureExt, Stream, StreamExt, TryStreamExt};
+use futures::{Stream, StreamExt, TryStreamExt};
use iceberg::io::FileIO;
use crate::execution::operators::ExecutionError;
use crate::parquet::parquet_support::SparkParquetOptions;
use crate::parquet::schema_adapter::SparkSchemaAdapterFactory;
-use datafusion::datasource::schema_adapter::SchemaAdapterFactory;
+use datafusion::datasource::schema_adapter::{SchemaAdapterFactory,
SchemaMapper};
use datafusion_comet_spark_expr::EvalMode;
-use datafusion_datasource::file_stream::FileStreamMetrics;
/// Iceberg table scan operator that uses iceberg-rust to read Iceberg tables.
///
@@ -163,22 +161,38 @@ impl IcebergScanExec {
let batch_size = context.session_config().batch_size();
let metrics = IcebergScanMetrics::new(&self.metrics, partition);
-
- // Create parallel file stream that overlaps opening next file with reading current file
- let file_stream = IcebergFileStream::new(
- tasks,
- file_io,
- batch_size,
- Arc::clone(&output_schema),
- metrics,
- )?;
-
- // Note: BatchSplitStream adds overhead. Since we're already setting batch_size in
- // iceberg-rust's ArrowReaderBuilder, it should produce correctly sized batches.
- // Only use BatchSplitStream as a safety net if needed.
- // For now, return the file_stream directly to reduce stream nesting overhead.
-
- Ok(Box::pin(file_stream))
+ let num_tasks = tasks.len();
+ metrics.num_splits.add(num_tasks);
+
+ let task_stream =
futures::stream::iter(tasks.into_iter().map(Ok)).boxed();
+
+ let reader = iceberg::arrow::ArrowReaderBuilder::new(file_io)
+ .with_batch_size(batch_size)
+
.with_data_file_concurrency_limit(context.session_config().target_partitions())
+ .with_row_selection_enabled(true)
+ .build();
+
+ // Pass all tasks to iceberg-rust at once to utilize its flatten_unordered
+ // parallelization, avoiding overhead of single-task streams
+ let stream = reader.read(task_stream).map_err(|e| {
+ DataFusionError::Execution(format!("Failed to read Iceberg tasks:
{}", e))
+ })?;
+
+ let spark_options = SparkParquetOptions::new(EvalMode::Legacy, "UTC",
false);
+ let adapter_factory = SparkSchemaAdapterFactory::new(spark_options,
None);
+
+ let adapted_stream =
+ stream.map_err(|e| DataFusionError::Execution(format!("Iceberg
scan error: {}", e)));
+
+ let wrapped_stream = IcebergStreamWrapper {
+ inner: adapted_stream,
+ schema: output_schema,
+ cached_adapter: None,
+ adapter_factory,
+ baseline_metrics: metrics.baseline,
+ };
+
+ Ok(Box::pin(wrapped_stream))
}
fn load_file_io(
@@ -202,8 +216,6 @@ impl IcebergScanExec {
struct IcebergScanMetrics {
/// Baseline metrics (output rows, elapsed compute time)
baseline: BaselineMetrics,
- /// File stream metrics (time opening, time scanning, etc.)
- file_stream: FileStreamMetrics,
/// Count of file splits (FileScanTasks) processed
num_splits: Count,
}
@@ -212,243 +224,79 @@ impl IcebergScanMetrics {
fn new(metrics: &ExecutionPlanMetricsSet, partition: usize) -> Self {
Self {
baseline: BaselineMetrics::new(metrics, partition),
- file_stream: FileStreamMetrics::new(metrics, partition),
num_splits: MetricBuilder::new(metrics).counter("num_splits",
partition),
}
}
}
-/// State machine for IcebergFileStream
-enum FileStreamState {
- /// Idle state - need to start opening next file
- Idle,
- /// Opening a file
- Opening {
- future: BoxFuture<'static, DFResult<SendableRecordBatchStream>>,
- },
- /// Reading from current file while potentially opening next file
- Reading {
- current: SendableRecordBatchStream,
- next: Option<BoxFuture<'static, DFResult<SendableRecordBatchStream>>>,
- },
- /// Error state
- Error,
-}
-
-/// Stream that reads Iceberg files with parallel opening optimization.
-/// Opens the next file while reading the current file to overlap IO with compute.
-///
-/// Inspired by DataFusion's [`FileStream`] pattern for overlapping file
opening with reading.
-///
-/// [`FileStream`]:
https://github.com/apache/datafusion/blob/main/datafusion/datasource/src/file_stream.rs
-struct IcebergFileStream {
+/// Wrapper around iceberg-rust's stream that performs schema adaptation.
+/// Handles batches from multiple files that may have different Arrow schemas
+/// (metadata, field IDs, etc.). Caches schema adapters by source schema to avoid
+/// recreating them for every batch from the same file.
+struct IcebergStreamWrapper<S> {
+ inner: S,
schema: SchemaRef,
- file_io: FileIO,
- batch_size: usize,
- tasks: VecDeque<iceberg::scan::FileScanTask>,
- state: FileStreamState,
- metrics: IcebergScanMetrics,
+ /// Cached schema adapter with its source schema. Created when schema changes.
+ cached_adapter: Option<(SchemaRef, Arc<dyn SchemaMapper>)>,
+ /// Factory for creating schema adapters
+ adapter_factory: SparkSchemaAdapterFactory,
+ /// Metrics for output tracking
+ baseline_metrics: BaselineMetrics,
}
-impl IcebergFileStream {
- fn new(
- tasks: Vec<iceberg::scan::FileScanTask>,
- file_io: FileIO,
- batch_size: usize,
- schema: SchemaRef,
- metrics: IcebergScanMetrics,
- ) -> DFResult<Self> {
- Ok(Self {
- schema,
- file_io,
- batch_size,
- tasks: tasks.into_iter().collect(),
- state: FileStreamState::Idle,
- metrics,
- })
- }
-
- fn start_next_file(
- &mut self,
- ) -> Option<BoxFuture<'static, DFResult<SendableRecordBatchStream>>> {
- let task = self.tasks.pop_front()?;
-
- self.metrics.num_splits.add(1);
-
- let file_io = self.file_io.clone();
- let batch_size = self.batch_size;
- let schema = Arc::clone(&self.schema);
-
- Some(Box::pin(async move {
- let task_stream = futures::stream::iter(vec![Ok(task)]).boxed();
+impl<S> Stream for IcebergStreamWrapper<S>
+where
+ S: Stream<Item = DFResult<RecordBatch>> + Unpin,
+{
+ type Item = DFResult<RecordBatch>;
- let reader = iceberg::arrow::ArrowReaderBuilder::new(file_io)
- .with_batch_size(batch_size)
- .with_row_selection_enabled(true)
- .build();
+ fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) ->
Poll<Option<Self::Item>> {
+ let poll_result = self.inner.poll_next_unpin(cx);
- let stream = reader.read(task_stream).map_err(|e| {
- DataFusionError::Execution(format!("Failed to read Iceberg
task: {}", e))
- })?;
+ let result = match poll_result {
+ Poll::Ready(Some(Ok(batch))) => {
+ let file_schema = batch.schema();
- let target_schema = Arc::clone(&schema);
+ // Check if we need to create a new adapter for this file's schema
+ let needs_new_adapter = match &self.cached_adapter {
+ Some((cached_schema, _)) => !Arc::ptr_eq(cached_schema,
&file_schema),
+ None => true,
+ };
- // Schema adaptation handles differences in Arrow field names and
metadata
- // between the file schema and expected output schema
- let mapped_stream = stream
- .map_err(|e| DataFusionError::Execution(format!("Iceberg scan
error: {}", e)))
- .and_then(move |batch| {
- let spark_options =
SparkParquetOptions::new(EvalMode::Legacy, "UTC", false);
- let adapter_factory =
SparkSchemaAdapterFactory::new(spark_options, None);
- let file_schema = batch.schema();
- let adapter = adapter_factory
- .create(Arc::clone(&target_schema),
Arc::clone(&file_schema));
+ if needs_new_adapter {
+ let adapter = self
+ .adapter_factory
+ .create(Arc::clone(&self.schema),
Arc::clone(&file_schema));
- let result = match
adapter.map_schema(file_schema.as_ref()) {
+ match adapter.map_schema(file_schema.as_ref()) {
Ok((schema_mapper, _projection)) => {
- schema_mapper.map_batch(batch).map_err(|e| {
- DataFusionError::Execution(format!("Batch
mapping failed: {}", e))
- })
+ self.cached_adapter = Some((file_schema,
schema_mapper));
}
- Err(e) => Err(DataFusionError::Execution(format!(
- "Schema mapping failed: {}",
- e
- ))),
- };
- futures::future::ready(result)
- });
-
- Ok(Box::pin(IcebergStreamWrapper {
- inner: mapped_stream,
- schema,
- }) as SendableRecordBatchStream)
- }))
- }
-
- fn poll_inner(&mut self, cx: &mut Context<'_>) ->
Poll<Option<DFResult<RecordBatch>>> {
- loop {
- match &mut self.state {
- FileStreamState::Idle => {
- self.metrics.file_stream.time_opening.start();
- match self.start_next_file() {
- Some(future) => {
- self.state = FileStreamState::Opening { future };
+ Err(e) => {
+ return
Poll::Ready(Some(Err(DataFusionError::Execution(format!(
+ "Schema mapping failed: {}",
+ e
+ )))));
}
- None => return Poll::Ready(None),
}
}
- FileStreamState::Opening { future } => match
ready!(future.poll_unpin(cx)) {
- Ok(stream) => {
- self.metrics.file_stream.time_opening.stop();
-
self.metrics.file_stream.time_scanning_until_data.start();
- self.metrics.file_stream.time_scanning_total.start();
- let next = self.start_next_file();
- self.state = FileStreamState::Reading {
- current: stream,
- next,
- };
- }
- Err(e) => {
- self.state = FileStreamState::Error;
- return Poll::Ready(Some(Err(e)));
- }
- },
- FileStreamState::Reading { current, next } => {
- // Poll next file opening future to drive it forward
(background IO)
- if let Some(next_future) = next {
- if let Poll::Ready(result) =
next_future.poll_unpin(cx) {
- match result {
- Ok(stream) => {
- *next =
Some(Box::pin(futures::future::ready(Ok(stream))));
- }
- Err(e) => {
- self.state = FileStreamState::Error;
- return Poll::Ready(Some(Err(e)));
- }
- }
- }
- }
- match ready!(current.poll_next_unpin(cx)) {
- Some(result) => {
- // Stop time_scanning_until_data on first batch
(idempotent)
-
self.metrics.file_stream.time_scanning_until_data.stop();
-
self.metrics.file_stream.time_scanning_total.stop();
- // Restart time_scanning_total for next batch
-
self.metrics.file_stream.time_scanning_total.start();
- return Poll::Ready(Some(result));
- }
- None => {
-
self.metrics.file_stream.time_scanning_until_data.stop();
-
self.metrics.file_stream.time_scanning_total.stop();
- match next.take() {
- Some(mut next_future) => match
next_future.poll_unpin(cx) {
- Poll::Ready(Ok(stream)) => {
-
self.metrics.file_stream.time_scanning_until_data.start();
-
self.metrics.file_stream.time_scanning_total.start();
- let next_next = self.start_next_file();
- self.state = FileStreamState::Reading {
- current: stream,
- next: next_next,
- };
- }
- Poll::Ready(Err(e)) => {
- self.state = FileStreamState::Error;
- return Poll::Ready(Some(Err(e)));
- }
- Poll::Pending => {
- self.state = FileStreamState::Opening {
- future: next_future,
- };
- }
- },
- None => {
- return Poll::Ready(None);
- }
- }
- }
- }
- }
- FileStreamState::Error => {
- return Poll::Ready(None);
- }
+ let result = self
+ .cached_adapter
+ .as_ref()
+ .expect("cached_adapter should be initialized")
+ .1
+ .map_batch(batch)
+ .map_err(|e| {
+ DataFusionError::Execution(format!("Batch mapping
failed: {}", e))
+ });
+
+ Poll::Ready(Some(result))
}
- }
- }
-}
-
-impl Stream for IcebergFileStream {
- type Item = DFResult<arrow::array::RecordBatch>;
-
- fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) ->
Poll<Option<Self::Item>> {
- self.metrics.file_stream.time_processing.start();
- let result = self.poll_inner(cx);
- self.metrics.file_stream.time_processing.stop();
- self.metrics.baseline.record_poll(result)
- }
-}
+ other => other,
+ };
-impl RecordBatchStream for IcebergFileStream {
- fn schema(&self) -> SchemaRef {
- Arc::clone(&self.schema)
- }
-}
-
-/// Wrapper around iceberg-rust's stream that avoids strict schema checks.
-/// Returns the expected output schema to prevent rejection of batches with metadata differences.
-struct IcebergStreamWrapper<S> {
- inner: S,
- schema: SchemaRef,
-}
-
-impl<S> Stream for IcebergStreamWrapper<S>
-where
- S: Stream<Item = DFResult<RecordBatch>> + Unpin,
-{
- type Item = DFResult<RecordBatch>;
-
- fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) ->
Poll<Option<Self::Item>> {
- self.inner.poll_next_unpin(cx)
+ self.baseline_metrics.record_poll(result)
}
}
diff --git a/native/core/src/execution/planner.rs
b/native/core/src/execution/planner.rs
index 7d806213d..b13fafe45 100644
--- a/native/core/src/execution/planner.rs
+++ b/native/core/src/execution/planner.rs
@@ -2894,6 +2894,7 @@ fn parse_file_scan_tasks(
partition,
partition_spec,
name_mapping,
+ case_sensitive: false,
})
})
.collect();
diff --git a/spark/src/main/scala/org/apache/spark/sql/comet/CometIcebergNativeScanExec.scala b/spark/src/main/scala/org/apache/spark/sql/comet/CometIcebergNativeScanExec.scala
index c4da0e4c1..89b23cb71 100644
--- a/spark/src/main/scala/org/apache/spark/sql/comet/CometIcebergNativeScanExec.scala
+++ b/spark/src/main/scala/org/apache/spark/sql/comet/CometIcebergNativeScanExec.scala
@@ -126,20 +126,7 @@ case class CometIcebergNativeScanExec(
override lazy val metrics: Map[String, SQLMetric] = {
val baseMetrics = Map(
- "output_rows" -> SQLMetrics.createMetric(sparkContext, "number of output rows"),
- "time_elapsed_opening" -> SQLMetrics.createNanoTimingMetric(
- sparkContext,
- "Wall clock time elapsed for file opening"),
- "time_elapsed_scanning_until_data" -> SQLMetrics.createNanoTimingMetric(
- sparkContext,
- "Wall clock time elapsed for file scanning + " +
- "first record batch of decompression + decoding"),
- "time_elapsed_scanning_total" -> SQLMetrics.createNanoTimingMetric(
- sparkContext,
- "Total elapsed wall clock time for scanning + record batch decompression / decoding"),
- "time_elapsed_processing" -> SQLMetrics.createNanoTimingMetric(
- sparkContext,
- "Wall clock time elapsed for data decompression + decoding"))
+ "output_rows" -> SQLMetrics.createMetric(sparkContext, "number of output rows"))
// Create IMMUTABLE metrics with captured values AND types
// these won't be affected by accumulator merges
diff --git a/spark/src/test/scala/org/apache/comet/CometIcebergNativeSuite.scala b/spark/src/test/scala/org/apache/comet/CometIcebergNativeSuite.scala
index 174b09105..7b58e69c4 100644
--- a/spark/src/test/scala/org/apache/comet/CometIcebergNativeSuite.scala
+++ b/spark/src/test/scala/org/apache/comet/CometIcebergNativeSuite.scala
@@ -1440,10 +1440,6 @@ class CometIcebergNativeSuite extends CometTestBase with RESTCatalogHelper {
assert(metrics("output_rows").value == 10000)
assert(metrics("num_splits").value > 0)
- assert(metrics("time_elapsed_opening").value > 0)
- assert(metrics("time_elapsed_scanning_until_data").value > 0)
- assert(metrics("time_elapsed_scanning_total").value > 0)
- assert(metrics("time_elapsed_processing").value > 0)
// ImmutableSQLMetric prevents these from being reset to 0 after execution
assert(
metrics("totalDataManifest").value > 0,
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]