This is an automated email from the ASF dual-hosted git repository.
fokko pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-python.git
The following commit(s) were added to refs/heads/main by this push:
new 25f46b54 infra: upgrade spark to 4.0.1 (#2566)
25f46b54 is described below
commit 25f46b5451ff728c60e0344dbc3813dda7008107
Author: Kevin Liu <[email protected]>
AuthorDate: Sun Oct 5 08:28:27 2025 -0700
infra: upgrade spark to 4.0.1 (#2566)
<!--
Thanks for opening a pull request!
-->
<!-- In the case this PR will resolve an issue, please replace
${GITHUB_ISSUE_ID} below with the actual Github issue id. -->
<!-- Closes #${GITHUB_ISSUE_ID} -->
# Rationale for this change
This PR changes the integration tests to use Spark 4.0.1 with Scala 2.13
and Hadoop 3.4.1.
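Hadoop 3.4.x switched to the AWS SDK v2 bundle (`software.amazon.awssdk:bundle`)
in place of the AWS SDK v1 bundle (`com.amazonaws:aws-java-sdk-bundle`), so the
JAR downloaded in the Spark image changes accordingly. See:
https://hadoop.apache.org/docs/r3.4.1/hadoop-aws/dependency-analysis.html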
## Are these changes tested?
## Are there any user-facing changes?
<!-- In the case of user-facing changes, please add the changelog label.
-->
---
dev/spark/Dockerfile | 14 ++---
dev/spark/spark-defaults.conf | 2 +
poetry.lock | 133 +++++++++++++++++++++---------------------
pyproject.toml | 3 +-
4 files changed, 78 insertions(+), 74 deletions(-)
diff --git a/dev/spark/Dockerfile b/dev/spark/Dockerfile
index d0fc6a4f..cd35671c 100644
--- a/dev/spark/Dockerfile
+++ b/dev/spark/Dockerfile
@@ -13,17 +13,17 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-ARG BASE_IMAGE_SPARK_VERSION=3.5.6
+ARG BASE_IMAGE_SPARK_VERSION=4.0.1
FROM apache/spark:${BASE_IMAGE_SPARK_VERSION}
# Dependency versions - keep these compatible
ARG ICEBERG_VERSION=1.10.0
-ARG ICEBERG_SPARK_RUNTIME_VERSION=3.5_2.12
-ARG SPARK_VERSION=3.5.6
-ARG SCALA_VERSION=2.12
-ARG HADOOP_VERSION=3.3.4
-ARG AWS_SDK_VERSION=1.12.753
+ARG ICEBERG_SPARK_RUNTIME_VERSION=4.0_2.13
+ARG SPARK_VERSION=4.0.1
+ARG HADOOP_VERSION=3.4.1
+ARG SCALA_VERSION=2.13
+ARG AWS_SDK_VERSION=2.24.6
ARG MAVEN_MIRROR=https://repo.maven.apache.org/maven2
USER root
@@ -47,7 +47,7 @@ ENV JARS_TO_DOWNLOAD="\
org/apache/iceberg/iceberg-spark-runtime-${ICEBERG_SPARK_RUNTIME_VERSION}/${ICEBERG_VERSION}/iceberg-spark-runtime-${ICEBERG_SPARK_RUNTIME_VERSION}-${ICEBERG_VERSION}.jar
\
org/apache/iceberg/iceberg-aws-bundle/${ICEBERG_VERSION}/iceberg-aws-bundle-${ICEBERG_VERSION}.jar
\
org/apache/hadoop/hadoop-aws/${HADOOP_VERSION}/hadoop-aws-${HADOOP_VERSION}.jar
\
-
com/amazonaws/aws-java-sdk-bundle/${AWS_SDK_VERSION}/aws-java-sdk-bundle-${AWS_SDK_VERSION}.jar"
+
software/amazon/awssdk/bundle/${AWS_SDK_VERSION}/bundle-${AWS_SDK_VERSION}.jar"
# Download JARs with retry logic
RUN set -e && \
diff --git a/dev/spark/spark-defaults.conf b/dev/spark/spark-defaults.conf
index 3a12e258..4e50f590 100644
--- a/dev/spark/spark-defaults.conf
+++ b/dev/spark/spark-defaults.conf
@@ -48,3 +48,5 @@ spark.sql.defaultCatalog rest
spark.ui.enabled true
spark.eventLog.enabled true
spark.eventLog.dir /home/iceberg/spark-events
+
+spark.sql.ansi.enabled false
diff --git a/poetry.lock b/poetry.lock
index c49944ab..343cc4c7 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 2.2.0 and should not be
changed by hand.
+# This file is automatically @generated by Poetry 2.2.1 and should not be
changed by hand.
[[package]]
name = "adlfs"
@@ -59,7 +59,7 @@ description = "Happy Eyeballs for asyncio"
optional = true
python-versions = ">=3.9"
groups = ["main"]
-markers = "extra == \"adlfs\" or extra == \"gcsfs\" or extra == \"s3fs\""
+markers = "extra == \"s3fs\" or extra == \"adlfs\" or extra == \"gcsfs\""
files = [
{file = "aiohappyeyeballs-2.6.1-py3-none-any.whl", hash =
"sha256:f349ba8f4b75cb25c99c5c2d84e997e485204d2902a9597802b0371f09331fb8"},
{file = "aiohappyeyeballs-2.6.1.tar.gz", hash =
"sha256:c3f9d0113123803ccadfdf3f0faa505bc78e6a72d1cc4806cbd719826e943558"},
@@ -72,7 +72,7 @@ description = "Async http client/server framework (asyncio)"
optional = true
python-versions = ">=3.9"
groups = ["main"]
-markers = "extra == \"adlfs\" or extra == \"gcsfs\" or extra == \"s3fs\""
+markers = "extra == \"s3fs\" or extra == \"adlfs\" or extra == \"gcsfs\""
files = [
{file = "aiohttp-3.12.12-cp310-cp310-macosx_10_9_universal2.whl", hash =
"sha256:6f25e9d274d6abbb15254f76f100c3984d6b9ad6e66263cc60a465dd5c7e48f5"},
{file = "aiohttp-3.12.12-cp310-cp310-macosx_10_9_x86_64.whl", hash =
"sha256:b8ec3c1a1c13d24941b5b913607e57b9364e4c0ea69d5363181467492c4b2ba6"},
@@ -202,7 +202,7 @@ description = "aiosignal: a list of registered asynchronous
callbacks"
optional = true
python-versions = ">=3.9"
groups = ["main"]
-markers = "extra == \"ray\" or extra == \"adlfs\" or extra == \"gcsfs\" or
extra == \"s3fs\""
+markers = "extra == \"ray\" or extra == \"s3fs\" or extra == \"adlfs\" or
extra == \"gcsfs\""
files = [
{file = "aiosignal-1.3.2-py2.py3-none-any.whl", hash =
"sha256:45cde58e409a301715980c2b01d0c28bdde3770d8290b5eb2173759d9acb31a5"},
{file = "aiosignal-1.3.2.tar.gz", hash =
"sha256:a8c255c66fafb1e499c9351d0bf32ff2d8a0321595ebac3b93713656d2436f54"},
@@ -254,7 +254,7 @@ description = "Timeout context manager for asyncio programs"
optional = true
python-versions = ">=3.8"
groups = ["main"]
-markers = "(extra == \"adlfs\" or extra == \"gcsfs\" or extra == \"s3fs\") and
python_version <= \"3.10\""
+markers = "(extra == \"s3fs\" or extra == \"adlfs\" or extra == \"gcsfs\") and
python_version <= \"3.10\""
files = [
{file = "async_timeout-5.0.1-py3-none-any.whl", hash =
"sha256:39e3809566ff85354557ec2398b55e096c8364bacac9405a7a1fa429e77fe76c"},
{file = "async_timeout-5.0.1.tar.gz", hash =
"sha256:d9321a7a3d5a6a5e187e824d2fa0793ce379a202935782d555d6e9d2735677d3"},
@@ -271,7 +271,7 @@ files = [
{file = "attrs-25.3.0-py3-none-any.whl", hash =
"sha256:427318ce031701fea540783410126f03899a97ffc6f61596ad581ac2e40e3bc3"},
{file = "attrs-25.3.0.tar.gz", hash =
"sha256:75d7cefc7fb576747b2c81b4442d4d4a1ce0900973527c011d1030fd3bf4af1b"},
]
-markers = {main = "extra == \"ray\" or extra == \"adlfs\" or extra ==
\"gcsfs\" or extra == \"s3fs\""}
+markers = {main = "extra == \"ray\" or extra == \"s3fs\" or extra == \"adlfs\"
or extra == \"gcsfs\""}
[package.extras]
benchmark = ["cloudpickle ; platform_python_implementation == \"CPython\"",
"hypothesis", "mypy (>=1.11.1) ; platform_python_implementation == \"CPython\"
and python_version >= \"3.10\"", "pympler", "pytest (>=4.3.0)",
"pytest-codspeed", "pytest-mypy-plugins ; platform_python_implementation ==
\"CPython\" and python_version >= \"3.10\"", "pytest-xdist[psutil]"]
@@ -530,7 +530,7 @@ files = [
{file = "boto3-1.40.18-py3-none-any.whl", hash =
"sha256:daa776ba1251a7458c9d6c7627873d0c2460c8e8272d35759065580e9193700a"},
{file = "boto3-1.40.18.tar.gz", hash =
"sha256:64301d39adecc154e3e595eaf0d4f28998ef0a5551f1d033aeac51a9e1a688e5"},
]
-markers = {main = "extra == \"dynamodb\" or extra == \"glue\" or extra ==
\"rest-sigv4\""}
+markers = {main = "extra == \"glue\" or extra == \"dynamodb\" or extra ==
\"rest-sigv4\""}
[package.dependencies]
botocore = ">=1.40.18,<1.41.0"
@@ -551,7 +551,7 @@ files = [
{file = "botocore-1.40.18-py3-none-any.whl", hash =
"sha256:57025c46ca00cf8cec25de07a759521bfbfb3036a0f69b272654a354615dc45f"},
{file = "botocore-1.40.18.tar.gz", hash =
"sha256:afd69bdadd8c55cc89d69de0799829e555193a352d87867f746e19020271cc0f"},
]
-markers = {main = "extra == \"dynamodb\" or extra == \"glue\" or extra ==
\"rest-sigv4\" or extra == \"s3fs\""}
+markers = {main = "extra == \"glue\" or extra == \"dynamodb\" or extra ==
\"rest-sigv4\" or extra == \"s3fs\""}
[package.dependencies]
jmespath = ">=0.7.1,<2.0.0"
@@ -1617,7 +1617,7 @@ description = "A list-like structure which implements
collections.abc.MutableSeq
optional = true
python-versions = ">=3.9"
groups = ["main"]
-markers = "extra == \"ray\" or extra == \"adlfs\" or extra == \"gcsfs\" or
extra == \"s3fs\""
+markers = "extra == \"ray\" or extra == \"s3fs\" or extra == \"adlfs\" or
extra == \"gcsfs\""
files = [
{file = "frozenlist-1.7.0-cp310-cp310-macosx_10_9_universal2.whl", hash =
"sha256:cc4df77d638aa2ed703b878dd093725b72a824c3c546c076e8fdf276f78ee84a"},
{file = "frozenlist-1.7.0-cp310-cp310-macosx_10_9_x86_64.whl", hash =
"sha256:716a9973a2cc963160394f701964fe25012600f3d311f60c790400b00e568b61"},
@@ -1816,7 +1816,7 @@ description = "Google API client core library"
optional = true
python-versions = ">=3.7"
groups = ["main"]
-markers = "extra == \"gcsfs\" or extra == \"bigquery\""
+markers = "extra == \"bigquery\" or extra == \"gcsfs\""
files = [
{file = "google_api_core-2.25.0-py3-none-any.whl", hash =
"sha256:1db79d1281dcf9f3d10023283299ba38f3dc9f639ec41085968fd23e5bcf512e"},
{file = "google_api_core-2.25.0.tar.gz", hash =
"sha256:9b548e688702f82a34ed8409fb8a6961166f0b7795032f0be8f48308dff4333a"},
@@ -1853,7 +1853,7 @@ description = "Google Authentication Library"
optional = true
python-versions = ">=3.7"
groups = ["main"]
-markers = "extra == \"gcsfs\" or extra == \"bigquery\" or extra ==
\"gcp-auth\""
+markers = "extra == \"gcp-auth\" or extra == \"bigquery\" or extra ==
\"gcsfs\""
files = [
{file = "google_auth-2.41.0-py2.py3-none-any.whl", hash =
"sha256:d8bed9b53ab63b7b0374656b8e1bef051f95bb14ecc0cf21ba49de7911d62e09"},
{file = "google_auth-2.41.0.tar.gz", hash =
"sha256:c9d7b534ea4a5d9813c552846797fafb080312263cd4994d6622dd50992ae101"},
@@ -1935,7 +1935,7 @@ description = "Google Cloud API client core library"
optional = true
python-versions = ">=3.7"
groups = ["main"]
-markers = "extra == \"gcsfs\" or extra == \"bigquery\""
+markers = "extra == \"bigquery\" or extra == \"gcsfs\""
files = [
{file = "google_cloud_core-2.4.3-py2.py3-none-any.whl", hash =
"sha256:5130f9f4c14b4fafdff75c79448f9495cfade0d8775facf1b09c3bf67e027f6e"},
{file = "google_cloud_core-2.4.3.tar.gz", hash =
"sha256:1fab62d7102844b278fe6dead3af32408b1df3eb06f5c7e8634cbd40edc4da53"},
@@ -1980,7 +1980,7 @@ description = "A python wrapper of the C library 'Google
CRC32C'"
optional = true
python-versions = ">=3.9"
groups = ["main"]
-markers = "extra == \"gcsfs\" or extra == \"bigquery\""
+markers = "extra == \"bigquery\" or extra == \"gcsfs\""
files = [
{file = "google_crc32c-1.7.1-cp310-cp310-macosx_12_0_arm64.whl", hash =
"sha256:b07d48faf8292b4db7c3d64ab86f950c2e94e93a11fd47271c28ba458e4a0d76"},
{file = "google_crc32c-1.7.1-cp310-cp310-macosx_12_0_x86_64.whl", hash =
"sha256:7cc81b3a2fbd932a4313eb53cc7d9dde424088ca3a0337160f35d91826880c1d"},
@@ -2028,7 +2028,7 @@ description = "Utilities for Google Media Downloads and
Resumable Uploads"
optional = true
python-versions = ">=3.7"
groups = ["main"]
-markers = "extra == \"gcsfs\" or extra == \"bigquery\""
+markers = "extra == \"bigquery\" or extra == \"gcsfs\""
files = [
{file = "google_resumable_media-2.7.2-py2.py3-none-any.whl", hash =
"sha256:3ce7551e9fe6d99e9a126101d2536612bb73486721951e9562fee0f90c6ababa"},
{file = "google_resumable_media-2.7.2.tar.gz", hash =
"sha256:5280aed4629f2b60b847b0d42f9857fd4935c11af266744df33d8074cae92fe0"},
@@ -2052,7 +2052,7 @@ files = [
{file = "googleapis_common_protos-1.70.0-py3-none-any.whl", hash =
"sha256:b8bfcca8c25a2bb253e0e0b0adaf8c00773e5e6af6fd92397576680b807e0fd8"},
{file = "googleapis_common_protos-1.70.0.tar.gz", hash =
"sha256:0e1b44e0ea153e6594f9f394fef15193a68aaaea2d843f83e2742717ca753257"},
]
-markers = {main = "python_version >= \"3.11\" and (extra == \"bigquery\" or
extra == \"gcsfs\") or extra == \"gcsfs\" or extra == \"bigquery\""}
+markers = {main = "extra == \"bigquery\" or extra == \"gcsfs\""}
[package.dependencies]
protobuf = ">=3.20.2,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 ||
>4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<7.0.0"
@@ -2226,21 +2226,21 @@ protobuf = ["grpcio-tools (>=1.73.0)"]
[[package]]
name = "grpcio-status"
-version = "1.73.0"
+version = "1.71.2"
description = "Status proto mapping for gRPC"
optional = false
python-versions = ">=3.9"
groups = ["main", "dev"]
files = [
- {file = "grpcio_status-1.73.0-py3-none-any.whl", hash =
"sha256:a3f3a9994b44c364f014e806114ba44cc52e50c426779f958c8b22f14ff0d892"},
- {file = "grpcio_status-1.73.0.tar.gz", hash =
"sha256:a2b7f430568217f884fe52a5a0133b6f4c9338beae33fb5370134a8eaf58f974"},
+ {file = "grpcio_status-1.71.2-py3-none-any.whl", hash =
"sha256:803c98cb6a8b7dc6dbb785b1111aed739f241ab5e9da0bba96888aa74704cfd3"},
+ {file = "grpcio_status-1.71.2.tar.gz", hash =
"sha256:c7a97e176df71cdc2c179cd1847d7fc86cca5832ad12e9798d7fed6b7a1aab50"},
]
markers = {main = "extra == \"bigquery\""}
[package.dependencies]
googleapis-common-protos = ">=1.5.5"
-grpcio = ">=1.73.0"
-protobuf = ">=6.30.0,<7.0.0"
+grpcio = ">=1.71.2"
+protobuf = ">=5.26.1,<6.0dev"
[[package]]
name = "hf-xet"
@@ -2491,7 +2491,7 @@ files = [
{file = "jmespath-1.0.1-py3-none-any.whl", hash =
"sha256:02e2e4cc71b5bcab88332eebf907519190dd9e6e82107fa7f83b1003a6252980"},
{file = "jmespath-1.0.1.tar.gz", hash =
"sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe"},
]
-markers = {main = "extra == \"dynamodb\" or extra == \"glue\" or extra ==
\"rest-sigv4\" or extra == \"s3fs\""}
+markers = {main = "extra == \"glue\" or extra == \"dynamodb\" or extra ==
\"rest-sigv4\" or extra == \"s3fs\""}
[[package]]
name = "joserfc"
@@ -3391,7 +3391,7 @@ description = "multidict implementation"
optional = true
python-versions = ">=3.9"
groups = ["main"]
-markers = "extra == \"adlfs\" or extra == \"gcsfs\" or extra == \"s3fs\""
+markers = "extra == \"s3fs\" or extra == \"adlfs\" or extra == \"gcsfs\""
files = [
{file = "multidict-6.4.4-cp310-cp310-macosx_10_9_universal2.whl", hash =
"sha256:8adee3ac041145ffe4488ea73fa0a622b464cc25340d98be76924d0cda8545ff"},
{file = "multidict-6.4.4-cp310-cp310-macosx_10_9_x86_64.whl", hash =
"sha256:b61e98c3e2a861035aaccd207da585bdcacef65fe01d7a0d07478efac005e028"},
@@ -3740,7 +3740,7 @@ files = [
{file = "numpy-1.26.4-pp39-pypy39_pp73-win_amd64.whl", hash =
"sha256:7e50d0a0cc3189f9cb0aeb3a6a6af18c16f59f004b866cd2be1c14b36134a4a0"},
{file = "numpy-1.26.4.tar.gz", hash =
"sha256:2a02aba9ed12e4ac4eb3ea9421c420301a0c6460d9830d74a9df87efa4912010"},
]
-markers = {main = "extra == \"bodo\" or extra == \"pandas\" or extra ==
\"ray\""}
+markers = {main = "extra == \"pandas\" or extra == \"ray\" or extra ==
\"bodo\""}
[[package]]
name = "oauthlib"
@@ -3875,7 +3875,7 @@ files = [
{file = "pandas-2.3.2-cp39-cp39-win_amd64.whl", hash =
"sha256:a9d7ec92d71a420185dec44909c32e9a362248c4ae2238234b76d5be37f208cc"},
{file = "pandas-2.3.2.tar.gz", hash =
"sha256:ab7b58f8f82706890924ccdfb5f48002b83d2b5a3845976a9fb705d36c34dcdb"},
]
-markers = {main = "extra == \"bodo\" or extra == \"pandas\" or extra ==
\"ray\""}
+markers = {main = "extra == \"pandas\" or extra == \"ray\" or extra ==
\"bodo\""}
[package.dependencies]
numpy = [
@@ -4052,7 +4052,7 @@ description = "Accelerated property cache"
optional = true
python-versions = ">=3.9"
groups = ["main"]
-markers = "extra == \"adlfs\" or extra == \"gcsfs\" or extra == \"s3fs\""
+markers = "extra == \"s3fs\" or extra == \"adlfs\" or extra == \"gcsfs\""
files = [
{file = "propcache-0.3.2-cp310-cp310-macosx_10_9_universal2.whl", hash =
"sha256:22d9962a358aedbb7a2e36187ff273adeaab9743373a272976d2e348d08c7770"},
{file = "propcache-0.3.2-cp310-cp310-macosx_10_9_x86_64.whl", hash =
"sha256:0d0fda578d1dc3f77b6b5a5dce3b9ad69a8250a891760a548df850a5e8da87f3"},
@@ -4161,7 +4161,7 @@ description = "Beautiful, Pythonic protocol buffers"
optional = true
python-versions = ">=3.7"
groups = ["main"]
-markers = "python_version >= \"3.13\" and (extra == \"gcsfs\" or extra ==
\"bigquery\") or extra == \"bigquery\" or extra == \"gcsfs\""
+markers = "extra == \"bigquery\" or extra == \"gcsfs\""
files = [
{file = "proto_plus-1.26.1-py3-none-any.whl", hash =
"sha256:13285478c2dcf2abb829db158e1047e2f1e8d63a077d94263c2b88b043c75a66"},
{file = "proto_plus-1.26.1.tar.gz", hash =
"sha256:21a515a4c4c0088a773899e23c7bbade3d18f9c66c73edd4c7ee3816bc96a012"},
@@ -4175,23 +4175,24 @@ testing = ["google-api-core (>=1.31.5)"]
[[package]]
name = "protobuf"
-version = "6.31.1"
+version = "5.29.1"
description = ""
optional = false
-python-versions = ">=3.9"
+python-versions = ">=3.8"
groups = ["main", "dev"]
files = [
- {file = "protobuf-6.31.1-cp310-abi3-win32.whl", hash =
"sha256:7fa17d5a29c2e04b7d90e5e32388b8bfd0e7107cd8e616feef7ed3fa6bdab5c9"},
- {file = "protobuf-6.31.1-cp310-abi3-win_amd64.whl", hash =
"sha256:426f59d2964864a1a366254fa703b8632dcec0790d8862d30034d8245e1cd447"},
- {file = "protobuf-6.31.1-cp39-abi3-macosx_10_9_universal2.whl", hash =
"sha256:6f1227473dc43d44ed644425268eb7c2e488ae245d51c6866d19fe158e207402"},
- {file = "protobuf-6.31.1-cp39-abi3-manylinux2014_aarch64.whl", hash =
"sha256:a40fc12b84c154884d7d4c4ebd675d5b3b5283e155f324049ae396b95ddebc39"},
- {file = "protobuf-6.31.1-cp39-abi3-manylinux2014_x86_64.whl", hash =
"sha256:4ee898bf66f7a8b0bd21bce523814e6fbd8c6add948045ce958b73af7e8878c6"},
- {file = "protobuf-6.31.1-cp39-cp39-win32.whl", hash =
"sha256:0414e3aa5a5f3ff423828e1e6a6e907d6c65c1d5b7e6e975793d5590bdeecc16"},
- {file = "protobuf-6.31.1-cp39-cp39-win_amd64.whl", hash =
"sha256:8764cf4587791e7564051b35524b72844f845ad0bb011704c3736cce762d8fe9"},
- {file = "protobuf-6.31.1-py3-none-any.whl", hash =
"sha256:720a6c7e6b77288b85063569baae8536671b39f15cc22037ec7045658d80489e"},
- {file = "protobuf-6.31.1.tar.gz", hash =
"sha256:d8cac4c982f0b957a4dc73a80e2ea24fab08e679c0de9deb835f4a12d69aca9a"},
+ {file = "protobuf-5.29.1-cp310-abi3-win32.whl", hash =
"sha256:22c1f539024241ee545cbcb00ee160ad1877975690b16656ff87dde107b5f110"},
+ {file = "protobuf-5.29.1-cp310-abi3-win_amd64.whl", hash =
"sha256:1fc55267f086dd4050d18ef839d7bd69300d0d08c2a53ca7df3920cc271a3c34"},
+ {file = "protobuf-5.29.1-cp38-abi3-macosx_10_9_universal2.whl", hash =
"sha256:d473655e29c0c4bbf8b69e9a8fb54645bc289dead6d753b952e7aa660254ae18"},
+ {file = "protobuf-5.29.1-cp38-abi3-manylinux2014_aarch64.whl", hash =
"sha256:b5ba1d0e4c8a40ae0496d0e2ecfdbb82e1776928a205106d14ad6985a09ec155"},
+ {file = "protobuf-5.29.1-cp38-abi3-manylinux2014_x86_64.whl", hash =
"sha256:8ee1461b3af56145aca2800e6a3e2f928108c749ba8feccc6f5dd0062c410c0d"},
+ {file = "protobuf-5.29.1-cp38-cp38-win32.whl", hash =
"sha256:50879eb0eb1246e3a5eabbbe566b44b10348939b7cc1b267567e8c3d07213853"},
+ {file = "protobuf-5.29.1-cp38-cp38-win_amd64.whl", hash =
"sha256:027fbcc48cea65a6b17028510fdd054147057fa78f4772eb547b9274e5219331"},
+ {file = "protobuf-5.29.1-cp39-cp39-win32.whl", hash =
"sha256:5a41deccfa5e745cef5c65a560c76ec0ed8e70908a67cc8f4da5fce588b50d57"},
+ {file = "protobuf-5.29.1-cp39-cp39-win_amd64.whl", hash =
"sha256:012ce28d862ff417fd629285aca5d9772807f15ceb1a0dbd15b88f58c776c98c"},
+ {file = "protobuf-5.29.1-py3-none-any.whl", hash =
"sha256:32600ddb9c2a53dedc25b8581ea0f1fd8ea04956373c0c07577ce58d312522e0"},
+ {file = "protobuf-5.29.1.tar.gz", hash =
"sha256:683be02ca21a6ffe80db6dd02c0b5b2892322c59ca57fd6c872d652cb80549cb"},
]
-markers = {main = "python_version >= \"3.11\" and (extra == \"ray\" or extra
== \"bigquery\" or extra == \"gcsfs\") or extra == \"ray\" or extra ==
\"gcsfs\" or extra == \"bigquery\""}
[[package]]
name = "psutil"
@@ -4330,14 +4331,14 @@ dev = ["black (==22.6.0)", "flake8", "mypy", "pytest"]
[[package]]
name = "py4j"
-version = "0.10.9.7"
+version = "0.10.9.9"
description = "Enables Python programs to dynamically access arbitrary Java
objects"
optional = false
python-versions = "*"
groups = ["dev"]
files = [
- {file = "py4j-0.10.9.7-py2.py3-none-any.whl", hash =
"sha256:85defdfd2b2376eb3abf5ca6474b51ab7e0de341c75a02f46dc9b5976f5a5c1b"},
- {file = "py4j-0.10.9.7.tar.gz", hash =
"sha256:0b6e5315bb3ada5cf62ac651d107bb2ebc02def3dee9d9548e3baac644ea8dbb"},
+ {file = "py4j-0.10.9.9-py2.py3-none-any.whl", hash =
"sha256:c7c26e4158defb37b0bb124933163641a2ff6e3a3913f7811b0ddbe07ed61533"},
+ {file = "py4j-0.10.9.9.tar.gz", hash =
"sha256:f694cad19efa5bd1dee4f3e5270eb406613c974394035e5bfc4ec1aba870b879"},
]
[[package]]
@@ -4391,7 +4392,7 @@ files = [
{file = "pyarrow-19.0.1-cp39-cp39-win_amd64.whl", hash =
"sha256:8464c9fbe6d94a7fe1599e7e8965f350fd233532868232ab2596a71586c5a429"},
{file = "pyarrow-19.0.1.tar.gz", hash =
"sha256:3bf266b485df66a400f282ac0b6d1b500b9d2ae73314a153dbe97d6d5cc8a99e"},
]
-markers = {main = "extra == \"bodo\" or extra == \"daft\" or extra ==
\"datafusion\" or extra == \"duckdb\" or extra == \"pandas\" or extra ==
\"pyarrow\" or extra == \"ray\""}
+markers = {main = "extra == \"pyarrow\" or extra == \"pandas\" or extra ==
\"duckdb\" or extra == \"ray\" or extra == \"bodo\" or extra == \"daft\" or
extra == \"datafusion\""}
[package.extras]
test = ["cffi", "hypothesis", "pandas", "pytest", "pytz"]
@@ -4403,7 +4404,7 @@ description = "Pure-Python implementation of ASN.1 types
and DER/BER/CER codecs
optional = true
python-versions = ">=3.8"
groups = ["main"]
-markers = "extra == \"gcsfs\" or extra == \"bigquery\" or extra ==
\"gcp-auth\""
+markers = "extra == \"gcp-auth\" or extra == \"bigquery\" or extra ==
\"gcsfs\""
files = [
{file = "pyasn1-0.6.1-py3-none-any.whl", hash =
"sha256:0d632f46f2ba09143da3a8afe9e33fb6f92fa2320ab7e886e2d0f7672af84629"},
{file = "pyasn1-0.6.1.tar.gz", hash =
"sha256:6f580d2bdd84365380830acf45550f2511469f673cb4a5ae3857a3170128b034"},
@@ -4416,7 +4417,7 @@ description = "A collection of ASN.1-based protocols
modules"
optional = true
python-versions = ">=3.8"
groups = ["main"]
-markers = "extra == \"gcsfs\" or extra == \"bigquery\" or extra ==
\"gcp-auth\""
+markers = "extra == \"gcp-auth\" or extra == \"bigquery\" or extra ==
\"gcsfs\""
files = [
{file = "pyasn1_modules-0.4.2-py3-none-any.whl", hash =
"sha256:29253a9207ce32b64c3ac6600edc75368f98473906e8fd1043bd6b5b1de2c14a"},
{file = "pyasn1_modules-0.4.2.tar.gz", hash =
"sha256:677091de870a80aae844b1ca6134f54652fa2c8c5a52aa396440ac3106e941e6"},
@@ -4768,30 +4769,30 @@ files = [
[[package]]
name = "pyspark"
-version = "3.5.6"
+version = "4.0.1"
description = "Apache Spark Python API"
optional = false
-python-versions = ">=3.8"
+python-versions = ">=3.9"
groups = ["dev"]
files = [
- {file = "pyspark-3.5.6.tar.gz", hash =
"sha256:f8b1c4360e41ab398c64904fae08740503bcb6bd389457d659fa6d9f2952cc48"},
+ {file = "pyspark-4.0.1.tar.gz", hash =
"sha256:9d1f22d994f60369228397e3479003ffe2dd736ba79165003246ff7bd48e2c73"},
]
[package.dependencies]
-googleapis-common-protos = {version = ">=1.56.4", optional = true, markers =
"extra == \"connect\""}
-grpcio = {version = ">=1.56.0", optional = true, markers = "extra ==
\"connect\""}
-grpcio-status = {version = ">=1.56.0", optional = true, markers = "extra ==
\"connect\""}
-numpy = {version = ">=1.15,<2", optional = true, markers = "extra ==
\"connect\""}
-pandas = {version = ">=1.0.5", optional = true, markers = "extra ==
\"connect\""}
-py4j = "0.10.9.7"
-pyarrow = {version = ">=4.0.0", optional = true, markers = "extra ==
\"connect\""}
+googleapis-common-protos = {version = ">=1.65.0", optional = true, markers =
"extra == \"connect\""}
+grpcio = {version = ">=1.67.0", optional = true, markers = "extra ==
\"connect\""}
+grpcio-status = {version = ">=1.67.0", optional = true, markers = "extra ==
\"connect\""}
+numpy = {version = ">=1.21", optional = true, markers = "extra == \"connect\""}
+pandas = {version = ">=2.0.0", optional = true, markers = "extra ==
\"connect\""}
+py4j = "0.10.9.9"
+pyarrow = {version = ">=11.0.0", optional = true, markers = "extra ==
\"connect\""}
[package.extras]
-connect = ["googleapis-common-protos (>=1.56.4)", "grpcio (>=1.56.0)",
"grpcio-status (>=1.56.0)", "numpy (>=1.15,<2)", "pandas (>=1.0.5)", "pyarrow
(>=4.0.0)"]
-ml = ["numpy (>=1.15,<2)"]
-mllib = ["numpy (>=1.15,<2)"]
-pandas-on-spark = ["numpy (>=1.15,<2)", "pandas (>=1.0.5)", "pyarrow
(>=4.0.0)"]
-sql = ["numpy (>=1.15,<2)", "pandas (>=1.0.5)", "pyarrow (>=4.0.0)"]
+connect = ["googleapis-common-protos (>=1.65.0)", "grpcio (>=1.67.0)",
"grpcio-status (>=1.67.0)", "numpy (>=1.21)", "pandas (>=2.0.0)", "pyarrow
(>=11.0.0)"]
+ml = ["numpy (>=1.21)"]
+mllib = ["numpy (>=1.21)"]
+pandas-on-spark = ["numpy (>=1.21)", "pandas (>=2.0.0)", "pyarrow (>=11.0.0)"]
+sql = ["numpy (>=1.21)", "pandas (>=2.0.0)", "pyarrow (>=11.0.0)"]
[[package]]
name = "pytest"
@@ -4911,7 +4912,7 @@ files = [
{file = "pytz-2025.2-py2.py3-none-any.whl", hash =
"sha256:5ddf76296dd8c44c26eb8f4b6f35488f3ccbf6fbbd7adee0b7262d43f0ec2f00"},
{file = "pytz-2025.2.tar.gz", hash =
"sha256:360b9e3dbb49a209c21ad61809c7fb453643e048b38924c765813546746e81c3"},
]
-markers = {main = "extra == \"bodo\" or extra == \"pandas\" or extra ==
\"ray\""}
+markers = {main = "extra == \"pandas\" or extra == \"ray\" or extra ==
\"bodo\""}
[[package]]
name = "pywin32"
@@ -5466,7 +5467,7 @@ description = "Pure-Python RSA implementation"
optional = true
python-versions = "<4,>=3.6"
groups = ["main"]
-markers = "extra == \"gcsfs\" or extra == \"bigquery\" or extra ==
\"gcp-auth\""
+markers = "extra == \"gcp-auth\" or extra == \"bigquery\" or extra ==
\"gcsfs\""
files = [
{file = "rsa-4.9.1-py3-none-any.whl", hash =
"sha256:68635866661c6836b8d39430f97a996acbd61bfa49406748ea243539fe239762"},
{file = "rsa-4.9.1.tar.gz", hash =
"sha256:e7bdbfdb5497da4c07dfd35530e1a902659db6ff241e39d9953cad06ebd0ae75"},
@@ -5508,7 +5509,7 @@ files = [
{file = "s3transfer-0.13.1-py3-none-any.whl", hash =
"sha256:a981aa7429be23fe6dfc13e80e4020057cbab622b08c0315288758d67cabc724"},
{file = "s3transfer-0.13.1.tar.gz", hash =
"sha256:c3fdba22ba1bd367922f27ec8032d6a1cf5f10c934fb5d68cf60fd5a23d936cf"},
]
-markers = {main = "extra == \"dynamodb\" or extra == \"glue\" or extra ==
\"rest-sigv4\""}
+markers = {main = "extra == \"glue\" or extra == \"dynamodb\" or extra ==
\"rest-sigv4\""}
[package.dependencies]
botocore = ">=1.37.4,<2.0a.0"
@@ -5940,7 +5941,7 @@ description = "Fast, Extensible Progress Meter"
optional = true
python-versions = ">=3.7"
groups = ["main"]
-markers = "extra == \"daft\" or extra == \"hf\""
+markers = "extra == \"hf\" or extra == \"daft\""
files = [
{file = "tqdm-4.67.1-py3-none-any.whl", hash =
"sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2"},
{file = "tqdm-4.67.1.tar.gz", hash =
"sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2"},
@@ -5995,7 +5996,7 @@ files = [
{file = "tzdata-2025.2-py2.py3-none-any.whl", hash =
"sha256:1a403fada01ff9221ca8044d701868fa132215d84beb92242d9acd2147f667a8"},
{file = "tzdata-2025.2.tar.gz", hash =
"sha256:b60a638fcc0daffadf82fe0f57e53d06bdec2f36c4df66280ae79bce6bd6f2b9"},
]
-markers = {main = "extra == \"bodo\" or extra == \"pandas\" or extra ==
\"ray\""}
+markers = {main = "extra == \"pandas\" or extra == \"ray\" or extra ==
\"bodo\""}
[[package]]
name = "urllib3"
@@ -6225,7 +6226,7 @@ description = "Yet another URL library"
optional = true
python-versions = ">=3.9"
groups = ["main"]
-markers = "extra == \"adlfs\" or extra == \"gcsfs\" or extra == \"s3fs\""
+markers = "extra == \"s3fs\" or extra == \"adlfs\" or extra == \"gcsfs\""
files = [
{file = "yarl-1.20.1-cp310-cp310-macosx_10_9_universal2.whl", hash =
"sha256:6032e6da6abd41e4acda34d75a816012717000fa6839f37124a47fcefc49bec4"},
{file = "yarl-1.20.1-cp310-cp310-macosx_10_9_x86_64.whl", hash =
"sha256:2c7b34d804b8cf9b214f05015c4fee2ebe7ed05cf581e7192c06555c71f4446a"},
@@ -6501,4 +6502,4 @@ zstandard = ["zstandard"]
[metadata]
lock-version = "2.1"
python-versions = "^3.9.2, !=3.9.7"
-content-hash =
"aff36ee1d48cca375367caaac63badd2691cb72abd7b23ae458eb6048e2b89ea"
+content-hash =
"21456aae4eb5ae5bf02826b4513e03a74d3c95c293bfd14ea19cb17c15c3c9f5"
diff --git a/pyproject.toml b/pyproject.toml
index ba3b5e24..c5774ec1 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -101,7 +101,8 @@ requests-mock = "1.12.1"
moto = { version = "^5.0.2", extras = ["server"] }
typing-extensions = "4.15.0"
pytest-mock = "3.15.1"
-pyspark = { version = "3.5.6", extras = ["connect"] }
+pyspark = { version = "4.0.1", extras = ["connect"] }
+protobuf = "5.29.1" # match Spark Connect's gencode
cython = "3.1.4"
deptry = ">=0.14,<0.24"
docutils = "!=0.21.post1" #
https://github.com/python-poetry/poetry/issues/9248#issuecomment-2026240520