This is an automated email from the ASF dual-hosted git repository.
fokko pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-python.git
The following commit(s) were added to refs/heads/main by this push:
new 8464d71 Bump PySpark to 3.5.0 (#303)
8464d71 is described below
commit 8464d713d69e6e2f4fa6f629362a27c940a097e6
Author: Honah J <[email protected]>
AuthorDate: Thu Jan 25 01:59:35 2024 -0800
Bump PySpark to 3.5.0 (#303)
---
poetry.lock | 18 +++++++++---------
pyproject.toml | 2 +-
tests/__init__.py | 16 ----------------
tests/integration/__init__.py | 16 ----------------
tests/integration/test_writes.py | 8 +++++++-
5 files changed, 17 insertions(+), 43 deletions(-)
diff --git a/poetry.lock b/poetry.lock
index 88ac911..cab01e4 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand.
[[package]]
name = "adlfs"
@@ -2456,8 +2456,8 @@ files = [
[package.dependencies]
numpy = [
{version = ">=1.20.3", markers = "python_version < \"3.10\""},
- {version = ">=1.21.0", markers = "python_version >= \"3.10\" and python_version < \"3.11\""},
{version = ">=1.23.2", markers = "python_version >= \"3.11\""},
+ {version = ">=1.21.0", markers = "python_version >= \"3.10\" and python_version < \"3.11\""},
]
python-dateutil = ">=2.8.2"
pytz = ">=2020.1"
@@ -2994,23 +2994,23 @@ tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""}
[[package]]
name = "pyspark"
-version = "3.4.2"
+version = "3.5.0"
description = "Apache Spark Python API"
optional = false
-python-versions = ">=3.7"
+python-versions = ">=3.8"
files = [
- {file = "pyspark-3.4.2.tar.gz", hash = "sha256:088db1b8ff33a748b802f1710ff6f6dcef0e0f2cca7d69bbbe55b187a0d55c3f"},
+ {file = "pyspark-3.5.0.tar.gz", hash = "sha256:d41a9b76bd2aca370a6100d075c029e22ba44c5940927877e9435a3a9c566558"},
]
[package.dependencies]
py4j = "0.10.9.7"
[package.extras]
-connect = ["googleapis-common-protos (>=1.56.4)", "grpcio (>=1.48.1)", "grpcio-status (>=1.48.1)", "numpy (>=1.15)", "pandas (>=1.0.5)", "pyarrow (>=1.0.0)"]
+connect = ["googleapis-common-protos (>=1.56.4)", "grpcio (>=1.56.0)", "grpcio-status (>=1.56.0)", "numpy (>=1.15)", "pandas (>=1.0.5)", "pyarrow (>=4.0.0)"]
ml = ["numpy (>=1.15)"]
mllib = ["numpy (>=1.15)"]
-pandas-on-spark = ["numpy (>=1.15)", "pandas (>=1.0.5)", "pyarrow (>=1.0.0)"]
-sql = ["numpy (>=1.15)", "pandas (>=1.0.5)", "pyarrow (>=1.0.0)"]
+pandas-on-spark = ["numpy (>=1.15)", "pandas (>=1.0.5)", "pyarrow (>=4.0.0)"]
+sql = ["numpy (>=1.15)", "pandas (>=1.0.5)", "pyarrow (>=4.0.0)"]
[[package]]
name = "pytest"
@@ -4315,4 +4315,4 @@ zstandard = ["zstandard"]
[metadata]
lock-version = "2.0"
python-versions = "^3.8"
-content-hash = "32b2b8186f77ccc5b67cfc8f1cc04795f0b8cb27dff43c3fad851cbed8d3f386"
+content-hash = "8cbb637a0be18ca7ddacdc91e6cb78d6e71607b54c2cbdc89f903d2997c4f7e2"
diff --git a/pyproject.toml b/pyproject.toml
index 2ffb6c9..505fedf 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -82,7 +82,7 @@ requests-mock = "1.11.0"
moto = { version = "^4.2.13", extras = ["server"] }
typing-extensions = "4.9.0"
pytest-mock = "3.12.0"
-pyspark = "3.4.2"
+pyspark = "3.5.0"
cython = "3.0.8"
[[tool.mypy.overrides]]
diff --git a/tests/__init__.py b/tests/__init__.py
deleted file mode 100644
index 13a8339..0000000
--- a/tests/__init__.py
+++ /dev/null
@@ -1,16 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
diff --git a/tests/integration/__init__.py b/tests/integration/__init__.py
deleted file mode 100644
index 13a8339..0000000
--- a/tests/integration/__init__.py
+++ /dev/null
@@ -1,16 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
diff --git a/tests/integration/test_writes.py b/tests/integration/test_writes.py
index f8317e4..a095c13 100644
--- a/tests/integration/test_writes.py
+++ b/tests/integration/test_writes.py
@@ -230,10 +230,16 @@ def table_v1_v2_appended_with_null(session_catalog: Catalog, arrow_table_with_nu
@pytest.fixture(scope="session")
def spark() -> SparkSession:
+ import importlib.metadata
import os
+ spark_version =
".".join(importlib.metadata.version("pyspark").split(".")[:2])
+ scala_version = "2.12"
+ iceberg_version = "1.4.3"
+
os.environ["PYSPARK_SUBMIT_ARGS"] = (
- "--packages
org.apache.iceberg:iceberg-spark-runtime-3.4_2.12:1.4.0,org.apache.iceberg:iceberg-aws-bundle:1.4.0
pyspark-shell"
+ f"--packages
org.apache.iceberg:iceberg-spark-runtime-{spark_version}_{scala_version}:{iceberg_version},"
+ f"org.apache.iceberg:iceberg-aws-bundle:{iceberg_version}
pyspark-shell"
)
os.environ["AWS_REGION"] = "us-east-1"
os.environ["AWS_ACCESS_KEY_ID"] = "admin"