This is an automated email from the ASF dual-hosted git repository.
tvalentyn pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git
The following commit(s) were added to refs/heads/master by this push:
new 126d92249f9 Support newer versions of Pyarrow in Beam. (#31305)
126d92249f9 is described below
commit 126d92249f9babb1977087df277940f8646916d9
Author: tvalentyn <[email protected]>
AuthorDate: Thu May 16 08:50:52 2024 -0700
Support newer versions of Pyarrow in Beam. (#31305)
* Increase pyarrow upper bound. Add compat tests, and remove some of the
compat suites for pyarrow to reduce test suite runtime.
---
.../beam_PostCommit_Python_Dependency.json | 0
sdks/python/setup.py | 14 +----
sdks/python/test-suites/tox/py38/build.gradle | 63 +++++++++++++++-------
sdks/python/tox.ini | 30 ++++-------
4 files changed, 54 insertions(+), 53 deletions(-)
diff --git a/.github/trigger_files/beam_PostCommit_Python_Dependency.json b/.github/trigger_files/beam_PostCommit_Python_Dependency.json
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/sdks/python/setup.py b/sdks/python/setup.py
index 3e1bfda0dd3..852f14117d8 100644
--- a/sdks/python/setup.py
+++ b/sdks/python/setup.py
@@ -143,21 +143,9 @@ except ImportError:
# [BEAM-8181] pyarrow cannot be installed on 32-bit Windows platforms.
if sys.platform == 'win32' and sys.maxsize <= 2**32:
pyarrow_dependency = ['']
-elif sys.platform == 'win32' or sys.platform == 'cygwin':
- # https://github.com/apache/beam/issues/28410 - pyarrow>=13 seeing issues
- # on windows with error
- # C:\arrow\cpp\src\arrow\filesystem\s3fs.cc:2904: arrow::fs::FinalizeS3 was
- # not called even though S3 was initialized. This could lead to a
- # segmentation fault at exit. Keep pyarrow<13 until this is resolved.
- pyarrow_dependency = [
- 'pyarrow>=3.0.0,<12.0.0',
- # NOTE: We can remove this once Beam increases the pyarrow lower bound
- # to a version that fixes CVE.
- 'pyarrow-hotfix<1'
- ]
else:
pyarrow_dependency = [
- 'pyarrow>=3.0.0,<15.0.0',
+ 'pyarrow>=3.0.0,<17.0.0',
# NOTE(https://github.com/apache/beam/issues/29392): We can remove this
# once Beam increases the pyarrow lower bound to a version that fixes CVE.
'pyarrow-hotfix<1'
diff --git a/sdks/python/test-suites/tox/py38/build.gradle b/sdks/python/test-suites/tox/py38/build.gradle
index 52299d8c31f..2ca82d3d926 100644
--- a/sdks/python/test-suites/tox/py38/build.gradle
+++ b/sdks/python/test-suites/tox/py38/build.gradle
@@ -44,36 +44,48 @@ project.tasks.register("preCommitPyCoverage") {
// e.g. pyarrow and pandas also run on PreCommit Dataframe and Coverage
project.tasks.register("postCommitPyDep") {}
-// Create a test task for each supported major version of pyarrow
+// Create a test task for supported major versions of pyarrow
+// We should have a test for the lowest supported version and
+// for versions that we would like to prioritize for testing,
+// for example, versions released within the last 1-2 years.
+
toxTask "testPy38pyarrow-3", "py38-pyarrow-3", "${posargs}"
test.dependsOn "testPy38pyarrow-3"
postCommitPyDep.dependsOn "testPy38pyarrow-3"
-toxTask "testPy38pyarrow-4", "py38-pyarrow-4", "${posargs}"
-test.dependsOn "testPy38pyarrow-4"
-postCommitPyDep.dependsOn "testPy38pyarrow-4"
+toxTask "testPy38pyarrow-9", "py38-pyarrow-9", "${posargs}"
+test.dependsOn "testPy38pyarrow-9"
+postCommitPyDep.dependsOn "testPy38pyarrow-9"
-toxTask "testPy38pyarrow-5", "py38-pyarrow-5", "${posargs}"
-test.dependsOn "testPy38pyarrow-5"
-postCommitPyDep.dependsOn "testPy38pyarrow-5"
+toxTask "testPy38pyarrow-10", "py38-pyarrow-10", "${posargs}"
+test.dependsOn "testPy38pyarrow-10"
+postCommitPyDep.dependsOn "testPy38pyarrow-10"
-toxTask "testPy38pyarrow-6", "py38-pyarrow-6", "${posargs}"
-test.dependsOn "testPy38pyarrow-6"
-postCommitPyDep.dependsOn "testPy38pyarrow-6"
+toxTask "testPy38pyarrow-11", "py38-pyarrow-11", "${posargs}"
+test.dependsOn "testPy38pyarrow-11"
+postCommitPyDep.dependsOn "testPy38pyarrow-11"
-toxTask "testPy38pyarrow-7", "py38-pyarrow-7", "${posargs}"
-test.dependsOn "testPy38pyarrow-7"
-postCommitPyDep.dependsOn "testPy38pyarrow-7"
+toxTask "testPy38pyarrow-12", "py38-pyarrow-12", "${posargs}"
+test.dependsOn "testPy38pyarrow-12"
+postCommitPyDep.dependsOn "testPy38pyarrow-12"
-toxTask "testPy38pyarrow-8", "py38-pyarrow-8", "${posargs}"
-test.dependsOn "testPy38pyarrow-8"
-postCommitPyDep.dependsOn "testPy38pyarrow-8"
+toxTask "testPy38pyarrow-13", "py38-pyarrow-13", "${posargs}"
+test.dependsOn "testPy38pyarrow-13"
+postCommitPyDep.dependsOn "testPy38pyarrow-13"
-toxTask "testPy38pyarrow-9", "py38-pyarrow-9", "${posargs}"
-test.dependsOn "testPy38pyarrow-9"
-postCommitPyDep.dependsOn "testPy38pyarrow-9"
+toxTask "testPy38pyarrow-14", "py38-pyarrow-14", "${posargs}"
+test.dependsOn "testPy38pyarrow-14"
+postCommitPyDep.dependsOn "testPy38pyarrow-14"
+
+toxTask "testPy38pyarrow-15", "py38-pyarrow-15", "${posargs}"
+test.dependsOn "testPy38pyarrow-15"
+postCommitPyDep.dependsOn "testPy38pyarrow-15"
+
+toxTask "testPy38pyarrow-16", "py38-pyarrow-16", "${posargs}"
+test.dependsOn "testPy38pyarrow-16"
+postCommitPyDep.dependsOn "testPy38pyarrow-16"
-// Create a test task for each minor version of pandas
+// Create a test task for each supported minor version of pandas
toxTask "testPy38pandas-14", "py38-pandas-14", "${posargs}"
test.dependsOn "testPy38pandas-14"
postCommitPyDep.dependsOn "testPy38pandas-14"
@@ -86,6 +98,17 @@ toxTask "testPy38pandas-20", "py38-pandas-20", "${posargs}"
test.dependsOn "testPy38pandas-20"
postCommitPyDep.dependsOn "testPy38pandas-20"
+// TODO(https://github.com/apache/beam/issues/31192): Add below suites
+// after dependency compat tests suite switches to Python 3.9 or we add
+// pandas 2.2 support.
+
+// toxTask "testPy39pandas-21", "py39-pandas-21", "${posargs}"
+// test.dependsOn "testPy39pandas-21"
+// postCommitPyDep.dependsOn "testPy39pandas-21"
+
+// toxTask "testPy39pandas-22", "py39-pandas-22", "${posargs}"
+// test.dependsOn "testPy39pandas-22"
+// postCommitPyDep.dependsOn "testPy39pandas-22"
// TODO(https://github.com/apache/beam/issues/30908): Revise what are we testing
diff --git a/sdks/python/tox.ini b/sdks/python/tox.ini
index dc804f2ac55..63bcce8adf3 100644
--- a/sdks/python/tox.ini
+++ b/sdks/python/tox.ini
@@ -271,32 +271,22 @@ commands =
bash {toxinidir}/scripts/pytest_validates_runner.sh {envname}
{toxinidir}/apache_beam/runners/portability/spark_runner_test.py {posargs}
-[testenv:py{38,39,310}-pyarrow-{3,4,5,6,7,8,9}]
+[testenv:py{38,39}-pyarrow-{3,9,10,11,12,13,14,15,16}]
deps =
- # Pandas 2 minimum for pyarrow is 7
+ # As a courtesy to users, test against the oldest allowed version of Pyarrow.
+ # We'd have to increase the pyarrow lower bound when Python 3.9 is deprecated.
+ # Since Pandas 2 requires pyarrow>=7, downgrade pandas for this test.
3: pyarrow>=3,<4
3: pandas<2
- 4: pyarrow>=4,<5
- 4: pandas<2
- 5: pyarrow>=5,<6
- 5: pandas<2
- 6: pyarrow>=6,<7
- 6: pandas<2
- 7: pyarrow>=7,<8
- 8: pyarrow>=8,<9
+ # Test against versions of pyarrow released in last ~2 years.
9: pyarrow>=9,<10
-commands =
- # Log pyarrow and numpy version for debugging
- /bin/sh -c "pip freeze | grep -E '(pyarrow|numpy)'"
- # Run pytest directly rather using run_pytest.sh. It doesn't handle
- # selecting tests with -m (BEAM-12985).
- # Allow exit code 5 (no tests run) so that we can run this command safely on arbitrary subdirectories.
- /bin/sh -c 'pytest -o junit_suite_name={envname} --junitxml=pytest_{envname}.xml -n 6 -m uses_pyarrow {posargs}; ret=$?; [ $ret = 5 ] && exit 0 || exit $ret'
-
-[testenv:py{38,39,310,311}-pyarrow-{10,11}]
-deps =
10: pyarrow>=10,<11
11: pyarrow>=11,<12
+ 12: pyarrow>=12,<13
+ 13: pyarrow>=13,<14
+ 14: pyarrow>=14,<15
+ 15: pyarrow>=15,<16
+ 16: pyarrow>=16,<17
commands =
# Log pyarrow and numpy version for debugging
/bin/sh -c "pip freeze | grep -E '(pyarrow|numpy)'"