This is an automated email from the ASF dual-hosted git repository. tvalentyn pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/beam.git
The following commit(s) were added to refs/heads/master by this push: new 126d92249f9 Support newer versions of Pyarrow in Beam. (#31305) 126d92249f9 is described below commit 126d92249f9babb1977087df277940f8646916d9 Author: tvalentyn <tvalen...@users.noreply.github.com> AuthorDate: Thu May 16 08:50:52 2024 -0700 Support newer versions of Pyarrow in Beam. (#31305) * Increase pyarrow upper bound. Add compat tests, and remove some of the compat suites for pyarrow to reduce test suite runtime. --- .../beam_PostCommit_Python_Dependency.json | 0 sdks/python/setup.py | 14 +---- sdks/python/test-suites/tox/py38/build.gradle | 63 +++++++++++++++------- sdks/python/tox.ini | 30 ++++------- 4 files changed, 54 insertions(+), 53 deletions(-) diff --git a/.github/trigger_files/beam_PostCommit_Python_Dependency.json b/.github/trigger_files/beam_PostCommit_Python_Dependency.json new file mode 100644 index 00000000000..e69de29bb2d diff --git a/sdks/python/setup.py b/sdks/python/setup.py index 3e1bfda0dd3..852f14117d8 100644 --- a/sdks/python/setup.py +++ b/sdks/python/setup.py @@ -143,21 +143,9 @@ except ImportError: # [BEAM-8181] pyarrow cannot be installed on 32-bit Windows platforms. if sys.platform == 'win32' and sys.maxsize <= 2**32: pyarrow_dependency = [''] -elif sys.platform == 'win32' or sys.platform == 'cygwin': - # https://github.com/apache/beam/issues/28410 - pyarrow>=13 seeing issues - # on windows with error - # C:\arrow\cpp\src\arrow\filesystem\s3fs.cc:2904: arrow::fs::FinalizeS3 was - # not called even though S3 was initialized. This could lead to a - # segmentation fault at exit. Keep pyarrow<13 until this is resolved. - pyarrow_dependency = [ - 'pyarrow>=3.0.0,<12.0.0', - # NOTE: We can remove this once Beam increases the pyarrow lower bound - # to a version that fixes CVE. 
- 'pyarrow-hotfix<1' - ] else: pyarrow_dependency = [ - 'pyarrow>=3.0.0,<15.0.0', + 'pyarrow>=3.0.0,<17.0.0', # NOTE(https://github.com/apache/beam/issues/29392): We can remove this # once Beam increases the pyarrow lower bound to a version that fixes CVE. 'pyarrow-hotfix<1' diff --git a/sdks/python/test-suites/tox/py38/build.gradle b/sdks/python/test-suites/tox/py38/build.gradle index 52299d8c31f..2ca82d3d926 100644 --- a/sdks/python/test-suites/tox/py38/build.gradle +++ b/sdks/python/test-suites/tox/py38/build.gradle @@ -44,36 +44,48 @@ project.tasks.register("preCommitPyCoverage") { // e.g. pyarrow and pandas also run on PreCommit Dataframe and Coverage project.tasks.register("postCommitPyDep") {} -// Create a test task for each supported major version of pyarrow +// Create a test task for supported major versions of pyarrow +// We should have a test for the lowest supported version and +// For versions that we would like to prioritize for testing, +// for example versions released in a timeframe of last 1-2 years. 
+ toxTask "testPy38pyarrow-3", "py38-pyarrow-3", "${posargs}" test.dependsOn "testPy38pyarrow-3" postCommitPyDep.dependsOn "testPy38pyarrow-3" -toxTask "testPy38pyarrow-4", "py38-pyarrow-4", "${posargs}" -test.dependsOn "testPy38pyarrow-4" -postCommitPyDep.dependsOn "testPy38pyarrow-4" +toxTask "testPy38pyarrow-9", "py38-pyarrow-9", "${posargs}" +test.dependsOn "testPy38pyarrow-9" +postCommitPyDep.dependsOn "testPy38pyarrow-9" -toxTask "testPy38pyarrow-5", "py38-pyarrow-5", "${posargs}" -test.dependsOn "testPy38pyarrow-5" -postCommitPyDep.dependsOn "testPy38pyarrow-5" +toxTask "testPy38pyarrow-10", "py38-pyarrow-10", "${posargs}" +test.dependsOn "testPy38pyarrow-10" +postCommitPyDep.dependsOn "testPy38pyarrow-10" -toxTask "testPy38pyarrow-6", "py38-pyarrow-6", "${posargs}" -test.dependsOn "testPy38pyarrow-6" -postCommitPyDep.dependsOn "testPy38pyarrow-6" +toxTask "testPy38pyarrow-11", "py38-pyarrow-11", "${posargs}" +test.dependsOn "testPy38pyarrow-11" +postCommitPyDep.dependsOn "testPy38pyarrow-11" -toxTask "testPy38pyarrow-7", "py38-pyarrow-7", "${posargs}" -test.dependsOn "testPy38pyarrow-7" -postCommitPyDep.dependsOn "testPy38pyarrow-7" +toxTask "testPy38pyarrow-12", "py38-pyarrow-12", "${posargs}" +test.dependsOn "testPy38pyarrow-12" +postCommitPyDep.dependsOn "testPy38pyarrow-12" -toxTask "testPy38pyarrow-8", "py38-pyarrow-8", "${posargs}" -test.dependsOn "testPy38pyarrow-8" -postCommitPyDep.dependsOn "testPy38pyarrow-8" +toxTask "testPy38pyarrow-13", "py38-pyarrow-13", "${posargs}" +test.dependsOn "testPy38pyarrow-13" +postCommitPyDep.dependsOn "testPy38pyarrow-13" -toxTask "testPy38pyarrow-9", "py38-pyarrow-9", "${posargs}" -test.dependsOn "testPy38pyarrow-9" -postCommitPyDep.dependsOn "testPy38pyarrow-9" +toxTask "testPy38pyarrow-14", "py38-pyarrow-14", "${posargs}" +test.dependsOn "testPy38pyarrow-14" +postCommitPyDep.dependsOn "testPy38pyarrow-14" + +toxTask "testPy38pyarrow-15", "py38-pyarrow-15", "${posargs}" +test.dependsOn "testPy38pyarrow-15" 
+postCommitPyDep.dependsOn "testPy38pyarrow-15" + +toxTask "testPy38pyarrow-16", "py38-pyarrow-16", "${posargs}" +test.dependsOn "testPy38pyarrow-16" +postCommitPyDep.dependsOn "testPy38pyarrow-16" -// Create a test task for each minor version of pandas +// Create a test task for each supported minor version of pandas toxTask "testPy38pandas-14", "py38-pandas-14", "${posargs}" test.dependsOn "testPy38pandas-14" postCommitPyDep.dependsOn "testPy38pandas-14" @@ -86,6 +98,17 @@ toxTask "testPy38pandas-20", "py38-pandas-20", "${posargs}" test.dependsOn "testPy38pandas-20" postCommitPyDep.dependsOn "testPy38pandas-20" +// TODO(https://github.com/apache/beam/issues/31192): Add below suites +// after dependency compat tests suite switches to Python 3.9 or we add +// Pandas 2.2 support. + +// toxTask "testPy39pandas-21", "py39-pandas-21", "${posargs}" +// test.dependsOn "testPy39pandas-21" +// postCommitPyDep.dependsOn "testPy39pandas-21" + +// toxTask "testPy39pandas-22", "py39-pandas-22", "${posargs}" +// test.dependsOn "testPy39pandas-22" +// postCommitPyDep.dependsOn "testPy39pandas-22" // TODO(https://github.com/apache/beam/issues/30908): Revise what are we testing diff --git a/sdks/python/tox.ini b/sdks/python/tox.ini index dc804f2ac55..63bcce8adf3 100644 --- a/sdks/python/tox.ini +++ b/sdks/python/tox.ini @@ -271,32 +271,22 @@ commands = bash {toxinidir}/scripts/pytest_validates_runner.sh {envname} {toxinidir}/apache_beam/runners/portability/spark_runner_test.py {posargs} -[testenv:py{38,39,310}-pyarrow-{3,4,5,6,7,8,9}] +[testenv:py{38,39}-pyarrow-{3,9,10,11,12,13,14,15,16}] deps = - # Pandas 2 minimum for pyarrow is 7 + # As a courtesy to users, test against the oldest allowed version of Pyarrow. + # We'd have to increase the pyarrow lower bound when Python 3.9 is deprecated. + # Since Pandas 2 requires pyarrow>=7, downgrade pandas for this test. 
3: pyarrow>=3,<4 3: pandas<2 - 4: pyarrow>=4,<5 - 4: pandas<2 - 5: pyarrow>=5,<6 - 5: pandas<2 - 6: pyarrow>=6,<7 - 6: pandas<2 - 7: pyarrow>=7,<8 - 8: pyarrow>=8,<9 + # Test against versions of pyarrow released in last ~2 years. 9: pyarrow>=9,<10 -commands = - # Log pyarrow and numpy version for debugging - /bin/sh -c "pip freeze | grep -E '(pyarrow|numpy)'" - # Run pytest directly rather using run_pytest.sh. It doesn't handle - # selecting tests with -m (BEAM-12985). - # Allow exit code 5 (no tests run) so that we can run this command safely on arbitrary subdirectories. - /bin/sh -c 'pytest -o junit_suite_name={envname} --junitxml=pytest_{envname}.xml -n 6 -m uses_pyarrow {posargs}; ret=$?; [ $ret = 5 ] && exit 0 || exit $ret' - -[testenv:py{38,39,310,311}-pyarrow-{10,11}] -deps = 10: pyarrow>=10,<11 11: pyarrow>=11,<12 + 12: pyarrow>=12,<13 + 13: pyarrow>=13,<14 + 14: pyarrow>=14,<15 + 15: pyarrow>=15,<16 + 16: pyarrow>=16,<17 commands = # Log pyarrow and numpy version for debugging /bin/sh -c "pip freeze | grep -E '(pyarrow|numpy)'"