This is an automated email from the ASF dual-hosted git repository.

tvalentyn pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
     new 126d92249f9 Support newer versions of Pyarrow in Beam. (#31305)
126d92249f9 is described below

commit 126d92249f9babb1977087df277940f8646916d9
Author: tvalentyn <tvalen...@users.noreply.github.com>
AuthorDate: Thu May 16 08:50:52 2024 -0700

    Support newer versions of Pyarrow in Beam. (#31305)
    
    * Increase pyarrow upper bound. Add compat tests, and remove some of the compat suites for pyarrow to reduce test suite runtime.
---
 .../beam_PostCommit_Python_Dependency.json         |  0
 sdks/python/setup.py                               | 14 +----
 sdks/python/test-suites/tox/py38/build.gradle      | 63 +++++++++++++++-------
 sdks/python/tox.ini                                | 30 ++++-------
 4 files changed, 54 insertions(+), 53 deletions(-)

diff --git a/.github/trigger_files/beam_PostCommit_Python_Dependency.json b/.github/trigger_files/beam_PostCommit_Python_Dependency.json
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/sdks/python/setup.py b/sdks/python/setup.py
index 3e1bfda0dd3..852f14117d8 100644
--- a/sdks/python/setup.py
+++ b/sdks/python/setup.py
@@ -143,21 +143,9 @@ except ImportError:
 # [BEAM-8181] pyarrow cannot be installed on 32-bit Windows platforms.
 if sys.platform == 'win32' and sys.maxsize <= 2**32:
   pyarrow_dependency = ['']
-elif sys.platform == 'win32' or sys.platform == 'cygwin':
-  # https://github.com/apache/beam/issues/28410 - pyarrow>=13 seeing issues
-  # on windows with error
-  # C:\arrow\cpp\src\arrow\filesystem\s3fs.cc:2904:  arrow::fs::FinalizeS3 was
-  # not called even though S3 was initialized.  This could lead to a
-  # segmentation fault at exit. Keep pyarrow<13 until this is resolved.
-  pyarrow_dependency = [
-      'pyarrow>=3.0.0,<12.0.0',
-      # NOTE: We can remove this once Beam increases the pyarrow lower bound
-      # to a version that fixes CVE.
-      'pyarrow-hotfix<1'
-  ]
 else:
   pyarrow_dependency = [
-      'pyarrow>=3.0.0,<15.0.0',
+      'pyarrow>=3.0.0,<17.0.0',
       # NOTE(https://github.com/apache/beam/issues/29392): We can remove this
       # once Beam increases the pyarrow lower bound to a version that fixes CVE.
       'pyarrow-hotfix<1'
diff --git a/sdks/python/test-suites/tox/py38/build.gradle b/sdks/python/test-suites/tox/py38/build.gradle
index 52299d8c31f..2ca82d3d926 100644
--- a/sdks/python/test-suites/tox/py38/build.gradle
+++ b/sdks/python/test-suites/tox/py38/build.gradle
@@ -44,36 +44,48 @@ project.tasks.register("preCommitPyCoverage") {
 // e.g. pyarrow and pandas also run on PreCommit Dataframe and Coverage
 project.tasks.register("postCommitPyDep") {}
 
-// Create a test task for each supported major version of pyarrow
+// Create a test task for supported major versions of pyarrow
+// We should have a test for the lowest supported version and
+// for versions that we would like to prioritize for testing,
+// for example versions released within the last 1-2 years.
+
 toxTask "testPy38pyarrow-3", "py38-pyarrow-3", "${posargs}"
 test.dependsOn "testPy38pyarrow-3"
 postCommitPyDep.dependsOn "testPy38pyarrow-3"
 
-toxTask "testPy38pyarrow-4", "py38-pyarrow-4", "${posargs}"
-test.dependsOn "testPy38pyarrow-4"
-postCommitPyDep.dependsOn "testPy38pyarrow-4"
+toxTask "testPy38pyarrow-9", "py38-pyarrow-9", "${posargs}"
+test.dependsOn "testPy38pyarrow-9"
+postCommitPyDep.dependsOn "testPy38pyarrow-9"
 
-toxTask "testPy38pyarrow-5", "py38-pyarrow-5", "${posargs}"
-test.dependsOn "testPy38pyarrow-5"
-postCommitPyDep.dependsOn "testPy38pyarrow-5"
+toxTask "testPy38pyarrow-10", "py38-pyarrow-10", "${posargs}"
+test.dependsOn "testPy38pyarrow-10"
+postCommitPyDep.dependsOn "testPy38pyarrow-10"
 
-toxTask "testPy38pyarrow-6", "py38-pyarrow-6", "${posargs}"
-test.dependsOn "testPy38pyarrow-6"
-postCommitPyDep.dependsOn "testPy38pyarrow-6"
+toxTask "testPy38pyarrow-11", "py38-pyarrow-11", "${posargs}"
+test.dependsOn "testPy38pyarrow-11"
+postCommitPyDep.dependsOn "testPy38pyarrow-11"
 
-toxTask "testPy38pyarrow-7", "py38-pyarrow-7", "${posargs}"
-test.dependsOn "testPy38pyarrow-7"
-postCommitPyDep.dependsOn "testPy38pyarrow-7"
+toxTask "testPy38pyarrow-12", "py38-pyarrow-12", "${posargs}"
+test.dependsOn "testPy38pyarrow-12"
+postCommitPyDep.dependsOn "testPy38pyarrow-12"
 
-toxTask "testPy38pyarrow-8", "py38-pyarrow-8", "${posargs}"
-test.dependsOn "testPy38pyarrow-8"
-postCommitPyDep.dependsOn "testPy38pyarrow-8"
+toxTask "testPy38pyarrow-13", "py38-pyarrow-13", "${posargs}"
+test.dependsOn "testPy38pyarrow-13"
+postCommitPyDep.dependsOn "testPy38pyarrow-13"
 
-toxTask "testPy38pyarrow-9", "py38-pyarrow-9", "${posargs}"
-test.dependsOn "testPy38pyarrow-9"
-postCommitPyDep.dependsOn "testPy38pyarrow-9"
+toxTask "testPy38pyarrow-14", "py38-pyarrow-14", "${posargs}"
+test.dependsOn "testPy38pyarrow-14"
+postCommitPyDep.dependsOn "testPy38pyarrow-14"
+
+toxTask "testPy38pyarrow-15", "py38-pyarrow-15", "${posargs}"
+test.dependsOn "testPy38pyarrow-15"
+postCommitPyDep.dependsOn "testPy38pyarrow-15"
+
+toxTask "testPy38pyarrow-16", "py38-pyarrow-16", "${posargs}"
+test.dependsOn "testPy38pyarrow-16"
+postCommitPyDep.dependsOn "testPy38pyarrow-16"
 
-// Create a test task for each minor version of pandas
+// Create a test task for each supported minor version of pandas
 toxTask "testPy38pandas-14", "py38-pandas-14", "${posargs}"
 test.dependsOn "testPy38pandas-14"
 postCommitPyDep.dependsOn "testPy38pandas-14"
@@ -86,6 +98,17 @@ toxTask "testPy38pandas-20", "py38-pandas-20", "${posargs}"
 test.dependsOn "testPy38pandas-20"
 postCommitPyDep.dependsOn "testPy38pandas-20"
 
+// TODO(https://github.com/apache/beam/issues/31192): Add below suites
+// after dependency compat tests suite switches to Python 3.9 or we add
+// Pandas 2.2 support.
+
+// toxTask "testPy39pandas-21", "py39-pandas-21", "${posargs}"
+// test.dependsOn "testPy39pandas-21"
+// postCommitPyDep.dependsOn "testPy39pandas-21"
+
+// toxTask "testPy39pandas-22", "py39-pandas-22", "${posargs}"
+// test.dependsOn "testPy39pandas-22"
+// postCommitPyDep.dependsOn "testPy39pandas-22"
 
 // TODO(https://github.com/apache/beam/issues/30908): Revise what are we testing
 
diff --git a/sdks/python/tox.ini b/sdks/python/tox.ini
index dc804f2ac55..63bcce8adf3 100644
--- a/sdks/python/tox.ini
+++ b/sdks/python/tox.ini
@@ -271,32 +271,22 @@ commands =
   bash {toxinidir}/scripts/pytest_validates_runner.sh {envname} {toxinidir}/apache_beam/runners/portability/spark_runner_test.py {posargs}
 
 
-[testenv:py{38,39,310}-pyarrow-{3,4,5,6,7,8,9}]
+[testenv:py{38,39}-pyarrow-{3,9,10,11,12,13,14,15,16}]
 deps =
-  # Pandas 2 minimum for pyarrow is 7
+  # As a courtesy to users, test against the oldest allowed version of Pyarrow.
+  # We'd have to increase the pyarrow lower bound when Python 3.9 is deprecated.
+  # Since Pandas 2 requires pyarrow>=7, downgrade pandas for this test.
   3: pyarrow>=3,<4
   3: pandas<2
-  4: pyarrow>=4,<5
-  4: pandas<2
-  5: pyarrow>=5,<6
-  5: pandas<2
-  6: pyarrow>=6,<7
-  6: pandas<2
-  7: pyarrow>=7,<8
-  8: pyarrow>=8,<9
+  # Test against versions of pyarrow released in last ~2 years.
   9: pyarrow>=9,<10
-commands =
-  # Log pyarrow and numpy version for debugging
-  /bin/sh -c "pip freeze | grep -E '(pyarrow|numpy)'"
-  # Run pytest directly rather using run_pytest.sh. It doesn't handle
-  # selecting tests with -m (BEAM-12985).
-  # Allow exit code 5 (no tests run) so that we can run this command safely on arbitrary subdirectories.
-  /bin/sh -c 'pytest -o junit_suite_name={envname} --junitxml=pytest_{envname}.xml -n 6 -m uses_pyarrow {posargs}; ret=$?; [ $ret = 5 ] && exit 0 || exit $ret'
-
-[testenv:py{38,39,310,311}-pyarrow-{10,11}]
-deps =
   10: pyarrow>=10,<11
   11: pyarrow>=11,<12
+  12: pyarrow>=12,<13
+  13: pyarrow>=13,<14
+  14: pyarrow>=14,<15
+  15: pyarrow>=15,<16
+  16: pyarrow>=16,<17
 commands =
   # Log pyarrow and numpy version for debugging
   /bin/sh -c "pip freeze | grep -E '(pyarrow|numpy)'"

Reply via email to