This is an automated email from the ASF dual-hosted git repository.
damccorm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git
The following commit(s) were added to refs/heads/master by this push:
new 0b64865b323 Add logic for generating ml requirements (#35484)
0b64865b323 is described below
commit 0b64865b323023f3e8ba325cb2321f3748ea0b23
Author: Danny McCormick <[email protected]>
AuthorDate: Tue Jul 1 15:53:21 2025 -0400
Add logic for generating ml requirements (#35484)
* Add logic for generating ml requirements
* Add some requirements and loosen torch bounds
* Widen bounds and generate 3.12 containers
* posargs
* Tox config fixes
---
.../beam_PostCommit_Python_Dependency.json | 2 +-
.../beam_PostCommit_Python_Dependency.yml | 7 +--
sdks/python/container/common.gradle | 15 +++++-
.../container/py310/base_image_requirements.txt | 4 +-
..._requirements.txt => ml_image_requirements.txt} | 60 ++++++++++++++++++++--
.../container/py311/base_image_requirements.txt | 4 +-
..._requirements.txt => ml_image_requirements.txt} | 60 ++++++++++++++++++++--
.../container/py312/base_image_requirements.txt | 4 +-
..._requirements.txt => ml_image_requirements.txt} | 59 +++++++++++++++++++--
.../container/py39/base_image_requirements.txt | 4 +-
..._requirements.txt => ml_image_requirements.txt} | 60 ++++++++++++++++++++--
sdks/python/container/run_generate_requirements.sh | 16 ++++--
sdks/python/setup.py | 8 +--
sdks/python/test-suites/tox/py312/build.gradle | 12 +++++
sdks/python/test-suites/tox/py39/build.gradle | 2 +-
sdks/python/tox.ini | 32 ++++++++++++
16 files changed, 308 insertions(+), 41 deletions(-)
diff --git a/.github/trigger_files/beam_PostCommit_Python_Dependency.json
b/.github/trigger_files/beam_PostCommit_Python_Dependency.json
index 907b485d4d3..a7fc54b3e4b 100644
--- a/.github/trigger_files/beam_PostCommit_Python_Dependency.json
+++ b/.github/trigger_files/beam_PostCommit_Python_Dependency.json
@@ -1,4 +1,4 @@
{
"comment": "Modify this file in a trivial way to cause this test suite to
run",
- "modification": 0
+ "modification": 1
}
\ No newline at end of file
diff --git a/.github/workflows/beam_PostCommit_Python_Dependency.yml
b/.github/workflows/beam_PostCommit_Python_Dependency.yml
index c1eef980c82..a73e288171d 100644
--- a/.github/workflows/beam_PostCommit_Python_Dependency.yml
+++ b/.github/workflows/beam_PostCommit_Python_Dependency.yml
@@ -59,6 +59,7 @@ jobs:
matrix:
job_name: [beam_PostCommit_Python_Dependency]
job_phrase: [Run Python PostCommit Dependency]
+ python_version: ['39','312']
timeout-minutes: 120
if: |
github.event_name == 'workflow_dispatch' ||
@@ -70,9 +71,9 @@ jobs:
- name: Setup repository
uses: ./.github/actions/setup-action
with:
- comment_phrase: ${{ matrix.job_phrase }}
+ comment_phrase: ${{ matrix.job_phrase }} ${{ matrix.python_version }}
github_token: ${{ secrets.GITHUB_TOKEN }}
- github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }})
+ github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{
matrix.python_version }})
- name: Setup environment
uses: ./.github/actions/setup-environment-action
with:
@@ -81,7 +82,7 @@ jobs:
- name: Run postCommitPyDep
uses: ./.github/actions/gradle-command-self-hosted-action
with:
- gradle-command: :sdks:python:test-suites:tox:py39:postCommitPyDep
+ gradle-command: :sdks:python:test-suites:tox:py${{
matrix.python_version }}:postCommitPyDep
arguments: -PuseWheelDistribution
- name: Archive Python Test Results
uses: actions/upload-artifact@v4
diff --git a/sdks/python/container/common.gradle
b/sdks/python/container/common.gradle
index 0175778a630..6c3de38d56c 100644
--- a/sdks/python/container/common.gradle
+++ b/sdks/python/container/common.gradle
@@ -39,7 +39,20 @@ def generatePythonRequirements =
tasks.register("generatePythonRequirements") {
executable 'sh'
args '-c', "cd ${rootDir} && ${runScriptsPath} " +
"${project.ext.pythonVersion} " +
- "${files(configurations.sdkSourceTarball.files).singleFile} " +
"${pipExtraOptions}"
+ "${files(configurations.sdkSourceTarball.files).singleFile} " +
+ "base_image_requirements.txt " +
+ "'[gcp,dataframe,test]' " +
+ "${pipExtraOptions}"
+ }
+ // Generate versions for ML dependencies (extras are single-quoted so sh does not treat [...] as a glob)
+ exec {
+ executable 'sh'
+ args '-c', "cd ${rootDir} && ${runScriptsPath} " +
+ "${project.ext.pythonVersion} " +
+ "${files(configurations.sdkSourceTarball.files).singleFile} " +
+ "ml_image_requirements.txt " +
+ "'[gcp,dataframe,test,tensorflow,torch,transformers]' " +
+ "${pipExtraOptions}"
}
}
}
diff --git a/sdks/python/container/py310/base_image_requirements.txt
b/sdks/python/container/py310/base_image_requirements.txt
index e9b4f190539..81834540267 100644
--- a/sdks/python/container/py310/base_image_requirements.txt
+++ b/sdks/python/container/py310/base_image_requirements.txt
@@ -92,7 +92,7 @@ hdfs==2.7.3
httpcore==1.0.9
httplib2==0.22.0
httpx==0.28.1
-hypothesis==6.135.17
+hypothesis==6.135.19
idna==3.10
importlib_metadata==8.7.0
iniconfig==2.1.0
@@ -112,7 +112,7 @@ milvus-lite==2.5.1
mmh3==5.1.0
mock==5.2.0
more-itertools==10.7.0
-multidict==6.6.2
+multidict==6.6.3
mysql-connector-python==9.3.0
nltk==3.9.1
numpy==2.2.6
diff --git a/sdks/python/container/py310/base_image_requirements.txt
b/sdks/python/container/py310/ml_image_requirements.txt
similarity index 79%
copy from sdks/python/container/py310/base_image_requirements.txt
copy to sdks/python/container/py310/ml_image_requirements.txt
index e9b4f190539..2b70da331f5 100644
--- a/sdks/python/container/py310/base_image_requirements.txt
+++ b/sdks/python/container/py310/ml_image_requirements.txt
@@ -21,6 +21,7 @@
# https://s.apache.org/beam-python-dev-wiki
# Reach out to a committer if you need help.
+absl-py==2.3.0
aiofiles==24.1.0
aiohappyeyeballs==2.6.1
aiohttp==3.12.13
@@ -28,6 +29,7 @@ aiosignal==1.3.2
annotated-types==0.7.0
anyio==4.9.0
asn1crypto==1.5.1
+astunparse==1.6.3
async-timeout==5.0.1
attrs==25.3.0
backports.tarfile==1.2.0
@@ -52,9 +54,13 @@ exceptiongroup==1.3.0
execnet==2.1.1
fastavro==1.11.1
fasteners==0.19
+filelock==3.18.0
+flatbuffers==25.2.10
freezegun==1.5.2
frozenlist==1.7.0
+fsspec==2025.5.1
future==1.0.0
+gast==0.6.0
google-api-core==2.25.1
google-api-python-client==2.174.0
google-apitools==0.5.31
@@ -79,20 +85,24 @@ google-cloud-videointelligence==2.16.2
google-cloud-vision==3.10.2
google-crc32c==1.7.1
google-genai==1.23.0
+google-pasta==0.2.0
google-resumable-media==2.7.2
googleapis-common-protos==1.70.0
greenlet==3.2.3
grpc-google-iam-v1==0.14.2
grpc-interceptor==0.15.4
grpcio==1.65.5
-grpcio-status==1.65.5
+grpcio-status==1.63.0rc1
guppy3==3.1.5
h11==0.16.0
+h5py==3.14.0
hdfs==2.7.3
+hf-xet==1.1.5
httpcore==1.0.9
httplib2==0.22.0
httpx==0.28.1
-hypothesis==6.135.17
+huggingface-hub==0.33.1
+hypothesis==6.135.19
idna==3.10
importlib_metadata==8.7.0
iniconfig==2.1.0
@@ -105,22 +115,47 @@ joblib==1.5.1
jsonpickle==3.4.2
jsonschema==4.24.0
jsonschema-specifications==2025.4.1
+keras==3.10.0
keyring==25.6.0
keyrings.google-artifactregistry-auth==1.1.2
+libclang==18.1.1
+Markdown==3.8.2
+markdown-it-py==3.0.0
MarkupSafe==3.0.2
+mdurl==0.1.2
milvus-lite==2.5.1
+ml-dtypes==0.3.2
mmh3==5.1.0
mock==5.2.0
more-itertools==10.7.0
-multidict==6.6.2
+mpmath==1.3.0
+multidict==6.6.3
mysql-connector-python==9.3.0
+namex==0.1.0
+networkx==3.4.2
nltk==3.9.1
-numpy==2.2.6
+numpy==1.26.4
+nvidia-cublas-cu12==12.6.4.1
+nvidia-cuda-cupti-cu12==12.6.80
+nvidia-cuda-nvrtc-cu12==12.6.77
+nvidia-cuda-runtime-cu12==12.6.77
+nvidia-cudnn-cu12==9.5.1.17
+nvidia-cufft-cu12==11.3.0.4
+nvidia-cufile-cu12==1.11.1.6
+nvidia-curand-cu12==10.3.7.77
+nvidia-cusolver-cu12==11.7.1.2
+nvidia-cusparse-cu12==12.5.4.2
+nvidia-cusparselt-cu12==0.6.3
+nvidia-nccl-cu12==2.26.2
+nvidia-nvjitlink-cu12==12.6.85
+nvidia-nvtx-cu12==12.6.77
oauth2client==4.1.3
objsize==0.7.1
opentelemetry-api==1.34.1
opentelemetry-sdk==1.34.1
opentelemetry-semantic-conventions==0.55b1
+opt_einsum==3.4.0
+optree==0.16.0
oracledb==3.2.0
orjson==3.10.18
overrides==7.7.0
@@ -132,7 +167,7 @@ pip==25.1.1
pluggy==1.6.0
propcache==0.3.2
proto-plus==1.26.1
-protobuf==5.29.5
+protobuf==4.25.8
psycopg2-binary==2.9.10
pyarrow==18.1.0
pyarrow-hotfix==0.7
@@ -142,6 +177,7 @@ pycparser==2.22
pydantic==2.11.7
pydantic_core==2.33.2
pydot==1.4.2
+Pygments==2.19.2
PyHamcrest==2.1.0
PyJWT==2.9.0
pymilvus==2.5.11
@@ -162,8 +198,10 @@ referencing==0.36.2
regex==2024.11.6
requests==2.32.4
requests-mock==1.12.1
+rich==14.0.0
rpds-py==0.25.1
rsa==4.9.1
+safetensors==0.5.3
scikit-learn==1.7.0
scipy==1.15.3
scramp==1.4.5
@@ -177,11 +215,22 @@ soupsieve==2.7
SQLAlchemy==2.0.41
sqlalchemy_pytds==1.0.2
sqlparse==0.5.3
+sympy==1.14.0
tenacity==8.5.0
+tensorboard==2.16.2
+tensorboard-data-server==0.7.2
+tensorflow==2.16.2
+tensorflow-cpu-aws==2.16.2;platform_machine=="aarch64"
+tensorflow-io-gcs-filesystem==0.37.1
+termcolor==3.1.0
testcontainers==4.10.0
threadpoolctl==3.6.0
+tokenizers==0.21.2
tomli==2.2.1
+torch==2.7.1
tqdm==4.67.1
+transformers==4.48.3
+triton==3.3.1
typing-inspection==0.4.1
typing_extensions==4.14.0
tzdata==2025.2
@@ -190,6 +239,7 @@ uritemplate==4.2.0
urllib3==2.5.0
virtualenv-clone==0.5.7
websockets==15.0.1
+Werkzeug==3.1.3
wheel==0.45.1
wrapt==1.17.2
yarl==1.20.1
diff --git a/sdks/python/container/py311/base_image_requirements.txt
b/sdks/python/container/py311/base_image_requirements.txt
index af2e75a54b8..2f81ea5e79d 100644
--- a/sdks/python/container/py311/base_image_requirements.txt
+++ b/sdks/python/container/py311/base_image_requirements.txt
@@ -90,7 +90,7 @@ hdfs==2.7.3
httpcore==1.0.9
httplib2==0.22.0
httpx==0.28.1
-hypothesis==6.135.17
+hypothesis==6.135.19
idna==3.10
importlib_metadata==8.7.0
iniconfig==2.1.0
@@ -110,7 +110,7 @@ milvus-lite==2.5.1
mmh3==5.1.0
mock==5.2.0
more-itertools==10.7.0
-multidict==6.6.2
+multidict==6.6.3
mysql-connector-python==9.3.0
nltk==3.9.1
numpy==2.2.6
diff --git a/sdks/python/container/py311/base_image_requirements.txt
b/sdks/python/container/py311/ml_image_requirements.txt
similarity index 79%
copy from sdks/python/container/py311/base_image_requirements.txt
copy to sdks/python/container/py311/ml_image_requirements.txt
index af2e75a54b8..a5d8add176d 100644
--- a/sdks/python/container/py311/base_image_requirements.txt
+++ b/sdks/python/container/py311/ml_image_requirements.txt
@@ -21,6 +21,7 @@
# https://s.apache.org/beam-python-dev-wiki
# Reach out to a committer if you need help.
+absl-py==2.3.0
aiofiles==24.1.0
aiohappyeyeballs==2.6.1
aiohttp==3.12.13
@@ -28,6 +29,7 @@ aiosignal==1.3.2
annotated-types==0.7.0
anyio==4.9.0
asn1crypto==1.5.1
+astunparse==1.6.3
attrs==25.3.0
backports.tarfile==1.2.0
beautifulsoup4==4.13.4
@@ -50,9 +52,13 @@ docstring_parser==0.16
execnet==2.1.1
fastavro==1.11.1
fasteners==0.19
+filelock==3.18.0
+flatbuffers==25.2.10
freezegun==1.5.2
frozenlist==1.7.0
+fsspec==2025.5.1
future==1.0.0
+gast==0.6.0
google-api-core==2.25.1
google-api-python-client==2.174.0
google-apitools==0.5.31
@@ -77,20 +83,24 @@ google-cloud-videointelligence==2.16.2
google-cloud-vision==3.10.2
google-crc32c==1.7.1
google-genai==1.23.0
+google-pasta==0.2.0
google-resumable-media==2.7.2
googleapis-common-protos==1.70.0
greenlet==3.2.3
grpc-google-iam-v1==0.14.2
grpc-interceptor==0.15.4
grpcio==1.65.5
-grpcio-status==1.65.5
+grpcio-status==1.63.0rc1
guppy3==3.1.5
h11==0.16.0
+h5py==3.14.0
hdfs==2.7.3
+hf-xet==1.1.5
httpcore==1.0.9
httplib2==0.22.0
httpx==0.28.1
-hypothesis==6.135.17
+huggingface-hub==0.33.1
+hypothesis==6.135.19
idna==3.10
importlib_metadata==8.7.0
iniconfig==2.1.0
@@ -103,22 +113,47 @@ joblib==1.5.1
jsonpickle==3.4.2
jsonschema==4.24.0
jsonschema-specifications==2025.4.1
+keras==3.10.0
keyring==25.6.0
keyrings.google-artifactregistry-auth==1.1.2
+libclang==18.1.1
+Markdown==3.8.2
+markdown-it-py==3.0.0
MarkupSafe==3.0.2
+mdurl==0.1.2
milvus-lite==2.5.1
+ml-dtypes==0.3.2
mmh3==5.1.0
mock==5.2.0
more-itertools==10.7.0
-multidict==6.6.2
+mpmath==1.3.0
+multidict==6.6.3
mysql-connector-python==9.3.0
+namex==0.1.0
+networkx==3.5
nltk==3.9.1
-numpy==2.2.6
+numpy==1.26.4
+nvidia-cublas-cu12==12.6.4.1
+nvidia-cuda-cupti-cu12==12.6.80
+nvidia-cuda-nvrtc-cu12==12.6.77
+nvidia-cuda-runtime-cu12==12.6.77
+nvidia-cudnn-cu12==9.5.1.17
+nvidia-cufft-cu12==11.3.0.4
+nvidia-cufile-cu12==1.11.1.6
+nvidia-curand-cu12==10.3.7.77
+nvidia-cusolver-cu12==11.7.1.2
+nvidia-cusparse-cu12==12.5.4.2
+nvidia-cusparselt-cu12==0.6.3
+nvidia-nccl-cu12==2.26.2
+nvidia-nvjitlink-cu12==12.6.85
+nvidia-nvtx-cu12==12.6.77
oauth2client==4.1.3
objsize==0.7.1
opentelemetry-api==1.34.1
opentelemetry-sdk==1.34.1
opentelemetry-semantic-conventions==0.55b1
+opt_einsum==3.4.0
+optree==0.16.0
oracledb==3.2.0
orjson==3.10.18
overrides==7.7.0
@@ -130,7 +165,7 @@ pip==25.1.1
pluggy==1.6.0
propcache==0.3.2
proto-plus==1.26.1
-protobuf==5.29.5
+protobuf==4.25.8
psycopg2-binary==2.9.10
pyarrow==18.1.0
pyarrow-hotfix==0.7
@@ -140,6 +175,7 @@ pycparser==2.22
pydantic==2.11.7
pydantic_core==2.33.2
pydot==1.4.2
+Pygments==2.19.2
PyHamcrest==2.1.0
PyJWT==2.9.0
pymilvus==2.5.11
@@ -160,8 +196,10 @@ referencing==0.36.2
regex==2024.11.6
requests==2.32.4
requests-mock==1.12.1
+rich==14.0.0
rpds-py==0.25.1
rsa==4.9.1
+safetensors==0.5.3
scikit-learn==1.7.0
scipy==1.16.0
scramp==1.4.5
@@ -175,10 +213,21 @@ soupsieve==2.7
SQLAlchemy==2.0.41
sqlalchemy_pytds==1.0.2
sqlparse==0.5.3
+sympy==1.14.0
tenacity==8.5.0
+tensorboard==2.16.2
+tensorboard-data-server==0.7.2
+tensorflow==2.16.2
+tensorflow-cpu-aws==2.16.2;platform_machine=="aarch64"
+tensorflow-io-gcs-filesystem==0.37.1
+termcolor==3.1.0
testcontainers==4.10.0
threadpoolctl==3.6.0
+tokenizers==0.21.2
+torch==2.7.1
tqdm==4.67.1
+transformers==4.48.3
+triton==3.3.1
typing-inspection==0.4.1
typing_extensions==4.14.0
tzdata==2025.2
@@ -187,6 +236,7 @@ uritemplate==4.2.0
urllib3==2.5.0
virtualenv-clone==0.5.7
websockets==15.0.1
+Werkzeug==3.1.3
wheel==0.45.1
wrapt==1.17.2
yarl==1.20.1
diff --git a/sdks/python/container/py312/base_image_requirements.txt
b/sdks/python/container/py312/base_image_requirements.txt
index f48d350e01d..f39f7ab8f7d 100644
--- a/sdks/python/container/py312/base_image_requirements.txt
+++ b/sdks/python/container/py312/base_image_requirements.txt
@@ -89,7 +89,7 @@ hdfs==2.7.3
httpcore==1.0.9
httplib2==0.22.0
httpx==0.28.1
-hypothesis==6.135.17
+hypothesis==6.135.19
idna==3.10
importlib_metadata==8.7.0
iniconfig==2.1.0
@@ -109,7 +109,7 @@ milvus-lite==2.5.1
mmh3==5.1.0
mock==5.2.0
more-itertools==10.7.0
-multidict==6.6.2
+multidict==6.6.3
mysql-connector-python==9.3.0
nltk==3.9.1
numpy==2.2.6
diff --git a/sdks/python/container/py312/base_image_requirements.txt
b/sdks/python/container/py312/ml_image_requirements.txt
similarity index 79%
copy from sdks/python/container/py312/base_image_requirements.txt
copy to sdks/python/container/py312/ml_image_requirements.txt
index f48d350e01d..e6e9a2930d2 100644
--- a/sdks/python/container/py312/base_image_requirements.txt
+++ b/sdks/python/container/py312/ml_image_requirements.txt
@@ -21,6 +21,7 @@
# https://s.apache.org/beam-python-dev-wiki
# Reach out to a committer if you need help.
+absl-py==2.3.0
aiofiles==24.1.0
aiohappyeyeballs==2.6.1
aiohttp==3.12.13
@@ -28,6 +29,7 @@ aiosignal==1.3.2
annotated-types==0.7.0
anyio==4.9.0
asn1crypto==1.5.1
+astunparse==1.6.3
attrs==25.3.0
beautifulsoup4==4.13.4
bs4==0.0.2
@@ -49,9 +51,13 @@ docstring_parser==0.16
execnet==2.1.1
fastavro==1.11.1
fasteners==0.19
+filelock==3.18.0
+flatbuffers==25.2.10
freezegun==1.5.2
frozenlist==1.7.0
+fsspec==2025.5.1
future==1.0.0
+gast==0.6.0
google-api-core==2.25.1
google-api-python-client==2.174.0
google-apitools==0.5.31
@@ -76,20 +82,24 @@ google-cloud-videointelligence==2.16.2
google-cloud-vision==3.10.2
google-crc32c==1.7.1
google-genai==1.23.0
+google-pasta==0.2.0
google-resumable-media==2.7.2
googleapis-common-protos==1.70.0
greenlet==3.2.3
grpc-google-iam-v1==0.14.2
grpc-interceptor==0.15.4
grpcio==1.65.5
-grpcio-status==1.65.5
+grpcio-status==1.63.0rc1
guppy3==3.1.5
h11==0.16.0
+h5py==3.14.0
hdfs==2.7.3
+hf-xet==1.1.5
httpcore==1.0.9
httplib2==0.22.0
httpx==0.28.1
-hypothesis==6.135.17
+huggingface-hub==0.33.1
+hypothesis==6.135.20
idna==3.10
importlib_metadata==8.7.0
iniconfig==2.1.0
@@ -102,22 +112,47 @@ joblib==1.5.1
jsonpickle==3.4.2
jsonschema==4.24.0
jsonschema-specifications==2025.4.1
+keras==3.10.0
keyring==25.6.0
keyrings.google-artifactregistry-auth==1.1.2
+libclang==18.1.1
+Markdown==3.8.2
+markdown-it-py==3.0.0
MarkupSafe==3.0.2
+mdurl==0.1.2
milvus-lite==2.5.1
+ml-dtypes==0.3.2
mmh3==5.1.0
mock==5.2.0
more-itertools==10.7.0
-multidict==6.6.2
+mpmath==1.3.0
+multidict==6.6.3
mysql-connector-python==9.3.0
+namex==0.1.0
+networkx==3.5
nltk==3.9.1
-numpy==2.2.6
+numpy==1.26.4
+nvidia-cublas-cu12==12.6.4.1
+nvidia-cuda-cupti-cu12==12.6.80
+nvidia-cuda-nvrtc-cu12==12.6.77
+nvidia-cuda-runtime-cu12==12.6.77
+nvidia-cudnn-cu12==9.5.1.17
+nvidia-cufft-cu12==11.3.0.4
+nvidia-cufile-cu12==1.11.1.6
+nvidia-curand-cu12==10.3.7.77
+nvidia-cusolver-cu12==11.7.1.2
+nvidia-cusparse-cu12==12.5.4.2
+nvidia-cusparselt-cu12==0.6.3
+nvidia-nccl-cu12==2.26.2
+nvidia-nvjitlink-cu12==12.6.85
+nvidia-nvtx-cu12==12.6.77
oauth2client==4.1.3
objsize==0.7.1
opentelemetry-api==1.34.1
opentelemetry-sdk==1.34.1
opentelemetry-semantic-conventions==0.55b1
+opt_einsum==3.4.0
+optree==0.16.0
oracledb==3.2.0
orjson==3.10.18
overrides==7.7.0
@@ -129,7 +164,7 @@ pip==25.1.1
pluggy==1.6.0
propcache==0.3.2
proto-plus==1.26.1
-protobuf==5.29.5
+protobuf==4.25.8
psycopg2-binary==2.9.10
pyarrow==18.1.0
pyarrow-hotfix==0.7
@@ -139,6 +174,7 @@ pycparser==2.22
pydantic==2.11.7
pydantic_core==2.33.2
pydot==1.4.2
+Pygments==2.19.2
PyHamcrest==2.1.0
PyJWT==2.9.0
pymilvus==2.5.11
@@ -159,8 +195,10 @@ referencing==0.36.2
regex==2024.11.6
requests==2.32.4
requests-mock==1.12.1
+rich==14.0.0
rpds-py==0.25.1
rsa==4.9.1
+safetensors==0.5.3
scikit-learn==1.7.0
scipy==1.16.0
scramp==1.4.5
@@ -174,10 +212,20 @@ soupsieve==2.7
SQLAlchemy==2.0.41
sqlalchemy_pytds==1.0.2
sqlparse==0.5.3
+sympy==1.14.0
tenacity==8.5.0
+tensorboard==2.16.2
+tensorboard-data-server==0.7.2
+tensorflow==2.16.2
+tensorflow-cpu-aws==2.16.2;platform_machine=="aarch64"
+termcolor==3.1.0
testcontainers==4.10.0
threadpoolctl==3.6.0
+tokenizers==0.21.2
+torch==2.7.1
tqdm==4.67.1
+transformers==4.48.3
+triton==3.3.1
typing-inspection==0.4.1
typing_extensions==4.14.0
tzdata==2025.2
@@ -186,6 +234,7 @@ uritemplate==4.2.0
urllib3==2.5.0
virtualenv-clone==0.5.7
websockets==15.0.1
+Werkzeug==3.1.3
wheel==0.45.1
wrapt==1.17.2
yarl==1.20.1
diff --git a/sdks/python/container/py39/base_image_requirements.txt
b/sdks/python/container/py39/base_image_requirements.txt
index 1c2ebc4c7a4..db7961186ba 100644
--- a/sdks/python/container/py39/base_image_requirements.txt
+++ b/sdks/python/container/py39/base_image_requirements.txt
@@ -92,7 +92,7 @@ hdfs==2.7.3
httpcore==1.0.9
httplib2==0.22.0
httpx==0.28.1
-hypothesis==6.135.17
+hypothesis==6.135.20
idna==3.10
importlib_metadata==8.7.0
iniconfig==2.1.0
@@ -112,7 +112,7 @@ milvus-lite==2.5.1
mmh3==5.1.0
mock==5.2.0
more-itertools==10.7.0
-multidict==6.6.2
+multidict==6.6.3
mysql-connector-python==9.3.0
nltk==3.9.1
numpy==2.0.2
diff --git a/sdks/python/container/py39/base_image_requirements.txt
b/sdks/python/container/py39/ml_image_requirements.txt
similarity index 79%
copy from sdks/python/container/py39/base_image_requirements.txt
copy to sdks/python/container/py39/ml_image_requirements.txt
index 1c2ebc4c7a4..3dab7e35b6d 100644
--- a/sdks/python/container/py39/base_image_requirements.txt
+++ b/sdks/python/container/py39/ml_image_requirements.txt
@@ -21,6 +21,7 @@
# https://s.apache.org/beam-python-dev-wiki
# Reach out to a committer if you need help.
+absl-py==2.3.0
aiofiles==24.1.0
aiohappyeyeballs==2.6.1
aiohttp==3.12.13
@@ -28,6 +29,7 @@ aiosignal==1.3.2
annotated-types==0.7.0
anyio==4.9.0
asn1crypto==1.5.1
+astunparse==1.6.3
async-timeout==5.0.1
attrs==25.3.0
backports.tarfile==1.2.0
@@ -52,9 +54,13 @@ exceptiongroup==1.3.0
execnet==2.1.1
fastavro==1.11.1
fasteners==0.19
+filelock==3.18.0
+flatbuffers==25.2.10
freezegun==1.5.2
frozenlist==1.7.0
+fsspec==2025.5.1
future==1.0.0
+gast==0.6.0
google-api-core==2.25.1
google-api-python-client==2.174.0
google-apitools==0.5.31
@@ -79,20 +85,24 @@ google-cloud-videointelligence==2.16.2
google-cloud-vision==3.10.2
google-crc32c==1.7.1
google-genai==1.23.0
+google-pasta==0.2.0
google-resumable-media==2.7.2
googleapis-common-protos==1.70.0
greenlet==3.2.3
grpc-google-iam-v1==0.14.2
grpc-interceptor==0.15.4
grpcio==1.65.5
-grpcio-status==1.65.5
+grpcio-status==1.63.0rc1
guppy3==3.1.5
h11==0.16.0
+h5py==3.14.0
hdfs==2.7.3
+hf-xet==1.1.5
httpcore==1.0.9
httplib2==0.22.0
httpx==0.28.1
-hypothesis==6.135.17
+huggingface-hub==0.33.1
+hypothesis==6.135.20
idna==3.10
importlib_metadata==8.7.0
iniconfig==2.1.0
@@ -105,22 +115,47 @@ joblib==1.5.1
jsonpickle==3.4.2
jsonschema==4.24.0
jsonschema-specifications==2025.4.1
+keras==3.10.0
keyring==25.6.0
keyrings.google-artifactregistry-auth==1.1.2
+libclang==18.1.1
+Markdown==3.8.2
+markdown-it-py==3.0.0
MarkupSafe==3.0.2
+mdurl==0.1.2
milvus-lite==2.5.1
+ml-dtypes==0.3.2
mmh3==5.1.0
mock==5.2.0
more-itertools==10.7.0
-multidict==6.6.2
+mpmath==1.3.0
+multidict==6.6.3
mysql-connector-python==9.3.0
+namex==0.1.0
+networkx==3.2.1
nltk==3.9.1
-numpy==2.0.2
+numpy==1.26.4
+nvidia-cublas-cu12==12.6.4.1
+nvidia-cuda-cupti-cu12==12.6.80
+nvidia-cuda-nvrtc-cu12==12.6.77
+nvidia-cuda-runtime-cu12==12.6.77
+nvidia-cudnn-cu12==9.5.1.17
+nvidia-cufft-cu12==11.3.0.4
+nvidia-cufile-cu12==1.11.1.6
+nvidia-curand-cu12==10.3.7.77
+nvidia-cusolver-cu12==11.7.1.2
+nvidia-cusparse-cu12==12.5.4.2
+nvidia-cusparselt-cu12==0.6.3
+nvidia-nccl-cu12==2.26.2
+nvidia-nvjitlink-cu12==12.6.85
+nvidia-nvtx-cu12==12.6.77
oauth2client==4.1.3
objsize==0.7.1
opentelemetry-api==1.34.1
opentelemetry-sdk==1.34.1
opentelemetry-semantic-conventions==0.55b1
+opt_einsum==3.4.0
+optree==0.16.0
oracledb==3.2.0
orjson==3.10.18
overrides==7.7.0
@@ -132,7 +167,7 @@ pip==25.1.1
pluggy==1.6.0
propcache==0.3.2
proto-plus==1.26.1
-protobuf==5.29.5
+protobuf==4.25.8
psycopg2-binary==2.9.9
pyarrow==18.1.0
pyarrow-hotfix==0.7
@@ -142,6 +177,7 @@ pycparser==2.22
pydantic==2.11.7
pydantic_core==2.33.2
pydot==1.4.2
+Pygments==2.19.2
PyHamcrest==2.1.0
PyJWT==2.9.0
pymilvus==2.5.11
@@ -162,8 +198,10 @@ referencing==0.36.2
regex==2024.11.6
requests==2.32.4
requests-mock==1.12.1
+rich==14.0.0
rpds-py==0.25.1
rsa==4.9.1
+safetensors==0.5.3
scikit-learn==1.6.1
scipy==1.13.1
scramp==1.4.5
@@ -177,11 +215,22 @@ soupsieve==2.7
SQLAlchemy==2.0.41
sqlalchemy_pytds==1.0.2
sqlparse==0.5.3
+sympy==1.14.0
tenacity==8.5.0
+tensorboard==2.16.2
+tensorboard-data-server==0.7.2
+tensorflow==2.16.2
+tensorflow-cpu-aws==2.16.2;platform_machine=="aarch64"
+tensorflow-io-gcs-filesystem==0.37.1
+termcolor==3.1.0
testcontainers==4.10.0
threadpoolctl==3.6.0
+tokenizers==0.21.2
tomli==2.2.1
+torch==2.7.1
tqdm==4.67.1
+transformers==4.48.3
+triton==3.3.1
typing-inspection==0.4.1
typing_extensions==4.14.0
tzdata==2025.2
@@ -190,6 +239,7 @@ uritemplate==4.2.0
urllib3==2.5.0
virtualenv-clone==0.5.7
websockets==15.0.1
+Werkzeug==3.1.3
wheel==0.45.1
wrapt==1.17.2
yarl==1.20.1
diff --git a/sdks/python/container/run_generate_requirements.sh
b/sdks/python/container/run_generate_requirements.sh
index 23964d10e7b..02ff9d2ccd6 100755
--- a/sdks/python/container/run_generate_requirements.sh
+++ b/sdks/python/container/run_generate_requirements.sh
@@ -38,10 +38,12 @@ fi
PY_VERSION=$1
SDK_TARBALL=$2
+REQUIREMENTS_FILE_NAME=$3
+EXTRAS=$4
# Use the PIP_EXTRA_OPTIONS environment variable to pass additional flags to
the pip install command.
# For example, you can include the --pre flag in $PIP_EXTRA_OPTIONS to
download pre-release versions of packages.
# Note that you can modify the behavior of the pip install command in this
script by passing in your own $PIP_EXTRA_OPTIONS.
-PIP_EXTRA_OPTIONS=$3
+PIP_EXTRA_OPTIONS=$5
if ! python"$PY_VERSION" --version > /dev/null 2>&1 ; then
echo "Please install a python${PY_VERSION} interpreter. See
s.apache.org/beam-python-dev-wiki for Python installation tips."
@@ -53,6 +55,14 @@ if ! python"$PY_VERSION" -m venv --help > /dev/null 2>&1 ;
then
exit 1
fi
+if [ -z "$REQUIREMENTS_FILE_NAME" ]; then
+ REQUIREMENTS_FILE_NAME="base_image_requirements.txt"
+fi
+
+if [ -z "$EXTRAS" ]; then
+ EXTRAS="[gcp,dataframe,test]"
+fi
+
set -ex
ENV_PATH="$PWD/build/python${PY_VERSION/./}_requirements_gen"
@@ -65,7 +75,7 @@ pip install --upgrade pip setuptools wheel
# Install dataframe deps to add have Dataframe support in released images.
# Install test deps since some integration tests need dependencies,
# such as pytest, installed in the runner environment.
-pip install ${PIP_EXTRA_OPTIONS:+"$PIP_EXTRA_OPTIONS"} --no-cache-dir
"$SDK_TARBALL"[gcp,dataframe,test]
+pip install ${PIP_EXTRA_OPTIONS:+"$PIP_EXTRA_OPTIONS"} --no-cache-dir
"$SDK_TARBALL""$EXTRAS"
pip install ${PIP_EXTRA_OPTIONS:+"$PIP_EXTRA_OPTIONS"} --no-cache-dir -r
"$PWD"/sdks/python/container/base_image_requirements_manual.txt
pip uninstall -y apache-beam
@@ -75,7 +85,7 @@ echo "Installed dependencies:"
pip freeze --all
PY_IMAGE="py${PY_VERSION//.}"
-REQUIREMENTS_FILE=$PWD/sdks/python/container/$PY_IMAGE/base_image_requirements.txt
+REQUIREMENTS_FILE=$PWD/sdks/python/container/$PY_IMAGE/$REQUIREMENTS_FILE_NAME
cat <<EOT > "$REQUIREMENTS_FILE"
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
diff --git a/sdks/python/setup.py b/sdks/python/setup.py
index d309a7ea4a6..fcb64c2d026 100644
--- a/sdks/python/setup.py
+++ b/sdks/python/setup.py
@@ -571,12 +571,12 @@ if __name__ == '__main__':
# in https://github.com/apache/beam/blob/master/sdks/python/tox.ini
# For more info, see
#
https://docs.google.com/document/d/1c84Gc-cZRCfrU8f7kWGsNR2o8oSRjCM-dGHO9KvPWPw/edit?usp=sharing
- 'torch': ['torch>=1.9.0,<2.1.0'],
- 'tensorflow': ['tensorflow>=2.12rc1,<2.13'],
+ 'torch': ['torch>=1.9.0,<2.8.0'],
+ 'tensorflow': ['tensorflow>=2.12rc1,<2.17'],
'transformers': [
'transformers>=4.28.0,<4.49.0',
- 'tensorflow==2.12.0',
- 'torch>=1.9.0,<2.1.0'
+ 'tensorflow>=2.12.0',
+ 'torch>=1.9.0'
],
'tft': ['tensorflow_transform>=1.14.0,<1.15.0'],
'onnx': [
diff --git a/sdks/python/test-suites/tox/py312/build.gradle
b/sdks/python/test-suites/tox/py312/build.gradle
index 17478ec43aa..a8f2ac7fa5c 100644
--- a/sdks/python/test-suites/tox/py312/build.gradle
+++ b/sdks/python/test-suites/tox/py312/build.gradle
@@ -23,8 +23,20 @@
plugins { id 'org.apache.beam.module' }
applyPythonNature()
+def posargs = project.findProperty("posargs") ?: ""
+
// Required to setup a Python 3 virtualenv and task names.
pythonVersion = '3.12'
+project.tasks.register("postCommitPyDep") {}
+
+toxTask "testPy312pytorch-271", "py312-pytorch-271", "${posargs}"
+test.dependsOn "testPy312pytorch-271"
+postCommitPyDep.dependsOn "testPy312pytorch-271"
+
+toxTask "testPy312tensorflow-216", "py312-tensorflow-216", "${posargs}"
+test.dependsOn "testPy312tensorflow-216"
+postCommitPyDep.dependsOn "testPy312tensorflow-216"
+
apply from: "../common.gradle"
diff --git a/sdks/python/test-suites/tox/py39/build.gradle
b/sdks/python/test-suites/tox/py39/build.gradle
index d6a5e08fcf3..9740f056e68 100644
--- a/sdks/python/test-suites/tox/py39/build.gradle
+++ b/sdks/python/test-suites/tox/py39/build.gradle
@@ -37,7 +37,7 @@ project.tasks.register("preCommitPyCoverage") {
}
// Dep Postcommit runs test suites that evaluate compatibility of particular
-// dependencies. It is exercised on a single Python version.
+// dependencies. Each suite is exercised on at most one python version.
//
// Should still leave at least one version in PreCommit unless the marked tests
// are also exercised by existing PreCommit
diff --git a/sdks/python/tox.ini b/sdks/python/tox.ini
index 5131769509d..856ca46f64a 100644
--- a/sdks/python/tox.ini
+++ b/sdks/python/tox.ini
@@ -409,6 +409,22 @@ commands =
# Allow exit code 5 (no tests run) so that we can run this command safely on
arbitrary subdirectories.
/bin/sh -c 'pytest -o junit_suite_name={envname}
--junitxml=pytest_{envname}.xml -n 6 -m uses_pytorch {posargs}; ret=$?; [ $ret
= 5 ] && exit 0 || exit $ret'
+[testenv:py{311,312}-pytorch-271]
+deps =
+ 271:
+ torch>=2.7.1,<2.8.0
+ mpmath==1.3.0
+ numpy==1.26.4
+extras = test,gcp
+# Don't set TMPDIR to avoid "AF_UNIX path too long" errors in certain tests.
+setenv =
+commands =
+ # Log torch version for debugging
+ /bin/sh -c "pip freeze | grep -E torch"
+ # Run all PyTorch>=2 unit tests
+ # Allow exit code 5 (no tests run) so that we can run this command safely on
arbitrary subdirectories.
+ /bin/sh -c 'pytest -o junit_suite_name={envname}
--junitxml=pytest_{envname}.xml -n 6 -m uses_pytorch {posargs}; ret=$?; [ $ret
= 5 ] && exit 0 || exit $ret'
+
+
# TODO(https://github.com/apache/beam/issues/25796) - uncomment onnx tox task
in tox/py39/build.gradle once onnx supports protobuf 4.x.x
[testenv:py{39,310}-onnx-113]
# TODO(https://github.com/apache/beam/issues/25443)
@@ -445,6 +461,22 @@ commands =
# Allow exit code 5 (no tests run) so that we can run this command safely on
arbitrary subdirectories.
/bin/sh -c 'pytest -o junit_suite_name={envname}
--junitxml=pytest_{envname}.xml -n 6 -m uses_tf {posargs}; ret=$?; [ $ret = 5 ]
&& exit 0 || exit $ret'
+[testenv:py312-tensorflow-216]
+deps =
+ 216:
+ tensorflow>=2.16.1,<2.17
+ # Help pip resolve conflict with typing-extensions for old version of TF
https://github.com/apache/beam/issues/30852
+ pydantic<2.7
+extras = test,gcp
+commands_pre =
+ pip install -U 'protobuf==4.25.5'
+commands =
+ # Log tensorflow version for debugging
+ /bin/sh -c "pip freeze | grep -E tensorflow"
+ # Run all Tensorflow unit tests
+ # Allow exit code 5 (no tests run) so that we can run this command safely on
arbitrary subdirectories.
+ /bin/sh -c 'pytest -o junit_suite_name={envname}
--junitxml=pytest_{envname}.xml -n 6 -m uses_tf {posargs}; ret=$?; [ $ret = 5 ]
&& exit 0 || exit $ret'
+
[testenv:py39-xgboost-{160,170}]
deps =
160: