This is an automated email from the ASF dual-hosted git repository.

vterentev pushed a commit to branch fix-vllm-10-08
in repository https://gitbox.apache.org/repos/asf/beam.git

commit 182219c5b60645cda221317b428da3bc842ebe46
Author: Vitaly Terentyev <[email protected]>
AuthorDate: Wed Oct 8 15:17:56 2025 +0400

    Fix vLLM Gemma, add vLLM extra
---
 ...ference_Python_Benchmarks_Dataflow_VLLM_Gemma_Batch.txt |  8 ++++----
 .../ml/inference/test_resources/vllm.dockerfile            | 14 ++++----------
 .../apache_beam/ml/inference/vllm_tests_requirements.txt   |  3 ++-
 sdks/python/container/common.gradle                        | 12 ++++++++++++
 sdks/python/container/run_generate_requirements.sh         |  6 ++++++
 sdks/python/setup.py                                       |  3 ++-
 6 files changed, 30 insertions(+), 16 deletions(-)

diff --git a/.github/workflows/load-tests-pipeline-options/beam_Inference_Python_Benchmarks_Dataflow_VLLM_Gemma_Batch.txt b/.github/workflows/load-tests-pipeline-options/beam_Inference_Python_Benchmarks_Dataflow_VLLM_Gemma_Batch.txt
index 6101fe5da45..ee7a1fda059 100644
--- a/.github/workflows/load-tests-pipeline-options/beam_Inference_Python_Benchmarks_Dataflow_VLLM_Gemma_Batch.txt
+++ b/.github/workflows/load-tests-pipeline-options/beam_Inference_Python_Benchmarks_Dataflow_VLLM_Gemma_Batch.txt
@@ -18,9 +18,9 @@
 --temp_location=gs://temp-storage-for-perf-tests/loadtests
 --staging_location=gs://temp-storage-for-perf-tests/loadtests
 --input=gs://apache-beam-ml/testing/inputs/sentences_50k.txt
---machine_type=n1-standard-8
+--machine_type=g2-standard-8
 --worker_zone=us-central1-b
---disk_size_gb=50
+--disk_size_gb=200
 --input_options={}
 --num_workers=8
 --max_num_workers=25
@@ -32,5 +32,5 @@
 --metrics_table=gemma_vllm_batch
 --influx_measurement=gemma_vllm_batch
 --model_gcs_path=gs://apache-beam-ml/models/gemma-2b-it
---dataflow_service_options=worker_accelerator=type:nvidia-tesla-t4;count:1;install-nvidia-driver
---experiments=use_runner_v2
\ No newline at end of file
+--dataflow_service_options=worker_accelerator=type:nvidia-l4;count:1;install-nvidia-driver
+--experiments=use_runner_v2
diff --git a/sdks/python/apache_beam/ml/inference/test_resources/vllm.dockerfile b/sdks/python/apache_beam/ml/inference/test_resources/vllm.dockerfile
index 5727437809c..7a9b4848f00 100644
--- a/sdks/python/apache_beam/ml/inference/test_resources/vllm.dockerfile
+++ b/sdks/python/apache_beam/ml/inference/test_resources/vllm.dockerfile
@@ -46,7 +46,7 @@ RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3 && \
     python3 -m pip install --upgrade pip setuptools wheel
 
 # 4) Copy the Beam SDK harness (for Dataflow workers)
-COPY --from=gcr.io/apache-beam-testing/beam-sdk/beam_python3.10_sdk:2.68.0.dev \
+COPY --from=gcr.io/apache-beam-testing/beam-sdk/beam_python3.10_sdk:2.69.0.dev \
      /opt/apache/beam /opt/apache/beam
 
 # 5) Make sure the harness is discovered first
@@ -54,15 +54,9 @@ ENV PYTHONPATH=/opt/apache/beam:$PYTHONPATH
 
 # 6) Install the Beam dev SDK from the local source package.
 # This .tar.gz file will be created by GitHub Actions workflow
-# and copied into the build context.
+# and copied into the build context. This will include vLLM dependencies
 COPY ./sdks/python/build/apache-beam.tar.gz /tmp/beam.tar.gz
-RUN python3 -m pip install --no-cache-dir "/tmp/beam.tar.gz[gcp]"
-
-# 7) Install vLLM, and other dependencies
-RUN python3 -m pip install --no-cache-dir \
-      openai>=1.52.2 \
-      vllm>=0.6.3 \
-      triton>=3.1.0
+RUN python3 -m pip install --no-cache-dir "/tmp/beam.tar.gz[gcp,vllm]"
 
 # 8) Use the Beam boot script as entrypoint
-ENTRYPOINT ["/opt/apache/beam/boot"]
\ No newline at end of file
+ENTRYPOINT ["/opt/apache/beam/boot"]
diff --git a/sdks/python/apache_beam/ml/inference/vllm_tests_requirements.txt b/sdks/python/apache_beam/ml/inference/vllm_tests_requirements.txt
index 939f0526d80..093208cc21f 100644
--- a/sdks/python/apache_beam/ml/inference/vllm_tests_requirements.txt
+++ b/sdks/python/apache_beam/ml/inference/vllm_tests_requirements.txt
@@ -19,4 +19,5 @@ torchvision>=0.8.2
 pillow>=8.0.0
 transformers>=4.18.0
 google-cloud-monitoring>=2.27.0
-openai>=1.52.2
\ No newline at end of file
+openai>=1.52.2
+dill
diff --git a/sdks/python/container/common.gradle b/sdks/python/container/common.gradle
index 0648bf4fa2e..653c527216e 100644
--- a/sdks/python/container/common.gradle
+++ b/sdks/python/container/common.gradle
@@ -56,6 +56,18 @@ def generatePythonRequirements = tasks.register("generatePythonRequirements") {
               "[gcp,dataframe,test,tensorflow,torch,transformers] " +
               "${pipExtraOptions}"
     }
+    // GPU requirements not used for any containers directly due to licensing,
+    // but can be picked up by customers or other consumers for use.
+    exec {
+      executable 'sh'
+      args '-c', "cd ${rootDir} && ${runScriptsPath} " +
+              "${project.ext.pythonVersion} " +
+              "${files(configurations.sdkSourceTarball.files).singleFile} " +
+              "gpu_image_requirements.txt " +
+              "container/ml " +
+              "[gcp,dataframe,test,tensorflow,torch,transformers,vllm] " +
+              "${pipExtraOptions}"
+    }
   }
 }
 
diff --git a/sdks/python/container/run_generate_requirements.sh b/sdks/python/container/run_generate_requirements.sh
index de14cbff2d5..cf30c24c6e7 100755
--- a/sdks/python/container/run_generate_requirements.sh
+++ b/sdks/python/container/run_generate_requirements.sh
@@ -76,6 +76,12 @@ python"${PY_VERSION}" -m venv "$ENV_PATH"
 source "$ENV_PATH"/bin/activate
 pip install --upgrade pip setuptools wheel
 
+if [[ $EXTRAS == *"vllm"* ]]; then
+  # Explicitly install torch to avoid https://github.com/facebookresearch/xformers/issues/740
+  # This should be overwritten later since the vllm extra is installed alongside torch
+  pip install --no-cache-dir torch
+fi
+
 # Install gcp extra deps since these deps are commonly used with Apache Beam.
 # Install dataframe deps to add have Dataframe support in released images.
 # Install test deps since some integration tests need dependencies,
diff --git a/sdks/python/setup.py b/sdks/python/setup.py
index 719d188ed26..14e95f7ccc2 100644
--- a/sdks/python/setup.py
+++ b/sdks/python/setup.py
@@ -610,7 +610,8 @@ if __name__ == '__main__':
           ],
           'xgboost': ['xgboost>=1.6.0,<2.1.3', 'datatable==1.0.0'],
           'tensorflow-hub': ['tensorflow-hub>=0.14.0,<0.16.0'],
-          'milvus': milvus_dependency
+          'milvus': milvus_dependency,
+          'vllm': ['openai>=1.52.2', 'vllm>=0.6.3', 'triton>=3.1.0']
       },
       zip_safe=False,
       # PyPI package information.

Reply via email to