This is an automated email from the ASF dual-hosted git repository. vterentev pushed a commit to branch fix-vllm-10-08 in repository https://gitbox.apache.org/repos/asf/beam.git
commit 182219c5b60645cda221317b428da3bc842ebe46 Author: Vitaly Terentyev <[email protected]> AuthorDate: Wed Oct 8 15:17:56 2025 +0400 Fix vLLM Gemma, add vLLM extra --- ...ference_Python_Benchmarks_Dataflow_VLLM_Gemma_Batch.txt | 8 ++++---- .../ml/inference/test_resources/vllm.dockerfile | 14 ++++---------- .../apache_beam/ml/inference/vllm_tests_requirements.txt | 3 ++- sdks/python/container/common.gradle | 12 ++++++++++++ sdks/python/container/run_generate_requirements.sh | 6 ++++++ sdks/python/setup.py | 3 ++- 6 files changed, 30 insertions(+), 16 deletions(-) diff --git a/.github/workflows/load-tests-pipeline-options/beam_Inference_Python_Benchmarks_Dataflow_VLLM_Gemma_Batch.txt b/.github/workflows/load-tests-pipeline-options/beam_Inference_Python_Benchmarks_Dataflow_VLLM_Gemma_Batch.txt index 6101fe5da45..ee7a1fda059 100644 --- a/.github/workflows/load-tests-pipeline-options/beam_Inference_Python_Benchmarks_Dataflow_VLLM_Gemma_Batch.txt +++ b/.github/workflows/load-tests-pipeline-options/beam_Inference_Python_Benchmarks_Dataflow_VLLM_Gemma_Batch.txt @@ -18,9 +18,9 @@ --temp_location=gs://temp-storage-for-perf-tests/loadtests --staging_location=gs://temp-storage-for-perf-tests/loadtests --input=gs://apache-beam-ml/testing/inputs/sentences_50k.txt ---machine_type=n1-standard-8 +--machine_type=g2-standard-8 --worker_zone=us-central1-b ---disk_size_gb=50 +--disk_size_gb=200 --input_options={} --num_workers=8 --max_num_workers=25 @@ -32,5 +32,5 @@ --metrics_table=gemma_vllm_batch --influx_measurement=gemma_vllm_batch --model_gcs_path=gs://apache-beam-ml/models/gemma-2b-it ---dataflow_service_options=worker_accelerator=type:nvidia-tesla-t4;count:1;install-nvidia-driver ---experiments=use_runner_v2 \ No newline at end of file +--dataflow_service_options=worker_accelerator=type:nvidia-l4;count:1;install-nvidia-driver +--experiments=use_runner_v2 diff --git a/sdks/python/apache_beam/ml/inference/test_resources/vllm.dockerfile 
b/sdks/python/apache_beam/ml/inference/test_resources/vllm.dockerfile index 5727437809c..7a9b4848f00 100644 --- a/sdks/python/apache_beam/ml/inference/test_resources/vllm.dockerfile +++ b/sdks/python/apache_beam/ml/inference/test_resources/vllm.dockerfile @@ -46,7 +46,7 @@ RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3 && \ python3 -m pip install --upgrade pip setuptools wheel # 4) Copy the Beam SDK harness (for Dataflow workers) -COPY --from=gcr.io/apache-beam-testing/beam-sdk/beam_python3.10_sdk:2.68.0.dev \ +COPY --from=gcr.io/apache-beam-testing/beam-sdk/beam_python3.10_sdk:2.69.0.dev \ /opt/apache/beam /opt/apache/beam # 5) Make sure the harness is discovered first @@ -54,15 +54,9 @@ ENV PYTHONPATH=/opt/apache/beam:$PYTHONPATH # 6) Install the Beam dev SDK from the local source package. # This .tar.gz file will be created by GitHub Actions workflow -# and copied into the build context. +# and copied into the build context. This will include vLLM dependencies COPY ./sdks/python/build/apache-beam.tar.gz /tmp/beam.tar.gz -RUN python3 -m pip install --no-cache-dir "/tmp/beam.tar.gz[gcp]" - -# 7) Install vLLM, and other dependencies -RUN python3 -m pip install --no-cache-dir \ - openai>=1.52.2 \ - vllm>=0.6.3 \ - triton>=3.1.0 +RUN python3 -m pip install --no-cache-dir "/tmp/beam.tar.gz[gcp,vllm]" # 8) Use the Beam boot script as entrypoint -ENTRYPOINT ["/opt/apache/beam/boot"] \ No newline at end of file +ENTRYPOINT ["/opt/apache/beam/boot"] diff --git a/sdks/python/apache_beam/ml/inference/vllm_tests_requirements.txt b/sdks/python/apache_beam/ml/inference/vllm_tests_requirements.txt index 939f0526d80..093208cc21f 100644 --- a/sdks/python/apache_beam/ml/inference/vllm_tests_requirements.txt +++ b/sdks/python/apache_beam/ml/inference/vllm_tests_requirements.txt @@ -19,4 +19,5 @@ torchvision>=0.8.2 pillow>=8.0.0 transformers>=4.18.0 google-cloud-monitoring>=2.27.0 -openai>=1.52.2 \ No newline at end of file +openai>=1.52.2 +dill diff --git 
a/sdks/python/container/common.gradle b/sdks/python/container/common.gradle index 0648bf4fa2e..653c527216e 100644 --- a/sdks/python/container/common.gradle +++ b/sdks/python/container/common.gradle @@ -56,6 +56,18 @@ def generatePythonRequirements = tasks.register("generatePythonRequirements") { "[gcp,dataframe,test,tensorflow,torch,transformers] " + "${pipExtraOptions}" } + // GPU requirements are not used for any containers directly due to licensing, + // but can be picked up by customers or other consumers for use. + exec { + executable 'sh' + args '-c', "cd ${rootDir} && ${runScriptsPath} " + + "${project.ext.pythonVersion} " + + "${files(configurations.sdkSourceTarball.files).singleFile} " + + "gpu_image_requirements.txt " + + "container/ml " + + "[gcp,dataframe,test,tensorflow,torch,transformers,vllm] " + + "${pipExtraOptions}" + } } } diff --git a/sdks/python/container/run_generate_requirements.sh b/sdks/python/container/run_generate_requirements.sh index de14cbff2d5..cf30c24c6e7 100755 --- a/sdks/python/container/run_generate_requirements.sh +++ b/sdks/python/container/run_generate_requirements.sh @@ -76,6 +76,12 @@ python"${PY_VERSION}" -m venv "$ENV_PATH" source "$ENV_PATH"/bin/activate pip install --upgrade pip setuptools wheel +if [[ $EXTRAS == *"vllm"* ]]; then + # Explicitly install torch to avoid https://github.com/facebookresearch/xformers/issues/740 + # This should be overwritten later since the vllm extra is installed alongside torch + pip install --no-cache-dir torch +fi + # Install gcp extra deps since these deps are commonly used with Apache Beam. # Install dataframe deps to add Dataframe support in released images. 
# Install test deps since some integration tests need dependencies, diff --git a/sdks/python/setup.py b/sdks/python/setup.py index 719d188ed26..14e95f7ccc2 100644 --- a/sdks/python/setup.py +++ b/sdks/python/setup.py @@ -610,7 +610,8 @@ if __name__ == '__main__': ], 'xgboost': ['xgboost>=1.6.0,<2.1.3', 'datatable==1.0.0'], 'tensorflow-hub': ['tensorflow-hub>=0.14.0,<0.16.0'], - 'milvus': milvus_dependency + 'milvus': milvus_dependency, + 'vllm': ['openai>=1.52.2', 'vllm>=0.6.3', 'triton>=3.1.0'] }, zip_safe=False, # PyPI package information.
