This is an automated email from the ASF dual-hosted git repository.
vterentev pushed a commit to branch fix-vllm-gemma
in repository https://gitbox.apache.org/repos/asf/beam.git
The following commit(s) were added to refs/heads/fix-vllm-gemma by this push:
new 33c4256d80a Fix dockerfile - use cuda 12.2.2
33c4256d80a is described below
commit 33c4256d80a702f207619f8f404f70ad339ccd54
Author: Vitaly Terentyev <[email protected]>
AuthorDate: Fri Sep 26 22:54:13 2025 +0400
Fix dockerfile - use cuda 12.2.2
---
sdks/python/apache_beam/ml/inference/test_resources/vllm.dockerfile | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/sdks/python/apache_beam/ml/inference/test_resources/vllm.dockerfile b/sdks/python/apache_beam/ml/inference/test_resources/vllm.dockerfile
index 200497659de..7f11d31a280 100644
--- a/sdks/python/apache_beam/ml/inference/test_resources/vllm.dockerfile
+++ b/sdks/python/apache_beam/ml/inference/test_resources/vllm.dockerfile
@@ -17,7 +17,7 @@
# Used for any vLLM integration test
# Dockerfile — Beam dev harness + install dev SDK from LOCAL source package
-FROM nvidia/cuda:12.4.1-devel-ubuntu22.04
+FROM nvidia/cuda:12.2.2-devel-ubuntu22.04
# 1) Non-interactive + timezone + Redirect all heavy temp/cache away from /tmp
ENV DEBIAN_FRONTEND=noninteractive \
@@ -29,8 +29,7 @@ ENV DEBIAN_FRONTEND=noninteractive \
HF_HUB_CACHE=/var/beam_hf/hub \
TRANSFORMERS_CACHE=/var/beam_hf/hub \
VLLM_CACHE_ROOT=/var/beam_hf/vllm \
- VLLM_RPC_BASE_PATH=/var/beam_tmp \
- TOKENIZERS_PARALLELISM=false
+ VLLM_RPC_BASE_PATH=/var/beam_tmp
# Make sure target dirs exist (mounted on worker PD at runtime)
RUN mkdir -p /var/beam_tmp /var/beam_hf/hub /var/beam_hf/vllm && \