damccorm commented on code in PR #36966:
URL: https://github.com/apache/beam/pull/36966#discussion_r2794253099
##########
sdks/python/apache_beam/ml/inference/vllm_inference.py:
##########
@@ -125,7 +132,7 @@ def start_server(self, retries=3):
server_cmd = [
sys.executable,
'-m',
- 'vllm.entrypoints.openai.api_server',
+ self._vllm_executable,
Review Comment:
I solved that piece, but I'm still running into issues:
```
{"job":"2026-02-11_07_49_28-348318588552584172",
"logger":"/opt/apache/beam-venv/beam-venv-worker-sdk-0-0/lib/python3.12/site-packages/apache_beam/ml/inference/vllm_inference.py:84",
"portability_worker_id":"sdk-0-0_sibling_2", "thread":"Thread-91
(log_stdout)", "worker":"beamapp-dannymccormick-02-02110749-wdvx-harness-qpdp"}
thread '<unnamed>' panicked at /opt/dynamo/lib/runtime/src/storage/kv.rs:440:29:
called `Result::unwrap()` on an `Err` value: BuildError(Unable to create lease. Check etcd server status at http://localhost:2379
{"job":"2026-02-11_07_49_28-348318588552584172",
"logger":"/opt/apache/beam-venv/beam-venv-worker-sdk-0-0/lib/python3.12/site-packages/apache_beam/ml/inference/vllm_inference.py:84",
"portability_worker_id":"sdk-0-0_sibling_2", "thread":"Thread-91
(log_stdout)", "worker":"beamapp-dannymccormick-02-02110749-wdvx-harness-qpdp"}
Caused by:
  grpc request error: status: 'The service is currently unavailable', self: "tcp connect error")
2026-02-11T16:22:03.347934Z ERROR runners._cancel_all_tasks: unhandled exception during asyncio.run() shutdown
task: <Task finished name='Task-4' coro=<VllmEngineMonitor._check_engine_health() done, defined at /usr/local/lib/python3.12/dist-packages/dynamo/vllm/engine_monitor.py:68> exception=PanicException('called `Result::unwrap()` on an `Err` value: BuildError(Unable to create lease. Check etcd server status at http://localhost:2379\n\nCaused by:\n grpc request error: status: \'The service is currently unavailable\', self: "tcp connect error")')>
Traceback (most recent call last):
File "/usr/lib/python3.12/asyncio/runners.py", line 195, in run
return runner.run(main)
^^^^^^^^^^^^^^^^
File "/usr/lib/python3.12/asyncio/runners.py", line 118, in run
return self._loop.run_until_complete(task)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "uvloop/loop.pyx", line 1518, in uvloop.loop.Loop.run_until_complete
File "/usr/local/lib/python3.12/dist-packages/uvloop/__init__.py", line 48,
in wrapper
return await main
^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/dynamo/vllm/main.py", line
117, in worker
await init(runtime, config)
File "/usr/local/lib/python3.12/dist-packages/dynamo/vllm/main.py", line
578, in init
await register_vllm_model(
File "/usr/local/lib/python3.12/dist-packages/dynamo/vllm/main.py", line
370, in register_vllm_model
await register_llm(
Exception: unable to extract tokenizer kind from directory /root/.cache/huggingface/hub/models--facebook--opt-125m/snapshots/27dcfa74d334bc871f3234de431e71c6eeba5dd6
{"job":"2026-02-11_07_49_28-348318588552584172",
"logger":"/opt/apache/beam-venv/beam-venv-worker-sdk-0-0/lib/python3.12/site-packages/apache_beam/ml/inference/vllm_inference.py:84",
"portability_worker_id":"sdk-0-0_sibling_2", "thread":"Thread-91
(log_stdout)", "worker":"beamapp-dannymccormick-02-02110749-wdvx-harness-qpdp"}
During handling of the above exception, another exception occurred:
{"job":"2026-02-11_07_49_28-348318588552584172",
"logger":"/opt/apache/beam-venv/beam-venv-worker-sdk-0-0/lib/python3.12/site-packages/apache_beam/ml/inference/vllm_inference.py:84",
"portability_worker_id":"sdk-0-0_sibling_2", "thread":"Thread-91
(log_stdout)", "worker":"beamapp-dannymccormick-02-02110749-wdvx-harness-qpdp"}
Traceback (most recent call last):
File "/usr/local/lib/python3.12/dist-packages/dynamo/vllm/engine_monitor.py", line 71, in _check_engine_health
await self.engine_client.check_health()
File "/usr/local/lib/python3.12/dist-packages/vllm/v1/engine/async_llm.py",
line 734, in check_health
raise self.dead_error
vllm.v1.engine.exceptions.EngineDeadError: EngineCore encountered an issue.
See stack trace (above) for the root cause.
{"job":"2026-02-11_07_49_28-348318588552584172",
"logger":"/opt/apache/beam-venv/beam-venv-worker-sdk-0-0/lib/python3.12/site-packages/apache_beam/ml/inference/vllm_inference.py:84",
"portability_worker_id":"sdk-0-0_sibling_2", "thread":"Thread-91
(log_stdout)", "worker":"beamapp-dannymccormick-02-02110749-wdvx-harness-qpdp"}
During handling of the above exception, another exception occurred:
{"job":"2026-02-11_07_49_28-348318588552584172",
"logger":"/opt/apache/beam-venv/beam-venv-worker-sdk-0-0/lib/python3.12/site-packages/apache_beam/ml/inference/vllm_inference.py:84",
"portability_worker_id":"sdk-0-0_sibling_2", "thread":"Thread-91
(log_stdout)", "worker":"beamapp-dannymccormick-02-02110749-wdvx-harness-qpdp"}
Traceback (most recent call last):
File "/usr/local/lib/python3.12/dist-packages/dynamo/vllm/engine_monitor.py", line 78, in _check_engine_health
self.runtime.shutdown()
pyo3_runtime.PanicException: called `Result::unwrap()` on an `Err` value: BuildError(Unable to create lease. Check etcd server status at http://localhost:2379
{"job":"2026-02-11_07_49_28-348318588552584172",
"logger":"/opt/apache/beam-venv/beam-venv-worker-sdk-0-0/lib/python3.12/site-packages/apache_beam/ml/inference/vllm_inference.py:84",
"portability_worker_id":"sdk-0-0_sibling_2", "thread":"Thread-91
(log_stdout)", "worker":"beamapp-dannymccormick-02-02110749-wdvx-harness-qpdp"}
Caused by:
  grpc request error: status: 'The service is currently unavailable', self: "tcp connect error")
Traceback (most recent call last):
File "<frozen runpy>", line 198, in _run_module_as_main
File "<frozen runpy>", line 88, in _run_code
File "/usr/local/lib/python3.12/dist-packages/dynamo/vllm/__main__.py", line
7, in <module>
main()
File "/usr/local/lib/python3.12/dist-packages/dynamo/vllm/main.py", line
820, in main
uvloop.run(worker())
File "/usr/local/lib/python3.12/dist-packages/uvloop/__init__.py", line 96,
in run
return __asyncio.run(
^^^^^^^^^^^^^^
File "/usr/lib/python3.12/asyncio/runners.py", line 195, in run
return runner.run(main)
^^^^^^^^^^^^^^^^
File "/usr/lib/python3.12/asyncio/runners.py", line 118, in run
return self._loop.run_until_complete(task)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "uvloop/loop.pyx", line 1518, in uvloop.loop.Loop.run_until_complete
File "/usr/local/lib/python3.12/dist-packages/uvloop/__init__.py", line 48,
in wrapper
return await main
^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/dynamo/vllm/main.py", line
117, in worker
await init(runtime, config)
File "/usr/local/lib/python3.12/dist-packages/dynamo/vllm/main.py", line
578, in init
await register_vllm_model(
File "/usr/local/lib/python3.12/dist-packages/dynamo/vllm/main.py", line
370, in register_vllm_model
await register_llm(
Exception: unable to extract tokenizer kind from directory /root/.cache/huggingface/hub/models--facebook--opt-125m/snapshots/27dcfa74d334bc871f3234de431e71c6eeba5dd6
```
Not sure what is going on yet. Two distinct failures show up in the log: the `unable to extract tokenizer kind` exception coming out of `register_llm`, and panics from the engine monitor because it can't create an etcd lease at http://localhost:2379.
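
For the etcd side, here is a minimal sketch (a hypothetical debugging helper, not part of this PR; it only assumes etcd's default client port 2379 that the panic message points at) to check whether anything is actually listening on that port inside the worker container:
```
import socket


def etcd_port_open(host="localhost", port=2379, timeout=2.0):
    """Return True if a plain TCP connection to host:port succeeds."""
    try:
        with socket.create_connection((host, port), timeout=timeout):
            return True
    except OSError:
        return False


if __name__ == "__main__":
    # If this prints False inside the worker container, the lease panics
    # above are probably just fallout from etcd not running/reachable there.
    print("etcd reachable on localhost:2379:", etcd_port_open())
```
If that comes back False, the lease panics would likely be an environment issue separate from this change, and the tokenizer-kind error would be the piece to chase on its own.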