From 1880e859b50cfd0e3c67ea17f271ee24b2847dbc Mon Sep 17 00:00:00 2001 From: Gahow Wang Date: Sat, 2 May 2026 08:28:23 +0800 Subject: [PATCH] Use vllm cu129 wheel on dash0 --- .../examples/dash0_qwen30b_a3b_community_vllm020_harness.json | 4 ++-- .../dash0_qwen30b_a3b_community_vllm020_noharness.json | 4 ++-- .../harness-early-stop-ablation-20260502.md | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/configs/examples/dash0_qwen30b_a3b_community_vllm020_harness.json b/configs/examples/dash0_qwen30b_a3b_community_vllm020_harness.json index bd598a0..d88e29e 100644 --- a/configs/examples/dash0_qwen30b_a3b_community_vllm020_harness.json +++ b/configs/examples/dash0_qwen30b_a3b_community_vllm020_harness.json @@ -14,7 +14,7 @@ "engine": { "engine_name": "vllm", "engine_version": "0.20.0", - "exec_path": "/tmp/wjh/venvs/vllm-0.20.0-auto/bin/vllm", + "exec_path": "/tmp/wjh/venvs/vllm-0.20.0-cu129/bin/vllm", "cwd": "/home/admin/cpfs/wjh/aituner/aituner", "host": "127.0.0.1", "port": 18230, @@ -57,7 +57,7 @@ "allowed_data_parallel_sizes": [1, 2, 4, 8], "allowed_expert_parallel_sizes": [1, 2, 4, 8] }, - "python_executable": "/tmp/wjh/venvs/vllm-0.20.0-auto/bin/python" + "python_executable": "/tmp/wjh/venvs/vllm-0.20.0-cu129/bin/python" }, "trace": { "windows_path": "/home/admin/cpfs/wjh/aituner/aituner/trace_windows/windows.json", diff --git a/configs/examples/dash0_qwen30b_a3b_community_vllm020_noharness.json b/configs/examples/dash0_qwen30b_a3b_community_vllm020_noharness.json index b5520aa..9ac41a6 100644 --- a/configs/examples/dash0_qwen30b_a3b_community_vllm020_noharness.json +++ b/configs/examples/dash0_qwen30b_a3b_community_vllm020_noharness.json @@ -14,7 +14,7 @@ "engine": { "engine_name": "vllm", "engine_version": "0.20.0", - "exec_path": "/tmp/wjh/venvs/vllm-0.20.0-auto/bin/vllm", + "exec_path": "/tmp/wjh/venvs/vllm-0.20.0-cu129/bin/vllm", "cwd": "/home/admin/cpfs/wjh/aituner/aituner", "host": "127.0.0.1", "port": 18231, @@ -57,7 +57,7 @@ "allowed_data_parallel_sizes": [1, 2, 4, 8], "allowed_expert_parallel_sizes": [1, 2, 4, 8] }, - "python_executable": "/tmp/wjh/venvs/vllm-0.20.0-auto/bin/python" + "python_executable": "/tmp/wjh/venvs/vllm-0.20.0-cu129/bin/python" }, "trace": { "windows_path": "/home/admin/cpfs/wjh/aituner/aituner/trace_windows/windows.json", diff --git a/docs/qwen30b-community-vllm020/harness-early-stop-ablation-20260502.md b/docs/qwen30b-community-vllm020/harness-early-stop-ablation-20260502.md index adb7485..2cc3053 100644 --- a/docs/qwen30b-community-vllm020/harness-early-stop-ablation-20260502.md +++ b/docs/qwen30b-community-vllm020/harness-early-stop-ablation-20260502.md @@ -19,9 +19,9 @@ Both specs start from the same base vLLM configuration. The base contains only s PyPI reports `vllm==0.20.0` as the current community release checked on 2026-05-02. The dash0 runtime venv is on local rootfs rather than CPFS, because installing torch/CUDA wheels into CPFS was I/O-bound: -`/tmp/wjh/venvs/vllm-0.20.0-auto` +`/tmp/wjh/venvs/vllm-0.20.0-cu129` -The first plain `pip install vllm==0.20.0` smoke pulled `torch 2.11.0+cu130` and failed on dash0's driver (`570.133.20`, CUDA 12.9). The active install uses the vLLM-documented `uv pip install vllm==0.20.0 --torch-backend=auto` path so uv selects a CUDA backend compatible with the installed driver. +The first plain `pip install vllm==0.20.0` smoke pulled `torch 2.11.0+cu130` and failed on dash0's driver (`570.133.20`, CUDA 12.9). The active install uses the vLLM 0.20.0 GitHub release `+cu129` wheel and the PyTorch CUDA 12.9 index, matching the vLLM documented CUDA 12.9 install path for this driver. Install log: