From e2158275030fb3e64148a4ad3ad930d24b66f7ab Mon Sep 17 00:00:00 2001
From: Gahow Wang <gahow.wang@gmail.com>
Date: Sat, 2 May 2026 08:21:27 +0800
Subject: [PATCH] Use uv auto torch backend for vllm 0.20

---
 .../examples/dash0_qwen30b_a3b_community_vllm020_harness.json | 4 ++--
 .../dash0_qwen30b_a3b_community_vllm020_noharness.json        | 4 ++--
 .../harness-early-stop-ablation-20260502.md                   | 4 +++-
 3 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/configs/examples/dash0_qwen30b_a3b_community_vllm020_harness.json b/configs/examples/dash0_qwen30b_a3b_community_vllm020_harness.json
index e6930f9..bd598a0 100644
--- a/configs/examples/dash0_qwen30b_a3b_community_vllm020_harness.json
+++ b/configs/examples/dash0_qwen30b_a3b_community_vllm020_harness.json
@@ -14,7 +14,7 @@
   "engine": {
     "engine_name": "vllm",
     "engine_version": "0.20.0",
-    "exec_path": "/tmp/wjh/venvs/vllm-0.20.0/bin/vllm",
+    "exec_path": "/tmp/wjh/venvs/vllm-0.20.0-auto/bin/vllm",
     "cwd": "/home/admin/cpfs/wjh/aituner/aituner",
     "host": "127.0.0.1",
     "port": 18230,
@@ -57,7 +57,7 @@
       "allowed_data_parallel_sizes": [1, 2, 4, 8],
       "allowed_expert_parallel_sizes": [1, 2, 4, 8]
     },
-    "python_executable": "/tmp/wjh/venvs/vllm-0.20.0/bin/python"
+    "python_executable": "/tmp/wjh/venvs/vllm-0.20.0-auto/bin/python"
   },
   "trace": {
     "windows_path": "/home/admin/cpfs/wjh/aituner/aituner/trace_windows/windows.json",
diff --git a/configs/examples/dash0_qwen30b_a3b_community_vllm020_noharness.json b/configs/examples/dash0_qwen30b_a3b_community_vllm020_noharness.json
index 7084dcb..b5520aa 100644
--- a/configs/examples/dash0_qwen30b_a3b_community_vllm020_noharness.json
+++ b/configs/examples/dash0_qwen30b_a3b_community_vllm020_noharness.json
@@ -14,7 +14,7 @@
   "engine": {
     "engine_name": "vllm",
     "engine_version": "0.20.0",
-    "exec_path": "/tmp/wjh/venvs/vllm-0.20.0/bin/vllm",
+    "exec_path": "/tmp/wjh/venvs/vllm-0.20.0-auto/bin/vllm",
     "cwd": "/home/admin/cpfs/wjh/aituner/aituner",
     "host": "127.0.0.1",
     "port": 18231,
@@ -57,7 +57,7 @@
       "allowed_data_parallel_sizes": [1, 2, 4, 8],
       "allowed_expert_parallel_sizes": [1, 2, 4, 8]
     },
-    "python_executable": "/tmp/wjh/venvs/vllm-0.20.0/bin/python"
+    "python_executable": "/tmp/wjh/venvs/vllm-0.20.0-auto/bin/python"
   },
   "trace": {
     "windows_path": "/home/admin/cpfs/wjh/aituner/aituner/trace_windows/windows.json",
diff --git a/docs/qwen30b-community-vllm020/harness-early-stop-ablation-20260502.md b/docs/qwen30b-community-vllm020/harness-early-stop-ablation-20260502.md
index 660f36e..adb7485 100644
--- a/docs/qwen30b-community-vllm020/harness-early-stop-ablation-20260502.md
+++ b/docs/qwen30b-community-vllm020/harness-early-stop-ablation-20260502.md
@@ -19,7 +19,9 @@ Both specs start from the same base vLLM configuration. The base contains only s
 
 PyPI reports `vllm==0.20.0` as the current community release checked on 2026-05-02. The dash0 runtime venv is on local rootfs rather than CPFS, because installing torch/CUDA wheels into CPFS was I/O-bound:
 
-`/tmp/wjh/venvs/vllm-0.20.0`
+`/tmp/wjh/venvs/vllm-0.20.0-auto`
+
+The first plain `pip install vllm==0.20.0` smoke test pulled in `torch 2.11.0+cu130` and failed on dash0's driver (`570.133.20`, CUDA 12.9). The active install uses the vLLM-documented `uv pip install vllm==0.20.0 --torch-backend=auto` path so that uv selects a CUDA backend compatible with the installed driver.
 
 Install log: