Validate served model name consistency

2026-04-09 22:50:23 +08:00
parent baba1a3c4f
commit d582a8ed1b
3 changed files with 30 additions and 3 deletions
--- a/configs/examples/dash0_qwen235b_decode_thinking_run2_tpot40.json
+++ b/configs/examples/dash0_qwen235b_decode_thinking_run2_tpot40.json
@@ -9,7 +9,7 @@
  },
  "model": {
    "model_id": "qwen3-235b-a22b-256k-0717-internal",
-    "served_model_name": "qwen3-235b-decode-aituner"
+    "served_model_name": "qwen3-235b-decode"
  },
  "engine": {
    "engine_name": "vllm",
@@ -107,7 +107,7 @@
    "base_flags": {
      "host": "127.0.0.1",
      "port": 18120,
-      "served-model-name": "qwen3-235b-decode-aituner",
+      "served-model-name": "qwen3-235b-decode",
      "gpu-memory-utilization": 0.75,
      "max-model-len": 262144,
      "enable-chunked-prefill": true,