Validate served model name consistency

This commit is contained in:
2026-04-09 22:50:23 +08:00
parent baba1a3c4f
commit d582a8ed1b
3 changed files with 30 additions and 3 deletions

View File

@@ -9,7 +9,7 @@
},
"model": {
"model_id": "qwen3-235b-a22b-256k-0717-internal",
-    "served_model_name": "qwen3-235b-decode-aituner"
+    "served_model_name": "qwen3-235b-decode"
},
"engine": {
"engine_name": "vllm",
@@ -107,7 +107,7 @@
"base_flags": {
"host": "127.0.0.1",
"port": 18120,
-    "served-model-name": "qwen3-235b-decode-aituner",
+    "served-model-name": "qwen3-235b-decode",
"gpu-memory-utilization": 0.75,
"max-model-len": 262144,
"enable-chunked-prefill": true,