Validate served model name consistency
This commit is contained in:
@@ -9,7 +9,7 @@
|
||||
},
|
||||
"model": {
|
||||
"model_id": "qwen3-235b-a22b-256k-0717-internal",
|
||||
- "served_model_name": "qwen3-235b-decode-aituner"
|
||||
+ "served_model_name": "qwen3-235b-decode"
|
||||
},
|
||||
"engine": {
|
||||
"engine_name": "vllm",
|
||||
@@ -107,7 +107,7 @@
|
||||
"base_flags": {
|
||||
"host": "127.0.0.1",
|
||||
"port": 18120,
|
||||
- "served-model-name": "qwen3-235b-decode-aituner",
|
||||
+ "served-model-name": "qwen3-235b-decode",
|
||||
"gpu-memory-utilization": 0.75,
|
||||
"max-model-len": 262144,
|
||||
"enable-chunked-prefill": true,
|
||||
|
||||
Reference in New Issue
Block a user