server: add gpt-oss chat template for proper prompt formatting

The gpt-oss model requires a specific prompt format with <|start|>,
<|message|>, <|end|>, <|channel|> tokens. Without this, the model
produces degenerate output. Auto-detected via config.model_type.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Gahow Wang
2026-05-30 15:43:29 +08:00
parent 15c51f143e
commit 5cb3cf28f9
2 changed files with 35 additions and 3 deletions

View File

@@ -89,7 +89,7 @@ async fn chat_non_stream(state: Arc<AppState>, req: ChatRequest) -> Response {
return response;
}
let prompt = build_prompt(&req.messages);
let prompt = build_prompt(&req.messages, &state.model_type);
let prompt_tokens = state.engine_tokenizer.lock().unwrap().encode(&prompt);
let prompt_token_count = prompt_tokens.len();
@@ -159,7 +159,7 @@ fn chat_stream(
return response;
}
let prompt = build_prompt(&req.messages);
let prompt = build_prompt(&req.messages, &state.model_type);
let prompt_tokens = state.engine_tokenizer.lock().unwrap().encode(&prompt);
let max_seq_len = state.max_seq_len;
@@ -325,7 +325,11 @@ fn sampling_params(req: &ChatRequest) -> SamplingParams {
}
}
fn build_prompt(messages: &[Message]) -> String {
fn build_prompt(messages: &[Message], model_type: &str) -> String {
if model_type == "gpt_oss" {
return build_prompt_gpt_oss(messages);
}
// Default: Qwen3 ChatML format
let mut prompt = String::new();
for msg in messages {
match msg.role.as_str() {
@@ -343,3 +347,28 @@ fn build_prompt(messages: &[Message]) -> String {
prompt.push_str("<think>\n\n</think>\n\n");
prompt
}
/// Renders a chat transcript in the gpt-oss prompt format.
///
/// The output always begins with a fixed `system` block that declares the
/// valid channels, followed by one `<|start|>…<|end|>` turn per message, and
/// ends with an open `<|start|>assistant` header for the model to complete.
///
/// Role handling:
/// - `user` becomes a `user` turn.
/// - `assistant` becomes an `assistant` turn on the `final` channel.
/// - `system` and `developer` become a `developer` turn whose body starts
///   with `# Instructions`. Previously these were dropped, so caller-supplied
///   instructions never reached the model.
/// - Any other role is skipped.
///
/// Message content is inserted verbatim; no escaping of special tokens is
/// performed here.
fn build_prompt_gpt_oss(messages: &[Message]) -> String {
    const TURN_END: &str = "<|end|>";

    let mut prompt = String::new();

    // Fixed system block: identity line plus the channel declaration.
    prompt.push_str("<|start|>system<|message|>");
    prompt.push_str("You are a helpful assistant.\n\n# Valid channels: analysis, commentary, final. Channel must be included for every message.");
    prompt.push_str(TURN_END);

    for msg in messages {
        let header = match msg.role.as_str() {
            "user" => "<|start|>user<|message|>",
            "assistant" => "<|start|>assistant<|channel|>final<|message|>",
            // The `system` slot is already taken by the fixed block above, so
            // caller instructions go in a `developer` turn, which is where the
            // harmony format places them (see the harmony format reference).
            "system" | "developer" => "<|start|>developer<|message|># Instructions\n\n",
            // Roles with no known rendering are skipped.
            _ => continue,
        };
        prompt.push_str(header);
        prompt.push_str(&msg.content);
        prompt.push_str(TURN_END);
    }

    // Left open on purpose: generation continues from the assistant header.
    prompt.push_str("<|start|>assistant");
    prompt
}

View File

@@ -11,6 +11,7 @@ use xserv_model::ModelConfig;
pub struct AppState {
pub model_name: String,
pub model_type: String,
pub engine_sender: Mutex<mpsc::Sender<GenerateRequest>>,
pub engine_tokenizer: Mutex<xserv_tokenizer::Tokenizer>,
pub max_seq_len: usize,
@@ -99,8 +100,10 @@ async fn main() {
}
});
let model_type = model_config.model_type.clone().unwrap_or_default();
let state = Arc::new(AppState {
model_name,
model_type,
engine_sender: Mutex::new(tx),
engine_tokenizer: Mutex::new(tokenizer),
max_seq_len,