server: add gpt-oss chat template for proper prompt formatting
The gpt-oss model requires a specific prompt format built from the <|start|>, <|message|>, <|end|>, and <|channel|> tokens. Without this format, the model produces degenerate output. The template is auto-detected via config.model_type.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -89,7 +89,7 @@ async fn chat_non_stream(state: Arc<AppState>, req: ChatRequest) -> Response {
|
||||
return response;
|
||||
}
|
||||
|
||||
let prompt = build_prompt(&req.messages);
|
||||
let prompt = build_prompt(&req.messages, &state.model_type);
|
||||
let prompt_tokens = state.engine_tokenizer.lock().unwrap().encode(&prompt);
|
||||
let prompt_token_count = prompt_tokens.len();
|
||||
|
||||
@@ -159,7 +159,7 @@ fn chat_stream(
|
||||
return response;
|
||||
}
|
||||
|
||||
let prompt = build_prompt(&req.messages);
|
||||
let prompt = build_prompt(&req.messages, &state.model_type);
|
||||
let prompt_tokens = state.engine_tokenizer.lock().unwrap().encode(&prompt);
|
||||
|
||||
let max_seq_len = state.max_seq_len;
|
||||
@@ -325,7 +325,11 @@ fn sampling_params(req: &ChatRequest) -> SamplingParams {
|
||||
}
|
||||
}
|
||||
|
||||
fn build_prompt(messages: &[Message]) -> String {
|
||||
fn build_prompt(messages: &[Message], model_type: &str) -> String {
|
||||
if model_type == "gpt_oss" {
|
||||
return build_prompt_gpt_oss(messages);
|
||||
}
|
||||
// Default: Qwen3 ChatML format
|
||||
let mut prompt = String::new();
|
||||
for msg in messages {
|
||||
match msg.role.as_str() {
|
||||
@@ -343,3 +347,28 @@ fn build_prompt(messages: &[Message]) -> String {
|
||||
prompt.push_str("<think>\n\n</think>\n\n");
|
||||
prompt
|
||||
}
|
||||
|
||||
fn build_prompt_gpt_oss(messages: &[Message]) -> String {
|
||||
let mut prompt = String::new();
|
||||
// System prompt
|
||||
prompt.push_str("<|start|>system<|message|>");
|
||||
prompt.push_str("You are a helpful assistant.\n\n# Valid channels: analysis, commentary, final. Channel must be included for every message.");
|
||||
prompt.push_str("<|end|>");
|
||||
for msg in messages {
|
||||
match msg.role.as_str() {
|
||||
"user" => {
|
||||
prompt.push_str("<|start|>user<|message|>");
|
||||
prompt.push_str(&msg.content);
|
||||
prompt.push_str("<|end|>");
|
||||
}
|
||||
"assistant" => {
|
||||
prompt.push_str("<|start|>assistant<|channel|>final<|message|>");
|
||||
prompt.push_str(&msg.content);
|
||||
prompt.push_str("<|end|>");
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
prompt.push_str("<|start|>assistant");
|
||||
prompt
|
||||
}
|
||||
|
||||
@@ -11,6 +11,7 @@ use xserv_model::ModelConfig;
|
||||
|
||||
pub struct AppState {
|
||||
pub model_name: String,
|
||||
pub model_type: String,
|
||||
pub engine_sender: Mutex<mpsc::Sender<GenerateRequest>>,
|
||||
pub engine_tokenizer: Mutex<xserv_tokenizer::Tokenizer>,
|
||||
pub max_seq_len: usize,
|
||||
@@ -99,8 +100,10 @@ async fn main() {
|
||||
}
|
||||
});
|
||||
|
||||
let model_type = model_config.model_type.clone().unwrap_or_default();
|
||||
let state = Arc::new(AppState {
|
||||
model_name,
|
||||
model_type,
|
||||
engine_sender: Mutex::new(tx),
|
||||
engine_tokenizer: Mutex::new(tokenizer),
|
||||
max_seq_len,
|
||||
|
||||
Reference in New Issue
Block a user