Load the model's chat_template.jinja (or tokenizer_config.json chat_template field) at startup and render it with minijinja instead of hardcoded per-model prompt builders. Custom Jinja functions: strftime_now (date formatting), raise_exception (template validation errors). Falls back to Qwen3 ChatML template if no Jinja template is found. Removes the hardcoded build_prompt_gpt_oss() — the model's own template now drives prompt formatting, matching llama.cpp's behavior exactly. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
25 lines
623 B
TOML
25 lines
623 B
TOML
# Package identity. `version` and `edition` are not set here; they are
# inherited from the workspace root manifest via `.workspace = true`.
[package]
name = "xserv-server"
version.workspace = true
edition.workspace = true

# Binary target: the `xserv-server` executable, with its entry point at
# src/main.rs (relative to this manifest).
[[bin]]
name = "xserv-server"
path = "src/main.rs"

[dependencies]
# Local crates, referenced by relative path (sibling directories of this crate).
xserv-cuda = { path = "../xserv-cuda" }
xserv-distributed = { path = "../xserv-distributed" }
xserv-kernels = { path = "../xserv-kernels" }
xserv-model = { path = "../xserv-model" }
xserv-tensor = { path = "../xserv-tensor" }
xserv-tokenizer = { path = "../xserv-tokenizer" }
# External crates; versions and features are inherited from the workspace
# root's [workspace.dependencies] table.
axum.workspace = true
half.workspace = true
minijinja.workspace = true
serde.workspace = true
serde_json.workspace = true
tokio.workspace = true
tokio-stream.workspace = true
uuid.workspace = true