Files
xserv/Cargo.toml
Gahow Wang 1d0ec32e8d server: Jinja chat template rendering via minijinja
Load the model's chat_template.jinja (or tokenizer_config.json
chat_template field) at startup and render it with minijinja instead of
hardcoded per-model prompt builders.

Custom Jinja functions: strftime_now (date formatting), raise_exception
(template validation errors).  Falls back to Qwen3 ChatML template if
no Jinja template is found.

Removes the hardcoded build_prompt_gpt_oss() — the model's own template
now drives prompt formatting, matching llama.cpp's behavior exactly.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-05-31 13:23:18 +08:00

32 lines
664 B
TOML

# Workspace root: resolver selection and the list of member crates.
[workspace]
resolver = "2"
members = [
    "crates/xserv-cuda",
    "crates/xserv-tensor",
    "crates/xserv-kernels",
    "crates/xserv-model",
    "crates/xserv-tokenizer",
    "crates/xserv-server",
    "crates/xserv-distributed",
]

# Package metadata declared once for the workspace. A member crate picks up
# a key by writing `<key>.workspace = true` in its own [package] table.
[workspace.package]
version = "0.1.0"
edition = "2024"
license = "MIT"

# Dependency versions declared once for the workspace. A member crate
# inherits an entry with `<name> = { workspace = true }`.
# Entries are kept in alphabetical order.
[workspace.dependencies]
axum = "0.8"
half = "2"
libc = "0.2"
minijinja = { version = "2", features = ["builtins"] }
rand = "0.8"
regex = "1"
safetensors = "0.5"
serde = { version = "1", features = ["derive"] }
serde_json = "1"
smallvec = "1"
tokio = { version = "1", features = ["full"] }
tokio-stream = "0.1"
uuid = { version = "1", features = ["v4"] }