fix(xserv-chat): UTF-8/CJK-aware line input

Cooked-mode read_line() left line editing to the terminal, so Backspace on a
multi-byte 汉字/かな/한글 deleted a byte (or behaved inconsistently across TTYs).
Replace with a raw-mode reader (libc termios): Backspace pops a whole char,
multi-byte input is reassembled from its continuation bytes, and a full-line
redraw renders double-width glyphs correctly. Non-TTY input falls back to a
plain read; raw mode is restored after each line. libc is already a locked
transitive dep, so this builds offline.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-05-29 11:36:54 +08:00
parent 7b8b520cda
commit c2362df1f1
3 changed files with 141 additions and 9 deletions

View File

@@ -18,6 +18,7 @@ license = "MIT"
[workspace.dependencies]
half = "2"
smallvec = "1"
libc = "0.2"
serde = { version = "1", features = ["derive"] }
serde_json = "1"
safetensors = "0.5"

View File

@@ -10,6 +10,7 @@ xserv-kernels = { path = "../xserv-kernels" }
xserv-tokenizer = { path = "../xserv-tokenizer" }
xserv-distributed = { path = "../xserv-distributed" }
half.workspace = true
libc.workspace = true
smallvec.workspace = true
serde.workspace = true
serde_json.workspace = true

View File

@@ -1,4 +1,4 @@
use std::io::{self, IsTerminal, Write};
use std::io::{self, IsTerminal, Read, Write};
use std::path::PathBuf;
use xserv_model::{loader, sample, ModelConfig, PagedKVCache, Qwen3, SamplingParams, BLOCK_SIZE};
@@ -22,6 +22,139 @@ enum Finish {
Length,
}
/// Outcome of one attempt to read a line of user input.
///
/// Derives `Debug`/`PartialEq` so results can be logged and compared directly
/// in diagnostics and tests.
#[derive(Debug, Clone, PartialEq, Eq)]
enum Line {
    /// The text that was entered. The raw-mode editor returns it without a
    /// line terminator; the cooked fallback returns it exactly as
    /// `read_line` produced it (trailing newline included), so callers
    /// should trim.
    Text(String),
    /// No more input: the stream ended (or failed), or Ctrl-D was pressed on
    /// an empty line.
    Eof,
}
/// RAII terminal raw-mode guard. Disables canonical mode + echo (keeps output
/// post-processing and signals), so we read keystrokes ourselves and edit the
/// line UTF-8-aware. Restores the original termios on drop.
///
/// Construct it with `RawMode::enable()` and keep the returned value alive for
/// as long as keystrokes should be delivered unbuffered and unechoed.
struct RawMode {
    /// Terminal attributes of stdin as reported by `tcgetattr` before any
    /// change was applied; written back verbatim when the guard is dropped.
    orig: libc::termios,
}
impl RawMode {
    /// Put stdin into non-canonical, no-echo mode and return a guard holding
    /// the previous settings.
    ///
    /// Returns `None` when the current attributes cannot be read or the new
    /// ones cannot be applied (e.g. stdin is not a terminal); in that case the
    /// caller is expected to fall back to a plain buffered read.
    fn enable() -> Option<Self> {
        let fd = libc::STDIN_FILENO;

        // SAFETY: `termios` is a plain C struct of integers and arrays, for
        // which the all-zero bit pattern is a valid value; it is fully
        // overwritten by `tcgetattr` below before being used.
        let mut saved: libc::termios = unsafe { std::mem::zeroed() };

        // SAFETY: `saved` is a valid, exclusively borrowed `termios`.
        if unsafe { libc::tcgetattr(fd, &mut saved) } != 0 {
            return None;
        }

        // Start from the current settings and clear only line buffering and
        // echo. Signal generation (ISIG) and output post-processing are left on.
        // NOTE(review): because ISIG stays on, Ctrl-C still raises SIGINT;
        // presumably that ends the process without running `Drop` — confirm
        // whether the terminal needs restoring from a signal handler.
        let mut raw = saved;
        raw.c_lflag &= !(libc::ICANON | libc::ECHO);
        // Block until at least one byte is available, with no inter-byte timer.
        raw.c_cc[libc::VMIN as usize] = 1;
        raw.c_cc[libc::VTIME as usize] = 0;

        // SAFETY: `raw` is a fully initialised `termios` that outlives the call.
        let applied = unsafe { libc::tcsetattr(fd, libc::TCSANOW, &raw) } == 0;
        applied.then_some(RawMode { orig: saved })
    }
}
impl Drop for RawMode {
    /// Write the attributes captured at construction back to stdin.
    ///
    /// This is best effort: the return code is deliberately ignored because
    /// nothing useful can be done about a failure while dropping.
    fn drop(&mut self) {
        let saved: *const libc::termios = &self.orig;
        // SAFETY: `saved` points at the `termios` owned by `self`, which is
        // alive and not mutated for the duration of the call.
        let _ = unsafe { libc::tcsetattr(libc::STDIN_FILENO, libc::TCSANOW, saved) };
    }
}
/// Read one line with UTF-8/CJK-aware editing. In a TTY this enters raw mode and
/// handles keystrokes so Backspace deletes a whole character (not a byte), and
/// multi-byte input (汉字/日本語/한글) renders correctly. Non-TTY (piped) input
/// falls back to a plain cooked read.
fn read_line_edited(prompt: &str) -> Line {
let cooked = || -> Line {
print!("{prompt}");
io::stdout().flush().ok();
let mut s = String::new();
match io::stdin().read_line(&mut s) {
Ok(0) | Err(_) => Line::Eof,
Ok(_) => Line::Text(s),
}
};
if !io::stdin().is_terminal() {
return cooked();
}
let Some(_raw) = RawMode::enable() else {
return cooked();
};
// Single-line editor: on every edit, rewrite the whole line so the terminal
// renders correct (incl. double-width CJK) glyphs; \x1b[K clears leftovers.
let redraw = |buf: &str| {
print!("\r{prompt}{buf}\x1b[K");
io::stdout().flush().ok();
};
let mut buf = String::new();
redraw(&buf);
let mut stdin = io::stdin().lock();
let mut byte = [0u8; 1];
loop {
if stdin.read(&mut byte).unwrap_or(0) == 0 {
// EOF on the stream.
if buf.is_empty() {
return Line::Eof;
}
break;
}
match byte[0] {
b'\r' | b'\n' => {
println!();
break;
}
0x7f | 0x08 => {
// Backspace: drop one whole char (String::pop is char-aware).
buf.pop();
redraw(&buf);
}
0x04 => {
// Ctrl-D: EOF only when the line is empty.
if buf.is_empty() {
return Line::Eof;
}
}
0x1b => {
// Escape sequence (arrows, etc.): consume and ignore the 2 bytes
// of a typical CSI sequence so they don't land in the buffer.
let mut seq = [0u8; 2];
let _ = stdin.read(&mut seq);
}
b if b < 0x20 => { /* other control bytes: ignore */ }
b if b < 0x80 => {
buf.push(b as char);
redraw(&buf);
}
b => {
// UTF-8 multi-byte: read the continuation bytes for this char.
let extra = if b >= 0xF0 { 3 } else if b >= 0xE0 { 2 } else { 1 };
let mut bytes = vec![b];
let mut cont = [0u8; 1];
let mut ok = true;
for _ in 0..extra {
if stdin.read(&mut cont).unwrap_or(0) == 0 {
ok = false;
break;
}
bytes.push(cont[0]);
}
if ok {
if let Ok(s) = std::str::from_utf8(&bytes) {
buf.push_str(s);
redraw(&buf);
}
}
}
}
}
Line::Text(buf)
}
fn main() {
let opts = parse_args();
@@ -65,14 +198,11 @@ fn main() {
eprintln!("Commands: /exit, /quit, /clear\n");
loop {
print!("user> ");
io::stdout().flush().unwrap();
let mut input = String::new();
if io::stdin().read_line(&mut input).unwrap() == 0 {
break;
}
let input = input.trim();
let line = match read_line_edited("user> ") {
Line::Eof => break,
Line::Text(s) => s,
};
let input = line.trim();
if input.is_empty() {
continue;
}