diff --git a/crates/xserv-model/src/eagle3.rs b/crates/xserv-model/src/eagle3.rs index 27f8964..83bfa57 100644 --- a/crates/xserv-model/src/eagle3.rs +++ b/crates/xserv-model/src/eagle3.rs @@ -15,7 +15,11 @@ use std::path::Path; use xserv_kernels::*; use xserv_tensor::{DType, Device, Tensor}; -pub const EAGLE_HOOK_LAYERS: [usize; 3] = [11, 23, 35]; +/// Target-model layer indices hooked for EAGLE3 auxiliary hidden states (Qwen3-8B, +/// 36 layers). The value comes from the AngelSlim/vLLM speculators training config +/// `dflash_qwen3_8b_sharegpt_online_5k.sh`, which sets `target_layer_ids = "2 18 33"`. +/// It must match the training-time selection, or the EAGLE outputs are wrong. +pub const EAGLE_HOOK_LAYERS: [usize; 3] = [2, 18, 33]; const DRAFT_VOCAB_SIZE: usize = 32000; fn matmul_2d(a: &Tensor, b: &Tensor) -> Tensor {