Files
agentic-kvc/microbench/connector_tax/layerwise/results/mb7_layerwise_bg16.json
Gahow Wang e77bdcac5a Layerwise under load: overlap benefit survives (bg=16)
mb7 with background decode load (8/instance). Critical-path transfer overhead
stays ~constant ~90ms for layerwise vs 158/239/749ms baseline (up to 7.9x at
32k), prefill not slowed, KV correct. Confirms the overlap holds on busy
instances. DESIGN.md updated with idle-vs-load table + the two blockers
(chunk-safety, concurrent-transfer safety) that the full 1200-req trace needs.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-28 16:30:14 +08:00

140 lines
3.3 KiB
JSON

{
"mode": "layerwise",
"model": "/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct",
"raw": [
{
"t_A_s": 0.5905098549992545,
"t_B_s": 0.6900827390018094,
"t_total_s": 0.6904724189989793,
"cached": 8176,
"mode": "layerwise",
"size": 8192,
"rep": 0,
"t_prefill_only_s": 0.5852864849985053,
"kv_gib": 0.75,
"correct": true
},
{
"t_A_s": 0.5897548109969648,
"t_B_s": 0.6827381169969158,
"t_total_s": 0.6828304180016858,
"cached": 8176,
"mode": "layerwise",
"size": 8192,
"rep": 1,
"t_prefill_only_s": 0.5890174580017629,
"kv_gib": 0.75,
"correct": true
},
{
"t_A_s": 0.5850713190011447,
"t_B_s": 0.6744917560026806,
"t_total_s": 0.6745770380002796,
"cached": 8176,
"mode": "layerwise",
"size": 8192,
"rep": 2,
"t_prefill_only_s": 0.5943713950000529,
"kv_gib": 0.75,
"correct": true
},
{
"t_A_s": 1.5030149390004226,
"t_B_s": 1.596173029000056,
"t_total_s": 1.597060264000902,
"cached": 16368,
"mode": "layerwise",
"size": 16384,
"rep": 0,
"t_prefill_only_s": 1.5130829510017065,
"kv_gib": 1.5,
"correct": true
},
{
"t_A_s": 1.499876754998695,
"t_B_s": 1.5940461120007967,
"t_total_s": 1.5948001770011615,
"cached": 16368,
"mode": "layerwise",
"size": 16384,
"rep": 1,
"t_prefill_only_s": 1.5024838620010996,
"kv_gib": 1.5,
"correct": true
},
{
"t_A_s": 1.5068977490009274,
"t_B_s": 1.5950395179970656,
"t_total_s": 1.59571184500237,
"cached": 16368,
"mode": "layerwise",
"size": 16384,
"rep": 2,
"t_prefill_only_s": 1.5303227439981129,
"kv_gib": 1.5,
"correct": true
},
{
"t_A_s": 4.4503932609986805,
"t_B_s": 4.538851200999488,
"t_total_s": 4.539281312001549,
"cached": 32752,
"mode": "layerwise",
"size": 32768,
"rep": 0,
"t_prefill_only_s": 4.446753306998289,
"kv_gib": 3.0,
"correct": true
},
{
"t_A_s": 4.44226107799841,
"t_B_s": 4.551636377997056,
"t_total_s": 4.552389411001059,
"cached": 32752,
"mode": "layerwise",
"size": 32768,
"rep": 1,
"t_prefill_only_s": 4.44538704000297,
"kv_gib": 3.0,
"correct": true
},
{
"t_A_s": 4.440309538000292,
"t_B_s": 4.539836316998844,
"t_total_s": 4.540553365997766,
"cached": 32752,
"mode": "layerwise",
"size": 32768,
"rep": 2,
"t_prefill_only_s": 4.443476915999781,
"kv_gib": 3.0,
"correct": true
}
],
"summary": [
{
"size": 8192,
"n": 3,
"pf_only_ms": 589.0174580017629,
"total_ms": 682.8304180016858,
"overhead_ms": 93.8129599999229,
"all_correct": true
},
{
"size": 16384,
"n": 3,
"pf_only_ms": 1513.0829510017065,
"total_ms": 1595.71184500237,
"overhead_ms": 82.62889400066342,
"all_correct": true
},
{
"size": 32768,
"n": 3,
"pf_only_ms": 4445.38704000297,
"total_ms": 4540.553365997766,
"overhead_ms": 95.16632599479635,
"all_correct": true
}
]
}