Files
agentic-kvc/microbench/connector_tax/layerwise/results/mb7_baseline_bg16.json
Gahow Wang e77bdcac5a Layerwise under load: overlap benefit survives (bg=16)
mb7 rerun with background decode load (8 per instance). Critical-path transfer
overhead stays roughly constant at ~90 ms for layerwise, versus 158/239/749 ms
for the baseline at 8k/16k/32k tokens (up to 7.9x lower at 32k); prefill is not
slowed and the transferred KV is correct. This confirms that the overlap
benefit holds on busy instances. DESIGN.md is updated with an idle-vs-load
table and the two blockers (chunk safety, concurrent-transfer safety) that must
be resolved before the full 1200-request trace can be run.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-28 16:30:14 +08:00

140 lines
3.4 KiB
JSON

{
"mode": "baseline",
"model": "/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct",
"raw": [
{
"t_prefill_s": 0.5868483350022871,
"t_xfer_s": 0.19584889299949282,
"t_total_s": 0.7827702419999696,
"cached": 8176,
"mode": "baseline",
"size": 8192,
"rep": 0,
"t_prefill_only_s": 0.5920699099988269,
"kv_gib": 0.75,
"correct": true
},
{
"t_prefill_s": 0.5875704979989678,
"t_xfer_s": 0.1554814909977722,
"t_total_s": 0.7431365060001554,
"cached": 8176,
"mode": "baseline",
"size": 8192,
"rep": 1,
"t_prefill_only_s": 0.5814537600017502,
"kv_gib": 0.75,
"correct": true
},
{
"t_prefill_s": 0.5852241569991747,
"t_xfer_s": 0.15129724399957922,
"t_total_s": 0.7365909610016388,
"cached": 8176,
"mode": "baseline",
"size": 8192,
"rep": 2,
"t_prefill_only_s": 0.5846994370003813,
"kv_gib": 0.75,
"correct": true
},
{
"t_prefill_s": 1.498547145001794,
"t_xfer_s": 0.2475714690008317,
"t_total_s": 1.7462187470009667,
"cached": 16368,
"mode": "baseline",
"size": 16384,
"rep": 0,
"t_prefill_only_s": 1.5670790190015396,
"kv_gib": 1.5,
"correct": true
},
{
"t_prefill_s": 1.5025789940009417,
"t_xfer_s": 0.24532966799961287,
"t_total_s": 1.7479741930001182,
"cached": 16368,
"mode": "baseline",
"size": 16384,
"rep": 1,
"t_prefill_only_s": 1.5008903820016712,
"kv_gib": 1.5,
"correct": true
},
{
"t_prefill_s": 1.5021674179988622,
"t_xfer_s": 0.24640760400143336,
"t_total_s": 1.7486415580024186,
"cached": 16368,
"mode": "baseline",
"size": 16384,
"rep": 2,
"t_prefill_only_s": 1.509417139001016,
"kv_gib": 1.5,
"correct": true
},
{
"t_prefill_s": 4.444555983998725,
"t_xfer_s": 0.4227471090016479,
"t_total_s": 4.86737214599998,
"cached": 32752,
"mode": "baseline",
"size": 32768,
"rep": 0,
"t_prefill_only_s": 4.4467717689985875,
"kv_gib": 3.0,
"correct": true
},
{
"t_prefill_s": 4.442135782999685,
"t_xfer_s": 0.7519038230020669,
"t_total_s": 5.194113359000767,
"cached": 32752,
"mode": "baseline",
"size": 32768,
"rep": 1,
"t_prefill_only_s": 4.445541313998547,
"kv_gib": 3.0,
"correct": true
},
{
"t_prefill_s": 4.439772993999213,
"t_xfer_s": 0.7855456319994119,
"t_total_s": 5.225392060998274,
"cached": 32752,
"mode": "baseline",
"size": 32768,
"rep": 2,
"t_prefill_only_s": 4.442906365002273,
"kv_gib": 3.0,
"correct": true
}
],
"summary": [
{
"size": 8192,
"n": 3,
"pf_only_ms": 584.6994370003813,
"total_ms": 743.1365060001554,
"overhead_ms": 158.43706899977406,
"all_correct": true
},
{
"size": 16384,
"n": 3,
"pf_only_ms": 1509.417139001016,
"total_ms": 1747.9741930001182,
"overhead_ms": 238.5570539991022,
"all_correct": true
},
{
"size": 32768,
"n": 3,
"pf_only_ms": 4445.541313998547,
"total_ms": 5194.113359000767,
"overhead_ms": 748.57204500222,
"all_correct": true
}
]
}