diff --git a/analysis/mb2/A_inter_kvboth.jsonl b/analysis/mb2/A_inter_kvboth.jsonl new file mode 100644 index 0000000..e833d8d --- /dev/null +++ b/analysis/mb2/A_inter_kvboth.jsonl @@ -0,0 +1,51 @@ +{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 50331648, "duration_s": 0.06580113701056689, "t_start_unix": 1779885615.6732209, "ret": 0, "tp_rank": 0, "t_log_unix": 1779885615.7390358} +{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 50331648, "duration_s": 0.0052392969955690205, "t_start_unix": 1779885616.0322638, "ret": 0, "tp_rank": 0, "t_log_unix": 1779885616.0375087} +{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 201326592, "duration_s": 0.02050818904535845, "t_start_unix": 1779885616.2556505, "ret": 0, "tp_rank": 0, "t_log_unix": 1779885616.2761638} +{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 201326592, "duration_s": 0.02001398801803589, "t_start_unix": 1779885616.4400308, "ret": 0, "tp_rank": 0, "t_log_unix": 1779885616.46005} +{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 805306368, "duration_s": 0.08249958901433274, "t_start_unix": 1779885617.072654, "ret": 0, "tp_rank": 0, "t_log_unix": 1779885617.1551628} +{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 805306368, "duration_s": 0.08082435996038839, "t_start_unix": 1779885617.7853239, "ret": 0, "tp_rank": 0, "t_log_unix": 1779885617.866155} +{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 1572864, "duration_s": 0.00040365802124142647, "t_start_unix": 1779885642.3123364, "ret": 0, "tp_rank": 0, "t_log_unix": 1779885642.3127441} +{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 1572864, "duration_s": 0.000374739000108093, "t_start_unix": 1779885642.3945863, "ret": 0, "tp_rank": 0, "t_log_unix": 1779885642.3949661} +{"event": 
"send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 50331648, "duration_s": 0.005158354004379362, "t_start_unix": 1779885642.4766958, "ret": 0, "tp_rank": 0, "t_log_unix": 1779885642.481858} +{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 50331648, "duration_s": 0.00513053999748081, "t_start_unix": 1779885642.5614145, "ret": 0, "tp_rank": 0, "t_log_unix": 1779885642.5665495} +{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 50331648, "duration_s": 0.0051341859507374465, "t_start_unix": 1779885642.6461189, "ret": 0, "tp_rank": 0, "t_log_unix": 1779885642.6512585} +{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 100663296, "duration_s": 0.010125375993084162, "t_start_unix": 1779885642.7582293, "ret": 0, "tp_rank": 0, "t_log_unix": 1779885642.76836} +{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 100663296, "duration_s": 0.010367848037276417, "t_start_unix": 1779885642.878179, "ret": 0, "tp_rank": 0, "t_log_unix": 1779885642.8885527} +{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 100663296, "duration_s": 0.010208865976892412, "t_start_unix": 1779885642.997267, "ret": 0, "tp_rank": 0, "t_log_unix": 1779885643.0074801} +{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 100663296, "duration_s": 0.01015280099818483, "t_start_unix": 1779885643.1138487, "ret": 0, "tp_rank": 0, "t_log_unix": 1779885643.124007} +{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 100663296, "duration_s": 0.010063701018225402, "t_start_unix": 1779885643.2299926, "ret": 0, "tp_rank": 0, "t_log_unix": 1779885643.2400591} +{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 1572864, "duration_s": 0.00040220399387180805, "t_start_unix": 1779885643.4041483, "ret": 0, "tp_rank": 0, "t_log_unix": 1779885643.4045541} 
+{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 1572864, "duration_s": 0.0003751559997908771, "t_start_unix": 1779885643.569847, "ret": 0, "tp_rank": 0, "t_log_unix": 1779885643.5702271} +{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 201326592, "duration_s": 0.020166583999525756, "t_start_unix": 1779885643.7347023, "ret": 0, "tp_rank": 0, "t_log_unix": 1779885643.7548745} +{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 201326592, "duration_s": 0.019989027990959585, "t_start_unix": 1779885643.9190295, "ret": 0, "tp_rank": 0, "t_log_unix": 1779885643.939023} +{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 201326592, "duration_s": 0.02000429102918133, "t_start_unix": 1779885644.1027336, "ret": 0, "tp_rank": 0, "t_log_unix": 1779885644.122742} +{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 402653184, "duration_s": 0.040499016002286226, "t_start_unix": 1779885644.419112, "ret": 0, "tp_rank": 0, "t_log_unix": 1779885644.4596438} +{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 402653184, "duration_s": 0.04015034798067063, "t_start_unix": 1779885644.761118, "ret": 0, "tp_rank": 0, "t_log_unix": 1779885644.8012745} +{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 402653184, "duration_s": 0.04006708099041134, "t_start_unix": 1779885645.1008255, "ret": 0, "tp_rank": 0, "t_log_unix": 1779885645.1408994} +{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 402653184, "duration_s": 0.04012463602703065, "t_start_unix": 1779885645.440819, "ret": 0, "tp_rank": 0, "t_log_unix": 1779885645.4809544} +{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 402653184, "duration_s": 0.04009692999534309, "t_start_unix": 1779885645.7793777, "ret": 0, "tp_rank": 0, "t_log_unix": 
1779885645.8194828} +{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 1572864, "duration_s": 0.00046314700739458203, "t_start_unix": 1779885646.445756, "ret": 0, "tp_rank": 0, "t_log_unix": 1779885646.4462252} +{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 1572864, "duration_s": 0.0004179630195721984, "t_start_unix": 1779885647.0757725, "ret": 0, "tp_rank": 0, "t_log_unix": 1779885647.0761962} +{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 805306368, "duration_s": 0.08170936699025333, "t_start_unix": 1779885647.7075222, "ret": 0, "tp_rank": 0, "t_log_unix": 1779885647.7892444} +{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 805306368, "duration_s": 0.08086105203256011, "t_start_unix": 1779885648.422354, "ret": 0, "tp_rank": 0, "t_log_unix": 1779885648.503224} +{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 805306368, "duration_s": 0.08071460900828242, "t_start_unix": 1779885649.1382625, "ret": 0, "tp_rank": 0, "t_log_unix": 1779885649.2189863} +{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 1610612736, "duration_s": 0.1638482889975421, "t_start_unix": 1779885650.8273866, "ret": 0, "tp_rank": 0, "t_log_unix": 1779885650.991251} +{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 1610612736, "duration_s": 0.16476035403320566, "t_start_unix": 1779885652.5951493, "ret": 0, "tp_rank": 0, "t_log_unix": 1779885652.7599196} +{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 1610612736, "duration_s": 0.1617715489701368, "t_start_unix": 1779885654.3595936, "ret": 0, "tp_rank": 0, "t_log_unix": 1779885654.5214472} +{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 1610612736, "duration_s": 0.1616577100357972, "t_start_unix": 1779885656.125777, "ret": 0, "tp_rank": 0, 
"t_log_unix": 1779885656.2874432} +{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 1610612736, "duration_s": 0.1617818950326182, "t_start_unix": 1779885657.8867118, "ret": 0, "tp_rank": 0, "t_log_unix": 1779885658.0485108} +{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 3221225472, "duration_s": 0.30771408596774563, "t_start_unix": 1779885662.69878, "ret": 0, "tp_rank": 0, "t_log_unix": 1779885663.0065253} +{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 3221225472, "duration_s": 0.3096057590446435, "t_start_unix": 1779885667.6915898, "ret": 0, "tp_rank": 0, "t_log_unix": 1779885668.0012283} +{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 3221225472, "duration_s": 0.3080696280230768, "t_start_unix": 1779885672.684307, "ret": 0, "tp_rank": 0, "t_log_unix": 1779885672.9923992} +{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 3221225472, "duration_s": 0.5257709489669651, "t_start_unix": 1779885677.7157974, "ret": 0, "tp_rank": 0, "t_log_unix": 1779885678.2415998} +{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 3221225472, "duration_s": 0.5268570999614894, "t_start_unix": 1779885682.9683614, "ret": 0, "tp_rank": 0, "t_log_unix": 1779885683.4952588} +{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 6442450944, "duration_s": 1.901308581000194, "t_start_unix": 1779885698.9342654, "ret": 0, "tp_rank": 0, "t_log_unix": 1779885700.835605} +{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 6442450944, "duration_s": 1.9211939970264211, "t_start_unix": 1779885716.3570645, "ret": 0, "tp_rank": 0, "t_log_unix": 1779885718.2782962} +{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 6442450944, "duration_s": 1.7335722800344229, "t_start_unix": 1779885733.838071, "ret": 0, "tp_rank": 
0, "t_log_unix": 1779885735.571683} +{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 6442450944, "duration_s": 0.9299940629862249, "t_start_unix": 1779885751.0288215, "ret": 0, "tp_rank": 0, "t_log_unix": 1779885751.9588547} +{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 6442450944, "duration_s": 0.6535220990190282, "t_start_unix": 1779885767.3456392, "ret": 0, "tp_rank": 0, "t_log_unix": 1779885767.9991918} +{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 12884901888, "duration_s": 1.2830275790183805, "t_start_unix": 1779885822.9884846, "ret": 0, "tp_rank": 0, "t_log_unix": 1779885824.2715507} +{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 12884901888, "duration_s": 5.053741328010801, "t_start_unix": 1779885879.5277712, "ret": 0, "tp_rank": 0, "t_log_unix": 1779885884.5815506} +{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 12884901888, "duration_s": 9.158571614010725, "t_start_unix": 1779885939.9274101, "ret": 0, "tp_rank": 0, "t_log_unix": 1779885949.086019} +{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 12884901888, "duration_s": 2.8184301540022716, "t_start_unix": 1779886004.3031003, "ret": 0, "tp_rank": 0, "t_log_unix": 1779886007.121565} +{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 12884901888, "duration_s": 1.4530277770245448, "t_start_unix": 1779886062.159227, "ret": 0, "tp_rank": 0, "t_log_unix": 1779886063.6122832} diff --git a/analysis/mb2/README.md b/analysis/mb2/README.md index 6ef5b8f..549d6e3 100644 --- a/analysis/mb2/README.md +++ b/analysis/mb2/README.md @@ -10,8 +10,19 @@ the EAR paper. 
Re-runs append a dated section at the bottom; the | Path | Steady-state BW | Agentic-tail p99 transfer (11.5 GiB KV) | |---|---|---| -| **intra-node** (dash1 GPU 0↔1, kv_both, Mooncake 0.3.11) | **~9.7 GB/s** (96 MiB – 3 GiB) | p50 **1.9 s** · min **1.5 s** · max **10 s** | -| inter-node (dash1 ↔ dash2, RDMA) | TODO | TODO | +| **intra-node** (dash1 GPU 0↔1) | **~9.7 GB/s** (96 MiB – 3 GiB) | p50 **2.8 s** · min **1.5 s** · max **10 s** | +| **inter-node** (dash1 GPU0 → dash2 GPU0, 200 Gbps RoCE) | **~10.0 GB/s** (essentially identical) | p50 **2.8 s** · min **1.3 s** · max **9.2 s** | + +**Cross-cutting finding** (2026-05-27): **Mooncake transfer cost is +topology-independent** on this hardware. Intra-node and inter-node curves +are within ~3% of each other at p50 up to 3 GiB, n = 5 per size (see `figs/mb2_transfer_time_compare.png`, +`figs/mb2_transfer_bw_compare.png`). Mechanism: Mooncake's +`batch_transfer_sync_write` always goes through the RDMA NIC, including +the intra-node case (RDMA loopback). The RDMA NIC path, not NVLink, sets +the ceiling (~10 GB/s ≈ 80 Gbps, well below the 200 Gbps line rate). **Implication for §3.2**: PD-disaggregation does not +get cheaper by co-locating P and D on the same node — the ~9.7 GB/s +ceiling applies regardless. Placement alone cannot reduce the transfer +cost, let alone halve it. **Headline for the paper §3.2**: at the agentic tail, **pure KV transfer takes 1.5 – 10 s**. A median agentic decode is **50 – 200 ms** of tool-call @@ -229,3 +240,61 @@ Result table above. **9.7 GB/s steady-state up to 3 GiB**, variance opens at 6 GiB, p99 agentic-tail transfer 1.5 – 10 s. Committed as `de164e5`. + +### 2026-05-27 — inter-node, kv_both, dash1 GPU 0 → dash2 GPU 0 + +Same sweep config. 200 Gbps RoCE between hosts (RTT ~0.2 ms ping). +Producer A on dash1 GPU 0, consumer B on dash2 GPU 0. +remote_bootstrap_addr=`http://172.27.123.142:8998` (dash1's internal IP). + +Raw events: `A_inter_kvboth.jsonl` (51 send_blocks events: 6 pre-sweep sanity transfers at 48/192/768 MiB, 39 full-size sweep transfers, and 6 sweep transfers of only 1.5 MiB — the first two trials at 512, 2048 and 8192 tokens, whose prefixes the sanity requests had presumably already cached on B).
+B's receive_kv events are **missing** for this run — the +`MB2_LOG_DIR` env var did not propagate from the start script to +vLLM's EngineCore subprocess on dash2: +`cat /proc/$ENGINE_PID/environ` comes up empty on dash2 but contains +`MB2_LOG_DIR` on dash1 (flagged for future investigation; likely a +spawn-vs-fork difference in vLLM's multiproc executor across hosts). +Pure-transfer numbers below come from A's send_blocks alone; the full +rx_total breakdown is not available for this run. + +Per-size pure-transfer (analyzed by `analyze_mb2_send_only.py`; events are grouped by transfer size, so the n = 5 for 512, 2048 and 8192 tokens is 3 sweep transfers plus the 2 pre-sweep sanity transfers of the same size — the 65.8 ms max at 512 is the very first transfer of the session): + +| input_tokens | KV (MiB) | n | pure_ms p50 | min | max | BW p50 (GB/s) | BW max | +|---:|---:|---:|---:|---:|---:|---:|---:| +| 512 | 48 | 5 | 5.2 | 5.1 | 65.8 | 9.76 | 9.81 | +| 1024 | 96 | 5 | 10.2 | 10.1 | 10.4 | 9.91 | 10.00 | +| 2048 | 192 | 5 | 20.0 | 20.0 | 20.5 | 10.06 | 10.07 | +| 4096 | 384 | 5 | 40.1 | 40.1 | 40.5 | 10.04 | 10.05 | +| 8192 | 768 | 5 | 80.9 | 80.7 | 82.5 | 9.96 | 9.98 | +| 16384 | 1536 | 5 | 161.8 | 161.7 | 164.8 | 9.96 | 9.96 | +| 32768 | 3072 | 5 | 309.6 | 307.7 | 526.9 | 10.40 | 10.47 | +| 65536 | 6144 | 5 | 1733.6 | 653.5 | 1921.2 | 3.72 | 9.86 | +| 131072 | 12288 | 5 | 2818.4 | 1283.0 | 9158.6 | 4.57 | 10.04 | + +Side-by-side comparison with the 2026-05-27 intra-node run: + +| Size | intra p50 ms | inter p50 ms | gap | intra GB/s | inter GB/s | +|---|---:|---:|---:|---:|---:| +| 512 | 5.3 | 5.2 | −2% | 9.40 | 9.76 | +| 1024 | 10.4 | 10.2 | −2% | 9.68 | 9.91 | +| 2048 | 20.6 | 20.0 | −3% | 9.75 | 10.06 | +| 4096 | 41.5 | 40.1 | −3% | 9.71 | 10.04 | +| 8192 | 83.7 | 80.9 | −3% | 9.62 | 9.96 | +| 16384 | 167.1 | 161.8 | −3% | 9.64 | 9.96 | +| 32768 | 320.9 | 309.6 | −3% | 10.04 | 10.40 | +| 65536 | 1895.1 | 1733.6 | −9% | 3.40 | 3.72 | +| 131072 | 2835.1 | 2818.4 | −1% | 4.54 | 4.57 | + +The two paths produce essentially the same numbers — **Mooncake +intra-node is not using NVLink**; it goes through RDMA loopback on the +local NIC and gets the same ~10 GB/s ceiling as
cross-node RDMA. The +6+ GiB variance regime is also identical between paths. + +Figures: `figs/mb2_transfer_time_inter.png`, `figs/mb2_transfer_bw_inter.png`, +`figs/mb2_transfer_time_compare.png` (overlay), `figs/mb2_transfer_bw_compare.png`. + +This collapses the §3.2 narrative to a single number: **PD-disagg +across this cluster costs ~9.7–10 GB/s of transfer bandwidth no matter +how you place P and D** (within-node or across-node). For p99 agentic +KV (11.5 GiB), that's 1.3–10 s of transfer; for 6 GiB it's 0.7–2 s. +Decode is 50–200 ms. So PD-disagg's cost dominates regardless of layout. diff --git a/analysis/mb2/inter_kvboth_breakdown.json b/analysis/mb2/inter_kvboth_breakdown.json new file mode 100644 index 0000000..6bbe1b4 --- /dev/null +++ b/analysis/mb2/inter_kvboth_breakdown.json @@ -0,0 +1,112 @@ +{ + "summary": [ + { + "input_tokens": 512, + "kv_mib": 48.0, + "n": 5, + "pure_transfer_ms_mean": 17.29, + "pure_transfer_ms_p50": 5.16, + "pure_transfer_ms_min": 5.13, + "pure_transfer_ms_max": 65.8, + "throughput_gbps_mean": 7.95, + "throughput_gbps_p50": 9.76, + "throughput_gbps_max": 9.81 + }, + { + "input_tokens": 1024, + "kv_mib": 96.0, + "n": 5, + "pure_transfer_ms_mean": 10.18, + "pure_transfer_ms_p50": 10.15, + "pure_transfer_ms_min": 10.06, + "pure_transfer_ms_max": 10.37, + "throughput_gbps_mean": 9.89, + "throughput_gbps_p50": 9.91, + "throughput_gbps_max": 10.0 + }, + { + "input_tokens": 2048, + "kv_mib": 192.0, + "n": 5, + "pure_transfer_ms_mean": 20.14, + "pure_transfer_ms_p50": 20.01, + "pure_transfer_ms_min": 19.99, + "pure_transfer_ms_max": 20.51, + "throughput_gbps_mean": 10.0, + "throughput_gbps_p50": 10.06, + "throughput_gbps_max": 10.07 + }, + { + "input_tokens": 4096, + "kv_mib": 384.0, + "n": 5, + "pure_transfer_ms_mean": 40.19, + "pure_transfer_ms_p50": 40.12, + "pure_transfer_ms_min": 40.07, + "pure_transfer_ms_max": 40.5, + "throughput_gbps_mean": 10.02, + "throughput_gbps_p50": 10.04, + "throughput_gbps_max": 10.05 + }, + { + 
"input_tokens": 8192, + "kv_mib": 768.0, + "n": 5, + "pure_transfer_ms_mean": 81.32, + "pure_transfer_ms_p50": 80.86, + "pure_transfer_ms_min": 80.71, + "pure_transfer_ms_max": 82.5, + "throughput_gbps_mean": 9.9, + "throughput_gbps_p50": 9.96, + "throughput_gbps_max": 9.98 + }, + { + "input_tokens": 16384, + "kv_mib": 1536.0, + "n": 5, + "pure_transfer_ms_mean": 162.76, + "pure_transfer_ms_p50": 161.78, + "pure_transfer_ms_min": 161.66, + "pure_transfer_ms_max": 164.76, + "throughput_gbps_mean": 9.9, + "throughput_gbps_p50": 9.96, + "throughput_gbps_max": 9.96 + }, + { + "input_tokens": 32768, + "kv_mib": 3072.0, + "n": 5, + "pure_transfer_ms_mean": 395.6, + "pure_transfer_ms_p50": 309.61, + "pure_transfer_ms_min": 307.71, + "pure_transfer_ms_max": 526.86, + "throughput_gbps_mean": 8.71, + "throughput_gbps_p50": 10.4, + "throughput_gbps_max": 10.47 + }, + { + "input_tokens": 65536, + "kv_mib": 6144.0, + "n": 5, + "pure_transfer_ms_mean": 1427.92, + "pure_transfer_ms_p50": 1733.57, + "pure_transfer_ms_min": 653.52, + "pure_transfer_ms_max": 1921.19, + "throughput_gbps_mean": 5.45, + "throughput_gbps_p50": 3.72, + "throughput_gbps_max": 9.86 + }, + { + "input_tokens": 131072, + "kv_mib": 12288.0, + "n": 5, + "pure_transfer_ms_mean": 3953.36, + "pure_transfer_ms_p50": 2818.43, + "pure_transfer_ms_min": 1283.03, + "pure_transfer_ms_max": 9158.57, + "throughput_gbps_mean": 5.49, + "throughput_gbps_p50": 4.57, + "throughput_gbps_max": 10.04 + } + ] +} \ No newline at end of file diff --git a/analysis/mb2/inter_kvboth_client.json b/analysis/mb2/inter_kvboth_client.json new file mode 100644 index 0000000..da798f2 --- /dev/null +++ b/analysis/mb2/inter_kvboth_client.json @@ -0,0 +1,679 @@ +{ + "model": "/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct", + "kv_bytes_per_token": 98304, + "src_host": "172.27.123.142", + "src_port": 8000, + "dst_host": "172.27.123.133", + "dst_port": 8001, + "config_label": "inter-kvboth", + "raw": [ + { + "input_tokens": 512, + 
"session": "677ca02530774bae990d549d0591d336", + "t_step1_client_unix": 1779885642.2549865, + "t_step2_client_unix": 1779885642.3058398, + "t_step2_end_unix": 1779885642.3249059, + "t_prefill_s": 0.05082751903682947, + "t_transfer_s": 0.019030610972549766, + "t_followup_s": 0.01642513304250315, + "cached_followup": 496, + "pull_completion_tokens": 1, + "ok": true + }, + { + "input_tokens": 512, + "session": "6758b8079675495a9a13a73db1199a71", + "t_step1_client_unix": 1779885642.3416288, + "t_step2_client_unix": 1779885642.3896646, + "t_step2_end_unix": 1779885642.40659, + "t_prefill_s": 0.04800783301470801, + "t_transfer_s": 0.016899197013117373, + "t_followup_s": 0.017115428985562176, + "cached_followup": 496, + "pull_completion_tokens": 1, + "ok": true + }, + { + "input_tokens": 512, + "session": "e9da485a88cc40eb927e18c37298d815", + "t_step1_client_unix": 1779885642.4239926, + "t_step2_client_unix": 1779885642.4724562, + "t_step2_end_unix": 1779885642.492528, + "t_prefill_s": 0.04843967000488192, + "t_transfer_s": 0.02004897501319647, + "t_followup_s": 0.016289777995552868, + "cached_followup": 496, + "pull_completion_tokens": 1, + "ok": true + }, + { + "input_tokens": 512, + "session": "a70db16a01b04409b04ef30e94082720", + "t_step1_client_unix": 1779885642.509085, + "t_step2_client_unix": 1779885642.5566719, + "t_step2_end_unix": 1779885642.577205, + "t_prefill_s": 0.047563806001562625, + "t_transfer_s": 0.020512140006758273, + "t_followup_s": 0.016482150997035205, + "cached_followup": 496, + "pull_completion_tokens": 1, + "ok": true + }, + { + "input_tokens": 512, + "session": "d13cddd850904d4591b15c112ab32a5f", + "t_step1_client_unix": 1779885642.5939884, + "t_step2_client_unix": 1779885642.6417143, + "t_step2_end_unix": 1779885642.6617038, + "t_prefill_s": 0.04769411502638832, + "t_transfer_s": 0.019964047998655587, + "t_followup_s": 0.01518680399749428, + "cached_followup": 496, + "pull_completion_tokens": 1, + "ok": true + }, + { + "input_tokens": 1024, + 
"session": "64d9b1335bd04698ab4f681fe4fe1e40", + "t_step1_client_unix": 1779885642.6773622, + "t_step2_client_unix": 1779885642.7512894, + "t_step2_end_unix": 1779885642.7811818, + "t_prefill_s": 0.07390057999873534, + "t_transfer_s": 0.029867444012779742, + "t_followup_s": 0.017206061049364507, + "cached_followup": 1008, + "pull_completion_tokens": 1, + "ok": true + }, + { + "input_tokens": 1024, + "session": "0fc46cc27a844482b9eab2fef48a56d2", + "t_step1_client_unix": 1779885642.7988534, + "t_step2_client_unix": 1779885642.8733544, + "t_step2_end_unix": 1779885642.9005985, + "t_prefill_s": 0.074467666039709, + "t_transfer_s": 0.027220223972108215, + "t_followup_s": 0.0172900699544698, + "cached_followup": 1008, + "pull_completion_tokens": 1, + "ok": true + }, + { + "input_tokens": 1024, + "session": "dac7cceb94c0465abce6bff73119de06", + "t_step1_client_unix": 1779885642.9183617, + "t_step2_client_unix": 1779885642.9924595, + "t_step2_end_unix": 1779885643.0189831, + "t_prefill_s": 0.07407327799592167, + "t_transfer_s": 0.026499781000893563, + "t_followup_s": 0.01622009096900001, + "cached_followup": 1008, + "pull_completion_tokens": 1, + "ok": true + }, + { + "input_tokens": 1024, + "session": "4137a7ebecf148db8aa61249ef0d0100", + "t_step1_client_unix": 1779885643.0356703, + "t_step2_client_unix": 1779885643.109194, + "t_step2_end_unix": 1779885643.135519, + "t_prefill_s": 0.07349911500932649, + "t_transfer_s": 0.02630024799145758, + "t_followup_s": 0.01637468097032979, + "cached_followup": 1008, + "pull_completion_tokens": 1, + "ok": true + }, + { + "input_tokens": 1024, + "session": "e28432915bf64c4891dc5157d0bebe41", + "t_step1_client_unix": 1779885643.1523507, + "t_step2_client_unix": 1779885643.2252986, + "t_step2_end_unix": 1779885643.2520146, + "t_prefill_s": 0.07292484800564125, + "t_transfer_s": 0.026693789986893535, + "t_followup_s": 0.016969401971437037, + "cached_followup": 1008, + "pull_completion_tokens": 1, + "ok": true + }, + { + "input_tokens": 
2048, + "session": "717a42513a2c48358ebfbb73ca07c3fd", + "t_step1_client_unix": 1779885643.269825, + "t_step2_client_unix": 1779885643.3983366, + "t_step2_end_unix": 1779885643.417481, + "t_prefill_s": 0.12848620803561062, + "t_transfer_s": 0.019112227018922567, + "t_followup_s": 0.017094306997023523, + "cached_followup": 2032, + "pull_completion_tokens": 1, + "ok": true + }, + { + "input_tokens": 2048, + "session": "f2d1287f0e564a60a0d8476bb917ec9a", + "t_step1_client_unix": 1779885643.4353812, + "t_step2_client_unix": 1779885643.5640473, + "t_step2_end_unix": 1779885643.5819182, + "t_prefill_s": 0.12862860300811008, + "t_transfer_s": 0.01783871802035719, + "t_followup_s": 0.016443474043626338, + "cached_followup": 2032, + "pull_completion_tokens": 1, + "ok": true + }, + { + "input_tokens": 2048, + "session": "cff5a22ae9c341d8acc455188ecc63b5", + "t_step1_client_unix": 1779885643.5992126, + "t_step2_client_unix": 1779885643.7292078, + "t_step2_end_unix": 1779885643.766638, + "t_prefill_s": 0.12995763396611437, + "t_transfer_s": 0.037401642999611795, + "t_followup_s": 0.018064863979816437, + "cached_followup": 2032, + "pull_completion_tokens": 1, + "ok": true + }, + { + "input_tokens": 2048, + "session": "cc86ca16270b493fab68af1e889007af", + "t_step1_client_unix": 1779885643.7855458, + "t_step2_client_unix": 1779885643.9140093, + "t_step2_end_unix": 1779885643.9510815, + "t_prefill_s": 0.12843843002337962, + "t_transfer_s": 0.03704743104754016, + "t_followup_s": 0.017415895010344684, + "cached_followup": 2032, + "pull_completion_tokens": 1, + "ok": true + }, + { + "input_tokens": 2048, + "session": "85f348460077474ab5c7f6db23005f48", + "t_step1_client_unix": 1779885643.9692879, + "t_step2_client_unix": 1779885644.0971973, + "t_step2_end_unix": 1779885644.1345108, + "t_prefill_s": 0.12788102397462353, + "t_transfer_s": 0.037287415005266666, + "t_followup_s": 0.01738263398874551, + "cached_followup": 2032, + "pull_completion_tokens": 1, + "ok": true + }, + { + 
"input_tokens": 4096, + "session": "f715284b17af4bc29ccb689f21ac61b7", + "t_step1_client_unix": 1779885644.1534681, + "t_step2_client_unix": 1779885644.4121828, + "t_step2_end_unix": 1779885644.4760094, + "t_prefill_s": 0.25868210894986987, + "t_transfer_s": 0.06378706597024575, + "t_followup_s": 0.020318155991844833, + "cached_followup": 4080, + "pull_completion_tokens": 1, + "ok": true + }, + { + "input_tokens": 4096, + "session": "b99bf61a90d54373a57af6a38d67581d", + "t_step1_client_unix": 1779885644.497922, + "t_step2_client_unix": 1779885644.7539463, + "t_step2_end_unix": 1779885644.8172574, + "t_prefill_s": 0.255985789000988, + "t_transfer_s": 0.06327042501652613, + "t_followup_s": 0.021031456999480724, + "cached_followup": 4080, + "pull_completion_tokens": 1, + "ok": true + }, + { + "input_tokens": 4096, + "session": "69925f5b50f74c48800999908730d9ec", + "t_step1_client_unix": 1779885644.839903, + "t_step2_client_unix": 1779885645.0941288, + "t_step2_end_unix": 1779885645.1562943, + "t_prefill_s": 0.2541897820192389, + "t_transfer_s": 0.06213490100344643, + "t_followup_s": 0.021715508948545903, + "cached_followup": 4080, + "pull_completion_tokens": 1, + "ok": true + }, + { + "input_tokens": 4096, + "session": "312743112bc84c0699a91e3110a63f03", + "t_step1_client_unix": 1779885645.1795704, + "t_step2_client_unix": 1779885645.4336376, + "t_step2_end_unix": 1779885645.4955726, + "t_prefill_s": 0.2540306959999725, + "t_transfer_s": 0.06190510099986568, + "t_followup_s": 0.020819986995775253, + "cached_followup": 4080, + "pull_completion_tokens": 1, + "ok": true + }, + { + "input_tokens": 4096, + "session": "e5975bd25e1b4aa586cbfe9b68370de2", + "t_step1_client_unix": 1779885645.5179617, + "t_step2_client_unix": 1779885645.7724028, + "t_step2_end_unix": 1779885645.8355522, + "t_prefill_s": 0.2544059020001441, + "t_transfer_s": 0.06311210000421852, + "t_followup_s": 0.02235932502662763, + "cached_followup": 4080, + "pull_completion_tokens": 1, + "ok": true + }, + { 
+ "input_tokens": 8192, + "session": "f8989efa9261470e81c2891945b460a5", + "t_step1_client_unix": 1779885645.8609917, + "t_step2_client_unix": 1779885646.4364054, + "t_step2_end_unix": 1779885646.4636223, + "t_prefill_s": 0.575372110994067, + "t_transfer_s": 0.02718952501891181, + "t_followup_s": 0.024976509041152894, + "cached_followup": 8176, + "pull_completion_tokens": 1, + "ok": true + }, + { + "input_tokens": 8192, + "session": "3a8632e8562c4cbcaa32b57ba23bbf68", + "t_step1_client_unix": 1779885646.4916308, + "t_step2_client_unix": 1779885647.0670328, + "t_step2_end_unix": 1779885647.0934644, + "t_prefill_s": 0.5753633370040916, + "t_transfer_s": 0.026399013004265726, + "t_followup_s": 0.027285586984362453, + "cached_followup": 8176, + "pull_completion_tokens": 1, + "ok": true + }, + { + "input_tokens": 8192, + "session": "4d2cfc02defd492d89e90f652032f630", + "t_step1_client_unix": 1779885647.1237462, + "t_step2_client_unix": 1779885647.6985745, + "t_step2_end_unix": 1779885647.8096595, + "t_prefill_s": 0.574782071984373, + "t_transfer_s": 0.11104170099133626, + "t_followup_s": 0.02618999598780647, + "cached_followup": 8176, + "pull_completion_tokens": 1, + "ok": true + }, + { + "input_tokens": 8192, + "session": "3485a775304947ada42d61cfd39b7520", + "t_step1_client_unix": 1779885647.8389955, + "t_step2_client_unix": 1779885648.4133987, + "t_step2_end_unix": 1779885648.5235367, + "t_prefill_s": 0.574362020008266, + "t_transfer_s": 0.11010575300315395, + "t_followup_s": 0.028998883964959532, + "cached_followup": 8176, + "pull_completion_tokens": 1, + "ok": true + }, + { + "input_tokens": 8192, + "session": "6f87692fb5884094b7b141116ba739f8", + "t_step1_client_unix": 1779885648.5556417, + "t_step2_client_unix": 1779885649.1294918, + "t_step2_end_unix": 1779885649.2386518, + "t_prefill_s": 0.5738097460125573, + "t_transfer_s": 0.1091307660099119, + "t_followup_s": 0.025612210971303284, + "cached_followup": 8176, + "pull_completion_tokens": 1, + "ok": true + }, + 
{ + "input_tokens": 16384, + "session": "c4e007ebdcf244cf95a8ccb3ab59ea0f", + "t_step1_client_unix": 1779885649.270192, + "t_step2_client_unix": 1779885650.7972357, + "t_step2_end_unix": 1779885651.0219116, + "t_prefill_s": 1.5270042870542966, + "t_transfer_s": 0.22463428904302418, + "t_followup_s": 0.03729638300137594, + "cached_followup": 16368, + "pull_completion_tokens": 1, + "ok": true + }, + { + "input_tokens": 16384, + "session": "6f34ab6672e14f41b7780323b30ce21e", + "t_step1_client_unix": 1779885651.0651574, + "t_step2_client_unix": 1779885652.5826244, + "t_step2_end_unix": 1779885652.7871268, + "t_prefill_s": 1.5174251759890467, + "t_transfer_s": 0.2044643560075201, + "t_followup_s": 0.03547715302556753, + "cached_followup": 16368, + "pull_completion_tokens": 1, + "ok": true + }, + { + "input_tokens": 16384, + "session": "ca02bd5aa32544f2b1b09096e5f63c8b", + "t_step1_client_unix": 1779885652.8284712, + "t_step2_client_unix": 1779885654.3465412, + "t_step2_end_unix": 1779885654.5522587, + "t_prefill_s": 1.5180304029490799, + "t_transfer_s": 0.20567574101733044, + "t_followup_s": 0.03611186001216993, + "cached_followup": 16368, + "pull_completion_tokens": 1, + "ok": true + }, + { + "input_tokens": 16384, + "session": "59047ab966574f72ab12693ad29b090f", + "t_step1_client_unix": 1779885654.5942106, + "t_step2_client_unix": 1779885656.1123874, + "t_step2_end_unix": 1779885656.3158703, + "t_prefill_s": 1.5181243289844133, + "t_transfer_s": 0.20344976399792358, + "t_followup_s": 0.035486461012624204, + "cached_followup": 16368, + "pull_completion_tokens": 1, + "ok": true + }, + { + "input_tokens": 16384, + "session": "9a6e2228fd40414c8fd2b78bf399552c", + "t_step1_client_unix": 1779885656.357184, + "t_step2_client_unix": 1779885657.8739264, + "t_step2_end_unix": 1779885658.079835, + "t_prefill_s": 1.5166968539706431, + "t_transfer_s": 0.20585927896900102, + "t_followup_s": 0.03941280301660299, + "cached_followup": 16368, + "pull_completion_tokens": 1, + "ok": true 
+ }, + { + "input_tokens": 32768, + "session": "5cf9fb856b674c0a9237c06980287cb3", + "t_step1_client_unix": 1779885658.1312225, + "t_step2_client_unix": 1779885662.6780548, + "t_step2_end_unix": 1779885663.0500648, + "t_prefill_s": 4.54679255298106, + "t_transfer_s": 0.3719712099991739, + "t_followup_s": 0.05820785299874842, + "cached_followup": 32752, + "pull_completion_tokens": 1, + "ok": true + }, + { + "input_tokens": 32768, + "session": "524f192c92834453bc48052bc6516126", + "t_step1_client_unix": 1779885663.1199641, + "t_step2_client_unix": 1779885667.6679316, + "t_step2_end_unix": 1779885668.047607, + "t_prefill_s": 4.547927976993378, + "t_transfer_s": 0.3796397229889408, + "t_followup_s": 0.058314190013334155, + "cached_followup": 32752, + "pull_completion_tokens": 1, + "ok": true + }, + { + "input_tokens": 32768, + "session": "fb7aa2c99bf24febb83536c54e30a36d", + "t_step1_client_unix": 1779885668.117419, + "t_step2_client_unix": 1779885672.6644971, + "t_step2_end_unix": 1779885673.0345452, + "t_prefill_s": 4.547034470015205, + "t_transfer_s": 0.3700092720100656, + "t_followup_s": 0.05658108199713752, + "cached_followup": 32752, + "pull_completion_tokens": 1, + "ok": true + }, + { + "input_tokens": 32768, + "session": "85612444c3224b65afb09d65a829b5cc", + "t_step1_client_unix": 1779885673.1025856, + "t_step2_client_unix": 1779885677.6487892, + "t_step2_end_unix": 1779885678.2856307, + "t_prefill_s": 4.546165039995685, + "t_transfer_s": 0.6368027949938551, + "t_followup_s": 0.05732736398931593, + "cached_followup": 32752, + "pull_completion_tokens": 1, + "ok": true + }, + { + "input_tokens": 32768, + "session": "fe51fbacc4d544b3a500b099f852b19b", + "t_step1_client_unix": 1779885678.3544915, + "t_step2_client_unix": 1779885682.9011996, + "t_step2_end_unix": 1779885683.542081, + "t_prefill_s": 4.546669405011926, + "t_transfer_s": 0.6408427829737775, + "t_followup_s": 0.057976288022473454, + "cached_followup": 32752, + "pull_completion_tokens": 1, + "ok": true + 
}, + { + "input_tokens": 65536, + "session": "9628b04d4d9e4a9fb26decccef84d32f", + "t_step1_client_unix": 1779885683.6233714, + "t_step2_client_unix": 1779885698.8039112, + "t_step2_end_unix": 1779885700.9075115, + "t_prefill_s": 15.18050062697148, + "t_transfer_s": 2.103562032978516, + "t_followup_s": 0.0921451430185698, + "cached_followup": 65520, + "pull_completion_tokens": 1, + "ok": true + }, + { + "input_tokens": 65536, + "session": "b8bd7328d2274613b473e932db11081c", + "t_step1_client_unix": 1779885701.0228417, + "t_step2_client_unix": 1779885716.2263126, + "t_step2_end_unix": 1779885718.3809934, + "t_prefill_s": 15.200471161981113, + "t_transfer_s": 2.1520405350020155, + "t_followup_s": 0.11590708000585437, + "cached_followup": 65520, + "pull_completion_tokens": 1, + "ok": true + }, + { + "input_tokens": 65536, + "session": "8ca1dfe708c945e59c194ee59b7de87d", + "t_step1_client_unix": 1779885718.5226529, + "t_step2_client_unix": 1779885733.706992, + "t_step2_end_unix": 1779885735.6409419, + "t_prefill_s": 15.184306180977728, + "t_transfer_s": 1.9339170460007153, + "t_followup_s": 0.09697375196265057, + "cached_followup": 65520, + "pull_completion_tokens": 1, + "ok": true + }, + { + "input_tokens": 65536, + "session": "aa7555475c87427ab1a1d95277545655", + "t_step1_client_unix": 1779885735.7609437, + "t_step2_client_unix": 1779885750.9334345, + "t_step2_end_unix": 1779885752.0260077, + "t_prefill_s": 15.172441756993067, + "t_transfer_s": 1.09253996796906, + "t_followup_s": 0.08802600798662752, + "cached_followup": 65520, + "pull_completion_tokens": 1, + "ok": true + }, + { + "input_tokens": 65536, + "session": "dece39eeadd645c7a8e563763f93c35f", + "t_step1_client_unix": 1779885752.1370633, + "t_step2_client_unix": 1779885767.3073487, + "t_step2_end_unix": 1779885768.0632908, + "t_prefill_s": 15.17024433100596, + "t_transfer_s": 0.7559080219944008, + "t_followup_s": 0.08695380098652095, + "cached_followup": 65520, + "pull_completion_tokens": 1, + "ok": true + 
}, + { + "input_tokens": 131072, + "session": "98a339dad05a463e8ac1425413221d27", + "t_step1_client_unix": 1779885768.196416, + "t_step2_client_unix": 1779885822.9193504, + "t_step2_end_unix": 1779885824.3877244, + "t_prefill_s": 54.72287795698503, + "t_transfer_s": 1.4683381259674206, + "t_followup_s": 0.16380483901593834, + "cached_followup": 131056, + "pull_completion_tokens": 1, + "ok": true + }, + { + "input_tokens": 131072, + "session": "4ce2151ad3d4493397bb77c7a62871e7", + "t_step1_client_unix": 1779885824.5981278, + "t_step2_client_unix": 1779885879.3268123, + "t_step2_end_unix": 1779885884.7123976, + "t_prefill_s": 54.72864995297277, + "t_transfer_s": 5.38555136800278, + "t_followup_s": 0.17249851900851354, + "cached_followup": 131056, + "pull_completion_tokens": 1, + "ok": true + }, + { + "input_tokens": 131072, + "session": "6a214438dcf74f36bbefaaed3583c04c", + "t_step1_client_unix": 1779885884.9482107, + "t_step2_client_unix": 1779885939.6747935, + "t_step2_end_unix": 1779885949.2077906, + "t_prefill_s": 54.72654917498585, + "t_transfer_s": 9.532963149016723, + "t_followup_s": 0.1658564520184882, + "cached_followup": 131056, + "pull_completion_tokens": 1, + "ok": true + }, + { + "input_tokens": 131072, + "session": "4cd33910256e43768a2f475650d7714c", + "t_step1_client_unix": 1779885949.4215908, + "t_step2_client_unix": 1779886004.145101, + "t_step2_end_unix": 1779886007.2428164, + "t_prefill_s": 54.72347703698324, + "t_transfer_s": 3.0976800689822994, + "t_followup_s": 0.16629640298197046, + "cached_followup": 131056, + "pull_completion_tokens": 1, + "ok": true + }, + { + "input_tokens": 131072, + "session": "7e3626253b5d4fe29b3cec1e3598bb39", + "t_step1_client_unix": 1779886007.4549844, + "t_step2_client_unix": 1779886062.0564816, + "t_step2_end_unix": 1779886063.7279255, + "t_prefill_s": 54.60145856201416, + "t_transfer_s": 1.6714086790452711, + "t_followup_s": 0.1628041280200705, + "cached_followup": 131056, + "pull_completion_tokens": 1, + "ok": 
true + } + ], + "summary": [ + { + "input_tokens": 512, + "n_ok": 5, + "transfer_s_mean": 0.019290994200855494, + "transfer_s_p50": 0.019964047998655587, + "transfer_s_p90": 0.020512140006758273, + "transfer_s_min": 0.016899197013117373, + "transfer_s_max": 0.020512140006758273 + }, + { + "input_tokens": 1024, + "n_ok": 5, + "transfer_s_mean": 0.027316297392826528, + "transfer_s_p50": 0.026693789986893535, + "transfer_s_p90": 0.029867444012779742, + "transfer_s_min": 0.02630024799145758, + "transfer_s_max": 0.029867444012779742 + }, + { + "input_tokens": 2048, + "n_ok": 5, + "transfer_s_mean": 0.029737486818339674, + "transfer_s_p50": 0.03704743104754016, + "transfer_s_p90": 0.037401642999611795, + "transfer_s_min": 0.01783871802035719, + "transfer_s_max": 0.037401642999611795 + }, + { + "input_tokens": 4096, + "n_ok": 5, + "transfer_s_mean": 0.0628419185988605, + "transfer_s_p50": 0.06311210000421852, + "transfer_s_p90": 0.06378706597024575, + "transfer_s_min": 0.06190510099986568, + "transfer_s_max": 0.06378706597024575 + }, + { + "input_tokens": 8192, + "n_ok": 5, + "transfer_s_mean": 0.07677335160551593, + "transfer_s_p50": 0.1091307660099119, + "transfer_s_p90": 0.11104170099133626, + "transfer_s_min": 0.026399013004265726, + "transfer_s_max": 0.11104170099133626 + }, + { + "input_tokens": 16384, + "n_ok": 5, + "transfer_s_mean": 0.20881668580695986, + "transfer_s_p50": 0.20567574101733044, + "transfer_s_p90": 0.22463428904302418, + "transfer_s_min": 0.20344976399792358, + "transfer_s_max": 0.22463428904302418 + }, + { + "input_tokens": 32768, + "n_ok": 5, + "transfer_s_mean": 0.4798531565931626, + "transfer_s_p50": 0.3796397229889408, + "transfer_s_p90": 0.6408427829737775, + "transfer_s_min": 0.3700092720100656, + "transfer_s_max": 0.6408427829737775 + }, + { + "input_tokens": 65536, + "n_ok": 5, + "transfer_s_mean": 1.6075935207889416, + "transfer_s_p50": 1.9339170460007153, + "transfer_s_p90": 2.1520405350020155, + "transfer_s_min": 0.7559080219944008, + 
"transfer_s_max": 2.1520405350020155 + }, + { + "input_tokens": 131072, + "n_ok": 5, + "transfer_s_mean": 4.2311882782028984, + "transfer_s_p50": 3.0976800689822994, + "transfer_s_p90": 9.532963149016723, + "transfer_s_min": 1.4683381259674206, + "transfer_s_max": 9.532963149016723 + } + ] +} \ No newline at end of file diff --git a/figs/mb2_transfer_bw_compare.png b/figs/mb2_transfer_bw_compare.png new file mode 100644 index 0000000..c62ab7d Binary files /dev/null and b/figs/mb2_transfer_bw_compare.png differ diff --git a/figs/mb2_transfer_bw_inter.png b/figs/mb2_transfer_bw_inter.png new file mode 100644 index 0000000..b3cb634 Binary files /dev/null and b/figs/mb2_transfer_bw_inter.png differ diff --git a/figs/mb2_transfer_time_compare.png b/figs/mb2_transfer_time_compare.png new file mode 100644 index 0000000..291909c Binary files /dev/null and b/figs/mb2_transfer_time_compare.png differ diff --git a/figs/mb2_transfer_time_inter.png b/figs/mb2_transfer_time_inter.png new file mode 100644 index 0000000..c898b2e Binary files /dev/null and b/figs/mb2_transfer_time_inter.png differ diff --git a/microbench/fresh_setup/analyze_mb2_send_only.py b/microbench/fresh_setup/analyze_mb2_send_only.py new file mode 100644 index 0000000..e3111dd --- /dev/null +++ b/microbench/fresh_setup/analyze_mb2_send_only.py @@ -0,0 +1,80 @@ +#!/usr/bin/env python3 +"""Per-size pure_transfer aggregate from A's send_blocks events alone. + +Used when B's receive_kv events are missing (e.g. EngineCore env-var +propagation failed on the consumer host). Pure transfer time still +recoverable from the producer side. 
+""" +from __future__ import annotations + +import argparse +import json +import statistics +from pathlib import Path + + +def main() -> None: + p = argparse.ArgumentParser() + p.add_argument("--a-log", type=Path, required=True) + p.add_argument("--out", type=Path, required=True) + p.add_argument("--min-tokens", type=int, default=32, + help="Skip events smaller than this (drop the spurious " + "init-time tiny sends)") + args = p.parse_args() + + events = [] + with args.a_log.open() as f: + for line in f: + try: + events.append(json.loads(line)) + except json.JSONDecodeError: + continue + + by_size: dict[int, list[float]] = {} + for e in events: + if e.get("event") != "send_blocks": + continue + sz_tokens = e["total_bytes"] // 98304 + if sz_tokens < args.min_tokens: + continue + by_size.setdefault(sz_tokens, []).append(e["duration_s"]) + + summary = [] + for sz in sorted(by_size): + durs = by_size[sz] + sz_bytes = sz * 98304 + sz_mib = sz_bytes / (1024 * 1024) + bw = [sz_bytes / d / 1e9 for d in durs] + summary.append({ + "input_tokens": sz, + "kv_mib": round(sz_mib, 1), + "n": len(durs), + "pure_transfer_ms_mean": round(statistics.mean(durs) * 1000, 2), + "pure_transfer_ms_p50": round(statistics.median(durs) * 1000, 2), + "pure_transfer_ms_min": round(min(durs) * 1000, 2), + "pure_transfer_ms_max": round(max(durs) * 1000, 2), + "throughput_gbps_mean": round(statistics.mean(bw), 2), + "throughput_gbps_p50": round(statistics.median(bw), 2), + "throughput_gbps_max": round(max(bw), 2), + }) + + print(f"loaded {len(events)} events; kept {sum(s['n'] for s in summary)} send_blocks") + print() + print(f"{'in_tok':>8} {'KV_MiB':>8} {'n':>4} " + f"{'pure_p50':>10} {'pure_min':>10} {'pure_max':>10} " + f"{'GB/s_p50':>10} {'GB/s_max':>10}") + for s in summary: + print(f"{s['input_tokens']:>8} {s['kv_mib']:>8.1f} {s['n']:>4} " + f"{s['pure_transfer_ms_p50']:>10.1f} " + f"{s['pure_transfer_ms_min']:>10.1f} " + f"{s['pure_transfer_ms_max']:>10.1f} " + 
f"{s['throughput_gbps_p50']:>10.2f} " + f"{s['throughput_gbps_max']:>10.2f}") + + args.out.parent.mkdir(parents=True, exist_ok=True) + args.out.write_text(json.dumps({"summary": summary}, indent=2)) + print(f"\nwrote {args.out}") + + +if __name__ == "__main__": + main() diff --git a/microbench/fresh_setup/plot_mb2.py b/microbench/fresh_setup/plot_mb2.py index 3f0d040..d6f3189 100644 --- a/microbench/fresh_setup/plot_mb2.py +++ b/microbench/fresh_setup/plot_mb2.py @@ -22,9 +22,10 @@ def main() -> None: args = p.parse_args() d = json.loads(args.breakdown.read_text()) - # Drop the spurious 16-token events (zero-byte sends produced by the - # connector during request init; not a real KV transfer). - rows = [r for r in d["rows"] if r["input_tokens_est"] >= 64] + # `rows` is optional (send-only analyzer skips per-request joining). + # Drop the spurious 16-token events from any rows present. + if "rows" in d: + _ = [r for r in d["rows"] if r["input_tokens_est"] >= 64] summary = [s for s in d["summary"] if s["input_tokens"] >= 64] kv_mib = [s["kv_mib"] for s in summary] diff --git a/microbench/fresh_setup/plot_mb2_compare.py b/microbench/fresh_setup/plot_mb2_compare.py new file mode 100644 index 0000000..bfb89bc --- /dev/null +++ b/microbench/fresh_setup/plot_mb2_compare.py @@ -0,0 +1,99 @@ +#!/usr/bin/env python3 +"""Overlay intra-node and inter-node MB2 curves on the same axes.""" +from __future__ import annotations + +import argparse +import json +from pathlib import Path + +import matplotlib +matplotlib.use("Agg") +import matplotlib.pyplot as plt +import numpy as np + + +def load(path: Path) -> list[dict]: + d = json.loads(path.read_text()) + return [s for s in d["summary"] if s["input_tokens"] >= 64] + + +def main() -> None: + p = argparse.ArgumentParser() + p.add_argument("--intra", type=Path, required=True) + p.add_argument("--inter", type=Path, required=True) + p.add_argument("--out-time", type=Path, default=Path("figs/mb2_transfer_time_compare.png")) + 
p.add_argument("--out-bw", type=Path, default=Path("figs/mb2_transfer_bw_compare.png")) + args = p.parse_args() + + intra = load(args.intra) + inter = load(args.inter) + + def axis_arrays(rows): + kv = [r["kv_mib"] for r in rows] + p50 = [r["pure_transfer_ms_p50"] for r in rows] + mn = [r["pure_transfer_ms_min"] for r in rows] + mx = [r["pure_transfer_ms_max"] for r in rows] + bw_p50 = [r["throughput_gbps_p50"] for r in rows] + bw_max = [r["throughput_gbps_max"] for r in rows] + return kv, p50, mn, mx, bw_p50, bw_max + + ai_kv, ai_p50, ai_mn, ai_mx, ai_bw_p50, ai_bw_max = axis_arrays(intra) + bi_kv, bi_p50, bi_mn, bi_mx, bi_bw_p50, bi_bw_max = axis_arrays(inter) + + # ---- transfer time ---- + fig, ax = plt.subplots(figsize=(8.5, 5)) + ax.errorbar(ai_kv, ai_p50, + yerr=[np.array(ai_p50) - np.array(ai_mn), + np.array(ai_mx) - np.array(ai_p50)], + fmt="o-", color="#1f77b4", lw=2, markersize=7, capsize=4, + label="intra-node (dash1 GPU 0↔1)") + ax.errorbar(bi_kv, bi_p50, + yerr=[np.array(bi_p50) - np.array(bi_mn), + np.array(bi_mx) - np.array(bi_p50)], + fmt="s--", color="#d62728", lw=2, markersize=7, capsize=4, + label="inter-node (dash1 GPU0 → dash2 GPU0)") + # ideal 9.7 GB/s reference + ref_x = np.array(ai_kv) + ref_y_ms = (ref_x * 1024 * 1024) / (9.7 * 1e9) * 1000 + ax.plot(ref_x, ref_y_ms, "--", color="#888", alpha=0.5, + label="9.7 GB/s reference") + ax.axvline(11500, color="#7a1d1d", lw=0.8, ls=":", alpha=0.5) + ax.text(11500, 0.7, "p99 agentic req\n11.5 GiB", + fontsize=8, color="#7a1d1d", ha="center") + ax.set_xscale("log"); ax.set_yscale("log") + ax.set_xlabel("KV transfer size (MiB)") + ax.set_ylabel("Pure transfer time (ms, log)") + ax.set_title("MB2 intra vs inter — Mooncake transfer cost is topology-independent\n" + "(both paths go through 200 Gbps RDMA NIC; intra-node does not use NVLink)") + ax.grid(True, which="both", alpha=0.3) + ax.legend(loc="upper left", fontsize=9) + args.out_time.parent.mkdir(parents=True, exist_ok=True) + fig.tight_layout(); 
fig.savefig(args.out_time, dpi=150); plt.close(fig) + print(f"wrote {args.out_time}") + + # ---- bandwidth ---- + fig, ax = plt.subplots(figsize=(8.5, 5)) + ax.plot(ai_kv, ai_bw_p50, "o-", color="#1f77b4", lw=2, markersize=7, + label="intra p50") + ax.plot(ai_kv, ai_bw_max, "x--", color="#1f77b4", lw=1.2, markersize=8, + alpha=0.7, label="intra max") + ax.plot(bi_kv, bi_bw_p50, "s-", color="#d62728", lw=2, markersize=7, + label="inter p50") + ax.plot(bi_kv, bi_bw_max, "+--", color="#d62728", lw=1.2, markersize=8, + alpha=0.7, label="inter max") + ax.axhline(9.7, color="#888", ls="--", alpha=0.5, + label="steady-state ≈ 9.7 GB/s") + ax.set_xscale("log") + ax.set_xlabel("KV transfer size (MiB)") + ax.set_ylabel("Effective bandwidth (GB/s)") + ax.set_ylim(0, 12) + ax.set_title("MB2 intra vs inter — bandwidth") + ax.grid(True, which="both", alpha=0.3) + ax.legend(loc="lower left", fontsize=9) + args.out_bw.parent.mkdir(parents=True, exist_ok=True) + fig.tight_layout(); fig.savefig(args.out_bw, dpi=150); plt.close(fig) + print(f"wrote {args.out_bw}") + + +if __name__ == "__main__": + main()