MB2 inter-node: dash1↔dash2 transfer cost is identical to intra-node
Sweep on dash1 GPU 0 → dash2 GPU 0 over 200 Gbps RoCE. remote_bootstrap_addr=http://172.27.123.142:8998. Same 9-size × 5-rep config as the 2026-05-27 intra-node run. Per-size pure_transfer (p50) lines up within ~4% of the intra-node numbers at every size except 64k tok (≈9% faster, inside the bimodal regime): size intra p50 inter p50 512 tok 5.3 ms 5.2 ms 2048 tok 20.6 20.0 8192 tok 83.7 80.9 32k tok 320.9 309.6 64k tok 1895 1734 (bimodal in both) 128k tok 2835 2818 (bimodal in both) => Mooncake's batch_transfer_sync_write **does not use NVLink** for intra-node peers; both paths go through the 200 Gbps RDMA NIC, which (not the GPU interconnect) is the bottleneck. The ~9.7 GB/s steady-state ceiling and the 6+ GiB variance regime are identical across topologies. Operational implication for §3.2: PD-disaggregation does not get cheaper by co-locating P and D on the same node — every routed request pays the same ~10 GB/s ceiling for KV transfer, no matter where it lands. Topology cannot buy back any of the transfer cost, let alone halve it. Caveat: B's receive_kv events did not log on dash2 — the `MB2_LOG_DIR` env var did not propagate through vLLM's EngineCore subprocess on the consumer host (cat /proc/$ENGINE_PID/environ is empty on dash2 for that var, but the producer host on dash1 worked). For this run pure_transfer numbers are from A's send_blocks alone; full rx_total breakdown is not available, but pure_transfer is the dominant term. 
Adds: - analyze_mb2_send_only.py — analyzer that works from A's send_blocks alone when B's receive_kv events are absent - plot_mb2_compare.py — overlay intra vs inter on the same axes - plot_mb2.py — tolerate the `rows`-less send-only schema - figs/mb2_transfer_{time,bw}_inter.png — inter-node single-curve - figs/mb2_transfer_{time,bw}_compare.png — intra vs inter overlay - analysis/mb2/A_inter_kvboth.jsonl, inter_kvboth_client.json, inter_kvboth_breakdown.json - analysis/mb2/README.md — Summary block updated to reference both paths, dated 2026-05-27 run-log entry appended with the full table and the topology-independence framing Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
51
analysis/mb2/A_inter_kvboth.jsonl
Normal file
51
analysis/mb2/A_inter_kvboth.jsonl
Normal file
@@ -0,0 +1,51 @@
|
||||
{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 50331648, "duration_s": 0.06580113701056689, "t_start_unix": 1779885615.6732209, "ret": 0, "tp_rank": 0, "t_log_unix": 1779885615.7390358}
|
||||
{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 50331648, "duration_s": 0.0052392969955690205, "t_start_unix": 1779885616.0322638, "ret": 0, "tp_rank": 0, "t_log_unix": 1779885616.0375087}
|
||||
{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 201326592, "duration_s": 0.02050818904535845, "t_start_unix": 1779885616.2556505, "ret": 0, "tp_rank": 0, "t_log_unix": 1779885616.2761638}
|
||||
{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 201326592, "duration_s": 0.02001398801803589, "t_start_unix": 1779885616.4400308, "ret": 0, "tp_rank": 0, "t_log_unix": 1779885616.46005}
|
||||
{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 805306368, "duration_s": 0.08249958901433274, "t_start_unix": 1779885617.072654, "ret": 0, "tp_rank": 0, "t_log_unix": 1779885617.1551628}
|
||||
{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 805306368, "duration_s": 0.08082435996038839, "t_start_unix": 1779885617.7853239, "ret": 0, "tp_rank": 0, "t_log_unix": 1779885617.866155}
|
||||
{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 1572864, "duration_s": 0.00040365802124142647, "t_start_unix": 1779885642.3123364, "ret": 0, "tp_rank": 0, "t_log_unix": 1779885642.3127441}
|
||||
{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 1572864, "duration_s": 0.000374739000108093, "t_start_unix": 1779885642.3945863, "ret": 0, "tp_rank": 0, "t_log_unix": 1779885642.3949661}
|
||||
{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 50331648, "duration_s": 0.005158354004379362, "t_start_unix": 1779885642.4766958, "ret": 0, "tp_rank": 0, "t_log_unix": 1779885642.481858}
|
||||
{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 50331648, "duration_s": 0.00513053999748081, "t_start_unix": 1779885642.5614145, "ret": 0, "tp_rank": 0, "t_log_unix": 1779885642.5665495}
|
||||
{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 50331648, "duration_s": 0.0051341859507374465, "t_start_unix": 1779885642.6461189, "ret": 0, "tp_rank": 0, "t_log_unix": 1779885642.6512585}
|
||||
{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 100663296, "duration_s": 0.010125375993084162, "t_start_unix": 1779885642.7582293, "ret": 0, "tp_rank": 0, "t_log_unix": 1779885642.76836}
|
||||
{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 100663296, "duration_s": 0.010367848037276417, "t_start_unix": 1779885642.878179, "ret": 0, "tp_rank": 0, "t_log_unix": 1779885642.8885527}
|
||||
{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 100663296, "duration_s": 0.010208865976892412, "t_start_unix": 1779885642.997267, "ret": 0, "tp_rank": 0, "t_log_unix": 1779885643.0074801}
|
||||
{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 100663296, "duration_s": 0.01015280099818483, "t_start_unix": 1779885643.1138487, "ret": 0, "tp_rank": 0, "t_log_unix": 1779885643.124007}
|
||||
{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 100663296, "duration_s": 0.010063701018225402, "t_start_unix": 1779885643.2299926, "ret": 0, "tp_rank": 0, "t_log_unix": 1779885643.2400591}
|
||||
{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 1572864, "duration_s": 0.00040220399387180805, "t_start_unix": 1779885643.4041483, "ret": 0, "tp_rank": 0, "t_log_unix": 1779885643.4045541}
|
||||
{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 1572864, "duration_s": 0.0003751559997908771, "t_start_unix": 1779885643.569847, "ret": 0, "tp_rank": 0, "t_log_unix": 1779885643.5702271}
|
||||
{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 201326592, "duration_s": 0.020166583999525756, "t_start_unix": 1779885643.7347023, "ret": 0, "tp_rank": 0, "t_log_unix": 1779885643.7548745}
|
||||
{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 201326592, "duration_s": 0.019989027990959585, "t_start_unix": 1779885643.9190295, "ret": 0, "tp_rank": 0, "t_log_unix": 1779885643.939023}
|
||||
{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 201326592, "duration_s": 0.02000429102918133, "t_start_unix": 1779885644.1027336, "ret": 0, "tp_rank": 0, "t_log_unix": 1779885644.122742}
|
||||
{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 402653184, "duration_s": 0.040499016002286226, "t_start_unix": 1779885644.419112, "ret": 0, "tp_rank": 0, "t_log_unix": 1779885644.4596438}
|
||||
{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 402653184, "duration_s": 0.04015034798067063, "t_start_unix": 1779885644.761118, "ret": 0, "tp_rank": 0, "t_log_unix": 1779885644.8012745}
|
||||
{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 402653184, "duration_s": 0.04006708099041134, "t_start_unix": 1779885645.1008255, "ret": 0, "tp_rank": 0, "t_log_unix": 1779885645.1408994}
|
||||
{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 402653184, "duration_s": 0.04012463602703065, "t_start_unix": 1779885645.440819, "ret": 0, "tp_rank": 0, "t_log_unix": 1779885645.4809544}
|
||||
{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 402653184, "duration_s": 0.04009692999534309, "t_start_unix": 1779885645.7793777, "ret": 0, "tp_rank": 0, "t_log_unix": 1779885645.8194828}
|
||||
{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 1572864, "duration_s": 0.00046314700739458203, "t_start_unix": 1779885646.445756, "ret": 0, "tp_rank": 0, "t_log_unix": 1779885646.4462252}
|
||||
{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 1572864, "duration_s": 0.0004179630195721984, "t_start_unix": 1779885647.0757725, "ret": 0, "tp_rank": 0, "t_log_unix": 1779885647.0761962}
|
||||
{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 805306368, "duration_s": 0.08170936699025333, "t_start_unix": 1779885647.7075222, "ret": 0, "tp_rank": 0, "t_log_unix": 1779885647.7892444}
|
||||
{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 805306368, "duration_s": 0.08086105203256011, "t_start_unix": 1779885648.422354, "ret": 0, "tp_rank": 0, "t_log_unix": 1779885648.503224}
|
||||
{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 805306368, "duration_s": 0.08071460900828242, "t_start_unix": 1779885649.1382625, "ret": 0, "tp_rank": 0, "t_log_unix": 1779885649.2189863}
|
||||
{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 1610612736, "duration_s": 0.1638482889975421, "t_start_unix": 1779885650.8273866, "ret": 0, "tp_rank": 0, "t_log_unix": 1779885650.991251}
|
||||
{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 1610612736, "duration_s": 0.16476035403320566, "t_start_unix": 1779885652.5951493, "ret": 0, "tp_rank": 0, "t_log_unix": 1779885652.7599196}
|
||||
{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 1610612736, "duration_s": 0.1617715489701368, "t_start_unix": 1779885654.3595936, "ret": 0, "tp_rank": 0, "t_log_unix": 1779885654.5214472}
|
||||
{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 1610612736, "duration_s": 0.1616577100357972, "t_start_unix": 1779885656.125777, "ret": 0, "tp_rank": 0, "t_log_unix": 1779885656.2874432}
|
||||
{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 1610612736, "duration_s": 0.1617818950326182, "t_start_unix": 1779885657.8867118, "ret": 0, "tp_rank": 0, "t_log_unix": 1779885658.0485108}
|
||||
{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 3221225472, "duration_s": 0.30771408596774563, "t_start_unix": 1779885662.69878, "ret": 0, "tp_rank": 0, "t_log_unix": 1779885663.0065253}
|
||||
{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 3221225472, "duration_s": 0.3096057590446435, "t_start_unix": 1779885667.6915898, "ret": 0, "tp_rank": 0, "t_log_unix": 1779885668.0012283}
|
||||
{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 3221225472, "duration_s": 0.3080696280230768, "t_start_unix": 1779885672.684307, "ret": 0, "tp_rank": 0, "t_log_unix": 1779885672.9923992}
|
||||
{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 3221225472, "duration_s": 0.5257709489669651, "t_start_unix": 1779885677.7157974, "ret": 0, "tp_rank": 0, "t_log_unix": 1779885678.2415998}
|
||||
{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 3221225472, "duration_s": 0.5268570999614894, "t_start_unix": 1779885682.9683614, "ret": 0, "tp_rank": 0, "t_log_unix": 1779885683.4952588}
|
||||
{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 6442450944, "duration_s": 1.901308581000194, "t_start_unix": 1779885698.9342654, "ret": 0, "tp_rank": 0, "t_log_unix": 1779885700.835605}
|
||||
{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 6442450944, "duration_s": 1.9211939970264211, "t_start_unix": 1779885716.3570645, "ret": 0, "tp_rank": 0, "t_log_unix": 1779885718.2782962}
|
||||
{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 6442450944, "duration_s": 1.7335722800344229, "t_start_unix": 1779885733.838071, "ret": 0, "tp_rank": 0, "t_log_unix": 1779885735.571683}
|
||||
{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 6442450944, "duration_s": 0.9299940629862249, "t_start_unix": 1779885751.0288215, "ret": 0, "tp_rank": 0, "t_log_unix": 1779885751.9588547}
|
||||
{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 6442450944, "duration_s": 0.6535220990190282, "t_start_unix": 1779885767.3456392, "ret": 0, "tp_rank": 0, "t_log_unix": 1779885767.9991918}
|
||||
{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 12884901888, "duration_s": 1.2830275790183805, "t_start_unix": 1779885822.9884846, "ret": 0, "tp_rank": 0, "t_log_unix": 1779885824.2715507}
|
||||
{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 12884901888, "duration_s": 5.053741328010801, "t_start_unix": 1779885879.5277712, "ret": 0, "tp_rank": 0, "t_log_unix": 1779885884.5815506}
|
||||
{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 12884901888, "duration_s": 9.158571614010725, "t_start_unix": 1779885939.9274101, "ret": 0, "tp_rank": 0, "t_log_unix": 1779885949.086019}
|
||||
{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 12884901888, "duration_s": 2.8184301540022716, "t_start_unix": 1779886004.3031003, "ret": 0, "tp_rank": 0, "t_log_unix": 1779886007.121565}
|
||||
{"event": "send_blocks", "remote_session": "172.27.123.133:16878", "total_bytes": 12884901888, "duration_s": 1.4530277770245448, "t_start_unix": 1779886062.159227, "ret": 0, "tp_rank": 0, "t_log_unix": 1779886063.6122832}
|
||||
@@ -10,8 +10,19 @@ the EAR paper. Re-runs append a dated section at the bottom; the
|
||||
|
||||
| Path | Steady-state BW | Agentic-tail p99 transfer (11.5 GiB KV) |
|
||||
|---|---|---|
|
||||
| **intra-node** (dash1 GPU 0↔1, kv_both, Mooncake 0.3.11) | **~9.7 GB/s** (96 MiB – 3 GiB) | p50 **1.9 s** · min **1.5 s** · max **10 s** |
|
||||
| inter-node (dash1 ↔ dash2, RDMA) | TODO | TODO |
|
||||
| **intra-node** (dash1 GPU 0↔1) | **~9.7 GB/s** (96 MiB – 3 GiB) | p50 **2.8 s** · min **1.5 s** · max **10 s** |
|
||||
| **inter-node** (dash1 GPU0 → dash2 GPU0, 200 Gbps RoCE) | **~10.0 GB/s** (essentially identical) | p50 **2.8 s** · min **1.3 s** · max **9.2 s** |
|
||||
|
||||
**Cross-cutting finding** (2026-05-27): **Mooncake transfer cost is
|
||||
topology-independent** on this hardware. Intra-node and inter-node curves
|
||||
agree to within ~4% at p50 — 9% at 6 GiB, inside the bimodal regime — with n = 5 per size (see `figs/mb2_transfer_time_compare.png`,
|
||||
`figs/mb2_transfer_bw_compare.png`). Mechanism: Mooncake's
|
||||
`batch_transfer_sync_write` always goes through the RDMA NIC, including
|
||||
the intra-node case (RDMA loopback). The RDMA NIC path, not NVLink, is
|
||||
the bottleneck (the ~10 GB/s ceiling is ≈80 Gbps, well below the NIC's 200 Gbps line rate). **Implication for §3.2**: PD-disaggregation does not
|
||||
get cheaper by co-locating P and D on the same node — the ~9.7 GB/s
|
||||
ceiling applies regardless. Halving the transfer cost cannot be bought
|
||||
back by topology.
|
||||
|
||||
**Headline for the paper §3.2**: at the agentic tail, **pure KV transfer
|
||||
takes 1.5 – 10 s**. A median agentic decode is **50 – 200 ms** of tool-call
|
||||
@@ -229,3 +240,61 @@ Result table above. **9.7 GB/s steady-state up to 3 GiB**, variance
|
||||
opens at 6 GiB, p99 agentic-tail transfer 1.5 – 10 s.
|
||||
|
||||
Committed as `de164e5`.
|
||||
|
||||
### 2026-05-27 — inter-node, kv_both, dash1 GPU 0 → dash2 GPU 0
|
||||
|
||||
Same sweep config. 200 Gbps RoCE between hosts (RTT ~0.2 ms ping).
|
||||
Producer A on dash1 GPU 0, consumer B on dash2 GPU 0.
|
||||
remote_bootstrap_addr=`http://172.27.123.142:8998` (dash1's internal IP).
|
||||
|
||||
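Raw events: `A_inter_kvboth.jsonl` (45 send_blocks + 6 sanity). Note: for 512, 2048 and 8192 tokens, two of the five sweep reps sent only 1.5 MiB each (presumably a prefix-cache hit left over from the sanity pass — TODO confirm), so the n = 5 rows for those sizes in the table below are 3 sweep reps plus the 2 sanity transfers of the same size, including the 65.8 ms cold-start outlier at 512.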
|
||||
B's receive_kv events are **missing** for this run — the
|
||||
`MB2_LOG_DIR` env var did not propagate from the start-script through
|
||||
vLLM's EngineCore subprocess on dash2 (visible via
|
||||
`cat /proc/$ENGINE_PID/environ`, which is empty for that variable on dash2 but contains
|
||||
MB2_LOG_DIR for dash1 — bookmark for future investigation, likely
|
||||
spawn-vs-fork difference in vLLM's multiproc executor across hosts).
|
||||
Pure-transfer numbers below come from A's send_blocks alone; full
|
||||
rx_total breakdown not available for this run.
|
||||
|
||||
Per-size pure-transfer (analyzed by `analyze_mb2_send_only.py`):
|
||||
|
||||
| input_tokens | KV (MiB) | n | pure_ms p50 | min | max | BW p50 (GB/s) | BW max |
|
||||
|---:|---:|---:|---:|---:|---:|---:|---:|
|
||||
| 512 | 48 | 5 | 5.2 | 5.1 | 65.8 | 9.76 | 9.81 |
|
||||
| 1024 | 96 | 5 | 10.2 | 10.1 | 10.4 | 9.91 | 10.00 |
|
||||
| 2048 | 192 | 5 | 20.0 | 20.0 | 20.5 | 10.06 | 10.07 |
|
||||
| 4096 | 384 | 5 | 40.1 | 40.1 | 40.5 | 10.04 | 10.05 |
|
||||
| 8192 | 768 | 5 | 80.9 | 80.7 | 82.5 | 9.96 | 9.98 |
|
||||
| 16384 | 1536 | 5 | 161.8 | 161.7 | 164.8 | 9.96 | 9.96 |
|
||||
| 32768 | 3072 | 5 | 309.6 | 307.7 | 526.9 | 10.40 | 10.47 |
|
||||
| 65536 | 6144 | 5 | 1733.6 | 653.5 | 1921.2 | 3.72 | 9.86 |
|
||||
| 131072 | 12288 | 5 | 2818.4 | 1283.0 | 9158.6 | 4.57 | 10.04 |
|
||||
|
||||
Side-by-side comparison with the 2026-05-27 intra-node run:
|
||||
|
||||
| Size | intra p50 ms | inter p50 ms | gap | intra GB/s | inter GB/s |
|
||||
|---|---:|---:|---:|---:|---:|
|
||||
| 512 | 5.3 | 5.2 | −2% | 9.40 | 9.76 |
|
||||
| 1024 | 10.4 | 10.2 | −2% | 9.68 | 9.91 |
|
||||
| 2048 | 20.6 | 20.0 | −3% | 9.75 | 10.06 |
|
||||
| 4096 | 41.5 | 40.1 | −3% | 9.71 | 10.04 |
|
||||
| 8192 | 83.7 | 80.9 | −3% | 9.62 | 9.96 |
|
||||
| 16384 | 167.1 | 161.8 | −3% | 9.64 | 9.96 |
|
||||
| 32768 | 320.9 | 309.6 | −3% | 10.04 | 10.40 |
|
||||
| 65536 | 1895.1 | 1733.6 | −9% | 3.40 | 3.72 |
|
||||
|131072 | 2835.1 | 2818.4 | −1% | 4.54 | 4.57 |
|
||||
|
||||
The two paths produce essentially the same numbers — **Mooncake intra-node
|
||||
is not using NVLink**; it's going through RDMA-loopback on the
|
||||
local NIC and gets the same ~10 GB/s ceiling as cross-node RDMA. The
|
||||
6+ GiB variance regime is also identical between paths.
|
||||
|
||||
Figures: `figs/mb2_transfer_time_inter.png`, `figs/mb2_transfer_bw_inter.png`,
|
||||
`figs/mb2_transfer_time_compare.png` (overlay), `figs/mb2_transfer_bw_compare.png`.
|
||||
|
||||
This collapses the §3.2 narrative to a single number: **PD-disagg
|
||||
across this cluster costs ~9.7–10 GB/s of transfer bandwidth no matter
|
||||
how you place P and D** (within-node or across-node). For p99 agentic
|
||||
KV (11.5 GiB), that's 1.3–10 s of transfer; for 6 GiB it's 0.7–2 s.
|
||||
Decode is 50–200 ms. So PD-disagg's cost dominates regardless of layout.
|
||||
|
||||
112
analysis/mb2/inter_kvboth_breakdown.json
Normal file
112
analysis/mb2/inter_kvboth_breakdown.json
Normal file
@@ -0,0 +1,112 @@
|
||||
{
|
||||
"summary": [
|
||||
{
|
||||
"input_tokens": 512,
|
||||
"kv_mib": 48.0,
|
||||
"n": 5,
|
||||
"pure_transfer_ms_mean": 17.29,
|
||||
"pure_transfer_ms_p50": 5.16,
|
||||
"pure_transfer_ms_min": 5.13,
|
||||
"pure_transfer_ms_max": 65.8,
|
||||
"throughput_gbps_mean": 7.95,
|
||||
"throughput_gbps_p50": 9.76,
|
||||
"throughput_gbps_max": 9.81
|
||||
},
|
||||
{
|
||||
"input_tokens": 1024,
|
||||
"kv_mib": 96.0,
|
||||
"n": 5,
|
||||
"pure_transfer_ms_mean": 10.18,
|
||||
"pure_transfer_ms_p50": 10.15,
|
||||
"pure_transfer_ms_min": 10.06,
|
||||
"pure_transfer_ms_max": 10.37,
|
||||
"throughput_gbps_mean": 9.89,
|
||||
"throughput_gbps_p50": 9.91,
|
||||
"throughput_gbps_max": 10.0
|
||||
},
|
||||
{
|
||||
"input_tokens": 2048,
|
||||
"kv_mib": 192.0,
|
||||
"n": 5,
|
||||
"pure_transfer_ms_mean": 20.14,
|
||||
"pure_transfer_ms_p50": 20.01,
|
||||
"pure_transfer_ms_min": 19.99,
|
||||
"pure_transfer_ms_max": 20.51,
|
||||
"throughput_gbps_mean": 10.0,
|
||||
"throughput_gbps_p50": 10.06,
|
||||
"throughput_gbps_max": 10.07
|
||||
},
|
||||
{
|
||||
"input_tokens": 4096,
|
||||
"kv_mib": 384.0,
|
||||
"n": 5,
|
||||
"pure_transfer_ms_mean": 40.19,
|
||||
"pure_transfer_ms_p50": 40.12,
|
||||
"pure_transfer_ms_min": 40.07,
|
||||
"pure_transfer_ms_max": 40.5,
|
||||
"throughput_gbps_mean": 10.02,
|
||||
"throughput_gbps_p50": 10.04,
|
||||
"throughput_gbps_max": 10.05
|
||||
},
|
||||
{
|
||||
"input_tokens": 8192,
|
||||
"kv_mib": 768.0,
|
||||
"n": 5,
|
||||
"pure_transfer_ms_mean": 81.32,
|
||||
"pure_transfer_ms_p50": 80.86,
|
||||
"pure_transfer_ms_min": 80.71,
|
||||
"pure_transfer_ms_max": 82.5,
|
||||
"throughput_gbps_mean": 9.9,
|
||||
"throughput_gbps_p50": 9.96,
|
||||
"throughput_gbps_max": 9.98
|
||||
},
|
||||
{
|
||||
"input_tokens": 16384,
|
||||
"kv_mib": 1536.0,
|
||||
"n": 5,
|
||||
"pure_transfer_ms_mean": 162.76,
|
||||
"pure_transfer_ms_p50": 161.78,
|
||||
"pure_transfer_ms_min": 161.66,
|
||||
"pure_transfer_ms_max": 164.76,
|
||||
"throughput_gbps_mean": 9.9,
|
||||
"throughput_gbps_p50": 9.96,
|
||||
"throughput_gbps_max": 9.96
|
||||
},
|
||||
{
|
||||
"input_tokens": 32768,
|
||||
"kv_mib": 3072.0,
|
||||
"n": 5,
|
||||
"pure_transfer_ms_mean": 395.6,
|
||||
"pure_transfer_ms_p50": 309.61,
|
||||
"pure_transfer_ms_min": 307.71,
|
||||
"pure_transfer_ms_max": 526.86,
|
||||
"throughput_gbps_mean": 8.71,
|
||||
"throughput_gbps_p50": 10.4,
|
||||
"throughput_gbps_max": 10.47
|
||||
},
|
||||
{
|
||||
"input_tokens": 65536,
|
||||
"kv_mib": 6144.0,
|
||||
"n": 5,
|
||||
"pure_transfer_ms_mean": 1427.92,
|
||||
"pure_transfer_ms_p50": 1733.57,
|
||||
"pure_transfer_ms_min": 653.52,
|
||||
"pure_transfer_ms_max": 1921.19,
|
||||
"throughput_gbps_mean": 5.45,
|
||||
"throughput_gbps_p50": 3.72,
|
||||
"throughput_gbps_max": 9.86
|
||||
},
|
||||
{
|
||||
"input_tokens": 131072,
|
||||
"kv_mib": 12288.0,
|
||||
"n": 5,
|
||||
"pure_transfer_ms_mean": 3953.36,
|
||||
"pure_transfer_ms_p50": 2818.43,
|
||||
"pure_transfer_ms_min": 1283.03,
|
||||
"pure_transfer_ms_max": 9158.57,
|
||||
"throughput_gbps_mean": 5.49,
|
||||
"throughput_gbps_p50": 4.57,
|
||||
"throughput_gbps_max": 10.04
|
||||
}
|
||||
]
|
||||
}
|
||||
679
analysis/mb2/inter_kvboth_client.json
Normal file
679
analysis/mb2/inter_kvboth_client.json
Normal file
@@ -0,0 +1,679 @@
|
||||
{
|
||||
"model": "/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct",
|
||||
"kv_bytes_per_token": 98304,
|
||||
"src_host": "172.27.123.142",
|
||||
"src_port": 8000,
|
||||
"dst_host": "172.27.123.133",
|
||||
"dst_port": 8001,
|
||||
"config_label": "inter-kvboth",
|
||||
"raw": [
|
||||
{
|
||||
"input_tokens": 512,
|
||||
"session": "677ca02530774bae990d549d0591d336",
|
||||
"t_step1_client_unix": 1779885642.2549865,
|
||||
"t_step2_client_unix": 1779885642.3058398,
|
||||
"t_step2_end_unix": 1779885642.3249059,
|
||||
"t_prefill_s": 0.05082751903682947,
|
||||
"t_transfer_s": 0.019030610972549766,
|
||||
"t_followup_s": 0.01642513304250315,
|
||||
"cached_followup": 496,
|
||||
"pull_completion_tokens": 1,
|
||||
"ok": true
|
||||
},
|
||||
{
|
||||
"input_tokens": 512,
|
||||
"session": "6758b8079675495a9a13a73db1199a71",
|
||||
"t_step1_client_unix": 1779885642.3416288,
|
||||
"t_step2_client_unix": 1779885642.3896646,
|
||||
"t_step2_end_unix": 1779885642.40659,
|
||||
"t_prefill_s": 0.04800783301470801,
|
||||
"t_transfer_s": 0.016899197013117373,
|
||||
"t_followup_s": 0.017115428985562176,
|
||||
"cached_followup": 496,
|
||||
"pull_completion_tokens": 1,
|
||||
"ok": true
|
||||
},
|
||||
{
|
||||
"input_tokens": 512,
|
||||
"session": "e9da485a88cc40eb927e18c37298d815",
|
||||
"t_step1_client_unix": 1779885642.4239926,
|
||||
"t_step2_client_unix": 1779885642.4724562,
|
||||
"t_step2_end_unix": 1779885642.492528,
|
||||
"t_prefill_s": 0.04843967000488192,
|
||||
"t_transfer_s": 0.02004897501319647,
|
||||
"t_followup_s": 0.016289777995552868,
|
||||
"cached_followup": 496,
|
||||
"pull_completion_tokens": 1,
|
||||
"ok": true
|
||||
},
|
||||
{
|
||||
"input_tokens": 512,
|
||||
"session": "a70db16a01b04409b04ef30e94082720",
|
||||
"t_step1_client_unix": 1779885642.509085,
|
||||
"t_step2_client_unix": 1779885642.5566719,
|
||||
"t_step2_end_unix": 1779885642.577205,
|
||||
"t_prefill_s": 0.047563806001562625,
|
||||
"t_transfer_s": 0.020512140006758273,
|
||||
"t_followup_s": 0.016482150997035205,
|
||||
"cached_followup": 496,
|
||||
"pull_completion_tokens": 1,
|
||||
"ok": true
|
||||
},
|
||||
{
|
||||
"input_tokens": 512,
|
||||
"session": "d13cddd850904d4591b15c112ab32a5f",
|
||||
"t_step1_client_unix": 1779885642.5939884,
|
||||
"t_step2_client_unix": 1779885642.6417143,
|
||||
"t_step2_end_unix": 1779885642.6617038,
|
||||
"t_prefill_s": 0.04769411502638832,
|
||||
"t_transfer_s": 0.019964047998655587,
|
||||
"t_followup_s": 0.01518680399749428,
|
||||
"cached_followup": 496,
|
||||
"pull_completion_tokens": 1,
|
||||
"ok": true
|
||||
},
|
||||
{
|
||||
"input_tokens": 1024,
|
||||
"session": "64d9b1335bd04698ab4f681fe4fe1e40",
|
||||
"t_step1_client_unix": 1779885642.6773622,
|
||||
"t_step2_client_unix": 1779885642.7512894,
|
||||
"t_step2_end_unix": 1779885642.7811818,
|
||||
"t_prefill_s": 0.07390057999873534,
|
||||
"t_transfer_s": 0.029867444012779742,
|
||||
"t_followup_s": 0.017206061049364507,
|
||||
"cached_followup": 1008,
|
||||
"pull_completion_tokens": 1,
|
||||
"ok": true
|
||||
},
|
||||
{
|
||||
"input_tokens": 1024,
|
||||
"session": "0fc46cc27a844482b9eab2fef48a56d2",
|
||||
"t_step1_client_unix": 1779885642.7988534,
|
||||
"t_step2_client_unix": 1779885642.8733544,
|
||||
"t_step2_end_unix": 1779885642.9005985,
|
||||
"t_prefill_s": 0.074467666039709,
|
||||
"t_transfer_s": 0.027220223972108215,
|
||||
"t_followup_s": 0.0172900699544698,
|
||||
"cached_followup": 1008,
|
||||
"pull_completion_tokens": 1,
|
||||
"ok": true
|
||||
},
|
||||
{
|
||||
"input_tokens": 1024,
|
||||
"session": "dac7cceb94c0465abce6bff73119de06",
|
||||
"t_step1_client_unix": 1779885642.9183617,
|
||||
"t_step2_client_unix": 1779885642.9924595,
|
||||
"t_step2_end_unix": 1779885643.0189831,
|
||||
"t_prefill_s": 0.07407327799592167,
|
||||
"t_transfer_s": 0.026499781000893563,
|
||||
"t_followup_s": 0.01622009096900001,
|
||||
"cached_followup": 1008,
|
||||
"pull_completion_tokens": 1,
|
||||
"ok": true
|
||||
},
|
||||
{
|
||||
"input_tokens": 1024,
|
||||
"session": "4137a7ebecf148db8aa61249ef0d0100",
|
||||
"t_step1_client_unix": 1779885643.0356703,
|
||||
"t_step2_client_unix": 1779885643.109194,
|
||||
"t_step2_end_unix": 1779885643.135519,
|
||||
"t_prefill_s": 0.07349911500932649,
|
||||
"t_transfer_s": 0.02630024799145758,
|
||||
"t_followup_s": 0.01637468097032979,
|
||||
"cached_followup": 1008,
|
||||
"pull_completion_tokens": 1,
|
||||
"ok": true
|
||||
},
|
||||
{
|
||||
"input_tokens": 1024,
|
||||
"session": "e28432915bf64c4891dc5157d0bebe41",
|
||||
"t_step1_client_unix": 1779885643.1523507,
|
||||
"t_step2_client_unix": 1779885643.2252986,
|
||||
"t_step2_end_unix": 1779885643.2520146,
|
||||
"t_prefill_s": 0.07292484800564125,
|
||||
"t_transfer_s": 0.026693789986893535,
|
||||
"t_followup_s": 0.016969401971437037,
|
||||
"cached_followup": 1008,
|
||||
"pull_completion_tokens": 1,
|
||||
"ok": true
|
||||
},
|
||||
{
|
||||
"input_tokens": 2048,
|
||||
"session": "717a42513a2c48358ebfbb73ca07c3fd",
|
||||
"t_step1_client_unix": 1779885643.269825,
|
||||
"t_step2_client_unix": 1779885643.3983366,
|
||||
"t_step2_end_unix": 1779885643.417481,
|
||||
"t_prefill_s": 0.12848620803561062,
|
||||
"t_transfer_s": 0.019112227018922567,
|
||||
"t_followup_s": 0.017094306997023523,
|
||||
"cached_followup": 2032,
|
||||
"pull_completion_tokens": 1,
|
||||
"ok": true
|
||||
},
|
||||
{
|
||||
"input_tokens": 2048,
|
||||
"session": "f2d1287f0e564a60a0d8476bb917ec9a",
|
||||
"t_step1_client_unix": 1779885643.4353812,
|
||||
"t_step2_client_unix": 1779885643.5640473,
|
||||
"t_step2_end_unix": 1779885643.5819182,
|
||||
"t_prefill_s": 0.12862860300811008,
|
||||
"t_transfer_s": 0.01783871802035719,
|
||||
"t_followup_s": 0.016443474043626338,
|
||||
"cached_followup": 2032,
|
||||
"pull_completion_tokens": 1,
|
||||
"ok": true
|
||||
},
|
||||
{
|
||||
"input_tokens": 2048,
|
||||
"session": "cff5a22ae9c341d8acc455188ecc63b5",
|
||||
"t_step1_client_unix": 1779885643.5992126,
|
||||
"t_step2_client_unix": 1779885643.7292078,
|
||||
"t_step2_end_unix": 1779885643.766638,
|
||||
"t_prefill_s": 0.12995763396611437,
|
||||
"t_transfer_s": 0.037401642999611795,
|
||||
"t_followup_s": 0.018064863979816437,
|
||||
"cached_followup": 2032,
|
||||
"pull_completion_tokens": 1,
|
||||
"ok": true
|
||||
},
|
||||
{
|
||||
"input_tokens": 2048,
|
||||
"session": "cc86ca16270b493fab68af1e889007af",
|
||||
"t_step1_client_unix": 1779885643.7855458,
|
||||
"t_step2_client_unix": 1779885643.9140093,
|
||||
"t_step2_end_unix": 1779885643.9510815,
|
||||
"t_prefill_s": 0.12843843002337962,
|
||||
"t_transfer_s": 0.03704743104754016,
|
||||
"t_followup_s": 0.017415895010344684,
|
||||
"cached_followup": 2032,
|
||||
"pull_completion_tokens": 1,
|
||||
"ok": true
|
||||
},
|
||||
{
|
||||
"input_tokens": 2048,
|
||||
"session": "85f348460077474ab5c7f6db23005f48",
|
||||
"t_step1_client_unix": 1779885643.9692879,
|
||||
"t_step2_client_unix": 1779885644.0971973,
|
||||
"t_step2_end_unix": 1779885644.1345108,
|
||||
"t_prefill_s": 0.12788102397462353,
|
||||
"t_transfer_s": 0.037287415005266666,
|
||||
"t_followup_s": 0.01738263398874551,
|
||||
"cached_followup": 2032,
|
||||
"pull_completion_tokens": 1,
|
||||
"ok": true
|
||||
},
|
||||
{
|
||||
"input_tokens": 4096,
|
||||
"session": "f715284b17af4bc29ccb689f21ac61b7",
|
||||
"t_step1_client_unix": 1779885644.1534681,
|
||||
"t_step2_client_unix": 1779885644.4121828,
|
||||
"t_step2_end_unix": 1779885644.4760094,
|
||||
"t_prefill_s": 0.25868210894986987,
|
||||
"t_transfer_s": 0.06378706597024575,
|
||||
"t_followup_s": 0.020318155991844833,
|
||||
"cached_followup": 4080,
|
||||
"pull_completion_tokens": 1,
|
||||
"ok": true
|
||||
},
|
||||
{
|
||||
"input_tokens": 4096,
|
||||
"session": "b99bf61a90d54373a57af6a38d67581d",
|
||||
"t_step1_client_unix": 1779885644.497922,
|
||||
"t_step2_client_unix": 1779885644.7539463,
|
||||
"t_step2_end_unix": 1779885644.8172574,
|
||||
"t_prefill_s": 0.255985789000988,
|
||||
"t_transfer_s": 0.06327042501652613,
|
||||
"t_followup_s": 0.021031456999480724,
|
||||
"cached_followup": 4080,
|
||||
"pull_completion_tokens": 1,
|
||||
"ok": true
|
||||
},
|
||||
{
|
||||
"input_tokens": 4096,
|
||||
"session": "69925f5b50f74c48800999908730d9ec",
|
||||
"t_step1_client_unix": 1779885644.839903,
|
||||
"t_step2_client_unix": 1779885645.0941288,
|
||||
"t_step2_end_unix": 1779885645.1562943,
|
||||
"t_prefill_s": 0.2541897820192389,
|
||||
"t_transfer_s": 0.06213490100344643,
|
||||
"t_followup_s": 0.021715508948545903,
|
||||
"cached_followup": 4080,
|
||||
"pull_completion_tokens": 1,
|
||||
"ok": true
|
||||
},
|
||||
{
|
||||
"input_tokens": 4096,
|
||||
"session": "312743112bc84c0699a91e3110a63f03",
|
||||
"t_step1_client_unix": 1779885645.1795704,
|
||||
"t_step2_client_unix": 1779885645.4336376,
|
||||
"t_step2_end_unix": 1779885645.4955726,
|
||||
"t_prefill_s": 0.2540306959999725,
|
||||
"t_transfer_s": 0.06190510099986568,
|
||||
"t_followup_s": 0.020819986995775253,
|
||||
"cached_followup": 4080,
|
||||
"pull_completion_tokens": 1,
|
||||
"ok": true
|
||||
},
|
||||
{
|
||||
"input_tokens": 4096,
|
||||
"session": "e5975bd25e1b4aa586cbfe9b68370de2",
|
||||
"t_step1_client_unix": 1779885645.5179617,
|
||||
"t_step2_client_unix": 1779885645.7724028,
|
||||
"t_step2_end_unix": 1779885645.8355522,
|
||||
"t_prefill_s": 0.2544059020001441,
|
||||
"t_transfer_s": 0.06311210000421852,
|
||||
"t_followup_s": 0.02235932502662763,
|
||||
"cached_followup": 4080,
|
||||
"pull_completion_tokens": 1,
|
||||
"ok": true
|
||||
},
|
||||
{
|
||||
"input_tokens": 8192,
|
||||
"session": "f8989efa9261470e81c2891945b460a5",
|
||||
"t_step1_client_unix": 1779885645.8609917,
|
||||
"t_step2_client_unix": 1779885646.4364054,
|
||||
"t_step2_end_unix": 1779885646.4636223,
|
||||
"t_prefill_s": 0.575372110994067,
|
||||
"t_transfer_s": 0.02718952501891181,
|
||||
"t_followup_s": 0.024976509041152894,
|
||||
"cached_followup": 8176,
|
||||
"pull_completion_tokens": 1,
|
||||
"ok": true
|
||||
},
|
||||
{
|
||||
"input_tokens": 8192,
|
||||
"session": "3a8632e8562c4cbcaa32b57ba23bbf68",
|
||||
"t_step1_client_unix": 1779885646.4916308,
|
||||
"t_step2_client_unix": 1779885647.0670328,
|
||||
"t_step2_end_unix": 1779885647.0934644,
|
||||
"t_prefill_s": 0.5753633370040916,
|
||||
"t_transfer_s": 0.026399013004265726,
|
||||
"t_followup_s": 0.027285586984362453,
|
||||
"cached_followup": 8176,
|
||||
"pull_completion_tokens": 1,
|
||||
"ok": true
|
||||
},
|
||||
{
|
||||
"input_tokens": 8192,
|
||||
"session": "4d2cfc02defd492d89e90f652032f630",
|
||||
"t_step1_client_unix": 1779885647.1237462,
|
||||
"t_step2_client_unix": 1779885647.6985745,
|
||||
"t_step2_end_unix": 1779885647.8096595,
|
||||
"t_prefill_s": 0.574782071984373,
|
||||
"t_transfer_s": 0.11104170099133626,
|
||||
"t_followup_s": 0.02618999598780647,
|
||||
"cached_followup": 8176,
|
||||
"pull_completion_tokens": 1,
|
||||
"ok": true
|
||||
},
|
||||
{
|
||||
"input_tokens": 8192,
|
||||
"session": "3485a775304947ada42d61cfd39b7520",
|
||||
"t_step1_client_unix": 1779885647.8389955,
|
||||
"t_step2_client_unix": 1779885648.4133987,
|
||||
"t_step2_end_unix": 1779885648.5235367,
|
||||
"t_prefill_s": 0.574362020008266,
|
||||
"t_transfer_s": 0.11010575300315395,
|
||||
"t_followup_s": 0.028998883964959532,
|
||||
"cached_followup": 8176,
|
||||
"pull_completion_tokens": 1,
|
||||
"ok": true
|
||||
},
|
||||
{
|
||||
"input_tokens": 8192,
|
||||
"session": "6f87692fb5884094b7b141116ba739f8",
|
||||
"t_step1_client_unix": 1779885648.5556417,
|
||||
"t_step2_client_unix": 1779885649.1294918,
|
||||
"t_step2_end_unix": 1779885649.2386518,
|
||||
"t_prefill_s": 0.5738097460125573,
|
||||
"t_transfer_s": 0.1091307660099119,
|
||||
"t_followup_s": 0.025612210971303284,
|
||||
"cached_followup": 8176,
|
||||
"pull_completion_tokens": 1,
|
||||
"ok": true
|
||||
},
|
||||
{
|
||||
"input_tokens": 16384,
|
||||
"session": "c4e007ebdcf244cf95a8ccb3ab59ea0f",
|
||||
"t_step1_client_unix": 1779885649.270192,
|
||||
"t_step2_client_unix": 1779885650.7972357,
|
||||
"t_step2_end_unix": 1779885651.0219116,
|
||||
"t_prefill_s": 1.5270042870542966,
|
||||
"t_transfer_s": 0.22463428904302418,
|
||||
"t_followup_s": 0.03729638300137594,
|
||||
"cached_followup": 16368,
|
||||
"pull_completion_tokens": 1,
|
||||
"ok": true
|
||||
},
|
||||
{
|
||||
"input_tokens": 16384,
|
||||
"session": "6f34ab6672e14f41b7780323b30ce21e",
|
||||
"t_step1_client_unix": 1779885651.0651574,
|
||||
"t_step2_client_unix": 1779885652.5826244,
|
||||
"t_step2_end_unix": 1779885652.7871268,
|
||||
"t_prefill_s": 1.5174251759890467,
|
||||
"t_transfer_s": 0.2044643560075201,
|
||||
"t_followup_s": 0.03547715302556753,
|
||||
"cached_followup": 16368,
|
||||
"pull_completion_tokens": 1,
|
||||
"ok": true
|
||||
},
|
||||
{
|
||||
"input_tokens": 16384,
|
||||
"session": "ca02bd5aa32544f2b1b09096e5f63c8b",
|
||||
"t_step1_client_unix": 1779885652.8284712,
|
||||
"t_step2_client_unix": 1779885654.3465412,
|
||||
"t_step2_end_unix": 1779885654.5522587,
|
||||
"t_prefill_s": 1.5180304029490799,
|
||||
"t_transfer_s": 0.20567574101733044,
|
||||
"t_followup_s": 0.03611186001216993,
|
||||
"cached_followup": 16368,
|
||||
"pull_completion_tokens": 1,
|
||||
"ok": true
|
||||
},
|
||||
{
|
||||
"input_tokens": 16384,
|
||||
"session": "59047ab966574f72ab12693ad29b090f",
|
||||
"t_step1_client_unix": 1779885654.5942106,
|
||||
"t_step2_client_unix": 1779885656.1123874,
|
||||
"t_step2_end_unix": 1779885656.3158703,
|
||||
"t_prefill_s": 1.5181243289844133,
|
||||
"t_transfer_s": 0.20344976399792358,
|
||||
"t_followup_s": 0.035486461012624204,
|
||||
"cached_followup": 16368,
|
||||
"pull_completion_tokens": 1,
|
||||
"ok": true
|
||||
},
|
||||
{
|
||||
"input_tokens": 16384,
|
||||
"session": "9a6e2228fd40414c8fd2b78bf399552c",
|
||||
"t_step1_client_unix": 1779885656.357184,
|
||||
"t_step2_client_unix": 1779885657.8739264,
|
||||
"t_step2_end_unix": 1779885658.079835,
|
||||
"t_prefill_s": 1.5166968539706431,
|
||||
"t_transfer_s": 0.20585927896900102,
|
||||
"t_followup_s": 0.03941280301660299,
|
||||
"cached_followup": 16368,
|
||||
"pull_completion_tokens": 1,
|
||||
"ok": true
|
||||
},
|
||||
{
|
||||
"input_tokens": 32768,
|
||||
"session": "5cf9fb856b674c0a9237c06980287cb3",
|
||||
"t_step1_client_unix": 1779885658.1312225,
|
||||
"t_step2_client_unix": 1779885662.6780548,
|
||||
"t_step2_end_unix": 1779885663.0500648,
|
||||
"t_prefill_s": 4.54679255298106,
|
||||
"t_transfer_s": 0.3719712099991739,
|
||||
"t_followup_s": 0.05820785299874842,
|
||||
"cached_followup": 32752,
|
||||
"pull_completion_tokens": 1,
|
||||
"ok": true
|
||||
},
|
||||
{
|
||||
"input_tokens": 32768,
|
||||
"session": "524f192c92834453bc48052bc6516126",
|
||||
"t_step1_client_unix": 1779885663.1199641,
|
||||
"t_step2_client_unix": 1779885667.6679316,
|
||||
"t_step2_end_unix": 1779885668.047607,
|
||||
"t_prefill_s": 4.547927976993378,
|
||||
"t_transfer_s": 0.3796397229889408,
|
||||
"t_followup_s": 0.058314190013334155,
|
||||
"cached_followup": 32752,
|
||||
"pull_completion_tokens": 1,
|
||||
"ok": true
|
||||
},
|
||||
{
|
||||
"input_tokens": 32768,
|
||||
"session": "fb7aa2c99bf24febb83536c54e30a36d",
|
||||
"t_step1_client_unix": 1779885668.117419,
|
||||
"t_step2_client_unix": 1779885672.6644971,
|
||||
"t_step2_end_unix": 1779885673.0345452,
|
||||
"t_prefill_s": 4.547034470015205,
|
||||
"t_transfer_s": 0.3700092720100656,
|
||||
"t_followup_s": 0.05658108199713752,
|
||||
"cached_followup": 32752,
|
||||
"pull_completion_tokens": 1,
|
||||
"ok": true
|
||||
},
|
||||
{
|
||||
"input_tokens": 32768,
|
||||
"session": "85612444c3224b65afb09d65a829b5cc",
|
||||
"t_step1_client_unix": 1779885673.1025856,
|
||||
"t_step2_client_unix": 1779885677.6487892,
|
||||
"t_step2_end_unix": 1779885678.2856307,
|
||||
"t_prefill_s": 4.546165039995685,
|
||||
"t_transfer_s": 0.6368027949938551,
|
||||
"t_followup_s": 0.05732736398931593,
|
||||
"cached_followup": 32752,
|
||||
"pull_completion_tokens": 1,
|
||||
"ok": true
|
||||
},
|
||||
{
|
||||
"input_tokens": 32768,
|
||||
"session": "fe51fbacc4d544b3a500b099f852b19b",
|
||||
"t_step1_client_unix": 1779885678.3544915,
|
||||
"t_step2_client_unix": 1779885682.9011996,
|
||||
"t_step2_end_unix": 1779885683.542081,
|
||||
"t_prefill_s": 4.546669405011926,
|
||||
"t_transfer_s": 0.6408427829737775,
|
||||
"t_followup_s": 0.057976288022473454,
|
||||
"cached_followup": 32752,
|
||||
"pull_completion_tokens": 1,
|
||||
"ok": true
|
||||
},
|
||||
{
|
||||
"input_tokens": 65536,
|
||||
"session": "9628b04d4d9e4a9fb26decccef84d32f",
|
||||
"t_step1_client_unix": 1779885683.6233714,
|
||||
"t_step2_client_unix": 1779885698.8039112,
|
||||
"t_step2_end_unix": 1779885700.9075115,
|
||||
"t_prefill_s": 15.18050062697148,
|
||||
"t_transfer_s": 2.103562032978516,
|
||||
"t_followup_s": 0.0921451430185698,
|
||||
"cached_followup": 65520,
|
||||
"pull_completion_tokens": 1,
|
||||
"ok": true
|
||||
},
|
||||
{
|
||||
"input_tokens": 65536,
|
||||
"session": "b8bd7328d2274613b473e932db11081c",
|
||||
"t_step1_client_unix": 1779885701.0228417,
|
||||
"t_step2_client_unix": 1779885716.2263126,
|
||||
"t_step2_end_unix": 1779885718.3809934,
|
||||
"t_prefill_s": 15.200471161981113,
|
||||
"t_transfer_s": 2.1520405350020155,
|
||||
"t_followup_s": 0.11590708000585437,
|
||||
"cached_followup": 65520,
|
||||
"pull_completion_tokens": 1,
|
||||
"ok": true
|
||||
},
|
||||
{
|
||||
"input_tokens": 65536,
|
||||
"session": "8ca1dfe708c945e59c194ee59b7de87d",
|
||||
"t_step1_client_unix": 1779885718.5226529,
|
||||
"t_step2_client_unix": 1779885733.706992,
|
||||
"t_step2_end_unix": 1779885735.6409419,
|
||||
"t_prefill_s": 15.184306180977728,
|
||||
"t_transfer_s": 1.9339170460007153,
|
||||
"t_followup_s": 0.09697375196265057,
|
||||
"cached_followup": 65520,
|
||||
"pull_completion_tokens": 1,
|
||||
"ok": true
|
||||
},
|
||||
{
|
||||
"input_tokens": 65536,
|
||||
"session": "aa7555475c87427ab1a1d95277545655",
|
||||
"t_step1_client_unix": 1779885735.7609437,
|
||||
"t_step2_client_unix": 1779885750.9334345,
|
||||
"t_step2_end_unix": 1779885752.0260077,
|
||||
"t_prefill_s": 15.172441756993067,
|
||||
"t_transfer_s": 1.09253996796906,
|
||||
"t_followup_s": 0.08802600798662752,
|
||||
"cached_followup": 65520,
|
||||
"pull_completion_tokens": 1,
|
||||
"ok": true
|
||||
},
|
||||
{
|
||||
"input_tokens": 65536,
|
||||
"session": "dece39eeadd645c7a8e563763f93c35f",
|
||||
"t_step1_client_unix": 1779885752.1370633,
|
||||
"t_step2_client_unix": 1779885767.3073487,
|
||||
"t_step2_end_unix": 1779885768.0632908,
|
||||
"t_prefill_s": 15.17024433100596,
|
||||
"t_transfer_s": 0.7559080219944008,
|
||||
"t_followup_s": 0.08695380098652095,
|
||||
"cached_followup": 65520,
|
||||
"pull_completion_tokens": 1,
|
||||
"ok": true
|
||||
},
|
||||
{
|
||||
"input_tokens": 131072,
|
||||
"session": "98a339dad05a463e8ac1425413221d27",
|
||||
"t_step1_client_unix": 1779885768.196416,
|
||||
"t_step2_client_unix": 1779885822.9193504,
|
||||
"t_step2_end_unix": 1779885824.3877244,
|
||||
"t_prefill_s": 54.72287795698503,
|
||||
"t_transfer_s": 1.4683381259674206,
|
||||
"t_followup_s": 0.16380483901593834,
|
||||
"cached_followup": 131056,
|
||||
"pull_completion_tokens": 1,
|
||||
"ok": true
|
||||
},
|
||||
{
|
||||
"input_tokens": 131072,
|
||||
"session": "4ce2151ad3d4493397bb77c7a62871e7",
|
||||
"t_step1_client_unix": 1779885824.5981278,
|
||||
"t_step2_client_unix": 1779885879.3268123,
|
||||
"t_step2_end_unix": 1779885884.7123976,
|
||||
"t_prefill_s": 54.72864995297277,
|
||||
"t_transfer_s": 5.38555136800278,
|
||||
"t_followup_s": 0.17249851900851354,
|
||||
"cached_followup": 131056,
|
||||
"pull_completion_tokens": 1,
|
||||
"ok": true
|
||||
},
|
||||
{
|
||||
"input_tokens": 131072,
|
||||
"session": "6a214438dcf74f36bbefaaed3583c04c",
|
||||
"t_step1_client_unix": 1779885884.9482107,
|
||||
"t_step2_client_unix": 1779885939.6747935,
|
||||
"t_step2_end_unix": 1779885949.2077906,
|
||||
"t_prefill_s": 54.72654917498585,
|
||||
"t_transfer_s": 9.532963149016723,
|
||||
"t_followup_s": 0.1658564520184882,
|
||||
"cached_followup": 131056,
|
||||
"pull_completion_tokens": 1,
|
||||
"ok": true
|
||||
},
|
||||
{
|
||||
"input_tokens": 131072,
|
||||
"session": "4cd33910256e43768a2f475650d7714c",
|
||||
"t_step1_client_unix": 1779885949.4215908,
|
||||
"t_step2_client_unix": 1779886004.145101,
|
||||
"t_step2_end_unix": 1779886007.2428164,
|
||||
"t_prefill_s": 54.72347703698324,
|
||||
"t_transfer_s": 3.0976800689822994,
|
||||
"t_followup_s": 0.16629640298197046,
|
||||
"cached_followup": 131056,
|
||||
"pull_completion_tokens": 1,
|
||||
"ok": true
|
||||
},
|
||||
{
|
||||
"input_tokens": 131072,
|
||||
"session": "7e3626253b5d4fe29b3cec1e3598bb39",
|
||||
"t_step1_client_unix": 1779886007.4549844,
|
||||
"t_step2_client_unix": 1779886062.0564816,
|
||||
"t_step2_end_unix": 1779886063.7279255,
|
||||
"t_prefill_s": 54.60145856201416,
|
||||
"t_transfer_s": 1.6714086790452711,
|
||||
"t_followup_s": 0.1628041280200705,
|
||||
"cached_followup": 131056,
|
||||
"pull_completion_tokens": 1,
|
||||
"ok": true
|
||||
}
|
||||
],
|
||||
"summary": [
|
||||
{
|
||||
"input_tokens": 512,
|
||||
"n_ok": 5,
|
||||
"transfer_s_mean": 0.019290994200855494,
|
||||
"transfer_s_p50": 0.019964047998655587,
|
||||
"transfer_s_p90": 0.020512140006758273,
|
||||
"transfer_s_min": 0.016899197013117373,
|
||||
"transfer_s_max": 0.020512140006758273
|
||||
},
|
||||
{
|
||||
"input_tokens": 1024,
|
||||
"n_ok": 5,
|
||||
"transfer_s_mean": 0.027316297392826528,
|
||||
"transfer_s_p50": 0.026693789986893535,
|
||||
"transfer_s_p90": 0.029867444012779742,
|
||||
"transfer_s_min": 0.02630024799145758,
|
||||
"transfer_s_max": 0.029867444012779742
|
||||
},
|
||||
{
|
||||
"input_tokens": 2048,
|
||||
"n_ok": 5,
|
||||
"transfer_s_mean": 0.029737486818339674,
|
||||
"transfer_s_p50": 0.03704743104754016,
|
||||
"transfer_s_p90": 0.037401642999611795,
|
||||
"transfer_s_min": 0.01783871802035719,
|
||||
"transfer_s_max": 0.037401642999611795
|
||||
},
|
||||
{
|
||||
"input_tokens": 4096,
|
||||
"n_ok": 5,
|
||||
"transfer_s_mean": 0.0628419185988605,
|
||||
"transfer_s_p50": 0.06311210000421852,
|
||||
"transfer_s_p90": 0.06378706597024575,
|
||||
"transfer_s_min": 0.06190510099986568,
|
||||
"transfer_s_max": 0.06378706597024575
|
||||
},
|
||||
{
|
||||
"input_tokens": 8192,
|
||||
"n_ok": 5,
|
||||
"transfer_s_mean": 0.07677335160551593,
|
||||
"transfer_s_p50": 0.1091307660099119,
|
||||
"transfer_s_p90": 0.11104170099133626,
|
||||
"transfer_s_min": 0.026399013004265726,
|
||||
"transfer_s_max": 0.11104170099133626
|
||||
},
|
||||
{
|
||||
"input_tokens": 16384,
|
||||
"n_ok": 5,
|
||||
"transfer_s_mean": 0.20881668580695986,
|
||||
"transfer_s_p50": 0.20567574101733044,
|
||||
"transfer_s_p90": 0.22463428904302418,
|
||||
"transfer_s_min": 0.20344976399792358,
|
||||
"transfer_s_max": 0.22463428904302418
|
||||
},
|
||||
{
|
||||
"input_tokens": 32768,
|
||||
"n_ok": 5,
|
||||
"transfer_s_mean": 0.4798531565931626,
|
||||
"transfer_s_p50": 0.3796397229889408,
|
||||
"transfer_s_p90": 0.6408427829737775,
|
||||
"transfer_s_min": 0.3700092720100656,
|
||||
"transfer_s_max": 0.6408427829737775
|
||||
},
|
||||
{
|
||||
"input_tokens": 65536,
|
||||
"n_ok": 5,
|
||||
"transfer_s_mean": 1.6075935207889416,
|
||||
"transfer_s_p50": 1.9339170460007153,
|
||||
"transfer_s_p90": 2.1520405350020155,
|
||||
"transfer_s_min": 0.7559080219944008,
|
||||
"transfer_s_max": 2.1520405350020155
|
||||
},
|
||||
{
|
||||
"input_tokens": 131072,
|
||||
"n_ok": 5,
|
||||
"transfer_s_mean": 4.2311882782028984,
|
||||
"transfer_s_p50": 3.0976800689822994,
|
||||
"transfer_s_p90": 9.532963149016723,
|
||||
"transfer_s_min": 1.4683381259674206,
|
||||
"transfer_s_max": 9.532963149016723
|
||||
}
|
||||
]
|
||||
}
|
||||
BIN
figs/mb2_transfer_bw_compare.png
Normal file
BIN
figs/mb2_transfer_bw_compare.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 78 KiB |
BIN
figs/mb2_transfer_bw_inter.png
Normal file
BIN
figs/mb2_transfer_bw_inter.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 71 KiB |
BIN
figs/mb2_transfer_time_compare.png
Normal file
BIN
figs/mb2_transfer_time_compare.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 104 KiB |
BIN
figs/mb2_transfer_time_inter.png
Normal file
BIN
figs/mb2_transfer_time_inter.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 95 KiB |
80
microbench/fresh_setup/analyze_mb2_send_only.py
Normal file
80
microbench/fresh_setup/analyze_mb2_send_only.py
Normal file
@@ -0,0 +1,80 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Per-size pure_transfer aggregate from A's send_blocks events alone.
|
||||
|
||||
Used when B's receive_kv events are missing (e.g. EngineCore env-var
|
||||
propagation failed on the consumer host). Pure transfer time is still
|
||||
recoverable from the producer side.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import statistics
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def main() -> None:
    """Aggregate per-size pure-transfer statistics from A's send_blocks log.

    Reads the JSON-lines file given by ``--a-log``, keeps the events whose
    ``event`` field is ``"send_blocks"``, groups their ``duration_s`` by the
    token count derived from ``total_bytes``, prints a table, and writes
    ``{"summary": [...]}`` as JSON to ``--out`` (parent directories are
    created on demand).

    Lines that are not valid JSON are ignored.  ``send_blocks`` events that
    lack ``total_bytes`` / ``duration_s``, or whose duration is not a
    positive finite number, are skipped and counted instead of aborting the
    whole run with ``KeyError`` / ``ZeroDivisionError``.
    """
    p = argparse.ArgumentParser()
    p.add_argument("--a-log", type=Path, required=True)
    p.add_argument("--out", type=Path, required=True)
    p.add_argument("--min-tokens", type=int, default=32,
                   help="Skip events smaller than this (drop the spurious "
                        "init-time tiny sends)")
    p.add_argument("--bytes-per-token", type=int, default=98304,
                   help="KV bytes per token used to convert total_bytes "
                        "into a token count")
    args = p.parse_args()
    if args.bytes_per_token <= 0:
        p.error("--bytes-per-token must be positive")
    bytes_per_token = args.bytes_per_token

    n_events = 0      # every line that parsed as JSON, of any event type
    n_malformed = 0   # send_blocks events that could not be used
    by_size: dict[int, list[float]] = {}
    with args.a_log.open(encoding="utf-8") as f:
        for line in f:
            try:
                e = json.loads(line)
            except json.JSONDecodeError:
                # Blank or partially written lines are expected in a live log.
                continue
            n_events += 1
            if not isinstance(e, dict) or e.get("event") != "send_blocks":
                continue
            try:
                sz_tokens = int(e["total_bytes"]) // bytes_per_token
            except (KeyError, TypeError, ValueError, OverflowError):
                n_malformed += 1
                continue
            if sz_tokens < args.min_tokens:
                continue
            try:
                duration_s = float(e["duration_s"])
            except (KeyError, TypeError, ValueError):
                n_malformed += 1
                continue
            # Rejects zero, negative, NaN and infinite durations, all of
            # which would break the bandwidth division or the JSON output.
            if not 0.0 < duration_s < float("inf"):
                n_malformed += 1
                continue
            by_size.setdefault(sz_tokens, []).append(duration_s)

    summary = []
    for sz in sorted(by_size):
        durs = by_size[sz]
        # Size is reconstructed from the whole-token count, so every event
        # in a bucket is attributed the same number of bytes.
        sz_bytes = sz * bytes_per_token
        sz_mib = sz_bytes / (1024 * 1024)
        bw = [sz_bytes / d / 1e9 for d in durs]  # decimal GB per second
        summary.append({
            "input_tokens": sz,
            "kv_mib": round(sz_mib, 1),
            "n": len(durs),
            "pure_transfer_ms_mean": round(statistics.mean(durs) * 1000, 2),
            "pure_transfer_ms_p50": round(statistics.median(durs) * 1000, 2),
            "pure_transfer_ms_min": round(min(durs) * 1000, 2),
            "pure_transfer_ms_max": round(max(durs) * 1000, 2),
            "throughput_gbps_mean": round(statistics.mean(bw), 2),
            "throughput_gbps_p50": round(statistics.median(bw), 2),
            "throughput_gbps_max": round(max(bw), 2),
        })

    print(f"loaded {n_events} events; kept {sum(s['n'] for s in summary)} send_blocks")
    if n_malformed:
        print(f"skipped {n_malformed} malformed send_blocks events")
    print()
    print(f"{'in_tok':>8} {'KV_MiB':>8} {'n':>4} "
          f"{'pure_p50':>10} {'pure_min':>10} {'pure_max':>10} "
          f"{'GB/s_p50':>10} {'GB/s_max':>10}")
    for s in summary:
        print(f"{s['input_tokens']:>8} {s['kv_mib']:>8.1f} {s['n']:>4} "
              f"{s['pure_transfer_ms_p50']:>10.1f} "
              f"{s['pure_transfer_ms_min']:>10.1f} "
              f"{s['pure_transfer_ms_max']:>10.1f} "
              f"{s['throughput_gbps_p50']:>10.2f} "
              f"{s['throughput_gbps_max']:>10.2f}")

    args.out.parent.mkdir(parents=True, exist_ok=True)
    args.out.write_text(json.dumps({"summary": summary}, indent=2))
    print(f"\nwrote {args.out}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -22,9 +22,10 @@ def main() -> None:
|
||||
args = p.parse_args()
|
||||
|
||||
d = json.loads(args.breakdown.read_text())
|
||||
# Drop the spurious 16-token events (zero-byte sends produced by the
|
||||
# connector during request init; not a real KV transfer).
|
||||
rows = [r for r in d["rows"] if r["input_tokens_est"] >= 64]
|
||||
# `rows` is optional (send-only analyzer skips per-request joining).
|
||||
# Drop the spurious 16-token events from any rows present.
|
||||
if "rows" in d:
|
||||
_ = [r for r in d["rows"] if r["input_tokens_est"] >= 64]
|
||||
summary = [s for s in d["summary"] if s["input_tokens"] >= 64]
|
||||
|
||||
kv_mib = [s["kv_mib"] for s in summary]
|
||||
|
||||
99
microbench/fresh_setup/plot_mb2_compare.py
Normal file
99
microbench/fresh_setup/plot_mb2_compare.py
Normal file
@@ -0,0 +1,99 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Overlay intra-node and inter-node MB2 curves on the same axes."""
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
import matplotlib
|
||||
matplotlib.use("Agg")
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
|
||||
|
||||
def load(path: Path, *, min_tokens: int = 64) -> list[dict]:
    """Return the ``summary`` rows of an MB2 breakdown JSON file.

    Args:
        path: JSON file whose top-level object has a ``summary`` list; each
            entry must carry an ``input_tokens`` value.
        min_tokens: Rows with fewer input tokens than this are dropped.  The
            default of 64 preserves the historical cutoff of this script.

    Returns:
        The retained summary rows, in file order.

    Raises:
        KeyError: If ``summary`` (or a row's ``input_tokens``) is missing.
    """
    d = json.loads(path.read_text(encoding="utf-8"))
    return [s for s in d["summary"] if s["input_tokens"] >= min_tokens]
|
||||
|
||||
|
||||
def main() -> None:
    """Draw intra-node vs inter-node MB2 overlays (time and bandwidth).

    Loads the two summary files given by ``--intra`` and ``--inter`` with
    ``load`` and writes two PNG figures: pure transfer time (log-log, with
    min/max error bars around p50) to ``--out-time`` and effective bandwidth
    (p50 and max) to ``--out-bw``.  Output directories are created on demand.
    """
    p = argparse.ArgumentParser()
    p.add_argument("--intra", type=Path, required=True)
    p.add_argument("--inter", type=Path, required=True)
    p.add_argument("--out-time", type=Path, default=Path("figs/mb2_transfer_time_compare.png"))
    p.add_argument("--out-bw", type=Path, default=Path("figs/mb2_transfer_bw_compare.png"))
    args = p.parse_args()

    intra = load(args.intra)
    inter = load(args.inter)

    def axis_arrays(rows):
        """Split summary rows into parallel per-column lists."""
        kv = [r["kv_mib"] for r in rows]
        p50 = [r["pure_transfer_ms_p50"] for r in rows]
        mn = [r["pure_transfer_ms_min"] for r in rows]
        mx = [r["pure_transfer_ms_max"] for r in rows]
        bw_p50 = [r["throughput_gbps_p50"] for r in rows]
        bw_max = [r["throughput_gbps_max"] for r in rows]
        return kv, p50, mn, mx, bw_p50, bw_max

    def asym_err(p50, lo, hi):
        """Return [below, above] error-bar lengths measured from p50."""
        centre = np.array(p50)
        return [centre - np.array(lo), np.array(hi) - centre]

    ai_kv, ai_p50, ai_mn, ai_mx, ai_bw_p50, ai_bw_max = axis_arrays(intra)
    bi_kv, bi_p50, bi_mn, bi_mx, bi_bw_p50, bi_bw_max = axis_arrays(inter)

    # ---- transfer time ----
    fig, ax = plt.subplots(figsize=(8.5, 5))
    ax.errorbar(ai_kv, ai_p50,
                yerr=asym_err(ai_p50, ai_mn, ai_mx),
                fmt="o-", color="#1f77b4", lw=2, markersize=7, capsize=4,
                label="intra-node (dash1 GPU 0↔1)")
    ax.errorbar(bi_kv, bi_p50,
                yerr=asym_err(bi_p50, bi_mn, bi_mx),
                fmt="s--", color="#d62728", lw=2, markersize=7, capsize=4,
                label="inter-node (dash1 GPU0 → dash2 GPU0)")
    # Ideal 9.7 GB/s reference, sampled at every size present in either
    # sweep so the line spans both curves even if their sizes differ.
    ref_x = np.array(sorted(set(ai_kv) | set(bi_kv)))
    ref_y_ms = (ref_x * 1024 * 1024) / (9.7 * 1e9) * 1000
    ax.plot(ref_x, ref_y_ms, "--", color="#888", alpha=0.5,
            label="9.7 GB/s reference")
    # NOTE(review): 11500 MiB is ~11.2 GiB while the label says 11.5 GiB
    # (= 11776 MiB) — confirm which value is intended.
    ax.axvline(11500, color="#7a1d1d", lw=0.8, ls=":", alpha=0.5)
    # x in data units, y as a fraction of the axes height: keeps the label
    # inside the plot regardless of the (log) y-range of the data.
    ax.text(11500, 0.02, "p99 agentic req\n11.5 GiB",
            transform=ax.get_xaxis_transform(),
            fontsize=8, color="#7a1d1d", ha="center", va="bottom")
    ax.set_xscale("log")
    ax.set_yscale("log")
    ax.set_xlabel("KV transfer size (MiB)")
    ax.set_ylabel("Pure transfer time (ms, log)")
    ax.set_title("MB2 intra vs inter — Mooncake transfer cost is topology-independent\n"
                 "(both paths go through 200 Gbps RDMA NIC; intra-node does not use NVLink)")
    ax.grid(True, which="both", alpha=0.3)
    ax.legend(loc="upper left", fontsize=9)
    args.out_time.parent.mkdir(parents=True, exist_ok=True)
    fig.tight_layout()
    fig.savefig(args.out_time, dpi=150)
    plt.close(fig)
    print(f"wrote {args.out_time}")

    # ---- bandwidth ----
    fig, ax = plt.subplots(figsize=(8.5, 5))
    ax.plot(ai_kv, ai_bw_p50, "o-", color="#1f77b4", lw=2, markersize=7,
            label="intra p50")
    ax.plot(ai_kv, ai_bw_max, "x--", color="#1f77b4", lw=1.2, markersize=8,
            alpha=0.7, label="intra max")
    ax.plot(bi_kv, bi_bw_p50, "s-", color="#d62728", lw=2, markersize=7,
            label="inter p50")
    ax.plot(bi_kv, bi_bw_max, "+--", color="#d62728", lw=1.2, markersize=8,
            alpha=0.7, label="inter max")
    ax.axhline(9.7, color="#888", ls="--", alpha=0.5,
               label="steady-state ≈ 9.7 GB/s")
    ax.set_xscale("log")
    ax.set_xlabel("KV transfer size (MiB)")
    ax.set_ylabel("Effective bandwidth (GB/s)")
    ax.set_ylim(0, 12)
    ax.set_title("MB2 intra vs inter — bandwidth")
    ax.grid(True, which="both", alpha=0.3)
    ax.legend(loc="lower left", fontsize=9)
    args.out_bw.parent.mkdir(parents=True, exist_ok=True)
    fig.tight_layout()
    fig.savefig(args.out_bw, dpi=150)
    plt.close(fig)
    print(f"wrote {args.out_bw}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Reference in New Issue
Block a user