Per-step timing from engine_step.jsonl definitively resolves H3: plain: 53 μs/step (p50); noop_connector: 69 μs/step (+16 μs, a negligible framework cost); mooncake_producer: 1461 μs/step (of which build_connector_meta = 1386 μs); mooncake_both: 1452 μs/step (same as producer). The substrate tax is NOT in the v1 framework — it is specifically in Mooncake's build_connector_meta(), which walks set(cache.keys()) on every scheduler step (O(|cache|) per step; see E2 audit §6.5). Accumulated per-request tax: 256 decode steps × 1.4 ms ≈ 358 ms. Observed TTFT (p90) tax at rate=1.0: plain 378 ms vs mooncake_both 422 ms (+12%). At rate=2.0 (near saturation) the p90 TTFT tax is +29%, approaching trace-replay's +45%. Also fixes kill_vllm() to properly kill EngineCore subprocesses.
409 lines
11 KiB
JSON
409 lines
11 KiB
JSON
plain
|
|
[
|
|
{
|
|
"rate_target": 0.5,
|
|
"input_tokens": 4096,
|
|
"output_tokens": 256,
|
|
"duration_actual_s": 424.285424329,
|
|
"n_completed_total": 201,
|
|
"n_after_warmup": 190,
|
|
"n_dropped": 0,
|
|
"n_errors": 0,
|
|
"ttft_ms_p50": 253.27869,
|
|
"ttft_ms_p90": 290.162753,
|
|
"ttft_ms_p99": 567.620172,
|
|
"tpot_ms_p50": 6.3617311960784315,
|
|
"tpot_ms_p90": 9.665774274509804,
|
|
"tpot_ms_p99": 13.281522015686274,
|
|
"e2e_ms_p50": 1880.48985,
|
|
"e2e_ms_p90": 2745.165083,
|
|
"e2e_ms_p99": 3789.202891,
|
|
"throughput_effective_rps": 0.45862101064196187,
|
|
"throughput_ratio": 0.9172420212839237,
|
|
"inflight_p50": 2,
|
|
"inflight_p90": 4,
|
|
"phase": "A",
|
|
"cell": "A_r0.5_4096x256"
|
|
},
|
|
{
|
|
"rate_target": 1.0,
|
|
"input_tokens": 4096,
|
|
"output_tokens": 256,
|
|
"duration_actual_s": 207.950502616,
|
|
"n_completed_total": 201,
|
|
"n_after_warmup": 183,
|
|
"n_dropped": 0,
|
|
"n_errors": 0,
|
|
"ttft_ms_p50": 259.603361,
|
|
"ttft_ms_p90": 378.280699,
|
|
"ttft_ms_p99": 524.687149,
|
|
"tpot_ms_p50": 8.502388435294117,
|
|
"tpot_ms_p90": 15.440471870588235,
|
|
"tpot_ms_p99": 17.428640921568626,
|
|
"e2e_ms_p50": 2493.103336,
|
|
"e2e_ms_p90": 4232.406556,
|
|
"e2e_ms_p99": 4871.38149,
|
|
"throughput_effective_rps": 0.9244735304107706,
|
|
"throughput_ratio": 0.9244735304107706,
|
|
"inflight_p50": 3,
|
|
"inflight_p90": 7,
|
|
"phase": "A",
|
|
"cell": "A_r1.0_4096x256"
|
|
},
|
|
{
|
|
"rate_target": 2.0,
|
|
"input_tokens": 4096,
|
|
"output_tokens": 256,
|
|
"duration_actual_s": 120.789826906,
|
|
"n_completed_total": 223,
|
|
"n_after_warmup": 205,
|
|
"n_dropped": 0,
|
|
"n_errors": 0,
|
|
"ttft_ms_p50": 293.365502,
|
|
"ttft_ms_p90": 560.857942,
|
|
"ttft_ms_p99": 840.781127,
|
|
"tpot_ms_p50": 25.272439737254903,
|
|
"tpot_ms_p90": 35.26223112156863,
|
|
"tpot_ms_p99": 42.42426647843137,
|
|
"e2e_ms_p50": 6825.599527,
|
|
"e2e_ms_p90": 9263.482823,
|
|
"e2e_ms_p99": 11140.719046,
|
|
"throughput_effective_rps": 1.8503503952030986,
|
|
"throughput_ratio": 0.9251751976015493,
|
|
"inflight_p50": 13,
|
|
"inflight_p90": 23,
|
|
"phase": "A",
|
|
"cell": "A_r2.0_4096x256"
|
|
}
|
|
]
noop_connector
|
|
[
|
|
{
|
|
"rate_target": 0.5,
|
|
"input_tokens": 4096,
|
|
"output_tokens": 256,
|
|
"duration_actual_s": 401.135882038,
|
|
"n_completed_total": 219,
|
|
"n_after_warmup": 211,
|
|
"n_dropped": 0,
|
|
"n_errors": 0,
|
|
"ttft_ms_p50": 264.899183,
|
|
"ttft_ms_p90": 465.774583,
|
|
"ttft_ms_p99": 683.105893,
|
|
"tpot_ms_p50": 11.97862425490196,
|
|
"tpot_ms_p90": 21.070080823529413,
|
|
"tpot_ms_p99": 25.905328713725492,
|
|
"e2e_ms_p50": 3317.307319,
|
|
"e2e_ms_p90": 5651.525028,
|
|
"e2e_ms_p99": 6872.18281,
|
|
"throughput_effective_rps": 0.5394544701462617,
|
|
"throughput_ratio": 1.0789089402925234,
|
|
"inflight_p50": 3,
|
|
"inflight_p90": 6,
|
|
"phase": "A",
|
|
"cell": "A_r0.5_4096x256"
|
|
},
|
|
{
|
|
"rate_target": 1.0,
|
|
"input_tokens": 4096,
|
|
"output_tokens": 256,
|
|
"duration_actual_s": 217.019525223,
|
|
"n_completed_total": 205,
|
|
"n_after_warmup": 195,
|
|
"n_dropped": 0,
|
|
"n_errors": 0,
|
|
"ttft_ms_p50": 317.569147,
|
|
"ttft_ms_p90": 616.471598,
|
|
"ttft_ms_p99": 871.3817,
|
|
"tpot_ms_p50": 25.686870282352942,
|
|
"tpot_ms_p90": 37.06463774901961,
|
|
"tpot_ms_p99": 42.32126850588235,
|
|
"e2e_ms_p50": 6867.08031,
|
|
"e2e_ms_p90": 9953.52699,
|
|
"e2e_ms_p99": 11064.851185,
|
|
"throughput_effective_rps": 0.9419401372404239,
|
|
"throughput_ratio": 0.9419401372404239,
|
|
"inflight_p50": 7,
|
|
"inflight_p90": 12,
|
|
"phase": "A",
|
|
"cell": "A_r1.0_4096x256"
|
|
},
|
|
{
|
|
"rate_target": 2.0,
|
|
"input_tokens": 4096,
|
|
"output_tokens": 256,
|
|
"duration_actual_s": 238.461594285,
|
|
"n_completed_total": 339,
|
|
"n_after_warmup": 315,
|
|
"n_dropped": 0,
|
|
"n_errors": 0,
|
|
"ttft_ms_p50": 34774.269216,
|
|
"ttft_ms_p90": 64874.196589,
|
|
"ttft_ms_p99": 70307.478291,
|
|
"tpot_ms_p50": 90.2518300627451,
|
|
"tpot_ms_p90": 92.71257150196078,
|
|
"tpot_ms_p99": 101.1596845882353,
|
|
"e2e_ms_p50": 57836.284489,
|
|
"e2e_ms_p90": 80611.595233,
|
|
"e2e_ms_p99": 85181.738441,
|
|
"throughput_effective_rps": 1.3787875418878743,
|
|
"throughput_ratio": 0.6893937709439372,
|
|
"inflight_p50": 74,
|
|
"inflight_p90": 125,
|
|
"phase": "A",
|
|
"cell": "A_r2.0_4096x256"
|
|
}
|
|
]
mooncake_producer
|
|
[
|
|
{
|
|
"rate_target": 0.5,
|
|
"input_tokens": 4096,
|
|
"output_tokens": 256,
|
|
"duration_actual_s": 401.356396668,
|
|
"n_completed_total": 203,
|
|
"n_after_warmup": 197,
|
|
"n_dropped": 0,
|
|
"n_errors": 0,
|
|
"ttft_ms_p50": 264.054176,
|
|
"ttft_ms_p90": 453.351916,
|
|
"ttft_ms_p99": 663.441612,
|
|
"tpot_ms_p50": 10.750173619607843,
|
|
"tpot_ms_p90": 16.46543019607843,
|
|
"tpot_ms_p99": 19.549762741176473,
|
|
"e2e_ms_p50": 3036.897016,
|
|
"e2e_ms_p90": 4472.36076,
|
|
"e2e_ms_p99": 5392.21106,
|
|
"throughput_effective_rps": 0.5033774883386443,
|
|
"throughput_ratio": 1.0067549766772885,
|
|
"inflight_p50": 2,
|
|
"inflight_p90": 5,
|
|
"phase": "A",
|
|
"cell": "A_r0.5_4096x256"
|
|
},
|
|
{
|
|
"rate_target": 1.0,
|
|
"input_tokens": 4096,
|
|
"output_tokens": 256,
|
|
"duration_actual_s": 232.645091396,
|
|
"n_completed_total": 207,
|
|
"n_after_warmup": 199,
|
|
"n_dropped": 0,
|
|
"n_errors": 0,
|
|
"ttft_ms_p50": 293.729297,
|
|
"ttft_ms_p90": 615.19522,
|
|
"ttft_ms_p99": 817.593711,
|
|
"tpot_ms_p50": 25.506409952941176,
|
|
"tpot_ms_p90": 39.46163431372549,
|
|
"tpot_ms_p99": 48.344151764705884,
|
|
"e2e_ms_p50": 6860.61264,
|
|
"e2e_ms_p90": 10558.202004,
|
|
"e2e_ms_p99": 12793.997244,
|
|
"throughput_effective_rps": 0.8937991794575678,
|
|
"throughput_ratio": 0.8937991794575678,
|
|
"inflight_p50": 6,
|
|
"inflight_p90": 14,
|
|
"phase": "A",
|
|
"cell": "A_r1.0_4096x256"
|
|
},
|
|
{
|
|
"rate_target": 2.0,
|
|
"input_tokens": 4096,
|
|
"output_tokens": 256,
|
|
"duration_actual_s": 232.801098162,
|
|
"n_completed_total": 326,
|
|
"n_after_warmup": 313,
|
|
"n_dropped": 0,
|
|
"n_errors": 0,
|
|
"ttft_ms_p50": 32682.299634,
|
|
"ttft_ms_p90": 60520.025556,
|
|
"ttft_ms_p99": 65848.06529,
|
|
"tpot_ms_p50": 89.82475232156862,
|
|
"tpot_ms_p90": 90.80036660392157,
|
|
"tpot_ms_p99": 105.81740028627452,
|
|
"e2e_ms_p50": 55363.619077,
|
|
"e2e_ms_p90": 76987.277716,
|
|
"e2e_ms_p99": 85493.952539,
|
|
"throughput_effective_rps": 1.404840472430777,
|
|
"throughput_ratio": 0.7024202362153885,
|
|
"inflight_p50": 74,
|
|
"inflight_p90": 117,
|
|
"phase": "A",
|
|
"cell": "A_r2.0_4096x256"
|
|
}
|
|
]
mooncake_both
|
|
[
|
|
{
|
|
"rate_target": 0.5,
|
|
"input_tokens": 4096,
|
|
"output_tokens": 256,
|
|
"duration_actual_s": 445.059962844,
|
|
"n_completed_total": 200,
|
|
"n_after_warmup": 196,
|
|
"n_dropped": 0,
|
|
"n_errors": 0,
|
|
"ttft_ms_p50": 256.440661,
|
|
"ttft_ms_p90": 266.103771,
|
|
"ttft_ms_p99": 461.167638,
|
|
"tpot_ms_p50": 5.941321478431372,
|
|
"tpot_ms_p90": 8.577892874509804,
|
|
"tpot_ms_p99": 13.845412023529411,
|
|
"e2e_ms_p50": 1807.769317,
|
|
"e2e_ms_p90": 2514.381893,
|
|
"e2e_ms_p99": 3815.785105,
|
|
"throughput_effective_rps": 0.45051261145415944,
|
|
"throughput_ratio": 0.9010252229083189,
|
|
"inflight_p50": 1,
|
|
"inflight_p90": 3,
|
|
"phase": "A",
|
|
"cell": "A_r0.5_4096x256"
|
|
},
|
|
{
|
|
"rate_target": 1.0,
|
|
"input_tokens": 4096,
|
|
"output_tokens": 256,
|
|
"duration_actual_s": 206.654679737,
|
|
"n_completed_total": 203,
|
|
"n_after_warmup": 186,
|
|
"n_dropped": 0,
|
|
"n_errors": 0,
|
|
"ttft_ms_p50": 264.012039,
|
|
"ttft_ms_p90": 421.973366,
|
|
"ttft_ms_p99": 614.736154,
|
|
"tpot_ms_p50": 9.826510094117646,
|
|
"tpot_ms_p90": 16.444674933333335,
|
|
"tpot_ms_p99": 19.560320356862743,
|
|
"e2e_ms_p50": 2785.772229,
|
|
"e2e_ms_p90": 4532.58161,
|
|
"e2e_ms_p99": 5253.24676,
|
|
"throughput_effective_rps": 0.9458203600786452,
|
|
"throughput_ratio": 0.9458203600786452,
|
|
"inflight_p50": 3,
|
|
"inflight_p90": 8,
|
|
"phase": "A",
|
|
"cell": "A_r1.0_4096x256"
|
|
},
|
|
{
|
|
"rate_target": 2.0,
|
|
"input_tokens": 4096,
|
|
"output_tokens": 256,
|
|
"duration_actual_s": 109.933145689,
|
|
"n_completed_total": 237,
|
|
"n_after_warmup": 208,
|
|
"n_dropped": 0,
|
|
"n_errors": 0,
|
|
"ttft_ms_p50": 316.799702,
|
|
"ttft_ms_p90": 725.661939,
|
|
"ttft_ms_p99": 945.498778,
|
|
"tpot_ms_p50": 35.43492209411765,
|
|
"tpot_ms_p90": 54.209878376470584,
|
|
"tpot_ms_p99": 58.219066156862745,
|
|
"e2e_ms_p50": 9571.144788,
|
|
"e2e_ms_p90": 14359.084682,
|
|
"e2e_ms_p99": 15374.620667,
|
|
"throughput_effective_rps": 2.081391499946502,
|
|
"throughput_ratio": 1.040695749973251,
|
|
"inflight_p50": 23,
|
|
"inflight_p90": 38,
|
|
"phase": "A",
|
|
"cell": "A_r2.0_4096x256"
|
|
},
|
|
{
|
|
"rate_target": 4.0,
|
|
"input_tokens": 4096,
|
|
"output_tokens": 256,
|
|
"duration_actual_s": 124.259570043,
|
|
"n_completed_total": 343,
|
|
"n_after_warmup": 307,
|
|
"n_dropped": 0,
|
|
"n_errors": 0,
|
|
"ttft_ms_p50": 11665.573018,
|
|
"ttft_ms_p90": 23444.380711,
|
|
"ttft_ms_p99": 27466.05648,
|
|
"tpot_ms_p50": 91.8016532509804,
|
|
"tpot_ms_p90": 93.95965819607844,
|
|
"tpot_ms_p99": 95.09967682352942,
|
|
"e2e_ms_p50": 34656.343746,
|
|
"e2e_ms_p90": 40580.01734,
|
|
"e2e_ms_p99": 43193.129063,
|
|
"throughput_effective_rps": 2.686864652864218,
|
|
"throughput_ratio": 0.6717161632160545,
|
|
"inflight_p50": 101,
|
|
"inflight_p90": 130,
|
|
"phase": "A",
|
|
"cell": "A_r4.0_4096x256"
|
|
},
|
|
{
|
|
"rate_target": 8.0,
|
|
"input_tokens": 4096,
|
|
"output_tokens": 256,
|
|
"duration_actual_s": 80.65869788,
|
|
"n_completed_total": 480,
|
|
"n_after_warmup": 96,
|
|
"n_dropped": 103,
|
|
"n_errors": 202,
|
|
"ttft_ms_p50": 22195.273494,
|
|
"ttft_ms_p90": 33607.452699,
|
|
"ttft_ms_p99": 34484.768556,
|
|
"tpot_ms_p50": 88.98730769019608,
|
|
"tpot_ms_p90": 89.86937862352941,
|
|
"tpot_ms_p99": 92.71757069803921,
|
|
"e2e_ms_p50": 44887.058146,
|
|
"e2e_ms_p90": 56519.577831,
|
|
"e2e_ms_p99": 57394.551538,
|
|
"throughput_effective_rps": 1.3586437746565503,
|
|
"throughput_ratio": 0.16983047183206879,
|
|
"inflight_p50": 127,
|
|
"inflight_p90": 165,
|
|
"phase": "A",
|
|
"cell": "A_r8.0_4096x256"
|
|
},
|
|
{
|
|
"rate_target": 16.0,
|
|
"input_tokens": 4096,
|
|
"output_tokens": 256,
|
|
"duration_actual_s": 60.028652326,
|
|
"n_completed_total": 916,
|
|
"n_after_warmup": 0,
|
|
"n_dropped": 0,
|
|
"n_errors": 916,
|
|
"ttft_ms_p50": null,
|
|
"ttft_ms_p90": null,
|
|
"ttft_ms_p99": null,
|
|
"tpot_ms_p50": null,
|
|
"tpot_ms_p90": null,
|
|
"tpot_ms_p99": null,
|
|
"e2e_ms_p50": null,
|
|
"e2e_ms_p90": null,
|
|
"e2e_ms_p99": null,
|
|
"throughput_effective_rps": 0.0,
|
|
"throughput_ratio": 0.0,
|
|
"inflight_p50": null,
|
|
"inflight_p90": null,
|
|
"phase": "A",
|
|
"cell": "A_r16.0_4096x256"
|
|
},
|
|
{
|
|
"rate_target": 32.0,
|
|
"input_tokens": 4096,
|
|
"output_tokens": 256,
|
|
"duration_actual_s": 60.011136933,
|
|
"n_completed_total": 1815,
|
|
"n_after_warmup": 0,
|
|
"n_dropped": 0,
|
|
"n_errors": 1815,
|
|
"ttft_ms_p50": null,
|
|
"ttft_ms_p90": null,
|
|
"ttft_ms_p99": null,
|
|
"tpot_ms_p50": null,
|
|
"tpot_ms_p90": null,
|
|
"tpot_ms_p99": null,
|
|
"e2e_ms_p50": null,
|
|
"e2e_ms_p90": null,
|
|
"e2e_ms_p99": null,
|
|
"throughput_effective_rps": 0.0,
|
|
"throughput_ratio": 0.0,
|
|
"inflight_p50": null,
|
|
"inflight_p90": null,
|
|
"phase": "A",
|
|
"cell": "A_r32.0_4096x256"
|
|
}
|
|
] |