Files
agentic-kvc/microbench/connector_tax/results/20260526_1728/all_summaries.json
Gahow Wang a473c71cac Connector tax Phase A: build_connector_meta is 1.4ms/step (the tax source)
Per-step timing from engine_step.jsonl definitively resolves H3:
  plain:            53 μs/step (p50)
  noop_connector:   69 μs/step (+16 μs = negligible framework cost)
  mooncake_producer: 1461 μs/step (build_connector_meta = 1386 μs)
  mooncake_both:    1452 μs/step (same as producer)

The substrate tax is NOT in the v1 framework — it's specifically in
Mooncake's build_connector_meta(), which walks set(cache.keys()) on every
scheduler step (O(|cache|) per step, E2 audit §6.5).

Accumulated per-request tax: 256 decode steps × 1.4ms = 358ms.
Observed p90 TTFT tax at rate=1.0: plain 378ms vs mooncake_both 422ms (+12%).
At rate=2.0 (near saturation): p90 TTFT +29% (561ms vs 726ms), approaching trace-replay's +45%.

Also fixes kill_vllm() to properly kill EngineCore subprocesses.
2026-05-26 19:33:15 +08:00

409 lines
11 KiB
JSON

plain
[
{
"rate_target": 0.5,
"input_tokens": 4096,
"output_tokens": 256,
"duration_actual_s": 424.285424329,
"n_completed_total": 201,
"n_after_warmup": 190,
"n_dropped": 0,
"n_errors": 0,
"ttft_ms_p50": 253.27869,
"ttft_ms_p90": 290.162753,
"ttft_ms_p99": 567.620172,
"tpot_ms_p50": 6.3617311960784315,
"tpot_ms_p90": 9.665774274509804,
"tpot_ms_p99": 13.281522015686274,
"e2e_ms_p50": 1880.48985,
"e2e_ms_p90": 2745.165083,
"e2e_ms_p99": 3789.202891,
"throughput_effective_rps": 0.45862101064196187,
"throughput_ratio": 0.9172420212839237,
"inflight_p50": 2,
"inflight_p90": 4,
"phase": "A",
"cell": "A_r0.5_4096x256"
},
{
"rate_target": 1.0,
"input_tokens": 4096,
"output_tokens": 256,
"duration_actual_s": 207.950502616,
"n_completed_total": 201,
"n_after_warmup": 183,
"n_dropped": 0,
"n_errors": 0,
"ttft_ms_p50": 259.603361,
"ttft_ms_p90": 378.280699,
"ttft_ms_p99": 524.687149,
"tpot_ms_p50": 8.502388435294117,
"tpot_ms_p90": 15.440471870588235,
"tpot_ms_p99": 17.428640921568626,
"e2e_ms_p50": 2493.103336,
"e2e_ms_p90": 4232.406556,
"e2e_ms_p99": 4871.38149,
"throughput_effective_rps": 0.9244735304107706,
"throughput_ratio": 0.9244735304107706,
"inflight_p50": 3,
"inflight_p90": 7,
"phase": "A",
"cell": "A_r1.0_4096x256"
},
{
"rate_target": 2.0,
"input_tokens": 4096,
"output_tokens": 256,
"duration_actual_s": 120.789826906,
"n_completed_total": 223,
"n_after_warmup": 205,
"n_dropped": 0,
"n_errors": 0,
"ttft_ms_p50": 293.365502,
"ttft_ms_p90": 560.857942,
"ttft_ms_p99": 840.781127,
"tpot_ms_p50": 25.272439737254903,
"tpot_ms_p90": 35.26223112156863,
"tpot_ms_p99": 42.42426647843137,
"e2e_ms_p50": 6825.599527,
"e2e_ms_p90": 9263.482823,
"e2e_ms_p99": 11140.719046,
"throughput_effective_rps": 1.8503503952030986,
"throughput_ratio": 0.9251751976015493,
"inflight_p50": 13,
"inflight_p90": 23,
"phase": "A",
"cell": "A_r2.0_4096x256"
}
]
noop_connector
[
{
"rate_target": 0.5,
"input_tokens": 4096,
"output_tokens": 256,
"duration_actual_s": 401.135882038,
"n_completed_total": 219,
"n_after_warmup": 211,
"n_dropped": 0,
"n_errors": 0,
"ttft_ms_p50": 264.899183,
"ttft_ms_p90": 465.774583,
"ttft_ms_p99": 683.105893,
"tpot_ms_p50": 11.97862425490196,
"tpot_ms_p90": 21.070080823529413,
"tpot_ms_p99": 25.905328713725492,
"e2e_ms_p50": 3317.307319,
"e2e_ms_p90": 5651.525028,
"e2e_ms_p99": 6872.18281,
"throughput_effective_rps": 0.5394544701462617,
"throughput_ratio": 1.0789089402925234,
"inflight_p50": 3,
"inflight_p90": 6,
"phase": "A",
"cell": "A_r0.5_4096x256"
},
{
"rate_target": 1.0,
"input_tokens": 4096,
"output_tokens": 256,
"duration_actual_s": 217.019525223,
"n_completed_total": 205,
"n_after_warmup": 195,
"n_dropped": 0,
"n_errors": 0,
"ttft_ms_p50": 317.569147,
"ttft_ms_p90": 616.471598,
"ttft_ms_p99": 871.3817,
"tpot_ms_p50": 25.686870282352942,
"tpot_ms_p90": 37.06463774901961,
"tpot_ms_p99": 42.32126850588235,
"e2e_ms_p50": 6867.08031,
"e2e_ms_p90": 9953.52699,
"e2e_ms_p99": 11064.851185,
"throughput_effective_rps": 0.9419401372404239,
"throughput_ratio": 0.9419401372404239,
"inflight_p50": 7,
"inflight_p90": 12,
"phase": "A",
"cell": "A_r1.0_4096x256"
},
{
"rate_target": 2.0,
"input_tokens": 4096,
"output_tokens": 256,
"duration_actual_s": 238.461594285,
"n_completed_total": 339,
"n_after_warmup": 315,
"n_dropped": 0,
"n_errors": 0,
"ttft_ms_p50": 34774.269216,
"ttft_ms_p90": 64874.196589,
"ttft_ms_p99": 70307.478291,
"tpot_ms_p50": 90.2518300627451,
"tpot_ms_p90": 92.71257150196078,
"tpot_ms_p99": 101.1596845882353,
"e2e_ms_p50": 57836.284489,
"e2e_ms_p90": 80611.595233,
"e2e_ms_p99": 85181.738441,
"throughput_effective_rps": 1.3787875418878743,
"throughput_ratio": 0.6893937709439372,
"inflight_p50": 74,
"inflight_p90": 125,
"phase": "A",
"cell": "A_r2.0_4096x256"
}
]
mooncake_producer
[
{
"rate_target": 0.5,
"input_tokens": 4096,
"output_tokens": 256,
"duration_actual_s": 401.356396668,
"n_completed_total": 203,
"n_after_warmup": 197,
"n_dropped": 0,
"n_errors": 0,
"ttft_ms_p50": 264.054176,
"ttft_ms_p90": 453.351916,
"ttft_ms_p99": 663.441612,
"tpot_ms_p50": 10.750173619607843,
"tpot_ms_p90": 16.46543019607843,
"tpot_ms_p99": 19.549762741176473,
"e2e_ms_p50": 3036.897016,
"e2e_ms_p90": 4472.36076,
"e2e_ms_p99": 5392.21106,
"throughput_effective_rps": 0.5033774883386443,
"throughput_ratio": 1.0067549766772885,
"inflight_p50": 2,
"inflight_p90": 5,
"phase": "A",
"cell": "A_r0.5_4096x256"
},
{
"rate_target": 1.0,
"input_tokens": 4096,
"output_tokens": 256,
"duration_actual_s": 232.645091396,
"n_completed_total": 207,
"n_after_warmup": 199,
"n_dropped": 0,
"n_errors": 0,
"ttft_ms_p50": 293.729297,
"ttft_ms_p90": 615.19522,
"ttft_ms_p99": 817.593711,
"tpot_ms_p50": 25.506409952941176,
"tpot_ms_p90": 39.46163431372549,
"tpot_ms_p99": 48.344151764705884,
"e2e_ms_p50": 6860.61264,
"e2e_ms_p90": 10558.202004,
"e2e_ms_p99": 12793.997244,
"throughput_effective_rps": 0.8937991794575678,
"throughput_ratio": 0.8937991794575678,
"inflight_p50": 6,
"inflight_p90": 14,
"phase": "A",
"cell": "A_r1.0_4096x256"
},
{
"rate_target": 2.0,
"input_tokens": 4096,
"output_tokens": 256,
"duration_actual_s": 232.801098162,
"n_completed_total": 326,
"n_after_warmup": 313,
"n_dropped": 0,
"n_errors": 0,
"ttft_ms_p50": 32682.299634,
"ttft_ms_p90": 60520.025556,
"ttft_ms_p99": 65848.06529,
"tpot_ms_p50": 89.82475232156862,
"tpot_ms_p90": 90.80036660392157,
"tpot_ms_p99": 105.81740028627452,
"e2e_ms_p50": 55363.619077,
"e2e_ms_p90": 76987.277716,
"e2e_ms_p99": 85493.952539,
"throughput_effective_rps": 1.404840472430777,
"throughput_ratio": 0.7024202362153885,
"inflight_p50": 74,
"inflight_p90": 117,
"phase": "A",
"cell": "A_r2.0_4096x256"
}
]
mooncake_both
[
{
"rate_target": 0.5,
"input_tokens": 4096,
"output_tokens": 256,
"duration_actual_s": 445.059962844,
"n_completed_total": 200,
"n_after_warmup": 196,
"n_dropped": 0,
"n_errors": 0,
"ttft_ms_p50": 256.440661,
"ttft_ms_p90": 266.103771,
"ttft_ms_p99": 461.167638,
"tpot_ms_p50": 5.941321478431372,
"tpot_ms_p90": 8.577892874509804,
"tpot_ms_p99": 13.845412023529411,
"e2e_ms_p50": 1807.769317,
"e2e_ms_p90": 2514.381893,
"e2e_ms_p99": 3815.785105,
"throughput_effective_rps": 0.45051261145415944,
"throughput_ratio": 0.9010252229083189,
"inflight_p50": 1,
"inflight_p90": 3,
"phase": "A",
"cell": "A_r0.5_4096x256"
},
{
"rate_target": 1.0,
"input_tokens": 4096,
"output_tokens": 256,
"duration_actual_s": 206.654679737,
"n_completed_total": 203,
"n_after_warmup": 186,
"n_dropped": 0,
"n_errors": 0,
"ttft_ms_p50": 264.012039,
"ttft_ms_p90": 421.973366,
"ttft_ms_p99": 614.736154,
"tpot_ms_p50": 9.826510094117646,
"tpot_ms_p90": 16.444674933333335,
"tpot_ms_p99": 19.560320356862743,
"e2e_ms_p50": 2785.772229,
"e2e_ms_p90": 4532.58161,
"e2e_ms_p99": 5253.24676,
"throughput_effective_rps": 0.9458203600786452,
"throughput_ratio": 0.9458203600786452,
"inflight_p50": 3,
"inflight_p90": 8,
"phase": "A",
"cell": "A_r1.0_4096x256"
},
{
"rate_target": 2.0,
"input_tokens": 4096,
"output_tokens": 256,
"duration_actual_s": 109.933145689,
"n_completed_total": 237,
"n_after_warmup": 208,
"n_dropped": 0,
"n_errors": 0,
"ttft_ms_p50": 316.799702,
"ttft_ms_p90": 725.661939,
"ttft_ms_p99": 945.498778,
"tpot_ms_p50": 35.43492209411765,
"tpot_ms_p90": 54.209878376470584,
"tpot_ms_p99": 58.219066156862745,
"e2e_ms_p50": 9571.144788,
"e2e_ms_p90": 14359.084682,
"e2e_ms_p99": 15374.620667,
"throughput_effective_rps": 2.081391499946502,
"throughput_ratio": 1.040695749973251,
"inflight_p50": 23,
"inflight_p90": 38,
"phase": "A",
"cell": "A_r2.0_4096x256"
},
{
"rate_target": 4.0,
"input_tokens": 4096,
"output_tokens": 256,
"duration_actual_s": 124.259570043,
"n_completed_total": 343,
"n_after_warmup": 307,
"n_dropped": 0,
"n_errors": 0,
"ttft_ms_p50": 11665.573018,
"ttft_ms_p90": 23444.380711,
"ttft_ms_p99": 27466.05648,
"tpot_ms_p50": 91.8016532509804,
"tpot_ms_p90": 93.95965819607844,
"tpot_ms_p99": 95.09967682352942,
"e2e_ms_p50": 34656.343746,
"e2e_ms_p90": 40580.01734,
"e2e_ms_p99": 43193.129063,
"throughput_effective_rps": 2.686864652864218,
"throughput_ratio": 0.6717161632160545,
"inflight_p50": 101,
"inflight_p90": 130,
"phase": "A",
"cell": "A_r4.0_4096x256"
},
{
"rate_target": 8.0,
"input_tokens": 4096,
"output_tokens": 256,
"duration_actual_s": 80.65869788,
"n_completed_total": 480,
"n_after_warmup": 96,
"n_dropped": 103,
"n_errors": 202,
"ttft_ms_p50": 22195.273494,
"ttft_ms_p90": 33607.452699,
"ttft_ms_p99": 34484.768556,
"tpot_ms_p50": 88.98730769019608,
"tpot_ms_p90": 89.86937862352941,
"tpot_ms_p99": 92.71757069803921,
"e2e_ms_p50": 44887.058146,
"e2e_ms_p90": 56519.577831,
"e2e_ms_p99": 57394.551538,
"throughput_effective_rps": 1.3586437746565503,
"throughput_ratio": 0.16983047183206879,
"inflight_p50": 127,
"inflight_p90": 165,
"phase": "A",
"cell": "A_r8.0_4096x256"
},
{
"rate_target": 16.0,
"input_tokens": 4096,
"output_tokens": 256,
"duration_actual_s": 60.028652326,
"n_completed_total": 916,
"n_after_warmup": 0,
"n_dropped": 0,
"n_errors": 916,
"ttft_ms_p50": null,
"ttft_ms_p90": null,
"ttft_ms_p99": null,
"tpot_ms_p50": null,
"tpot_ms_p90": null,
"tpot_ms_p99": null,
"e2e_ms_p50": null,
"e2e_ms_p90": null,
"e2e_ms_p99": null,
"throughput_effective_rps": 0.0,
"throughput_ratio": 0.0,
"inflight_p50": null,
"inflight_p90": null,
"phase": "A",
"cell": "A_r16.0_4096x256"
},
{
"rate_target": 32.0,
"input_tokens": 4096,
"output_tokens": 256,
"duration_actual_s": 60.011136933,
"n_completed_total": 1815,
"n_after_warmup": 0,
"n_dropped": 0,
"n_errors": 1815,
"ttft_ms_p50": null,
"ttft_ms_p90": null,
"ttft_ms_p99": null,
"tpot_ms_p50": null,
"tpot_ms_p90": null,
"tpot_ms_p99": null,
"e2e_ms_p50": null,
"e2e_ms_p90": null,
"e2e_ms_p99": null,
"throughput_effective_rps": 0.0,
"throughput_ratio": 0.0,
"inflight_p50": null,
"inflight_p90": null,
"phase": "A",
"cell": "A_r32.0_4096x256"
}
]