analysis/characterization/window_1_results.md is the headline write-up for Window 1: workload characterization (KV per request, real reuse decomposition, APC theoretical ceilings), B3 5-policy sweep with per-policy interpretation, B2 same-vs-different-worker interference microbench with causal reading, and an explicit list of what Window 1 does *not* answer (deferred to B4 SRR sweep + B5 attribution). Under window_1_results/: - 5 raw result JSONs from the B3 sweep, the B2 microbench, the APC upper bound, and the KV footprint - per-policy hotspot_index.json snapshots so render_window1_figures.py can plot per-worker TTFT p90 distributions - 8 PNG figures (figures/) covering the headline claims Three takeaways the figures pin down: 1) intra-session reuse dominates (93.2%), so session-affinity routing is the right primary lever 2) unified hybrid affinity hits 79.4% APC (97% of the 79.6% intra- session ceiling) AND cuts TTFT p90 from lmetric's 15.6s to 7.24s 3) B2 different-worker control sits at idx ≈ 1.0 across 32× prefill- size variation; same-worker TTFT idx scales 2.15× -> 218×, which is the cleanest causal evidence for same-worker prefill-decode interference Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
133 lines
4.2 KiB
JSON
133 lines
4.2 KiB
JSON
{
|
|
"rows": [
|
|
{
|
|
"policy": "capped",
|
|
"n_ok": 770,
|
|
"n_total": 770,
|
|
"ttft_p50_s": 1.195636051998008,
|
|
"ttft_p90_s": 12.762421467981767,
|
|
"ttft_p99_s": 46.05476947501302,
|
|
"tpot_p50_s": 0.007229394937166944,
|
|
"tpot_p90_s": 0.015995440982929352,
|
|
"tpot_p99_s": 0.10145225453431651,
|
|
"e2e_p50_s": 2.5921602529706433,
|
|
"e2e_p90_s": 21.238469071977306,
|
|
"e2e_p99_s": 73.38509433099534,
|
|
"apc_ratio": 0.3158312503528108,
|
|
"interference_index": 6.331064378362814,
|
|
"hotspot_index_ttft_p90": 1.9366915542605314,
|
|
"reuse_intra_frac": 0.9192657105586233,
|
|
"reuse_cross_frac": 0.0602232594931501,
|
|
"n_slow": 185,
|
|
"failure_counts": {
|
|
"cache_miss_large_append": 60,
|
|
"hot_worker_queue": 66,
|
|
"same_worker_prefill_overlap": 45,
|
|
"unknown": 14
|
|
}
|
|
},
|
|
{
|
|
"policy": "lmetric",
|
|
"n_ok": 1214,
|
|
"n_total": 1214,
|
|
"ttft_p50_s": 0.9369571270071901,
|
|
"ttft_p90_s": 15.592678204004187,
|
|
"ttft_p99_s": 52.95170431700535,
|
|
"tpot_p50_s": 0.008851506907892485,
|
|
"tpot_p90_s": 0.02120516549011311,
|
|
"tpot_p99_s": 0.17592118933357093,
|
|
"e2e_p50_s": 2.7527842019917443,
|
|
"e2e_p90_s": 24.75416105298791,
|
|
"e2e_p99_s": 79.61890332301846,
|
|
"apc_ratio": 0.5694312382571595,
|
|
"interference_index": 6.530231061794441,
|
|
"hotspot_index_ttft_p90": 2.237981740718548,
|
|
"reuse_intra_frac": 0.9321238805590836,
|
|
"reuse_cross_frac": 0.05679481258506571,
|
|
"n_slow": 295,
|
|
"failure_counts": {
|
|
"cache_miss_large_append": 94,
|
|
"hot_worker_queue": 68,
|
|
"same_worker_prefill_overlap": 69,
|
|
"unknown": 64
|
|
}
|
|
},
|
|
{
|
|
"policy": "load_only",
|
|
"n_ok": 1214,
|
|
"n_total": 1214,
|
|
"ttft_p50_s": 1.2542553890380077,
|
|
"ttft_p90_s": 20.14692750602262,
|
|
"ttft_p99_s": 52.64810254302574,
|
|
"tpot_p50_s": 0.00923045912795929,
|
|
"tpot_p90_s": 0.02672785480314115,
|
|
"tpot_p99_s": 0.3207044094773148,
|
|
"e2e_p50_s": 3.584156609023921,
|
|
"e2e_p90_s": 33.42658680601744,
|
|
"e2e_p99_s": 93.91839688795153,
|
|
"apc_ratio": 0.5412093853102866,
|
|
"interference_index": 9.16424627504275,
|
|
"hotspot_index_ttft_p90": 1.1400531308102801,
|
|
"reuse_intra_frac": 0.9353191550754928,
|
|
"reuse_cross_frac": 0.053372184678592026,
|
|
"n_slow": 379,
|
|
"failure_counts": {
|
|
"cache_miss_large_append": 151,
|
|
"hot_worker_queue": 33,
|
|
"same_worker_prefill_overlap": 108,
|
|
"unknown": 87
|
|
}
|
|
},
|
|
{
|
|
"policy": "sticky",
|
|
"n_ok": 1214,
|
|
"n_total": 1214,
|
|
"ttft_p50_s": 0.540947844972834,
|
|
"ttft_p90_s": 18.016640832996927,
|
|
"ttft_p99_s": 71.37327494798228,
|
|
"tpot_p50_s": 0.00894752275507555,
|
|
"tpot_p90_s": 0.0360956137329512,
|
|
"tpot_p99_s": 0.34523129428917954,
|
|
"e2e_p50_s": 2.0788628259906545,
|
|
"e2e_p90_s": 34.605129147996195,
|
|
"e2e_p99_s": 133.5824547969969,
|
|
"apc_ratio": 0.7720092868396378,
|
|
"interference_index": 13.651718321568111,
|
|
"hotspot_index_ttft_p90": 2.3493858974059214,
|
|
"reuse_intra_frac": 0.9327723488279339,
|
|
"reuse_cross_frac": 0.05495149683864246,
|
|
"n_slow": 234,
|
|
"failure_counts": {
|
|
"cache_miss_large_append": 20,
|
|
"hot_worker_queue": 51,
|
|
"same_worker_prefill_overlap": 134,
|
|
"unknown": 29
|
|
}
|
|
},
|
|
{
|
|
"policy": "unified",
|
|
"n_ok": 1213,
|
|
"n_total": 1214,
|
|
"ttft_p50_s": 0.4997710260213353,
|
|
"ttft_p90_s": 7.239999514014926,
|
|
"ttft_p99_s": 42.022206099005416,
|
|
"tpot_p50_s": 0.008079791456705824,
|
|
"tpot_p90_s": 0.017107906969874808,
|
|
"tpot_p99_s": 0.11808861252148231,
|
|
"e2e_p50_s": 1.7495028690318577,
|
|
"e2e_p90_s": 17.893827292020433,
|
|
"e2e_p99_s": 68.18008507299237,
|
|
"apc_ratio": 0.794261466256467,
|
|
"interference_index": null,
|
|
"hotspot_index_ttft_p90": 3.3497107140827365,
|
|
"reuse_intra_frac": 0.9311187350942534,
|
|
"reuse_cross_frac": 0.056702150437367635,
|
|
"n_slow": 189,
|
|
"failure_counts": {
|
|
"cache_miss_large_append": 18,
|
|
"hot_worker_queue": 116,
|
|
"unknown": 55
|
|
}
|
|
}
|
|
]
|
|
} |