analysis/characterization/window_1_results.md is the headline write-up for Window 1: workload characterization (KV per request, real reuse decomposition, APC theoretical ceilings), B3 5-policy sweep with per-policy interpretation, B2 same-vs-different-worker interference microbench with causal reading, and an explicit list of what Window 1 does *not* answer (deferred to B4 SRR sweep + B5 attribution). Under window_1_results/: - 5 raw result JSONs from the B3 sweep, the B2 microbench, the APC upper bound, and the KV footprint - per-policy hotspot_index.json snapshots so render_window1_figures.py can plot per-worker TTFT p90 distributions - 8 PNG figures (figures/) covering the headline claims Three takeaways the figures pin down: 1) intra-session reuse dominates (93.2%), so session-affinity routing is the right primary lever 2) unified hybrid affinity hits 79.4% APC (97% of the 79.6% intra- session ceiling) AND cuts TTFT p90 from lmetric's 15.6s to 7.24s 3) B2 different-worker control sits at idx ≈ 1.0 across 32× prefill- size variation; same-worker TTFT idx scales 2.15× -> 218×, which is the cleanest causal evidence for same-worker prefill-decode interference Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
194 lines
7.1 KiB
JSON
194 lines
7.1 KiB
JSON
{
|
|
"rows": [
|
|
{
|
|
"decode_endpoint": "http://127.0.0.1:8100",
|
|
"interference_index": 0.9868436853823819,
|
|
"n_decode_clean": 207,
|
|
"n_decode_overlap": 33,
|
|
"n_decode_total": 240,
|
|
"n_prefill_injections": 4,
|
|
"prefill_endpoint": "http://127.0.0.1:8101",
|
|
"prefill_size": 16384,
|
|
"tpot_p50_clean_s": 0.0061757058808297825,
|
|
"tpot_p50_overlap_s": 0.006127697048765241,
|
|
"tpot_p90_clean_s": 0.006862485770023231,
|
|
"tpot_p90_overlap_s": 0.006772200748173878,
|
|
"tpot_p99_clean_s": 0.007128368820806946,
|
|
"tpot_p99_overlap_s": 0.0070623818792478,
|
|
"ttft_p90_clean_s": 0.043039703369140626,
|
|
"ttft_p90_overlap_s": 0.04307723045349121,
|
|
"variant": "different"
|
|
},
|
|
{
|
|
"decode_endpoint": "http://127.0.0.1:8100",
|
|
"interference_index": 1.0176125863449343,
|
|
"n_decode_clean": 228,
|
|
"n_decode_overlap": 12,
|
|
"n_decode_total": 240,
|
|
"n_prefill_injections": 4,
|
|
"prefill_endpoint": "http://127.0.0.1:8101",
|
|
"prefill_size": 2048,
|
|
"tpot_p50_clean_s": 0.0062349300191860005,
|
|
"tpot_p50_overlap_s": 0.006218204594621754,
|
|
"tpot_p90_clean_s": 0.006892242576136734,
|
|
"tpot_p90_overlap_s": 0.007013632793619174,
|
|
"tpot_p99_clean_s": 0.007111345902837888,
|
|
"tpot_p99_overlap_s": 0.007131954732567373,
|
|
"ttft_p90_clean_s": 0.04290406703948975,
|
|
"ttft_p90_overlap_s": 0.040976309776306154,
|
|
"variant": "different"
|
|
},
|
|
{
|
|
"decode_endpoint": "http://127.0.0.1:8100",
|
|
"interference_index": 0.9221676118155049,
|
|
"n_decode_clean": 176,
|
|
"n_decode_overlap": 64,
|
|
"n_decode_total": 240,
|
|
"n_prefill_injections": 4,
|
|
"prefill_endpoint": "http://127.0.0.1:8101",
|
|
"prefill_size": 32768,
|
|
"tpot_p50_clean_s": 0.00620933012528853,
|
|
"tpot_p50_overlap_s": 0.005991364970351711,
|
|
"tpot_p90_clean_s": 0.0069098352181791054,
|
|
"tpot_p90_overlap_s": 0.006372026241186894,
|
|
"tpot_p99_clean_s": 0.007242970394365715,
|
|
"tpot_p99_overlap_s": 0.006935877366499467,
|
|
"ttft_p90_clean_s": 0.04308474063873291,
|
|
"ttft_p90_overlap_s": 0.04266033172607422,
|
|
"variant": "different"
|
|
},
|
|
{
|
|
"decode_endpoint": "http://127.0.0.1:8100",
|
|
"interference_index": 1.0162810692345416,
|
|
"n_decode_clean": 114,
|
|
"n_decode_overlap": 126,
|
|
"n_decode_total": 240,
|
|
"n_prefill_injections": 4,
|
|
"prefill_endpoint": "http://127.0.0.1:8101",
|
|
"prefill_size": 65536,
|
|
"tpot_p50_clean_s": 0.006080349286397299,
|
|
"tpot_p50_overlap_s": 0.006312949488861392,
|
|
"tpot_p90_clean_s": 0.0068880830148253785,
|
|
"tpot_p90_overlap_s": 0.007000228371283021,
|
|
"tpot_p99_clean_s": 0.007222196574162956,
|
|
"tpot_p99_overlap_s": 0.00723441562267265,
|
|
"ttft_p90_clean_s": 0.04367616176605225,
|
|
"ttft_p90_overlap_s": 0.04332089424133301,
|
|
"variant": "different"
|
|
},
|
|
{
|
|
"decode_endpoint": "http://127.0.0.1:8100",
|
|
"interference_index": 0.92169565663476,
|
|
"n_decode_clean": 220,
|
|
"n_decode_overlap": 20,
|
|
"n_decode_total": 240,
|
|
"n_prefill_injections": 4,
|
|
"prefill_endpoint": "http://127.0.0.1:8101",
|
|
"prefill_size": 8192,
|
|
"tpot_p50_clean_s": 0.006260122915711066,
|
|
"tpot_p50_overlap_s": 0.006120474651606396,
|
|
"tpot_p90_clean_s": 0.006968991684191154,
|
|
"tpot_p90_overlap_s": 0.006423289366442748,
|
|
"tpot_p99_clean_s": 0.007601349209294174,
|
|
"tpot_p99_overlap_s": 0.006715166592838788,
|
|
"ttft_p90_clean_s": 0.04314079284667969,
|
|
"ttft_p90_overlap_s": 0.042817187309265134,
|
|
"variant": "different"
|
|
},
|
|
{
|
|
"decode_endpoint": "http://127.0.0.1:8100",
|
|
"interference_index": 3.3716068170318985,
|
|
"n_decode_clean": 203,
|
|
"n_decode_overlap": 37,
|
|
"n_decode_total": 240,
|
|
"n_prefill_injections": 4,
|
|
"prefill_endpoint": "http://127.0.0.1:8100",
|
|
"prefill_size": 16384,
|
|
"tpot_p50_clean_s": 0.006435276281954062,
|
|
"tpot_p50_overlap_s": 0.009116151116111061,
|
|
"tpot_p90_clean_s": 0.0071605749804564195,
|
|
"tpot_p90_overlap_s": 0.024142643417974917,
|
|
"tpot_p99_clean_s": 0.008356584539317119,
|
|
"tpot_p99_overlap_s": 0.024809808827409838,
|
|
"ttft_p90_clean_s": 0.04402604103088379,
|
|
"ttft_p90_overlap_s": 1.3574100017547606,
|
|
"variant": "same"
|
|
},
|
|
{
|
|
"decode_endpoint": "http://127.0.0.1:8100",
|
|
"interference_index": 1.1589170446597312,
|
|
"n_decode_clean": 228,
|
|
"n_decode_overlap": 12,
|
|
"n_decode_total": 240,
|
|
"n_prefill_injections": 4,
|
|
"prefill_endpoint": "http://127.0.0.1:8100",
|
|
"prefill_size": 2048,
|
|
"tpot_p50_clean_s": 0.006142637946388938,
|
|
"tpot_p50_overlap_s": 0.007610858088791972,
|
|
"tpot_p90_clean_s": 0.006933137142296993,
|
|
"tpot_p90_overlap_s": 0.008034930807171445,
|
|
"tpot_p99_clean_s": 0.007201877651792584,
|
|
"tpot_p99_overlap_s": 0.0084272463153107,
|
|
"ttft_p90_clean_s": 0.043091440200805665,
|
|
"ttft_p90_overlap_s": 0.09247522354125978,
|
|
"variant": "same"
|
|
},
|
|
{
|
|
"decode_endpoint": "http://127.0.0.1:8100",
|
|
"interference_index": 7.891276559921504,
|
|
"n_decode_clean": 173,
|
|
"n_decode_overlap": 67,
|
|
"n_decode_total": 240,
|
|
"n_prefill_injections": 4,
|
|
"prefill_endpoint": "http://127.0.0.1:8100",
|
|
"prefill_size": 32768,
|
|
"tpot_p50_clean_s": 0.006226602226796776,
|
|
"tpot_p50_overlap_s": 0.012180752224392362,
|
|
"tpot_p90_clean_s": 0.00694006813897027,
|
|
"tpot_p90_overlap_s": 0.054765997029314145,
|
|
"tpot_p99_clean_s": 0.010443444107518053,
|
|
"tpot_p99_overlap_s": 0.058983875428787386,
|
|
"ttft_p90_clean_s": 0.04411859512329101,
|
|
"ttft_p90_overlap_s": 4.174754428863525,
|
|
"variant": "same"
|
|
},
|
|
{
|
|
"decode_endpoint": "http://127.0.0.1:8100",
|
|
"interference_index": 2.259323176730457,
|
|
"n_decode_clean": 110,
|
|
"n_decode_overlap": 130,
|
|
"n_decode_total": 240,
|
|
"n_prefill_injections": 4,
|
|
"prefill_endpoint": "http://127.0.0.1:8100",
|
|
"prefill_size": 65536,
|
|
"tpot_p50_clean_s": 0.0064652375500611585,
|
|
"tpot_p50_overlap_s": 0.020095128001588764,
|
|
"tpot_p90_clean_s": 0.009607415488272014,
|
|
"tpot_p90_overlap_s": 0.021706256481132124,
|
|
"tpot_p99_clean_s": 0.016912007837584522,
|
|
"tpot_p99_overlap_s": 0.16948255478733715,
|
|
"ttft_p90_clean_s": 0.06447408199310305,
|
|
"ttft_p90_overlap_s": 14.060086917877197,
|
|
"variant": "same"
|
|
},
|
|
{
|
|
"decode_endpoint": "http://127.0.0.1:8100",
|
|
"interference_index": 1.8961314610807898,
|
|
"n_decode_clean": 221,
|
|
"n_decode_overlap": 19,
|
|
"n_decode_total": 240,
|
|
"n_prefill_injections": 4,
|
|
"prefill_endpoint": "http://127.0.0.1:8100",
|
|
"prefill_size": 8192,
|
|
"tpot_p50_clean_s": 0.00617263052198622,
|
|
"tpot_p50_overlap_s": 0.008303543533941712,
|
|
"tpot_p90_clean_s": 0.007060385713673601,
|
|
"tpot_p90_overlap_s": 0.013387419479061859,
|
|
"tpot_p99_clean_s": 0.0076809098022152696,
|
|
"tpot_p99_overlap_s": 0.013849472662415166,
|
|
"ttft_p90_clean_s": 0.04307150840759277,
|
|
"ttft_p90_overlap_s": 0.52073073387146,
|
|
"variant": "same"
|
|
}
|
|
]
|
|
} |