Files
agentic-kvc/analysis/characterization/window_1_results/b2_sweep_summary.json
Gahow Wang 0c3220cbb8 Window 1 results: combined B1' + B2 + B3 report and artifacts
analysis/characterization/window_1_results.md is the headline write-up
for Window 1: workload characterization (KV per request, real reuse
decomposition, APC theoretical ceilings), B3 5-policy sweep with
per-policy interpretation, B2 same-vs-different-worker interference
microbench with causal reading, and an explicit list of what Window 1
does *not* answer (deferred to B4 SRR sweep + B5 attribution).

Under window_1_results/:
- 5 raw result JSONs from the B3 sweep, the B2 microbench, the APC
  upper bound, and the KV footprint
- per-policy hotspot_index.json snapshots so render_window1_figures.py
  can plot per-worker TTFT p90 distributions
- 8 PNG figures (figures/) covering the headline claims

Three takeaways the figures pin down:
1) intra-session reuse dominates (93.2%), so session-affinity routing
   is the right primary lever
2) unified hybrid affinity hits 79.4% APC (97% of the 79.6% intra-
   session ceiling) AND cuts TTFT p90 from lmetric's 15.6s to 7.24s
3) B2 different-worker control sits at idx ≈ 1.0 (interference_index
   0.92-1.02) across the 32× prefill-size range (2048 to 65536);
   same-worker TTFT p90 overlap/clean ratio grows from 2.15× to 218×
   over that same range, which is the cleanest causal evidence for
   same-worker prefill-decode interference

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-25 23:25:09 +08:00

194 lines
7.1 KiB
JSON

{
"rows": [
{
"decode_endpoint": "http://127.0.0.1:8100",
"interference_index": 0.9868436853823819,
"n_decode_clean": 207,
"n_decode_overlap": 33,
"n_decode_total": 240,
"n_prefill_injections": 4,
"prefill_endpoint": "http://127.0.0.1:8101",
"prefill_size": 16384,
"tpot_p50_clean_s": 0.0061757058808297825,
"tpot_p50_overlap_s": 0.006127697048765241,
"tpot_p90_clean_s": 0.006862485770023231,
"tpot_p90_overlap_s": 0.006772200748173878,
"tpot_p99_clean_s": 0.007128368820806946,
"tpot_p99_overlap_s": 0.0070623818792478,
"ttft_p90_clean_s": 0.043039703369140626,
"ttft_p90_overlap_s": 0.04307723045349121,
"variant": "different"
},
{
"decode_endpoint": "http://127.0.0.1:8100",
"interference_index": 1.0176125863449343,
"n_decode_clean": 228,
"n_decode_overlap": 12,
"n_decode_total": 240,
"n_prefill_injections": 4,
"prefill_endpoint": "http://127.0.0.1:8101",
"prefill_size": 2048,
"tpot_p50_clean_s": 0.0062349300191860005,
"tpot_p50_overlap_s": 0.006218204594621754,
"tpot_p90_clean_s": 0.006892242576136734,
"tpot_p90_overlap_s": 0.007013632793619174,
"tpot_p99_clean_s": 0.007111345902837888,
"tpot_p99_overlap_s": 0.007131954732567373,
"ttft_p90_clean_s": 0.04290406703948975,
"ttft_p90_overlap_s": 0.040976309776306154,
"variant": "different"
},
{
"decode_endpoint": "http://127.0.0.1:8100",
"interference_index": 0.9221676118155049,
"n_decode_clean": 176,
"n_decode_overlap": 64,
"n_decode_total": 240,
"n_prefill_injections": 4,
"prefill_endpoint": "http://127.0.0.1:8101",
"prefill_size": 32768,
"tpot_p50_clean_s": 0.00620933012528853,
"tpot_p50_overlap_s": 0.005991364970351711,
"tpot_p90_clean_s": 0.0069098352181791054,
"tpot_p90_overlap_s": 0.006372026241186894,
"tpot_p99_clean_s": 0.007242970394365715,
"tpot_p99_overlap_s": 0.006935877366499467,
"ttft_p90_clean_s": 0.04308474063873291,
"ttft_p90_overlap_s": 0.04266033172607422,
"variant": "different"
},
{
"decode_endpoint": "http://127.0.0.1:8100",
"interference_index": 1.0162810692345416,
"n_decode_clean": 114,
"n_decode_overlap": 126,
"n_decode_total": 240,
"n_prefill_injections": 4,
"prefill_endpoint": "http://127.0.0.1:8101",
"prefill_size": 65536,
"tpot_p50_clean_s": 0.006080349286397299,
"tpot_p50_overlap_s": 0.006312949488861392,
"tpot_p90_clean_s": 0.0068880830148253785,
"tpot_p90_overlap_s": 0.007000228371283021,
"tpot_p99_clean_s": 0.007222196574162956,
"tpot_p99_overlap_s": 0.00723441562267265,
"ttft_p90_clean_s": 0.04367616176605225,
"ttft_p90_overlap_s": 0.04332089424133301,
"variant": "different"
},
{
"decode_endpoint": "http://127.0.0.1:8100",
"interference_index": 0.92169565663476,
"n_decode_clean": 220,
"n_decode_overlap": 20,
"n_decode_total": 240,
"n_prefill_injections": 4,
"prefill_endpoint": "http://127.0.0.1:8101",
"prefill_size": 8192,
"tpot_p50_clean_s": 0.006260122915711066,
"tpot_p50_overlap_s": 0.006120474651606396,
"tpot_p90_clean_s": 0.006968991684191154,
"tpot_p90_overlap_s": 0.006423289366442748,
"tpot_p99_clean_s": 0.007601349209294174,
"tpot_p99_overlap_s": 0.006715166592838788,
"ttft_p90_clean_s": 0.04314079284667969,
"ttft_p90_overlap_s": 0.042817187309265134,
"variant": "different"
},
{
"decode_endpoint": "http://127.0.0.1:8100",
"interference_index": 3.3716068170318985,
"n_decode_clean": 203,
"n_decode_overlap": 37,
"n_decode_total": 240,
"n_prefill_injections": 4,
"prefill_endpoint": "http://127.0.0.1:8100",
"prefill_size": 16384,
"tpot_p50_clean_s": 0.006435276281954062,
"tpot_p50_overlap_s": 0.009116151116111061,
"tpot_p90_clean_s": 0.0071605749804564195,
"tpot_p90_overlap_s": 0.024142643417974917,
"tpot_p99_clean_s": 0.008356584539317119,
"tpot_p99_overlap_s": 0.024809808827409838,
"ttft_p90_clean_s": 0.04402604103088379,
"ttft_p90_overlap_s": 1.3574100017547606,
"variant": "same"
},
{
"decode_endpoint": "http://127.0.0.1:8100",
"interference_index": 1.1589170446597312,
"n_decode_clean": 228,
"n_decode_overlap": 12,
"n_decode_total": 240,
"n_prefill_injections": 4,
"prefill_endpoint": "http://127.0.0.1:8100",
"prefill_size": 2048,
"tpot_p50_clean_s": 0.006142637946388938,
"tpot_p50_overlap_s": 0.007610858088791972,
"tpot_p90_clean_s": 0.006933137142296993,
"tpot_p90_overlap_s": 0.008034930807171445,
"tpot_p99_clean_s": 0.007201877651792584,
"tpot_p99_overlap_s": 0.0084272463153107,
"ttft_p90_clean_s": 0.043091440200805665,
"ttft_p90_overlap_s": 0.09247522354125978,
"variant": "same"
},
{
"decode_endpoint": "http://127.0.0.1:8100",
"interference_index": 7.891276559921504,
"n_decode_clean": 173,
"n_decode_overlap": 67,
"n_decode_total": 240,
"n_prefill_injections": 4,
"prefill_endpoint": "http://127.0.0.1:8100",
"prefill_size": 32768,
"tpot_p50_clean_s": 0.006226602226796776,
"tpot_p50_overlap_s": 0.012180752224392362,
"tpot_p90_clean_s": 0.00694006813897027,
"tpot_p90_overlap_s": 0.054765997029314145,
"tpot_p99_clean_s": 0.010443444107518053,
"tpot_p99_overlap_s": 0.058983875428787386,
"ttft_p90_clean_s": 0.04411859512329101,
"ttft_p90_overlap_s": 4.174754428863525,
"variant": "same"
},
{
"decode_endpoint": "http://127.0.0.1:8100",
"interference_index": 2.259323176730457,
"n_decode_clean": 110,
"n_decode_overlap": 130,
"n_decode_total": 240,
"n_prefill_injections": 4,
"prefill_endpoint": "http://127.0.0.1:8100",
"prefill_size": 65536,
"tpot_p50_clean_s": 0.0064652375500611585,
"tpot_p50_overlap_s": 0.020095128001588764,
"tpot_p90_clean_s": 0.009607415488272014,
"tpot_p90_overlap_s": 0.021706256481132124,
"tpot_p99_clean_s": 0.016912007837584522,
"tpot_p99_overlap_s": 0.16948255478733715,
"ttft_p90_clean_s": 0.06447408199310305,
"ttft_p90_overlap_s": 14.060086917877197,
"variant": "same"
},
{
"decode_endpoint": "http://127.0.0.1:8100",
"interference_index": 1.8961314610807898,
"n_decode_clean": 221,
"n_decode_overlap": 19,
"n_decode_total": 240,
"n_prefill_injections": 4,
"prefill_endpoint": "http://127.0.0.1:8100",
"prefill_size": 8192,
"tpot_p50_clean_s": 0.00617263052198622,
"tpot_p50_overlap_s": 0.008303543533941712,
"tpot_p90_clean_s": 0.007060385713673601,
"tpot_p90_overlap_s": 0.013387419479061859,
"tpot_p99_clean_s": 0.0076809098022152696,
"tpot_p99_overlap_s": 0.013849472662415166,
"ttft_p90_clean_s": 0.04307150840759277,
"ttft_p90_overlap_s": 0.52073073387146,
"variant": "same"
}
]
}