After the B3 audit bug fixes (joined_analysis hotspot median + b3_analyze percentile interp), regenerate b3_policy_comparison.json and the per-policy hotspot_index.json from the same raw run on dash0 and re-render the three affected figures (apc-vs-hotspot, latency-bars, per-worker TTFT). Key number changes in window_1_results.md: - hotspot_index magnitudes corrected (all five policies; lmetric smallest delta at +0.7%, sticky largest at +16.1%) - "capped reduces hotspot 13%" -> "~10% (2.253 -> 2.020)" - TTFT/E2E/TPOT percentiles shift by <1% from floor->interp (unified TTFT p90 7.24 -> 7.35 s) Restructured "Caveats" into "Limitations (read this before quoting B3 numbers)": 1. Agentic dispatch coupling is by design — promoted from caveat to top-level methodology framing, tied to agentic_dispatch_coupling.md 2. B3 interference_index is binary (not size-graded) — added 3. Hot-sweep cache contamination (<1%) — kept 4. Unified interference unrecoverable — kept with explicit warning not to read unified's failure attribution as causal 5. w600 is a sample, not full trace — kept 6. Reuse decomposition is per-token in expectation — added current_results/characterization_claim_matrix.md updates: - The "heavy-tail not sole cause" claim now cites the corrected ~10% drop with the median bug noted - New supported claim: "B3 saturated-replay latency gaps include an agentic dispatch-coupling feedback term, which is intentional and matches production"; cited against agentic_dispatch_coupling.md. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
133 lines
4.2 KiB
JSON
133 lines
4.2 KiB
JSON
{
|
|
"rows": [
|
|
{
|
|
"policy": "capped",
|
|
"n_ok": 770,
|
|
"n_total": 770,
|
|
"ttft_p50_s": 1.1989156164927408,
|
|
"ttft_p90_s": 12.827629912580612,
|
|
"ttft_p99_s": 46.61752380923125,
|
|
"tpot_p50_s": 0.007231239004497606,
|
|
"tpot_p90_s": 0.015998617687440243,
|
|
"tpot_p99_s": 0.11515370831539476,
|
|
"e2e_p50_s": 2.598489043477457,
|
|
"e2e_p90_s": 21.245602010778384,
|
|
"e2e_p99_s": 74.60736650204846,
|
|
"apc_ratio": 0.3158312503528108,
|
|
"interference_index": 6.331064378362814,
|
|
"hotspot_index_ttft_p90": 2.0204268015410918,
|
|
"reuse_intra_frac": 0.9192657105586233,
|
|
"reuse_cross_frac": 0.0602232594931501,
|
|
"n_slow": 185,
|
|
"failure_counts": {
|
|
"cache_miss_large_append": 60,
|
|
"hot_worker_queue": 66,
|
|
"same_worker_prefill_overlap": 45,
|
|
"unknown": 14
|
|
}
|
|
},
|
|
{
|
|
"policy": "lmetric",
|
|
"n_ok": 1214,
|
|
"n_total": 1214,
|
|
"ttft_p50_s": 0.9387824369769078,
|
|
"ttft_p90_s": 15.671339168207492,
|
|
"ttft_p99_s": 53.56683189840049,
|
|
"tpot_p50_s": 0.008854518407308914,
|
|
"tpot_p90_s": 0.02122720699121469,
|
|
"tpot_p99_s": 0.18280341184277568,
|
|
"e2e_p50_s": 2.754255389008904,
|
|
"e2e_p90_s": 24.8209177934099,
|
|
"e2e_p99_s": 80.59924928059091,
|
|
"apc_ratio": 0.5694312382571595,
|
|
"interference_index": 6.530231061794441,
|
|
"hotspot_index_ttft_p90": 2.252837147833725,
|
|
"reuse_intra_frac": 0.9321238805590836,
|
|
"reuse_cross_frac": 0.05679481258506571,
|
|
"n_slow": 295,
|
|
"failure_counts": {
|
|
"cache_miss_large_append": 94,
|
|
"hot_worker_queue": 68,
|
|
"same_worker_prefill_overlap": 69,
|
|
"unknown": 64
|
|
}
|
|
},
|
|
{
|
|
"policy": "load_only",
|
|
"n_ok": 1214,
|
|
"n_total": 1214,
|
|
"ttft_p50_s": 1.2609447415161412,
|
|
"ttft_p90_s": 20.197147866390882,
|
|
"ttft_p99_s": 52.84285237012196,
|
|
"tpot_p50_s": 0.009231464695980247,
|
|
"tpot_p90_s": 0.026851662550158716,
|
|
"tpot_p99_s": 0.3211630676943426,
|
|
"e2e_p50_s": 3.58568156149704,
|
|
"e2e_p90_s": 33.459180271782685,
|
|
"e2e_p99_s": 93.95083751494239,
|
|
"apc_ratio": 0.5412093853102866,
|
|
"interference_index": 9.16424627504275,
|
|
"hotspot_index_ttft_p90": 1.2940319990630569,
|
|
"reuse_intra_frac": 0.9353191550754928,
|
|
"reuse_cross_frac": 0.053372184678592026,
|
|
"n_slow": 379,
|
|
"failure_counts": {
|
|
"cache_miss_large_append": 151,
|
|
"hot_worker_queue": 33,
|
|
"same_worker_prefill_overlap": 108,
|
|
"unknown": 87
|
|
}
|
|
},
|
|
{
|
|
"policy": "sticky",
|
|
"n_ok": 1214,
|
|
"n_total": 1214,
|
|
"ttft_p50_s": 0.5415176274836995,
|
|
"ttft_p90_s": 18.021296651283045,
|
|
"ttft_p99_s": 74.09429564891524,
|
|
"tpot_p50_s": 0.008952101894096181,
|
|
"tpot_p90_s": 0.03641285916619554,
|
|
"tpot_p99_s": 0.35152006935195085,
|
|
"e2e_p50_s": 2.081947358994512,
|
|
"e2e_p90_s": 34.62592205510591,
|
|
"e2e_p99_s": 139.68334607904353,
|
|
"apc_ratio": 0.7720092868396378,
|
|
"interference_index": 13.651718321568111,
|
|
"hotspot_index_ttft_p90": 2.727756623171119,
|
|
"reuse_intra_frac": 0.9327723488279339,
|
|
"reuse_cross_frac": 0.05495149683864246,
|
|
"n_slow": 234,
|
|
"failure_counts": {
|
|
"cache_miss_large_append": 20,
|
|
"hot_worker_queue": 51,
|
|
"same_worker_prefill_overlap": 134,
|
|
"unknown": 29
|
|
}
|
|
},
|
|
{
|
|
"policy": "unified",
|
|
"n_ok": 1213,
|
|
"n_total": 1214,
|
|
"ttft_p50_s": 0.4997710260213353,
|
|
"ttft_p90_s": 7.345769894809922,
|
|
"ttft_p99_s": 42.34170345296613,
|
|
"tpot_p50_s": 0.008079791456705824,
|
|
"tpot_p90_s": 0.017110194704198407,
|
|
"tpot_p99_s": 0.12655874612209597,
|
|
"e2e_p50_s": 1.7495028690318577,
|
|
"e2e_p90_s": 18.033410895219994,
|
|
"e2e_p99_s": 68.80023987947489,
|
|
"apc_ratio": 0.794261466256467,
|
|
"interference_index": null,
|
|
"hotspot_index_ttft_p90": 3.667136528736114,
|
|
"reuse_intra_frac": 0.9311187350942534,
|
|
"reuse_cross_frac": 0.056702150437367635,
|
|
"n_slow": 189,
|
|
"failure_counts": {
|
|
"cache_miss_large_append": 18,
|
|
"hot_worker_queue": 116,
|
|
"unknown": 55
|
|
}
|
|
}
|
|
]
|
|
} |