diff --git a/analysis/mb5_pd_ablation/fig1.json b/analysis/mb5_pd_ablation/fig1.json
new file mode 100644
index 0000000..9907313
--- /dev/null
+++ b/analysis/mb5_pd_ablation/fig1.json
@@ -0,0 +1 @@
+[{"name": "fig1_p2048_colo_8C-proxy_rep1", "arm": "colo", "n": 400, "req": 400, "e2e_p50": 2.555279068008531, "e2e_p90": 3.4275179531075994, "e2e_p99": 5.231042563370427, "e2e_mean": 2.533418247078953, "ttft_p90": 1.059612769272644, "tpot_p99": 0.016176813139529973, "tps": 488.9617809203525, "wall": 209.42332099506166, "pu": 35.58080808080808, "du": null, "apc": 0.21875}, {"name": "fig1_p2048_pd2_2P+6D_rep1", "arm": "2P+6D", "n": 400, "req": 400, "e2e_p50": 1.9831702220253646, "e2e_p90": 2.408317962428555, "e2e_p99": 3.386159433723659, "e2e_mean": 2.080101182157232, "ttft_p90": 1.0395420689717867, "tpot_p99": 0.005522104923522062, "tps": 530.468328273858, "wall": 193.0369723169133, "pu": 46.917582417582416, "du": 48.58058608058608, "apc": 0.21875}, {"name": "fig1_p2048_pd6_6P+2D_rep1", "arm": "6P+2D", "n": 400, "req": 400, "e2e_p50": 2.3523911059601232, "e2e_p90": 2.7588249894673935, "e2e_p99": 3.603395572100996, "e2e_mean": 2.4113844815955963, "ttft_p90": 0.7664745874935761, "tpot_p99": 0.009031482047424195, "tps": 488.72074894811755, "wall": 209.52660639106762, "pu": 14.218855218855218, "du": 90.16161616161617, "apc": 0.21875}, {"name": "fig1_p2048_pd_4P+4D_rep1", "arm": "4P+4D", "n": 400, "req": 400, "e2e_p50": 2.0429230239242315, "e2e_p90": 2.2362921022577216, "e2e_p99": 2.9135718233766945, "e2e_mean": 2.095764471256989, "ttft_p90": 0.7477957331226207, "tpot_p99": 0.006522443569635094, "tps": 527.6226407393885, "wall": 194.07810069806874, "pu": 23.669444444444444, "du": 65.01666666666667, "apc": 0.21875}, {"name": "fig1_p4096_colo_8C-proxy_rep1", "arm": "colo", "n": 400, "req": 400, "e2e_p50": 2.2734450550633483, "e2e_p90": 3.0487391501781533, "e2e_p99": 4.6287568241392725, "e2e_mean": 2.2661249774988392, "ttft_p90": 0.713115519611165, "tpot_p99": 0.014206131751343207, "tps": 520.0122707724117, "wall": 196.9184301130008, "pu": 34.659946236559136, "du": null, "apc": 0.4375}, {"name": "fig1_p4096_pd2_2P+6D_rep1", "arm": "2P+6D", "n": 400, "req": 400, "e2e_p50": 
1.8291561939986423, "e2e_p90": 2.2601341274916207, "e2e_p99": 3.2612337802827804, "e2e_mean": 1.9412393476464787, "ttft_p90": 0.8800801524659628, "tpot_p99": 0.005551423189517877, "tps": 552.1771045541858, "wall": 185.44774702796713, "pu": 38.293103448275865, "du": 49.05747126436781, "apc": 0.4375}, {"name": "fig1_p4096_pd6_6P+2D_rep1", "arm": "6P+2D", "n": 400, "req": 400, "e2e_p50": 2.2462158624548465, "e2e_p90": 2.64611944751814, "e2e_p99": 3.4558432800625423, "e2e_mean": 2.2963840230583448, "ttft_p90": 0.711689621617552, "tpot_p99": 0.008991341477657172, "tps": 502.68863490365385, "wall": 203.70462526893243, "pu": 10.604166666666666, "du": 88.75520833333333, "apc": 0.4375}, {"name": "fig1_p4096_pd_4P+4D_rep1", "arm": "4P+4D", "n": 400, "req": 400, "e2e_p50": 1.8967114535043947, "e2e_p90": 2.18965909028193, "e2e_p99": 2.9952263131842467, "e2e_mean": 1.9644193770724814, "ttft_p90": 0.7082089037983679, "tpot_p99": 0.006562968838594706, "tps": 548.1341659016695, "wall": 186.81557613098994, "pu": 19.133522727272727, "du": 67.32102272727273, "apc": 0.4375}, {"name": "fig1_p512_colo_8C-proxy_rep1", "arm": "colo", "n": 400, "req": 400, "e2e_p50": 2.6645242294762284, "e2e_p90": 3.61182137549622, "e2e_p99": 5.3448568455432515, "e2e_mean": 2.6195515102380886, "ttft_p90": 1.112424884561915, "tpot_p99": 0.01658212741880276, "tps": 475.13983834945145, "wall": 215.5155003539985, "pu": 36.375, "du": null, "apc": 0.0546875}, {"name": "fig1_p512_pd2_2P+6D_rep1", "arm": "2P+6D", "n": 400, "req": 400, "e2e_p50": 2.103162212530151, "e2e_p90": 2.4044417485478347, "e2e_p99": 3.3844505867047685, "e2e_mean": 2.1739998702009324, "ttft_p90": 1.0342383790179164, "tpot_p99": 0.00550937183462905, "tps": 517.9536953932844, "wall": 197.7010704060085, "pu": 55.854838709677416, "du": 46.65232974910394, "apc": 0.0546875}, {"name": "fig1_p512_pd6_6P+2D_rep1", "arm": "6P+2D", "n": 400, "req": 400, "e2e_p50": 2.3785464115208015, "e2e_p90": 2.7350583746214396, "e2e_p99": 3.4022513560648044, 
"e2e_mean": 2.445902353489655, "ttft_p90": 0.7912747941561975, "tpot_p99": 0.009167485321045615, "tps": 482.02537525954966, "wall": 212.436948874034, "pu": 17.535353535353536, "du": 91.0959595959596, "apc": 0.0546875}, {"name": "fig1_p512_pd_4P+4D_rep1", "arm": "4P+4D", "n": 400, "req": 400, "e2e_p50": 2.153953853994608, "e2e_p90": 2.2966971063637174, "e2e_p99": 3.044859012498054, "e2e_mean": 2.1845501415853503, "ttft_p90": 0.7954113337909803, "tpot_p99": 0.006337697780334992, "tps": 512.0369567409949, "wall": 199.9855648149969, "pu": 26.21276595744681, "du": 62.944148936170215, "apc": 0.0546875}, {"name": "fig1_p6144_colo_8C-proxy_rep1", "arm": "colo", "n": 400, "req": 400, "e2e_p50": 2.0121258909930475, "e2e_p90": 2.7345886924420504, "e2e_p99": 3.9082167004665824, "e2e_mean": 2.0664054725714958, "ttft_p90": 0.5843230376602151, "tpot_p99": 0.01391471299074371, "tps": 547.3893798822313, "wall": 187.0697601441061, "pu": 34.44744318181818, "du": null, "apc": 0.65625}, {"name": "fig1_p6144_pd2_2P+6D_rep1", "arm": "2P+6D", "n": 400, "req": 400, "e2e_p50": 1.6655802099849097, "e2e_p90": 2.1824162190663636, "e2e_p99": 3.4232188416458658, "e2e_mean": 1.811165077216283, "ttft_p90": 0.7816491658217275, "tpot_p99": 0.005717973897763179, "tps": 574.0385687244718, "wall": 178.38522632292006, "pu": 21.53012048192771, "du": 51.61244979919679, "apc": 0.65625}, {"name": "fig1_p6144_pd6_6P+2D_rep1", "arm": "6P+2D", "n": 400, "req": 400, "e2e_p50": 2.104133589542471, "e2e_p90": 2.6115266072680243, "e2e_p99": 3.388375885724087, "e2e_mean": 2.1801070072923903, "ttft_p90": 0.7013537635677495, "tpot_p99": 0.00888146581120022, "tps": 521.6799713459584, "wall": 196.2889235249022, "pu": 6.709677419354839, "du": 88.70430107526882, "apc": 0.65625}, {"name": "fig1_p6144_pd_4P+4D_rep1", "arm": "4P+4D", "n": 399, "req": 400, "e2e_p50": 1.748427166021429, "e2e_p90": 2.1873498664470388, "e2e_p99": 3.1015963148581767, "e2e_mean": 1.8389387578233134, "ttft_p90": 0.6869595416123048, "tpot_p99": 
0.006769578668425845, "tps": 569.2010105657868, "wall": 179.45154366199858, "pu": 13.089285714285714, "du": 68.10119047619048, "apc": 0.65625}, {"name": "fig1_p7168_colo_8C-proxy_rep1", "arm": "colo", "n": 400, "req": 400, "e2e_p50": 1.9153751800186, "e2e_p90": 2.6549224384711154, "e2e_p99": 3.861135128394235, "e2e_mean": 1.958507129738573, "ttft_p90": 0.5802406982053072, "tpot_p99": 0.013510460370503293, "tps": 564.6374955198476, "wall": 181.35529576498084, "pu": 33.595588235294116, "du": null, "apc": 0.765625}, {"name": "fig1_p7168_pd2_2P+6D_rep1", "arm": "2P+6D", "n": 400, "req": 400, "e2e_p50": 1.5798494375194423, "e2e_p90": 2.1327995942323468, "e2e_p99": 3.3373043064947687, "e2e_mean": 1.7140900615448482, "ttft_p90": 0.727025477041025, "tpot_p99": 0.005695829117182167, "tps": 593.7272444367894, "wall": 172.46976782602724, "pu": 18.30246913580247, "du": 51.51234567901235, "apc": 0.765625}, {"name": "fig1_p7168_pd6_6P+2D_rep1", "arm": "6P+2D", "n": 400, "req": 400, "e2e_p50": 2.1042530980193987, "e2e_p90": 2.584451850724873, "e2e_p99": 3.5201327085494967, "e2e_mean": 2.164814479558263, "ttft_p90": 0.6972717452561484, "tpot_p99": 0.009263812077688232, "tps": 527.1747330056295, "wall": 194.24299684504513, "pu": 5.574275362318841, "du": 86.96195652173913, "apc": 0.765625}, {"name": "fig1_p7168_pd_4P+4D_rep1", "arm": "4P+4D", "n": 400, "req": 400, "e2e_p50": 1.621718394511845, "e2e_p90": 2.1209186871652492, "e2e_p99": 2.9950801801832854, "e2e_mean": 1.7384027546702419, "ttft_p90": 0.681954695621971, "tpot_p99": 0.006750334063133991, "tps": 587.6633213260067, "wall": 174.24943208799232, "pu": 9.521341463414634, "du": 70.10060975609755, "apc": 0.765625}, {"name": "fig1_p7680_colo_8C-proxy_rep1", "arm": "colo", "n": 400, "req": 400, "e2e_p50": 1.805449907493312, "e2e_p90": 2.2804414638434545, "e2e_p99": 3.2436008435313126, "e2e_mean": 1.8311505301928264, "ttft_p90": 0.5787636383553035, "tpot_p99": 0.01186512264145045, "tps": 586.5851277673049, "wall": 
174.5697174249217, "pu": 33.792682926829265, "du": null, "apc": 0.8203125}, {"name": "fig1_p7680_pd2_2P+6D_rep1", "arm": "2P+6D", "n": 400, "req": 400, "e2e_p50": 1.5353662585257553, "e2e_p90": 2.0703843546216376, "e2e_p99": 3.239132529124615, "e2e_mean": 1.6632775271314313, "ttft_p90": 0.684448261326179, "tpot_p99": 0.005772996509146383, "tps": 601.1715360533158, "wall": 170.3340791419614, "pu": 13.35625, "du": 52.83125, "apc": 0.8203125}, {"name": "fig1_p7680_pd6_6P+2D_rep1", "arm": "6P+2D", "n": 400, "req": 400, "e2e_p50": 1.935218213009648, "e2e_p90": 2.540124618355185, "e2e_p99": 3.5381310180295236, "e2e_mean": 2.0565583147658617, "ttft_p90": 0.6883091802941635, "tpot_p99": 0.009528811932669258, "tps": 539.8052979764931, "wall": 189.698027017992, "pu": 5.219101123595506, "du": 90.43820224719101, "apc": 0.8203125}, {"name": "fig1_p7680_pd_4P+4D_rep1", "arm": "4P+4D", "n": 400, "req": 400, "e2e_p50": 1.6052846949896775, "e2e_p90": 2.16270094960928, "e2e_p99": 3.0139038068498483, "e2e_mean": 1.7113545338093537, "ttft_p90": 0.6793354631052353, "tpot_p99": 0.006723990285321704, "tps": 594.143824556681, "wall": 172.34884175797924, "pu": 8.765432098765432, "du": 69.9320987654321, "apc": 0.8203125}]
diff --git a/analysis/mb5_pd_ablation/fig2.json b/analysis/mb5_pd_ablation/fig2.json
new file mode 100644
index 0000000..6afed7a
--- /dev/null
+++ b/analysis/mb5_pd_ablation/fig2.json
@@ -0,0 +1 @@
+[{"name": "fig2_in16384_out128_colo_8C-proxy_rep1", "arm": "colo", "n": 400, "req": 400, "e2e_p50": 1.6966288615367375, "e2e_p90": 3.142477283347398, "e2e_p99": 4.572902428222587, "e2e_mean": 1.8778391962422756, "ttft_p90": 1.528641331603285, "tpot_p99": 0.02700975849941244, "tps": 293.2414474758892, "wall": 174.600147560006, "pu": 30.718373493975903, "du": null, "apc": 0.73828125}, {"name": "fig2_in16384_out128_pd2_2P+6D_rep1", "arm": "2P+6D", "n": 400, "req": 400, "e2e_p50": 1.5746862735250033, "e2e_p90": 3.6393908081925486, "e2e_p99": 6.788023261578052, "e2e_mean": 2.1054475268305395, "ttft_p90": 2.8525443844730045, "tpot_p99": 0.007377313970786145, "tps": 272.743216323279, "wall": 187.72235911199823, "pu": 54.79545454545455, "du": 28.009469696969695, "apc": 0.73828125}, {"name": "fig2_in16384_out128_pd4_4P+4D_rep1", "arm": "4P+4D", "n": 400, "req": 400, "e2e_p50": 1.2106705509941094, "e2e_p90": 2.6971542384708305, "e2e_p99": 4.516567796494346, "e2e_mean": 1.6196880877471995, "ttft_p90": 1.8512291587772782, "tpot_p99": 0.007638815456312003, "tps": 307.7022111731225, "wall": 166.3946443699533, "pu": 28.876582278481013, "du": 47.36708860759494, "apc": 0.73828125}, {"name": "fig2_in16384_out128_pd6_6P+2D_rep1", "arm": "6P+2D", "n": 400, "req": 400, "e2e_p50": 1.3666948495083489, "e2e_p90": 2.656380763812923, "e2e_p99": 4.434802388340466, "e2e_mean": 1.6502306728763505, "ttft_p90": 1.7600484249996953, "tpot_p99": 0.009977159781425488, "tps": 307.56190002160906, "wall": 166.47055437101517, "pu": 21.023206751054854, "du": 70.51898734177215, "apc": 0.73828125}, {"name": "fig2_in2048_out2048_colo_8C-proxy_rep1", "arm": "colo", "n": 400, "req": 400, "e2e_p50": 11.900513574946672, "e2e_p90": 14.623661132121924, "e2e_p99": 17.82160759311984, "e2e_mean": 12.263538628305833, "ttft_p90": 0.13757785173365847, "tpot_p99": 0.00867108589104906, "tps": 1109.2196116287032, "wall": 738.5372485410189, "pu": 54.30869565217391, "du": null, "apc": 0.65625}, {"name": 
"fig2_in2048_out2048_pd2_2P+6D_rep1", "arm": "2P+6D", "n": 400, "req": 400, "e2e_p50": 11.239029973454308, "e2e_p90": 12.24954682419775, "e2e_p99": 12.908233385497004, "e2e_mean": 11.36166481389053, "ttft_p90": 0.1597270941361785, "tpot_p99": 0.006243306631126823, "tps": 1159.3604844966112, "wall": 706.5964477439411, "pu": 1.9437689969604863, "du": 86.7517730496454, "apc": 0.65625}, {"name": "fig2_in2048_out2048_pd4_4P+4D_rep1", "arm": "4P+4D", "n": 400, "req": 400, "e2e_p50": 12.676327208988369, "e2e_p90": 13.124083981337025, "e2e_p99": 13.789963249830762, "e2e_mean": 12.521095666602777, "ttft_p90": 0.1668232314521447, "tpot_p99": 0.006606968528777976, "tps": 1070.1894910008175, "wall": 765.4719158509979, "pu": 0.5945378151260504, "du": 92.65546218487395, "apc": 0.65625}, {"name": "fig2_in2048_out2048_pd6_6P+2D_rep1", "arm": "6P+2D", "n": 400, "req": 400, "e2e_p50": 15.628125407500193, "e2e_p90": 16.762494630913714, "e2e_p99": 17.865684803246978, "e2e_mean": 15.437463862727746, "ttft_p90": 0.1816938084899448, "tpot_p99": 0.008672833048181654, "tps": 897.4033352505149, "wall": 912.8559788239654, "pu": 0.2651869158878505, "du": 98.21028037383178, "apc": 0.65625}, {"name": "fig2_in32768_out64_colo_8C-proxy_rep1", "arm": "colo", "n": 400, "req": 400, "e2e_p50": 2.8777761650271714, "e2e_p90": 7.02909248394426, "e2e_p99": 12.042338756883982, "e2e_mean": 3.6056005006073972, "ttft_p90": 4.589756254199893, "tpot_p99": 0.15461345151164715, "tps": 97.4559162735194, "wall": 262.6828722039936, "pu": 36.19410569105691, "du": null, "apc": 0.73828125}, {"name": "fig2_in32768_out64_pd2_2P+6D_rep1", "arm": "2P+6D", "n": 378, "req": 400, "e2e_p50": 5.744399158516899, "e2e_p90": 17.501065154711252, "e2e_p99": 431.9109102533118, "e2e_mean": 24.76107206362763, "ttft_p90": 17.079777074372398, "tpot_p99": 0.008512455084701146, "tps": 17.72103702655267, "wall": 1365.1571273030713, "pu": 22.84921875, "du": 2.06796875, "apc": 0.8334464289939819}, {"name": 
"fig2_in32768_out64_pd4_4P+4D_rep1", "arm": "4P+4D", "n": 400, "req": 400, "e2e_p50": 2.331694360531401, "e2e_p90": 8.168041506421288, "e2e_p99": 16.819581468357928, "e2e_mean": 4.067478344673291, "ttft_p90": 7.7613852798473095, "tpot_p99": 0.008991237692276223, "tps": 89.86030789358054, "wall": 284.8866268109996, "pu": 53.20335820895522, "du": 15.065298507462687, "apc": 0.73828125}, {"name": "fig2_in32768_out64_pd6_6P+2D_rep1", "arm": "6P+2D", "n": 400, "req": 400, "e2e_p50": 1.881187686463818, "e2e_p90": 6.823026831133758, "e2e_p99": 12.242816790416828, "e2e_mean": 3.2513622655556538, "ttft_p90": 6.349652938055806, "tpot_p99": 0.011577233054050565, "tps": 105.74545516262978, "wall": 242.09078263107222, "pu": 42.801169590643276, "du": 31.153508771929825, "apc": 0.73828125}, {"name": "fig2_in4096_out1024_colo_8C-proxy_rep1", "arm": "colo", "n": 400, "req": 400, "e2e_p50": 6.376699871034361, "e2e_p90": 8.016901113302447, "e2e_p99": 9.421493258888365, "e2e_mean": 6.472622742803069, "ttft_p90": 0.26107478952035307, "tpot_p99": 0.009009339244909244, "tps": 964.4334957573764, "wall": 424.7052822220139, "pu": 50.12248743718593, "du": null, "apc": 0.65625}, {"name": "fig2_in4096_out1024_pd2_2P+6D_rep1", "arm": "2P+6D", "n": 400, "req": 400, "e2e_p50": 5.711871185048949, "e2e_p90": 6.152766603662167, "e2e_p99": 6.618846287685439, "e2e_mean": 5.7896922694568635, "ttft_p90": 0.2993865112075582, "tpot_p99": 0.006226416155723225, "tps": 1026.860822805463, "wall": 398.88560445897747, "pu": 3.8877005347593583, "du": 83.79411764705883, "apc": 0.65625}, {"name": "fig2_in4096_out1024_pd4_4P+4D_rep1", "arm": "4P+4D", "n": 400, "req": 400, "e2e_p50": 6.441164412011858, "e2e_p90": 6.755943879298865, "e2e_p99": 7.246829778881511, "e2e_mean": 6.4186325767840025, "ttft_p90": 0.30361198947066437, "tpot_p99": 0.0066874305859860395, "tps": 948.5732059117771, "wall": 431.8064198390348, "pu": 2.6683168316831685, "du": 88.01608910891089, "apc": 0.65625}, {"name": 
"fig2_in4096_out1024_pd6_6P+2D_rep1", "arm": "6P+2D", "n": 400, "req": 400, "e2e_p50": 8.175307728059124, "e2e_p90": 8.772436089895201, "e2e_p99": 9.845743471009191, "e2e_mean": 8.103695073690615, "ttft_p90": 0.3135268738726154, "tpot_p99": 0.008783244535960586, "tps": 795.3463509805472, "wall": 514.9957619030029, "pu": 1.2988980716253444, "du": 95.7107438016529, "apc": 0.65625}, {"name": "fig2_in8192_out512_colo_8C-proxy_rep1", "arm": "colo", "n": 400, "req": 400, "e2e_p50": 3.569815175491385, "e2e_p90": 4.748414856137243, "e2e_p99": 6.3728869484120505, "e2e_mean": 3.6905484462657476, "ttft_p90": 0.5787142073037103, "tpot_p99": 0.011623186658178922, "tps": 749.35951206451, "wall": 273.3000605220441, "pu": 43.21484375, "du": null, "apc": 0.7109375}, {"name": "fig2_in8192_out512_pd2_2P+6D_rep1", "arm": "2P+6D", "n": 400, "req": 400, "e2e_p50": 3.0584998495178297, "e2e_p90": 3.546729282848538, "e2e_p99": 4.885626904441742, "e2e_mean": 3.183082153094583, "ttft_p90": 0.6684098902973354, "tpot_p99": 0.006093405278323496, "tps": 801.0277344160907, "wall": 255.67154693999328, "pu": 14.795833333333333, "du": 70.95138888888889, "apc": 0.7109375}, {"name": "fig2_in8192_out512_pd4_4P+4D_rep1", "arm": "4P+4D", "n": 400, "req": 400, "e2e_p50": 3.3473425395786762, "e2e_p90": 3.8297921021352526, "e2e_p99": 4.728309926969231, "e2e_mean": 3.4304884171887533, "ttft_p90": 0.647590011463035, "tpot_p99": 0.0067240075080280985, "tps": 768.7152035389245, "wall": 266.41856315208133, "pu": 7.96, "du": 83.674, "apc": 0.7109375}, {"name": "fig2_in8192_out512_pd6_6P+2D_rep1", "arm": "6P+2D", "n": 400, "req": 400, "e2e_p50": 4.395662502502091, "e2e_p90": 4.981798351998441, "e2e_p99": 6.572449592349585, "e2e_mean": 4.434228266531718, "ttft_p90": 0.6629501176299528, "tpot_p99": 0.009418493171829412, "tps": 645.2526253784575, "wall": 317.3950665909797, "pu": 5.468680089485459, "du": 94.43959731543625, "apc": 0.7109375}]
diff --git a/analysis/mb5_pd_ablation/fig3.json b/analysis/mb5_pd_ablation/fig3.json
new file mode 100644
index 0000000..b85c9ed
--- /dev/null
+++ b/analysis/mb5_pd_ablation/fig3.json
@@ -0,0 +1 @@
+[{"name": "fig3_N16_colo_8C-proxy_rep1", "arm": "colo", "n": 720, "req": 720, "e2e_p50": 2.273988057495444, "e2e_p90": 3.22202166619245, "e2e_p99": 4.154007889915082, "e2e_mean": 2.349281024678405, "ttft_p90": 0.5880337386857718, "tpot_p99": 0.013491011632263985, "tps": 1007.0977376198009, "wall": 183.02096521001658, "pu": 53.47674418604651, "du": null, "apc": 0.7109375}, {"name": "fig3_N16_pd2_2P+6D_rep1", "arm": "2P+6D", "n": 720, "req": 720, "e2e_p50": 1.8571838534990093, "e2e_p90": 2.6877894366974946, "e2e_p99": 5.491437585417586, "e2e_mean": 2.106012088546211, "ttft_p90": 1.098052769700007, "tpot_p99": 0.00736965422303468, "tps": 1083.2982957028535, "wall": 170.14704142999835, "pu": 43.6625, "du": 80.82708333333333, "apc": 0.7109375}, {"name": "fig3_N16_pd4_4P+4D_rep1", "arm": "4P+4D", "n": 720, "req": 720, "e2e_p50": 1.9961085925024236, "e2e_p90": 2.6387570307022545, "e2e_p99": 4.1306983676511875, "e2e_mean": 2.1104672683014645, "ttft_p90": 0.751794431997405, "tpot_p99": 0.008509515762943705, "tps": 1093.4268194900712, "wall": 168.57095208800456, "pu": 19.946202531645568, "du": 98.21518987341773, "apc": 0.7109375}, {"name": "fig3_N16_pd6_6P+2D_rep1", "arm": "6P+2D", "n": 720, "req": 720, "e2e_p50": 3.0437641629832797, "e2e_p90": 3.5130674026062483, "e2e_p99": 5.287108179212371, "e2e_mean": 3.041667996072081, "ttft_p90": 0.7129690356960056, "tpot_p99": 0.013359745218868221, "tps": 871.970575501767, "wall": 211.38327964098426, "pu": 10.943333333333333, "du": 96.86, "apc": 0.7109375}, {"name": "fig3_N32_colo_8C-proxy_rep1", "arm": "colo", "n": 1320, "req": 1320, "e2e_p50": 3.270167972994386, "e2e_p90": 4.661326845278381, "e2e_p99": 6.208903694198525, "e2e_mean": 3.2551948325128417, "ttft_p90": 0.9038233671861248, "tpot_p99": 0.01838023195033048, "tps": 1580.8633971808533, "wall": 213.7566095860093, "pu": 66.3625, "du": null, "apc": 0.7109375}, {"name": "fig3_N32_pd2_2P+6D_rep1", "arm": "2P+6D", "n": 1320, "req": 1320, "e2e_p50": 2.5704439695036854, "e2e_p90": 
6.883691897706018, "e2e_p99": 17.488955901044665, "e2e_mean": 3.761722041763344, "ttft_p90": 5.035864923497138, "tpot_p99": 0.010349134326673354, "tps": 1479.6196585494329, "wall": 228.38301589699404, "pu": 56.61682242990654, "du": 82.69626168224299, "apc": 0.7109375}, {"name": "fig3_N32_pd4_4P+4D_rep1", "arm": "4P+4D", "n": 1320, "req": 1320, "e2e_p50": 3.1077146044990513, "e2e_p90": 3.770394044389833, "e2e_p99": 6.062736103993954, "e2e_mean": 3.2164792430455265, "ttft_p90": 1.0083121383970153, "tpot_p99": 0.011962187868884226, "tps": 1608.9998823250762, "wall": 210.01866048100055, "pu": 29.68877551020408, "du": 94.66836734693878, "apc": 0.7109375}, {"name": "fig3_N32_pd6_6P+2D_rep1", "arm": "6P+2D", "n": 1320, "req": 1320, "e2e_p50": 4.650343854504172, "e2e_p90": 5.231803922989639, "e2e_p99": 7.026731992097026, "e2e_mean": 4.642735430796385, "ttft_p90": 0.8066709822014674, "tpot_p99": 0.018365715299701022, "tps": 1244.6646057403061, "wall": 271.4948255470081, "pu": 17.44750656167979, "du": 97.92125984251969, "apc": 0.7109375}, {"name": "fig3_N64_colo_8C-proxy_rep1", "arm": "colo", "n": 2640, "req": 2640, "e2e_p50": 4.616785284990328, "e2e_p90": 6.662268486898392, "e2e_p99": 9.11107949850848, "e2e_mean": 4.8815010888681805, "ttft_p90": 1.4007563413004391, "tpot_p99": 0.028896959955475372, "tps": 2431.5635136762567, "wall": 277.9446213100164, "pu": 80.68076923076923, "du": null, "apc": 0.7109375}, {"name": "fig3_N64_pd2_2P+6D_rep1", "arm": "2P+6D", "n": 2639, "req": 2640, "e2e_p50": 11.69906226900639, "e2e_p90": 31.074856758594986, "e2e_p99": 33.94995162280335, "e2e_mean": 14.142758539245058, "ttft_p90": 29.560715823207286, "tpot_p99": 0.013875843108832534, "tps": 698.1370406736987, "wall": 967.6953959469975, "pu": 43.86363636363637, "du": 45.14781966001478, "apc": 0.4577210235884805}, {"name": "fig3_N64_pd4_4P+4D_rep1", "arm": "4P+4D", "n": 2601, "req": 2640, "e2e_p50": 4.077710573998047, "e2e_p90": 16.441288907983107, "e2e_p99": 385.4163983319886, "e2e_mean": 
13.444935590261034, "ttft_p90": 14.423547562997555, "tpot_p99": 0.0182731644510675, "tps": 864.9622710798936, "wall": 769.8092995070037, "pu": 19.74375, "du": 51.03541666666667, "apc": 0.014043434389389917}, {"name": "fig3_N64_pd6_6P+2D_rep1", "arm": "6P+2D", "n": 2640, "req": 2640, "e2e_p50": 7.523401059006574, "e2e_p90": 7.9631150633882495, "e2e_p99": 11.943453508106181, "e2e_mean": 7.532413133775613, "ttft_p90": 0.9084304597956361, "tpot_p99": 0.028826642419567665, "tps": 1712.6176644681352, "wall": 394.62398060099804, "pu": 21.95225225225225, "du": 98.26216216216216, "apc": 0.7109375}, {"name": "fig3_N8_colo_8C-proxy_rep1", "arm": "colo", "n": 360, "req": 360, "e2e_p50": 1.9761100795149105, "e2e_p90": 2.687890137603972, "e2e_p99": 3.689032165001845, "e2e_mean": 2.0329397837324197, "ttft_p90": 0.5798680250212783, "tpot_p99": 0.012779506407645046, "tps": 564.0943793942257, "wall": 163.3769159319927, "pu": 31.852272727272727, "du": null, "apc": 0.7109375}, {"name": "fig3_N8_pd2_2P+6D_rep1", "arm": "2P+6D", "n": 360, "req": 360, "e2e_p50": 1.6348335455040797, "e2e_p90": 2.1373069930952626, "e2e_p99": 3.27345111219359, "e2e_mean": 1.7528110698889374, "ttft_p90": 0.7096388279009263, "tpot_p99": 0.005832891406421778, "tps": 607.517244709636, "wall": 151.699397511009, "pu": 24.97222222222222, "du": 53.789351851851855, "apc": 0.7109375}, {"name": "fig3_N8_pd4_4P+4D_rep1", "arm": "4P+4D", "n": 360, "req": 360, "e2e_p50": 1.6989141649974044, "e2e_p90": 2.1767679123018753, "e2e_p99": 3.204218823980048, "e2e_mean": 1.7972881239612535, "ttft_p90": 0.671729751705425, "tpot_p99": 0.006599093441914785, "tps": 601.1982226696963, "wall": 153.2938663570094, "pu": 13.072916666666666, "du": 68.96527777777777, "apc": 0.7109375}, {"name": "fig3_N8_pd6_6P+2D_rep1", "arm": "6P+2D", "n": 360, "req": 360, "e2e_p50": 2.1166427994903643, "e2e_p90": 2.5305729087849618, "e2e_p99": 3.9926339721458506, "e2e_mean": 2.1701972734549297, "ttft_p90": 0.698665179402451, "tpot_p99": 
0.009245334605164794, "tps": 539.9759996392945, "wall": 170.67425230299705, "pu": 7.295833333333333, "du": 92.01875, "apc": 0.7109375}]
diff --git a/figs/mb5_pd_ablation/fig1_reuse_axis.png b/figs/mb5_pd_ablation/fig1_reuse_axis.png
new file mode 100644
index 0000000..6588a29
Binary files /dev/null and b/figs/mb5_pd_ablation/fig1_reuse_axis.png differ
diff --git a/figs/mb5_pd_ablation/fig2_shape_axis.png b/figs/mb5_pd_ablation/fig2_shape_axis.png
new file mode 100644
index 0000000..42ad5d4
Binary files /dev/null and b/figs/mb5_pd_ablation/fig2_shape_axis.png differ
diff --git a/figs/mb5_pd_ablation/fig3_concurrency_axis.png b/figs/mb5_pd_ablation/fig3_concurrency_axis.png
new file mode 100644
index 0000000..97dcb69
Binary files /dev/null and b/figs/mb5_pd_ablation/fig3_concurrency_axis.png differ
diff --git a/microbench/fresh_setup/fig_agg.py b/microbench/fresh_setup/fig_agg.py
new file mode 100644
index 0000000..f8497f6
--- /dev/null
+++ b/microbench/fresh_setup/fig_agg.py
@@ -0,0 +1,140 @@
+"""Aggregate a set of MB5 run dirs into one comparison table.
+
+Pulls the three core metrics the analysis cares about, per run:
+  - E2E latency  (from replay_metrics.summary.json: latency_stats_s)
+  - TPS          (output tokens / wall_clock_s)
+  - GPU util by workers (gpu_util.csv over run_window, split prefill/decode by role)
+plus honest reuse (producer-side APC from instance_apc.txt) and TTFT/TPOT for logs.
+
+Arm + GPU role split + producer APC ports are inferred from the dir name:
+  *_colo_*  -> 8 kv_both     ; apc ports 8000-8007 (all keep prefix)
+  *_pd6_*   -> 6P+2D P0-5/D6-7 ; apc 8000-8005
+  *_pd_*    -> 4P+4D P0-3/D4-7 ; apc 8000-8003   (legacy "pd"; "_pd4_" is accepted too)
+  *_pd2_*   -> 2P+6D P0-1/D2-7 ; apc 8000-8001
+
+Usage: fig_agg.py [--json] <run_dir> [<run_dir> ...]
+"""
+from __future__ import annotations
+
+import csv
+import json
+import re
+import statistics
+import sys
+from pathlib import Path
+
+
+def arm_of(name: str):
+    """Infer (arm, prefill_gpus, decode_gpus, producer_apc_ports) from a run name.
+
+    Driver-style config tokens ("8C-proxy", "6P+2D", ...) are tried first, then
+    the legacy "_colo"/"_pd6_"/"_pd2_"/"_pd4_"/"_pd_" markers; the first hit wins.
+    """
+    checks = (  # (matched?, arm label, size of the first GPU list), in priority order
+        ("8C-proxy" in name, "colo", 8),
+        ("6P+2D" in name, "6P+2D", 6),
+        ("2P+6D" in name, "2P+6D", 2),
+        ("4P+4D" in name, "4P+4D", 4),
+        ("_colo_" in name or name.endswith("_colo"), "colo", 8),
+        ("_pd6_" in name, "6P+2D", 6),
+        ("_pd2_" in name, "2P+6D", 2),
+        ("_pd4_" in name or "_pd_" in name, "4P+4D", 4),
+    )
+    # Unrecognised names fall back to arm "?" with the colocated 8-GPU layout.
+    arm, n_prefill = next(((a, k) for hit, a, k in checks if hit), ("?", 8))
+    gpus = list(range(8))  # GPUs 0..n_prefill-1 form the first list, the rest the second
+    return arm, gpus[:n_prefill], gpus[n_prefill:], list(range(8000, 8000 + n_prefill))
+
+
+def util_split(run: Path, pgpus, dgpus):
+    """Return (prefill_mean, decode_mean) of util_pct from the run's gpu_util.csv.
+
+    Samples outside run_window.json's t_start_unix/t_end_unix are dropped; a bound
+    that is absent is simply not enforced. A side with no samples (or no CSV) is None.
+    """
+    wp, csvp = run / "run_window.json", run / "gpu_util.csv"
+    win = json.loads(wp.read_text()) if wp.exists() else {}
+    t0, t1 = win.get("t_start_unix"), win.get("t_end_unix")
+    if not csvp.exists():
+        return None, None
+    by = {}
+    with open(csvp, newline="") as fh:  # closed on exit; a bare open() leaked the handle
+        for row in csv.DictReader(fh):
+            try:
+                ts, g, u = float(row["timestamp"]), int(row["gpu"]), float(row["util_pct"])
+            except (ValueError, KeyError, TypeError):  # TypeError: short row -> field is None
+                continue
+            if (t0 is None or ts >= t0) and (t1 is None or ts <= t1):
+                by.setdefault(g, []).append(u)
+    sides = [[v for g in gpus for v in by.get(g, [])] for gpus in (pgpus, dgpus)]
+    return tuple(statistics.fmean(s) if s else None for s in sides)
+
+
+def apc(run: Path, ports):
+    """Return summed hits / summed queries for `ports` in instance_apc.txt, else None."""
+    f = run / "instance_apc.txt"
+    lines = f.read_text().splitlines() if f.exists() else ()  # read_text closes the file
+    q = h = 0.0
+    for line in lines:
+        m = dict(re.findall(r"(\w+)=(\S+)", line))
+        try:  # a malformed port or counter skips that line instead of aborting the report
+            p, dq, dh = int(m.get("port", -1)), float(m.get("queries", 0)), float(m.get("hits", 0))
+        except ValueError:
+            continue
+        if p in ports:
+            q, h = q + dq, h + dh
+    return (h / q) if q else None
+
+
+def main():
+    """CLI entry point: print one row per run dir as a table, or as JSON with --json.
+
+    Run dirs lacking replay_metrics.summary.json are skipped with a note on stderr.
+    Metrics absent from the inputs stay None in JSON and render as '-' in the table.
+    """
+    args = [a for a in sys.argv[1:] if a != "--json"]
+    as_json = "--json" in sys.argv[1:]
+    rows = []
+    for d in args:
+        run = Path(d)
+        sp = run / "replay_metrics.summary.json"
+        if not sp.exists():
+            print(f"[fig_agg] skip {run}: no {sp.name}", file=sys.stderr)
+            continue
+        s = json.loads(sp.read_text())
+        arm, pg, dg, ports = arm_of(run.name)
+        lat, ttft, tpot = (s.get(k) or {} for k in ("latency_stats_s", "ttft_stats_s", "tpot_stats_s"))
+        out = s.get("actual_output_tokens_stats") or {}
+        wall = s.get("wall_clock_s") or 1.0
+        # Total output tokens = count * mean; null fields count as 0 instead of raising.
+        tps = (out.get("count") or 0) * (out.get("mean") or 0) / wall
+        pu, du = util_split(run, pg, dg)
+        rows.append({
+            "name": run.name, "arm": arm, "n": s.get("success_count", 0), "req": s.get("request_count", 0),
+            "e2e_p50": lat.get("p50"), "e2e_p90": lat.get("p90"), "e2e_p99": lat.get("p99"),
+            "e2e_mean": lat.get("mean"),
+            "ttft_p90": ttft.get("p90"), "tpot_p99": tpot.get("p99"),
+            "tps": tps, "wall": wall, "pu": pu, "du": du, "apc": apc(run, ports),
+        })
+
+    if as_json:
+        print(json.dumps(rows))
+        return
+
+    def f(x, w=7, p=1, scale=1):  # '-' for a missing value, never a made-up 0.0
+        return f"{x * scale:>{w}.{p}f}" if isinstance(x, (int, float)) else f"{'-':>{w}}"
+
+    nw = max([34] + [len(r["name"]) + 1 for r in rows])  # widen for long run names
+    hdr = (f"{'run':<{nw}}{'arm':>7}{'ok/req':>9}{'E2Ep50':>8}{'E2Ep90':>8}{'E2Ep99':>8}"
+           f"{'TPS':>8}{'Putil':>7}{'Dutil':>7}{'APC%':>7}{'TTFTp90':>9}{'TPOTp99ms':>10}")
+    print(hdr); print("-" * len(hdr))
+    for r in sorted(rows, key=lambda r: r["name"]):
+        print(f"{r['name']:<{nw}}{r['arm']:>7}{str(r['n'])+'/'+str(r['req']):>9}"
+              f"{f(r['e2e_p50'],8)}{f(r['e2e_p90'],8)}{f(r['e2e_p99'],8)}"
+              f"{f(r['tps'],8,1)}{f(r['pu'])}{f(r['du'])}"
+              f"{f(r['apc'],scale=100)}{f(r['ttft_p90'],9,2)}"
+              f"{f(r['tpot_p99'],10,1,1000)}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/microbench/fresh_setup/gpu_util_report.py b/microbench/fresh_setup/gpu_util_report.py
new file mode 100644
index 0000000..25eb3a3
--- /dev/null
+++ b/microbench/fresh_setup/gpu_util_report.py
@@ -0,0 +1,71 @@
+"""Per-GPU utilization report from gpu_util.csv (companion to bench_report.py).
+
+bench_report's per-worker GPU util needs request routing (breakdown.json), which
+the MB5 proxy doesn't log. But worker == GPU by index, and the prefill/decode role
+split is fixed by config, so per-GPU util from gpu_util.csv directly answers
+"GPU utils by workers" — and for PD it exposes the key signal: are the prefill-side
+GPUs saturated while the decode-side idles (or vice versa, or stalled at ~0)?
+
+Usage:
+  gpu_util_report.py <run_dir> [--prefill-gpus 0,1,2,3 --decode-gpus 4,5,6,7]
+"""
+from __future__ import annotations
+
+import argparse
+import csv
+import json
+import statistics
+from pathlib import Path
+
+
+def pct(xs, p):  # value at index round(p/100 * (n-1)) of sorted xs (clamped); None if xs is empty
+    ordered = sorted(xs)
+    return ordered[min(len(ordered) - 1, max(0, int(round(p / 100 * (len(ordered) - 1)))))] if ordered else None
+
+
+def main():
+    """Print per-GPU util/memory stats for one run dir, plus optional role aggregates.
+
+    Samples are limited to run_window.json's bounds when present (a missing bound is
+    not enforced); rows of gpu_util.csv that fail to parse are skipped.
+    """
+    ap = argparse.ArgumentParser()
+    ap.add_argument("run_dir", type=Path)
+    ap.add_argument("--prefill-gpus", default="")
+    ap.add_argument("--decode-gpus", default="")
+    a = ap.parse_args()
+
+    wp, csvp = a.run_dir / "run_window.json", a.run_dir / "gpu_util.csv"
+    win = json.loads(wp.read_text()) if wp.exists() else {}
+    t0, t1 = win.get("t_start_unix"), win.get("t_end_unix")
+    if not csvp.exists():
+        print(f"{a.run_dir.name}: gpu_util.csv absent"); return
+    by_gpu = {}
+    with open(csvp, newline="") as fh:  # closed on exit; a bare open() leaked the handle
+        for row in csv.DictReader(fh):
+            try:
+                ts = float(row["timestamp"]); g = int(row["gpu"]); u = float(row["util_pct"]); m = float(row["mem_used_mb"])
+            except (ValueError, KeyError, TypeError):  # TypeError: short row -> field is None
+                continue
+            if (t0 is None or ts >= t0) and (t1 is None or ts <= t1):
+                rec = by_gpu.setdefault(g, {"u": [], "m": []})
+                rec["u"].append(u); rec["m"].append(m)
+
+    print(f"=== {a.run_dir.name}: per-GPU util over replay window ({sum(len(d['u']) for d in by_gpu.values())} samples) ===")
+    print(f"{'gpu':>4}{'util_mean':>11}{'util_p90':>10}{'util_max':>10}{'mem_max_GB':>12}")
+    for g in sorted(by_gpu):
+        u, m = by_gpu[g]["u"], by_gpu[g]["m"]
+        print(f"{g:>4}{statistics.fmean(u):>11.1f}{pct(u,90):>10.1f}{max(u):>10.1f}{max(m)/1024:>12.1f}")
+
+    def agg(gpus, label):
+        gpus = [int(x) for x in gpus.split(",") if x != ""]
+        us = [v for g in gpus for v in by_gpu.get(g, {}).get("u", [])]
+        if us:
+            print(f"  {label:<14} gpus={gpus} util mean={statistics.fmean(us):.1f}% p90={pct(us,90):.1f}% max={max(us):.1f}%")
+    # An empty --*-gpus value parses to no GPUs, so agg() prints nothing for it.
+    agg(a.prefill_gpus, "prefill-side")
+    agg(a.decode_gpus, "decode-side")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/microbench/fresh_setup/mb5_run.sh b/microbench/fresh_setup/mb5_run.sh
index c887f70..cbbb0ec 100755
--- a/microbench/fresh_setup/mb5_run.sh
+++ b/microbench/fresh_setup/mb5_run.sh
@@ -69,6 +69,13 @@ run_one() {
     source "${VENV}/bin/activate"
     local replay_out="${rundir}/replay_metrics.jsonl"
     mkdir -p "$(dirname "${replay_out}")"
+    # bench_report.py inputs: worker->gpu map (worker i == gpu i for every config;
+    # for PD, workers 0-3 are producers on gpu0-3, 4-7 consumers on gpu4-7).
+    printf '{"base_port":8000,"n_instances":8,"gpu_indices":[0,1,2,3,4,5,6,7]}\n' \
+        > "${rundir}/bench_config.json"
+    # per-GPU utilization timeseries over the replay window (2s sampling)
+    bash "${SCRIPT_DIR}/gpu_monitor.sh" "${rundir}/gpu_util.csv" 2 >/dev/null 2>&1 &
+    local GPU_MON=$!
     local t0
     t0=$(date +%s.%N)
     if ! PYTHONPATH="${FRESH_ROOT}" python -m replayer \
@@ -82,6 +89,7 @@ run_one() {
         t1=$(date +%s.%N)
         local wall=$(python -c "print(${t1} - ${t0})")
         echo "[mb5-run] REPLAY FAILED after ${wall} s; see ${OUT_ROOT}/${config}_rep${rep}_replay.log"
+        kill "${GPU_MON}" 2>/dev/null || true
         bash "${LAUNCH}" stop > /dev/null 2>&1 || true
         return 1
     fi
@@ -91,6 +99,9 @@ run_one() {
     wall_clock_s=$(python -c "print(${t1} - ${t0})")
     echo "[mb5-run] replay done in ${wall_clock_s}s"
     echo "${wall_clock_s}" > "${rundir}/wall_clock_s.txt"
+    kill "${GPU_MON}" 2>/dev/null || true
+    printf '{"t_start_unix":%s,"t_end_unix":%s}\n' "${t0}" "${t1}" > "${rundir}/run_window.json"
+    cp -f "${replay_out}" "${rundir}/metrics.jsonl"   # bench_report.py expects metrics.jsonl
 
     # Per-instance prefix-cache counters, scraped from each backend BEFORE
     # teardown. For PD this is the only honest reuse signal: producer ports
diff --git a/microbench/fresh_setup/partial_summary.py b/microbench/fresh_setup/partial_summary.py
new file mode 100644
index 0000000..bf17361
--- /dev/null
+++ b/microbench/fresh_setup/partial_summary.py
@@ -0,0 +1,160 @@
+"""Compute a per-run summary directly from replay_metrics.jsonl (for partial / in-flight runs).
+
+Used when the replayer hasn't completed (so replay_metrics.summary.json doesn't exist
+yet) but enough records have streamed to disk to read out the per-arm result.
+
+Also accepts a finished run's directory and prints the same summary block for
+apples-to-apples comparison.
+"""
+from __future__ import annotations
+
+import json
+import re
+import statistics
+import sys
+from pathlib import Path
+
+# Record files in order of preference: the partial/live files come first so
+# in-flight runs work; metrics.jsonl is the fallback.
+RECORD_FILES = ("replay_metrics.partial.jsonl", "replay_metrics.jsonl", "metrics.jsonl")
+# Request count used for the "% of target" hint, which is printed only when
+# fewer records than this were read.
+TARGET_REQUESTS = 1214
+# Producer ports are the first N ports counting up from BASE_PORT, where N is
+# picked by the arm tag found in the run directory name.
+BASE_PORT = 8000
+PRODUCERS_BY_TAG = (("_colo_", 8), ("_pd6_", 6), ("_pd2_", 2))
+DEFAULT_PRODUCERS = 4  # names carrying none of the tags above
+
+
+def stats(xs):
+    """Summarize a sample of numbers.
+
+    Returns a dict with ``n``, ``mean``, ``p50``, ``p90`` and ``p99``, or None
+    when *xs* is empty. Percentiles are read by index from the sorted values
+    without interpolation: p50 is ``xs[n // 2]`` and p90/p99 are
+    ``xs[int(q * (n - 1))]``.
+    """
+    xs = sorted(xs)
+    n = len(xs)
+    if n == 0:
+        return None
+    return {
+        "n": n,
+        "mean": statistics.fmean(xs),
+        "p50": xs[n // 2],
+        "p90": xs[int(0.9 * (n - 1))],
+        "p99": xs[int(0.99 * (n - 1))],
+    }
+
+
+def apc(run: Path, producer_ports):
+    """Return hits / queries summed over *producer_ports* in ``instance_apc.txt``.
+
+    Every line is scanned for ``key=value`` tokens. A line is ignored when its
+    ``port`` is not in *producer_ports* or when its port, queries or hits
+    value is not numeric. Returns None when the file is absent or no queries
+    were counted.
+    """
+    f = run / "instance_apc.txt"
+    if not f.exists():
+        return None
+    wanted = set(producer_ports)
+    q = h = 0.0
+    with open(f) as fh:
+        for line in fh:
+            m = dict(re.findall(r"(\w+)=(\S+)", line))
+            try:
+                if int(m.get("port", -1)) not in wanted:
+                    continue
+                # Parse both counters before adding either, so a malformed
+                # line contributes nothing instead of half of its numbers.
+                dq = float(m.get("queries", 0))
+                dh = float(m.get("hits", 0))
+            except ValueError:
+                continue
+            q += dq
+            h += dh
+    return (h / q) if q else None
+
+
+def _load_records(path: Path) -> list[dict]:
+    """Read JSONL records, skipping blank, undecodable and non-object lines.
+
+    A file that is still being written can end in a half-written line;
+    dropping such lines keeps the summary usable instead of aborting.
+    """
+    recs = []
+    with open(path) as fh:
+        for line in fh:
+            line = line.strip()
+            if not line:
+                continue
+            try:
+                rec = json.loads(line)
+            except json.JSONDecodeError:
+                continue
+            if isinstance(rec, dict):
+                recs.append(rec)
+    return recs
+
+
+def _values(recs, key):
+    """Return ``rec[key]`` for every record where the key is present and not None."""
+    return [r[key] for r in recs if r.get(key) is not None]
+
+
+def _output_tokens(rec):
+    """Output tokens of one record: actual_output_tokens, else output_length, else 0."""
+    for key in ("actual_output_tokens", "output_length"):
+        if rec.get(key) is not None:
+            return rec[key]
+    return 0
+
+
+def _producer_ports(run_name: str) -> list[int]:
+    """Producer ports for a run, chosen by the arm tag in its directory name."""
+    count = next((c for tag, c in PRODUCERS_BY_TAG if tag in run_name), DEFAULT_PRODUCERS)
+    return list(range(BASE_PORT, BASE_PORT + count))
+
+
+def main(argv=None):
+    """Print a summary block for each run directory in *argv* (default ``sys.argv[1:]``)."""
+    run_dirs = sys.argv[1:] if argv is None else argv
+    for d in run_dirs:
+        run = Path(d)
+        rec_path = next((run / fn for fn in RECORD_FILES if (run / fn).exists()), None)
+        if rec_path is None:
+            print(f"{run.name}: no records")
+            continue
+        recs = _load_records(rec_path)
+        oks = [r for r in recs if r.get("error") is None]
+        # None-valued fields are left out so they cannot break sorting or the mean.
+        lat = stats(_values(oks, "latency_s"))
+        ttft = stats(_values(oks, "ttft_s"))
+        tpot = stats(_values(oks, "tpot_s"))
+        out = sum(_output_tokens(r) for r in oks)
+        ts = _values(oks, "t_dispatch_unix")
+        tf = _values(oks, "t_finish_unix")
+        # Throughput window: earliest dispatch to latest finish among successes.
+        span = max(tf) - min(ts) if ts and tf else 0
+        tps = out / span if span else 0
+        a = apc(run, _producer_ports(run.name))
+
+        print(f"{run.name}")
+        print(f"  n_ok={len(oks)}/{len(recs)}"
+              + (f"  (target={TARGET_REQUESTS} -> {len(oks)*100/TARGET_REQUESTS:.1f}%)"
+                 if len(recs) < TARGET_REQUESTS else ""))
+        if lat:
+            print(f"  E2E   mean={lat['mean']:.2f}  p50={lat['p50']:.2f}  p90={lat['p90']:.2f}  p99={lat['p99']:.2f}")
+        if ttft:
+            print(f"  TTFT  mean={ttft['mean']:.2f}  p50={ttft['p50']:.2f}  p90={ttft['p90']:.2f}  p99={ttft['p99']:.2f}")
+        if tpot:
+            print(f"  TPOT  mean={tpot['mean']*1000:.1f}ms  p90={tpot['p90']*1000:.1f}ms  p99={tpot['p99']*1000:.1f}ms")
+        print(f"  output_tokens={out:.0f}  span={span:.0f}s  TPS={tps:.0f}")
+        if a is not None:
+            print(f"  producer APC={a*100:.1f}%")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/replayer/__main__.py b/replayer/__main__.py
index 105f1f4..258ee34 100644
--- a/replayer/__main__.py
+++ b/replayer/__main__.py
@@ -30,12 +30,23 @@ def main() -> None:
                    default=float(_env_think) if _env_think else None,
                    help="Closed-loop think-time (s) after each turn completes; "
                         "ignore absolute trace schedule. Env: REPLAY_INTER_TURN_THINK_S")
+    p.add_argument("--no-realized-prefix",
+                   action="store_true",
+                   default=bool(os.environ.get("REPLAY_NO_REALIZED_PREFIX")),
+                   help="Controlled-reuse mode: prompt = hash-built tokens only "
+                        "(reuse set by hash_ids). Env: REPLAY_NO_REALIZED_PREFIX")
     p.add_argument("--dispatch-mode", choices=["tracets", "thinktime"],
                    default=os.environ.get("REPLAY_DISPATCH_MODE", "tracets"),
                    help="tracets (Mode 1): absolute trace ts = max(prev_finished, ts). "
                         "thinktime (Mode 2): turn-k at prev_finished + "
                         "time_to_parent_chat. Env: REPLAY_DISPATCH_MODE")
     p.add_argument("--request-timeout", type=float, default=600.0)
+    _env_maxdur = os.environ.get("REPLAY_MAX_DURATION")
+    p.add_argument("--max-duration", type=float,
+                   default=float(_env_maxdur) if _env_maxdur else None,
+                   help="Overall wall-clock deadline (s): cancel in-flight + write "
+                        "summary (un-run turns counted as failures) to bound a "
+                        "collapsed config's drain. Env: REPLAY_MAX_DURATION")
     p.add_argument("--request-limit", type=int, default=None,
                    help="Limit number of requests to replay")
     p.add_argument("-v", "--verbose", action="store_true")
@@ -56,7 +67,9 @@ def main() -> None:
         request_limit=args.request_limit,
         max_inflight_sessions=args.max_inflight_sessions,
         inter_turn_think_s=args.inter_turn_think,
+        no_realized_prefix=args.no_realized_prefix,
         dispatch_mode=args.dispatch_mode,
+        max_duration_s=args.max_duration,
     )
 
     results = asyncio.run(replay_trace(config))
diff --git a/replayer/replay.py b/replayer/replay.py
index 0d55441..b2832eb 100644
--- a/replayer/replay.py
+++ b/replayer/replay.py
@@ -66,6 +66,13 @@ class ReplayConfig:
     # max_inflight_sessions=N this is a stable N-user closed-loop (no open-loop
     # runaway), so it removes the "immediate retrigger under load" artifact.
     inter_turn_think_s: float | None = None
+    # Controlled-reuse mode: skip _apply_realized_prefix so each turn's prompt is
+    # exactly the hash-built tokens. Then prefix-cache reuse is governed solely by
+    # the generated hash_ids (shared prefix blocks hit, fresh delta blocks miss) —
+    # required for the reuse-fraction sweep, where realized-prefix would otherwise
+    # force every fixed-length turn to ≈ the prior turn (≈100% reuse regardless).
+    # Keep OFF (realized-prefix ON) for the real agentic trace.
+    no_realized_prefix: bool = False
     # Dispatch timing for intra-session turns:
     #  "tracets"   (Mode 1): fire at absolute trace timestamp -> effectively
     #              max(prev_finished, trace_ts); collapses think-time to 0 when
@@ -73,6 +80,25 @@ class ReplayConfig:
     #  "thinktime" (Mode 2): turn-1 at trace arrival; turn-k at
     #              prev_finished + time_to_parent_chat (real production gap).
     dispatch_mode: str = "tracets"
+    # Overall wall-clock deadline for the whole replay (seconds). When exceeded,
+    # stop awaiting in-flight sessions, cancel them, and write the summary over
+    # whatever completed — un-run turns are counted as failures so completion%
+    # stays honest (request_count == full trace). None = no deadline (default,
+    # original behavior unchanged). Used to bound the slow drain of a collapsed
+    # config in a sweep. Env: REPLAY_MAX_DURATION.
+    max_duration_s: float | None = None
+
+
+def _skipped_metric() -> "RequestMetrics":
+    """Failure placeholder standing in for a turn left un-run by a max_duration cutoff.
+    Its non-None error is what counts: the row adds to request/error totals and
+    stays out of the latency/ttft/tpot percentiles (those use successes only)."""
+    fields = dict(
+        request_id="deadline_skipped", session_id="", turn_id=-1, request_type="skipped",
+        trace_timestamp_s=0.0, input_length=0, output_length=0, cached_tokens=0,
+        effective_input_length=None, latency_s=None, ttft_s=None, tpot_s=None,
+    )
+    return RequestMetrics(error="deadline_skipped", **fields)
 
 
 def _build_prompt_token_ids(req: TraceRequest) -> list[int]:
@@ -318,10 +344,9 @@ async def _run_session(
                 if elapsed < target_wall:
                     await asyncio.sleep(target_wall - elapsed)
 
-            token_ids = _apply_realized_prefix(
-                _build_prompt_token_ids(req),
-                realized_context,
-            )
+            token_ids = _build_prompt_token_ids(req)
+            if not config.no_realized_prefix:
+                token_ids = _apply_realized_prefix(token_ids, realized_context)
             result = await _dispatch_request(
                 client=client, config=config, req=req,
                 prompt_token_ids=token_ids, sem=request_sem,
@@ -410,25 +435,44 @@ async def replay_trace(config: ReplayConfig) -> list[RequestMetrics]:
             trust_env=False,
             limits=limits,
         ) as client:
+            states = [_SessionState(session_id=sid, turns=turns)
+                      for sid, turns in sessions]
             tasks = [
                 asyncio.create_task(_run_session(
-                    state=_SessionState(session_id=sid, turns=turns),
-                    config=config, client=client,
+                    state=st, config=config, client=client,
                     request_sem=request_sem,
                     earliest_ts=earliest_ts, sweep_start=sweep_start,
                     sink=sink,
                     session_sem=session_sem,
                 ))
-                for sid, turns in sessions
+                for st in states
             ]
-            all_results = await asyncio.gather(*tasks)
+            if config.max_duration_s and config.max_duration_s > 0:
+                _done, pending = await asyncio.wait(
+                    tasks, timeout=config.max_duration_s)
+                if pending:
+                    logger.warning(
+                        "max_duration %.0fs reached: cancelling %d in-flight "
+                        "session(s); un-run turns counted as failures",
+                        config.max_duration_s, len(pending))
+                    for t in pending:
+                        t.cancel()
+                    await asyncio.gather(*pending, return_exceptions=True)
+            else:
+                await asyncio.gather(*tasks)
     finally:
         sink.close()
 
     sweep_elapsed = time.perf_counter() - sweep_start
     post_metrics = await _snapshot_prefix_cache_metrics(config.endpoint_url)
 
-    flat = [m for group in all_results for m in group]
+    # Build from the session states (identical to the gather return in the
+    # uncapped path) so partially-completed (cancelled) sessions still contribute
+    # their finished turns; pad un-run turns as failures so request_count == trace.
+    flat = [m for st in states for m in st.metrics]
+    missing = n_requests - len(flat)
+    if missing > 0:
+        flat.extend(_skipped_metric() for _ in range(missing))
     summary_path = config.output_path.with_suffix(".summary.json")
     write_summary_json(summary_path, flat)