Files
replaysim/docs/assets/frontier_vllm_alignment/frontier_vllm_alignment.json

435 lines
16 KiB
JSON

[
{
"decode_tps_ratio": 0.6129207541251469,
"e2e_p50_ratio": 0.7394530533987747,
"e2e_p95_ratio": 1.2287231205931113,
"fixture": "coder_100",
"frontier_complete": false,
"frontier_completed": 96,
"frontier_decode_tps": 347.79923381681954,
"frontier_e2e_p50_s": 30.939283157873398,
"frontier_e2e_p95_s": 119.6361375789676,
"frontier_preemptions": 0,
"frontier_prefix_hit": 0.24878456156190046,
"frontier_rps": 0.4048148795016268,
"frontier_total": 100,
"frontier_total_tps": 2348.908820556559,
"frontier_tpot_p50_s": 0.056889664283438265,
"frontier_tpot_p95_s": 0.14568807925543142,
"frontier_ttft_p50_s": 0.9087481136376141,
"frontier_ttft_p95_s": 12.762958146117297,
"kv_blocks": 15281,
"label": "TP1 N100 raw",
"notes": "Frontier incomplete before lifecycle fix; included as TP1 100-request baseline.",
"prefix_hit_delta": -0.0022975070751777016,
"request_count": 100,
"rps_ratio": 0.5884039239601411,
"run_id": "tp1_n100_scale1",
"scale_label": "raw",
"scale_value": 1.0,
"total_tps_ratio": 0.6129207541251469,
"tp": 1,
"tpot_p50_ratio": 0.8609035602986401,
"tpot_p95_ratio": 0.23454605053898236,
"ttft_p50_ratio": 0.20180834300191677,
"ttft_p95_ratio": 0.439186240241972,
"vllm_completed": 100,
"vllm_decode_tps": 567.445679520595,
"vllm_e2e_p50_s": 41.84076732886024,
"vllm_e2e_p95_s": 97.36622968502343,
"vllm_preemptions": 8,
"vllm_prefix_hit": 0.25108206863707816,
"vllm_rps": 0.6879880691092217,
"vllm_total": 100,
"vllm_total_tps": 3832.3205810011714,
"vllm_tpot_p50_s": 0.06608134395878643,
"vllm_tpot_p95_s": 0.6211491471318447,
"vllm_ttft_p50_s": 4.503025494981557,
"vllm_ttft_p95_s": 29.060469059972093
},
{
"decode_tps_ratio": 0.8960505440100501,
"e2e_p50_ratio": 0.7927945162118318,
"e2e_p95_ratio": 0.951923946910999,
"fixture": "coder_500",
"frontier_complete": false,
"frontier_completed": 439,
"frontier_decode_tps": 656.2204997652797,
"frontier_e2e_p50_s": 177.7998574092898,
"frontier_e2e_p95_s": 397.29145000151055,
"frontier_preemptions": 0,
"frontier_prefix_hit": 0.11923746923408568,
"frontier_rps": 0.6609904720097601,
"frontier_total": 500,
"frontier_total_tps": 4733.748762075876,
"frontier_tpot_p50_s": 0.05643274739314083,
"frontier_tpot_p95_s": 0.08942839772817235,
"frontier_ttft_p50_s": 136.77557892500107,
"frontier_ttft_p95_s": 340.237122196321,
"kv_blocks": 15281,
"label": "TP1 N500 raw",
"notes": "Frontier incomplete; useful as high-pressure stress signal.",
"prefix_hit_delta": -0.2676124002320734,
"request_count": 500,
"rps_ratio": 0.786732377640824,
"run_id": "tp1_n500_scale1",
"scale_label": "raw",
"scale_value": 1.0,
"total_tps_ratio": 0.8960505440100502,
"tp": 1,
"tpot_p50_ratio": 1.134268756171416,
"tpot_p95_ratio": 0.9733188934802052,
"ttft_p50_ratio": 0.736706497600023,
"ttft_p95_ratio": 0.9051387119015346,
"vllm_completed": 500,
"vllm_decode_tps": 732.3476383692921,
"vllm_e2e_p50_s": 224.26978715602309,
"vllm_e2e_p95_s": 417.3562933159992,
"vllm_preemptions": 63,
"vllm_prefix_hit": 0.38684986946615907,
"vllm_rps": 0.8401719451179492,
"vllm_total": 500,
"vllm_total_tps": 5282.903730956031,
"vllm_tpot_p50_s": 0.049752536236317216,
"vllm_tpot_p95_s": 0.09187985389702198,
"vllm_ttft_p50_s": 185.6581683079712,
"vllm_ttft_p95_s": 375.8950067239348
},
{
"decode_tps_ratio": 0.8044430383408974,
"e2e_p50_ratio": 0.8754552629577944,
"e2e_p95_ratio": 1.030008185534932,
"fixture": "coder_200_ts0667",
"frontier_complete": false,
"frontier_completed": 176,
"frontier_decode_tps": 593.287826008356,
"frontier_e2e_p50_s": 73.20731168652793,
"frontier_e2e_p95_s": 189.24029025053343,
"frontier_preemptions": 0,
"frontier_prefix_hit": 0.17027600800712456,
"frontier_rps": 0.5830903705506575,
"frontier_total": 200,
"frontier_total_tps": 3913.43752605849,
"frontier_tpot_p50_s": 0.05837096651496554,
"frontier_tpot_p95_s": 0.23589456903741046,
"frontier_ttft_p50_s": 20.58014532403832,
"frontier_ttft_p95_s": 96.7179381828816,
"kv_blocks": 15281,
"label": "TP1 N200 scale 0.667",
"notes": "Dense-arrival run; Frontier incomplete before lifecycle fix.",
"prefix_hit_delta": -0.09947893983522305,
"request_count": 200,
"rps_ratio": 0.7079098737399896,
"run_id": "tp1_n200_scale0667",
"scale_label": "0.667",
"scale_value": 0.6666666666666666,
"total_tps_ratio": 0.8044430383408974,
"tp": 1,
"tpot_p50_ratio": 1.1344230703885074,
"tpot_p95_ratio": 0.930639595403931,
"ttft_p50_ratio": 0.5954345540217358,
"ttft_p95_ratio": 0.800618794003408,
"vllm_completed": 200,
"vllm_decode_tps": 737.5137800085473,
"vllm_e2e_p50_s": 83.62199050490744,
"vllm_e2e_p95_s": 183.7269770358689,
"vllm_preemptions": 26,
"vllm_prefix_hit": 0.2697549478423476,
"vllm_rps": 0.8236788215286605,
"vllm_total": 200,
"vllm_total_tps": 4864.778908559713,
"vllm_tpot_p50_s": 0.051454318973762715,
"vllm_tpot_p95_s": 0.2534757495838373,
"vllm_ttft_p50_s": 34.563236522022635,
"vllm_ttft_p95_s": 120.80398175423034
},
{
"decode_tps_ratio": 0.7393232177681209,
"e2e_p50_ratio": 1.1173620884379967,
"e2e_p95_ratio": 1.2258475306262637,
"fixture": "coder_200_ts2",
"frontier_complete": true,
"frontier_completed": 200,
"frontier_decode_tps": 531.5597035900641,
"frontier_e2e_p50_s": 61.45769412455945,
"frontier_e2e_p95_s": 174.48408358603848,
"frontier_preemptions": 33,
"frontier_prefix_hit": 0.23134168999974056,
"frontier_rps": 0.5936627654877362,
"frontier_total": 200,
"frontier_total_tps": 3506.267279013048,
"frontier_tpot_p50_s": 0.054213625462090735,
"frontier_tpot_p95_s": 0.06653162646338621,
"frontier_ttft_p50_s": 9.595321273711544,
"frontier_ttft_p95_s": 77.50341053197451,
"kv_blocks": 15281,
"label": "TP1 N200 scale 2",
"notes": "After Frontier decode-preemption lifecycle fix.",
"prefix_hit_delta": -0.038413257842607046,
"request_count": 200,
"rps_ratio": 0.7393232177681209,
"run_id": "tp1_n200_scale2",
"scale_label": "2",
"scale_value": 2.0,
"total_tps_ratio": 0.7393232177681209,
"tp": 1,
"tpot_p50_ratio": 1.0907433399899442,
"tpot_p95_ratio": 0.9693811149298648,
"ttft_p50_ratio": 1.0410723384685256,
"ttft_p95_ratio": 1.1198067467817787,
"vllm_completed": 200,
"vllm_decode_tps": 718.9814830849542,
"vllm_e2e_p50_s": 55.002487340942025,
"vllm_e2e_p95_s": 142.3375087250024,
"vllm_preemptions": 43,
"vllm_prefix_hit": 0.2697549478423476,
"vllm_rps": 0.8029813635231063,
"vllm_total": 200,
"vllm_total_tps": 4742.53640998563,
"vllm_tpot_p50_s": 0.049703375188695206,
"vllm_tpot_p95_s": 0.06863309532102842,
"vllm_ttft_p50_s": 9.216767095960677,
"vllm_ttft_p95_s": 69.2114159471821
},
{
"decode_tps_ratio": 0.7356557719569122,
"e2e_p50_ratio": 1.3476957295017153,
"e2e_p95_ratio": 1.258652459348984,
"fixture": "coder_200_ts3",
"frontier_complete": true,
"frontier_completed": 200,
"frontier_decode_tps": 513.9343093668691,
"frontier_e2e_p50_s": 44.76058145123308,
"frontier_e2e_p95_s": 154.54831351855702,
"frontier_preemptions": 20,
"frontier_prefix_hit": 0.21767512777477313,
"frontier_rps": 0.573978165231764,
"frontier_total": 200,
"frontier_total_tps": 3390.0068803652352,
"frontier_tpot_p50_s": 0.053393334371887605,
"frontier_tpot_p95_s": 0.06861254670772189,
"frontier_ttft_p50_s": 1.0014741156186515,
"frontier_ttft_p95_s": 45.94665669959886,
"kv_blocks": 15281,
"label": "TP1 N200 scale 3",
"notes": "After Frontier decode-preemption lifecycle fix.",
"prefix_hit_delta": -0.05207982006757447,
"request_count": 200,
"rps_ratio": 0.7356557719569123,
"run_id": "tp1_n200_scale3",
"scale_label": "3",
"scale_value": 3.0,
"total_tps_ratio": 0.7356557719569123,
"tp": 1,
"tpot_p50_ratio": 1.1566613307426805,
"tpot_p95_ratio": 0.9611804148017213,
"ttft_p50_ratio": 0.8587856162345445,
"ttft_p95_ratio": 1.4243304641052532,
"vllm_completed": 200,
"vllm_decode_tps": 698.607050957755,
"vllm_e2e_p50_s": 33.2126758818049,
"vllm_e2e_p95_s": 122.78871134808287,
"vllm_preemptions": 16,
"vllm_prefix_hit": 0.2697549478423476,
"vllm_rps": 0.7802265503945264,
"vllm_total": 200,
"vllm_total_tps": 4608.1428428781355,
"vllm_tpot_p50_s": 0.04616159713544178,
"vllm_tpot_p95_s": 0.07138362959869063,
"vllm_ttft_p50_s": 1.1661514779552817,
"vllm_ttft_p95_s": 32.25842447206378
},
{
"decode_tps_ratio": 0.6070363250137228,
"e2e_p50_ratio": 1.5837949050918096,
"e2e_p95_ratio": 1.4718353941122981,
"fixture": "coder_200_ts2",
"frontier_complete": true,
"frontier_completed": 200,
"frontier_decode_tps": 694.538225813865,
"frontier_e2e_p50_s": 26.05122481685102,
"frontier_e2e_p95_s": 106.75916510714146,
"frontier_preemptions": 0,
"frontier_prefix_hit": 0.2697549478423476,
"frontier_rps": 0.7756823572006221,
"frontier_total": 200,
"frontier_total_tps": 4581.304110804026,
"frontier_tpot_p50_s": 0.042955276577521156,
"frontier_tpot_p95_s": 0.05288764732371923,
"frontier_ttft_p50_s": 0.2690959621493789,
"frontier_ttft_p95_s": 6.744624223172184,
"kv_blocks": 69055,
"label": "TP2 N200 scale 2",
"notes": "Uses true-mixed TP2/TP4 attention profile.",
"prefix_hit_delta": 0.0,
"request_count": 200,
"rps_ratio": 0.6070363250137228,
"run_id": "tp2_n200_scale2",
"scale_label": "2",
"scale_value": 2.0,
"total_tps_ratio": 0.6070363250137228,
"tp": 2,
"tpot_p50_ratio": 1.4296981582601855,
"tpot_p95_ratio": 1.218502045500008,
"ttft_p50_ratio": 1.1953492307083635,
"ttft_p95_ratio": 9.432094021900193,
"vllm_completed": 200,
"vllm_decode_tps": 1144.1460703330465,
"vllm_e2e_p50_s": 16.448610065039247,
"vllm_e2e_p95_s": 72.53471789998002,
"vllm_preemptions": 0,
"vllm_prefix_hit": 0.2697549478423476,
"vllm_rps": 1.2778186827338327,
"vllm_total": 200,
"vllm_total_tps": 7547.001591215254,
"vllm_tpot_p50_s": 0.030044996791346416,
"vllm_tpot_p95_s": 0.043403823177019754,
"vllm_ttft_p50_s": 0.22511911601759493,
"vllm_ttft_p95_s": 0.7150717759504914
},
{
"decode_tps_ratio": 0.6321128225155744,
"e2e_p50_ratio": 2.1882239414176055,
"e2e_p95_ratio": 1.8819058641979227,
"fixture": "coder_200_ts3",
"frontier_complete": true,
"frontier_completed": 200,
"frontier_decode_tps": 615.822856748031,
"frontier_e2e_p50_s": 21.785964943721574,
"frontier_e2e_p95_s": 101.59183927019191,
"frontier_preemptions": 0,
"frontier_prefix_hit": 0.2697549478423476,
"frontier_rps": 0.6877705321122985,
"frontier_total": 200,
"frontier_total_tps": 4062.0828059403734,
"frontier_tpot_p50_s": 0.0393789684875167,
"frontier_tpot_p95_s": 0.04670767224504207,
"frontier_ttft_p50_s": 0.13415354950526392,
"frontier_ttft_p95_s": 0.574137821753455,
"kv_blocks": 69055,
"label": "TP2 N200 scale 3",
"notes": "Uses true-mixed TP2/TP4 attention profile.",
"prefix_hit_delta": 0.0,
"request_count": 200,
"rps_ratio": 0.6321128225155745,
"run_id": "tp2_n200_scale3",
"scale_label": "3",
"scale_value": 3.0,
"total_tps_ratio": 0.6321128225155745,
"tp": 2,
"tpot_p50_ratio": 2.066305230245682,
"tpot_p95_ratio": 1.668678182045304,
"ttft_p50_ratio": 0.8737883511042303,
"ttft_p95_ratio": 0.9156237864420547,
"vllm_completed": 200,
"vllm_decode_tps": 974.229338201501,
"vllm_e2e_p50_s": 9.956003373954445,
"vllm_e2e_p95_s": 53.98348621092737,
"vllm_preemptions": 0,
"vllm_prefix_hit": 0.2697549478423476,
"vllm_rps": 1.0880502777577379,
"vllm_total": 200,
"vllm_total_tps": 6426.199028481642,
"vllm_tpot_p50_s": 0.01905767256023186,
"vllm_tpot_p95_s": 0.02799082096692385,
"vllm_ttft_p50_s": 0.15353094297461212,
"vllm_ttft_p95_s": 0.6270455510821193
},
{
"decode_tps_ratio": 0.554961482872708,
"e2e_p50_ratio": 2.0140798462106178,
"e2e_p95_ratio": 1.9471360828275543,
"fixture": "coder_200_ts2",
"frontier_complete": true,
"frontier_completed": 200,
"frontier_decode_tps": 763.3502329676248,
"frontier_e2e_p50_s": 18.65216281946347,
"frontier_e2e_p95_s": 84.93775413567799,
"frontier_preemptions": 0,
"frontier_prefix_hit": 0.2697549478423476,
"frontier_rps": 0.8525337930595883,
"frontier_total": 200,
"frontier_total_tps": 5035.200987216818,
"frontier_tpot_p50_s": 0.03366585046876145,
"frontier_tpot_p95_s": 0.03838265621202119,
"frontier_ttft_p50_s": 0.09755515041058871,
"frontier_ttft_p95_s": 0.3856872342439675,
"kv_blocks": 177077,
"label": "TP4 N200 scale 2",
"notes": "Uses true-mixed TP2/TP4 attention profile.",
"prefix_hit_delta": 0.0,
"request_count": 200,
"rps_ratio": 0.5549614828727081,
"run_id": "tp4_n200_scale2",
"scale_label": "2",
"scale_value": 2.0,
"total_tps_ratio": 0.5549614828727081,
"tp": 4,
"tpot_p50_ratio": 2.0597817670263323,
"tpot_p95_ratio": 1.3554681431066735,
"ttft_p50_ratio": 0.5721801588631308,
"ttft_p95_ratio": 0.27163724014492546,
"vllm_completed": 200,
"vllm_decode_tps": 1375.5012852715674,
"vllm_e2e_p50_s": 9.26088548800908,
"vllm_e2e_p95_s": 43.621889032190666,
"vllm_preemptions": 0,
"vllm_prefix_hit": 0.2697549478423476,
"vllm_rps": 1.5362035373095158,
"vllm_total": 200,
"vllm_total_tps": 9073.06388391597,
"vllm_tpot_p50_s": 0.016344377354773947,
"vllm_tpot_p95_s": 0.02831690025857032,
"vllm_ttft_p50_s": 0.1704972619190812,
"vllm_ttft_p95_s": 1.4198614079505205
},
{
"decode_tps_ratio": 0.5882440161960838,
"e2e_p50_ratio": 3.045852424279607,
"e2e_p95_ratio": 2.9785685814353515,
"fixture": "coder_200_ts3",
"frontier_complete": true,
"frontier_completed": 200,
"frontier_decode_tps": 660.2306058712501,
"frontier_e2e_p50_s": 16.902919407154563,
"frontier_e2e_p95_s": 83.00995364867583,
"frontier_preemptions": 0,
"frontier_prefix_hit": 0.2697549478423476,
"frontier_rps": 0.7373665172396945,
"frontier_total": 200,
"frontier_total_tps": 4355.004629460394,
"frontier_tpot_p50_s": 0.031067781092248118,
"frontier_tpot_p95_s": 0.035782850818878296,
"frontier_ttft_p50_s": 0.08859749134958328,
"frontier_ttft_p95_s": 0.3458954617429286,
"kv_blocks": 177077,
"label": "TP4 N200 scale 3",
"notes": "Uses true-mixed TP2/TP4 attention profile.",
"prefix_hit_delta": 0.0,
"request_count": 200,
"rps_ratio": 0.5882440161960838,
"run_id": "tp4_n200_scale3",
"scale_label": "3",
"scale_value": 3.0,
"total_tps_ratio": 0.5882440161960839,
"tp": 4,
"tpot_p50_ratio": 3.301471070786272,
"tpot_p95_ratio": 2.7971158799197804,
"ttft_p50_ratio": 0.8850343170011207,
"ttft_p95_ratio": 1.086290918512101,
"vllm_completed": 200,
"vllm_decode_tps": 1122.3753879226379,
"vllm_e2e_p50_s": 5.549487320007756,
"vllm_e2e_p95_s": 27.869075825903565,
"vllm_preemptions": 0,
"vllm_prefix_hit": 0.2697549478423476,
"vllm_rps": 1.2535044929278167,
"vllm_total": 200,
"vllm_total_tps": 7403.398095950554,
"vllm_tpot_p50_s": 0.00941028421153152,
"vllm_tpot_p95_s": 0.01279276667647553,
"vllm_ttft_p50_s": 0.1001062779687345,
"vllm_ttft_p95_s": 0.3184188101440668
}
]