Extends exp(c) (dispatch ablation, 1 round-robin policy) to the full 5-policy routing comparison, both modes on the SAME ttp trace (807 reqs, fresh vLLM/arm, dash0 8xH20). Confirms exp(c)'s prediction and finds something stronger: the dispatch mode FLIPS which policy wins. - thinktime helps every policy but helps LPWL most (TTFT p90 -40%, E2E mean -31% vs -3..-16% for the rest): tracets bursts punish prefill-spreading. - Ranking flip: tracets -> LPWL only ties unified_ab on TTFT p90 and is 3rd on E2E mean; thinktime -> LPWL is 1st on both (TTFT p90 -31%, best TPOT/balance, zero knobs) vs the tuned unified+A+B. - => benchmark agentic routing with thinktime; tracets' burst artifact erases LPWL's advantage. Caveat n=1: tracets ranking is run-sensitive (does not reproduce dash1 lpwl_5policy_600s.md), the thinktime advantage is the robust signal (appears in both environments). README + grouped-bar fig (figs/exp_d_policy_dispatch.png) + bench_report summaries in results/. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
803 lines
21 KiB
JSON
803 lines
21 KiB
JSON
{
|
|
"leastwork": {
|
|
"n_total": 807,
|
|
"n_ok": 807,
|
|
"window_s": 986.1941225528717,
|
|
"ttft_ms": {
|
|
"n": 807,
|
|
"mean": 3043.454534307026,
|
|
"p50": 681.8344180064742,
|
|
"p90": 6712.89858900127,
|
|
"p99": 41146.725983999204
|
|
},
|
|
"tpot_ms": {
|
|
"n": 806,
|
|
"mean": 17.12884673518703,
|
|
"p50": 7.770131949655479,
|
|
"p90": 17.997618232737178,
|
|
"p99": 133.81680370757084
|
|
},
|
|
"e2e_ms": {
|
|
"n": 807,
|
|
"mean": 6787.973176127951,
|
|
"p50": 2026.8339599715546,
|
|
"p90": 17635.302426991984,
|
|
"p99": 69945.72682998842
|
|
},
|
|
"throughput": {
|
|
"decode_tps": 234.00362537409853,
|
|
"prefill_tps": 8660.302069020001,
|
|
"total_tps": 8894.305694394101,
|
|
"total_output_tokens": 230773,
|
|
"total_new_prefill_tokens": 8540739
|
|
},
|
|
"apc": 0.6756355919409787,
|
|
"per_worker": {
|
|
"0": {
|
|
"n": 96,
|
|
"decode_tps": 48.631399136561754,
|
|
"prefill_tps": 812.7547930676582,
|
|
"ttft_p90_ms": 5368.347445008112,
|
|
"gpu_util_mean": 48.6875,
|
|
"gpu_util_max": 100.0,
|
|
"gpu_mem_max_mb": 89575.0
|
|
},
|
|
"1": {
|
|
"n": 111,
|
|
"decode_tps": 28.45180209284375,
|
|
"prefill_tps": 954.9580335787387,
|
|
"ttft_p90_ms": 3442.4916800053325,
|
|
"gpu_util_mean": 40.479166666666664,
|
|
"gpu_util_max": 100.0,
|
|
"gpu_mem_max_mb": 89575.0
|
|
},
|
|
"2": {
|
|
"n": 99,
|
|
"decode_tps": 35.558922120953866,
|
|
"prefill_tps": 901.7494422882478,
|
|
"ttft_p90_ms": 5583.948273997521,
|
|
"gpu_util_mean": 48.395833333333336,
|
|
"gpu_util_max": 100.0,
|
|
"gpu_mem_max_mb": 89575.0
|
|
},
|
|
"3": {
|
|
"n": 88,
|
|
"decode_tps": 20.717016592141224,
|
|
"prefill_tps": 1149.215934349922,
|
|
"ttft_p90_ms": 6448.1909119931515,
|
|
"gpu_util_mean": 38.020833333333336,
|
|
"gpu_util_max": 100.0,
|
|
"gpu_mem_max_mb": 89575.0
|
|
},
|
|
"4": {
|
|
"n": 124,
|
|
"decode_tps": 38.884839326290034,
|
|
"prefill_tps": 891.8842445776638,
|
|
"ttft_p90_ms": 4944.760143000167,
|
|
"gpu_util_mean": 40.020833333333336,
|
|
"gpu_util_max": 100.0,
|
|
"gpu_mem_max_mb": 89575.0
|
|
},
|
|
"5": {
|
|
"n": 110,
|
|
"decode_tps": 20.013301183451194,
|
|
"prefill_tps": 1581.959336729224,
|
|
"ttft_p90_ms": 27228.53080899222,
|
|
"gpu_util_mean": 78.19791666666667,
|
|
"gpu_util_max": 100.0,
|
|
"gpu_mem_max_mb": 89575.0
|
|
},
|
|
"6": {
|
|
"n": 64,
|
|
"decode_tps": 25.779914337947165,
|
|
"prefill_tps": 1114.0737658787832,
|
|
"ttft_p90_ms": 18414.893322013086,
|
|
"gpu_util_mean": 49.833333333333336,
|
|
"gpu_util_max": 100.0,
|
|
"gpu_mem_max_mb": 89575.0
|
|
},
|
|
"7": {
|
|
"n": 115,
|
|
"decode_tps": 15.966430583909537,
|
|
"prefill_tps": 1253.7065185497638,
|
|
"ttft_p90_ms": 9039.336649002507,
|
|
"gpu_util_mean": 39.5625,
|
|
"gpu_util_max": 100.0,
|
|
"gpu_mem_max_mb": 89575.0
|
|
}
|
|
},
|
|
"decisions": {},
|
|
"gpu_captured": true,
|
|
"spread": {
|
|
"n_ratio": 1.9375,
|
|
"ttft_p90_ratio": 7.909541500751002,
|
|
"gpu_util_ratio": 2.0567123287671234,
|
|
"gpu_util_min": 38.020833333333336,
|
|
"gpu_util_max": 78.19791666666667
|
|
},
|
|
"per_class": {
|
|
"WARM<5k": {
|
|
"n": 92,
|
|
"ttft_ms": {
|
|
"n": 92,
|
|
"mean": 192.46459313074845,
|
|
"p50": 177.03324498143047,
|
|
"p90": 313.57523999758996,
|
|
"p99": 553.8838730135467
|
|
}
|
|
},
|
|
"MED5-20k": {
|
|
"n": 278,
|
|
"ttft_ms": {
|
|
"n": 278,
|
|
"mean": 772.5742901807313,
|
|
"p50": 677.829442982329,
|
|
"p90": 1460.6262099987362,
|
|
"p99": 2101.3274399738293
|
|
}
|
|
},
|
|
"HEAVY20-50k": {
|
|
"n": 248,
|
|
"ttft_ms": {
|
|
"n": 248,
|
|
"mean": 2004.694984432952,
|
|
"p50": 1127.2326559992507,
|
|
"p90": 5081.04542500223,
|
|
"p99": 9901.586207997752
|
|
}
|
|
},
|
|
"HEAVY+>50k": {
|
|
"n": 189,
|
|
"ttft_ms": {
|
|
"n": 189,
|
|
"mean": 9134.502951365745,
|
|
"p50": 2167.4920289951842,
|
|
"p90": 28926.44312098855,
|
|
"p99": 49472.52169801504
|
|
}
|
|
}
|
|
}
|
|
},
|
|
"unified_ab": {
|
|
"n_total": 807,
|
|
"n_ok": 807,
|
|
"window_s": 986.5525379180908,
|
|
"ttft_ms": {
|
|
"n": 807,
|
|
"mean": 3592.357064001708,
|
|
"p50": 676.4678099716548,
|
|
"p90": 9736.127940996084,
|
|
"p99": 42370.66501099616
|
|
},
|
|
"tpot_ms": {
|
|
"n": 806,
|
|
"mean": 13.200466578008895,
|
|
"p50": 7.819523662692517,
|
|
"p90": 19.090397550442486,
|
|
"p99": 133.40408908212945
|
|
},
|
|
"e2e_ms": {
|
|
"n": 807,
|
|
"mean": 7131.188424004758,
|
|
"p50": 2037.0979200233705,
|
|
"p90": 18689.829077018658,
|
|
"p99": 63787.50272799516
|
|
},
|
|
"throughput": {
|
|
"decode_tps": 233.91861166055818,
|
|
"prefill_tps": 8640.029468666471,
|
|
"total_tps": 8873.948080327029,
|
|
"total_output_tokens": 230773,
|
|
"total_new_prefill_tokens": 8523843
|
|
},
|
|
"apc": 0.6762772765819173,
|
|
"per_worker": {
|
|
"0": {
|
|
"n": 58,
|
|
"decode_tps": 29.088161954921237,
|
|
"prefill_tps": 930.9397773565431,
|
|
"ttft_p90_ms": 13273.868343996583,
|
|
"gpu_util_mean": 44.989583333333336,
|
|
"gpu_util_max": 100.0,
|
|
"gpu_mem_max_mb": 89575.0
|
|
},
|
|
"1": {
|
|
"n": 98,
|
|
"decode_tps": 24.162930086120934,
|
|
"prefill_tps": 1018.370498666148,
|
|
"ttft_p90_ms": 4365.537890000269,
|
|
"gpu_util_mean": 38.90625,
|
|
"gpu_util_max": 100.0,
|
|
"gpu_mem_max_mb": 89575.0
|
|
},
|
|
"2": {
|
|
"n": 110,
|
|
"decode_tps": 35.40713612040818,
|
|
"prefill_tps": 965.8167845888297,
|
|
"ttft_p90_ms": 4610.747697995976,
|
|
"gpu_util_mean": 52.114583333333336,
|
|
"gpu_util_max": 100.0,
|
|
"gpu_mem_max_mb": 89575.0
|
|
},
|
|
"3": {
|
|
"n": 102,
|
|
"decode_tps": 20.719626390233998,
|
|
"prefill_tps": 1126.5056419045684,
|
|
"ttft_p90_ms": 10947.632670984603,
|
|
"gpu_util_mean": 41.703125,
|
|
"gpu_util_max": 100.0,
|
|
"gpu_mem_max_mb": 89575.0
|
|
},
|
|
"4": {
|
|
"n": 99,
|
|
"decode_tps": 44.64435324746667,
|
|
"prefill_tps": 911.5449663712324,
|
|
"ttft_p90_ms": 4116.690531984204,
|
|
"gpu_util_mean": 42.671875,
|
|
"gpu_util_max": 100.0,
|
|
"gpu_mem_max_mb": 89575.0
|
|
},
|
|
"5": {
|
|
"n": 110,
|
|
"decode_tps": 29.724722072971574,
|
|
"prefill_tps": 918.851216898154,
|
|
"ttft_p90_ms": 4543.632891000016,
|
|
"gpu_util_mean": 40.864583333333336,
|
|
"gpu_util_max": 100.0,
|
|
"gpu_mem_max_mb": 89575.0
|
|
},
|
|
"6": {
|
|
"n": 125,
|
|
"decode_tps": 28.516474205589404,
|
|
"prefill_tps": 1522.1155917037186,
|
|
"ttft_p90_ms": 25507.55575299263,
|
|
"gpu_util_mean": 76.203125,
|
|
"gpu_util_max": 100.0,
|
|
"gpu_mem_max_mb": 89575.0
|
|
},
|
|
"7": {
|
|
"n": 105,
|
|
"decode_tps": 21.655207582846195,
|
|
"prefill_tps": 1245.884991177276,
|
|
"ttft_p90_ms": 20629.490054008784,
|
|
"gpu_util_mean": 47.276041666666664,
|
|
"gpu_util_max": 100.0,
|
|
"gpu_mem_max_mb": 89575.0
|
|
}
|
|
},
|
|
"decisions": {
|
|
"lmetric_fallback": 389,
|
|
"affinity": 418
|
|
},
|
|
"gpu_captured": true,
|
|
"spread": {
|
|
"n_ratio": 2.1551724137931036,
|
|
"ttft_p90_ratio": 6.196131468910353,
|
|
"gpu_util_ratio": 1.9586345381526105,
|
|
"gpu_util_min": 38.90625,
|
|
"gpu_util_max": 76.203125
|
|
},
|
|
"per_class": {
|
|
"WARM<5k": {
|
|
"n": 92,
|
|
"ttft_ms": {
|
|
"n": 92,
|
|
"mean": 448.3382160131283,
|
|
"p50": 179.28761898656376,
|
|
"p90": 323.1771159917116,
|
|
"p99": 5748.067840992007
|
|
}
|
|
},
|
|
"MED5-20k": {
|
|
"n": 278,
|
|
"ttft_ms": {
|
|
"n": 278,
|
|
"mean": 1455.8712874500252,
|
|
"p50": 685.6210659898352,
|
|
"p90": 1802.9974120145198,
|
|
"p99": 32571.255193004617
|
|
}
|
|
},
|
|
"HEAVY20-50k": {
|
|
"n": 248,
|
|
"ttft_ms": {
|
|
"n": 248,
|
|
"mean": 2672.607777120579,
|
|
"p50": 1117.918328003725,
|
|
"p90": 5214.129884989234,
|
|
"p99": 22190.210508997552
|
|
}
|
|
},
|
|
"HEAVY+>50k": {
|
|
"n": 189,
|
|
"ttft_ms": {
|
|
"n": 189,
|
|
"mean": 9472.201524545819,
|
|
"p50": 2150.3282230114564,
|
|
"p90": 28876.64386598044,
|
|
"p99": 48314.48572798399
|
|
}
|
|
}
|
|
}
|
|
},
|
|
"unified_def": {
|
|
"n_total": 807,
|
|
"n_ok": 807,
|
|
"window_s": 979.5575842857361,
|
|
"ttft_ms": {
|
|
"n": 807,
|
|
"mean": 4037.2454534798544,
|
|
"p50": 695.2703970018774,
|
|
"p90": 11267.881545994896,
|
|
"p99": 46221.317757997895
|
|
},
|
|
"tpot_ms": {
|
|
"n": 806,
|
|
"mean": 16.476541787288614,
|
|
"p50": 8.307468241425875,
|
|
"p90": 21.768670571627954,
|
|
"p99": 200.26358073773736
|
|
},
|
|
"e2e_ms": {
|
|
"n": 807,
|
|
"mean": 7974.606969135101,
|
|
"p50": 2098.1516239990015,
|
|
"p90": 24096.24872301356,
|
|
"p99": 72334.40188399982
|
|
},
|
|
"throughput": {
|
|
"decode_tps": 235.5890084484137,
|
|
"prefill_tps": 8253.263646460364,
|
|
"total_tps": 8488.852654908778,
|
|
"total_output_tokens": 230773,
|
|
"total_new_prefill_tokens": 8084547
|
|
},
|
|
"apc": 0.6929610772463206,
|
|
"per_worker": {
|
|
"0": {
|
|
"n": 96,
|
|
"decode_tps": 39.88024862110074,
|
|
"prefill_tps": 791.1724766697671,
|
|
"ttft_p90_ms": 5825.010653992649,
|
|
"gpu_util_mean": 47.68586387434555,
|
|
"gpu_util_max": 100.0,
|
|
"gpu_mem_max_mb": 89575.0
|
|
},
|
|
"1": {
|
|
"n": 55,
|
|
"decode_tps": 17.82028977166094,
|
|
"prefill_tps": 910.7254277965683,
|
|
"ttft_p90_ms": 16298.377383005572,
|
|
"gpu_util_mean": 39.2565445026178,
|
|
"gpu_util_max": 100.0,
|
|
"gpu_mem_max_mb": 89575.0
|
|
},
|
|
"2": {
|
|
"n": 98,
|
|
"decode_tps": 27.174512685142215,
|
|
"prefill_tps": 1043.4608606959093,
|
|
"ttft_p90_ms": 9739.183520985534,
|
|
"gpu_util_mean": 40.83769633507853,
|
|
"gpu_util_max": 100.0,
|
|
"gpu_mem_max_mb": 89575.0
|
|
},
|
|
"3": {
|
|
"n": 103,
|
|
"decode_tps": 24.518211471470025,
|
|
"prefill_tps": 1003.2661844138513,
|
|
"ttft_p90_ms": 6705.797864007764,
|
|
"gpu_util_mean": 33.50785340314136,
|
|
"gpu_util_max": 100.0,
|
|
"gpu_mem_max_mb": 89575.0
|
|
},
|
|
"4": {
|
|
"n": 102,
|
|
"decode_tps": 49.593817432818994,
|
|
"prefill_tps": 689.9175820202374,
|
|
"ttft_p90_ms": 2474.3239340023138,
|
|
"gpu_util_mean": 45.246073298429316,
|
|
"gpu_util_max": 100.0,
|
|
"gpu_mem_max_mb": 89575.0
|
|
},
|
|
"5": {
|
|
"n": 112,
|
|
"decode_tps": 20.50823792523468,
|
|
"prefill_tps": 1346.1127974027988,
|
|
"ttft_p90_ms": 23553.059853002196,
|
|
"gpu_util_mean": 50.109947643979055,
|
|
"gpu_util_max": 100.0,
|
|
"gpu_mem_max_mb": 89575.0
|
|
},
|
|
"6": {
|
|
"n": 81,
|
|
"decode_tps": 19.51697409799575,
|
|
"prefill_tps": 990.0816609032532,
|
|
"ttft_p90_ms": 5961.234248999972,
|
|
"gpu_util_mean": 38.717277486910994,
|
|
"gpu_util_max": 100.0,
|
|
"gpu_mem_max_mb": 89575.0
|
|
},
|
|
"7": {
|
|
"n": 160,
|
|
"decode_tps": 36.57671644299036,
|
|
"prefill_tps": 1478.5266565579789,
|
|
"ttft_p90_ms": 17912.180206010817,
|
|
"gpu_util_mean": 85.15183246073299,
|
|
"gpu_util_max": 100.0,
|
|
"gpu_mem_max_mb": 89575.0
|
|
}
|
|
},
|
|
"decisions": {
|
|
"lmetric_fallback": 349,
|
|
"affinity": 458
|
|
},
|
|
"gpu_captured": true,
|
|
"spread": {
|
|
"n_ratio": 2.909090909090909,
|
|
"ttft_p90_ratio": 9.518988006919619,
|
|
"gpu_util_ratio": 2.5412500000000002,
|
|
"gpu_util_min": 33.50785340314136,
|
|
"gpu_util_max": 85.15183246073299
|
|
},
|
|
"per_class": {
|
|
"WARM<5k": {
|
|
"n": 92,
|
|
"ttft_ms": {
|
|
"n": 92,
|
|
"mean": 594.1550390875225,
|
|
"p50": 196.222682017833,
|
|
"p90": 338.4021449892316,
|
|
"p99": 7637.84466200741
|
|
}
|
|
},
|
|
"MED5-20k": {
|
|
"n": 278,
|
|
"ttft_ms": {
|
|
"n": 278,
|
|
"mean": 1386.6929373560054,
|
|
"p50": 662.5233909871895,
|
|
"p90": 1772.5210430216976,
|
|
"p99": 19121.71271801344
|
|
}
|
|
},
|
|
"HEAVY20-50k": {
|
|
"n": 248,
|
|
"ttft_ms": {
|
|
"n": 248,
|
|
"mean": 3761.512416864031,
|
|
"p50": 1186.4990000030957,
|
|
"p90": 7436.603061010828,
|
|
"p99": 37502.096537995385
|
|
}
|
|
},
|
|
"HEAVY+>50k": {
|
|
"n": 189,
|
|
"ttft_ms": {
|
|
"n": 189,
|
|
"mean": 9973.751859232492,
|
|
"p50": 2084.2301140073687,
|
|
"p90": 34646.72368601896,
|
|
"p99": 51783.358982007485
|
|
}
|
|
}
|
|
}
|
|
},
|
|
"lmetric": {
|
|
"n_total": 807,
|
|
"n_ok": 807,
|
|
"window_s": 1036.9893975257874,
|
|
"ttft_ms": {
|
|
"n": 807,
|
|
"mean": 4942.361280256006,
|
|
"p50": 1195.667241991032,
|
|
"p90": 15606.655231997138,
|
|
"p99": 46217.127193987835
|
|
},
|
|
"tpot_ms": {
|
|
"n": 806,
|
|
"mean": 19.707597229545165,
|
|
"p50": 9.35281297406689,
|
|
"p90": 30.177805961172382,
|
|
"p99": 232.18400578116416
|
|
},
|
|
"e2e_ms": {
|
|
"n": 807,
|
|
"mean": 9901.839828112516,
|
|
"p50": 3177.2723750036675,
|
|
"p90": 27819.4430010044,
|
|
"p99": 73672.06387300394
|
|
},
|
|
"throughput": {
|
|
"decode_tps": 222.5413302687709,
|
|
"prefill_tps": 13134.949144609054,
|
|
"total_tps": 13357.490474877826,
|
|
"total_output_tokens": 230773,
|
|
"total_new_prefill_tokens": 13620803
|
|
},
|
|
"apc": 0.48270240989877555,
|
|
"per_worker": {
|
|
"0": {
|
|
"n": 121,
|
|
"decode_tps": 40.13348651326501,
|
|
"prefill_tps": 1973.9218210754154,
|
|
"ttft_p90_ms": 23894.41591600189,
|
|
"gpu_util_mean": 90.75247524752476,
|
|
"gpu_util_max": 100.0,
|
|
"gpu_mem_max_mb": 89575.0
|
|
},
|
|
"1": {
|
|
"n": 128,
|
|
"decode_tps": 44.98117349250917,
|
|
"prefill_tps": 1626.6328315647543,
|
|
"ttft_p90_ms": 5918.853377981577,
|
|
"gpu_util_mean": 64.96039603960396,
|
|
"gpu_util_max": 100.0,
|
|
"gpu_mem_max_mb": 89575.0
|
|
},
|
|
"2": {
|
|
"n": 109,
|
|
"decode_tps": 26.800659742819484,
|
|
"prefill_tps": 1578.2861463241723,
|
|
"ttft_p90_ms": 13917.768498009536,
|
|
"gpu_util_mean": 58.306930693069305,
|
|
"gpu_util_max": 100.0,
|
|
"gpu_mem_max_mb": 89575.0
|
|
},
|
|
"3": {
|
|
"n": 99,
|
|
"decode_tps": 19.435107097610242,
|
|
"prefill_tps": 1683.1715002723502,
|
|
"ttft_p90_ms": 16737.5574040052,
|
|
"gpu_util_mean": 59.16831683168317,
|
|
"gpu_util_max": 100.0,
|
|
"gpu_mem_max_mb": 89575.0
|
|
},
|
|
"4": {
|
|
"n": 116,
|
|
"decode_tps": 19.955845305048445,
|
|
"prefill_tps": 1884.7752972820501,
|
|
"ttft_p90_ms": 11347.276910004439,
|
|
"gpu_util_mean": 50.36138613861386,
|
|
"gpu_util_max": 100.0,
|
|
"gpu_mem_max_mb": 89575.0
|
|
},
|
|
"5": {
|
|
"n": 61,
|
|
"decode_tps": 12.497716978516857,
|
|
"prefill_tps": 1726.7611455549827,
|
|
"ttft_p90_ms": 31680.082703998778,
|
|
"gpu_util_mean": 55.93069306930693,
|
|
"gpu_util_max": 100.0,
|
|
"gpu_mem_max_mb": 89575.0
|
|
},
|
|
"6": {
|
|
"n": 88,
|
|
"decode_tps": 38.23472071614312,
|
|
"prefill_tps": 1208.0914259963265,
|
|
"ttft_p90_ms": 9533.787049993407,
|
|
"gpu_util_mean": 51.62871287128713,
|
|
"gpu_util_max": 100.0,
|
|
"gpu_mem_max_mb": 89575.0
|
|
},
|
|
"7": {
|
|
"n": 85,
|
|
"decode_tps": 20.50262042285856,
|
|
"prefill_tps": 1453.3089765390037,
|
|
"ttft_p90_ms": 14970.007644995349,
|
|
"gpu_util_mean": 51.757425742574256,
|
|
"gpu_util_max": 100.0,
|
|
"gpu_mem_max_mb": 89575.0
|
|
}
|
|
},
|
|
"decisions": {},
|
|
"gpu_captured": true,
|
|
"spread": {
|
|
"n_ratio": 2.098360655737705,
|
|
"ttft_p90_ratio": 5.352402007768976,
|
|
"gpu_util_ratio": 1.8020249680526887,
|
|
"gpu_util_min": 50.36138613861386,
|
|
"gpu_util_max": 90.75247524752476
|
|
},
|
|
"per_class": {
|
|
"WARM<5k": {
|
|
"n": 92,
|
|
"ttft_ms": {
|
|
"n": 92,
|
|
"mean": 511.51012201982036,
|
|
"p50": 255.4193850082811,
|
|
"p90": 471.22472297633067,
|
|
"p99": 3532.1444049768616
|
|
}
|
|
},
|
|
"MED5-20k": {
|
|
"n": 278,
|
|
"ttft_ms": {
|
|
"n": 278,
|
|
"mean": 1010.5527848093863,
|
|
"p50": 818.2104199950118,
|
|
"p90": 1878.1264800054487,
|
|
"p99": 4416.228823014535
|
|
}
|
|
},
|
|
"HEAVY20-50k": {
|
|
"n": 248,
|
|
"ttft_ms": {
|
|
"n": 248,
|
|
"mean": 3164.034748000338,
|
|
"p50": 2636.801838991232,
|
|
"p90": 7400.190736021614,
|
|
"p99": 9636.447697004769
|
|
}
|
|
},
|
|
"HEAVY+>50k": {
|
|
"n": 189,
|
|
"ttft_ms": {
|
|
"n": 189,
|
|
"mean": 15215.938255342222,
|
|
"p50": 12060.85875100689,
|
|
"p90": 36602.47571900254,
|
|
"p99": 52271.21993701439
|
|
}
|
|
}
|
|
}
|
|
},
|
|
"sticky": {
|
|
"n_total": 807,
|
|
"n_ok": 807,
|
|
"window_s": 994.9787130355835,
|
|
"ttft_ms": {
|
|
"n": 807,
|
|
"mean": 4455.946148958436,
|
|
"p50": 713.0627470032778,
|
|
"p90": 14838.208375993418,
|
|
"p99": 43174.81458699331
|
|
},
|
|
"tpot_ms": {
|
|
"n": 806,
|
|
"mean": 19.138733289320065,
|
|
"p50": 8.24416923684399,
|
|
"p90": 23.769559945071954,
|
|
"p99": 184.6952650922511
|
|
},
|
|
"e2e_ms": {
|
|
"n": 807,
|
|
"mean": 8663.490226920512,
|
|
"p50": 2352.715140004875,
|
|
"p90": 24966.471978026675,
|
|
"p99": 70932.61348700617
|
|
},
|
|
"throughput": {
|
|
"decode_tps": 231.93762537485247,
|
|
"prefill_tps": 8105.277926394779,
|
|
"total_tps": 8337.215551769632,
|
|
"total_output_tokens": 230773,
|
|
"total_new_prefill_tokens": 8064579
|
|
},
|
|
"apc": 0.6937194318219754,
|
|
"per_worker": {
|
|
"0": {
|
|
"n": 156,
|
|
"decode_tps": 44.672312500428745,
|
|
"prefill_tps": 1949.5271351907093,
|
|
"ttft_p90_ms": 20576.009418989997,
|
|
"gpu_util_mean": 93.18041237113403,
|
|
"gpu_util_max": 100.0,
|
|
"gpu_mem_max_mb": 89575.0
|
|
},
|
|
"1": {
|
|
"n": 114,
|
|
"decode_tps": 44.75372127725863,
|
|
"prefill_tps": 929.0429914624127,
|
|
"ttft_p90_ms": 5498.717762995511,
|
|
"gpu_util_mean": 53.08247422680412,
|
|
"gpu_util_max": 100.0,
|
|
"gpu_mem_max_mb": 89575.0
|
|
},
|
|
"2": {
|
|
"n": 88,
|
|
"decode_tps": 29.785561853462614,
|
|
"prefill_tps": 904.2113044360427,
|
|
"ttft_p90_ms": 12234.77461998118,
|
|
"gpu_util_mean": 49.628865979381445,
|
|
"gpu_util_max": 100.0,
|
|
"gpu_mem_max_mb": 89575.0
|
|
},
|
|
"3": {
|
|
"n": 98,
|
|
"decode_tps": 29.982550992458386,
|
|
"prefill_tps": 1018.2680159145942,
|
|
"ttft_p90_ms": 16286.48554199026,
|
|
"gpu_util_mean": 44.123711340206185,
|
|
"gpu_util_max": 100.0,
|
|
"gpu_mem_max_mb": 89575.0
|
|
},
|
|
"4": {
|
|
"n": 110,
|
|
"decode_tps": 37.69427376549181,
|
|
"prefill_tps": 949.1017120546454,
|
|
"ttft_p90_ms": 6709.773182024946,
|
|
"gpu_util_mean": 45.7680412371134,
|
|
"gpu_util_max": 100.0,
|
|
"gpu_mem_max_mb": 89575.0
|
|
},
|
|
"5": {
|
|
"n": 99,
|
|
"decode_tps": 19.964246209244884,
|
|
"prefill_tps": 980.7747514746083,
|
|
"ttft_p90_ms": 14065.780322009232,
|
|
"gpu_util_mean": 36.324742268041234,
|
|
"gpu_util_max": 100.0,
|
|
"gpu_mem_max_mb": 89575.0
|
|
},
|
|
"6": {
|
|
"n": 79,
|
|
"decode_tps": 11.21531531660107,
|
|
"prefill_tps": 682.5723918534845,
|
|
"ttft_p90_ms": 4579.089447972365,
|
|
"gpu_util_mean": 22.288659793814432,
|
|
"gpu_util_max": 100.0,
|
|
"gpu_mem_max_mb": 89575.0
|
|
},
|
|
"7": {
|
|
"n": 63,
|
|
"decode_tps": 13.869643459906332,
|
|
"prefill_tps": 691.7796240082818,
|
|
"ttft_p90_ms": 18229.593775991816,
|
|
"gpu_util_mean": 30.762886597938145,
|
|
"gpu_util_max": 100.0,
|
|
"gpu_mem_max_mb": 89575.0
|
|
}
|
|
},
|
|
"decisions": {},
|
|
"gpu_captured": true,
|
|
"spread": {
|
|
"n_ratio": 2.4761904761904763,
|
|
"ttft_p90_ratio": 4.493471825081102,
|
|
"gpu_util_ratio": 4.180619796484737,
|
|
"gpu_util_min": 22.288659793814432,
|
|
"gpu_util_max": 93.18041237113403
|
|
},
|
|
"per_class": {
|
|
"WARM<5k": {
|
|
"n": 92,
|
|
"ttft_ms": {
|
|
"n": 92,
|
|
"mean": 827.0193562525294,
|
|
"p50": 197.0047799986787,
|
|
"p90": 507.2060489910655,
|
|
"p99": 19187.98109301133
|
|
}
|
|
},
|
|
"MED5-20k": {
|
|
"n": 278,
|
|
"ttft_ms": {
|
|
"n": 278,
|
|
"mean": 2624.659966896439,
|
|
"p50": 736.4085000008345,
|
|
"p90": 3899.43698499701,
|
|
"p99": 33760.123436979484
|
|
}
|
|
},
|
|
"HEAVY20-50k": {
|
|
"n": 248,
|
|
"ttft_ms": {
|
|
"n": 248,
|
|
"mean": 3807.600332329692,
|
|
"p50": 1086.1541359918192,
|
|
"p90": 9912.624888995197,
|
|
"p99": 40516.03257699753
|
|
}
|
|
},
|
|
"HEAVY+>50k": {
|
|
"n": 189,
|
|
"ttft_ms": {
|
|
"n": 189,
|
|
"mean": 9766.785228673292,
|
|
"p50": 2521.5582190139685,
|
|
"p90": 34039.37866198248,
|
|
"p99": 47948.314540000865
|
|
}
|
|
}
|
|
}
|
|
}
|
|
} |