Files
agentic-kvc/v2/exp_d_policy_dispatch/results/thinktime.json
Gahow Wang 9b6091fe6e v2 exp(d): 5-policy routing under tracets vs thinktime — ranking flip
Extends exp(c) (dispatch ablation, 1 round-robin policy) to the full 5-policy
routing comparison, running both dispatch modes on the SAME ttp trace (807 reqs,
fresh vLLM per arm, dash0 8xH20). Confirms exp(c)'s prediction and finds
something stronger: the dispatch mode FLIPS which policy wins.

- thinktime helps every policy but helps LPWL most (TTFT p90 -40%, E2E mean -31%
  vs -3..-16% for the rest): tracets bursts punish prefill-spreading.
- Ranking flip: tracets -> LPWL only ties unified_ab on TTFT p90 and is 3rd on
  E2E mean; thinktime -> LPWL is 1st on both (TTFT p90 -31%, best TPOT/balance,
  zero knobs) vs the tuned unified+A+B.
- => benchmark agentic routing with thinktime; tracets' burst artifact erases
  LPWL's advantage. Caveat (n=1): the tracets ranking is run-sensitive (it does
  not reproduce dash1 lpwl_5policy_600s.md), whereas the thinktime advantage is
  the robust signal (it appears in both environments).

README + grouped-bar fig (figs/exp_d_policy_dispatch.png) + bench_report
summaries in results/.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-05-30 20:59:18 +08:00

803 lines
21 KiB
JSON

{
"leastwork": {
"n_total": 807,
"n_ok": 807,
"window_s": 986.1941225528717,
"ttft_ms": {
"n": 807,
"mean": 3043.454534307026,
"p50": 681.8344180064742,
"p90": 6712.89858900127,
"p99": 41146.725983999204
},
"tpot_ms": {
"n": 806,
"mean": 17.12884673518703,
"p50": 7.770131949655479,
"p90": 17.997618232737178,
"p99": 133.81680370757084
},
"e2e_ms": {
"n": 807,
"mean": 6787.973176127951,
"p50": 2026.8339599715546,
"p90": 17635.302426991984,
"p99": 69945.72682998842
},
"throughput": {
"decode_tps": 234.00362537409853,
"prefill_tps": 8660.302069020001,
"total_tps": 8894.305694394101,
"total_output_tokens": 230773,
"total_new_prefill_tokens": 8540739
},
"apc": 0.6756355919409787,
"per_worker": {
"0": {
"n": 96,
"decode_tps": 48.631399136561754,
"prefill_tps": 812.7547930676582,
"ttft_p90_ms": 5368.347445008112,
"gpu_util_mean": 48.6875,
"gpu_util_max": 100.0,
"gpu_mem_max_mb": 89575.0
},
"1": {
"n": 111,
"decode_tps": 28.45180209284375,
"prefill_tps": 954.9580335787387,
"ttft_p90_ms": 3442.4916800053325,
"gpu_util_mean": 40.479166666666664,
"gpu_util_max": 100.0,
"gpu_mem_max_mb": 89575.0
},
"2": {
"n": 99,
"decode_tps": 35.558922120953866,
"prefill_tps": 901.7494422882478,
"ttft_p90_ms": 5583.948273997521,
"gpu_util_mean": 48.395833333333336,
"gpu_util_max": 100.0,
"gpu_mem_max_mb": 89575.0
},
"3": {
"n": 88,
"decode_tps": 20.717016592141224,
"prefill_tps": 1149.215934349922,
"ttft_p90_ms": 6448.1909119931515,
"gpu_util_mean": 38.020833333333336,
"gpu_util_max": 100.0,
"gpu_mem_max_mb": 89575.0
},
"4": {
"n": 124,
"decode_tps": 38.884839326290034,
"prefill_tps": 891.8842445776638,
"ttft_p90_ms": 4944.760143000167,
"gpu_util_mean": 40.020833333333336,
"gpu_util_max": 100.0,
"gpu_mem_max_mb": 89575.0
},
"5": {
"n": 110,
"decode_tps": 20.013301183451194,
"prefill_tps": 1581.959336729224,
"ttft_p90_ms": 27228.53080899222,
"gpu_util_mean": 78.19791666666667,
"gpu_util_max": 100.0,
"gpu_mem_max_mb": 89575.0
},
"6": {
"n": 64,
"decode_tps": 25.779914337947165,
"prefill_tps": 1114.0737658787832,
"ttft_p90_ms": 18414.893322013086,
"gpu_util_mean": 49.833333333333336,
"gpu_util_max": 100.0,
"gpu_mem_max_mb": 89575.0
},
"7": {
"n": 115,
"decode_tps": 15.966430583909537,
"prefill_tps": 1253.7065185497638,
"ttft_p90_ms": 9039.336649002507,
"gpu_util_mean": 39.5625,
"gpu_util_max": 100.0,
"gpu_mem_max_mb": 89575.0
}
},
"decisions": {},
"gpu_captured": true,
"spread": {
"n_ratio": 1.9375,
"ttft_p90_ratio": 7.909541500751002,
"gpu_util_ratio": 2.0567123287671234,
"gpu_util_min": 38.020833333333336,
"gpu_util_max": 78.19791666666667
},
"per_class": {
"WARM<5k": {
"n": 92,
"ttft_ms": {
"n": 92,
"mean": 192.46459313074845,
"p50": 177.03324498143047,
"p90": 313.57523999758996,
"p99": 553.8838730135467
}
},
"MED5-20k": {
"n": 278,
"ttft_ms": {
"n": 278,
"mean": 772.5742901807313,
"p50": 677.829442982329,
"p90": 1460.6262099987362,
"p99": 2101.3274399738293
}
},
"HEAVY20-50k": {
"n": 248,
"ttft_ms": {
"n": 248,
"mean": 2004.694984432952,
"p50": 1127.2326559992507,
"p90": 5081.04542500223,
"p99": 9901.586207997752
}
},
"HEAVY+>50k": {
"n": 189,
"ttft_ms": {
"n": 189,
"mean": 9134.502951365745,
"p50": 2167.4920289951842,
"p90": 28926.44312098855,
"p99": 49472.52169801504
}
}
}
},
"unified_ab": {
"n_total": 807,
"n_ok": 807,
"window_s": 986.5525379180908,
"ttft_ms": {
"n": 807,
"mean": 3592.357064001708,
"p50": 676.4678099716548,
"p90": 9736.127940996084,
"p99": 42370.66501099616
},
"tpot_ms": {
"n": 806,
"mean": 13.200466578008895,
"p50": 7.819523662692517,
"p90": 19.090397550442486,
"p99": 133.40408908212945
},
"e2e_ms": {
"n": 807,
"mean": 7131.188424004758,
"p50": 2037.0979200233705,
"p90": 18689.829077018658,
"p99": 63787.50272799516
},
"throughput": {
"decode_tps": 233.91861166055818,
"prefill_tps": 8640.029468666471,
"total_tps": 8873.948080327029,
"total_output_tokens": 230773,
"total_new_prefill_tokens": 8523843
},
"apc": 0.6762772765819173,
"per_worker": {
"0": {
"n": 58,
"decode_tps": 29.088161954921237,
"prefill_tps": 930.9397773565431,
"ttft_p90_ms": 13273.868343996583,
"gpu_util_mean": 44.989583333333336,
"gpu_util_max": 100.0,
"gpu_mem_max_mb": 89575.0
},
"1": {
"n": 98,
"decode_tps": 24.162930086120934,
"prefill_tps": 1018.370498666148,
"ttft_p90_ms": 4365.537890000269,
"gpu_util_mean": 38.90625,
"gpu_util_max": 100.0,
"gpu_mem_max_mb": 89575.0
},
"2": {
"n": 110,
"decode_tps": 35.40713612040818,
"prefill_tps": 965.8167845888297,
"ttft_p90_ms": 4610.747697995976,
"gpu_util_mean": 52.114583333333336,
"gpu_util_max": 100.0,
"gpu_mem_max_mb": 89575.0
},
"3": {
"n": 102,
"decode_tps": 20.719626390233998,
"prefill_tps": 1126.5056419045684,
"ttft_p90_ms": 10947.632670984603,
"gpu_util_mean": 41.703125,
"gpu_util_max": 100.0,
"gpu_mem_max_mb": 89575.0
},
"4": {
"n": 99,
"decode_tps": 44.64435324746667,
"prefill_tps": 911.5449663712324,
"ttft_p90_ms": 4116.690531984204,
"gpu_util_mean": 42.671875,
"gpu_util_max": 100.0,
"gpu_mem_max_mb": 89575.0
},
"5": {
"n": 110,
"decode_tps": 29.724722072971574,
"prefill_tps": 918.851216898154,
"ttft_p90_ms": 4543.632891000016,
"gpu_util_mean": 40.864583333333336,
"gpu_util_max": 100.0,
"gpu_mem_max_mb": 89575.0
},
"6": {
"n": 125,
"decode_tps": 28.516474205589404,
"prefill_tps": 1522.1155917037186,
"ttft_p90_ms": 25507.55575299263,
"gpu_util_mean": 76.203125,
"gpu_util_max": 100.0,
"gpu_mem_max_mb": 89575.0
},
"7": {
"n": 105,
"decode_tps": 21.655207582846195,
"prefill_tps": 1245.884991177276,
"ttft_p90_ms": 20629.490054008784,
"gpu_util_mean": 47.276041666666664,
"gpu_util_max": 100.0,
"gpu_mem_max_mb": 89575.0
}
},
"decisions": {
"lmetric_fallback": 389,
"affinity": 418
},
"gpu_captured": true,
"spread": {
"n_ratio": 2.1551724137931036,
"ttft_p90_ratio": 6.196131468910353,
"gpu_util_ratio": 1.9586345381526105,
"gpu_util_min": 38.90625,
"gpu_util_max": 76.203125
},
"per_class": {
"WARM<5k": {
"n": 92,
"ttft_ms": {
"n": 92,
"mean": 448.3382160131283,
"p50": 179.28761898656376,
"p90": 323.1771159917116,
"p99": 5748.067840992007
}
},
"MED5-20k": {
"n": 278,
"ttft_ms": {
"n": 278,
"mean": 1455.8712874500252,
"p50": 685.6210659898352,
"p90": 1802.9974120145198,
"p99": 32571.255193004617
}
},
"HEAVY20-50k": {
"n": 248,
"ttft_ms": {
"n": 248,
"mean": 2672.607777120579,
"p50": 1117.918328003725,
"p90": 5214.129884989234,
"p99": 22190.210508997552
}
},
"HEAVY+>50k": {
"n": 189,
"ttft_ms": {
"n": 189,
"mean": 9472.201524545819,
"p50": 2150.3282230114564,
"p90": 28876.64386598044,
"p99": 48314.48572798399
}
}
}
},
"unified_def": {
"n_total": 807,
"n_ok": 807,
"window_s": 979.5575842857361,
"ttft_ms": {
"n": 807,
"mean": 4037.2454534798544,
"p50": 695.2703970018774,
"p90": 11267.881545994896,
"p99": 46221.317757997895
},
"tpot_ms": {
"n": 806,
"mean": 16.476541787288614,
"p50": 8.307468241425875,
"p90": 21.768670571627954,
"p99": 200.26358073773736
},
"e2e_ms": {
"n": 807,
"mean": 7974.606969135101,
"p50": 2098.1516239990015,
"p90": 24096.24872301356,
"p99": 72334.40188399982
},
"throughput": {
"decode_tps": 235.5890084484137,
"prefill_tps": 8253.263646460364,
"total_tps": 8488.852654908778,
"total_output_tokens": 230773,
"total_new_prefill_tokens": 8084547
},
"apc": 0.6929610772463206,
"per_worker": {
"0": {
"n": 96,
"decode_tps": 39.88024862110074,
"prefill_tps": 791.1724766697671,
"ttft_p90_ms": 5825.010653992649,
"gpu_util_mean": 47.68586387434555,
"gpu_util_max": 100.0,
"gpu_mem_max_mb": 89575.0
},
"1": {
"n": 55,
"decode_tps": 17.82028977166094,
"prefill_tps": 910.7254277965683,
"ttft_p90_ms": 16298.377383005572,
"gpu_util_mean": 39.2565445026178,
"gpu_util_max": 100.0,
"gpu_mem_max_mb": 89575.0
},
"2": {
"n": 98,
"decode_tps": 27.174512685142215,
"prefill_tps": 1043.4608606959093,
"ttft_p90_ms": 9739.183520985534,
"gpu_util_mean": 40.83769633507853,
"gpu_util_max": 100.0,
"gpu_mem_max_mb": 89575.0
},
"3": {
"n": 103,
"decode_tps": 24.518211471470025,
"prefill_tps": 1003.2661844138513,
"ttft_p90_ms": 6705.797864007764,
"gpu_util_mean": 33.50785340314136,
"gpu_util_max": 100.0,
"gpu_mem_max_mb": 89575.0
},
"4": {
"n": 102,
"decode_tps": 49.593817432818994,
"prefill_tps": 689.9175820202374,
"ttft_p90_ms": 2474.3239340023138,
"gpu_util_mean": 45.246073298429316,
"gpu_util_max": 100.0,
"gpu_mem_max_mb": 89575.0
},
"5": {
"n": 112,
"decode_tps": 20.50823792523468,
"prefill_tps": 1346.1127974027988,
"ttft_p90_ms": 23553.059853002196,
"gpu_util_mean": 50.109947643979055,
"gpu_util_max": 100.0,
"gpu_mem_max_mb": 89575.0
},
"6": {
"n": 81,
"decode_tps": 19.51697409799575,
"prefill_tps": 990.0816609032532,
"ttft_p90_ms": 5961.234248999972,
"gpu_util_mean": 38.717277486910994,
"gpu_util_max": 100.0,
"gpu_mem_max_mb": 89575.0
},
"7": {
"n": 160,
"decode_tps": 36.57671644299036,
"prefill_tps": 1478.5266565579789,
"ttft_p90_ms": 17912.180206010817,
"gpu_util_mean": 85.15183246073299,
"gpu_util_max": 100.0,
"gpu_mem_max_mb": 89575.0
}
},
"decisions": {
"lmetric_fallback": 349,
"affinity": 458
},
"gpu_captured": true,
"spread": {
"n_ratio": 2.909090909090909,
"ttft_p90_ratio": 9.518988006919619,
"gpu_util_ratio": 2.5412500000000002,
"gpu_util_min": 33.50785340314136,
"gpu_util_max": 85.15183246073299
},
"per_class": {
"WARM<5k": {
"n": 92,
"ttft_ms": {
"n": 92,
"mean": 594.1550390875225,
"p50": 196.222682017833,
"p90": 338.4021449892316,
"p99": 7637.84466200741
}
},
"MED5-20k": {
"n": 278,
"ttft_ms": {
"n": 278,
"mean": 1386.6929373560054,
"p50": 662.5233909871895,
"p90": 1772.5210430216976,
"p99": 19121.71271801344
}
},
"HEAVY20-50k": {
"n": 248,
"ttft_ms": {
"n": 248,
"mean": 3761.512416864031,
"p50": 1186.4990000030957,
"p90": 7436.603061010828,
"p99": 37502.096537995385
}
},
"HEAVY+>50k": {
"n": 189,
"ttft_ms": {
"n": 189,
"mean": 9973.751859232492,
"p50": 2084.2301140073687,
"p90": 34646.72368601896,
"p99": 51783.358982007485
}
}
}
},
"lmetric": {
"n_total": 807,
"n_ok": 807,
"window_s": 1036.9893975257874,
"ttft_ms": {
"n": 807,
"mean": 4942.361280256006,
"p50": 1195.667241991032,
"p90": 15606.655231997138,
"p99": 46217.127193987835
},
"tpot_ms": {
"n": 806,
"mean": 19.707597229545165,
"p50": 9.35281297406689,
"p90": 30.177805961172382,
"p99": 232.18400578116416
},
"e2e_ms": {
"n": 807,
"mean": 9901.839828112516,
"p50": 3177.2723750036675,
"p90": 27819.4430010044,
"p99": 73672.06387300394
},
"throughput": {
"decode_tps": 222.5413302687709,
"prefill_tps": 13134.949144609054,
"total_tps": 13357.490474877826,
"total_output_tokens": 230773,
"total_new_prefill_tokens": 13620803
},
"apc": 0.48270240989877555,
"per_worker": {
"0": {
"n": 121,
"decode_tps": 40.13348651326501,
"prefill_tps": 1973.9218210754154,
"ttft_p90_ms": 23894.41591600189,
"gpu_util_mean": 90.75247524752476,
"gpu_util_max": 100.0,
"gpu_mem_max_mb": 89575.0
},
"1": {
"n": 128,
"decode_tps": 44.98117349250917,
"prefill_tps": 1626.6328315647543,
"ttft_p90_ms": 5918.853377981577,
"gpu_util_mean": 64.96039603960396,
"gpu_util_max": 100.0,
"gpu_mem_max_mb": 89575.0
},
"2": {
"n": 109,
"decode_tps": 26.800659742819484,
"prefill_tps": 1578.2861463241723,
"ttft_p90_ms": 13917.768498009536,
"gpu_util_mean": 58.306930693069305,
"gpu_util_max": 100.0,
"gpu_mem_max_mb": 89575.0
},
"3": {
"n": 99,
"decode_tps": 19.435107097610242,
"prefill_tps": 1683.1715002723502,
"ttft_p90_ms": 16737.5574040052,
"gpu_util_mean": 59.16831683168317,
"gpu_util_max": 100.0,
"gpu_mem_max_mb": 89575.0
},
"4": {
"n": 116,
"decode_tps": 19.955845305048445,
"prefill_tps": 1884.7752972820501,
"ttft_p90_ms": 11347.276910004439,
"gpu_util_mean": 50.36138613861386,
"gpu_util_max": 100.0,
"gpu_mem_max_mb": 89575.0
},
"5": {
"n": 61,
"decode_tps": 12.497716978516857,
"prefill_tps": 1726.7611455549827,
"ttft_p90_ms": 31680.082703998778,
"gpu_util_mean": 55.93069306930693,
"gpu_util_max": 100.0,
"gpu_mem_max_mb": 89575.0
},
"6": {
"n": 88,
"decode_tps": 38.23472071614312,
"prefill_tps": 1208.0914259963265,
"ttft_p90_ms": 9533.787049993407,
"gpu_util_mean": 51.62871287128713,
"gpu_util_max": 100.0,
"gpu_mem_max_mb": 89575.0
},
"7": {
"n": 85,
"decode_tps": 20.50262042285856,
"prefill_tps": 1453.3089765390037,
"ttft_p90_ms": 14970.007644995349,
"gpu_util_mean": 51.757425742574256,
"gpu_util_max": 100.0,
"gpu_mem_max_mb": 89575.0
}
},
"decisions": {},
"gpu_captured": true,
"spread": {
"n_ratio": 2.098360655737705,
"ttft_p90_ratio": 5.352402007768976,
"gpu_util_ratio": 1.8020249680526887,
"gpu_util_min": 50.36138613861386,
"gpu_util_max": 90.75247524752476
},
"per_class": {
"WARM<5k": {
"n": 92,
"ttft_ms": {
"n": 92,
"mean": 511.51012201982036,
"p50": 255.4193850082811,
"p90": 471.22472297633067,
"p99": 3532.1444049768616
}
},
"MED5-20k": {
"n": 278,
"ttft_ms": {
"n": 278,
"mean": 1010.5527848093863,
"p50": 818.2104199950118,
"p90": 1878.1264800054487,
"p99": 4416.228823014535
}
},
"HEAVY20-50k": {
"n": 248,
"ttft_ms": {
"n": 248,
"mean": 3164.034748000338,
"p50": 2636.801838991232,
"p90": 7400.190736021614,
"p99": 9636.447697004769
}
},
"HEAVY+>50k": {
"n": 189,
"ttft_ms": {
"n": 189,
"mean": 15215.938255342222,
"p50": 12060.85875100689,
"p90": 36602.47571900254,
"p99": 52271.21993701439
}
}
}
},
"sticky": {
"n_total": 807,
"n_ok": 807,
"window_s": 994.9787130355835,
"ttft_ms": {
"n": 807,
"mean": 4455.946148958436,
"p50": 713.0627470032778,
"p90": 14838.208375993418,
"p99": 43174.81458699331
},
"tpot_ms": {
"n": 806,
"mean": 19.138733289320065,
"p50": 8.24416923684399,
"p90": 23.769559945071954,
"p99": 184.6952650922511
},
"e2e_ms": {
"n": 807,
"mean": 8663.490226920512,
"p50": 2352.715140004875,
"p90": 24966.471978026675,
"p99": 70932.61348700617
},
"throughput": {
"decode_tps": 231.93762537485247,
"prefill_tps": 8105.277926394779,
"total_tps": 8337.215551769632,
"total_output_tokens": 230773,
"total_new_prefill_tokens": 8064579
},
"apc": 0.6937194318219754,
"per_worker": {
"0": {
"n": 156,
"decode_tps": 44.672312500428745,
"prefill_tps": 1949.5271351907093,
"ttft_p90_ms": 20576.009418989997,
"gpu_util_mean": 93.18041237113403,
"gpu_util_max": 100.0,
"gpu_mem_max_mb": 89575.0
},
"1": {
"n": 114,
"decode_tps": 44.75372127725863,
"prefill_tps": 929.0429914624127,
"ttft_p90_ms": 5498.717762995511,
"gpu_util_mean": 53.08247422680412,
"gpu_util_max": 100.0,
"gpu_mem_max_mb": 89575.0
},
"2": {
"n": 88,
"decode_tps": 29.785561853462614,
"prefill_tps": 904.2113044360427,
"ttft_p90_ms": 12234.77461998118,
"gpu_util_mean": 49.628865979381445,
"gpu_util_max": 100.0,
"gpu_mem_max_mb": 89575.0
},
"3": {
"n": 98,
"decode_tps": 29.982550992458386,
"prefill_tps": 1018.2680159145942,
"ttft_p90_ms": 16286.48554199026,
"gpu_util_mean": 44.123711340206185,
"gpu_util_max": 100.0,
"gpu_mem_max_mb": 89575.0
},
"4": {
"n": 110,
"decode_tps": 37.69427376549181,
"prefill_tps": 949.1017120546454,
"ttft_p90_ms": 6709.773182024946,
"gpu_util_mean": 45.7680412371134,
"gpu_util_max": 100.0,
"gpu_mem_max_mb": 89575.0
},
"5": {
"n": 99,
"decode_tps": 19.964246209244884,
"prefill_tps": 980.7747514746083,
"ttft_p90_ms": 14065.780322009232,
"gpu_util_mean": 36.324742268041234,
"gpu_util_max": 100.0,
"gpu_mem_max_mb": 89575.0
},
"6": {
"n": 79,
"decode_tps": 11.21531531660107,
"prefill_tps": 682.5723918534845,
"ttft_p90_ms": 4579.089447972365,
"gpu_util_mean": 22.288659793814432,
"gpu_util_max": 100.0,
"gpu_mem_max_mb": 89575.0
},
"7": {
"n": 63,
"decode_tps": 13.869643459906332,
"prefill_tps": 691.7796240082818,
"ttft_p90_ms": 18229.593775991816,
"gpu_util_mean": 30.762886597938145,
"gpu_util_max": 100.0,
"gpu_mem_max_mb": 89575.0
}
},
"decisions": {},
"gpu_captured": true,
"spread": {
"n_ratio": 2.4761904761904763,
"ttft_p90_ratio": 4.493471825081102,
"gpu_util_ratio": 4.180619796484737,
"gpu_util_min": 22.288659793814432,
"gpu_util_max": 93.18041237113403
},
"per_class": {
"WARM<5k": {
"n": 92,
"ttft_ms": {
"n": 92,
"mean": 827.0193562525294,
"p50": 197.0047799986787,
"p90": 507.2060489910655,
"p99": 19187.98109301133
}
},
"MED5-20k": {
"n": 278,
"ttft_ms": {
"n": 278,
"mean": 2624.659966896439,
"p50": 736.4085000008345,
"p90": 3899.43698499701,
"p99": 33760.123436979484
}
},
"HEAVY20-50k": {
"n": 248,
"ttft_ms": {
"n": 248,
"mean": 3807.600332329692,
"p50": 1086.1541359918192,
"p90": 9912.624888995197,
"p99": 40516.03257699753
}
},
"HEAVY+>50k": {
"n": 189,
"ttft_ms": {
"n": 189,
"mean": 9766.785228673292,
"p50": 2521.5582190139685,
"p90": 34039.37866198248,
"p99": 47948.314540000865
}
}
}
}
}