PD-sep server-side profiling: vLLM patches + per-request breakdown
Instrumentation patches (microbench/patches/):
- pd_profile.py: shared event emitter (VLLM_PD_PROFILE_LOG env var)
- apply_patches.py: idempotent patch installer for mooncake_connector.py
and scheduler.py; marks each insertion with # PD_PROFILE_PATCH
- analyze_events.py: joins per-process JSONL event logs by transfer_id
into per-request phase durations
Seven events captured per request:
D_get_num_matched → P_zmq_received → P_prefill_done →
P_rdma_start → P_rdma_end → D_recv_complete → D_request_promoted
Driver fix (microbench/lifecycle/driver.py):
seed_prefix_cache now sends via the proxy URL so that P and D both cache
the seeded prefix with matching block hashes. Previously, seeding D
directly produced block hashes that differed from those of the proxy-routed
measurement requests, making incremental transfer impossible.
Real breakdown (fig_breakdown_real.png, server_breakdown.csv, n=93):
prefill_compute 620 ms median (95% of overhead)
rdma_transfer 42 ms median (~71 Gbps effective)
other overhead 10 ms median (dispatch + params + signal + promote)
Mooncake transfer is NOT the bottleneck. Even with bulk RDMA, the
transfer cost is <10% of the prefill cost for Qwen3-30B-A3B on H20.
This commit is contained in:
1
microbench/interference/results/interference_data.json
Normal file
1
microbench/interference/results/interference_data.json
Normal file
File diff suppressed because one or more lines are too long
@@ -88,7 +88,14 @@ def make_new_tokens_prompt(num_tokens: int, unique_id: str) -> str:
|
||||
async def seed_prefix_cache(
|
||||
client: httpx.AsyncClient, url: str, model: str, num_tokens: int, session_id: str
|
||||
) -> bool:
|
||||
"""Send a request to D to warm its prefix cache with num_tokens of context.
|
||||
"""Warm BOTH P and D prefix caches by sending the seed through the PD-sep proxy.
|
||||
|
||||
Sending directly to D would only warm D's cache but produce block hashes that
|
||||
don't match what P later produces (different tokenization path). Sending through
|
||||
the proxy makes P do the prefill (P caches), pushes KV to D (D caches with
|
||||
matching hashes), so subsequent requests with the same prefix get incremental
|
||||
transfer on both sides.
|
||||
|
||||
Returns True if successful.
|
||||
"""
|
||||
if num_tokens == 0:
|
||||
@@ -318,11 +325,11 @@ async def main():
|
||||
print(f"Prior context C={C} tokens")
|
||||
print(f"{'='*60}")
|
||||
|
||||
# Seed D's prefix cache
|
||||
# Seed BOTH P and D prefix caches via the proxy
|
||||
if C > 0:
|
||||
print(f" Seeding D prefix cache with {C} tokens...")
|
||||
print(f" Seeding P+D prefix caches with {C} tokens via proxy...")
|
||||
success = await seed_prefix_cache(
|
||||
client, seed_endpoint, args.model, C, args.session_id
|
||||
client, pdsep_url, args.model, C, args.session_id
|
||||
)
|
||||
if not success:
|
||||
print(f" SKIP all configs with C={C} (cache seed failed)")
|
||||
|
||||
@@ -32,6 +32,7 @@ echo ""
|
||||
# Start prefill instance (KV producer)
|
||||
echo "[1/2] Starting prefill instance on GPU $PREFILL_GPU..."
|
||||
VLLM_MOONCAKE_BOOTSTRAP_PORT=$BOOTSTRAP_PORT \
|
||||
VLLM_PD_PROFILE_LOG="$LOG_DIR/prefill_events.jsonl" \
|
||||
CUDA_VISIBLE_DEVICES=$PREFILL_GPU \
|
||||
$PYTHON -m vllm.entrypoints.openai.api_server \
|
||||
--model "$MODEL_PATH" \
|
||||
@@ -66,6 +67,7 @@ done
|
||||
|
||||
# Start decode instance (KV consumer)
|
||||
echo "[2/2] Starting decode instance on GPU $DECODE_GPU..."
|
||||
VLLM_PD_PROFILE_LOG="$LOG_DIR/decode_events.jsonl" \
|
||||
CUDA_VISIBLE_DEVICES=$DECODE_GPU \
|
||||
$PYTHON -m vllm.entrypoints.openai.api_server \
|
||||
--model "$MODEL_PATH" \
|
||||
|
||||
BIN
microbench/lifecycle/results/fig_breakdown.png
Normal file
BIN
microbench/lifecycle/results/fig_breakdown.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 144 KiB |
BIN
microbench/lifecycle/results/fig_breakdown_real.png
Normal file
BIN
microbench/lifecycle/results/fig_breakdown_real.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 129 KiB |
1
microbench/lifecycle/results/lifecycle_data.json
Normal file
1
microbench/lifecycle/results/lifecycle_data.json
Normal file
File diff suppressed because one or more lines are too long
94
microbench/lifecycle/results/server_breakdown.csv
Normal file
94
microbench/lifecycle/results/server_breakdown.csv
Normal file
@@ -0,0 +1,94 @@
|
||||
D_promote_ms,build_params_ms,completion_sig_ms,d_to_p_dispatch_ms,delta_to_pull,events_seen,full_overhead_ms,n_events,num_local_cached,num_prompt_tokens_P,num_send_blocks,prefill_compute_ms,prompt_tokens,rdma_bandwidth_gbps,rdma_bytes,rdma_num_ops,rdma_transfer_ms,remote_total,t_D_get_num_matched,t_D_recv_complete,t_D_request_promoted,t_P_prefill_done,t_P_rdma_end,t_P_rdma_start,t_P_zmq_received,transfer_id
|
||||
1.572544,128.810689,0.429406,157.586296,11,"D_get_num_matched,D_recv_complete,D_request_promoted,P_prefill_done,P_rdma_end,P_rdma_start,P_zmq_received",180.077196,7,0,11,1,-128.19752,11,0.6330776134029652,1572864,96,19.875781,11,343433675745977,343433854250629,343433855823173,343433705134753,343433853821223,343433833945442,343433833332273,xfer-1f4a9d22-d840-45c1-9643-e93b1456a9e2
|
||||
2.191308,0.903295,0.506658,1.640978,978,"D_get_num_matched,D_recv_complete,D_request_promoted,P_prefill_done,P_rdma_end,P_rdma_start,P_zmq_received",77.893983,7,0,978,62,64.728703,978,98.46478694228644,97517568,96,7.923041,978,343528336446320,343528412148995,343528414340303,343528402816001,343528411642337,343528403719296,343528338087298,xfer-464cc62e-38aa-45ee-affc-01c3763aeece
|
||||
2.093855,0.913908,0.396394,2.31242,994,"D_get_num_matched,D_recv_complete,D_request_promoted,P_prefill_done,P_rdma_end,P_rdma_start,P_zmq_received",80.174358,7,0,994,63,67.058527,994,107.13559177722512,99090432,96,7.399254,994,343529586703278,343529664783781,343529666877636,343529656074225,343529664387387,343529656988133,343529589015698,xfer-f1f3be4e-2eff-429d-811f-c3638c668fad
|
||||
1.938306,1.154961,0.442667,2.36916,967,"D_get_num_matched,D_recv_complete,D_request_promoted,P_prefill_done,P_rdma_end,P_rdma_start,P_zmq_received",131.651798,7,0,967,61,118.149768,967,101.03515838490675,95944704,96,7.596936,967,343530839741058,343530969454550,343530971392856,343530960259986,343530969011883,343530961414947,343530842110218,xfer-fcc5b5ec-1deb-4656-bb5e-23ef9cd0ac6b
|
||||
1.824815,1.547935,0.503381,1.881943,3799,"D_get_num_matched,D_recv_complete,D_request_promoted,P_prefill_done,P_rdma_end,P_rdma_start,P_zmq_received",307.631595,7,0,3799,238,273.494352,3799,105.52574869264141,374341632,96,28.379169,3799,343534160883920,343534466690700,343534468515515,343534436260215,343534466187319,343534437808150,343534162765863,xfer-29788e04-6a5f-4a8e-8978-398a6daaa92b
|
||||
2.073684,1.602135,0.438767,1.752684,3818,"D_get_num_matched,D_recv_complete,D_request_promoted,P_prefill_done,P_rdma_end,P_rdma_start,P_zmq_received",261.321543,7,0,3818,239,227.201904,3818,106.44473629804281,375914496,96,28.252369,3818,343535656022234,343535915270093,343535917343777,343535884976822,343535914831326,343535886578957,343535657774918,xfer-db2d7198-6d26-477d-ac10-871ec5ea5148
|
||||
2.324107,1.562191,0.509916,1.838758,3802,"D_get_num_matched,D_recv_complete,D_request_promoted,P_prefill_done,P_rdma_end,P_rdma_start,P_zmq_received",259.489254,7,0,3802,238,226.118274,3802,110.36011840798396,374341632,96,27.136008,3802,343537102377332,343537359542479,343537361866586,343537330334364,343537359032563,343537331896555,343537104216090,xfer-f19a4010-772a-43b2-90de-6c715413b021
|
||||
2.148607,1.898994,0.719288,2.363097,15447,"D_get_num_matched,D_recv_complete,D_request_promoted,P_prefill_done,P_rdma_end,P_rdma_start,P_zmq_received",1490.131939,7,0,15447,966,1368.83197,15447,106.46487520279301,1519386624,96,114.169983,15447,343540566337482,343542054320814,343542056469421,343541937532549,343542053601526,343541939431543,343540568700579,xfer-ec05665e-ac9b-4832-becc-dca0db14483c
|
||||
1.8382,1.614592,0.637042,2.790011,15476,"D_get_num_matched,D_recv_complete,D_request_promoted,P_prefill_done,P_rdma_end,P_rdma_start,P_zmq_received",1489.352525,7,0,15476,968,1369.766518,15476,108.07092176557302,1522532352,96,112.706162,15476,343543280564385,343544768078710,343544769916910,343544653120914,343544767441668,343544654735506,343543283354396,xfer-3f66e703-8daf-4ddc-ba56-4109b68da614
|
||||
1.651487,1.756148,0.616415,2.184321,15445,"D_get_num_matched,D_recv_complete,D_request_promoted,P_prefill_done,P_rdma_end,P_rdma_start,P_zmq_received",1485.680928,7,0,15445,966,1365.595974,15445,106.73917913395768,1519386624,96,113.876583,15445,343545994724766,343547478754207,343547480405694,343547362505061,343547478137792,343547364261209,343545996909087,xfer-7ae5e4c6-b064-4c57-8ac4-32393a61e587
|
||||
4.486239,2.118211,0.759601,2.145119,4292,"D_get_num_matched,D_recv_complete,D_request_promoted,P_prefill_done,P_rdma_end,P_rdma_start,P_zmq_received",303.914144,7,0,4292,269,263.51647,4292,109.58132928677931,423100416,96,30.888504,4292,343550691814399,343550991242304,343550995728543,343550957475988,343550990482703,343550959594199,343550693959518,xfer-92dd2241-80e7-4702-85ef-7912f7295b88
|
||||
1.663587,1.230762,0.400916,2.194673,993,"D_get_num_matched,D_recv_complete,D_request_promoted,P_prefill_done,P_rdma_end,P_rdma_start,P_zmq_received",107.269862,7,4272,5265,330,94.334755,5265,106.47487733320762,99090432,96,7.445169,5265,343553031975478,343553137581753,343553139245340,343553128504906,343553137180837,343553129735668,343553034170151,xfer-c72bb028-34b1-4360-8090-75271a6865b9
|
||||
2.671227,1.545351,0.613908,2.21666,966,"D_get_num_matched,D_recv_complete,D_request_promoted,P_prefill_done,P_rdma_end,P_rdma_start,P_zmq_received",336.952311,7,4288,5254,329,322.664443,5254,106.00567622952518,95944704,96,7.240722,5254,343554329552947,343554663834031,343554666505258,343554654434050,343554663220123,343554655979401,343554331769607,xfer-1c34ca70-07de-4cbc-830b-545e04a367ec
|
||||
1.692747,1.320752,0.393379,1.902381,973,"D_get_num_matched,D_recv_complete,D_request_promoted,P_prefill_done,P_rdma_end,P_rdma_start,P_zmq_received",104.54372,7,4288,5261,329,91.983782,5261,105.86010386061776,95944704,96,7.250679,5261,343555856846157,343555959697130,343555961389877,343555950732320,343555959303751,343555952053072,343555858748538,xfer-0e598b68-c64a-48ae-a05f-4fce97806bda
|
||||
1.965025,1.466853,0.52114,2.64554,3797,"D_get_num_matched,D_recv_complete,D_request_promoted,P_prefill_done,P_rdma_end,P_rdma_start,P_zmq_received",584.692369,7,4288,8085,506,550.780198,8085,109.64250888375697,374341632,96,27.313613,8085,343559156048349,343559738775693,343559740740718,343559709474087,343559738254553,343559710940940,343559158693889,xfer-9eb86222-d272-42f9-a52f-e88c05919600
|
||||
1.923668,1.717304,0.47073,2.98248,3803,"D_get_num_matched,D_recv_complete,D_request_promoted,P_prefill_done,P_rdma_end,P_rdma_start,P_zmq_received",353.426929,7,4288,8091,506,319.055549,8091,109.78888139463592,374341632,96,27.277198,8091,343560936583031,343561288086292,343561290009960,343561258621060,343561287615562,343561260338364,343560939565511,xfer-decfe301-4967-4e45-9ecc-e415b15b2ac7
|
||||
2.89904,1.858012,0.797347,2.92545,3804,"D_get_num_matched,D_recv_complete,D_request_promoted,P_prefill_done,P_rdma_end,P_rdma_start,P_zmq_received",582.360418,7,4288,8092,506,546.83155,8092,110.71503391675684,374341632,96,27.049019,8092,343562488895366,343563068356744,343563071255784,343563038652366,343563067559397,343563040510378,343562491820816,xfer-066515a0-7a07-482d-b30f-f34a5da32cb7
|
||||
3.116643,1.557379,0.724347,2.912104,15438,"D_get_num_matched,D_recv_complete,D_request_promoted,P_prefill_done,P_rdma_end,P_rdma_start,P_zmq_received",1858.749822,7,4288,19726,1233,1739.911126,19726,109.85890979175518,1517813760,96,110.528223,19726,343566288890759,343568144523938,343568147640581,343568031713989,343568143799591,343568033271368,343566291802863,xfer-ae972fb5-5a36-4f29-8340-7db5b6dbb9a4
|
||||
3.217173,2.021348,0.720628,2.820507,15446,"D_get_num_matched,D_recv_complete,D_request_promoted,P_prefill_done,P_rdma_end,P_rdma_start,P_zmq_received",2165.022861,7,4288,19734,1234,2045.068935,19734,109.33368837951441,1519386624,96,111.17427,19734,343569386870590,343571548676278,343571551893451,343571434760032,343571547955650,343571436781380,343569389691097,xfer-b184470f-70cc-42b5-9da4-8d3d708c1057
|
||||
3.291156,1.326134,1.574012,2.990072,15403,"D_get_num_matched,D_recv_complete,D_request_promoted,P_prefill_done,P_rdma_end,P_rdma_start,P_zmq_received",1855.046099,7,4288,19691,1231,1735.169482,19691,109.46580835456497,1514668032,96,110.695243,19691,343572790365501,343574642120444,343574645411600,343574528525055,343574640546432,343574529851189,343572793355573,xfer-3bcf9d3c-ca14-4c77-a6ed-fcbab5dc6eba
|
||||
2.516234,1.866632,0.728605,2.676584,13115,"D_get_num_matched,D_recv_complete,D_request_promoted,P_prefill_done,P_rdma_end,P_rdma_start,P_zmq_received",1771.14403,7,4272,17387,1087,1668.710164,17387,109.01684639798798,1289748480,96,94.645811,17387,343577884306931,343579652934727,343579655450961,343579555693679,343579652206122,343579557560311,343577886983515,xfer-024c8a47-9da6-41d2-80ba-e481953320a4
|
||||
3.022996,2.496474,0.437547,3.427936,966,"D_get_num_matched,D_recv_complete,D_request_promoted,P_prefill_done,P_rdma_end,P_rdma_start,P_zmq_received",198.121311,7,17376,18342,1147,181.654195,18342,108.37898421710993,95944704,96,7.082163,18342,343581716140593,343581911238908,343581914261904,343581901222724,343581910801361,343581903719198,343581719568529,xfer-4300630c-f889-4514-b34c-fef12a8925c2
|
||||
2.374258,1.515796,0.537708,2.937961,966,"D_get_num_matched,D_recv_complete,D_request_promoted,P_prefill_done,P_rdma_end,P_rdma_start,P_zmq_received",1866.282319,7,17392,18358,1148,1851.699694,18358,106.35555699661711,95944704,96,7.216902,18358,343583151547938,343585015455999,343585017830257,343585006185593,343585014918291,343585007701389,343583154485899,xfer-4f994371-8c35-4c72-984d-ac4018a001b3
|
||||
2.958061,2.137306,0.433389,2.643298,958,"D_get_num_matched,D_recv_complete,D_request_promoted,P_prefill_done,P_rdma_end,P_rdma_start,P_zmq_received",197.54015,7,17392,18350,1147,182.146298,18350,104.54110181425735,94371840,96,7.221798,18350,343586258882017,343586453464106,343586456422167,343586443671613,343586453030717,343586445808919,343586261525315,xfer-cbef70fc-5e11-44fb-9f8b-3cbf4314f9ef
|
||||
2.650836,1.466464,0.881933,2.929782,3789,"D_get_num_matched,D_recv_complete,D_request_promoted,P_prefill_done,P_rdma_end,P_rdma_start,P_zmq_received",2265.525265,7,17392,21181,1324,2229.561205,21181,106.37222604779124,372768768,96,28.035045,21181,343589699430922,343591962305351,343591964956187,343591931921909,343591961423418,343591933388373,343589702360704,xfer-947e1adb-7842-46d9-b865-70a07d6f909a
|
||||
3.017333,2.621991,0.471187,3.57172,3799,"D_get_num_matched,D_recv_complete,D_request_promoted,P_prefill_done,P_rdma_end,P_rdma_start,P_zmq_received",649.59573,7,17392,21191,1325,612.62619,21191,109.74820038135677,374341632,96,27.287309,21191,343593213139077,343593859717474,343593862734807,343593829336987,343593859246287,343593831958978,343593216710797,xfer-00759ac2-addc-4716-9983-3d484f37fe85
|
||||
2.83588,1.611923,0.642583,3.288654,3827,"D_get_num_matched,D_recv_complete,D_request_promoted,P_prefill_done,P_rdma_end,P_rdma_start,P_zmq_received",2269.97943,7,17392,21219,1327,2234.057575,21219,109.6437992993817,377487360,96,27.542815,21219,343595111474468,343597378618018,343597381453898,343597348820697,343597377975435,343597350432620,343595114763122,xfer-ac31cb90-7b9f-4bfb-8a39-b83668c9c229
|
||||
2.598281,2.064127,0.607766,3.3877,15416,"D_get_num_matched,D_recv_complete,D_request_promoted,P_prefill_done,P_rdma_end,P_rdma_start,P_zmq_received",3011.682064,7,17392,32808,2051,2892.258285,32808,109.50957488227085,1516240896,96,110.765905,32808,343600650677233,343603659761016,343603662359297,343603546323218,343603659153250,343603548387345,343600654064933,xfer-aada3659-fde4-4458-a5bc-94c10ca07899
|
||||
2.688431,1.357478,0.584424,4.345204,15433,"D_get_num_matched,D_recv_complete,D_request_promoted,P_prefill_done,P_rdma_end,P_rdma_start,P_zmq_received",4668.338106,7,17392,32825,2052,4548.741312,32825,109.76651693625213,1517813760,96,110.621257,32825,343604950470092,343609616119767,343609618808198,343609503556608,343609615535343,343609504914086,343604954815296,xfer-b71c9bcd-2122-4bcb-b1d3-756106b9b20a
|
||||
2.675303,14.913805,0.747011,3.301697,15479,"D_get_num_matched,D_recv_complete,D_request_promoted,P_prefill_done,P_rdma_end,P_rdma_start,P_zmq_received",3074.489807,7,17392,32871,2055,2893.557117,32871,76.46359553289831,1522532352,55392,159.294874,32871,343610909483629,343613981298133,343613983973436,343613806342443,343613980551122,343613821256248,343610912785326,xfer-43467105-6488-4bc9-a37b-2f650259524c
|
||||
2.87329,3.545183,0.429664,2.861884,976,"D_get_num_matched,D_recv_complete,D_request_promoted,P_prefill_done,P_rdma_end,P_rdma_start,P_zmq_received",84.420309,7,0,976,61,63.928877,976,71.1926882297688,95944704,5856,10.781411,976,343923141873975,343923223420994,343923226294284,343923208664736,343923222991330,343923212209919,343923144735859,xfer-7e279c04-512f-4f08-8960-7ad2b6e33fcf
|
||||
3.138747,2.183392,0.409532,2.725953,965,"D_get_num_matched,D_recv_complete,D_request_promoted,P_prefill_done,P_rdma_end,P_rdma_start,P_zmq_received",84.64734,7,0,965,61,65.379252,965,71.00135868358657,95944704,5856,10.810464,965,343924400841889,343924482350482,343924485489229,343924468947094,343924481940950,343924471130486,343924403567842,xfer-f2343b70-d024-4c83-96ff-590fc1a8a9a7
|
||||
2.406195,2.445786,0.436976,2.704908,972,"D_get_num_matched,D_recv_complete,D_request_promoted,P_prefill_done,P_rdma_end,P_rdma_start,P_zmq_received",84.823867,7,0,972,61,65.885681,972,70.13296046415304,95944704,5856,10.944321,972,343925661522710,343925743940382,343925746346577,343925730113299,343925743503406,343925732559085,343925664227618,xfer-bb2b31e5-4ff7-409c-976a-f76eea44b515
|
||||
3.595002,2.322323,1.033232,2.746885,982,"D_get_num_matched,D_recv_complete,D_request_promoted,P_prefill_done,P_rdma_end,P_rdma_start,P_zmq_received",87.151933,7,0,982,62,66.350372,982,70.25686089999576,97517568,5952,11.104119,982,343926922550555,343927006107486,343927009702488,343926991647812,343927005074254,343926993970135,343926925297440,xfer-1c4d0cd5-3371-48c3-9d43-fb19272b9346
|
||||
3.594882,2.939953,0.441969,2.742435,972,"D_get_num_matched,D_recv_complete,D_request_promoted,P_prefill_done,P_rdma_end,P_rdma_start,P_zmq_received",85.059198,7,0,972,61,64.269469,972,69.33366382156527,95944704,5856,11.07049,972,343928186677060,343928268141376,343928271736258,343928253688964,343928267699407,343928256628917,343928189419495,xfer-82f41e4a-ebce-435d-ae64-9d085385a474
|
||||
4.232759,6.040593,1.062792,2.993056,3839,"D_get_num_matched,D_recv_complete,D_request_promoted,P_prefill_done,P_rdma_end,P_rdma_start,P_zmq_received",281.649207,7,0,3839,240,224.455213,3839,70.45172968753798,377487360,23040,42.864794,3839,343931451915366,343931729331814,343931733564573,343931679363635,343931728269022,343931685404228,343931454908422,xfer-14c09ca0-184a-430c-848e-0bee7a90a832
|
||||
3.615952,5.927463,0.697803,3.01182,3781,"D_get_num_matched,D_recv_complete,D_request_promoted,P_prefill_done,P_rdma_end,P_rdma_start,P_zmq_received",276.929258,7,0,3781,237,221.153959,3781,70.13150462530673,372768768,22752,42.522261,3781,343932919215085,343933192528391,343933196144343,343933143380864,343933191830588,343933149308327,343932922226905,xfer-3237e223-ba7e-437d-923b-4ec77495c7f7
|
||||
2.447002,5.858704,0.495866,2.810846,3811,"D_get_num_matched,D_recv_complete,D_request_promoted,P_prefill_done,P_rdma_end,P_rdma_start,P_zmq_received",279.094026,7,0,3811,239,224.652015,3811,70.21584277020796,375914496,22944,42.829593,3811,343934384634236,343934661281260,343934663728262,343934612097097,343934660785394,343934617955801,343934387445082,xfer-caee6ddc-a766-43bf-a77e-04052215a01f
|
||||
3.64398,5.597882,0.822779,2.946601,3815,"D_get_num_matched,D_recv_complete,D_request_promoted,P_prefill_done,P_rdma_end,P_rdma_start,P_zmq_received",279.546476,7,0,3815,239,223.590559,3815,70.02768021879314,375914496,22944,42.944675,3815,343935849174273,343936125076769,343936128720749,343936075711433,343936124253990,343936081309315,343935852120874,xfer-f7a389fd-d54f-40fe-8127-9ba513e84e9d
|
||||
3.158833,5.423762,0.480652,3.399984,3810,"D_get_num_matched,D_recv_complete,D_request_promoted,P_prefill_done,P_rdma_end,P_rdma_start,P_zmq_received",278.521197,7,0,3810,239,223.086016,3810,69.98323250399388,375914496,22944,42.97195,3810,343937317705345,343937593067709,343937596226542,343937544191345,343937592587057,343937549615107,343937321105329,xfer-dba542a7-c091-40c3-9256-9052733c5d24
|
||||
2.802117,23.002515,0.720612,3.127717,15409,"D_get_num_matched,D_recv_complete,D_request_promoted,P_prefill_done,P_rdma_end,P_rdma_start,P_zmq_received",1574.36384,7,0,15409,964,1360.445676,15409,65.8286370433163,1516240896,92544,184.265203,15409,343940799169801,343942370731524,343942373533641,343942162743194,343942370010912,343942185745709,343940802297518,xfer-da6c5ecc-3e4b-43b1-9912-f3469c3d7cce
|
||||
2.758942,21.13861,0.691324,3.957685,15409,"D_get_num_matched,D_recv_complete,D_request_promoted,P_prefill_done,P_rdma_end,P_rdma_start,P_zmq_received",1573.549032,7,0,15409,964,1360.44069,15409,65.72285498263587,1516240896,92544,184.561781,15409,343943602871270,343945173661360,343945176420302,343944967269645,343945172970036,343944988408255,343943606828955,xfer-c84e0b7e-add7-46f0-b56e-324f69881210
|
||||
3.382336,20.296695,0.817298,4.003918,15406,"D_get_num_matched,D_recv_complete,D_request_promoted,P_prefill_done,P_rdma_end,P_rdma_start,P_zmq_received",1561.557037,7,0,15406,963,1359.195631,15406,69.69552213786865,1514668032,92448,173.861159,15406,343946402149766,343947960324467,343947963706803,343947765349315,343947959507169,343947785646010,343946406153684,xfer-4b2cb6e7-14c2-464b-90f3-0fe2f69eb9f5
|
||||
4.027914,20.186346,0.851076,4.357565,15423,"D_get_num_matched,D_recv_complete,D_request_promoted,P_prefill_done,P_rdma_end,P_rdma_start,P_zmq_received",1561.46888,7,0,15423,964,1360.537359,15423,70.7248834956517,1516240896,92544,171.50862,15423,343949189682339,343950747123305,343950751151219,343950554577263,343950746272229,343950574763609,343949194039904,xfer-8149fb3f-0181-45a9-96a1-71e3b5136cef
|
||||
3.404981,19.958405,0.599572,4.462838,15407,"D_get_num_matched,D_recv_complete,D_request_promoted,P_prefill_done,P_rdma_end,P_rdma_start,P_zmq_received",1556.477919,7,0,15407,963,1357.418939,15407,71.0139960583517,1514668032,92448,170.633184,15407,343951975310914,343953528383852,343953531788833,343953337192691,343953527784280,343953357151096,343951979773752,xfer-0d61490c-f4d1-41d8-a5c6-d3c3cb7458a2
|
||||
2.78986,2.343292,0.424066,4.00012,11,"D_get_num_matched,D_recv_complete,D_request_promoted,P_prefill_done,P_rdma_end,P_rdma_start,P_zmq_received",187.404757,7,2128,2139,134,177.496682,2139,35.87563330928873,1572864,96,0.350737,2139,343956737870164,343956922485061,343956925274921,343956919366966,343956922060995,343956921710258,343956741870284,xfer-d75ad3c4-ddd5-45e6-aede-e215396b93a0
|
||||
3.537035,2.412897,0.449823,3.840924,978,"D_get_num_matched,D_recv_complete,D_request_promoted,P_prefill_done,P_rdma_end,P_rdma_start,P_zmq_received",97.132421,7,2128,3106,195,75.942308,3106,71.24939462624278,97517568,5952,10.949434,3106,343958952635219,343959046230605,343959049767640,343959032418451,343959045780782,343959034831348,343958956476143,xfer-2b154013-682e-4689-a07c-a5b36b18117a
|
||||
3.954325,2.724032,1.589472,4.043707,959,"D_get_num_matched,D_recv_complete,D_request_promoted,P_prefill_done,P_rdma_end,P_rdma_start,P_zmq_received",249.556472,7,2144,3103,194,226.668644,3103,71.38368721287195,94371840,5760,10.576292,3103,343960234489131,343960480091278,343960484045603,343960465201482,343960478501806,343960467925514,343960238532838,xfer-e517397a-fa5e-4c9f-9938-bbc0c04a8a05
|
||||
3.054987,2.706184,0.439188,3.920979,952,"D_get_num_matched,D_recv_complete,D_request_promoted,P_prefill_done,P_rdma_end,P_rdma_start,P_zmq_received",98.80282,7,2144,3096,194,77.537773,3096,67.74896221715768,94371840,5760,11.143709,3096,343961669217996,343961764965829,343961768020816,343961750676748,343961764526641,343961753382932,343961673138975,xfer-e45ed7f3-8a58-4b93-ad80-501bac02d42e
|
||||
3.911436,2.397406,0.543541,3.83426,980,"D_get_num_matched,D_recv_complete,D_request_promoted,P_prefill_done,P_rdma_end,P_rdma_start,P_zmq_received",200.252808,7,2128,3108,195,178.553877,3108,70.84272986685419,97517568,5952,11.012288,3108,343962953041906,343963149383278,343963153294714,343963135430043,343963148839737,343963137827449,343962956876166,xfer-3fc542a4-c451-4af6-af54-dc2a36f24f2c
|
||||
1.329255,3.026022,0.999154,3.960324,956,"D_get_num_matched,D_recv_complete,D_request_promoted,P_prefill_done,P_rdma_end,P_rdma_start,P_zmq_received",96.933447,7,2128,3084,193,76.607202,3084,68.56244886023599,94371840,5760,11.01149,3084,343964337590244,343964433194436,343964434523691,343964418157770,343964432195282,343964421183792,343964341550568,xfer-b2aca2e7-4875-427e-9f0b-d317aebaae0d
|
||||
2.955891,5.995731,0.529785,3.861272,3786,"D_get_num_matched,D_recv_complete,D_request_promoted,P_prefill_done,P_rdma_end,P_rdma_start,P_zmq_received",427.15141,7,2144,5930,371,371.196088,5930,69.98275474253029,372768768,22752,42.612643,5930,343967625545250,343968049740769,343968052696660,343968000602610,343968049210984,343968006598341,343967629406522,xfer-c6998520-9685-4641-8bde-20d71a9eeb76
|
||||
3.94437,6.033872,1.209924,3.92171,3803,"D_get_num_matched,D_recv_complete,D_request_promoted,P_prefill_done,P_rdma_end,P_rdma_start,P_zmq_received",327.251599,7,2144,5947,372,269.706977,5947,70.57266363748236,374341632,22848,42.434746,5947,343969245405347,343969568712576,343969572656946,343969519034034,343969567502652,343969525067906,343969249327057,xfer-ab43d7a9-3560-4de7-8810-f85caad5105f
|
||||
3.942834,5.737844,0.645133,3.916515,3799,"D_get_num_matched,D_recv_complete,D_request_promoted,P_prefill_done,P_rdma_end,P_rdma_start,P_zmq_received",426.730381,7,2144,5943,372,370.10648,5943,70.66120256267023,374341632,22848,42.381575,5943,343970764516949,343971187304496,343971191247330,343971138539944,343971186659363,343971144277788,343970768433464,xfer-96925457-781c-4da1-a02d-9964d3218248
|
||||
2.818408,5.958399,0.482084,3.919752,3771,"D_get_num_matched,D_recv_complete,D_request_promoted,P_prefill_done,P_rdma_end,P_rdma_start,P_zmq_received",321.743945,7,2144,5915,370,266.54489,5915,70.66963627105798,371195904,22656,42.020412,5915,343972388100818,343972707026355,343972709844763,343972658565460,343972706544271,343972664523859,343972392020570,xfer-9abc49fc-9fe8-44d3-9475-9ebfeb404f2e
|
||||
2.815404,5.865107,0.536795,3.900765,3789,"D_get_num_matched,D_recv_complete,D_request_promoted,P_prefill_done,P_rdma_end,P_rdma_start,P_zmq_received",426.210635,7,2144,5933,371,370.298539,5933,69.68613361327895,372768768,22752,42.794025,5933,343973904232146,343974327627377,343974330442781,343974278431450,343974327090582,343974284296557,343973908132911,xfer-5b9bd505-a4eb-4f4c-9777-89004834d643
|
||||
2.811201,21.427328,0.77726,4.507936,15437,"D_get_num_matched,D_recv_complete,D_request_promoted,P_prefill_done,P_rdma_end,P_rdma_start,P_zmq_received",1754.970166,7,2128,17565,1098,1547.29026,17565,68.1565467548948,1517813760,92640,178.156181,17565,343977546544054,343979298703019,343979301514220,343979098342250,343979297925759,343979119769578,343977551051990,xfer-1fde94a8-2b65-4ecf-aa50-6ce3ad65902a
|
||||
3.296259,19.253188,0.795063,6.392683,15393,"D_get_num_matched,D_recv_complete,D_request_promoted,P_prefill_done,P_rdma_end,P_rdma_start,P_zmq_received",1874.860712,7,2144,17537,1097,1673.955666,17537,70.79217296719845,1514668032,92448,171.167853,17537,343980534882389,343982406446842,343982409743101,343982215230738,343982405651779,343982234483926,343980541275072,xfer-d403b6b0-8765-409e-98bb-d709760a156a
|
||||
3.934183,20.818748,1.699325,4.649118,15435,"D_get_num_matched,D_recv_complete,D_request_promoted,P_prefill_done,P_rdma_end,P_rdma_start,P_zmq_received",1751.325849,7,2128,17563,1098,1545.890274,17563,69.65076278979821,1517813760,92640,174.334201,17563,343983647638965,343985395030631,343985398964814,343985198178357,343985393331306,343985218997105,343983652288083,xfer-7d521add-3226-4625-846c-09f2e099ea4b
|
||||
3.941944,20.143961,1.882404,4.569059,15444,"D_get_num_matched,D_recv_complete,D_request_promoted,P_prefill_done,P_rdma_end,P_rdma_start,P_zmq_received",1879.360201,7,2144,17588,1100,1676.502591,17588,70.5378129169526,1519386624,92736,172.320242,17588,343986638163124,343988513581381,343988517523325,343988319234774,343988511698977,343988339378735,343986642732183,xfer-abcde13a-0c68-47c7-9900-47d203989fa1
|
||||
3.204475,20.38466,0.600685,4.740413,15404,"D_get_num_matched,D_recv_complete,D_request_promoted,P_prefill_done,P_rdma_end,P_rdma_start,P_zmq_received",1745.632218,7,2144,17548,1097,1545.117825,17548,70.62041307309487,1514668032,92448,171.58416,17548,343989752326392,343991494754135,343991497958610,343991302184630,343991494153450,343991322569290,343989757066805,xfer-4753edc9-b787-4fff-823e-0c5a2b43eb7c
|
||||
4.049909,0.862976,1.757309,4.010888,16,"D_get_num_matched,D_recv_complete,D_request_promoted,P_prefill_done,P_rdma_end,P_rdma_start,P_zmq_received",631.17912,7,8624,8640,540,620.132294,8640,34.403604707117545,1572864,96,0.365744,8640,343994723546776,343995350675987,343995354725896,343995347689958,343995348918678,343995348552934,343994727557664,xfer-b72a0aee-2779-4405-91ed-600357c3af26
|
||||
3.257195,2.892717,0.413018,3.929762,991,"D_get_num_matched,D_recv_complete,D_request_promoted,P_prefill_done,P_rdma_end,P_rdma_start,P_zmq_received",142.99823,7,8624,9615,601,121.500199,9615,70.88746144030638,97517568,5952,11.005339,9615,343997397668131,343997537409166,343997540666361,343997523098092,343997536996148,343997525990809,343997401597893,xfer-5167f7bc-f5a6-4764-8aea-d41b821d3746
|
||||
4.005567,2.234935,0.817449,3.95562,972,"D_get_num_matched,D_recv_complete,D_request_promoted,P_prefill_done,P_rdma_end,P_rdma_start,P_zmq_received",784.81566,7,8640,9612,601,762.926825,9612,70.57829878888458,95944704,5856,10.875264,9612,343998749758108,343999530568201,343999534573768,343999516640553,343999529750752,343999518875488,343998753713728,xfer-3e0b7202-d03b-40b2-bad9-12ece5886c86
|
||||
3.998898,2.781587,1.154842,4.052568,954,"D_get_num_matched,D_recv_complete,D_request_promoted,P_prefill_done,P_rdma_end,P_rdma_start,P_zmq_received",142.964138,7,8640,9594,600,120.31907,9594,70.84193153287463,94371840,5760,10.657173,9594,344000750974619,344000889939859,344000893938757,344000875346257,344000888785017,344000878127844,344000755027187,xfer-3355bc7d-537a-4c50-9f47-29475ef20df3
|
||||
3.503411,2.430847,0.497237,3.941937,967,"D_get_num_matched,D_recv_complete,D_request_promoted,P_prefill_done,P_rdma_end,P_rdma_start,P_zmq_received",756.368633,7,8640,9607,601,735.281575,9607,71.64312362593205,95944704,5856,10.713626,9607,344002102827857,344002855693079,344002859196490,344002842051369,344002855195842,344002844482216,344002106769794,xfer-bb87cadd-1a12-4848-9105-e1e8fa07b45c
|
||||
4.019339,3.110151,1.261111,4.112686,963,"D_get_num_matched,D_recv_complete,D_request_promoted,P_prefill_done,P_rdma_end,P_rdma_start,P_zmq_received",143.047517,7,8640,9603,601,119.738752,9603,71.03412102639051,95944704,5856,10.805478,9603,344004069169337,344004208197515,344004212216854,344004193020775,344004206936404,344004196130926,344004073282023,xfer-a16f34ad-e1d4-47ca-a310-64f98e42ba93
|
||||
3.4453,5.497828,0.697261,4.130356,3813,"D_get_num_matched,D_recv_complete,D_request_promoted,P_prefill_done,P_rdma_end,P_rdma_start,P_zmq_received",1064.857818,7,8640,12453,779,1007.327189,12453,68.72312476879509,375914496,22944,43.759884,12453,344007425300610,344008486713128,344008490158428,344008436758155,344008486015867,344008442255983,344007429430966,xfer-03c4a7dc-fee5-494d-9291-059261244a55
|
||||
4.056958,6.556157,0.735644,4.325568,3816,"D_get_num_matched,D_recv_complete,D_request_promoted,P_prefill_done,P_rdma_end,P_rdma_start,P_zmq_received",474.809695,7,8640,12456,779,416.461503,12456,70.47207858955359,375914496,22944,42.673865,12456,344009709045447,344010179798184,344010183855142,344010129832518,344010179062540,344010136388675,344009713371015,xfer-66a3e89b-0ef6-47a3-bf1d-4d651b1467d7
|
||||
2.792053,5.769235,0.568306,4.276771,3795,"D_get_num_matched,D_recv_complete,D_request_promoted,P_prefill_done,P_rdma_end,P_rdma_start,P_zmq_received",1062.91823,7,8640,12435,778,1007.108164,12435,70.62433196574045,374341632,22848,42.403701,12435,344011403523025,344012463649202,344012466441255,344012414907960,344012463080896,344012420677195,344011407799796,xfer-f6f81412-2410-45ff-a344-1e7dcab42dd3
|
||||
2.93636,6.721208,0.607545,4.532909,3820,"D_get_num_matched,D_recv_complete,D_request_promoted,P_prefill_done,P_rdma_end,P_rdma_start,P_zmq_received",475.312769,7,8640,12460,779,417.75525,12460,70.33094818678526,375914496,22944,42.759497,12460,344013684813214,344014157189623,344014160125983,344014107101373,344014156582078,344014113822581,344013689346123,xfer-2f217b73-f6a7-4364-8e22-9ea686cf03fc
|
||||
3.983707,5.57057,0.871657,4.170471,3793,"D_get_num_matched,D_recv_complete,D_request_promoted,P_prefill_done,P_rdma_end,P_rdma_start,P_zmq_received",1062.286824,7,8640,12433,778,1005.143341,12433,70.38633901016657,374341632,22848,42.547078,12433,344015377875151,344016436178268,344016440161975,344016387188963,344016435306611,344016392759533,344015382045622,xfer-afe23d6a-d173-4ddd-9f4f-5c667ad3256f
|
||||
3.922147,21.524908,2.015128,4.809325,15407,"D_get_num_matched,D_recv_complete,D_request_promoted,P_prefill_done,P_rdma_end,P_rdma_start,P_zmq_received",2357.084348,7,8640,24047,1503,2116.110056,24047,58.06029044634115,1514668032,92448,208.702784,24047,344019678368411,344022031530612,344022035452759,344021799287792,344022029515484,344021820812700,344019683177736,xfer-33003a8f-b840-4b7e-92e0-b86117e9c987
|
||||
3.856647,23.906271,0.719083,4.93606,15438,"D_get_num_matched,D_recv_complete,D_request_promoted,P_prefill_done,P_rdma_end,P_rdma_start,P_zmq_received",2926.607498,7,8640,24078,1505,2718.446051,24078,69.48766621702066,1517813760,92640,174.743386,24078,344023293025231,344026215776082,344026219632729,344026016407342,344026215056999,344026040313613,344023297961291,xfer-8de0b399-c504-4f07-a966-d52231d9d9b8
|
||||
2.777104,21.427507,0.702439,4.719515,15396,"D_get_num_matched,D_recv_complete,D_request_promoted,P_prefill_done,P_rdma_end,P_rdma_start,P_zmq_received",2317.351766,7,8640,24036,1503,2116.281928,24036,70.67844683529812,1514668032,92448,171.443273,24036,344027477037341,344029791612003,344029794389107,344029598038784,344029790909564,344029619466291,344027481756856,xfer-6bb9c369-7d98-476d-8c10-8a55a3b637be
|
||||
3.430388,21.708904,0.676655,5.76684,15384,"D_get_num_matched,D_recv_complete,D_request_promoted,P_prefill_done,P_rdma_end,P_rdma_start,P_zmq_received",2916.354688,7,8640,24024,1502,2712.008868,24024,70.06569133340001,1513095168,92352,172.763033,24024,344031054178050,344033967102350,344033970532738,344033771953758,344033966425695,344033793662662,344031059944890,xfer-9622617c-afc4-4f3d-acab-617802d71b42
|
||||
4.029216,20.945537,1.673896,5.401789,15458,"D_get_num_matched,D_recv_complete,D_request_promoted,P_prefill_done,P_rdma_end,P_rdma_start,P_zmq_received",2337.444843,7,8640,24098,1507,2121.993214,24098,66.34458499236246,1520959488,92832,183.401191,24098,344035231741558,344037565157185,344037569186401,344037359136561,344037563483289,344037380082098,344035237143347,xfer-4ee6ddf8-3538-41e0-ac9a-13e25dd6c781
|
||||
3.933837,,,5.631332,26286,"D_get_num_matched,D_recv_complete,D_request_promoted,P_prefill_done,P_zmq_received",5418.119478,5,8624,34910,2182,5023.187616,34910,,,,,34910,344040846080028,344046260265669,344046264199506,344045874898976,,,344040851711360,xfer-b95f41e4-627a-4d9e-aa65-b98afafb06c0
|
||||
4.062967,5.096194,1.244185,4.667851,988,"D_get_num_matched,D_recv_complete,D_request_promoted,P_prefill_done,P_rdma_end,P_rdma_start,P_zmq_received",330.333453,7,34896,35884,2243,304.252371,35884,70.85819188847113,97517568,5952,11.009885,35884,344048361285763,344048687556249,344048691619216,344048670205985,344048686312064,344048675302179,344048365953614,xfer-204c5e89-95b3-4a59-8bbb-c03a1eac864c
|
||||
2.933318,,,6.931342,984,"D_get_num_matched,D_recv_complete,D_request_promoted,P_prefill_done,P_zmq_received",5330.989922,5,34912,35896,2244,5306.482294,35896,,,,,35896,344049993993743,344055322050347,344055324983665,344055307407379,,,344050000925085,xfer-a530e3ee-0c55-4a85-b988-110fd1cd3e5f
|
||||
4.099607,4.522637,1.671787,4.717849,964,"D_get_num_matched,D_recv_complete,D_request_promoted,P_prefill_done,P_rdma_end,P_rdma_start,P_zmq_received",327.05328,7,34912,35876,2243,301.171991,35876,70.61631704170853,95944704,5856,10.869409,35876,344056625957769,344056948911442,344056953011049,344056931847609,344056947239655,344056936370246,344056630675618,xfer-972f5586-7436-4a56-b437-a504e6b94fc3
|
||||
3.841064,,,4.577476,952,"D_get_num_matched,D_recv_complete,D_request_promoted,P_prefill_done,P_zmq_received",5270.728238,5,34912,35864,2242,5247.707635,35864,,,,,35864,344058254091725,344063520978899,344063524819963,344063506376836,,,344058258669201,xfer-35222c64-1117-4742-8ff8-9cf890498f1d
|
||||
4.324731,5.236853,0.604215,4.951784,975,"D_get_num_matched,D_recv_complete,D_request_promoted,P_prefill_done,P_rdma_end,P_rdma_start,P_zmq_received",327.583453,7,34912,35887,2243,301.618389,35887,70.75906673632338,95944704,5856,10.847481,35887,344064830066353,344065153325075,344065157649806,344065136636526,344065152720860,344065141873379,344064835018137,xfer-c979d8d1-17a5-4624-b46b-b19da335f930
|
||||
4.054432,,,5.409207,3816,"D_get_num_matched,D_recv_complete,D_request_promoted,P_prefill_done,P_zmq_received",6065.42777,5,34912,38728,2421,6003.743773,38728,,,,,38728,344068478934114,344074540307452,344074544361884,344074488087094,,,344068484343321,xfer-922060d3-5606-41bd-a728-cb9286104c5f
|
||||
4.161682,11.759598,0.912837,4.801053,3787,"D_get_num_matched,D_recv_complete,D_request_promoted,P_prefill_done,P_rdma_end,P_rdma_start,P_zmq_received",1067.077186,7,34912,38699,2419,1002.993495,38699,70.25333448013419,372768768,22752,42.448521,38699,344075859994221,344076922909725,344076927071407,344076867788769,344076921996888,344076879548367,344075864795274,xfer-989b23c9-0a4a-4f61-be7b-f47c2cd5c68c
|
||||
4.462714,,,4.792675,3802,"D_get_num_matched,D_recv_complete,D_request_promoted,P_prefill_done,P_zmq_received",6058.89703,5,34912,38714,2420,5998.51813,38714,,,,,38714,344078241308449,344084295742765,344084300205479,344084244619254,,,344078246101124,xfer-6d52cb9b-7e69-498a-a14d-c4ef18466455
|
||||
3.990764,8.428795,0.813159,4.765448,3803,"D_get_num_matched,D_recv_complete,D_request_promoted,P_prefill_done,P_rdma_end,P_rdma_start,P_zmq_received",1065.965013,7,34912,38715,2420,1005.500559,38715,70.52024551804482,374341632,22848,42.466288,38715,344085614679887,344086676654136,344086680644900,344086624945894,344086675840977,344086633374689,344085619445335,xfer-2f329fc6-dce1-40e9-bd89-6afb67f09a63
|
||||
2.980913,,,4.953564,3799,"D_get_num_matched,D_recv_complete,D_request_promoted,P_prefill_done,P_zmq_received",6055.683733,5,34912,38711,2420,5996.9245,38711,,,,,38711,344087991204865,344094043907685,344094046888598,344093993082929,,,344087996158429,xfer-2d30d1c4-5c17-4da2-88a1-c098e973f146
|
||||
3.278995,21.052319,0.651839,5.371544,15414,"D_get_num_matched,D_recv_complete,D_request_promoted,P_prefill_done,P_rdma_end,P_rdma_start,P_zmq_received",4632.479244,7,34912,50326,3146,4431.768172,50326,71.20324770939743,1516240896,92544,170.356375,50326,344097383047854,344102012248103,344102015527098,344101820187570,344102011596264,344101841239889,344097388419398,xfer-3d9b6f40-000a-499b-9eeb-f2cd0158e3cc
|
||||
4.210185,,,5.418244,15459,"D_get_num_matched,D_recv_complete,D_request_promoted,P_prefill_done,P_zmq_received",9666.724654,5,34912,50371,3149,9455.88659,50371,,,,,50371,344103365721735,344113028236204,344113032446389,344112827026569,,,344103371139979,xfer-6e5b9dfc-5791-483e-bd58-ca0a820f6f20
|
||||
3.171092,22.117254,0.897336,5.724271,15402,"D_get_num_matched,D_recv_complete,D_request_promoted,P_prefill_done,P_rdma_end,P_rdma_start,P_zmq_received",4630.020597,7,34912,50314,3145,4426.413397,50314,70.57389951045633,1514668032,92448,171.697247,50314,344114386617482,344119013466987,344119016638079,344118818755150,344119012569651,344118840872404,344114392341753,xfer-9194c65d-89ee-4cfe-9403-01c80df0690e
|
||||
4.01791,,,5.373968,15504,"D_get_num_matched,D_recv_complete,D_request_promoted,P_prefill_done,P_zmq_received",9744.213969,5,34912,50416,3151,9537.277953,50416,,,,,50416,344120376076315,344130116272374,344130120290284,344129918728236,,,344120381450283,xfer-b40b0fbf-9f16-4040-bbb7-867c5e4960c7
|
||||
3.188845,20.508939,0.621964,5.709689,15456,"D_get_num_matched,D_recv_complete,D_request_promoted,P_prefill_done,P_rdma_end,P_rdma_start,P_zmq_received",4602.182752,7,34912,50368,3148,4399.785916,50368,70.51851488459253,1519386624,92736,172.367399,50368,344131502136745,344136101130652,344136104319497,344135907632350,344136100508688,344135928141289,344131507846434,xfer-1d314942-867e-4677-bd2c-f920c7aaf618
|
||||
|
273
microbench/patches/analyze_events.py
Normal file
273
microbench/patches/analyze_events.py
Normal file
@@ -0,0 +1,273 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Merge PD-sep event logs from P and D into per-request breakdown.
|
||||
|
||||
Reads JSONL event logs produced by the patched mooncake_connector +
|
||||
scheduler, joins events by request_id / transfer_id, and emits a
|
||||
per-request CSV with the lifecycle phase durations:
|
||||
|
||||
  d_to_p_dispatch   : P_zmq_received - D_get_num_matched
      (time from D first seeing the request to D's pull request reaching P)

  prefill_compute   : P_prefill_done - P_zmq_received
      (no explicit P-side start event is logged, so the interval is
      measured from P_zmq_received; P_prefill_done = prefill finished,
      blocks ready)

  build_params      : P_rdma_start - P_prefill_done
      (time from blocks being ready to the RDMA write starting)
|
||||
|
||||
rdma_transfer : P_rdma_end - P_rdma_start
|
||||
(pure RDMA write duration on P side)
|
||||
|
||||
completion_signal : D_recv_complete - P_rdma_end
|
||||
(RDMA completion event back to D side)
|
||||
|
||||
D_promote_delay : D_request_promoted - D_recv_complete
|
||||
(D scheduler step delay to wake the blocked request)
|
||||
|
||||
full_pdsep_overhead: D_request_promoted - D_get_num_matched
|
||||
(total server-side overhead from request arrival to schedulable)
|
||||
|
||||
Usage:
|
||||
    python analyze_events.py --events P_events.jsonl D_events.jsonl --out breakdown.csv
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
from collections import defaultdict
|
||||
from pathlib import Path
|
||||
|
||||
import csv
|
||||
|
||||
|
||||
def load_events(paths):
    """Yield event dicts from multiple JSONL files.

    Args:
        paths: Iterable of file paths, each holding one JSON object per line.

    Yields:
        dict: One decoded event per well-formed line, in file order.

    Blank lines, lines that fail to parse as JSON (for example a partially
    written final line) and JSON values that are not objects are skipped:
    every consumer in this module calls ``.get`` on the event, so only dicts
    are usable.
    """
    for path in paths:
        # Decode explicitly rather than depending on the platform's locale.
        with open(path, encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    record = json.loads(line)
                except json.JSONDecodeError:
                    continue
                if isinstance(record, dict):
                    yield record
|
||||
|
||||
|
||||
def group_events(events):
    """Build the ``req_id -> transfer_id`` mapping from an event stream.

    Despite the name, this function does not bucket the events themselves;
    it only collects the id mapping from events that carry both ids.

    Args:
        events: Iterable of event dicts (consumed once).

    Returns:
        dict: ``{req_id: transfer_id}``. When a req_id appears with several
        transfer ids, the last one seen wins.
    """
    req_to_transfer = {}
    for ev in events:
        tid = ev.get("transfer_id", "")
        rid = ev.get("req_id", "")
        # Events with an empty/missing id on either side cannot link anything.
        if tid and rid:
            req_to_transfer[rid] = tid
    return req_to_transfer
|
||||
|
||||
|
||||
def _merge_event(slot: dict, ev: dict) -> None:
|
||||
"""Add event to per-request slot. For repeating events:
|
||||
- 'end'/'complete'/'promoted' → keep the LATEST
|
||||
- everything else → keep the EARLIEST."""
|
||||
name = ev["event"]
|
||||
existing = slot.get(name)
|
||||
if existing is None:
|
||||
slot[name] = ev
|
||||
return
|
||||
is_end_like = any(s in name for s in ("rdma_end", "recv_complete", "promoted", "prefill_done"))
|
||||
if is_end_like:
|
||||
if ev["t_ns"] > existing["t_ns"]:
|
||||
slot[name] = ev
|
||||
else:
|
||||
if ev["t_ns"] < existing["t_ns"]:
|
||||
slot[name] = ev
|
||||
|
||||
|
||||
def _build_row(tid, evmap):
    """Flatten one transfer's ``{event_name: event}`` map into a CSV row."""

    def stamp(name):
        ev = evmap.get(name)
        return ev["t_ns"] if ev else None

    def field(name, key, default=None):
        ev = evmap.get(name)
        if not ev:
            return default
        # Tolerate events logged without a "data" payload.
        return (ev.get("data") or {}).get(key, default)

    row = {
        "transfer_id": tid,
        "n_events": len(evmap),
        "events_seen": ",".join(sorted(evmap.keys())),
        # data fields
        "num_local_cached": field("D_get_num_matched", "num_local_cached"),
        "prompt_tokens": field("D_get_num_matched", "prompt_tokens"),
        "remote_total": field("D_get_num_matched", "remote_total"),
        "delta_to_pull": field("D_get_num_matched", "delta_to_pull"),
        "num_send_blocks": field("P_prefill_done", "num_send_blocks"),
        "num_prompt_tokens_P": field("P_prefill_done", "num_prompt_tokens"),
        "rdma_num_ops": field("P_rdma_end", "num_ops"),
        "rdma_bytes": field("P_rdma_end", "bytes_total"),
    }

    # Raw timestamps, still in nanoseconds (None when the event is missing).
    ts = {
        "t_D_get_num_matched": stamp("D_get_num_matched"),
        "t_P_prefill_done": stamp("P_prefill_done"),
        "t_P_zmq_received": stamp("P_zmq_received"),
        "t_P_rdma_start": stamp("P_rdma_start"),
        "t_P_rdma_end": stamp("P_rdma_end"),
        "t_D_recv_complete": stamp("D_recv_complete"),
        "t_D_request_promoted": stamp("D_request_promoted"),
    }
    row.update(ts)

    def dur(a, b):
        """Return ts[b] - ts[a] in milliseconds, or None if either is missing."""
        ta, tb = ts.get(a), ts.get(b)
        if ta is None or tb is None:
            return None
        return (tb - ta) / 1e6

    row["d_to_p_dispatch_ms"] = dur("t_D_get_num_matched", "t_P_zmq_received")
    row["prefill_compute_ms"] = dur("t_P_zmq_received", "t_P_prefill_done")
    row["build_params_ms"] = dur("t_P_prefill_done", "t_P_rdma_start")
    row["rdma_transfer_ms"] = dur("t_P_rdma_start", "t_P_rdma_end")
    row["completion_sig_ms"] = dur("t_P_rdma_end", "t_D_recv_complete")
    row["D_promote_ms"] = dur("t_D_recv_complete", "t_D_request_promoted")
    row["full_overhead_ms"] = dur("t_D_get_num_matched", "t_D_request_promoted")

    # Effective transfer bandwidth: bits / seconds, reported in Gbit/s.
    rt = row["rdma_transfer_ms"]
    bytes_ = row["rdma_bytes"]
    if rt and bytes_ and rt > 0:
        row["rdma_bandwidth_gbps"] = (bytes_ * 8 / (rt / 1000)) / 1e9
    else:
        row["rdma_bandwidth_gbps"] = None
    return row


def build_per_request(event_files, max_zmq_gap_s=60.0):
    """Walk all events, group them by transfer_id and compute the breakdown.

    Args:
        event_files: Iterable of JSONL event-log paths (passed to
            ``load_events``).
        max_zmq_gap_s: Maximum age, in seconds, of the ZMQ batch that an
            untagged ``P_rdma_start`` / ``P_rdma_end`` event may be attached
            to. The previous fixed 5 s window was shorter than the
            ZMQ-to-RDMA gap of long prefills, so those requests silently
            lost their RDMA events.

    Returns:
        list[dict]: One row per transfer id with the data fields, raw
        nanosecond timestamps, phase durations in ms and RDMA bandwidth.

    NOTE(review): untagged RDMA events are attributed to the nearest
    preceding ZMQ batch purely by time; with overlapping transfers this can
    presumably pick the wrong batch. Confirm the workload is sequential.
    """
    from bisect import bisect_right

    # Logs are small enough to hold in memory. Records without a name or a
    # timestamp cannot be placed on a timeline, so drop them up front.
    all_events = [
        ev for ev in load_events(event_files)
        if isinstance(ev, dict) and "event" in ev and "t_ns" in ev
    ]

    # req_id -> transfer_id, from events that carry both (last one wins).
    req_to_transfer = {}
    for ev in all_events:
        tid = ev.get("transfer_id", "")
        rid = ev.get("req_id", "")
        if tid and rid:
            req_to_transfer[rid] = tid

    # P_zmq_received lists its transfer ids under data.transfer_ids. Tag the
    # event with them and remember (t_ns, tids) for linking RDMA events.
    zmq_records = []
    for ev in all_events:
        if ev["event"] != "P_zmq_received":
            continue
        tids = (ev.get("data") or {}).get("transfer_ids") or []
        if tids:
            ev["transfer_id"] = tids[0]  # first id acts as the primary key
            ev["_tids_in_batch"] = tids
            zmq_records.append((ev["t_ns"], tids))

    zmq_records.sort()
    zmq_times = [ts for ts, _ in zmq_records]
    max_gap_ns = max_zmq_gap_s * 1e9

    def find_zmq_batch(t_ns):
        """Return the tids of the latest batch at or before t_ns, if recent enough."""
        idx = bisect_right(zmq_times, t_ns) - 1
        if idx < 0:
            return None
        ts, tids = zmq_records[idx]
        # Older batches are even further away, so this is the only candidate.
        return tids if (t_ns - ts) < max_gap_ns else None

    # Tag untagged RDMA events with the nearest preceding ZMQ batch.
    for ev in all_events:
        if ev["event"] in ("P_rdma_start", "P_rdma_end") and not ev.get("transfer_id"):
            tids = find_zmq_batch(ev["t_ns"])
            if tids:
                ev["transfer_id"] = tids[0]
                ev["_tids_in_batch"] = tids

    # Group events by transfer_id.
    by_xfer = defaultdict(dict)
    orphans = []
    for ev in all_events:
        tid = ev.get("transfer_id", "")
        rid = ev.get("req_id", "")
        if tid:
            key = tid
        elif rid in req_to_transfer:
            key = req_to_transfer[rid]
        else:
            orphans.append(ev)
            continue
        # A batch-level event belongs to every transfer in the batch, so it
        # is replicated under each transfer id.
        tids_in_batch = ev.get("_tids_in_batch", [tid] if tid else [])
        if len(tids_in_batch) > 1:
            for t in tids_in_batch:
                _merge_event(by_xfer[t], ev)
        else:
            _merge_event(by_xfer[key], ev)

    print(f"Loaded {len(all_events)} events, grouped into {len(by_xfer)} requests, "
          f"{len(orphans)} orphans")

    return [_build_row(tid, evmap) for tid, evmap in by_xfer.items()]
|
||||
|
||||
|
||||
def main():
    """CLI entry point: build the per-request breakdown and write it as CSV.

    Exits with "Not found: <path>" for the first missing event file. After
    writing the CSV, prints the median of each phase over the rows that have
    a non-None ``full_overhead_ms``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--events",
        nargs="+",
        required=True,
        help="One or more JSONL event log files",
    )
    parser.add_argument("--out", default="breakdown.csv")
    opts = parser.parse_args()

    event_paths = [Path(raw) for raw in opts.events]
    missing = next((ep for ep in event_paths if not ep.exists()), None)
    if missing is not None:
        raise SystemExit(f"Not found: {missing}")

    rows = build_per_request(event_paths)
    if not rows:
        print("No grouped requests found.")
        return

    # Column set is the union of keys over all rows, in sorted order.
    columns = set()
    for row in rows:
        columns.update(row)
    with open(opts.out, "w", newline="") as f:
        writer = csv.DictWriter(f, fieldnames=sorted(columns))
        writer.writeheader()
        writer.writerows(rows)
    print(f"Wrote {len(rows)} request breakdowns → {opts.out}")

    # Quick summary over requests that have both endpoints of the lifecycle.
    complete = [r for r in rows if r.get("full_overhead_ms") is not None]
    print(f"\nComplete-event requests: {len(complete)}/{len(rows)}")
    if not complete:
        return

    import statistics as st

    summary_keys = (
        "d_to_p_dispatch_ms", "prefill_compute_ms", "build_params_ms",
        "rdma_transfer_ms", "completion_sig_ms", "D_promote_ms",
        "full_overhead_ms", "rdma_bandwidth_gbps",
        "delta_to_pull", "num_local_cached", "rdma_bytes",
    )
    for k in summary_keys:
        vals = [r[k] for r in complete if r.get(k) is not None]
        if not vals:
            continue
        print(f" {k:<24} median={st.median(vals):.1f} n={len(vals)}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
285
microbench/patches/apply_patches.py
Normal file
285
microbench/patches/apply_patches.py
Normal file
@@ -0,0 +1,285 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Apply (or revert) PD-sep profiling instrumentation to vLLM.
|
||||
|
||||
Inserts time.perf_counter_ns() emit calls into mooncake_connector.py at:
|
||||
- get_num_new_matched_tokens() [D: cache hit, delta]
|
||||
- update_state_after_alloc() [D: blocks allocated]
|
||||
- send_kv_to_decode() [P: zmq from D arrived]
|
||||
- record_send_reqs() [P: prefill ready event set]
|
||||
- _send_blocks() [P: RDMA start/end + bytes]
|
||||
- process_pulling_result() [D: RDMA recv complete]
|
||||
- request_finished() [common: request lifecycle end]
|
||||
|
||||
And into scheduler.py at:
|
||||
  - _update_waiting_for_remote_kv()  [D: request promoted]
|
||||
|
||||
Marker comment "# PD_PROFILE_PATCH" is added so revert can locate inserts.
|
||||
|
||||
Usage:
|
||||
python apply_patches.py [--apply | --revert] [--vllm-root PATH]
|
||||
|
||||
The patches are idempotent: --apply on already-patched code is a no-op.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import re
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# Comment tag carried by inserted lines; the revert path drops every line
# that contains it, and its presence marks a file as already patched.
MARKER = "# PD_PROFILE_PATCH"

# Default location: venv vLLM 0.18.1 on dash0
DEFAULT_VLLM_ROOT = Path.home() / "agentic-kv/.venv/lib/python3.12/site-packages/vllm"
|
||||
|
||||
|
||||
def _find_block(text: str, signature: str) -> tuple[int, int] | None:
|
||||
"""Find the start/end line numbers of a function or method definition."""
|
||||
lines = text.splitlines()
|
||||
for i, line in enumerate(lines):
|
||||
if signature in line:
|
||||
# find indent
|
||||
stripped = line.lstrip()
|
||||
indent = len(line) - len(stripped)
|
||||
# find function end: next line with <= indent and not blank
|
||||
for j in range(i + 1, len(lines)):
|
||||
next_line = lines[j]
|
||||
if next_line.strip() == "":
|
||||
continue
|
||||
next_indent = len(next_line) - len(next_line.lstrip())
|
||||
if next_indent <= indent and not next_line.lstrip().startswith("#"):
|
||||
return i, j
|
||||
return i, len(lines)
|
||||
return None
|
||||
|
||||
|
||||
def _insert_after_line(text: str, line_no: int, snippet: str) -> str:
|
||||
"""Insert snippet after line_no (1-indexed). snippet should not include trailing newline."""
|
||||
lines = text.splitlines()
|
||||
lines.insert(line_no, snippet)
|
||||
return "\n".join(lines) + ("\n" if text.endswith("\n") else "")
|
||||
|
||||
|
||||
def _already_patched(text: str) -> bool:
    """Report whether ``text`` contains the patch marker anywhere."""
    return text.find(MARKER) != -1
|
||||
|
||||
|
||||
def _revert(text: str) -> str:
    """Return ``text`` without any line that contains the marker.

    The result ends with a newline exactly when ``text`` did.
    """
    kept = []
    for row in text.splitlines():
        if MARKER in row:
            continue
        kept.append(row)
    tail = "\n" if text.endswith("\n") else ""
    return "\n".join(kept) + tail
|
||||
|
||||
|
||||
# ─── Patch definitions ──────────────────────────────────────────────────────
|
||||
|
||||
def patch_mooncake_connector(text: str) -> str:
    """Apply patches to mooncake_connector.py.

    Inserts the profiler import plus emit calls for D_get_num_matched,
    D_alloc_blocks, P_zmq_received, P_rdma_start / P_rdma_end,
    D_recv_complete and P_prefill_done. Each insertion is a regex
    substitution applied at most once (``count=1``).

    Args:
        text: Current contents of mooncake_connector.py.

    Returns:
        The patched source, or ``text`` unchanged when the marker is already
        present.

    ``re.sub`` leaves the text untouched when a pattern is absent, so a
    connector whose layout differs loses that event without any message.

    NOTE(review): the patterns match literal whitespace; confirm the leading
    spaces inside each literal against the indentation of the installed
    vLLM source.
    """
    if _already_patched(text):
        print(" mooncake_connector.py already patched, skipping")
        return text

    # 1. Add the profiler import (single line, tagged with MARKER).
    import_snippet = (
        "from vllm.distributed.kv_transfer.kv_connector.v1.mooncake "
        "import _pd_profile as _pdp " + MARKER
    )
    # Insert after the last line in the first 80 that starts with
    # "import " or "from " (falls back to after line 0 if none is found).
    lines = text.splitlines()
    last_import_line = 0
    for i, line in enumerate(lines[:80]):
        if line.startswith("import ") or line.startswith("from "):
            last_import_line = i
    lines.insert(last_import_line + 1, import_snippet)
    text = "\n".join(lines) + "\n"

    # 2. Patch get_num_new_matched_tokens (D side, scheduler).
    # Inject right before "return count, True"; no other return is patched.
    text = re.sub(
        r"(\n if count > 0:\n return count, True\n)",
        r"\n if count > 0:\n _pdp.emit('D_get_num_matched', "
        r"req_id=request.request_id, role='kv_consumer', "
        r"num_local_cached=num_computed_tokens, prompt_tokens=len(token_ids), "
        r"remote_total=remote_total, delta_to_pull=count) " + MARKER + "\n"
        r" return count, True\n",
        text, count=1,
    )

    # 3. Patch update_state_after_alloc (D side): emit just before the
    # "self._reqs_need_recv[request_id] = PullReqMeta(" assignment.
    text = re.sub(
        r"(\n self\._reqs_need_recv\[request_id\] = PullReqMeta\()",
        r"\n _pdp.emit('D_alloc_blocks', req_id=request_id, "
        r"role='kv_consumer', num_local_blocks=len(local_block_ids), "
        r"num_external_tokens=num_external_tokens) " + MARKER +
        r"\n self._reqs_need_recv[request_id] = PullReqMeta(",
        text, count=1,
    )

    # 4. Patch send_kv_to_decode entry (P side): ZMQ message received.
    # The emit carries every transfer id in the batch but no req_id.
    text = re.sub(
        r"( async def send_kv_to_decode\(\n"
        r" self, identity: bytes, sock: zmq\.asyncio\.Socket, meta: MooncakeXferMetadata\n"
        r" \):\n)",
        r"\1 _pdp.emit('P_zmq_received', role='kv_producer', "
        r"num_reqs=len(meta.req_blocks), remote_host=meta.remote_hostname, "
        r"transfer_ids=[tid for tid, _ in meta.req_blocks.values()]) " + MARKER + "\n",
        text, count=1,
    )

    # 5. Patch _send_blocks (P side): wrap the RDMA write.
    # Emits go before and after self.engine.batch_transfer_sync_write; neither
    # carries a req_id or transfer_id.
    text = re.sub(
        r"( start_time = time\.perf_counter\(\)\n"
        r" ret_value = self\.engine\.batch_transfer_sync_write\(\n"
        r" remote_session, src_ptrs, dst_ptrs, lengths\n"
        r" \))",
        r" _pdp.emit('P_rdma_start', role='kv_producer', "
        r"num_ops=len(src_ptrs), bytes_total=sum(lengths), remote=str(remote_session)) " + MARKER + "\n"
        r"\1\n"
        r" _pdp.emit('P_rdma_end', role='kv_producer', "
        r"num_ops=len(src_ptrs), bytes_total=sum(lengths), ret=ret_value) " + MARKER,
        text, count=1,
    )

    # 6. Patch process_pulling_result (D side): RDMA recv complete (success path).
    # Match the specific pattern inside `if pull_meta.pull_tasks_count == 0:`
    text = re.sub(
        r"(pull_meta\.pull_tasks_count -= 1\n"
        r" if pull_meta\.pull_tasks_count == 0:\n"
        r")( self\.finished_recving_reqs\.add\(pull_meta\.d_req_id\))",
        r"\1 _pdp.emit('D_recv_complete', req_id=pull_meta.d_req_id, "
        r"transfer_id=pull_meta.transfer_id, role='kv_consumer') " + MARKER + "\n"
        r"\2",
        text, count=1,
    )

    # 7. Patch request_finished (P side): prefill blocks are marked ready to send.
    # Find: self._reqs_need_send[request.request_id] = (request, send_block_ids)
    text = re.sub(
        r"(\n if delay_free_blocks:\n"
        r" self\._reqs_need_send\[request\.request_id\] = \(request, send_block_ids\))",
        r"\n if delay_free_blocks:\n"
        r" _pdp.emit('P_prefill_done', req_id=request.request_id, "
        r"transfer_id=params.get('transfer_id', ''), role='kv_producer', "
        r"num_send_blocks=len(send_block_ids), num_prompt_tokens=request.num_prompt_tokens) "
        + MARKER + "\n"
        r" self._reqs_need_send[request.request_id] = (request, send_block_ids)",
        text, count=1,
    )

    return text
|
||||
|
||||
|
||||
def patch_scheduler(text: str) -> str:
    """Patch v1/core/sched/scheduler.py for D-side request promotion.

    Two insertions are made, and every inserted line ends in MARKER so that a
    marker-based revert removes all of them:

      1. a guarded import of the profiler, with a no-op fallback class when
         the module is missing, placed after the last top-level import found
         in the first 100 lines;
      2. a ``D_request_promoted`` emit as the first statement of
         ``_update_waiting_for_remote_kv``.

    Args:
        text: Current contents of scheduler.py.

    Returns:
        The patched source, or ``text`` unchanged when the marker is already
        present.
    """
    if _already_patched(text):
        print(" scheduler.py already patched, skipping")
        return text

    lines = text.splitlines()
    last_import_line = None
    for i, line in enumerate(lines[:100]):
        if line.startswith(("import ", "from ")):
            last_import_line = i

    if last_import_line is None:
        # No import found: keep the historical fallback of inserting
        # directly below the first line.
        insert_at = 1
    else:
        insert_at = last_import_line + 1
        # If that import opens a parenthesized list, step past its closing
        # line so the snippet is not spliced into the middle of it.
        depth = lines[last_import_line].count("(") - lines[last_import_line].count(")")
        while depth > 0 and insert_at < len(lines):
            depth += lines[insert_at].count("(") - lines[insert_at].count(")")
            insert_at += 1

    import_lines = [
        "try:",
        "    from vllm.distributed.kv_transfer.kv_connector.v1.mooncake import _pd_profile as _pdp",
        "except Exception:",
        "    class _pdp:  # fallback no-op",
        "        @staticmethod",
        "        def emit(*a, **kw): pass",
        "        @staticmethod",
        "        def enabled(): return False",
    ]
    # Tag EVERY line: revert works line by line, so an untagged line of this
    # block would be left behind and a later apply would add a second copy.
    lines[insert_at:insert_at] = [f"{ln}  {MARKER}" for ln in import_lines]
    text = "\n".join(lines) + "\n"

    def _add_emit(match):
        # group(1) is the whole def line, group(2) its leading indentation.
        # The body is assumed to be indented four spaces deeper than the def.
        header, indent = match.group(1), match.group(2)
        return (
            f"{header}{indent}    _pdp.emit('D_request_promoted', req_id=request.request_id, "
            f"role='kv_consumer', num_computed_tokens=request.num_computed_tokens) {MARKER}\n"
        )

    # Match exact `request: Request` (no quotes); indentation is captured
    # rather than hard-coded so the pattern does not depend on nesting depth.
    text, n_subs = re.subn(
        r"(^([ \t]*)def _update_waiting_for_remote_kv\(self, request: Request\) -> None:\n)",
        _add_emit,
        text,
        count=1,
        flags=re.MULTILINE,
    )
    if n_subs == 0:
        print(" WARNING: _update_waiting_for_remote_kv not found; "
              "D_request_promoted will not be emitted")

    return text
|
||||
|
||||
|
||||
# ─── Driver ────────────────────────────────────────────────────────────────
|
||||
|
||||
def apply_to_file(path: Path, patcher) -> bool:
    """Run ``patcher`` over the file at ``path`` and write back any change.

    Args:
        path: File to patch.
        patcher: Callable taking the file text and returning the new text.

    Returns:
        True if the file was rewritten; False if it is missing or the
        patcher returned the text unchanged.
    """
    if not path.exists():
        print(f" SKIP {path} (not found)")
        return False

    before = path.read_text()
    after = patcher(before)
    changed = after != before
    if changed:
        path.write_text(after)
        print(f" patched ({after.count(MARKER)} marks): {path}")
    else:
        print(f" unchanged: {path}")
    return changed
|
||||
|
||||
|
||||
def revert_file(path: Path) -> bool:
    """Strip marker-tagged lines from the file at ``path``.

    Returns:
        True if the file was rewritten; False if it is missing (silently) or
        contains no tagged lines.
    """
    if not path.exists():
        return False

    before = path.read_text()
    after = _revert(before)
    changed = after != before
    if changed:
        path.write_text(after)
        print(f" reverted: {path}")
    else:
        print(f" no marks found: {path}")
    return changed
|
||||
|
||||
|
||||
def install_profile_module(vllm_root: Path) -> None:
    """Copy pd_profile.py to mooncake/_pd_profile.py inside vLLM.

    The source is the ``pd_profile.py`` next to this script; the target
    directory is created if needed and an existing target is overwritten.
    """
    source_file = Path(__file__).parent / "pd_profile.py"
    target_file = vllm_root / "distributed/kv_transfer/kv_connector/v1/mooncake/_pd_profile.py"
    target_file.parent.mkdir(parents=True, exist_ok=True)
    payload = source_file.read_text()
    target_file.write_text(payload)
    print(f" installed: {target_file}")
|
||||
|
||||
|
||||
def main():
    """CLI entry point: apply or revert the profiling patches.

    Exactly one of --apply / --revert must be given. Exits with status 1
    when the vLLM root does not exist.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--apply", action="store_true")
    parser.add_argument("--revert", action="store_true")
    parser.add_argument("--vllm-root", type=Path, default=DEFAULT_VLLM_ROOT)
    opts = parser.parse_args()

    # Both flags are booleans: equal values mean "neither" or "both".
    if opts.apply == opts.revert:
        parser.error("Specify exactly one of --apply or --revert")

    root = opts.vllm_root
    if not root.exists():
        print(f"ERROR: vLLM root not found: {root}")
        sys.exit(1)

    mc_path = root / "distributed/kv_transfer/kv_connector/v1/mooncake/mooncake_connector.py"
    sched_path = root / "v1/core/sched/scheduler.py"

    if opts.apply:
        print(f"Applying PD-sep profile patches to {root}")
        install_profile_module(root)
        apply_to_file(mc_path, patch_mooncake_connector)
        apply_to_file(sched_path, patch_scheduler)
        return

    print(f"Reverting PD-sep profile patches from {root}")
    for target in (mc_path, sched_path):
        revert_file(target)
    # The installed profiler module is removed as well.
    prof_module = root / "distributed/kv_transfer/kv_connector/v1/mooncake/_pd_profile.py"
    if prof_module.exists():
        prof_module.unlink()
        print(f" removed: {prof_module}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
57
microbench/patches/pd_profile.py
Normal file
57
microbench/patches/pd_profile.py
Normal file
@@ -0,0 +1,57 @@
|
||||
"""
|
||||
PD-Sep Lifecycle Profiler — patch for mooncake_connector.py + scheduler.py
|
||||
|
||||
Activated by env var: VLLM_PD_PROFILE_LOG=/path/to/events.jsonl
|
||||
|
||||
Each line is one event:
|
||||
{"t_ns": <perf_counter_ns>, "event": "<name>", "req_id": "<id>",
|
||||
"transfer_id": "<id>", "role": "<kv_producer|kv_consumer>",
|
||||
"data": {...event-specific fields...}}
|
||||
|
||||
This module is imported by the patched mooncake_connector.py and scheduler.py
|
||||
to centralize event emission.
|
||||
"""
|
||||
|
||||
import os
|
||||
import json
|
||||
import time
|
||||
import threading
|
||||
from typing import Any
|
||||
|
||||
# Destination JSONL path, read once at import time; empty disables profiling.
_LOG_FILE = os.environ.get("VLLM_PD_PROFILE_LOG", "")
# Held around each write to the shared handle.
_LOCK = threading.Lock()
# Log file object; opened on first use by _get_handle().
_HANDLE = None
# True when a log path was configured.
_ENABLED = bool(_LOG_FILE)
|
||||
|
||||
|
||||
def _get_handle():
    """Return the shared append-mode log handle, opening it on first use.

    Returns:
        The open file object, or None when profiling is disabled.

    The open happens under ``_LOCK`` with a second check inside, so
    concurrent first calls cannot each open (and leak) their own handle.
    Callers must NOT hold ``_LOCK`` when calling this: the lock is not
    reentrant.
    """
    global _HANDLE
    if _HANDLE is None and _ENABLED:
        with _LOCK:
            # Another thread may have opened the file while we waited.
            if _HANDLE is None:
                _HANDLE = open(_LOG_FILE, "a", buffering=1, encoding="utf-8")  # line-buffered
    return _HANDLE
|
||||
|
||||
|
||||
def emit(event: str, req_id: str = "", transfer_id: str = "",
         role: str = "", **data: Any) -> None:
    """Record a profile event. No-op if VLLM_PD_PROFILE_LOG is not set.

    Appends one JSON object, terminated by a newline, to the profile log.

    Args:
        event: Event name stored under ``"event"``.
        req_id: Request identifier; stored as ``str(req_id)``.
        transfer_id: Transfer identifier; stored as ``str(transfer_id)``.
        role: Stored verbatim under ``"role"``.
        **data: Event-specific fields, stored as a dict under ``"data"``.
            Values that are not JSON-serializable are written as ``str(value)``.

    Any exception raised while opening, serializing or writing is swallowed
    on purpose: profiling must never break the host process.
    """
    if not _ENABLED:
        return
    record = {
        # NOTE(review): time.perf_counter_ns() has an undefined reference
        # point, so t_ns values from different processes are only comparable
        # if those processes share the same underlying clock - confirm this
        # holds for the logs that get joined.
        "t_ns": time.perf_counter_ns(),
        "event": event,
        "req_id": str(req_id),
        "transfer_id": str(transfer_id),
        "role": role,
        "data": data,
    }
    try:
        handle = _get_handle()
        if not handle:
            return
        # Serialize before taking the lock so concurrent emitters only
        # contend on the write itself, not on JSON encoding.
        line = json.dumps(record, default=str) + "\n"
        with _LOCK:
            handle.write(line)
    except Exception:
        pass  # never let profiling break vLLM
|
||||
|
||||
|
||||
def enabled() -> bool:
    """Return the module-level ``_ENABLED`` flag (profiling on/off)."""
    return _ENABLED
|
||||
162
microbench/plot_breakdown.py
Normal file
162
microbench/plot_breakdown.py
Normal file
@@ -0,0 +1,162 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Stacked-bar breakdown of PD-sep request latency.
|
||||
|
||||
Axes:
|
||||
X : total input length (N_total), grouped by cache hit ratio
|
||||
Stacks: prefill compute (red) | KV transfer RDMA (orange) | decode (steelblue)
|
||||
|
||||
Measured constants (H20, Qwen3-Coder-30B-A3B, from microbench):
|
||||
cold_prefill_ms(n) ≈ 0.072 * n (interference D=1 prefill_ttft, n=2k-16k)
|
||||
kv_transfer_ms(n) = 35 + n * 96KB * 8 / 25Gbps (warm Mooncake RDMA)
|
||||
decode_ms = output_tokens * 7.0ms/token
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
import matplotlib
|
||||
matplotlib.use("Agg")
|
||||
import matplotlib.pyplot as plt
|
||||
import matplotlib.patches as mpatches
|
||||
from pathlib import Path
|
||||
|
||||
# The figure is written under lifecycle/results/ next to this script; the
# directory is created up front so the later savefig() cannot fail on it.
HERE = Path(__file__).parent
OUT = HERE.joinpath("lifecycle/results/fig_breakdown.png")
OUT.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# ── measured constants ───────────────────────────────────────────────────────
MS_PER_TOK_COLD = 0.072  # ms / new token (cold prefill, linear regime)
KV_BYTES_PER_TOK = 2*48*4*128*2  # 98304 B per token (Qwen3-30B-A3B)
RDMA_BW_GBPS = 25  # effective Mooncake bandwidth (measured)
RDMA_OVERHEAD_MS = 35  # warm-connection fixed overhead (measured)
DECODE_MS_PER_TOK = 7.0  # TPOT baseline p50
OUTPUT_TOKENS = 128  # representative output length for decode bar


def prefill_ms(n_new, ms_per_tok=None):
    """Return the modelled prefill time in ms for ``n_new`` uncached tokens.

    Args:
        n_new: Number of new tokens; values below 1 are treated as 1.
        ms_per_tok: Per-token cost in ms. Defaults to ``MS_PER_TOK_COLD``,
            looked up at call time.
    """
    if ms_per_tok is None:
        ms_per_tok = MS_PER_TOK_COLD
    return ms_per_tok * max(1, n_new)


def transfer_ms(n_new, bw_gbps=None, overhead_ms=None):
    """Return the modelled KV-transfer time in ms for ``n_new`` tokens.

    The model is a fixed overhead plus the payload size divided by the link
    bandwidth.

    Args:
        n_new: Number of tokens whose KV is sent; values below 1 count as 1.
        bw_gbps: Link bandwidth in Gbit/s. Defaults to ``RDMA_BW_GBPS``.
        overhead_ms: Fixed per-transfer cost in ms. Defaults to
            ``RDMA_OVERHEAD_MS``.

    Both defaults are resolved at call time, so the module constants remain
    the single source of truth when no override is passed.
    """
    if bw_gbps is None:
        bw_gbps = RDMA_BW_GBPS
    if overhead_ms is None:
        overhead_ms = RDMA_OVERHEAD_MS
    kv_bytes = KV_BYTES_PER_TOK * max(1, n_new)
    # bytes -> bits, divide by bits/s, then seconds -> milliseconds
    bw_ms = kv_bytes * 8 / (bw_gbps * 1e9) * 1000
    return overhead_ms + bw_ms
|
||||
|
||||
# ── sweep parameters ─────────────────────────────────────────────────────────
N_TOTALS = [1024, 2048, 4096, 8192, 16384, 32768]
CACHE_RATIOS = [0.0, 0.25, 0.50, 0.75]
CR_LABELS = ["0%", "25%", "50%", "75%"]
CR_ALPHAS = [1.0, 0.75, 0.50, 0.28]
CR_HATCHES = [None, None, "///", "///"]

C_PREFILL = "#d62728"
C_TRANSFER = "#ff7f0e"
C_DECODE = "#1f77b4"

# ── compute matrices ─────────────────────────────────────────────────────────
# Every matrix is indexed [input-length index, cache-ratio index].
nN, nC = len(N_TOTALS), len(CACHE_RATIOS)

# New (uncached) token count per cell; at least one token is always new.
_new_tokens = [
    [max(1, int(total * (1 - ratio))) for ratio in CACHE_RATIOS]
    for total in N_TOTALS
]

pf_mat = np.array(
    [[prefill_ms(n) for n in row] for row in _new_tokens], dtype=float
).reshape(nN, nC)
tr_mat = np.array(
    [[transfer_ms(n) for n in row] for row in _new_tokens], dtype=float
).reshape(nN, nC)
# Decode cost does not depend on the input length or the cache ratio.
dec_mat = np.full((nN, nC), DECODE_MS_PER_TOK * OUTPUT_TOKENS, dtype=float)
|
||||
|
||||
# ── plot ─────────────────────────────────────────────────────────────────────
fig, ax = plt.subplots(figsize=(13, 6.5))

bar_w = 0.18
group_gap = 1.0
x_centers = np.arange(nN) * group_gap
# One bar per cache ratio, spread symmetrically around each group's centre.
offsets = np.linspace(-(nC-1)/2, (nC-1)/2, nC) * bar_w

# Pass 1: stacked bars (bottom to top: prefill, transfer, decode).
for j in range(nC):
    xp = x_centers + offsets[j]
    pf = pf_mat[:, j]
    tr = tr_mat[:, j]
    dc = dec_mat[:, j]
    alpha = CR_ALPHAS[j]
    hatch = CR_HATCHES[j]
    kw = dict(width=bar_w, alpha=alpha,
              edgecolor="white", linewidth=0.5)
    if hatch:
        kw["hatch"] = hatch

    ax.bar(xp, pf, color=C_PREFILL, **kw)
    ax.bar(xp, tr, bottom=pf, color=C_TRANSFER, **kw)
    ax.bar(xp, dc, bottom=pf+tr, color=C_DECODE, **kw)

# Fix the y range before placing any text, so every label is offset by the
# same fraction of the final axis height (headroom of 18% for the labels).
totals = pf_mat + tr_mat + dec_mat
ymax = totals.max() * 1.18
ax.set_ylim(0, ymax)

# Pass 2: value labels on top of each stacked bar.
for j in range(nC):
    xp = x_centers + offsets[j]
    alpha = CR_ALPHAS[j]
    for xpos, total in zip(xp, totals[:, j]):
        s = f"{total/1000:.1f}s" if total >= 1000 else f"{total:.0f}ms"
        ax.text(xpos, total + ymax*0.01, s,
                ha="center", va="bottom",
                fontsize=7.2, color="black", alpha=max(alpha, 0.5))

# cache-ratio sub-labels below bars
for j in range(nC):
    for x in x_centers:
        xp = x + offsets[j]
        ax.text(xp, -ymax * 0.032, CR_LABELS[j],
                ha="center", va="top", fontsize=7.8,
                color="dimgrey", alpha=max(CR_ALPHAS[j], 0.4))

# Row caption placed to the left of the first sub-label.
ax.text(x_centers[0] + offsets[0] - bar_w,
        -ymax * 0.032, "cache\nhit:",
        ha="right", va="top", fontsize=7.5,
        color="dimgrey", style="italic")

ax.set_xticks(x_centers)
ax.set_xticklabels([f"{N//1024}k" for N in N_TOTALS], fontsize=12)
ax.set_xlabel("Total input tokens (N)", fontsize=12)
ax.set_ylabel("Latency (ms)", fontsize=12)
ax.set_title(
    "PD-Disaggregated Request Latency Breakdown\n"
    "Qwen3-Coder-30B-A3B · H20 · Mooncake RDMA · output=128 tokens",
    fontsize=13, fontweight="bold")
ax.yaxis.grid(True, linestyle="--", alpha=0.35)
ax.set_axisbelow(True)
|
||||
# ── legend ────────────────────────────────────────────────────────────────────
# First group: one swatch per latency phase (colour encodes the phase).
phase_h = [
    mpatches.Patch(color=colour, label=text)
    for colour, text in (
        (C_PREFILL, "Prefill compute (P node)"),
        (C_TRANSFER, "KV transfer (Mooncake RDMA)"),
        (C_DECODE, "Decode generation (D node)"),
    )
]
# Invisible entry that separates the two legend groups.
spacer = mpatches.Patch(color="none", label="")
# Second group: grey swatches showing the alpha/hatch used per cache ratio.
cr_h = []
for j in range(nC):
    swatch = mpatches.Patch(facecolor="grey", alpha=CR_ALPHAS[j],
                            hatch=(CR_HATCHES[j] or ""),
                            label=f"KV cache hit {CR_LABELS[j]}")
    cr_h.append(swatch)

legend_handles = phase_h + [spacer] + cr_h
ax.legend(handles=legend_handles,
          loc="upper left", fontsize=9, framealpha=0.9,
          ncol=2, columnspacing=1.2, handlelength=1.5)

# Leave a strip at the bottom of the figure for the sub-labels under the axis.
plt.tight_layout(rect=[0, 0.05, 1, 1])
plt.savefig(OUT, dpi=160, bbox_inches="tight")
print(f"Saved: {OUT}")
|
||||
|
||||
# ── print table ──────────────────────────────────────────────────────────────
# Text dump of the plotted values: one row per (N, cache ratio) cell, with an
# empty line after each input-length group.
print(f"\n{'N':>6} {'cache%':>7} | {'prefill':>8} {'transfer':>9} {'decode':>8} | {'E2E':>8}")
print("-" * 60)
for i, N in enumerate(N_TOTALS):
    for j, cr in enumerate(CACHE_RATIOS):
        pf, tr, dc = pf_mat[i, j], tr_mat[i, j], dec_mat[i, j]
        print(f"{N:>6} {cr*100:>6.0f}% | {pf:>8.0f} {tr:>9.0f} {dc:>8.0f} | {pf+tr+dc:>8.0f}")
    print()
|
||||
213
microbench/plot_breakdown_real.py
Normal file
213
microbench/plot_breakdown_real.py
Normal file
@@ -0,0 +1,213 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Plot REAL server-side breakdown from instrumented vLLM events.
|
||||
|
||||
Reads server_breakdown.csv (from analyze_events.py) and plots stacked bars:
|
||||
- prefill_compute (P-side)
|
||||
- rdma_transfer
|
||||
- other server overhead (dispatch + build_params + completion + promote)
|
||||
|
||||
Grouped by total prompt tokens, colored by cache hit ratio band.
|
||||
"""
|
||||
|
||||
import csv
|
||||
import numpy as np
|
||||
import matplotlib
|
||||
matplotlib.use("Agg")
|
||||
import matplotlib.pyplot as plt
|
||||
import matplotlib.patches as mpatches
|
||||
from pathlib import Path
|
||||
from collections import defaultdict
|
||||
|
||||
HERE = Path(__file__).parent
CSV = HERE / "lifecycle/results/server_breakdown.csv"
OUT = HERE / "lifecycle/results/fig_breakdown_real.png"

# ── load ─────────────────────────────────────────────────────────────────────
# Read every row up front inside a context manager so the file is closed as
# soon as parsing finishes; newline="" is what the csv module asks for when a
# file object is handed to one of its readers.
with open(CSV, newline="") as fh:
    rows = list(csv.DictReader(fh))
print(f"Loaded {len(rows)} request breakdowns")
|
||||
|
||||
def f(r, k, default=0.0):
    """Return field ``k`` of row ``r`` as a float, or ``default`` if unusable.

    Args:
        r: Mapping for one row (anything with a ``.get`` method).
        k: Key to read.
        default: Value returned when the key is absent, the value is ``""``
            or ``None``, or the value cannot be converted by ``float()``.

    Returns:
        The parsed float, or ``default``.
    """
    v = r.get(k, "")
    if v in ("", None):
        return default
    try:
        return float(v)
    except (TypeError, ValueError):
        # ValueError: non-numeric text; TypeError: a non-scalar value such as
        # a list. Both mean "no usable number here".
        return default
|
||||
|
||||
# Compute per-request fields
# (output key, CSV column) for the float fields copied through unchanged.
_FLOAT_COLUMNS = (
    ("rdma_ms", "rdma_transfer_ms"),
    ("dispatch_ms", "d_to_p_dispatch_ms"),
    ("build_params_ms", "build_params_ms"),
    ("completion_ms", "completion_sig_ms"),
    ("promote_ms", "D_promote_ms"),
    ("rdma_bytes", "rdma_bytes"),
    ("bandwidth_gbps", "rdma_bandwidth_gbps"),
)

data = []
for row in rows:
    prompt = int(f(row, "prompt_tokens"))
    cached = int(f(row, "num_local_cached"))
    delta = int(f(row, "delta_to_pull"))
    # Rows without a prompt length or with a negative pull delta are unusable.
    if prompt == 0 or delta < 0:
        continue
    if prompt > 0:
        ratio = cached / prompt
    else:
        ratio = 0.0

    # Some requests have negative prefill_compute (e.g., the trivial 11-token
    # case where P_zmq_received fires before D_get_num_matched). Skip those.
    prefill = f(row, "prefill_compute_ms")
    if prefill < 0:
        continue

    entry = {
        "prompt": prompt,
        "cached": cached,
        "delta": delta,
        "ratio": ratio,
        "prefill_ms": prefill,
    }
    for out_key, column in _FLOAT_COLUMNS:
        entry[out_key] = f(row, column)
    data.append(entry)

print(f"Usable: {len(data)} requests")
|
||||
|
||||
# ── bucket by (prompt size, cache band) ──────────────────────────────────────
|
||||
# Total prompt size buckets
|
||||
def bucket_N(n):
    """Map a prompt length to its nominal size bucket.

    Returns 1024, 4096 or 16384 for ``n`` strictly below 1500, 6000 or 22000
    respectively (first match wins), and 32768 otherwise.
    """
    for upper_bound, bucket in ((1500, 1024), (6000, 4096), (22000, 16384)):
        if n < upper_bound:
            return bucket
    return 32768
|
||||
|
||||
def cache_band(r):
    """Map a cache-hit ratio to its display band label.

    Ratios strictly below 0.1, 0.4 and 0.6 map to ``"0% (cold)"``, ``"~25%"``
    and ``"~50%"`` respectively (first match wins); everything else maps to
    ``"~75% (hot)"``.
    """
    for upper_bound, label in ((0.1, "0% (cold)"), (0.4, "~25%"), (0.6, "~50%")):
        if r < upper_bound:
            return label
    return "~75% (hot)"
|
||||
|
||||
# Metrics collected per (prompt bucket, cache band) cell.
_METRICS = (
    "prefill_ms", "rdma_ms", "dispatch_ms",
    "build_params_ms", "completion_ms", "promote_ms",
    "rdma_bytes", "bandwidth_gbps",
)

# agg[(bucket, band)][metric] -> list of per-request samples
agg = defaultdict(lambda: defaultdict(list))
for d in data:
    cell = agg[(bucket_N(d["prompt"]), cache_band(d["ratio"]))]
    for metric in _METRICS:
        cell[metric].append(d[metric])

# Stat per cell: the median of every metric plus the sample count "n".
summary = {}
for cell_key, samples in agg.items():
    stats = {metric: float(np.median(values)) for metric, values in samples.items()}
    stats["n"] = len(samples["prefill_ms"])
    summary[cell_key] = stats
|
||||
|
||||
# ── plot ─────────────────────────────────────────────────────────────────────
N_BUCKETS = sorted({k[0] for k in summary})
BANDS_ALL = ["0% (cold)", "~25%", "~50%", "~75% (hot)"]
# Only bands that actually occur in the data get a bar slot.
BANDS = [b for b in BANDS_ALL if any(k[1] == b for k in summary)]

C_PREFILL = "#d62728"
C_RDMA = "#ff7f0e"
C_OTHER = "#1f77b4"

BAND_ALPHAS = [1.0, 0.75, 0.50, 0.28]
BAND_HATCHES = [None, None, "///", "///"]
# Key the visual style by band identity, not by position in the filtered
# BANDS list: otherwise a band with no data would shift the alpha/hatch of
# every band after it, and the same band would look different across runs.
BAND_STYLE = {
    band: (alpha, hatch)
    for band, alpha, hatch in zip(BANDS_ALL, BAND_ALPHAS, BAND_HATCHES)
}
# Components stacked together as "other" server-side overhead.
OTHER_KEYS = ("dispatch_ms", "build_params_ms", "completion_ms", "promote_ms")


def _cell(N, band, key):
    """Return the median ``key`` for cell (N, band), or 0 if the cell is empty."""
    return summary.get((N, band), {}).get(key, 0)


fig, ax = plt.subplots(figsize=(12, 6.5))

nN = len(N_BUCKETS)
nB = len(BANDS)
bar_w = 0.18
x_centers = np.arange(nN) * 1.0
offsets = np.linspace(-(nB-1)/2, (nB-1)/2, nB) * bar_w

ymax_data = 0
for j, band in enumerate(BANDS):
    alpha, hatch = BAND_STYLE[band]
    xp = x_centers + offsets[j]

    pf = np.array([_cell(N, band, "prefill_ms") for N in N_BUCKETS])
    rd = np.array([_cell(N, band, "rdma_ms") for N in N_BUCKETS])
    ot = np.array([sum(_cell(N, band, key) for key in OTHER_KEYS)
                   for N in N_BUCKETS])

    kw = dict(width=bar_w, alpha=alpha, edgecolor="white", linewidth=0.5)
    if hatch:
        kw["hatch"] = hatch

    # Stack order bottom to top: prefill, RDMA transfer, other overhead.
    ax.bar(xp, pf, color=C_PREFILL, **kw)
    ax.bar(xp, rd, bottom=pf, color=C_RDMA, **kw)
    ax.bar(xp, ot, bottom=pf+rd, color=C_OTHER, **kw)

    total = pf + rd + ot
    ymax_data = max(ymax_data, total.max() if len(total) > 0 else 0)

# 18% headroom for the value labels; fall back to a unit range when there is
# nothing to plot so the axis limits are never degenerate (0, 0).
ymax = ymax_data * 1.18 if ymax_data > 0 else 1.0
ax.set_ylim(0, ymax)

# Value labels
for j, band in enumerate(BANDS):
    alpha = BAND_STYLE[band][0]
    xp = x_centers + offsets[j]
    for i, N in enumerate(N_BUCKETS):
        s = summary.get((N, band))
        if s is None:
            continue
        total = (s.get("prefill_ms", 0) + s.get("rdma_ms", 0) +
                 s.get("dispatch_ms", 0) + s.get("build_params_ms", 0) +
                 s.get("completion_ms", 0) + s.get("promote_ms", 0))
        if total <= 0:
            continue
        lbl = f"{total/1000:.1f}s" if total >= 1000 else f"{total:.0f}ms"
        ax.text(xp[i], total + ymax*0.01, lbl,
                ha="center", va="bottom", fontsize=7.2,
                color="black", alpha=max(alpha, 0.55))

# X axis
ax.set_xticks(x_centers)
ax.set_xticklabels([f"{N//1024}k" for N in N_BUCKETS], fontsize=12)
ax.set_xlabel("Total prompt tokens (bucket)", fontsize=12)
ax.set_ylabel("Server-side latency (ms)", fontsize=12)
ax.set_title(
    "REAL Server-Side PD-Sep Latency Breakdown\n"
    "Qwen3-Coder-30B-A3B · H20 · Mooncake · from instrumented vLLM events",
    fontsize=13, fontweight="bold")
ax.yaxis.grid(True, linestyle="--", alpha=0.35)
ax.set_axisbelow(True)

# Cache band sublabels
for j, band in enumerate(BANDS):
    short = band.split(" ")[0]
    for x in x_centers:
        xp = x + offsets[j]
        ax.text(xp, -ymax*0.035, short,
                ha="center", va="top", fontsize=7,
                color="dimgrey", alpha=max(BAND_STYLE[band][0], 0.5))

# Legend
phase = [
    mpatches.Patch(color=C_PREFILL, label="Prefill compute (P node)"),
    mpatches.Patch(color=C_RDMA, label="KV transfer (RDMA)"),
    mpatches.Patch(color=C_OTHER, label="Scheduling overhead (dispatch+params+signal+promote)"),
]
spacer = mpatches.Patch(color="none", label="")
band_handles = [
    mpatches.Patch(facecolor="grey", alpha=BAND_STYLE[band][0],
                   hatch=(BAND_STYLE[band][1] or ""),
                   label=f"Cache hit {band}")
    for band in BANDS
]
ax.legend(handles=phase + [spacer] + band_handles,
          loc="upper left", fontsize=8.5, framealpha=0.9,
          ncol=2, columnspacing=1.0)

plt.tight_layout(rect=[0, 0.04, 1, 1])
plt.savefig(OUT, dpi=160, bbox_inches="tight")
print(f"Saved: {OUT}")
|
||||
|
||||
# ── print summary ────────────────────────────────────────────────────────────
# One row per populated (bucket, band) cell, ordered by bucket then band label.
print(f"\n{'N_bucket':>10} {'band':<15} {'n':>3} | {'prefill':>8} {'rdma':>7} {'other':>6} | {'total':>7}")
print("-" * 70)
for (N, band), s in sorted(summary.items()):
    # "other" is the sum of the four scheduling-overhead medians.
    other = s["dispatch_ms"] + s["build_params_ms"] + s["completion_ms"] + s["promote_ms"]
    total = s["prefill_ms"] + s["rdma_ms"] + other
    print(f"{N:>10} {band:<15} {s['n']:>3} | {s['prefill_ms']:>8.0f} {s['rdma_ms']:>7.0f} {other:>6.1f} | {total:>7.0f}")
|
||||
Reference in New Issue
Block a user