Fix review bugs: PD-sep counter leaks, hardcoded paths, missing deps
Critical:
- cache_aware_proxy: _handle_pd_sep leaked p_inst.num_requests (never decremented) and never managed d_inst.num_requests; fix media_type from application/json to text/event-stream for SSE stream

High:
- b3_sweep/b3_isolated_policy/b3_analyze: replace hardcoded /home/admin/cpfs/wjh/ ROOT with script-relative $(dirname "$0")/..
- b3_analyze: replace hardcoded 8-port WORKER_MAP with dynamic generation from BASE_PORT and N_INSTANCES

Medium:
- analyze_breakdown: warn on stderr when records are skipped (was silent)
- deploy_vllm_patches: fail-fast on SSH/SCP errors instead of continuing with empty VENV_SITE
- pyproject.toml: declare fastapi and uvicorn as runtime dependencies
- launch_elastic_p2p: kill EngineCore and proxy in trap handler to prevent GPU memory leaks on exit
This commit is contained in:
@@ -1176,6 +1176,7 @@ async def _handle_pd_sep(api, req_data, request_id, token_ids, input_length,
|
||||
p_headers = {**headers, "X-data-parallel-rank": "0"}
|
||||
|
||||
p_inst.ongoing_tokens += input_length
|
||||
p_inst.num_requests += 1
|
||||
breakdown["t_prefill_sent"] = _time.monotonic()
|
||||
breakdown["t_prefill_sent_unix"] = _time.time()
|
||||
|
||||
@@ -1194,9 +1195,11 @@ async def _handle_pd_sep(api, req_data, request_id, token_ids, input_length,
|
||||
raise HTTPException(status_code=502, detail=f"Prefill failed: {e}")
|
||||
finally:
|
||||
p_inst.ongoing_tokens -= input_length
|
||||
p_inst.num_requests -= 1
|
||||
|
||||
# Send decode
|
||||
d_inst.ongoing_tokens += input_length
|
||||
d_inst.num_requests += 1
|
||||
parsed = urllib.parse.urlparse(str(p_inst.client.base_url))
|
||||
bootstrap_addr = f"http://{parsed.hostname}:{p_inst.bootstrap_port}"
|
||||
|
||||
@@ -1232,9 +1235,10 @@ async def _handle_pd_sep(api, req_data, request_id, token_ids, input_length,
|
||||
breakdown["t_done"] = _time.monotonic()
|
||||
breakdown["t_done_unix"] = _time.time()
|
||||
d_inst.ongoing_tokens -= input_length
|
||||
d_inst.num_requests -= 1
|
||||
_breakdown_log.append(breakdown)
|
||||
|
||||
return StreamingResponse(generate(), media_type="application/json")
|
||||
return StreamingResponse(generate(), media_type="text/event-stream")
|
||||
|
||||
|
||||
@app.get("/breakdown")
|
||||
|
||||
Reference in New Issue
Block a user