Critical:
- cache_aware_proxy: _handle_pd_sep leaked p_inst.num_requests (it was never decremented) and never managed d_inst.num_requests; also change media_type from application/json to text/event-stream for the SSE stream
High:
- b3_sweep/b3_isolated_policy/b3_analyze: replace the hardcoded /home/admin/cpfs/wjh/ ROOT with the script-relative $(dirname "$0")/..
- b3_analyze: replace the hardcoded 8-port WORKER_MAP with dynamic generation from BASE_PORT and N_INSTANCES
Medium:
- analyze_breakdown: warn on stderr when records are skipped (previously silent)
- deploy_vllm_patches: fail fast on SSH/SCP errors instead of continuing with an empty VENV_SITE
- pyproject.toml: declare fastapi and uvicorn as runtime dependencies
- launch_elastic_p2p: kill EngineCore and the proxy in the trap handler to prevent GPU memory leaks on exit
26 lines
499 B
TOML
26 lines
499 B
TOML
# Core package metadata (PEP 621).
[project]
name = "agentic-kv"
version = "0.1.0"
description = "Trace-driven KV cache benchmarking for agentic LLM workloads"
requires-python = ">=3.10"
# Runtime dependencies as PEP 508 requirement strings, sorted alphabetically.
dependencies = [
    "fastapi>=0.110",
    "httpx>=0.27",
    "numpy>=1.24",
    "uvicorn>=0.29",
]

# Optional extras; install with e.g. `pip install "agentic-kv[dev]"`.
[project.optional-dependencies]
dev = ["pytest"]

# PEP 517 build backend declaration: the project is built with Hatchling.
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

# Explicitly list the package directory to ship in the wheel; it is named
# `replayer`, which differs from the distribution name `agentic-kv`.
[tool.hatch.build.targets.wheel]
packages = ["replayer"]

# pytest defaults: collect tests from `tests/` and pass `-q` on every run.
[tool.pytest.ini_options]
testpaths = ["tests"]
addopts = "-q"