Critical:
- cache_aware_proxy: _handle_pd_sep leaked p_inst.num_requests (never decremented) and never managed d_inst.num_requests; fix media_type from application/json to text/event-stream for SSE stream

High:
- b3_sweep/b3_isolated_policy/b3_analyze: replace hardcoded /home/admin/cpfs/wjh/ ROOT with script-relative $(dirname "$0")/..
- b3_analyze: replace hardcoded 8-port WORKER_MAP with dynamic generation from BASE_PORT and N_INSTANCES

Medium:
- analyze_breakdown: warn on stderr when records are skipped (was silent)
- deploy_vllm_patches: fail-fast on SSH/SCP errors instead of continuing with empty VENV_SITE
- pyproject.toml: declare fastapi and uvicorn as runtime dependencies
- launch_elastic_p2p: kill EngineCore and proxy in trap handler to prevent GPU memory leaks on exit
63 lines
2.2 KiB
Bash
Executable File
63 lines
2.2 KiB
Bash
Executable File
#!/bin/bash
# Deploy modified vLLM Python files from third_party/ to site-packages.
#
# Usage: bash scripts/deploy_vllm_patches.sh [HOST]
#   HOST: ssh alias (default: dash0). Use "local" for local deployment.
#
# This copies only the Python files we've modified — C extensions and
# everything else come from the pip-installed vllm package.

set -euo pipefail

# Deployment target: an ssh alias, or the literal word "local".
HOST="${1:-dash0}"

# Repository root, resolved as the parent of the directory holding this
# script. Assigned separately from `readonly` so a failing `cd` is not masked.
PROJECT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")/.." && pwd)"

# Root of the vendored vllm package that holds the patched sources.
VLLM_SRC="$PROJECT_DIR/third_party/vllm/vllm"

# Files modified relative to vllm/ package root
PATCHED_FILES=(
  "distributed/kv_transfer/kv_connector/v1/mooncake/mooncake_connector.py"
  "distributed/kv_transfer/kv_connector/v1/mooncake/mooncake_utils.py"
  "v1/core/sched/scheduler.py"
)

readonly HOST PROJECT_DIR VLLM_SRC PATCHED_FILES
# Deploy: copy every entry of PATCHED_FILES from $VLLM_SRC over its
# counterpart under <site-packages>/vllm on the target ("local" or ssh $HOST).

# Pre-flight: refuse to start if any patched source is missing, so an
# incomplete checkout cannot leave the target with only some files replaced.
for f in "${PATCHED_FILES[@]}"; do
  if [[ ! -f "$VLLM_SRC/$f" ]]; then
    echo "ERROR: patched source not found: $VLLM_SRC/$f" >&2
    exit 1
  fi
done

if [[ "$HOST" == "local" ]]; then
  # Ask the project venv's interpreter where its site-packages lives.
  VENV_SITE=$("$PROJECT_DIR/.venv/bin/python" -c "import site; print(site.getsitepackages()[0])") || {
    echo "ERROR: failed to resolve local site-packages" >&2
    exit 1
  }
  if [[ -z "$VENV_SITE" ]]; then
    echo "ERROR: empty local site-packages path" >&2
    exit 1
  fi
  DST="$VENV_SITE/vllm"
  if [[ ! -d "$DST" ]]; then
    echo "ERROR: vllm package directory not found: $DST" >&2
    exit 1
  fi
  echo "Deploying to local: $DST"
  for f in "${PATCHED_FILES[@]}"; do
    cp -v -- "$VLLM_SRC/$f" "$DST/$f" || {
      echo "ERROR: failed to copy $f to $DST" >&2
      exit 1
    }
  done
else
  # Find site-packages on remote. The command string is parsed by the remote
  # shell, so the quoted ~ is deliberate: it expands to the remote home.
  VENV_SITE=$(ssh "$HOST" "~/agentic-kv/.venv/bin/python -c \"import site; print(site.getsitepackages()[0])\"") || {
    echo "ERROR: failed to resolve site-packages on $HOST" >&2
    exit 1
  }
  if [[ -z "$VENV_SITE" ]]; then
    echo "ERROR: empty site-packages path from $HOST" >&2
    exit 1
  fi
  DST="$VENV_SITE/vllm"
  echo "Deploying to $HOST:$DST"
  for f in "${PATCHED_FILES[@]}"; do
    scp "$VLLM_SRC/$f" "$HOST:$DST/$f" || {
      echo "ERROR: failed to copy $f to $HOST" >&2
      exit 1
    }
  done
fi

echo "Deployed ${#PATCHED_FILES[@]} patched files."

# Verify: import the patched mooncake_utils module with the target's
# interpreter and print where it was loaded from and whether
# MooncakeBootstrapServer exposes estimate_hit. The attribute check is
# informational (it only prints); a failed import makes python, and therefore
# this script, exit non-zero.
#
# The program is fed on stdin ("python -") from a quoted here-doc so the same
# text serves both targets without nested quote escaping.
if [[ "$HOST" == "local" ]]; then
  VERIFY_CMD=("$PROJECT_DIR/.venv/bin/python" -)
else
  # Quoted ~ is deliberate: the remote shell expands it to the remote home.
  VERIFY_CMD=(ssh "$HOST" "~/agentic-kv/.venv/bin/python -")
fi

"${VERIFY_CMD[@]}" <<'PY'
import vllm.distributed.kv_transfer.kv_connector.v1.mooncake.mooncake_utils as m
print('mooncake_utils:', m.__file__)
print('has estimate_hit:', hasattr(m.MooncakeBootstrapServer, 'estimate_hit'))
PY