bench.sh: add trap for graceful cleanup on kill/interrupt
Add EXIT/INT/TERM traps so that the vLLM, proxy, and gpu_monitor processes are cleaned up even when bench.sh is interrupted or terminated externally (SIGINT/SIGTERM; SIGKILL cannot be trapped). Also add gpu_monitor to the process pattern matched by cleanup_gpu.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -87,12 +87,11 @@ CONF
|
||||
# ─── GPU Cleanup (verified) ────────────────────────────────────────────────
|
||||
|
||||
cleanup_gpu() {
|
||||
echo "[cleanup] Killing all vLLM/proxy processes..."
|
||||
for p in $(ps aux | grep -E 'vllm serve|cache_aware_proxy' | grep -v grep | awk '{print $2}' 2>/dev/null); do
|
||||
echo "[cleanup] Killing all vLLM/proxy/monitor processes..."
|
||||
for p in $(ps aux | grep -E 'vllm serve|cache_aware_proxy|gpu_monitor' | grep -v grep | awk '{print $2}' 2>/dev/null); do
|
||||
kill -9 "$p" 2>/dev/null || true
|
||||
done
|
||||
sleep 3
|
||||
# Kill any remaining GPU holders
|
||||
local gpu_pids
|
||||
gpu_pids=$(fuser /dev/nvidia* 2>/dev/null | tr ' ' '\n' | sort -u | grep -v '^$' || true)
|
||||
if [ -n "$gpu_pids" ]; then
|
||||
@@ -100,7 +99,6 @@ cleanup_gpu() {
|
||||
echo "$gpu_pids" | xargs -r kill -9 2>/dev/null || true
|
||||
sleep 5
|
||||
fi
|
||||
# Verify GPUs are free
|
||||
local used
|
||||
used=$(nvidia-smi --query-gpu=memory.used --format=csv,noheader,nounits 2>/dev/null | awk '{s+=$1} END{print s}')
|
||||
if [ "${used:-0}" -gt 100 ]; then
|
||||
@@ -111,6 +109,9 @@ cleanup_gpu() {
|
||||
echo "[cleanup] All GPUs verified free."
|
||||
}
|
||||
|
||||
# Signal handling: on INT/TERM, log the event and exit with status 1.
# The handler deliberately does NOT call cleanup_gpu itself: `exit` fires the
# EXIT trap below, so cleanup_gpu runs exactly once per termination instead of
# twice (once from the signal handler and again from the EXIT trap).
trap 'echo "[bench.sh] Caught signal, cleaning up..."; exit 1' INT TERM
# Run cleanup_gpu on every exit path: normal completion, error exit, or signal.
trap 'cleanup_gpu' EXIT
|
||||
|
||||
# ─── Launch vLLM instances ─────────────────────────────────────────────────
|
||||
|
||||
launch_instances() {
|
||||
@@ -335,6 +336,6 @@ launch_proxy
|
||||
run_benchmark
|
||||
collect_artifacts
|
||||
print_summary
|
||||
cleanup_gpu
|
||||
# cleanup_gpu runs automatically via EXIT trap
|
||||
|
||||
echo "[done] $(date)"
|
||||
|
||||
Reference in New Issue
Block a user