bench.sh: add trap for graceful cleanup on kill/interrupt

Added EXIT/INT/TERM traps to ensure vLLM, proxy, and gpu_monitor
processes are cleaned up even when bench.sh is killed externally.
Also includes gpu_monitor in cleanup_gpu pattern matching.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-23 20:24:13 +08:00
parent ea5149726c
commit b2ede1da77

View File

@@ -87,12 +87,11 @@ CONF
# ─── GPU Cleanup (verified) ────────────────────────────────────────────────
cleanup_gpu() {
echo "[cleanup] Killing all vLLM/proxy processes..."
for p in $(ps aux | grep -E 'vllm serve|cache_aware_proxy' | grep -v grep | awk '{print $2}' 2>/dev/null); do
echo "[cleanup] Killing all vLLM/proxy/monitor processes..."
for p in $(ps aux | grep -E 'vllm serve|cache_aware_proxy|gpu_monitor' | grep -v grep | awk '{print $2}' 2>/dev/null); do
kill -9 "$p" 2>/dev/null || true
done
sleep 3
# Kill any remaining GPU holders
local gpu_pids
gpu_pids=$(fuser /dev/nvidia* 2>/dev/null | tr ' ' '\n' | sort -u | grep -v '^$' || true)
if [ -n "$gpu_pids" ]; then
@@ -100,7 +99,6 @@ cleanup_gpu() {
echo "$gpu_pids" | xargs -r kill -9 2>/dev/null || true
sleep 5
fi
# Verify GPUs are free
local used
used=$(nvidia-smi --query-gpu=memory.used --format=csv,noheader,nounits 2>/dev/null | awk '{s+=$1} END{print s}')
if [ "${used:-0}" -gt 100 ]; then
@@ -111,6 +109,9 @@ cleanup_gpu() {
echo "[cleanup] All GPUs verified free."
}
# Signal/exit handling: make sure cleanup_gpu runs exactly once on every exit path.
#
# INT/TERM: announce the signal, clean up, and exit with status 1. The handler
# disarms the EXIT trap first; otherwise the `exit 1` at its end would fire the
# EXIT trap and run cleanup_gpu (including its kill loop and sleeps) a second time.
trap 'trap - EXIT; echo "[bench.sh] Caught signal, cleaning up..."; cleanup_gpu; exit 1' INT TERM
# EXIT: covers normal completion and any other exit path that is not INT/TERM.
trap 'cleanup_gpu' EXIT
# ─── Launch vLLM instances ─────────────────────────────────────────────────
launch_instances() {
@@ -335,6 +336,6 @@ launch_proxy
run_benchmark
collect_artifacts
print_summary
cleanup_gpu
# cleanup_gpu runs automatically via EXIT trap
echo "[done] $(date)"