agentic-kvc/microbench/connector_tax/run_remaining.sh

#!/bin/bash
# Run remaining configs for Phase A, skipping those with summary_A.json.
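# Limits the sweep to rates 0.5,1,2 (the non-saturated regime) to avoid
# wasting GPU-hours on queue buildup.
# NOTE(review): a hard timeout of 120s per cell (rate) is intended, but this
# script does not enforce one itself; bench_loop.py is only passed --duration 60.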
#
# Usage: bash run_remaining.sh

# Unset variables are errors and a pipeline reports the failure of any stage.
# errexit (-e) is not enabled.
set -u
set -o pipefail

# --- Locations -------------------------------------------------------------
# HERE is the directory holding this script; the project root sits two
# levels above it, and results go under a fixed, timestamped run directory.
HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJ_DIR="$(cd "${HERE}/../.." && pwd)"
RUN_ROOT="${HERE}/results/20260526_1728"

# --- Python environment ----------------------------------------------------
# Interpreter from the project's virtualenv; the project root is prepended to
# any PYTHONPATH already present in the environment.
PYTHON="${PROJ_DIR}/.venv/bin/python"
PYTHONPATH="${PROJ_DIR}:${PYTHONPATH:-}"
export PYTHONPATH

# --- Server settings -------------------------------------------------------
MODEL_PATH="${HOME}/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"
GPU_ID=0
PORT=8000

# --- Sweep definition ------------------------------------------------------
# Configs to run, highest priority first.
# Deliberately left out:
#   mooncake_consumer      - needs dummy bootstrap, pre-flight likely fails
#   lmcache_only           - not installed
#   multi_mooncake_lmcache - not installed
ALL_CONFIGS=(
    plain
    noop_connector
    mooncake_producer
    mooncake_both
    nixl_both
)

# Request rates: non-saturated values only, to avoid runaway drain.
RATES="0.5,1,2"

#######################################
# Force-kill any vLLM server processes and wait for the GPU to be released.
#
# Sends SIGKILL to every process whose command line matches "port $PORT",
# "VLLM::EngineCore" or "vllm.entrypoints", waits 5 seconds, then polls
# nvidia-smi up to 20 times (3 seconds apart). Between polls, any process
# still holding /dev/nvidia$GPU_ID open is killed with fuser -k.
#
# Globals:   PORT (read), GPU_ID (read)
# Arguments: none
# Outputs:   a warning on stderr if the GPU never drops below the threshold
# Returns:   0 once nvidia-smi reports memory.used below 1000 for GPU_ID,
#            1 if that never happens within the polling window
#######################################
kill_all_vllm() {
    # Kept local so the polling state does not leak into the caller's scope.
    local used attempt

    # Best-effort: pkill exits non-zero when nothing matches, which is fine.
    pkill -9 -f "port $PORT" 2>/dev/null || true
    pkill -9 -f "VLLM::EngineCore" 2>/dev/null || true
    pkill -9 -f "vllm.entrypoints" 2>/dev/null || true
    sleep 5

    for ((attempt = 1; attempt <= 20; attempt++)); do
        used=$(nvidia-smi --query-gpu=memory.used --format=csv,noheader,nounits -i "$GPU_ID" 2>/dev/null | tr -d ' ')
        # Only compare when the reading is a plain integer: nvidia-smi can
        # print non-numeric placeholders, and feeding those to an arithmetic
        # test would raise a syntax error instead of simply "not free yet".
        if [[ "$used" =~ ^[0-9]+$ ]] && ((used < 1000)); then
            return 0
        fi
        # Something still holds the device node; kill it and poll again.
        fuser -k "/dev/nvidia${GPU_ID}" 2>/dev/null || true
        sleep 3
    done

    echo "WARNING: GPU not free" >&2
    # Report the failure so callers that care can react; existing callers
    # ignore the status, so this stays backward-compatible.
    return 1
}

# Main sweep: for each config, skip it if a usable summary already exists,
# otherwise launch the server, run the Phase A benchmark and release the GPU.
for config in "${ALL_CONFIGS[@]}"; do
    cfg_dir="$RUN_ROOT/$config"
    summary_file="$cfg_dir/summary_A.json"

    # Skip configs whose summary_A.json already holds at least 3 entries.
    if [[ -f "$summary_file" ]]; then
        # The path is handed over as argv instead of being spliced into the
        # Python source, so quotes or backslashes in it cannot break the code.
        ncells=$(python3 -c 'import json, sys; print(len(json.load(open(sys.argv[1]))))' "$summary_file" 2>/dev/null || echo 0)
        if [[ "$ncells" =~ ^[0-9]+$ ]] && ((ncells >= 3)); then
            echo "SKIP $config (already has $ncells cells in summary_A.json)"
            continue
        fi
    fi

    # Check for the launch script before touching the filesystem, so configs
    # that cannot run do not leave empty result directories behind.
    launch_script="$HERE/launch/launch_${config}.sh"
    if [[ ! -f "$launch_script" ]]; then
        echo "SKIP $config (no launch script)"
        continue
    fi

    echo ""
    echo "====== Running: $config ======"
    if ! mkdir -p "$cfg_dir"; then
        echo "FAIL $config (cannot create $cfg_dir)"
        continue
    fi

    # Settings exported for the launch script's environment.
    export RUN_DIR="$cfg_dir"
    export PORT GPU_ID MODEL_PATH
    export AGENTIC_STEP_LOG_PATH="$cfg_dir/engine_step.jsonl"

    # Only the last 5 lines of launch output are shown. The launch script's
    # own status is read from PIPESTATUS so it never depends on `pipefail`
    # being active or on tail's exit code.
    bash "$launch_script" 2>&1 | tail -5
    rc=${PIPESTATUS[0]}
    case "$rc" in
        0) ;;
        42)
            echo "SKIP $config (dependency missing, rc=42)"
            kill_all_vllm
            continue
            ;;
        *)
            echo "FAIL $config (launch rc=$rc)"
            kill_all_vllm
            continue
            ;;
    esac

    # Run bench_loop with rates 0.5,1,2
    echo "  Benchmarking $config rates=$RATES ..."
    "$PYTHON" "$HERE/bench_loop.py" \
        --url "http://127.0.0.1:$PORT/v1/chat/completions" \
        --model "$MODEL_PATH" \
        --phase A \
        --rates "$RATES" \
        --shape "4096,256" \
        --duration 60 \
        --min-completed 200 \
        --warmup 10 \
        --output-dir "$cfg_dir" 2>&1 | tail -10
    bench_rc=${PIPESTATUS[0]}
    # Surface a failed benchmark instead of silently reporting "Done".
    if ((bench_rc != 0)); then
        echo "  WARNING: bench_loop.py exited with rc=$bench_rc for $config" >&2
    fi

    echo "  Done: $config"
    kill_all_vllm
    echo "  GPU released."
done

echo ""
echo "All configs processed. Check $RUN_ROOT/*/summary_A.json"