agentic-kvc/analysis/pd_sep_paper_section/scripts/bench_pd_matrix.sh

#!/bin/bash
# Experiment matrix that rigorously evaluates PD separation vs Combined on
# the agentic trace (traces/w600_r0.0015_st30.jsonl), using cuda graphs by
# default and capturing step-level KV utilization for the C3 time-series
# figure.
#
# Matrix (default minimal set, ~2 h wall-clock on 8 x H20):
#   Configs:  combined-ca  pdsep-4p4d  pdsep-6p2d
#   Modes:    cudagraph
#   Seeds:    1, 2, 3
#
# Optional extensions:
#   --with-rr        also run combined-rr (refresh C7 routing-lever data)
#   --with-eager     also run each config with --enforce-eager (cuda-graph
#                    ablation; this doubles wall-clock)
#
# Output structure (on the host that runs this):
#   outputs/pd_matrix/
#     <config>_<mode>_seed<N>/
#       config.json, metrics.jsonl, metrics.summary.json
#       breakdown.json, stats.json, apc.txt
#       proxy.log, replayer.log, gpu_util.csv, gpu_snapshot.csv
#       vllm_inst_*.log         <-- step-level "KV cache: X%" lines for C3
#
# Run on dash0:
#   cd ~/agentic-kv
#   bash analysis/pd_sep_paper_section/scripts/bench_pd_matrix.sh

# Strict mode: abort on a failing command, on use of an unset variable, and
# when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# Locate this script on disk, then the project root three levels above it.
SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
PROJECT_DIR=$(cd "${SCRIPT_DIR}/../../.." && pwd)
BENCH="${PROJECT_DIR}/scripts/bench.sh"
TRACE_DEFAULT="${PROJECT_DIR}/traces/w600_r0.0015_st30.jsonl"

# Default settings; the command-line options parsed below override them.
TRACE="${TRACE_DEFAULT}"
REQUESTS=850
SEEDS=3                 # seed count: runs use seeds 1..SEEDS
TAG_PREFIX="pd_matrix"  # output sub-directory under outputs/
ONLY=""                 # comma-separated list of tags to run (subset of the matrix)

# Boolean switches, stored as the strings "true" / "false".
WITH_RR=false
WITH_EAGER=false
DRY_RUN=false

# Parse command-line options into the global settings TRACE, REQUESTS, SEEDS,
# WITH_RR, WITH_EAGER, TAG_PREFIX, ONLY and DRY_RUN.
# Arguments: the script's command-line arguments ("$@").
# Exits:     0 after printing help; 1 on an unknown option, on a value-taking
#            option with no value, or on a --seeds value that is not a
#            non-negative integer. Diagnostics go to stderr.
parse_args() {
    local opt
    local -r uint_re='^(0|[1-9][0-9]*)$'

    while [[ $# -gt 0 ]]; do
        opt="$1"
        case "$opt" in
            --trace|--requests|--seeds|--tag-prefix|--only)
                # Check explicitly so a trailing option yields a readable
                # error instead of an "unbound variable" abort under set -u.
                if [[ $# -lt 2 ]]; then
                    echo "[ERROR] option $opt requires a value" >&2
                    exit 1
                fi
                case "$opt" in
                    --trace) TRACE="$2" ;;
                    --requests) REQUESTS="$2" ;;
                    --seeds)
                        # SEEDS feeds arithmetic and the seed loop later on,
                        # so reject anything that is not a plain integer.
                        if [[ ! "$2" =~ $uint_re ]]; then
                            echo "[ERROR] --seeds expects a non-negative integer, got: $2" >&2
                            exit 1
                        fi
                        SEEDS="$2" ;;
                    --tag-prefix) TAG_PREFIX="$2" ;;
                    --only) ONLY="$2" ;;
                esac
                shift 2 ;;
            --with-rr) WITH_RR=true; shift ;;
            --with-eager) WITH_EAGER=true; shift ;;
            --dry-run) DRY_RUN=true; shift ;;
            -h|--help)
                # Print the leading comment header of this script (skipping
                # the shebang) rather than a hard-coded line range.
                awk '/^#!/ { next } /^#/ { print; seen = 1; next } seen { exit }' "$0"
                exit 0 ;;
            *) echo "Unknown: $opt" >&2; exit 1 ;;
        esac
    done
}
parse_args "$@"

# Set of tags selected with --only, stored as keys of an associative array.
declare -A ALLOWED=()

# Rebuild ALLOWED from the comma-separated ONLY string.
# Globals: ONLY (read), ALLOWED (written)
# Surrounding whitespace is trimmed from each entry. Empty entries (e.g. from
# "a,,b" or a trailing comma) are skipped, because an empty key is not a valid
# associative-array subscript and would abort the script.
_build_allowed() {
    local -a raw_tags=()
    local t
    ALLOWED=()
    [ -n "${ONLY:-}" ] || return 0
    IFS=',' read -ra raw_tags <<< "$ONLY"
    for t in "${raw_tags[@]}"; do
        t="${t#"${t%%[![:space:]]*}"}"   # strip leading whitespace
        t="${t%"${t##*[![:space:]]}"}"   # strip trailing whitespace
        [ -n "$t" ] || continue
        ALLOWED["$t"]=1
    done
}
_build_allowed

# Report whether a run tag passes the --only filter.
# Arguments: $1 - tag of the form <config>_<mode>_seed<N>
# Returns:   0 if ONLY is empty (no filter) or the tag was listed, else 1.
is_allowed() {
    # if ONLY not set, everything is allowed
    [ -z "${ONLY:-}" ] && return 0
    [ -n "${ALLOWED[$1]:-}" ]
}

# Preconditions: the trace must be a regular file and bench.sh must be
# executable; otherwise report the problem and stop with status 1.
[[ -f "$TRACE" ]] || {
    echo "[ERROR] trace not found: $TRACE"
    exit 1
}
[[ -x "$BENCH" ]] || {
    echo "[ERROR] bench.sh not found or not executable: $BENCH"
    exit 1
}

# Configuration table. Each entry is "name|bench.sh args"; the name becomes
# part of the run tag and the args are passed to bench.sh.
declare -a CONFIGS
CONFIGS+=(
    "combined-ca|--mode baseline --policy linear"
    "pdsep-4p4d|--mode pdsep --pd-ratio 4:4 --policy linear"
    "pdsep-6p2d|--mode pdsep --pd-ratio 6:2 --policy linear"
)
if [[ "$WITH_RR" == "true" ]]; then
    # lmetric without affinity is the cleanest available stand-in for
    # round-robin; pure RR would need a --policy rr added to
    # cache_aware_proxy. Until then this entry is a placeholder that keeps
    # the matrix script uniform.
    CONFIGS+=("combined-rr|--mode baseline --policy lmetric")
fi

# Mode table, same "name|bench.sh args" layout. The default cudagraph mode
# adds no extra arguments.
declare -a MODES
MODES+=("cudagraph|")
if [[ "$WITH_EAGER" == "true" ]]; then
    MODES+=("eager|--eager")
fi

# Banner describing the full (unfiltered) matrix: inputs, table sizes and the
# resulting run count (configs x modes x seeds).
cat <<EOF
==================================================================
  PD-sep paper section experiment matrix
  trace      = $TRACE
  requests   = $REQUESTS
  seeds      = 1..$SEEDS
  configs    = ${#CONFIGS[@]}
  modes      = ${#MODES[@]}
  total runs = $(( ${#CONFIGS[@]} * ${#MODES[@]} * SEEDS ))
==================================================================
EOF

# Run one cell of the matrix through bench.sh.
# Globals (read): TAG_PREFIX, PROJECT_DIR, BENCH, TRACE, REQUESTS, DRY_RUN
# Arguments:
#   $1 - config name (used in the tag)
#   $2 - config args: whitespace-separated bench.sh options
#   $3 - mode name (used in the tag)
#   $4 - mode args: whitespace-separated bench.sh options, may be empty
#   $5 - seed number
# Outputs: progress lines on stdout.
# Returns: 0 if the run was skipped (already complete), was a dry run, or
#          succeeded; otherwise the exit status of bench.sh.
run_one() {
    local config_name="$1" config_args="$2"
    local mode_name="$3" mode_args="$4"
    local seed="$5"

    local tag="$TAG_PREFIX/${config_name}_${mode_name}_seed${seed}"
    local outdir="$PROJECT_DIR/outputs/$tag"

    # A summary file marks a finished run; never redo those.
    if [ -f "$outdir/metrics.summary.json" ]; then
        echo "[skip] $tag (already complete)"
        return 0
    fi

    # Build the command as an array so paths containing spaces or shell
    # metacharacters reach bench.sh intact (no eval, no re-parsing). Only
    # the option strings from the config/mode tables are word-split.
    local -a extra_args=()
    IFS=$' \t\n' read -ra extra_args <<< "$config_args $mode_args"
    local -a cmd=(bash "$BENCH" --tag "$tag")
    cmd+=(${extra_args[@]+"${extra_args[@]}"})
    cmd+=(--trace "$TRACE" --requests "$REQUESTS")

    local cmd_str
    printf -v cmd_str '%q ' "${cmd[@]}"   # shell-quoted, copy-pasteable
    echo
    echo "[run] $tag"
    echo "      cmd: ${cmd_str% }"
    if [ "$DRY_RUN" = "true" ]; then
        # A dry run must not touch the filesystem, so return before cleanup.
        return 0
    fi

    # Clear leftovers of a partial run; :? aborts if the path is ever empty.
    rm -rf -- "${outdir:?}"

    # PYTHONHASHSEED is set inside bench.sh for elastic, but not for the
    # other modes. We export a different seed per run for reproducibility
    # of any RNG inside the proxy/replayer.
    local rc=0
    PYTHONHASHSEED=$((42 + seed)) "${cmd[@]}" || rc=$?
    if [ "$rc" -ne 0 ]; then
        echo "[FAIL] $tag exited rc=$rc"
        return "$rc"
    fi
    echo "[done] $tag"
}

# Walk the config x mode x seed matrix and run every cell that passes the
# --only filter.
# Globals (read):    CONFIGS, MODES, SEEDS, ONLY
# Globals (written): START_TS (epoch seconds at start), N_DONE, N_FAIL,
#                    N_TOTAL (number of runs actually selected, so a later
#                    "N_DONE/N_TOTAL" stays meaningful under --only), ELAPSED
# Calls:   is_allowed <tag>, run_one <cfg> <cfg_args> <mode> <mode_args> <seed>
# Outputs: one "[progress]" line per attempted run on stdout; a warning on
#          stderr if --only selected nothing.
run_matrix() {
    local c m s cfg_name cfg_args mode_name mode_args tag_name

    START_TS=$(date +%s)
    N_DONE=0
    N_FAIL=0
    N_TOTAL=0

    for c in "${CONFIGS[@]}"; do
        # Entries are "name|args": split on the "|" separator.
        cfg_name="${c%%|*}"; cfg_args="${c##*|}"
        for m in "${MODES[@]}"; do
            mode_name="${m%%|*}"; mode_args="${m##*|}"
            for (( s = 1; s <= SEEDS; s++ )); do
                tag_name="${cfg_name}_${mode_name}_seed${s}"
                if ! is_allowed "$tag_name"; then
                    continue
                fi
                N_TOTAL=$((N_TOTAL + 1))
                # Calling run_one as an if-condition keeps a failed run from
                # aborting the whole matrix under set -e.
                if run_one "$cfg_name" "$cfg_args" "$mode_name" "$mode_args" "$s"; then
                    N_DONE=$((N_DONE + 1))
                else
                    N_FAIL=$((N_FAIL + 1))
                fi
                ELAPSED=$(( $(date +%s) - START_TS ))
                echo "[progress] $N_DONE done, $N_FAIL failed, ${ELAPSED}s elapsed"
            done
        done
    done

    # A mistyped --only tag would otherwise silently run nothing.
    if [ -n "${ONLY:-}" ] && [ "$N_TOTAL" -eq 0 ]; then
        echo "[warn] --only matched no run in the matrix: $ONLY" >&2
    fi
}
run_matrix

# Closing summary: success/failure counts, wall-clock minutes since START_TS,
# and the directory that holds this matrix's outputs.
printf '\n'
printf '%s\n' \
    "==================================================================" \
    "  MATRIX COMPLETE: $N_DONE/$N_TOTAL succeeded, $N_FAIL failed" \
    "  elapsed: $(( ($(date +%s) - START_TS) / 60 )) min" \
    "  outputs: $PROJECT_DIR/outputs/$TAG_PREFIX/" \
    "=================================================================="