scripts/sweep_backpressure_smoke.sh: 4-run smoke matrix (KVC baseline / KVC + backpressure / KVC + backpressure @ time-scale=1 / DP @ time-scale=1) designed to fit ~3-4h GPU budget. Validates §3 backpressure implementation and partially probes §7 time-scale distortion. scripts/analysis/analyze_backpressure_smoke.py: consumes the new structural/* jsonl files plus request-metrics; emits headline metrics, backpressure histograms, admission probe stats, and per-session pinning distribution. scripts/sweep_tp1_v6_p1_profile.sh: pre-existing v6 P1 profile sweep script (was untracked; included for completeness). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
115 lines
3.6 KiB
Bash
Executable File
115 lines
3.6 KiB
Bash
Executable File
#!/usr/bin/env bash
# Smoke sweep: validate backpressure code change on top of v5 Option D config.
# Designed to fit in ~3-4h GPU budget (4 runs × ~30-60 min).
#
# Usage:
#   bash scripts/sweep_backpressure_smoke.sh
#
# Environment overrides (all optional):
#   OUT_ROOT   directory receiving per-run outputs and sweep.log
#              (default: outputs/sweep_backpressure_smoke)
#   TRACE      trace file (default: outputs/qwen35-swebench-50sess.jsonl)
#   MODEL      model path; MODEL_PATH is honored as a fallback when MODEL is
#              unset or empty (default: Qwen3-30B-A3B-Instruct-2507)
#
# Prerequisites: GPUs available; trace at outputs/qwen35-swebench-50sess.jsonl;
#   model at $MODEL / $MODEL_PATH (default Qwen3-30B-A3B-Instruct-2507).
set -euo pipefail

# Run from the repository root (the parent of this script's directory) so the
# relative default paths below resolve no matter where the script is invoked.
REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
cd "$REPO_ROOT"

OUT_ROOT=${OUT_ROOT:-outputs/sweep_backpressure_smoke}
TRACE=${TRACE:-outputs/qwen35-swebench-50sess.jsonl}
# Precedence: MODEL, then MODEL_PATH (the name the prerequisites note has always
# advertised), then the built-in default.
MODEL=${MODEL:-${MODEL_PATH:-/mnt/kzlin/workflow/pd-hybrid/simm-swe-bench/models/Qwen3-30B-A3B-Instruct-2507}}

mkdir -p "$OUT_ROOT"
# Single log shared by all runs; always appended to (tee -a), never truncated.
LOG="$OUT_ROOT/sweep.log"
echo "[$(date '+%F %T')] Starting backpressure smoke sweep" | tee -a "$LOG"
echo "  Trace: $TRACE" | tee -a "$LOG"
echo "  Model: $MODEL" | tee -a "$LOG"
echo "  Output root: $OUT_ROOT" | tee -a "$LOG"

# CLI arguments shared by every KVC (kvcache-centric) run in this sweep.
# Kept as an array so each flag/value reaches the CLI as its own word even if
# $TRACE or $MODEL contains whitespace. Worker layout: 2 prefill workers on
# GPUs 0,1 and 6 decode workers on GPUs 2-7.
KVC_COMMON_ARGS=(
  --trace "$TRACE"
  --model "$MODEL"
  --mechanism kvcache-centric
  --policy kv-aware
  --kvcache-admission-mode worker
  --kvcache-seed-min-turn-id 1
  --kvcache-seed-max-inflight-decode -1
  --kvcache-prefill-backup-policy release-after-transfer
  --kvcache-prefill-priority-eviction
  --prefill-workers 2
  --decode-workers 6
  --prefill-gpu-ids 0,1
  --decode-gpu-ids 2,3,4,5,6,7
  --transfer-backend mooncake
  # Individual runs may append their own --target-duration-s after this one.
  --target-duration-s 2000
  --session-sample-rate 1.0
  --min-turns 2
  --concurrency-limit 32
)

# CLI arguments shared by the DP (pd-colo) run in this sweep: 8 direct
# workers, one per GPU 0-7. Trace, model, policy, duration, sampling, and
# concurrency settings mirror KVC_COMMON_ARGS so the runs stay comparable.
DP_COMMON_ARGS=(
  --trace "$TRACE"
  --model "$MODEL"
  --mechanism pd-colo
  --policy kv-aware
  --direct-workers 8
  --direct-gpu-ids 0,1,2,3,4,5,6,7
  --transfer-backend mooncake
  # Individual runs may append their own --target-duration-s after this one.
  --target-duration-s 2000
  --session-sample-rate 1.0
  --min-turns 2
  --concurrency-limit 32
)

#######################################
# E1: KVC baseline (backpressure flags not passed) at time-scale 10.
# Globals:
#   OUT_ROOT (read), LOG (appended to), KVC_COMMON_ARGS (read)
# Arguments:
#   None
# Outputs:
#   Banner plus the benchmark's combined stdout/stderr, written both to
#   stdout and to $LOG. $OUT_ROOT/E1_kvc_baseline_ts10 is passed to the CLI
#   as --output-root.
# Returns:
#   Status of the `python ... | tee` pipeline (reflects a python failure only
#   when `pipefail` is in effect, as set at the top of this script).
#######################################
run_kvc_baseline_ts10() {
  local out="$OUT_ROOT/E1_kvc_baseline_ts10"
  echo "[$(date '+%F %T')] === E1: KVC baseline (no backpressure) time-scale=10 ===" | tee -a "$LOG"
  python -m agentic_pd_hybrid.cli benchmark-live \
    "${KVC_COMMON_ARGS[@]}" \
    --output-root "$out" \
    --time-scale 10 \
    2>&1 | tee -a "$LOG"
}

#######################################
# E2: KVC with backpressure enabled (max pause 2.0 s) at time-scale 10.
# Globals:
#   OUT_ROOT (read), LOG (appended to), KVC_COMMON_ARGS (read)
# Arguments:
#   None
# Outputs:
#   Banner plus the benchmark's combined stdout/stderr, written both to
#   stdout and to $LOG. $OUT_ROOT/E2_kvc_backpressure_ts10 is passed to the
#   CLI as --output-root.
# Returns:
#   Status of the `python ... | tee` pipeline (reflects a python failure only
#   when `pipefail` is in effect, as set at the top of this script).
#######################################
run_kvc_backpressure_ts10() {
  local out="$OUT_ROOT/E2_kvc_backpressure_ts10"
  echo "[$(date '+%F %T')] === E2: KVC + backpressure ON, time-scale=10 ===" | tee -a "$LOG"
  python -m agentic_pd_hybrid.cli benchmark-live \
    "${KVC_COMMON_ARGS[@]}" \
    --output-root "$out" \
    --time-scale 10 \
    --enable-backpressure \
    --backpressure-max-pause-s 2.0 \
    2>&1 | tee -a "$LOG"
}

#######################################
# E3: KVC with backpressure enabled (max pause 2.0 s) at time-scale 1,
# shortened via --target-duration-s 1800.
# Globals:
#   OUT_ROOT (read), LOG (appended to), KVC_COMMON_ARGS (read)
# Arguments:
#   None
# Outputs:
#   Banner plus the benchmark's combined stdout/stderr, written both to
#   stdout and to $LOG. $OUT_ROOT/E3_kvc_backpressure_ts1_short is passed to
#   the CLI as --output-root.
# Returns:
#   Status of the `python ... | tee` pipeline (reflects a python failure only
#   when `pipefail` is in effect, as set at the top of this script).
#######################################
run_kvc_backpressure_ts1() {
  local out="$OUT_ROOT/E3_kvc_backpressure_ts1_short"
  # NOTE(review): the banner says "FIRST 1000 reqs", but no request-count cap
  # is passed below; the run is bounded only by --target-duration-s 1800.
  # Confirm which is intended.
  echo "[$(date '+%F %T')] === E3: KVC + backpressure ON, time-scale=1, FIRST 1000 reqs ===" | tee -a "$LOG"
  # --target-duration-s appears twice on the command line (2000 from
  # KVC_COMMON_ARGS, then 1800 here). This presumably relies on the CLI
  # honoring the last occurrence; verify against the CLI's parser.
  python -m agentic_pd_hybrid.cli benchmark-live \
    "${KVC_COMMON_ARGS[@]}" \
    --output-root "$out" \
    --time-scale 1 \
    --enable-backpressure \
    --backpressure-max-pause-s 2.0 \
    --target-duration-s 1800 \
    2>&1 | tee -a "$LOG"
}

#######################################
# E4: 8-way DP baseline at time-scale 1, shortened via
# --target-duration-s 1800.
# Globals:
#   OUT_ROOT (read), LOG (appended to), DP_COMMON_ARGS (read)
# Arguments:
#   None
# Outputs:
#   Banner plus the benchmark's combined stdout/stderr, written both to
#   stdout and to $LOG. $OUT_ROOT/E4_dp_ts1_short is passed to the CLI as
#   --output-root.
# Returns:
#   Status of the `python ... | tee` pipeline (reflects a python failure only
#   when `pipefail` is in effect, as set at the top of this script).
#######################################
run_dp_baseline_ts1() {
  local out="$OUT_ROOT/E4_dp_ts1_short"
  # NOTE(review): the banner says "FIRST 1000 reqs", but no request-count cap
  # is passed below; the run is bounded only by --target-duration-s 1800.
  # Confirm which is intended.
  echo "[$(date '+%F %T')] === E4: 8-way DP cache-aware, time-scale=1, FIRST 1000 reqs ===" | tee -a "$LOG"
  # --target-duration-s appears twice on the command line (2000 from
  # DP_COMMON_ARGS, then 1800 here). This presumably relies on the CLI
  # honoring the last occurrence; verify against the CLI's parser.
  python -m agentic_pd_hybrid.cli benchmark-live \
    "${DP_COMMON_ARGS[@]}" \
    --output-root "$out" \
    --time-scale 1 \
    --target-duration-s 1800 \
    2>&1 | tee -a "$LOG"
}

# Sequence — add/remove as fits the budget.
# Runs execute strictly one after another. With `set -euo pipefail` in effect
# (set at the top of this script), the first run whose pipeline fails aborts
# the sweep, so the later runs and the DONE banner are skipped.
run_kvc_baseline_ts10
run_kvc_backpressure_ts10
run_kvc_backpressure_ts1
run_dp_baseline_ts1

echo "[$(date '+%F %T')] === sweep DONE ===" | tee -a "$LOG"
echo "Run analysis with: python scripts/analysis/analyze_backpressure_smoke.py $OUT_ROOT" | tee -a "$LOG"