scripts/sweep_backpressure_smoke.sh: 4-run smoke matrix (KVC baseline / KVC + backpressure / KVC + backpressure @ time-scale=1 / DP @ time-scale=1) designed to fit ~3-4h GPU budget. Validates §3 backpressure implementation and partially probes §7 time-scale distortion. scripts/analysis/analyze_backpressure_smoke.py: consumes the new structural/* jsonl files plus request-metrics; emits headline metrics, backpressure histograms, admission probe stats, and per-session pinning distribution. scripts/sweep_tp1_v6_p1_profile.sh: pre-existing v6 P1 profile sweep script (was untracked; included for completeness). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
130 lines
4.7 KiB
Bash
Executable File
130 lines
4.7 KiB
Bash
Executable File
#!/bin/bash
# v6 P1: re-run the v5 (Option D) config with the pool_breakdown instrument
# (commit 4978c0d) so d-pool-timeseries.jsonl carries radix_protected /
# slot_private / running_batch / {transfer,prealloc,retracted}_queue tokens.
#
# This is the same config as scripts/sweep_tp1_v5_optD_profile.sh but writes
# to a separate output dir, leaving the pre-instrument v5+profile run intact
# for before/after comparison.
#
# Output:
#   outputs/qwen3-30b-tp1-v6-p1-profile/
#   ├── kvcache-centric-kv-aware-worker-admission-<ts>/
#   │   ├── request-metrics.jsonl
#   │   ├── request-metrics.jsonl.summary.json
#   │   └── d-pool-timeseries.jsonl   ← now with pool_breakdown fields
#   ├── exp{1,2}_*_metrics.jsonl
#   └── exp{1,2}_*_pool_timeseries.jsonl

# Strict mode: stop on the first failing command, on any unset variable, and
# on a failure in any stage of a pipeline.
set -euo pipefail

# Every relative path below is resolved from the parent of this script's
# directory (presumably the repository root -- confirm if the script moves).
cd -- "$(dirname -- "$0")/.."

# Sweep configuration.
MODEL=/mnt/kzlin/workflow/pd-hybrid/simm-swe-bench/models/Qwen3-30B-A3B-Instruct-2507
TRACE=outputs/qwen35-swebench-50sess.jsonl
OUTPUT=outputs/qwen3-30b-tp1-v6-p1-profile
VENV_PYTHON=.venv/bin/python
RESULTS_FILE="${OUTPUT}/sweep_results.txt"
# Forwarded to the benchmark as --pool-poll-interval-s.
POLL_INTERVAL=1.0

# The results log lives inside OUTPUT, so the directory must exist before the
# first log call.
mkdir -p -- "$OUTPUT"

#######################################
# Print a timestamped message and append the same line to the results log.
# Globals:   RESULTS_FILE (appended to)
# Arguments: the message words; they are joined with single spaces
# Outputs:   "[YYYY-MM-DD HH:MM:SS] <message>" on stdout and in RESULTS_FILE
#######################################
log() {
  # RESULTS_FILE is quoted so a path containing whitespace stays one tee target.
  printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$*" | tee -a "$RESULTS_FILE"
}

#######################################
# Record the outcome of one finished benchmark run.
#
# Appends the run's summary JSON to the results log and copies the summary,
# the per-request metrics and (when present) the pool timeseries into OUTPUT
# under label-prefixed names. Missing files produce a WARNING in the log
# instead of a failure, so a bookkeeping gap does not abort the sweep.
#
# Globals:   OUTPUT (read), RESULTS_FILE (appended to)
# Arguments: $1 - label used as the prefix of the copied files
#            $2 - directory the benchmark wrote its results into
#######################################
save_result() {
  local label=$1
  local run_dir=$2
  local summary_src="${run_dir}/request-metrics.jsonl.summary.json"
  local metrics_src="${run_dir}/request-metrics.jsonl"
  local pool_src="${run_dir}/d-pool-timeseries.jsonl"
  local pool_dst="${OUTPUT}/${label}_pool_timeseries.jsonl"
  # Grows as files are copied so the final message lists only what exists.
  local saved="${OUTPUT}/${label}_summary.json"
  local rows

  log "=== $label COMPLETED ==="

  if [[ ! -f "$summary_src" ]]; then
    log "WARNING: No summary file found in $run_dir"
    return 0
  fi

  log "Summary:"
  cat -- "$summary_src" >> "$RESULTS_FILE"
  echo "" >> "$RESULTS_FILE"
  cp -- "$summary_src" "${OUTPUT}/${label}_summary.json"

  if [[ -f "$metrics_src" ]]; then
    cp -- "$metrics_src" "${OUTPUT}/${label}_metrics.jsonl"
    saved+=" + ${label}_metrics.jsonl"
  else
    log "WARNING: no request-metrics.jsonl found in $run_dir"
  fi

  if [[ -f "$pool_src" ]]; then
    cp -- "$pool_src" "$pool_dst"
    rows=$(wc -l < "$pool_dst")
    # Some wc implementations pad the count with spaces.
    rows=${rows//[[:space:]]/}
    log "Pool timeseries: ${rows} rows"
    saved+=" + ${label}_pool_timeseries.jsonl"
  else
    log "WARNING: no d-pool-timeseries.jsonl produced"
  fi

  log "Saved to ${saved}"
}

# Sweep banner: one log entry per line, emitted in order.
intro_lines=(
  "Starting v6 P1 sweep (v5 Option D config + ${POLL_INTERVAL}s pool polling + pool_breakdown)"
  "Model: $MODEL"
  "Trace: $TRACE (4449 requests, 52 sessions)"
  "Goal: capture pool_breakdown fields (radix_protected / slot_private / running_batch / queues)"
  " to decompose 'other' on the v5 baseline workload"
)
for intro_line in "${intro_lines[@]}"; do
  log "$intro_line"
done

#######################################
# Print the most recently modified kvcache-centric-* directory directly under
# the given root (with a trailing slash), or nothing when none exists.
# Arguments: $1 - directory to search
# Outputs:   the newest matching directory path on stdout, without a newline
#######################################
latest_run_dir() {
  local root=$1
  local candidate
  local newest=""
  for candidate in "$root"/kvcache-centric-*/; do
    # An unmatched glob is left as the literal pattern; skip it.
    [[ -d "$candidate" ]] || continue
    if [[ -z "$newest" || "$candidate" -nt "$newest" ]]; then
      newest=$candidate
    fi
  done
  printf '%s' "$newest"
}

#######################################
# Run one KVC kv-aware Option D + profile benchmark and save its results.
#
# All experiments share the same benchmark-live configuration; only the
# prefill/decode worker counts and GPU assignments differ.
#
# Globals:   MODEL, TRACE, OUTPUT, VENV_PYTHON, POLL_INTERVAL (all read)
# Arguments: $1 - tag shown in the log banner (e.g. EXP1)
#            $2 - human-readable title for the log banner
#            $3 - label passed to save_result (prefix of the copied files)
#            $4 - number of prefill workers
#            $5 - number of decode workers
#            $6 - comma-separated prefill GPU ids
#            $7 - comma-separated decode GPU ids
# Returns:   non-zero when the benchmark fails or no run directory exists
#######################################
run_experiment() {
  local tag=$1
  local title=$2
  local label=$3
  local prefill_workers=$4
  local decode_workers=$5
  local prefill_gpus=$6
  local decode_gpus=$7
  local previous_dir
  local run_dir
  local -a cli_args=(
    --trace "$TRACE"
    --output-root "$OUTPUT"
    --mechanism kvcache-centric
    --policy kv-aware
    --model-path "$MODEL"
    --prefill-workers "$prefill_workers" --decode-workers "$decode_workers"
    --prefill-tp-size 1 --decode-tp-size 1
    --prefill-gpu-ids "$prefill_gpus" --decode-gpu-ids "$decode_gpus"
    --transfer-backend mooncake
    --gpu-budget 8
    --time-scale 10
    --session-sample-rate 1.0
    --target-duration-s 100000
    --concurrency-limit 32
    --timeout-s 900
    --request-timeout-s 300
    --kvcache-admission-mode worker
    --kvcache-seed-min-turn-id 1
    --kvcache-seed-max-inflight-decode -1
    --kvcache-prefill-backup-policy release-after-transfer
    --kvcache-prefill-priority-eviction
    --pool-poll-interval-s "$POLL_INTERVAL"
  )

  log ""
  log "=== [${tag}] ${title} ==="

  # Remember the newest run directory before launching, so a run that creates
  # no directory of its own is not mistaken for an earlier run's output.
  previous_dir=$(latest_run_dir "$OUTPUT")

  PYTHONPATH=src:third_party/sglang/python \
    "$VENV_PYTHON" -m agentic_pd_hybrid.cli benchmark-live "${cli_args[@]}"

  run_dir=$(latest_run_dir "$OUTPUT")
  if [[ -z "$run_dir" ]]; then
    log "ERROR: no kvcache-centric-* run directory found under $OUTPUT"
    return 1
  fi
  if [[ "$run_dir" == "$previous_dir" ]]; then
    log "WARNING: ${tag} created no new run directory; not saving ${run_dir} as ${label}"
    return 0
  fi
  save_result "$label" "$run_dir"
}

########################################
# Experiment 1: 1P + 7D KVC kv-aware Option D + profile
########################################
run_experiment "EXP1" "1P7D KVC kv-aware Option D + profile" \
  "exp1_1p7d_kvc_v6_p1" 1 7 "0" "1,2,3,4,5,6,7"

########################################
# Experiment 2: 2P + 6D KVC kv-aware Option D + profile
########################################
run_experiment "EXP2" "2P6D KVC kv-aware Option D + profile" \
  "exp2_2p6d_kvc_v6_p1" 2 6 "0,1" "2,3,4,5,6,7"

log ""
log "=== ALL v6 P1 EXPERIMENTS DONE ==="