#!/bin/bash
# TP1 v3 sweep — KVC with kv-aware policy (fix routing mismatch)
#
# v2 used --policy default for KVC experiments, causing session routing
# mismatch: replay round-robin ≠ router round-robin → "session not found".
# v3 uses --policy kv-aware for KVC to ensure session affinity.
#
# The script changes into the parent of its own directory, so every relative
# path below (trace, output root, venv, PYTHONPATH entries) is resolved from
# there regardless of the caller's working directory.
set -euo pipefail

cd -- "$(dirname -- "$0")/.."

readonly MODEL=/mnt/kzlin/workflow/pd-hybrid/simm-swe-bench/models/Qwen3-30B-A3B-Instruct-2507
readonly TRACE=outputs/qwen35-swebench-50sess.jsonl
readonly OUTPUT=outputs/qwen3-30b-tp1-v3-kvaware
readonly VENV_PYTHON=.venv/bin/python
readonly RESULTS_FILE=$OUTPUT/sweep_results.txt

mkdir -p -- "$OUTPUT"

#######################################
# Print a timestamped message and append it to the sweep results file.
# Globals:   RESULTS_FILE (appended to)
# Arguments: $* - message text (may be empty)
# Outputs:   the timestamped line on stdout
#######################################
log() {
  printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$*" | tee -a "$RESULTS_FILE"
}

#######################################
# Record the outcome of one experiment: append its summary JSON to the results
# file and copy the summary and per-request metrics under labelled names.
# Logs a warning instead when the run directory holds no summary file.
# Globals:   OUTPUT (read), RESULTS_FILE (appended to)
# Arguments: $1 - label used as the prefix of the copied file names
#            $2 - run directory produced by the benchmark (may be empty)
#######################################
save_result() {
  local label=$1
  local run_dir=$2
  local summary="$run_dir/request-metrics.jsonl.summary.json"

  log "=== $label COMPLETED ==="
  if [[ -f "$summary" ]]; then
    log "Summary:"
    cat -- "$summary" >> "$RESULTS_FILE"
    echo "" >> "$RESULTS_FILE"
    cp -- "$summary" "$OUTPUT/${label}_summary.json"
    cp -- "$run_dir/request-metrics.jsonl" "$OUTPUT/${label}_metrics.jsonl"
    log "Saved to $OUTPUT/${label}_summary.json + ${label}_metrics.jsonl"
  else
    log "WARNING: No summary file found in $run_dir"
  fi
}

#######################################
# Print the most recently modified "$OUTPUT"/kvcache-centric-*/ directory
# (trailing slash kept), or an empty line when none exists.
# Implemented with a glob and -nt rather than `ls -t | head`: under
# `set -o pipefail` a failing `ls` (no match) would abort the whole script
# before save_result could report the missing run.
# Globals:   OUTPUT (read)
# Outputs:   directory path, or an empty line, on stdout
#######################################
newest_run_dir() {
  local candidate
  local newest=""

  for candidate in "$OUTPUT"/kvcache-centric-*/; do
    # An unmatched glob stays literal; skip anything that is not a directory.
    [[ -d "$candidate" ]] || continue
    if [[ -z "$newest" || "$candidate" -nt "$newest" ]]; then
      newest=$candidate
    fi
  done
  printf '%s\n' "$newest"
}

#######################################
# Run one kvcache-centric / kv-aware benchmark and save its results.
# Only the worker counts and GPU id lists vary between experiments; every
# other flag is passed through identically for all runs.
# Globals:   TRACE, OUTPUT, MODEL, VENV_PYTHON (read)
# Arguments: $1 - result label (file-name prefix for save_result)
#            $2 - banner text for the log
#            $3 - number of prefill workers
#            $4 - number of decode workers
#            $5 - comma-separated prefill GPU ids
#            $6 - comma-separated decode GPU ids
# Returns:   non-zero exit of the benchmark aborts the script (set -e)
#######################################
run_experiment() {
  local label=$1
  local banner=$2
  local prefill_workers=$3
  local decode_workers=$4
  local prefill_gpu_ids=$5
  local decode_gpu_ids=$6
  local run_dir

  local -a bench_args=(
    --trace "$TRACE"
    --output-root "$OUTPUT"
    --mechanism kvcache-centric
    --policy kv-aware
    --model-path "$MODEL"
    --prefill-workers "$prefill_workers" --decode-workers "$decode_workers"
    --prefill-tp-size 1 --decode-tp-size 1
    --prefill-gpu-ids "$prefill_gpu_ids" --decode-gpu-ids "$decode_gpu_ids"
    --transfer-backend mooncake
    --gpu-budget 8
    --time-scale 10
    --session-sample-rate 1.0
    --target-duration-s 100000
    --concurrency-limit 32
    --timeout-s 900
    --request-timeout-s 300
    --kvcache-admission-mode worker
    --kvcache-seed-min-turn-id 1
    --kvcache-seed-max-inflight-decode -1
    --kvcache-prefill-backup-policy release-after-transfer
    --kvcache-prefill-priority-eviction
  )

  log ""
  log "=== $banner ==="

  PYTHONPATH=src:third_party/sglang/python \
    "$VENV_PYTHON" -m agentic_pd_hybrid.cli benchmark-live "${bench_args[@]}"

  # The run directory is taken to be the newest kvcache-centric-* directory
  # under the output root once the benchmark has finished.
  run_dir=$(newest_run_dir)
  save_result "$label" "$run_dir"
}

log "Starting TP1 v3 sweep (KVC with kv-aware policy)"
log "Model: $MODEL"
log "Trace: $TRACE (4449 requests, 52 sessions)"
log "Key change: --policy kv-aware for KVC (was --policy default in v2)"

########################################
# Experiment 1: 1P + 7D KVC kv-aware
########################################
run_experiment "exp1_1p7d_kvc_kvaware" "[EXP1] 1P7D KVC kv-aware" \
  1 7 0 1,2,3,4,5,6,7

########################################
# Experiment 2: 2P + 6D KVC kv-aware
########################################
run_experiment "exp2_2p6d_kvc_kvaware" "[EXP2] 2P6D KVC kv-aware" \
  2 6 0,1 2,3,4,5,6,7

########################################
log ""
log "=== ALL TP1 V3 SWEEP EXPERIMENTS DONE ==="