#!/bin/bash
# agentic-pd-hybrid/scripts/run_all_experiments.sh
#
# Run all 3 PD hybrid experiments sequentially against the same trace:
#   A: pd-disaggregation  (1 prefill + 1 decode worker, TP=4 each)
#   B: pd-colo            (2 direct workers, TP=4 each)
#   C: kvcache-centric    (same prefill/decode split as A, plus --kvcache-* flags)
#
# Uses 52 sessions / 4,449 requests (10% sample of 497 sessions).
# NOTE(review): the trace file name says "50sess" while the line above says
# 52 sessions -- confirm which one is accurate.
# Each experiment takes ~30-40 min.
#
# Usage:    scripts/run_all_experiments.sh   (no arguments; any cwd works)
# Requires: `uv` on PATH and the trace file under outputs/.
# The script aborts at the first failing step (set -e), so a failed
# experiment prevents the remaining ones from running.
set -euo pipefail

# Relative paths below (TRACE, OUTPUT) are resolved from the project root,
# i.e. the parent of the directory that holds this script.
cd "$(dirname "${BASH_SOURCE[0]}")/.."

readonly TRACE="outputs/qwen35-swebench-50sess.jsonl"
readonly MODEL="/mnt/kzlin/workflow/pd-hybrid/simm-swe-bench/models/Qwen3.5-35B-A3B"
# NOTE(review): all three experiments share this output root; presumably the
# tool separates runs underneath it -- confirm nothing gets overwritten.
readonly OUTPUT="outputs/swebench-exps"

# Worker layout used by the experiments that split prefill and decode
# (A and C): one worker each, GPUs 0-3 for prefill and 4-7 for decode.
readonly -a SPLIT_TOPOLOGY=(
  --prefill-workers 1 --decode-workers 1
  --prefill-tp-size 4 --decode-tp-size 4
  --prefill-gpu-ids 0,1,2,3 --decode-gpu-ids 4,5,6,7
)

# Worker layout for the co-located experiment (B): no dedicated
# prefill/decode workers, two direct workers spread over all 8 GPUs.
readonly -a COLO_TOPOLOGY=(
  --prefill-workers 0 --decode-workers 0
  --direct-workers 2 --direct-tp-size 4
  --direct-gpu-ids 0,1,2,3,4,5,6,7
)

# Load and timeout settings that must stay identical across experiments so
# that the results are comparable.
readonly -a COMMON_TAIL=(
  --transfer-backend mooncake
  --gpu-budget 8
  --time-scale 10
  --session-sample-rate 1.0
  --target-duration-s 100000
  --concurrency-limit 32
  --timeout-s 900
  --request-timeout-s 300
)

# Extra flags that only the kvcache-centric experiment (C) passes.
readonly -a KVCACHE_ARGS=(
  --kvcache-admission-mode worker
  --kvcache-seed-min-turn-id 2
  --kvcache-prefill-backup-policy release-after-transfer
  --kvcache-prefill-priority-eviction
)

# Print an error message to stderr and exit with status 1.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

#######################################
# Announce and run one benchmark-live experiment.
# Globals:   TRACE, OUTPUT, MODEL (read)
# Arguments: $1 - experiment label shown in the banner (e.g. "A")
#            $2 - value passed to --mechanism
#            remaining - flags appended verbatim after --model-path
# Outputs:   banner line on stdout, then whatever the benchmark prints
# Returns:   exit status of `uv run`; under set -e a failure ends the script
#######################################
run_experiment() {
  local label=$1 mechanism=$2
  shift 2
  printf '=== Experiment %s: %s ===\n' "$label" "$mechanism"
  uv run agentic-pd-hybrid benchmark-live \
    --trace "$TRACE" \
    --output-root "$OUTPUT" \
    --mechanism "$mechanism" \
    --policy default \
    --model-path "$MODEL" \
    "$@"
}

# Fail fast on missing prerequisites instead of discovering them when the
# first benchmark is launched.
command -v uv >/dev/null || die "uv not found on PATH"
[[ -f "$TRACE" ]] || die "trace file not found: $PWD/$TRACE"

run_experiment A pd-disaggregation "${SPLIT_TOPOLOGY[@]}" "${COMMON_TAIL[@]}"
run_experiment B pd-colo "${COLO_TOPOLOGY[@]}" "${COMMON_TAIL[@]}"
run_experiment C kvcache-centric \
  "${SPLIT_TOPOLOGY[@]}" "${COMMON_TAIL[@]}" "${KVCACHE_ARGS[@]}"

echo "=== All experiments complete ==="