#!/bin/bash
# Run all 3 PD hybrid experiments sequentially.
# Uses 52 sessions / 4,449 requests (10% sample of 497 sessions).
# Each experiment takes ~30-40 min.
#
# NOTE(review): the trace file name below says "50sess" and the runs use
# --session-sample-rate 1.0, while the line above says 52 sessions / 10%
# sample. Presumably the sampling was done when the trace was produced;
# confirm and reconcile the numbers.
#
# Usage: run from anywhere; the script changes to the parent directory of
# the directory that contains it, so the relative paths below resolve there.
set -euo pipefail

cd "$(dirname -- "$0")/.."

readonly TRACE="outputs/qwen35-swebench-50sess.jsonl"
readonly MODEL="/mnt/kzlin/workflow/pd-hybrid/simm-swe-bench/models/Qwen3.5-35B-A3B"
readonly OUTPUT="outputs/swebench-exps"

# Worker/GPU layout shared by experiments A and C: one prefill worker on
# GPUs 0-3 and one decode worker on GPUs 4-7, each with TP size 4.
readonly -a SPLIT_TOPOLOGY_ARGS=(
  --prefill-workers 1 --decode-workers 1
  --prefill-tp-size 4 --decode-tp-size 4
  --prefill-gpu-ids 0,1,2,3 --decode-gpu-ids 4,5,6,7
)

# Flags that are identical in every experiment. Kept in one place so the
# three runs cannot drift apart and stay comparable.
readonly -a COMMON_RUN_ARGS=(
  --transfer-backend mooncake
  --gpu-budget 8
  --time-scale 10
  --session-sample-rate 1.0
  --target-duration-s 100000
  --concurrency-limit 32
  --timeout-s 900
  --request-timeout-s 300
)

#######################################
# Print a banner and run one benchmark-live experiment.
# Globals:   TRACE, OUTPUT, MODEL (read)
# Arguments: $1 - experiment label printed in the banner (e.g. "A")
#            $2 - value passed to --mechanism
#            $@ - remaining flags, appended verbatim after --model-path
# Outputs:   banner on stdout, plus whatever the benchmark command prints
# Returns:   exit status of the benchmark command; because of `set -e` a
#            non-zero status aborts the script, skipping later experiments
#######################################
run_experiment() {
  local label=$1
  local mechanism=$2
  shift 2

  echo "=== Experiment ${label}: ${mechanism} ==="
  uv run agentic-pd-hybrid benchmark-live \
    --trace "$TRACE" \
    --output-root "$OUTPUT" \
    --mechanism "$mechanism" \
    --policy default \
    --model-path "$MODEL" \
    "$@"
}

run_experiment A pd-disaggregation \
  "${SPLIT_TOPOLOGY_ARGS[@]}" \
  "${COMMON_RUN_ARGS[@]}"

# Experiment B uses no prefill/decode workers; instead two "direct" workers
# with TP size 4 are given all eight GPUs.
run_experiment B pd-colo \
  --prefill-workers 0 --decode-workers 0 \
  --direct-workers 2 --direct-tp-size 4 \
  --direct-gpu-ids 0,1,2,3,4,5,6,7 \
  "${COMMON_RUN_ARGS[@]}"

# Experiment C reuses the split topology of A and adds the kvcache-specific
# flags after the common ones.
run_experiment C kvcache-centric \
  "${SPLIT_TOPOLOGY_ARGS[@]}" \
  "${COMMON_RUN_ARGS[@]}" \
  --kvcache-admission-mode worker \
  --kvcache-seed-min-turn-id 2 \
  --kvcache-prefill-backup-policy release-after-transfer \
  --kvcache-prefill-priority-eviction

echo "=== All experiments complete ==="