agentic-pd-hybrid/scripts/run_exp_b_pd_colo.sh

#!/bin/bash
# Experiment B: pd-colo (direct/colocation) live benchmark.
#
# Topology: 2 direct workers (GPU 0-3 and GPU 4-7), TP4, no router.
# Workload: the full 39K trace from the SWE-Bench 500 instances.
#
# Positional arguments are not read. The script first changes into the parent
# of its own directory, so the relative outputs/ paths below resolve from there.
set -euo pipefail
cd "$(dirname "$0")/.."

# Every benchmark-live option lives in this array so that related flags can be
# grouped and annotated; the array is expanded in order as one argument list.
bench_args=(
  # Input trace and the root directory for results.
  --trace outputs/qwen35-swebench-500.jsonl
  --output-root outputs/swebench-exps

  # Mechanism and policy selection for this experiment.
  --mechanism pd-colo
  --policy default

  # Model checkpoint location (absolute, machine-specific path).
  --model-path /mnt/kzlin/workflow/pd-hybrid/simm-swe-bench/models/Qwen3.5-35B-A3B

  # Worker layout: zero prefill/decode workers, two direct workers with
  # TP size 4 spread over GPU ids 0-7, within a GPU budget of 8.
  --prefill-workers 0
  --decode-workers 0
  --direct-workers 2
  --direct-tp-size 4
  --direct-gpu-ids 0,1,2,3,4,5,6,7
  --transfer-backend mooncake
  --gpu-budget 8

  # Replay settings: time scaling, session sampling, duration and concurrency.
  --time-scale 10
  --session-sample-rate 1.0
  --target-duration-s 100000
  --concurrency-limit 64

  # Timeouts, in seconds.
  --timeout-s 900
  --request-timeout-s 300
)

uv run agentic-pd-hybrid benchmark-live "${bench_args[@]}"