#!/bin/bash
#
# Run the interference microbenchmark sweep.
# Assumes vLLM is already running on the specified port.
#
# Usage: bash run_sweep.sh [port] [chunk_size]
#
# Arguments:
#   port        Port of the server on 127.0.0.1 (default: 8000).
#   chunk_size  Value forwarded to driver.py as --chunk-size (default: 8192).
#
# Environment:
#   REPS        Value forwarded to driver.py as --reps (default: 5).
#
# Exit status:
#   1 if the server's /v1/models endpoint cannot be fetched or the script
#   directory cannot be entered; otherwise the script ends with driver.py's
#   status (set -e aborts on its failure).

set -euo pipefail

PORT=${1:-8000}
CHUNK_SIZE=${2:-8192}
REPS=${REPS:-5}
# Relative path: it is handed to driver.py after we cd into the script's
# directory below. NOTE(review): presumably driver.py resolves it against
# its working directory - confirm.
OUTPUT_DIR="results/interference"

echo "=== Interference Microbench Sweep ==="
echo "Server: http://127.0.0.1:$PORT"
echo "Chunk size: $CHUNK_SIZE"
echo "Reps: $REPS"
echo "Output: $OUTPUT_DIR"
echo ""

# Quick sanity check: -s keeps curl quiet, -f makes HTTP error responses
# count as failure, so any unreachable/unhealthy endpoint lands in the branch.
if ! curl -sf "http://127.0.0.1:$PORT/v1/models" > /dev/null; then
  # Diagnostics belong on stderr so they are not mixed into captured output.
  echo "ERROR: vLLM not reachable on port $PORT" >&2
  exit 1
fi

# Run from the script's own directory so driver.py is found regardless of
# where the script was invoked from.
script_dir=$(dirname -- "${BASH_SOURCE[0]}")
cd -- "$script_dir" || {
  echo "ERROR: cannot cd to $script_dir" >&2
  exit 1
}

python driver.py \
  --host 127.0.0.1 \
  --port "$PORT" \
  --chunk-size "$CHUNK_SIZE" \
  --decode-batch-sizes "0,1,2,4,6,8,12" \
  --prefill-tokens "512,1024,2048,4096,8192,16384,32768" \
  --reps "$REPS" \
  --output-dir "$OUTPUT_DIR"