scripts/sample_trace_subset.py — file-order head-cut that takes the
first N sessions of a converted trace. No RNG, no hashing — same
input yields byte-identical output (the included assertion compares
md5 across two runs).
scripts/sweep_e1_naive_1p3d.sh — E1 of ONBOARDING_NEXT_AGENT_ZH §3.1:
mechanism=pd-disaggregation, policy=kv-aware, 1P3D, RDMA on
(mlx5_60). Defaults to outputs/inferact_50sess.jsonl so E1 and E2
can share the exact same subset; override via TRACE= env var to run
on the full 20,230-request trace.
Reproducing the subset:
  uv run --no-sync python scripts/sample_trace_subset.py \
    --input outputs/inferact_codex_swebenchpro.jsonl \
    --output outputs/inferact_50sess.jsonl \
    --sessions 50
# expected output_md5: 7bb263a32600ef5a6ef5099ba340a487
# 1285 requests, mean input_length 67631 tokens
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
83 lines
2.6 KiB
Bash
Executable File
83 lines
2.6 KiB
Bash
Executable File
#!/usr/bin/env bash
# E1 — naive 1P3D + kv-aware + RDMA, ts=1
#
# Tests hypothesis H1 from ONBOARDING_NEXT_AGENT_ZH §3.1: separate the
# contribution of "1P3D topology + kv-aware policy" from "KVC layer
# (admission / migration / direct-to-D)".
#
# Mechanism = pd-disaggregation (no KVC layer); policy = kv-aware.
# Topology  = 1P3D, RDMA on (mlx5_60 = cuda:0 NUMA-local).
#
# Prerequisites:
#   - source scripts/setup_env.sh   (sets CUDA_HOME etc.)
#   - the file named by TRACE exists. The default is the 50-session subset
#     outputs/inferact_50sess.jsonl, which is cut from the full trace
#     outputs/inferact_codex_swebenchpro.jsonl by
#     scripts/sample_trace_subset.py
#     (run scripts/convert_inferact_to_trace.py if the full trace is missing)
#
# Usage:
#   bash scripts/sweep_e1_naive_1p3d.sh
#
# Override defaults via env:
#   MODEL=/path TRACE=path OUTPUT=path IB_DEVICE=mlx5_XX bash scripts/sweep_e1_naive_1p3d.sh

set -euo pipefail

# All relative paths below are resolved from the directory above this script.
cd "$(dirname "$0")/.."

if [[ -z "${CUDA_HOME:-}" ]]; then
  echo "ERROR: CUDA_HOME not set. Source scripts/setup_env.sh first." >&2
  exit 1
fi

# Paths used both as defaults and in the "how to create it" hints below.
FULL_TRACE=outputs/inferact_codex_swebenchpro.jsonl
DEFAULT_TRACE=outputs/inferact_50sess.jsonl

MODEL=${MODEL:-/mnt/models/Qwen/Qwen3-30B-A3B-Instruct-2507}
TRACE=${TRACE:-$DEFAULT_TRACE}
OUTPUT=${OUTPUT:-outputs/e1_naive_1p3d_kvaware_rdma_50sess}
IB_DEVICE=${IB_DEVICE:-mlx5_60}

if [[ ! -f "$TRACE" ]]; then
  echo "ERROR: trace not found at $TRACE" >&2
  if [[ "$TRACE" == "$DEFAULT_TRACE" ]]; then
    # The default trace is a subset: it must be sampled from the full trace.
    # Pointing the converter's --output at it would store the full trace
    # under the subset's name.
    if [[ ! -f "$FULL_TRACE" ]]; then
      echo "Run: uv run --no-sync python scripts/convert_inferact_to_trace.py --output $FULL_TRACE" >&2
    fi
    echo "Run: uv run --no-sync python scripts/sample_trace_subset.py --input $FULL_TRACE --output $TRACE --sessions 50" >&2
  else
    echo "Run: uv run --no-sync python scripts/convert_inferact_to_trace.py --output $TRACE" >&2
  fi
  exit 1
fi

mkdir -p -- "$OUTPUT"
LOG="$OUTPUT/sweep.log"
|
|
|
|
# Print a timestamped message to stdout and append the same line to $LOG.
# Arguments: the message words; they are joined with single spaces ("$*").
# Globals:   LOG (read) — path of the log file that receives the line.
log() {
  printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$*" \
    | tee -a "$LOG"
}
|
|
|
|
# --- Run banner: record the effective configuration in the sweep log. ---
log "=== E1: naive 1P3D kv-aware + RDMA, ts=1 ==="
log "MODEL=$MODEL"
# The redirect target is quoted so a TRACE path containing spaces still works.
log "TRACE=$TRACE ($(wc -l < "$TRACE") requests)"
log "OUTPUT=$OUTPUT"
log "IB_DEVICE=$IB_DEVICE"

label=e1_naive_1p3d_kvaware_run1
log ""
log "=== [E1] $label starting ==="

# Single benchmark run: 1 prefill worker on GPU 0, 3 decode workers on
# GPUs 1-3. stdout and stderr are both appended to the sweep log; with
# `set -o pipefail` a failing benchmark aborts the script here.
uv run --no-sync python -m agentic_pd_hybrid.cli benchmark-live \
  --trace "$TRACE" \
  --output-root "$OUTPUT" \
  --mechanism pd-disaggregation \
  --policy kv-aware \
  --model-path "$MODEL" \
  --prefill-workers 1 --decode-workers 3 \
  --prefill-tp-size 1 --decode-tp-size 1 \
  --prefill-gpu-ids 0 --decode-gpu-ids 1,2,3 \
  --transfer-backend mooncake \
  --force-rdma --ib-device "$IB_DEVICE" \
  --gpu-budget 4 \
  --time-scale 1 \
  --session-sample-rate 1.0 \
  --target-duration-s 100000 \
  --concurrency-limit 32 \
  --timeout-s 1800 \
  --request-timeout-s 300 2>&1 | tee -a "$LOG"

# Pick the most recently modified pd-disaggregation-* directory under
# $OUTPUT. A glob with -nt replaces `ls -t | head -1`: no parsing of ls
# output, and no silent exit under `set -e -o pipefail` when nothing matches.
run_dir=""
for candidate in "$OUTPUT"/pd-disaggregation-*/; do
  # An unmatched glob stays as the literal pattern; skip it.
  [[ -d "$candidate" ]] || continue
  if [[ -z "$run_dir" || "$candidate" -nt "$run_dir" ]]; then
    run_dir=$candidate
  fi
done

if [[ -z "$run_dir" ]]; then
  log "ERROR: [E1] $label produced no pd-disaggregation-* directory under $OUTPUT"
  exit 1
fi

log "=== [E1] $label COMPLETED, artifacts at $run_dir ==="

# Copy the per-run summary and raw metrics next to the sweep log under a
# stable, label-based name.
if [[ -f "$run_dir/request-metrics.jsonl.summary.json" ]]; then
  cp -- "$run_dir/request-metrics.jsonl.summary.json" "$OUTPUT/${label}_summary.json"
  cp -- "$run_dir/request-metrics.jsonl" "$OUTPUT/${label}_metrics.jsonl"
  log "=== summary saved to $OUTPUT/${label}_summary.json ==="
else
  # Stay best-effort (exit status unchanged), but do not skip silently.
  log "WARNING: no request-metrics.jsonl.summary.json in $run_dir; summary not copied"
fi
|