feat(experiments): E1 sweep on 50-session deterministic subset

scripts/sample_trace_subset.py — file-order head-cut that takes the first N sessions of a converted trace. No RNG, no hashing — same input yields byte-identical output (the script prints the output md5, so two runs can be compared directly).

scripts/sweep_e1_naive_1p3d.sh — E1 of ONBOARDING_NEXT_AGENT_ZH §3.1: mechanism=pd-disaggregation, policy=kv-aware, 1P3D, RDMA on (mlx5_60). Defaults to outputs/inferact_50sess.jsonl so E1 and E2 can share the exact same subset; override via the TRACE= env var to run on the full 20,230-request trace.

Reproducing the subset:

    uv run --no-sync python scripts/sample_trace_subset.py \
        --input outputs/inferact_codex_swebenchpro.jsonl \
        --output outputs/inferact_50sess.jsonl \
        --sessions 50
    # expected output_md5: 7bb263a32600ef5a6ef5099ba340a487
    # 1285 requests, mean input_length 67631 tokens

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-12 00:21:36 +08:00
parent b55371fe69
commit bb9cc249cd
2 changed files with 163 additions and 0 deletions
--- a/scripts/sample_trace_subset.py
+++ b/scripts/sample_trace_subset.py
@@ -0,0 +1,128 @@
+r"""Deterministically slice the first N sessions of an agentic-pd-hybrid trace.
+
+Method: scan in file order, count records whose `parent_chat_id == -1` (= a
+session's turn 0), and write every record until the (N+1)-th such record is
+seen. No RNG, no hash-based sampling — re-running on the same input produces a
+byte-identical output. Used to derive matched subsets for paired sweeps (E1 vs
+E2) without spending GPU hours on the full trace.
+
+The trace is copied in binary mode, so every kept line is reproduced byte for
+byte; whitespace-only lines are dropped. Summary statistics, including the MD5
+of the output, are printed to stderr as JSON.
+
+Usage:
+    uv run --no-sync python scripts/sample_trace_subset.py \
+        --input outputs/inferact_codex_swebenchpro.jsonl \
+        --output outputs/inferact_50sess.jsonl \
+        --sessions 50
+"""
+
+from __future__ import annotations
+
+import argparse
+import hashlib
+import json
+import sys
+from pathlib import Path
+
+
+def slice_first_n_sessions(input_path: Path, output_path: Path, n_sessions: int) -> dict:
+    """Copy the leading records of a JSONL trace that cover the first N sessions.
+
+    Records are copied verbatim, in file order, until the (N+1)-th record whose
+    ``parent_chat_id`` is ``-1`` is reached; that record and everything after it
+    are dropped. Whitespace-only lines are skipped rather than parsed.
+
+    Args:
+        input_path: JSONL trace; each record must carry ``parent_chat_id``,
+            ``input_length`` and ``output_length``.
+        output_path: Destination file; overwritten if it exists. Must not be
+            the same file as ``input_path``.
+        n_sessions: Number of leading sessions to keep (>= 0).
+
+    Returns:
+        A dict with ``sessions``, ``requests``, ``input_length_mean``,
+        ``input_length_min``, ``input_length_max``, ``output_length_mean`` and
+        ``output_md5`` (hex MD5 of the bytes written to ``output_path``).
+
+    Raises:
+        ValueError: If ``n_sessions`` is negative, the two paths refer to the
+            same file, or a line is not valid JSON (``json.JSONDecodeError``).
+        KeyError: If a record lacks one of the required fields.
+    """
+    if n_sessions < 0:
+        raise ValueError(f"n_sessions must be >= 0, got {n_sessions}")
+    # Opening the output for writing truncates it, which would destroy the
+    # input before a single record is read if both paths name the same file.
+    if input_path.resolve() == output_path.resolve():
+        raise ValueError(f"input and output are the same file: {input_path}")
+
+    sessions_seen = 0
+    requests_written = 0
+    input_length_sum = 0
+    output_length_sum = 0
+    min_in: int | None = None
+    max_in = 0
+    digest = hashlib.md5()
+
+    # Binary mode keeps the copied lines byte-exact (no newline translation or
+    # re-encoding) and lets the digest be fed while writing, so the output
+    # does not have to be read back afterwards.
+    with input_path.open("rb") as f_in, output_path.open("wb") as f_out:
+        for line in f_in:
+            # Tolerate blank lines (e.g. a stray trailing newline).
+            if not line.strip():
+                continue
+            rec = json.loads(line)
+            if rec["parent_chat_id"] == -1:
+                sessions_seen += 1
+                if sessions_seen > n_sessions:
+                    break
+            f_out.write(line)
+            digest.update(line)
+            requests_written += 1
+            il = int(rec["input_length"])
+            input_length_sum += il
+            output_length_sum += int(rec["output_length"])
+            min_in = il if min_in is None else min(min_in, il)
+            max_in = max(max_in, il)
+
+    return {
+        "sessions": min(sessions_seen, n_sessions),
+        "requests": requests_written,
+        "input_length_mean": input_length_sum / max(1, requests_written),
+        "input_length_min": 0 if min_in is None else min_in,
+        "input_length_max": max_in,
+        "output_length_mean": output_length_sum / max(1, requests_written),
+        "output_md5": digest.hexdigest(),
+    }
+
+
+def main() -> None:
+    """Parse CLI arguments, write the subset and print its stats to stderr."""
+    p = argparse.ArgumentParser(
+        description=__doc__,
+        # Keep the docstring's line breaks so the usage example stays readable.
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+    )
+    p.add_argument(
+        "--input",
+        type=Path,
+        default=Path("outputs/inferact_codex_swebenchpro.jsonl"),
+    )
+    p.add_argument("--output", type=Path, required=True)
+    p.add_argument("--sessions", type=int, default=50)
+    args = p.parse_args()
+
+    if args.sessions < 0:
+        p.error(f"--sessions must be >= 0, got {args.sessions}")
+    if not args.input.exists():
+        p.error(f"input trace not found: {args.input}")
+
+    args.output.parent.mkdir(parents=True, exist_ok=True)
+    stats = slice_first_n_sessions(args.input, args.output, args.sessions)
+    print(json.dumps(stats, indent=2), file=sys.stderr)
+
+
+if __name__ == "__main__":
+    main()
--- a/scripts/sweep_e1_naive_1p3d.sh
+++ b/scripts/sweep_e1_naive_1p3d.sh
@@ -0,0 +1,104 @@
+#!/usr/bin/env bash
+# E1 — naive 1P3D + kv-aware + RDMA, ts=1
+#
+# Tests hypothesis H1 from ONBOARDING_NEXT_AGENT_ZH §3.1: separate the
+# contribution of "1P3D topology + kv-aware policy" from "KVC layer
+# (admission / migration / direct-to-D)".
+#
+# Mechanism = pd-disaggregation (no KVC layer); policy = kv-aware.
+# Topology = 1P3D, RDMA on (mlx5_60 = cuda:0 NUMA-local).
+#
+# Prerequisites:
+#   - source scripts/setup_env.sh (sets CUDA_HOME etc.)
+#   - the trace named by TRACE exists. The default 50-session subset is
+#     derived from outputs/inferact_codex_swebenchpro.jsonl with
+#     scripts/sample_trace_subset.py; the full trace itself comes from
+#     scripts/convert_inferact_to_trace.py.
+#
+# Usage:
+#   bash scripts/sweep_e1_naive_1p3d.sh
+#
+# Override defaults via env:
+#   MODEL=/path TRACE=path OUTPUT=path IB_DEVICE=mlx5_XX bash scripts/sweep_e1_naive_1p3d.sh
+
+set -euo pipefail
+cd "$(dirname "$0")/.."
+
+if [ -z "${CUDA_HOME:-}" ]; then
+  echo "ERROR: CUDA_HOME not set. Source scripts/setup_env.sh first." >&2
+  exit 1
+fi
+
+DEFAULT_TRACE=outputs/inferact_50sess.jsonl
+FULL_TRACE=outputs/inferact_codex_swebenchpro.jsonl
+
+MODEL=${MODEL:-/mnt/models/Qwen/Qwen3-30B-A3B-Instruct-2507}
+TRACE=${TRACE:-$DEFAULT_TRACE}
+OUTPUT=${OUTPUT:-outputs/e1_naive_1p3d_kvaware_rdma_50sess}
+IB_DEVICE=${IB_DEVICE:-mlx5_60}
+
+if [ ! -f "$TRACE" ]; then
+  echo "ERROR: trace not found at $TRACE" >&2
+  # The default trace is a subset cut from the full trace; pointing the
+  # converter at it would silently replace the subset with the full trace.
+  if [ "$TRACE" = "$DEFAULT_TRACE" ]; then
+    echo "Run: uv run --no-sync python scripts/sample_trace_subset.py --input $FULL_TRACE --output $TRACE --sessions 50" >&2
+  else
+    echo "Run: uv run --no-sync python scripts/convert_inferact_to_trace.py --output $TRACE" >&2
+  fi
+  exit 1
+fi
+
+mkdir -p "$OUTPUT"
+LOG="$OUTPUT/sweep.log"
+
+# Print a timestamped message to stdout and append it to the sweep log.
+log() { echo "[$(date '+%Y-%m-%d %H:%M:%S')] $*" | tee -a "$LOG"; }
+
+log "=== E1: naive 1P3D kv-aware + RDMA, ts=1 ==="
+log "MODEL=$MODEL"
+log "TRACE=$TRACE ($(wc -l < "$TRACE") requests)"
+log "OUTPUT=$OUTPUT"
+log "IB_DEVICE=$IB_DEVICE"
+
+label=e1_naive_1p3d_kvaware_run1
+log ""
+log "=== [E1] $label starting ==="
+
+# pipefail makes a benchmark failure abort the script even though tee succeeds.
+uv run --no-sync python -m agentic_pd_hybrid.cli benchmark-live \
+  --trace "$TRACE" \
+  --output-root "$OUTPUT" \
+  --mechanism pd-disaggregation \
+  --policy kv-aware \
+  --model-path "$MODEL" \
+  --prefill-workers 1 --decode-workers 3 \
+  --prefill-tp-size 1 --decode-tp-size 1 \
+  --prefill-gpu-ids 0 --decode-gpu-ids 1,2,3 \
+  --transfer-backend mooncake \
+  --force-rdma --ib-device "$IB_DEVICE" \
+  --gpu-budget 4 \
+  --time-scale 1 \
+  --session-sample-rate 1.0 \
+  --target-duration-s 100000 \
+  --concurrency-limit 32 \
+  --timeout-s 1800 \
+  --request-timeout-s 300 2>&1 | tee -a "$LOG"
+
+# Pick the most recently modified run directory. `|| true` stops set -e and
+# pipefail from killing the script without a message when nothing matches.
+run_dir=$(ls -td "$OUTPUT"/pd-disaggregation-*/ 2>/dev/null | head -n 1 || true)
+if [ -z "$run_dir" ]; then
+  log "ERROR: no pd-disaggregation-* run directory found under $OUTPUT" >&2
+  exit 1
+fi
+run_dir=${run_dir%/}
+log "=== [E1] $label COMPLETED, artifacts at $run_dir ==="
+
+if [ -f "$run_dir/request-metrics.jsonl.summary.json" ]; then
+  cp "$run_dir/request-metrics.jsonl.summary.json" "$OUTPUT/${label}_summary.json"
+  cp "$run_dir/request-metrics.jsonl" "$OUTPUT/${label}_metrics.jsonl"
+  log "=== summary saved to $OUTPUT/${label}_summary.json ==="
+else
+  log "WARNING: $run_dir/request-metrics.jsonl.summary.json not found; no summary copied"
+fi