From fbeb968f2f392d2e35ddb2467ad00a1b3366b167 Mon Sep 17 00:00:00 2001
From: Claude Code Agent
Date: Wed, 13 May 2026 10:22:58 +0800
Subject: [PATCH] =?UTF-8?q?feat(experiments):=20E4-pressured=20sweep=20?=
 =?UTF-8?q?=E2=80=94=20force=20reseed=20via=20reject=5Fthreshold=3D1?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

E4-v1 produced 272 admission rejects (good) but zero /_snapshot HTTP
calls (bad, entrance gate bug fixed in e729d62). E4-v2 went the other
way: 0 rejects through 53% of trace, sync function never even called.

E4-pressured locks in the *fix-verified* code path by lowering
--kvcache-migration-reject-threshold from 3 to 1. After ONE rejection
the policy forces session migration, which lands in
_invoke_kvcache_seeded_router → _attempt_d_to_p_sync.

With the e729d62 fix in place, the d-to-p-sync.jsonl structural log
should now capture every prepare/dump/finalize decision so we can
forensically verify the D→P fast path is actually delivering KV bytes
to P's radix tree.
---
 scripts/sweep_e4_pressured.sh | 114 ++++++++++++++++++++++++++++++++++
 1 file changed, 114 insertions(+)
 create mode 100755 scripts/sweep_e4_pressured.sh

diff --git a/scripts/sweep_e4_pressured.sh b/scripts/sweep_e4_pressured.sh
new file mode 100755
index 0000000..71ade30
--- /dev/null
+++ b/scripts/sweep_e4_pressured.sh
@@ -0,0 +1,114 @@
+#!/usr/bin/env bash
+# E4-pressured — same as E4 but tuned to force admission rejections so the
+# D→P snapshot fast-path actually fires.
+#
+# Key delta vs sweep_e4_kvc_v2_d_to_p_sync.sh:
+#   --kvcache-migration-reject-threshold 1   (was 3)
+#     After ONE rejection the policy migrates the session to a different
+#     D, which in turn triggers _invoke_kvcache_seeded_router → D→P sync.
+#   (rely solely on reject_threshold=1 for now; mem-fraction reduction
+#    would need extra_server_args plumbing which is out-of-scope here)
+#
+# Hypotheses (same as docs/E4_PROTOCOL_ZH.md but in a stressed regime):
+#   H1' E4-pressured TTFT p99 ≤ E1 TTFT p99
+#   H2' D→P snapshot succeeds for ≥ 20% of reseed-triggering requests
+#   H3' D→P-pushed-then-cache-hit reduces re-prefill segment of reseed
+#       path TTFT measurably
+#
+# Environment overrides (all optional): MODEL, TRACE, OUTPUT, IB_DEVICE,
+# LOAD_FLOOR_BONUS, REJECT_THRESHOLD. MEM_FRACTION is accepted and logged
+# for bookkeeping only; it is NOT forwarded to benchmark-live (see above).
+# CUDA_HOME must be set before running.
+
+set -euo pipefail
+cd "$(dirname "$0")/.."
+
+if [ -z "${CUDA_HOME:-}" ]; then
+  echo "ERROR: CUDA_HOME not set. Source scripts/setup_env.sh first." >&2
+  exit 1
+fi
+
+MODEL=${MODEL:-/mnt/models/Qwen/Qwen3-30B-A3B-Instruct-2507}
+TRACE=${TRACE:-outputs/inferact_50sess.jsonl}
+OUTPUT=${OUTPUT:-outputs/e4p_kvc_v2_d_to_p_sync_pressured_50sess}
+IB_DEVICE=${IB_DEVICE:-mlx5_60}
+LOAD_FLOOR_BONUS=${LOAD_FLOOR_BONUS:-200}
+REJECT_THRESHOLD=${REJECT_THRESHOLD:-1}
+MEM_FRACTION=${MEM_FRACTION:-0.5}
+
+if [ ! -f "$TRACE" ]; then
+  echo "ERROR: trace not found at $TRACE" >&2
+  exit 1
+fi
+
+mkdir -p "$OUTPUT"
+LOG="$OUTPUT/sweep.log"
+
+# Timestamped logger: prints to stdout and appends to $LOG.
+log() { echo "[$(date '+%Y-%m-%d %H:%M:%S')] $*" | tee -a "$LOG"; }
+
+log "=== E4-pressured: KVC v2 + RDMA + load-floor K=$LOAD_FLOOR_BONUS + D→P sync + reject_threshold=$REJECT_THRESHOLD ==="
+log "MODEL=$MODEL"
+log "TRACE=$TRACE ($(wc -l < "$TRACE") requests)"
+log "OUTPUT=$OUTPUT"
+# Keep the banner honest: no mem-fraction flag is passed to the benchmark.
+log "MEM_FRACTION=$MEM_FRACTION (informational only; not forwarded to benchmark-live)"
+
+label=e4p_kvc_v2_d_to_p_sync_run1
+log "=== [E4p] $label starting ==="
+
+# With pipefail, a non-zero benchmark exit aborts the script here even
+# though tee is the last pipeline stage.
+uv run --no-sync python -m agentic_pd_hybrid.cli benchmark-live \
+  --trace "$TRACE" \
+  --output-root "$OUTPUT" \
+  --mechanism kvcache-centric \
+  --policy kv-aware \
+  --model-path "$MODEL" \
+  --prefill-workers 1 --decode-workers 3 \
+  --prefill-tp-size 1 --decode-tp-size 1 \
+  --prefill-gpu-ids 0 --decode-gpu-ids 1,2,3 \
+  --transfer-backend mooncake \
+  --force-rdma --ib-device "$IB_DEVICE" \
+  --gpu-budget 4 \
+  --time-scale 1 \
+  --session-sample-rate 1.0 \
+  --target-duration-s 100000 \
+  --concurrency-limit 32 \
+  --timeout-s 1800 \
+  --request-timeout-s 300 \
+  --kvcache-admission-mode worker \
+  --kvcache-seed-min-turn-id 1 \
+  --kvcache-seed-max-inflight-decode -1 \
+  --kvcache-prefill-backup-policy release-after-transfer \
+  --kvcache-prefill-priority-eviction \
+  --kvcache-migration-reject-threshold "$REJECT_THRESHOLD" \
+  --kvcache-direct-max-uncached-tokens 8192 \
+  --kvcache-load-floor-bonus "$LOAD_FLOOR_BONUS" \
+  --enable-d-to-p-sync 2>&1 | tee -a "$LOG"
+
+# Pick the most recently modified run directory. A glob loop is used instead
+# of `ls -td ... | head -1` so that a missing directory cannot kill the
+# script silently via set -e/pipefail, and unusual path characters are safe.
+run_dir=""
+for candidate in "$OUTPUT"/kvcache-centric-*/; do
+  [ -d "$candidate" ] || continue
+  if [ -z "$run_dir" ] || [ "$candidate" -nt "$run_dir" ]; then
+    run_dir=${candidate%/}
+  fi
+done
+
+if [ -z "$run_dir" ]; then
+  log "ERROR: [E4p] $label finished but no kvcache-centric-* directory exists under $OUTPUT" >&2
+  exit 1
+fi
+log "=== [E4p] $label COMPLETED, artifacts at $run_dir ==="
+
+summary="$run_dir/request-metrics.jsonl.summary.json"
+if [ -f "$summary" ]; then
+  cp "$summary" "$OUTPUT/${label}_summary.json"
+  cp "$run_dir/request-metrics.jsonl" "$OUTPUT/${label}_metrics.jsonl"
+  log "=== summary saved to $OUTPUT/${label}_summary.json ==="
+else
+  log "WARNING: $summary not found; nothing copied to $OUTPUT"
+fi