feat(experiments): E4-pressured sweep — force reseed via reject_threshold=1
E4-v1 produced 272 admission rejects (good) but zero /_snapshot HTTP calls (bad; entrance-gate bug fixed in e729d62). E4-v2 went the other way: 0 rejects through 53% of the trace, so the sync function was never even called. E4-pressured locks in the *fix-verified* code path by lowering --kvcache-migration-reject-threshold from 3 to 1. After ONE rejection the policy forces session migration, which lands in _invoke_kvcache_seeded_router → _attempt_d_to_p_sync. With the e729d62 fix in place, the d-to-p-sync.jsonl structural log should now capture every prepare/dump/finalize decision, so we can forensically verify that the D→P fast path is actually delivering KV bytes to P's radix tree.
This commit is contained in:
87
scripts/sweep_e4_pressured.sh
Executable file
87
scripts/sweep_e4_pressured.sh
Executable file
@@ -0,0 +1,87 @@
|
||||
#!/usr/bin/env bash
|
||||
# E4-pressured — same as E4 but tuned to force admission rejections so the
|
||||
# D→P snapshot fast-path actually fires.
|
||||
#
|
||||
# Key delta vs sweep_e4_kvc_v2_d_to_p_sync.sh:
|
||||
# --kvcache-migration-reject-threshold 1 (was 3)
|
||||
# After ONE rejection the policy migrates the session to a different
|
||||
# D, which in turn triggers _invoke_kvcache_seeded_router → D→P sync.
|
||||
# (rely solely on reject_threshold=1 for now; mem-fraction reduction
|
||||
# would need extra_server_args plumbing which is out-of-scope here)
|
||||
#
|
||||
# Hypotheses (same as docs/E4_PROTOCOL_ZH.md but in a stressed regime):
|
||||
# H1' E4-pressured TTFT p99 ≤ E1 TTFT p99
|
||||
# H2' D→P snapshot succeeds for ≥ 20% of reseed-triggering requests
|
||||
# H3' D→P-pushed-then-cache-hit reduces re-prefill segment of reseed
|
||||
# path TTFT measurably
|
||||
|
||||
# Strict mode: abort on command failure, on unset variables, and on a failure
# in any stage of a pipeline.
set -o errexit -o nounset -o pipefail

# Work from the parent of the directory that contains this script, so the
# relative paths used below (outputs/...) resolve from there.
script_dir=$(dirname "$0")
cd "$script_dir/.."

# Refuse to start when CUDA_HOME is unset or empty.
[[ -n "${CUDA_HOME:-}" ]] || {
  echo "ERROR: CUDA_HOME not set. Source scripts/setup_env.sh first." >&2
  exit 1
}
|
||||
|
||||
# Tunables. Each one keeps a non-empty value inherited from the environment
# and otherwise falls back to the default shown here.
: "${MODEL:=/mnt/models/Qwen/Qwen3-30B-A3B-Instruct-2507}"
: "${TRACE:=outputs/inferact_50sess.jsonl}"
: "${OUTPUT:=outputs/e4p_kvc_v2_d_to_p_sync_pressured_50sess}"
: "${IB_DEVICE:=mlx5_60}"
: "${LOAD_FLOOR_BONUS:=200}"
# 1 instead of the baseline 3 (see the header comment of this script).
: "${REJECT_THRESHOLD:=1}"
# NOTE(review): in the visible script MEM_FRACTION is only echoed in the
# banner; no benchmark-live flag consumes it -- confirm before relying on it.
: "${MEM_FRACTION:=0.5}"
|
||||
|
||||
# The trace must exist as a regular file before anything is launched.
[[ -f "$TRACE" ]] || {
  echo "ERROR: trace not found at $TRACE" >&2
  exit 1
}

# Sweep log lives inside the output directory, which is created on demand.
LOG="$OUTPUT/sweep.log"
mkdir -p "$OUTPUT"
|
||||
|
||||
# Timestamped logger.
# Globals:   LOG (read) - file that every message is appended to
# Arguments: message words; joined into one line via "$*"
# Outputs:   "[YYYY-MM-DD HH:MM:SS] message" on stdout and appended to $LOG
log() {
  printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$*" | tee -a "$LOG"
}
|
||||
|
||||
# Run banner: records the effective configuration at the top of the sweep log.
# NOTE(review): the banner reports mem_fraction, but nothing in the visible
# script forwards MEM_FRACTION to benchmark-live -- treat it as informational
# until that is confirmed.
log "=== E4-pressured: KVC v2 + RDMA + load-floor K=$LOAD_FLOOR_BONUS + D→P sync + reject_threshold=$REJECT_THRESHOLD + mem_fraction=$MEM_FRACTION ==="
log "MODEL=$MODEL"
# The redirect target is quoted so a trace path containing spaces or glob
# characters is still read as a single file.
log "TRACE=$TRACE ($(wc -l < "$TRACE") requests)"
log "OUTPUT=$OUTPUT"
|
||||
|
||||
# Label used in log lines and in the names of the copied result files.
label=e4p_kvc_v2_d_to_p_sync_run1
log "=== [E4p] $label starting ==="

# Arguments for `benchmark-live`, kept in an array so every value stays a
# single word regardless of its content. Order matches the flag list this
# sweep has always used.
bench_args=(
  --trace "$TRACE"
  --output-root "$OUTPUT"
  --mechanism kvcache-centric
  --policy kv-aware
  --model-path "$MODEL"
  --prefill-workers 1 --decode-workers 3
  --prefill-tp-size 1 --decode-tp-size 1
  --prefill-gpu-ids 0 --decode-gpu-ids 1,2,3
  --transfer-backend mooncake
  --force-rdma --ib-device "$IB_DEVICE"
  --gpu-budget 4
  --time-scale 1
  --session-sample-rate 1.0
  --target-duration-s 100000
  --concurrency-limit 32
  --timeout-s 1800
  --request-timeout-s 300
  --kvcache-admission-mode worker
  --kvcache-seed-min-turn-id 1
  --kvcache-seed-max-inflight-decode -1
  --kvcache-prefill-backup-policy release-after-transfer
  --kvcache-prefill-priority-eviction
  # Pressure knob for this sweep: value comes from REJECT_THRESHOLD.
  --kvcache-migration-reject-threshold "$REJECT_THRESHOLD"
  --kvcache-direct-max-uncached-tokens 8192
  --kvcache-load-floor-bonus "$LOAD_FLOOR_BONUS"
  --enable-d-to-p-sync
)

# stdout and stderr of the benchmark are both shown and appended to the log.
uv run --no-sync python -m agentic_pd_hybrid.cli benchmark-live "${bench_args[@]}" 2>&1 \
  | tee -a "$LOG"
|
||||
|
||||
# Locate the most recently modified kvcache-centric-* directory under $OUTPUT.
# A glob plus -nt replaces parsing `ls -t` output, so directory names with
# unusual characters are handled and a missing directory is reported
# explicitly instead of aborting silently under `set -e` / `pipefail`.
# NOTE(review): "newest" may be a directory left by an earlier sweep if this
# run created none -- confirm whether $OUTPUT is expected to start empty.
run_dir=""
for candidate in "$OUTPUT"/kvcache-centric-*/; do
  # An unmatched glob stays literal; skip anything that is not a directory.
  [[ -d "$candidate" ]] || continue
  if [[ -z "$run_dir" || "$candidate" -nt "$run_dir" ]]; then
    run_dir=$candidate
  fi
done

if [[ -z "$run_dir" ]]; then
  log "ERROR: [E4p] $label left no kvcache-centric-* directory under $OUTPUT" >&2
  exit 1
fi

log "=== [E4p] $label COMPLETED, artifacts at $run_dir ==="

# Copy the per-request metrics and their summary next to the sweep log under
# label-prefixed names. The copy happens only when the summary file exists.
summary_src="${run_dir%/}/request-metrics.jsonl.summary.json"
metrics_src="${run_dir%/}/request-metrics.jsonl"
if [[ -f "$summary_src" ]]; then
  cp -- "$summary_src" "$OUTPUT/${label}_summary.json"
  cp -- "$metrics_src" "$OUTPUT/${label}_metrics.jsonl"
  log "=== summary saved to $OUTPUT/${label}_summary.json ==="
else
  log "WARNING: [E4p] no summary found at $summary_src; nothing copied"
fi
|
||||
Reference in New Issue
Block a user