bench harness: env-tunable vLLM health timeout + both-modes 5-policy driver
- b3_isolated_policy.sh: HEALTH_MAX_TRIES is now env-overridable (default
unchanged: 180 tries -> 360s); slow-node launches can pass
HEALTH_MAX_TRIES=300 (600s) to ride out a single-instance startup flake
without aborting the whole arm.
- run_5policy_both_modes.sh: runs run_5policy_600s.sh twice on the SAME ttp
trace with REPLAY_DISPATCH_MODE={tracets,thinktime}, so the only variable is
dispatch mode. Outputs to outputs/policy5_600s_{mode}_<date>/.
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,20 @@
|
|||||||
|
#!/usr/bin/env bash
# 5-policy comparison in BOTH dispatch modes on the SAME ttp-annotated trace,
# so the only variable is dispatch-mode (tracets vs thinktime). Fresh vLLM
# (cold APC) per arm via run_5policy_600s.sh -> b3_isolated_policy.sh.
#
# Environment (all optional):
#   ROOT        repository root; default /home/admin/cpfs/wjh/agentic-kv
#   TRACE_FILE  trace replayed in both modes; default is the w600 ttp trace
#               under $ROOT/traces
#
# Outputs:
#   $ROOT/outputs/policy5_600s_<mode>_<date>/ for each mode, with
#   dispatch_mode, trace and the driver's exit_status appended to RUNINFO.txt.
#
# Exit status:
#   0 if the driver succeeded in both modes, 1 if a prerequisite is missing
#   or the driver failed in at least one mode.

# `set -e` is not enabled, so a failing mode does not stop the loop; the
# driver's exit status is captured explicitly per mode instead.
set -uo pipefail

ROOT="${ROOT:-/home/admin/cpfs/wjh/agentic-kv}"
TRACE_FILE="${TRACE_FILE:-$ROOT/traces/w600_r0.0015_st30_first600s_ttp.jsonl}"
RUN5="$ROOT/microbench/connector_tax/cache_sweep/run_5policy_600s.sh"
DATE="$(date +%Y%m%d_%H%M)"

# Fail fast on a wrong ROOT/TRACE_FILE instead of discovering it inside
# each arm of the comparison.
for required in "$RUN5" "$TRACE_FILE"; do
  if [[ ! -f "$required" ]]; then
    echo "ERROR: required file not found: $required" >&2
    exit 1
  fi
done

echo "=== 5policy x {tracets,thinktime} | trace=$(basename "$TRACE_FILE") | $DATE ==="

failed_modes=""
for MODE in tracets thinktime; do
  OUT="$ROOT/outputs/policy5_600s_${MODE}_${DATE}"
  echo "############ MODE=$MODE OUT=$OUT $(date) ############"

  # Same trace for both modes; only REPLAY_DISPATCH_MODE differs.
  rc=0
  TRACE="$TRACE_FILE" REPLAY_DISPATCH_MODE="$MODE" OUTROOT="$OUT" \
    bash "$RUN5" || rc=$?

  # Created only AFTER the driver ran, so the driver still sees the same
  # filesystem state as before; this just guarantees RUNINFO.txt can be
  # written even when the driver died before creating its output directory.
  if mkdir -p "$OUT"; then
    {
      echo "dispatch_mode=$MODE"
      echo "trace=$TRACE_FILE"
      echo "exit_status=$rc"
    } >> "$OUT/RUNINFO.txt"
  else
    echo "WARNING: cannot create $OUT; RUNINFO.txt not written" >&2
  fi

  if (( rc != 0 )); then
    echo "WARNING: MODE=$MODE driver exited with status $rc" >&2
    failed_modes="${failed_modes:+$failed_modes }$MODE"
  fi
done

echo "=== ALL DONE (both modes) $(date) ==="

# Surface partial failure to callers (CI, wrapper scripts) without having
# skipped the remaining mode.
if [[ -n "$failed_modes" ]]; then
  echo "ERROR: failed mode(s): $failed_modes" >&2
  exit 1
fi
|
||||||
@@ -126,8 +126,9 @@ done
|
|||||||
echo "[isolated] waiting for vLLM health ..."
|
echo "[isolated] waiting for vLLM health ..."
|
||||||
# NIXL init takes ~100-150s per instance even with concurrent launches;
|
# NIXL init takes ~100-150s per instance even with concurrent launches;
|
||||||
# Mooncake is closer to ~30-60s. Use a generous 360s timeout to cover
|
# Mooncake is closer to ~30-60s. Use a generous 360s timeout to cover
|
||||||
# both (90s -> 360s vs the previous 180s).
|
# both (90s -> 360s vs the previous 180s). Override via env for slow nodes
|
||||||
HEALTH_MAX_TRIES=180
|
# (e.g. HEALTH_MAX_TRIES=300 -> 600s).
|
||||||
|
# Maximum number of health-check attempts per instance; the default of 180
# corresponds to the 360s timeout described above (300 -> 600s). Overridable
# from the environment for slow nodes.
HEALTH_MAX_TRIES="${HEALTH_MAX_TRIES:-180}"
# The value comes from the environment and is presumably used as a numeric
# retry bound by the wait loop below (TODO confirm), so reject anything that
# is not a positive integer rather than letting a typo break the health wait.
if [[ ! "$HEALTH_MAX_TRIES" =~ ^[1-9][0-9]*$ ]]; then
  echo "[isolated] invalid HEALTH_MAX_TRIES='$HEALTH_MAX_TRIES'; using default 180" >&2
  HEALTH_MAX_TRIES=180
fi
|
||||||
for i in $(seq 0 $((N_INSTANCES - 1))); do
|
for i in $(seq 0 $((N_INSTANCES - 1))); do
|
||||||
port=$((BASE_PORT + i))
|
port=$((BASE_PORT + i))
|
||||||
tries=0
|
tries=0
|
||||||
|
|||||||
Reference in New Issue
Block a user