diff --git a/microbench/connector_tax/cache_sweep/run_5policy_both_modes.sh b/microbench/connector_tax/cache_sweep/run_5policy_both_modes.sh
new file mode 100644
index 0000000..a5b787b
--- /dev/null
+++ b/microbench/connector_tax/cache_sweep/run_5policy_both_modes.sh
@@ -0,0 +1,56 @@
+#!/usr/bin/env bash
+# 5-policy comparison in BOTH dispatch modes on the SAME ttp-annotated trace,
+# so the only variable is dispatch-mode (tracets vs thinktime). Fresh vLLM
+# (cold APC) per arm via run_5policy_600s.sh -> b3_isolated_policy.sh.
+#
+# Env overrides:
+#   ROOT        repo checkout (default: /home/admin/cpfs/wjh/agentic-kv)
+#   TRACE_FILE  trace replayed in both modes
+#               (default: $ROOT/traces/w600_r0.0015_st30_first600s_ttp.jsonl)
+#
+# Exit status: 0 if both modes succeeded, 1 if any mode (or a pre-flight
+# check) failed.
+
+# `-e` is deliberately not set: a failing mode must not stop the other mode
+# from running; per-mode failures are collected and reported at the end.
+set -uo pipefail
+
+ROOT="${ROOT:-/home/admin/cpfs/wjh/agentic-kv}"
+TRACE_FILE="${TRACE_FILE:-$ROOT/traces/w600_r0.0015_st30_first600s_ttp.jsonl}"
+RUN5="$ROOT/microbench/connector_tax/cache_sweep/run_5policy_600s.sh"
+DATE="$(date +%Y%m%d_%H%M)"
+
+# Fail fast on a bad path before starting any arm.
+for required in "$TRACE_FILE" "$RUN5"; do
+  if [[ ! -r "$required" ]]; then
+    echo "ERROR: missing or unreadable: $required" >&2
+    exit 1
+  fi
+done
+
+echo "=== 5policy x {tracets,thinktime} | trace=$(basename "$TRACE_FILE") | $DATE ==="
+failed_modes=()
+for MODE in tracets thinktime; do
+  OUT="$ROOT/outputs/policy5_600s_${MODE}_${DATE}"
+  echo "############ MODE=$MODE OUT=$OUT $(date) ############"
+  rc=0
+  TRACE="$TRACE_FILE" REPLAY_DISPATCH_MODE="$MODE" OUTROOT="$OUT" \
+    bash "$RUN5" || rc=$?
+  # The run may have died before creating OUT; make sure RUNINFO can be written.
+  mkdir -p "$OUT"
+  {
+    echo "dispatch_mode=$MODE"
+    echo "trace=$TRACE_FILE"
+    echo "exit_status=$rc"
+  } >> "$OUT/RUNINFO.txt"
+  if (( rc != 0 )); then
+    echo "WARNING: MODE=$MODE failed (exit $rc)" >&2
+    failed_modes+=("$MODE")
+  fi
+done
+
+if (( ${#failed_modes[@]} > 0 )); then
+  echo "=== DONE WITH FAILURES (${failed_modes[*]}) $(date) ===" >&2
+  exit 1
+fi
+echo "=== ALL DONE (both modes) $(date) ==="
diff --git a/scripts/b3_isolated_policy.sh b/scripts/b3_isolated_policy.sh
index 9b6644e..7ce6aad 100755
--- a/scripts/b3_isolated_policy.sh
+++ b/scripts/b3_isolated_policy.sh
@@ -126,8 +126,17 @@ done
 echo "[isolated] waiting for vLLM health ..."
 # NIXL init takes ~100-150s per instance even with concurrent launches;
 # Mooncake is closer to ~30-60s. Use a generous 360s timeout to cover
-# both (90s -> 360s vs the previous 180s).
-HEALTH_MAX_TRIES=180
+# both (90s -> 360s vs the previous 180s). Override via env for slow nodes
+# (e.g. HEALTH_MAX_TRIES=300 -> 600s).
+HEALTH_MAX_TRIES="${HEALTH_MAX_TRIES:-180}"
+# The override is presumably used as an integer retry bound by the wait loop
+# below; on a non-numeric value fall back to the default rather than abort.
+case "$HEALTH_MAX_TRIES" in
+  *[!0-9]*)
+    echo "[isolated] invalid HEALTH_MAX_TRIES='$HEALTH_MAX_TRIES', using 180" >&2
+    HEALTH_MAX_TRIES=180
+    ;;
+esac
 for i in $(seq 0 $((N_INSTANCES - 1))); do
   port=$((BASE_PORT + i))
   tries=0