#!/usr/bin/env bash
# Ablation: does the REAL engine-state feed (P2) change each policy's
# performance and ranking vs the stale-shadow baseline?
#
# Each config is run twice (ES=0 shadow-only, ES=1 real-state feed) so the
# ONLY difference is the state source. Sequential, ~47 min each.
#
# Default = the 4 decisive runs (champion + migration, with/without feed).
# Extend CONFIGS for the full sweep (lmetric / unified_kv_both / load_only).
#
# Environment:
#   PROJ_DIR  project checkout root (default: /home/admin/cpfs/wjh/agentic-kv)
#   LOGD      directory receiving one abl_<TAG>.log per run
#             (default: /tmp/dst_break_logs)
#
# Exit status: 0 when every runner invocation exited 0, 1 otherwise.

# -e is deliberately not set: a failing run is recorded and the sweep moves on
# to the next config instead of aborting.
set -uo pipefail

PROJ_DIR="${PROJ_DIR:-/home/admin/cpfs/wjh/agentic-kv}"
R="$PROJ_DIR/microbench/connector_tax/layerwise/run_v3_trace.sh"
AB="--overload-factor 1.3 --lmetric-decode-weight 0.01"
LOGD="${LOGD:-/tmp/dst_break_logs}"

# CONFIG format: "TAG|POLICY|MODE|AB?|ES"
CONFIGS=(
  "unified_AB_es0|unified|baseline|AB|0"
  "unified_AB_es1|unified|baseline|AB|1"
  "v3_AB_lw_es0|unified_v3|layerwise|AB|0"
  "v3_AB_lw_es1|unified_v3|layerwise|AB|1"
  # --- extend for the full sweep ---
  # "lmetric_es0|lmetric|baseline|noAB|0"
  # "lmetric_es1|lmetric|baseline|noAB|1"
  # "ukvboth_AB_es0|unified_kv_both|baseline|AB|0"
  # "ukvboth_AB_es1|unified_kv_both|baseline|AB|1"
)

# Tags whose runner exited non-zero, in execution order (filled by main).
FAILED_TAGS=()

#######################################
# Run a single ablation config through the runner script.
# Globals:   R, AB, LOGD (read)
# Arguments: $1 - one CONFIGS entry, "TAG|POLICY|MODE|AB?|ES"
# Outputs:   banner line, then the last 6 lines of the runner's combined
#            stdout/stderr; the full output goes to $LOGD/abl_<TAG>.log
# Returns:   exit status of the runner script itself
#######################################
run_config() {
  local cfg=$1
  local tag policy mode ab es
  IFS='|' read -r tag policy mode ab es <<< "$cfg"

  # The A/B flags are only forwarded when the 4th field is exactly "AB".
  # They travel as ONE env string; splitting it into arguments is up to the
  # runner (not visible from this file).
  local ab_flags=""
  if [[ "$ab" == "AB" ]]; then
    ab_flags="$AB"
  fi

  echo "########## $tag (policy=$policy mode=$mode ab=$ab es=$es) ##########"
  TAG="$tag" POLICY="$policy" MODE="$mode" AB_FLAGS="$ab_flags" ES="$es" \
    bash "$R" 2>&1 | tee "$LOGD/abl_${tag}.log" | tail -6
  # Stage 0 is the runner; reading PIPESTATUS isolates its status from
  # whatever tee/tail returned. Must be the first command after the pipeline.
  local runner_rc=${PIPESTATUS[0]}
  return "$runner_rc"
}

#######################################
# Print the headline metric lines of every config's log, then list failures.
# Globals:   CONFIGS, LOGD, FAILED_TAGS (read)
# Outputs:   summary on stdout; list of failed tags on stderr
#######################################
print_summary() {
  local cfg tag
  echo "########## ABLATION DONE — summary ##########"
  for cfg in "${CONFIGS[@]}"; do
    tag=${cfg%%|*}
    echo "=== $tag ==="
    # A missing log or a log without metric lines is tolerated on purpose:
    # the section simply stays empty.
    grep -E "requests:|TTFT|migrations:" "$LOGD/abl_${tag}.log" 2>/dev/null || true
  done
  if (( ${#FAILED_TAGS[@]} > 0 )); then
    echo "FAILED runs: ${FAILED_TAGS[*]}" >&2
  fi
}

#######################################
# Run every config sequentially, then print the summary.
# Globals:   CONFIGS, LOGD (read); FAILED_TAGS (written)
# Returns:   0 if all runs succeeded, 1 if any run failed or the log
#            directory could not be created
#######################################
main() {
  if ! mkdir -p "$LOGD"; then
    echo "error: cannot create log directory $LOGD" >&2
    return 1
  fi

  FAILED_TAGS=()
  local cfg tag rc
  for cfg in "${CONFIGS[@]}"; do
    tag=${cfg%%|*}
    rc=0
    run_config "$cfg" || rc=$?
    if (( rc != 0 )); then
      echo "!! $tag: runner exited with status $rc (see $LOGD/abl_${tag}.log)" >&2
      FAILED_TAGS+=("$tag")
    fi
  done

  print_summary

  if (( ${#FAILED_TAGS[@]} > 0 )); then
    return 1
  fi
  return 0
}

main "$@"