#!/usr/bin/env bash
# Ablation: does the REAL engine-state feed (P2) change each policy's
# performance and ranking vs the stale-shadow baseline?
#
# Each config is run twice (ES=0 shadow-only, ES=1 real-state feed) so the
# ONLY difference is the state source. Sequential, ~47 min each.
#
# Default = the 4 decisive runs (champion + migration, with/without feed).
# Extend CONFIGS for the full sweep (lmetric / unified_kv_both / load_only).
# -u: unset variables are errors; pipefail: a pipeline reports the failure of
# any stage. -e is not set, so a failing command does not abort the script.
set -uo pipefail

# Project checkout root; override through the PROJ_DIR environment variable.
PROJ_DIR="${PROJ_DIR:-/home/admin/cpfs/wjh/agentic-kv}"
# Trace runner script that is invoked once per config.
R="$PROJ_DIR/microbench/connector_tax/layerwise/run_v3_trace.sh"
# Extra runner flags, passed (as AB_FLAGS) only to configs marked "AB".
AB="--overload-factor 1.3 --lmetric-decode-weight 0.01"

# Directory that receives one abl_<tag>.log per run; override through LOGD.
LOGD="${LOGD:-/tmp/dst_break_logs}"
# Fail now rather than discover hours later that no log could be written.
if ! mkdir -p -- "$LOGD"; then
  echo "cannot create log directory: $LOGD" >&2
  exit 1
fi

# Each CONFIGS entry has five '|'-separated fields: "TAG|POLICY|MODE|AB?|ES"
#   AB? : "AB" hands the $AB flag string to the runner, anything else hands none
#   ES  : 0 = shadow-only state, 1 = real-state feed
#
# Every base entry below ("TAGBASE|POLICY|MODE|AB?") is expanded into an
# _es0 / _es1 pair, so the two runs of a config differ only in ES.
CONFIGS=()
for pair_base in \
  "unified_AB|unified|baseline|AB" \
  "v3_AB_lw|unified_v3|layerwise|AB"
  # --- extend for the full sweep (add as further list items above) ---
  # "lmetric|lmetric|baseline|noAB"
  # "ukvboth_AB|unified_kv_both|baseline|AB"
do
  for feed in 0 1; do
    # ${pair_base%%|*} is the tag base; ${pair_base#*|} is the remaining fields.
    CONFIGS+=("${pair_base%%|*}_es${feed}|${pair_base#*|}|${feed}")
  done
done

#######################################
# Run a single ablation config through the trace runner.
# Globals:   R (runner script path), AB (extra flag string),
#            LOGD (log directory) - all read only
# Arguments: $1 - config string "TAG|POLICY|MODE|AB?|ES"
# Outputs:   banner plus the last 6 lines of runner output on stdout;
#            the complete runner output (stdout+stderr) in
#            $LOGD/abl_<TAG>.log; warnings on stderr
# Returns:   exit status of the runner
#######################################
run_config() {
  local tag policy mode ab es
  local ab_flags=""
  local -a stage_rc

  IFS='|' read -r tag policy mode ab es <<< "$1"
  if [[ "$ab" == "AB" ]]; then
    ab_flags="$AB"
  fi

  echo "########## $tag (policy=$policy mode=$mode ab=$ab es=$es) ##########"
  TAG="$tag" POLICY="$policy" MODE="$mode" AB_FLAGS="$ab_flags" ES="$es" \
      bash "$R" 2>&1 | tee "$LOGD/abl_${tag}.log" | tail -6
  # Must be captured immediately: the next command overwrites PIPESTATUS.
  stage_rc=("${PIPESTATUS[@]}")

  # The pipeline ends in `tail`, so without these checks a crashed runner or
  # an unwritable log would pass unnoticed.
  if (( stage_rc[1] != 0 )); then
    echo "WARNING: $tag: tee failed writing $LOGD/abl_${tag}.log (exit ${stage_rc[1]})" >&2
  fi
  if (( stage_rc[0] != 0 )); then
    echo "WARNING: $tag FAILED: runner exited with status ${stage_rc[0]}" >&2
  fi
  return "${stage_rc[0]}"
}

# A failed run is reported by run_config; the remaining runs still execute.
for cfg in "${CONFIGS[@]}"; do
  run_config "$cfg"
done

#######################################
# Print the headline metric lines logged for one run.
# Globals:   LOGD (log directory) - read only
# Arguments: $1 - run tag; the log read is $LOGD/abl_<tag>.log
# Outputs:   "=== <tag> ===" followed by every log line matching
#            requests: / TTFT / migrations:, or a one-line placeholder when
#            the log is missing or holds no such line
# Returns:   always 0, so an incomplete run never fails the summary
#######################################
summarize_run() {
  local tag=$1
  local log="$LOGD/abl_${tag}.log"

  echo "=== $tag ==="
  if [[ ! -f "$log" ]]; then
    # Say so explicitly instead of printing an empty section.
    echo "(log not found: $log)"
    return 0
  fi
  grep -E "requests:|TTFT|migrations:" -- "$log" \
    || echo "(no requests:/TTFT/migrations: lines in $log)"
  return 0
}

echo "########## ABLATION DONE — summary ##########"
for cfg in "${CONFIGS[@]}"; do
  # The tag is the first '|'-separated field of the config string.
  summarize_run "${cfg%%|*}"
done