# agentic-kvc/analysis/characterization/current_results/reproduction_commands.sh

#!/usr/bin/env bash
# Strict mode: abort on command failure, on use of an unset variable, and
# when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# Window 0 audit refresh (legacy run summaries).
# The run directories are collected in an array and handed to --runs in the
# order listed here.
legacy_runs=(
    outputs/gpu_ab_combined
    outputs/gpu_ab_pdsep
    outputs/contention_16s_ts10
    outputs/contention_16s_elastic
    outputs/combined_1000req
    outputs/exp3_pd_sep_tp1_mooncake
)
python3 analysis/characterization/summarize_runs.py \
    --output-dir analysis/characterization/current_results \
    --runs "${legacy_runs[@]}"

# B1' Per-request KV footprint on the full trace (runs on dash0 directly,
# CPU-only; the formatted full trace is hundreds of GiB).
# The leading ~ in the assignment below is expanded by bash to the home
# directory, exactly as it would be when written inline as an argument.
full_trace=~/ali-trace/trace-glm5.1-formatted/051315-051317.jsonl
kv_bytes_per_token=98304   # 96 * 1024
python3 analysis/characterization/analyze.py \
    --trace "$full_trace" \
    --kv-bytes-per-token "$kv_bytes_per_token" \
    --task-name full_trace_with_kv \
    --output-root outputs/characterization \
    --overwrite

# w600 trace APC theoretical bound.
# Reads the w600 trace and writes the bound to a JSON file under outputs/.
apc_trace=traces/w600_r0.0015_st30.jsonl
apc_out=outputs/apc_upper_w600.json
python3 scripts/compute_apc_upper_bound.py --trace "$apc_trace" --out "$apc_out"

# B3 5-policy routing sweep on dash0 (8 × TP1 instances).
#
# Every later B3 step works inside the directory that b3_sweep.sh creates
# (outputs/b3_sweep_<TS>/). Writing the placeholder <TS> literally inside a
# command argument does not work: bash parses "<TS" as "read stdin from the
# file TS" and ">/unified" (etc.) as "write stdout to /unified", so the
# commands would fail or try to write at the filesystem root. The sweep
# directory is therefore resolved into b3_sweep_dir:
#   * B3_SWEEP_DIR, when set in the environment, pins the directory;
#   * otherwise the most recently modified outputs/b3_sweep_*/ directory
#     is used once the sweep has finished.
b3_trace=traces/w600_r0.0015_st30.jsonl

#   First three policies share one vLLM lifecycle (hot-cache, fast):
bash scripts/b3_sweep.sh                  # writes outputs/b3_sweep_<TS>/

b3_sweep_dir=${B3_SWEEP_DIR:-}
if [[ -z "$b3_sweep_dir" ]]; then
    for b3_candidate in outputs/b3_sweep_*/; do
        # An unmatched glob stays literal; skip anything that is not a directory.
        [[ -d "$b3_candidate" ]] || continue
        if [[ -z "$b3_sweep_dir" || "$b3_candidate" -nt "$b3_sweep_dir" ]]; then
            b3_sweep_dir=$b3_candidate
        fi
    done
fi
b3_sweep_dir=${b3_sweep_dir%/}            # normalize: no trailing slash
if [[ ! -d "$b3_sweep_dir" ]]; then
    printf 'error: no B3 sweep directory found (set B3_SWEEP_DIR or check outputs/b3_sweep_*/)\n' >&2
    exit 1
fi

#   Last two run isolated with cold vLLM:
bash scripts/b3_isolated_policy.sh unified \
    "$b3_trace" \
    "${b3_sweep_dir}/unified"

# The capped trace is written into <sweep>/capped/; make sure the directory
# exists before the builder runs (no-op if it is already there).
mkdir -p "${b3_sweep_dir}/capped"
python3 scripts/build_capped_trace.py \
    --input "$b3_trace" \
    --output "${b3_sweep_dir}/capped/trace.jsonl" \
    --max-turns 8

bash scripts/b3_isolated_policy.sh lmetric \
    "${b3_sweep_dir}/capped/trace.jsonl" \
    "${b3_sweep_dir}/capped"

# B3 analysis (joined records + indices) and report.
bash scripts/b3_analyze.sh "$b3_sweep_dir"
python3 scripts/render_b3_report.py --sweep-dir "$b3_sweep_dir"

# B2 PD-colo interference microbench. Launch 2 vLLM instances on
# ports 8100 and 8101 with --enable-prompt-tokens-details first, then:
#
# The value for --model must be supplied through the B2_MODEL_PATH
# environment variable. A literal "<model-path>" placeholder cannot be left
# in the command: bash parses it as an input redirect from a file named
# "model-path" followed by an output redirect, which corrupts the argument
# list. ${VAR:?msg} aborts with the message when the variable is unset or
# empty.
b2_model=${B2_MODEL_PATH:?set B2_MODEL_PATH to the value to pass as --model}
b2_sweep_dir=outputs/b2_microbench/sweep

python3 scripts/b2_interference.py \
    --decode-endpoint http://127.0.0.1:8100 \
    --prefill-endpoint http://127.0.0.1:8101 \
    --model "$b2_model" \
    --out-dir "$b2_sweep_dir" \
    --prefill-sizes 2048,8192,16384,32768,65536 \
    --variants different,same
python3 analysis/characterization/b2_sweep_analysis.py \
    --sweep-dir "$b2_sweep_dir"

# Window 1 figure rendering (CPU only).
# Figures are written to a figures/ subdirectory of the results directory.
window1_results=analysis/characterization/window_1_results
python3 analysis/characterization/render_window1_figures.py \
    --results-dir "$window1_results" \
    --out-dir "${window1_results}/figures"