# agentic-kvc/v2/exp_b_capacity_knee/gen_synth_trace.py

"""Controlled multi-turn agentic workload for the capacity->APC knee.

Each session grows its prefix cumulatively: turn k appends G fresh blocks and
reuses all blocks of turns 1..k-1 (intra-session prefix reuse, the dominant
mode per the trace, 93% intra-session). Block ids are namespaced per session so
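cross-session reuse is ~0. With T turns per session, turn k carries k*G blocks
of which (k-1)*G are reused, so the intra-session APC ceiling is
[G*T(T-1)/2] / [G*T(T+1)/2] = (T-1)/(T+1).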

timestamp=0 => the replayer fires closed-loop, gated only by max-inflight-sessions.
"""
import argparse
import json

BLOCK = 16  # tokens/block (vLLM default)

# Bytes per token used for the working-set GB estimate printed by main().
# NOTE(review): presumably the per-token KV-cache footprint of the target
# model -- confirm before reusing this script with a different model.
_BYTES_PER_TOKEN = 98304

# Minimum spacing between consecutive sessions' id ranges. These are widened
# automatically when a single session would not fit, so ids never collide.
_MIN_CHAT_STRIDE = 1000
_MIN_BLOCK_STRIDE = 10_000_000


def _positive_int(text):
    """argparse ``type=`` callable: parse *text* as an int and require >= 1."""
    try:
        value = int(text)
    except ValueError:
        raise argparse.ArgumentTypeError(f"invalid int value: {text!r}") from None
    if value < 1:
        raise argparse.ArgumentTypeError(f"must be >= 1, got {value}")
    return value


def _iter_rows(sessions, turns, blocks_per_turn, output_len):
    """Yield one request dict per (session, turn), in session-major order.

    Turn k of a session carries the block ids of turns 1..k (cumulative
    prefix); each turn appends ``blocks_per_turn`` fresh ids.
    """
    # Each session owns a disjoint id range. The historical strides are kept
    # whenever they are wide enough, so existing traces stay byte-identical.
    chat_stride = max(_MIN_CHAT_STRIDE, turns)
    block_stride = max(_MIN_BLOCK_STRIDE, turns * blocks_per_turn)

    for s in range(sessions):
        chat_base = s * chat_stride
        block_base = s * block_stride
        cum = []  # cumulative block ids of this session so far
        for k in range(1, turns + 1):
            first_new = block_base + len(cum)
            cum.extend(range(first_new, first_new + blocks_per_turn))
            yield {
                "chat_id": chat_base + k,
                # 0 marks the first turn of a session (no parent).
                "parent_chat_id": (chat_base + k - 1) if k > 1 else 0,
                # timestamp=0 => the replayer fires closed-loop.
                "timestamp": 0.0,
                "input_length": len(cum) * BLOCK,
                "output_length": output_len,
                "type": "coder",
                "turn": k,
                # Snapshot: ``cum`` keeps growing on later turns.
                "hash_ids": list(cum),
                "session_id": f"s{s}",
            }


def main(argv=None):
    """Generate the synthetic multi-turn trace and write it as JSON lines.

    Args:
        argv: Optional list of command-line arguments. ``None`` (the default)
            makes argparse read ``sys.argv[1:]``, as before.

    Writes one JSON object per request to ``--out`` and prints a short summary
    (request count, per-session working set, intra-session APC ceiling).
    Exits via argparse (status 2) if ``--sessions``, ``--turns`` or
    ``--blocks-per-turn`` is not a positive integer.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--sessions", type=_positive_int, default=40)
    ap.add_argument("--turns", type=_positive_int, default=8)
    ap.add_argument("--blocks-per-turn", type=_positive_int, default=192)  # 3072 tok/turn
    ap.add_argument("--output-len", type=int, default=100)
    ap.add_argument("--out", required=True)
    a = ap.parse_args(argv)

    # Stream rows straight to disk instead of holding every cumulative
    # prefix copy in memory at once.
    n_rows = 0
    with open(a.out, "w", encoding="utf-8") as o:
        for r in _iter_rows(a.sessions, a.turns, a.blocks_per_turn, a.output_len):
            o.write(json.dumps(r) + "\n")
            n_rows += 1

    ws_blocks = a.turns * a.blocks_per_turn
    ws_tokens = ws_blocks * BLOCK
    # reused / total blocks over a session = (T-1)/(T+1); turns >= 1 is
    # guaranteed by the argument validation, so the denominator is >= 2.
    apc = (a.turns - 1) / (a.turns + 1)
    print(f"wrote {n_rows} reqs ({a.sessions} sessions x {a.turns} turns) -> {a.out}")
    print(f"session working set = {ws_blocks} blocks ({ws_tokens} tok, "
          f"{ws_tokens*_BYTES_PER_TOKEN/1e9:.2f} GB); max req = {ws_tokens} tok")
    print(f"intra-session APC ceiling = {apc:.1%}")


# Run the generator only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()