PD-disagg crossover: regular synthetic trace + goodput sweep + figure
gen_synthetic_trace.py --mode regular: maximally-regular multi-turn trace (fixed prefix/delta/turns, constant arrivals, zero session skew) to isolate the structural PD cost (per-turn full-context transfer + P/D capacity split) from the skew/hot-pin artifact.

analysis/crossover/: SLO-goodput PD_advantage sweeps bracketing the prefill<->decode bottleneck axis (D1 grow input -> prefill-bound; D2 grow output -> decode-bound).

figs/crossover_pd_advantage.png shows the crossover (y=1) with the agentic operating region annotated.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -161,11 +161,65 @@ def gen_multiturn(
|
||||
return rows
|
||||
|
||||
|
||||
def gen_regular(
    *,
    session_qps: float,
    duration_s: float,
    turns: int,
    prefix_len: int,
    delta_len: int,
    output_len: int,
    inter_turn_gap_s: float,
    seed: int,
) -> list[dict]:
    """Build a fully deterministic multi-turn trace with identical sessions.

    Every session owns one block of prefix hash ids that is repeated on each
    of its turns, and every turn additionally receives its own never-reused
    block of delta hash ids. All sessions have the same turn count, the same
    per-turn input/output lengths, evenly spaced start times and a constant
    gap between turns, so no session is larger or hotter than another. The
    intent is to expose the structural PD cost (per-turn full-context KV
    transfer + P/D capacity split) without any skew/hot-pin effect.

    Args:
        session_qps: Session arrival rate; session ``s`` starts at
            ``s / session_qps`` seconds.
        duration_s: Trace span; together with ``session_qps`` it fixes the
            session count as ``max(1, int(duration_s * session_qps))``.
        turns: Number of rows emitted per session.
        prefix_len: Reused prefix length in tokens; mapped to
            ``max(1, prefix_len // BLOCK_SIZE)`` hash ids per session.
        delta_len: Fresh tokens per turn; mapped to
            ``max(1, delta_len // BLOCK_SIZE)`` hash ids per turn.
        output_len: Value copied into every row's ``output_length``.
        inter_turn_gap_s: Constant spacing between consecutive turn
            timestamps of one session.
        seed: Accepted for signature parity; not read by this function
            (nothing here is randomized).

    Returns:
        Row dicts ordered by ``timestamp``. ``chat_id`` counts up from 1 in
        generation order; ``parent_chat_id`` is -1 on a session's first turn
        and the previous turn's ``chat_id`` afterwards.
    """
    blocks_per_prefix = max(1, prefix_len // BLOCK_SIZE)
    blocks_per_delta = max(1, delta_len // BLOCK_SIZE)
    session_count = max(1, int(duration_s * session_qps))

    trace: list[dict] = []
    hash_cursor = HASH_BASE  # next unused hash id; only ever moves forward

    for session_idx in range(session_count):
        session_start = session_idx / session_qps
        session_name = f"r{session_idx}"

        # One prefix block per session, shared by all of its turns.
        shared_prefix = list(range(hash_cursor, hash_cursor + blocks_per_prefix))
        hash_cursor += blocks_per_prefix

        parent_id = -1
        for turn_idx in range(turns):
            # Each turn gets hash ids no other row uses.
            fresh_delta = list(range(hash_cursor, hash_cursor + blocks_per_delta))
            hash_cursor += blocks_per_delta

            # Rows are appended exactly once per id, so the next id is
            # always one past the number of rows emitted so far.
            row_id = len(trace) + 1
            trace.append({
                "chat_id": row_id,
                "parent_chat_id": parent_id,
                "timestamp": round(session_start + turn_idx * inter_turn_gap_s, 6),
                "input_length": prefix_len + delta_len,
                "output_length": output_len,
                "type": "synthetic_regular",
                "turn": turn_idx + 1,
                "hash_ids": shared_prefix + fresh_delta,
                "session_id": session_name,
            })
            parent_id = row_id

    # Stable sort: rows with equal timestamps keep generation order.
    trace.sort(key=lambda row: row["timestamp"])
    return trace
|
||||
|
||||
|
||||
def main() -> None:
|
||||
p = argparse.ArgumentParser(description=__doc__,
|
||||
formatter_class=argparse.RawDescriptionHelpFormatter)
|
||||
p.add_argument("--out", type=Path, required=True, help="output trace JSONL")
|
||||
p.add_argument("--mode", choices=["vanilla", "multiturn"], default="vanilla")
|
||||
p.add_argument("--mode", choices=["vanilla", "multiturn", "regular"], default="vanilla")
|
||||
p.add_argument("--qps", type=float, help="vanilla: mean Poisson request rate; "
|
||||
"multiturn: mean Poisson SESSION rate")
|
||||
p.add_argument("--duration-s", type=float, default=600.0, help="trace span (s)")
|
||||
@@ -182,7 +236,14 @@ def main() -> None:
|
||||
p.add_argument("--new-user-tokens", type=int, default=256,
|
||||
help="multiturn: fresh user tokens added each subsequent turn")
|
||||
p.add_argument("--inter-turn-gap-s", type=float, default=1.6,
|
||||
help="multiturn: mean think-time between turns")
|
||||
help="multiturn: mean think-time; regular: FIXED think-time")
|
||||
# regular knobs
|
||||
p.add_argument("--prefix-len", type=int, default=16384,
|
||||
help="regular: per-session fixed reused prefix length")
|
||||
p.add_argument("--delta-len", type=int, default=512,
|
||||
help="regular: fixed fresh new-prefill tokens per turn")
|
||||
p.add_argument("--turns", type=int, default=8,
|
||||
help="regular: fixed turns per session")
|
||||
args = p.parse_args()
|
||||
|
||||
if args.mode == "vanilla":
|
||||
@@ -194,6 +255,20 @@ def main() -> None:
|
||||
cfg = {"mode": "vanilla", "qps": args.qps, "duration_s": args.duration_s,
|
||||
"input_len": args.input_len, "output_len": args.output_len,
|
||||
"seed": args.seed, "reuse": "none"}
|
||||
elif args.mode == "regular":
|
||||
assert args.qps, "regular needs --qps (session rate)"
|
||||
rows = gen_regular(
|
||||
session_qps=args.qps, duration_s=args.duration_s, turns=args.turns,
|
||||
prefix_len=args.prefix_len, delta_len=args.delta_len,
|
||||
output_len=args.output_len, inter_turn_gap_s=args.inter_turn_gap_s,
|
||||
seed=args.seed,
|
||||
)
|
||||
cfg = {"mode": "regular", "session_qps": args.qps,
|
||||
"duration_s": args.duration_s, "turns": args.turns,
|
||||
"prefix_len": args.prefix_len, "delta_len": args.delta_len,
|
||||
"output_len": args.output_len, "inter_turn_gap_s": args.inter_turn_gap_s,
|
||||
"reuse_ratio": args.prefix_len / (args.prefix_len + args.delta_len),
|
||||
"seed": args.seed, "reuse": "fixed-intra-session"}
|
||||
else:
|
||||
assert args.qps, "multiturn needs --qps (session rate)"
|
||||
rows = gen_multiturn(
|
||||
|
||||
Reference in New Issue
Block a user