paper f2a: reuse-topology decomposition + mixture-sensitivity sweep
Full-trace analysis backing figure 2a on the real 2h cluster trace:

- f2a_reuse_topology_analyze.py: infinite-KV-cache (LRU) decomposition of prefix-cache reuse hits into intra-session vs cross-session, by most-recent prior holder of each content-addressed block.
- f2a_mixture_sweep.py: sensitivity of the intra/cross split to the single-turn session fraction (tests whether the 93%-intra sample vs 54.6% full-trace gap is session-mixture selection bias) -- keep all multi-turn sessions, downsample single-turn sessions to each target fraction, and reclassify.

Includes the result JSONs for both.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
120
paper/data/f2a_mixture_sweep.py
Normal file
120
paper/data/f2a_mixture_sweep.py
Normal file
@@ -0,0 +1,120 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
f2a sensitivity: how does the intra/cross reuse split move as we change the
|
||||
single-turn session fraction? (Tests whether the old 93%-intra sample vs 54.6%
|
||||
full-trace gap is just session-mixture selection bias.)
|
||||
|
||||
Keep ALL multi-turn sessions; downsample single-turn sessions to hit each target
|
||||
single-turn fraction f. Re-run the LRU (last-touched), reuse-hits-only
|
||||
classification on the filtered request stream.
|
||||
|
||||
python3 f2a_mixture_sweep.py ~/ali-trace/.../051315-051317.jsonl /tmp/f2a_sweep.json
|
||||
"""
|
||||
import sys, json, time, random
|
||||
from collections import Counter, defaultdict
|
||||
|
||||
# Command line: <trace.jsonl> [<out.json>].
PATH = sys.argv[1]
OUT = sys.argv[2] if len(sys.argv) > 2 else "/tmp/f2a_sweep.json"
random.seed(0)  # fixed seed so the random downsampling below is reproducible

t0 = time.time()
chat_parent = {}  # chat_id -> parent_chat_id, with 0 standing in for "no parent"
records = []      # one (timestamp, chat_id, hash_ids) tuple per request
with open(PATH) as f:
    for line in f:
        if not line.strip():
            # Tolerate blank lines (e.g. a trailing newline) instead of
            # letting json.loads raise on them.
            continue
        d = json.loads(line)
        cid = d["chat_id"]
        pc = d.get("parent_chat_id")
        chat_parent[cid] = 0 if pc is None else pc
        # A missing or null hash_ids is normalised to an empty list.
        records.append((d.get("timestamp", 0.0), cid, d.get("hash_ids") or []))
sys.stderr.write(f"[{time.time()-t0:.0f}s] loaded {len(records)}\n")
|
||||
|
||||
root_cache = {}  # chat_id -> resolved session root (memo shared by all calls)


def resolve_root(cid):
    """Return the session root of ``cid`` by following ``chat_parent`` links.

    The walk stops at the first chat whose parent is the sentinel ``0`` or is
    not itself a key of ``chat_parent``, or as soon as it reaches a chat that
    is already in ``root_cache``. Every chat visited on the way, and ``cid``
    itself, is memoised in ``root_cache``.

    A cyclic parent chain is cut at the first chat that is visited twice;
    that chat becomes the root of everything visited during the walk.
    """
    chain = []
    seen = set()
    cur = cid
    while True:
        if cur in root_cache:
            r = root_cache[cur]
            break
        if cur in seen:
            # Cycle in the parent links: stop here rather than walking it
            # a fixed number of times.
            r = cur
            break
        p = chat_parent.get(cur, 0)
        if p == 0 or p not in chat_parent:
            r = cur
            break
        seen.add(cur)
        chain.append(cur)
        cur = p
    for nd in chain:
        root_cache[nd] = r
    root_cache[cid] = r
    return r
|
||||
|
||||
# Replay order is timestamp order.
records.sort(key=lambda x: x[0])
# roots[i] is the session root of records[i]; classify() zips the two lists.
roots = [resolve_root(cid) for _, cid, _ in records]
req_per_root = Counter(roots)
# A session is "single-turn" when exactly one request resolves to its root.
single_roots = [r for r, c in req_per_root.items() if c == 1]
multi_roots = [r for r, c in req_per_root.items() if c >= 2]
M = len(multi_roots)
sys.stderr.write(f"[{time.time()-t0:.0f}s] roots: single={len(single_roots)} multi={M}\n")
|
||||
|
||||
# Exclusive upper edges of the reuse-gap buckets.
GAP_EDGES = [1, 10, 60, 300, 1800, 3600, float("inf")]


def gbucket(g):
    """Return the index of the first edge in ``GAP_EDGES`` that ``g`` is below.

    A gap that is below no edge (``inf`` or NaN) falls into the last bucket.
    """
    for i, e in enumerate(GAP_EDGES):
        if g < e:
            return i
    return len(GAP_EDGES) - 1
|
||||
|
||||
def classify(kept):  # kept=None -> keep all
    """Replay the trace and split block reuse hits into intra/cross-session.

    Walks ``records`` together with the parallel ``roots`` list and inspects
    every block hash of every retained request. The first occurrence of a
    hash counts as ``new``; any later occurrence is a reuse hit, classified
    by whether the request that touched the hash last had the same root.
    After each occurrence the hash's last root and timestamp are refreshed.

    Args:
        kept: container of session roots to retain, or ``None`` to retain
            every request.

    Returns:
        ``(intra, cross, new, rec_i, rec_c)`` where ``rec_i`` / ``rec_c`` are
        histograms of intra / cross hits indexed by ``gbucket`` of the time
        since the hash was last touched.
    """
    last_root = {}  # block hash -> root of the request that touched it last
    last_ts = {}    # block hash -> timestamp of that touch
    intra = cross = new = 0
    rec_i = [0] * len(GAP_EDGES)
    rec_c = [0] * len(GAP_EDGES)
    for (ts, _cid, hs), r in zip(records, roots):
        if kept is not None and r not in kept:
            continue
        for h in hs:
            lr = last_root.get(h)
            if lr is None:
                new += 1
            else:
                # Negative gaps are clamped to zero before bucketing.
                gb = gbucket(max(0.0, ts - last_ts[h]))
                if lr == r:
                    intra += 1
                    rec_i[gb] += 1
                else:
                    cross += 1
                    rec_c[gb] += 1
            last_root[h] = r
            last_ts[h] = ts
    return intra, cross, new, rec_i, rec_c
|
||||
|
||||
def cum_le(rec, idx):  # cumulative fraction with gap-bucket <= idx
    """Return the share of ``rec``'s total that lies in buckets ``0..idx``.

    An all-zero or empty histogram yields ``0.0`` because the denominator
    falls back to 1.
    """
    tot = sum(rec) or 1
    return sum(rec[: idx + 1]) / tot
|
||||
|
||||
# "full" replays the trace unchanged; a numeric label is a target single-turn
# session fraction. The second tuple element is unused.
targets = [("full", None), (0.75, None), (0.50, None),
           (0.25, None), (0.10, None), (0.00, None)]
rows = []
for label, _ in targets:
    if label == "full":
        kept = None
        n_all = len(single_roots) + M
        f_actual = len(single_roots) / n_all if n_all else 0.0
    else:
        f = float(label)
        # S single-turn sessions next to the M multi-turn ones give
        # S / (S + M) = f, i.e. S = M * f / (1 - f), capped at what exists.
        S = min(len(single_roots), int(round(M * f / (1 - f)))) if f < 1 else len(single_roots)
        keep_single = set(random.sample(single_roots, S)) if S < len(single_roots) else set(single_roots)
        kept = set(multi_roots) | keep_single
        f_actual = S / (S + M) if S + M else 0.0
    intra, cross, new, rec_i, rec_c = classify(kept)
    reuse = intra + cross
    reuse_den = reuse or 1  # avoid ZeroDivisionError when there are no reuse hits
    n_sess = (len(single_roots) + M) if kept is None else len(kept)
    row = {
        "target": label, "single_turn_frac": round(f_actual, 4), "n_sessions": n_sess,
        "new": new, "intra": intra, "cross": cross, "reuse": reuse,
        "intra_frac_of_reuse": round(intra / reuse_den, 4),
        "cross_frac_of_reuse": round(cross / reuse_den, 4),
        # Bucket index 2 is the last gap bucket whose upper edge is 60.
        "intra_le60s": round(cum_le(rec_i, 2), 4),
        "cross_le60s": round(cum_le(rec_c, 2), 4),
    }
    rows.append(row)
    sys.stderr.write(f"[{time.time()-t0:.0f}s] f={row['single_turn_frac']}: "
                     f"intra={row['intra_frac_of_reuse']} cross={row['cross_frac_of_reuse']}\n")
|
||||
|
||||
# Persist the sweep; the context manager closes (and flushes) the output file.
with open(OUT, "w") as out_f:
    json.dump({"rows": rows, "n_single": len(single_roots), "n_multi": M}, out_f, indent=2)

# Human-readable table on stdout, one line per sweep row.
print(f"{'single-turn%':>12} {'sessions':>10} {'intra%':>8} {'cross%':>8} {'intra<=60s':>11} {'cross<=60s':>11}")
for r in rows:
    print(f"{r['single_turn_frac']*100:>11.1f}% {r['n_sessions']:>10} "
          f"{r['intra_frac_of_reuse']*100:>7.1f}% {r['cross_frac_of_reuse']*100:>7.1f}% "
          f"{r['intra_le60s']*100:>10.1f}% {r['cross_le60s']*100:>10.1f}%")
|
||||
84
paper/data/f2a_mixture_sweep_result.json
Normal file
84
paper/data/f2a_mixture_sweep_result.json
Normal file
@@ -0,0 +1,84 @@
|
||||
{
|
||||
"rows": [
|
||||
{
|
||||
"target": "full",
|
||||
"single_turn_frac": 0.9026,
|
||||
"n_sessions": 1307276,
|
||||
"new": 20650883,
|
||||
"intra": 65166144,
|
||||
"cross": 54134925,
|
||||
"reuse": 119301069,
|
||||
"intra_frac_of_reuse": 0.5462,
|
||||
"cross_frac_of_reuse": 0.4538,
|
||||
"intra_le60s": 0.8865,
|
||||
"cross_le60s": 0.8706
|
||||
},
|
||||
{
|
||||
"target": 0.75,
|
||||
"single_turn_frac": 0.75,
|
||||
"n_sessions": 509144,
|
||||
"new": 15446415,
|
||||
"intra": 66081759,
|
||||
"cross": 26932604,
|
||||
"reuse": 93014363,
|
||||
"intra_frac_of_reuse": 0.7104,
|
||||
"cross_frac_of_reuse": 0.2896,
|
||||
"intra_le60s": 0.8844,
|
||||
"cross_le60s": 0.8568
|
||||
},
|
||||
{
|
||||
"target": 0.5,
|
||||
"single_turn_frac": 0.5,
|
||||
"n_sessions": 254572,
|
||||
"new": 12843712,
|
||||
"intra": 66548474,
|
||||
"cross": 18990485,
|
||||
"reuse": 85538959,
|
||||
"intra_frac_of_reuse": 0.778,
|
||||
"cross_frac_of_reuse": 0.222,
|
||||
"intra_le60s": 0.8832,
|
||||
"cross_le60s": 0.8881
|
||||
},
|
||||
{
|
||||
"target": 0.25,
|
||||
"single_turn_frac": 0.25,
|
||||
"n_sessions": 169715,
|
||||
"new": 11553493,
|
||||
"intra": 66732961,
|
||||
"cross": 16726772,
|
||||
"reuse": 83459733,
|
||||
"intra_frac_of_reuse": 0.7996,
|
||||
"cross_frac_of_reuse": 0.2004,
|
||||
"intra_le60s": 0.8827,
|
||||
"cross_le60s": 0.9087
|
||||
},
|
||||
{
|
||||
"target": 0.1,
|
||||
"single_turn_frac": 0.1,
|
||||
"n_sessions": 141429,
|
||||
"new": 11036894,
|
||||
"intra": 66798704,
|
||||
"cross": 16084035,
|
||||
"reuse": 82882739,
|
||||
"intra_frac_of_reuse": 0.8059,
|
||||
"cross_frac_of_reuse": 0.1941,
|
||||
"intra_le60s": 0.8826,
|
||||
"cross_le60s": 0.9152
|
||||
},
|
||||
{
|
||||
"target": 0.0,
|
||||
"single_turn_frac": 0.0,
|
||||
"n_sessions": 127286,
|
||||
"new": 10724167,
|
||||
"intra": 66834552,
|
||||
"cross": 15799085,
|
||||
"reuse": 82633637,
|
||||
"intra_frac_of_reuse": 0.8088,
|
||||
"cross_frac_of_reuse": 0.1912,
|
||||
"intra_le60s": 0.8825,
|
||||
"cross_le60s": 0.9184
|
||||
}
|
||||
],
|
||||
"n_single": 1179990,
|
||||
"n_multi": 127286
|
||||
}
|
||||
182
paper/data/f2a_reuse_topology_analyze.py
Normal file
182
paper/data/f2a_reuse_topology_analyze.py
Normal file
@@ -0,0 +1,182 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
f2a reuse topology — full-trace, infinite-KV-cache decomposition (LRU semantics).
|
||||
|
||||
Question: on the real 2h cluster trace, assuming an *infinite* KV cache (nothing
|
||||
ever evicted), where do prefix-cache REUSE HITS come from?
|
||||
|
||||
We classify only reuse hits (the 1st occurrence of a block is `new` = irreducible
|
||||
prefill; it is reported only as context for the APC ceiling, not in the split).
|
||||
|
||||
Blocks (content-addressed `hash_id`s) are processed in timestamp order. For each hit we
|
||||
look at the block's **most recent prior holder** (last computed OR used = LRU):
|
||||
|
||||
intra : last touch was the SAME session (parent_chat_id chain)
|
||||
cross : last touch was a DIFFERENT session
|
||||
|
||||
After classifying, the block's last-holder / last-time are updated to the current
|
||||
request (LRU refresh). The reuse "recency" is the **LRU reuse distance** = time since
|
||||
the block was last touched (what a finite TTL/LRU cache would need to retain).
|
||||
|
||||
`cross` is further resolved by *block popularity* = number of distinct sessions that
|
||||
ever touch the block: a handful of hugely-popular blocks are the shared system/tool
|
||||
prefix; low-popularity cross blocks are genuine cross-session content.
|
||||
|
||||
Run on dash2 (trace lives there):
|
||||
python3 f2a_reuse_topology_analyze.py \
|
||||
~/ali-trace/trace-glm5.1-formatted/051315-051317.jsonl /tmp/f2a_result.json
|
||||
"""
|
||||
import sys, json, time
|
||||
from collections import defaultdict
|
||||
|
||||
# Command line: <trace.jsonl> [<out.json>].
PATH = sys.argv[1]
OUT = sys.argv[2] if len(sys.argv) > 2 else "/tmp/f2a_result.json"
POP_CAP = 4096  # cap per-block root set; >= this is "very shared", buckets unaffected

t0 = time.time()
chat_parent = {}  # chat_id -> parent_chat_id, with 0 standing in for "no parent"
records = []  # (ts, chat_id, hash_ids)
total_input_tokens = 0
total_blocks = 0
turn1 = 0  # requests whose "turn" field is 1, missing, or falsy
n = 0      # requests loaded
with open(PATH) as f:
    for line in f:
        if not line.strip():
            # Tolerate blank lines (e.g. a trailing newline) instead of
            # letting json.loads raise on them.
            continue
        d = json.loads(line)
        cid = d["chat_id"]
        pc = d.get("parent_chat_id")
        chat_parent[cid] = 0 if pc is None else pc
        hs = d.get("hash_ids") or []
        records.append((d.get("timestamp", 0.0), cid, hs))
        total_input_tokens += d.get("input_length", 0) or 0
        total_blocks += len(hs)
        if (d.get("turn", 1) or 1) == 1:
            turn1 += 1
        n += 1
sys.stderr.write(f"[{time.time()-t0:.0f}s] loaded {n} reqs, {total_blocks} block-occ\n")
|
||||
|
||||
# resolve session root by following parent_chat_id to turn-1 / out-of-window head
root_cache = {}  # chat_id -> resolved session root (memo shared by all calls)


def resolve_root(cid):
    """Return the session root of ``cid`` by following ``chat_parent`` links.

    The walk stops at the first chat whose parent is the sentinel ``0`` or is
    not itself a key of ``chat_parent``, or as soon as it reaches a chat that
    is already in ``root_cache``. Every chat visited on the way, and ``cid``
    itself, is memoised in ``root_cache``.

    A cyclic parent chain is cut at the first chat that is visited twice;
    that chat becomes the root of everything visited during the walk.
    """
    chain = []
    seen = set()
    cur = cid
    while True:
        if cur in root_cache:
            r = root_cache[cur]
            break
        if cur in seen:
            # Cycle in the parent links: stop here rather than walking it
            # a fixed number of times.
            r = cur
            break
        p = chat_parent.get(cur, 0)
        if p == 0 or p not in chat_parent:
            r = cur
            break
        seen.add(cur)
        chain.append(cur)
        cur = p
    for nd in chain:
        root_cache[nd] = r
    root_cache[cid] = r
    return r
|
||||
|
||||
# Replay order is timestamp order.
records.sort(key=lambda r: r[0])
sys.stderr.write(f"[{time.time()-t0:.0f}s] sorted by ts\n")

# Per-block replay state, keyed by block hash.
last_root = {}   # block -> root of MOST RECENT holder (LRU)
last_ts = {}     # block -> ts of most recent touch (LRU)
roots_of = defaultdict(set)   # block -> set of distinct roots (capped) = popularity
intra_cnt = defaultdict(int)  # block -> intra reuse hits
cross_cnt = defaultdict(int)  # block -> cross reuse hits
new = intra = cross = 0       # global occurrence counters
|
||||
|
||||
# LRU reuse distance of each hit: gap = consumer_ts - last_touch_ts
GAP_EDGES = [1, 10, 60, 300, 1800, 3600, float("inf")]  # seconds
GAP_LABELS = ["<1s", "1-10s", "10-60s", "1-5min", "5-30min", "30-60min", ">60min"]
rec_intra = [0] * len(GAP_EDGES)  # intra hits per gap bucket
rec_cross = [0] * len(GAP_EDGES)  # cross hits per gap bucket


def gap_bucket(g):
    """Return the index of the first edge in ``GAP_EDGES`` that ``g`` is below.

    A gap that is below no edge (``inf`` or NaN) falls into the last bucket.
    """
    for i, e in enumerate(GAP_EDGES):
        if g < e:
            return i
    return len(GAP_EDGES) - 1
|
||||
|
||||
# Replay every request in timestamp order and classify each block occurrence.
for ts, cid, hs in records:
    if not hs:
        continue
    r = resolve_root(cid)
    for h in hs:
        lr = last_root.get(h)
        if lr is None:
            new += 1  # first compute: not a hit
        else:
            # Negative gaps are clamped to zero before bucketing.
            gb = gap_bucket(max(0.0, ts - last_ts[h]))
            if lr == r:
                intra += 1
                intra_cnt[h] += 1
                rec_intra[gb] += 1
            else:
                cross += 1
                cross_cnt[h] += 1
                rec_cross[gb] += 1
        last_root[h] = r  # LRU refresh: now held by current session
        last_ts[h] = ts
        s = roots_of[h]
        if len(s) < POP_CAP:
            # Stop growing the set once it holds POP_CAP roots.
            s.add(r)
sys.stderr.write(f"[{time.time()-t0:.0f}s] classified: new={new} intra={intra} cross={cross}\n")
|
||||
|
||||
# popularity buckets: distinct sessions touching a block
POP_EDGES = [2, 10, 100, 1000, float("inf")]
POP_LABELS = ["1 (private)", "2-9", "10-99", "100-999", ">=1000"]


def pop_bucket(p):
    """Return the index into ``POP_LABELS`` for a popularity of ``p``.

    ``p <= 1`` maps to bucket 0; otherwise the result is the index of the
    first edge in ``POP_EDGES[1:]`` that ``p`` is below, falling back to the
    last bucket.
    """
    if p <= 1:
        return 0
    for i, e in enumerate(POP_EDGES[1:], start=1):
        if p < e:
            return i
    return len(POP_LABELS) - 1
|
||||
# Roll the per-block counters up into one total per popularity bucket.
pop_blocks = [0] * len(POP_LABELS)  # distinct blocks per bucket
pop_intra = [0] * len(POP_LABELS)   # intra reuse hits per bucket
pop_cross = [0] * len(POP_LABELS)   # cross reuse hits per bucket
for h in last_root:
    p = len(roots_of[h])
    b = pop_bucket(p)
    pop_blocks[b] += 1
    pop_intra[b] += intra_cnt.get(h, 0)
    pop_cross[b] += cross_cnt.get(h, 0)
|
||||
|
||||
# Assemble the result document. Every ratio falls back to 0.0 when its
# denominator is zero, so an empty trace does not raise ZeroDivisionError.
eff_blk = total_input_tokens / total_blocks if total_blocks else 0.0
total_occ = new + intra + cross
reuse = intra + cross
result = {
    "trace": PATH,
    "semantics": "LRU last-touched; reuse-hits only (new excluded from split)",
    "n_requests": n,
    "n_sessions": len(set(resolve_root(c) for c in chat_parent)),
    "turn1_frac": turn1 / n if n else 0.0,
    "block_size_tokens_eff": eff_blk,
    "total_input_tokens": total_input_tokens,
    "total_block_occ": total_occ,
    "distinct_blocks": len(last_root),
    "new_occ": new,  # context only
    "apc_ceiling": reuse / total_occ if total_occ else 0.0,  # context only
    # REUSE-ONLY decomposition (the headline)
    "reuse_total": reuse,
    "reuse": {"intra": intra, "cross": cross},
    "reuse_frac": {
        "intra": intra / reuse if reuse else 0.0,
        "cross": cross / reuse if reuse else 0.0,
    },
    # cross resolved by popularity (over reuse hits)
    "pop_labels": POP_LABELS,
    "pop_blocks": pop_blocks,
    "pop_intra": pop_intra,
    "pop_cross": pop_cross,
    # LRU reuse-distance recency (over reuse hits)
    "gap_labels": GAP_LABELS,
    "rec_intra": rec_intra,
    "rec_cross": rec_cross,
}
with open(OUT, "w") as f:
    json.dump(result, f, indent=2)
sys.stderr.write(f"[{time.time()-t0:.0f}s] wrote {OUT}\n")
|
||||
|
||||
# human summary
print(json.dumps({k: result[k] for k in
                  ("n_requests", "n_sessions", "distinct_blocks", "reuse_total",
                   "reuse_frac", "apc_ceiling")}, indent=2))
print(f"new(context)={new} intra={intra} cross={cross}")
# One line per popularity bucket: blocks, intra hits, cross hits.
print("popularity   blocks / intra-hits / cross-hits:")
for i, lab in enumerate(POP_LABELS):
    print(f"  {lab:>12}: {pop_blocks[i]:>10} | {pop_intra[i]:>11} | {pop_cross[i]:>11}")
# One line per gap bucket: intra hits, cross hits.
print("LRU reuse-distance   intra / cross:")
for i, lab in enumerate(GAP_LABELS):
    print(f"  {lab:>8}: {rec_intra[i]:>11} | {rec_cross[i]:>11}")
|
||||
77
paper/data/f2a_reuse_topology_result.json
Normal file
77
paper/data/f2a_reuse_topology_result.json
Normal file
@@ -0,0 +1,77 @@
|
||||
{
|
||||
"trace": "051315-051317.jsonl",
|
||||
"semantics": "LRU last-touched; reuse-hits only (new excluded from split)",
|
||||
"n_requests": 2114220,
|
||||
"n_sessions": 1307276,
|
||||
"turn1_frac": 0.6183254344391785,
|
||||
"block_size_tokens_eff": 508.1517503092776,
|
||||
"total_input_tokens": 71116829368,
|
||||
"total_block_occ": 139951952,
|
||||
"distinct_blocks": 20650883,
|
||||
"new_occ": 20650883,
|
||||
"apc_ceiling": 0.8524430513123532,
|
||||
"reuse_total": 119301069,
|
||||
"reuse": {
|
||||
"intra": 65166144,
|
||||
"cross": 54134925
|
||||
},
|
||||
"reuse_frac": {
|
||||
"intra": 0.5462326913432771,
|
||||
"cross": 0.45376730865672293
|
||||
},
|
||||
"pop_labels": [
|
||||
"1 (private)",
|
||||
"2-9",
|
||||
"10-99",
|
||||
"100-999",
|
||||
">=1000"
|
||||
],
|
||||
"pop_blocks": [
|
||||
14581108,
|
||||
5535433,
|
||||
517069,
|
||||
16153,
|
||||
1120
|
||||
],
|
||||
"pop_intra": [
|
||||
44515497,
|
||||
14288480,
|
||||
5421050,
|
||||
924419,
|
||||
16698
|
||||
],
|
||||
"pop_cross": [
|
||||
0,
|
||||
20230912,
|
||||
13750153,
|
||||
7689338,
|
||||
12464522
|
||||
],
|
||||
"gap_labels": [
|
||||
"<1s",
|
||||
"1-10s",
|
||||
"10-60s",
|
||||
"1-5min",
|
||||
"5-30min",
|
||||
"30-60min",
|
||||
">60min"
|
||||
],
|
||||
"rec_intra": [
|
||||
390952,
|
||||
26060293,
|
||||
31317556,
|
||||
5877221,
|
||||
1384772,
|
||||
109673,
|
||||
25677
|
||||
],
|
||||
"rec_cross": [
|
||||
13222875,
|
||||
22254795,
|
||||
11653445,
|
||||
4965765,
|
||||
1747487,
|
||||
220816,
|
||||
69742
|
||||
]
|
||||
}
|
||||
Reference in New Issue
Block a user