Add solo production-trace CDF figure (f2b_session_skew_prod.png)
Single-curve variant of f2b — production trace only, no replay overlay and no uniform reference. Cleaner for boss-meeting/talk slides where the extra context is noise. The combined three-curve figure is unchanged. scripts/plot_session_skew_cdf.py: split into plot_combined + plot_production_solo helpers; one run emits both PNGs. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
BIN
figs/f2b_session_skew_prod.png
Normal file
BIN
figs/f2b_session_skew_prod.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 77 KiB |
@@ -47,32 +47,11 @@ def load_production_cdf(
|
||||
return xs, ys, d["n_sessions"], d["anchors_check"]
|
||||
|
||||
|
||||
def main() -> None:
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument(
|
||||
"--replay-trace",
|
||||
default="traces/w600_r0.0015_st30.jsonl",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--prod-cache",
|
||||
default="analysis/characterization/data/production_session_skew_cdf.json",
|
||||
)
|
||||
parser.add_argument("--out", default="figs/f2b_session_skew.png")
|
||||
args = parser.parse_args()
|
||||
ANNOTATE_PTS = [1.0, 5.0, 10.0, 25.0, 50.0]
|
||||
|
||||
prod_x, prod_y, prod_n, prod_anchors = load_production_cdf(Path(args.prod_cache))
|
||||
replay_rank_pct, replay_cum_pct, replay_n = load_replay_cdf(Path(args.replay_trace))
|
||||
|
||||
fig, ax = plt.subplots(figsize=(9, 5.5))
|
||||
|
||||
ax.plot(
|
||||
prod_x, prod_y,
|
||||
color="#c44e52", lw=2.4,
|
||||
label=f"production trace (n={prod_n:,} sessions, 456-pt sampled)",
|
||||
)
|
||||
|
||||
annotate_pts = [1.0, 5.0, 10.0, 25.0, 50.0]
|
||||
for p in annotate_pts:
|
||||
def _annotate_anchors(ax, prod_x, prod_y) -> None:
|
||||
for p in ANNOTATE_PTS:
|
||||
y = float(np.interp(p, prod_x, prod_y))
|
||||
ax.scatter([p], [y], color="#c44e52", s=55, zorder=5)
|
||||
ax.annotate(
|
||||
@@ -83,32 +62,64 @@ def main() -> None:
|
||||
color="#7a1d1d",
|
||||
)
|
||||
|
||||
ax.plot(
|
||||
replay_rank_pct, replay_cum_pct,
|
||||
color="#2f6fab", lw=1.6,
|
||||
alpha=0.85,
|
||||
label=f"replay window (n={replay_n} sessions, raw CDF)",
|
||||
)
|
||||
|
||||
ax.plot(
|
||||
[0, 100], [0, 100],
|
||||
color="#888", ls="--", lw=1,
|
||||
label="uniform reference (y = x)",
|
||||
)
|
||||
|
||||
ax.set_xlim(0, 100)
|
||||
ax.set_ylim(0, 102)
|
||||
def plot_combined(prod_x, prod_y, prod_n, replay_rank_pct, replay_cum_pct, replay_n,
                  out_path: Path) -> None:
    """Draw the three-curve session-skew CDF figure and save it to *out_path*.

    The figure overlays the production-trace curve (with anchor annotations),
    the replay-window curve, and a dashed ``y = x`` uniform reference.

    Args:
        prod_x: x coordinates of the production CDF (session rank percentile).
        prod_y: y coordinates of the production CDF (cumulative % of mass).
        prod_n: number of production sessions; shown in the legend label.
        replay_rank_pct: x coordinates of the replay-window CDF.
        replay_cum_pct: y coordinates of the replay-window CDF.
        replay_n: number of replay sessions; shown in the legend label.
        out_path: destination image path; missing parent directories are created.
    """
    fig, ax = plt.subplots(figsize=(9, 5.5))

    ax.plot(prod_x, prod_y, color="#c44e52", lw=2.4,
            label=f"production trace (n={prod_n:,} sessions, 456-pt sampled)")
    _annotate_anchors(ax, prod_x, prod_y)
    ax.plot(replay_rank_pct, replay_cum_pct, color="#2f6fab", lw=1.6, alpha=0.85,
            label=f"replay window (n={replay_n} sessions, raw CDF)")
    ax.plot([0, 100], [0, 100], color="#888", ls="--", lw=1,
            label="uniform reference (y = x)")

    ax.set_xlim(0, 100)
    # Upper y limit is 102 rather than 100, leaving a little room above the 100% mark.
    ax.set_ylim(0, 102)
    ax.set_xlabel("Session rank percentile (top → bottom by input-token mass)")
    ax.set_ylabel("Cumulative % of input-token mass")
    ax.set_title("Session input-token mass CDF — Qwen3 production trace")
    ax.grid(True, alpha=0.3)
    ax.legend(loc="lower right", framealpha=0.92, fontsize=9)

    # The destination comes from the ``out_path`` parameter only; this helper
    # must not read CLI state (there is no ``args`` in this scope).
    out_path.parent.mkdir(parents=True, exist_ok=True)
    fig.savefig(out_path, dpi=150, bbox_inches="tight")
    plt.close(fig)
    print(f"wrote {out_path}")
|
||||
|
||||
|
||||
def plot_production_solo(prod_x, prod_y, prod_n, out_path: Path) -> None:
    """Render the production-trace CDF on its own and save it to *out_path*.

    Single-curve figure: only the production curve and its anchor
    annotations are drawn; no legend is added. The session count is
    carried in the title.

    Args:
        prod_x: x coordinates of the production CDF.
        prod_y: y coordinates of the production CDF.
        prod_n: number of production sessions; formatted into the title.
        out_path: destination image path; missing parent directories are created.
    """
    figure, axes = plt.subplots(figsize=(9, 5.5))

    # Curve first, then the anchor markers drawn on top of it.
    axes.plot(prod_x, prod_y, color="#c44e52", lw=2.6)
    _annotate_anchors(axes, prod_x, prod_y)

    axes.set_xlim(0, 100)
    axes.set_ylim(0, 102)

    axes.set_xlabel("Session rank percentile (top → bottom by input-token mass)")
    axes.set_ylabel("Cumulative % of input-token mass")
    axes.set_title(
        f"Session input-token mass CDF — Qwen3 production trace (n = {prod_n:,} sessions)"
    )
    axes.grid(True, alpha=0.3)

    # Ensure the output directory exists before writing the image.
    out_path.parent.mkdir(parents=True, exist_ok=True)
    figure.savefig(out_path, dpi=150, bbox_inches="tight")
    plt.close(figure)
    print(f"wrote {out_path}")
|
||||
|
||||
|
||||
def main() -> None:
    """Parse CLI options, load both CDFs, and emit the combined and solo figures.

    One run writes two images: the three-curve figure (``--out-combined``)
    and the production-only figure (``--out-solo``).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--replay-trace", default="traces/w600_r0.0015_st30.jsonl")
    parser.add_argument(
        "--prod-cache",
        default="analysis/characterization/data/production_session_skew_cdf.json",
    )
    # "--out" is kept as an exact-match alias for the combined figure. Without
    # it, "--out" is an ambiguous prefix of "--out-combined"/"--out-solo" and
    # argparse rejects it, breaking invocations that used the earlier
    # single-output "--out" flag.
    parser.add_argument(
        "--out-combined", "--out",
        dest="out_combined",
        default="figs/f2b_session_skew.png",
    )
    parser.add_argument("--out-solo", default="figs/f2b_session_skew_prod.png")
    args = parser.parse_args()

    # The fourth value returned by load_production_cdf is not needed for plotting.
    prod_x, prod_y, prod_n, _ = load_production_cdf(Path(args.prod_cache))
    replay_rank_pct, replay_cum_pct, replay_n = load_replay_cdf(Path(args.replay_trace))

    plot_combined(prod_x, prod_y, prod_n, replay_rank_pct, replay_cum_pct, replay_n,
                  Path(args.out_combined))
    plot_production_solo(prod_x, prod_y, prod_n, Path(args.out_solo))
|
||||
|
||||
|
||||
# Script entry point: run the CLI only when executed directly, not on import.
if __name__ == "__main__":
    main()
|
||||
|
||||
Reference in New Issue
Block a user