From ea5c3bfe6b9cc6036d7b6a33e21cbced2ccefdff Mon Sep 17 00:00:00 2001 From: Gahow Wang Date: Sat, 23 May 2026 20:58:09 +0800 Subject: [PATCH] compute_roofline: argparse --trace, fix stale default path (D4) The hardcoded traces/sampled_1000req_seed42.jsonl no longer exists; switch the default to the current sampled trace file w600_r0.0015_st30.jsonl and let users override via --trace. Skip Part 4 cleanly when the file is missing instead of relying on os.path.exists. --- scripts/compute_roofline.py | 34 +++++++++++++++++++++++----------- 1 file changed, 23 insertions(+), 11 deletions(-) diff --git a/scripts/compute_roofline.py b/scripts/compute_roofline.py index bcffff4..8e16a4c 100644 --- a/scripts/compute_roofline.py +++ b/scripts/compute_roofline.py @@ -12,6 +12,7 @@ GPU: NVIDIA H20 - Roofline ridge point: 148/4.0 = 37 FLOP/byte """ +import argparse import json import math @@ -161,16 +162,25 @@ print(" PART 4: Agentic Workload Real Distribution") print("-" * 80) # Use actual trace data -import os -trace_path = "traces/sampled_1000req_seed42.jsonl" -if os.path.exists(trace_path): +_parser = argparse.ArgumentParser(description=__doc__) +_parser.add_argument("--trace", type=str, + default="traces/w600_r0.0015_st30.jsonl", + help="Sampled trace JSONL for empirical workload roofline (Part 4)") +_args, _ = _parser.parse_known_args() +trace_path = _args.trace +try: + _trace_fh = open(trace_path) +except FileNotFoundError: + print(f" (skipped: trace file not found: {trace_path})") + _trace_fh = None +if _trace_fh is not None: BLOCK_SIZE = 512 seen = set() compute_bound = 0 memory_bound = 0 total = 0 - for line in open(trace_path): + for line in _trace_fh: d = json.loads(line) seq_len = d["input_length"] if seq_len < 1: continue @@ -201,10 +211,12 @@ if os.path.exists(trace_path): else: memory_bound += 1 - print(f" With actual trace prefix cache pattern:") - print(f" Compute-bound prefills: {compute_bound} ({compute_bound*100//total}%)") - print(f" Memory-bound prefills: {memory_bound} ({memory_bound*100//total}%)") - print(f" (Decode is ALWAYS memory-bound at these seq lengths)") - print() - print(f" Implication: {memory_bound*100//total}% of agentic prefills behave like decode") - print(f" → PD separation treats them as 'compute-heavy' but they are actually memory-heavy") + _trace_fh.close() + if total > 0: + print(f" With actual trace prefix cache pattern:") + print(f" Compute-bound prefills: {compute_bound} ({compute_bound*100//total}%)") + print(f" Memory-bound prefills: {memory_bound} ({memory_bound*100//total}%)") + print(f" (Decode is ALWAYS memory-bound at these seq lengths)") + print() + print(f" Implication: {memory_bound*100//total}% of agentic prefills behave like decode") + print(f" → PD separation treats them as 'compute-heavy' but they are actually memory-heavy")