From 7b7eaafd7885e0f6abba72a5765bed3b96aed1cc Mon Sep 17 00:00:00 2001 From: Gahow Wang Date: Sat, 4 Apr 2026 22:09:43 +0800 Subject: [PATCH] Use time-based trace window ids --- configs/examples/study.example.json | 2 +- configs/examples/trace_windows/windows.json | 4 +- scripts/prepare_trace_windows.py | 59 +++++++++------------ tests/test_core_flow.py | 6 +-- 4 files changed, 32 insertions(+), 39 deletions(-) diff --git a/configs/examples/study.example.json b/configs/examples/study.example.json index 2b0b243..6ee9fd5 100644 --- a/configs/examples/study.example.json +++ b/configs/examples/study.example.json @@ -47,7 +47,7 @@ }, "trace": { "windows_path": "trace_windows/windows.json", - "window_id": "chat_w_example_peak_0001", + "window_id": "chat_w_example_0001", "u_field": "sampling_u", "timestamp_field": "timestamp", "max_concurrency": 64 diff --git a/configs/examples/trace_windows/windows.json b/configs/examples/trace_windows/windows.json index 44398c7..57aafae 100644 --- a/configs/examples/trace_windows/windows.json +++ b/configs/examples/trace_windows/windows.json @@ -4,9 +4,9 @@ "window_duration_seconds": 10.0, "windows": [ { - "window_id": "chat_w_example_peak_0001", + "window_id": "chat_w_example_0001", "trace_type": "chat", - "trace_file": "traces/chat_w_example_peak_0001.jsonl", + "trace_file": "traces/chat_w_example_0001.jsonl", "window_start": 0.0, "window_end": 10.0, "num_requests": 3 diff --git a/scripts/prepare_trace_windows.py b/scripts/prepare_trace_windows.py index ec49109..a58c38f 100644 --- a/scripts/prepare_trace_windows.py +++ b/scripts/prepare_trace_windows.py @@ -18,30 +18,23 @@ DEFAULT_THINKING_SOURCE = Path( DEFAULT_OUTPUT_ROOT = REPO_ROOT / "trace_windows" LEGACY_TARGET_DATES = ["2026-03-11", "2026-03-12", "2026-03-13", "2026-03-16", "2026-03-17"] THINKING_WINDOWS = [ - ("2026-03-21", "peak_weekend"), - ("2026-03-22", "peak_weekend"), - ("2026-03-23", "peak"), - ("2026-03-24", "peak"), - ("2026-03-25", "peak"), - ("2026-03-26", "peak"), - ("2026-03-27", "peak"), + ("2026-03-21", "1000"), + ("2026-03-22", "1000"), + ("2026-03-23", "1000"), + ("2026-03-24", "1000"), + ("2026-03-25", "1000"), + ("2026-03-26", "1000"), + ("2026-03-27", "1000"), ] WINDOW_SPECS = { - "peak": { + "1000": { "start_hour": 9, "window_start": 3600.0, "window_end": 4200.0, "slot_label": "10:00-10:10", "slot_token": "1000", }, - "peak_weekend": { - "start_hour": 9, - "window_start": 3600.0, - "window_end": 4200.0, - "slot_label": "10:00-10:10", - "slot_token": "1000", - }, - "valley": { + "2200": { "start_hour": 21, "window_start": 3600.0, "window_end": 4200.0, @@ -54,18 +47,18 @@ TRACE_SPECS = { "block_size": 64, "source_key": "legacy", "target_dates": LEGACY_TARGET_DATES, - "day_parts": ("peak", "valley"), + "slot_tokens": ("1000", "2200"), }, "coder": { "block_size": 512, "source_key": "legacy", "target_dates": LEGACY_TARGET_DATES, - "day_parts": ("peak", "valley"), + "slot_tokens": ("1000", "2200"), }, "thinking": { "block_size": 64, "source_key": "thinking", - "date_day_parts": THINKING_WINDOWS, + "date_slot_tokens": THINKING_WINDOWS, }, } @@ -109,9 +102,9 @@ def stable_uniform(*, seed: int, window_id: str, index: int, row: dict[str, Any] return int.from_bytes(digest, byteorder="big", signed=False) / float(1 << 64) -def build_source_filename(*, trace_type: str, date_text: str, day_part: str) -> str: +def build_source_filename(*, trace_type: str, date_text: str, slot_token: str) -> str: month, day = date_text[5:7], date_text[8:10] - start_hour = int(WINDOW_SPECS[day_part]["start_hour"]) + start_hour = int(WINDOW_SPECS[slot_token]["start_hour"]) return ( f"qwen_{trace_type}_blksz_{TRACE_SPECS[trace_type]['block_size']}_" f"{month}{day}{start_hour:02d}-{month}{day}{start_hour + 2:02d}.jsonl" @@ -135,23 +128,23 @@ def build_windows( for trace_type in workloads: spec = TRACE_SPECS[trace_type] source_dir = source_dirs[str(spec["source_key"])] - if "date_day_parts" in spec: - date_day_parts = list(spec["date_day_parts"]) + if "date_slot_tokens" in spec: + date_slot_tokens = list(spec["date_slot_tokens"]) else: - date_day_parts = [ - (date_text, day_part) - for day_part in spec["day_parts"] + date_slot_tokens = [ + (date_text, slot_token) + for slot_token in spec["slot_tokens"] for date_text in spec["target_dates"] ] sample_order_by_group: dict[str, int] = {} - for date_text, day_part in date_day_parts: - window_spec = WINDOW_SPECS[day_part] + for date_text, slot_token in date_slot_tokens: + window_spec = WINDOW_SPECS[slot_token] date_token = date_text.replace("-", "") - sample_order = sample_order_by_group.setdefault(day_part, 0) - sample_order_by_group[day_part] += 1 - window_id = f"{trace_type}_w{date_token}_{day_part}_{window_spec['slot_token']}" + sample_order = sample_order_by_group.setdefault(slot_token, 0) + sample_order_by_group[slot_token] += 1 + window_id = f"{trace_type}_w{date_token}_{window_spec['slot_token']}" source_trace_path = source_dir / build_source_filename( - trace_type=trace_type, date_text=date_text, day_part=day_part + trace_type=trace_type, date_text=date_text, slot_token=slot_token ) source_prompt_path = Path(str(source_trace_path).replace(".jsonl", "_prompt.jsonl")) windows.append( @@ -164,7 +157,7 @@ def build_windows( "window_index": 6, "sample_order": sample_order, "date": date_text, - "day_part": day_part, + "slot_token": window_spec["slot_token"], "slot_label": window_spec["slot_label"], "source_trace_path": str(source_trace_path), "source_prompt_path": str(source_prompt_path), diff --git a/tests/test_core_flow.py b/tests/test_core_flow.py index e66ca84..fa58e29 100644 --- a/tests/test_core_flow.py +++ b/tests/test_core_flow.py @@ -344,10 +344,10 @@ class CoreFlowTests(unittest.TestCase): windows_payload = json.loads((output_root / "windows.json").read_text(encoding="utf-8")) windows = {item["window_id"]: item for item in windows_payload["windows"]} - self.assertIn("chat_w20260311_peak_1000", windows) - self.assertEqual(windows["chat_w20260311_peak_1000"]["num_requests"], 1) + self.assertIn("chat_w20260311_1000", windows) + self.assertEqual(windows["chat_w20260311_1000"]["num_requests"], 1) - trace_path = output_root / windows["chat_w20260311_peak_1000"]["trace_file"] + trace_path = output_root / windows["chat_w20260311_1000"]["trace_file"] rows = [json.loads(line) for line in trace_path.read_text(encoding="utf-8").splitlines()] self.assertEqual(len(rows), 1) self.assertEqual(rows[0]["prompt"], "real prompt")