Use time-based trace window ids
This commit is contained in:
@@ -47,7 +47,7 @@
|
||||
},
|
||||
"trace": {
|
||||
"windows_path": "trace_windows/windows.json",
|
||||
"window_id": "chat_w_example_peak_0001",
|
||||
"window_id": "chat_w_example_0001",
|
||||
"u_field": "sampling_u",
|
||||
"timestamp_field": "timestamp",
|
||||
"max_concurrency": 64
|
||||
|
||||
@@ -4,9 +4,9 @@
|
||||
"window_duration_seconds": 10.0,
|
||||
"windows": [
|
||||
{
|
||||
"window_id": "chat_w_example_peak_0001",
|
||||
"window_id": "chat_w_example_0001",
|
||||
"trace_type": "chat",
|
||||
"trace_file": "traces/chat_w_example_peak_0001.jsonl",
|
||||
"trace_file": "traces/chat_w_example_0001.jsonl",
|
||||
"window_start": 0.0,
|
||||
"window_end": 10.0,
|
||||
"num_requests": 3
|
||||
|
||||
@@ -18,30 +18,23 @@ DEFAULT_THINKING_SOURCE = Path(
|
||||
DEFAULT_OUTPUT_ROOT = REPO_ROOT / "trace_windows"
|
||||
LEGACY_TARGET_DATES = ["2026-03-11", "2026-03-12", "2026-03-13", "2026-03-16", "2026-03-17"]
|
||||
THINKING_WINDOWS = [
|
||||
("2026-03-21", "peak_weekend"),
|
||||
("2026-03-22", "peak_weekend"),
|
||||
("2026-03-23", "peak"),
|
||||
("2026-03-24", "peak"),
|
||||
("2026-03-25", "peak"),
|
||||
("2026-03-26", "peak"),
|
||||
("2026-03-27", "peak"),
|
||||
("2026-03-21", "1000"),
|
||||
("2026-03-22", "1000"),
|
||||
("2026-03-23", "1000"),
|
||||
("2026-03-24", "1000"),
|
||||
("2026-03-25", "1000"),
|
||||
("2026-03-26", "1000"),
|
||||
("2026-03-27", "1000"),
|
||||
]
|
||||
WINDOW_SPECS = {
|
||||
"peak": {
|
||||
"1000": {
|
||||
"start_hour": 9,
|
||||
"window_start": 3600.0,
|
||||
"window_end": 4200.0,
|
||||
"slot_label": "10:00-10:10",
|
||||
"slot_token": "1000",
|
||||
},
|
||||
"peak_weekend": {
|
||||
"start_hour": 9,
|
||||
"window_start": 3600.0,
|
||||
"window_end": 4200.0,
|
||||
"slot_label": "10:00-10:10",
|
||||
"slot_token": "1000",
|
||||
},
|
||||
"valley": {
|
||||
"2200": {
|
||||
"start_hour": 21,
|
||||
"window_start": 3600.0,
|
||||
"window_end": 4200.0,
|
||||
@@ -54,18 +47,18 @@ TRACE_SPECS = {
|
||||
"block_size": 64,
|
||||
"source_key": "legacy",
|
||||
"target_dates": LEGACY_TARGET_DATES,
|
||||
"day_parts": ("peak", "valley"),
|
||||
"slot_tokens": ("1000", "2200"),
|
||||
},
|
||||
"coder": {
|
||||
"block_size": 512,
|
||||
"source_key": "legacy",
|
||||
"target_dates": LEGACY_TARGET_DATES,
|
||||
"day_parts": ("peak", "valley"),
|
||||
"slot_tokens": ("1000", "2200"),
|
||||
},
|
||||
"thinking": {
|
||||
"block_size": 64,
|
||||
"source_key": "thinking",
|
||||
"date_day_parts": THINKING_WINDOWS,
|
||||
"date_slot_tokens": THINKING_WINDOWS,
|
||||
},
|
||||
}
|
||||
|
||||
@@ -109,9 +102,9 @@ def stable_uniform(*, seed: int, window_id: str, index: int, row: dict[str, Any]
|
||||
return int.from_bytes(digest, byteorder="big", signed=False) / float(1 << 64)
|
||||
|
||||
|
||||
def build_source_filename(*, trace_type: str, date_text: str, day_part: str) -> str:
|
||||
def build_source_filename(*, trace_type: str, date_text: str, slot_token: str) -> str:
|
||||
month, day = date_text[5:7], date_text[8:10]
|
||||
start_hour = int(WINDOW_SPECS[day_part]["start_hour"])
|
||||
start_hour = int(WINDOW_SPECS[slot_token]["start_hour"])
|
||||
return (
|
||||
f"qwen_{trace_type}_blksz_{TRACE_SPECS[trace_type]['block_size']}_"
|
||||
f"{month}{day}{start_hour:02d}-{month}{day}{start_hour + 2:02d}.jsonl"
|
||||
@@ -135,23 +128,23 @@ def build_windows(
|
||||
for trace_type in workloads:
|
||||
spec = TRACE_SPECS[trace_type]
|
||||
source_dir = source_dirs[str(spec["source_key"])]
|
||||
if "date_day_parts" in spec:
|
||||
date_day_parts = list(spec["date_day_parts"])
|
||||
if "date_slot_tokens" in spec:
|
||||
date_slot_tokens = list(spec["date_slot_tokens"])
|
||||
else:
|
||||
date_day_parts = [
|
||||
(date_text, day_part)
|
||||
for day_part in spec["day_parts"]
|
||||
date_slot_tokens = [
|
||||
(date_text, slot_token)
|
||||
for slot_token in spec["slot_tokens"]
|
||||
for date_text in spec["target_dates"]
|
||||
]
|
||||
sample_order_by_group: dict[str, int] = {}
|
||||
for date_text, day_part in date_day_parts:
|
||||
window_spec = WINDOW_SPECS[day_part]
|
||||
for date_text, slot_token in date_slot_tokens:
|
||||
window_spec = WINDOW_SPECS[slot_token]
|
||||
date_token = date_text.replace("-", "")
|
||||
sample_order = sample_order_by_group.setdefault(day_part, 0)
|
||||
sample_order_by_group[day_part] += 1
|
||||
window_id = f"{trace_type}_w{date_token}_{day_part}_{window_spec['slot_token']}"
|
||||
sample_order = sample_order_by_group.setdefault(slot_token, 0)
|
||||
sample_order_by_group[slot_token] += 1
|
||||
window_id = f"{trace_type}_w{date_token}_{window_spec['slot_token']}"
|
||||
source_trace_path = source_dir / build_source_filename(
|
||||
trace_type=trace_type, date_text=date_text, day_part=day_part
|
||||
trace_type=trace_type, date_text=date_text, slot_token=slot_token
|
||||
)
|
||||
source_prompt_path = Path(str(source_trace_path).replace(".jsonl", "_prompt.jsonl"))
|
||||
windows.append(
|
||||
@@ -164,7 +157,7 @@ def build_windows(
|
||||
"window_index": 6,
|
||||
"sample_order": sample_order,
|
||||
"date": date_text,
|
||||
"day_part": day_part,
|
||||
"slot_token": window_spec["slot_token"],
|
||||
"slot_label": window_spec["slot_label"],
|
||||
"source_trace_path": str(source_trace_path),
|
||||
"source_prompt_path": str(source_prompt_path),
|
||||
|
||||
@@ -344,10 +344,10 @@ class CoreFlowTests(unittest.TestCase):
|
||||
|
||||
windows_payload = json.loads((output_root / "windows.json").read_text(encoding="utf-8"))
|
||||
windows = {item["window_id"]: item for item in windows_payload["windows"]}
|
||||
self.assertIn("chat_w20260311_peak_1000", windows)
|
||||
self.assertEqual(windows["chat_w20260311_peak_1000"]["num_requests"], 1)
|
||||
self.assertIn("chat_w20260311_1000", windows)
|
||||
self.assertEqual(windows["chat_w20260311_1000"]["num_requests"], 1)
|
||||
|
||||
trace_path = output_root / windows["chat_w20260311_peak_1000"]["trace_file"]
|
||||
trace_path = output_root / windows["chat_w20260311_1000"]["trace_file"]
|
||||
rows = [json.loads(line) for line in trace_path.read_text(encoding="utf-8").splitlines()]
|
||||
self.assertEqual(len(rows), 1)
|
||||
self.assertEqual(rows[0]["prompt"], "real prompt")
|
||||
|
||||
Reference in New Issue
Block a user