trace: make window materialization atomic

This commit is contained in:
2026-04-12 23:09:30 +08:00
parent 631a076498
commit 4625fba487
2 changed files with 88 additions and 5 deletions

View File

@@ -3,6 +3,7 @@ from __future__ import annotations
import argparse
import hashlib
import json
import os
from pathlib import Path
from typing import Any
from dataclasses import dataclass
@@ -222,10 +223,19 @@ def materialize_windows(
stats_by_window = {str(window["window_id"]): WindowStats() for window in windows}
handles: dict[str, Any] = {}
final_paths: dict[str, Path] = {}
temp_paths: dict[str, Path] = {}
completed = False
try:
for window in windows:
window_id = str(window["window_id"])
handles[window_id] = (traces_dir / f"{window_id}.jsonl").open("w", encoding="utf-8")
final_path = traces_dir / f"{window_id}.jsonl"
temp_path = traces_dir / f".{window_id}.jsonl.tmp.{os.getpid()}"
if temp_path.exists():
temp_path.unlink()
final_paths[window_id] = final_path
temp_paths[window_id] = temp_path
handles[window_id] = temp_path.open("w", encoding="utf-8")
for trace_path, prompt_path in sorted(grouped.keys()):
bucket = grouped[(trace_path, prompt_path)]
@@ -270,9 +280,17 @@ def materialize_windows(
f"materialized {trace_path.name} -> matched_rows={matched_rows}",
flush=True,
)
completed = True
finally:
for handle in handles.values():
handle.close()
if completed:
for window_id, temp_path in temp_paths.items():
os.replace(temp_path, final_paths[window_id])
else:
for temp_path in temp_paths.values():
if temp_path.exists():
temp_path.unlink()
return stats_by_window
@@ -342,10 +360,17 @@ def main() -> int:
"window_duration_seconds": 600.0,
"windows": rendered_windows,
}
(output_root / "windows.json").write_text(
json.dumps(windows_payload, ensure_ascii=False, indent=2) + "\n",
encoding="utf-8",
)
windows_path = output_root / "windows.json"
windows_tmp_path = output_root / f".windows.json.tmp.{os.getpid()}"
try:
windows_tmp_path.write_text(
json.dumps(windows_payload, ensure_ascii=False, indent=2) + "\n",
encoding="utf-8",
)
os.replace(windows_tmp_path, windows_path)
finally:
if windows_tmp_path.exists():
windows_tmp_path.unlink()
print(output_root)
print(f"windows={len(rendered_windows)}")
return 0