Fix trace reuse and packaged model assets

This commit is contained in:
2026-05-08 21:24:29 +08:00
parent 152f01613b
commit 5a6b1acb49
6 changed files with 210 additions and 37 deletions

View File

@@ -6,14 +6,13 @@ import json
import os
import shutil
import sys
import subprocess
import tempfile
from array import array
from concurrent.futures import ProcessPoolExecutor, as_completed
from contextlib import contextmanager, nullcontext
from dataclasses import asdict
from pathlib import Path
from typing import Iterator, TextIO
from typing import Iterator
from trace_analyzer.helpers import normalize_unicode_text, parse_jsonish, safe_int
from tokenizers import Tokenizer
@@ -32,6 +31,7 @@ from .sessionization import (
encode_roles,
extract_user_id,
)
from .trace_io import open_trace_text
from .time_windows import infer_time_offset_ms, infer_time_window, parse_time_to_ms
@@ -88,34 +88,6 @@ def discover_source_files(input_dir: str | Path) -> list[Path]:
return files
@contextmanager
def open_trace_text(path: str | Path) -> Iterator[TextIO]:
resolved = Path(path)
if resolved.suffix == ".zst":
proc = subprocess.Popen(
["zstdcat", str(resolved)],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True,
encoding="utf-8",
)
if proc.stdout is None:
raise RuntimeError(f"Failed to stream {resolved}")
try:
yield proc.stdout
finally:
stdout = proc.stdout
stdout.close()
stderr = proc.stderr.read() if proc.stderr else ""
return_code = proc.wait()
if return_code != 0:
raise RuntimeError(f"zstdcat failed for {resolved}: {stderr.strip()}")
return
with resolved.open("r", encoding="utf-8") as handle:
yield handle
def _normalize_time_ms(*, raw_time_ms: int, wall_clock_ms: int, time_offset_ms: int) -> int:
if not raw_time_ms:
return wall_clock_ms - time_offset_ms if wall_clock_ms and time_offset_ms else wall_clock_ms