Fix trace reuse and packaged model assets
This commit is contained in:
@@ -6,14 +6,13 @@ import json
|
||||
import os
|
||||
import shutil
|
||||
import sys
|
||||
import subprocess
|
||||
import tempfile
|
||||
from array import array
|
||||
from concurrent.futures import ProcessPoolExecutor, as_completed
|
||||
from contextlib import contextmanager, nullcontext
|
||||
from dataclasses import asdict
|
||||
from pathlib import Path
|
||||
from typing import Iterator, TextIO
|
||||
from typing import Iterator
|
||||
|
||||
from trace_analyzer.helpers import normalize_unicode_text, parse_jsonish, safe_int
|
||||
from tokenizers import Tokenizer
|
||||
@@ -32,6 +31,7 @@ from .sessionization import (
|
||||
encode_roles,
|
||||
extract_user_id,
|
||||
)
|
||||
from .trace_io import open_trace_text
|
||||
from .time_windows import infer_time_offset_ms, infer_time_window, parse_time_to_ms
|
||||
|
||||
|
||||
@@ -88,34 +88,6 @@ def discover_source_files(input_dir: str | Path) -> list[Path]:
|
||||
return files
|
||||
|
||||
|
||||
@contextmanager
def open_trace_text(path: str | Path) -> Iterator[TextIO]:
    """Yield a UTF-8 text handle for a trace file, decompressing ``.zst`` input.

    A path whose final suffix is ``.zst`` is streamed through an external
    ``zstdcat`` process; any other path is opened directly as text.

    The caller may stop reading before the end of the stream. In that case
    the decompressor is terminated and its exit status is ignored, because a
    non-zero status is then only a consequence of the pipe being closed.

    Args:
        path: Location of the trace file.

    Yields:
        A readable text stream over the (decompressed) file contents.

    Raises:
        RuntimeError: If the ``zstdcat`` pipe could not be created, or if
            ``zstdcat`` ran to completion and exited with a non-zero status.
    """
    resolved = Path(path)
    if resolved.suffix != ".zst":
        with resolved.open("r", encoding="utf-8") as handle:
            yield handle
        return

    proc = subprocess.Popen(
        ["zstdcat", str(resolved)],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
        encoding="utf-8",
    )
    stdout = proc.stdout
    if stdout is None:
        # Do not leave an orphaned child behind when the pipe is unusable.
        proc.kill()
        proc.wait()
        raise RuntimeError(f"Failed to stream {resolved}")

    body_finished = False
    try:
        yield stdout
        body_finished = True
    finally:
        # The exit status is only meaningful when zstdcat was allowed to
        # finish: either it has already exited, or the stream is at EOF.
        ran_to_completion = body_finished and proc.poll() is not None
        if body_finished and not ran_to_completion:
            try:
                ran_to_completion = stdout.read(1) == ""
            except ValueError:
                # Handle already closed by the caller, or undecodable bytes:
                # treat the stream as abandoned.
                ran_to_completion = False
        if not ran_to_completion:
            # Consumer stopped early or raised; stop the decompressor rather
            # than letting it die on a broken pipe and report a bogus failure.
            proc.kill()
        stdout.close()
        stderr = ""
        if proc.stderr is not None:
            stderr = proc.stderr.read()
            proc.stderr.close()
        return_code = proc.wait()
        # Raising here only for a completed run keeps an exception from the
        # with-body from being masked by a secondary zstdcat error.
        if ran_to_completion and return_code != 0:
            raise RuntimeError(f"zstdcat failed for {resolved}: {stderr.strip()}")
|
||||
|
||||
|
||||
def _normalize_time_ms(*, raw_time_ms: int, wall_clock_ms: int, time_offset_ms: int) -> int:
|
||||
if not raw_time_ms:
|
||||
return wall_clock_ms - time_offset_ms if wall_clock_ms and time_offset_ms else wall_clock_ms
|
||||
|
||||
@@ -8,6 +8,8 @@ from pathlib import Path
|
||||
|
||||
from trace_analyzer.helpers import parse_jsonish, safe_int
|
||||
|
||||
from .trace_io import open_trace_text
|
||||
|
||||
WINDOW_RE = re.compile(r"(?P<day>\d{4})-(?P<start>\d{4})-(?P<end>\d{4})$")
|
||||
UTC_PLUS_8 = timezone(timedelta(hours=8))
|
||||
|
||||
@@ -32,8 +34,17 @@ def parse_time_to_ms(value: str) -> int:
|
||||
raise ValueError(f"Unsupported timestamp format: {value!r}")
|
||||
|
||||
|
||||
def _trace_window_name(path: Path) -> str:
|
||||
name = path.name
|
||||
if name.endswith(".jsonl.zst"):
|
||||
return name[: -len(".jsonl.zst")]
|
||||
if name.endswith(".jsonl"):
|
||||
return name[: -len(".jsonl")]
|
||||
return path.stem
|
||||
|
||||
|
||||
def _read_first_timestamp(path: Path) -> str:
|
||||
with path.open("r", encoding="utf-8") as handle:
|
||||
with open_trace_text(path) as handle:
|
||||
for line in handle:
|
||||
stripped = line.strip()
|
||||
if not stripped:
|
||||
@@ -46,7 +57,7 @@ def _read_first_timestamp(path: Path) -> str:
|
||||
|
||||
|
||||
def _read_first_timestamp_and_ready_ms(path: Path) -> tuple[str, int]:
|
||||
with path.open("r", encoding="utf-8") as handle:
|
||||
with open_trace_text(path) as handle:
|
||||
for line in handle:
|
||||
stripped = line.strip()
|
||||
if not stripped:
|
||||
@@ -90,8 +101,8 @@ def infer_time_window(
|
||||
if not source_files:
|
||||
return None
|
||||
|
||||
first_match = WINDOW_RE.match(source_files[0].stem)
|
||||
last_match = WINDOW_RE.match(source_files[-1].stem)
|
||||
first_match = WINDOW_RE.match(_trace_window_name(source_files[0]))
|
||||
last_match = WINDOW_RE.match(_trace_window_name(source_files[-1]))
|
||||
if first_match is None or last_match is None:
|
||||
return None
|
||||
|
||||
|
||||
33
trace_formatter/trace_io.py
Normal file
33
trace_formatter/trace_io.py
Normal file
@@ -0,0 +1,33 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import subprocess
|
||||
from contextlib import contextmanager
|
||||
from pathlib import Path
|
||||
from typing import Iterator, TextIO
|
||||
|
||||
|
||||
@contextmanager
def open_trace_text(path: str | Path) -> Iterator[TextIO]:
    """Yield a UTF-8 text handle for a trace file, decompressing ``.zst`` input.

    A path whose final suffix is ``.zst`` is streamed through an external
    ``zstdcat`` process; any other path is opened directly as text.

    The caller may stop reading before the end of the stream. In that case
    the decompressor is terminated and its exit status is ignored, because a
    non-zero status is then only a consequence of the pipe being closed.

    Args:
        path: Location of the trace file.

    Yields:
        A readable text stream over the (decompressed) file contents.

    Raises:
        RuntimeError: If the ``zstdcat`` pipe could not be created, or if
            ``zstdcat`` ran to completion and exited with a non-zero status.
    """
    resolved = Path(path)
    if resolved.suffix != ".zst":
        with resolved.open("r", encoding="utf-8") as handle:
            yield handle
        return

    proc = subprocess.Popen(
        ["zstdcat", str(resolved)],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
        encoding="utf-8",
    )
    stdout = proc.stdout
    if stdout is None:
        # Do not leave an orphaned child behind when the pipe is unusable.
        proc.kill()
        proc.wait()
        raise RuntimeError(f"Failed to stream {resolved}")

    body_finished = False
    try:
        yield stdout
        body_finished = True
    finally:
        # The exit status is only meaningful when zstdcat was allowed to
        # finish: either it has already exited, or the stream is at EOF.
        ran_to_completion = body_finished and proc.poll() is not None
        if body_finished and not ran_to_completion:
            try:
                ran_to_completion = stdout.read(1) == ""
            except ValueError:
                # Handle already closed by the caller, or undecodable bytes:
                # treat the stream as abandoned.
                ran_to_completion = False
        if not ran_to_completion:
            # Consumer stopped early or raised; stop the decompressor rather
            # than letting it die on a broken pipe and report a bogus failure.
            proc.kill()
        stdout.close()
        stderr = ""
        if proc.stderr is not None:
            stderr = proc.stderr.read()
            proc.stderr.close()
        return_code = proc.wait()
        # Raising here only for a completed run keeps an exception from the
        # with-body from being masked by a secondary zstdcat error.
        if ran_to_completion and return_code != 0:
            raise RuntimeError(f"zstdcat failed for {resolved}: {stderr.strip()}")
|
||||
Reference in New Issue
Block a user