"""Trace data structures and loader for the Ali agentic-coder trace format. Trace format (one JSON per line): chat_id, parent_chat_id, timestamp, input_length, output_length, type, turn, hash_ids[] Sessions are derived from parent_chat_id chains: - parent_chat_id == -1 → new session root - parent_chat_id >= 0 → belongs to the same session as the parent """ from __future__ import annotations import json from dataclasses import dataclass from pathlib import Path @dataclass(frozen=True) class TraceRequest: request_id: str session_id: str chat_id: int parent_chat_id: int timestamp_s: float input_length: int output_length: int request_type: str turn_id: int hash_ids: tuple[int, ...] # real production gap (s) from parent turn finishing to this turn arriving; # None for turn-1 / unannotated traces. Used by --dispatch-mode thinktime. time_to_parent_chat_s: float | None = None def load_trace( path: Path, *, request_limit: int | None = None, ) -> list[TraceRequest]: """Load trace and resolve session IDs from parent_chat_id chains.""" chat_to_session: dict[int, str] = {} requests: list[TraceRequest] = [] with path.open("r", encoding="utf-8") as fh: for idx, line in enumerate(fh): if request_limit is not None and len(requests) >= request_limit: break row = json.loads(line) chat_id = int(row["chat_id"]) parent_chat_id = int(row["parent_chat_id"]) if "session_id" in row: session_id = str(row["session_id"]) else: session_id = _resolve_session_id( chat_id, parent_chat_id, chat_to_session, ) chat_to_session[chat_id] = session_id requests.append(TraceRequest( request_id=f"{session_id}:{row['turn']}:{chat_id}:{idx}", session_id=session_id, chat_id=chat_id, parent_chat_id=parent_chat_id, timestamp_s=float(row["timestamp"]), input_length=int(row["input_length"]), output_length=int(row["output_length"]), request_type=str(row["type"]), turn_id=int(row["turn"]), hash_ids=tuple(int(h) for h in row.get("hash_ids", [])), time_to_parent_chat_s=( float(row["time_to_parent_chat"]) if row.get("time_to_parent_chat") is not None else None), )) return requests def _resolve_session_id( chat_id: int, parent_chat_id: int, chat_to_session: dict[int, str], ) -> str: if parent_chat_id < 0: session_id = str(chat_id) else: session_id = chat_to_session.get(parent_chat_id, str(parent_chat_id)) chat_to_session[chat_id] = session_id return session_id