from __future__ import annotations import json import re from dataclasses import dataclass from datetime import datetime, timedelta, timezone from pathlib import Path from trace_analyzer.helpers import parse_jsonish, safe_int WINDOW_RE = re.compile(r"(?P\d{4})-(?P\d{4})-(?P\d{4})$") UTC_PLUS_8 = timezone(timedelta(hours=8)) @dataclass(frozen=True) class TimeWindow: label: str start_ms: int end_ms: int def parse_time_to_ms(value: str) -> int: text = str(value or "").strip() if not text: return 0 for fmt in ("%Y-%m-%d %H:%M:%S.%f", "%Y-%m-%d %H:%M:%S"): try: dt = datetime.strptime(text, fmt).replace(tzinfo=UTC_PLUS_8) return int(dt.timestamp() * 1000) except ValueError: continue raise ValueError(f"Unsupported timestamp format: {value!r}") def _read_first_timestamp(path: Path) -> str: with path.open("r", encoding="utf-8") as handle: for line in handle: stripped = line.strip() if not stripped: continue raw = json.loads(stripped) value = str(raw.get("time", "")).strip() if value: return value raise ValueError(f"Could not find time field in {path}") def _read_first_timestamp_and_ready_ms(path: Path) -> tuple[str, int]: with path.open("r", encoding="utf-8") as handle: for line in handle: stripped = line.strip() if not stripped: continue raw = json.loads(stripped) value = str(raw.get("time", "")).strip() if not value: continue request_params = parse_jsonish(raw.get("request_params", {})) header = request_params.get("header", {}) if isinstance(request_params, dict) else {} attributes = header.get("attributes", {}) if isinstance(header, dict) else {} ready_ms = safe_int(attributes.get("x-dashscope-inner-requestreadytime")) return value, ready_ms raise ValueError(f"Could not find time field in {path}") def infer_time_offset_ms(path: Path) -> int: first_time, first_ready_ms = _read_first_timestamp_and_ready_ms(path) if not first_ready_ms: return 0 wall_clock_ms = parse_time_to_ms(first_time) hour_ms = 60 * 60 * 1000 return int(round((wall_clock_ms - first_ready_ms) / hour_ms)) * hour_ms def infer_time_window( source_files: list[Path], *, start_time: str | None = None, end_time: str | None = None, ) -> TimeWindow | None: if start_time and end_time: start_ms = parse_time_to_ms(start_time) end_ms = parse_time_to_ms(end_time) label = ( f"{datetime.fromtimestamp(start_ms / 1000, tz=UTC_PLUS_8):%m%d%H}" f"-{datetime.fromtimestamp(end_ms / 1000, tz=UTC_PLUS_8):%m%d%H}" ) return TimeWindow(label=label, start_ms=start_ms, end_ms=end_ms) if not source_files: return None first_match = WINDOW_RE.match(source_files[0].stem) last_match = WINDOW_RE.match(source_files[-1].stem) if first_match is None or last_match is None: return None first_time = _read_first_timestamp(source_files[0]) time_offset_ms = infer_time_offset_ms(source_files[0]) base_date = first_time.split(" ", 1)[0] start_hhmm = first_match.group("start") end_hhmm = last_match.group("end") start_text = f"{base_date} {start_hhmm[:2]}:{start_hhmm[2:]}:00.000" end_text = f"{base_date} {end_hhmm[:2]}:{end_hhmm[2:]}:00.000" start_ms = parse_time_to_ms(start_text) - time_offset_ms end_ms = parse_time_to_ms(end_text) - time_offset_ms label = f"{first_match.group('day')}{start_hhmm[:2]}-{last_match.group('day')}{end_hhmm[:2]}" return TimeWindow(label=label, start_ms=start_ms, end_ms=end_ms)