Initial commit

This commit is contained in:
2026-04-21 15:44:47 +00:00
commit bce3fe1395
40 changed files with 1758724 additions and 0 deletions

View File

@@ -0,0 +1,108 @@
from __future__ import annotations
import json
import re
from dataclasses import dataclass
from datetime import datetime, timedelta, timezone
from pathlib import Path
from trace_analyzer.helpers import parse_jsonish, safe_int
# Matches a source-file stem made of three dash-separated 4-digit groups.
# ``start`` and ``end`` are sliced as HH / MM by infer_time_window; ``day`` is
# presumably MMDD -- TODO confirm against the file-naming convention.
WINDOW_RE = re.compile(r"(?P<day>\d{4})-(?P<start>\d{4})-(?P<end>\d{4})$")
# Fixed UTC+08:00 offset attached to the naive timestamps parsed in this module.
UTC_PLUS_8 = timezone(timedelta(hours=8))
@dataclass(frozen=True)
class TimeWindow:
    """Immutable, labelled time span with integer millisecond bounds."""

    # Short tag identifying the window (built by infer_time_window).
    label: str
    # Lower bound of the window, in milliseconds.
    start_ms: int
    # Upper bound of the window, in milliseconds.
    end_ms: int
def parse_time_to_ms(value: str) -> int:
    """Convert a ``YYYY-MM-DD HH:MM:SS[.ffffff]`` string to epoch milliseconds.

    The text carries no zone information and is interpreted as UTC+08:00.

    Args:
        value: Timestamp text. ``None``, empty and whitespace-only values are
            treated as "no timestamp".

    Returns:
        Whole milliseconds since the Unix epoch (sub-millisecond digits are
        floored), or ``0`` when ``value`` is empty.

    Raises:
        ValueError: If ``value`` matches none of the supported formats.
    """
    text = str(value or "").strip()
    if not text:
        return 0
    for fmt in ("%Y-%m-%d %H:%M:%S.%f", "%Y-%m-%d %H:%M:%S"):
        try:
            parsed = datetime.strptime(text, fmt)
        except ValueError:
            continue
        aware = parsed.replace(tzinfo=UTC_PLUS_8)
        epoch = datetime(1970, 1, 1, tzinfo=timezone.utc)
        # Integer timedelta division is exact; going through the float
        # returned by ``timestamp()`` and truncating can lose a millisecond.
        return (aware - epoch) // timedelta(milliseconds=1)
    raise ValueError(f"Unsupported timestamp format: {value!r}")
def _read_first_timestamp(path: Path) -> str:
    """Return the first usable ``time`` value from a JSON-lines file.

    Blank lines, records that are not JSON objects, and records whose
    ``time`` is missing, ``null`` or blank are skipped.

    Args:
        path: UTF-8 file holding one JSON document per line.

    Returns:
        The stripped string form of the first non-empty ``time`` field.

    Raises:
        ValueError: If no record carries a usable ``time`` value. A malformed
            line surfaces as ``json.JSONDecodeError``, a ``ValueError``
            subclass.
    """
    with path.open("r", encoding="utf-8") as handle:
        for line in handle:
            stripped = line.strip()
            if not stripped:
                continue
            record = json.loads(stripped)
            if not isinstance(record, dict):
                continue
            raw_time = record.get("time")
            if raw_time is None:
                # JSON null would otherwise stringify to the bogus value "None".
                continue
            value = str(raw_time).strip()
            if value:
                return value
    raise ValueError(f"Could not find time field in {path}")
def _read_first_timestamp_and_ready_ms(path: Path) -> tuple[str, int]:
    """Return the first usable ``time`` value and its request-ready time.

    The ready time is read from
    ``request_params.header.attributes["x-dashscope-inner-requestreadytime"]``
    of the same record. Every level of that path is tolerated when missing or
    of an unexpected type; the lookup then yields whatever ``safe_int``
    returns for ``None`` (callers treat a falsy result as "unknown").

    Args:
        path: UTF-8 file holding one JSON document per line.

    Returns:
        ``(time_text, ready_ms)`` for the first record whose ``time`` is
        present, non-null and non-blank.

    Raises:
        ValueError: If no record carries a usable ``time`` value. A malformed
            line surfaces as ``json.JSONDecodeError``, a ``ValueError``
            subclass.
    """
    with path.open("r", encoding="utf-8") as handle:
        for line in handle:
            stripped = line.strip()
            if not stripped:
                continue
            record = json.loads(stripped)
            if not isinstance(record, dict):
                continue
            raw_time = record.get("time")
            if raw_time is None:
                # JSON null would otherwise stringify to the bogus value "None".
                continue
            value = str(raw_time).strip()
            if not value:
                continue
            # request_params may arrive as a JSON-encoded string or a mapping;
            # parse_jsonish presumably normalises both -- verify in helpers.
            request_params = parse_jsonish(record.get("request_params", {}))
            header = request_params.get("header", {}) if isinstance(request_params, dict) else {}
            attributes = header.get("attributes", {}) if isinstance(header, dict) else {}
            if not isinstance(attributes, dict):
                # Same tolerance as the two levels above.
                attributes = {}
            ready_ms = safe_int(attributes.get("x-dashscope-inner-requestreadytime"))
            return value, ready_ms
    raise ValueError(f"Could not find time field in {path}")
def infer_time_offset_ms(path: Path) -> int:
    """Estimate the whole-hour gap between logged time and request-ready time.

    The first record's ``time`` text is converted with ``parse_time_to_ms``
    and compared with that record's request-ready value; the difference is
    rounded to the nearest multiple of one hour.

    Args:
        path: JSON-lines file to sample.

    Returns:
        The offset in milliseconds (a multiple of 3,600,000), or ``0`` when
        the record has no request-ready value.
    """
    timestamp_text, ready_ms = _read_first_timestamp_and_ready_ms(path)
    if ready_ms:
        one_hour_ms = 60 * 60 * 1000
        drift_ms = parse_time_to_ms(timestamp_text) - ready_ms
        # Snap to whole hours so sub-hour latency is not mistaken for offset.
        return int(round(drift_ms / one_hour_ms)) * one_hour_ms
    return 0
def _window_end_day_offset(
    base_date: str,
    first_day: str,
    last_day: str,
    last_start_hhmm: str,
    last_end_hhmm: str,
) -> int:
    """Return how many days after the first file's date the window ends."""
    # A last file such as ``0421-2330-0000`` ends on the day after it starts.
    offset = 1 if last_end_hhmm < last_start_hhmm else 0
    if first_day == last_day:
        return offset
    try:
        # The 4-digit prefixes are presumably MMDD (the label format of the
        # explicit-bounds branch suggests so) -- verify against the producer.
        year = int(base_date.split("-", 1)[0])
        first = datetime(year, int(first_day[:2]), int(first_day[2:]))
        last = datetime(year, int(last_day[:2]), int(last_day[2:]))
        if last < first:
            # Only month/day are known, so an earlier date means a new year.
            last = last.replace(year=year + 1)
    except ValueError:
        # Prefixes that are not valid calendar dates: keep the same-day reading.
        return offset
    return offset + (last - first).days


def infer_time_window(
    source_files: list[Path],
    *,
    start_time: str | None = None,
    end_time: str | None = None,
) -> TimeWindow | None:
    """Build the analysis window from explicit bounds or from file names.

    When both ``start_time`` and ``end_time`` are given they are parsed with
    ``parse_time_to_ms`` and used as-is. Otherwise the window is derived from
    the stems of the first and last source files (``WINDOW_RE``), anchored on
    the date of the first record in the first file and shifted by the offset
    reported by ``infer_time_offset_ms``.

    Args:
        source_files: Input files; the first and last delimit the window.
        start_time: Optional explicit start, ``YYYY-MM-DD HH:MM:SS[.fff]``.
        end_time: Optional explicit end, same format. A bound supplied without
            the other one is ignored.

    Returns:
        The inferred ``TimeWindow``, or ``None`` when there are no files or a
        boundary file name does not match ``WINDOW_RE``.

    Raises:
        ValueError: If a timestamp cannot be parsed or the first file has no
            usable ``time`` field.
    """
    if start_time and end_time:
        start_ms = parse_time_to_ms(start_time)
        end_ms = parse_time_to_ms(end_time)
        label = (
            f"{datetime.fromtimestamp(start_ms / 1000, tz=UTC_PLUS_8):%m%d%H}"
            f"-{datetime.fromtimestamp(end_ms / 1000, tz=UTC_PLUS_8):%m%d%H}"
        )
        return TimeWindow(label=label, start_ms=start_ms, end_ms=end_ms)
    if not source_files:
        return None
    first_match = WINDOW_RE.match(source_files[0].stem)
    last_match = WINDOW_RE.match(source_files[-1].stem)
    if first_match is None or last_match is None:
        return None

    first_time = _read_first_timestamp(source_files[0])
    time_offset_ms = infer_time_offset_ms(source_files[0])
    # File names carry no year, so the calendar date comes from the data.
    base_date = first_time.split(" ", 1)[0]
    start_hhmm = first_match.group("start")
    end_hhmm = last_match.group("end")
    start_text = f"{base_date} {start_hhmm[:2]}:{start_hhmm[2:]}:00.000"
    end_text = f"{base_date} {end_hhmm[:2]}:{end_hhmm[2:]}:00.000"
    start_ms = parse_time_to_ms(start_text) - time_offset_ms
    end_ms = parse_time_to_ms(end_text) - time_offset_ms

    # The end belongs to the last file's day, not the first file's; without
    # this shift a window crossing midnight would end before it starts.
    day_ms = 24 * 60 * 60 * 1000
    end_ms += day_ms * _window_end_day_offset(
        base_date,
        first_match.group("day"),
        last_match.group("day"),
        last_match.group("start"),
        end_hhmm,
    )

    label = f"{first_match.group('day')}{start_hhmm[:2]}-{last_match.group('day')}{end_hhmm[:2]}"
    return TimeWindow(label=label, start_ms=start_ms, end_ms=end_ms)