109 lines
3.8 KiB
Python
109 lines
3.8 KiB
Python
from __future__ import annotations
|
|
|
|
import json
|
|
import re
|
|
from dataclasses import dataclass
|
|
from datetime import datetime, timedelta, timezone
|
|
from pathlib import Path
|
|
|
|
from trace_analyzer.helpers import parse_jsonish, safe_int
|
|
|
|
# File-stem pattern made of three dash-separated 4-digit groups:
# ``<day>-<start>-<end>``. ``start`` and ``end`` are consumed as HHMM by
# ``infer_time_window``; ``day`` is only copied into the window label
# (presumably MMDD, to mirror the ``%m%d%H`` label format -- TODO confirm).
# The pattern is meant to be used with ``.match`` so the whole stem must fit.
WINDOW_RE = re.compile(r"(?P<day>\d{4})-(?P<start>\d{4})-(?P<end>\d{4})$")

# Fixed UTC+08:00 offset applied to every wall-clock timestamp in this module.
UTC_PLUS_8 = timezone(timedelta(hours=8))
|
|
|
|
|
|
@dataclass(frozen=True)
class TimeWindow:
    """Immutable description of a time span together with a display label."""

    # Short human-readable tag for the window (built by ``infer_time_window``).
    label: str
    # Window start, in milliseconds since the Unix epoch.
    start_ms: int
    # Window end, in milliseconds since the Unix epoch.
    end_ms: int
|
|
|
|
|
|
def parse_time_to_ms(value: str) -> int:
    """Convert a wall-clock timestamp string into epoch milliseconds.

    The text is read as ``YYYY-MM-DD HH:MM:SS`` with an optional fractional
    part and is interpreted in the UTC+8 zone.

    Args:
        value: Timestamp text. Falsy values and whitespace-only strings are
            treated as "no timestamp".

    Returns:
        Milliseconds since the Unix epoch, or ``0`` when the input is empty.

    Raises:
        ValueError: If the text matches neither supported layout.
    """
    cleaned = str(value or "").strip()
    if cleaned == "":
        return 0

    # Try the fractional-seconds layout first, then the plain one.
    supported_layouts = ("%Y-%m-%d %H:%M:%S.%f", "%Y-%m-%d %H:%M:%S")
    for layout in supported_layouts:
        try:
            naive = datetime.strptime(cleaned, layout)
        except ValueError:
            continue
        aware = naive.replace(tzinfo=UTC_PLUS_8)
        return int(aware.timestamp() * 1000)

    raise ValueError(f"Unsupported timestamp format: {value!r}")
|
|
|
|
|
|
def _read_first_timestamp(path: Path) -> str:
    """Return the first non-empty ``time`` value in a JSON-lines file.

    Blank lines are skipped. Lines that decode to something other than a JSON
    object, and records whose ``time`` is missing, ``null`` or blank, are
    skipped as well.

    Args:
        path: UTF-8 encoded file holding one JSON document per line.

    Returns:
        The ``time`` field of the first usable record, stringified and
        stripped of surrounding whitespace.

    Raises:
        ValueError: If no record carries a usable ``time`` field. A malformed
            JSON line also surfaces as ``json.JSONDecodeError``, which is a
            ``ValueError`` subclass.
    """
    with path.open("r", encoding="utf-8") as handle:
        for line in handle:
            stripped = line.strip()
            if not stripped:
                continue
            record = json.loads(stripped)
            if not isinstance(record, dict):
                # Valid JSON but not an object: it cannot hold a "time" key.
                continue
            raw_time = record.get("time")
            if raw_time is None:
                # str(None) would yield the truthy text "None"; treat a JSON
                # null as a missing timestamp instead.
                continue
            value = str(raw_time).strip()
            if value:
                return value
    raise ValueError(f"Could not find time field in {path}")
|
|
|
|
|
|
def _read_first_timestamp_and_ready_ms(path: Path) -> tuple[str, int]:
    """Return the first record's ``time`` plus its request-ready timestamp.

    The file is scanned line by line. Blank lines, lines that are not JSON
    objects, and records whose ``time`` is missing, ``null`` or blank are
    skipped. For the first usable record, the value stored under
    ``request_params -> header -> attributes ->
    x-dashscope-inner-requestreadytime`` is passed through ``safe_int``.

    Args:
        path: UTF-8 encoded file holding one JSON document per line.

    Returns:
        ``(time_text, ready_ms)``. ``ready_ms`` is whatever ``safe_int``
        yields for the attribute, including when any level of the nested
        structure is absent or is not a mapping.

    Raises:
        ValueError: If no record carries a usable ``time`` field. A malformed
            JSON line also surfaces as ``json.JSONDecodeError``, which is a
            ``ValueError`` subclass.
    """
    with path.open("r", encoding="utf-8") as handle:
        for line in handle:
            stripped = line.strip()
            if not stripped:
                continue
            record = json.loads(stripped)
            if not isinstance(record, dict):
                # Valid JSON but not an object: it cannot hold a "time" key.
                continue
            raw_time = record.get("time")
            if raw_time is None:
                # str(None) would yield the truthy text "None"; treat a JSON
                # null as a missing timestamp instead.
                continue
            value = str(raw_time).strip()
            if not value:
                continue

            # request_params goes through the project's parse_jsonish helper
            # (presumably it may arrive as an encoded string -- TODO confirm),
            # so every level is type-checked before it is indexed.
            request_params = parse_jsonish(record.get("request_params", {}))
            header = request_params.get("header", {}) if isinstance(request_params, dict) else {}
            attributes = header.get("attributes", {}) if isinstance(header, dict) else {}
            if not isinstance(attributes, dict):
                # Same guard as the two levels above; a non-mapping here used
                # to crash on ``.get``.
                attributes = {}
            ready_ms = safe_int(attributes.get("x-dashscope-inner-requestreadytime"))
            return value, ready_ms
    raise ValueError(f"Could not find time field in {path}")
|
|
|
|
|
|
def infer_time_offset_ms(path: Path) -> int:
    """Estimate the whole-hour gap between logged time and request-ready time.

    The first record of ``path`` supplies both a wall-clock ``time`` string
    and a request-ready value. Their difference is rounded to the nearest
    hour and returned in milliseconds.

    Args:
        path: JSON-lines file to sample.

    Returns:
        ``wall_clock - ready`` rounded to a multiple of one hour, in
        milliseconds; ``0`` when the ready value is missing or zero.

    Raises:
        ValueError: Propagated from the helpers when the file has no usable
            ``time`` field or the timestamp cannot be parsed.
    """
    logged_time, ready_ms = _read_first_timestamp_and_ready_ms(path)
    if not ready_ms:
        # Nothing to compare against, so assume no offset.
        return 0

    ms_per_hour = 60 * 60 * 1000
    delta_ms = parse_time_to_ms(logged_time) - ready_ms
    whole_hours = int(round(delta_ms / ms_per_hour))
    return whole_hours * ms_per_hour
|
|
|
|
|
|
def infer_time_window(
    source_files: list[Path],
    *,
    start_time: str | None = None,
    end_time: str | None = None,
) -> TimeWindow | None:
    """Build the analysis time window from explicit bounds or from file names.

    When both ``start_time`` and ``end_time`` are given they are parsed with
    ``parse_time_to_ms`` and used directly; the label is ``MMDDHH-MMDDHH``
    rendered in UTC+8. If only one of them is given it is ignored and the
    window is inferred from the files.

    Otherwise the stems of the first and last entries of ``source_files``
    must match ``WINDOW_RE``. The calendar date comes from the first
    timestamp inside the first file, the start HHMM from the first stem and
    the end HHMM from the last stem. Both bounds are then shifted by
    ``infer_time_offset_ms`` of the first file.

    Args:
        source_files: Trace files; only the first and last are inspected.
        start_time: Optional explicit window start.
        end_time: Optional explicit window end.

    Returns:
        The resolved ``TimeWindow``, or ``None`` when there are no files or
        the first/last stem does not match ``WINDOW_RE``.

    Raises:
        ValueError: Propagated from the parsing helpers (unsupported
            timestamp text, or no ``time`` field in the first file).
    """
    if start_time and end_time:
        start_ms = parse_time_to_ms(start_time)
        end_ms = parse_time_to_ms(end_time)
        label = (
            f"{datetime.fromtimestamp(start_ms / 1000, tz=UTC_PLUS_8):%m%d%H}"
            f"-{datetime.fromtimestamp(end_ms / 1000, tz=UTC_PLUS_8):%m%d%H}"
        )
        return TimeWindow(label=label, start_ms=start_ms, end_ms=end_ms)

    if not source_files:
        return None

    first_path, last_path = source_files[0], source_files[-1]
    first_match = WINDOW_RE.match(first_path.stem)
    last_match = WINDOW_RE.match(last_path.stem)
    if first_match is None or last_match is None:
        return None

    first_time = _read_first_timestamp(first_path)
    time_offset_ms = infer_time_offset_ms(first_path)

    # Only the date part of the first logged timestamp is kept.
    base_date = first_time.split(" ", 1)[0]
    start_hhmm = first_match.group("start")
    end_hhmm = last_match.group("end")
    start_text = f"{base_date} {start_hhmm[:2]}:{start_hhmm[2:]}:00.000"
    end_text = f"{base_date} {end_hhmm[:2]}:{end_hhmm[2:]}:00.000"
    start_ms = parse_time_to_ms(start_text) - time_offset_ms
    end_ms = parse_time_to_ms(end_text) - time_offset_ms

    # Both bounds are built on the first file's date, so a window that
    # crosses midnight (e.g. 2330 -> 0030) would end before it starts.
    # Roll the end forward one day in that case.
    # NOTE(review): windows spanning more than one calendar day are still
    # not reconstructed; the ``day`` groups only feed the label.
    if end_ms < start_ms:
        end_ms += 24 * 60 * 60 * 1000

    first_day = first_match.group("day")
    last_day = last_match.group("day")
    label = f"{first_day}{start_hhmm[:2]}-{last_day}{end_hhmm[:2]}"
    return TimeWindow(label=label, start_ms=start_ms, end_ms=end_ms)
|