Initial commit
This commit is contained in:
108
trace_formatter/time_windows.py
Normal file
108
trace_formatter/time_windows.py
Normal file
@@ -0,0 +1,108 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from pathlib import Path
|
||||
|
||||
from trace_analyzer.helpers import parse_jsonish, safe_int
|
||||
|
||||
# File stems that encode a time window look like "<day>-<start>-<end>": three
# groups of exactly four digits separated by dashes (e.g. "0115-1000-1130").
# The pattern is applied with ``re.match`` so the whole stem has to have this shape.
WINDOW_RE = re.compile(r"(?P<day>\d{4})-(?P<start>\d{4})-(?P<end>\d{4})$")

# Fixed UTC+08:00 zone attached to every parsed wall-clock timestamp.
UTC_PLUS_8 = timezone(timedelta(hours=8))
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class TimeWindow:
    """Immutable description of a labelled time range.

    Attributes:
        label: Short human-readable name of the window.
        start_ms: Window start, in milliseconds.
        end_ms: Window end, in milliseconds.
    """

    label: str
    start_ms: int
    end_ms: int
|
||||
|
||||
|
||||
def parse_time_to_ms(value: str) -> int:
    """Convert a wall-clock timestamp string into epoch milliseconds.

    The text is interpreted in the module's fixed UTC+8 zone. Two layouts are
    accepted: ``YYYY-MM-DD HH:MM:SS.ffffff`` and ``YYYY-MM-DD HH:MM:SS``.

    Args:
        value: Timestamp text. Falsy or whitespace-only input is treated as
            "no timestamp".

    Returns:
        Milliseconds since the Unix epoch (fractional milliseconds are
        truncated), or ``0`` when ``value`` is empty.

    Raises:
        ValueError: If the text matches neither supported layout.
    """
    candidate = str(value or "").strip()
    if candidate == "":
        return 0

    supported_layouts = ("%Y-%m-%d %H:%M:%S.%f", "%Y-%m-%d %H:%M:%S")
    for layout in supported_layouts:
        try:
            naive = datetime.strptime(candidate, layout)
        except ValueError:
            # Not this layout; try the next one.
            continue
        else:
            localized = naive.replace(tzinfo=UTC_PLUS_8)
            return int(localized.timestamp() * 1000)

    raise ValueError(f"Unsupported timestamp format: {value!r}")
|
||||
|
||||
|
||||
def _read_first_timestamp(path: Path) -> str:
    """Return the first usable ``time`` value from a JSON-lines file.

    Lines are scanned in order. Blank lines, lines whose JSON value is not an
    object, and objects whose ``time`` field is missing, ``null`` or empty are
    skipped.

    Args:
        path: UTF-8 encoded file holding one JSON document per line.

    Returns:
        The stripped text of the first non-empty ``time`` field.

    Raises:
        ValueError: If no line carries a usable ``time`` field. A malformed
            JSON line also surfaces as ``json.JSONDecodeError`` (a
            ``ValueError`` subclass).
    """
    with path.open("r", encoding="utf-8") as handle:
        for line in handle:
            stripped = line.strip()
            if not stripped:
                continue
            raw = json.loads(stripped)
            if not isinstance(raw, dict):
                # A bare list/number/string line has no fields to inspect.
                continue
            # ``or ""`` keeps a JSON null from being stringified to "None"
            # and returned as if it were a timestamp.
            value = str(raw.get("time") or "").strip()
            if value:
                return value
    raise ValueError(f"Could not find time field in {path}")
|
||||
|
||||
|
||||
def _read_first_timestamp_and_ready_ms(path: Path) -> tuple[str, int]:
    """Return the first usable ``time`` value and that record's ready time.

    Lines are scanned in order. Blank lines, lines whose JSON value is not an
    object, and objects whose ``time`` field is missing, ``null`` or empty are
    skipped. For the first record that has a ``time``, the ready time is read
    from ``request_params -> header -> attributes ->
    "x-dashscope-inner-requestreadytime"``; any level that is missing or not
    a mapping is treated as empty.

    Args:
        path: UTF-8 encoded file holding one JSON document per line.

    Returns:
        ``(time_text, ready_ms)`` where ``ready_ms`` is whatever ``safe_int``
        yields for the header attribute.
        NOTE(review): presumably ``safe_int`` returns 0 for a missing value
        and the attribute is an epoch-milliseconds stamp; confirm against
        ``trace_analyzer.helpers`` and the producer of these records.

    Raises:
        ValueError: If no line carries a usable ``time`` field. A malformed
            JSON line also surfaces as ``json.JSONDecodeError`` (a
            ``ValueError`` subclass).
    """
    with path.open("r", encoding="utf-8") as handle:
        for line in handle:
            stripped = line.strip()
            if not stripped:
                continue
            raw = json.loads(stripped)
            if not isinstance(raw, dict):
                # A bare list/number/string line has no fields to inspect.
                continue
            # ``or ""`` keeps a JSON null from being stringified to "None"
            # and returned as if it were a timestamp.
            value = str(raw.get("time") or "").strip()
            if not value:
                continue
            # ``request_params`` may need decoding before it can be walked,
            # hence the helper. Every nested level is type-checked so an
            # unexpected shape degrades to "no ready time" instead of crashing.
            request_params = parse_jsonish(raw.get("request_params", {}))
            header = request_params.get("header", {}) if isinstance(request_params, dict) else {}
            attributes = header.get("attributes", {}) if isinstance(header, dict) else {}
            if not isinstance(attributes, dict):
                attributes = {}
            ready_ms = safe_int(attributes.get("x-dashscope-inner-requestreadytime"))
            return value, ready_ms
    raise ValueError(f"Could not find time field in {path}")
|
||||
|
||||
|
||||
def infer_time_offset_ms(path: Path) -> int:
    """Estimate, in whole hours, how far logged wall-clock time is from ready time.

    The first timestamped record of ``path`` supplies both a wall-clock
    ``time`` string and a ready-time value. Their difference is rounded to the
    nearest hour and returned in milliseconds.

    Args:
        path: JSON-lines file to sample.

    Returns:
        ``wall_clock - ready_time`` rounded to a multiple of one hour, in
        milliseconds; ``0`` when the record carries no (or a zero) ready time.

    Raises:
        ValueError: If the file has no ``time`` field or the timestamp cannot
            be parsed.
    """
    timestamp_text, ready_epoch_ms = _read_first_timestamp_and_ready_ms(path)
    if not ready_epoch_ms:
        # Nothing to compare against, so assume the clocks agree.
        return 0

    ms_per_hour = 60 * 60 * 1000
    drift_ms = parse_time_to_ms(timestamp_text) - ready_epoch_ms
    whole_hours = int(round(drift_ms / ms_per_hour))
    return whole_hours * ms_per_hour
|
||||
|
||||
|
||||
def _window_end_day_shift(
    base_date: str,
    first_day: str,
    last_day: str,
    last_start_hhmm: str,
    end_hhmm: str,
) -> int:
    """Return how many calendar days after the window's start date its end falls.

    ``first_day`` and ``last_day`` are the ``day`` groups of the first and last
    file names, read as MMDD (the month-day layout the explicit-range label
    uses). ``base_date`` (``YYYY-MM-DD``) only supplies the year.
    """
    shift = 0
    try:
        base_year = datetime.strptime(base_date, "%Y-%m-%d").year
        first_date = datetime(base_year, int(first_day[:2]), int(first_day[2:]))
        # A last day that sorts before the first one is taken as a year
        # rollover (e.g. 1231 -> 0101).
        last_year = base_year + 1 if last_day < first_day else base_year
        last_date = datetime(last_year, int(last_day[:2]), int(last_day[2:]))
    except ValueError:
        # The groups are not valid MMDD dates: keep the same-day behaviour.
        pass
    else:
        shift = (last_date - first_date).days
    # A file whose own end HHMM precedes its start HHMM runs past midnight.
    if end_hhmm < last_start_hhmm:
        shift += 1
    return shift


def infer_time_window(
    source_files: list[Path],
    *,
    start_time: str | None = None,
    end_time: str | None = None,
) -> TimeWindow | None:
    """Build the time window covered by a run, explicitly or from file names.

    When both ``start_time`` and ``end_time`` are given they define the window
    directly. Otherwise the window is derived from the stems of the first and
    last entries of ``source_files``, which must look like
    ``<day>-<start HHMM>-<end HHMM>``. The calendar date comes from the first
    ``time`` field of the first file, and the hour offset inferred from that
    file is subtracted from both bounds.

    The end bound is moved forward by whole days when the last file name
    carries a later ``day`` than the first, or when the last file's end HHMM
    precedes its own start HHMM (a window running past midnight). Without
    this, such spans would end before they start.

    Args:
        source_files: Trace files in chronological order.
        start_time: Optional explicit window start (see ``parse_time_to_ms``
            for accepted layouts).
        end_time: Optional explicit window end.

    Returns:
        The resolved ``TimeWindow``, or ``None`` when no explicit range was
        given and either ``source_files`` is empty or a stem does not match
        the expected pattern.

    Raises:
        ValueError: If a timestamp cannot be parsed or the first file has no
            ``time`` field.
    """
    if start_time and end_time:
        start_ms = parse_time_to_ms(start_time)
        end_ms = parse_time_to_ms(end_time)
        label = (
            f"{datetime.fromtimestamp(start_ms / 1000, tz=UTC_PLUS_8):%m%d%H}"
            f"-{datetime.fromtimestamp(end_ms / 1000, tz=UTC_PLUS_8):%m%d%H}"
        )
        return TimeWindow(label=label, start_ms=start_ms, end_ms=end_ms)

    if not source_files:
        return None

    first_match = WINDOW_RE.match(source_files[0].stem)
    last_match = WINDOW_RE.match(source_files[-1].stem)
    if first_match is None or last_match is None:
        return None

    first_time = _read_first_timestamp(source_files[0])
    time_offset_ms = infer_time_offset_ms(source_files[0])
    base_date = first_time.split(" ", 1)[0]
    start_hhmm = first_match.group("start")
    end_hhmm = last_match.group("end")
    start_text = f"{base_date} {start_hhmm[:2]}:{start_hhmm[2:]}:00.000"
    end_text = f"{base_date} {end_hhmm[:2]}:{end_hhmm[2:]}:00.000"
    start_ms = parse_time_to_ms(start_text) - time_offset_ms
    end_ms = parse_time_to_ms(end_text) - time_offset_ms

    # ``end_text`` is built on the first file's date; shift it to the day the
    # window really ends on. UTC+8 has no DST, so a day is always 24 hours.
    day_shift = _window_end_day_shift(
        base_date,
        first_match.group("day"),
        last_match.group("day"),
        last_match.group("start"),
        end_hhmm,
    )
    end_ms += day_shift * 24 * 60 * 60 * 1000

    label = f"{first_match.group('day')}{start_hhmm[:2]}-{last_match.group('day')}{end_hhmm[:2]}"
    return TimeWindow(label=label, start_ms=start_ms, end_ms=end_ms)
|
||||
Reference in New Issue
Block a user