# agentic-kvc/microbench/connector_tax/metrics_sampler.py

#!/usr/bin/env python3
"""1 Hz /metrics scraper for connector_tax microbench.

Usage:
  metrics_sampler.py --url http://127.0.0.1:8000/metrics \
                     --output results/<run>/metrics.jsonl \
                     --interval 1.0
"""

import argparse
import json
import time
import urllib.request


# Escape sequences the Prometheus text format defines inside label values.
_LABEL_ESCAPES = {"\\": "\\", '"': '"', "n": "\n"}


def _split_label_pairs(labels_str: str) -> list:
    """Split a label list on commas that are outside double-quoted values."""
    pieces = []
    buf = []
    in_quotes = False
    escaped = False
    for ch in labels_str:
        if escaped:
            # Character following a backslash is taken literally here and
            # decoded later by _unescape_label_value.
            buf.append(ch)
            escaped = False
        elif ch == "\\" and in_quotes:
            buf.append(ch)
            escaped = True
        elif ch == '"':
            in_quotes = not in_quotes
            buf.append(ch)
        elif ch == "," and not in_quotes:
            pieces.append("".join(buf))
            buf = []
        else:
            buf.append(ch)
    pieces.append("".join(buf))
    return pieces


def _unescape_label_value(raw: str) -> str:
    """Decode the backslash escapes (``\\\\``, ``\\"``, ``\\n``) in a label value."""
    if "\\" not in raw:
        return raw
    decoded = []
    chars = iter(raw)
    for ch in chars:
        if ch == "\\":
            nxt = next(chars, "")
            # Unknown escapes are kept verbatim rather than dropped.
            decoded.append(_LABEL_ESCAPES.get(nxt, "\\" + nxt))
        else:
            decoded.append(ch)
    return "".join(decoded)


def _parse_labels(labels_str: str) -> dict:
    """Turn the text between ``{`` and ``}`` into a ``{label: value}`` dict."""
    labels = {}
    for piece in _split_label_pairs(labels_str):
        key, sep, raw = piece.partition("=")
        if not sep:
            # Empty piece (e.g. trailing comma) or junk without '='.
            continue
        raw = raw.strip()
        if len(raw) >= 2 and raw[0] == '"' and raw[-1] == '"':
            value = _unescape_label_value(raw[1:-1])
        else:
            # Lenient fallback for unquoted / half-quoted values.
            value = raw.strip('"')
        labels[key.strip()] = value
    return labels


def parse_prom(text: str) -> dict:
    """Parse Prometheus text-format metrics.

    Args:
        text: Body of a ``/metrics`` response.

    Returns:
        ``{name: [(labels, value), ...]}`` where ``labels`` is a dict of label
        name to label value (empty for unlabelled samples) and ``value`` is a
        float. Samples appear in input order.

    Blank lines, ``#`` comment lines, and malformed sample lines (missing
    closing brace, missing or non-numeric value) are skipped rather than
    raising, so one bad line does not discard the whole scrape. An optional
    trailing timestamp is ignored.
    """
    out: dict[str, list[tuple[dict[str, str], float]]] = {}
    for raw_line in text.splitlines():
        line = raw_line.strip()
        if not line or line.startswith("#"):
            continue
        # name{labels} value [timestamp]
        if "{" in line:
            name, rest = line.split("{", 1)
            # The value/timestamp tail never contains '}', so the last brace
            # closes the label set even if a label value contains one.
            labels_str, closing, tail = rest.rpartition("}")
            if not closing:
                continue
            labels = _parse_labels(labels_str)
            fields = tail.split()
        else:
            parts = line.split()
            name, fields = parts[0], parts[1:]
            labels = {}
        if not fields:
            continue
        try:
            val = float(fields[0])
        except ValueError:
            continue
        out.setdefault(name.strip(), []).append((labels, val))
    return out


# Metric-name prefixes retained by collapse(); every other metric in the
# scrape is dropped. Matching is by prefix, so suffixed series of a listed
# metric (e.g. histogram `_sum` / `_count`) are matched as well.
KEEP_PREFIXES = (
    "vllm:num_requests_running",
    "vllm:num_requests_waiting",
    "vllm:gpu_cache_usage_perc",
    "vllm:time_to_first_token_seconds",
    "vllm:time_per_output_token_seconds",
    "vllm:request_prefill_time_seconds",
    "vllm:request_decode_time_seconds",
    "vllm:iteration_tokens_total",
    "vllm:e2e_request_latency_seconds",
)


def collapse(parsed: dict) -> dict:
    """Reduce parsed metrics to a single number per retained metric name.

    Args:
        parsed: Mapping ``{name: [(labels, value), ...]}`` as produced by
            ``parse_prom``.

    Returns:
        ``{name: total}`` for every name that starts with one of
        ``KEEP_PREFIXES`` and does not end in ``_bucket``. ``total`` is the sum
        of that metric's values over all of its label sets; label dimensions
        are discarded for compactness.

    Histogram ``_bucket`` series are dropped entirely; the matching ``_count``
    and ``_sum`` series pass the prefix filter and are kept.
    """
    return {
        name: sum(value for _labels, value in entries)
        for name, entries in parsed.items()
        # str.startswith accepts a tuple of candidate prefixes.
        if name.startswith(KEEP_PREFIXES) and not name.endswith("_bucket")
    }


def main():
    """Poll a /metrics endpoint in a loop and append one JSON line per poll.

    Command-line options:
        --url       Endpoint to scrape (required).
        --output    JSONL file to append to (required).
        --interval  Seconds to sleep between polls (default 1.0).
        --duration  Stop after this many seconds; 0 means run until killed.

    Each successful poll writes the collapsed metrics plus a ``t_unix``
    timestamp. A failed poll writes ``{"t_unix": ..., "error": ...}`` instead,
    so gaps in the series stay visible and the sampler keeps running.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--url", required=True,
                    help="http://host:port/metrics")
    ap.add_argument("--output", required=True)
    ap.add_argument("--interval", type=float, default=1.0)
    ap.add_argument("--duration", type=float, default=0.0,
                    help="Stop after N seconds; 0 = run until killed")
    args = ap.parse_args()

    # Line-buffered append so each sample reaches the file as soon as it is
    # written; the context manager closes the handle on every exit path.
    with open(args.output, "a", buffering=1, encoding="utf-8") as out:
        # Monotonic clock for the run-length check so wall-clock adjustments
        # cannot shorten or extend the run; t_unix below stays wall-clock.
        t_start = time.monotonic()
        while True:
            try:
                with urllib.request.urlopen(args.url, timeout=2.0) as r:
                    text = r.read().decode("utf-8")
                sample = collapse(parse_prom(text))
                sample["t_unix"] = time.time()
                out.write(json.dumps(sample) + "\n")
            except Exception as e:
                # Top-level boundary: record the failure and keep sampling.
                err = {"t_unix": time.time(), "error": str(e)}
                out.write(json.dumps(err) + "\n")

            if args.duration > 0 and time.monotonic() - t_start >= args.duration:
                break
            # Sleep is added on top of the scrape time, so the effective
            # period is interval + scrape latency.
            time.sleep(args.interval)


if __name__ == "__main__":
    main()