Add L-C-A workload profile metric and CLI profile commands

Implement the paper's 10-dimensional L-C-A workload feature vector (RobustScaler-normalized, sim=exp(-||dz||)) in lca.py, and wire it into `aituner profile window` / `aituner profile similarity`. Covered by tests. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-15 14:02:24 +08:00
parent 984eb1f325
commit 27d1c8fa92
3 changed files with 770 additions and 2 deletions
--- a/src/aituner/cli.py
+++ b/src/aituner/cli.py
@@ -3,6 +3,7 @@ from __future__ import annotations
 import argparse
 import json
 import sys
+from dataclasses import replace
 from pathlib import Path

 from .compare import run_compare
@@ -12,8 +13,20 @@ from .harness import (
    build_harness_stop_proposal,
 )
 from .job import append_job, build_trial_job
+from .lca import (
+    build_workload_profile,
+    resolve_length_mode,
+    similarity_report,
+)
 from .llm import build_prompt, call_llm_for_proposal, load_capability_profile, parse_proposal_text
-from .spec import Proposal, SpecError, load_study_spec, to_jsonable
+from .spec import (
+    Proposal,
+    SpecError,
+    StudySpec,
+    load_structured_file,
+    load_study_spec,
+    to_jsonable,
+)
 from .store import StudyStore
 from .trace import load_trace_requests, summarize_window
 from .worker import run_trial
@@ -422,6 +435,159 @@ def cmd_compare_run(args: argparse.Namespace) -> int:
    return 0


+def _resolve_profile_gpu_count(args: argparse.Namespace, study: StudySpec) -> int:
+    gpu_count = args.gpu_count
+    if gpu_count is None:
+        gpu_count = study.hardware.gpu_count
+    if gpu_count <= 0:
+        raise SpecError("--gpu-count must be > 0.")
+    return int(gpu_count)
+
+
+def _load_profile_study_spec(spec_path: Path) -> StudySpec:
+    payload = dict(load_structured_file(spec_path))
+    llm_payload = dict(payload.get("llm") or {})
+    llm_payload.pop("endpoint", None)
+    payload["llm"] = llm_payload
+    return StudySpec.from_dict(payload)
+
+
+def _profile_current_study_window(args: argparse.Namespace) -> dict[str, object]:
+    spec_path = Path(args.spec).resolve()
+    study = _load_profile_study_spec(spec_path)
+    mode = resolve_length_mode(
+        request_mode=study.trace.request_mode,
+        length_mode=args.length_mode,
+    )
+    window, requests = load_trace_requests(study, study_spec_path=spec_path)
+    profile = build_workload_profile(
+        requests,
+        window,
+        gpu_count=_resolve_profile_gpu_count(args, study),
+        length_mode=mode,
+    )
+    return {
+        "profile": profile.to_dict(),
+        "source": {
+            "study_spec_path": str(spec_path),
+            "window_id": study.trace.window_id,
+        },
+    }
+
+
+def _resolve_windows_path_for_profile(study: StudySpec, *, study_spec_path: Path) -> Path:
+    path = Path(study.trace.windows_path)
+    if not path.is_absolute():
+        path = (study_spec_path.parent / path).resolve()
+    return path
+
+
+def _load_profile_windows(
+    study: StudySpec,
+    *,
+    study_spec_path: Path,
+) -> list[dict[str, object]]:
+    windows_path = _resolve_windows_path_for_profile(study, study_spec_path=study_spec_path)
+    payload = json.loads(windows_path.read_text(encoding="utf-8"))
+    raw_windows = payload.get("windows") if isinstance(payload, dict) else payload
+    if not isinstance(raw_windows, list):
+        raise SpecError(f"windows payload must contain a list: {windows_path}")
+    return [
+        {str(key): value for key, value in item.items()}
+        for item in raw_windows
+        if isinstance(item, dict)
+    ]
+
+
+def _selected_profile_windows(
+    args: argparse.Namespace,
+    study: StudySpec,
+    *,
+    study_spec_path: Path,
+) -> list[dict[str, object]]:
+    windows = _load_profile_windows(study, study_spec_path=study_spec_path)
+    window_ids = set(args.window_id or [])
+    selected: list[dict[str, object]] = []
+    for item in windows:
+        window_id = str(item.get("window_id") or "").strip()
+        if not window_id:
+            continue
+        if window_ids and window_id not in window_ids:
+            continue
+        if not window_ids and not args.all:
+            if window_id != study.trace.window_id:
+                continue
+        trace_type = str(item.get("trace_type") or "").strip()
+        if args.trace_type and trace_type != args.trace_type:
+            continue
+        date_value = str(item.get("date") or "").strip()
+        if args.date_from and date_value and date_value < args.date_from:
+            continue
+        if args.date_to and date_value and date_value > args.date_to:
+            continue
+        if args.slot_token and str(item.get("slot_token") or "").strip() != args.slot_token:
+            continue
+        selected.append(item)
+    selected.sort(
+        key=lambda item: (
+            str(item.get("date") or ""),
+            str(item.get("slot_token") or ""),
+            str(item.get("window_id") or ""),
+        )
+    )
+    if args.limit is not None:
+        selected = selected[: args.limit]
+    if not selected:
+        raise SpecError("No trace windows selected for profile similarity.")
+    return selected
+
+
+def cmd_profile_window(args: argparse.Namespace) -> int:
+    print(json.dumps(_profile_current_study_window(args), ensure_ascii=False, indent=2))
+    return 0
+
+
+def cmd_profile_similarity(args: argparse.Namespace) -> int:
+    spec_path = Path(args.spec).resolve()
+    study = _load_profile_study_spec(spec_path)
+    mode = resolve_length_mode(
+        request_mode=study.trace.request_mode,
+        length_mode=args.length_mode,
+    )
+    gpu_count = _resolve_profile_gpu_count(args, study)
+    profiles = []
+    selected = _selected_profile_windows(args, study, study_spec_path=spec_path)
+    for item in selected:
+        window_id = str(item["window_id"])
+        window_study = replace(study, trace=replace(study.trace, window_id=window_id))
+        window, requests = load_trace_requests(window_study, study_spec_path=spec_path)
+        profiles.append(
+            build_workload_profile(
+                requests,
+                window,
+                gpu_count=gpu_count,
+                length_mode=mode,
+            )
+        )
+    print(
+        json.dumps(
+            {
+                "source": {
+                    "study_spec_path": str(spec_path),
+                    "selected_window_count": len(profiles),
+                    "length_mode": mode,
+                    "gpu_count": gpu_count,
+                },
+                "profiles": [profile.to_dict() for profile in profiles],
+                "similarity": similarity_report(profiles),
+            },
+            ensure_ascii=False,
+            indent=2,
+        )
+    )
+    return 0
+
+
 def build_parser() -> argparse.ArgumentParser:
    parser = argparse.ArgumentParser(description="AITuner CLI")
    subparsers = parser.add_subparsers(dest="command", required=True)
@@ -490,6 +656,50 @@ def build_parser() -> argparse.ArgumentParser:
    compare_run.add_argument("--output-root")
    compare_run.set_defaults(func=cmd_compare_run)

+    profile = subparsers.add_parser("profile")
+    profile_sub = profile.add_subparsers(dest="profile_command", required=True)
+
+    profile_window = profile_sub.add_parser("window")
+    profile_window.add_argument("--spec", required=True)
+    profile_window.add_argument(
+        "--length-mode",
+        default="auto",
+        choices=["auto", "total", "input", "output"],
+        help="Token length basis for the L vector. auto uses output for decode_only and total otherwise.",
+    )
+    profile_window.add_argument(
+        "--gpu-count",
+        type=int,
+        help="GPU denominator for per-GPU arrival rate. Defaults to hardware.gpu_count.",
+    )
+    profile_window.set_defaults(func=cmd_profile_window)
+
+    profile_similarity = profile_sub.add_parser("similarity")
+    profile_similarity.add_argument("--spec", required=True)
+    profile_similarity.add_argument("--window-id", action="append")
+    profile_similarity.add_argument("--trace-type")
+    profile_similarity.add_argument("--date-from")
+    profile_similarity.add_argument("--date-to")
+    profile_similarity.add_argument("--slot-token")
+    profile_similarity.add_argument("--limit", type=int)
+    profile_similarity.add_argument(
+        "--all",
+        action="store_true",
+        help="Profile all windows selected by filters. Without this or --window-id, only the study window is used.",
+    )
+    profile_similarity.add_argument(
+        "--length-mode",
+        default="auto",
+        choices=["auto", "total", "input", "output"],
+        help="Token length basis for the L vector. auto uses output for decode_only and total otherwise.",
+    )
+    profile_similarity.add_argument(
+        "--gpu-count",
+        type=int,
+        help="GPU denominator for per-GPU arrival rate. Defaults to hardware.gpu_count.",
+    )
+    profile_similarity.set_defaults(func=cmd_profile_similarity)
+
    return parser


--- a/src/aituner/lca.py
+++ b/src/aituner/lca.py
@@ -0,0 +1,406 @@
+from __future__ import annotations
+
+import json
+import math
+import statistics
+from dataclasses import dataclass
+from typing import Any, Sequence
+
+from .trace import TraceRequest, WindowRecord
+
+
+EPSILON = 1e-9
+
+FEATURE_NAMES = [
+    "L.log_mean_length",
+    "L.log_p95_over_mean_length",
+    "L.cv_length",
+    "C.log_mean_hit_length",
+    "C.log_p95_over_mean_hit_length",
+    "C.cv_hit_length",
+    "C.hit_rate",
+    "A.log_request_rate_per_gpu",
+    "A.cv_interarrival",
+    "A.log_fano_1s",
+]
+
+FAMILY_SLICES = {
+    "L": slice(0, 3),
+    "C": slice(3, 7),
+    "A": slice(7, 10),
+}
+
+LENGTH_MODES = {"total", "input", "output"}
+
+
+@dataclass(frozen=True)
+class WorkloadProfile:
+    window_id: str
+    trace_type: str
+    request_count: int
+    duration_s: float
+    gpu_count: int
+    length_mode: str
+    feature_names: list[str]
+    vector: list[float]
+    stats: dict[str, Any]
+
+    def to_dict(self) -> dict[str, Any]:
+        return {
+            "window_id": self.window_id,
+            "trace_type": self.trace_type,
+            "request_count": self.request_count,
+            "duration_s": self.duration_s,
+            "gpu_count": self.gpu_count,
+            "length_mode": self.length_mode,
+            "feature_names": self.feature_names,
+            "vector": self.vector,
+            "stats": self.stats,
+        }
+
+
+@dataclass(frozen=True)
+class RobustScale:
+    feature_names: list[str]
+    center: list[float]
+    scale: list[float]
+
+    def transform(self, vector: Sequence[float]) -> list[float]:
+        return [
+            (float(value) - self.center[idx]) / self.scale[idx]
+            for idx, value in enumerate(vector)
+        ]
+
+    def to_dict(self) -> dict[str, Any]:
+        return {
+            "feature_names": self.feature_names,
+            "center": self.center,
+            "scale": self.scale,
+        }
+
+
+def resolve_length_mode(*, request_mode: str | None = None, length_mode: str = "auto") -> str:
+    normalized = str(length_mode or "auto").strip().lower()
+    if normalized == "auto":
+        return (
+            "output"
+            if str(request_mode or "").strip().lower() == "decode_only"
+            else "total"
+        )
+    if normalized not in LENGTH_MODES:
+        raise ValueError(
+            "length_mode must be one of: auto, total, input, output."
+        )
+    return normalized
+
+
+def build_workload_profile(
+    requests: list[TraceRequest],
+    window: WindowRecord,
+    *,
+    gpu_count: int,
+    length_mode: str = "total",
+) -> WorkloadProfile:
+    if gpu_count <= 0:
+        raise ValueError("gpu_count must be > 0.")
+    if length_mode not in LENGTH_MODES:
+        raise ValueError(f"Unsupported length_mode: {length_mode}")
+
+    duration_s = _duration_s(requests, window)
+    input_lengths = [float(item.prompt_tokens_hint or 0) for item in requests]
+    output_lengths = [float(item.completion_tokens_hint or 0) for item in requests]
+    profile_lengths = [
+        _profile_length(input_len, output_len, length_mode=length_mode)
+        for input_len, output_len in zip(input_lengths, output_lengths)
+    ]
+    hit_lengths, cache_stats = _ideal_cache_hit_lengths(
+        requests,
+        input_lengths=input_lengths,
+        block_size=_block_size(window),
+    )
+    arrival_stats = _arrival_stats(requests, duration_s=duration_s, gpu_count=gpu_count)
+
+    length_stats = _series_stats(profile_lengths)
+    hit_stats = _series_stats(hit_lengths)
+    total_profile_length = sum(profile_lengths)
+    total_input_length = sum(input_lengths)
+    total_hit_length = sum(hit_lengths)
+    feature_hit_rate = (
+        float(total_hit_length / max(total_profile_length, EPSILON))
+        if total_profile_length > 0
+        else 0.0
+    )
+    input_hit_rate = (
+        float(total_hit_length / max(total_input_length, EPSILON))
+        if total_input_length > 0
+        else 0.0
+    )
+
+    vector = [
+        math.log1p(length_stats["mean"]),
+        math.log1p(length_stats["p95"] / max(length_stats["mean"], EPSILON)),
+        length_stats["cv"],
+        math.log1p(hit_stats["mean"]),
+        math.log1p(hit_stats["p95"] / max(hit_stats["mean"], EPSILON)),
+        hit_stats["cv"],
+        feature_hit_rate,
+        math.log1p(arrival_stats["request_rate_per_gpu"]),
+        arrival_stats["interarrival_cv"],
+        math.log1p(arrival_stats["fano_1s"]),
+    ]
+
+    return WorkloadProfile(
+        window_id=window.window_id,
+        trace_type=window.trace_type,
+        request_count=len(requests),
+        duration_s=duration_s,
+        gpu_count=int(gpu_count),
+        length_mode=length_mode,
+        feature_names=list(FEATURE_NAMES),
+        vector=[float(item) for item in vector],
+        stats={
+            "length": {
+                **length_stats,
+                "mode": length_mode,
+                "total": total_profile_length,
+                "input_total": total_input_length,
+                "output_total": sum(output_lengths),
+            },
+            "cache": {
+                **hit_stats,
+                **cache_stats,
+                "total_hit_length": total_hit_length,
+                "hit_rate": feature_hit_rate,
+                "input_hit_rate": input_hit_rate,
+            },
+            "arrival": arrival_stats,
+        },
+    )
+
+
+def fit_robust_scale(profiles: Sequence[WorkloadProfile]) -> RobustScale:
+    if not profiles:
+        raise ValueError("At least one profile is required to fit a robust scale.")
+    centers: list[float] = []
+    scales: list[float] = []
+    for idx in range(len(FEATURE_NAMES)):
+        values = [float(profile.vector[idx]) for profile in profiles]
+        median = _percentile(values, 50.0)
+        iqr = _percentile(values, 75.0) - _percentile(values, 25.0)
+        centers.append(float(median))
+        scales.append(float(iqr if abs(iqr) > EPSILON else 1.0))
+    return RobustScale(feature_names=list(FEATURE_NAMES), center=centers, scale=scales)
+
+
+def profile_similarity(
+    left: WorkloadProfile,
+    right: WorkloadProfile,
+    *,
+    scale: RobustScale | None = None,
+) -> float:
+    scaler = scale or fit_robust_scale([left, right])
+    z_left = scaler.transform(left.vector)
+    z_right = scaler.transform(right.vector)
+    return _similarity_from_z(z_left, z_right)
+
+
+def similarity_report(profiles: Sequence[WorkloadProfile]) -> dict[str, Any]:
+    if not profiles:
+        raise ValueError("At least one profile is required.")
+    scale = fit_robust_scale(profiles)
+    transformed = [scale.transform(profile.vector) for profile in profiles]
+    rows: list[dict[str, Any]] = []
+    matrix: list[list[float]] = []
+    for i, left in enumerate(profiles):
+        row_values: list[float] = []
+        for j, right in enumerate(profiles):
+            sim = _similarity_from_z(transformed[i], transformed[j])
+            row_values.append(sim)
+            rows.append(
+                {
+                    "left": left.window_id,
+                    "right": right.window_id,
+                    "similarity": sim,
+                    "family_similarity": _family_similarity(transformed[i], transformed[j]),
+                }
+            )
+        matrix.append(row_values)
+    return {
+        "feature_names": list(FEATURE_NAMES),
+        "scaler": scale.to_dict(),
+        "windows": [profile.window_id for profile in profiles],
+        "matrix": matrix,
+        "pairs": rows,
+    }
+
+
+def dumps_profile(profile: WorkloadProfile) -> str:
+    return json.dumps(profile.to_dict(), ensure_ascii=False, indent=2) + "\n"
+
+
+def _duration_s(requests: list[TraceRequest], window: WindowRecord) -> float:
+    duration = max(float(window.window_end) - float(window.window_start), 0.0)
+    if duration > 0:
+        return duration
+    if len(requests) >= 2:
+        return max(0.0, float(requests[-1].arrival_s) - float(requests[0].arrival_s))
+    return 0.0
+
+
+def _profile_length(input_length: float, output_length: float, *, length_mode: str) -> float:
+    if length_mode == "input":
+        return max(input_length, 0.0)
+    if length_mode == "output":
+        return max(output_length, 0.0)
+    return max(input_length, 0.0) + max(output_length, 0.0)
+
+
+def _block_size(window: WindowRecord) -> int:
+    value = window.source_payload.get("block_size")
+    if isinstance(value, bool):
+        return 1
+    if isinstance(value, (int, float)) and value > 0:
+        return int(value)
+    if isinstance(value, str) and value.strip():
+        try:
+            parsed = int(value)
+        except ValueError:
+            return 1
+        return parsed if parsed > 0 else 1
+    return 1
+
+
+def _ideal_cache_hit_lengths(
+    requests: list[TraceRequest],
+    *,
+    input_lengths: list[float],
+    block_size: int,
+) -> tuple[list[float], dict[str, Any]]:
+    seen_hashes: set[Any] = set()
+    hit_lengths: list[float] = []
+    total_blocks = 0
+    repeated_blocks = 0
+    rows_with_hash_ids = 0
+    for request, input_length in zip(requests, input_lengths):
+        hash_ids = request.metadata.get("hash_ids")
+        if not isinstance(hash_ids, list):
+            hit_lengths.append(0.0)
+            continue
+        rows_with_hash_ids += 1
+        repeated_for_request = 0
+        for hash_id in hash_ids:
+            total_blocks += 1
+            if hash_id in seen_hashes:
+                repeated_blocks += 1
+                repeated_for_request += 1
+            else:
+                seen_hashes.add(hash_id)
+        hit_lengths.append(float(min(max(input_length, 0.0), repeated_for_request * block_size)))
+    return hit_lengths, {
+        "block_size": block_size,
+        "rows_with_hash_ids": rows_with_hash_ids,
+        "total_blocks": total_blocks,
+        "repeated_blocks": repeated_blocks,
+        "repeated_block_ratio": (
+            float(repeated_blocks / total_blocks) if total_blocks else 0.0
+        ),
+    }
+
+
+def _arrival_stats(
+    requests: list[TraceRequest],
+    *,
+    duration_s: float,
+    gpu_count: int,
+) -> dict[str, Any]:
+    arrivals = [float(item.arrival_s) for item in requests]
+    interarrivals = [
+        max(0.0, arrivals[idx] - arrivals[idx - 1])
+        for idx in range(1, len(arrivals))
+    ]
+    per_second_counts = _per_second_counts(arrivals, duration_s=duration_s)
+    qps = float(len(requests) / duration_s) if duration_s > 0 else 0.0
+    return {
+        "request_rate": qps,
+        "request_rate_per_gpu": float(qps / gpu_count) if gpu_count > 0 else 0.0,
+        "interarrival_cv": _cv(interarrivals),
+        "fano_1s": _fano(per_second_counts),
+        "one_second_count_mean": statistics.fmean(per_second_counts)
+        if per_second_counts
+        else 0.0,
+        "one_second_count_variance": statistics.pvariance(per_second_counts)
+        if len(per_second_counts) >= 2
+        else 0.0,
+        "one_second_bin_count": len(per_second_counts),
+    }
+
+
+def _per_second_counts(arrivals: list[float], *, duration_s: float) -> list[float]:
+    if duration_s <= 0:
+        return [float(len(arrivals))] if arrivals else []
+    bin_count = max(1, int(math.ceil(duration_s)))
+    counts = [0.0 for _ in range(bin_count)]
+    for arrival in arrivals:
+        if arrival < 0:
+            continue
+        idx = int(math.floor(arrival))
+        if 0 <= idx < bin_count:
+            counts[idx] += 1.0
+    return counts
+
+
+def _series_stats(values: list[float]) -> dict[str, float]:
+    return {
+        "count": float(len(values)),
+        "mean": statistics.fmean(values) if values else 0.0,
+        "p50": _percentile(values, 50.0),
+        "p95": _percentile(values, 95.0),
+        "cv": _cv(values),
+    }
+
+
+def _cv(values: list[float]) -> float:
+    if not values:
+        return 0.0
+    mean = statistics.fmean(values)
+    if abs(mean) <= EPSILON:
+        return 0.0
+    return float(statistics.pstdev(values) / mean) if len(values) >= 2 else 0.0
+
+
+def _fano(values: list[float]) -> float:
+    if not values:
+        return 0.0
+    mean = statistics.fmean(values)
+    if abs(mean) <= EPSILON:
+        return 0.0
+    return float(statistics.pvariance(values) / mean) if len(values) >= 2 else 0.0
+
+
+def _percentile(values: Sequence[float], p: float) -> float:
+    if not values:
+        return 0.0
+    ordered = sorted(float(item) for item in values)
+    if len(ordered) == 1:
+        return ordered[0]
+    rank = (p / 100.0) * (len(ordered) - 1)
+    lower = int(math.floor(rank))
+    upper = int(math.ceil(rank))
+    if lower == upper:
+        return ordered[lower]
+    weight = rank - lower
+    return float(ordered[lower] * (1.0 - weight) + ordered[upper] * weight)
+
+
+def _similarity_from_z(left: Sequence[float], right: Sequence[float]) -> float:
+    distance = math.sqrt(
+        sum((float(lval) - float(rval)) ** 2 for lval, rval in zip(left, right))
+    )
+    return float(math.exp(-distance))
+
+
+def _family_similarity(left: Sequence[float], right: Sequence[float]) -> dict[str, float]:
+    result: dict[str, float] = {}
+    for family, family_slice in FAMILY_SLICES.items():
+        result[family] = _similarity_from_z(left[family_slice], right[family_slice])
+    return result
--- a/tests/test_core_flow.py
+++ b/tests/test_core_flow.py
@@ -1,6 +1,8 @@
 from __future__ import annotations

 import json
+import io
+import math
 import os
 import signal
 import subprocess
@@ -25,6 +27,12 @@ from aituner.harness import (
    build_harness_guided_proposal,
    build_harness_stop_proposal,
 )
+from aituner.lca import (
+    build_workload_profile,
+    profile_similarity,
+    resolve_length_mode,
+    similarity_report,
+)
 from aituner.llm import _extract_response_text, build_prompt, parse_proposal_text, validate_proposal
 from aituner.search import ThresholdProbe, binary_search_max_feasible
 from aituner.slo import RequestOutcome, evaluate_request, summarize_evaluations
@@ -48,7 +56,7 @@ from aituner.worker import (
    _wait_for_server_or_exit,
    run_trial,
 )
-from aituner.trace import TraceRequest
+from aituner.trace import TraceRequest, WindowRecord


 REPO_ROOT = Path(__file__).resolve().parents[1]
@@ -241,6 +249,150 @@ class CoreFlowTests(unittest.TestCase):
            self.assertIn("knob_harnesses", prompt)
            self.assertTrue(study_root.exists())

+    def test_lca_workload_profile_uses_standard_10d_features(self) -> None:
+        window = WindowRecord(
+            window_id="w1",
+            trace_path=Path("trace.jsonl"),
+            trace_type="chat",
+            window_start=0.0,
+            window_end=4.0,
+            source_payload={"block_size": 64},
+        )
+        requests = [
+            TraceRequest(
+                row_id="r1",
+                arrival_s=0.0,
+                sampling_u=1.0,
+                body={},
+                prompt_tokens_hint=100,
+                completion_tokens_hint=10,
+                metadata={"hash_ids": [1, 2]},
+            ),
+            TraceRequest(
+                row_id="r2",
+                arrival_s=1.0,
+                sampling_u=1.0,
+                body={},
+                prompt_tokens_hint=100,
+                completion_tokens_hint=20,
+                metadata={"hash_ids": [1, 3]},
+            ),
+        ]
+
+        profile = build_workload_profile(
+            requests,
+            window,
+            gpu_count=2,
+            length_mode="total",
+        )
+
+        self.assertEqual(len(profile.feature_names), 10)
+        self.assertEqual(len(profile.vector), 10)
+        self.assertEqual(profile.feature_names[0], "L.log_mean_length")
+        self.assertAlmostEqual(profile.stats["cache"]["total_hit_length"], 64.0)
+        self.assertAlmostEqual(profile.stats["cache"]["hit_rate"], 64.0 / 230.0)
+        self.assertAlmostEqual(profile.stats["cache"]["input_hit_rate"], 64.0 / 200.0)
+        self.assertAlmostEqual(profile.vector[3], math.log1p(32.0))
+        self.assertAlmostEqual(profile.vector[5], 1.0)
+        self.assertAlmostEqual(profile.stats["arrival"]["request_rate_per_gpu"], 0.25)
+        self.assertAlmostEqual(profile.stats["arrival"]["fano_1s"], 0.5)
+        self.assertEqual(resolve_length_mode(request_mode="decode_only"), "output")
+
+    def test_lca_similarity_matrix_separates_different_profiles(self) -> None:
+        window = WindowRecord(
+            window_id="base",
+            trace_path=Path("trace.jsonl"),
+            trace_type="chat",
+            window_start=0.0,
+            window_end=4.0,
+            source_payload={"block_size": 64},
+        )
+
+        def make_profile(window_id: str, input_tokens: int, *, arrival_gap: float) -> object:
+            reqs = [
+                TraceRequest(
+                    row_id=f"{window_id}-1",
+                    arrival_s=0.0,
+                    sampling_u=1.0,
+                    body={},
+                    prompt_tokens_hint=input_tokens,
+                    completion_tokens_hint=16,
+                    metadata={"hash_ids": [window_id, 1]},
+                ),
+                TraceRequest(
+                    row_id=f"{window_id}-2",
+                    arrival_s=arrival_gap,
+                    sampling_u=1.0,
+                    body={},
+                    prompt_tokens_hint=input_tokens,
+                    completion_tokens_hint=16,
+                    metadata={"hash_ids": [window_id, 1, 2]},
+                ),
+            ]
+            return build_workload_profile(
+                reqs,
+                WindowRecord(
+                    window_id=window_id,
+                    trace_path=window.trace_path,
+                    trace_type=window.trace_type,
+                    window_start=window.window_start,
+                    window_end=window.window_end,
+                    source_payload=window.source_payload,
+                ),
+                gpu_count=1,
+                length_mode="total",
+            )
+
+        p1 = make_profile("same-a", 100, arrival_gap=1.0)
+        p2 = make_profile("same-b", 100, arrival_gap=1.0)
+        p3 = make_profile("different", 10000, arrival_gap=0.1)
+
+        report = similarity_report([p1, p2, p3])
+
+        self.assertAlmostEqual(profile_similarity(p1, p2), 1.0)
+        self.assertGreater(report["matrix"][0][1], report["matrix"][0][2])
+        self.assertIn("L", report["pairs"][2]["family_similarity"])
+
+    def test_cli_profile_window_outputs_lca_profile(self) -> None:
+        with tempfile.TemporaryDirectory() as tmp:
+            tmp_path = Path(tmp)
+            study_path = _write_study_assets(tmp_path)
+            stdout = io.StringIO()
+            with mock.patch("sys.stdout", stdout):
+                rc = cli_main(
+                    [
+                        "profile",
+                        "window",
+                        "--spec",
+                        str(study_path),
+                        "--gpu-count",
+                        "8",
+                    ]
+                )
+
+            self.assertEqual(rc, 0)
+            payload = json.loads(stdout.getvalue())
+            self.assertEqual(payload["profile"]["window_id"], "chat_w1")
+            self.assertEqual(len(payload["profile"]["vector"]), 10)
+            self.assertEqual(payload["profile"]["gpu_count"], 8)
+
+    def test_cli_profile_window_does_not_resolve_llm_endpoint(self) -> None:
+        with tempfile.TemporaryDirectory() as tmp:
+            tmp_path = Path(tmp)
+            study_path = _write_study_assets(tmp_path)
+            payload = json.loads(study_path.read_text(encoding="utf-8"))
+            payload["llm"]["endpoint"] = {
+                "provider": "codex",
+                "model": "gpt-5.4",
+            }
+            study_path.write_text(json.dumps(payload), encoding="utf-8")
+            stdout = io.StringIO()
+            with mock.patch("sys.stdout", stdout):
+                rc = cli_main(["profile", "window", "--spec", str(study_path)])
+
+            self.assertEqual(rc, 0)
+            self.assertEqual(json.loads(stdout.getvalue())["profile"]["window_id"], "chat_w1")
+
    def test_harness_uses_latency_failures_before_generic_unrecoverable(self) -> None:
        with tempfile.TemporaryDirectory() as tmp:
            tmp_path = Path(tmp)