Add L-C-A workload profile metric and CLI profile commands

Implement the paper's 10-dimensional L-C-A workload feature vector
(RobustScaler-normalized, sim=exp(-||dz||)) in lca.py, and wire it into
`aituner profile window` / `aituner profile similarity`. Covered by tests.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
2026-06-15 14:02:24 +08:00
parent 984eb1f325
commit 27d1c8fa92
3 changed files with 770 additions and 2 deletions

View File

@@ -3,6 +3,7 @@ from __future__ import annotations
import argparse import argparse
import json import json
import sys import sys
from dataclasses import replace
from pathlib import Path from pathlib import Path
from .compare import run_compare from .compare import run_compare
@@ -12,8 +13,20 @@ from .harness import (
build_harness_stop_proposal, build_harness_stop_proposal,
) )
from .job import append_job, build_trial_job from .job import append_job, build_trial_job
from .lca import (
build_workload_profile,
resolve_length_mode,
similarity_report,
)
from .llm import build_prompt, call_llm_for_proposal, load_capability_profile, parse_proposal_text from .llm import build_prompt, call_llm_for_proposal, load_capability_profile, parse_proposal_text
from .spec import Proposal, SpecError, load_study_spec, to_jsonable from .spec import (
Proposal,
SpecError,
StudySpec,
load_structured_file,
load_study_spec,
to_jsonable,
)
from .store import StudyStore from .store import StudyStore
from .trace import load_trace_requests, summarize_window from .trace import load_trace_requests, summarize_window
from .worker import run_trial from .worker import run_trial
@@ -422,6 +435,159 @@ def cmd_compare_run(args: argparse.Namespace) -> int:
return 0 return 0
def _resolve_profile_gpu_count(args: argparse.Namespace, study: StudySpec) -> int:
gpu_count = args.gpu_count
if gpu_count is None:
gpu_count = study.hardware.gpu_count
if gpu_count <= 0:
raise SpecError("--gpu-count must be > 0.")
return int(gpu_count)
def _load_profile_study_spec(spec_path: Path) -> StudySpec:
payload = dict(load_structured_file(spec_path))
llm_payload = dict(payload.get("llm") or {})
llm_payload.pop("endpoint", None)
payload["llm"] = llm_payload
return StudySpec.from_dict(payload)
def _profile_current_study_window(args: argparse.Namespace) -> dict[str, object]:
spec_path = Path(args.spec).resolve()
study = _load_profile_study_spec(spec_path)
mode = resolve_length_mode(
request_mode=study.trace.request_mode,
length_mode=args.length_mode,
)
window, requests = load_trace_requests(study, study_spec_path=spec_path)
profile = build_workload_profile(
requests,
window,
gpu_count=_resolve_profile_gpu_count(args, study),
length_mode=mode,
)
return {
"profile": profile.to_dict(),
"source": {
"study_spec_path": str(spec_path),
"window_id": study.trace.window_id,
},
}
def _resolve_windows_path_for_profile(study: StudySpec, *, study_spec_path: Path) -> Path:
path = Path(study.trace.windows_path)
if not path.is_absolute():
path = (study_spec_path.parent / path).resolve()
return path
def _load_profile_windows(
study: StudySpec,
*,
study_spec_path: Path,
) -> list[dict[str, object]]:
windows_path = _resolve_windows_path_for_profile(study, study_spec_path=study_spec_path)
payload = json.loads(windows_path.read_text(encoding="utf-8"))
raw_windows = payload.get("windows") if isinstance(payload, dict) else payload
if not isinstance(raw_windows, list):
raise SpecError(f"windows payload must contain a list: {windows_path}")
return [
{str(key): value for key, value in item.items()}
for item in raw_windows
if isinstance(item, dict)
]
def _selected_profile_windows(
args: argparse.Namespace,
study: StudySpec,
*,
study_spec_path: Path,
) -> list[dict[str, object]]:
windows = _load_profile_windows(study, study_spec_path=study_spec_path)
window_ids = set(args.window_id or [])
selected: list[dict[str, object]] = []
for item in windows:
window_id = str(item.get("window_id") or "").strip()
if not window_id:
continue
if window_ids and window_id not in window_ids:
continue
if not window_ids and not args.all:
if window_id != study.trace.window_id:
continue
trace_type = str(item.get("trace_type") or "").strip()
if args.trace_type and trace_type != args.trace_type:
continue
date_value = str(item.get("date") or "").strip()
if args.date_from and date_value and date_value < args.date_from:
continue
if args.date_to and date_value and date_value > args.date_to:
continue
if args.slot_token and str(item.get("slot_token") or "").strip() != args.slot_token:
continue
selected.append(item)
selected.sort(
key=lambda item: (
str(item.get("date") or ""),
str(item.get("slot_token") or ""),
str(item.get("window_id") or ""),
)
)
if args.limit is not None:
selected = selected[: args.limit]
if not selected:
raise SpecError("No trace windows selected for profile similarity.")
return selected
def cmd_profile_window(args: argparse.Namespace) -> int:
print(json.dumps(_profile_current_study_window(args), ensure_ascii=False, indent=2))
return 0
def cmd_profile_similarity(args: argparse.Namespace) -> int:
spec_path = Path(args.spec).resolve()
study = _load_profile_study_spec(spec_path)
mode = resolve_length_mode(
request_mode=study.trace.request_mode,
length_mode=args.length_mode,
)
gpu_count = _resolve_profile_gpu_count(args, study)
profiles = []
selected = _selected_profile_windows(args, study, study_spec_path=spec_path)
for item in selected:
window_id = str(item["window_id"])
window_study = replace(study, trace=replace(study.trace, window_id=window_id))
window, requests = load_trace_requests(window_study, study_spec_path=spec_path)
profiles.append(
build_workload_profile(
requests,
window,
gpu_count=gpu_count,
length_mode=mode,
)
)
print(
json.dumps(
{
"source": {
"study_spec_path": str(spec_path),
"selected_window_count": len(profiles),
"length_mode": mode,
"gpu_count": gpu_count,
},
"profiles": [profile.to_dict() for profile in profiles],
"similarity": similarity_report(profiles),
},
ensure_ascii=False,
indent=2,
)
)
return 0
def build_parser() -> argparse.ArgumentParser: def build_parser() -> argparse.ArgumentParser:
parser = argparse.ArgumentParser(description="AITuner CLI") parser = argparse.ArgumentParser(description="AITuner CLI")
subparsers = parser.add_subparsers(dest="command", required=True) subparsers = parser.add_subparsers(dest="command", required=True)
@@ -490,6 +656,50 @@ def build_parser() -> argparse.ArgumentParser:
compare_run.add_argument("--output-root") compare_run.add_argument("--output-root")
compare_run.set_defaults(func=cmd_compare_run) compare_run.set_defaults(func=cmd_compare_run)
profile = subparsers.add_parser("profile")
profile_sub = profile.add_subparsers(dest="profile_command", required=True)
profile_window = profile_sub.add_parser("window")
profile_window.add_argument("--spec", required=True)
profile_window.add_argument(
"--length-mode",
default="auto",
choices=["auto", "total", "input", "output"],
help="Token length basis for the L vector. auto uses output for decode_only and total otherwise.",
)
profile_window.add_argument(
"--gpu-count",
type=int,
help="GPU denominator for per-GPU arrival rate. Defaults to hardware.gpu_count.",
)
profile_window.set_defaults(func=cmd_profile_window)
profile_similarity = profile_sub.add_parser("similarity")
profile_similarity.add_argument("--spec", required=True)
profile_similarity.add_argument("--window-id", action="append")
profile_similarity.add_argument("--trace-type")
profile_similarity.add_argument("--date-from")
profile_similarity.add_argument("--date-to")
profile_similarity.add_argument("--slot-token")
profile_similarity.add_argument("--limit", type=int)
profile_similarity.add_argument(
"--all",
action="store_true",
help="Profile all windows selected by filters. Without this or --window-id, only the study window is used.",
)
profile_similarity.add_argument(
"--length-mode",
default="auto",
choices=["auto", "total", "input", "output"],
help="Token length basis for the L vector. auto uses output for decode_only and total otherwise.",
)
profile_similarity.add_argument(
"--gpu-count",
type=int,
help="GPU denominator for per-GPU arrival rate. Defaults to hardware.gpu_count.",
)
profile_similarity.set_defaults(func=cmd_profile_similarity)
return parser return parser

406
src/aituner/lca.py Normal file
View File

@@ -0,0 +1,406 @@
from __future__ import annotations
import json
import math
import statistics
from dataclasses import dataclass
from typing import Any, Sequence
from .trace import TraceRequest, WindowRecord
EPSILON = 1e-9
FEATURE_NAMES = [
"L.log_mean_length",
"L.log_p95_over_mean_length",
"L.cv_length",
"C.log_mean_hit_length",
"C.log_p95_over_mean_hit_length",
"C.cv_hit_length",
"C.hit_rate",
"A.log_request_rate_per_gpu",
"A.cv_interarrival",
"A.log_fano_1s",
]
FAMILY_SLICES = {
"L": slice(0, 3),
"C": slice(3, 7),
"A": slice(7, 10),
}
LENGTH_MODES = {"total", "input", "output"}
@dataclass(frozen=True)
class WorkloadProfile:
window_id: str
trace_type: str
request_count: int
duration_s: float
gpu_count: int
length_mode: str
feature_names: list[str]
vector: list[float]
stats: dict[str, Any]
def to_dict(self) -> dict[str, Any]:
return {
"window_id": self.window_id,
"trace_type": self.trace_type,
"request_count": self.request_count,
"duration_s": self.duration_s,
"gpu_count": self.gpu_count,
"length_mode": self.length_mode,
"feature_names": self.feature_names,
"vector": self.vector,
"stats": self.stats,
}
@dataclass(frozen=True)
class RobustScale:
feature_names: list[str]
center: list[float]
scale: list[float]
def transform(self, vector: Sequence[float]) -> list[float]:
return [
(float(value) - self.center[idx]) / self.scale[idx]
for idx, value in enumerate(vector)
]
def to_dict(self) -> dict[str, Any]:
return {
"feature_names": self.feature_names,
"center": self.center,
"scale": self.scale,
}
def resolve_length_mode(*, request_mode: str | None = None, length_mode: str = "auto") -> str:
normalized = str(length_mode or "auto").strip().lower()
if normalized == "auto":
return (
"output"
if str(request_mode or "").strip().lower() == "decode_only"
else "total"
)
if normalized not in LENGTH_MODES:
raise ValueError(
"length_mode must be one of: auto, total, input, output."
)
return normalized
def build_workload_profile(
requests: list[TraceRequest],
window: WindowRecord,
*,
gpu_count: int,
length_mode: str = "total",
) -> WorkloadProfile:
if gpu_count <= 0:
raise ValueError("gpu_count must be > 0.")
if length_mode not in LENGTH_MODES:
raise ValueError(f"Unsupported length_mode: {length_mode}")
duration_s = _duration_s(requests, window)
input_lengths = [float(item.prompt_tokens_hint or 0) for item in requests]
output_lengths = [float(item.completion_tokens_hint or 0) for item in requests]
profile_lengths = [
_profile_length(input_len, output_len, length_mode=length_mode)
for input_len, output_len in zip(input_lengths, output_lengths)
]
hit_lengths, cache_stats = _ideal_cache_hit_lengths(
requests,
input_lengths=input_lengths,
block_size=_block_size(window),
)
arrival_stats = _arrival_stats(requests, duration_s=duration_s, gpu_count=gpu_count)
length_stats = _series_stats(profile_lengths)
hit_stats = _series_stats(hit_lengths)
total_profile_length = sum(profile_lengths)
total_input_length = sum(input_lengths)
total_hit_length = sum(hit_lengths)
feature_hit_rate = (
float(total_hit_length / max(total_profile_length, EPSILON))
if total_profile_length > 0
else 0.0
)
input_hit_rate = (
float(total_hit_length / max(total_input_length, EPSILON))
if total_input_length > 0
else 0.0
)
vector = [
math.log1p(length_stats["mean"]),
math.log1p(length_stats["p95"] / max(length_stats["mean"], EPSILON)),
length_stats["cv"],
math.log1p(hit_stats["mean"]),
math.log1p(hit_stats["p95"] / max(hit_stats["mean"], EPSILON)),
hit_stats["cv"],
feature_hit_rate,
math.log1p(arrival_stats["request_rate_per_gpu"]),
arrival_stats["interarrival_cv"],
math.log1p(arrival_stats["fano_1s"]),
]
return WorkloadProfile(
window_id=window.window_id,
trace_type=window.trace_type,
request_count=len(requests),
duration_s=duration_s,
gpu_count=int(gpu_count),
length_mode=length_mode,
feature_names=list(FEATURE_NAMES),
vector=[float(item) for item in vector],
stats={
"length": {
**length_stats,
"mode": length_mode,
"total": total_profile_length,
"input_total": total_input_length,
"output_total": sum(output_lengths),
},
"cache": {
**hit_stats,
**cache_stats,
"total_hit_length": total_hit_length,
"hit_rate": feature_hit_rate,
"input_hit_rate": input_hit_rate,
},
"arrival": arrival_stats,
},
)
def fit_robust_scale(profiles: Sequence[WorkloadProfile]) -> RobustScale:
if not profiles:
raise ValueError("At least one profile is required to fit a robust scale.")
centers: list[float] = []
scales: list[float] = []
for idx in range(len(FEATURE_NAMES)):
values = [float(profile.vector[idx]) for profile in profiles]
median = _percentile(values, 50.0)
iqr = _percentile(values, 75.0) - _percentile(values, 25.0)
centers.append(float(median))
scales.append(float(iqr if abs(iqr) > EPSILON else 1.0))
return RobustScale(feature_names=list(FEATURE_NAMES), center=centers, scale=scales)
def profile_similarity(
left: WorkloadProfile,
right: WorkloadProfile,
*,
scale: RobustScale | None = None,
) -> float:
scaler = scale or fit_robust_scale([left, right])
z_left = scaler.transform(left.vector)
z_right = scaler.transform(right.vector)
return _similarity_from_z(z_left, z_right)
def similarity_report(profiles: Sequence[WorkloadProfile]) -> dict[str, Any]:
if not profiles:
raise ValueError("At least one profile is required.")
scale = fit_robust_scale(profiles)
transformed = [scale.transform(profile.vector) for profile in profiles]
rows: list[dict[str, Any]] = []
matrix: list[list[float]] = []
for i, left in enumerate(profiles):
row_values: list[float] = []
for j, right in enumerate(profiles):
sim = _similarity_from_z(transformed[i], transformed[j])
row_values.append(sim)
rows.append(
{
"left": left.window_id,
"right": right.window_id,
"similarity": sim,
"family_similarity": _family_similarity(transformed[i], transformed[j]),
}
)
matrix.append(row_values)
return {
"feature_names": list(FEATURE_NAMES),
"scaler": scale.to_dict(),
"windows": [profile.window_id for profile in profiles],
"matrix": matrix,
"pairs": rows,
}
def dumps_profile(profile: WorkloadProfile) -> str:
return json.dumps(profile.to_dict(), ensure_ascii=False, indent=2) + "\n"
def _duration_s(requests: list[TraceRequest], window: WindowRecord) -> float:
duration = max(float(window.window_end) - float(window.window_start), 0.0)
if duration > 0:
return duration
if len(requests) >= 2:
return max(0.0, float(requests[-1].arrival_s) - float(requests[0].arrival_s))
return 0.0
def _profile_length(input_length: float, output_length: float, *, length_mode: str) -> float:
if length_mode == "input":
return max(input_length, 0.0)
if length_mode == "output":
return max(output_length, 0.0)
return max(input_length, 0.0) + max(output_length, 0.0)
def _block_size(window: WindowRecord) -> int:
value = window.source_payload.get("block_size")
if isinstance(value, bool):
return 1
if isinstance(value, (int, float)) and value > 0:
return int(value)
if isinstance(value, str) and value.strip():
try:
parsed = int(value)
except ValueError:
return 1
return parsed if parsed > 0 else 1
return 1
def _ideal_cache_hit_lengths(
requests: list[TraceRequest],
*,
input_lengths: list[float],
block_size: int,
) -> tuple[list[float], dict[str, Any]]:
seen_hashes: set[Any] = set()
hit_lengths: list[float] = []
total_blocks = 0
repeated_blocks = 0
rows_with_hash_ids = 0
for request, input_length in zip(requests, input_lengths):
hash_ids = request.metadata.get("hash_ids")
if not isinstance(hash_ids, list):
hit_lengths.append(0.0)
continue
rows_with_hash_ids += 1
repeated_for_request = 0
for hash_id in hash_ids:
total_blocks += 1
if hash_id in seen_hashes:
repeated_blocks += 1
repeated_for_request += 1
else:
seen_hashes.add(hash_id)
hit_lengths.append(float(min(max(input_length, 0.0), repeated_for_request * block_size)))
return hit_lengths, {
"block_size": block_size,
"rows_with_hash_ids": rows_with_hash_ids,
"total_blocks": total_blocks,
"repeated_blocks": repeated_blocks,
"repeated_block_ratio": (
float(repeated_blocks / total_blocks) if total_blocks else 0.0
),
}
def _arrival_stats(
requests: list[TraceRequest],
*,
duration_s: float,
gpu_count: int,
) -> dict[str, Any]:
arrivals = [float(item.arrival_s) for item in requests]
interarrivals = [
max(0.0, arrivals[idx] - arrivals[idx - 1])
for idx in range(1, len(arrivals))
]
per_second_counts = _per_second_counts(arrivals, duration_s=duration_s)
qps = float(len(requests) / duration_s) if duration_s > 0 else 0.0
return {
"request_rate": qps,
"request_rate_per_gpu": float(qps / gpu_count) if gpu_count > 0 else 0.0,
"interarrival_cv": _cv(interarrivals),
"fano_1s": _fano(per_second_counts),
"one_second_count_mean": statistics.fmean(per_second_counts)
if per_second_counts
else 0.0,
"one_second_count_variance": statistics.pvariance(per_second_counts)
if len(per_second_counts) >= 2
else 0.0,
"one_second_bin_count": len(per_second_counts),
}
def _per_second_counts(arrivals: list[float], *, duration_s: float) -> list[float]:
if duration_s <= 0:
return [float(len(arrivals))] if arrivals else []
bin_count = max(1, int(math.ceil(duration_s)))
counts = [0.0 for _ in range(bin_count)]
for arrival in arrivals:
if arrival < 0:
continue
idx = int(math.floor(arrival))
if 0 <= idx < bin_count:
counts[idx] += 1.0
return counts
def _series_stats(values: list[float]) -> dict[str, float]:
return {
"count": float(len(values)),
"mean": statistics.fmean(values) if values else 0.0,
"p50": _percentile(values, 50.0),
"p95": _percentile(values, 95.0),
"cv": _cv(values),
}
def _cv(values: list[float]) -> float:
if not values:
return 0.0
mean = statistics.fmean(values)
if abs(mean) <= EPSILON:
return 0.0
return float(statistics.pstdev(values) / mean) if len(values) >= 2 else 0.0
def _fano(values: list[float]) -> float:
if not values:
return 0.0
mean = statistics.fmean(values)
if abs(mean) <= EPSILON:
return 0.0
return float(statistics.pvariance(values) / mean) if len(values) >= 2 else 0.0
def _percentile(values: Sequence[float], p: float) -> float:
if not values:
return 0.0
ordered = sorted(float(item) for item in values)
if len(ordered) == 1:
return ordered[0]
rank = (p / 100.0) * (len(ordered) - 1)
lower = int(math.floor(rank))
upper = int(math.ceil(rank))
if lower == upper:
return ordered[lower]
weight = rank - lower
return float(ordered[lower] * (1.0 - weight) + ordered[upper] * weight)
def _similarity_from_z(left: Sequence[float], right: Sequence[float]) -> float:
distance = math.sqrt(
sum((float(lval) - float(rval)) ** 2 for lval, rval in zip(left, right))
)
return float(math.exp(-distance))
def _family_similarity(left: Sequence[float], right: Sequence[float]) -> dict[str, float]:
result: dict[str, float] = {}
for family, family_slice in FAMILY_SLICES.items():
result[family] = _similarity_from_z(left[family_slice], right[family_slice])
return result

View File

@@ -1,6 +1,8 @@
from __future__ import annotations from __future__ import annotations
import json import json
import io
import math
import os import os
import signal import signal
import subprocess import subprocess
@@ -25,6 +27,12 @@ from aituner.harness import (
build_harness_guided_proposal, build_harness_guided_proposal,
build_harness_stop_proposal, build_harness_stop_proposal,
) )
from aituner.lca import (
build_workload_profile,
profile_similarity,
resolve_length_mode,
similarity_report,
)
from aituner.llm import _extract_response_text, build_prompt, parse_proposal_text, validate_proposal from aituner.llm import _extract_response_text, build_prompt, parse_proposal_text, validate_proposal
from aituner.search import ThresholdProbe, binary_search_max_feasible from aituner.search import ThresholdProbe, binary_search_max_feasible
from aituner.slo import RequestOutcome, evaluate_request, summarize_evaluations from aituner.slo import RequestOutcome, evaluate_request, summarize_evaluations
@@ -48,7 +56,7 @@ from aituner.worker import (
_wait_for_server_or_exit, _wait_for_server_or_exit,
run_trial, run_trial,
) )
from aituner.trace import TraceRequest from aituner.trace import TraceRequest, WindowRecord
REPO_ROOT = Path(__file__).resolve().parents[1] REPO_ROOT = Path(__file__).resolve().parents[1]
@@ -241,6 +249,150 @@ class CoreFlowTests(unittest.TestCase):
self.assertIn("knob_harnesses", prompt) self.assertIn("knob_harnesses", prompt)
self.assertTrue(study_root.exists()) self.assertTrue(study_root.exists())
def test_lca_workload_profile_uses_standard_10d_features(self) -> None:
window = WindowRecord(
window_id="w1",
trace_path=Path("trace.jsonl"),
trace_type="chat",
window_start=0.0,
window_end=4.0,
source_payload={"block_size": 64},
)
requests = [
TraceRequest(
row_id="r1",
arrival_s=0.0,
sampling_u=1.0,
body={},
prompt_tokens_hint=100,
completion_tokens_hint=10,
metadata={"hash_ids": [1, 2]},
),
TraceRequest(
row_id="r2",
arrival_s=1.0,
sampling_u=1.0,
body={},
prompt_tokens_hint=100,
completion_tokens_hint=20,
metadata={"hash_ids": [1, 3]},
),
]
profile = build_workload_profile(
requests,
window,
gpu_count=2,
length_mode="total",
)
self.assertEqual(len(profile.feature_names), 10)
self.assertEqual(len(profile.vector), 10)
self.assertEqual(profile.feature_names[0], "L.log_mean_length")
self.assertAlmostEqual(profile.stats["cache"]["total_hit_length"], 64.0)
self.assertAlmostEqual(profile.stats["cache"]["hit_rate"], 64.0 / 230.0)
self.assertAlmostEqual(profile.stats["cache"]["input_hit_rate"], 64.0 / 200.0)
self.assertAlmostEqual(profile.vector[3], math.log1p(32.0))
self.assertAlmostEqual(profile.vector[5], 1.0)
self.assertAlmostEqual(profile.stats["arrival"]["request_rate_per_gpu"], 0.25)
self.assertAlmostEqual(profile.stats["arrival"]["fano_1s"], 0.5)
self.assertEqual(resolve_length_mode(request_mode="decode_only"), "output")
def test_lca_similarity_matrix_separates_different_profiles(self) -> None:
window = WindowRecord(
window_id="base",
trace_path=Path("trace.jsonl"),
trace_type="chat",
window_start=0.0,
window_end=4.0,
source_payload={"block_size": 64},
)
def make_profile(window_id: str, input_tokens: int, *, arrival_gap: float) -> object:
reqs = [
TraceRequest(
row_id=f"{window_id}-1",
arrival_s=0.0,
sampling_u=1.0,
body={},
prompt_tokens_hint=input_tokens,
completion_tokens_hint=16,
metadata={"hash_ids": [window_id, 1]},
),
TraceRequest(
row_id=f"{window_id}-2",
arrival_s=arrival_gap,
sampling_u=1.0,
body={},
prompt_tokens_hint=input_tokens,
completion_tokens_hint=16,
metadata={"hash_ids": [window_id, 1, 2]},
),
]
return build_workload_profile(
reqs,
WindowRecord(
window_id=window_id,
trace_path=window.trace_path,
trace_type=window.trace_type,
window_start=window.window_start,
window_end=window.window_end,
source_payload=window.source_payload,
),
gpu_count=1,
length_mode="total",
)
p1 = make_profile("same-a", 100, arrival_gap=1.0)
p2 = make_profile("same-b", 100, arrival_gap=1.0)
p3 = make_profile("different", 10000, arrival_gap=0.1)
report = similarity_report([p1, p2, p3])
self.assertAlmostEqual(profile_similarity(p1, p2), 1.0)
self.assertGreater(report["matrix"][0][1], report["matrix"][0][2])
self.assertIn("L", report["pairs"][2]["family_similarity"])
def test_cli_profile_window_outputs_lca_profile(self) -> None:
with tempfile.TemporaryDirectory() as tmp:
tmp_path = Path(tmp)
study_path = _write_study_assets(tmp_path)
stdout = io.StringIO()
with mock.patch("sys.stdout", stdout):
rc = cli_main(
[
"profile",
"window",
"--spec",
str(study_path),
"--gpu-count",
"8",
]
)
self.assertEqual(rc, 0)
payload = json.loads(stdout.getvalue())
self.assertEqual(payload["profile"]["window_id"], "chat_w1")
self.assertEqual(len(payload["profile"]["vector"]), 10)
self.assertEqual(payload["profile"]["gpu_count"], 8)
def test_cli_profile_window_does_not_resolve_llm_endpoint(self) -> None:
with tempfile.TemporaryDirectory() as tmp:
tmp_path = Path(tmp)
study_path = _write_study_assets(tmp_path)
payload = json.loads(study_path.read_text(encoding="utf-8"))
payload["llm"]["endpoint"] = {
"provider": "codex",
"model": "gpt-5.4",
}
study_path.write_text(json.dumps(payload), encoding="utf-8")
stdout = io.StringIO()
with mock.patch("sys.stdout", stdout):
rc = cli_main(["profile", "window", "--spec", str(study_path)])
self.assertEqual(rc, 0)
self.assertEqual(json.loads(stdout.getvalue())["profile"]["window_id"], "chat_w1")
def test_harness_uses_latency_failures_before_generic_unrecoverable(self) -> None: def test_harness_uses_latency_failures_before_generic_unrecoverable(self) -> None:
with tempfile.TemporaryDirectory() as tmp: with tempfile.TemporaryDirectory() as tmp:
tmp_path = Path(tmp) tmp_path = Path(tmp)