Add L-C-A workload profile metric and CLI profile commands

Implement the paper's 10-dimensional L-C-A workload feature vector
(RobustScaler-normalized, sim=exp(-||dz||)) in lca.py, and wire it into
`aituner profile window` / `aituner profile similarity`. Covered by tests.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
2026-06-15 14:02:24 +08:00
parent 984eb1f325
commit 27d1c8fa92
3 changed files with 770 additions and 2 deletions

View File

@@ -1,6 +1,8 @@
from __future__ import annotations
import json
import io
import math
import os
import signal
import subprocess
@@ -25,6 +27,12 @@ from aituner.harness import (
build_harness_guided_proposal,
build_harness_stop_proposal,
)
from aituner.lca import (
build_workload_profile,
profile_similarity,
resolve_length_mode,
similarity_report,
)
from aituner.llm import _extract_response_text, build_prompt, parse_proposal_text, validate_proposal
from aituner.search import ThresholdProbe, binary_search_max_feasible
from aituner.slo import RequestOutcome, evaluate_request, summarize_evaluations
@@ -48,7 +56,7 @@ from aituner.worker import (
_wait_for_server_or_exit,
run_trial,
)
from aituner.trace import TraceRequest
from aituner.trace import TraceRequest, WindowRecord
REPO_ROOT = Path(__file__).resolve().parents[1]
@@ -241,6 +249,150 @@ class CoreFlowTests(unittest.TestCase):
self.assertIn("knob_harnesses", prompt)
self.assertTrue(study_root.exists())
def test_lca_workload_profile_uses_standard_10d_features(self) -> None:
    """Build an L-C-A profile for a two-request window and pin its key numbers.

    The window spans 0.0-4.0 s with ``block_size`` 64 and is profiled with
    ``gpu_count=2`` and ``length_mode="total"``.
    """
    window = WindowRecord(
        window_id="w1",
        trace_path=Path("trace.jsonl"),
        trace_type="chat",
        window_start=0.0,
        window_end=4.0,
        source_payload={"block_size": 64},
    )
    # (row_id, arrival_s, completion_tokens_hint, hash_ids). Both rows carry a
    # 100-token prompt hint and both list hash id 1.
    request_rows = (
        ("r1", 0.0, 10, [1, 2]),
        ("r2", 1.0, 20, [1, 3]),
    )
    requests = [
        TraceRequest(
            row_id=row_id,
            arrival_s=arrival_s,
            sampling_u=1.0,
            body={},
            prompt_tokens_hint=100,
            completion_tokens_hint=completion_hint,
            metadata={"hash_ids": hash_ids},
        )
        for row_id, arrival_s, completion_hint, hash_ids in request_rows
    ]

    profile = build_workload_profile(
        requests,
        window,
        gpu_count=2,
        length_mode="total",
    )

    # Shape of the feature vector and the name of its first dimension.
    self.assertEqual(len(profile.feature_names), 10)
    self.assertEqual(len(profile.vector), 10)
    self.assertEqual(profile.feature_names[0], "L.log_mean_length")

    # Expected cache numbers: 64 hit tokens (one block of block_size 64);
    # 230 = 100 + 10 + 100 + 20 (all hints), 200 = prompt hints only.
    cache_stats = profile.stats["cache"]
    self.assertAlmostEqual(cache_stats["total_hit_length"], 64.0)
    self.assertAlmostEqual(cache_stats["hit_rate"], 64.0 / 230.0)
    self.assertAlmostEqual(cache_stats["input_hit_rate"], 64.0 / 200.0)

    self.assertAlmostEqual(profile.vector[3], math.log1p(32.0))
    self.assertAlmostEqual(profile.vector[5], 1.0)

    arrival_stats = profile.stats["arrival"]
    self.assertAlmostEqual(arrival_stats["request_rate_per_gpu"], 0.25)
    self.assertAlmostEqual(arrival_stats["fano_1s"], 0.5)

    # A "decode_only" request mode is expected to map to the "output" length mode.
    self.assertEqual(resolve_length_mode(request_mode="decode_only"), "output")
def test_lca_similarity_matrix_separates_different_profiles(self) -> None:
    """Compare two identically shaped profiles against a clearly different one.

    Expects the identical pair to have similarity ~1.0, the matrix to rank
    that pair above the mismatched pair, and the third pair entry of the
    report to expose an ``"L"`` family similarity.
    """
    window = WindowRecord(
        window_id="base",
        trace_path=Path("trace.jsonl"),
        trace_type="chat",
        window_start=0.0,
        window_end=4.0,
        source_payload={"block_size": 64},
    )

    def profile_for(window_id: str, input_tokens: int, *, arrival_gap: float) -> object:
        """Profile a two-request window that reuses the base window's settings."""
        # (row_id, arrival_s, hash_ids); only these fields differ between rows.
        rows = (
            (f"{window_id}-1", 0.0, [window_id, 1]),
            (f"{window_id}-2", arrival_gap, [window_id, 1, 2]),
        )
        pair = [
            TraceRequest(
                row_id=row_id,
                arrival_s=arrival_s,
                sampling_u=1.0,
                body={},
                prompt_tokens_hint=input_tokens,
                completion_tokens_hint=16,
                metadata={"hash_ids": hash_ids},
            )
            for row_id, arrival_s, hash_ids in rows
        ]
        # Same trace/time span/payload as the base window, under a new id.
        renamed_window = WindowRecord(
            window_id=window_id,
            trace_path=window.trace_path,
            trace_type=window.trace_type,
            window_start=window.window_start,
            window_end=window.window_end,
            source_payload=window.source_payload,
        )
        return build_workload_profile(
            pair,
            renamed_window,
            gpu_count=1,
            length_mode="total",
        )

    twin_a = profile_for("same-a", 100, arrival_gap=1.0)
    twin_b = profile_for("same-b", 100, arrival_gap=1.0)
    outlier = profile_for("different", 10000, arrival_gap=0.1)

    report = similarity_report([twin_a, twin_b, outlier])

    self.assertAlmostEqual(profile_similarity(twin_a, twin_b), 1.0)
    first_row = report["matrix"][0]
    self.assertGreater(first_row[1], first_row[2])
    self.assertIn("L", report["pairs"][2]["family_similarity"])
def test_cli_profile_window_outputs_lca_profile(self) -> None:
    """Run ``profile window`` through the CLI and inspect the JSON it prints."""
    with tempfile.TemporaryDirectory() as tmp:
        study_path = _write_study_assets(Path(tmp))
        argv = [
            "profile",
            "window",
            "--spec",
            str(study_path),
            "--gpu-count",
            "8",
        ]
        # Capture whatever the command writes to stdout.
        captured = io.StringIO()
        with mock.patch("sys.stdout", captured):
            rc = cli_main(argv)
        self.assertEqual(rc, 0)

        profile = json.loads(captured.getvalue())["profile"]
        self.assertEqual(profile["window_id"], "chat_w1")
        self.assertEqual(len(profile["vector"]), 10)
        # The --gpu-count value is expected to be echoed back as an integer.
        self.assertEqual(profile["gpu_count"], 8)
def test_cli_profile_window_does_not_resolve_llm_endpoint(self) -> None:
    """Run ``profile window`` on a spec whose LLM endpoint has been rewritten.

    The spec's ``llm.endpoint`` is replaced with a provider/model-only entry
    before the command runs; the command is still expected to exit 0 and
    print the window profile.
    """
    with tempfile.TemporaryDirectory() as tmp:
        study_path = _write_study_assets(Path(tmp))

        # Rewrite the study spec on disk with the substituted endpoint.
        spec = json.loads(study_path.read_text(encoding="utf-8"))
        spec["llm"]["endpoint"] = {
            "provider": "codex",
            "model": "gpt-5.4",
        }
        study_path.write_text(json.dumps(spec), encoding="utf-8")

        captured = io.StringIO()
        with mock.patch("sys.stdout", captured):
            rc = cli_main(["profile", "window", "--spec", str(study_path)])
        self.assertEqual(rc, 0)

        printed = json.loads(captured.getvalue())
        self.assertEqual(printed["profile"]["window_id"], "chat_w1")
def test_harness_uses_latency_failures_before_generic_unrecoverable(self) -> None:
with tempfile.TemporaryDirectory() as tmp:
tmp_path = Path(tmp)