Add L-C-A workload profile metric and CLI profile commands
Implement the paper's 10-dimensional L-C-A workload feature vector in lca.py (features are RobustScaler-normalized; similarity is sim = exp(-||dz||)), and wire it into the `aituner profile window` and `aituner profile similarity` CLI commands. Covered by tests. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -1,6 +1,8 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import io
|
||||
import math
|
||||
import os
|
||||
import signal
|
||||
import subprocess
|
||||
@@ -25,6 +27,12 @@ from aituner.harness import (
|
||||
build_harness_guided_proposal,
|
||||
build_harness_stop_proposal,
|
||||
)
|
||||
from aituner.lca import (
|
||||
build_workload_profile,
|
||||
profile_similarity,
|
||||
resolve_length_mode,
|
||||
similarity_report,
|
||||
)
|
||||
from aituner.llm import _extract_response_text, build_prompt, parse_proposal_text, validate_proposal
|
||||
from aituner.search import ThresholdProbe, binary_search_max_feasible
|
||||
from aituner.slo import RequestOutcome, evaluate_request, summarize_evaluations
|
||||
@@ -48,7 +56,7 @@ from aituner.worker import (
|
||||
_wait_for_server_or_exit,
|
||||
run_trial,
|
||||
)
|
||||
from aituner.trace import TraceRequest
|
||||
from aituner.trace import TraceRequest, WindowRecord
|
||||
|
||||
|
||||
REPO_ROOT = Path(__file__).resolve().parents[1]
|
||||
@@ -241,6 +249,150 @@ class CoreFlowTests(unittest.TestCase):
|
||||
self.assertIn("knob_harnesses", prompt)
|
||||
self.assertTrue(study_root.exists())
|
||||
|
||||
def test_lca_workload_profile_uses_standard_10d_features(self) -> None:
    # Build a profile from a 4-second window holding two requests whose
    # hash-id lists share only their first entry, then check the feature
    # vector layout and the derived cache / arrival statistics.
    record = WindowRecord(
        window_id="w1",
        trace_path=Path("trace.jsonl"),
        trace_type="chat",
        window_start=0.0,
        window_end=4.0,
        source_payload={"block_size": 64},
    )
    first = TraceRequest(
        row_id="r1",
        arrival_s=0.0,
        sampling_u=1.0,
        body={},
        prompt_tokens_hint=100,
        completion_tokens_hint=10,
        metadata={"hash_ids": [1, 2]},
    )
    second = TraceRequest(
        row_id="r2",
        arrival_s=1.0,
        sampling_u=1.0,
        body={},
        prompt_tokens_hint=100,
        completion_tokens_hint=20,
        metadata={"hash_ids": [1, 3]},
    )

    profile = build_workload_profile(
        [first, second],
        record,
        gpu_count=2,
        length_mode="total",
    )

    # Vector layout: ten named features, the first being the log mean length.
    self.assertEqual(len(profile.feature_names), 10)
    self.assertEqual(len(profile.vector), 10)
    self.assertEqual(profile.feature_names[0], "L.log_mean_length")

    # Cache statistics: 230 = 100 + 10 + 100 + 20 (all tokens),
    # 200 = 100 + 100 (prompt tokens only).
    cache_stats = profile.stats["cache"]
    self.assertAlmostEqual(cache_stats["total_hit_length"], 64.0)
    self.assertAlmostEqual(cache_stats["hit_rate"], 64.0 / 230.0)
    self.assertAlmostEqual(cache_stats["input_hit_rate"], 64.0 / 200.0)

    self.assertAlmostEqual(profile.vector[3], math.log1p(32.0))
    self.assertAlmostEqual(profile.vector[5], 1.0)

    # Arrival statistics: 0.25 = 2 requests / 4 s window / 2 GPUs.
    arrival_stats = profile.stats["arrival"]
    self.assertAlmostEqual(arrival_stats["request_rate_per_gpu"], 0.25)
    self.assertAlmostEqual(arrival_stats["fano_1s"], 0.5)

    # A decode-only request mode is expected to map to the "output" length mode.
    self.assertEqual(resolve_length_mode(request_mode="decode_only"), "output")
|
||||
|
||||
def test_lca_similarity_matrix_separates_different_profiles(self) -> None:
    # Two profiles built from identical request shapes must compare as
    # similarity 1.0, and must be closer to each other than to a profile
    # with much longer prompts and a much shorter arrival gap.
    template = WindowRecord(
        window_id="base",
        trace_path=Path("trace.jsonl"),
        trace_type="chat",
        window_start=0.0,
        window_end=4.0,
        source_payload={"block_size": 64},
    )

    def profile_for(window_id: str, input_tokens: int, *, arrival_gap: float) -> object:
        # (row-id suffix, arrival time, hash ids) for the two requests.
        request_specs = (
            (1, 0.0, [window_id, 1]),
            (2, arrival_gap, [window_id, 1, 2]),
        )
        reqs = [
            TraceRequest(
                row_id=f"{window_id}-{suffix}",
                arrival_s=arrival,
                sampling_u=1.0,
                body={},
                prompt_tokens_hint=input_tokens,
                completion_tokens_hint=16,
                metadata={"hash_ids": hash_ids},
            )
            for suffix, arrival, hash_ids in request_specs
        ]
        # Same window geometry as the template, under a distinct window id.
        own_window = WindowRecord(
            window_id=window_id,
            trace_path=template.trace_path,
            trace_type=template.trace_type,
            window_start=template.window_start,
            window_end=template.window_end,
            source_payload=template.source_payload,
        )
        return build_workload_profile(
            reqs,
            own_window,
            gpu_count=1,
            length_mode="total",
        )

    twin_a = profile_for("same-a", 100, arrival_gap=1.0)
    twin_b = profile_for("same-b", 100, arrival_gap=1.0)
    outlier = profile_for("different", 10000, arrival_gap=0.1)

    report = similarity_report([twin_a, twin_b, outlier])
    matrix = report["matrix"]

    self.assertAlmostEqual(profile_similarity(twin_a, twin_b), 1.0)
    self.assertGreater(matrix[0][1], matrix[0][2])
    self.assertIn("L", report["pairs"][2]["family_similarity"])
|
||||
|
||||
def test_cli_profile_window_outputs_lca_profile(self) -> None:
    # Run `profile window` against freshly written study assets and check
    # the JSON document printed on stdout.
    with tempfile.TemporaryDirectory() as tmp:
        spec_file = _write_study_assets(Path(tmp))
        argv = [
            "profile",
            "window",
            "--spec",
            str(spec_file),
            "--gpu-count",
            "8",
        ]

        captured = io.StringIO()
        with mock.patch("sys.stdout", captured):
            exit_code = cli_main(argv)

        self.assertEqual(exit_code, 0)
        profile = json.loads(captured.getvalue())["profile"]
        self.assertEqual(profile["window_id"], "chat_w1")
        self.assertEqual(len(profile["vector"]), 10)
        # The --gpu-count value is expected to be echoed back as an integer.
        self.assertEqual(profile["gpu_count"], 8)
|
||||
|
||||
def test_cli_profile_window_does_not_resolve_llm_endpoint(self) -> None:
    # Overwrite the spec's llm endpoint with a provider/model-only entry and
    # check that `profile window` still exits 0 and prints the profile.
    with tempfile.TemporaryDirectory() as tmp:
        spec_file = _write_study_assets(Path(tmp))

        spec = json.loads(spec_file.read_text(encoding="utf-8"))
        spec["llm"]["endpoint"] = {"provider": "codex", "model": "gpt-5.4"}
        spec_file.write_text(json.dumps(spec), encoding="utf-8")

        captured = io.StringIO()
        argv = ["profile", "window", "--spec", str(spec_file)]
        with mock.patch("sys.stdout", captured):
            exit_code = cli_main(argv)

        self.assertEqual(exit_code, 0)
        printed = json.loads(captured.getvalue())
        self.assertEqual(printed["profile"]["window_id"], "chat_w1")
|
||||
|
||||
def test_harness_uses_latency_failures_before_generic_unrecoverable(self) -> None:
|
||||
with tempfile.TemporaryDirectory() as tmp:
|
||||
tmp_path = Path(tmp)
|
||||
|
||||
Reference in New Issue
Block a user