Add L-C-A workload profile metric and CLI profile commands

Implement the paper's 10-dimensional L-C-A workload feature vector (RobustScaler-normalized, with similarity defined as sim = exp(-||dz||)) in lca.py, and wire it into the `aituner profile window` and `aituner profile similarity` CLI commands. Covered by tests.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-15 14:02:24 +08:00
parent 984eb1f325
commit 27d1c8fa92
3 changed files with 770 additions and 2 deletions
--- a/tests/test_core_flow.py
+++ b/tests/test_core_flow.py
@@ -1,6 +1,8 @@
 from __future__ import annotations

 import json
+import io
+import math
 import os
 import signal
 import subprocess
@@ -25,6 +27,12 @@ from aituner.harness import (
    build_harness_guided_proposal,
    build_harness_stop_proposal,
 )
+from aituner.lca import (
+    build_workload_profile,
+    profile_similarity,
+    resolve_length_mode,
+    similarity_report,
+)
 from aituner.llm import _extract_response_text, build_prompt, parse_proposal_text, validate_proposal
 from aituner.search import ThresholdProbe, binary_search_max_feasible
 from aituner.slo import RequestOutcome, evaluate_request, summarize_evaluations
@@ -48,7 +56,7 @@ from aituner.worker import (
    _wait_for_server_or_exit,
    run_trial,
 )
-from aituner.trace import TraceRequest
+from aituner.trace import TraceRequest, WindowRecord


 REPO_ROOT = Path(__file__).resolve().parents[1]
@@ -241,6 +249,150 @@ class CoreFlowTests(unittest.TestCase):
            self.assertIn("knob_harnesses", prompt)
            self.assertTrue(study_root.exists())

+    def test_lca_workload_profile_uses_standard_10d_features(self) -> None:
+        window = WindowRecord(
+            window_id="w1",
+            trace_path=Path("trace.jsonl"),
+            trace_type="chat",
+            window_start=0.0,
+            window_end=4.0,
+            source_payload={"block_size": 64},
+        )
+        requests = [
+            TraceRequest(
+                row_id="r1",
+                arrival_s=0.0,
+                sampling_u=1.0,
+                body={},
+                prompt_tokens_hint=100,
+                completion_tokens_hint=10,
+                metadata={"hash_ids": [1, 2]},
+            ),
+            TraceRequest(
+                row_id="r2",
+                arrival_s=1.0,
+                sampling_u=1.0,
+                body={},
+                prompt_tokens_hint=100,
+                completion_tokens_hint=20,
+                metadata={"hash_ids": [1, 3]},
+            ),
+        ]
+
+        profile = build_workload_profile(
+            requests,
+            window,
+            gpu_count=2,
+            length_mode="total",
+        )
+
+        self.assertEqual(len(profile.feature_names), 10)
+        self.assertEqual(len(profile.vector), 10)
+        self.assertEqual(profile.feature_names[0], "L.log_mean_length")
+        self.assertAlmostEqual(profile.stats["cache"]["total_hit_length"], 64.0)
+        self.assertAlmostEqual(profile.stats["cache"]["hit_rate"], 64.0 / 230.0)
+        self.assertAlmostEqual(profile.stats["cache"]["input_hit_rate"], 64.0 / 200.0)
+        self.assertAlmostEqual(profile.vector[3], math.log1p(32.0))
+        self.assertAlmostEqual(profile.vector[5], 1.0)
+        self.assertAlmostEqual(profile.stats["arrival"]["request_rate_per_gpu"], 0.25)
+        self.assertAlmostEqual(profile.stats["arrival"]["fano_1s"], 0.5)
+        self.assertEqual(resolve_length_mode(request_mode="decode_only"), "output")
+
+    def test_lca_similarity_matrix_separates_different_profiles(self) -> None:
+        window = WindowRecord(
+            window_id="base",
+            trace_path=Path("trace.jsonl"),
+            trace_type="chat",
+            window_start=0.0,
+            window_end=4.0,
+            source_payload={"block_size": 64},
+        )
+
+        def make_profile(window_id: str, input_tokens: int, *, arrival_gap: float) -> object:
+            reqs = [
+                TraceRequest(
+                    row_id=f"{window_id}-1",
+                    arrival_s=0.0,
+                    sampling_u=1.0,
+                    body={},
+                    prompt_tokens_hint=input_tokens,
+                    completion_tokens_hint=16,
+                    metadata={"hash_ids": [window_id, 1]},
+                ),
+                TraceRequest(
+                    row_id=f"{window_id}-2",
+                    arrival_s=arrival_gap,
+                    sampling_u=1.0,
+                    body={},
+                    prompt_tokens_hint=input_tokens,
+                    completion_tokens_hint=16,
+                    metadata={"hash_ids": [window_id, 1, 2]},
+                ),
+            ]
+            return build_workload_profile(
+                reqs,
+                WindowRecord(
+                    window_id=window_id,
+                    trace_path=window.trace_path,
+                    trace_type=window.trace_type,
+                    window_start=window.window_start,
+                    window_end=window.window_end,
+                    source_payload=window.source_payload,
+                ),
+                gpu_count=1,
+                length_mode="total",
+            )
+
+        p1 = make_profile("same-a", 100, arrival_gap=1.0)
+        p2 = make_profile("same-b", 100, arrival_gap=1.0)
+        p3 = make_profile("different", 10000, arrival_gap=0.1)
+
+        report = similarity_report([p1, p2, p3])
+
+        self.assertAlmostEqual(profile_similarity(p1, p2), 1.0)
+        self.assertGreater(report["matrix"][0][1], report["matrix"][0][2])
+        self.assertIn("L", report["pairs"][2]["family_similarity"])
+
+    def test_cli_profile_window_outputs_lca_profile(self) -> None:
+        with tempfile.TemporaryDirectory() as tmp:
+            tmp_path = Path(tmp)
+            study_path = _write_study_assets(tmp_path)
+            stdout = io.StringIO()
+            with mock.patch("sys.stdout", stdout):
+                rc = cli_main(
+                    [
+                        "profile",
+                        "window",
+                        "--spec",
+                        str(study_path),
+                        "--gpu-count",
+                        "8",
+                    ]
+                )
+
+            self.assertEqual(rc, 0)
+            payload = json.loads(stdout.getvalue())
+            self.assertEqual(payload["profile"]["window_id"], "chat_w1")
+            self.assertEqual(len(payload["profile"]["vector"]), 10)
+            self.assertEqual(payload["profile"]["gpu_count"], 8)
+
+    def test_cli_profile_window_does_not_resolve_llm_endpoint(self) -> None:
+        with tempfile.TemporaryDirectory() as tmp:
+            tmp_path = Path(tmp)
+            study_path = _write_study_assets(tmp_path)
+            payload = json.loads(study_path.read_text(encoding="utf-8"))
+            payload["llm"]["endpoint"] = {
+                "provider": "codex",
+                "model": "gpt-5.4",
+            }
+            study_path.write_text(json.dumps(payload), encoding="utf-8")
+            stdout = io.StringIO()
+            with mock.patch("sys.stdout", stdout):
+                rc = cli_main(["profile", "window", "--spec", str(study_path)])
+
+            self.assertEqual(rc, 0)
+            self.assertEqual(json.loads(stdout.getvalue())["profile"]["window_id"], "chat_w1")
+
    def test_harness_uses_latency_failures_before_generic_unrecoverable(self) -> None:
        with tempfile.TemporaryDirectory() as tmp:
            tmp_path = Path(tmp)