From 6f8e3c95c1810745ef61ba45b4c53b974880b9cd Mon Sep 17 00:00:00 2001
From: Gahow Wang <gahow.wang@gmail.com>
Date: Mon, 15 Jun 2026 14:12:17 +0800
Subject: [PATCH] Unify harness L-C-A on the canonical lca.WorkloadProfile

Phase 0 of the two-stop work. The prompt block labeled `workload_lca_profile`
previously re-derived L-C-A from summarize_window's ad-hoc percentiles, diverging
from the paper's 10-dim RobustScaler vector implemented in lca.py. Make that block
authoritative: build_harness_context now accepts an optional workload_profile and
renders the canonical 10-dim vector + per-family stats when present, falling back
to the legacy rendering only when no profile is supplied (direct unit-test calls).

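Real call sites (study prompt/llm-propose/tune, run_baseline_then_llm) build the
profile via lca.build_study_workload_profile and pass it through build_prompt;
study tune additionally passes it straight to build_harness_context. The
heuristic regime classifiers keep reading window_summary; that is the heuristic
layer, distinct from the similarity metric. Only inside the canonical
`workload_lca_profile` block are the regime labels fed from the canonical stats.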

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 scripts/run_baseline_then_llm.py |  2 ++
 src/aituner/cli.py               |  6 ++++
 src/aituner/harness.py           | 59 ++++++++++++++++++++++++++++++--
 src/aituner/lca.py               | 27 ++++++++++++++-
 src/aituner/llm.py               |  7 +++-
 tests/test_core_flow.py          | 29 ++++++++++++++++
 6 files changed, 126 insertions(+), 4 deletions(-)

diff --git a/scripts/run_baseline_then_llm.py b/scripts/run_baseline_then_llm.py
index 68d753e..5dd59fa 100644
--- a/scripts/run_baseline_then_llm.py
+++ b/scripts/run_baseline_then_llm.py
@@ -10,6 +10,7 @@ from aituner.llm import (
     load_capability_profile,
     parse_proposal_text,
 )
+from aituner.lca import build_study_workload_profile
 from aituner.spec import load_study_spec
 from aituner.store import StudyStore
 from aituner.trace import load_trace_requests, summarize_window
@@ -89,6 +90,7 @@ def main() -> int:
             window_summary=summarize_window(requests, window),
             state=state,
             capability_profile=capability_profile,
+            workload_profile=build_study_workload_profile(study, requests, window),
         )
         prompt_name = f"prompt-{state.next_trial_index:04d}"
         store.write_prompt(study.study_id, prompt_name, prompt)
diff --git a/src/aituner/cli.py b/src/aituner/cli.py
index e24b99f..7a65c61 100644
--- a/src/aituner/cli.py
+++ b/src/aituner/cli.py
@@ -14,6 +14,7 @@ from .harness import (
 )
 from .job import append_job, build_trial_job
 from .lca import (
+    build_study_workload_profile,
     build_workload_profile,
     resolve_length_mode,
     similarity_report,
@@ -140,6 +141,7 @@ def cmd_study_prompt(args: argparse.Namespace) -> int:
         window_summary=summarize_window(requests, window),
         state=state,
         capability_profile=capability_profile,
+        workload_profile=build_study_workload_profile(study, requests, window),
     )
     prompt_name = args.prompt_name or f"prompt-{state.next_trial_index:04d}"
     path = store.write_prompt(study.study_id, prompt_name, prompt)
@@ -160,6 +162,7 @@ def cmd_study_llm_propose(args: argparse.Namespace) -> int:
         window_summary=summarize_window(requests, window),
         state=state,
         capability_profile=capability_profile,
+        workload_profile=build_study_workload_profile(study, requests, window),
     )
     proposal_text = call_llm_for_proposal(
         policy=study.llm,
@@ -242,11 +245,13 @@ def cmd_study_tune(args: argparse.Namespace) -> int:
             break
         window, requests = load_trace_requests(study, study_spec_path=spec_path)
         window_summary = summarize_window(requests, window)
+        workload_profile = build_study_workload_profile(study, requests, window)
         harness_context = (
             build_harness_context(
                 study=study,
                 window_summary=window_summary,
                 state=state,
+                workload_profile=workload_profile,
             )
             if study.llm.use_harness
             else None
@@ -256,6 +261,7 @@ def cmd_study_tune(args: argparse.Namespace) -> int:
             window_summary=window_summary,
             state=state,
             capability_profile=capability_profile,
+            workload_profile=workload_profile,
         )
         prompt_name = f"prompt-{state.next_trial_index:04d}"
         store.write_prompt(study.study_id, prompt_name, prompt)
diff --git a/src/aituner/harness.py b/src/aituner/harness.py
index 693655d..0036c66 100644
--- a/src/aituner/harness.py
+++ b/src/aituner/harness.py
@@ -4,6 +4,7 @@ import json
 from pathlib import Path
 from typing import Any
 
+from .lca import EPSILON, WorkloadProfile
 from .spec import ConfigPatch, Proposal, StudySpec, StudyState, TrialSummary
 
 
@@ -30,6 +31,7 @@ def build_harness_context(
     study: StudySpec,
     window_summary: dict[str, Any],
     state: StudyState,
+    workload_profile: WorkloadProfile | None = None,
 ) -> dict[str, Any]:
     recent_diagnostics = _recent_trial_diagnostics(state)
     trial_profiles = _trial_profiles(study, recent_diagnostics)
@@ -52,7 +54,7 @@ def build_harness_context(
             "feature_model": "L-C-A: request lengths, inter-request KV-cache reuse, and arrival dynamics.",
             "trial_policy": "Profile measured trials, rank bottleneck hypotheses, score generic candidate actions, and stop only when no useful measured hypothesis remains.",
         },
-        "workload_lca_profile": _workload_lca_profile(window_summary),
+        "workload_lca_profile": _workload_lca_profile(window_summary, workload_profile),
         "recent_trial_diagnostics": recent_diagnostics,
         "trial_profiles": trial_profiles,
         "bottleneck_hypotheses": bottleneck_hypotheses,
@@ -141,7 +143,12 @@ def render_harness_context(context: dict[str, Any]) -> str:
     return json.dumps(context, ensure_ascii=False, indent=2)
 
 
-def _workload_lca_profile(window_summary: dict[str, Any]) -> dict[str, Any]:
+def _workload_lca_profile(
+    window_summary: dict[str, Any],
+    workload_profile: WorkloadProfile | None = None,
+) -> dict[str, Any]:
+    if workload_profile is not None:
+        return _canonical_lca_profile(workload_profile)
     prefix_cache = window_summary.get("prefix_cache")
     if not isinstance(prefix_cache, dict):
         prefix_cache = {}
@@ -178,6 +185,54 @@ def _workload_lca_profile(window_summary: dict[str, Any]) -> dict[str, Any]:
     }
 
 
+def _canonical_lca_profile(profile: WorkloadProfile) -> dict[str, Any]:
+    """Render the authoritative L-C-A prompt block from a lca.WorkloadProfile.
+
+    Copies length_mode, feature_names and vector from the profile; family numbers
+    come from profile.stats (non-dict sections read as empty) and feed the regime
+    classifiers. NOTE(review): confirm vector is RobustScaler-normalized as labeled.
+    """
+    stats = profile.stats if isinstance(profile.stats, dict) else {}
+    length, cache, arrival = (
+        section if isinstance(section, dict) else {}
+        for section in map(stats.get, ("length", "cache", "arrival"))
+    )
+    p95 = _as_float(length.get("p95"))
+    p50 = _as_float(length.get("p50"))
+    tail_ratio = 0.0 if not p95 else float(p95 / max(p50, EPSILON))
+    input_hit_rate = _as_float(cache.get("input_hit_rate"))
+    fano = _as_float(arrival.get("fano_1s"))
+    gap_cv = _as_float(arrival.get("interarrival_cv"))
+    return {
+        "metric": "paper L-C-A (10-dim, RobustScaler-normalized) from lca.WorkloadProfile",
+        "length_mode": profile.length_mode,
+        "feature_names": list(profile.feature_names),
+        "vector": list(profile.vector),
+        "L_request_lengths": {
+            "mean": _as_float(length.get("mean")),
+            "p50": p50,
+            "p95": p95,
+            "cv": _as_float(length.get("cv")),
+            "tail_ratio_p95_p50": tail_ratio,
+            "regime": _length_regime(p95, tail_ratio),
+        },
+        "C_prefix_cache": {
+            "hit_rate": _as_float(cache.get("hit_rate")),
+            "input_hit_rate": input_hit_rate,
+            "repeated_block_ratio": _as_float(cache.get("repeated_block_ratio")),
+            "rows_with_hash_ids": int(cache.get("rows_with_hash_ids") or 0),
+            "regime": _cache_regime(input_hit_rate),
+        },
+        "A_arrivals": {
+            "request_rate": _as_float(arrival.get("request_rate")),
+            "request_rate_per_gpu": _as_float(arrival.get("request_rate_per_gpu")),
+            "interarrival_cv": gap_cv,
+            "fano_1s": fano,
+            "regime": _arrival_regime(fano, gap_cv),
+        },
+    }
+
+
 def _knob_harnesses(
     study: StudySpec,
     window_summary: dict[str, Any],
diff --git a/src/aituner/lca.py b/src/aituner/lca.py
index 21846ac..eb8549d 100644
--- a/src/aituner/lca.py
+++ b/src/aituner/lca.py
@@ -4,10 +4,13 @@ import json
 import math
 import statistics
 from dataclasses import dataclass
-from typing import Any, Sequence
+from typing import TYPE_CHECKING, Any, Sequence
 
 from .trace import TraceRequest, WindowRecord
 
+if TYPE_CHECKING:
+    from .spec import StudySpec
+
 
 EPSILON = 1e-9
 
@@ -178,6 +181,28 @@ def build_workload_profile(
     )
 
 
+def build_study_workload_profile(
+    study: "StudySpec",
+    requests: list[TraceRequest],
+    window: WindowRecord,
+) -> WorkloadProfile:
+    """Build the canonical L-C-A WorkloadProfile for a study's loaded window.
+
+    Resolves the length mode with length_mode="auto" from the study's trace
+    request_mode, then delegates to build_workload_profile with the study's
+    GPU count.
+
+    Intended as the single place the harness prompt (and, later, Stop-A)
+    obtains the paper's 10-dimensional L-C-A feature vector.
+    """
+    trace_mode = study.trace.request_mode
+    resolved = resolve_length_mode(request_mode=trace_mode, length_mode="auto")
+    gpu_count = study.hardware.gpu_count
+    return build_workload_profile(
+        requests, window, gpu_count=gpu_count, length_mode=resolved
+    )
+
+
 def fit_robust_scale(profiles: Sequence[WorkloadProfile]) -> RobustScale:
     if not profiles:
         raise ValueError("At least one profile is required to fit a robust scale.")
diff --git a/src/aituner/llm.py b/src/aituner/llm.py
index 855fdb5..1a4dc9c 100644
--- a/src/aituner/llm.py
+++ b/src/aituner/llm.py
@@ -3,12 +3,15 @@ from __future__ import annotations
 import json
 import time
 from pathlib import Path
-from typing import Any
+from typing import TYPE_CHECKING, Any
 
 from .harness import build_harness_context, render_harness_context
 from .http_client import chat_completion, stream_text_completion
 from .spec import LLMPolicySpec, Proposal, SpecError, StudySpec, StudyState
 
+if TYPE_CHECKING:
+    from .lca import WorkloadProfile
+
 
 def _parse_bool_like(value: Any, *, context: str) -> bool:
     if isinstance(value, bool):
@@ -178,6 +181,7 @@ def build_prompt(
     window_summary: dict[str, Any],
     state: StudyState,
     capability_profile: dict[str, Any] | None,
+    workload_profile: "WorkloadProfile | None" = None,
 ) -> str:
     objective_notes: list[str] = []
     if study.trace.request_mode == "decode_only":
@@ -409,6 +413,7 @@ def build_prompt(
                     study=study,
                     window_summary=window_summary,
                     state=state,
+                    workload_profile=workload_profile,
                 )
             ),
             "",
diff --git a/tests/test_core_flow.py b/tests/test_core_flow.py
index 1fd5494..f0aca0d 100644
--- a/tests/test_core_flow.py
+++ b/tests/test_core_flow.py
@@ -28,6 +28,7 @@ from aituner.harness import (
     build_harness_stop_proposal,
 )
 from aituner.lca import (
+    build_study_workload_profile,
     build_workload_profile,
     profile_similarity,
     resolve_length_mode,
@@ -298,6 +299,34 @@ class CoreFlowTests(unittest.TestCase):
         self.assertAlmostEqual(profile.stats["arrival"]["fano_1s"], 0.5)
         self.assertEqual(resolve_length_mode(request_mode="decode_only"), "output")
 
+    def test_harness_context_uses_canonical_lca_vector(self) -> None:
+        """Check which rendering the workload_lca_profile block uses.
+
+        Canonical 10-dim vector when a profile is passed; legacy block otherwise.
+        """
+        with tempfile.TemporaryDirectory() as tmp:
+            study_path = _write_study_assets(Path(tmp))
+            study = load_study_spec(study_path)
+            window, requests = load_trace_requests(study, study_spec_path=study_path)
+            profile = build_study_workload_profile(study, requests, window)
+            base_kwargs = {
+                "study": study,
+                "state": StudyState(study_id=study.study_id, trials=[]),
+                "window_summary": summarize_window(requests, window),
+            }
+            # With a profile: the labeled block is the canonical 10-dim metric.
+            with_profile = build_harness_context(
+                **base_kwargs,
+                workload_profile=profile,
+            )
+            canonical = with_profile["workload_lca_profile"]
+            self.assertEqual(canonical["vector"], profile.vector)
+            self.assertEqual(len(canonical["vector"]), 10)
+            self.assertIn("RobustScaler", canonical["metric"])
+            # Without a profile: falls back to the legacy ad-hoc rendering.
+            legacy = build_harness_context(**base_kwargs)["workload_lca_profile"]
+            self.assertNotIn("vector", legacy)
+
     def test_lca_similarity_matrix_separates_different_profiles(self) -> None:
         window = WindowRecord(
             window_id="base",