110 lines
4.1 KiB
Python
110 lines
4.1 KiB
Python
from __future__ import annotations
|
|
|
|
import json
|
|
import tempfile
|
|
import unittest
|
|
from pathlib import Path
|
|
|
|
from aituner.tuning_report import run_tuning_report
|
|
|
|
|
|
def _write_state(root: Path, *, study_id: str, rates: list[float | None]) -> None:
    """Write a synthetic study ``state.json`` file under ``root``.

    One trial record is produced per entry of ``rates``, with ids
    ``trial-0001``, ``trial-0002``, ... in input order. A ``None`` rate yields
    a ``"failed"`` trial; any other rate yields a ``"completed"`` trial whose
    total and per-GPU best request rates both equal that value. The
    study-level best fields describe the first trial holding the highest
    non-``None`` rate, or are ``None`` when no trial has a rate.

    Args:
        root: Directory to create (missing parents are created as well).
        study_id: Value stored under the ``"study_id"`` key.
        rates: Per-trial request rates; ``None`` marks a failed trial.

    Raises:
        FileExistsError: If ``root`` already exists.
    """
    root.mkdir(parents=True)

    trials = [
        {
            "trial_id": f"trial-{idx:04d}",
            "status": "completed" if rate is not None else "failed",
            "parallel_size": 1,
            "best_request_rate": rate,
            "best_request_rate_per_gpu": rate,
            "config_patch": {"env_patch": {}, "flag_patch": {}},
        }
        for idx, rate in enumerate(rates, start=1)
    ]

    # max() keeps the first maximal element, so ties resolve to the earliest
    # trial; default=None covers studies without any rated trial.
    best_trial = max(
        (trial for trial in trials if trial["best_request_rate"] is not None),
        key=lambda trial: trial["best_request_rate"],
        default=None,
    )
    best_trial_id = None if best_trial is None else best_trial["trial_id"]
    best_rate = None if best_trial is None else best_trial["best_request_rate"]

    payload = {
        "study_id": study_id,
        "best_trial_id": best_trial_id,
        "best_request_rate": best_rate,
        "best_request_rate_per_gpu": best_rate,
        "next_trial_index": len(rates) + 1,
        "trials": trials,
    }
    (root / "state.json").write_text(json.dumps(payload), encoding="utf-8")
|
|
|
|
|
|
class TuningReportTests(unittest.TestCase):
    """Checks ``run_tuning_report`` against synthetic study state files."""

    def test_tuning_report_scores_harness_vs_naive_anytime_progress(self) -> None:
        """Compare a two-trial harness study with a four-trial naive study.

        Both arms peak at a rate of 0.9. The harness arm reaches it on its
        second trial, while the naive arm only reaches it on its fourth trial
        and contains one failed trial along the way.
        """
        with tempfile.TemporaryDirectory() as tmp:
            tmp_path = Path(tmp)
            output_root = tmp_path / "out"

            _write_state(
                tmp_path / "studies" / "harness-study",
                study_id="harness-study",
                rates=[0.4, 0.9],
            )
            _write_state(
                tmp_path / "naive-study",
                study_id="naive-study",
                rates=[0.4, None, 0.7, 0.9],
            )

            # NOTE(review): the harness arm points at the parent "studies"
            # directory while the naive arm points at the study directory
            # itself; presumably the report resolves both layouts -- confirm
            # against aituner.tuning_report.
            spec = {
                "report_id": "report-1",
                "output_root": str(output_root),
                "target_fraction": 0.8,
                "cases": [
                    {
                        "case_id": "case-1",
                        "tags": ["model-a", "chat"],
                        "budgets": [1, 2, 4],
                        "arms": [
                            {
                                "name": "harness",
                                "kind": "harness",
                                "study_root": str(tmp_path / "studies"),
                            },
                            {
                                "name": "naive",
                                "kind": "naive",
                                "study_root": str(tmp_path / "naive-study"),
                            },
                        ],
                    }
                ],
            }
            spec_path = tmp_path / "report.json"
            spec_path.write_text(json.dumps(spec), encoding="utf-8")

            summary = run_tuning_report(spec_path)

            case = summary["cases"][0]
            self.assertEqual(case["reference_best_per_gpu"], 0.9)
            self.assertEqual(case["winners"]["final_best"], "harness")
            self.assertEqual(case["winners"]["fastest_to_target"], "harness")

            # Arms are read back by position, in the order listed in the spec.
            harness = case["arms"][0]
            naive = case["arms"][1]
            self.assertEqual(harness["best_at_budget"]["2"], 0.9)
            self.assertEqual(naive["best_at_budget"]["2"], 0.4)
            self.assertEqual(case["target_fraction"], 0.8)
            self.assertEqual(harness["trials_to_target"], 2)
            self.assertEqual(naive["trials_to_target"], 4)
            self.assertEqual(naive["failed_count"], 1)

            comparison = case["harness_vs_naive"][0]
            self.assertTrue(comparison["passes"])
            self.assertEqual(comparison["target_trial_speedup_vs_best_naive"], 2.0)

            # Both report artifacts must exist under the configured output root.
            self.assertTrue((output_root / "summary.json").exists())
            self.assertTrue((output_root / "report.md").exists())
|
|
|
|
|
|
if __name__ == "__main__":
    # Run this module's test cases when the file is executed as a script.
    unittest.main()
|