Fix trace reuse and packaged model assets
This commit is contained in:
@@ -1,5 +1,8 @@
|
||||
import json
|
||||
import shutil
|
||||
import subprocess
|
||||
import tempfile
|
||||
import tomllib
|
||||
import unittest
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
@@ -655,6 +658,33 @@ class AliTracePipelineTest(unittest.TestCase):
|
||||
|
||||
self.assertEqual(Path(captured["dir"]), output_dir)
|
||||
|
||||
def test_format_and_sort_trace_supports_zstd_input_during_time_inference(self):
    """Smoke-test ``format_and_sort_trace`` on a zstd-compressed one-row input.

    The test is skipped when either the ``zstd`` or the ``zstdcat``
    executable cannot be found on ``PATH``.
    """
    required_tools = ("zstd", "zstdcat")
    if any(shutil.which(tool) is None for tool in required_tools):
        self.skipTest("zstd/zstdcat are required for .jsonl.zst formatter smoke test")

    with tempfile.TemporaryDirectory() as temp_dir:
        workdir = Path(temp_dir)
        plain_file = workdir / "0417-1500-1530.jsonl"
        compressed_file = workdir / "0417-1500-1530.jsonl.zst"
        formatted_file = workdir / "formatted" / "trace-raw.jsonl"

        # One raw row, newline-terminated, written uncompressed first.
        row = make_raw_row(
            "req-zst",
            utc_ms("2026-04-17 15:00:01.000"),
            time_text="2026-04-17 15:00:01.000",
        )
        plain_file.write_text(json.dumps(row) + "\n", encoding="utf-8")
        subprocess.run(["zstd", "-q", str(plain_file), "-o", str(compressed_file)], check=True)

        # The compressed file itself (not a directory) is passed as ``input_dir``.
        stats = format_and_sort_trace(input_dir=compressed_file, output_path=formatted_file, chunk_bytes=256)

        self.assertEqual(stats["row_count"], 1)
        output_lines = formatted_file.read_text(encoding="utf-8").splitlines()
        formatted_rows = list(map(json.loads, output_lines))
        request_ids = [entry["meta"]["request_id"] for entry in formatted_rows]
        self.assertEqual(request_ids, ["req-zst"])
|
||||
|
||||
def test_export_release_ready_trace_defaults_temp_dir_to_output_parent(self):
|
||||
with tempfile.TemporaryDirectory() as temp_dir:
|
||||
root = Path(temp_dir)
|
||||
@@ -855,3 +885,57 @@ class AliTracePipelineTest(unittest.TestCase):
|
||||
(analysis_dir / "details" / "details_summary.json").stat().st_mtime_ns,
|
||||
details_summary_mtime_ns,
|
||||
)
|
||||
|
||||
def test_trace_analyzer_rebuilds_when_same_output_gets_different_input(self):
    """Analyze two different traces into the same dataset directory in turn.

    After the second run, ``features.csv`` must mention only the second
    request id, and ``details/details_summary.json`` must record a
    ``release_path`` ending in ``second.jsonl``.
    """
    with tempfile.TemporaryDirectory() as temp_dir:
        root = Path(temp_dir)
        outputs_root = root / "outputs"
        formatted_root = outputs_root / "formatted"
        analysis_root = outputs_root / "analysis"

        def build_trace(label: str, request_id: str, ready: str) -> Path:
            """Create, format and release a one-row trace; return the renamed formatted path."""
            raw_dir = root / label
            raw_dir.mkdir()
            row = make_raw_row(request_id, utc_ms(ready), user_id=f"user-{request_id}")
            (raw_dir / "0417-1500-1530.jsonl").write_text(json.dumps(row) + "\n", encoding="utf-8")

            format_status = formatter_main(["format", str(raw_dir), "--output-root", str(formatted_root)])
            self.assertEqual(format_status, 0)

            # The test expects the formatter output under this fixed name; it is
            # moved to a per-label name (presumably so both traces can coexist
            # in the shared formatted directory -- confirm against the formatter).
            labelled_path = formatted_root / f"{label}-raw.jsonl"
            (formatted_root / "041715-041715-raw.jsonl").replace(labelled_path)

            release_status = formatter_main(
                ["build-release", str(labelled_path), "--jobs", "1", "--block-size", "8"]
            )
            self.assertEqual(release_status, 0)
            return labelled_path

        first_path = build_trace("first", "req-first", "2026-04-17 15:00:01.000")
        second_path = build_trace("second", "req-second", "2026-04-17 15:00:02.000")

        # Both analyzer runs share the output root and dataset name on purpose.
        common_args = [
            "--output-root", str(analysis_root),
            "--dataset-name", "same-dataset",
            "--segment-mode", "bytes",
            "--block-size", "8",
        ]
        analysis_dir = analysis_root / "same-dataset"
        features_file = analysis_dir / "features.csv"

        first_status = analyzer_main(["analyze", str(first_path), *common_args])
        self.assertEqual(first_status, 0)
        self.assertIn("req-first", features_file.read_text(encoding="utf-8"))

        second_status = analyzer_main(["analyze", str(second_path), *common_args])
        self.assertEqual(second_status, 0)

        features_text = features_file.read_text(encoding="utf-8")
        self.assertIn("req-second", features_text)
        self.assertNotIn("req-first", features_text)

        summary_file = analysis_dir / "details" / "details_summary.json"
        details_summary = json.loads(summary_file.read_text(encoding="utf-8"))
        self.assertTrue(str(details_summary["release_path"]).endswith("second.jsonl"))
|
||||
|
||||
def test_pyproject_includes_trace_model_meta_package_data(self):
    """Check that ``pyproject.toml`` lists JSON and Jinja assets for ``trace_model_meta``.

    The ``[tool.setuptools.package-data]`` table must contain a
    ``trace_model_meta`` entry whose patterns include ``**/*.json`` and
    ``**/*.jinja``.

    ``pyproject.toml`` is looked up in the current working directory first
    and then in every ancestor directory of this test file, so the test
    does not depend on being launched from the repository root.
    """
    search_dirs = [Path.cwd(), *Path(__file__).resolve().parents]
    pyproject_path = next(
        (
            directory / "pyproject.toml"
            for directory in search_dirs
            if (directory / "pyproject.toml").is_file()
        ),
        None,
    )
    if pyproject_path is None:
        self.fail("pyproject.toml not found in the working directory or above this test file")

    pyproject = tomllib.loads(pyproject_path.read_text(encoding="utf-8"))
    package_data = pyproject["tool"]["setuptools"]["package-data"]

    self.assertIn("trace_model_meta", package_data)
    trace_model_meta_patterns = package_data["trace_model_meta"]
    self.assertIn("**/*.json", trace_model_meta_patterns)
    self.assertIn("**/*.jinja", trace_model_meta_patterns)
|
||||
|
||||
Reference in New Issue
Block a user