A3: vLLM scheduler patch for step-level JSONL log

When AGENTIC_STEP_LOG_PATH is set, the scheduler emits one JSONL line per scheduler step containing t_unix, worker_id, prefill/decode token counts, n_running/n_waiting, preempted request ids, and per-request phase labels. When the env var is unset the logger is a no-op, so production engines are not affected.

bench.sh now threads AGENTIC_STEP_LOG_DIR through to each per-engine launch, so step logs are written to engine_${i}.jsonl.

Required by Batch 2 (PD-colo interference index) and Batch 5 (same-worker overlap attribution); polling the engine /metrics endpoint cannot provide per-step granularity.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-25 16:19:11 +08:00
parent fe556b5d98
commit 5816aad731
3 changed files with 175 additions and 0 deletions
--- a/third_party/vllm/vllm/v1/core/sched/scheduler.py
+++ b/third_party/vllm/vllm/v1/core/sched/scheduler.py
@@ -287,6 +287,29 @@ class Scheduler(SchedulerInterface):

        self._pause_state: PauseState = PauseState.UNPAUSED

+        # agentic-kv: optional step-level JSONL logger for B2 interference
+        # analysis. Activated by env vars; no-op otherwise.
+        self._agentic_step_log_fh = None
+        self._agentic_step_id = 0
+        self._agentic_worker_id = None
+        import os as _os
+        _step_path = _os.environ.get("AGENTIC_STEP_LOG_PATH")
+        if _step_path:
+            try:
+                _os.makedirs(_os.path.dirname(_step_path) or ".", exist_ok=True)
+                self._agentic_step_log_fh = open(_step_path, "a", buffering=1)
+                self._agentic_worker_id = _os.environ.get(
+                    "AGENTIC_WORKER_ID",
+                    f"dp{self.parallel_config.data_parallel_rank}",
+                )
+                logger.info(
+                    "agentic-kv step log enabled: path=%s worker_id=%s",
+                    _step_path, self._agentic_worker_id,
+                )
+            except Exception as _exc:
+                logger.warning("agentic-kv step log disabled (%r)", _exc)
+                self._agentic_step_log_fh = None
+
    def _mamba_block_aligned_split(
        self,
        request: Request,
@@ -926,8 +949,83 @@ class Scheduler(SchedulerInterface):

        with record_function_or_nullcontext("schedule: update_after_schedule"):
            self._update_after_schedule(scheduler_output)
+
+        if self._agentic_step_log_fh is not None:
+            self._agentic_emit_step_log(
+                scheduled_timestamp=scheduled_timestamp,
+                num_scheduled_tokens=num_scheduled_tokens,
+                total_num_scheduled_tokens=total_num_scheduled_tokens,
+                scheduled_new_reqs=scheduled_new_reqs,
+                scheduled_resumed_reqs=scheduled_resumed_reqs,
+                scheduled_running_reqs=scheduled_running_reqs,
+                preempted_reqs=preempted_reqs,
+            )
+
        return scheduler_output

+    def _agentic_emit_step_log(
+        self,
+        scheduled_timestamp: float,
+        num_scheduled_tokens: dict[str, int],
+        total_num_scheduled_tokens: int,
+        scheduled_new_reqs: list[Request],
+        scheduled_resumed_reqs: list[Request],
+        scheduled_running_reqs: list[Request],
+        preempted_reqs: list[Request],
+    ) -> None:
+        """Append one JSONL record for this scheduler step (agentic-kv B2).
+
+        Only called while the step log is open. If the write fails, the
+        handle is closed and step logging stays disabled from then on.
+        """
+        import contextlib as _contextlib
+        import json as _json
+        import time as _time
+        new_ids = {r.request_id for r in scheduled_new_reqs}
+        resumed_ids = {r.request_id for r in scheduled_resumed_reqs}
+        per_req: list[dict[str, Any]] = []
+        prefill_tokens = decode_tokens = 0
+        for rid, n in num_scheduled_tokens.items():
+            is_new = rid in new_ids
+            is_resumed = rid in resumed_ids
+            # Heuristic: a new/resumed request, or any request scheduled for
+            # more than one token, counts as prefill; exactly one is decode.
+            if is_new or is_resumed or n > 1:
+                prefill_tokens += n
+                phase = "prefill"
+            else:
+                decode_tokens += n
+                phase = "decode"
+            per_req.append({
+                "rid": rid, "n": n, "phase": phase,
+                "is_new": is_new, "is_resumed": is_resumed,
+            })
+        record = {
+            "t_unix": _time.time(),
+            "t_monotonic": scheduled_timestamp,
+            "step_id": self._agentic_step_id,
+            "worker_id": self._agentic_worker_id,
+            "total_scheduled_tokens": total_num_scheduled_tokens,
+            "prefill_tokens": prefill_tokens,
+            "decode_tokens": decode_tokens,
+            "n_new": len(scheduled_new_reqs),
+            "n_resumed": len(scheduled_resumed_reqs),
+            "n_running_scheduled": len(scheduled_running_reqs),
+            "n_running_total": len(self.running),
+            "n_waiting": len(self.waiting),
+            "n_preempted": len(preempted_reqs),
+            "preempted_ids": [r.request_id for r in preempted_reqs],
+            "per_req": per_req,
+        }
+        try:
+            self._agentic_step_log_fh.write(_json.dumps(record) + "\n")
+        except Exception as _exc:
+            logger.warning("agentic-kv step log write failed (%r)", _exc)
+            with _contextlib.suppress(Exception):
+                self._agentic_step_log_fh.close()  # don't wait for GC
+            self._agentic_step_log_fh = None
+        self._agentic_step_id += 1
+
    def _preempt_request(self, request: Request, timestamp: float) -> None:
        """Preempt a request and put it back to the waiting queue.