A3: vLLM scheduler patch for step-level JSONL log

When AGENTIC_STEP_LOG_PATH is set, the scheduler emits one JSONL line
per scheduler step with t_unix, worker_id, prefill/decode token
counts, n_running/n_waiting, preempted ids, and per-request phase
labels. No-op when the env var is unset, so production engines are
not impacted. bench.sh now threads AGENTIC_STEP_LOG_DIR through to
each per-engine launch so step logs end up at engine_${i}.jsonl.

Required by Batch 2 (PD-colo interference index) and Batch 5
(same-worker overlap attribution); engine /metrics polling cannot
provide per-step granularity.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-05-25 16:19:11 +08:00
parent fe556b5d98
commit 5816aad731
3 changed files with 175 additions and 0 deletions

View File

@@ -287,6 +287,29 @@ class Scheduler(SchedulerInterface):
self._pause_state: PauseState = PauseState.UNPAUSED
# agentic-kv: optional step-level JSONL logger for B2 interference
# analysis. Activated by env vars; no-op otherwise.
self._agentic_step_log_fh = None
self._agentic_step_id = 0
self._agentic_worker_id = None
import os as _os
_step_path = _os.environ.get("AGENTIC_STEP_LOG_PATH")
if _step_path:
try:
_os.makedirs(_os.path.dirname(_step_path) or ".", exist_ok=True)
self._agentic_step_log_fh = open(_step_path, "a", buffering=1)
self._agentic_worker_id = _os.environ.get(
"AGENTIC_WORKER_ID",
f"dp{self.parallel_config.data_parallel_rank}",
)
logger.info(
"agentic-kv step log enabled: path=%s worker_id=%s",
_step_path, self._agentic_worker_id,
)
except Exception as _exc:
logger.warning("agentic-kv step log disabled (%r)", _exc)
self._agentic_step_log_fh = None
def _mamba_block_aligned_split(
self,
request: Request,
@@ -926,8 +949,83 @@ class Scheduler(SchedulerInterface):
with record_function_or_nullcontext("schedule: update_after_schedule"):
self._update_after_schedule(scheduler_output)
if self._agentic_step_log_fh is not None:
self._agentic_emit_step_log(
scheduled_timestamp=scheduled_timestamp,
num_scheduled_tokens=num_scheduled_tokens,
total_num_scheduled_tokens=total_num_scheduled_tokens,
scheduled_new_reqs=scheduled_new_reqs,
scheduled_resumed_reqs=scheduled_resumed_reqs,
scheduled_running_reqs=scheduled_running_reqs,
preempted_reqs=preempted_reqs,
)
return scheduler_output
def _agentic_emit_step_log(
    self,
    scheduled_timestamp: float,
    num_scheduled_tokens: dict[str, int],
    total_num_scheduled_tokens: int,
    scheduled_new_reqs: list[Request],
    scheduled_resumed_reqs: list[Request],
    scheduled_running_reqs: list[Request],
    preempted_reqs: list[Request],
) -> None:
    """Append one JSON line describing the current scheduler step.

    The record carries wall-clock time, the caller-supplied timestamp, a
    monotonically increasing step id, the worker id, aggregate
    prefill/decode token counts, queue sizes, preempted request ids and a
    per-request breakdown.

    Phase labelling is a heuristic based only on the arguments: a request
    is "prefill" if it is in ``scheduled_new_reqs`` or
    ``scheduled_resumed_reqs``, or if it is scheduled for more than one
    token this step; otherwise it is "decode". Consequently a running
    request that is scheduled for exactly one token is always labelled
    "decode", and one scheduled for several tokens is always labelled
    "prefill", whatever the request's real state is.
    NOTE(review): if multi-token decode steps (e.g. speculative decoding)
    can occur in this deployment they will be counted as prefill -- confirm.

    Args:
        scheduled_timestamp: Timestamp taken by the caller for this step;
            written verbatim as ``t_monotonic`` (presumably a
            ``time.monotonic()`` reading -- verify against the caller).
        num_scheduled_tokens: Request id -> tokens scheduled this step.
        total_num_scheduled_tokens: Sum reported by the caller; written
            verbatim and not recomputed here.
        scheduled_new_reqs: Requests scheduled for the first time.
        scheduled_resumed_reqs: Requests resumed in this step.
        scheduled_running_reqs: Already-running requests scheduled in this
            step; only their count is logged.
        preempted_reqs: Requests preempted in this step.

    Logging is best-effort: if serialising or writing the record fails,
    a warning is logged, the handle is closed and step logging is
    disabled for the rest of the process. Nothing is raised to the caller.
    """
    import json as _json
    import time as _time

    log_fh = self._agentic_step_log_fh
    if log_fh is None:
        # Logging is disabled (never enabled, or turned off after an
        # earlier write failure); stay a silent no-op.
        return

    new_ids = {r.request_id for r in scheduled_new_reqs}
    resumed_ids = {r.request_id for r in scheduled_resumed_reqs}

    per_req: list[dict[str, Any]] = []
    prefill_tokens = 0
    decode_tokens = 0
    for rid, n in num_scheduled_tokens.items():
        is_new = rid in new_ids
        is_resumed = rid in resumed_ids
        # Heuristic: any step touching a new/resumed request is prefill;
        # otherwise per-step tokens >1 is chunked prefill, ==1 is decode.
        if is_new or is_resumed or n > 1:
            prefill_tokens += n
            phase = "prefill"
        else:
            decode_tokens += n
            phase = "decode"
        per_req.append({
            "rid": rid, "n": n, "phase": phase,
            "is_new": is_new, "is_resumed": is_resumed,
        })

    record = {
        "t_unix": _time.time(),
        "t_monotonic": scheduled_timestamp,
        "step_id": self._agentic_step_id,
        "worker_id": self._agentic_worker_id,
        "total_scheduled_tokens": total_num_scheduled_tokens,
        "prefill_tokens": prefill_tokens,
        "decode_tokens": decode_tokens,
        "n_new": len(scheduled_new_reqs),
        "n_resumed": len(scheduled_resumed_reqs),
        "n_running_scheduled": len(scheduled_running_reqs),
        "n_running_total": len(self.running),
        "n_waiting": len(self.waiting),
        "n_preempted": len(preempted_reqs),
        "preempted_ids": [r.request_id for r in preempted_reqs],
        "per_req": per_req,
    }

    try:
        log_fh.write(_json.dumps(record) + "\n")
    except Exception as _exc:
        # Deliberately broad: a logging failure must never take down the
        # scheduler. Disable further logging instead.
        logger.warning("agentic-kv step log write failed (%r)", _exc)
        self._agentic_step_log_fh = None
        # Release the descriptor now rather than leaving it to the GC.
        try:
            log_fh.close()
        except Exception as _close_exc:
            logger.debug(
                "agentic-kv step log close failed (%r)", _close_exc
            )
    self._agentic_step_id += 1
def _preempt_request(self, request: Request, timestamp: float) -> None:
"""Preempt a request and put it back to the waiting queue.