A3: vLLM scheduler patch for step-level JSONL log
When AGENTIC_STEP_LOG_PATH is set, the scheduler emits one JSONL line
per scheduler step with t_unix, worker_id, prefill/decode token
counts, n_running/n_waiting, preempted ids, and per-request phase
labels. No-op when the env var is unset, so production engines are
not impacted. bench.sh now threads AGENTIC_STEP_LOG_DIR through to
each per-engine launch so step logs end up at engine_${i}.jsonl.
Required by Batch 2 (PD-colo interference index) and Batch 5
(same-worker overlap attribution); engine /metrics polling cannot
provide per-step granularity.
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
98
third_party/vllm/vllm/v1/core/sched/scheduler.py
vendored
98
third_party/vllm/vllm/v1/core/sched/scheduler.py
vendored
@@ -287,6 +287,29 @@ class Scheduler(SchedulerInterface):
|
||||
|
||||
self._pause_state: PauseState = PauseState.UNPAUSED
|
||||
|
||||
# agentic-kv: optional step-level JSONL logger for B2 interference
|
||||
# analysis. Activated by env vars; no-op otherwise.
|
||||
self._agentic_step_log_fh = None
|
||||
self._agentic_step_id = 0
|
||||
self._agentic_worker_id = None
|
||||
import os as _os
|
||||
_step_path = _os.environ.get("AGENTIC_STEP_LOG_PATH")
|
||||
if _step_path:
|
||||
try:
|
||||
_os.makedirs(_os.path.dirname(_step_path) or ".", exist_ok=True)
|
||||
self._agentic_step_log_fh = open(_step_path, "a", buffering=1)
|
||||
self._agentic_worker_id = _os.environ.get(
|
||||
"AGENTIC_WORKER_ID",
|
||||
f"dp{self.parallel_config.data_parallel_rank}",
|
||||
)
|
||||
logger.info(
|
||||
"agentic-kv step log enabled: path=%s worker_id=%s",
|
||||
_step_path, self._agentic_worker_id,
|
||||
)
|
||||
except Exception as _exc:
|
||||
logger.warning("agentic-kv step log disabled (%r)", _exc)
|
||||
self._agentic_step_log_fh = None
|
||||
|
||||
def _mamba_block_aligned_split(
|
||||
self,
|
||||
request: Request,
|
||||
@@ -926,8 +949,83 @@ class Scheduler(SchedulerInterface):
|
||||
|
||||
with record_function_or_nullcontext("schedule: update_after_schedule"):
|
||||
self._update_after_schedule(scheduler_output)
|
||||
|
||||
if self._agentic_step_log_fh is not None:
|
||||
self._agentic_emit_step_log(
|
||||
scheduled_timestamp=scheduled_timestamp,
|
||||
num_scheduled_tokens=num_scheduled_tokens,
|
||||
total_num_scheduled_tokens=total_num_scheduled_tokens,
|
||||
scheduled_new_reqs=scheduled_new_reqs,
|
||||
scheduled_resumed_reqs=scheduled_resumed_reqs,
|
||||
scheduled_running_reqs=scheduled_running_reqs,
|
||||
preempted_reqs=preempted_reqs,
|
||||
)
|
||||
|
||||
return scheduler_output
|
||||
|
||||
def _agentic_emit_step_log(
    self,
    scheduled_timestamp: float,
    num_scheduled_tokens: dict[str, int],
    total_num_scheduled_tokens: int,
    scheduled_new_reqs: list[Request],
    scheduled_resumed_reqs: list[Request],
    scheduled_running_reqs: list[Request],
    preempted_reqs: list[Request],
) -> None:
    """Append one JSON line describing the current scheduler step.

    The record is written to ``self._agentic_step_log_fh``. Logging is
    best-effort: if serializing or writing the record fails, a warning is
    logged, the handle is closed and ``self._agentic_step_log_fh`` is set
    to ``None`` so that later steps stop logging. Nothing is raised to
    the caller.

    Args:
        scheduled_timestamp: Timestamp taken by the caller for this step;
            stored verbatim under ``t_monotonic`` (presumably a
            monotonic-clock reading -- confirm against the caller).
        num_scheduled_tokens: Request id -> number of tokens scheduled
            for that request in this step.
        total_num_scheduled_tokens: Total token count for the step,
            stored verbatim under ``total_scheduled_tokens``.
        scheduled_new_reqs: Requests scheduled for the first time.
        scheduled_resumed_reqs: Requests resumed in this step.
        scheduled_running_reqs: Already-running requests scheduled in
            this step; only their count is recorded.
        preempted_reqs: Requests preempted in this step; their ids and
            count are recorded.

    Side effects:
        Increments ``self._agentic_step_id`` after every attempted write,
        including a failed one. Does nothing at all when the log handle
        is already ``None``.
    """
    import json as _json
    import time as _time

    fh = self._agentic_step_log_fh
    if fh is None:
        # Logging is disabled (never enabled, or an earlier write failed).
        return

    new_ids = {r.request_id for r in scheduled_new_reqs}
    resumed_ids = {r.request_id for r in scheduled_resumed_reqs}

    per_req: list[dict[str, Any]] = []
    prefill_tokens = 0
    decode_tokens = 0
    for rid, n in num_scheduled_tokens.items():
        is_new = rid in new_ids
        is_resumed = rid in resumed_ids
        # Heuristic: any step touching a new/resumed request is prefill;
        # otherwise per-step tokens >1 is chunked prefill, ==1 is decode.
        # NOTE(review): a decode step that schedules several tokens for
        # one request (e.g. with speculative decoding, if enabled) would
        # be labelled "prefill" by this rule -- confirm for the target
        # engine configuration.
        if is_new or is_resumed or n > 1:
            prefill_tokens += n
            phase = "prefill"
        else:
            decode_tokens += n
            phase = "decode"
        per_req.append({
            "rid": rid, "n": n, "phase": phase,
            "is_new": is_new, "is_resumed": is_resumed,
        })

    record = {
        "t_unix": _time.time(),
        "t_monotonic": scheduled_timestamp,
        "step_id": self._agentic_step_id,
        "worker_id": self._agentic_worker_id,
        "total_scheduled_tokens": total_num_scheduled_tokens,
        "prefill_tokens": prefill_tokens,
        "decode_tokens": decode_tokens,
        "n_new": len(scheduled_new_reqs),
        "n_resumed": len(scheduled_resumed_reqs),
        "n_running_scheduled": len(scheduled_running_reqs),
        "n_running_total": len(self.running),
        "n_waiting": len(self.waiting),
        "n_preempted": len(preempted_reqs),
        "preempted_ids": [r.request_id for r in preempted_reqs],
        "per_req": per_req,
    }

    try:
        fh.write(_json.dumps(record) + "\n")
    except Exception as _exc:
        # Best-effort diagnostics must never break scheduling: disable
        # the logger instead of propagating the error.
        logger.warning("agentic-kv step log write failed (%r)", _exc)
        self._agentic_step_log_fh = None
        # Release the descriptor instead of leaking it once the handle
        # is dropped.
        try:
            fh.close()
        except OSError:
            # close() may fail again while flushing buffered data; the
            # logger is already disabled, so there is nothing left to do.
            pass
    self._agentic_step_id += 1
|
||||
|
||||
def _preempt_request(self, request: Request, timestamp: float) -> None:
|
||||
"""Preempt a request and put it back to the waiting queue.
|
||||
|
||||
|
||||
Reference in New Issue
Block a user