From 657cd36f3dbb824a2c7449e5bc47ac743afc5cd7 Mon Sep 17 00:00:00 2001 From: Gahow Wang Date: Fri, 29 May 2026 18:18:59 +0800 Subject: [PATCH] Gate evict_sent_blocks behind VLLM_EVICT_SENT_BLOCKS Fork commit e13391e unconditionally evicts sent blocks from the prefix cache on every KV transfer. That is correct only for session MIGRATION (the source won't see the session again); for plain PD-disagg producer->consumer transfers it destroys cross-turn producer reuse and contaminates PD reuse experiments. Default OFF; enable for migration runs via VLLM_EVICT_SENT_BLOCKS=1. Co-Authored-By: Claude Opus 4.8 --- third_party/vllm/vllm/v1/core/sched/scheduler.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/third_party/vllm/vllm/v1/core/sched/scheduler.py b/third_party/vllm/vllm/v1/core/sched/scheduler.py index 6f931ad..9a0319d 100644 --- a/third_party/vllm/vllm/v1/core/sched/scheduler.py +++ b/third_party/vllm/vllm/v1/core/sched/scheduler.py @@ -2218,7 +2218,12 @@ class Scheduler(SchedulerInterface): for group in self.kv_cache_manager.get_block_ids(req_id): sent_block_ids.update(group) self._free_blocks(self.requests[req_id]) - if sent_block_ids: + # e13391e: evict sent blocks from the prefix cache. Intended ONLY for + # session MIGRATION (the source worker won't see the session again). + # For plain PD-disagg producer->consumer transfers this destroys + # cross-turn producer reuse, so it is gated OFF by default and enabled + # only for migration experiments via VLLM_EVICT_SENT_BLOCKS=1. + if sent_block_ids and __import__("os").environ.get("VLLM_EVICT_SENT_BLOCKS") == "1": self.kv_cache_manager.evict_blocks(sent_block_ids) def _update_requests_with_invalid_blocks(