diff --git a/third_party/vllm/vllm/v1/core/sched/scheduler.py b/third_party/vllm/vllm/v1/core/sched/scheduler.py index f817deb..528fc40 100644 --- a/third_party/vllm/vllm/v1/core/sched/scheduler.py +++ b/third_party/vllm/vllm/v1/core/sched/scheduler.py @@ -2116,7 +2116,12 @@ class Scheduler(SchedulerInterface): if req_id not in self.requests: logger.warning("Skipping finished_sending for unknown request %s (already aborted?)", req_id) continue + sent_block_ids: set[int] = set() + for group in self.kv_cache_manager.get_block_ids(req_id): + sent_block_ids.update(group) self._free_blocks(self.requests[req_id]) + if sent_block_ids: + self.kv_cache_manager.evict_blocks(sent_block_ids) def _update_requests_with_invalid_blocks( self,