Evict migrated blocks from prefix cache after KV send completes
Previously, after a session migrated from C to D via offload, C's blocks were freed to the LRU tail (the most-recently-used position), which made them the last candidates for eviction. Because the session will not return to C, those blocks are dead weight that occupies cache capacity.

This change captures the block IDs before calling _free_blocks and then calls evict_blocks to remove them from the prefix cache hash table, so that the blocks can be reused sooner by active sessions.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -2116,7 +2116,12 @@ class Scheduler(SchedulerInterface):
|
|||||||
if req_id not in self.requests:
|
if req_id not in self.requests:
|
||||||
logger.warning("Skipping finished_sending for unknown request %s (already aborted?)", req_id)
|
logger.warning("Skipping finished_sending for unknown request %s (already aborted?)", req_id)
|
||||||
continue
|
continue
|
||||||
|
sent_block_ids: set[int] = set()
|
||||||
|
for group in self.kv_cache_manager.get_block_ids(req_id):
|
||||||
|
sent_block_ids.update(group)
|
||||||
self._free_blocks(self.requests[req_id])
|
self._free_blocks(self.requests[req_id])
|
||||||
|
if sent_block_ids:
|
||||||
|
self.kv_cache_manager.evict_blocks(sent_block_ids)
|
||||||
|
|
||||||
def _update_requests_with_invalid_blocks(
|
def _update_requests_with_invalid_blocks(
|
||||||
self,
|
self,
|
||||||
|
|||||||
Reference in New Issue
Block a user