Fix B2 migration: correct offload call signature (c_inst/d_inst order + cache_hit arg)
The session migration path was calling _handle_cached_prefill_offload with c_inst/d_inst swapped and the cache_hit argument missing, causing a TypeError on every migration attempt (13 of 41 errors in the test run).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -562,14 +562,17 @@ async def _handle_combined(api, req_data, token_ids, input_length, session_id, h
|
||||
}
|
||||
session_affinity_combined[session_id] = mig_tgt_idx
|
||||
offload_mode = getattr(global_args, 'offload_mode', 'cached_prefill')
|
||||
push_cache_hit = cache_hits[mig_src_idx]
|
||||
if offload_mode == "cached_prefill":
|
||||
return await _handle_cached_prefill_offload(
|
||||
api, req_data, headers, token_ids, input_length,
|
||||
mig_tgt, mig_src, estimated_new, breakdown)
|
||||
mig_src, mig_tgt, push_cache_hit, estimated_new,
|
||||
breakdown)
|
||||
else:
|
||||
return await _handle_direct_read_offload(
|
||||
api, req_data, headers, token_ids, input_length,
|
||||
mig_tgt, mig_src, estimated_new, breakdown)
|
||||
mig_src, mig_tgt, push_cache_hit, estimated_new,
|
||||
breakdown)
|
||||
|
||||
def _current_offloads() -> int:
    """Return the sum of ``active_p_offloads`` over ``combined_instances``."""
    total = 0
    for instance in combined_instances:
        total += instance.active_p_offloads
    return total
|
||||
|
||||
Reference in New Issue
Block a user