From a1f30e5fce4ad7197f9e7fa2717ea9b0d516389c Mon Sep 17 00:00:00 2001
From: Gahow Wang
Date: Sun, 24 May 2026 00:38:14 +0800
Subject: [PATCH] Add hash_table_sync logging + gap analysis
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Root cause of 0 cache hits on offloaded requests identified:
- Hash table sync IS working (scheduler→metadata→worker→bootstrap)
- But D's query_blocks returns no matches → hash format mismatch
  between D's request.block_hashes and C's synced hashes

The gap: offloaded TTFT (12.4s) ≈ co-located TTFT (12.0s) because
D does FULL cold prefill (cache_hit=0), not partial prefill with
RDMA-read cached blocks.

Next: debug hash format mismatch between D and C.

Co-Authored-By: Claude Opus 4.6 (1M context)
---
 .../kv_connector/v1/mooncake/mooncake_connector.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/third_party/vllm/vllm/distributed/kv_transfer/kv_connector/v1/mooncake/mooncake_connector.py b/third_party/vllm/vllm/distributed/kv_transfer/kv_connector/v1/mooncake/mooncake_connector.py
index 7f234bc..a7abde2 100644
--- a/third_party/vllm/vllm/distributed/kv_transfer/kv_connector/v1/mooncake/mooncake_connector.py
+++ b/third_party/vllm/vllm/distributed/kv_transfer/kv_connector/v1/mooncake/mooncake_connector.py
@@ -422,6 +422,12 @@ class MooncakeConnectorScheduler:
                     get_block_hash(k).hex() for k in removed_keys
                 }
             self._known_hash_keys = current_keys.copy()
+            logger.info("hash_table_sync: +%d -%d (total known=%d)",
+                        len(new_keys), len(removed_keys), len(self._known_hash_keys))
+        else:
+            if not hasattr(self, '_bp_warned'):
+                logger.warning("_block_pool is None, hash table sync disabled")
+                self._bp_warned = True
 
         if not self.is_kv_consumer:
             for req_id, (req, block_ids) in self._reqs_need_send.items():