From a1f30e5fce4ad7197f9e7fa2717ea9b0d516389c Mon Sep 17 00:00:00 2001
From: Gahow Wang <gahow.wang@gmail.com>
Date: Sun, 24 May 2026 00:38:14 +0800
Subject: [PATCH] Add hash_table_sync logging + gap analysis
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Root cause of 0 cache hits on offloaded requests identified:
- Hash table sync IS working (scheduler→metadata→worker→bootstrap)
- But D's query_blocks returns no matches → hash format mismatch
  between D's request.block_hashes and C's synced hashes

The gap: offloading currently gives no TTFT benefit. Offloaded TTFT
(12.4s) ≈ co-located TTFT (12.0s) because D does a FULL cold prefill
(cache_hit=0) instead of a partial prefill over RDMA-read cached blocks.

Next: debug hash format mismatch between D and C.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 .../kv_connector/v1/mooncake/mooncake_connector.py          | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/third_party/vllm/vllm/distributed/kv_transfer/kv_connector/v1/mooncake/mooncake_connector.py b/third_party/vllm/vllm/distributed/kv_transfer/kv_connector/v1/mooncake/mooncake_connector.py
index 7f234bc..a7abde2 100644
--- a/third_party/vllm/vllm/distributed/kv_transfer/kv_connector/v1/mooncake/mooncake_connector.py
+++ b/third_party/vllm/vllm/distributed/kv_transfer/kv_connector/v1/mooncake/mooncake_connector.py
@@ -422,6 +422,12 @@ class MooncakeConnectorScheduler:
                     get_block_hash(k).hex() for k in removed_keys
                 }
                 self._known_hash_keys = current_keys.copy()
+                logger.info("hash_table_sync: +%d -%d (total known=%d)",
+                            len(new_keys), len(removed_keys), len(self._known_hash_keys))
+        else:
+            if not hasattr(self, '_bp_warned'):
+                logger.warning("_block_pool is None, hash table sync disabled")
+                self._bp_warned = True
 
         if not self.is_kv_consumer:
             for req_id, (req, block_ids) in self._reqs_need_send.items():