Set PYTHONHASHSEED=42 for elastic mode to ensure consistent block hashes

Root cause confirmed: when PYTHONHASHSEED is unset, NONE_HASH is initialized
to os.urandom(32), so its value differs between the scheduler and the
bootstrap server even within the same process (init_none_hash is called
separately by each import path). Setting PYTHONHASHSEED makes it
deterministic: NONE_HASH = hash_fn(seed), which is the same across all
code paths.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-24 01:27:52 +08:00
parent ee2301ae17
commit 7e91b83d88
2 changed files with 8 additions and 1 deletions

View File

@@ -133,6 +133,7 @@ launch_instances() {
local logfile="$OUTDIR/vllm_inst_${i}.log" local logfile="$OUTDIR/vllm_inst_${i}.log"
if [ "$MODE" = "elastic" ]; then if [ "$MODE" = "elastic" ]; then
PYTHONHASHSEED=42 \
VLLM_MOONCAKE_BOOTSTRAP_PORT=$((8998 + i)) \ VLLM_MOONCAKE_BOOTSTRAP_PORT=$((8998 + i)) \
MASTER_PORT=$master \ MASTER_PORT=$master \
CUDA_VISIBLE_DEVICES=$i \ CUDA_VISIBLE_DEVICES=$i \

View File

@@ -252,11 +252,17 @@ class MooncakeBootstrapServer:
prev_hash = block_hash prev_hash = block_hash
bid = self._hash_table.get(block_hash.hex()) bid = self._hash_table.get(block_hash.hex())
if i == 0:
table_sample = next(iter(self._hash_table)) if self._hash_table else "empty"
logger.info(
"_lookup_by_tokens: block0 hash=%s, NONE_HASH=%s, table_sample=%s",
block_hash.hex()[:16], NONE_HASH.hex()[:16], table_sample[:16])
if bid is not None: if bid is not None:
block_ids.append(bid) block_ids.append(bid)
pinned_ids.append(bid) pinned_ids.append(bid)
else: else:
block_ids.append(None) if i == 0:
block_ids.append(None)
break break
return block_ids, pinned_ids return block_ids, pinned_ids