Set PYTHONHASHSEED=42 for elastic mode to ensure consistent block hashes
Root cause confirmed: NONE_HASH = os.urandom(32) differs between the scheduler and the bootstrap server, even within the same process, because init_none_hash is called separately by each import path. Setting PYTHONHASHSEED makes it deterministic: NONE_HASH = hash_fn(seed), which is the same across all code paths.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -133,6 +133,7 @@ launch_instances() {
|
|||||||
local logfile="$OUTDIR/vllm_inst_${i}.log"
|
local logfile="$OUTDIR/vllm_inst_${i}.log"
|
||||||
|
|
||||||
if [ "$MODE" = "elastic" ]; then
|
if [ "$MODE" = "elastic" ]; then
|
||||||
|
PYTHONHASHSEED=42 \
|
||||||
VLLM_MOONCAKE_BOOTSTRAP_PORT=$((8998 + i)) \
|
VLLM_MOONCAKE_BOOTSTRAP_PORT=$((8998 + i)) \
|
||||||
MASTER_PORT=$master \
|
MASTER_PORT=$master \
|
||||||
CUDA_VISIBLE_DEVICES=$i \
|
CUDA_VISIBLE_DEVICES=$i \
|
||||||
|
|||||||
@@ -252,11 +252,17 @@ class MooncakeBootstrapServer:
|
|||||||
prev_hash = block_hash
|
prev_hash = block_hash
|
||||||
|
|
||||||
bid = self._hash_table.get(block_hash.hex())
|
bid = self._hash_table.get(block_hash.hex())
|
||||||
|
if i == 0:
|
||||||
|
table_sample = next(iter(self._hash_table)) if self._hash_table else "empty"
|
||||||
|
logger.info(
|
||||||
|
"_lookup_by_tokens: block0 hash=%s, NONE_HASH=%s, table_sample=%s",
|
||||||
|
block_hash.hex()[:16], NONE_HASH.hex()[:16], table_sample[:16])
|
||||||
if bid is not None:
|
if bid is not None:
|
||||||
block_ids.append(bid)
|
block_ids.append(bid)
|
||||||
pinned_ids.append(bid)
|
pinned_ids.append(bid)
|
||||||
else:
|
else:
|
||||||
block_ids.append(None)
|
if i == 0:
|
||||||
|
block_ids.append(None)
|
||||||
break
|
break
|
||||||
|
|
||||||
return block_ids, pinned_ids
|
return block_ids, pinned_ids
|
||||||
|
|||||||
Reference in New Issue
Block a user