third_party/vllm/ now tracked in git for direct patch management.
Based on vLLM v0.18.1 release with one patch applied:
  vllm/v1/core/sched/scheduler.py:
    Replace the fatal assert with a graceful skip when a KV transfer callback
    arrives for an already-aborted request during PD disaggregated serving.
Future vLLM modifications should be made directly in third_party/vllm/
and committed normally. The patches/ directory is kept as documentation
of what changed from upstream.
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
35 lines
830 B
Bash
Executable File
35 lines
830 B
Bash
Executable File
#!/bin/bash
#
# Build the `vllm-test` target of docker/Dockerfile.cpu and push it to the
# given registry, tagged "<commit>-cpu". The build is skipped when a manifest
# for that tag can already be inspected in the registry.
#
# Arguments:
#   $1  registry  host passed to `docker login` and used as the image prefix
#   $2  repo      repository name under the registry
#   $3  commit    commit identifier used for the image tag and the
#                 `buildkite_commit` build argument
#
# Exit status: 0 when the image already exists or was built and pushed,
# 1 on bad usage, otherwise the status of the first failing command.

# -e aborts on the first failing command, -u rejects unset variables, and
# pipefail makes the login pipeline fail when `aws` itself fails instead of
# only reporting the status of `docker login`.
set -euo pipefail

if [[ $# -lt 3 ]]; then
  echo "Usage: $0 <registry> <repo> <commit>"
  exit 1
fi

REGISTRY=$1
REPO=$2
BUILDKITE_COMMIT=$3

# Fully qualified image reference, assembled once and reused below.
readonly IMAGE="${REGISTRY}/${REPO}:${BUILDKITE_COMMIT}-cpu"

# authenticate with AWS ECR
aws ecr-public get-login-password --region us-east-1 \
  | docker login --username AWS --password-stdin "$REGISTRY"

# skip build if image already exists
# Decide on the exit status of `docker manifest inspect` rather than on
# whether it printed anything; stderr is left visible so registry or auth
# errors still show up in the log.
if docker manifest inspect "$IMAGE" >/dev/null; then
  echo "Image found"
  exit 0
fi
echo "Image not found, proceeding with build..."

# build
docker build --file docker/Dockerfile.cpu \
  --build-arg max_jobs=16 \
  --build-arg buildkite_commit="$BUILDKITE_COMMIT" \
  --build-arg VLLM_CPU_X86=true \
  --tag "$IMAGE" \
  --target vllm-test \
  --progress plain .

# push
docker push "$IMAGE"