name: Release Docker Runtime Images
#
# This workflow builds and publishes runtime Docker images (production-optimized, ~50% smaller):
#   - lmsysorg/sglang:v{version}-runtime, lmsysorg/sglang:latest-runtime
#   - lmsysorg/sglang:v{version}-cu130-runtime, lmsysorg/sglang:latest-cu130-runtime
#
# Flow:
#   1. publish-x86 / publish-arm64 build the `runtime` target of docker/Dockerfile
#      for CUDA 12.9.1 and CUDA 13.0.1, push each image by digest (no tag), and
#      upload the resulting digests as artifacts.
#   2. create-manifests downloads the digests and creates the multi-arch tags
#      listed above with `docker buildx imagetools create`.
#
on:
  push:
    tags:
      - "v[0-9]+.*"
  workflow_dispatch:
    inputs:
      version:
        description: "Version to build (without v prefix, e.g., 0.5.7)"
        required: true

# Least privilege: the jobs only read the repository; registry access uses the
# Docker Hub secrets, not GITHUB_TOKEN.
permissions:
  contents: read

jobs:
  # Builds and pushes (by digest) the linux/amd64 runtime images.
  publish-x86:
    if: github.repository == 'sgl-project/sglang'
    environment: "prod"
    strategy:
      matrix:
        variant:
          - cuda_version: "12.9.1"
            build_type: "all"
            grace_blackwell: 0
    runs-on: x64-docker-build-node
    steps:
      - name: Delete huge unnecessary tools folder
        run: rm -rf /opt/hostedtoolcache

      - name: Checkout repository
        uses: actions/checkout@v4

      # Pinned to a release tag instead of the moving `main` branch: this job
      # has access to the Docker Hub credentials.
      - name: Free disk space
        uses: jlumbroso/free-disk-space@v1.3.1
        with:
          tool-cache: false
          docker-images: false
          android: true
          dotnet: true
          haskell: true
          large-packages: true
          swap-storage: false

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Login to Docker Hub
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}

      # Resolves the release version from the manual input or from the pushed
      # tag and exposes it as `steps.version.outputs.version`.
      - name: Get version from tag
        id: version
        shell: bash
        env:
          # Passed through the environment rather than interpolated into the
          # script so that a crafted input cannot inject shell commands.
          EVENT_NAME: ${{ github.event_name }}
          DISPATCH_VERSION: ${{ github.event.inputs.version }}
        run: |
          if [ "${EVENT_NAME}" = "workflow_dispatch" ]; then
            VERSION="${DISPATCH_VERSION}"
          else
            # Extract version from tag (e.g., v0.5.7 -> 0.5.7)
            VERSION="${GITHUB_REF_NAME#v}"
          fi

          # Validate version format
          if [ -z "${VERSION}" ]; then
            echo "::error::Version is empty"
            exit 1
          fi
          # X.Y.Z optionally followed by a suffix made only of characters that
          # are valid in a Docker tag; the whole string must match.
          version_re='^[0-9]+\.[0-9]+\.[0-9]+[A-Za-z0-9._-]*$'
          if [[ ! "${VERSION}" =~ ${version_re} ]]; then
            echo "::error::Invalid version format: ${VERSION} (expected: X.Y.Z)"
            exit 1
          fi

          echo "version=${VERSION}" >> "${GITHUB_OUTPUT}"

      # CUDA version, build type and GRACE_BLACKWELL come from the matrix entry.
      - name: Build and Push AMD64 Runtime
        env:
          RELEASE_VERSION: ${{ steps.version.outputs.version }}
        run: |
          docker buildx build \
            --target runtime \
            --platform linux/amd64 \
            --output type=image,name=lmsysorg/sglang,push-by-digest=true,name-canonical=true,push=true \
            -f docker/Dockerfile \
            --build-arg CUDA_VERSION=${{ matrix.variant.cuda_version }} \
            --build-arg BUILD_TYPE=${{ matrix.variant.build_type }} \
            --build-arg GRACE_BLACKWELL=${{ matrix.variant.grace_blackwell }} \
            --build-arg INSTALL_FLASHINFER_JIT_CACHE=1 \
            --build-arg SGL_VERSION="${RELEASE_VERSION}" \
            --metadata-file /tmp/metadata-cu129-runtime.json \
            --no-cache \
            .

          # The image is pushed without a tag; record its digest for the
          # create-manifests job.
          DIGEST=$(python3 -c "import json; print(json.load(open('/tmp/metadata-cu129-runtime.json'))['containerimage.digest'])")
          echo "Pushed digest: ${DIGEST}"
          echo "${DIGEST}" > /tmp/digest-cu129-amd64-runtime.txt

      # Same build with CUDA_VERSION and GRACE_BLACKWELL hard-coded.
      - name: Build and Push AMD64 Runtime (CUDA 13)
        env:
          RELEASE_VERSION: ${{ steps.version.outputs.version }}
        run: |
          docker buildx build \
            --target runtime \
            --platform linux/amd64 \
            --output type=image,name=lmsysorg/sglang,push-by-digest=true,name-canonical=true,push=true \
            -f docker/Dockerfile \
            --build-arg CUDA_VERSION=13.0.1 \
            --build-arg BUILD_TYPE=${{ matrix.variant.build_type }} \
            --build-arg INSTALL_FLASHINFER_JIT_CACHE=1 \
            --build-arg GRACE_BLACKWELL=0 \
            --build-arg SGL_VERSION="${RELEASE_VERSION}" \
            --metadata-file /tmp/metadata-cu130-runtime.json \
            --no-cache \
            .

          DIGEST=$(python3 -c "import json; print(json.load(open('/tmp/metadata-cu130-runtime.json'))['containerimage.digest'])")
          echo "Pushed digest: ${DIGEST}"
          echo "${DIGEST}" > /tmp/digest-cu130-amd64-runtime.txt

      - name: Upload digests
        uses: actions/upload-artifact@v4
        with:
          name: digests-amd64
          path: /tmp/digest-*.txt
          retention-days: 1

  # Builds and pushes (by digest) the linux/arm64 runtime images.
  publish-arm64:
    if: github.repository == 'sgl-project/sglang'
    environment: "prod"
    strategy:
      matrix:
        variant:
          - cuda_version: "12.9.1"
            build_type: "all"
            grace_blackwell: 1
    runs-on: arm-docker-build-node
    steps:
      - name: Delete huge unnecessary tools folder
        run: rm -rf /opt/hostedtoolcache

      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Login to Docker Hub
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}

      # Resolves the release version from the manual input or from the pushed
      # tag and exposes it as `steps.version.outputs.version`.
      - name: Get version from tag
        id: version
        shell: bash
        env:
          # Passed through the environment rather than interpolated into the
          # script so that a crafted input cannot inject shell commands.
          EVENT_NAME: ${{ github.event_name }}
          DISPATCH_VERSION: ${{ github.event.inputs.version }}
        run: |
          if [ "${EVENT_NAME}" = "workflow_dispatch" ]; then
            VERSION="${DISPATCH_VERSION}"
          else
            # Extract version from tag (e.g., v0.5.7 -> 0.5.7)
            VERSION="${GITHUB_REF_NAME#v}"
          fi

          # Validate version format
          if [ -z "${VERSION}" ]; then
            echo "::error::Version is empty"
            exit 1
          fi
          # X.Y.Z optionally followed by a suffix made only of characters that
          # are valid in a Docker tag; the whole string must match.
          version_re='^[0-9]+\.[0-9]+\.[0-9]+[A-Za-z0-9._-]*$'
          if [[ ! "${VERSION}" =~ ${version_re} ]]; then
            echo "::error::Invalid version format: ${VERSION} (expected: X.Y.Z)"
            exit 1
          fi

          echo "version=${VERSION}" >> "${GITHUB_OUTPUT}"

      # CUDA version, build type and GRACE_BLACKWELL come from the matrix entry.
      - name: Build and Push ARM64 Runtime
        env:
          RELEASE_VERSION: ${{ steps.version.outputs.version }}
        run: |
          docker buildx build \
            --target runtime \
            --platform linux/arm64 \
            --output type=image,name=lmsysorg/sglang,push-by-digest=true,name-canonical=true,push=true \
            -f docker/Dockerfile \
            --build-arg CUDA_VERSION=${{ matrix.variant.cuda_version }} \
            --build-arg BUILD_TYPE=${{ matrix.variant.build_type }} \
            --build-arg GRACE_BLACKWELL=${{ matrix.variant.grace_blackwell }} \
            --build-arg INSTALL_FLASHINFER_JIT_CACHE=1 \
            --build-arg SGL_VERSION="${RELEASE_VERSION}" \
            --metadata-file /tmp/metadata-cu129-runtime.json \
            --no-cache \
            .

          # The image is pushed without a tag; record its digest for the
          # create-manifests job.
          DIGEST=$(python3 -c "import json; print(json.load(open('/tmp/metadata-cu129-runtime.json'))['containerimage.digest'])")
          echo "Pushed digest: ${DIGEST}"
          echo "${DIGEST}" > /tmp/digest-cu129-arm64-runtime.txt

      # NOTE(review): unlike the other three builds in this workflow, this one
      # does not pass INSTALL_FLASHINFER_JIT_CACHE=1 - confirm that the
      # omission is intentional.
      - name: Build and Push ARM64 Runtime (CUDA 13)
        env:
          RELEASE_VERSION: ${{ steps.version.outputs.version }}
        run: |
          docker buildx build \
            --target runtime \
            --platform linux/arm64 \
            --output type=image,name=lmsysorg/sglang,push-by-digest=true,name-canonical=true,push=true \
            -f docker/Dockerfile \
            --build-arg CUDA_VERSION=13.0.1 \
            --build-arg BUILD_TYPE=${{ matrix.variant.build_type }} \
            --build-arg GRACE_BLACKWELL=1 \
            --build-arg SGL_VERSION="${RELEASE_VERSION}" \
            --metadata-file /tmp/metadata-cu130-runtime.json \
            --no-cache \
            .

          DIGEST=$(python3 -c "import json; print(json.load(open('/tmp/metadata-cu130-runtime.json'))['containerimage.digest'])")
          echo "Pushed digest: ${DIGEST}"
          echo "${DIGEST}" > /tmp/digest-cu130-arm64-runtime.txt

      - name: Upload digests
        uses: actions/upload-artifact@v4
        with:
          name: digests-arm64
          path: /tmp/digest-*.txt
          retention-days: 1

  # Combines the per-architecture digests into the multi-arch runtime tags.
  create-manifests:
    runs-on: ubuntu-22.04
    needs: [publish-x86, publish-arm64]
    if: github.repository == 'sgl-project/sglang'
    environment: "prod"
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Login to Docker Hub
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}

      # Resolves the release version from the manual input or from the pushed
      # tag and exposes it as `steps.version.outputs.version`.
      - name: Get version from tag
        id: version
        shell: bash
        env:
          # Passed through the environment rather than interpolated into the
          # script so that a crafted input cannot inject shell commands.
          EVENT_NAME: ${{ github.event_name }}
          DISPATCH_VERSION: ${{ github.event.inputs.version }}
        run: |
          if [ "${EVENT_NAME}" = "workflow_dispatch" ]; then
            VERSION="${DISPATCH_VERSION}"
          else
            # Extract version from tag (e.g., v0.5.7 -> 0.5.7)
            VERSION="${GITHUB_REF_NAME#v}"
          fi

          # Validate version format
          if [ -z "${VERSION}" ]; then
            echo "::error::Version is empty"
            exit 1
          fi
          # X.Y.Z optionally followed by a suffix made only of characters that
          # are valid in a Docker tag; the whole string must match.
          version_re='^[0-9]+\.[0-9]+\.[0-9]+[A-Za-z0-9._-]*$'
          if [[ ! "${VERSION}" =~ ${version_re} ]]; then
            echo "::error::Invalid version format: ${VERSION} (expected: X.Y.Z)"
            exit 1
          fi

          echo "version=${VERSION}" >> "${GITHUB_OUTPUT}"

      - name: Download amd64 digests
        uses: actions/download-artifact@v4
        with:
          name: digests-amd64
          path: /tmp/digests/amd64

      - name: Download arm64 digests
        uses: actions/download-artifact@v4
        with:
          name: digests-arm64
          path: /tmp/digests/arm64

      - name: Create multi-arch manifests
        env:
          RELEASE_VERSION: ${{ steps.version.outputs.version }}
        run: |
          CU129_AMD64_RT=$(cat /tmp/digests/amd64/digest-cu129-amd64-runtime.txt)
          CU130_AMD64_RT=$(cat /tmp/digests/amd64/digest-cu130-amd64-runtime.txt)
          CU129_ARM64_RT=$(cat /tmp/digests/arm64/digest-cu129-arm64-runtime.txt)
          CU130_ARM64_RT=$(cat /tmp/digests/arm64/digest-cu130-arm64-runtime.txt)

          # Check every digest up front so that a bad artifact cannot leave
          # only some of the tags below published.
          for digest in \
            "${CU129_AMD64_RT}" "${CU130_AMD64_RT}" \
            "${CU129_ARM64_RT}" "${CU130_ARM64_RT}"; do
            case "${digest}" in
              sha256:*) ;;
              *)
                echo "::error::Unexpected image digest: '${digest}'"
                exit 1
                ;;
            esac
          done

          # Create versioned runtime manifest
          docker buildx imagetools create \
            -t "lmsysorg/sglang:v${RELEASE_VERSION}-runtime" \
            "lmsysorg/sglang@${CU129_AMD64_RT}" \
            "lmsysorg/sglang@${CU129_ARM64_RT}"

          # Create latest runtime manifest
          docker buildx imagetools create \
            -t lmsysorg/sglang:latest-runtime \
            "lmsysorg/sglang@${CU129_AMD64_RT}" \
            "lmsysorg/sglang@${CU129_ARM64_RT}"

          # Create versioned CUDA 13 runtime manifest
          docker buildx imagetools create \
            -t "lmsysorg/sglang:v${RELEASE_VERSION}-cu130-runtime" \
            "lmsysorg/sglang@${CU130_AMD64_RT}" \
            "lmsysorg/sglang@${CU130_ARM64_RT}"

          # Create latest CUDA 13 runtime manifest
          docker buildx imagetools create \
            -t lmsysorg/sglang:latest-cu130-runtime \
            "lmsysorg/sglang@${CU130_AMD64_RT}" \
            "lmsysorg/sglang@${CU130_ARM64_RT}"