# Nightly release of the SGLang ROCm (AMD) Docker images.
#
# `publish` builds docker/rocm.Dockerfile once per GPU architecture, pushes the
# result to rocm/sgl-dev and then re-tags and pushes the same image to
# lmsysorg/sglang-rocm. `cache` (currently disabled) pulls the freshly published
# image and saves it as a tarball on the runner's local disk.
name: Release Docker Images Nightly (AMD)

on:
  workflow_dispatch:
  schedule:
    - cron: '0 12 * * *'

concurrency:
  # A PR number if a pull request and otherwise the commit hash. The workflow
  # name is prepended to avoid conflicts between different workflows.
  # This workflow only has schedule / workflow_dispatch triggers, so the key
  # is expected to fall back to the commit hash: a newer run on the same
  # commit cancels queued and in-progress runs.
  group: ${{ github.workflow }}-${{ github.event.number || github.sha }}
  cancel-in-progress: true

jobs:
  publish:
    if: github.repository == 'sgl-project/sglang'
    runs-on: amd-docker-scale
    environment: 'prod'
    strategy:
      fail-fast: false
      matrix:
        gpu_arch: ['gfx942', 'gfx950']
        build_type: ['all']
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          # Full history and tags: the version step below lists release tags.
          fetch-depth: 0

      - name: "Set Date"
        run: |
          echo "DATE=$(date +%Y%m%d)" >> "$GITHUB_ENV"

      - name: Get version from latest tag
        id: version
        # NOTE: `pipefail` is intentionally not enabled here; `head -1` may
        # close the pipe early and the empty-result check below is the guard.
        run: |
          # Get the latest version tag sorted by version number (e.g., v0.5.7 -> 0.5.7)
          VERSION=$(git tag -l 'v[0-9]*' --sort=-v:refname | head -1 | sed 's/^v//')

          if [ -z "$VERSION" ]; then
            echo "::error::Could not determine version from git tags"
            exit 1
          fi

          # Get short commit hash of current HEAD
          COMMIT_HASH=$(git rev-parse --short HEAD)

          # Compose pretend version for setuptools_scm:
          # <latest tag>.dev<DATE>+g<short hash>, e.g., 0.5.7.dev20260129+g1a2b3c4
          # DATE comes from the "Set Date" step via GITHUB_ENV.
          PRETEND_VERSION="${VERSION}.dev${DATE}+g${COMMIT_HASH}"

          echo "version=${VERSION}" >> "$GITHUB_OUTPUT"
          echo "pretend_version=${PRETEND_VERSION}" >> "$GITHUB_OUTPUT"
          echo "Detected version: ${VERSION}"
          echo "Pretend version for pip: ${PRETEND_VERSION}"

      - name: Login to Docker Hub (AMD)
        uses: docker/login-action@v2
        with:
          username: ${{ secrets.DOCKERHUB_AMD_USERNAME }}
          password: ${{ secrets.DOCKERHUB_AMD_TOKEN }}

      - name: Build and Push to rocm/sgl-dev
        # Expression values are handed to the script through the environment
        # instead of being spliced into the script text, so a value such as
        # the ref name can never be interpreted as shell syntax.
        env:
          VERSION: ${{ steps.version.outputs.version }}
          PRETEND_VERSION: ${{ steps.version.outputs.pretend_version }}
          GPU_ARCH: ${{ matrix.gpu_arch }}
          BUILD_TYPE: ${{ matrix.build_type }}
          SGL_BRANCH: ${{ github.ref_name }}
        run: |
          set -euo pipefail

          echo "Version: ${VERSION}"
          echo "Pretend version: ${PRETEND_VERSION}"

          # Map the GPU architecture to the ROCm/product suffix used in the tag.
          if [ "${GPU_ARCH}" = "gfx942" ]; then
            rocm_tag="rocm700-mi30x"
          elif [ "${GPU_ARCH}" = "gfx950" ]; then
            rocm_tag="rocm700-mi35x"
          else
            echo "Unsupported gfx arch"
            exit 1
          fi

          tag="v${VERSION}-${rocm_tag}"
          image_tag="${tag}-${DATE}"

          # Exported for the later re-tag/push step.
          echo "IMAGE_TAG=${image_tag}" >> "$GITHUB_ENV"

          docker build . -f docker/rocm.Dockerfile \
            --build-arg "SGL_BRANCH=${SGL_BRANCH}" \
            --build-arg "BUILD_TYPE=${BUILD_TYPE}" \
            --build-arg "GPU_ARCH=${GPU_ARCH}" \
            --build-arg ENABLE_MORI=1 \
            --build-arg NIC_BACKEND=ainic \
            --build-arg "SETUPTOOLS_SCM_PRETEND_VERSION=${PRETEND_VERSION}" \
            -t "rocm/sgl-dev:${image_tag}" \
            --no-cache
          docker push "rocm/sgl-dev:${image_tag}"

      - name: Login to Docker Hub (lmsys)
        uses: docker/login-action@v2
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}

      - name: Push to lmsysorg/sglang-rocm
        # IMAGE_TAG was written to GITHUB_ENV by the build step.
        run: |
          set -euo pipefail
          docker tag "rocm/sgl-dev:${IMAGE_TAG}" "lmsysorg/sglang-rocm:${IMAGE_TAG}"
          docker push "lmsysorg/sglang-rocm:${IMAGE_TAG}"

  # Temporarily disable docker cache seeding until performant storage is in place
  cache:
    if: false
    # if: always() && github.repository == 'sgl-project/sglang'
    runs-on: linux-mi300-gpu-1
    environment: 'prod'
    needs: publish
    strategy:
      fail-fast: false
      matrix:
        gpu_arch: ['gfx942']
        build_type: ['all']
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          # Full history and tags: the version step below lists release tags.
          fetch-depth: 0

      - name: "Set Date"
        run: |
          echo "DATE=$(date +%Y%m%d)" >> "$GITHUB_ENV"

      - name: Get version from latest tag
        id: version
        # NOTE: `pipefail` is intentionally not enabled here; `head -1` may
        # close the pipe early and the empty-result check below is the guard.
        run: |
          # Get the latest version tag sorted by version number (e.g., v0.5.7 -> 0.5.7)
          VERSION=$(git tag -l 'v[0-9]*' --sort=-v:refname | head -1 | sed 's/^v//')

          if [ -z "$VERSION" ]; then
            echo "::error::Could not determine version from git tags"
            exit 1
          fi

          echo "version=${VERSION}" >> "$GITHUB_OUTPUT"
          echo "Detected version: ${VERSION}"

      - name: Login to Docker Hub
        uses: docker/login-action@v2
        with:
          username: ${{ secrets.DOCKERHUB_AMD_USERNAME }}
          password: ${{ secrets.DOCKERHUB_AMD_TOKEN }}

      - name: Pull and Save Docker Image to Cache
        # Expression values are passed via the environment rather than being
        # spliced into the script text.
        env:
          VERSION: ${{ steps.version.outputs.version }}
          GPU_ARCH: ${{ matrix.gpu_arch }}
          BUILD_TYPE: ${{ matrix.build_type }}
        run: |
          set -euxo pipefail

          echo "Version: ${VERSION}"

          if [ "${GPU_ARCH}" = "gfx942" ]; then
            rocm_tag="rocm700-mi30x"
          else
            echo "Unsupported gfx arch"
            exit 1
          fi

          tag="v${VERSION}-${rocm_tag}"

          if [ "${BUILD_TYPE}" = "all" ]; then
            tag_suffix=""
          else
            echo "Unsupported build type"
            exit 1
          fi

          image="rocm/sgl-dev:${tag}-${DATE}${tag_suffix}"

          # Determine target cache file name based on ROCm variant
          if [[ "${rocm_tag}" == rocm700* ]]; then
            final_path="/home/runner/sgl-data/docker/image-700.tar"
          else
            echo "Unexpected ROCm tag: ${rocm_tag}"
            exit 1
          fi

          # Write to a temp file first and rename at the end, so the final
          # path is only replaced once the save has completed.
          tmp_path="${final_path}.tmp"
          # Remove a partial tarball if pull/save fails; after a successful
          # `mv` the temp file no longer exists and this is a no-op.
          trap 'rm -f "${tmp_path}"' EXIT

          echo "Pulling image: ${image}"
          docker pull "${image}"

          echo "Saving to temp file: ${tmp_path}"
          docker save "${image}" -o "${tmp_path}"

          echo "Moving to final path: ${final_path}"
          mv -f "${tmp_path}" "${final_path}"

          echo "Cache populated successfully at ${final_path}"