# agentic-pd-hybrid/third_party/sglang/.github/workflows/pr-test-npu.yml

name: PR Test (NPU)

# Triggers: pushes and pull requests targeting main, manual dispatch, and reuse from
# another workflow through workflow_call (the only trigger that declares inputs here).
on:
  push:
    branches: [main]
  pull_request:
    branches: [main]
  workflow_dispatch:
  workflow_call:
    inputs:
      ref:
        # Consumed as `inputs.ref || github.ref` by the concurrency group and by
        # checkout steps, so an empty value falls back to the triggering ref.
        description: 'Git ref (branch, tag, or SHA) to test. If not provided, uses the ref that triggered the workflow (github.ref).'
        required: false
        type: string
        default: ''
      run_all_tests:
        # When true, check-changes skips path filtering and enables every test group.
        description: "Run all tests (for releasing or testing purpose)"
        required: false
        type: boolean
        default: false

# One concurrency group per tested ref (the explicit `ref` input, else the triggering ref).
concurrency:
  group: pr-test-npu-${{ inputs.ref || github.ref }}
  # NOTE(review): GitHub documents that a called workflow uses the caller's `github`
  # context, so `github.event_name` may never equal 'workflow_call' here and this
  # expression would always evaluate to true — confirm whether that is intended.
  cancel-in-progress: ${{ github.event_name != 'workflow_call' }}

jobs:
  # ==================== Check Changes ==================== #
  # Decides which test groups run. Each output is the string 'true' or 'false':
  #   changes_exist  - at least one group is enabled (gates pr-gate)
  #   main_package   - enables the stage-b-* jobs
  #   multimodal_gen - enables the multimodal-gen-* jobs
  check-changes:
    runs-on: ubuntu-latest
    outputs:
      changes_exist: ${{ steps.filter.outputs.main_package == 'true' || steps.filter.outputs.multimodal_gen == 'true' || steps.run-mode.outputs.run_all_tests == 'true'}}
      main_package: ${{ steps.filter.outputs.main_package == 'true' || steps.run-mode.outputs.run_all_tests == 'true' }}
      multimodal_gen: ${{ steps.filter.outputs.multimodal_gen == 'true' || steps.run-mode.outputs.run_all_tests == 'true' }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      - name: Determine run mode
        id: run-mode
        env:
          # Values reach the script through the environment rather than being
          # interpolated into the shell source.
          RUN_ALL_TESTS: ${{ inputs.run_all_tests }}
          EVENT_NAME: ${{ github.event_name }}
        run: |
          # Run every test group only when the run_all_tests input is true; otherwise
          # the path filter in the next step decides.
          # Note: github.event_name is inherited from the caller, so it cannot be used
          # to detect workflow_call; the explicit input is checked instead.
          if [[ "${RUN_ALL_TESTS}" == "true" ]]; then
            echo "run_all_tests=true" >> "$GITHUB_OUTPUT"
            echo "Run mode: ALL TESTS (run_all_tests=${RUN_ALL_TESTS})"
          else
            echo "run_all_tests=false" >> "$GITHUB_OUTPUT"
            echo "Run mode: FILTERED (triggered by ${EVENT_NAME})"
          fi

      # Skipped in run-all mode; its outputs are then empty and the job outputs above
      # fall through to the run_all_tests comparison.
      - name: Detect file changes
        id: filter
        uses: dorny/paths-filter@v3
        if: steps.run-mode.outputs.run_all_tests != 'true'
        with:
          filters: |
            main_package:
              - "python/sglang/!(multimodal_gen)/**/!(*.md)"
              - "python/pyproject_npu.toml"
              - "scripts/ci/npu/npu_ci_install_dependency.sh"
              - "test/srt/ascend/**"
              - ".github/workflows/pr-test-npu.yml"
            multimodal_gen:
              - "python/sglang/multimodal_gen/**/*.!(md|ipynb)"
              - "python/sglang/srt/**"
              - "python/pyproject_npu.toml"
              - "scripts/ci/npu/npu_ci_install_dependency.sh"
              - ".github/workflows/pr-test-npu.yml"

  # ==================== PR Gate ==================== #
  # Calls the repository's reusable gate workflow, forwarding all secrets.
  # Runs only when check-changes enabled at least one test group.
  pr-gate:
    uses: ./.github/workflows/pr-gate.yml
    secrets: inherit
    needs:
      - check-changes
    if: ${{ needs.check-changes.outputs.changes_exist == 'true' }}

  # Runs the `stage-b-test-1-npu-a2` suite on the `linux-aarch64-a2-1` runner as two
  # auto-partitioned matrix jobs. Enabled by the main_package output of check-changes.
  stage-b-test-1-npu-a2:
    needs:
      - check-changes
      - pr-gate
    if: ${{ needs.check-changes.outputs.main_package == 'true' }}
    runs-on: linux-aarch64-a2-1
    container:
      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.5.0-910b-ubuntu22.04-py3.11
    strategy:
      # A failing partition does not cancel the other one.
      fail-fast: false
      matrix:
        part: [0, 1]
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      - name: Mark repository safe
        run: git config --system --add safe.directory ${GITHUB_WORKSPACE}

      - name: Install dependencies
        env:
          GITHUB_PROXY_URL: 'https://gh-proxy.test.osinfra.cn/'
          PYPI_CACHE_URL: 'http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple'
          TORCH_CACHE_URL: 'http://cache-service.nginx-pypi-cache.svc.cluster.local/whl/cpu'
        run: |
          # Route apt and pip through the infra cache service to speed up installs.
          CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local"
          sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list
          pip config set global.index-url http://${CACHING_URL}/pypi/simple
          pip config set global.trusted-host "${CACHING_URL}"

          bash scripts/ci/npu/npu_ci_install_dependency.sh 910b
          # Stage the ShareGPT file from the daily modelscope cache into /tmp.
          cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp
          # Stage the gsm8k dataset file into /tmp.
          cp ~/.cache/modelscope/hub/datasets/tmp/test.jsonl /tmp

      - name: Run test
        timeout-minutes: 60
        env:
          HF_ENDPOINT: 'https://hf-mirror.com'
          PYTORCH_NPU_ALLOC_CONF: 'expandable_segments:True'
          SGLANG_IS_IN_CI: 'true'
          SGLANG_USE_MODELSCOPE: 'true'
          STREAMS_PER_DEVICE: '32'
          TORCH_EXTENSIONS_DIR: /tmp/torch_extensions
        run: |
          cd test
          python3 run_suite.py --hw npu --suite stage-b-test-1-npu-a2 --auto-partition-id ${{ matrix.part }} --auto-partition-size 2

  # Runs the `stage-b-test-2-npu-a2` suite on the `linux-aarch64-a2-2` runner as two
  # auto-partitioned matrix jobs. Enabled by the main_package output of check-changes.
  stage-b-test-2-npu-a2:
    needs: [check-changes, pr-gate]
    if: needs.check-changes.outputs.main_package == 'true'
    runs-on: linux-aarch64-a2-2
    strategy:
      # Matches stage-b-test-1-npu-a2: a failing partition must not cancel the other
      # one, otherwise its results are lost and it is reported as "cancelled".
      fail-fast: false
      matrix:
        part: [0, 1]
    container:
      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.5.0-910b-ubuntu22.04-py3.11
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      - name: Mark repository safe
        run: |
          git config --system --add safe.directory ${GITHUB_WORKSPACE}

      - name: Install dependencies
        env:
          TORCH_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/whl/cpu"
          PYPI_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple"
          GITHUB_PROXY_URL: "https://gh-proxy.test.osinfra.cn/"
        run: |
          # Speed up installs by routing apt and pip through the infra cache service.
          CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local"
          sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list
          pip config set global.index-url http://${CACHING_URL}/pypi/simple
          pip config set global.trusted-host "${CACHING_URL}"

          bash scripts/ci/npu/npu_ci_install_dependency.sh 910b
          # Copy the ShareGPT file from the daily modelscope cache into /tmp.
          cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp
          # Copy the gsm8k dataset file into /tmp.
          cp ~/.cache/modelscope/hub/datasets/tmp/test.jsonl /tmp

      - name: Run test
        timeout-minutes: 60
        env:
          SGLANG_USE_MODELSCOPE: true
          SGLANG_IS_IN_CI: true
          HF_ENDPOINT: https://hf-mirror.com
          TORCH_EXTENSIONS_DIR: /tmp/torch_extensions
          PYTORCH_NPU_ALLOC_CONF: "expandable_segments:True"
          STREAMS_PER_DEVICE: 32
        run: |
          cd test
          python3 run_suite.py --hw npu --suite stage-b-test-2-npu-a2 --auto-partition-id ${{ matrix.part }} --auto-partition-size 2

  # Runs the `stage-b-test-4-npu-a3` suite on the `linux-aarch64-a3-4` runner.
  # Enabled by the main_package output of check-changes.
  stage-b-test-4-npu-a3:
    needs:
      - check-changes
      - pr-gate
    if: ${{ needs.check-changes.outputs.main_package == 'true' }}
    runs-on: linux-aarch64-a3-4
    container:
      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.5.0-a3-ubuntu22.04-py3.11
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      - name: Mark repository safe
        run: git config --system --add safe.directory ${GITHUB_WORKSPACE}

      - name: Install dependencies
        env:
          GITHUB_PROXY_URL: 'https://gh-proxy.test.osinfra.cn/'
          PYPI_CACHE_URL: 'http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple'
          TORCH_CACHE_URL: 'http://cache-service.nginx-pypi-cache.svc.cluster.local/whl/cpu'
        run: |
          # Route apt and pip through the infra cache service to speed up installs.
          CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local"
          sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list
          pip config set global.index-url http://${CACHING_URL}/pypi/simple
          pip config set global.trusted-host "${CACHING_URL}"

          bash scripts/ci/npu/npu_ci_install_dependency.sh a3
          # Stage the ShareGPT file from the daily modelscope cache into /tmp.
          cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp
          # Stage the gsm8k dataset file into /tmp.
          cp ~/.cache/modelscope/hub/datasets/tmp/test.jsonl /tmp

      - name: Run test
        # NOTE(review): the step is capped at 60 minutes while --timeout-per-file is
        # 3600; if that flag is in seconds it can never fire before the step timeout
        # — confirm against run_suite.py.
        timeout-minutes: 60
        env:
          HF_ENDPOINT: 'https://hf-mirror.com'
          PYTORCH_NPU_ALLOC_CONF: 'expandable_segments:True'
          SGLANG_IS_IN_CI: 'true'
          SGLANG_USE_MODELSCOPE: 'true'
          STREAMS_PER_DEVICE: '32'
          TORCH_EXTENSIONS_DIR: /tmp/torch_extensions
        run: |
          cd test
          python3 run_suite.py --hw npu --suite stage-b-test-4-npu-a3 --timeout-per-file 3600


  # Runs the `stage-b-test-16-npu-a3` suite on the `linux-aarch64-a3-16` runner.
  # Enabled by the main_package output of check-changes.
  stage-b-test-16-npu-a3:
    needs:
      - check-changes
      - pr-gate
    if: ${{ needs.check-changes.outputs.main_package == 'true' }}
    runs-on: linux-aarch64-a3-16
    container:
      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.5.0-a3-ubuntu22.04-py3.11
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      - name: Mark repository safe
        run: git config --system --add safe.directory ${GITHUB_WORKSPACE}

      - name: Install dependencies
        env:
          GITHUB_PROXY_URL: 'https://gh-proxy.test.osinfra.cn/'
          PYPI_CACHE_URL: 'http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple'
          TORCH_CACHE_URL: 'http://cache-service.nginx-pypi-cache.svc.cluster.local/whl/cpu'
        run: |
          # Route apt and pip through the infra cache service to speed up installs.
          CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local"
          sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list
          pip config set global.index-url http://${CACHING_URL}/pypi/simple
          pip config set global.trusted-host "${CACHING_URL}"

          bash scripts/ci/npu/npu_ci_install_dependency.sh a3
          # Stage the ShareGPT file from the daily modelscope cache into /tmp.
          cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp
          # Stage the gsm8k dataset file into /tmp.
          cp ~/.cache/modelscope/hub/datasets/tmp/test.jsonl /tmp

      - name: Run test
        # NOTE(review): the step is capped at 60 minutes while --timeout-per-file is
        # 3600; if that flag is in seconds it can never fire before the step timeout
        # — confirm against run_suite.py.
        timeout-minutes: 60
        env:
          HF_ENDPOINT: 'https://hf-mirror.com'
          PYTORCH_NPU_ALLOC_CONF: 'expandable_segments:True'
          SGLANG_IS_IN_CI: 'true'
          SGLANG_USE_MODELSCOPE: 'true'
          STREAMS_PER_DEVICE: '32'
          TORCH_EXTENSIONS_DIR: /tmp/torch_extensions
        run: |
          cd test
          python3 run_suite.py --hw npu --suite stage-b-test-16-npu-a3 --timeout-per-file 3600

  # Runs the multimodal_gen `1-npu` suite on the `linux-aarch64-a3-2` runner.
  # Enabled by the multimodal_gen output of check-changes.
  multimodal-gen-test-1-npu-a3:
    needs: [check-changes, pr-gate]
    if: needs.check-changes.outputs.multimodal_gen == 'true'
    runs-on: linux-aarch64-a3-2
    container:
      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-a3-ubuntu22.04-py3.11
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # Test the same revision as the other jobs: the explicit `ref` input when
          # given, otherwise the triggering ref.
          ref: ${{ inputs.ref || github.ref }}

      - name: Mark repository safe
        run: |
          git config --system --add safe.directory ${GITHUB_WORKSPACE}

      - name: Install dependencies
        env:
          TORCH_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/whl/cpu"
          PYPI_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple"
          GITHUB_PROXY_URL: "https://gh-proxy.test.osinfra.cn/"
        run: |
          # Speed up installs by routing apt and pip through the infra cache service.
          CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local"
          sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list
          pip config set global.index-url http://${CACHING_URL}/pypi/simple
          pip config set global.trusted-host "${CACHING_URL}"

          bash scripts/ci/npu/npu_ci_install_dependency.sh a3 diffusion
          # Copy the ShareGPT file from the daily modelscope cache into /tmp.
          cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp
          # Copy the gsm8k dataset file into /tmp.
          cp ~/.cache/modelscope/hub/datasets/tmp/test.jsonl /tmp

      - name: Run test
        timeout-minutes: 60
        env:
          SGLANG_USE_MODELSCOPE: true
          SGLANG_IS_IN_CI: true
          HF_ENDPOINT: https://hf-mirror.com
          TORCH_EXTENSIONS_DIR: /tmp/torch_extensions
          PYTORCH_NPU_ALLOC_CONF: "expandable_segments:True"
          STREAMS_PER_DEVICE: 32
        run: |
          # NOTE(review): the path names 8.3.RC1 while the container tag is 8.3.rc2 —
          # confirm this directory exists in the image.
          export PATH="/usr/local/Ascend/8.3.RC1/compiler/bishengir/bin:${PATH}"
          cd python
          python3 sglang/multimodal_gen/test/run_suite.py --suite 1-npu

  # Runs the multimodal_gen `2-npu` suite. Enabled by the multimodal_gen output of
  # check-changes.
  # NOTE(review): this job uses the `linux-aarch64-a3-16` runner label although its
  # name says 2-npu — confirm the runner choice is deliberate.
  multimodal-gen-test-2-npu-a3:
    needs: [check-changes, pr-gate]
    if: needs.check-changes.outputs.multimodal_gen == 'true'
    runs-on: linux-aarch64-a3-16
    container:
      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-a3-ubuntu22.04-py3.11
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # Test the same revision as the other jobs: the explicit `ref` input when
          # given, otherwise the triggering ref.
          ref: ${{ inputs.ref || github.ref }}

      - name: Mark repository safe
        run: |
          git config --system --add safe.directory ${GITHUB_WORKSPACE}

      - name: Install dependencies
        env:
          TORCH_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/whl/cpu"
          PYPI_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple"
          GITHUB_PROXY_URL: "https://gh-proxy.test.osinfra.cn/"
        run: |
          # Speed up installs by routing apt and pip through the infra cache service.
          CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local"
          sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list
          pip config set global.index-url http://${CACHING_URL}/pypi/simple
          pip config set global.trusted-host "${CACHING_URL}"

          bash scripts/ci/npu/npu_ci_install_dependency.sh a3 diffusion
          # Copy the ShareGPT file from the daily modelscope cache into /tmp.
          cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp
          # Copy the gsm8k dataset file into /tmp.
          cp ~/.cache/modelscope/hub/datasets/tmp/test.jsonl /tmp

      - name: Run test
        timeout-minutes: 60
        env:
          SGLANG_USE_MODELSCOPE: true
          SGLANG_IS_IN_CI: true
          HF_ENDPOINT: https://hf-mirror.com
          TORCH_EXTENSIONS_DIR: /tmp/torch_extensions
          PYTORCH_NPU_ALLOC_CONF: "expandable_segments:True"
          STREAMS_PER_DEVICE: 32
        run: |
          # NOTE(review): the path names 8.3.RC1 while the container tag is 8.3.rc2 —
          # confirm this directory exists in the image.
          export PATH="/usr/local/Ascend/8.3.RC1/compiler/bishengir/bin:${PATH}"
          cd python
          python3 sglang/multimodal_gen/test/run_suite.py --suite 2-npu

  # Runs the multimodal_gen `8-npu` suite on the `linux-aarch64-a3-8` runner.
  # Enabled by the multimodal_gen output of check-changes.
  multimodal-gen-test-8-npu-a3:
    needs: [check-changes, pr-gate]
    if: needs.check-changes.outputs.multimodal_gen == 'true'
    runs-on: linux-aarch64-a3-8
    container:
      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.5.0-a3-ubuntu22.04-py3.11
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # Test the same revision as the other jobs: the explicit `ref` input when
          # given, otherwise the triggering ref.
          ref: ${{ inputs.ref || github.ref }}

      - name: Mark repository safe
        run: |
          git config --system --add safe.directory ${GITHUB_WORKSPACE}

      - name: Install dependencies
        env:
          TORCH_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/whl/cpu"
          PYPI_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple"
          GITHUB_PROXY_URL: "https://gh-proxy.test.osinfra.cn/"
        run: |
          # Speed up installs by routing apt and pip through the infra cache service.
          CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local"
          sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list
          pip config set global.index-url http://${CACHING_URL}/pypi/simple
          pip config set global.trusted-host "${CACHING_URL}"

          bash scripts/ci/npu/npu_ci_install_dependency.sh a3 diffusion
          # Copy the ShareGPT file from the daily modelscope cache into /tmp.
          cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp
          # Copy the gsm8k dataset file into /tmp.
          cp ~/.cache/modelscope/hub/datasets/tmp/test.jsonl /tmp

      - name: Run test
        timeout-minutes: 60
        env:
          SGLANG_USE_MODELSCOPE: true
          SGLANG_IS_IN_CI: true
          HF_ENDPOINT: https://hf-mirror.com
          TORCH_EXTENSIONS_DIR: /tmp/torch_extensions
          PYTORCH_NPU_ALLOC_CONF: "expandable_segments:True"
          STREAMS_PER_DEVICE: 32
        run: |
          cd python
          python3 sglang/multimodal_gen/test/run_suite.py --suite 8-npu

  # Aggregates the results of every upstream job into a single pass/fail status.
  # Always runs, even when upstream jobs failed, were cancelled, or were skipped.
  pr-test-npu-finish:
    needs:
      - check-changes
      # Included so a failed or cancelled gate fails this job; otherwise every test
      # job is merely skipped and this job would report success.
      - pr-gate

      - stage-b-test-1-npu-a2
      - stage-b-test-2-npu-a2
      - stage-b-test-4-npu-a3
      - stage-b-test-16-npu-a3

      - multimodal-gen-test-1-npu-a3
      - multimodal-gen-test-2-npu-a3
      - multimodal-gen-test-8-npu-a3
    if: always()
    runs-on: ubuntu-latest
    steps:
      - name: Check all dependent job statuses
        env:
          # The 'needs' context as JSON, handed over through the environment so its
          # contents (including job outputs) are never parsed as shell source.
          NEEDS_JSON: ${{ toJson(needs) }}
        run: |
          # List every job name present in the 'needs' JSON, in declaration order
          job_names=$(jq -r 'keys_unsorted[]' <<<"$NEEDS_JSON")

          for job in $job_names; do
            # For each job, extract its result
            result=$(jq -r --arg j "$job" '.[$j].result' <<<"$NEEDS_JSON")

            # Print the job name and its result
            echo "$job: $result"

            # Fail on the first failed or cancelled job; skipped jobs are accepted
            if [[ "$result" == "failure" || "$result" == "cancelled" ]]; then
              echo "The above jobs failed."
              exit 1
            fi
          done
          # If the loop completes, no job failed or was cancelled
          echo "All jobs completed successfully"
          exit 0