# Nightly NPU test workflow.
#
# Runs the `nightly-*-npu-a3` suites of test/run_suite.py on self-hosted
# Ascend A3 runners. It can be started by the nightly cron, by a pull request
# that edits this file, manually, or from another workflow (workflow_call).
name: Nightly Test (NPU)

on:
  schedule:
    - cron: '0 18 * * *'  # 18:00 UTC == 02:00 Beijing Time (UTC+8) every day
  pull_request:
    branches:
      - main
    paths:
      - ".github/workflows/nightly-test-npu.yml"
  workflow_dispatch:
  workflow_call:
    inputs:
      ref:
        description: 'Git ref (branch, tag, or SHA) to test. If not provided, uses the default branch.'
        required: false
        type: string
        default: ''
      job_filter:
        description: 'Select which job to run (leave empty or "all" to run all jobs)'
        required: false
        type: string
        default: 'all'
      image_a3:
        description: 'The a3 running docker image of the test task.'
        required: false
        type: string
        default: 'swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.5.0-a3-ubuntu22.04-py3.11'
      skip_install_flag:
        description: 'Indicates whether to skip the installation of sglang, defaulting to false.'
        required: false
        type: string
        default: 'false'

# One run per tested ref; a newer run replaces an older one.
# NOTE(review): inside a called workflow the `github` context presumably
# belongs to the caller, in which case `github.event_name` is never
# 'workflow_call' and this expression is always true - confirm the intent.
concurrency:
  group: nightly-test-npu-${{ inputs.ref || github.ref }}
  cancel-in-progress: ${{ github.event_name != 'workflow_call' }}

jobs:
  # Normalises the optional inputs into job outputs so that every test job can
  # read the same values no matter which event started the workflow.
  set-image-config:
    runs-on: ubuntu-latest
    outputs:
      ref: ${{ steps.set-vars.outputs.ref }}
      # NOTE(review): job_filter is exported here but no job in this file
      # reads it, so every test job currently runs regardless of its value.
      job_filter: ${{ steps.set-vars.outputs.job_filter }}
      image_a3: ${{ steps.set-vars.outputs.image_a3 }}
      skip_install_flag: ${{ steps.set-vars.outputs.skip_install_flag }}
    steps:
      # When triggered by a PR, no input parameters are supplied, so the
      # defaults below apply and the latest community code is tested.
      - name: Set image config
        id: set-vars
        # Inputs are handed to the script through the environment instead of
        # being spliced into the shell source, so a value containing quotes or
        # shell metacharacters cannot alter the script.
        env:
          REQUESTED_REF: ${{ inputs.ref }}
          REQUESTED_JOB_FILTER: ${{ inputs.job_filter }}
          REQUESTED_IMAGE_A3: ${{ inputs.image_a3 }}
          REQUESTED_SKIP_INSTALL_FLAG: ${{ inputs.skip_install_flag }}
          DEFAULT_IMAGE_A3: 'swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.5.0-a3-ubuntu22.04-py3.11'
        run: |
          # `${VAR:-fallback}` substitutes the fallback when VAR is unset or
          # empty; an empty ref is passed through as an empty output.
          {
            echo "ref=${REQUESTED_REF}"
            echo "job_filter=${REQUESTED_JOB_FILTER:-all}"
            echo "image_a3=${REQUESTED_IMAGE_A3:-${DEFAULT_IMAGE_A3}}"
            echo "skip_install_flag=${REQUESTED_SKIP_INSTALL_FLAG:-false}"
          } >> "$GITHUB_OUTPUT"

  # Suite nightly-1-npu-a3, split into two partitions.
  nightly-1-npu-a3:
    needs: [set-image-config]
    if: ${{ (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') }}
    runs-on: linux-aarch64-a3-2
    strategy:
      fail-fast: false
      matrix:
        part: [0, 1]
    container:
      image: ${{ needs.set-image-config.outputs.image_a3 }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ needs.set-image-config.outputs.ref || github.ref }}

      - name: Install dependencies
        env:
          TORCH_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/whl/cpu"
          PYPI_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple"
          GITHUB_PROXY_URL: "https://gh-proxy.test.osinfra.cn/"
          SKIP_INSTALL_FLAG: ${{ needs.set-image-config.outputs.skip_install_flag }}
        run: |
          # speed up by using infra cache services
          CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local"
          sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list
          pip config set global.index-url http://${CACHING_URL}/pypi/simple
          pip config set global.trusted-host "${CACHING_URL}"

          # quoted so that an empty flag is compared as "" instead of
          # producing a `[` syntax error
          if [ "${SKIP_INSTALL_FLAG}" != "true" ]; then
            bash scripts/ci/npu/npu_ci_install_dependency.sh a3
          fi
          # copy required file from our daily cache
          cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp
          # copy gsm8k dataset
          cp ~/.cache/modelscope/hub/datasets/tmp/test.jsonl /tmp

      - name: Print Log Information
        run: |
          bash scripts/ci/npu/npu_log_print.sh

      - name: Run test
        timeout-minutes: 240
        env:
          SGLANG_USE_MODELSCOPE: "true"
          SGLANG_IS_IN_CI: "true"
          HF_ENDPOINT: https://hf-mirror.com
          TORCH_EXTENSIONS_DIR: /tmp/torch_extensions
          PYTORCH_NPU_ALLOC_CONF: "expandable_segments:True"
          STREAMS_PER_DEVICE: "32"
        run: |
          pip install sglang_router
          hf download lmms-lab/MMMU --repo-type dataset
          pip install sentence_transformers torchaudio==2.8.0
          # Specifiers containing `>` are quoted: unquoted, the shell treats
          # `>=X` as a redirection to a file named `=X` and drops the bound.
          pip install protobuf==6.31.1 zss pre-commit "wandb>=0.16.0" tenacity==8.3.0 loguru openpyxl \
            latex2sympy2 zstandard transformers-stream-generator tqdm-multiprocess pycocoevalcap
          pip install yt-dlp sentencepiece==0.1.99 nltk av ftfy sqlitedict==2.1.0 "sacrebleu>=1.5.0" \
            pytablewriter black==24.1.0 isort==5.13.2 "peft>=0.2.0" "accelerate>=0.29.1"
          pip install jsonlines httpx==0.25.0 "evaluate>=0.4.0" datasets==2.16.1 numexpr \
            xgrammar==0.1.32 numpy==1.26.4 dotenv
          git clone --branch v0.3.3 --depth 1 https://github.com/EvolvingLMMs-Lab/lmms-eval.git
          cd ./lmms-eval
          # The install runs in the background and the script waits a fixed
          # 120 s for it; the checkout is also put on PYTHONPATH below.
          nohup pip install . > lmmslog.txt 2>&1 &
          sleep 120
          export PYTHONPATH=$PYTHONPATH:$(pwd)
          cd ../
          cd test
          python3 run_suite.py --hw npu --suite nightly-1-npu-a3 --nightly --continue-on-error --timeout-per-file 3600 --auto-partition-id ${{ matrix.part }} --auto-partition-size 2

  # Suite nightly-2-npu-a3, single partition.
  nightly-2-npu-a3:
    needs: [set-image-config]
    if: ${{ (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') }}
    runs-on: linux-aarch64-a3-2
    strategy:
      fail-fast: false
      matrix:
        part: [0]
    container:
      image: ${{ needs.set-image-config.outputs.image_a3 }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ needs.set-image-config.outputs.ref || github.ref }}

      - name: Install dependencies
        env:
          TORCH_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/whl/cpu"
          PYPI_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple"
          GITHUB_PROXY_URL: "https://gh-proxy.test.osinfra.cn/"
          SKIP_INSTALL_FLAG: ${{ needs.set-image-config.outputs.skip_install_flag }}
        run: |
          # speed up by using infra cache services
          CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local"
          sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list
          pip config set global.index-url http://${CACHING_URL}/pypi/simple
          pip config set global.trusted-host "${CACHING_URL}"

          # quoted so that an empty flag is compared as "" instead of
          # producing a `[` syntax error
          if [ "${SKIP_INSTALL_FLAG}" != "true" ]; then
            bash scripts/ci/npu/npu_ci_install_dependency.sh a3
          fi
          # copy required file from our daily cache
          cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp
          # copy gsm8k dataset
          cp ~/.cache/modelscope/hub/datasets/tmp/test.jsonl /tmp

      - name: Print Log Information
        run: |
          bash scripts/ci/npu/npu_log_print.sh

      - name: Run test
        timeout-minutes: 240
        env:
          SGLANG_USE_MODELSCOPE: "true"
          SGLANG_IS_IN_CI: "true"
          HF_ENDPOINT: https://hf-mirror.com
          TORCH_EXTENSIONS_DIR: /tmp/torch_extensions
          PYTORCH_NPU_ALLOC_CONF: "expandable_segments:True"
          STREAMS_PER_DEVICE: "32"
        run: |
          pip install sglang_router
          hf download lmms-lab/MMMU --repo-type dataset
          pip install sentence_transformers torchaudio==2.8.0
          # Specifiers containing `>` are quoted: unquoted, the shell treats
          # `>=X` as a redirection to a file named `=X` and drops the bound.
          pip install protobuf==6.31.1 zss pre-commit "wandb>=0.16.0" tenacity==8.3.0 loguru openpyxl \
            latex2sympy2 zstandard transformers-stream-generator tqdm-multiprocess pycocoevalcap
          pip install yt-dlp sentencepiece==0.1.99 nltk av ftfy sqlitedict==2.1.0 "sacrebleu>=1.5.0" \
            pytablewriter black==24.1.0 isort==5.13.2 "peft>=0.2.0" "accelerate>=0.29.1"
          pip install jsonlines httpx==0.25.0 "evaluate>=0.4.0" datasets==2.16.1 numexpr \
            xgrammar==0.1.32 numpy==1.26.4 dotenv
          git clone --branch v0.3.3 --depth 1 https://github.com/EvolvingLMMs-Lab/lmms-eval.git
          cd ./lmms-eval
          # The install runs in the background and the script waits a fixed
          # 120 s for it; the checkout is also put on PYTHONPATH below.
          nohup pip install . > lmmslog.txt 2>&1 &
          sleep 120
          export PYTHONPATH=$PYTHONPATH:$(pwd)
          cd ../
          cd test
          python3 run_suite.py --hw npu --suite nightly-2-npu-a3 --nightly --continue-on-error --timeout-per-file 3600 --auto-partition-id ${{ matrix.part }} --auto-partition-size 1

  # Suite nightly-4-npu-a3, single partition.
  nightly-4-npu-a3:
    needs: [set-image-config]
    if: ${{ (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') }}
    runs-on: linux-aarch64-a3-4
    strategy:
      fail-fast: false
      matrix:
        part: [0]
    container:
      image: ${{ needs.set-image-config.outputs.image_a3 }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ needs.set-image-config.outputs.ref || github.ref }}

      - name: Install dependencies
        env:
          TORCH_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/whl/cpu"
          PYPI_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple"
          GITHUB_PROXY_URL: "https://gh-proxy.test.osinfra.cn/"
          SKIP_INSTALL_FLAG: ${{ needs.set-image-config.outputs.skip_install_flag }}
        run: |
          # speed up by using infra cache services
          CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local"
          sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list
          pip config set global.index-url http://${CACHING_URL}/pypi/simple
          pip config set global.trusted-host "${CACHING_URL}"

          # quoted so that an empty flag is compared as "" instead of
          # producing a `[` syntax error
          if [ "${SKIP_INSTALL_FLAG}" != "true" ]; then
            bash scripts/ci/npu/npu_ci_install_dependency.sh a3
          fi
          # copy required file from our daily cache
          cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp
          # copy gsm8k dataset
          cp ~/.cache/modelscope/hub/datasets/tmp/test.jsonl /tmp

      - name: Print Log Information
        run: |
          bash scripts/ci/npu/npu_log_print.sh

      - name: Run test
        timeout-minutes: 240
        env:
          SGLANG_USE_MODELSCOPE: "true"
          SGLANG_IS_IN_CI: "true"
          HF_ENDPOINT: https://hf-mirror.com
          TORCH_EXTENSIONS_DIR: /tmp/torch_extensions
          PYTORCH_NPU_ALLOC_CONF: "expandable_segments:True"
          STREAMS_PER_DEVICE: "32"
        run: |
          pip install sglang_router
          hf download lmms-lab/MMMU --repo-type dataset
          pip install sentence_transformers torchaudio==2.8.0
          # Specifiers containing `>` are quoted: unquoted, the shell treats
          # `>=X` as a redirection to a file named `=X` and drops the bound.
          pip install protobuf==6.31.1 zss pre-commit "wandb>=0.16.0" tenacity==8.3.0 loguru openpyxl \
            latex2sympy2 zstandard transformers-stream-generator tqdm-multiprocess pycocoevalcap
          pip install yt-dlp sentencepiece==0.1.99 nltk av ftfy sqlitedict==2.1.0 "sacrebleu>=1.5.0" \
            pytablewriter black==24.1.0 isort==5.13.2 "peft>=0.2.0" "accelerate>=0.29.1"
          pip install jsonlines httpx==0.25.0 "evaluate>=0.4.0" datasets==2.16.1 numexpr \
            xgrammar==0.1.32 numpy==1.26.4 dotenv
          git clone --branch v0.3.3 --depth 1 https://github.com/EvolvingLMMs-Lab/lmms-eval.git
          cd ./lmms-eval
          # The install runs in the background and the script waits a fixed
          # 120 s for it; the checkout is also put on PYTHONPATH below.
          nohup pip install . > lmmslog.txt 2>&1 &
          sleep 120
          export PYTHONPATH=$PYTHONPATH:$(pwd)
          cd ../
          cd test
          python3 run_suite.py --hw npu --suite nightly-4-npu-a3 --nightly --continue-on-error --timeout-per-file 3600 --auto-partition-id ${{ matrix.part }} --auto-partition-size 1

  # Suite nightly-8-npu-a3, single partition.
  nightly-8-npu-a3:
    needs: [set-image-config]
    if: ${{ (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') }}
    runs-on: linux-aarch64-a3-8
    strategy:
      fail-fast: false
      matrix:
        part: [0]
    container:
      image: ${{ needs.set-image-config.outputs.image_a3 }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ needs.set-image-config.outputs.ref || github.ref }}

      - name: Install dependencies
        env:
          TORCH_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/whl/cpu"
          PYPI_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple"
          GITHUB_PROXY_URL: "https://gh-proxy.test.osinfra.cn/"
          SKIP_INSTALL_FLAG: ${{ needs.set-image-config.outputs.skip_install_flag }}
        run: |
          # speed up by using infra cache services
          CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local"
          sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list
          pip config set global.index-url http://${CACHING_URL}/pypi/simple
          pip config set global.trusted-host "${CACHING_URL}"

          # quoted so that an empty flag is compared as "" instead of
          # producing a `[` syntax error
          if [ "${SKIP_INSTALL_FLAG}" != "true" ]; then
            bash scripts/ci/npu/npu_ci_install_dependency.sh a3
          fi
          # copy required file from our daily cache
          cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp
          # copy gsm8k dataset
          cp ~/.cache/modelscope/hub/datasets/tmp/test.jsonl /tmp

      - name: Print Log Information
        run: |
          bash scripts/ci/npu/npu_log_print.sh

      - name: Run test
        timeout-minutes: 240
        env:
          SGLANG_USE_MODELSCOPE: "true"
          SGLANG_IS_IN_CI: "true"
          HF_ENDPOINT: https://hf-mirror.com
          TORCH_EXTENSIONS_DIR: /tmp/torch_extensions
          PYTORCH_NPU_ALLOC_CONF: "expandable_segments:True"
          STREAMS_PER_DEVICE: "32"
        run: |
          pip install sglang_router
          hf download lmms-lab/MMMU --repo-type dataset
          pip install sentence_transformers torchaudio==2.8.0
          # Specifiers containing `>` are quoted: unquoted, the shell treats
          # `>=X` as a redirection to a file named `=X` and drops the bound.
          pip install protobuf==6.31.1 zss pre-commit "wandb>=0.16.0" tenacity==8.3.0 loguru openpyxl \
            latex2sympy2 zstandard transformers-stream-generator tqdm-multiprocess pycocoevalcap
          pip install yt-dlp sentencepiece==0.1.99 nltk av ftfy sqlitedict==2.1.0 "sacrebleu>=1.5.0" \
            pytablewriter black==24.1.0 isort==5.13.2 "peft>=0.2.0" "accelerate>=0.29.1"
          pip install jsonlines httpx==0.25.0 "evaluate>=0.4.0" datasets==2.16.1 numexpr \
            xgrammar==0.1.32 numpy==1.26.4 dotenv
          git clone --branch v0.3.3 --depth 1 https://github.com/EvolvingLMMs-Lab/lmms-eval.git
          cd ./lmms-eval
          # The install runs in the background and the script waits a fixed
          # 120 s for it; the checkout is also put on PYTHONPATH below.
          nohup pip install . > lmmslog.txt 2>&1 &
          sleep 120
          export PYTHONPATH=$PYTHONPATH:$(pwd)
          cd ../
          cd test
          python3 run_suite.py --hw npu --suite nightly-8-npu-a3 --nightly --continue-on-error --timeout-per-file 3600 --auto-partition-id ${{ matrix.part }} --auto-partition-size 1

  # Suite nightly-16-npu-a3, split into two partitions.
  nightly-16-npu-a3:
    needs: [set-image-config]
    if: ${{ (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') }}
    runs-on: linux-aarch64-a3-16
    strategy:
      fail-fast: false
      matrix:
        part: [0, 1]
    container:
      image: ${{ needs.set-image-config.outputs.image_a3 }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ needs.set-image-config.outputs.ref || github.ref }}

      - name: Install dependencies
        env:
          TORCH_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/whl/cpu"
          PYPI_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple"
          GITHUB_PROXY_URL: "https://gh-proxy.test.osinfra.cn/"
          SKIP_INSTALL_FLAG: ${{ needs.set-image-config.outputs.skip_install_flag }}
        run: |
          # speed up by using infra cache services
          CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local"
          sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list
          pip config set global.index-url http://${CACHING_URL}/pypi/simple
          pip config set global.trusted-host "${CACHING_URL}"

          # quoted so that an empty flag is compared as "" instead of
          # producing a `[` syntax error
          if [ "${SKIP_INSTALL_FLAG}" != "true" ]; then
            bash scripts/ci/npu/npu_ci_install_dependency.sh a3
          fi
          # copy required file from our daily cache
          cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp
          # copy gsm8k dataset
          cp ~/.cache/modelscope/hub/datasets/tmp/test.jsonl /tmp

      - name: Print Log Information
        run: |
          bash scripts/ci/npu/npu_log_print.sh

      - name: Run test
        timeout-minutes: 240
        env:
          SGLANG_USE_MODELSCOPE: "true"
          SGLANG_IS_IN_CI: "true"
          HF_ENDPOINT: https://hf-mirror.com
          TORCH_EXTENSIONS_DIR: /tmp/torch_extensions
          PYTORCH_NPU_ALLOC_CONF: "expandable_segments:True"
          STREAMS_PER_DEVICE: "32"
        run: |
          pip install sglang_router
          hf download lmms-lab/MMMU --repo-type dataset
          pip install sentence_transformers torchaudio==2.8.0
          # Specifiers containing `>` are quoted: unquoted, the shell treats
          # `>=X` as a redirection to a file named `=X` and drops the bound.
          pip install protobuf==6.31.1 zss pre-commit "wandb>=0.16.0" tenacity==8.3.0 loguru openpyxl \
            latex2sympy2 zstandard transformers-stream-generator tqdm-multiprocess pycocoevalcap
          pip install yt-dlp sentencepiece==0.1.99 nltk av ftfy sqlitedict==2.1.0 "sacrebleu>=1.5.0" \
            pytablewriter black==24.1.0 isort==5.13.2 "peft>=0.2.0" "accelerate>=0.29.1"
          pip install jsonlines httpx==0.25.0 "evaluate>=0.4.0" datasets==2.16.1 numexpr \
            xgrammar==0.1.32 numpy==1.26.4 dotenv
          git clone --branch v0.3.3 --depth 1 https://github.com/EvolvingLMMs-Lab/lmms-eval.git
          cd ./lmms-eval
          # The install runs in the background and the script waits a fixed
          # 120 s for it; the checkout is also put on PYTHONPATH below.
          nohup pip install . > lmmslog.txt 2>&1 &
          sleep 120
          export PYTHONPATH=$PYTHONPATH:$(pwd)
          cd ../
          cd test
          python3 run_suite.py --hw npu --suite nightly-16-npu-a3 --nightly --continue-on-error --timeout-per-file 3600 --auto-partition-id ${{ matrix.part }} --auto-partition-size 2

  # Aggregates the results of all test jobs into a single pass/fail status.
  # `always()` makes it run even when one of the needed jobs did not succeed.
  check-all-jobs:
    if: github.repository == 'sgl-project/sglang' && always()
    needs:
      - nightly-1-npu-a3
      - nightly-2-npu-a3
      - nightly-4-npu-a3
      - nightly-8-npu-a3
      - nightly-16-npu-a3
    runs-on: ubuntu-latest
    container:
      image: docker.m.daocloud.io/ubuntu:22.04
    steps:
      - name: Check if any job failed
        run: |
          if [[ "${{ contains(needs.*.result, 'failure') }}" == "true" ]]; then
            echo "One or more nightly test jobs failed"
            exit 1
          fi
          if [[ "${{ contains(needs.*.result, 'cancelled') }}" == "true" ]]; then
            echo "One or more nightly test jobs were cancelled"
            exit 1
          fi
          echo "All nightly test jobs passed"