454 lines
18 KiB
YAML
454 lines
18 KiB
YAML
# Workflow header: display name, triggers, and run de-duplication.
name: PR Test (NPU)

on:
  # Direct triggers: pushes to main and pull requests targeting main.
  push:
    branches:
      - main
  pull_request:
    branches:
      - main
  # Manual runs from the Actions UI (no inputs declared for this trigger).
  workflow_dispatch:
  # Reusable entry point: callers may pin the ref and force the full suite.
  workflow_call:
    inputs:
      ref:
        type: string
        required: false
        default: ''
        description: 'Git ref (branch, tag, or SHA) to test. If not provided, uses the default branch.'
      run_all_tests:
        type: boolean
        required: false
        default: false
        description: "Run all tests (for releasing or testing purpose)"

# Runs are grouped by the ref under test (explicit input first, otherwise the
# triggering ref).
concurrency:
  group: pr-test-npu-${{ inputs.ref || github.ref }}
  # NOTE(review): a comment in the check-changes job states that
  # github.event_name is inherited from the caller in a called workflow. If so,
  # this comparison never sees 'workflow_call' and always yields true - confirm
  # whether called runs are really meant to be exempt from cancellation.
  cancel-in-progress: ${{ github.event_name != 'workflow_call' }}
|
|
|
|
jobs:
|
|
# ==================== Check Changes ==================== #
|
|
# Decides which downstream test groups have work to do.
#
# Outputs (each evaluates to 'true' or 'false'):
#   changes_exist  - either path filter matched, or the full suite was requested
#   main_package   - the main_package filter matched, or full suite requested
#   multimodal_gen - the multimodal_gen filter matched, or full suite requested
check-changes:
  runs-on: ubuntu-latest
  outputs:
    changes_exist: ${{ steps.filter.outputs.main_package == 'true' || steps.filter.outputs.multimodal_gen == 'true' || steps.run-mode.outputs.run_all_tests == 'true' }}
    main_package: ${{ steps.filter.outputs.main_package == 'true' || steps.run-mode.outputs.run_all_tests == 'true' }}
    multimodal_gen: ${{ steps.filter.outputs.multimodal_gen == 'true' || steps.run-mode.outputs.run_all_tests == 'true' }}
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}

    - name: Determine run mode
      id: run-mode
      env:
        # Expressions are handed to the script through the environment instead
        # of being spliced into the shell source.
        RUN_ALL_TESTS: ${{ inputs.run_all_tests }}
        EVENT_NAME: ${{ github.event_name }}
      run: |
        # Run every suite only when the run_all_tests input is true.
        # Note: github.event_name is inherited from the caller, so it cannot be
        # used to recognise a workflow_call invocation; the explicit input is
        # the only switch checked here.
        if [[ "${RUN_ALL_TESTS}" == "true" ]]; then
          echo "run_all_tests=true" >> "$GITHUB_OUTPUT"
          echo "Run mode: ALL TESTS (run_all_tests=${RUN_ALL_TESTS})"
        else
          echo "run_all_tests=false" >> "$GITHUB_OUTPUT"
          echo "Run mode: FILTERED (triggered by ${EVENT_NAME})"
        fi

    # Skipped when the full suite is forced; the job outputs above then rely
    # solely on the run_all_tests term of each expression.
    - name: Detect file changes
      id: filter
      uses: dorny/paths-filter@v3
      if: steps.run-mode.outputs.run_all_tests != 'true'
      with:
        filters: |
          main_package:
            - "python/sglang/!(multimodal_gen)/**/!(*.md)"
            - "python/pyproject_npu.toml"
            - "scripts/ci/npu/npu_ci_install_dependency.sh"
            - "test/srt/ascend/**"
            - ".github/workflows/pr-test-npu.yml"
          multimodal_gen:
            - "python/sglang/multimodal_gen/**/*.!(md|ipynb)"
            - "python/sglang/srt/**"
            - "python/pyproject_npu.toml"
            - "scripts/ci/npu/npu_ci_install_dependency.sh"
            - ".github/workflows/pr-test-npu.yml"
|
|
|
|
# ==================== PR Gate ==================== #
|
|
# Delegates to the repository's reusable gate workflow. Only runs when
# check-changes reported that something relevant changed.
pr-gate:
  needs:
    - check-changes
  if: needs.check-changes.outputs.changes_exist == 'true'
  uses: ./.github/workflows/pr-gate.yml
  # Forward all of the caller's secrets to the gate workflow.
  secrets: inherit
|
|
|
|
# Runs the stage-b-test-1-npu-a2 suite on the linux-aarch64-a2-1 runner,
# split into two partitions (matrix.part 0 and 1). Gated on the main_package
# output of check-changes.
stage-b-test-1-npu-a2:
  needs:
    - check-changes
    - pr-gate
  if: needs.check-changes.outputs.main_package == 'true'
  runs-on: linux-aarch64-a2-1
  container:
    image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.5.0-910b-ubuntu22.04-py3.11
  strategy:
    # Let the other partition finish even if one of them fails.
    fail-fast: false
    matrix:
      part:
        - 0
        - 1
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}

    # Registers the workspace as a git safe.directory at system level.
    - name: Mark repository safe
      run: |
        git config --system --add safe.directory ${GITHUB_WORKSPACE}

    - name: Install dependencies
      env:
        TORCH_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/whl/cpu"
        PYPI_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple"
        GITHUB_PROXY_URL: "https://gh-proxy.test.osinfra.cn/"
      run: |
        # Point apt and pip at the in-cluster cache service
        CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local"
        sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list
        pip config set global.index-url http://${CACHING_URL}/pypi/simple
        pip config set global.trusted-host "${CACHING_URL}"

        bash scripts/ci/npu/npu_ci_install_dependency.sh 910b
        # Stage the ShareGPT file from the local modelscope cache into /tmp
        cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp
        # Stage the gsm8k dataset into /tmp
        cp ~/.cache/modelscope/hub/datasets/tmp/test.jsonl /tmp

    - name: Run test
      timeout-minutes: 60
      env:
        SGLANG_USE_MODELSCOPE: true
        SGLANG_IS_IN_CI: true
        HF_ENDPOINT: https://hf-mirror.com
        TORCH_EXTENSIONS_DIR: /tmp/torch_extensions
        PYTORCH_NPU_ALLOC_CONF: "expandable_segments:True"
        STREAMS_PER_DEVICE: 32
      run: |
        cd test
        python3 run_suite.py --hw npu --suite stage-b-test-1-npu-a2 --auto-partition-id ${{ matrix.part }} --auto-partition-size 2
|
|
|
|
# Runs the stage-b-test-2-npu-a2 suite on the linux-aarch64-a2-2 runner,
# split into two partitions (matrix.part 0 and 1). Gated on the main_package
# output of check-changes.
stage-b-test-2-npu-a2:
  needs:
    - check-changes
    - pr-gate
  if: needs.check-changes.outputs.main_package == 'true'
  runs-on: linux-aarch64-a2-2
  container:
    image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.5.0-910b-ubuntu22.04-py3.11
  strategy:
    # NOTE(review): stage-b-test-1-npu-a2 sets fail-fast to false, whereas
    # this job cancels the sibling partition on the first failure - confirm
    # the difference is intentional.
    fail-fast: true
    matrix:
      part:
        - 0
        - 1
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}

    # Registers the workspace as a git safe.directory at system level.
    - name: Mark repository safe
      run: |
        git config --system --add safe.directory ${GITHUB_WORKSPACE}

    - name: Install dependencies
      env:
        TORCH_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/whl/cpu"
        PYPI_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple"
        GITHUB_PROXY_URL: "https://gh-proxy.test.osinfra.cn/"
      run: |
        # Point apt and pip at the in-cluster cache service
        CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local"
        sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list
        pip config set global.index-url http://${CACHING_URL}/pypi/simple
        pip config set global.trusted-host "${CACHING_URL}"

        bash scripts/ci/npu/npu_ci_install_dependency.sh 910b
        # Stage the ShareGPT file from the local modelscope cache into /tmp
        cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp
        # Stage the gsm8k dataset into /tmp
        cp ~/.cache/modelscope/hub/datasets/tmp/test.jsonl /tmp

    - name: Run test
      timeout-minutes: 60
      env:
        SGLANG_USE_MODELSCOPE: true
        SGLANG_IS_IN_CI: true
        HF_ENDPOINT: https://hf-mirror.com
        TORCH_EXTENSIONS_DIR: /tmp/torch_extensions
        PYTORCH_NPU_ALLOC_CONF: "expandable_segments:True"
        STREAMS_PER_DEVICE: 32
      run: |
        cd test
        python3 run_suite.py --hw npu --suite stage-b-test-2-npu-a2 --auto-partition-id ${{ matrix.part }} --auto-partition-size 2
|
|
|
|
# Runs the stage-b-test-4-npu-a3 suite on the linux-aarch64-a3-4 runner
# (single job, no partitioning). Gated on the main_package output of
# check-changes.
stage-b-test-4-npu-a3:
  needs:
    - check-changes
    - pr-gate
  if: needs.check-changes.outputs.main_package == 'true'
  runs-on: linux-aarch64-a3-4
  container:
    image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.5.0-a3-ubuntu22.04-py3.11
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}

    # Registers the workspace as a git safe.directory at system level.
    - name: Mark repository safe
      run: |
        git config --system --add safe.directory ${GITHUB_WORKSPACE}

    - name: Install dependencies
      env:
        TORCH_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/whl/cpu"
        PYPI_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple"
        GITHUB_PROXY_URL: "https://gh-proxy.test.osinfra.cn/"
      run: |
        # Point apt and pip at the in-cluster cache service
        CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local"
        sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list
        pip config set global.index-url http://${CACHING_URL}/pypi/simple
        pip config set global.trusted-host "${CACHING_URL}"

        bash scripts/ci/npu/npu_ci_install_dependency.sh a3
        # Stage the ShareGPT file from the local modelscope cache into /tmp
        cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp
        # Stage the gsm8k dataset into /tmp
        cp ~/.cache/modelscope/hub/datasets/tmp/test.jsonl /tmp

    # NOTE(review): the step is capped at 60 minutes while --timeout-per-file
    # is 3600 (presumably seconds, i.e. the same 60 minutes) - confirm the
    # per-file limit can ever fire before the step-level timeout.
    - name: Run test
      timeout-minutes: 60
      env:
        SGLANG_USE_MODELSCOPE: true
        SGLANG_IS_IN_CI: true
        HF_ENDPOINT: https://hf-mirror.com
        TORCH_EXTENSIONS_DIR: /tmp/torch_extensions
        PYTORCH_NPU_ALLOC_CONF: "expandable_segments:True"
        STREAMS_PER_DEVICE: 32
      run: |
        cd test
        python3 run_suite.py --hw npu --suite stage-b-test-4-npu-a3 --timeout-per-file 3600
|
|
|
|
|
|
# Runs the stage-b-test-16-npu-a3 suite on the linux-aarch64-a3-16 runner
# (single job, no partitioning). Gated on the main_package output of
# check-changes.
stage-b-test-16-npu-a3:
  needs:
    - check-changes
    - pr-gate
  if: needs.check-changes.outputs.main_package == 'true'
  runs-on: linux-aarch64-a3-16
  container:
    image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.5.0-a3-ubuntu22.04-py3.11
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}

    # Registers the workspace as a git safe.directory at system level.
    - name: Mark repository safe
      run: |
        git config --system --add safe.directory ${GITHUB_WORKSPACE}

    - name: Install dependencies
      env:
        TORCH_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/whl/cpu"
        PYPI_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple"
        GITHUB_PROXY_URL: "https://gh-proxy.test.osinfra.cn/"
      run: |
        # Point apt and pip at the in-cluster cache service
        CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local"
        sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list
        pip config set global.index-url http://${CACHING_URL}/pypi/simple
        pip config set global.trusted-host "${CACHING_URL}"

        bash scripts/ci/npu/npu_ci_install_dependency.sh a3
        # Stage the ShareGPT file from the local modelscope cache into /tmp
        cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp
        # Stage the gsm8k dataset into /tmp
        cp ~/.cache/modelscope/hub/datasets/tmp/test.jsonl /tmp

    # NOTE(review): the step is capped at 60 minutes while --timeout-per-file
    # is 3600 (presumably seconds, i.e. the same 60 minutes) - confirm the
    # per-file limit can ever fire before the step-level timeout.
    - name: Run test
      timeout-minutes: 60
      env:
        SGLANG_USE_MODELSCOPE: true
        SGLANG_IS_IN_CI: true
        HF_ENDPOINT: https://hf-mirror.com
        TORCH_EXTENSIONS_DIR: /tmp/torch_extensions
        PYTORCH_NPU_ALLOC_CONF: "expandable_segments:True"
        STREAMS_PER_DEVICE: 32
      run: |
        cd test
        python3 run_suite.py --hw npu --suite stage-b-test-16-npu-a3 --timeout-per-file 3600
|
|
|
|
# Runs the multimodal_gen "1-npu" suite on the linux-aarch64-a3-2 runner.
# Gated on the multimodal_gen output of check-changes.
multimodal-gen-test-1-npu-a3:
  needs: [check-changes, pr-gate]
  if: needs.check-changes.outputs.multimodal_gen == 'true'
  runs-on: linux-aarch64-a3-2
  container:
    image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-a3-ubuntu22.04-py3.11
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # Honour the optional workflow_call `ref` input, exactly like
        # check-changes and the stage-b jobs do; without it this job would
        # test a different commit than the one the caller asked for.
        ref: ${{ inputs.ref || github.ref }}

    # Registers the workspace as a git safe.directory at system level.
    - name: Mark repository safe
      run: |
        git config --system --add safe.directory ${GITHUB_WORKSPACE}

    - name: Install dependencies
      env:
        TORCH_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/whl/cpu"
        PYPI_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple"
        GITHUB_PROXY_URL: "https://gh-proxy.test.osinfra.cn/"
      run: |
        # speed up by using infra cache services
        CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local"
        sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list
        pip config set global.index-url http://${CACHING_URL}/pypi/simple
        pip config set global.trusted-host "${CACHING_URL}"

        bash scripts/ci/npu/npu_ci_install_dependency.sh a3 diffusion
        # copy required file from our daily cache
        cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp
        # copy gsm8k dataset
        cp ~/.cache/modelscope/hub/datasets/tmp/test.jsonl /tmp

    - name: Run test
      timeout-minutes: 60
      env:
        SGLANG_USE_MODELSCOPE: true
        SGLANG_IS_IN_CI: true
        HF_ENDPOINT: https://hf-mirror.com
        TORCH_EXTENSIONS_DIR: /tmp/torch_extensions
        PYTORCH_NPU_ALLOC_CONF: "expandable_segments:True"
        STREAMS_PER_DEVICE: 32
      run: |
        # NOTE(review): the container image tag is cann:8.3.rc2 while this
        # path names 8.3.RC1 - confirm the directory exists in the image.
        export PATH="/usr/local/Ascend/8.3.RC1/compiler/bishengir/bin:${PATH}"
        cd python
        python3 sglang/multimodal_gen/test/run_suite.py --suite 1-npu
|
|
|
|
# Runs the multimodal_gen "2-npu" suite on the linux-aarch64-a3-16 runner.
# Gated on the multimodal_gen output of check-changes.
multimodal-gen-test-2-npu-a3:
  needs: [check-changes, pr-gate]
  if: needs.check-changes.outputs.multimodal_gen == 'true'
  runs-on: linux-aarch64-a3-16
  container:
    image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-a3-ubuntu22.04-py3.11
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # Honour the optional workflow_call `ref` input, exactly like
        # check-changes and the stage-b jobs do; without it this job would
        # test a different commit than the one the caller asked for.
        ref: ${{ inputs.ref || github.ref }}

    # Registers the workspace as a git safe.directory at system level.
    - name: Mark repository safe
      run: |
        git config --system --add safe.directory ${GITHUB_WORKSPACE}

    - name: Install dependencies
      env:
        TORCH_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/whl/cpu"
        PYPI_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple"
        GITHUB_PROXY_URL: "https://gh-proxy.test.osinfra.cn/"
      run: |
        # speed up by using infra cache services
        CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local"
        sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list
        pip config set global.index-url http://${CACHING_URL}/pypi/simple
        pip config set global.trusted-host "${CACHING_URL}"

        bash scripts/ci/npu/npu_ci_install_dependency.sh a3 diffusion
        # copy required file from our daily cache
        cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp
        # copy gsm8k dataset
        cp ~/.cache/modelscope/hub/datasets/tmp/test.jsonl /tmp

    - name: Run test
      timeout-minutes: 60
      env:
        SGLANG_USE_MODELSCOPE: true
        SGLANG_IS_IN_CI: true
        HF_ENDPOINT: https://hf-mirror.com
        TORCH_EXTENSIONS_DIR: /tmp/torch_extensions
        PYTORCH_NPU_ALLOC_CONF: "expandable_segments:True"
        STREAMS_PER_DEVICE: 32
      run: |
        # NOTE(review): the container image tag is cann:8.3.rc2 while this
        # path names 8.3.RC1 - confirm the directory exists in the image.
        export PATH="/usr/local/Ascend/8.3.RC1/compiler/bishengir/bin:${PATH}"
        cd python
        python3 sglang/multimodal_gen/test/run_suite.py --suite 2-npu
|
|
|
|
# Runs the multimodal_gen "8-npu" suite on the linux-aarch64-a3-8 runner.
# Gated on the multimodal_gen output of check-changes.
multimodal-gen-test-8-npu-a3:
  needs: [check-changes, pr-gate]
  if: needs.check-changes.outputs.multimodal_gen == 'true'
  runs-on: linux-aarch64-a3-8
  container:
    image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.5.0-a3-ubuntu22.04-py3.11
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # Honour the optional workflow_call `ref` input, exactly like
        # check-changes and the stage-b jobs do; without it this job would
        # test a different commit than the one the caller asked for.
        ref: ${{ inputs.ref || github.ref }}

    # Registers the workspace as a git safe.directory at system level.
    - name: Mark repository safe
      run: |
        git config --system --add safe.directory ${GITHUB_WORKSPACE}

    - name: Install dependencies
      env:
        TORCH_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/whl/cpu"
        PYPI_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple"
        GITHUB_PROXY_URL: "https://gh-proxy.test.osinfra.cn/"
      run: |
        # speed up by using infra cache services
        CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local"
        sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list
        pip config set global.index-url http://${CACHING_URL}/pypi/simple
        pip config set global.trusted-host "${CACHING_URL}"

        bash scripts/ci/npu/npu_ci_install_dependency.sh a3 diffusion
        # copy required file from our daily cache
        cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp
        # copy gsm8k dataset
        cp ~/.cache/modelscope/hub/datasets/tmp/test.jsonl /tmp

    - name: Run test
      timeout-minutes: 60
      env:
        SGLANG_USE_MODELSCOPE: true
        SGLANG_IS_IN_CI: true
        HF_ENDPOINT: https://hf-mirror.com
        TORCH_EXTENSIONS_DIR: /tmp/torch_extensions
        PYTORCH_NPU_ALLOC_CONF: "expandable_segments:True"
        STREAMS_PER_DEVICE: 32
      run: |
        cd python
        python3 sglang/multimodal_gen/test/run_suite.py --suite 8-npu
|
|
|
|
# Collapses the results of every listed job into a single pass/fail status.
# Fails when any dependency reports "failure" or "cancelled"; any other result
# (including "skipped") is accepted.
#
# NOTE(review): pr-gate is not in the needs list. If the gate fails, the test
# jobs that depend on it are skipped and this job still succeeds - confirm
# that is the intended behaviour.
pr-test-npu-finish:
  needs:
    - check-changes

    - stage-b-test-1-npu-a2
    - stage-b-test-2-npu-a2
    - stage-b-test-4-npu-a3
    - stage-b-test-16-npu-a3

    - multimodal-gen-test-1-npu-a3
    - multimodal-gen-test-2-npu-a3
    - multimodal-gen-test-8-npu-a3
  # Must run even when dependencies failed or were skipped, otherwise this
  # job would itself be skipped and report nothing.
  if: always()
  runs-on: ubuntu-latest
  steps:
    - name: Check all dependent job statuses
      env:
        # The serialized needs context is passed through the environment
        # rather than pasted into a single-quoted shell literal, so a quote
        # character inside any job output cannot break or alter the script.
        NEEDS_JSON: ${{ toJson(needs) }}
      run: |
        # Get a list of all job names from the JSON keys
        job_names=$(echo "$NEEDS_JSON" | jq -r 'keys_unsorted[]')

        for job in $job_names; do
          # For each job, extract its result
          result=$(echo "$NEEDS_JSON" | jq -r --arg j "$job" '.[$j].result')

          # Print the job name and its result
          echo "$job: $result"

          # Check for failure or cancellation and exit if found
          if [[ "$result" == "failure" || "$result" == "cancelled" ]]; then
            echo "The above jobs failed."
            exit 1
          fi
        done
        # If the loop completes, no job failed or was cancelled
        echo "All jobs completed successfully"
        exit 0
|