# File listing metadata: 797 lines, 27 KiB, YAML
# Nightly test workflow for NVIDIA GPU runners: trigger definitions, concurrency
# policy and the environment shared by every job.
name: Nightly Test (Nvidia)

on:
  schedule:
    # Once a day at 00:00 (GitHub evaluates cron schedules in UTC).
    - cron: "0 0 * * *"

  # Manual run from the Actions UI with a drop-down to pick a single job.
  workflow_dispatch:
    inputs:
      job_filter:
        type: choice
        required: false
        default: "all"
        description: 'Select which job to run (leave empty or "all" to run all jobs)'
        options:
          - "all"
          - "nightly-test-general-1-gpu-h100"
          - "nightly-test-general-4-gpu-h100"
          - "nightly-test-general-8-gpu-h200"
          - "nightly-test-general-8-gpu-h20"
          - "nightly-test-general-8-gpu-b200"
          - "nightly-test-text-accuracy-2-gpu-h100"
          - "nightly-test-text-perf-2-gpu-h100"
          - "nightly-test-vlm-accuracy-2-gpu-h100"
          - "nightly-test-vlm-perf-2-gpu-h100"
          - "nightly-test-multimodal-server-1-gpu"
          - "nightly-test-multimodal-server-2-gpu"
          - "nightly-test-perf-4-gpu-b200"
          # No job carries this exact id; the specialized 8-GPU B200 job accepts it in its filter.
          - "nightly-test-perf-8-gpu-b200"
          - "nightly-test-specialized-8-gpu-b200"
          - "nightly-test-kernel-1-gpu-h100"
          - "nightly-test-diffusion-comparison"
          - "nightly-test-kernel-8-gpu-h200"

  # Reusable-workflow entry point: callers may pin a ref and/or pick a single job.
  workflow_call:
    inputs:
      ref:
        type: string
        required: false
        default: ""
        description: "Git ref (branch, tag, or SHA) to test. If not provided, uses the default branch."
      job_filter:
        type: string
        required: false
        default: "all"
        description: 'Select which job to run (leave empty or "all" to run all jobs)'

concurrency:
  # One concurrency group per tested ref.
  group: nightly-test-nvidia-${{ inputs.ref || github.ref }}
  # NOTE(review): in a reusable workflow the github context belongs to the caller, so
  # github.event_name is presumably never 'workflow_call' and this is always true.
  # Confirm whether called runs are really meant to be exempt from cancellation.
  cancel-in-progress: ${{ github.event_name != 'workflow_call' }}

# Environment exported to every job in this workflow.
env:
  SGLANG_IS_IN_CI: "true"
  SGLANG_CUDA_COREDUMP: "1"
  HF_HUB_DOWNLOAD_TIMEOUT: "300"
  HF_HUB_ETAG_TIMEOUT: "300"

jobs:
# General tests - 1 GPU
  nightly-test-general-1-gpu-h100:
    # Only in the upstream repository, and only when the filter is empty, "all" or names this job.
    if: >-
      github.repository == 'sgl-project/sglang' &&
      (inputs.job_filter == '' ||
      inputs.job_filter == 'all' ||
      inputs.job_filter == 'nightly-test-general-1-gpu-h100')
    runs-on: 1-gpu-h100
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # Caller-supplied ref wins; otherwise the ref that triggered the run.
          ref: ${{ inputs.ref || github.ref }}

      - uses: ./.github/actions/check-maintenance

      - name: Install dependencies
        run: |
          bash scripts/ci/cuda/ci_install_dependency.sh

      - name: Run test
        timeout-minutes: 60
        run: |
          cd test
          python3 run_suite.py --hw cuda --suite nightly-1-gpu --nightly --continue-on-error

      # Runs regardless of the outcome of the steps above.
      - if: always()
        uses: ./.github/actions/upload-cuda-coredumps
# JIT kernel full unit tests (expanded parameter ranges via SGLANG_JIT_KERNEL_RUN_FULL_TESTS)
  nightly-test-kernel-1-gpu-h100:
    # Only in the upstream repository, and only when the filter is empty, "all" or names this job.
    if: >-
      github.repository == 'sgl-project/sglang' &&
      (inputs.job_filter == '' ||
      inputs.job_filter == 'all' ||
      inputs.job_filter == 'nightly-test-kernel-1-gpu-h100')
    runs-on: 1-gpu-h100
    # Upper bound for the whole job; individual steps set tighter limits.
    timeout-minutes: 240
    env:
      # Full jit_kernel test grids (see sglang.jit_kernel.utils.should_run_full_tests)
      SGLANG_JIT_KERNEL_RUN_FULL_TESTS: "1"
      # Match pr-test-jit-kernel workflow for consistent JIT warmup behavior
      SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "true"
      # Allow maintenance bypass on default branch (same semantics as PR JIT workflow)
      SGLANG_PR_TEST_BYPASS_MAINTENANCE_ON_MAIN: ${{ github.ref == 'refs/heads/main' && 'true' || 'false' }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      - uses: ./.github/actions/check-maintenance

      - name: Install dependencies
        timeout-minutes: 20
        run: |
          bash scripts/ci/cuda/ci_install_dependency.sh

      - name: Run jit kernel nightly suite
        timeout-minutes: 60
        run: |
          cd test
          python3 run_suite.py --hw cuda --suite nightly-kernel-1-gpu --nightly --continue-on-error

      # Runs regardless of the outcome of the steps above.
      - if: always()
        uses: ./.github/actions/upload-cuda-coredumps
# JIT kernel full unit tests - multi-GPU suite on the 8 GPU H200 runner
  nightly-test-kernel-8-gpu-h200:
    # Only in the upstream repository, and only when the filter is empty, "all" or names this job.
    if: >-
      github.repository == 'sgl-project/sglang' &&
      (inputs.job_filter == '' ||
      inputs.job_filter == 'all' ||
      inputs.job_filter == 'nightly-test-kernel-8-gpu-h200')
    runs-on: 8-gpu-h200
    # Upper bound for the whole job; individual steps set tighter limits.
    timeout-minutes: 240
    # Same three variables as the 1-GPU kernel job.
    env:
      SGLANG_JIT_KERNEL_RUN_FULL_TESTS: "1"
      SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "true"
      SGLANG_PR_TEST_BYPASS_MAINTENANCE_ON_MAIN: ${{ github.ref == 'refs/heads/main' && 'true' || 'false' }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      - uses: ./.github/actions/check-maintenance

      - name: Install dependencies
        timeout-minutes: 20
        run: |
          bash scripts/ci/cuda/ci_install_dependency.sh

      - name: Run multi-GPU jit kernel nightly suite
        timeout-minutes: 90
        run: |
          cd test
          python3 run_suite.py --hw cuda --suite nightly-kernel-8-gpu-h200 --nightly --continue-on-error

      # Runs regardless of the outcome of the steps above.
      - if: always()
        uses: ./.github/actions/upload-cuda-coredumps
# General tests - 4 GPU H100
  nightly-test-general-4-gpu-h100:
    # Only in the upstream repository, and only when the filter is empty, "all" or names this job.
    if: >-
      github.repository == 'sgl-project/sglang' &&
      (inputs.job_filter == '' ||
      inputs.job_filter == 'all' ||
      inputs.job_filter == 'nightly-test-general-4-gpu-h100')
    runs-on: 4-gpu-h100
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      - uses: ./.github/actions/check-maintenance

      - name: Install dependencies
        run: |
          bash scripts/ci/cuda/ci_install_dependency.sh

      - name: Run test
        timeout-minutes: 30
        run: |
          cd test
          python3 run_suite.py --hw cuda --suite nightly-4-gpu --nightly --continue-on-error

      # Runs regardless of the outcome of the steps above.
      - if: always()
        uses: ./.github/actions/upload-cuda-coredumps
# General tests - 8 GPU H200
  nightly-test-general-8-gpu-h200:
    # Only in the upstream repository, and only when the filter is empty, "all" or names this job.
    if: >-
      github.repository == 'sgl-project/sglang' &&
      (inputs.job_filter == '' ||
      inputs.job_filter == 'all' ||
      inputs.job_filter == 'nightly-test-general-8-gpu-h200')
    runs-on: 8-gpu-h200
    strategy:
      # A failing partition does not cancel its siblings.
      fail-fast: false
      matrix:
        # Must stay in sync with --auto-partition-size=4 below.
        partition:
          - 0
          - 1
          - 2
          - 3
    env:
      RUNNER_LABELS: 8-gpu-h200
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      - uses: ./.github/actions/check-maintenance

      - name: Install dependencies
        run: |
          bash scripts/ci/cuda/ci_install_dependency.sh

      # Each matrix entry passes its own partition id to the common 8-GPU suite.
      - name: Run common 8-GPU model tests
        if: always()
        timeout-minutes: 300
        env:
          TRACE_BASE_URL: https://raw.githubusercontent.com/sglang-bot/sglang-ci-data/main/traces/${{ github.run_id }}
          PERFETTO_RELAY_URL: ${{ vars.PERFETTO_RELAY_URL }}
          GPU_CONFIG: "8-gpu-h200"
          IS_H200: "1"
        run: |
          cd test
          python3 run_suite.py --hw cuda --suite nightly-8-gpu-common --nightly --timeout-per-file=18000 --continue-on-error --auto-partition-id=${{ matrix.partition }} --auto-partition-size=4

      # Best effort: always attempted, and a failure here never fails the job.
      - name: Publish traces to storage repo
        if: always()
        continue-on-error: true
        env:
          GITHUB_TOKEN: ${{ secrets.GH_PAT_FOR_NIGHTLY_CI_DATA }}
          GITHUB_RUN_ID: ${{ github.run_id }}
          GITHUB_RUN_NUMBER: ${{ github.run_number }}
        run: |
          TRACE_ARGS=""
          for dir in test/performance_profiles_*/; do
            [ -d "$dir" ] && TRACE_ARGS="$TRACE_ARGS --traces-dir $dir"
          done
          if [ -n "$TRACE_ARGS" ]; then
            python3 scripts/ci/utils/publish_traces.py $TRACE_ARGS
            find test/performance_profiles_*/ -name '*.json.gz' -delete
          else
            echo "No trace directories found, skipping publish"
          fi

      # NOTE(review): unlike the steps around it this has no `if: always()`, so it is skipped
      # once an earlier step fails; it also passes no partition flags, so presumably every
      # matrix entry runs the complete H200 suite. Confirm both are intended.
      - name: Run test
        timeout-minutes: 30
        env:
          GPU_CONFIG: "8-gpu-h200"
        run: |
          cd test
          python3 run_suite.py --hw cuda --suite nightly-8-gpu-h200 --nightly --continue-on-error

      - name: Collect performance metrics
        if: always()
        run: |
          python3 scripts/ci/utils/save_metrics.py \
            --gpu-config 8-gpu-h200 \
            --partition ${{ matrix.partition }} \
            --run-id ${{ github.run_id }} \
            --output test/metrics-8gpu-h200-partition-${{ matrix.partition }}.json \
            --search-dir test/performance_profiles_8_gpu \
            --search-dir test

      # One artifact per partition; a missing metrics file is tolerated.
      - name: Upload partition metrics
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: metrics-8gpu-h200-partition-${{ matrix.partition }}
          path: test/metrics-8gpu-h200-partition-${{ matrix.partition }}.json
          if-no-files-found: ignore
          retention-days: 5

      - if: always()
        uses: ./.github/actions/upload-cuda-coredumps
        with:
          artifact-suffix: ${{ matrix.partition }}
# General tests - 8 GPU H20
  nightly-test-general-8-gpu-h20:
    # Only in the upstream repository, and only when the filter is empty, "all" or names this job.
    if: >-
      github.repository == 'sgl-project/sglang' &&
      (inputs.job_filter == '' ||
      inputs.job_filter == 'all' ||
      inputs.job_filter == 'nightly-test-general-8-gpu-h20')
    runs-on: 8-gpu-h20
    env:
      # Comma-separated device list exported to every step of this job.
      SGLANG_CI_RDMA_ALL_DEVICES: "mlx5_1,mlx5_2,mlx5_3,mlx5_4"
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      - uses: ./.github/actions/check-maintenance

      - name: Install dependencies
        run: |
          bash scripts/ci/cuda/ci_install_dependency.sh

      - name: Run test
        timeout-minutes: 30
        env:
          GPU_CONFIG: "8-gpu-h20"
        run: |
          cd test
          python3 run_suite.py --hw cuda --suite nightly-8-gpu-h20 --nightly --continue-on-error

      # Runs regardless of the outcome of the steps above.
      - if: always()
        uses: ./.github/actions/upload-cuda-coredumps
# General tests - 8 GPU B200
  nightly-test-general-8-gpu-b200:
    # Only in the upstream repository, and only when the filter is empty, "all" or names this job.
    if: >-
      github.repository == 'sgl-project/sglang' &&
      (inputs.job_filter == '' ||
      inputs.job_filter == 'all' ||
      inputs.job_filter == 'nightly-test-general-8-gpu-b200')
    runs-on: 8-gpu-b200
    strategy:
      # A failing partition does not cancel its siblings.
      fail-fast: false
      matrix:
        # Must stay in sync with --auto-partition-size=4 below.
        partition:
          - 0
          - 1
          - 2
          - 3
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      - uses: ./.github/actions/check-maintenance

      - name: Install dependencies
        run: |
          bash scripts/ci/cuda/ci_install_dependency.sh

      # Each matrix entry passes its own partition id to the common 8-GPU suite.
      - name: Run common 8-GPU model tests
        if: always()
        timeout-minutes: 300
        env:
          TRACE_BASE_URL: https://raw.githubusercontent.com/sglang-bot/sglang-ci-data/main/traces/${{ github.run_id }}
          PERFETTO_RELAY_URL: ${{ vars.PERFETTO_RELAY_URL }}
          GPU_CONFIG: "8-gpu-b200"
        run: |
          cd test
          python3 run_suite.py --hw cuda --suite nightly-8-gpu-common --nightly --timeout-per-file=12000 --continue-on-error --auto-partition-id=${{ matrix.partition }} --auto-partition-size=4

      # Best effort: always attempted, and a failure here never fails the job.
      - name: Publish traces to storage repo
        if: always()
        continue-on-error: true
        env:
          GITHUB_TOKEN: ${{ secrets.GH_PAT_FOR_NIGHTLY_CI_DATA }}
          GITHUB_RUN_ID: ${{ github.run_id }}
          GITHUB_RUN_NUMBER: ${{ github.run_number }}
        run: |
          TRACE_ARGS=""
          for dir in test/performance_profiles_*/; do
            [ -d "$dir" ] && TRACE_ARGS="$TRACE_ARGS --traces-dir $dir"
          done
          if [ -n "$TRACE_ARGS" ]; then
            python3 scripts/ci/utils/publish_traces.py $TRACE_ARGS
            find test/performance_profiles_*/ -name '*.json.gz' -delete
          else
            echo "No trace directories found, skipping publish"
          fi

      - name: Collect performance metrics
        if: always()
        run: |
          python3 scripts/ci/utils/save_metrics.py \
            --gpu-config 8-gpu-b200 \
            --partition ${{ matrix.partition }} \
            --run-id ${{ github.run_id }} \
            --output test/metrics-8gpu-b200-partition-${{ matrix.partition }}.json \
            --search-dir test/performance_profiles_8_gpu \
            --search-dir test

      # One artifact per partition; a missing metrics file is tolerated.
      - name: Upload partition metrics
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: metrics-8gpu-b200-partition-${{ matrix.partition }}
          path: test/metrics-8gpu-b200-partition-${{ matrix.partition }}.json
          if-no-files-found: ignore
          retention-days: 5

      - if: always()
        uses: ./.github/actions/upload-cuda-coredumps
        with:
          artifact-suffix: ${{ matrix.partition }}
# Text model accuracy tests
  nightly-test-text-accuracy-2-gpu-h100:
    # Only in the upstream repository, and only when the filter is empty, "all" or names this job.
    if: >-
      github.repository == 'sgl-project/sglang' &&
      (inputs.job_filter == '' ||
      inputs.job_filter == 'all' ||
      inputs.job_filter == 'nightly-test-text-accuracy-2-gpu-h100')
    runs-on: 2-gpu-h100
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      - uses: ./.github/actions/check-maintenance

      - name: Install dependencies
        run: |
          bash scripts/ci/cuda/ci_install_dependency.sh

      - name: Run eval test for text models
        timeout-minutes: 120
        run: |
          cd test
          python3 run_suite.py --hw cuda --suite nightly-eval-text-2-gpu --nightly --continue-on-error --timeout-per-file 4500

      # Runs regardless of the outcome of the steps above.
      - if: always()
        uses: ./.github/actions/upload-cuda-coredumps
# Text model performance tests
  nightly-test-text-perf-2-gpu-h100:
    # Only in the upstream repository, and only when the filter is empty, "all" or names this job.
    if: >-
      github.repository == 'sgl-project/sglang' &&
      (inputs.job_filter == '' ||
      inputs.job_filter == 'all' ||
      inputs.job_filter == 'nightly-test-text-perf-2-gpu-h100')
    runs-on: 2-gpu-h100
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      - uses: ./.github/actions/check-maintenance

      - name: Install dependencies
        run: |
          bash scripts/ci/cuda/ci_install_dependency.sh

      - name: Run performance test for text models
        timeout-minutes: 180
        env:
          TRACE_BASE_URL: https://raw.githubusercontent.com/sglang-bot/sglang-ci-data/main/traces/${{ github.run_id }}
          PERFETTO_RELAY_URL: ${{ vars.PERFETTO_RELAY_URL }}
          GPU_CONFIG: "2-gpu-h100"
        run: |
          cd test
          # Start from a clean profile directory so only this run's traces are published.
          rm -rf performance_profiles_text_models/
          python3 run_suite.py --hw cuda --suite nightly-perf-text-2-gpu --nightly --continue-on-error --timeout-per-file 3600

      # Same policy as the 8-GPU jobs: publish whatever traces exist even when a perf test
      # failed, and never let a publish problem change the job result.
      - name: Publish traces to storage repo
        if: always()
        continue-on-error: true
        env:
          GITHUB_TOKEN: ${{ secrets.GH_PAT_FOR_NIGHTLY_CI_DATA }}
          GITHUB_RUN_ID: ${{ github.run_id }}
          GITHUB_RUN_NUMBER: ${{ github.run_number }}
        run: |
          # The directory is absent when the suite never got far enough to write a profile.
          if [ -d test/performance_profiles_text_models ]; then
            python3 scripts/ci/utils/publish_traces.py --traces-dir test/performance_profiles_text_models
          else
            echo "No trace directories found, skipping publish"
          fi

      # Runs regardless of the outcome of the steps above.
      - if: always()
        uses: ./.github/actions/upload-cuda-coredumps
# VLM accuracy tests
  nightly-test-vlm-accuracy-2-gpu-h100:
    # Only in the upstream repository, and only when the filter is empty, "all" or names this job.
    if: >-
      github.repository == 'sgl-project/sglang' &&
      (inputs.job_filter == '' ||
      inputs.job_filter == 'all' ||
      inputs.job_filter == 'nightly-test-vlm-accuracy-2-gpu-h100')
    runs-on: 2-gpu-h100
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      - uses: ./.github/actions/check-maintenance

      - name: Install dependencies
        run: |
          bash scripts/ci/cuda/ci_install_dependency.sh

      - name: Run eval test for VLM models (fixed MMMU-100)
        timeout-minutes: 240
        run: |
          cd test
          python3 run_suite.py --hw cuda --suite nightly-eval-vlm-2-gpu --nightly --continue-on-error --timeout-per-file 9000

      # Runs regardless of the outcome of the steps above.
      - if: always()
        uses: ./.github/actions/upload-cuda-coredumps
# VLM performance tests
  nightly-test-vlm-perf-2-gpu-h100:
    # Only in the upstream repository, and only when the filter is empty, "all" or names this job.
    if: >-
      github.repository == 'sgl-project/sglang' &&
      (inputs.job_filter == '' ||
      inputs.job_filter == 'all' ||
      inputs.job_filter == 'nightly-test-vlm-perf-2-gpu-h100')
    runs-on: 2-gpu-h100
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      - uses: ./.github/actions/check-maintenance

      - name: Install dependencies
        run: |
          bash scripts/ci/cuda/ci_install_dependency.sh

      - name: Run perf test for VLM models (MMMU)
        timeout-minutes: 240
        env:
          TRACE_BASE_URL: https://raw.githubusercontent.com/sglang-bot/sglang-ci-data/main/traces/${{ github.run_id }}
          PERFETTO_RELAY_URL: ${{ vars.PERFETTO_RELAY_URL }}
          GPU_CONFIG: "2-gpu-h100"
        run: |
          cd test
          # Start from a clean profile directory so only this run's traces are published.
          rm -rf performance_profiles_vlms/
          python3 run_suite.py --hw cuda --suite nightly-perf-vlm-2-gpu --nightly --continue-on-error --timeout-per-file 3600

      # Same policy as the 8-GPU jobs: publish whatever traces exist even when a perf test
      # failed, and never let a publish problem change the job result.
      - name: Publish traces to storage repo
        if: always()
        continue-on-error: true
        env:
          GITHUB_TOKEN: ${{ secrets.GH_PAT_FOR_NIGHTLY_CI_DATA }}
          GITHUB_RUN_ID: ${{ github.run_id }}
          GITHUB_RUN_NUMBER: ${{ github.run_number }}
        run: |
          # The directory is absent when the suite never got far enough to write a profile.
          if [ -d test/performance_profiles_vlms ]; then
            python3 scripts/ci/utils/publish_traces.py --traces-dir test/performance_profiles_vlms
          else
            echo "No trace directories found, skipping publish"
          fi

      # Runs regardless of the outcome of the steps above.
      - if: always()
        uses: ./.github/actions/upload-cuda-coredumps
# Diffusion performance tests - 1 GPU
  nightly-test-multimodal-server-1-gpu:
    # Only in the upstream repository, and only when the filter is empty, "all" or names this job.
    if: >-
      github.repository == 'sgl-project/sglang' &&
      (inputs.job_filter == '' ||
      inputs.job_filter == 'all' ||
      inputs.job_filter == 'nightly-test-multimodal-server-1-gpu')
    runs-on: 1-gpu-h100
    strategy:
      # A failing partition does not cancel its sibling.
      fail-fast: false
      max-parallel: 5
      matrix:
        # Must stay in sync with --total-partitions 2 below.
        part:
          - 0
          - 1
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      - uses: ./.github/actions/check-maintenance

      - name: Install dependencies
        run: |
          bash scripts/ci/cuda/ci_install_dependency.sh diffusion
          pip install slack_sdk

      - name: Run diffusion server tests
        timeout-minutes: 90
        env:
          SGLANG_DIFFUSION_SLACK_TOKEN: ${{ secrets.SGLANG_DIFFUSION_SLACK_TOKEN }}
          GITHUB_RUN_ID: ${{ github.run_id }}
          GPU_CONFIG: "1-gpu-h100"
        run: |
          cd python
          python3 sglang/multimodal_gen/test/run_suite.py \
            --suite 1-gpu \
            --partition-id ${{ matrix.part }} \
            --total-partitions 2

      - name: Collect diffusion performance metrics
        if: always()
        run: |
          python3 scripts/ci/utils/diffusion/save_diffusion_metrics.py \
            --gpu-config 1-gpu-h100 \
            --run-id ${{ github.run_id }} \
            --output python/diffusion-metrics-1gpu-partition-${{ matrix.part }}.json \
            --results-json python/diffusion-results.json

      # One artifact per partition; a missing metrics file is tolerated.
      - name: Upload diffusion metrics
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: diffusion-metrics-1gpu-partition-${{ matrix.part }}
          path: python/diffusion-metrics-1gpu-partition-${{ matrix.part }}.json
          if-no-files-found: ignore
          retention-days: 90

      - if: always()
        uses: ./.github/actions/upload-cuda-coredumps
        with:
          artifact-suffix: ${{ matrix.part }}
# Diffusion performance tests - 2 GPU
  nightly-test-multimodal-server-2-gpu:
    # Only in the upstream repository, and only when the filter is empty, "all" or names this job.
    if: >-
      github.repository == 'sgl-project/sglang' &&
      (inputs.job_filter == '' ||
      inputs.job_filter == 'all' ||
      inputs.job_filter == 'nightly-test-multimodal-server-2-gpu')
    runs-on: 2-gpu-h100
    strategy:
      # A failing partition does not cancel its sibling.
      fail-fast: false
      max-parallel: 5
      matrix:
        # Must stay in sync with --total-partitions 2 below.
        part:
          - 0
          - 1
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      - uses: ./.github/actions/check-maintenance

      - name: Install dependencies
        run: |
          bash scripts/ci/cuda/ci_install_dependency.sh diffusion
          pip install slack_sdk

      - name: Run diffusion server tests
        timeout-minutes: 90
        env:
          SGLANG_DIFFUSION_SLACK_TOKEN: ${{ secrets.SGLANG_DIFFUSION_SLACK_TOKEN }}
          GITHUB_RUN_ID: ${{ github.run_id }}
          GPU_CONFIG: "2-gpu-h100"
        run: |
          cd python
          python3 sglang/multimodal_gen/test/run_suite.py \
            --suite 2-gpu \
            --partition-id ${{ matrix.part }} \
            --total-partitions 2

      - name: Collect diffusion performance metrics
        if: always()
        run: |
          python3 scripts/ci/utils/diffusion/save_diffusion_metrics.py \
            --gpu-config 2-gpu-h100 \
            --run-id ${{ github.run_id }} \
            --output python/diffusion-metrics-2gpu-partition-${{ matrix.part }}.json \
            --results-json python/diffusion-results.json

      # One artifact per partition; a missing metrics file is tolerated.
      - name: Upload diffusion metrics
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: diffusion-metrics-2gpu-partition-${{ matrix.part }}
          path: python/diffusion-metrics-2gpu-partition-${{ matrix.part }}.json
          if-no-files-found: ignore
          retention-days: 90

      - if: always()
        uses: ./.github/actions/upload-cuda-coredumps
        with:
          artifact-suffix: ${{ matrix.part }}
# B200 Performance tests - 4 GPU
  nightly-test-perf-4-gpu-b200:
    # Only in the upstream repository, and only when the filter is empty, "all" or names this job.
    if: >-
      github.repository == 'sgl-project/sglang' &&
      (inputs.job_filter == '' ||
      inputs.job_filter == 'all' ||
      inputs.job_filter == 'nightly-test-perf-4-gpu-b200')
    runs-on: 4-gpu-b200
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      - uses: ./.github/actions/check-maintenance

      - name: Install dependencies
        run: |
          bash scripts/ci/cuda/ci_install_dependency.sh

      - name: Run test
        timeout-minutes: 300
        run: |
          cd test
          python3 run_suite.py --hw cuda --suite nightly-4-gpu-b200 --nightly --continue-on-error --timeout-per-file 12000

      # Runs regardless of the outcome of the steps above.
      - if: always()
        uses: ./.github/actions/upload-cuda-coredumps
# Specialized B200 tests - 8 GPU, for specific backends and configs
  nightly-test-specialized-8-gpu-b200:
    # Upstream repository only. Besides its own id this job also answers to the
    # 'nightly-test-perf-8-gpu-b200' filter value.
    if: >-
      github.repository == 'sgl-project/sglang' &&
      (inputs.job_filter == '' ||
      inputs.job_filter == 'all' ||
      inputs.job_filter == 'nightly-test-perf-8-gpu-b200' ||
      inputs.job_filter == 'nightly-test-specialized-8-gpu-b200')
    runs-on: 8-gpu-b200
    env:
      RUNNER_LABELS: 8-gpu-b200
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      - uses: ./.github/actions/check-maintenance

      - name: Install dependencies
        run: |
          bash scripts/ci/cuda/ci_install_dependency.sh

      - name: Run test
        timeout-minutes: 120
        env:
          GPU_CONFIG: "8-gpu-b200"
        run: |
          cd test
          python3 run_suite.py --hw cuda --suite nightly-8-gpu-b200 --nightly --continue-on-error --timeout-per-file 2400

      # Runs regardless of the outcome of the steps above.
      - if: always()
        uses: ./.github/actions/upload-cuda-coredumps
# Diffusion cross-framework comparison
  nightly-test-diffusion-comparison:
    # Only in the upstream repository, and only when the filter is empty, "all" or names this job.
    if: >-
      github.repository == 'sgl-project/sglang' &&
      (inputs.job_filter == '' ||
      inputs.job_filter == 'all' ||
      inputs.job_filter == 'nightly-test-diffusion-comparison')
    runs-on: 4-gpu-h100
    # Upper bound for the whole job; the comparison step itself is capped at 210 minutes.
    timeout-minutes: 240
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      # NOTE(review): this is the only test job without a check-maintenance step; confirm intended.
      - name: Install dependencies
        run: |
          bash scripts/ci/cuda/ci_install_dependency.sh diffusion

      - name: Run cross-framework comparison
        timeout-minutes: 210
        env:
          GITHUB_SHA: ${{ github.sha }}
          GITHUB_RUN_ID: ${{ github.run_id }}
          PYTHONUNBUFFERED: "1"
        run: |
          python3 -u scripts/ci/utils/diffusion/run_comparison.py \
            --output comparison-results.json

      # The three reporting steps below run even when the comparison step failed.
      - name: Generate dashboard
        if: always()
        env:
          GH_PAT_FOR_NIGHTLY_CI_DATA: ${{ secrets.GH_PAT_FOR_NIGHTLY_CI_DATA }}
          GH_TOKEN: ${{ github.token }}
        run: |
          python3 scripts/ci/utils/diffusion/generate_diffusion_dashboard.py \
            --results comparison-results.json \
            --output dashboard.md \
            --charts-dir comparison-charts \
            --fetch-history \
            --step-summary

      - name: Publish to sglang-ci-data
        if: always()
        env:
          GH_PAT_FOR_NIGHTLY_CI_DATA: ${{ secrets.GH_PAT_FOR_NIGHTLY_CI_DATA }}
        run: |
          python3 scripts/ci/utils/diffusion/publish_comparison_results.py \
            --results comparison-results.json \
            --dashboard dashboard.md \
            --charts-dir comparison-charts

      # Missing files are tolerated so a partial run still uploads what it produced.
      - name: Upload comparison artifacts
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: diffusion-comparison-${{ github.run_id }}
          path: |
            comparison-results.json
            dashboard.md
            comparison-charts/
            comparison-logs/
          if-no-files-found: ignore
          retention-days: 90

      - if: always()
        uses: ./.github/actions/upload-cuda-coredumps
# Consolidate performance metrics from all jobs
  consolidate-metrics:
    # always(): merge whatever metrics exist even when a producing job failed or was skipped.
    if: github.repository == 'sgl-project/sglang' && always()
    needs:
      - nightly-test-general-8-gpu-h200
      - nightly-test-general-8-gpu-b200
      - nightly-test-multimodal-server-1-gpu
      - nightly-test-multimodal-server-2-gpu
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      # Pulls every artifact whose name contains "metrics-" into one flat directory.
      - name: Download all partition metrics
        uses: actions/download-artifact@v4
        with:
          pattern: "*metrics-*"
          path: metrics/
          merge-multiple: true

      - name: List downloaded metrics
        run: |
          echo "Downloaded metrics files:"
          find metrics/ -name "*.json" -type f 2>/dev/null || echo "No metrics files found"

      # Context values reach the script through environment variables and are quoted, so a
      # ref name containing shell metacharacters cannot be interpreted as shell syntax.
      # NOTE(review): github.sha / github.ref_name describe the triggering ref; when
      # inputs.ref is supplied they may not match the commit that was actually tested.
      - name: Merge metrics
        env:
          RUN_ID: ${{ github.run_id }}
          COMMIT_SHA: ${{ github.sha }}
          BRANCH_NAME: ${{ github.ref_name }}
        run: |
          python3 scripts/ci/utils/merge_metrics.py \
            --input-dir metrics/ \
            --output "consolidated-metrics-${RUN_ID}.json" \
            --run-id "${RUN_ID}" \
            --commit-sha "${COMMIT_SHA}" \
            --branch "${BRANCH_NAME}"

      - name: Upload consolidated metrics
        uses: actions/upload-artifact@v4
        with:
          name: consolidated-metrics-${{ github.run_id }}
          path: consolidated-metrics-${{ github.run_id }}.json
          retention-days: 90
          if-no-files-found: warn
# Final check job
  check-all-jobs:
    # always(): evaluate the gate even when upstream jobs failed, were cancelled or were skipped.
    if: github.repository == 'sgl-project/sglang' && always()
    # Every test job in this workflow must be listed here, otherwise its failure or
    # cancellation is invisible to the gate below.
    needs:
      - nightly-test-general-1-gpu-h100
      - nightly-test-kernel-1-gpu-h100
      - nightly-test-kernel-8-gpu-h200
      - nightly-test-general-4-gpu-h100
      - nightly-test-general-8-gpu-h200
      - nightly-test-general-8-gpu-h20
      - nightly-test-general-8-gpu-b200
      - nightly-test-text-accuracy-2-gpu-h100
      - nightly-test-text-perf-2-gpu-h100
      - nightly-test-vlm-accuracy-2-gpu-h100
      - nightly-test-vlm-perf-2-gpu-h100
      - nightly-test-multimodal-server-1-gpu
      - nightly-test-multimodal-server-2-gpu
      - nightly-test-perf-4-gpu-b200
      - nightly-test-specialized-8-gpu-b200
      - nightly-test-diffusion-comparison
      - consolidate-metrics
    runs-on: ubuntu-latest
    steps:
      # Fails on any 'failure' or 'cancelled' result; other results do not trip either check.
      - name: Check if any job failed
        run: |
          if [[ "${{ contains(needs.*.result, 'failure') }}" == "true" ]]; then
            echo "One or more nightly test jobs failed"
            exit 1
          fi
          if [[ "${{ contains(needs.*.result, 'cancelled') }}" == "true" ]]; then
            echo "One or more nightly test jobs were cancelled"
            exit 1
          fi
          echo "All nightly test jobs passed"