---
name: Nightly Test (AMD)

# Triggers: a daily cron, pushes to main that touch the version file, manual
# dispatch, and invocation from another workflow (workflow_call).
on:
  schedule:
    # Daily at 17:30 (GitHub evaluates cron schedules in UTC).
    - cron: '30 17 * * *'

  push:
    branches:
      - main
    paths:
      - 'python/sglang/version.py'

  # Manual runs: pick one job from the dropdown, or type a comma-separated
  # list in `job_filter`, which takes precedence when non-empty.
  workflow_dispatch:
    inputs:
      aiter_ref:
        description: 'Override AITER commit (optional, leave empty to use Dockerfile default)'
        type: string
        required: false
        default: ''
      continue_on_error:
        description: 'Continue on error (do not fail the workflow on test failures)'
        type: boolean
        required: false
        default: true
      job_select:
        description: 'Select a job to run from dropdown (choose "all" to run all jobs)'
        type: choice
        required: false
        default: 'all'
        options:
          - all
          # MI30x jobs
          - nightly-test-1-gpu-unit
          - nightly-accuracy-2-gpu
          - nightly-accuracy-2-gpu-vlm
          - nightly-perf-2-gpu-text
          - nightly-perf-2-gpu-vlm
          - nightly-4-gpu
          - nightly-accuracy-8-gpu
          - nightly-8-gpu-grok1-int4
          - nightly-8-gpu-grok2
          - nightly-8-gpu-deepseek-v31
          - nightly-8-gpu-deepseek-v32
          - nightly-8-gpu-deepseek-v32-mtp
          - nightly-8-gpu-deepseek-v3-kv-fp8
          - nightly-8-gpu-kimi-k25
          - nightly-8-gpu-qwen3-235b
          - nightly-8-gpu-qwen35
          - nightly-8-gpu-glm5
          - nightly-8-gpu-minimax-m25
          - nightly-1-gpu-zimage-turbo
          # MI35x jobs
          - nightly-test-1-gpu-mi35x
          - nightly-accuracy-8-gpu-mi35x
          - nightly-8-gpu-mi35x-grok1-int4
          - nightly-8-gpu-mi35x-grok2
          - nightly-8-gpu-mi35x-deepseek-r1-mxfp4
          - nightly-8-gpu-mi35x-deepseek-r1-mxfp4-kv-fp8
          - nightly-8-gpu-mi35x-deepseek-r1-mxfp4-ar-fusion
          - nightly-accuracy-8-gpu-mi35x-deepseek-v32
          - nightly-accuracy-8-gpu-mi35x-deepseek-v32-mtp
          - nightly-perf-8-gpu-mi35x-deepseek-v32-basic
          - nightly-perf-8-gpu-mi35x-deepseek-v32-mtp
          - nightly-8-gpu-mi35x-kimi-k25
          - nightly-8-gpu-mi35x-qwen3-235b-mxfp4
          - nightly-8-gpu-mi35x-qwen35
          - nightly-8-gpu-mi35x-glm5
          - nightly-8-gpu-mi35x-minimax-m25
      job_filter:
        description: 'Or type comma-separated job names (overrides dropdown if non-empty)'
        type: string
        required: false
        default: ''

  # Reusable-workflow entry point. There is no dropdown here: callers select
  # jobs through `job_filter` only, and may pin the code under test via `ref`.
  workflow_call:
    inputs:
      ref:
        description: 'Git ref (branch, tag, or SHA) to test. If not provided, uses the default branch.'
        type: string
        required: false
        default: ''
      aiter_ref:
        description: 'Override AITER commit (optional, leave empty to use Dockerfile default)'
        type: string
        required: false
        default: ''
      job_filter:
        description: 'Select which job to run (leave empty or "all" to run all jobs)'
        type: string
        required: false
        default: 'all'
      continue_on_error:
        description: 'Continue on error (do not fail the workflow on test failures)'
        type: boolean
        required: false
        default: true

# Workflow-wide environment. AITER_COMMIT_OVERRIDE carries the optional
# `aiter_ref` input and is an empty string when that input is not supplied.
# NOTE(review): presumably read by the scripts under scripts/ci/amd/ - confirm.
env:
  AITER_COMMIT_OVERRIDE: ${{ inputs.aiter_ref }}

concurrency:
  # Only direct runs (no `ref` input) cancel an in-progress run of their group.
  # NOTE(review): a called workflow inherits github.event_name from its caller,
  # so the `!= 'workflow_call'` clause presumably never evaluates to false;
  # confirm before relying on it.
  cancel-in-progress: ${{ !inputs.ref && github.event_name != 'workflow_call' }}
  # A workflow_call run with `ref` set gets a group keyed by the caller's run
  # id, so it cannot collide with direct schedule/push runs, which group by
  # github.ref. `inputs.ref` is the discriminator (not github.event_name)
  # because github.event_name is inherited from the caller under workflow_call.
  group: nightly-test-amd-${{ inputs.ref && format('caller-{0}', github.run_id) || github.ref }}

jobs:
|
|
# ============================================== MI30x Unit Tests ==============================================
  # 1-GPU unit tests (MI30x only): LoRA, debug utils, scheduler, etc.
  nightly-test-1-gpu-unit:
    runs-on: linux-mi325-1gpu-sglang
    # Gate: upstream repo or a pull_request event, and this job is selected
    # (no filter given, filter is 'all', or the comma-separated filter names it).
    if: >-
      (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
      (!(inputs.job_filter || inputs.job_select) ||
      (inputs.job_filter || inputs.job_select) == 'all' ||
      contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-test-1-gpu-unit,'))
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # An explicit `ref` input wins; otherwise use the triggering ref.
          ref: ${{ inputs.ref || github.ref }}

      # Create an empty summary file, then run the container start script.
      - name: Setup docker
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}
        run: |
          touch github_summary.md
          bash scripts/ci/amd/amd_ci_start_container.sh

      - name: Install dependencies
        run: bash scripts/ci/amd/amd_ci_install_dependency.sh

      # The suite's exit status is captured so the summary is still published
      # to the job summary before the step exits with that status.
      - name: Nightly Unit Test (1-GPU)
        timeout-minutes: 90
        run: |
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-amd-1-gpu --nightly --timeout-per-file 900 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit ${TEST_EXIT_CODE:-0}

# ============================================== MI30x Accuracy Tests ==============================================
  # 2-GPU accuracy tests (MI30x only): GSM8K eval.
  nightly-accuracy-2-gpu:
    runs-on: linux-mi325-2gpu-sglang
    # Gate: upstream repo or a pull_request event, and this job is selected
    # (no filter given, filter is 'all', or the comma-separated filter names it).
    if: >-
      (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
      (!(inputs.job_filter || inputs.job_select) ||
      (inputs.job_filter || inputs.job_select) == 'all' ||
      contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-accuracy-2-gpu,'))
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # An explicit `ref` input wins; otherwise use the triggering ref.
          ref: ${{ inputs.ref || github.ref }}

      # Create an empty summary file, then run the container start script.
      - name: Setup docker
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}
        run: |
          touch github_summary.md
          bash scripts/ci/amd/amd_ci_start_container.sh

      - name: Install dependencies
        run: bash scripts/ci/amd/amd_ci_install_dependency.sh

      # Truncate the summary, run the suite while capturing its exit status,
      # publish the summary, then exit with the captured status.
      # NOTE(review): this is the only test step in view without a
      # `timeout-minutes`; confirm that is intentional.
      - name: Nightly Test (2-GPU)
        run: |
          > github_summary.md # Clear summary file
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-amd --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit ${TEST_EXIT_CODE:-0}

# 2-GPU VLM accuracy tests: MMMU evaluation of vision-language models.
  nightly-accuracy-2-gpu-vlm:
    runs-on: linux-mi325-2gpu-sglang
    # Gate: upstream repo or a pull_request event, and this job is selected
    # (no filter given, filter is 'all', or the comma-separated filter names it).
    if: >-
      (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
      (!(inputs.job_filter || inputs.job_select) ||
      (inputs.job_filter || inputs.job_select) == 'all' ||
      contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-accuracy-2-gpu-vlm,'))
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # An explicit `ref` input wins; otherwise use the triggering ref.
          ref: ${{ inputs.ref || github.ref }}

      # Create an empty summary file, then run the container start script.
      - name: Setup docker
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}
        run: |
          touch github_summary.md
          bash scripts/ci/amd/amd_ci_start_container.sh

      - name: Install dependencies
        run: bash scripts/ci/amd/amd_ci_install_dependency.sh

      # Truncate the summary, run the suite while capturing its exit status,
      # publish the summary, then exit with the captured status.
      - name: Nightly Accuracy Test (2-GPU VLM MMMU)
        timeout-minutes: 180
        run: |
          > github_summary.md # Clear summary file
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-amd-accuracy-2-gpu-vlm --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit ${TEST_EXIT_CODE:-0}

# 2-GPU performance tests for text models.
  nightly-perf-2-gpu-text:
    runs-on: linux-mi325-2gpu-sglang
    # Gate: upstream repo or a pull_request event, and this job is selected
    # (no filter given, filter is 'all', or the comma-separated filter names it).
    if: >-
      (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
      (!(inputs.job_filter || inputs.job_select) ||
      (inputs.job_filter || inputs.job_select) == 'all' ||
      contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-perf-2-gpu-text,'))
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # An explicit `ref` input wins; otherwise use the triggering ref.
          ref: ${{ inputs.ref || github.ref }}

      # Create an empty summary file, then run the container start script.
      - name: Setup docker
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}
        run: |
          touch github_summary.md
          bash scripts/ci/amd/amd_ci_start_container.sh

      - name: Install dependencies
        run: bash scripts/ci/amd/amd_ci_install_dependency.sh

      # Runs with SGLANG_USE_AITER=1 passed to the exec script. The exit status
      # is captured so the summary is published before the step exits with it.
      - name: Performance Test (2-GPU Text Models)
        timeout-minutes: 120
        run: |
          > github_summary.md # Clear summary file
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e SGLANG_USE_AITER=1 \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-amd-perf-text-2-gpu --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit ${TEST_EXIT_CODE:-0}

# 2-GPU performance tests for vision-language models.
  nightly-perf-2-gpu-vlm:
    runs-on: linux-mi325-2gpu-sglang
    # Gate: upstream repo or a pull_request event, and this job is selected
    # (no filter given, filter is 'all', or the comma-separated filter names it).
    if: >-
      (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
      (!(inputs.job_filter || inputs.job_select) ||
      (inputs.job_filter || inputs.job_select) == 'all' ||
      contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-perf-2-gpu-vlm,'))
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # An explicit `ref` input wins; otherwise use the triggering ref.
          ref: ${{ inputs.ref || github.ref }}

      # Create an empty summary file, then run the container start script.
      - name: Setup docker
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}
        run: |
          touch github_summary.md
          bash scripts/ci/amd/amd_ci_start_container.sh

      - name: Install dependencies
        run: bash scripts/ci/amd/amd_ci_install_dependency.sh

      # Runs with SGLANG_USE_AITER=1 passed to the exec script. The exit status
      # is captured so the summary is published before the step exits with it.
      - name: Performance Test (2-GPU VLM Models)
        timeout-minutes: 180
        run: |
          > github_summary.md # Clear summary file
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e SGLANG_USE_AITER=1 \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-amd-perf-vlm-2-gpu --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit ${TEST_EXIT_CODE:-0}

# ============================================== MI30x 4-GPU Tests ==============================================
  # 4-GPU nightly tests: Dumper/Comparator E2E, VLM Encoder DP.
  nightly-4-gpu:
    runs-on: linux-mi325-4gpu-sglang
    # Gate: upstream repo or a pull_request event, and this job is selected
    # (no filter given, filter is 'all', or the comma-separated filter names it).
    if: >-
      (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
      (!(inputs.job_filter || inputs.job_select) ||
      (inputs.job_filter || inputs.job_select) == 'all' ||
      contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-4-gpu,'))
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # An explicit `ref` input wins; otherwise use the triggering ref.
          ref: ${{ inputs.ref || github.ref }}

      # Create an empty summary file, then run the container start script.
      - name: Setup docker
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}
        run: |
          touch github_summary.md
          bash scripts/ci/amd/amd_ci_start_container.sh

      - name: Install dependencies
        run: bash scripts/ci/amd/amd_ci_install_dependency.sh

      # Truncate the summary, run the suite while capturing its exit status,
      # publish the summary, then exit with the captured status.
      # NOTE(review): `--continue-on-error` is always passed here instead of
      # being driven by `inputs.continue_on_error` as in the sibling jobs;
      # confirm that is intentional.
      - name: Nightly Test (4-GPU)
        timeout-minutes: 120
        run: |
          > github_summary.md
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-amd-4-gpu --nightly --continue-on-error --timeout-per-file 3600 || TEST_EXIT_CODE=$?
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit ${TEST_EXIT_CODE:-0}

# 8-GPU accuracy tests (accuracy only): GPT-OSS, then Grok1-FP8.
  nightly-accuracy-8-gpu:
    # Gate: upstream repo or a pull_request event, and this job is selected
    # (no filter given, filter is 'all', or the comma-separated filter names it).
    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-accuracy-8-gpu,'))
    runs-on: linux-mi325-8gpu-sglang
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # An explicit `ref` input wins; otherwise use the triggering ref.
          ref: ${{ inputs.ref || github.ref }}

      # Create an empty summary file, then run the container start script.
      - name: Setup docker
        run: |
          touch github_summary.md
          bash scripts/ci/amd/amd_ci_start_container.sh
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}

      - name: Install dependencies
        run: bash scripts/ci/amd/amd_ci_install_dependency.sh

      # Both test steps append github_summary.md to the job summary. Each one
      # truncates the file first so a step publishes only its own output;
      # without that, the second step would re-publish the first step's summary.
      - name: Accuracy Test (8-GPU GPT-OSS)
        timeout-minutes: 180
        run: |
          > github_summary.md # Clear summary file
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-gpt-oss --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit ${TEST_EXIT_CODE:-0}

      # Runs with RCCL_MSCCL_ENABLE=0 passed to the exec script. The exit status
      # is captured so the summary is published before the step exits with it.
      - name: Accuracy Test (8-GPU Grok1-FP8)
        timeout-minutes: 60
        run: |
          > github_summary.md # Clear summary file
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e RCCL_MSCCL_ENABLE=0 \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-grok1-fp8 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit ${TEST_EXIT_CODE:-0}

# ============================================== MI30x Combined Accuracy + Performance Tests ==============================================
  # 8-GPU Grok1-INT4: accuracy step followed by a performance step.
  nightly-8-gpu-grok1-int4:
    runs-on: linux-mi325-8gpu-sglang
    # Gate: upstream repo or a pull_request event, and this job is selected
    # (no filter given, filter is 'all', or the comma-separated filter names it).
    if: >-
      (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
      (!(inputs.job_filter || inputs.job_select) ||
      (inputs.job_filter || inputs.job_select) == 'all' ||
      contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-grok1-int4,'))
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # An explicit `ref` input wins; otherwise use the triggering ref.
          ref: ${{ inputs.ref || github.ref }}

      # Create an empty summary file, then run the container start script.
      - name: Setup docker
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}
        run: |
          touch github_summary.md
          bash scripts/ci/amd/amd_ci_start_container.sh

      - name: Install dependencies
        run: bash scripts/ci/amd/amd_ci_install_dependency.sh

      # Each test step truncates the summary, runs its suite with
      # RCCL_MSCCL_ENABLE=0 while capturing the exit status, publishes the
      # summary, then exits with the captured status.
      - name: Accuracy Test (8-GPU Grok1-INT4)
        timeout-minutes: 60
        run: |
          > github_summary.md # Clear summary file
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e RCCL_MSCCL_ENABLE=0 \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-grok1-int4 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit ${TEST_EXIT_CODE:-0}

      # A perf failure does not fail the job once accuracy has passed.
      - name: Performance Test (8-GPU Grok1-INT4)
        continue-on-error: true
        timeout-minutes: 60
        run: |
          > github_summary.md # Clear summary file
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e RCCL_MSCCL_ENABLE=0 \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-grok1-int4 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit ${TEST_EXIT_CODE:-0}

# 8-GPU Grok2: accuracy step followed by a performance step.
  nightly-8-gpu-grok2:
    runs-on: linux-mi325-8gpu-sglang
    # Gate: upstream repo or a pull_request event, and this job is selected
    # (no filter given, filter is 'all', or the comma-separated filter names it).
    if: >-
      (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
      (!(inputs.job_filter || inputs.job_select) ||
      (inputs.job_filter || inputs.job_select) == 'all' ||
      contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-grok2,'))
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # An explicit `ref` input wins; otherwise use the triggering ref.
          ref: ${{ inputs.ref || github.ref }}

      # Create an empty summary file, then run the container start script.
      - name: Setup docker
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}
        run: |
          touch github_summary.md
          bash scripts/ci/amd/amd_ci_start_container.sh

      - name: Install dependencies
        run: bash scripts/ci/amd/amd_ci_install_dependency.sh

      # Each test step truncates the summary, runs its suite with
      # RCCL_MSCCL_ENABLE=0 while capturing the exit status, publishes the
      # summary, then exits with the captured status.
      - name: Accuracy Test (8-GPU Grok2)
        timeout-minutes: 60
        run: |
          > github_summary.md # Clear summary file
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e RCCL_MSCCL_ENABLE=0 \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-grok2 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit ${TEST_EXIT_CODE:-0}

      # A perf failure does not fail the job once accuracy has passed.
      - name: Performance Test (8-GPU Grok2)
        continue-on-error: true
        timeout-minutes: 60
        run: |
          > github_summary.md # Clear summary file
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e RCCL_MSCCL_ENABLE=0 \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-grok2 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit ${TEST_EXIT_CODE:-0}

# 8-GPU DeepSeek-V3.1: accuracy step followed by a performance step.
  nightly-8-gpu-deepseek-v31:
    runs-on: linux-mi325-8gpu-sglang
    # Gate: upstream repo or a pull_request event, and this job is selected
    # (no filter given, filter is 'all', or the comma-separated filter names it).
    if: >-
      (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
      (!(inputs.job_filter || inputs.job_select) ||
      (inputs.job_filter || inputs.job_select) == 'all' ||
      contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-deepseek-v31,'))
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # An explicit `ref` input wins; otherwise use the triggering ref.
          ref: ${{ inputs.ref || github.ref }}

      # Create an empty summary file, then run the container start script.
      - name: Setup docker
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}
        run: |
          touch github_summary.md
          bash scripts/ci/amd/amd_ci_start_container.sh

      - name: Install dependencies
        run: bash scripts/ci/amd/amd_ci_install_dependency.sh

      # Accuracy runs with SGLANG_USE_AITER=1. The exit status is captured so
      # the summary is published before the step exits with it.
      - name: Accuracy Test (8-GPU DeepSeek-V3.1)
        timeout-minutes: 120
        run: |
          > github_summary.md # Clear summary file
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e SGLANG_USE_AITER=1 \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-deepseek-v31 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit ${TEST_EXIT_CODE:-0}

      # Perf runs with SGLANG_USE_ROCM700A=1 (not SGLANG_USE_AITER). A perf
      # failure does not fail the job once accuracy has passed.
      - name: Performance Test (8-GPU DeepSeek-V3.1)
        continue-on-error: true
        timeout-minutes: 300
        run: |
          > github_summary.md # Clear summary file
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e SGLANG_USE_ROCM700A=1 \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-deepseek-v31 --nightly --timeout-per-file 18000 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit ${TEST_EXIT_CODE:-0}

# 8-GPU DeepSeek-V3.2 (basic): accuracy step followed by a performance step.
  nightly-8-gpu-deepseek-v32:
    runs-on: linux-mi325-8gpu-sglang
    # Gate: upstream repo or a pull_request event, and this job is selected
    # (no filter given, filter is 'all', or the comma-separated filter names it).
    if: >-
      (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
      (!(inputs.job_filter || inputs.job_select) ||
      (inputs.job_filter || inputs.job_select) == 'all' ||
      contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-deepseek-v32,'))
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # An explicit `ref` input wins; otherwise use the triggering ref.
          ref: ${{ inputs.ref || github.ref }}

      # Create an empty summary file, then run the container start script.
      - name: Setup docker
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}
        run: |
          touch github_summary.md
          bash scripts/ci/amd/amd_ci_start_container.sh

      - name: Install dependencies
        run: bash scripts/ci/amd/amd_ci_install_dependency.sh

      # Each test step truncates the summary, runs its suite while capturing
      # the exit status, publishes the summary, then exits with that status.
      - name: Accuracy Test (8-GPU DeepSeek-V3.2 Basic)
        timeout-minutes: 120
        run: |
          > github_summary.md # Clear summary file
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-deepseek-v32 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit ${TEST_EXIT_CODE:-0}

      # A perf failure does not fail the job once accuracy has passed.
      - name: Performance Test (8-GPU DeepSeek-V3.2 Basic)
        continue-on-error: true
        timeout-minutes: 150
        run: |
          > github_summary.md # Clear summary file
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-deepseek-v32-basic --nightly --timeout-per-file 5400 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit ${TEST_EXIT_CODE:-0}

# 8-GPU DeepSeek-V3.2 MTP: accuracy step followed by a performance step.
  nightly-8-gpu-deepseek-v32-mtp:
    runs-on: linux-mi325-8gpu-sglang
    # Gate: upstream repo or a pull_request event, and this job is selected
    # (no filter given, filter is 'all', or the comma-separated filter names it).
    if: >-
      (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
      (!(inputs.job_filter || inputs.job_select) ||
      (inputs.job_filter || inputs.job_select) == 'all' ||
      contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-deepseek-v32-mtp,'))
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # An explicit `ref` input wins; otherwise use the triggering ref.
          ref: ${{ inputs.ref || github.ref }}

      # Create an empty summary file, then run the container start script.
      - name: Setup docker
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}
        run: |
          touch github_summary.md
          bash scripts/ci/amd/amd_ci_start_container.sh

      - name: Install dependencies
        run: bash scripts/ci/amd/amd_ci_install_dependency.sh

      # Each test step truncates the summary, runs its suite while capturing
      # the exit status, publishes the summary, then exits with that status.
      - name: Accuracy Test (8-GPU DeepSeek-V3.2 MTP)
        timeout-minutes: 120
        run: |
          > github_summary.md # Clear summary file
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-deepseek-v32-mtp --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit ${TEST_EXIT_CODE:-0}

      # A perf failure does not fail the job once accuracy has passed.
      - name: Performance Test (8-GPU DeepSeek-V3.2 MTP)
        continue-on-error: true
        timeout-minutes: 180
        run: |
          > github_summary.md # Clear summary file
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-deepseek-v32-mtp --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit ${TEST_EXIT_CODE:-0}

# 8-GPU DeepSeek-V3 KV FP8: Basic + MTP with --kv-cache-dtype fp8_e4m3.
  nightly-8-gpu-deepseek-v3-kv-fp8:
    runs-on: linux-mi325-8gpu-sglang
    # Gate: upstream repo or a pull_request event, and this job is selected
    # (no filter given, filter is 'all', or the comma-separated filter names it).
    if: >-
      (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
      (!(inputs.job_filter || inputs.job_select) ||
      (inputs.job_filter || inputs.job_select) == 'all' ||
      contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-deepseek-v3-kv-fp8,'))
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # An explicit `ref` input wins; otherwise use the triggering ref.
          ref: ${{ inputs.ref || github.ref }}

      # Create an empty summary file, then run the container start script.
      - name: Setup docker
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}
        run: |
          touch github_summary.md
          bash scripts/ci/amd/amd_ci_start_container.sh

      - name: Install dependencies
        run: bash scripts/ci/amd/amd_ci_install_dependency.sh

      # Truncate the summary, run the suite while capturing its exit status,
      # publish the summary, then exit with the captured status.
      - name: DeepSeek-V3 KV FP8 Test (8-GPU Basic + MTP)
        timeout-minutes: 120
        run: |
          > github_summary.md # Clear summary file
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-amd-8-gpu-deepseek-v3-kv-fp8 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit ${TEST_EXIT_CODE:-0}

# 8-GPU Kimi-K2.5: accuracy only.
  nightly-8-gpu-kimi-k25:
    runs-on: linux-mi325-8gpu-sglang
    # Gate: upstream repo or a pull_request event, and this job is selected
    # (no filter given, filter is 'all', or the comma-separated filter names it).
    if: >-
      (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
      (!(inputs.job_filter || inputs.job_select) ||
      (inputs.job_filter || inputs.job_select) == 'all' ||
      contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-kimi-k25,'))
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # An explicit `ref` input wins; otherwise use the triggering ref.
          ref: ${{ inputs.ref || github.ref }}

      # Create an empty summary file, then run the container start script.
      - name: Setup docker
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}
        run: |
          touch github_summary.md
          bash scripts/ci/amd/amd_ci_start_container.sh

      - name: Install dependencies
        run: bash scripts/ci/amd/amd_ci_install_dependency.sh

      # Truncate the summary, run the suite while capturing its exit status,
      # publish the summary, then exit with the captured status.
      - name: Accuracy Test (8-GPU Kimi-K2.5)
        timeout-minutes: 120
        run: |
          > github_summary.md # Clear summary file
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-kimi-k25 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit ${TEST_EXIT_CODE:-0}

# 8-GPU Qwen3-235B: accuracy and performance run as a single suite step.
  nightly-8-gpu-qwen3-235b:
    runs-on: linux-mi325-8gpu-sglang
    # Gate: upstream repo or a pull_request event, and this job is selected
    # (no filter given, filter is 'all', or the comma-separated filter names it).
    if: >-
      (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
      (!(inputs.job_filter || inputs.job_select) ||
      (inputs.job_filter || inputs.job_select) == 'all' ||
      contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-qwen3-235b,'))
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # An explicit `ref` input wins; otherwise use the triggering ref.
          ref: ${{ inputs.ref || github.ref }}

      # Create an empty summary file, then run the container start script.
      - name: Setup docker
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}
        run: |
          touch github_summary.md
          bash scripts/ci/amd/amd_ci_start_container.sh

      - name: Install dependencies
        run: bash scripts/ci/amd/amd_ci_install_dependency.sh

      # Truncate the summary, run the suite while capturing its exit status,
      # publish the summary, then exit with the captured status.
      - name: Accuracy Test + Performance Test (8-GPU Qwen3)
        timeout-minutes: 120
        run: |
          > github_summary.md # Clear summary file
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-8-gpu-qwen3-235b --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit ${TEST_EXIT_CODE:-0}

# 8-GPU Qwen 3.5: accuracy only.
  nightly-8-gpu-qwen35:
    runs-on: linux-mi325-8gpu-sglang
    # Gate: upstream repo or a pull_request event, and this job is selected
    # (no filter given, filter is 'all', or the comma-separated filter names it).
    if: >-
      (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
      (!(inputs.job_filter || inputs.job_select) ||
      (inputs.job_filter || inputs.job_select) == 'all' ||
      contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-qwen35,'))
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # An explicit `ref` input wins; otherwise use the triggering ref.
          ref: ${{ inputs.ref || github.ref }}

      # Create an empty summary file, then run the container start script.
      - name: Setup docker
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}
        run: |
          touch github_summary.md
          bash scripts/ci/amd/amd_ci_start_container.sh

      # Standard dependency script, plus two extra packages installed through
      # the exec script for this job.
      - name: Install dependencies
        run: |
          bash scripts/ci/amd/amd_ci_install_dependency.sh
          bash scripts/ci/amd/amd_ci_exec.sh pip install mistral-common "lm-eval[api]"

      # Truncate the summary, run the suite while capturing its exit status,
      # publish the summary, then exit with the captured status.
      # NOTE(review): unlike the sibling jobs, this command does not pass
      # `--continue-on-error` based on `inputs.continue_on_error`; confirm that
      # is intentional.
      - name: Accuracy Test (8-GPU Qwen 3.5)
        timeout-minutes: 120
        run: |
          > github_summary.md # Clear summary file
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-qwen35 --nightly --timeout-per-file 3600 || TEST_EXIT_CODE=$?
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit ${TEST_EXIT_CODE:-0}

# 8-GPU GLM-5 (NSA): accuracy only.
  nightly-8-gpu-glm5:
    runs-on: linux-mi325-8gpu-sglang
    # Gate: upstream repo or a pull_request event, and this job is selected
    # (no filter given, filter is 'all', or the comma-separated filter names it).
    if: >-
      (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
      (!(inputs.job_filter || inputs.job_select) ||
      (inputs.job_filter || inputs.job_select) == 'all' ||
      contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-glm5,'))
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # An explicit `ref` input wins; otherwise use the triggering ref.
          ref: ${{ inputs.ref || github.ref }}

      # Create an empty summary file, then run the container start script.
      - name: Setup docker
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}
        run: |
          touch github_summary.md
          bash scripts/ci/amd/amd_ci_start_container.sh

      # Standard dependency script, then transformers installed from GitHub at
      # a pinned commit through the exec script.
      - name: Install dependencies
        run: |
          bash scripts/ci/amd/amd_ci_install_dependency.sh
          bash scripts/ci/amd/amd_ci_exec.sh pip install git+https://github.com/huggingface/transformers.git@96f807a33b75

      # Truncate the summary, run the suite while capturing its exit status,
      # publish the summary, then exit with the captured status.
      - name: Accuracy Test (8-GPU GLM-5 NSA)
        timeout-minutes: 120
        run: |
          > github_summary.md # Clear summary file
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-glm5 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit ${TEST_EXIT_CODE:-0}

# 8-GPU MiniMax-M2.5: accuracy step followed by a performance step.
  nightly-8-gpu-minimax-m25:
    runs-on: linux-mi325-8gpu-sglang
    # Gate: upstream repo or a pull_request event, and this job is selected
    # (no filter given, filter is 'all', or the comma-separated filter names it).
    if: >-
      (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
      (!(inputs.job_filter || inputs.job_select) ||
      (inputs.job_filter || inputs.job_select) == 'all' ||
      contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-minimax-m25,'))
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # An explicit `ref` input wins; otherwise use the triggering ref.
          ref: ${{ inputs.ref || github.ref }}

      # Create an empty summary file, then run the container start script.
      - name: Setup docker
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}
        run: |
          touch github_summary.md
          bash scripts/ci/amd/amd_ci_start_container.sh

      - name: Install dependencies
        run: bash scripts/ci/amd/amd_ci_install_dependency.sh

      # Each test step truncates the summary, runs its suite with
      # SGLANG_USE_AITER=1 while capturing the exit status, publishes the
      # summary, then exits with the captured status.
      - name: Accuracy Test (8-GPU MiniMax-M2.5)
        timeout-minutes: 120
        run: |
          > github_summary.md # Clear summary file
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e SGLANG_USE_AITER=1 \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-minimax-m25 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit ${TEST_EXIT_CODE:-0}

      # A perf failure does not fail the job once accuracy has passed.
      - name: Performance Test (8-GPU MiniMax-M2.5)
        continue-on-error: true
        timeout-minutes: 120
        run: |
          > github_summary.md # Clear summary file
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e SGLANG_USE_AITER=1 \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-minimax-m25 --nightly --timeout-per-file 5400 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit ${TEST_EXIT_CODE:-0}

# ============================================== MI30x Diffusion Tests ==============================================
|
|
# 1-GPU Z-Image-Turbo (diffusion, text-to-image): runs one pytest file and
# uploads whatever lands in diffusion-artifacts/.
# Gated to sgl-project/sglang (or pull_request events) and to the job filter inputs.
nightly-1-gpu-zimage-turbo:
  runs-on: linux-mi325-1gpu-sglang
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
    (!(inputs.job_filter || inputs.job_select) ||
    (inputs.job_filter || inputs.job_select) == 'all' ||
    contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-1-gpu-zimage-turbo,'))
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # Fall back to the triggering ref when no explicit ref input is given.
        ref: ${{ inputs.ref || github.ref }}

    - name: Setup docker
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh

    - name: Install dependencies
      run: bash scripts/ci/amd/amd_ci_install_dependency.sh

    - name: Z-Image-Turbo Diffusion Test (1-GPU)
      timeout-minutes: 45
      run: |
        # When the continue_on_error input is set, `|| true` is spliced into the
        # command line so a pytest failure is swallowed; otherwise the failure is
        # captured in TEST_EXIT_CODE and re-raised after the summary is published.
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          -e SGLANG_DIFFUSION_ARTIFACT_DIR="/sglang-checkout/diffusion-artifacts" \
          pytest test/registered/amd/test_zimage_turbo.py -v -s ${{ inputs.continue_on_error && '|| true' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}

    - name: Upload generated images
      # Upload even when the test step failed.
      if: always()
      uses: actions/upload-artifact@v4
      with:
        name: zimage-turbo-outputs
        path: diffusion-artifacts/
        # An empty or missing artifact directory is not an error.
        if-no-files-found: ignore
        retention-days: 30
|
|
|
|
# ============================================== MI35x Tests ==============================================
|
|
# MI35x 1-GPU tests: platform-agnostic tests that may work on CDNA4 (gfx950).
# Gated to sgl-project/sglang (or pull_request events) and to the job filter inputs.
nightly-test-1-gpu-mi35x:
  runs-on: linux-mi35x-gpu-1
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
    (!(inputs.job_filter || inputs.job_select) ||
    (inputs.job_filter || inputs.job_select) == 'all' ||
    contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-test-1-gpu-mi35x,'))
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # Fall back to the triggering ref when no explicit ref input is given.
        ref: ${{ inputs.ref || github.ref }}

    - name: Setup docker
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh

    - name: Install dependencies
      run: |
        bash scripts/ci/amd/amd_ci_install_dependency.sh
        # run_suite.py needs tabulate, which the MI35x container does not ship.
        bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate

    - name: Nightly Test MI35x (1-GPU)
      timeout-minutes: 90
      run: |
        # A failing suite is recorded in TEST_EXIT_CODE instead of aborting the
        # script, so the summary is still published before the step exits.
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-1-gpu-mi35x --nightly --timeout-per-file 900 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
|
|
|
|
# MI35x 8-GPU accuracy tests for GPT-OSS (accuracy only, no perf step).
# Gated to sgl-project/sglang (or pull_request events) and to the job filter inputs.
nightly-accuracy-8-gpu-mi35x:
  runs-on: linux-mi35x-gpu-8
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
    (!(inputs.job_filter || inputs.job_select) ||
    (inputs.job_filter || inputs.job_select) == 'all' ||
    contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-accuracy-8-gpu-mi35x,'))
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # Fall back to the triggering ref when no explicit ref input is given.
        ref: ${{ inputs.ref || github.ref }}

    - name: Setup docker
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh

    - name: Install dependencies
      run: |
        bash scripts/ci/amd/amd_ci_install_dependency.sh
        # run_suite.py needs tabulate, which the MI35x container does not ship.
        bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate

    - name: Accuracy Test MI35x (8-GPU GPT-OSS)
      timeout-minutes: 180
      run: |
        # A failing suite is recorded in TEST_EXIT_CODE instead of aborting the
        # script, so the summary is still published before the step exits.
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-8-gpu-mi35x --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
|
|
|
|
# MI35x 8-GPU Grok1-INT4: accuracy suite followed by a non-blocking performance suite.
# Both suites run with RCCL_MSCCL_ENABLE=0.
# Gated to sgl-project/sglang (or pull_request events) and to the job filter inputs.
nightly-8-gpu-mi35x-grok1-int4:
  runs-on: linux-mi35x-gpu-8
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
    (!(inputs.job_filter || inputs.job_select) ||
    (inputs.job_filter || inputs.job_select) == 'all' ||
    contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-grok1-int4,'))
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # Fall back to the triggering ref when no explicit ref input is given.
        ref: ${{ inputs.ref || github.ref }}

    - name: Setup docker
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh

    - name: Install dependencies
      run: |
        bash scripts/ci/amd/amd_ci_install_dependency.sh
        # run_suite.py needs tabulate, which the MI35x container does not ship.
        bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate

    - name: Accuracy Test MI35x (8-GPU Grok1-INT4)
      timeout-minutes: 90
      run: |
        # Truncate the summary so it only holds this step's output.
        > github_summary.md
        # A failing suite is recorded in TEST_EXIT_CODE instead of aborting the
        # script, so the summary is still published before the step exits.
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e RCCL_MSCCL_ENABLE=0 \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-mi35x-grok1-int4 --nightly --timeout-per-file 5400 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}

    - name: Performance Test MI35x (8-GPU Grok1-INT4)
      timeout-minutes: 60
      # A perf failure must not fail the job once accuracy has passed.
      continue-on-error: true
      run: |
        # Truncate the summary so it only holds this step's output.
        > github_summary.md
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e RCCL_MSCCL_ENABLE=0 \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-mi35x-grok1-int4 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
|
|
|
|
# MI35x 8-GPU Grok2: accuracy suite followed by a non-blocking performance suite.
# Both suites run with RCCL_MSCCL_ENABLE=0.
# Gated to sgl-project/sglang (or pull_request events) and to the job filter inputs.
nightly-8-gpu-mi35x-grok2:
  runs-on: linux-mi35x-gpu-8
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
    (!(inputs.job_filter || inputs.job_select) ||
    (inputs.job_filter || inputs.job_select) == 'all' ||
    contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-grok2,'))
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # Fall back to the triggering ref when no explicit ref input is given.
        ref: ${{ inputs.ref || github.ref }}

    - name: Setup docker
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh

    - name: Install dependencies
      run: |
        bash scripts/ci/amd/amd_ci_install_dependency.sh
        # run_suite.py needs tabulate, which the MI35x container does not ship.
        bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate

    - name: Accuracy Test MI35x (8-GPU Grok2)
      timeout-minutes: 60
      run: |
        # Truncate the summary so it only holds this step's output.
        > github_summary.md
        # A failing suite is recorded in TEST_EXIT_CODE instead of aborting the
        # script, so the summary is still published before the step exits.
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e RCCL_MSCCL_ENABLE=0 \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-mi35x-grok2 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}

    - name: Performance Test MI35x (8-GPU Grok2)
      timeout-minutes: 60
      # A perf failure must not fail the job once accuracy has passed.
      continue-on-error: true
      run: |
        # Truncate the summary so it only holds this step's output.
        > github_summary.md
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e RCCL_MSCCL_ENABLE=0 \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-mi35x-grok2 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
|
|
|
|
# MI35x 8-GPU DeepSeek-R1-MXFP4: accuracy suite via run_suite.py, then a
# non-blocking performance test that invokes its test file directly.
# Gated to sgl-project/sglang (or pull_request events) and to the job filter inputs.
nightly-8-gpu-mi35x-deepseek-r1-mxfp4:
  runs-on: linux-mi35x-gpu-8
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
    (!(inputs.job_filter || inputs.job_select) ||
    (inputs.job_filter || inputs.job_select) == 'all' ||
    contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-deepseek-r1-mxfp4,'))
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # Fall back to the triggering ref when no explicit ref input is given.
        ref: ${{ inputs.ref || github.ref }}

    - name: Setup docker
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh

    - name: Install dependencies
      run: |
        bash scripts/ci/amd/amd_ci_install_dependency.sh
        # run_suite.py needs tabulate, which the MI35x container does not ship.
        bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate

    - name: Accuracy Test MI35x (8-GPU DeepSeek-R1-MXFP4)
      timeout-minutes: 180
      run: |
        # Truncate the summary so it only holds this step's output.
        > github_summary.md
        # A failing suite is recorded in TEST_EXIT_CODE instead of aborting the
        # script, so the summary is still published before the step exits.
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-8-gpu-mi35x-deepseek-r1-mxfp4 --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}

    - name: Performance Test MI35x (8-GPU DeepSeek-R1-MXFP4)
      timeout-minutes: 300
      # A perf failure must not fail the job once accuracy has passed.
      continue-on-error: true
      run: |
        # Truncate the summary so it only holds this step's output.
        > github_summary.md
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 registered/amd/perf/mi35x/test_deepseek_r1_mxfp4_perf_mi35x.py || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
|
|
|
|
# MI35x 8-GPU DeepSeek-R1-MXFP4 with KV FP8: accuracy suite via run_suite.py,
# then a non-blocking performance test that invokes its test file directly.
# Gated to sgl-project/sglang (or pull_request events) and to the job filter inputs.
nightly-8-gpu-mi35x-deepseek-r1-mxfp4-kv-fp8:
  runs-on: linux-mi35x-gpu-8
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
    (!(inputs.job_filter || inputs.job_select) ||
    (inputs.job_filter || inputs.job_select) == 'all' ||
    contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-deepseek-r1-mxfp4-kv-fp8,'))
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # Fall back to the triggering ref when no explicit ref input is given.
        ref: ${{ inputs.ref || github.ref }}

    - name: Setup docker
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh

    - name: Install dependencies
      run: |
        bash scripts/ci/amd/amd_ci_install_dependency.sh
        # run_suite.py needs tabulate, which the MI35x container does not ship.
        bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate

    - name: Accuracy Test MI35x (8-GPU DeepSeek-R1-MXFP4 KV FP8)
      timeout-minutes: 180
      run: |
        # Truncate the summary so it only holds this step's output.
        > github_summary.md
        # A failing suite is recorded in TEST_EXIT_CODE instead of aborting the
        # script, so the summary is still published before the step exits.
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-8-gpu-mi35x-deepseek-r1-mxfp4-kv-fp8 --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}

    - name: Performance Test MI35x (8-GPU DeepSeek-R1-MXFP4 KV FP8)
      timeout-minutes: 300
      # A perf failure must not fail the job once accuracy has passed.
      continue-on-error: true
      run: |
        # Truncate the summary so it only holds this step's output.
        > github_summary.md
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 registered/amd/perf/mi35x/test_deepseek_r1_mxfp4_kv_fp8_perf_mi35x.py || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
|
|
|
|
# MI35x 8-GPU DeepSeek-R1-MXFP4 with AllReduce fusion: accuracy suite via
# run_suite.py, then a non-blocking performance test that invokes its test
# file directly.
# Gated to sgl-project/sglang (or pull_request events) and to the job filter inputs.
nightly-8-gpu-mi35x-deepseek-r1-mxfp4-ar-fusion:
  runs-on: linux-mi35x-gpu-8
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
    (!(inputs.job_filter || inputs.job_select) ||
    (inputs.job_filter || inputs.job_select) == 'all' ||
    contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-deepseek-r1-mxfp4-ar-fusion,'))
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # Fall back to the triggering ref when no explicit ref input is given.
        ref: ${{ inputs.ref || github.ref }}

    - name: Setup docker
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh

    - name: Install dependencies
      run: |
        bash scripts/ci/amd/amd_ci_install_dependency.sh
        # run_suite.py needs tabulate, which the MI35x container does not ship.
        bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate

    - name: Accuracy Test MI35x (8-GPU DeepSeek-R1-MXFP4 AllReduce Fusion)
      timeout-minutes: 180
      run: |
        # Truncate the summary so it only holds this step's output.
        > github_summary.md
        # A failing suite is recorded in TEST_EXIT_CODE instead of aborting the
        # script, so the summary is still published before the step exits.
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-8-gpu-mi35x-deepseek-r1-mxfp4-ar-fusion --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}

    - name: Performance Test MI35x (8-GPU DeepSeek-R1-MXFP4 AllReduce Fusion)
      timeout-minutes: 300
      # A perf failure must not fail the job once accuracy has passed.
      continue-on-error: true
      run: |
        # Truncate the summary so it only holds this step's output.
        > github_summary.md
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 registered/amd/perf/mi35x/test_deepseek_r1_mxfp4_ar_fusion_perf_mi35x.py || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
|
|
|
|
# MI35x 8-GPU DeepSeek-V3.2 accuracy test.
# Gated to sgl-project/sglang (or pull_request events) and to the job filter inputs.
nightly-accuracy-8-gpu-mi35x-deepseek-v32:
  runs-on: linux-mi35x-gpu-8
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
    (!(inputs.job_filter || inputs.job_select) ||
    (inputs.job_filter || inputs.job_select) == 'all' ||
    contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-accuracy-8-gpu-mi35x-deepseek-v32,'))
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # Fall back to the triggering ref when no explicit ref input is given.
        ref: ${{ inputs.ref || github.ref }}

    - name: Setup docker
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh

    - name: Install dependencies
      run: |
        bash scripts/ci/amd/amd_ci_install_dependency.sh
        # run_suite.py needs tabulate, which the MI35x container does not ship.
        bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate

    - name: Accuracy Test MI35x (8-GPU DeepSeek-V3.2)
      timeout-minutes: 120
      run: |
        # Truncate the summary so it only holds this step's output.
        > github_summary.md
        # A failing suite is recorded in TEST_EXIT_CODE instead of aborting the
        # script, so the summary is still published before the step exits.
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-8-gpu-mi35x-deepseek-v32 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
|
|
|
|
# MI35x 8-GPU DeepSeek-V3.2 TP+MTP accuracy test.
# Gated to sgl-project/sglang (or pull_request events) and to the job filter inputs.
nightly-accuracy-8-gpu-mi35x-deepseek-v32-mtp:
  runs-on: linux-mi35x-gpu-8
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
    (!(inputs.job_filter || inputs.job_select) ||
    (inputs.job_filter || inputs.job_select) == 'all' ||
    contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-accuracy-8-gpu-mi35x-deepseek-v32-mtp,'))
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # Fall back to the triggering ref when no explicit ref input is given.
        ref: ${{ inputs.ref || github.ref }}

    - name: Setup docker
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh

    - name: Install dependencies
      run: |
        bash scripts/ci/amd/amd_ci_install_dependency.sh
        # run_suite.py needs tabulate, which the MI35x container does not ship.
        bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate

    - name: Accuracy Test MI35x (8-GPU DeepSeek-V3.2 TP+MTP)
      timeout-minutes: 120
      run: |
        # Truncate the summary so it only holds this step's output.
        > github_summary.md
        # A failing suite is recorded in TEST_EXIT_CODE instead of aborting the
        # script, so the summary is still published before the step exits.
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-mi35x-deepseek-v32-mtp --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
|
|
|
|
# MI35x 8-GPU DeepSeek-V3.2 performance test (basic configuration).
# Gated to sgl-project/sglang (or pull_request events) and to the job filter inputs.
nightly-perf-8-gpu-mi35x-deepseek-v32-basic:
  runs-on: linux-mi35x-gpu-8
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
    (!(inputs.job_filter || inputs.job_select) ||
    (inputs.job_filter || inputs.job_select) == 'all' ||
    contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-perf-8-gpu-mi35x-deepseek-v32-basic,'))
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # Fall back to the triggering ref when no explicit ref input is given.
        ref: ${{ inputs.ref || github.ref }}

    - name: Setup docker
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh

    - name: Install dependencies
      run: |
        bash scripts/ci/amd/amd_ci_install_dependency.sh
        # run_suite.py needs tabulate, which the MI35x container does not ship.
        bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate

    - name: Performance Test MI35x (8-GPU DeepSeek-V3.2 Basic)
      timeout-minutes: 150
      run: |
        # Truncate the summary so it only holds this step's output.
        > github_summary.md
        # A failing suite is recorded in TEST_EXIT_CODE instead of aborting the
        # script, so the summary is still published before the step exits.
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-mi35x-deepseek-v32-basic --nightly --timeout-per-file 5400 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
|
|
|
|
# MI35x 8-GPU Kimi-K2.5 accuracy test.
# Gated to sgl-project/sglang (or pull_request events) and to the job filter inputs.
nightly-8-gpu-mi35x-kimi-k25:
  runs-on: linux-mi35x-gpu-8
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
    (!(inputs.job_filter || inputs.job_select) ||
    (inputs.job_filter || inputs.job_select) == 'all' ||
    contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-kimi-k25,'))
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # Fall back to the triggering ref when no explicit ref input is given.
        ref: ${{ inputs.ref || github.ref }}

    - name: Setup docker
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh

    - name: Install dependencies
      run: |
        bash scripts/ci/amd/amd_ci_install_dependency.sh
        # run_suite.py needs tabulate, which the MI35x container does not ship.
        bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate

    - name: Accuracy Test MI35x (8-GPU Kimi-K2.5)
      timeout-minutes: 180
      run: |
        # Truncate the summary so it only holds this step's output.
        > github_summary.md
        # A failing suite is recorded in TEST_EXIT_CODE instead of aborting the
        # script, so the summary is still published before the step exits.
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-mi35x-kimi-k25 --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
|
|
|
|
# MI35x 8-GPU Qwen3-235B-MXFP4: accuracy and performance run as a single suite.
# Gated to sgl-project/sglang (or pull_request events) and to the job filter inputs.
nightly-8-gpu-mi35x-qwen3-235b-mxfp4:
  runs-on: linux-mi35x-gpu-8
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
    (!(inputs.job_filter || inputs.job_select) ||
    (inputs.job_filter || inputs.job_select) == 'all' ||
    contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-qwen3-235b-mxfp4,'))
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # Fall back to the triggering ref when no explicit ref input is given.
        ref: ${{ inputs.ref || github.ref }}

    - name: Setup docker
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh

    - name: Install dependencies
      run: |
        bash scripts/ci/amd/amd_ci_install_dependency.sh
        # run_suite.py needs tabulate, which the MI35x container does not ship.
        bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate

    - name: Accuracy Test + Performance Test MI35x (8-GPU Qwen3-235B-MXFP4)
      timeout-minutes: 120
      run: |
        # Truncate the summary so it only holds this step's output.
        > github_summary.md
        # A failing suite is recorded in TEST_EXIT_CODE instead of aborting the
        # script, so the summary is still published before the step exits.
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-8-gpu-mi35x-qwen3-235b-mxfp4 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
|
|
|
|
# MI35x 8-GPU Qwen 3.5 accuracy test.
# Gated to sgl-project/sglang (or pull_request events) and to the job filter inputs.
nightly-8-gpu-mi35x-qwen35:
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
    (!(inputs.job_filter || inputs.job_select) ||
    (inputs.job_filter || inputs.job_select) == 'all' ||
    contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-qwen35,'))
  runs-on: linux-mi35x-gpu-8
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # Fall back to the triggering ref when no explicit ref input is given.
        ref: ${{ inputs.ref || github.ref }}

    - name: Setup docker
      run: |
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}

    - name: Install dependencies
      run: |
        bash scripts/ci/amd/amd_ci_install_dependency.sh
        # run_suite.py needs tabulate, which the MI35x container does not ship.
        bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate
        # Extra packages installed only for this job's suite.
        bash scripts/ci/amd/amd_ci_exec.sh pip install mistral-common "lm-eval[api]"

    - name: Accuracy Test MI35x (8-GPU Qwen 3.5)
      timeout-minutes: 120
      run: |
        # Truncate the summary so it only holds this step's output.
        > github_summary.md
        # Honour the continue_on_error input like every other run_suite.py
        # invocation in this workflow; previously the flag was never forwarded
        # here, so the input had no effect on this job.
        # A failing suite is recorded in TEST_EXIT_CODE instead of aborting the
        # script, so the summary is still published before the step exits.
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-mi35x-qwen35 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
|
|
|
|
# MI35x 8-GPU GLM-5 NSA accuracy test. Installs transformers from a pinned git commit.
# Gated to sgl-project/sglang (or pull_request events) and to the job filter inputs.
nightly-8-gpu-mi35x-glm5:
  runs-on: linux-mi35x-gpu-8
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
    (!(inputs.job_filter || inputs.job_select) ||
    (inputs.job_filter || inputs.job_select) == 'all' ||
    contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-glm5,'))
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # Fall back to the triggering ref when no explicit ref input is given.
        ref: ${{ inputs.ref || github.ref }}

    - name: Setup docker
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh

    - name: Install dependencies
      run: |
        bash scripts/ci/amd/amd_ci_install_dependency.sh
        # run_suite.py needs tabulate, which the MI35x container does not ship.
        bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate
        # transformers is installed from source at commit 96f807a33b75.
        bash scripts/ci/amd/amd_ci_exec.sh pip install git+https://github.com/huggingface/transformers.git@96f807a33b75

    - name: Accuracy Test MI35x (8-GPU GLM-5 NSA)
      timeout-minutes: 180
      run: |
        # Truncate the summary so it only holds this step's output.
        > github_summary.md
        # A failing suite is recorded in TEST_EXIT_CODE instead of aborting the
        # script, so the summary is still published before the step exits.
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-8-gpu-mi35x-glm5 --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
|
|
|
|
# MI35x 8-GPU MiniMax-M2.5: accuracy suite followed by a non-blocking performance suite.
# Both suites run with SGLANG_USE_AITER=1.
# Gated to sgl-project/sglang (or pull_request events) and to the job filter inputs.
nightly-8-gpu-mi35x-minimax-m25:
  runs-on: linux-mi35x-gpu-8
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
    (!(inputs.job_filter || inputs.job_select) ||
    (inputs.job_filter || inputs.job_select) == 'all' ||
    contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-minimax-m25,'))
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # Fall back to the triggering ref when no explicit ref input is given.
        ref: ${{ inputs.ref || github.ref }}

    - name: Setup docker
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh

    - name: Install dependencies
      run: |
        bash scripts/ci/amd/amd_ci_install_dependency.sh
        # run_suite.py needs tabulate, which the MI35x container does not ship.
        bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate

    - name: Accuracy Test MI35x (8-GPU MiniMax-M2.5)
      timeout-minutes: 120
      run: |
        # Truncate the summary so it only holds this step's output.
        > github_summary.md
        # A failing suite is recorded in TEST_EXIT_CODE instead of aborting the
        # script, so the summary is still published before the step exits.
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e SGLANG_USE_AITER=1 \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-8-gpu-mi35x-minimax-m25 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}

    - name: Performance Test MI35x (8-GPU MiniMax-M2.5)
      timeout-minutes: 120
      # A perf failure must not fail the job once accuracy has passed.
      continue-on-error: true
      run: |
        # Truncate the summary so it only holds this step's output.
        > github_summary.md
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e SGLANG_USE_AITER=1 \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-mi35x-minimax-m25 --nightly --timeout-per-file 5400 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
|
|
|
|
# MI35x 8-GPU DeepSeek-V3.2 performance test (MTP configuration).
# Gated to sgl-project/sglang (or pull_request events) and to the job filter inputs.
nightly-perf-8-gpu-mi35x-deepseek-v32-mtp:
  runs-on: linux-mi35x-gpu-8
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
    (!(inputs.job_filter || inputs.job_select) ||
    (inputs.job_filter || inputs.job_select) == 'all' ||
    contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-perf-8-gpu-mi35x-deepseek-v32-mtp,'))
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # Fall back to the triggering ref when no explicit ref input is given.
        ref: ${{ inputs.ref || github.ref }}

    - name: Setup docker
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh

    - name: Install dependencies
      run: |
        bash scripts/ci/amd/amd_ci_install_dependency.sh
        # run_suite.py needs tabulate, which the MI35x container does not ship.
        bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate

    - name: Performance Test MI35x (8-GPU DeepSeek-V3.2 MTP)
      timeout-minutes: 180
      run: |
        # Truncate the summary so it only holds this step's output.
        > github_summary.md
        # A failing suite is recorded in TEST_EXIT_CODE instead of aborting the
        # script, so the summary is still published before the step exits.
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-mi35x-deepseek-v32-mtp --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
|
|
|
|
# Aggregate gate over the jobs listed in `needs`: the job fails when any of them
# reports 'failure' or 'cancelled'. `always()` lets it run even when upstream
# jobs did not succeed. Jobs commented out of the list do not affect the result.
check-all-jobs:
  runs-on: ubuntu-latest
  if: always() && (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request' || github.event_name == 'workflow_dispatch')
  needs:
    # MI30x unit tests
    - nightly-test-1-gpu-unit
    # MI30x accuracy tests
    - nightly-accuracy-2-gpu
    - nightly-accuracy-2-gpu-vlm
    # MI30x 4-GPU tests
    - nightly-4-gpu
    - nightly-accuracy-8-gpu
    # MI30x performance tests are left out on purpose: perf failures don't block CI
    # - nightly-perf-2-gpu-text
    # - nightly-perf-2-gpu-vlm
    # MI30x combined accuracy + performance tests
    - nightly-8-gpu-grok1-int4
    - nightly-8-gpu-grok2
    - nightly-8-gpu-deepseek-v31
    - nightly-8-gpu-deepseek-v32
    - nightly-8-gpu-deepseek-v32-mtp
    - nightly-8-gpu-deepseek-v3-kv-fp8
    - nightly-8-gpu-kimi-k25
    - nightly-8-gpu-qwen3-235b
    - nightly-8-gpu-qwen35
    - nightly-8-gpu-glm5
    - nightly-8-gpu-minimax-m25
    # MI30x diffusion tests
    - nightly-1-gpu-zimage-turbo
    # MI35x jobs
    - nightly-test-1-gpu-mi35x
    - nightly-accuracy-8-gpu-mi35x
    - nightly-8-gpu-mi35x-grok1-int4
    - nightly-8-gpu-mi35x-grok2
    - nightly-8-gpu-mi35x-deepseek-r1-mxfp4
    - nightly-8-gpu-mi35x-deepseek-r1-mxfp4-kv-fp8
    - nightly-8-gpu-mi35x-deepseek-r1-mxfp4-ar-fusion
    - nightly-accuracy-8-gpu-mi35x-deepseek-v32
    - nightly-accuracy-8-gpu-mi35x-deepseek-v32-mtp
    - nightly-8-gpu-mi35x-kimi-k25
    - nightly-8-gpu-mi35x-qwen3-235b-mxfp4
    - nightly-8-gpu-mi35x-qwen35
    - nightly-8-gpu-mi35x-glm5
    - nightly-8-gpu-mi35x-minimax-m25
    # MI35x perf jobs are left out on purpose: perf failures don't block CI
    # - nightly-perf-8-gpu-mi35x-deepseek-v32-basic
    # - nightly-perf-8-gpu-mi35x-deepseek-v32-mtp
  steps:
    - name: Check if any job failed
      run: |
        # Each expression is rendered to the literal text "true" or "false"
        # before the script runs.
        any_failed="${{ contains(needs.*.result, 'failure') }}"
        any_cancelled="${{ contains(needs.*.result, 'cancelled') }}"

        if [[ "$any_failed" == "true" ]]; then
          echo "One or more nightly test jobs failed"
          exit 1
        fi

        if [[ "$any_cancelled" == "true" ]]; then
          echo "One or more nightly test jobs were cancelled"
          exit 1
        fi

        echo "All nightly test jobs passed"
|