# Path: agentic-pd-hybrid/third_party/sglang/.github/workflows/stress-test.yml
# File-viewer metadata at capture time: 45 lines, 1.1 KiB, YAML.

# Manually triggered workflow that runs the `stress` suite of
# test/run_suite.py and uploads any `stress_test_*.jsonl` result files.
name: Stress Test

on:
  # Only runs when started by hand (UI / API); there is no push or PR trigger.
  workflow_dispatch:
    inputs:
      # Both inputs are declared as strings and handed to the test step as
      # environment variables (NUM_PROMPTS / DURATION_MINUTES).
      # NOTE(review): how the suite consumes them is not visible in this
      # file; confirm against the stress tests themselves.
      num_prompts:
        description: 'Number of prompts per model'
        required: true
        default: '50000'
        type: string
      duration_minutes:
        description: 'Timeout per model in minutes'
        required: true
        default: '45'
        type: string

jobs:
  stress-test:
    # Skip the job in forks and any repository other than the upstream one.
    if: github.repository == 'sgl-project/sglang'
    # Runner selected by label.
    runs-on: 8-gpu-h200
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Install dependencies
        run: |
          bash scripts/ci/cuda/ci_install_dependency.sh

      - name: Run stress tests
        # Hard cap for the whole step. It is a fixed value and is not derived
        # from the `duration_minutes` input above.
        timeout-minutes: 210
        # Inputs are passed through the environment rather than interpolated
        # into the script text.
        env:
          NUM_PROMPTS: ${{ inputs.num_prompts }}
          DURATION_MINUTES: ${{ inputs.duration_minutes }}
        run: |
          cd test
          python3 run_suite.py --hw cuda --suite stress

      - name: Upload results
        # Upload whatever was produced even if an earlier step failed.
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: stress-test-results
          # NOTE(review): this pattern is resolved relative to the workspace
          # root, while the suite is launched from `test/`. Confirm where the
          # result files are actually written.
          path: |
            stress_test_*.jsonl