# File listing metadata: 339 lines, 11 KiB, YAML
# Workflow header: display name plus the three events that start a run.
name: AMD CI Job Monitor

on:
  # Manual run. Both inputs are optional strings; `hours` falls back to '24'.
  workflow_dispatch:
    inputs:
      hours:
        type: string
        required: false
        default: '24'
        description: 'Time window in hours'
      job_filter:
        type: string
        required: false
        description: 'Job name filter (leave empty for all AMD jobs)'

  # Pull requests trigger a run only when one of the listed paths changes.
  pull_request:
    paths:
      - '.github/workflows/amd-ci-job-monitor.yml'
      - 'scripts/ci/utils/query_job_status.py'

  schedule:
    # Daily at midnight UTC
    - cron: '0 0 * * *'
jobs:
|
|
# Runs query_job_status.py with --dump-data-file and uploads the resulting
# JSON file as the `actions-job-snapshot` artifact, which the report jobs
# in this workflow download.
fetch-actions-data:
  name: Fetch Actions Snapshot
  runs-on: ubuntu-latest
  env:
    GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
  steps:
    - name: Checkout code
      uses: actions/checkout@v4

    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        python-version: '3.10'

    - name: Install dependencies
      run: pip install tabulate

    - name: Select workflows for snapshot
      id: select-workflows
      env:
        # Hand the user-supplied input to bash as data. Expanding
        # ${{ ... }} inside the script body would splice the raw text into
        # the shell source before it runs.
        JOB_FILTER: ${{ inputs.job_filter }}
      run: |
        # With a job filter only the PR workflow is fetched; otherwise all
        # four AMD workflows are included.
        if [[ -n "$JOB_FILTER" ]]; then
          echo "workflows=pr-test-amd.yml" >> "$GITHUB_OUTPUT"
        else
          echo "workflows=pr-test-amd.yml,nightly-test-amd.yml,pr-test-amd-rocm720.yml,nightly-test-amd-rocm720.yml" >> "$GITHUB_OUTPUT"
        fi

    - name: Fetch Actions data snapshot
      timeout-minutes: 30
      env:
        TARGET_REPO: ${{ github.repository }}
        SNAPSHOT_WORKFLOWS: ${{ steps.select-workflows.outputs.workflows }}
        # `inputs.hours` is empty on schedule / pull_request runs, so
        # default to 24 there.
        WINDOW_HOURS: ${{ inputs.hours || '24' }}
      run: |
        python scripts/ci/utils/query_job_status.py \
          --repo "$TARGET_REPO" \
          --workflow "$SNAPSHOT_WORKFLOWS" \
          --hours "$WINDOW_HOURS" \
          --dump-data-file actions-job-snapshot.json

    - name: Upload Actions data snapshot
      uses: actions/upload-artifact@v4
      with:
        name: actions-job-snapshot
        path: actions-job-snapshot.json
        # Fail the job rather than upload nothing if the snapshot is missing.
        if-no-files-found: error
# Single job filter mode: runs only when the `job_filter` input is non-empty
# and reports on that one job name against pr-test-amd.yml.
custom-report:
  name: Custom Job Report
  if: ${{ inputs.job_filter }}
  needs: fetch-actions-data
  runs-on: ubuntu-latest
  steps:
    - name: Checkout code
      uses: actions/checkout@v4

    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        python-version: '3.10'

    - name: Install dependencies
      run: pip install tabulate

    - name: Download Actions data snapshot
      uses: actions/download-artifact@v4
      with:
        name: actions-job-snapshot
        path: ci-data

    - name: Generate Custom Job Report
      timeout-minutes: 30
      env:
        # User-controlled inputs travel through the environment so their
        # text is never expanded into the shell script source.
        TARGET_REPO: ${{ github.repository }}
        JOB_FILTER: ${{ inputs.job_filter }}
        WINDOW_HOURS: ${{ inputs.hours || '24' }}
      run: |
        python scripts/ci/utils/query_job_status.py \
          --repo "$TARGET_REPO" \
          --job "$JOB_FILTER" \
          --workflow "pr-test-amd.yml" \
          --hours "$WINDOW_HOURS" \
          --input-data-file ci-data/actions-job-snapshot.json \
          --summary
# Parse workflow files to get job names dynamically.
# Each output is a compact JSON array of job ids, consumed by the report
# jobs through fromJson() to build their matrices.
parse-workflows:
  name: Parse Workflow Jobs
  if: ${{ !inputs.job_filter }}
  runs-on: ubuntu-latest
  outputs:
    pr_jobs: ${{ steps.parse.outputs.pr_jobs }}
    nightly_jobs: ${{ steps.parse.outputs.nightly_jobs }}
    pr_rocm720_jobs: ${{ steps.parse.outputs.pr_rocm720_jobs }}
    nightly_rocm720_jobs: ${{ steps.parse.outputs.nightly_rocm720_jobs }}
  steps:
    - name: Checkout code
      uses: actions/checkout@v4

    - name: Parse workflow files
      id: parse
      run: |
        # list_jobs FILE EXCLUDE_REGEX
        # Prints the keys under `.jobs` in FILE as a one-line JSON array,
        # dropping any key that matches EXCLUDE_REGEX (extended regex) and
        # any empty line left over from the split.
        list_jobs() {
          yq -r '.jobs | keys | .[]' "$1" | \
            grep -v -E "$2" | \
            jq -R -s -c 'split("\n") | map(select(length > 0))'
        }

        # Utility jobs that are not reported on.
        pr_excluded='^(call-gate|check-changes|pr-test-amd-finish|cancel|check-all-jobs)$'
        nightly_excluded='^(check-all-jobs)$'

        # pr-test-amd.yml
        pr_jobs=$(list_jobs .github/workflows/pr-test-amd.yml "$pr_excluded")
        echo "pr_jobs=$pr_jobs" >> "$GITHUB_OUTPUT"
        echo "PR jobs: $pr_jobs"

        # nightly-test-amd.yml
        nightly_jobs=$(list_jobs .github/workflows/nightly-test-amd.yml "$nightly_excluded")
        echo "nightly_jobs=$nightly_jobs" >> "$GITHUB_OUTPUT"
        echo "Nightly jobs: $nightly_jobs"

        # pr-test-amd-rocm720.yml
        pr_rocm720_jobs=$(list_jobs .github/workflows/pr-test-amd-rocm720.yml "$pr_excluded")
        echo "pr_rocm720_jobs=$pr_rocm720_jobs" >> "$GITHUB_OUTPUT"
        echo "PR ROCm 7.2 jobs: $pr_rocm720_jobs"

        # nightly-test-amd-rocm720.yml
        nightly_rocm720_jobs=$(list_jobs .github/workflows/nightly-test-amd-rocm720.yml "$nightly_excluded")
        echo "nightly_rocm720_jobs=$nightly_rocm720_jobs" >> "$GITHUB_OUTPUT"
        echo "Nightly ROCm 7.2 jobs: $nightly_rocm720_jobs"
# PR CI reports using dynamic matrix: one matrix entry per job id listed in
# the `pr_jobs` output of parse-workflows. Skipped when a job filter is set.
pr-ci-reports:
  name: PR - ${{ matrix.job_name }}
  needs: [parse-workflows, fetch-actions-data]
  if: ${{ !inputs.job_filter }}
  runs-on: ubuntu-latest
  strategy:
    # Let the remaining matrix entries finish even if one report fails.
    fail-fast: false
    matrix:
      job_name: ${{ fromJson(needs.parse-workflows.outputs.pr_jobs) }}
  steps:
    - name: Checkout code
      uses: actions/checkout@v4

    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        python-version: '3.10'

    - name: Install dependencies
      run: pip install tabulate

    - name: Download Actions data snapshot
      uses: actions/download-artifact@v4
      with:
        name: actions-job-snapshot
        path: ci-data

    - name: Generate Report
      timeout-minutes: 15
      env:
        # Values reach bash as environment data instead of being expanded
        # into the script text, and are quoted where they are used.
        TARGET_REPO: ${{ github.repository }}
        JOB_NAME: ${{ matrix.job_name }}
        WINDOW_HOURS: ${{ inputs.hours || '24' }}
      run: |
        python scripts/ci/utils/query_job_status.py \
          --repo "$TARGET_REPO" \
          --job "$JOB_NAME" \
          --workflow "pr-test-amd.yml" \
          --hours "$WINDOW_HOURS" \
          --input-data-file ci-data/actions-job-snapshot.json \
          --summary
# Nightly AMD test reports using dynamic matrix: one matrix entry per job id
# listed in the `nightly_jobs` output of parse-workflows. Skipped when a job
# filter is set.
nightly-reports:
  name: Nightly - ${{ matrix.job_name }}
  needs: [parse-workflows, fetch-actions-data]
  if: ${{ !inputs.job_filter }}
  runs-on: ubuntu-latest
  strategy:
    # Let the remaining matrix entries finish even if one report fails.
    fail-fast: false
    matrix:
      job_name: ${{ fromJson(needs.parse-workflows.outputs.nightly_jobs) }}
  steps:
    - name: Checkout code
      uses: actions/checkout@v4

    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        python-version: '3.10'

    - name: Install dependencies
      run: pip install tabulate

    - name: Download Actions data snapshot
      uses: actions/download-artifact@v4
      with:
        name: actions-job-snapshot
        path: ci-data

    - name: Generate Nightly Report
      timeout-minutes: 15
      env:
        # Values reach bash as environment data instead of being expanded
        # into the script text, and are quoted where they are used.
        TARGET_REPO: ${{ github.repository }}
        JOB_NAME: ${{ matrix.job_name }}
        WINDOW_HOURS: ${{ inputs.hours || '24' }}
      run: |
        python scripts/ci/utils/query_job_status.py \
          --repo "$TARGET_REPO" \
          --job "$JOB_NAME" \
          --workflow "nightly-test-amd.yml" \
          --hours "$WINDOW_HOURS" \
          --input-data-file ci-data/actions-job-snapshot.json \
          --summary
# PR ROCm 7.2 CI reports using dynamic matrix: one matrix entry per job id
# listed in the `pr_rocm720_jobs` output of parse-workflows. Skipped when a
# job filter is set.
pr-rocm720-ci-reports:
  name: PR ROCm720 - ${{ matrix.job_name }}
  needs: [parse-workflows, fetch-actions-data]
  if: ${{ !inputs.job_filter }}
  runs-on: ubuntu-latest
  strategy:
    # Let the remaining matrix entries finish even if one report fails.
    fail-fast: false
    matrix:
      job_name: ${{ fromJson(needs.parse-workflows.outputs.pr_rocm720_jobs) }}
  steps:
    - name: Checkout code
      uses: actions/checkout@v4

    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        python-version: '3.10'

    - name: Install dependencies
      run: pip install tabulate

    - name: Download Actions data snapshot
      uses: actions/download-artifact@v4
      with:
        name: actions-job-snapshot
        path: ci-data

    - name: Generate PR ROCm 7.2 Report
      timeout-minutes: 15
      env:
        # Values reach bash as environment data instead of being expanded
        # into the script text, and are quoted where they are used.
        TARGET_REPO: ${{ github.repository }}
        JOB_NAME: ${{ matrix.job_name }}
        WINDOW_HOURS: ${{ inputs.hours || '24' }}
      run: |
        python scripts/ci/utils/query_job_status.py \
          --repo "$TARGET_REPO" \
          --job "$JOB_NAME" \
          --workflow "pr-test-amd-rocm720.yml" \
          --hours "$WINDOW_HOURS" \
          --input-data-file ci-data/actions-job-snapshot.json \
          --summary
# Nightly ROCm 7.2 reports using dynamic matrix: one matrix entry per job id
# listed in the `nightly_rocm720_jobs` output of parse-workflows. Skipped
# when a job filter is set.
nightly-rocm720-reports:
  name: Nightly ROCm720 - ${{ matrix.job_name }}
  needs: [parse-workflows, fetch-actions-data]
  if: ${{ !inputs.job_filter }}
  runs-on: ubuntu-latest
  strategy:
    # Let the remaining matrix entries finish even if one report fails.
    fail-fast: false
    matrix:
      job_name: ${{ fromJson(needs.parse-workflows.outputs.nightly_rocm720_jobs) }}
  steps:
    - name: Checkout code
      uses: actions/checkout@v4

    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        python-version: '3.10'

    - name: Install dependencies
      run: pip install tabulate

    - name: Download Actions data snapshot
      uses: actions/download-artifact@v4
      with:
        name: actions-job-snapshot
        path: ci-data

    - name: Generate Nightly ROCm 7.2 Report
      timeout-minutes: 15
      env:
        # Values reach bash as environment data instead of being expanded
        # into the script text, and are quoted where they are used.
        TARGET_REPO: ${{ github.repository }}
        JOB_NAME: ${{ matrix.job_name }}
        WINDOW_HOURS: ${{ inputs.hours || '24' }}
      run: |
        python scripts/ci/utils/query_job_status.py \
          --repo "$TARGET_REPO" \
          --job "$JOB_NAME" \
          --workflow "nightly-test-amd-rocm720.yml" \
          --hours "$WINDOW_HOURS" \
          --input-data-file ci-data/actions-job-snapshot.json \
          --summary
# Runner fleet report - cross-workflow runner analytics in a single pass.
# Invokes the query script once with --runner-report over all four AMD
# workflows. Skipped when a job filter is set.
runner-fleet-report:
  name: Runner Fleet Report
  if: ${{ !inputs.job_filter }}
  needs: fetch-actions-data
  runs-on: ubuntu-latest
  steps:
    - name: Checkout code
      uses: actions/checkout@v4

    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        python-version: '3.10'

    - name: Install dependencies
      run: pip install tabulate

    - name: Download Actions data snapshot
      uses: actions/download-artifact@v4
      with:
        name: actions-job-snapshot
        path: ci-data

    - name: Generate Runner Fleet Report
      timeout-minutes: 30
      env:
        # Values reach bash as environment data instead of being expanded
        # into the script text, and are quoted where they are used.
        TARGET_REPO: ${{ github.repository }}
        WINDOW_HOURS: ${{ inputs.hours || '24' }}
      run: |
        python scripts/ci/utils/query_job_status.py \
          --repo "$TARGET_REPO" \
          --runner-report \
          --workflow "pr-test-amd.yml,nightly-test-amd.yml,pr-test-amd-rocm720.yml,nightly-test-amd-rocm720.yml" \
          --hours "$WINDOW_HOURS" \
          --input-data-file ci-data/actions-job-snapshot.json \
          --summary