Skip to content

Nightly T5X MGMN performance test #218

Nightly T5X MGMN performance test

Nightly T5X MGMN performance test #218

name: Nightly T5X MGMN performance test

# Triggers:
#   * workflow_run      - after the "Nightly T5X build" workflow completes on
#                         `main`. `completed` fires whatever the build's
#                         conclusion was; the jobs check for success themselves.
#   * workflow_dispatch - manual run with an explicit container image.
on:
  workflow_run:
    workflows: [Nightly T5X build]
    types: [completed]
    branches: [main]
  workflow_dispatch:
    inputs:
      T5X_IMAGE:
        type: string
        description: T5X container
        default: 'ghcr.io/nvidia/t5x:latest'
        required: true
      PUBLISH:
        type: boolean
        description: Publish dated results to tensorboard server?
        default: false
        required: false
# GITHUB_TOKEN scopes for this workflow. Reusable workflows called from the
# jobs cannot be granted more than what is listed here.
permissions:
  contents: read  # to fetch code
  actions: write  # to cancel previous workflows
  packages: write  # to upload container
env:
  # Image used by the metadata job when the T5X_IMAGE input is empty.
  DEFAULT_T5X_IMAGE: 'ghcr.io/nvidia/t5x:latest'
jobs:
  # Computes the values shared by the downstream jobs: the build date, the
  # container image to test, and the PUBLISH flag.
  metadata:
    runs-on: ubuntu-22.04
    outputs:
      BUILD_DATE: ${{ steps.date.outputs.BUILD_DATE }}
      T5X_IMAGE: ${{ steps.date.outputs.T5X_IMAGE }}
      PUBLISH: ${{ steps.date.outputs.PUBLISH }}
    steps:
      - name: Set metadata
        id: date
        shell: bash -x -e {0}
        # Workflow inputs reach the script through environment variables
        # rather than inline `${{ }}` expansion, so their contents are never
        # parsed as shell code and survive spaces or quotes intact. Both are
        # expected to be empty when the run was not manually dispatched.
        env:
          INPUT_T5X_IMAGE: ${{ inputs.T5X_IMAGE }}
          INPUT_PUBLISH: ${{ inputs.PUBLISH }}
        run: |
          BUILD_DATE=$(TZ='US/Los_Angeles' date '+%Y-%m-%d')
          echo "BUILD_DATE=${BUILD_DATE}" >> "$GITHUB_OUTPUT"
          # DEFAULT_T5X_IMAGE comes from the workflow-level `env`; it is used
          # whenever no image input was supplied.
          T5X_IMAGE="${INPUT_T5X_IMAGE:-${DEFAULT_T5X_IMAGE}}"
          echo "T5X_IMAGE=${T5X_IMAGE}" >> "$GITHUB_OUTPUT"
          echo "PUBLISH=${INPUT_PUBLISH}" >> "$GITHUB_OUTPUT"

  # Calls the reusable workflow `_test_t5x.yaml` with the selected image.
  # Skipped when the triggering workflow run did not conclude successfully;
  # manual dispatches always run.
  run-jobs:
    needs: metadata
    uses: ./.github/workflows/_test_t5x.yaml
    if: >-
      (github.event_name == 'workflow_run' &&
      github.event.workflow_run.conclusion == 'success') ||
      github.event_name == 'workflow_dispatch'
    with:
      T5X_IMAGE: ${{ needs.metadata.outputs.T5X_IMAGE }}
    secrets: inherit

  # Calls the reusable workflow `_publish_t5x_results.yaml` for this build
  # date. Same gating condition as `run-jobs`.
  # NOTE(review): the PUBLISH input is not consulted here, so every manual
  # dispatch reaches this job even with PUBLISH=false - confirm that is
  # intended.
  publish:
    needs: [metadata, run-jobs]
    uses: ./.github/workflows/_publish_t5x_results.yaml
    if: >-
      (github.event_name == 'workflow_run' &&
      github.event.workflow_run.conclusion == 'success') ||
      github.event_name == 'workflow_dispatch'
    with:
      BUILD_DATE: ${{ needs.metadata.outputs.BUILD_DATE }}
      EXPERIMENT_SUBDIR: T5X
    secrets: inherit

  # Calls the reusable workflow `_publish_badge.yaml` with a script that
  # derives the badge label, message and color. Runs even when earlier jobs
  # failed or were skipped, so that a non-successful run is still reported.
  publish-completion:
    needs: [metadata, run-jobs]
    uses: ./.github/workflows/_publish_badge.yaml
    if: always()
    secrets: inherit
    with:
      ENDPOINT_FILENAME: 't5x-test-overall-status.json'
      # Always true for scheduled (workflow_run) runs; manual runs opt in
      # through the PUBLISH input.
      PUBLISH: ${{ github.event_name == 'workflow_run' || needs.metadata.outputs.PUBLISH == 'true' }}
      SCRIPT: |
        if [[ ${{ needs.run-jobs.result }} == "success" ]]; then
          # Glob over the per-test status files; left unquoted at the use
          # sites on purpose so the shell expands it.
          EXIT_STATUSES="${GITHUB_RUN_ID}-*/*-status.json"
          PASSED_TESTS=$(jq -r '. | select ((.state == "COMPLETED") and (.exitcode == "0")) | .state' $EXIT_STATUSES | wc -l)
          FAILED_TESTS=$(jq -r '. | select ((.state != "COMPLETED") or (.exitcode != "0")) | .state' $EXIT_STATUSES | wc -l)
          TOTAL_TESTS=$(ls $EXIT_STATUSES | wc -l)
          echo "Test statuses:"
          jq -rc 'input_filename,.' $EXIT_STATUSES
          # Green requires at least one status file. Without the TOTAL > 0
          # guard applied to both alternatives, zero files would satisfy
          # PASSED == TOTAL (0 == 0) and report "0/0 passed" as a success.
          if [[ $TOTAL_TESTS -gt 0 && ( $FAILED_TESTS -eq 0 || $PASSED_TESTS -eq $TOTAL_TESTS ) ]]; then
            BADGE_COLOR=brightgreen
          elif [[ $PASSED_TESTS -eq 0 ]]; then
            BADGE_COLOR=red
          else
            BADGE_COLOR=yellow
          fi
          echo "LABEL='Completion'" >> $GITHUB_OUTPUT
          echo "MESSAGE='${PASSED_TESTS}/${TOTAL_TESTS} passed'" >> $GITHUB_OUTPUT
          echo "COLOR='${BADGE_COLOR}'" >> $GITHUB_OUTPUT
        else
          # The test job did not succeed (failed, cancelled or skipped).
          echo "LABEL='Completion'" >> $GITHUB_OUTPUT
          echo "MESSAGE='n/a'" >> $GITHUB_OUTPUT
          echo "COLOR='red'" >> $GITHUB_OUTPUT
        fi