Skip to content

Nightly MJX unit test (push) #3

Nightly MJX unit test (push)

Nightly MJX unit test (push) #3

# Workflow title.
name: 'Nightly MJX unit test'
# Per-run title: "nightly <created_at of the triggering run>" for workflow_run
# events, otherwise the name of the event that started the run.
# The folded scalar below joins into a single line at parse time.
run-name: >-
  Nightly MJX unit test (${{
  github.event_name == 'workflow_run'
  && format('nightly {0}', github.event.workflow_run.created_at)
  || github.event_name
  }})
# Events that start this workflow.
on:
  # Push events, with no branch or path filters.
  push:
  # Chaining off the nightly build workflow is currently commented out.
  # workflow_run:
  #   workflows: [Nightly MJX build]
  #   types: [completed]
  #   branches: [main]
  # Manual runs; every input has a default.
  workflow_dispatch:
    inputs:
      MJX_IMAGE:
        description: 'MJX image built by NVIDIA/MJX-Toolbox'
        type: string
        required: true
        # commented-out alternative: 'ghcr.io/nvidia/upstream-mjx:latest'
        default: 'ghcr.io/nvidia/jax-toolbox-internal:7662445215-upstream-mjx-amd64'
      ARTIFACT_NAME:
        description: 'Name of the artifact zip file'
        type: string
        required: false
        default: 'artifact-mjx-unit-test'
      PUBLISH:
        description: 'Update status badge?'
        type: boolean
        required: true
        default: false
# Permissions requested for this workflow, with the stated reason for each.
permissions:
  actions: write   # to cancel previous workflows
  contents: read   # to fetch code
  packages: write  # to upload container
# Fallback values used by the metadata job when no workflow_dispatch input is
# supplied.
env:
  DEFAULT_ARTIFACT_NAME: 'artifact-mjx-unit-test'
  # commented-out alternative: ghcr.io/nvidia/upstream-mjx:latest
  DEFAULT_MJX_IMAGE: 'ghcr.io/nvidia/jax-toolbox-internal:7662445215-upstream-mjx-amd64'
jobs:
  # Fails when the run was started by a workflow_run event whose upstream
  # workflow concluded with 'failure'. No other job lists it under `needs`.
  if-upstream-failed:
    if: (github.event_name == 'workflow_run' && github.event.workflow_run.conclusion == 'failure') && github.event_name != 'workflow_dispatch'
    runs-on: ubuntu-latest
    steps:
      - run: echo 'Upstream workflow failed, aborting run' && exit 1

  # Resolves the image, artifact name and publish flag and exposes them as job
  # outputs for the jobs that list `metadata` under `needs`.
  metadata:
    runs-on: ubuntu-22.04
    outputs:
      MJX_IMAGE: ${{ steps.image.outputs.MJX_IMAGE }}
      PUBLISH: ${{ steps.if-publish.outputs.PUBLISH }}
      ARTIFACT_NAME: ${{ steps.artifact-name.outputs.ARTIFACT_NAME }}
    steps:
      - name: Determine mjx image to use
        id: image
        shell: bash -x -e {0}
        env:
          # Inputs are handed over via the environment instead of being
          # interpolated into the script text, so their content is never
          # parsed as shell code.
          REQUESTED_MJX_IMAGE: ${{ inputs.MJX_IMAGE }}
        run: |
          # The input is empty for non-dispatch events; use the default then.
          if [[ -z "${REQUESTED_MJX_IMAGE}" ]]; then
            MJX_IMAGE="${{ env.DEFAULT_MJX_IMAGE }}"
          else
            MJX_IMAGE="${REQUESTED_MJX_IMAGE}"
          fi
          echo "MJX_IMAGE=${MJX_IMAGE}" >> "$GITHUB_OUTPUT"

      - name: Determine artifact name
        id: artifact-name
        shell: bash -x -e {0}
        env:
          REQUESTED_ARTIFACT_NAME: ${{ inputs.ARTIFACT_NAME }}
        run: |
          # Reads the ARTIFACT_NAME input and writes the ARTIFACT_NAME shell
          # variable; the misspelled 'ARTIFACT_NAM' always produced an empty
          # output.
          if [[ -z "${REQUESTED_ARTIFACT_NAME}" ]]; then
            ARTIFACT_NAME="${{ env.DEFAULT_ARTIFACT_NAME }}"
          else
            ARTIFACT_NAME="${REQUESTED_ARTIFACT_NAME}"
          fi
          echo "ARTIFACT_NAME=${ARTIFACT_NAME}" >> "$GITHUB_OUTPUT"

      - name: Determine whether results will be 'published'
        id: if-publish
        shell: bash -x -e {0}
        run: |
          # true for workflow_run events, or for manual runs with PUBLISH set.
          echo "PUBLISH=${{ github.event_name == 'workflow_run' || (github.event_name == 'workflow_dispatch' && inputs.PUBLISH) }}" >> "$GITHUB_OUTPUT"

  # Calls the on-demand SLURM runner workflow with a name and label that embed
  # this run's id; the A100 matrix leg below targets that same label.
  runner:
    uses: ./.github/workflows/_runner_ondemand_slurm.yaml
    with:
      NAME: "A100-${{ github.run_id }}"
      LABELS: "A100:${{ github.run_id }}"
      TIME: "01:00:00"
    secrets: inherit

  # Runs the MJX speed test in the resolved container image once per GPU
  # architecture and uploads the log as an artifact.
  mjx-unit-test:
    needs: metadata
    strategy:
      fail-fast: false
      matrix:
        GPU_ARCH: [V100, A100]
    # ensures A100 job lands on dedicated runner for this particular job
    runs-on: [self-hosted, "${{ matrix.GPU_ARCH == 'A100' && format('{0}:{1}', matrix.GPU_ARCH, github.run_id) || matrix.GPU_ARCH }}"]
    steps:
      - name: Print environment variables
        run: env

      - name: Print GPU information
        run: nvidia-smi

      - name: Check out repository
        uses: actions/checkout@v3

      - name: Login to GitHub Container Registry
        uses: docker/login-action@v2
        with:
          registry: ghcr.io
          username: ${{ github.repository_owner }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Pull MJX image
        shell: bash -x -e {0}
        run: |
          docker pull ${{ needs.metadata.outputs.MJX_IMAGE }}

      - name: MJX speed test
        shell: bash -x -e {0}
        # A failing test must not stop the job: the log is still uploaded.
        continue-on-error: true
        run: |
          # Without pipefail the pipeline's status is tee's, which hides a
          # failing test run.
          set -o pipefail
          docker run --gpus=all --shm-size=1g ${{ needs.metadata.outputs.MJX_IMAGE }} \
            bash -ec "mjx-testspeed --mjcf=humanoid/humanoid.xml --batch_size=8192 --unroll=4" | tee -a test-mjx.log

      - name: Upload artifacts
        uses: actions/upload-artifact@v3
        with:
          # One artifact per matrix leg, suffixed with the GPU architecture.
          name: ${{ needs.metadata.outputs.ARTIFACT_NAME }}-${{ matrix.GPU_ARCH }}
          path: |
            test-mjx.log

  # Calls the shared finalize workflow unless the run was cancelled, passing
  # whether the badge should be published.
  finalize:
    if: "!cancelled()"
    needs: [metadata, mjx-unit-test]
    uses: ./.github/workflows/_finalize.yaml
    with:
      PUBLISH_BADGE: ${{ needs.metadata.outputs.PUBLISH == 'true' }}
    secrets: inherit