Skip to content

Commit

Permalink
integrate
Browse files Browse the repository at this point in the history
  • Loading branch information
yhtang committed Sep 6, 2023
1 parent 59b3d7d commit ab18003
Show file tree
Hide file tree
Showing 3 changed files with 85 additions and 63 deletions.
1 change: 0 additions & 1 deletion .github/workflows/_sandbox.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -36,5 +36,4 @@ jobs:
upload-badge:
needs: [run-jobs]
uses: ./.github/workflows/_upload_badge.yaml
if: always()
secrets: inherit
61 changes: 31 additions & 30 deletions .github/workflows/_test_jax.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,30 +21,29 @@ on:

jobs:

# runner:
# uses: ./.github/workflows/_runner_ondemand_slurm.yaml
# with:
# NAME: "A100-${{ github.run_id }}"
# LABELS: "A100:${{ github.run_id }}"
# TIME: "01:00:00"
# secrets: inherit
runner:
uses: ./.github/workflows/_runner_ondemand_slurm.yaml
with:
NAME: "A100-${{ github.run_id }}"
LABELS: "A100:${{ github.run_id }}"
TIME: "01:00:00"
secrets: inherit

unit-test:
strategy:
fail-fast: false
matrix:
GPU_ARCH: [V100, A100]
# ensures A100 job lands on dedicated runner for this particular job
# runs-on: [self-hosted, "${{ matrix.GPU_ARCH == 'A100' && format('{0}:{1}', matrix.GPU_ARCH, github.run_id) || matrix.GPU_ARCH }}"]
runs-on: ubuntu-22.04
runs-on: [self-hosted, "${{ matrix.GPU_ARCH == 'A100' && format('{0}:{1}', matrix.GPU_ARCH, github.run_id) || matrix.GPU_ARCH }}"]
env:
BADGE_FILENAME_FULL: ${{ inputs.BADGE_FILENAME }}-${{ matrix.GPU_ARCH }}.json
steps:
- name: Print environment variables
run: env

# - name: Print GPU information
# run: nvidia-smi
- name: Print GPU information
run: nvidia-smi

- name: Check out repository
uses: actions/checkout@v3
Expand All @@ -56,30 +55,31 @@ jobs:
username: ${{ github.repository_owner }}
password: ${{ secrets.GITHUB_TOKEN }}

# - name: Pull JAX image
# shell: bash -x -e {0}
# run: |
# docker pull ${{ inputs.JAX_IMAGE }}
- name: Pull JAX image
shell: bash -x -e {0}
run: |
docker pull ${{ inputs.JAX_IMAGE }}
# - name: Backend-independent tests
# shell: bash -x {0}
# run: |
# docker run --gpus all ${{ inputs.JAX_IMAGE }} test-jax.sh -b backend-independent | tee test-backend-independent.log
- name: Backend-independent tests
shell: bash -x {0}
run: |
docker run --gpus all ${{ inputs.JAX_IMAGE }} test-jax.sh -b backend-independent | tee test-backend-independent.log
# - name: GPU-specific tests
# shell: bash -x {0}
# run: |
# docker run --gpus all ${{ inputs.JAX_IMAGE }} test-jax.sh -b gpu | tee test-gpu.log
- name: GPU-specific tests
shell: bash -x {0}
run: |
docker run --gpus all ${{ inputs.JAX_IMAGE }} test-jax.sh -b gpu | tee test-gpu.log
- name: download existing artifact
uses: dawidd6/action-download-artifact@v2.27.0
with:
github_token: ${{secrets.GITHUB_TOKEN}}
run_id: 6047993737
name: jax-unit-test-logs-${{ matrix.GPU_ARCH }}
path: .
# - name: download existing artifact
# uses: dawidd6/action-download-artifact@v2.27.0
# with:
# github_token: ${{secrets.GITHUB_TOKEN}}
# run_id: 6047993737
# name: jax-unit-test-logs-${{ matrix.GPU_ARCH }}
# path: .

- name: Process results and generate report
if: success() || failure()
shell: bash -x -e {0}
run: |
# bring in utility functions
Expand Down Expand Up @@ -120,6 +120,7 @@ jobs:
> ${{ env.BADGE_FILENAME_FULL }}
- name: Upload artifacts
if: success() || failure()
uses: actions/upload-artifact@v3
with:
name: ${{ inputs.ARTIFACT_NAME }}-${{ matrix.GPU_ARCH }}
Expand Down
86 changes: 54 additions & 32 deletions .github/workflows/nightly-jax-test-unit.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,12 @@ env:

jobs:

if-upstream-failed:
  # Guard job: runs only when this workflow was triggered by a `workflow_run`
  # event whose upstream run concluded with 'failure', and then fails itself.
  # An event named 'workflow_run' can never also be 'workflow_dispatch', so
  # no extra exclusion of manual dispatches is needed in the condition.
  if: github.event_name == 'workflow_run' && github.event.workflow_run.conclusion == 'failure'
  runs-on: ubuntu-latest
  steps:
    - run: |
        echo 'Upstream workflow failed, aborting run'
        exit 1

metadata:
runs-on: ubuntu-22.04
outputs:
Expand All @@ -54,38 +60,54 @@ jobs:
JAX_IMAGE: ${{ needs.metadata.outputs.JAX_IMAGE }}
secrets: inherit

publish:
needs: [metadata, run-jobs]
strategy:
fail-fast: false
matrix:
GPU_ARCH: [V100, A100]
uses: ./.github/workflows/_publish_badge.yaml
if: (github.event_name == 'workflow_run' && github.event.workflow_run.conclusion == 'success') || github.event_name == 'workflow_dispatch'
summary:
  # Appends the `.summary` field of every sitrep.json found in the downloaded
  # artifacts to the job's step summary.
  needs: [run-jobs]
  runs-on: ubuntu-22.04
  steps:
    - name: Download artifacts
      # No `name:` input is given, so all artifacts of this run are fetched.
      uses: actions/download-artifact@v3

    - name: Write output to step summary
      shell: bash -x -e {0}
      run: |
        # `read -r` keeps backslashes in paths intact (the previous `-s` flag
        # only silences terminal echo); `IFS=` preserves surrounding spaces.
        find . -name "sitrep.json" | while IFS= read -r f; do
          jq -r '.summary' "$f" | tee -a "$GITHUB_STEP_SUMMARY"
        done
upload-badge:
needs: [run-jobs]
uses: ./.github/workflows/_upload_badge.yaml
secrets: inherit
with:
ENDPOINT_FILENAME: 'jax-unit-test-status-${{ matrix.GPU_ARCH }}.json'
PUBLISH: ${{ github.event_name == 'workflow_run' || needs.metadata.outputs.PUBLISH == 'true' }}
SCRIPT: |
  # Derive badge label, message and color from the unit-test logs of one
  # GPU architecture and export them through $GITHUB_OUTPUT.
  # ARTIFACTS holds a glob pattern; it is left unquoted below on purpose so
  # the shell expands it to the individual log files.
  ARTIFACTS="${{ needs.run-jobs.outputs.ARTIFACT_NAME }}-${{ matrix.GPU_ARCH }}/*"
  # `grep -c` exits non-zero when nothing matches; `|| true` keeps the count of 0.
  FAILED_TESTS=$(cat $ARTIFACTS | grep -c 'FAILED in' || true)
  PASSED_TESTS=$(cat $ARTIFACTS | grep -c 'PASSED in' || true)
  TOTAL_TESTS=$((FAILED_TESTS + PASSED_TESTS))
  # Compare numerically: inside [[ ]], `<` is a lexicographic string
  # comparison (e.g. "9" < "10" is false), which would mis-color the badge.
  if [[ $FAILED_TESTS -eq 0 ]]; then
    BADGE_COLOR=brightgreen
  elif [[ $FAILED_TESTS -lt $TOTAL_TESTS ]]; then
    BADGE_COLOR=yellow
  else
    BADGE_COLOR=red
  fi
  echo "LABEL='${{ matrix.GPU_ARCH }} Unit'" >> $GITHUB_OUTPUT
  echo "MESSAGE='${PASSED_TESTS}/${TOTAL_TESTS} passed'" >> $GITHUB_OUTPUT
  echo "COLOR='${BADGE_COLOR}'" >> $GITHUB_OUTPUT

if-upstream-failed:
runs-on: ubuntu-latest
if: (github.event_name == 'workflow_run' && github.event.workflow_run.conclusion == 'failure') && github.event_name != 'workflow_dispatch'
publish-badge:
needs: [metadata, upload-badge]
if: github.event_name == 'workflow_run' || needs.metadata.outputs.PUBLISH == 'true'
runs-on: ubuntu-22.04
steps:
- run: echo 'Upstream workflow failed, aborting run' && exit 1
- name: copy badge to primary Gist
  # Copies every file from the gist produced by the upload-badge job into
  # the gist identified by vars.BADGE_ENDPOINT_GIST_ID.
  uses: actions/github-script@v6
  with:
    github-token: ${{ secrets.NVJAX_GIST_TOKEN }}
    script: |
      const srcId = "${{ needs.upload-badge.outputs.GIST_ID }}";
      const dstId = "${{ vars.BADGE_ENDPOINT_GIST_ID }}";

      // github-script injects the authenticated client as `github`; REST
      // endpoints live under `github.rest.*`. There is no `octokit` global
      // in the script context.

      // Fetch files from source gist
      const { data: srcData } = await github.rest.gists.get({
        gist_id: srcId
      });

      // Prepare file upload
      let filesToUpdate = {};
      for (const [filename, fileObj] of Object.entries(srcData.files)) {
        filesToUpdate[filename] = {
          content: fileObj.content
        };
      }

      // Update files in destination gist
      await github.rest.gists.update({
        gist_id: dstId,
        files: filesToUpdate
      });

      console.log("Files copied successfully.");

0 comments on commit ab18003

Please sign in to comment.