diff --git a/.github/workflows/_build_pax.yaml b/.github/workflows/_build_pax.yaml index 630db4202..0c1ea75b5 100644 --- a/.github/workflows/_build_pax.yaml +++ b/.github/workflows/_build_pax.yaml @@ -7,7 +7,7 @@ on: type: string description: 'Base docker image that provides JAX' required: false - default: ghcr.io/nvidia/jax-te:latest + default: ghcr.io/nvidia/jax:latest BUILD_DATE: type: string description: "Build date in YYYY-MM-DD format" diff --git a/.github/workflows/_build_t5x.yaml b/.github/workflows/_build_t5x.yaml index c4cd4475a..0b6b8c29a 100644 --- a/.github/workflows/_build_t5x.yaml +++ b/.github/workflows/_build_t5x.yaml @@ -7,7 +7,7 @@ on: type: string description: 'Base docker image that provides JAX' required: false - default: ghcr.io/nvidia/jax-te:latest + default: ghcr.io/nvidia/jax:latest BUILD_DATE: type: string description: "Build date in YYYY-MM-DD format" diff --git a/.github/workflows/_build_te.yaml b/.github/workflows/_build_te.yaml deleted file mode 100644 index 1919be104..000000000 --- a/.github/workflows/_build_te.yaml +++ /dev/null @@ -1,90 +0,0 @@ -name: ~build Transformer Engine container - -on: - workflow_call: - inputs: - BASE_IMAGE: - type: string - description: 'Base docker image that provides JAX' - required: false - default: ghcr.io/nvidia/jax:latest - BUILD_DATE: - type: string - description: "Build date in YYYY-MM-DD format" - required: false - default: 'NOT SPECIFIED' - REPO_TE: - type: string - description: URL of TE repository to check out - required: false - default: "https://github.com/NVIDIA/TransformerEngine.git" - REF_TE: - type: string - description: Git commit, tag, or branch for TE - required: false - default: main - outputs: - DOCKER_TAGS: - description: "Tags of the image built" - value: ${{ jobs.build.outputs.DOCKER_TAGS }} - -env: - UPLD_IMAGE: ghcr.io/nvidia/jax-toolbox-internal - -permissions: - contents: read # to fetch code - actions: write # to cancel previous workflows - packages: write # to upload container - -jobs: - - build: - outputs: - DOCKER_TAGS: ${{ steps.meta.outputs.tags }} - runs-on: [self-hosted, small-builder] - steps: - - name: Print environment variables - run: env - - - name: Check out the repository under ${GITHUB_WORKSPACE} - uses: actions/checkout@v3 - - - name: Login to GitHub Container Registry - uses: docker/login-action@v2 - with: - registry: ghcr.io - username: ${{ github.repository_owner }} - password: ${{ secrets.GITHUB_TOKEN }} - - - name: Set docker metadata - id: meta - uses: docker/metadata-action@v4 - with: - images: | - ${{ env.UPLD_IMAGE }} - flavor: | - latest=false - tags: | - type=raw,value=${{ github.run_id }}-te - labels: - org.opencontainers.image.created=${{ inputs.BUILD_DATE }} - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v2 - with: - driver-opts: | - image=moby/buildkit:v0.10.6 - - - name: Build docker images - uses: docker/build-push-action@v4 - with: - context: .github/container - push: true - file: .github/container/Dockerfile.te - tags: ${{ steps.meta.outputs.tags }} - labels: ${{ steps.meta.outputs.labels }} - build-args: | - BASE_IMAGE=${{ inputs.BASE_IMAGE }} - BUILD_DATE=${{ inputs.BUILD_DATE }} - REPO_TE=${{ inputs.REPO_TE }} - REF_TE=${{ inputs.REF_TE }} \ No newline at end of file diff --git a/.github/workflows/_test_te.yaml b/.github/workflows/_test_te.yaml index 399631c3f..b4609765d 100644 --- a/.github/workflows/_test_te.yaml +++ b/.github/workflows/_test_te.yaml @@ -8,7 +8,7 @@ on: type: string description: 'JAX-TE image build by NVIDIA/JAX-Toolbox' required: true - default: 'ghcr.io/nvidia/jax-te:latest' + default: 'ghcr.io/nvidia/jax:latest' outputs: UNIT_TEST_ARTIFACT_NAME: description: 'Name of the unit test artifact for downstream workflows' diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 3e9f78d4c..068801384 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -119,16 +119,6 @@ jobs: REF_XLA: ${{ needs.metadata.outputs.REF_XLA }} secrets: inherit - build-te: - needs: [metadata, build-jax] - uses: ./.github/workflows/_build_te.yaml - with: - BUILD_DATE: ${{ needs.metadata.outputs.BUILD_DATE }} - BASE_IMAGE: ${{ needs.build-jax.outputs.DOCKER_TAGS }} - REPO_TE: ${{ needs.metadata.outputs.REPO_TE }} - REF_TE: ${{ needs.metadata.outputs.REF_TE }} - secrets: inherit - build-t5x: needs: [metadata, build-jax] uses: ./.github/workflows/_build_t5x.yaml @@ -170,7 +160,7 @@ jobs: secrets: inherit build-summary: - needs: [build-base, build-jax, build-te, build-t5x, build-pax, build-rosetta-t5x, build-rosetta-pax] + needs: [build-base, build-jax, build-t5x, build-pax, build-rosetta-t5x, build-rosetta-pax] runs-on: ubuntu-22.04 steps: - name: Generate job summary for container build @@ -183,7 +173,6 @@ jobs: | ------------ | -------------------------------------------------- | | Base | ${{ needs.build-base.outputs.DOCKER_TAGS }} | | JAX | ${{ needs.build-jax.outputs.DOCKER_TAGS }} | - | JAX-TE | ${{ needs.build-te.outputs.DOCKER_TAGS }} | | T5X | ${{ needs.build-t5x.outputs.DOCKER_TAGS }} | | PAX | ${{ needs.build-pax.outputs.DOCKER_TAGS }} | | ROSETTA(t5x) | ${{ needs.build-rosetta-t5x.outputs.DOCKER_TAGS }} | @@ -198,10 +187,10 @@ jobs: secrets: inherit test-te: - needs: build-te + needs: [build-jax, test-jax] uses: ./.github/workflows/_test_te.yaml with: - JAX_TE_IMAGE: ${{ needs.build-te.outputs.DOCKER_TAGS }} + JAX_TE_IMAGE: ${{ needs.build-jax.outputs.DOCKER_TAGS }} secrets: inherit test-t5x: diff --git a/.github/workflows/nightly-te-build.yaml b/.github/workflows/nightly-te-build.yaml deleted file mode 100644 index 3fecd1067..000000000 --- a/.github/workflows/nightly-te-build.yaml +++ /dev/null @@ -1,64 +0,0 @@ -name: Nightly Transformer Engine build - -on: - workflow_run: - workflows: [Nightly JAX build] - types: [completed] - branches: [main] - workflow_dispatch: - inputs: - PUBLISH: - type: boolean - description: Publish dated images and update the 'latest' tag? - default: false - required: false - -env: - TARGET: jax-te - DOCKER_REGISTRY: ghcr.io/nvidia - -permissions: - contents: read # to fetch code - actions: write # to cancel previous workflows - packages: write # to upload container - -jobs: - - metadata: - if: (github.event_name == 'workflow_run' && github.event.workflow_run.conclusion == 'success') || github.event_name == 'workflow_dispatch' - runs-on: ubuntu-22.04 - outputs: - BUILD_DATE: ${{ steps.date.outputs.BUILD_DATE }} - steps: - - name: Set build date - id: date - shell: bash -x -e {0} - run: | - BUILD_DATE=$(TZ='US/Los_Angeles' date '+%Y-%m-%d') - echo "BUILD_DATE=${BUILD_DATE}" >> $GITHUB_OUTPUT - - build: - if: (github.event_name == 'workflow_run' && github.event.workflow_run.conclusion == 'success') || github.event_name == 'workflow_dispatch' - needs: metadata - uses: ./.github/workflows/_build_te.yaml - with: - BUILD_DATE: ${{ needs.metadata.outputs.BUILD_DATE }} - secrets: inherit - - publish: - if: (github.event_name == 'workflow_run' && github.event.workflow_run.conclusion == 'success') || (github.event_name == 'workflow_dispatch' && inputs.PUBLISH) - needs: [metadata, build] - uses: ./.github/workflows/_publish_container.yaml - secrets: inherit - with: - SOURCE_IMAGE: ${{ needs.build.outputs.DOCKER_TAGS }} - TARGET_IMAGE: jax-te - TARGET_TAGS: | - type=raw,value=latest,priority=1000 - type=raw,value=nightly-${{ needs.metadata.outputs.BUILD_DATE }},priority=900 - - if-upstream-failed: - runs-on: ubuntu-latest - if: (github.event_name == 'workflow_run' && github.event.workflow_run.conclusion == 'failure') && github.event_name != 'workflow_dispatch' - steps: - - run: echo 'Upstream workflow failed, aborting run' && exit 1 \ No newline at end of file diff --git a/.github/workflows/nightly-te-test.yaml b/.github/workflows/nightly-te-test.yaml index c030af044..182cdf641 100644 --- a/.github/workflows/nightly-te-test.yaml +++ b/.github/workflows/nightly-te-test.yaml @@ -2,7 +2,7 @@ name: Nightly Transformer Engine test on: workflow_run: - workflows: [Nightly Transformer Engine build] + workflows: [Nightly JAX build] types: [completed] branches: [main] workflow_dispatch: @@ -11,7 +11,7 @@ on: type: string description: 'JAX-TE image build by NVIDIA/JAX-Toolbox' required: true - default: 'ghcr.io/nvidia/jax-te:latest' + default: 'ghcr.io/nvidia/jax:latest' PUBLISH: type: boolean description: Update status badge? @@ -24,7 +24,7 @@ permissions: packages: write # to upload container env: - DEFAULT_JAX_TE_IMAGE: 'ghcr.io/nvidia/jax-te:latest' + DEFAULT_JAX_TE_IMAGE: 'ghcr.io/nvidia/jax:latest' jobs: diff --git a/README.md b/README.md index 150b557e1..1bc49618f 100644 --- a/README.md +++ b/README.md @@ -3,10 +3,9 @@ | Image | Build | Test | | ---------------------------------------------------- | ------------------------------------------ | -------------------------------------- | | [![container-badge-base]][container-link-base] | [![build-badge-base]][workflow-base] | n/a | -| [![container-badge-jax]][container-link-jax] | [![build-badge-jax]][workflow-jax] | [![test-badge-jax]][workflow-jax-unit] | +| [![container-badge-jax]][container-link-jax] | [![build-badge-jax]][workflow-jax] | [![test-badge-jax]][workflow-jax-unit]
[![unit-test-badge-te]][workflow-te-test]
[![integration-test-badge-te]][workflow-te-test] | | [![container-badge-t5x]][container-link-t5x] | [![build-badge-t5x]][workflow-t5x] | [![test-badge-t5x]][workflow-t5x-perf] | | [![container-badge-pax]][container-link-pax] | [![build-badge-pax]][workflow-pax] | [![test-badge-pax]][workflow-pax-perf] | -| [![container-badge-te]][container-link-te] | [![build-badge-te]][workflow-te] | [![unit-test-badge-te]][workflow-te-test]
[![integration-test-badge-te]][workflow-te-test] | | [![container-badge-rosetta-t5x]][container-link-rosetta-t5x] | [![build-badge-rosetta-t5x]][workflow-rosetta-t5x] | [![test-badge-rosetta-t5x]][workflow-rosetta-t5x] | | [![container-badge-rosetta-pax]][container-link-rosetta-pax] | [![build-badge-rosetta-pax]][workflow-rosetta-pax] | [![test-badge-rosetta-pax]][workflow-rosetta-pax] | @@ -16,13 +15,11 @@ [container-badge-pax]: https://img.shields.io/static/v1?label=&message=PAX&color=gray&logo=docker [container-badge-rosetta-t5x]: https://img.shields.io/static/v1?label=&message=ROSETTA(T5X)&color=gray&logo=docker [container-badge-rosetta-pax]: https://img.shields.io/static/v1?label=&message=ROSETTA(PAX)&color=gray&logo=docker -[container-badge-te]: https://img.shields.io/static/v1?label=&message=TE&color=gray&logo=docker [container-link-base]: https://github.com/NVIDIA/JAX-Toolbox/pkgs/container/jax-toolbox [container-link-jax]: https://github.com/NVIDIA/JAX-Toolbox/pkgs/container/jax [container-link-t5x]: https://github.com/NVIDIA/JAX-Toolbox/pkgs/container/t5x [container-link-pax]: https://github.com/NVIDIA/JAX-Toolbox/pkgs/container/pax -[container-link-te]: https://github.com/NVIDIA/JAX-Toolbox/pkgs/container/jax-te [container-link-rosetta-t5x]: https://github.com/NVIDIA/JAX-Toolbox/pkgs/container/rosetta-t5x [container-link-rosetta-pax]: https://github.com/NVIDIA/JAX-Toolbox/pkgs/container/rosetta-pax @@ -32,7 +29,6 @@ [build-badge-pax]: https://img.shields.io/github/actions/workflow/status/NVIDIA/JAX-Toolbox/nightly-pax-build.yaml?branch=main&label=nightly&logo=github-actions&logoColor=dddddd [build-badge-rosetta-t5x]: https://img.shields.io/endpoint?url=https%3A%2F%2Fgist.githubusercontent.com%2Fnvjax%2F913c2af68649fe568e9711c2dabb23ae%2Fraw%2Frosetta-t5x-build-status.json&logo=github-actions&logoColor=dddddd [build-badge-rosetta-pax]: https://img.shields.io/endpoint?url=https%3A%2F%2Fgist.githubusercontent.com%2Fnvjax%2F913c2af68649fe568e9711c2dabb23ae%2Fraw%2Frosetta-pax-build-status.json&logo=github-actions&logoColor=dddddd -[build-badge-te]: https://img.shields.io/github/actions/workflow/status/NVIDIA/JAX-Toolbox/nightly-te-build.yaml?branch=main&label=nightly&logo=github-actions&logoColor=dddddd [workflow-base]: https://github.com/NVIDIA/JAX-Toolbox/actions/workflows/weekly-base-build.yaml [workflow-jax]: https://github.com/NVIDIA/JAX-Toolbox/actions/workflows/nightly-jax-build.yaml @@ -40,7 +36,6 @@ [workflow-pax]: https://github.com/NVIDIA/JAX-Toolbox/actions/workflows/nightly-pax-build.yaml [workflow-rosetta-t5x]: https://github.com/NVIDIA/JAX-Toolbox/actions/workflows/nightly-rosetta-t5x-build.yaml [workflow-rosetta-pax]: https://github.com/NVIDIA/JAX-Toolbox/actions/workflows/nightly-rosetta-pax-build.yaml -[workflow-te]: https://github.com/NVIDIA/JAX-Toolbox/actions/workflows/nightly-te-build.yaml [test-badge-jax]: https://img.shields.io/endpoint?url=https%3A%2F%2Fgist.githubusercontent.com%2Fnvjax%2F913c2af68649fe568e9711c2dabb23ae%2Fraw%2Fjax-unit-test-status.json&logo=nvidia [test-badge-t5x]: https://img.shields.io/github/actions/workflow/status/NVIDIA/JAX-Toolbox/nightly-t5x-test-mgmn.yaml?branch=main&label=A100%20MGMN&logo=nvidia