Skip to content

Commit

Permalink
limit jumpbox job concurrency
Browse files: browse the repository at this point in the history
  • Loading branch information
yhtang committed Oct 15, 2024
1 parent d0800ed commit de43bb4
Show file tree
Hide file tree
Showing 5 changed files with 32 additions and 21 deletions.
4 changes: 4 additions & 0 deletions .github/workflows/_test_maxtext.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ jobs:
fail-fast: false

runs-on: jumpbox
concurrency:
group: ${{ github.run_id }}-${{ github.run_attempt }}

steps:
- name: Print environment variables
Expand Down Expand Up @@ -195,6 +197,8 @@ jobs:
fail-fast: false

runs-on: jumpbox
concurrency:
group: ${{ github.run_id }}-${{ github.run_attempt }}

steps:
- name: Print environment variables
Expand Down
17 changes: 10 additions & 7 deletions .github/workflows/_test_pax_rosetta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -37,12 +37,11 @@ jobs:
- [1, 8, 1, 1]
- [1, 1, 2, 4]
fail-fast: false

runs-on: jumpbox

concurrency:
group: ${{ github.run_id }}-${{ github.run_attempt }}
env:
BADGE_FILENAME_PREFIX: badge-rosetta-pax-single-process-multi-device-te

steps:
- name: Print environment variables
run: env
Expand Down Expand Up @@ -259,8 +258,9 @@ jobs:
EVALUATE: true
ADDITIONAL_ARGS: "--model-type LLaMA70BProxy --evaluate"
fail-fast: false

runs-on: jumpbox
concurrency:
group: ${{ github.run_id }}-${{ github.run_attempt }}
env:
BADGE_FILENAME_PREFIX: badge-rosetta-pax-multi-node-te
steps:
Expand Down Expand Up @@ -457,8 +457,9 @@ jobs:
- [4, 2, 1, 1]
- [4, 2, 1, 2]
fail-fast: false

runs-on: jumpbox
concurrency:
group: ${{ github.run_id }}-${{ github.run_attempt }}
env:
BADGE_FILENAME_PREFIX: badge-rosetta-pax-multi-node
steps:
Expand Down Expand Up @@ -649,8 +650,9 @@ jobs:
PARALLEL_CONFIG:
- [1, 8, 1, 1]
fail-fast: false

runs-on: jumpbox
concurrency:
group: ${{ github.run_id }}-${{ github.run_attempt }}
env:
BADGE_FILENAME_PREFIX: badge-rosetta-pax-single-node-dropout-te
steps:
Expand Down Expand Up @@ -843,8 +845,9 @@ jobs:
PARALLEL_CONFIG:
- [1, 8, 1, 1]
fail-fast: false

runs-on: jumpbox
concurrency:
group: ${{ github.run_id }}-${{ github.run_attempt }}
env:
BADGE_FILENAME_PREFIX: badge-rosetta-pax-single-process-evaluation-te
steps:
Expand Down
12 changes: 8 additions & 4 deletions .github/workflows/_test_t5x_rosetta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,9 @@ jobs:
ADDITIONAL_ARGS: ""
EXTRA_GIN_ARGS: "--gin.train/utils.DatasetConfig.pack=False --gin.train_eval/utils.DatasetConfig.pack=False"
fail-fast: false

runs-on: jumpbox
concurrency:
group: ${{ github.run_id }}-${{ github.run_attempt }}
env:
BADGE_FILENAME_PREFIX: badge-rosetta-t5x-single-process-multi-device
steps:
Expand Down Expand Up @@ -241,8 +242,9 @@ jobs:
ADDITIONAL_ARGS: "--enable-te 0"
EXTRA_GIN_ARGS: ""
fail-fast: false

runs-on: jumpbox
concurrency:
group: ${{ github.run_id }}-${{ github.run_attempt }}
env:
BADGE_FILENAME_PREFIX: badge-rosetta-t5x-multi-gpu-multi-node
steps:
Expand Down Expand Up @@ -427,8 +429,9 @@ jobs:
matrix:
N_GPU: [8]
fail-fast: false

runs-on: jumpbox
concurrency:
group: ${{ github.run_id }}-${{ github.run_attempt }}
env:
BADGE_FILENAME_PREFIX: badge-rosetta-t5x-vit-single-process-multi-device
steps:
Expand Down Expand Up @@ -597,8 +600,9 @@ jobs:
N_GPU: [1, 8]
N_NODE: [1, 2]
fail-fast: false

runs-on: jumpbox
concurrency:
group: ${{ github.run_id }}-${{ github.run_attempt }}
env:
BADGE_FILENAME_PREFIX: badge-rosetta-t5x-vit-multi-gpu-multi-node
steps:
Expand Down
12 changes: 6 additions & 6 deletions .github/workflows/_test_upstream_pax.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,9 @@ jobs:
- [1, 8, 1, 1]
- [1, 1, 2, 4]
fail-fast: false

runs-on: jumpbox

concurrency:
group: ${{ github.run_id }}-${{ github.run_attempt }}
steps:
- name: Print environment variables
run: env
Expand Down Expand Up @@ -211,9 +211,9 @@ jobs:
EVALUATE: true
ADDITIONAL_ARGS: "--model-type LLaMA70BProxy --evaluate"
fail-fast: false

runs-on: jumpbox

concurrency:
group: ${{ github.run_id }}-${{ github.run_attempt }}
steps:
- name: Print environment variables
run: env
Expand Down Expand Up @@ -360,9 +360,9 @@ jobs:
PARALLEL_CONFIG:
- [1, 8, 1, 1]
fail-fast: false

runs-on: jumpbox

concurrency:
group: ${{ github.run_id }}-${{ github.run_attempt }}
steps:
- name: Print environment variables
run: env
Expand Down
8 changes: 4 additions & 4 deletions .github/workflows/_test_upstream_t5x.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,9 @@ jobs:
N_GPU: 2
ADDITIONAL_ARGS: "--enable-fmha 1"
fail-fast: false

runs-on: jumpbox

concurrency:
group: ${{ github.run_id }}-${{ github.run_attempt }}
steps:
- name: Print environment variables
run: env
Expand Down Expand Up @@ -190,9 +190,9 @@ jobs:
N_NODE: 2
ADDITIONAL_ARGS: "--enable-fmha 1"
fail-fast: false

runs-on: jumpbox

concurrency:
group: ${{ github.run_id }}-${{ github.run_attempt }}
steps:
- name: Print environment variables
run: env
Expand Down

0 comments on commit de43bb4

Please sign in to comment.