Fix metrics for all MGMN tests (#622)
After #509 was merged, the metrics were broken because the experiment
directory names contained an unexpected number of hyphens. This change
obtains the job name more robustly by splitting on GITHUB_RUN_ID, which
every experiment directory name should contain.
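
For example, given a hypothetical experiment dir named
upstream-pax-1234567890-pax-2N8G (the run ID and job name here are made up),
splitting on the run ID recovers the full job name even though it contains a
hyphen, whereas picking a fixed hyphen-delimited field does not:

  i=upstream-pax-1234567890-pax-2N8G                  # ${FW_NAME}-${GITHUB_RUN_ID}-<job name>
  GITHUB_RUN_ID=1234567890
  echo $i | cut -d'-' -f3                             # old approach -> "1234567890" (wrong field)
  echo $i | awk -F "${GITHUB_RUN_ID}-" '{print $2}'   # new approach -> "pax-2N8G"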

* Introduces `--output_json_path` to make the output location explicit (see the example after this list)
* Fixes rosetta-t5x, which still used v3 actions and caused issues when
  downloading artifacts
* Adds size_guidance to the TB EventAccumulator to ensure all steps are
loaded
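
As a rough sketch of the first bullet (the directory and job name below are
illustrative), the metrics JSON path is now passed explicitly rather than
derived from the test config name, and the script refuses to overwrite an
existing file:

  python3 .github/workflows/baselines/summarize_metrics.py \
    upstream-pax-1234567890-pax-2N8G/pax-2N8G \
    --output_json_path pax-2N8G_metrics.json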

---------

Co-authored-by: Yu-Hang "Maxin" Tang <Tang.Maxin@gmail.com>
terrykong and yhtang authored Mar 8, 2024
1 parent 9c2ae8f commit 9cced45
Showing 7 changed files with 52 additions and 64 deletions.
14 changes: 8 additions & 6 deletions .github/workflows/_test_maxtext.yaml
@@ -27,7 +27,7 @@ on:
  type: string
  description: 'Name of the framework being used'
  required: false
- default: 'MaxText'
+ default: 'upstream-maxtext'
  outputs:
  TEST_STATUS:
  description: 'Summary pass/fail value indicating if results from tests are acceptable'
@@ -343,21 +343,23 @@ jobs:
  uses: actions/download-artifact@v4

  - name: Run pytest
- shell: bash -x {0}
+ shell: bash -eux {0}
  run: |
  pip install pytest pytest-reportlog tensorboard
  for i in ${{ inputs.FW_NAME }}-${GITHUB_RUN_ID}-*; do
- SUBDIR=$(echo $i | cut -d'-' -f3)
- mv $i/$SUBDIR* .
- python3 .github/workflows/baselines/summarize_metrics.py $SUBDIR --loss_summary_name "learning/loss" --perf_summary_name "perf/step_time_seconds" # create result json in baseline format
+ JOB_NAME=$(echo $i | awk -F "${GITHUB_RUN_ID}-" '{print $2}')
+ METRIC_PATH=${JOB_NAME}_metrics.json
+ python3 .github/workflows/baselines/summarize_metrics.py $i/$JOB_NAME --loss_summary_name "learning/loss" --perf_summary_name "perf/step_time_seconds" --output_json_path $METRIC_PATH
+ # Test script expects the job dir and the log to be in the CWD
+ mv $i/$JOB_NAME $i/${JOB_NAME}.log .
  done
  RESULTS_DIR=$PWD BASELINES_DIR=MAXTEXT/upstream pytest --report-log=report.jsonl .github/workflows/baselines/test_maxtext_metrics.py || true
  - name: Upload metrics test json logs
  uses: actions/upload-artifact@v4
  with:
- name: upstream-${{ inputs.FW_NAME }}-metrics-test-log
+ name: ${{ inputs.FW_NAME }}-metrics-test-log
  path: |
  report.jsonl
  *_metrics.json
27 changes: 6 additions & 21 deletions .github/workflows/_test_pax_rosetta.yaml
@@ -1033,32 +1033,17 @@ jobs:
  uses: actions/download-artifact@v4

  - name: Run pytest
- shell: bash -x {0}
+ shell: bash -eux {0}
  run: |
  pip install pytest pytest-reportlog tensorboard
  for i in ${{ inputs.FW_NAME }}-${GITHUB_RUN_ID}-*; do
- SUBDIR=$(echo $i | rev | cut -d'-' -f1 | rev)
- mv $i/$SUBDIR* .
- python3 .github/workflows/baselines/summarize_metrics.py $SUBDIR # create result json in baseline format
+ JOB_NAME=$(echo $i | awk -F "${GITHUB_RUN_ID}-" '{print $2}')
+ METRIC_PATH=${JOB_NAME}_metrics.json
+ python3 .github/workflows/baselines/summarize_metrics.py $i/$JOB_NAME --output_json_path $METRIC_PATH
+ # Test script expects the job dir and the log to be in the CWD
+ mv $i/$JOB_NAME $i/${JOB_NAME}.log .
  done
- echo '## Rosetta PAX MGMN Test Metrics' >> $GITHUB_STEP_SUMMARY
- python <<EOF | tee -a $GITHUB_STEP_SUMMARY
- import json
- files = "$(echo *_metrics.json)".split()
- header = None
- print_row = lambda lst: print('| ' + ' | '.join(str(el) for el in lst) + ' |')
- for path in files:
-     with open(path) as f:
-         obj = json.loads(f.read())
-     if not header:
-         header = list(obj.keys())
-         print_row(["Job Name"] + header)
-         print_row(["---"] * (1+len(header)))
-     job_name = path[:-len('_metrics.json')]
-     print_row([job_name] + [obj[h] for h in header])
- EOF
  RESULTS_DIR=$PWD BASELINES_DIR=PAX_MGMN/rosetta pytest --report-log=report.jsonl .github/workflows/baselines/test_pax_mgmn_metrics.py || true
  - name: Upload metrics test json logs
33 changes: 9 additions & 24 deletions .github/workflows/_test_t5x_rosetta.yaml
@@ -784,42 +784,27 @@ jobs:

  steps:
  - name: Check out the repository under ${GITHUB_WORKSPACE}
- uses: actions/checkout@v3
+ uses: actions/checkout@v4

  - name: Download artifacts
- uses: actions/download-artifact@v3
+ uses: actions/download-artifact@v4

  - name: Run pytest
- shell: bash -x {0}
+ shell: bash -eux {0}
  run: |
  pip install pytest pytest-reportlog tensorboard
  for i in ${{ inputs.FW_NAME }}-${GITHUB_RUN_ID}-* ${{ inputs.FW_NAME }}-vit-${GITHUB_RUN_ID}-*; do
- SUBDIR=$(echo $i | rev | cut -d'-' -f1 | rev)
- mv $i/$SUBDIR* .
- python3 .github/workflows/baselines/summarize_metrics.py $SUBDIR # create result json in baseline format
+ JOB_NAME=$(echo $i | awk -F "${GITHUB_RUN_ID}-" '{print $2}')
+ METRIC_PATH=${JOB_NAME}_metrics.json
+ python3 .github/workflows/baselines/summarize_metrics.py $i/$JOB_NAME --perf_summary_name "timing/steps_per_second" --output_json_path $METRIC_PATH
+ # Test script expects the job dir and the log to be in the CWD
+ mv $i/$JOB_NAME $i/${JOB_NAME}.log .
  done
- echo '## Rosetta T5X MGMN Test Metrics' >> $GITHUB_STEP_SUMMARY
- python <<EOF | tee -a $GITHUB_STEP_SUMMARY
- import json
- files = "$(echo *_metrics.json)".split()
- header = None
- print_row = lambda lst: print('| ' + ' | '.join(str(el) for el in lst) + ' |')
- for path in files:
-     with open(path) as f:
-         obj = json.loads(f.read())
-     if not header:
-         header = list(obj.keys())
-         print_row(["Job Name"] + header)
-         print_row(["---"] * (1+len(header)))
-     job_name = path[:-len('_metrics.json')]
-     print_row([job_name] + [obj[h] for h in header])
- EOF
  RESULTS_DIR=$PWD BASELINES_DIR=T5X_MGMN/rosetta pytest --report-log=report.jsonl .github/workflows/baselines/test_t5x_mgmn_metrics.py || true
  - name: Upload metrics test json logs
- uses: actions/upload-artifact@v3
+ uses: actions/upload-artifact@v4
  with:
  name: ${{ inputs.FW_NAME }}-metrics-test-log
  path: |
10 changes: 6 additions & 4 deletions .github/workflows/_test_upstream_pax.yaml
@@ -486,13 +486,15 @@ jobs:
  uses: actions/download-artifact@v4

  - name: Run pytest
- shell: bash -x {0}
+ shell: bash -eux {0}
  run: |
  pip install pytest pytest-reportlog tensorboard
  for i in ${{ inputs.FW_NAME }}-${GITHUB_RUN_ID}-*; do
- SUBDIR=$(echo $i | cut -d'-' -f3)
- mv $i/$SUBDIR* .
- python3 .github/workflows/baselines/summarize_metrics.py $SUBDIR # create result json in baseline format
+ JOB_NAME=$(echo $i | awk -F "${GITHUB_RUN_ID}-" '{print $2}')
+ METRIC_PATH=${JOB_NAME}_metrics.json
+ python3 .github/workflows/baselines/summarize_metrics.py $i/$JOB_NAME --output_json_path $METRIC_PATH
+ # Test script expects the job dir and the log to be in the CWD
+ mv $i/$JOB_NAME $i/${JOB_NAME}.log .
  done
  RESULTS_DIR=$PWD BASELINES_DIR=PAX_MGMN/upstream pytest --report-log=report.jsonl .github/workflows/baselines/test_pax_mgmn_metrics.py || true
10 changes: 6 additions & 4 deletions .github/workflows/_test_upstream_t5x.yaml
@@ -324,13 +324,15 @@ jobs:
  uses: actions/download-artifact@v4

  - name: Run pytest
- shell: bash -x {0}
+ shell: bash -eux {0}
  run: |
  pip install pytest pytest-reportlog tensorboard
  for i in ${{ inputs.FW_NAME }}-${GITHUB_RUN_ID}-*; do
- SUBDIR=$(echo $i | cut -d'-' -f3)
- mv $i/$SUBDIR* .
- python3 .github/workflows/baselines/summarize_metrics.py $SUBDIR --perf_summary_name "timing/steps_per_second" # create result json in baseline format
+ JOB_NAME=$(echo $i | awk -F "${GITHUB_RUN_ID}-" '{print $2}')
+ METRIC_PATH=${JOB_NAME}_metrics.json
+ python3 .github/workflows/baselines/summarize_metrics.py $i/$JOB_NAME --perf_summary_name "timing/steps_per_second" --output_json_path $METRIC_PATH
+ # Test script expects the job dir and the log to be in the CWD
+ mv $i/$JOB_NAME $i/${JOB_NAME}.log .
  done
  RESULTS_DIR=$PWD BASELINES_DIR=T5X_MGMN/upstream pytest --report-log=report.jsonl .github/workflows/baselines/test_t5x_mgmn_metrics.py || true
8 changes: 5 additions & 3 deletions .github/workflows/baselines/summarize_metrics.py
@@ -32,6 +32,7 @@ def main():
  help='Key in the tensorboard event file containing loss data')
  parser.add_argument('-p', '--perf_summary_name', default='Steps/sec',
  help='Key in the tensorboard event file containing perf data')
+ parser.add_argument('-o', '--output_json_path', type=str)
  args = parser.parse_args()

  try:
@@ -59,9 +60,10 @@ def main():
  e2e_time = read_e2e_time(args.test_config + ".log")

  baseline = _create_summary(loss, train_time, e2e_time)
- json_fname = args.test_config + "_metrics.json"
- print(f'JSON FILENAME: {json_fname}')
- with open(json_fname, "w") as f:
+ print(f'JSON FILENAME: {args.output_json_path}')
+ if os.path.exists(args.output_json_path):
+     raise FileExistsError(f"Did not expect --output_json_path={args.output_json_path} to exist. Delete it before proceeding.")
+ with open(args.output_json_path, "w") as f:
  json.dump(baseline, f, indent=2)

  except KeyError as e:
14 changes: 12 additions & 2 deletions .github/workflows/baselines/test_utils.py
@@ -2,8 +2,18 @@
  from tensorboard.util import tensor_util
  import numpy as np

+ # By default TB tries to be smart about what to load in memory to avoid OOM
+ # Since we expect every step to be there when we do our comparisons, we explicitly
+ # set the size guidance to 0 so that we load everything. It's okay given our tests
+ # are small/short.
+ size_guidance = {
+     event_accumulator.TENSORS: 0,
+     event_accumulator.SCALARS: 0,
+ }
+
+
  def read_tb_tag(tb_file: str, summary_name: str) -> dict:
-     ea = event_accumulator.EventAccumulator(tb_file)
+     ea = event_accumulator.EventAccumulator(tb_file, size_guidance)
      ea.Reload()

      return {
@@ -12,7 +22,7 @@ def read_tb_tag(tb_file: str, summary_name: str) -> dict:
  }

  def read_maxtext_tb_tag(tb_file: str, summary_name: str) -> dict:
-     ea = event_accumulator.EventAccumulator(tb_file)
+     ea = event_accumulator.EventAccumulator(tb_file, size_guidance)
      ea.Reload()

      return {
