From 88494fd937bf59782e56942b7dff72e5c9a62519 Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Wed, 2 Aug 2023 12:07:30 -0500 Subject: [PATCH 01/82] Define GitHub workflows for building and testing dbt models --- .github/actions/install_dbt_requirements.yaml | 17 +++++ .github/workflows/build_and_test_dbt.yaml | 63 +++++++++++++++++++ .github/workflows/test_dbt_models.yaml | 35 +++++++++++ 3 files changed, 115 insertions(+) create mode 100644 .github/actions/install_dbt_requirements.yaml create mode 100644 .github/workflows/build_and_test_dbt.yaml create mode 100644 .github/workflows/test_dbt_models.yaml diff --git a/.github/actions/install_dbt_requirements.yaml b/.github/actions/install_dbt_requirements.yaml new file mode 100644 index 000000000..ebae03ef6 --- /dev/null +++ b/.github/actions/install_dbt_requirements.yaml @@ -0,0 +1,17 @@ +name: Install dbt dependencies +runs: + using: composite + steps: + - name: Setup python + uses: actions/setup-python@v4 + with: + python-version: 3.x + + - name: Install python requirements + run: python -m pip install -r dbt/requirements.txt + shell: bash + + - name: Install dbt requirements + run: dbt deps + working-directory: ./dbt + shell: bash diff --git a/.github/workflows/build_and_test_dbt.yaml b/.github/workflows/build_and_test_dbt.yaml new file mode 100644 index 000000000..7fc4b2557 --- /dev/null +++ b/.github/workflows/build_and_test_dbt.yaml @@ -0,0 +1,63 @@ +name: Build and test dbt + +on: + pull_request: + branches: [master, data-catalog] + push: + branches: [master, data-catalog] + +jobs: + build-and-test: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v3 + + - name: Install dbt requirements + uses: ./.github/actions/install_dbt_requirements.yaml + + # TODO: AWS configuration? + + - name: Set environment for branch + run: | + if [[ $GITHUB_REF == 'refs/heads/master' ]]; then + echo "DBT_TARGET=prod" >> "$GITHUB_ENV" + echo "CACHE_KEY=master" >> "$GITHUB_ENV" + else + echo "DBT_TARGET=ci" >> "$GITHUB_ENV" + echo "CACHE_KEY=$GITHUB_HEAD_REF" >> "$GITHUB_ENV" + fi + shell: bash + + - name: Cache dbt manifest + id: cache + uses: actions/cache@v3 + env: + # Cache keys are shared by the test_dbt workflow and we are too lazy + # to factor this step out into a composite action, so make sure to + # update that workflow if you change the cache key format + cache-name: cache-dbt-run + with: + path: ./dbt/target + key: ${{ cache-name }}-$${{ vars.CACHE_KEY }} + restore-keys: ${{ cache-name }}-master + + - if: ${{ steps.cache.outputs.cache-hit == 'true' }} + name: Set state args + run: echo "STATE_ARGS='-s state:modified --state target/'" >> "$GITHUB_ENV" + shell: bash + + - name: Test dbt macros + run: dbt run-operation test_all + working-directory: ./dbt + shell: bash + + - name: Build models + run: dbt run --target "$DBT_TARGET" "$STATE_ARGS" + working-directory: ./dbt + shell: bash + + - name: Test models + run: dbt test --target "$DBT_TARGET" "$STATE_ARGS" + working-directory: ./dbt + shell: bash diff --git a/.github/workflows/test_dbt_models.yaml b/.github/workflows/test_dbt_models.yaml new file mode 100644 index 000000000..3e5f3d14a --- /dev/null +++ b/.github/workflows/test_dbt_models.yaml @@ -0,0 +1,35 @@ +name: Test dbt models + +on: + workflow_dispatch: + inputs: + use_build_cache: + description: >- + Use the master branch build cache to avoid rebuilding models. + type: boolean + required: true + default: true + +jobs: + run-tests: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v3 + + - name: Install dbt requirements + uses: ./.github/actions/install_dbt_requirements.yaml + + - if: ${{ inputs.use_build_cache }} + name: Cache dbt manifest + uses: actions/cache@v3 + with: + path: ./dbt/target + key: cache-dbt-run-master + + # TODO: AWS configuration? + + - name: Test models + run: dbt test --target prod + working-directory: ./dbt + shell: bash From 997f3e4784455595d82af69a79be414faa37f1a2 Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Wed, 2 Aug 2023 15:03:30 -0500 Subject: [PATCH 02/82] Configure AWS credentials in GitHub Actions build and test workflows --- .github/workflows/build_and_test_dbt.yaml | 6 +++++- .github/workflows/test_dbt_models.yaml | 6 +++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build_and_test_dbt.yaml b/.github/workflows/build_and_test_dbt.yaml index 7fc4b2557..cc408796e 100644 --- a/.github/workflows/build_and_test_dbt.yaml +++ b/.github/workflows/build_and_test_dbt.yaml @@ -16,7 +16,11 @@ jobs: - name: Install dbt requirements uses: ./.github/actions/install_dbt_requirements.yaml - # TODO: AWS configuration? + - name: Configure AWS credentials + uses: aws-actions/configure-aws-credentials@v2 + with: + role-to-assume: ${{ secrets.AWS_IAM_ROLE_TO_ASSUME_ARN }} + aws-region: us-east-1 - name: Set environment for branch run: | diff --git a/.github/workflows/test_dbt_models.yaml b/.github/workflows/test_dbt_models.yaml index 3e5f3d14a..6993a414a 100644 --- a/.github/workflows/test_dbt_models.yaml +++ b/.github/workflows/test_dbt_models.yaml @@ -27,7 +27,11 @@ jobs: path: ./dbt/target key: cache-dbt-run-master - # TODO: AWS configuration? + - name: Configure AWS credentials + uses: aws-actions/configure-aws-credentials@v2 + with: + role-to-assume: ${{ secrets.AWS_IAM_ROLE_TO_ASSUME_ARN }} + aws-region: us-east-1 - name: Test models run: dbt test --target prod From bc0e000f19006e876a126c4f80ddbf28362816b6 Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Wed, 2 Aug 2023 16:54:56 -0500 Subject: [PATCH 03/82] Centralize dbt env vars in GitHub Actions workflows --- .../actions/load_environment_variables.yaml | 15 ++++++++ .github/variables/dbt.env | 3 ++ .github/workflows/build_and_test_dbt.yaml | 34 +++++++++++-------- .github/workflows/test_dbt_models.yaml | 9 +++-- 4 files changed, 43 insertions(+), 18 deletions(-) create mode 100644 .github/actions/load_environment_variables.yaml create mode 100644 .github/variables/dbt.env diff --git a/.github/actions/load_environment_variables.yaml b/.github/actions/load_environment_variables.yaml new file mode 100644 index 000000000..941d09f13 --- /dev/null +++ b/.github/actions/load_environment_variables.yaml @@ -0,0 +1,15 @@ +name: Load environment variables +description: Configures environment variables for a workflow +inputs: + env_var_file_path: + description: | + File path to variable file or directory. + Defaults to ./.github/variables/* if none specified + and runs against each file in that directory. + required: false + default: ./.github/variables/* +runs: + using: composite + steps: + - run: sed "" ${{ inputs.env_var_file_path }} >> "$GITHUB_ENV" + shell: bash diff --git a/.github/variables/dbt.env b/.github/variables/dbt.env new file mode 100644 index 000000000..82ffef7fb --- /dev/null +++ b/.github/variables/dbt.env @@ -0,0 +1,3 @@ + DBT_PROJECT_DIR=./dbt + DBT_MANIFEST_DIR=./dbt/target + DBT_CACHE_NAME=dbt-cache diff --git a/.github/workflows/build_and_test_dbt.yaml b/.github/workflows/build_and_test_dbt.yaml index cc408796e..3650bec85 100644 --- a/.github/workflows/build_and_test_dbt.yaml +++ b/.github/workflows/build_and_test_dbt.yaml @@ -16,6 +16,9 @@ jobs: - name: Install dbt requirements uses: ./.github/actions/install_dbt_requirements.yaml + - name: Load environment variables + uses: ./.github/actions/load_environment_variables.yaml + - name: Configure AWS credentials uses: aws-actions/configure-aws-credentials@v2 with: @@ -24,9 +27,13 @@ jobs: - name: Set environment for branch run: | - if [[ $GITHUB_REF == 'refs/heads/master' ]]; then + if [[ $GITHUB_REF_NAME == 'master' ]]; then echo "DBT_TARGET=prod" >> "$GITHUB_ENV" echo "CACHE_KEY=master" >> "$GITHUB_ENV" + elif [[ $GITHUB_REF_NAME == 'data-catalog' ]]; then + echo "DBT_TARGET=ci" >> "$GITHUB_ENV" + echo "CACHE_KEY=data-catalog" >> "$GITHUB_ENV" + echo "GITHUB_HEAD_REF=data-catalog" >> "$GITHUB_ENV" else echo "DBT_TARGET=ci" >> "$GITHUB_ENV" echo "CACHE_KEY=$GITHUB_HEAD_REF" >> "$GITHUB_ENV" @@ -36,32 +43,29 @@ jobs: - name: Cache dbt manifest id: cache uses: actions/cache@v3 - env: - # Cache keys are shared by the test_dbt workflow and we are too lazy - # to factor this step out into a composite action, so make sure to - # update that workflow if you change the cache key format - cache-name: cache-dbt-run with: - path: ./dbt/target - key: ${{ cache-name }}-$${{ vars.CACHE_KEY }} - restore-keys: ${{ cache-name }}-master + path: ${{ env.DBT_MANIFEST_DIR }} + key: ${{ env.DBT_CACHE_NAME }}-$${{ vars.CACHE_KEY }} + restore-keys: | + ${{ env.DBT_CACHE_NAME }}-data-catalog + ${{ env.DBT_CACHE_NAME }}-master - if: ${{ steps.cache.outputs.cache-hit == 'true' }} name: Set state args - run: echo "STATE_ARGS='-s state:modified --state target/'" >> "$GITHUB_ENV" + run: echo "DBT_STATE_ARGS='-s state:modified --state target/'" >> "$GITHUB_ENV" shell: bash - name: Test dbt macros run: dbt run-operation test_all - working-directory: ./dbt + working-directory: ${{ env.DBT_PROJECT_DIR }} shell: bash - name: Build models - run: dbt run --target "$DBT_TARGET" "$STATE_ARGS" - working-directory: ./dbt + run: dbt run --target "$DBT_TARGET" "$DBT_STATE_ARGS" + working-directory: ${{ env.DBT_PROJECT_DIR }} shell: bash - name: Test models - run: dbt test --target "$DBT_TARGET" "$STATE_ARGS" - working-directory: ./dbt + run: dbt test --target "$DBT_TARGET" "$DBT_STATE_ARGS" + working-directory: ${{ env.DBT_PROJECT_DIR }} shell: bash diff --git a/.github/workflows/test_dbt_models.yaml b/.github/workflows/test_dbt_models.yaml index 6993a414a..d9bfb5cdd 100644 --- a/.github/workflows/test_dbt_models.yaml +++ b/.github/workflows/test_dbt_models.yaml @@ -20,12 +20,15 @@ jobs: - name: Install dbt requirements uses: ./.github/actions/install_dbt_requirements.yaml + - name: Load environment variables + uses: ./.github/actions/load_environment_variables.yaml + - if: ${{ inputs.use_build_cache }} name: Cache dbt manifest uses: actions/cache@v3 with: - path: ./dbt/target - key: cache-dbt-run-master + path: ${{ env.DBT_MANIFEST_DIR }} + key: ${{ env.DBT_CACHE_NAME }}-master - name: Configure AWS credentials uses: aws-actions/configure-aws-credentials@v2 @@ -35,5 +38,5 @@ jobs: - name: Test models run: dbt test --target prod - working-directory: ./dbt + working-directory: ${{ DBT_PROJECT_DIR }} shell: bash From ff4fd8116773dd901788f5da8abe91f4c04a5d22 Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Thu, 3 Aug 2023 09:09:23 -0500 Subject: [PATCH 04/82] Tweaks to dbt GitHub Actions workflow definition ahead of testing --- .github/actions/install_dbt_requirements.yaml | 14 ++++++++++++-- .github/workflows/build_and_test_dbt.yaml | 2 +- .github/workflows/test_dbt_models.yaml | 4 ++-- 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/.github/actions/install_dbt_requirements.yaml b/.github/actions/install_dbt_requirements.yaml index ebae03ef6..743872b27 100644 --- a/.github/actions/install_dbt_requirements.yaml +++ b/.github/actions/install_dbt_requirements.yaml @@ -1,4 +1,14 @@ name: Install dbt dependencies +description: Installs Python and dbt requirements for a workflow +inputs: + dbt_project_dir: + description: Path to the directory containing the dbt project. + required: false + default: ./dbt + requirements_file_path: + description: Path to Python requirements file. + required: false + default: ./dbt/requirements.txt runs: using: composite steps: @@ -8,10 +18,10 @@ runs: python-version: 3.x - name: Install python requirements - run: python -m pip install -r dbt/requirements.txt + run: python -m pip install -r ${{ inputs.requirements_file_path }} shell: bash - name: Install dbt requirements run: dbt deps - working-directory: ./dbt + working-directory: ${{ inputs.dbt_project_dir }} shell: bash diff --git a/.github/workflows/build_and_test_dbt.yaml b/.github/workflows/build_and_test_dbt.yaml index 3650bec85..df6a11b22 100644 --- a/.github/workflows/build_and_test_dbt.yaml +++ b/.github/workflows/build_and_test_dbt.yaml @@ -45,7 +45,7 @@ jobs: uses: actions/cache@v3 with: path: ${{ env.DBT_MANIFEST_DIR }} - key: ${{ env.DBT_CACHE_NAME }}-$${{ vars.CACHE_KEY }} + key: ${{ env.DBT_CACHE_NAME }}-$${{ env.CACHE_KEY }} restore-keys: | ${{ env.DBT_CACHE_NAME }}-data-catalog ${{ env.DBT_CACHE_NAME }}-master diff --git a/.github/workflows/test_dbt_models.yaml b/.github/workflows/test_dbt_models.yaml index d9bfb5cdd..3369ee497 100644 --- a/.github/workflows/test_dbt_models.yaml +++ b/.github/workflows/test_dbt_models.yaml @@ -7,7 +7,7 @@ on: description: >- Use the master branch build cache to avoid rebuilding models. type: boolean - required: true + required: false default: true jobs: @@ -38,5 +38,5 @@ jobs: - name: Test models run: dbt test --target prod - working-directory: ${{ DBT_PROJECT_DIR }} + working-directory: ${{ env.DBT_PROJECT_DIR }} shell: bash From 297f7ece80b604b4eedb678c6b514503473a6cf7 Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Thu, 3 Aug 2023 10:19:04 -0500 Subject: [PATCH 05/82] Fix linting problems with dbt GitHub actions and workflows --- .github/variables/dbt.env | 6 +++--- .github/workflows/build_and_test_dbt.yaml | 20 +++++++++++++------- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/.github/variables/dbt.env b/.github/variables/dbt.env index 82ffef7fb..b16832a94 100644 --- a/.github/variables/dbt.env +++ b/.github/variables/dbt.env @@ -1,3 +1,3 @@ - DBT_PROJECT_DIR=./dbt - DBT_MANIFEST_DIR=./dbt/target - DBT_CACHE_NAME=dbt-cache +DBT_CACHE_NAME=dbt-cache +DBT_MANIFEST_DIR=./dbt/target +DBT_PROJECT_DIR=./dbt diff --git a/.github/workflows/build_and_test_dbt.yaml b/.github/workflows/build_and_test_dbt.yaml index df6a11b22..1dad79013 100644 --- a/.github/workflows/build_and_test_dbt.yaml +++ b/.github/workflows/build_and_test_dbt.yaml @@ -28,15 +28,21 @@ jobs: - name: Set environment for branch run: | if [[ $GITHUB_REF_NAME == 'master' ]]; then - echo "DBT_TARGET=prod" >> "$GITHUB_ENV" - echo "CACHE_KEY=master" >> "$GITHUB_ENV" + { + echo "DBT_TARGET=prod"; + echo "CACHE_KEY=master"; + } >> "$GITHUB_ENV" elif [[ $GITHUB_REF_NAME == 'data-catalog' ]]; then - echo "DBT_TARGET=ci" >> "$GITHUB_ENV" - echo "CACHE_KEY=data-catalog" >> "$GITHUB_ENV" - echo "GITHUB_HEAD_REF=data-catalog" >> "$GITHUB_ENV" + { + echo "DBT_TARGET=ci"; + echo "CACHE_KEY=data-catalog"; + echo "GITHUB_HEAD_REF=data-catalog"; + } >> "$GITHUB_ENV" else - echo "DBT_TARGET=ci" >> "$GITHUB_ENV" - echo "CACHE_KEY=$GITHUB_HEAD_REF" >> "$GITHUB_ENV" + { + echo "DBT_TARGET=ci"; + echo "CACHE_KEY=$GITHUB_HEAD_REF"; + } >> "$GITHUB_ENV" fi shell: bash From 0396849abf4519e93265006fcdb036cea4a5bd7c Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Thu, 3 Aug 2023 10:30:11 -0500 Subject: [PATCH 06/82] Empty commit to trigger CI build From f83327137592d0e49b245ea9f41921f8abf2b0ff Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Thu, 3 Aug 2023 10:35:40 -0500 Subject: [PATCH 07/82] Rename local GitHub actions to match expected dir/action.yaml naming scheme --- .../action.yaml} | 0 .../action.yaml} | 0 .github/workflows/build_and_test_dbt.yaml | 4 ++-- .github/workflows/test_dbt_models.yaml | 4 ++-- 4 files changed, 4 insertions(+), 4 deletions(-) rename .github/actions/{install_dbt_requirements.yaml => install_dbt_requirements/action.yaml} (100%) rename .github/actions/{load_environment_variables.yaml => load_environment_variables/action.yaml} (100%) diff --git a/.github/actions/install_dbt_requirements.yaml b/.github/actions/install_dbt_requirements/action.yaml similarity index 100% rename from .github/actions/install_dbt_requirements.yaml rename to .github/actions/install_dbt_requirements/action.yaml diff --git a/.github/actions/load_environment_variables.yaml b/.github/actions/load_environment_variables/action.yaml similarity index 100% rename from .github/actions/load_environment_variables.yaml rename to .github/actions/load_environment_variables/action.yaml diff --git a/.github/workflows/build_and_test_dbt.yaml b/.github/workflows/build_and_test_dbt.yaml index 1dad79013..d408a2f2e 100644 --- a/.github/workflows/build_and_test_dbt.yaml +++ b/.github/workflows/build_and_test_dbt.yaml @@ -14,10 +14,10 @@ jobs: uses: actions/checkout@v3 - name: Install dbt requirements - uses: ./.github/actions/install_dbt_requirements.yaml + uses: ./.github/actions/install_dbt_requirements - name: Load environment variables - uses: ./.github/actions/load_environment_variables.yaml + uses: ./.github/actions/load_environment_variables - name: Configure AWS credentials uses: aws-actions/configure-aws-credentials@v2 diff --git a/.github/workflows/test_dbt_models.yaml b/.github/workflows/test_dbt_models.yaml index 3369ee497..7dab8ecaa 100644 --- a/.github/workflows/test_dbt_models.yaml +++ b/.github/workflows/test_dbt_models.yaml @@ -18,10 +18,10 @@ jobs: uses: actions/checkout@v3 - name: Install dbt requirements - uses: ./.github/actions/install_dbt_requirements.yaml + uses: ./.github/actions/install_dbt_requirements - name: Load environment variables - uses: ./.github/actions/load_environment_variables.yaml + uses: ./.github/actions/load_environment_variables - if: ${{ inputs.use_build_cache }} name: Cache dbt manifest From 3fb81f60e0a4ed6f1a4ea54bfb9baa4280c673bf Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Thu, 3 Aug 2023 11:08:16 -0500 Subject: [PATCH 08/82] Add permissions to interact with GitHub OIDC to dbt actions --- .github/workflows/build_and_test_dbt.yaml | 5 +++++ .github/workflows/test_dbt_models.yaml | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/.github/workflows/build_and_test_dbt.yaml b/.github/workflows/build_and_test_dbt.yaml index d408a2f2e..5cafe80e1 100644 --- a/.github/workflows/build_and_test_dbt.yaml +++ b/.github/workflows/build_and_test_dbt.yaml @@ -9,6 +9,11 @@ on: jobs: build-and-test: runs-on: ubuntu-latest + # These permissions are needed to interact with GitHub's OIDC Token endpoint + # so that we can authenticate with AWS + permissions: + id-token: write + contents: read steps: - name: Checkout uses: actions/checkout@v3 diff --git a/.github/workflows/test_dbt_models.yaml b/.github/workflows/test_dbt_models.yaml index 7dab8ecaa..0835e057a 100644 --- a/.github/workflows/test_dbt_models.yaml +++ b/.github/workflows/test_dbt_models.yaml @@ -13,6 +13,11 @@ on: jobs: run-tests: runs-on: ubuntu-latest + # These permissions are needed to interact with GitHub's OIDC Token endpoint + # so that we can authenticate with AWS + permissions: + id-token: write + contents: read steps: - name: Checkout uses: actions/checkout@v3 From 5b37bf8f8fc88390908cd9c7403b2e492f9fb99c Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Thu, 3 Aug 2023 11:12:42 -0500 Subject: [PATCH 09/82] Try new format for build and test cache key on CI --- .github/workflows/build_and_test_dbt.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build_and_test_dbt.yaml b/.github/workflows/build_and_test_dbt.yaml index 5cafe80e1..9f8835cc0 100644 --- a/.github/workflows/build_and_test_dbt.yaml +++ b/.github/workflows/build_and_test_dbt.yaml @@ -46,7 +46,7 @@ jobs: else { echo "DBT_TARGET=ci"; - echo "CACHE_KEY=$GITHUB_HEAD_REF"; + echo "CACHE_KEY=GITHUB_HEAD_REF"; } >> "$GITHUB_ENV" fi shell: bash From e8ec9d0817a5ab65d880dc8089e7f6548305307d Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Thu, 3 Aug 2023 11:18:44 -0500 Subject: [PATCH 10/82] Strip extraneous dollar sign from dbt workflow cache key --- .github/workflows/build_and_test_dbt.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build_and_test_dbt.yaml b/.github/workflows/build_and_test_dbt.yaml index 9f8835cc0..39d82d3a0 100644 --- a/.github/workflows/build_and_test_dbt.yaml +++ b/.github/workflows/build_and_test_dbt.yaml @@ -46,7 +46,7 @@ jobs: else { echo "DBT_TARGET=ci"; - echo "CACHE_KEY=GITHUB_HEAD_REF"; + echo "CACHE_KEY=$GITHUB_HEAD_REF"; } >> "$GITHUB_ENV" fi shell: bash @@ -56,7 +56,7 @@ jobs: uses: actions/cache@v3 with: path: ${{ env.DBT_MANIFEST_DIR }} - key: ${{ env.DBT_CACHE_NAME }}-$${{ env.CACHE_KEY }} + key: ${{ env.DBT_CACHE_NAME }}-${{ env.CACHE_KEY }} restore-keys: | ${{ env.DBT_CACHE_NAME }}-data-catalog ${{ env.DBT_CACHE_NAME }}-master From f4ba9de74ad63add02549b2d266d8f38d712735a Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Thu, 3 Aug 2023 11:19:06 -0500 Subject: [PATCH 11/82] Try different format for dbt directory paths in workflow env vars --- .github/variables/dbt.env | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/variables/dbt.env b/.github/variables/dbt.env index b16832a94..8565f00a9 100644 --- a/.github/variables/dbt.env +++ b/.github/variables/dbt.env @@ -1,3 +1,3 @@ DBT_CACHE_NAME=dbt-cache -DBT_MANIFEST_DIR=./dbt/target -DBT_PROJECT_DIR=./dbt +DBT_MANIFEST_DIR=dbt/target +DBT_PROJECT_DIR=dbt From 8530109dca24a7fd7356450d52663c1de969b114 Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Thu, 3 Aug 2023 11:31:30 -0500 Subject: [PATCH 12/82] Rename dbt workflow env vars to avoid collisions with dbt internal env vars --- .github/variables/dbt.env | 6 +++--- .github/workflows/build_and_test_dbt.yaml | 26 +++++++++++------------ .github/workflows/test_dbt_models.yaml | 6 +++--- 3 files changed, 19 insertions(+), 19 deletions(-) diff --git a/.github/variables/dbt.env b/.github/variables/dbt.env index 8565f00a9..f142826fa 100644 --- a/.github/variables/dbt.env +++ b/.github/variables/dbt.env @@ -1,3 +1,3 @@ -DBT_CACHE_NAME=dbt-cache -DBT_MANIFEST_DIR=dbt/target -DBT_PROJECT_DIR=dbt +CACHE_NAME=dbt-cache +MANIFEST_DIR=dbt/target +PROJECT_DIR=dbt diff --git a/.github/workflows/build_and_test_dbt.yaml b/.github/workflows/build_and_test_dbt.yaml index 39d82d3a0..4c91b08d1 100644 --- a/.github/workflows/build_and_test_dbt.yaml +++ b/.github/workflows/build_and_test_dbt.yaml @@ -34,18 +34,18 @@ jobs: run: | if [[ $GITHUB_REF_NAME == 'master' ]]; then { - echo "DBT_TARGET=prod"; + echo "TARGET=prod"; echo "CACHE_KEY=master"; } >> "$GITHUB_ENV" elif [[ $GITHUB_REF_NAME == 'data-catalog' ]]; then { - echo "DBT_TARGET=ci"; + echo "TARGET=ci"; echo "CACHE_KEY=data-catalog"; echo "GITHUB_HEAD_REF=data-catalog"; } >> "$GITHUB_ENV" else { - echo "DBT_TARGET=ci"; + echo "TARGET=ci"; echo "CACHE_KEY=$GITHUB_HEAD_REF"; } >> "$GITHUB_ENV" fi @@ -55,28 +55,28 @@ jobs: id: cache uses: actions/cache@v3 with: - path: ${{ env.DBT_MANIFEST_DIR }} - key: ${{ env.DBT_CACHE_NAME }}-${{ env.CACHE_KEY }} + path: ${{ env.MANIFEST_DIR }} + key: ${{ env.CACHE_NAME }}-${{ env.CACHE_KEY }} restore-keys: | - ${{ env.DBT_CACHE_NAME }}-data-catalog - ${{ env.DBT_CACHE_NAME }}-master + ${{ env.CACHE_NAME }}-data-catalog + ${{ env.CACHE_NAME }}-master - if: ${{ steps.cache.outputs.cache-hit == 'true' }} name: Set state args - run: echo "DBT_STATE_ARGS='-s state:modified --state target/'" >> "$GITHUB_ENV" + run: echo "STATE_ARGS='-s state:modified --state target/'" >> "$GITHUB_ENV" shell: bash - name: Test dbt macros run: dbt run-operation test_all - working-directory: ${{ env.DBT_PROJECT_DIR }} + working-directory: ${{ env.PROJECT_DIR }} shell: bash - name: Build models - run: dbt run --target "$DBT_TARGET" "$DBT_STATE_ARGS" - working-directory: ${{ env.DBT_PROJECT_DIR }} + run: dbt run --target "$TARGET" "$STATE_ARGS" + working-directory: ${{ env.PROJECT_DIR }} shell: bash - name: Test models - run: dbt test --target "$DBT_TARGET" "$DBT_STATE_ARGS" - working-directory: ${{ env.DBT_PROJECT_DIR }} + run: dbt test --target "$TARGET" "$STATE_ARGS" + working-directory: ${{ env.PROJECT_DIR }} shell: bash diff --git a/.github/workflows/test_dbt_models.yaml b/.github/workflows/test_dbt_models.yaml index 0835e057a..df2c2f9e8 100644 --- a/.github/workflows/test_dbt_models.yaml +++ b/.github/workflows/test_dbt_models.yaml @@ -32,8 +32,8 @@ jobs: name: Cache dbt manifest uses: actions/cache@v3 with: - path: ${{ env.DBT_MANIFEST_DIR }} - key: ${{ env.DBT_CACHE_NAME }}-master + path: ${{ env.MANIFEST_DIR }} + key: ${{ env.CACHE_NAME }}-master - name: Configure AWS credentials uses: aws-actions/configure-aws-credentials@v2 @@ -43,5 +43,5 @@ jobs: - name: Test models run: dbt test --target prod - working-directory: ${{ env.DBT_PROJECT_DIR }} + working-directory: ${{ env.PROJECT_DIR }} shell: bash From b26cefccde880a77917304d6d33f76eb33772217 Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Thu, 3 Aug 2023 11:42:41 -0500 Subject: [PATCH 13/82] Make sure STATE_ARGS env var is never empty in build_and_test_dbt workflow --- .github/workflows/build_and_test_dbt.yaml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build_and_test_dbt.yaml b/.github/workflows/build_and_test_dbt.yaml index 4c91b08d1..31758bee4 100644 --- a/.github/workflows/build_and_test_dbt.yaml +++ b/.github/workflows/build_and_test_dbt.yaml @@ -62,10 +62,15 @@ jobs: ${{ env.CACHE_NAME }}-master - if: ${{ steps.cache.outputs.cache-hit == 'true' }} - name: Set state args + name: Set state args to build/test modified resources run: echo "STATE_ARGS='-s state:modified --state target/'" >> "$GITHUB_ENV" shell: bash + - if: ${{ steps.cache.outputs.cache-hit != 'true' }} + name: Set state args to build/test all resources + run: echo "STATE_ARGS='--state target/'" >> "$GITHUB_ENV" + shell: bash + - name: Test dbt macros run: dbt run-operation test_all working-directory: ${{ env.PROJECT_DIR }} From 3ff6d9851186e48a274685fed5397d26e3486ebf Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Thu, 3 Aug 2023 11:55:31 -0500 Subject: [PATCH 14/82] Try new format for reading dbt commands from env vars in GitHub workflow --- .github/workflows/build_and_test_dbt.yaml | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/.github/workflows/build_and_test_dbt.yaml b/.github/workflows/build_and_test_dbt.yaml index 31758bee4..b86c92f5a 100644 --- a/.github/workflows/build_and_test_dbt.yaml +++ b/.github/workflows/build_and_test_dbt.yaml @@ -62,13 +62,21 @@ jobs: ${{ env.CACHE_NAME }}-master - if: ${{ steps.cache.outputs.cache-hit == 'true' }} - name: Set state args to build/test modified resources - run: echo "STATE_ARGS='-s state:modified --state target/'" >> "$GITHUB_ENV" + name: Set command args to build/test modified resources + run: | + { + echo "RUN_CMD='dbt run --target $TARGET -s state:modified --state target/'"; + echo "TEST_CMD='dbt test --target $TARGET -s state:modified --state target/'"; + } >> "$GITHUB_ENV" shell: bash - if: ${{ steps.cache.outputs.cache-hit != 'true' }} - name: Set state args to build/test all resources - run: echo "STATE_ARGS='--state target/'" >> "$GITHUB_ENV" + name: Set command args to build/test all resources + run: | + { + echo "RUN_CMD='dbt run --target $TARGET'"; + echo "TEST_CMD='dbt test --target $TARGET'"; + } >> "$GITHUB_ENV" shell: bash - name: Test dbt macros @@ -77,11 +85,11 @@ jobs: shell: bash - name: Build models - run: dbt run --target "$TARGET" "$STATE_ARGS" + run: $RUN_CMD working-directory: ${{ env.PROJECT_DIR }} shell: bash - name: Test models - run: dbt test --target "$TARGET" "$STATE_ARGS" + run: $TEST_CMD working-directory: ${{ env.PROJECT_DIR }} shell: bash From 0607be717b695233061dddfcc7492a7ee18da016 Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Thu, 3 Aug 2023 12:02:51 -0500 Subject: [PATCH 15/82] Add step to build_and_test_dbt workflow to test dbt installation --- .github/workflows/build_and_test_dbt.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/build_and_test_dbt.yaml b/.github/workflows/build_and_test_dbt.yaml index b86c92f5a..dd77b3eb2 100644 --- a/.github/workflows/build_and_test_dbt.yaml +++ b/.github/workflows/build_and_test_dbt.yaml @@ -21,6 +21,10 @@ jobs: - name: Install dbt requirements uses: ./.github/actions/install_dbt_requirements + - name: Test dbt installation + run: dbt --help + working-directory: ${{ env.PROJECT_DIR }} + - name: Load environment variables uses: ./.github/actions/load_environment_variables From 98a42ef60472149dd2c882f6ffd340895fdcf67a Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Thu, 3 Aug 2023 12:58:13 -0500 Subject: [PATCH 16/82] Try a different quoting scheme for RUN_CMD and TEST_CMD in build_and_test_dbt workflow --- .github/workflows/build_and_test_dbt.yaml | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/.github/workflows/build_and_test_dbt.yaml b/.github/workflows/build_and_test_dbt.yaml index dd77b3eb2..9983b6cb0 100644 --- a/.github/workflows/build_and_test_dbt.yaml +++ b/.github/workflows/build_and_test_dbt.yaml @@ -21,10 +21,6 @@ jobs: - name: Install dbt requirements uses: ./.github/actions/install_dbt_requirements - - name: Test dbt installation - run: dbt --help - working-directory: ${{ env.PROJECT_DIR }} - - name: Load environment variables uses: ./.github/actions/load_environment_variables @@ -69,8 +65,8 @@ jobs: name: Set command args to build/test modified resources run: | { - echo "RUN_CMD='dbt run --target $TARGET -s state:modified --state target/'"; - echo "TEST_CMD='dbt test --target $TARGET -s state:modified --state target/'"; + echo "RUN_CMD=dbt\ run\ --target\ $TARGET\ -s\ state:modified\ --state\ target/"; + echo "TEST_CMD=dbt\ test\ --target\ $TARGET\ -s\ state:modified\ --state\ target/"; } >> "$GITHUB_ENV" shell: bash @@ -78,8 +74,8 @@ jobs: name: Set command args to build/test all resources run: | { - echo "RUN_CMD='dbt run --target $TARGET'"; - echo "TEST_CMD='dbt test --target $TARGET'"; + echo "RUN_CMD=dbt\ run\ --target\ $TARGET"; + echo "TEST_CMD=dbt\ test\ --target\ $TARGET"; } >> "$GITHUB_ENV" shell: bash @@ -89,11 +85,11 @@ jobs: shell: bash - name: Build models - run: $RUN_CMD + run: ${{ env.RUN_CMD }} working-directory: ${{ env.PROJECT_DIR }} shell: bash - name: Test models - run: $TEST_CMD + run: ${{ env.TEST_CMD }} working-directory: ${{ env.PROJECT_DIR }} shell: bash From 03e4dc101fe240d0303ba53c11bf9fed1368811d Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Thu, 3 Aug 2023 13:13:28 -0500 Subject: [PATCH 17/82] Define build/test commands directly instead of via env vars in dbt workflows --- .github/workflows/build_and_test_dbt.yaml | 26 ++++++++++++----------- 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/.github/workflows/build_and_test_dbt.yaml b/.github/workflows/build_and_test_dbt.yaml index 9983b6cb0..a6f80f847 100644 --- a/.github/workflows/build_and_test_dbt.yaml +++ b/.github/workflows/build_and_test_dbt.yaml @@ -63,20 +63,12 @@ jobs: - if: ${{ steps.cache.outputs.cache-hit == 'true' }} name: Set command args to build/test modified resources - run: | - { - echo "RUN_CMD=dbt\ run\ --target\ $TARGET\ -s\ state:modified\ --state\ target/"; - echo "TEST_CMD=dbt\ test\ --target\ $TARGET\ -s\ state:modified\ --state\ target/"; - } >> "$GITHUB_ENV" + run: echo "MODIFIED_RESOURCES_ONLY=true" >> "$GITHUB_ENV" shell: bash - if: ${{ steps.cache.outputs.cache-hit != 'true' }} name: Set command args to build/test all resources - run: | - { - echo "RUN_CMD=dbt\ run\ --target\ $TARGET"; - echo "TEST_CMD=dbt\ test\ --target\ $TARGET"; - } >> "$GITHUB_ENV" + run: echo "MODIFIED_RESOURCES_ONLY=false" >> "$GITHUB_ENV" shell: bash - name: Test dbt macros @@ -85,11 +77,21 @@ jobs: shell: bash - name: Build models - run: ${{ env.RUN_CMD }} + run: | + if [[ $MODIFIED_RESOURCES_ONLY == 'true' ]]; then + dbt run --target "$TARGET" -s state:modified --state target/ + else + dbt run --target "$TARGET" + fi working-directory: ${{ env.PROJECT_DIR }} shell: bash - name: Test models - run: ${{ env.TEST_CMD }} + run: | + if [[ $MODIFIED_RESOURCES_ONLY == 'true' ]]; then + dbt test --target "$TARGET" -s state:modified --state target/ + else + dbt test --target "$TARGET" + fi working-directory: ${{ env.PROJECT_DIR }} shell: bash From b99d0dafbe0f3d2bad2e40c29d95ceffac9e126c Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Thu, 3 Aug 2023 13:21:52 -0500 Subject: [PATCH 18/82] Log all conditional branches in build_and_test_dbt_models workflow --- .github/workflows/build_and_test_dbt.yaml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.github/workflows/build_and_test_dbt.yaml b/.github/workflows/build_and_test_dbt.yaml index a6f80f847..d66c5f8c9 100644 --- a/.github/workflows/build_and_test_dbt.yaml +++ b/.github/workflows/build_and_test_dbt.yaml @@ -33,17 +33,20 @@ jobs: - name: Set environment for branch run: | if [[ $GITHUB_REF_NAME == 'master' ]]; then + echo "On master branch" { echo "TARGET=prod"; echo "CACHE_KEY=master"; } >> "$GITHUB_ENV" elif [[ $GITHUB_REF_NAME == 'data-catalog' ]]; then + echo "On data catalog branch" { echo "TARGET=ci"; echo "CACHE_KEY=data-catalog"; echo "GITHUB_HEAD_REF=data-catalog"; } >> "$GITHUB_ENV" else + echo "On pull request branch" { echo "TARGET=ci"; echo "CACHE_KEY=$GITHUB_HEAD_REF"; @@ -79,8 +82,10 @@ jobs: - name: Build models run: | if [[ $MODIFIED_RESOURCES_ONLY == 'true' ]]; then + echo "Running build on modified resources only" dbt run --target "$TARGET" -s state:modified --state target/ else + echo "Running build on all resources" dbt run --target "$TARGET" fi working-directory: ${{ env.PROJECT_DIR }} @@ -89,8 +94,10 @@ jobs: - name: Test models run: | if [[ $MODIFIED_RESOURCES_ONLY == 'true' ]]; then + echo "Running tests on modified resources only" dbt test --target "$TARGET" -s state:modified --state target/ else + echo "Running tests on all resources" dbt test --target "$TARGET" fi working-directory: ${{ env.PROJECT_DIR }} From 829a456edd8c2b8f33c9ac5beca842653afbd751 Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Thu, 3 Aug 2023 14:00:51 -0500 Subject: [PATCH 19/82] See if removing hyphens from database names appeases dbt-athena on CI --- dbt/macros/generate_schema_name.sql | 2 +- dbt/macros/tests/test_generate_schema_name.sql | 2 +- dbt/profiles.yml | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/dbt/macros/generate_schema_name.sql b/dbt/macros/generate_schema_name.sql index a1ecf71dc..ba1c9f4ee 100644 --- a/dbt/macros/generate_schema_name.sql +++ b/dbt/macros/generate_schema_name.sql @@ -25,7 +25,7 @@ {%- if target.name == "dev" -%} {%- set schema_prefix = "dev_" ~ env_var_func("USER") ~ "_" -%} {%- elif target.name == "ci" -%} - {%- set github_head_ref = kebab_slugify( + {%- set github_head_ref = dbt_utils.slugify( env_var_func("GITHUB_HEAD_REF") ) -%} {%- set schema_prefix = "ci_" ~ github_head_ref ~ "_" -%} diff --git a/dbt/macros/tests/test_generate_schema_name.sql b/dbt/macros/tests/test_generate_schema_name.sql index cd0add27a..48f6f45de 100644 --- a/dbt/macros/tests/test_generate_schema_name.sql +++ b/dbt/macros/tests/test_generate_schema_name.sql @@ -43,7 +43,7 @@ mock_env_var, exceptions.raise_compiler_error ), - "ci_testuser-feature-branch-1_test" + "ci_testuserfeature_branch_1_test" ) %} {% endmacro %} diff --git a/dbt/profiles.yml b/dbt/profiles.yml index 565eed890..483601391 100644 --- a/dbt/profiles.yml +++ b/dbt/profiles.yml @@ -7,7 +7,7 @@ athena: s3_data_dir: s3://ccao-dbt-athena-test-us-east-1/data/ region_name: us-east-1 # "schema" here corresponds to a Glue database - schema: dbt-test + schema: dbt_test # "database" here corresponds to a Glue data catalog database: awsdatacatalog threads: 5 @@ -16,7 +16,7 @@ athena: s3_staging_dir: s3://ccao-dbt-athena-ci-us-east-1/results/ s3_data_dir: s3://ccao-dbt-athena-ci-us-east-1/data/ region_name: us-east-1 - schema: dbt-test + schema: dbt_test database: awsdatacatalog # Prefix all generated data by schema, so that we can delete it when the # PR is merged From 31de4e0a2cfc8dc92a10a0935f0169cd023ac278 Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Thu, 3 Aug 2023 15:17:32 -0500 Subject: [PATCH 20/82] Temporarily enable dbt debugging to try to figure out AWS permissions --- .github/workflows/build_and_test_dbt.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build_and_test_dbt.yaml b/.github/workflows/build_and_test_dbt.yaml index d66c5f8c9..265f7dc23 100644 --- a/.github/workflows/build_and_test_dbt.yaml +++ b/.github/workflows/build_and_test_dbt.yaml @@ -86,7 +86,7 @@ jobs: dbt run --target "$TARGET" -s state:modified --state target/ else echo "Running build on all resources" - dbt run --target "$TARGET" + dbt --debug run --target "$TARGET" fi working-directory: ${{ env.PROJECT_DIR }} shell: bash From 7b0037a370c9d91fb5929ec68974da96a5c62d90 Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Fri, 4 Aug 2023 09:22:15 -0500 Subject: [PATCH 21/82] Try reverting dbt schema naming back to kebab_slugify --- dbt/macros/generate_schema_name.sql | 2 +- dbt/macros/tests/test_generate_schema_name.sql | 2 +- dbt/profiles.yml | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/dbt/macros/generate_schema_name.sql b/dbt/macros/generate_schema_name.sql index f16551c7f..e46df7ea1 100644 --- a/dbt/macros/generate_schema_name.sql +++ b/dbt/macros/generate_schema_name.sql @@ -29,7 +29,7 @@ {%- if target.name == "dev" -%} {%- set schema_prefix = "dev_" ~ env_var_func("USER") ~ "_" -%} {%- elif target.name == "ci" -%} - {%- set github_head_ref = dbt_utils.slugify(env_var_func("GITHUB_HEAD_REF")) -%} + {%- set github_head_ref = kebab_slugify(env_var_func("GITHUB_HEAD_REF")) -%} {%- set schema_prefix = "ci_" ~ github_head_ref ~ "_" -%} {%- else -%} {%- set schema_prefix = "" -%} {%- endif -%} diff --git a/dbt/macros/tests/test_generate_schema_name.sql b/dbt/macros/tests/test_generate_schema_name.sql index 77c67457a..5116a26df 100644 --- a/dbt/macros/tests/test_generate_schema_name.sql +++ b/dbt/macros/tests/test_generate_schema_name.sql @@ -40,7 +40,7 @@ mock_env_var, exceptions.raise_compiler_error, ), - "ci_testuserfeature_branch_1_test", + "ci_testuser-feature-branch-1_test", ) %} {% endmacro %} diff --git a/dbt/profiles.yml b/dbt/profiles.yml index 483601391..565eed890 100644 --- a/dbt/profiles.yml +++ b/dbt/profiles.yml @@ -7,7 +7,7 @@ athena: s3_data_dir: s3://ccao-dbt-athena-test-us-east-1/data/ region_name: us-east-1 # "schema" here corresponds to a Glue database - schema: dbt_test + schema: dbt-test # "database" here corresponds to a Glue data catalog database: awsdatacatalog threads: 5 @@ -16,7 +16,7 @@ athena: s3_staging_dir: s3://ccao-dbt-athena-ci-us-east-1/results/ s3_data_dir: s3://ccao-dbt-athena-ci-us-east-1/data/ region_name: us-east-1 - schema: dbt_test + schema: dbt-test database: awsdatacatalog # Prefix all generated data by schema, so that we can delete it when the # PR is merged From 9bf6cac8e94fe0033eeccbbd473669b30bc7fc60 Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Fri, 4 Aug 2023 10:22:06 -0500 Subject: [PATCH 22/82] Remove --debug flag from dbt run call in build_and_test_dbt workflow --- .github/workflows/build_and_test_dbt.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build_and_test_dbt.yaml b/.github/workflows/build_and_test_dbt.yaml index 265f7dc23..d66c5f8c9 100644 --- a/.github/workflows/build_and_test_dbt.yaml +++ b/.github/workflows/build_and_test_dbt.yaml @@ -86,7 +86,7 @@ jobs: dbt run --target "$TARGET" -s state:modified --state target/ else echo "Running build on all resources" - dbt --debug run --target "$TARGET" + dbt run --target "$TARGET" fi working-directory: ${{ env.PROJECT_DIR }} shell: bash From 76db394b412027c955e4926c50b8ebd3e253b6b3 Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Fri, 4 Aug 2023 10:30:45 -0500 Subject: [PATCH 23/82] Bump error thresholds for four dbt tests --- dbt/models/default/schema.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dbt/models/default/schema.yml b/dbt/models/default/schema.yml index f6feb7f06..b1d4b22fc 100644 --- a/dbt/models/default/schema.yml +++ b/dbt/models/default/schema.yml @@ -31,7 +31,7 @@ models: - pin - year config: - error_if: ">280655" + error_if: ">280658" # Unique by case number and year - unique_combination_of_columns: name: vw_pin_appeal_unique_by_case_number_and_year @@ -39,7 +39,7 @@ models: - year - case_no config: - error_if: ">365779" + error_if: ">365830" # `change` should be an enum - dbt_utils.expression_is_true: name: vw_pin_appeal_no_unexpected_change_values @@ -85,7 +85,7 @@ models: case when char_renovation = '1' then true else false end ) config: - error_if: ">73925" + error_if: ">73941" # TODO: Characteristics columns should adhere to pre-determined criteria - name: vw_pin_address_test description: '{{ doc("vw_pin_address_test") }}' @@ -111,7 +111,7 @@ models: - mail_address_zipcode_1 - mail_address_zipcode_2 config: - error_if: ">879261" + error_if: ">880552" # TODO: Mailing address changes after validated sale(?) # TODO: Site addresses are all in Cook County - name: vw_pin_condo_char_test From 4d552aa4c07631083436ef22b63f73b29c0805f1 Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Fri, 4 Aug 2023 17:13:56 -0500 Subject: [PATCH 24/82] Add step to cleanup resources to build_and_test_dbt workflow --- .github/scripts/cleanup_dbt_resources.sh | 14 ++++++++++++++ .github/workflows/build_and_test_dbt.yaml | 10 ++++++++++ 2 files changed, 24 insertions(+) create mode 100755 .github/scripts/cleanup_dbt_resources.sh diff --git a/.github/scripts/cleanup_dbt_resources.sh b/.github/scripts/cleanup_dbt_resources.sh new file mode 100755 index 000000000..b3c996bd5 --- /dev/null +++ b/.github/scripts/cleanup_dbt_resources.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env bash +schemas=$(dbt --quiet list --resource-type model --output json --output-keys schema) +echo "Deleting the following schemas from Athena:" +echo +echo "$schemas" + +echo "$schemas" \ + | sort \ + | uniq \ + | jq ' .schema' \ + | xargs -i bash -c 'aws glue delete-database --name {}' + +echo +echo "Done!" diff --git a/.github/workflows/build_and_test_dbt.yaml b/.github/workflows/build_and_test_dbt.yaml index d66c5f8c9..631393922 100644 --- a/.github/workflows/build_and_test_dbt.yaml +++ b/.github/workflows/build_and_test_dbt.yaml @@ -102,3 +102,13 @@ jobs: fi working-directory: ${{ env.PROJECT_DIR }} shell: bash + + - if: ${{ github.event_name == 'pull_request' && github.event.action == 'closed' }} + name: Install cleanup requirements + run: apt update && apt install jq + + - if: ${{ github.event_name == 'pull_request' && github.event.action == 'closed' }} + name: Clean up resources + run: ./.github/scripts/cleanup_dbt_resources.sh + working-directory: ${{ env.PROJECT_DIR }} + shell: bash From 27f556cc1bb64ade5c678607064834fc79f53b97 Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Mon, 7 Aug 2023 10:03:18 -0500 Subject: [PATCH 25/82] Clean up cleanup_dbt_resources.sh script for use in CI --- .github/scripts/cleanup_dbt_resources.sh | 25 +++++++++++++++++------ .github/workflows/build_and_test_dbt.yaml | 2 +- 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/.github/scripts/cleanup_dbt_resources.sh b/.github/scripts/cleanup_dbt_resources.sh index b3c996bd5..b7471ef68 100755 --- a/.github/scripts/cleanup_dbt_resources.sh +++ b/.github/scripts/cleanup_dbt_resources.sh @@ -1,14 +1,27 @@ #!/usr/bin/env bash -schemas=$(dbt --quiet list --resource-type model --output json --output-keys schema) +set -euo pipefail + +if [[ "$#" -eq 0 ]]; then + echo "Missing first argument representing dbt target" + exit 1 +fi + +if [ "$1" == "prod" ]; then + echo "Target cannot be 'prod'" + exit 1 +fi + +schemas_json=$(dbt --quiet list --resource-type model --target "$1" \ + --output json --output-keys schema) || (echo "Error in dbt call" && exit 1) +schemas=$(echo "$schemas_json"| sort | uniq | jq ' .schema') || (\ + echo "Error in schema parsing" && exit 1 +) + echo "Deleting the following schemas from Athena:" echo echo "$schemas" -echo "$schemas" \ - | sort \ - | uniq \ - | jq ' .schema' \ - | xargs -i bash -c 'aws glue delete-database --name {}' +echo "$schemas" | xargs -i bash -c 'aws glue delete-database --name {} || exit 255' echo echo "Done!" diff --git a/.github/workflows/build_and_test_dbt.yaml b/.github/workflows/build_and_test_dbt.yaml index 631393922..f77e23efc 100644 --- a/.github/workflows/build_and_test_dbt.yaml +++ b/.github/workflows/build_and_test_dbt.yaml @@ -109,6 +109,6 @@ jobs: - if: ${{ github.event_name == 'pull_request' && github.event.action == 'closed' }} name: Clean up resources - run: ./.github/scripts/cleanup_dbt_resources.sh + run: ./.github/scripts/cleanup_dbt_resources.sh ci working-directory: ${{ env.PROJECT_DIR }} shell: bash From 15a86d2b7fd4d0e29e40eaf710ce84ce2086bf8b Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Mon, 7 Aug 2023 10:32:40 -0500 Subject: [PATCH 26/82] Bump allowed errors in dbt tests due to data problems --- .github/workflows/build_and_test_dbt.yaml | 4 ++-- dbt/models/default/schema.yml | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build_and_test_dbt.yaml b/.github/workflows/build_and_test_dbt.yaml index f77e23efc..7f3521a48 100644 --- a/.github/workflows/build_and_test_dbt.yaml +++ b/.github/workflows/build_and_test_dbt.yaml @@ -104,11 +104,11 @@ jobs: shell: bash - if: ${{ github.event_name == 'pull_request' && github.event.action == 'closed' }} - name: Install cleanup requirements + name: Install requirements for cleaning up dbt resources run: apt update && apt install jq - if: ${{ github.event_name == 'pull_request' && github.event.action == 'closed' }} - name: Clean up resources + name: Clean up dbt resources run: ./.github/scripts/cleanup_dbt_resources.sh ci working-directory: ${{ env.PROJECT_DIR }} shell: bash diff --git a/dbt/models/default/schema.yml b/dbt/models/default/schema.yml index b1d4b22fc..e0a869b0f 100644 --- a/dbt/models/default/schema.yml +++ b/dbt/models/default/schema.yml @@ -31,7 +31,7 @@ models: - pin - year config: - error_if: ">280658" + error_if: ">280659" # Unique by case number and year - unique_combination_of_columns: name: vw_pin_appeal_unique_by_case_number_and_year @@ -39,7 +39,7 @@ models: - year - case_no config: - error_if: ">365830" + error_if: ">365855" # `change` should be an enum - dbt_utils.expression_is_true: name: vw_pin_appeal_no_unexpected_change_values @@ -111,7 +111,7 @@ models: - mail_address_zipcode_1 - mail_address_zipcode_2 config: - error_if: ">880552" + error_if: ">880581" # TODO: Mailing address changes after validated sale(?) # TODO: Site addresses are all in Cook County - name: vw_pin_condo_char_test From 7ab4b69f074ba51e24a6c20f876ee8c3494a1a4c Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Mon, 7 Aug 2023 10:43:39 -0500 Subject: [PATCH 27/82] Update build_and_test_dbt workflow to run when PRs are closed --- .github/workflows/build_and_test_dbt.yaml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.github/workflows/build_and_test_dbt.yaml b/.github/workflows/build_and_test_dbt.yaml index 7f3521a48..c624d2d6c 100644 --- a/.github/workflows/build_and_test_dbt.yaml +++ b/.github/workflows/build_and_test_dbt.yaml @@ -3,6 +3,13 @@ name: Build and test dbt on: pull_request: branches: [master, data-catalog] + # Specifying event types manually allows us to run this flow when the + # PR is closed so that we can clean up staging dbt resources + types: + - opened + - synchronize + - closed + - reopened push: branches: [master, data-catalog] From 9304b2f7c765a1f5790f4d54b1255e6159eb4846 Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Mon, 7 Aug 2023 10:55:36 -0500 Subject: [PATCH 28/82] Try apt-get instead of apt for installing jq in build_and_test_dbt workflow --- .github/workflows/build_and_test_dbt.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build_and_test_dbt.yaml b/.github/workflows/build_and_test_dbt.yaml index c624d2d6c..6d4846331 100644 --- a/.github/workflows/build_and_test_dbt.yaml +++ b/.github/workflows/build_and_test_dbt.yaml @@ -112,7 +112,7 @@ jobs: - if: ${{ github.event_name == 'pull_request' && github.event.action == 'closed' }} name: Install requirements for cleaning up dbt resources - run: apt update && apt install jq + run: apt-get update && apt-get install jq - if: ${{ github.event_name == 'pull_request' && github.event.action == 'closed' }} name: Clean up dbt resources From b291800efd0d7c94a7d4715c220a21d1b51dfeb8 Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Mon, 7 Aug 2023 10:56:03 -0500 Subject: [PATCH 29/82] Temporarily disable PR event restriction on dbt cleanup install step for testing --- .github/workflows/build_and_test_dbt.yaml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/build_and_test_dbt.yaml b/.github/workflows/build_and_test_dbt.yaml index 6d4846331..9152f8ffb 100644 --- a/.github/workflows/build_and_test_dbt.yaml +++ b/.github/workflows/build_and_test_dbt.yaml @@ -110,8 +110,7 @@ jobs: working-directory: ${{ env.PROJECT_DIR }} shell: bash - - if: ${{ github.event_name == 'pull_request' && github.event.action == 'closed' }} - name: Install requirements for cleaning up dbt resources + - name: Install requirements for cleaning up dbt resources run: apt-get update && apt-get install jq - if: ${{ github.event_name == 'pull_request' && github.event.action == 'closed' }} From b4ec438e66949309f0aacdaa6b2122f19db3e794 Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Mon, 7 Aug 2023 10:58:08 -0500 Subject: [PATCH 30/82] Try sudo apt-get for installing jq in build_and_test_dbt workflow --- .github/workflows/build_and_test_dbt.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build_and_test_dbt.yaml b/.github/workflows/build_and_test_dbt.yaml index 9152f8ffb..df5b8baef 100644 --- a/.github/workflows/build_and_test_dbt.yaml +++ b/.github/workflows/build_and_test_dbt.yaml @@ -111,7 +111,8 @@ jobs: shell: bash - name: Install requirements for cleaning up dbt resources - run: apt-get update && apt-get install jq + run: sudo apt-get update && sudo apt-get install jq + shell: bash - if: ${{ github.event_name == 'pull_request' && github.event.action == 'closed' }} name: Clean up dbt resources From 52fc11a6f9d0aefc6732345d837f4e4f8cd5ddda Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Mon, 7 Aug 2023 11:03:10 -0500 Subject: [PATCH 31/82] Remove installation step for dbt cleanup in build_and_test_dbt workflow It appears that jq is already installed in the GitHub Actions Ubuntu images, so we don't need a separate step to install it in our container. --- .github/workflows/build_and_test_dbt.yaml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.github/workflows/build_and_test_dbt.yaml b/.github/workflows/build_and_test_dbt.yaml index df5b8baef..0a2ce56bd 100644 --- a/.github/workflows/build_and_test_dbt.yaml +++ b/.github/workflows/build_and_test_dbt.yaml @@ -110,10 +110,6 @@ jobs: working-directory: ${{ env.PROJECT_DIR }} shell: bash - - name: Install requirements for cleaning up dbt resources - run: sudo apt-get update && sudo apt-get install jq - shell: bash - - if: ${{ github.event_name == 'pull_request' && github.event.action == 'closed' }} name: Clean up dbt resources run: ./.github/scripts/cleanup_dbt_resources.sh ci From 640387f892646ed4ac97ffad519d3fe9dad849bb Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Mon, 7 Aug 2023 11:05:49 -0500 Subject: [PATCH 32/82] Enforce jq as a requirement for cleanup_dbt_resources.sh script --- .github/scripts/cleanup_dbt_resources.sh | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/scripts/cleanup_dbt_resources.sh b/.github/scripts/cleanup_dbt_resources.sh index b7471ef68..f33aa6f4e 100755 --- a/.github/scripts/cleanup_dbt_resources.sh +++ b/.github/scripts/cleanup_dbt_resources.sh @@ -11,6 +11,11 @@ if [ "$1" == "prod" ]; then exit 1 fi +if ! command -v wfef &> /dev/null; then + echo "jq binary not found; it must be installed to run this script" + exit 1 +fi + schemas_json=$(dbt --quiet list --resource-type model --target "$1" \ --output json --output-keys schema) || (echo "Error in dbt call" && exit 1) schemas=$(echo "$schemas_json"| sort | uniq | jq ' .schema') || (\ From dfe87850cc41f7de62aca82fd8e5a7cf86dca09e Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Mon, 7 Aug 2023 11:28:55 -0500 Subject: [PATCH 33/82] Fix path to cleanup_dbt_resources.sh on CI --- .github/workflows/build_and_test_dbt.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build_and_test_dbt.yaml b/.github/workflows/build_and_test_dbt.yaml index 0a2ce56bd..0cffed9f4 100644 --- a/.github/workflows/build_and_test_dbt.yaml +++ b/.github/workflows/build_and_test_dbt.yaml @@ -112,6 +112,6 @@ jobs: - if: ${{ github.event_name == 'pull_request' && github.event.action == 'closed' }} name: Clean up dbt resources - run: ./.github/scripts/cleanup_dbt_resources.sh ci + run: ../.github/scripts/cleanup_dbt_resources.sh ci working-directory: ${{ env.PROJECT_DIR }} shell: bash From 6d685a0d24202590001f2e306ab098c6b1b0bdf7 Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Mon, 7 Aug 2023 11:32:21 -0500 Subject: [PATCH 34/82] Revert "Remove installation step for dbt cleanup in build_and_test_dbt workflow" This reverts commit 52fc11a6f9d0aefc6732345d837f4e4f8cd5ddda. --- .github/workflows/build_and_test_dbt.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/build_and_test_dbt.yaml b/.github/workflows/build_and_test_dbt.yaml index 0cffed9f4..5f508a8f1 100644 --- a/.github/workflows/build_and_test_dbt.yaml +++ b/.github/workflows/build_and_test_dbt.yaml @@ -110,6 +110,10 @@ jobs: working-directory: ${{ env.PROJECT_DIR }} shell: bash + - name: Install requirements for cleaning up dbt resources + run: sudo apt-get update && sudo apt-get install jq + shell: bash + - if: ${{ github.event_name == 'pull_request' && github.event.action == 'closed' }} name: Clean up dbt resources run: ../.github/scripts/cleanup_dbt_resources.sh ci From 1e5331de1b7eded89778cd760507cc78e1020fc4 Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Mon, 7 Aug 2023 11:36:16 -0500 Subject: [PATCH 35/82] Temporarily disable PR event restriction on cleanup in build_and_test_dbt --- .github/workflows/build_and_test_dbt.yaml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/build_and_test_dbt.yaml b/.github/workflows/build_and_test_dbt.yaml index 5f508a8f1..89c2a6230 100644 --- a/.github/workflows/build_and_test_dbt.yaml +++ b/.github/workflows/build_and_test_dbt.yaml @@ -114,8 +114,7 @@ jobs: run: sudo apt-get update && sudo apt-get install jq shell: bash - - if: ${{ github.event_name == 'pull_request' && github.event.action == 'closed' }} - name: Clean up dbt resources + - name: Clean up dbt resources run: ../.github/scripts/cleanup_dbt_resources.sh ci working-directory: ${{ env.PROJECT_DIR }} shell: bash From 620877b58990edfc637ffbcaa2e21dd3155fb263 Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Mon, 7 Aug 2023 11:37:23 -0500 Subject: [PATCH 36/82] Revert "Enforce jq as a requirement for cleanup_dbt_resources.sh script" This reverts commit 640387f892646ed4ac97ffad519d3fe9dad849bb. --- .github/scripts/cleanup_dbt_resources.sh | 5 ----- 1 file changed, 5 deletions(-) diff --git a/.github/scripts/cleanup_dbt_resources.sh b/.github/scripts/cleanup_dbt_resources.sh index f33aa6f4e..b7471ef68 100755 --- a/.github/scripts/cleanup_dbt_resources.sh +++ b/.github/scripts/cleanup_dbt_resources.sh @@ -11,11 +11,6 @@ if [ "$1" == "prod" ]; then exit 1 fi -if ! command -v wfef &> /dev/null; then - echo "jq binary not found; it must be installed to run this script" - exit 1 -fi - schemas_json=$(dbt --quiet list --resource-type model --target "$1" \ --output json --output-keys schema) || (echo "Error in dbt call" && exit 1) schemas=$(echo "$schemas_json"| sort | uniq | jq ' .schema') || (\ From f2d050828b1e31c7344d85c949805b88e7c43feb Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Mon, 7 Aug 2023 11:58:41 -0500 Subject: [PATCH 37/82] Revert "Temporarily disable PR event restriction on cleanup in build_and_test_dbt" This reverts commit 1e5331de1b7eded89778cd760507cc78e1020fc4. --- .github/workflows/build_and_test_dbt.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build_and_test_dbt.yaml b/.github/workflows/build_and_test_dbt.yaml index 89c2a6230..5f508a8f1 100644 --- a/.github/workflows/build_and_test_dbt.yaml +++ b/.github/workflows/build_and_test_dbt.yaml @@ -114,7 +114,8 @@ jobs: run: sudo apt-get update && sudo apt-get install jq shell: bash - - name: Clean up dbt resources + - if: ${{ github.event_name == 'pull_request' && github.event.action == 'closed' }} + name: Clean up dbt resources run: ../.github/scripts/cleanup_dbt_resources.sh ci working-directory: ${{ env.PROJECT_DIR }} shell: bash From cf5995b54ffc599f11d67f17771adc215a257d68 Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Mon, 7 Aug 2023 12:00:30 -0500 Subject: [PATCH 38/82] Revert "Temporarily disable PR event restriction on dbt cleanup install step for testing" This reverts commit b291800efd0d7c94a7d4715c220a21d1b51dfeb8. --- .github/workflows/build_and_test_dbt.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build_and_test_dbt.yaml b/.github/workflows/build_and_test_dbt.yaml index 5f508a8f1..c2e537986 100644 --- a/.github/workflows/build_and_test_dbt.yaml +++ b/.github/workflows/build_and_test_dbt.yaml @@ -110,7 +110,8 @@ jobs: working-directory: ${{ env.PROJECT_DIR }} shell: bash - - name: Install requirements for cleaning up dbt resources + - if: ${{ github.event_name == 'pull_request' && github.event.action == 'closed' }} + name: Install requirements for cleaning up dbt resources run: sudo apt-get update && sudo apt-get install jq shell: bash From d34c83bf2b5adacbc0dc45f31d9ee8cdf27661f7 Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Mon, 7 Aug 2023 12:03:40 -0500 Subject: [PATCH 39/82] Temporarily run test_dbt_models workflow on PRs so we can dispatch it manually --- .github/workflows/test_dbt_models.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/test_dbt_models.yaml b/.github/workflows/test_dbt_models.yaml index df2c2f9e8..25b3b79cb 100644 --- a/.github/workflows/test_dbt_models.yaml +++ b/.github/workflows/test_dbt_models.yaml @@ -1,6 +1,8 @@ name: Test dbt models on: + pull_request: + branches: [master, data-catalog] workflow_dispatch: inputs: use_build_cache: From d2909ea181b2a1c5133f16ece698a6c7f6c16a4e Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Mon, 7 Aug 2023 12:07:48 -0500 Subject: [PATCH 40/82] Give more verbose names to dbt workflow jobs --- .github/workflows/build_and_test_dbt.yaml | 2 +- .github/workflows/test_dbt_models.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build_and_test_dbt.yaml b/.github/workflows/build_and_test_dbt.yaml index c2e537986..d25a044fa 100644 --- a/.github/workflows/build_and_test_dbt.yaml +++ b/.github/workflows/build_and_test_dbt.yaml @@ -14,7 +14,7 @@ on: branches: [master, data-catalog] jobs: - build-and-test: + build-and-test-dbt: runs-on: ubuntu-latest # These permissions are needed to interact with GitHub's OIDC Token endpoint # so that we can authenticate with AWS diff --git a/.github/workflows/test_dbt_models.yaml b/.github/workflows/test_dbt_models.yaml index 25b3b79cb..281ddaee3 100644 --- a/.github/workflows/test_dbt_models.yaml +++ b/.github/workflows/test_dbt_models.yaml @@ -13,7 +13,7 @@ on: default: true jobs: - run-tests: + test-dbt-models: runs-on: ubuntu-latest # These permissions are needed to interact with GitHub's OIDC Token endpoint # so that we can authenticate with AWS From 7538f9e4d9ec3911fcf2e11de073b2231c53ac6c Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Mon, 7 Aug 2023 12:07:59 -0500 Subject: [PATCH 41/82] Revert "Temporarily run test_dbt_models workflow on PRs so we can dispatch it manually" This reverts commit d34c83bf2b5adacbc0dc45f31d9ee8cdf27661f7. --- .github/workflows/test_dbt_models.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/test_dbt_models.yaml b/.github/workflows/test_dbt_models.yaml index 281ddaee3..f4b31b3b3 100644 --- a/.github/workflows/test_dbt_models.yaml +++ b/.github/workflows/test_dbt_models.yaml @@ -1,8 +1,6 @@ name: Test dbt models on: - pull_request: - branches: [master, data-catalog] workflow_dispatch: inputs: use_build_cache: From c3a7a62a752704c62bab17cc816935e701e751fc Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Mon, 7 Aug 2023 12:35:18 -0500 Subject: [PATCH 42/82] Try adding push to test_dbt_models workflow definition to test dispatch --- .github/workflows/test_dbt_models.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/test_dbt_models.yaml b/.github/workflows/test_dbt_models.yaml index f4b31b3b3..353560568 100644 --- a/.github/workflows/test_dbt_models.yaml +++ b/.github/workflows/test_dbt_models.yaml @@ -9,6 +9,9 @@ on: type: boolean required: false default: true + push: + branches: + - 'jeancochrane/31-data-catalog-define-github-actions-workflows-for-building-the-dbt-dag-and-running-tests' jobs: test-dbt-models: From e10540f5c6feffc47005f6efc4e5af1a76fac66d Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Mon, 7 Aug 2023 13:29:53 -0500 Subject: [PATCH 43/82] Revert "Try adding push to test_dbt_models workflow definition to test dispatch" This reverts commit c3a7a62a752704c62bab17cc816935e701e751fc. --- .github/workflows/test_dbt_models.yaml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.github/workflows/test_dbt_models.yaml b/.github/workflows/test_dbt_models.yaml index 353560568..f4b31b3b3 100644 --- a/.github/workflows/test_dbt_models.yaml +++ b/.github/workflows/test_dbt_models.yaml @@ -9,9 +9,6 @@ on: type: boolean required: false default: true - push: - branches: - - 'jeancochrane/31-data-catalog-define-github-actions-workflows-for-building-the-dbt-dag-and-running-tests' jobs: test-dbt-models: From b5cec8dba395cc85746655ba1d450c66249d1a5f Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Mon, 7 Aug 2023 14:24:51 -0500 Subject: [PATCH 44/82] Add docstring to cleanup_dbt_resources.sh --- .github/scripts/cleanup_dbt_resources.sh | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/.github/scripts/cleanup_dbt_resources.sh b/.github/scripts/cleanup_dbt_resources.sh index b7471ef68..68e00c616 100755 --- a/.github/scripts/cleanup_dbt_resources.sh +++ b/.github/scripts/cleanup_dbt_resources.sh @@ -1,4 +1,12 @@ #!/usr/bin/env bash +# Clean up dbt resources created by a CI run or by local development. +# +# Takes one argument representing the target environment to clean up, +# one of `dev` or `ci`. E.g.: +# +# ./cleanup_dbt_resources.sh dev +# +# Assumes that jq is installed and available on the caller's path. set -euo pipefail if [[ "$#" -eq 0 ]]; then From 68f914c6bc0452c5748d883ea19e94b2ed660422 Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Mon, 7 Aug 2023 14:26:15 -0500 Subject: [PATCH 45/82] Run `dbt run` with --defer on CI to inherit built resources --- .github/workflows/build_and_test_dbt.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build_and_test_dbt.yaml b/.github/workflows/build_and_test_dbt.yaml index d25a044fa..e33c57595 100644 --- a/.github/workflows/build_and_test_dbt.yaml +++ b/.github/workflows/build_and_test_dbt.yaml @@ -90,7 +90,7 @@ jobs: run: | if [[ $MODIFIED_RESOURCES_ONLY == 'true' ]]; then echo "Running build on modified resources only" - dbt run --target "$TARGET" -s state:modified --state target/ + dbt run --target "$TARGET" -s state:modified --defer --state target/ else echo "Running build on all resources" dbt run --target "$TARGET" From 1d4ec5a91dad43c055ecca229a1a373199ee3558 Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Mon, 7 Aug 2023 14:26:38 -0500 Subject: [PATCH 46/82] Don't use build cache in test_dbt_models workflow --- .github/workflows/test_dbt_models.yaml | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) diff --git a/.github/workflows/test_dbt_models.yaml b/.github/workflows/test_dbt_models.yaml index f4b31b3b3..2871d382d 100644 --- a/.github/workflows/test_dbt_models.yaml +++ b/.github/workflows/test_dbt_models.yaml @@ -1,14 +1,6 @@ name: Test dbt models -on: - workflow_dispatch: - inputs: - use_build_cache: - description: >- - Use the master branch build cache to avoid rebuilding models. - type: boolean - required: false - default: true +on: workflow_dispatch jobs: test-dbt-models: @@ -28,13 +20,6 @@ jobs: - name: Load environment variables uses: ./.github/actions/load_environment_variables - - if: ${{ inputs.use_build_cache }} - name: Cache dbt manifest - uses: actions/cache@v3 - with: - path: ${{ env.MANIFEST_DIR }} - key: ${{ env.CACHE_NAME }}-master - - name: Configure AWS credentials uses: aws-actions/configure-aws-credentials@v2 with: @@ -42,6 +27,6 @@ jobs: aws-region: us-east-1 - name: Test models - run: dbt test --target prod + run: dbt test --target ci working-directory: ${{ env.PROJECT_DIR }} shell: bash From 8af875c2f0632cbcf6df8990a8109483b7ddec86 Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Mon, 7 Aug 2023 14:38:26 -0500 Subject: [PATCH 47/82] Try adding push to test_dbt_models workflow definition to test it again --- .github/workflows/test_dbt_models.yaml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test_dbt_models.yaml b/.github/workflows/test_dbt_models.yaml index 2871d382d..db5d2b5a5 100644 --- a/.github/workflows/test_dbt_models.yaml +++ b/.github/workflows/test_dbt_models.yaml @@ -1,6 +1,10 @@ name: Test dbt models -on: workflow_dispatch +on: + workflow_dispatch: + push: + branches: + - 'jeancochrane/31-data-catalog-define-github-actions-workflows-for-building-the-dbt-dag-and-running-tests' jobs: test-dbt-models: From 87353b27c354f52d63bec498454bede6e050ae41 Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Mon, 7 Aug 2023 14:43:12 -0500 Subject: [PATCH 48/82] Temporarily add --debug flag to dbt call in test_dbt_models --- .github/workflows/test_dbt_models.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test_dbt_models.yaml b/.github/workflows/test_dbt_models.yaml index db5d2b5a5..e73591f03 100644 --- a/.github/workflows/test_dbt_models.yaml +++ b/.github/workflows/test_dbt_models.yaml @@ -31,6 +31,6 @@ jobs: aws-region: us-east-1 - name: Test models - run: dbt test --target ci + run: dbt --debug test --target ci working-directory: ${{ env.PROJECT_DIR }} shell: bash From 37ba0bbfbba4edb12994c7edc396c9de1e494f03 Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Mon, 7 Aug 2023 14:45:45 -0500 Subject: [PATCH 49/82] Change `push` to `pull_request` for testing test_dbt_models workflow --- .github/workflows/test_dbt_models.yaml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/test_dbt_models.yaml b/.github/workflows/test_dbt_models.yaml index e73591f03..9a8f5948b 100644 --- a/.github/workflows/test_dbt_models.yaml +++ b/.github/workflows/test_dbt_models.yaml @@ -2,9 +2,8 @@ name: Test dbt models on: workflow_dispatch: - push: - branches: - - 'jeancochrane/31-data-catalog-define-github-actions-workflows-for-building-the-dbt-dag-and-running-tests' + pull_request: + branches: [master, data-catalog] jobs: test-dbt-models: From b36fec79898756c87fd67fd839ed6b3d5b13ac3c Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Mon, 7 Aug 2023 15:00:56 -0500 Subject: [PATCH 50/82] Remove --debug flag from test_dbt_models workflow --- .github/workflows/test_dbt_models.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test_dbt_models.yaml b/.github/workflows/test_dbt_models.yaml index 9a8f5948b..3e703c370 100644 --- a/.github/workflows/test_dbt_models.yaml +++ b/.github/workflows/test_dbt_models.yaml @@ -30,6 +30,6 @@ jobs: aws-region: us-east-1 - name: Test models - run: dbt --debug test --target ci + run: dbt test --target ci working-directory: ${{ env.PROJECT_DIR }} shell: bash From 5bdf9a2fa33fda5d3421d24a4554488d41ea134b Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Mon, 7 Aug 2023 15:05:04 -0500 Subject: [PATCH 51/82] Change test_dbt_models workflow to only run on dispatch --- .github/workflows/test_dbt_models.yaml | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/.github/workflows/test_dbt_models.yaml b/.github/workflows/test_dbt_models.yaml index 3e703c370..2871d382d 100644 --- a/.github/workflows/test_dbt_models.yaml +++ b/.github/workflows/test_dbt_models.yaml @@ -1,9 +1,6 @@ name: Test dbt models -on: - workflow_dispatch: - pull_request: - branches: [master, data-catalog] +on: workflow_dispatch jobs: test-dbt-models: From 9b4378d6fb593926baeaf7d766753148d77b9d3b Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Mon, 7 Aug 2023 16:11:37 -0500 Subject: [PATCH 52/82] Add draft GitHub Pages deployment workflow Try removing environment config from deploy workflow Pin actions/upload-pages-artifact to v2 in deploy_dbt_docs workflow --- .github/workflows/deploy_dbt_docs.yaml | 41 ++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 .github/workflows/deploy_dbt_docs.yaml diff --git a/.github/workflows/deploy_dbt_docs.yaml b/.github/workflows/deploy_dbt_docs.yaml new file mode 100644 index 000000000..42cb554f6 --- /dev/null +++ b/.github/workflows/deploy_dbt_docs.yaml @@ -0,0 +1,41 @@ +name: Deploy dbt docs + +on: + push: + branches: + - master + - data-catalog + # TODO: Remove this after testing + - '34-data-catalog-define-github-actions-workflow-for-rebuilding-and-deploying-docs-site' + +jobs: + deploy-dbt-docs: + runs-on: ubuntu-latest + # These permissions are required to make a GitHub Pages deployment + permissions: + pages: write # To deploy to Pages + id-token: write # To verify the deployment comes from an valid source + + steps: + - name: Checkout + uses: actions/checkout@v3 + + - name: Install dbt requirements + uses: ./.github/actions/install_dbt_requirements + + - name: Load environment variables + uses: ./.github/actions/load_environment_variables + + - name: Generate docs + run: dbt docs generate + working-directory: ${{ env.PROJECT_DIR }} + shell: bash + + - name: Upload docs directory artifact + uses: actions/upload-pages-artifact@v2 + with: + path: "target/" + + - name: Deploy to GitHub Pages + id: deployment + uses: actions/deploy-pages@v2 From 9708b5c1407931a963376c4f6bc1ea86f23ee3b7 Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Tue, 8 Aug 2023 09:46:29 -0500 Subject: [PATCH 53/82] Configure AWS credentials in deploy_dbt_docs workflow --- .github/workflows/deploy_dbt_docs.yaml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/workflows/deploy_dbt_docs.yaml b/.github/workflows/deploy_dbt_docs.yaml index 42cb554f6..574826d08 100644 --- a/.github/workflows/deploy_dbt_docs.yaml +++ b/.github/workflows/deploy_dbt_docs.yaml @@ -26,6 +26,12 @@ jobs: - name: Load environment variables uses: ./.github/actions/load_environment_variables + - name: Configure AWS credentials + uses: aws-actions/configure-aws-credentials@v2 + with: + role-to-assume: ${{ secrets.AWS_IAM_ROLE_TO_ASSUME_ARN }} + aws-region: us-east-1 + - name: Generate docs run: dbt docs generate working-directory: ${{ env.PROJECT_DIR }} From 687ecebb3cb995b17e83e928465057c2bbeb9a29 Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Tue, 8 Aug 2023 09:54:12 -0500 Subject: [PATCH 54/82] Run dbt docs generate against ci target in deploy_dbt_docs workflow --- .github/workflows/deploy_dbt_docs.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/deploy_dbt_docs.yaml b/.github/workflows/deploy_dbt_docs.yaml index 574826d08..5e2643a87 100644 --- a/.github/workflows/deploy_dbt_docs.yaml +++ b/.github/workflows/deploy_dbt_docs.yaml @@ -33,7 +33,7 @@ jobs: aws-region: us-east-1 - name: Generate docs - run: dbt docs generate + run: dbt docs generate --target ci working-directory: ${{ env.PROJECT_DIR }} shell: bash From 722529e4ef853b718547e86fbddfbb58bac80543 Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Tue, 8 Aug 2023 09:57:59 -0500 Subject: [PATCH 55/82] Test deploy_dbt_docs on pull_request event instead of push We need this for testing, since the required GITHUB_HEAD_REF env var is only available in the pull_request event context. --- .github/workflows/deploy_dbt_docs.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/deploy_dbt_docs.yaml b/.github/workflows/deploy_dbt_docs.yaml index 5e2643a87..90b2ee1af 100644 --- a/.github/workflows/deploy_dbt_docs.yaml +++ b/.github/workflows/deploy_dbt_docs.yaml @@ -2,11 +2,11 @@ name: Deploy dbt docs on: push: + branches: [master, data-catalog] + # TODO: Remove this after testing + pull_request: branches: - - master - - data-catalog - # TODO: Remove this after testing - - '34-data-catalog-define-github-actions-workflow-for-rebuilding-and-deploying-docs-site' + - 'jeancochrane/31-data-catalog-define-github-actions-workflows-for-building-the-dbt-dag-and-running-tests' jobs: deploy-dbt-docs: From cef322ae62e0c1e65b5f40aa10a3488eaa6a3f60 Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Tue, 8 Aug 2023 10:11:07 -0500 Subject: [PATCH 56/82] Cache dbt and Python requirements in install_dbt_requirements action --- .github/actions/install_dbt_requirements/action.yaml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.github/actions/install_dbt_requirements/action.yaml b/.github/actions/install_dbt_requirements/action.yaml index 743872b27..6086b0caa 100644 --- a/.github/actions/install_dbt_requirements/action.yaml +++ b/.github/actions/install_dbt_requirements/action.yaml @@ -16,11 +16,18 @@ runs: uses: actions/setup-python@v4 with: python-version: 3.x + cache: pip - name: Install python requirements run: python -m pip install -r ${{ inputs.requirements_file_path }} shell: bash + - name: Cache dbt requirements + uses: actions/cache@v3 + with: + path: ${{ inputs.dbt_project_dir }}/dbt_packages + key: dbt-${{ hashFiles(format('{0}/packages.yml', inputs.dbt_project_dir)) }} + - name: Install dbt requirements run: dbt deps working-directory: ${{ inputs.dbt_project_dir }} From e51c677bc22593f33a71131d7f7662c3cf348989 Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Tue, 8 Aug 2023 10:30:13 -0500 Subject: [PATCH 57/82] Use sed to strip comment lines in load_environment_variables composite action --- .github/actions/load_environment_variables/action.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/actions/load_environment_variables/action.yaml b/.github/actions/load_environment_variables/action.yaml index 941d09f13..fe112f9a8 100644 --- a/.github/actions/load_environment_variables/action.yaml +++ b/.github/actions/load_environment_variables/action.yaml @@ -11,5 +11,6 @@ inputs: runs: using: composite steps: - - run: sed "" ${{ inputs.env_var_file_path }} >> "$GITHUB_ENV" + # Use sed to strip comment lines + - run: sed "/#/d" ${{ inputs.env_var_file_path }} >> "$GITHUB_ENV" shell: bash From f9b5d86bc32c8f345fb2d07d136eae1ac23e29f9 Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Tue, 8 Aug 2023 10:32:53 -0500 Subject: [PATCH 58/82] Kebab case dbt build and test workflow names --- .github/workflows/build_and_test_dbt.yaml | 2 +- .github/workflows/test_dbt_models.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build_and_test_dbt.yaml b/.github/workflows/build_and_test_dbt.yaml index e33c57595..fbadfee25 100644 --- a/.github/workflows/build_and_test_dbt.yaml +++ b/.github/workflows/build_and_test_dbt.yaml @@ -1,4 +1,4 @@ -name: Build and test dbt +name: build-and-test-dbt on: pull_request: diff --git a/.github/workflows/test_dbt_models.yaml b/.github/workflows/test_dbt_models.yaml index 2871d382d..7e72a460f 100644 --- a/.github/workflows/test_dbt_models.yaml +++ b/.github/workflows/test_dbt_models.yaml @@ -1,4 +1,4 @@ -name: Test dbt models +name: test-dbt-models on: workflow_dispatch From 704ed13d79918028c3bc0e66daec37ac759be9fc Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Tue, 8 Aug 2023 11:03:38 -0500 Subject: [PATCH 59/82] Factor out cleanup-dbt-resources into its own workflow --- .github/workflows/build_and_test_dbt.yaml | 18 --------- .github/workflows/cleanup_dbt_resources.yaml | 39 ++++++++++++++++++++ 2 files changed, 39 insertions(+), 18 deletions(-) create mode 100644 .github/workflows/cleanup_dbt_resources.yaml diff --git a/.github/workflows/build_and_test_dbt.yaml b/.github/workflows/build_and_test_dbt.yaml index fbadfee25..b33a80594 100644 --- a/.github/workflows/build_and_test_dbt.yaml +++ b/.github/workflows/build_and_test_dbt.yaml @@ -3,13 +3,6 @@ name: build-and-test-dbt on: pull_request: branches: [master, data-catalog] - # Specifying event types manually allows us to run this flow when the - # PR is closed so that we can clean up staging dbt resources - types: - - opened - - synchronize - - closed - - reopened push: branches: [master, data-catalog] @@ -109,14 +102,3 @@ jobs: fi working-directory: ${{ env.PROJECT_DIR }} shell: bash - - - if: ${{ github.event_name == 'pull_request' && github.event.action == 'closed' }} - name: Install requirements for cleaning up dbt resources - run: sudo apt-get update && sudo apt-get install jq - shell: bash - - - if: ${{ github.event_name == 'pull_request' && github.event.action == 'closed' }} - name: Clean up dbt resources - run: ../.github/scripts/cleanup_dbt_resources.sh ci - working-directory: ${{ env.PROJECT_DIR }} - shell: bash diff --git a/.github/workflows/cleanup_dbt_resources.yaml b/.github/workflows/cleanup_dbt_resources.yaml new file mode 100644 index 000000000..08f471d6e --- /dev/null +++ b/.github/workflows/cleanup_dbt_resources.yaml @@ -0,0 +1,39 @@ +name: cleanup-dbt-resources + +on: + pull_request: + branches: [master, data-catalog] + types: [closed] + +jobs: + cleanup-dbt-resources: + runs-on: ubuntu-latest + # These permissions are needed to interact with GitHub's OIDC Token endpoint + # so that we can authenticate with AWS + permissions: + id-token: write + contents: read + steps: + - name: Checkout + uses: actions/checkout@v3 + + - name: Install dbt requirements + uses: ./.github/actions/install_dbt_requirements + + - name: Install requirements for cleaning up dbt resources + run: sudo apt-get update && sudo apt-get install jq + shell: bash + + - name: Load environment variables + uses: ./.github/actions/load_environment_variables + + - name: Configure AWS credentials + uses: aws-actions/configure-aws-credentials@v2 + with: + role-to-assume: ${{ secrets.AWS_IAM_ROLE_TO_ASSUME_ARN }} + aws-region: us-east-1 + + - name: Clean up dbt resources + run: ../.github/scripts/cleanup_dbt_resources.sh ci + working-directory: ${{ env.PROJECT_DIR }} + shell: bash From 1738374bce948dfd1453de4e928d1d229347503e Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Tue, 8 Aug 2023 11:05:15 -0500 Subject: [PATCH 60/82] Set GITHUB_HEAD_REF var in test_dbt_models workflow This is necessary to support running test_dbt_models against the data-catalog branch, which is our plan while we continue to develop on that long-lived branch. --- .github/workflows/test_dbt_models.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/test_dbt_models.yaml b/.github/workflows/test_dbt_models.yaml index 7e72a460f..c1134c4a9 100644 --- a/.github/workflows/test_dbt_models.yaml +++ b/.github/workflows/test_dbt_models.yaml @@ -27,6 +27,11 @@ jobs: aws-region: us-east-1 - name: Test models + # Target is currently set to CI because we expect this action to be + # run against the long-lived data-catalog branch, but we should change + # this to prod when we merge that branch into master run: dbt test --target ci working-directory: ${{ env.PROJECT_DIR }} shell: bash + env: + GITHUB_HEAD_REF: data-catalog From 672a7d47628fcd1f868dbc07cd8ae8e8c560efff Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Tue, 8 Aug 2023 11:24:52 -0500 Subject: [PATCH 61/82] Bump threshold for vw_pin_appeal dbt test failures --- dbt/models/default/schema.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbt/models/default/schema.yml b/dbt/models/default/schema.yml index e0a869b0f..7aa7d735f 100644 --- a/dbt/models/default/schema.yml +++ b/dbt/models/default/schema.yml @@ -31,7 +31,7 @@ models: - pin - year config: - error_if: ">280659" + error_if: ">280662" # Unique by case number and year - unique_combination_of_columns: name: vw_pin_appeal_unique_by_case_number_and_year @@ -39,7 +39,7 @@ models: - year - case_no config: - error_if: ">365855" + error_if: ">365894" # `change` should be an enum - dbt_utils.expression_is_true: name: vw_pin_appeal_no_unexpected_change_values From 3b3aebee443fe36b0726ba3e27ee2573d7cb2960 Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Tue, 8 Aug 2023 11:30:52 -0500 Subject: [PATCH 62/82] Set GITHUB_HEAD_REF when running deploy-dbt-docs workflow --- .github/workflows/deploy_dbt_docs.yaml | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/.github/workflows/deploy_dbt_docs.yaml b/.github/workflows/deploy_dbt_docs.yaml index 90b2ee1af..7417076d7 100644 --- a/.github/workflows/deploy_dbt_docs.yaml +++ b/.github/workflows/deploy_dbt_docs.yaml @@ -1,12 +1,12 @@ -name: Deploy dbt docs +name: deploy-dbt-docs on: push: - branches: [master, data-catalog] - # TODO: Remove this after testing - pull_request: branches: - - 'jeancochrane/31-data-catalog-define-github-actions-workflows-for-building-the-dbt-dag-and-running-tests' + - master + - data-catalog + # TODO: Remove this after testing + - 34-data-catalog-define-github-actions-workflow-for-rebuilding-and-deploying-docs-site jobs: deploy-dbt-docs: @@ -36,6 +36,8 @@ jobs: run: dbt docs generate --target ci working-directory: ${{ env.PROJECT_DIR }} shell: bash + env: + GITHUB_HEAD_REF: 34-data-catalog-define-github-actions-workflow-for-rebuilding-and-deploying-docs-site - name: Upload docs directory artifact uses: actions/upload-pages-artifact@v2 From c232ed19c1ae942d88963fb0882f7b0346072477 Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Tue, 8 Aug 2023 11:34:25 -0500 Subject: [PATCH 63/82] Temporarily add --debug flag to dbt docs generate call in deploy-dbt-docs --- .github/workflows/deploy_dbt_docs.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/deploy_dbt_docs.yaml b/.github/workflows/deploy_dbt_docs.yaml index 7417076d7..9cde8897b 100644 --- a/.github/workflows/deploy_dbt_docs.yaml +++ b/.github/workflows/deploy_dbt_docs.yaml @@ -33,7 +33,7 @@ jobs: aws-region: us-east-1 - name: Generate docs - run: dbt docs generate --target ci + run: dbt --debug docs generate --target ci working-directory: ${{ env.PROJECT_DIR }} shell: bash env: From 86f50d743f129785456dd2e38e3aa00e08a7eb52 Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Tue, 8 Aug 2023 11:40:15 -0500 Subject: [PATCH 64/82] Try passing GITHUB_HEAD_REF explicitly to dbt docs generate in deploy-dbt-docs workflow --- .github/workflows/deploy_dbt_docs.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/deploy_dbt_docs.yaml b/.github/workflows/deploy_dbt_docs.yaml index 9cde8897b..50fbd89ac 100644 --- a/.github/workflows/deploy_dbt_docs.yaml +++ b/.github/workflows/deploy_dbt_docs.yaml @@ -33,7 +33,7 @@ jobs: aws-region: us-east-1 - name: Generate docs - run: dbt --debug docs generate --target ci + run: GITHUB_HEAD_REF=$GITHUB_HEAD_REF dbt docs generate --target ci working-directory: ${{ env.PROJECT_DIR }} shell: bash env: From ae1be0eb8b2318a5cd2ed63f679fcb5ab10eafbd Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Tue, 8 Aug 2023 11:44:18 -0500 Subject: [PATCH 65/82] Try appending GITHUB_HEAD_REF to GITHUB_ENV in deploy-dbt-docs workflow --- .github/workflows/deploy_dbt_docs.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/deploy_dbt_docs.yaml b/.github/workflows/deploy_dbt_docs.yaml index 50fbd89ac..249490364 100644 --- a/.github/workflows/deploy_dbt_docs.yaml +++ b/.github/workflows/deploy_dbt_docs.yaml @@ -33,11 +33,11 @@ jobs: aws-region: us-east-1 - name: Generate docs - run: GITHUB_HEAD_REF=$GITHUB_HEAD_REF dbt docs generate --target ci + run: | + echo 'GITHUB_HEAD_REF=34-data-catalog-define-github-actions-workflow-for-rebuilding-and-deploying-docs-site' >> "$GITHUB_ENV" + dbt docs generate --target ci working-directory: ${{ env.PROJECT_DIR }} shell: bash - env: - GITHUB_HEAD_REF: 34-data-catalog-define-github-actions-workflow-for-rebuilding-and-deploying-docs-site - name: Upload docs directory artifact uses: actions/upload-pages-artifact@v2 From cae336481fe5450d9d9618b16eaedffb674b1a60 Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Tue, 8 Aug 2023 11:53:17 -0500 Subject: [PATCH 66/82] Temporarily log GITHUB_HEAD_REF in generate_schema_name macro for debugging --- dbt/macros/generate_schema_name.sql | 1 + 1 file changed, 1 insertion(+) diff --git a/dbt/macros/generate_schema_name.sql b/dbt/macros/generate_schema_name.sql index e46df7ea1..252b84f08 100644 --- a/dbt/macros/generate_schema_name.sql +++ b/dbt/macros/generate_schema_name.sql @@ -29,6 +29,7 @@ {%- if target.name == "dev" -%} {%- set schema_prefix = "dev_" ~ env_var_func("USER") ~ "_" -%} {%- elif target.name == "ci" -%} + {{ log("GITHUB_HEAD_REF: " ~ env_var_func("GITHUB_HEAD_REF"), info=True) }} {%- set github_head_ref = kebab_slugify(env_var_func("GITHUB_HEAD_REF")) -%} {%- set schema_prefix = "ci_" ~ github_head_ref ~ "_" -%} {%- else -%} {%- set schema_prefix = "" -%} From 780f1d5e652cc5f637c90294f693467c727ddbc4 Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Tue, 8 Aug 2023 11:56:25 -0500 Subject: [PATCH 67/82] Try a different method of passing GITHUB_HEAD_REF to deploy-dbt-docs workflow --- .github/workflows/deploy_dbt_docs.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/deploy_dbt_docs.yaml b/.github/workflows/deploy_dbt_docs.yaml index 249490364..7417076d7 100644 --- a/.github/workflows/deploy_dbt_docs.yaml +++ b/.github/workflows/deploy_dbt_docs.yaml @@ -33,11 +33,11 @@ jobs: aws-region: us-east-1 - name: Generate docs - run: | - echo 'GITHUB_HEAD_REF=34-data-catalog-define-github-actions-workflow-for-rebuilding-and-deploying-docs-site' >> "$GITHUB_ENV" - dbt docs generate --target ci + run: dbt docs generate --target ci working-directory: ${{ env.PROJECT_DIR }} shell: bash + env: + GITHUB_HEAD_REF: 34-data-catalog-define-github-actions-workflow-for-rebuilding-and-deploying-docs-site - name: Upload docs directory artifact uses: actions/upload-pages-artifact@v2 From c65083a308a146502e3b5727678ab483a3f5cd4a Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Tue, 8 Aug 2023 11:59:11 -0500 Subject: [PATCH 68/82] Try using HEAD_REF instead of GITHUB_HEAD_REF in generate_schema_name macro --- .github/workflows/deploy_dbt_docs.yaml | 2 +- dbt/macros/generate_schema_name.sql | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/deploy_dbt_docs.yaml b/.github/workflows/deploy_dbt_docs.yaml index 7417076d7..b05cc6811 100644 --- a/.github/workflows/deploy_dbt_docs.yaml +++ b/.github/workflows/deploy_dbt_docs.yaml @@ -37,7 +37,7 @@ jobs: working-directory: ${{ env.PROJECT_DIR }} shell: bash env: - GITHUB_HEAD_REF: 34-data-catalog-define-github-actions-workflow-for-rebuilding-and-deploying-docs-site + HEAD_REF: 34-data-catalog-define-github-actions-workflow-for-rebuilding-and-deploying-docs-site - name: Upload docs directory artifact uses: actions/upload-pages-artifact@v2 diff --git a/dbt/macros/generate_schema_name.sql b/dbt/macros/generate_schema_name.sql index 252b84f08..1a3076973 100644 --- a/dbt/macros/generate_schema_name.sql +++ b/dbt/macros/generate_schema_name.sql @@ -29,8 +29,8 @@ {%- if target.name == "dev" -%} {%- set schema_prefix = "dev_" ~ env_var_func("USER") ~ "_" -%} {%- elif target.name == "ci" -%} - {{ log("GITHUB_HEAD_REF: " ~ env_var_func("GITHUB_HEAD_REF"), info=True) }} - {%- set github_head_ref = kebab_slugify(env_var_func("GITHUB_HEAD_REF")) -%} + {{ log("HEAD_REF: " ~ env_var_func("HEAD_REF"), info=True) }} + {%- set github_head_ref = kebab_slugify(env_var_func("HEAD_REF")) -%} {%- set schema_prefix = "ci_" ~ github_head_ref ~ "_" -%} {%- else -%} {%- set schema_prefix = "" -%} {%- endif -%} From 9a77e31ee1ff1109a2e6b969d6973c9c1f16e2b4 Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Tue, 8 Aug 2023 12:05:21 -0500 Subject: [PATCH 69/82] Undo GITHUB_HEAD_REF -> HEAD_REF renaming in generate_schema_name macro --- .github/workflows/deploy_dbt_docs.yaml | 2 +- dbt/macros/generate_schema_name.sql | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/deploy_dbt_docs.yaml b/.github/workflows/deploy_dbt_docs.yaml index b05cc6811..7417076d7 100644 --- a/.github/workflows/deploy_dbt_docs.yaml +++ b/.github/workflows/deploy_dbt_docs.yaml @@ -37,7 +37,7 @@ jobs: working-directory: ${{ env.PROJECT_DIR }} shell: bash env: - HEAD_REF: 34-data-catalog-define-github-actions-workflow-for-rebuilding-and-deploying-docs-site + GITHUB_HEAD_REF: 34-data-catalog-define-github-actions-workflow-for-rebuilding-and-deploying-docs-site - name: Upload docs directory artifact uses: actions/upload-pages-artifact@v2 diff --git a/dbt/macros/generate_schema_name.sql b/dbt/macros/generate_schema_name.sql index 1a3076973..e46df7ea1 100644 --- a/dbt/macros/generate_schema_name.sql +++ b/dbt/macros/generate_schema_name.sql @@ -29,8 +29,7 @@ {%- if target.name == "dev" -%} {%- set schema_prefix = "dev_" ~ env_var_func("USER") ~ "_" -%} {%- elif target.name == "ci" -%} - {{ log("HEAD_REF: " ~ env_var_func("HEAD_REF"), info=True) }} - {%- set github_head_ref = kebab_slugify(env_var_func("HEAD_REF")) -%} + {%- set github_head_ref = kebab_slugify(env_var_func("GITHUB_HEAD_REF")) -%} {%- set schema_prefix = "ci_" ~ github_head_ref ~ "_" -%} {%- else -%} {%- set schema_prefix = "" -%} {%- endif -%} From 60c2bb10b4731869ef51e25c15ef71ffcf5087a4 Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Tue, 8 Aug 2023 12:19:53 -0500 Subject: [PATCH 70/82] Factor out composite GitHub action for configure_dbt_environment to set HEAD_REF env var It turns out that we can't override the GITHUB_HEAD_REF variable in a GitHub Actions workflow, which means we can't set it in order to treat the data-catalog branch environment as a CI target. To get around this, this commit changes up the generate_schema_name macro to read from a new HEAD_REF env var instead of GITHUB_HEAD_REF, and then factors out a configure_dbt_environment composite action that our dbt workflows can use to set this environment variable appropriately. --- .../configure_dbt_environment/action.yaml | 29 +++++++++++++++++++ .github/workflows/build_and_test_dbt.yaml | 25 ++-------------- .github/workflows/cleanup_dbt_resources.yaml | 3 ++ .github/workflows/test_dbt_models.yaml | 3 ++ dbt/macros/generate_schema_name.sql | 2 +- .../tests/test_generate_schema_name.sql | 2 +- 6 files changed, 39 insertions(+), 25 deletions(-) create mode 100644 .github/actions/configure_dbt_environment/action.yaml diff --git a/.github/actions/configure_dbt_environment/action.yaml b/.github/actions/configure_dbt_environment/action.yaml new file mode 100644 index 000000000..b9415701b --- /dev/null +++ b/.github/actions/configure_dbt_environment/action.yaml @@ -0,0 +1,29 @@ +name: Configure dbt environment +description: Set environment variables based on the active dbt project (CI or prod) +runs: + using: composite + steps: + - name: Configure dbt environment + run: | + if [[ $GITHUB_REF_NAME == 'master' ]]; then + echo "On master branch, setting dbt env to prod" + { + echo "TARGET=prod"; + echo "CACHE_KEY=master"; + } >> "$GITHUB_ENV" + elif [[ $GITHUB_REF_NAME == 'data-catalog' ]]; then + echo "On data catalog branch, setting dbt env to CI" + { + echo "TARGET=ci"; + echo "CACHE_KEY=data-catalog"; + echo "HEAD_REF=data-catalog"; + } >> "$GITHUB_ENV" + else + echo "On pull request branch, setting dbt env to CI" + { + echo "TARGET=ci"; + echo "CACHE_KEY=$GITHUB_HEAD_REF"; + echo "HEAD_REF=$GITHUB_HEAD_REF" + } >> "$GITHUB_ENV" + fi + shell: bash diff --git a/.github/workflows/build_and_test_dbt.yaml b/.github/workflows/build_and_test_dbt.yaml index b33a80594..68f529962 100644 --- a/.github/workflows/build_and_test_dbt.yaml +++ b/.github/workflows/build_and_test_dbt.yaml @@ -30,29 +30,8 @@ jobs: role-to-assume: ${{ secrets.AWS_IAM_ROLE_TO_ASSUME_ARN }} aws-region: us-east-1 - - name: Set environment for branch - run: | - if [[ $GITHUB_REF_NAME == 'master' ]]; then - echo "On master branch" - { - echo "TARGET=prod"; - echo "CACHE_KEY=master"; - } >> "$GITHUB_ENV" - elif [[ $GITHUB_REF_NAME == 'data-catalog' ]]; then - echo "On data catalog branch" - { - echo "TARGET=ci"; - echo "CACHE_KEY=data-catalog"; - echo "GITHUB_HEAD_REF=data-catalog"; - } >> "$GITHUB_ENV" - else - echo "On pull request branch" - { - echo "TARGET=ci"; - echo "CACHE_KEY=$GITHUB_HEAD_REF"; - } >> "$GITHUB_ENV" - fi - shell: bash + - name: Configure dbt environment + uses: ./.github/actions/configure_dbt_environment - name: Cache dbt manifest id: cache diff --git a/.github/workflows/cleanup_dbt_resources.yaml b/.github/workflows/cleanup_dbt_resources.yaml index 08f471d6e..2c785d11f 100644 --- a/.github/workflows/cleanup_dbt_resources.yaml +++ b/.github/workflows/cleanup_dbt_resources.yaml @@ -33,6 +33,9 @@ jobs: role-to-assume: ${{ secrets.AWS_IAM_ROLE_TO_ASSUME_ARN }} aws-region: us-east-1 + - name: Configure dbt environment + uses: ./.github/actions/configure_dbt_environment + - name: Clean up dbt resources run: ../.github/scripts/cleanup_dbt_resources.sh ci working-directory: ${{ env.PROJECT_DIR }} diff --git a/.github/workflows/test_dbt_models.yaml b/.github/workflows/test_dbt_models.yaml index c1134c4a9..1e2a93a19 100644 --- a/.github/workflows/test_dbt_models.yaml +++ b/.github/workflows/test_dbt_models.yaml @@ -26,6 +26,9 @@ jobs: role-to-assume: ${{ secrets.AWS_IAM_ROLE_TO_ASSUME_ARN }} aws-region: us-east-1 + - name: Configure dbt environment + uses: ./.github/actions/configure_dbt_environment + - name: Test models # Target is currently set to CI because we expect this action to be # run against the long-lived data-catalog branch, but we should change diff --git a/dbt/macros/generate_schema_name.sql b/dbt/macros/generate_schema_name.sql index e46df7ea1..53c261afd 100644 --- a/dbt/macros/generate_schema_name.sql +++ b/dbt/macros/generate_schema_name.sql @@ -29,7 +29,7 @@ {%- if target.name == "dev" -%} {%- set schema_prefix = "dev_" ~ env_var_func("USER") ~ "_" -%} {%- elif target.name == "ci" -%} - {%- set github_head_ref = kebab_slugify(env_var_func("GITHUB_HEAD_REF")) -%} + {%- set github_head_ref = kebab_slugify(env_var_func("HEAD_REF")) -%} {%- set schema_prefix = "ci_" ~ github_head_ref ~ "_" -%} {%- else -%} {%- set schema_prefix = "" -%} {%- endif -%} diff --git a/dbt/macros/tests/test_generate_schema_name.sql b/dbt/macros/tests/test_generate_schema_name.sql index 5116a26df..afc2de207 100644 --- a/dbt/macros/tests/test_generate_schema_name.sql +++ b/dbt/macros/tests/test_generate_schema_name.sql @@ -7,7 +7,7 @@ {% macro mock_env_var(var_name) %} {% if var_name == "USER" %} {{ return("testuser") }} - {% elif var_name == "GITHUB_HEAD_REF" %} {{ return("testuser/feature-branch-1") }} + {% elif var_name == "HEAD_REF" %} {{ return("testuser/feature-branch-1") }} {% else %} {{ return("") }} {% endif %} {% endmacro %} From 197e910fe07bc98770d8de4484241209c7ff54ad Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Tue, 8 Aug 2023 13:40:14 -0500 Subject: [PATCH 71/82] Rename GITHUB_HEAD_REF -> HEAD_REF in deploy_dbt_docs workflow --- .github/workflows/deploy_dbt_docs.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/deploy_dbt_docs.yaml b/.github/workflows/deploy_dbt_docs.yaml index 7417076d7..b05cc6811 100644 --- a/.github/workflows/deploy_dbt_docs.yaml +++ b/.github/workflows/deploy_dbt_docs.yaml @@ -37,7 +37,7 @@ jobs: working-directory: ${{ env.PROJECT_DIR }} shell: bash env: - GITHUB_HEAD_REF: 34-data-catalog-define-github-actions-workflow-for-rebuilding-and-deploying-docs-site + HEAD_REF: 34-data-catalog-define-github-actions-workflow-for-rebuilding-and-deploying-docs-site - name: Upload docs directory artifact uses: actions/upload-pages-artifact@v2 From a3fcc87145e9148b7e492f557790d7d2d34172d3 Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Tue, 8 Aug 2023 13:46:37 -0500 Subject: [PATCH 72/82] Upload pages artifact from fully qualified path in deploy_dbt_docs workflow --- .github/workflows/deploy_dbt_docs.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/deploy_dbt_docs.yaml b/.github/workflows/deploy_dbt_docs.yaml index b05cc6811..7e1c40a37 100644 --- a/.github/workflows/deploy_dbt_docs.yaml +++ b/.github/workflows/deploy_dbt_docs.yaml @@ -42,7 +42,7 @@ jobs: - name: Upload docs directory artifact uses: actions/upload-pages-artifact@v2 with: - path: "target/" + path: ${{ format('{0}/target', env.PROJECT_DIR) }} - name: Deploy to GitHub Pages id: deployment From eeb976f2371453523abc39171cb2c7d9ff1fea14 Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Tue, 8 Aug 2023 13:55:25 -0500 Subject: [PATCH 73/82] Explicitly specify the files to upload in deploy_dbt_docs workflow --- .github/workflows/deploy_dbt_docs.yaml | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/.github/workflows/deploy_dbt_docs.yaml b/.github/workflows/deploy_dbt_docs.yaml index 7e1c40a37..db024cd8c 100644 --- a/.github/workflows/deploy_dbt_docs.yaml +++ b/.github/workflows/deploy_dbt_docs.yaml @@ -39,10 +39,19 @@ jobs: env: HEAD_REF: 34-data-catalog-define-github-actions-workflow-for-rebuilding-and-deploying-docs-site + - name: Package doc files for upload + run: | + mkdir _site + for file in index.html catalog.json manifest.json; do + cp "target/$file" "_site/$file" + done + working-directory: ${{ env.PROJECT_DIR }} + shell: bash + - name: Upload docs directory artifact uses: actions/upload-pages-artifact@v2 with: - path: ${{ format('{0}/target', env.PROJECT_DIR) }} + path: ${{ format('{0}/_site', env.PROJECT_DIR) }} - name: Deploy to GitHub Pages id: deployment From 0aa549bae68fd9414f6b97fa64a6ba58e54760b5 Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Tue, 8 Aug 2023 14:10:02 -0500 Subject: [PATCH 74/82] Only run deploy-dbt-docs workflow after successful build-and-test-dbt runs on main branch --- .github/workflows/deploy_dbt_docs.yaml | 43 +++++++++++++++++--------- 1 file changed, 28 insertions(+), 15 deletions(-) diff --git a/.github/workflows/deploy_dbt_docs.yaml b/.github/workflows/deploy_dbt_docs.yaml index db024cd8c..e2ef79427 100644 --- a/.github/workflows/deploy_dbt_docs.yaml +++ b/.github/workflows/deploy_dbt_docs.yaml @@ -1,12 +1,10 @@ name: deploy-dbt-docs on: - push: - branches: - - master - - data-catalog - # TODO: Remove this after testing - - 34-data-catalog-define-github-actions-workflow-for-rebuilding-and-deploying-docs-site + workflow_run: + workflows: [build-and-test-dbt] + branches: [master, data-catalog] + types: [completed] jobs: deploy-dbt-docs: @@ -17,29 +15,42 @@ jobs: id-token: write # To verify the deployment comes from an valid source steps: - - name: Checkout + # There is currently no way to early-exit a run with a success status + # in GitHub actions, so we have to mark each of these steps with the + # same `if` condition to restrict deploys to successful builds :( + # https://github.com/actions/runner/issues/662 + - if: github.event.workflow_run.conclusion == 'success' + name: Checkout uses: actions/checkout@v3 - - name: Install dbt requirements + - if: github.event.workflow_run.conclusion == 'success' + name: Install dbt requirements uses: ./.github/actions/install_dbt_requirements - - name: Load environment variables + - if: github.event.workflow_run.conclusion == 'success' + name: Load environment variables uses: ./.github/actions/load_environment_variables - - name: Configure AWS credentials + - if: github.event.workflow_run.conclusion == 'success' + name: Configure AWS credentials uses: aws-actions/configure-aws-credentials@v2 with: role-to-assume: ${{ secrets.AWS_IAM_ROLE_TO_ASSUME_ARN }} aws-region: us-east-1 - - name: Generate docs + - if: github.event.workflow_run.conclusion == 'success' + name: Generate docs + # Target is currently set to CI because we expect this action to be + # run against the long-lived data-catalog branch, but we should change + # this to prod when we merge that branch into master run: dbt docs generate --target ci working-directory: ${{ env.PROJECT_DIR }} shell: bash env: - HEAD_REF: 34-data-catalog-define-github-actions-workflow-for-rebuilding-and-deploying-docs-site + HEAD_REF: data-catalog - - name: Package doc files for upload + - if: github.event.workflow_run.conclusion == 'success' + name: Package doc files for upload run: | mkdir _site for file in index.html catalog.json manifest.json; do @@ -48,11 +59,13 @@ jobs: working-directory: ${{ env.PROJECT_DIR }} shell: bash - - name: Upload docs directory artifact + - if: github.event.workflow_run.conclusion == 'success' + name: Upload docs directory artifact uses: actions/upload-pages-artifact@v2 with: path: ${{ format('{0}/_site', env.PROJECT_DIR) }} - - name: Deploy to GitHub Pages + - if: github.event.workflow_run.conclusion == 'success' + name: Deploy to GitHub Pages id: deployment uses: actions/deploy-pages@v2 From ee4a7602b27bc7142e7f6bcc3e778187946dc3c6 Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Wed, 9 Aug 2023 09:26:52 -0500 Subject: [PATCH 75/82] Use top-level `if` condition in deploy-dbt-docs workflow job --- .github/workflows/deploy_dbt_docs.yaml | 31 +++++++++----------------- 1 file changed, 10 insertions(+), 21 deletions(-) diff --git a/.github/workflows/deploy_dbt_docs.yaml b/.github/workflows/deploy_dbt_docs.yaml index e2ef79427..83ee322f1 100644 --- a/.github/workflows/deploy_dbt_docs.yaml +++ b/.github/workflows/deploy_dbt_docs.yaml @@ -5,6 +5,7 @@ on: workflows: [build-and-test-dbt] branches: [master, data-catalog] types: [completed] + workflow_dispatch: jobs: deploy-dbt-docs: @@ -13,33 +14,24 @@ jobs: permissions: pages: write # To deploy to Pages id-token: write # To verify the deployment comes from an valid source - + if: github.event_name == 'workflow_dispatch' || github.event.workflow_run.conclusion == 'success' steps: - # There is currently no way to early-exit a run with a success status - # in GitHub actions, so we have to mark each of these steps with the - # same `if` condition to restrict deploys to successful builds :( - # https://github.com/actions/runner/issues/662 - - if: github.event.workflow_run.conclusion == 'success' - name: Checkout + - name: Checkout uses: actions/checkout@v3 - - if: github.event.workflow_run.conclusion == 'success' - name: Install dbt requirements + - name: Install dbt requirements uses: ./.github/actions/install_dbt_requirements - - if: github.event.workflow_run.conclusion == 'success' - name: Load environment variables + - name: Load environment variables uses: ./.github/actions/load_environment_variables - - if: github.event.workflow_run.conclusion == 'success' - name: Configure AWS credentials + - name: Configure AWS credentials uses: aws-actions/configure-aws-credentials@v2 with: role-to-assume: ${{ secrets.AWS_IAM_ROLE_TO_ASSUME_ARN }} aws-region: us-east-1 - - if: github.event.workflow_run.conclusion == 'success' - name: Generate docs + - name: Generate docs # Target is currently set to CI because we expect this action to be # run against the long-lived data-catalog branch, but we should change # this to prod when we merge that branch into master @@ -49,8 +41,7 @@ jobs: env: HEAD_REF: data-catalog - - if: github.event.workflow_run.conclusion == 'success' - name: Package doc files for upload + - name: Package doc files for upload run: | mkdir _site for file in index.html catalog.json manifest.json; do @@ -59,13 +50,11 @@ jobs: working-directory: ${{ env.PROJECT_DIR }} shell: bash - - if: github.event.workflow_run.conclusion == 'success' - name: Upload docs directory artifact + - name: Upload docs directory artifact uses: actions/upload-pages-artifact@v2 with: path: ${{ format('{0}/_site', env.PROJECT_DIR) }} - - if: github.event.workflow_run.conclusion == 'success' - name: Deploy to GitHub Pages + - name: Deploy to GitHub Pages id: deployment uses: actions/deploy-pages@v2 From ec2f703ed00961faadbcb17b5b9db98035d475ad Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Wed, 9 Aug 2023 09:30:32 -0500 Subject: [PATCH 76/82] Test running deploy-dbt-docs workflow on PR branch --- .github/actions/configure_dbt_environment/action.yaml | 7 +++++++ .github/workflows/build_and_test_dbt.yaml | 2 +- .github/workflows/deploy_dbt_docs.yaml | 2 +- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/.github/actions/configure_dbt_environment/action.yaml b/.github/actions/configure_dbt_environment/action.yaml index c52df556c..6d928b669 100644 --- a/.github/actions/configure_dbt_environment/action.yaml +++ b/.github/actions/configure_dbt_environment/action.yaml @@ -27,6 +27,13 @@ runs: echo "CACHE_KEY=data-catalog"; echo "HEAD_REF=data-catalog"; } >> "$GITHUB_ENV" + elif [[ $GITHUB_REF_NAME == '34-data-catalog-define-github-actions-workflow-for-rebuilding-and-deploying-docs-site' ]]; then + echo "On dbt docs branch, setting dbt env to CI" + { + echo "TARGET=ci"; + echo "CACHE_KEY=$GITHUB_REF_NAME"; + echo "HEAD_REF=$GITHUB_REF_NAME" + } >> "$GITHUB_ENV" else echo "CI context did not match any of the expected environments" exit 1 diff --git a/.github/workflows/build_and_test_dbt.yaml b/.github/workflows/build_and_test_dbt.yaml index 68f529962..32cb37a7a 100644 --- a/.github/workflows/build_and_test_dbt.yaml +++ b/.github/workflows/build_and_test_dbt.yaml @@ -4,7 +4,7 @@ on: pull_request: branches: [master, data-catalog] push: - branches: [master, data-catalog] + branches: [master, data-catalog, 34-data-catalog-define-github-actions-workflow-for-rebuilding-and-deploying-docs-site] jobs: build-and-test-dbt: diff --git a/.github/workflows/deploy_dbt_docs.yaml b/.github/workflows/deploy_dbt_docs.yaml index 83ee322f1..df88a7741 100644 --- a/.github/workflows/deploy_dbt_docs.yaml +++ b/.github/workflows/deploy_dbt_docs.yaml @@ -3,7 +3,7 @@ name: deploy-dbt-docs on: workflow_run: workflows: [build-and-test-dbt] - branches: [master, data-catalog] + branches: [master, data-catalog, 34-data-catalog-define-github-actions-workflow-for-rebuilding-and-deploying-docs-site] types: [completed] workflow_dispatch: From 7559fab04d1b4b6d74710ec320e46971894b825e Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Wed, 9 Aug 2023 09:54:20 -0500 Subject: [PATCH 77/82] Bump error thresholds in dbt tests --- dbt/models/default/schema.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dbt/models/default/schema.yml b/dbt/models/default/schema.yml index 7aa7d735f..819a50110 100644 --- a/dbt/models/default/schema.yml +++ b/dbt/models/default/schema.yml @@ -39,7 +39,7 @@ models: - year - case_no config: - error_if: ">365894" + error_if: ">365905" # `change` should be an enum - dbt_utils.expression_is_true: name: vw_pin_appeal_no_unexpected_change_values @@ -111,7 +111,7 @@ models: - mail_address_zipcode_1 - mail_address_zipcode_2 config: - error_if: ">880581" + error_if: ">880607" # TODO: Mailing address changes after validated sale(?) # TODO: Site addresses are all in Cook County - name: vw_pin_condo_char_test @@ -180,6 +180,6 @@ models: group_column: year count_column: class config: - error_if: ">23" + error_if: ">24" # TODO: Data completeness correlates with availability of spatial data # by year From 291c1a53101f895fc346421932fa0b1ae920ab58 Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Wed, 9 Aug 2023 10:13:58 -0500 Subject: [PATCH 78/82] Try removing `if` conditional from deploy-dbt-docs workflow --- .github/workflows/deploy_dbt_docs.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/deploy_dbt_docs.yaml b/.github/workflows/deploy_dbt_docs.yaml index df88a7741..4d9ddf433 100644 --- a/.github/workflows/deploy_dbt_docs.yaml +++ b/.github/workflows/deploy_dbt_docs.yaml @@ -14,7 +14,6 @@ jobs: permissions: pages: write # To deploy to Pages id-token: write # To verify the deployment comes from an valid source - if: github.event_name == 'workflow_dispatch' || github.event.workflow_run.conclusion == 'success' steps: - name: Checkout uses: actions/checkout@v3 From ab72942957387225033d76170a180cc302ff9fa6 Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Wed, 9 Aug 2023 10:17:08 -0500 Subject: [PATCH 79/82] Try removing workflow_dispatch from deploy-dbt-docs workflow --- .github/workflows/deploy_dbt_docs.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/deploy_dbt_docs.yaml b/.github/workflows/deploy_dbt_docs.yaml index 4d9ddf433..740b3d28d 100644 --- a/.github/workflows/deploy_dbt_docs.yaml +++ b/.github/workflows/deploy_dbt_docs.yaml @@ -5,7 +5,6 @@ on: workflows: [build-and-test-dbt] branches: [master, data-catalog, 34-data-catalog-define-github-actions-workflow-for-rebuilding-and-deploying-docs-site] types: [completed] - workflow_dispatch: jobs: deploy-dbt-docs: From 4d4c10bfc09326406be4aa518b3cbebc1b367469 Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Wed, 9 Aug 2023 10:22:02 -0500 Subject: [PATCH 80/82] Revert "Try removing workflow_dispatch from deploy-dbt-docs workflow" This reverts commit ab72942957387225033d76170a180cc302ff9fa6. --- .github/workflows/deploy_dbt_docs.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/deploy_dbt_docs.yaml b/.github/workflows/deploy_dbt_docs.yaml index 740b3d28d..4d9ddf433 100644 --- a/.github/workflows/deploy_dbt_docs.yaml +++ b/.github/workflows/deploy_dbt_docs.yaml @@ -5,6 +5,7 @@ on: workflows: [build-and-test-dbt] branches: [master, data-catalog, 34-data-catalog-define-github-actions-workflow-for-rebuilding-and-deploying-docs-site] types: [completed] + workflow_dispatch: jobs: deploy-dbt-docs: From 4784ed3220350cc5321864b6b874ee132019049e Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Wed, 9 Aug 2023 10:22:07 -0500 Subject: [PATCH 81/82] Revert "Try removing `if` conditional from deploy-dbt-docs workflow" This reverts commit 291c1a53101f895fc346421932fa0b1ae920ab58. --- .github/workflows/deploy_dbt_docs.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/deploy_dbt_docs.yaml b/.github/workflows/deploy_dbt_docs.yaml index 4d9ddf433..df88a7741 100644 --- a/.github/workflows/deploy_dbt_docs.yaml +++ b/.github/workflows/deploy_dbt_docs.yaml @@ -14,6 +14,7 @@ jobs: permissions: pages: write # To deploy to Pages id-token: write # To verify the deployment comes from an valid source + if: github.event_name == 'workflow_dispatch' || github.event.workflow_run.conclusion == 'success' steps: - name: Checkout uses: actions/checkout@v3 From a72801ae4de7c33b0870d64967926b2d734c0b33 Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Wed, 9 Aug 2023 10:22:28 -0500 Subject: [PATCH 82/82] Revert "Test running deploy-dbt-docs workflow on PR branch" This reverts commit ec2f703ed00961faadbcb17b5b9db98035d475ad. --- .github/actions/configure_dbt_environment/action.yaml | 7 ------- .github/workflows/build_and_test_dbt.yaml | 2 +- .github/workflows/deploy_dbt_docs.yaml | 2 +- 3 files changed, 2 insertions(+), 9 deletions(-) diff --git a/.github/actions/configure_dbt_environment/action.yaml b/.github/actions/configure_dbt_environment/action.yaml index 6d928b669..c52df556c 100644 --- a/.github/actions/configure_dbt_environment/action.yaml +++ b/.github/actions/configure_dbt_environment/action.yaml @@ -27,13 +27,6 @@ runs: echo "CACHE_KEY=data-catalog"; echo "HEAD_REF=data-catalog"; } >> "$GITHUB_ENV" - elif [[ $GITHUB_REF_NAME == '34-data-catalog-define-github-actions-workflow-for-rebuilding-and-deploying-docs-site' ]]; then - echo "On dbt docs branch, setting dbt env to CI" - { - echo "TARGET=ci"; - echo "CACHE_KEY=$GITHUB_REF_NAME"; - echo "HEAD_REF=$GITHUB_REF_NAME" - } >> "$GITHUB_ENV" else echo "CI context did not match any of the expected environments" exit 1 diff --git a/.github/workflows/build_and_test_dbt.yaml b/.github/workflows/build_and_test_dbt.yaml index 32cb37a7a..68f529962 100644 --- a/.github/workflows/build_and_test_dbt.yaml +++ b/.github/workflows/build_and_test_dbt.yaml @@ -4,7 +4,7 @@ on: pull_request: branches: [master, data-catalog] push: - branches: [master, data-catalog, 34-data-catalog-define-github-actions-workflow-for-rebuilding-and-deploying-docs-site] + branches: [master, data-catalog] jobs: build-and-test-dbt: diff --git a/.github/workflows/deploy_dbt_docs.yaml b/.github/workflows/deploy_dbt_docs.yaml index df88a7741..83ee322f1 100644 --- a/.github/workflows/deploy_dbt_docs.yaml +++ b/.github/workflows/deploy_dbt_docs.yaml @@ -3,7 +3,7 @@ name: deploy-dbt-docs on: workflow_run: workflows: [build-and-test-dbt] - branches: [master, data-catalog, 34-data-catalog-define-github-actions-workflow-for-rebuilding-and-deploying-docs-site] + branches: [master, data-catalog] types: [completed] workflow_dispatch: