diff --git a/.github/actions/configure_dbt_environment/action.yaml b/.github/actions/configure_dbt_environment/action.yaml
index 2cbd39122..5846430a2 100644
--- a/.github/actions/configure_dbt_environment/action.yaml
+++ b/.github/actions/configure_dbt_environment/action.yaml
@@ -11,14 +11,16 @@ runs:
           echo "On pull request branch, setting dbt env to CI"
           {
             echo "TARGET=ci";
-            echo "CACHE_KEY=$GITHUB_HEAD_REF";
+            echo "CACHE_KEY=pr-caches/$GITHUB_HEAD_REF";
+            echo "CACHE_RESTORE_KEY=master-cache";
             echo "HEAD_REF=$GITHUB_HEAD_REF"
           } >> "$GITHUB_ENV"
         elif [[ $GITHUB_REF_NAME == 'master' ]]; then
           echo "On master branch, setting dbt env to prod"
           {
             echo "TARGET=prod";
-            echo "CACHE_KEY=master";
+            echo "CACHE_KEY=master-cache";
+            echo "CACHE_RESTORE_KEY=master-cache"
           } >> "$GITHUB_ENV"
         else
           echo "CI context did not match any of the expected environments"
diff --git a/.github/actions/restore_dbt_cache/action.yaml b/.github/actions/restore_dbt_cache/action.yaml
new file mode 100644
index 000000000..e5a5bdb3d
--- /dev/null
+++ b/.github/actions/restore_dbt_cache/action.yaml
@@ -0,0 +1,81 @@
+name: Restore dbt cache
+description: Attempts to restore dbt cache from S3 storage.
+inputs:
+  path:
+    description: The local path to restore the cache to in case of a hit.
+    required: true
+  key:
+    description: The cache key to query for.
+    required: true
+  restore-key:
+    description: An additional key to use as a fallback for the cache.
+    required: true
+  bucket:
+    description: The S3 bucket that should store the cache.
+    required: false
+    default: ccao-dbt-cache-us-east-1
+outputs:
+  cache-hit:
+    description: >-
+      Boolean indicating whether a match was found for the cache key.
+    value: ${{ steps.query-cache-keys.outputs.cache-hit }}
+  exact-match:
+    description: >-
+      Boolean indicating whether a cache hit was an exact match. Always
+      false if cache-hit is false.
+    value: ${{ steps.query-cache-keys.outputs.exact-match }}
+  cache-matched-key:
+    description: The cache key that matched, if any. Empty if cache-hit is false.
+    value: ${{ steps.query-cache-keys.outputs.cache-matched-key }}
+runs:
+  using: composite
+  steps:
+    # A key counts as cached when `<key>/manifest.json` exists in the bucket.
+    # Any head-object failure (not only a missing object) falls through to
+    # the next branch, so it ends up reported as a fallback hit or a miss.
+    - name: Check for a cache key match
+      id: query-cache-keys
+      run: |
+        if aws s3api head-object --bucket "$BUCKET" --key "$KEY/manifest.json"; then
+          echo "Cache hit: Found exact match"
+          {
+            echo "cache-hit=true";
+            echo "exact-match=true";
+            echo "cache-matched-key=$KEY"
+          } >> "$GITHUB_OUTPUT"
+        else
+          echo "Did not find exact match for cache key, checking fallback"
+          if aws s3api head-object --bucket "$BUCKET" --key "$RESTORE_KEY/manifest.json"; then
+            echo "Cache hit: Found fallback match"
+            {
+              echo "cache-hit=true";
+              echo "exact-match=false";
+              echo "cache-matched-key=$RESTORE_KEY"
+            } >> "$GITHUB_OUTPUT"
+          else
+            echo "Cache miss: Did not find fallback match for cache key"
+            # Leave cache-matched-key empty: everything after `=` becomes the
+            # output value, so writing '' would emit two literal quote chars
+            {
+              echo "cache-hit=false";
+              echo "exact-match=false";
+              echo "cache-matched-key="
+            } >> "$GITHUB_OUTPUT"
+          fi
+        fi
+      shell: bash
+      env:
+        KEY: ${{ inputs.key }}
+        RESTORE_KEY: ${{ inputs.restore-key }}
+        BUCKET: ${{ inputs.bucket }}
+
+    # Download from whichever key matched (exact or fallback); this step is
+    # skipped entirely on a cache miss
+    - if: steps.query-cache-keys.outputs.cache-hit == 'true'
+      name: Copy cache to path
+      run: aws s3 cp "s3://$BUCKET/$KEY/manifest.json" "$CACHE_PATH/manifest.json"
+      shell: bash
+      env:
+        KEY: ${{ steps.query-cache-keys.outputs.cache-matched-key }}
+        CACHE_PATH: ${{ inputs.path }}
+        BUCKET: ${{ inputs.bucket }}
diff --git a/.github/actions/save_dbt_cache/action.yaml b/.github/actions/save_dbt_cache/action.yaml
new file mode 100644
index 000000000..03a1748e5
--- /dev/null
+++ b/.github/actions/save_dbt_cache/action.yaml
@@ -0,0 +1,25 @@
+name: Save dbt cache
+description: Updates dbt cache using S3 storage.
+inputs:
+  path:
+    description: The local path to the state dir to upload as the new cache.
+    required: true
+  key:
+    description: The key to use for the cache.
+    required: true
+  bucket:
+    description: The S3 bucket that should store the cache.
+    required: false
+    default: ccao-dbt-cache-us-east-1
+runs:
+  using: composite
+  steps:
+    # Only manifest.json is uploaded, to `<key>/manifest.json` in the bucket;
+    # nothing else under the given path is copied
+    - name: Save dbt cache
+      run: aws s3 cp "$CACHE_PATH/manifest.json" "s3://$BUCKET/$KEY/manifest.json"
+      shell: bash
+      env:
+        KEY: ${{ inputs.key }}
+        CACHE_PATH: ${{ inputs.path }}
+        BUCKET: ${{ inputs.bucket }}
diff --git a/.github/variables/dbt.env b/.github/variables/dbt.env
index 76c7a3ac2..ae8e70393 100644
--- a/.github/variables/dbt.env
+++ b/.github/variables/dbt.env
@@ -1,4 +1,3 @@
-CACHE_NAME=dbt-cache
 PROJECT_DIR=dbt
 STATE_DIR=state
 TARGET_DIR=target
diff --git a/.github/workflows/build_and_test_dbt.yaml b/.github/workflows/build_and_test_dbt.yaml
index 373c789c6..44886a4be 100644
--- a/.github/workflows/build_and_test_dbt.yaml
+++ b/.github/workflows/build_and_test_dbt.yaml
@@ -23,35 +23,20 @@ jobs:
         with:
           role-to-assume: ${{ secrets.AWS_IAM_ROLE_TO_ASSUME_ARN }}
 
-      # We have to use the separate `restore`/`save` actions instead of the
-      # unified `cache` action because only `restore` provides access to the
-      # `cache-matched-key` and `cache-primary-key` outputs as of v3
       - name: Restore dbt state cache
         id: cache
-        uses: actions/cache/restore@v3
+        uses: ./.github/actions/restore_dbt_cache
         with:
           path: ${{ env.PROJECT_DIR }}/${{ env.STATE_DIR }}
-          key: ${{ env.CACHE_NAME }}-${{ env.CACHE_KEY }}
-          restore-keys: |
-            ${{ env.CACHE_NAME }}-master
+          key: ${{ env.CACHE_KEY }}
+          restore-key: ${{ env.CACHE_RESTORE_KEY }}
 
-      # If we restore the cache from the `restore-keys` key, the `cache-hit`
-      # output will be 'false' but the `cache-matched-key` output will be
-      # the name of the `restore-keys` key; we want to count this case as a hit
-      - if: |
-          steps.cache.outputs.cache-hit == 'true' ||
-          steps.cache.outputs.cache-matched-key == format(
-            '{0}-master', env.CACHE_NAME
-          )
+      - if: steps.cache.outputs.cache-hit == 'true'
         name: Set command args to build/test modified resources
         run: echo "MODIFIED_RESOURCES_ONLY=true" >> "$GITHUB_ENV"
         shell: bash
 
-      - if: |
-          steps.cache.outputs.cache-hit != 'true' &&
-          steps.cache.outputs.cache-matched-key != format(
-            '{0}-master', env.CACHE_NAME
-          )
+      - if: steps.cache.outputs.cache-hit != 'true'
         name: Set command args to build/test all resources
         run: echo "MODIFIED_RESOURCES_ONLY=false" >> "$GITHUB_ENV"
         shell: bash
@@ -86,7 +71,7 @@ jobs:
         shell: bash
 
       - name: Update dbt state cache
-        uses: actions/cache/save@v3
+        uses: ./.github/actions/save_dbt_cache
         with:
           path: ${{ env.PROJECT_DIR }}/${{ env.TARGET_DIR }}
-          key: ${{ env.CACHE_NAME }}-${{ env.CACHE_KEY }}
+          key: ${{ env.CACHE_KEY }}