diff --git a/README.md b/README.md index 1b9e9fcb..d4ca3b08 100644 --- a/README.md +++ b/README.md @@ -76,7 +76,7 @@ This will prompt for parameters for initialization. Some of these parameters are We expect Data Scientists to specify ``Project_Only`` to get started in a development capacity, and when ready to move the project to Staging/Production, CI/CD can be set up. We expect that step to be done by Machine Learning Engineers (MLEs) who can specify ``CICD_Only`` during initialization and use the provided workflow to setup CI/CD for one or more projects. * ``input_root_dir``: name of the root directory. When initializing with ``CICD_and_Project``, this field will automatically be set to ``input_project_name``. - * ``input_cloud``: Cloud provider you use with Databricks (AWS or Azure), note GCP is not supported at this time. + * ``input_cloud``: Cloud provider you use with Databricks (AWS, Azure, or GCP). Others must be correctly specified for CI/CD to work: * ``input_cicd_platform`` : CI/CD platform of choice (GitHub Actions or GitHub Actions for GitHub Enterprise Servers or Azure DevOps) @@ -198,7 +198,7 @@ a generated new ML project. To do this, you can create an example project from your local checkout of the repo, and inspect its contents/run tests within the project. -We provide example project configs for Azure (using both GitHub and Azure DevOps) and AWS (using GitHub) under `tests/example-project-configs`. +We provide example project configs for Azure (using both GitHub and Azure DevOps), AWS (using GitHub), and GCP (using GitHub) under `tests/example-project-configs`. 
To create an example Azure project, using Azure DevOps as the CI/CD platform, run the following from the desired parent directory of the example project: @@ -214,3 +214,10 @@ To create an example AWS project, using GitHub Actions for CI/CD, run: MLOPS_STACKS_PATH=~/mlops-stacks databricks bundle init "$MLOPS_STACKS_PATH" --config-file "$MLOPS_STACKS_PATH/tests/example-project-configs/aws/aws-github.json" ``` + +To create an example GCP project, using GitHub Actions for CI/CD, run: +``` +# Note: update MLOPS_STACKS_PATH to the path to your local checkout of the MLOps Stacks repo +MLOPS_STACKS_PATH=~/mlops-stacks +databricks bundle init "$MLOPS_STACKS_PATH" --config-file "$MLOPS_STACKS_PATH/tests/example-project-configs/gcp/gcp-github.json" +``` \ No newline at end of file diff --git a/databricks_template_schema.json b/databricks_template_schema.json index 674c9e36..99a68768 100644 --- a/databricks_template_schema.json +++ b/databricks_template_schema.json @@ -42,7 +42,7 @@ "type": "string", "description": "\nSelect cloud", "default": "azure", - "enum": ["azure", "aws"] + "enum": ["azure", "aws", "gcp"] }, "input_cicd_platform": { "order": 5, @@ -61,7 +61,7 @@ "input_databricks_staging_workspace_host": { "order": 6, "type": "string", - "default": "{{if eq .input_cloud `azure`}}https://adb-xxxx.xx.azuredatabricks.net{{else if eq .input_cloud `aws`}}https://your-staging-workspace.cloud.databricks.com{{end}}", + "default": "{{if eq .input_cloud `azure`}}https://adb-xxxx.xx.azuredatabricks.net{{else if eq .input_cloud `aws`}}https://your-staging-workspace.cloud.databricks.com{{else if eq .input_cloud `gcp`}}https://your-staging-workspace.gcp.databricks.com{{end}}", "description": "\nURL of staging Databricks workspace,\nIt will run PR CI and preview changes before they're deployed to production.\nDefault", "pattern": "^(https.*)?$", "pattern_match_failure_message": "Databricks staging workspace host URLs must start with https. 
Got invalid workspace host.", @@ -76,7 +76,7 @@ "input_databricks_prod_workspace_host": { "order": 7, "type": "string", - "default": "{{if eq .input_cloud `azure`}}https://adb-xxxx.xx.azuredatabricks.net{{else if eq .input_cloud `aws`}}https://your-prod-workspace.cloud.databricks.com{{end}}", + "default": "{{if eq .input_cloud `azure`}}https://adb-xxxx.xx.azuredatabricks.net{{else if eq .input_cloud `aws`}}https://your-prod-workspace.cloud.databricks.com{{else if eq .input_cloud `gcp`}}https://your-prod-workspace.gcp.databricks.com{{end}}", "description": "\nURL of production Databricks workspace.\nDefault", "pattern": "^(https.*)?$", "pattern_match_failure_message": "Databricks production workspace host URLs must start with https. Got invalid workspace host.", diff --git a/hooks/pre_gen_project.py b/hooks/pre_gen_project.py index 7f075d92..289b3607 100644 --- a/hooks/pre_gen_project.py +++ b/hooks/pre_gen_project.py @@ -1,4 +1,3 @@ - if __name__ == "__main__": raise ValueError( "MLOps project creation has been migrated to use Databricks CLI. " diff --git a/library/functions.tmpl b/library/functions.tmpl index 31fc6bab..9d66c9e9 100644 --- a/library/functions.tmpl +++ b/library/functions.tmpl @@ -12,9 +12,15 @@ {{- else -}} https://docs.databricks.com/{{ print .path }} {{- end -}} + {{- else if eq .cloud `gcp` -}} + {{- if eq .path `repos/git-operations-with-repos#add-a-repo-and-connect-remotely-later` -}} + https://docs.gcp.databricks.com/en/repos/git-operations-with-repos.html#clone-a-repo-connected-to-a-remote-repo + {{- else -}} + https://docs.gcp.databricks.com/{{ print .path }} + {{- end -}} {{- else if eq .cloud `azure` -}} https://learn.microsoft.com/azure/databricks/{{ (regexp `\.html`).ReplaceAllString (print .path) `` }} {{- else -}} - {{ fail `Invalid selection of cloud in function generate_doc_link. Please choose from [azure, aws]` }} + {{ fail `Invalid selection of cloud in function generate_doc_link. 
Please choose from [azure, aws, gcp]` }} {{- end -}} {{- end }} diff --git a/library/input_validation.tmpl b/library/input_validation.tmpl index 2f54565f..9ad0d304 100644 --- a/library/input_validation.tmpl +++ b/library/input_validation.tmpl @@ -1,2 +1,6 @@ {{ define `validation` }} + - Validate to not try UC + GCP + {{- if and (eq .input_include_models_in_unity_catalog `yes`) (eq .input_cloud `gcp`) -}} + {{ fail `The Model Registry in Unity Catalog cannot be used with GCP at this time. Please only use one of the two or neither.` }} + {{- end -}} {{- end -}} diff --git a/library/template_variables.tmpl b/library/template_variables.tmpl index 586dc80d..3be5234e 100644 --- a/library/template_variables.tmpl +++ b/library/template_variables.tmpl @@ -9,6 +9,8 @@ https://adb-xxxx.xx.azuredatabricks.net {{- else if eq .input_cloud `aws` -}} https://your-prod-workspace.cloud.databricks.com + {{- else if eq .input_cloud `gcp` -}} + https://your-prod-workspace.gcp.databricks.com {{- end -}} {{- end -}} {{- end }} @@ -23,6 +25,8 @@ https://adb-xxxx.xx.azuredatabricks.net {{- else if eq .input_cloud `aws` -}} https://your-staging-workspace.cloud.databricks.com + {{- else if eq .input_cloud `gcp` -}} + https://your-staging-workspace.gcp.databricks.com {{- end -}} {{- end -}} {{- end }} @@ -33,6 +37,8 @@ i3.xlarge {{- else if (eq .input_cloud `azure`) -}} Standard_D3_v2 + {{- else if (eq .input_cloud `gcp`) -}} + n2-highmem-4 {{- else -}} {{ fail `Unknown cloud platform` }} {{- end -}} diff --git a/template/{{.input_root_dir}}/.azure/devops-pipelines/deploy-cicd.yml.tmpl b/template/{{.input_root_dir}}/.azure/devops-pipelines/deploy-cicd.yml.tmpl index bfb689b9..c4e88965 100644 --- a/template/{{.input_root_dir}}/.azure/devops-pipelines/deploy-cicd.yml.tmpl +++ b/template/{{.input_root_dir}}/.azure/devops-pipelines/deploy-cicd.yml.tmpl @@ -68,12 +68,12 @@ jobs: displayName: Initialize CI/CD Bundle env: DATABRICKS_HOST: {{template `databricks_staging_workspace_host` .}} - {{ if 
(eq .input_cloud `aws`) -}} - DATABRICKS_TOKEN: $(STAGING_WORKSPACE_TOKEN) - {{- else if (eq .input_cloud `azure`) -}} + {{ if (eq .input_cloud `azure`) -}} ARM_TENANT_ID: $(STAGING_AZURE_SP_TENANT_ID) ARM_CLIENT_ID: $(STAGING_AZURE_SP_APPLICATION_ID) ARM_CLIENT_SECRET: $(STAGING_AZURE_SP_CLIENT_SECRET) + {{ else -}} + DATABRICKS_TOKEN: $(STAGING_WORKSPACE_TOKEN) {{- end }} # Create Branch and Commit CICD Bundle diff --git a/template/{{.input_root_dir}}/.azure/devops-pipelines/{{.input_project_name}}-bundle-cicd.yml.tmpl b/template/{{.input_root_dir}}/.azure/devops-pipelines/{{.input_project_name}}-bundle-cicd.yml.tmpl index 3afb94fc..48145ac1 100644 --- a/template/{{.input_root_dir}}/.azure/devops-pipelines/{{.input_project_name}}-bundle-cicd.yml.tmpl +++ b/template/{{.input_root_dir}}/.azure/devops-pipelines/{{.input_project_name}}-bundle-cicd.yml.tmpl @@ -58,12 +58,12 @@ stages: workingDirectory: $(workingDirectory) displayName: 'Validate bundle for staging' env: - {{ if (eq .input_cloud `aws`) -}} - DATABRICKS_TOKEN: $(STAGING_WORKSPACE_TOKEN) - {{- else if (eq .input_cloud `azure`) -}} + {{ if (eq .input_cloud `azure`) -}} ARM_TENANT_ID: $(STAGING_AZURE_SP_TENANT_ID) ARM_CLIENT_ID: $(STAGING_AZURE_SP_APPLICATION_ID) ARM_CLIENT_SECRET: $(STAGING_AZURE_SP_CLIENT_SECRET) + {{ else -}} + DATABRICKS_TOKEN: $(STAGING_WORKSPACE_TOKEN) {{- end }} @@ -91,12 +91,12 @@ stages: workingDirectory: $(workingDirectory) displayName: 'Validate bundle for prod' env: - {{ if (eq .input_cloud `aws`) -}} - DATABRICKS_TOKEN: $(PROD_WORKSPACE_TOKEN) - {{- else if (eq .input_cloud `azure`) -}} + {{ if (eq .input_cloud `azure`) -}} ARM_TENANT_ID: $(PROD_AZURE_SP_TENANT_ID) ARM_CLIENT_ID: $(PROD_AZURE_SP_APPLICATION_ID) ARM_CLIENT_SECRET: $(PROD_AZURE_SP_CLIENT_SECRET) + {{ else -}} + DATABRICKS_TOKEN: $(PROD_WORKSPACE_TOKEN) {{- end }} # Run StagingBundleCD stage after successfully merging into the {{ .input_default_branch }} branch @@ -136,12 +136,12 @@ stages: workingDirectory: 
$(workingDirectory) displayName: 'Validate bundle for staging' env: - {{ if (eq .input_cloud `aws`) -}} - DATABRICKS_TOKEN: $(STAGING_WORKSPACE_TOKEN) - {{- else if (eq .input_cloud `azure`) -}} + {{ if (eq .input_cloud `azure`) -}} ARM_TENANT_ID: $(STAGING_AZURE_SP_TENANT_ID) ARM_CLIENT_ID: $(STAGING_AZURE_SP_APPLICATION_ID) ARM_CLIENT_SECRET: $(STAGING_AZURE_SP_CLIENT_SECRET) + {{ else -}} + DATABRICKS_TOKEN: $(STAGING_WORKSPACE_TOKEN) {{- end }} # Deploy bundle to Staging workspace @@ -150,12 +150,12 @@ stages: workingDirectory: $(workingDirectory) displayName: 'Deploy bundle to staging' env: - {{ if (eq .input_cloud `aws`) -}} - DATABRICKS_TOKEN: $(STAGING_WORKSPACE_TOKEN) - {{- else if (eq .input_cloud `azure`) -}} + {{ if (eq .input_cloud `azure`) -}} ARM_TENANT_ID: $(STAGING_AZURE_SP_TENANT_ID) ARM_CLIENT_ID: $(STAGING_AZURE_SP_APPLICATION_ID) ARM_CLIENT_SECRET: $(STAGING_AZURE_SP_CLIENT_SECRET) + {{ else -}} + DATABRICKS_TOKEN: $(STAGING_WORKSPACE_TOKEN) {{- end }} # Run prod bundle CD stage after successfully merging into the {{ .input_release_branch }} branch @@ -195,12 +195,12 @@ stages: workingDirectory: $(workingDirectory) displayName: 'Validate bundle for prod' env: - {{ if (eq .input_cloud `aws`) -}} - DATABRICKS_TOKEN: $(PROD_WORKSPACE_TOKEN) - {{- else if (eq .input_cloud `azure`) -}} + {{ if (eq .input_cloud `azure`) -}} ARM_TENANT_ID: $(PROD_AZURE_SP_TENANT_ID) ARM_CLIENT_ID: $(PROD_AZURE_SP_APPLICATION_ID) ARM_CLIENT_SECRET: $(PROD_AZURE_SP_CLIENT_SECRET) + {{ else -}} + DATABRICKS_TOKEN: $(PROD_WORKSPACE_TOKEN) {{- end }} # Deploy bundle to prod workspace @@ -209,10 +209,10 @@ stages: workingDirectory: $(workingDirectory) displayName: 'Deploy bundle to prod' env: - {{ if (eq .input_cloud `aws`) -}} - DATABRICKS_TOKEN: $(PROD_WORKSPACE_TOKEN) - {{- else if (eq .input_cloud `azure`) -}} + {{ if (eq .input_cloud `azure`) -}} ARM_TENANT_ID: $(PROD_AZURE_SP_TENANT_ID) ARM_CLIENT_ID: $(PROD_AZURE_SP_APPLICATION_ID) ARM_CLIENT_SECRET: 
$(PROD_AZURE_SP_CLIENT_SECRET) + {{ else -}} + DATABRICKS_TOKEN: $(PROD_WORKSPACE_TOKEN) {{- end }} diff --git a/template/{{.input_root_dir}}/.azure/devops-pipelines/{{.input_project_name}}-tests-ci.yml.tmpl b/template/{{.input_root_dir}}/.azure/devops-pipelines/{{.input_project_name}}-tests-ci.yml.tmpl index c4ddfd20..b8cc17d2 100644 --- a/template/{{.input_root_dir}}/.azure/devops-pipelines/{{.input_project_name}}-tests-ci.yml.tmpl +++ b/template/{{.input_root_dir}}/.azure/devops-pipelines/{{.input_project_name}}-tests-ci.yml.tmpl @@ -86,12 +86,12 @@ jobs: workingDirectory: $(workingDirectory) displayName: Validate bundle for test deployment target in staging workspace env: - {{ if (eq .input_cloud `aws`) -}} - DATABRICKS_TOKEN: $(STAGING_WORKSPACE_TOKEN) - {{- else if (eq .input_cloud `azure`) -}} + {{ if (eq .input_cloud `azure`) -}} ARM_TENANT_ID: $(STAGING_AZURE_SP_TENANT_ID) ARM_CLIENT_ID: $(STAGING_AZURE_SP_APPLICATION_ID) ARM_CLIENT_SECRET: $(STAGING_AZURE_SP_CLIENT_SECRET) + {{ else -}} + DATABRICKS_TOKEN: $(STAGING_WORKSPACE_TOKEN) {{- end }} # Deploy bundle to staging workspace @@ -100,12 +100,12 @@ jobs: workingDirectory: $(workingDirectory) displayName: Deploy bundle to test deployment target in staging workspace env: - {{ if (eq .input_cloud `aws`) -}} - DATABRICKS_TOKEN: $(STAGING_WORKSPACE_TOKEN) - {{- else if (eq .input_cloud `azure`) -}} + {{ if (eq .input_cloud `azure`) -}} ARM_TENANT_ID: $(STAGING_AZURE_SP_TENANT_ID) ARM_CLIENT_ID: $(STAGING_AZURE_SP_APPLICATION_ID) ARM_CLIENT_SECRET: $(STAGING_AZURE_SP_CLIENT_SECRET) + {{ else -}} + DATABRICKS_TOKEN: $(STAGING_WORKSPACE_TOKEN) {{- end }} {{ if (eq .input_include_feature_store `yes`) }} @@ -115,12 +115,12 @@ jobs: workingDirectory: $(workingDirectory) displayName: Run Feature Engineering Workflow for test deployment target in Staging Workspace env: - {{ if (eq .input_cloud `aws`) -}} - DATABRICKS_TOKEN: $(STAGING_WORKSPACE_TOKEN) - {{- else if (eq .input_cloud `azure`) -}} + {{ if (eq 
.input_cloud `azure`) -}} ARM_TENANT_ID: $(STAGING_AZURE_SP_TENANT_ID) ARM_CLIENT_ID: $(STAGING_AZURE_SP_APPLICATION_ID) ARM_CLIENT_SECRET: $(STAGING_AZURE_SP_CLIENT_SECRET) + {{ else -}} + DATABRICKS_TOKEN: $(STAGING_WORKSPACE_TOKEN) {{- end }} {{ end }} @@ -130,10 +130,10 @@ jobs: workingDirectory: $(workingDirectory) displayName: Run training workflow for test deployment target in staging workspace env: - {{ if (eq .input_cloud `aws`) -}} - DATABRICKS_TOKEN: $(STAGING_WORKSPACE_TOKEN) - {{- else if (eq .input_cloud `azure`) -}} + {{ if (eq .input_cloud `azure`) -}} ARM_TENANT_ID: $(STAGING_AZURE_SP_TENANT_ID) ARM_CLIENT_ID: $(STAGING_AZURE_SP_APPLICATION_ID) ARM_CLIENT_SECRET: $(STAGING_AZURE_SP_CLIENT_SECRET) + {{ else -}} + DATABRICKS_TOKEN: $(STAGING_WORKSPACE_TOKEN) {{- end }} diff --git a/template/{{.input_root_dir}}/.github/workflows/deploy-cicd.yml.tmpl b/template/{{.input_root_dir}}/.github/workflows/deploy-cicd.yml.tmpl index 448fb2f7..a8efc6dc 100644 --- a/template/{{.input_root_dir}}/.github/workflows/deploy-cicd.yml.tmpl +++ b/template/{{.input_root_dir}}/.github/workflows/deploy-cicd.yml.tmpl @@ -11,12 +11,12 @@ on: env: DATABRICKS_HOST: {{template `databricks_staging_workspace_host` .}} - {{ if (eq .input_cloud `aws`) -}} - DATABRICKS_TOKEN: {{`${{ secrets.STAGING_WORKSPACE_TOKEN }}`}} - {{- else if (eq .input_cloud `azure`) -}} + {{ if (eq .input_cloud `azure`) -}} ARM_TENANT_ID: {{`${{ secrets.STAGING_AZURE_SP_TENANT_ID }}`}} ARM_CLIENT_ID: {{`${{ secrets.STAGING_AZURE_SP_APPLICATION_ID }}`}} ARM_CLIENT_SECRET: {{`${{ secrets.STAGING_AZURE_SP_CLIENT_SECRET }}`}} + {{ else -}} + DATABRICKS_TOKEN: {{`${{ secrets.STAGING_WORKSPACE_TOKEN }}`}} {{- end }} jobs: diff --git a/template/{{.input_root_dir}}/.github/workflows/{{.input_project_name}}-bundle-cd-prod.yml.tmpl b/template/{{.input_root_dir}}/.github/workflows/{{.input_project_name}}-bundle-cd-prod.yml.tmpl index 10f007fd..6201b0e2 100644 --- 
a/template/{{.input_root_dir}}/.github/workflows/{{.input_project_name}}-bundle-cd-prod.yml.tmpl +++ b/template/{{.input_root_dir}}/.github/workflows/{{.input_project_name}}-bundle-cd-prod.yml.tmpl @@ -15,12 +15,12 @@ defaults: working-directory: ./{{template `project_name_alphanumeric_underscore` .}} env: - {{ if (eq .input_cloud `aws`) -}} - DATABRICKS_TOKEN: {{`${{ secrets.PROD_WORKSPACE_TOKEN }}`}} - {{- else if (eq .input_cloud `azure`) -}} + {{ if (eq .input_cloud `azure`) -}} ARM_TENANT_ID: {{`${{ secrets.PROD_AZURE_SP_TENANT_ID }}`}} ARM_CLIENT_ID: {{`${{ secrets.PROD_AZURE_SP_APPLICATION_ID }}`}} ARM_CLIENT_SECRET: {{`${{ secrets.PROD_AZURE_SP_CLIENT_SECRET }}`}} + {{ else -}} + DATABRICKS_TOKEN: {{`${{ secrets.PROD_WORKSPACE_TOKEN }}`}} {{- end }} jobs: diff --git a/template/{{.input_root_dir}}/.github/workflows/{{.input_project_name}}-bundle-cd-staging.yml.tmpl b/template/{{.input_root_dir}}/.github/workflows/{{.input_project_name}}-bundle-cd-staging.yml.tmpl index 442af3d1..d3451442 100644 --- a/template/{{.input_root_dir}}/.github/workflows/{{.input_project_name}}-bundle-cd-staging.yml.tmpl +++ b/template/{{.input_root_dir}}/.github/workflows/{{.input_project_name}}-bundle-cd-staging.yml.tmpl @@ -15,12 +15,12 @@ defaults: working-directory: ./{{template `project_name_alphanumeric_underscore` .}} env: - {{ if (eq .input_cloud `aws`) -}} - DATABRICKS_TOKEN: {{`${{ secrets.STAGING_WORKSPACE_TOKEN }}`}} - {{- else if (eq .input_cloud `azure`) -}} + {{ if (eq .input_cloud `azure`) -}} ARM_TENANT_ID: {{`${{ secrets.STAGING_AZURE_SP_TENANT_ID }}`}} ARM_CLIENT_ID: {{`${{ secrets.STAGING_AZURE_SP_APPLICATION_ID }}`}} ARM_CLIENT_SECRET: {{`${{ secrets.STAGING_AZURE_SP_CLIENT_SECRET }}`}} + {{ else -}} + DATABRICKS_TOKEN: {{`${{ secrets.STAGING_WORKSPACE_TOKEN }}`}} {{- end }} jobs: diff --git a/template/{{.input_root_dir}}/.github/workflows/{{.input_project_name}}-bundle-ci.yml.tmpl 
b/template/{{.input_root_dir}}/.github/workflows/{{.input_project_name}}-bundle-ci.yml.tmpl index 49d65a55..cdddcf61 100644 --- a/template/{{.input_root_dir}}/.github/workflows/{{.input_project_name}}-bundle-ci.yml.tmpl +++ b/template/{{.input_root_dir}}/.github/workflows/{{.input_project_name}}-bundle-ci.yml.tmpl @@ -14,16 +14,16 @@ defaults: working-directory: ./{{template `project_name_alphanumeric_underscore` .}}/ env: - {{ if (eq .input_cloud `aws`) -}} - STAGING_WORKSPACE_TOKEN: {{`${{ secrets.STAGING_WORKSPACE_TOKEN }}`}} - PROD_WORKSPACE_TOKEN: {{`${{ secrets.PROD_WORKSPACE_TOKEN }}`}} - {{- else if (eq .input_cloud `azure`) -}} + {{ if (eq .input_cloud `azure`) -}} STAGING_ARM_TENANT_ID: {{`${{ secrets.STAGING_AZURE_SP_TENANT_ID }}`}} STAGING_ARM_CLIENT_ID: {{`${{ secrets.STAGING_AZURE_SP_APPLICATION_ID }}`}} STAGING_ARM_CLIENT_SECRET: {{`${{ secrets.STAGING_AZURE_SP_CLIENT_SECRET }}`}} PROD_ARM_TENANT_ID: {{`${{ secrets.PROD_AZURE_SP_TENANT_ID }}`}} PROD_ARM_CLIENT_ID: {{`${{ secrets.PROD_AZURE_SP_APPLICATION_ID }}`}} PROD_ARM_CLIENT_SECRET: {{`${{ secrets.PROD_AZURE_SP_CLIENT_SECRET }}`}} + {{ else -}} + STAGING_WORKSPACE_TOKEN: {{`${{ secrets.STAGING_WORKSPACE_TOKEN }}`}} + PROD_WORKSPACE_TOKEN: {{`${{ secrets.PROD_WORKSPACE_TOKEN }}`}} {{- end }} jobs: @@ -38,12 +38,12 @@ jobs: - name: Validate Bundle For Staging id: validate env: - {{ if (eq .input_cloud `aws`) -}} - DATABRICKS_TOKEN: {{`${{ env.STAGING_WORKSPACE_TOKEN }}`}} - {{- else if (eq .input_cloud `azure`) -}} + {{ if (eq .input_cloud `azure`) -}} ARM_TENANT_ID: {{`${{ env.STAGING_ARM_TENANT_ID }}`}} ARM_CLIENT_ID: {{`${{ env.STAGING_ARM_CLIENT_ID }}`}} ARM_CLIENT_SECRET: {{`${{ env.STAGING_ARM_CLIENT_SECRET }}`}} + {{ else -}} + DATABRICKS_TOKEN: {{`${{ env.STAGING_WORKSPACE_TOKEN }}`}} {{- end }} run: | databricks bundle validate -t {{ .input_staging_catalog_name }} > ../validate_output.txt @@ -82,12 +82,12 @@ jobs: - name: Validate Bundle For Prod id: validate env: - {{ if (eq .input_cloud 
`aws`) -}} - DATABRICKS_TOKEN: {{`${{ env.PROD_WORKSPACE_TOKEN }}`}} - {{- else if (eq .input_cloud `azure`) -}} + {{ if (eq .input_cloud `azure`) -}} ARM_TENANT_ID: {{`${{ env.PROD_ARM_TENANT_ID }}`}} ARM_CLIENT_ID: {{`${{ env.PROD_ARM_CLIENT_ID }}`}} ARM_CLIENT_SECRET: {{`${{ env.PROD_ARM_CLIENT_SECRET }}`}} + {{ else -}} + DATABRICKS_TOKEN: {{`${{ env.PROD_WORKSPACE_TOKEN }}`}} {{- end }} run: | databricks bundle validate -t {{ .input_prod_catalog_name }} > ../validate_output.txt diff --git a/template/{{.input_root_dir}}/.github/workflows/{{.input_project_name}}-run-tests.yml.tmpl b/template/{{.input_root_dir}}/.github/workflows/{{.input_project_name}}-run-tests.yml.tmpl index df5c1bb6..e3e40fa9 100644 --- a/template/{{.input_root_dir}}/.github/workflows/{{.input_project_name}}-run-tests.yml.tmpl +++ b/template/{{.input_root_dir}}/.github/workflows/{{.input_project_name}}-run-tests.yml.tmpl @@ -11,12 +11,12 @@ defaults: working-directory: ./{{template `project_name_alphanumeric_underscore` .}}/ env: - {{ if (eq .input_cloud `aws`) -}} - DATABRICKS_TOKEN: {{`${{ secrets.STAGING_WORKSPACE_TOKEN }}`}} - {{- else if (eq .input_cloud `azure`) -}} + {{ if (eq .input_cloud `azure`) -}} ARM_TENANT_ID: {{`${{ secrets.STAGING_AZURE_SP_TENANT_ID }}`}} ARM_CLIENT_ID: {{`${{ secrets.STAGING_AZURE_SP_APPLICATION_ID }}`}} ARM_CLIENT_SECRET: {{`${{ secrets.STAGING_AZURE_SP_CLIENT_SECRET }}`}} + {{ else -}} + DATABRICKS_TOKEN: {{`${{ secrets.STAGING_WORKSPACE_TOKEN }}`}} {{- end }} concurrency: {{template `project_name_alphanumeric_underscore` .}}-{{ if (eq .input_include_feature_store `yes`) }}feature-{{end}}training-integration-test-staging diff --git a/template/{{.input_root_dir}}/docs/mlops-setup.md.tmpl b/template/{{.input_root_dir}}/docs/mlops-setup.md.tmpl index 0b98461b..4a282727 100644 --- a/template/{{.input_root_dir}}/docs/mlops-setup.md.tmpl +++ b/template/{{.input_root_dir}}/docs/mlops-setup.md.tmpl @@ -68,7 +68,7 @@ for details. 
For your convenience, we also have Terraform modules that can be used to [create](https://registry.terraform.io/modules/databricks/mlops-azure-project-with-sp-creation/databricks/latest) or [link](https://registry.terraform.io/modules/databricks/mlops-azure-project-with-sp-linking/databricks/latest) service principals. -{{ else if eq .input_cloud `aws` }} +{{ else }} To authenticate and manage ML resources created by CI/CD, [service principals]({{ template `generate_doc_link` (map (pair "cloud" .input_cloud) (pair "path" "administration-guide/users-groups/service-principals.html")) }}) for the project should be created and added to both staging and prod workspaces. Follow @@ -76,8 +76,10 @@ for the project should be created and added to both staging and prod workspaces. and [Add a service principal to a workspace]({{ template `generate_doc_link` (map (pair "cloud" .input_cloud) (pair "path" "administration-guide/users-groups/service-principals.html#add-a-service-principal-to-a-workspace")) }}) for details. +{{ if eq .input_cloud `aws` }} For your convenience, we also have a [Terraform module](https://registry.terraform.io/modules/databricks/mlops-aws-project/databricks/latest) that can set up your service principals. {{ end }} +{{ end }} #### Configure Service Principal (SP) permissions If the created project uses **Unity Catalog**, we expect a catalog to exist with the name of the deployment target by default. @@ -105,21 +107,7 @@ i.e. 
for each environment #### Set secrets for CI/CD -{{ if and (eq .input_cicd_platform `github_actions`) (eq .input_cloud `aws`) }} -After creating the service principals and adding them to the respective staging and prod workspaces, follow -[Manage access tokens for a service principal]({{ template `generate_doc_link` (map (pair "cloud" .input_cloud) (pair "path" "administration-guide/users-groups/service-principals.html#manage-access-tokens-for-a-service-principal")) }}) -to get service principal tokens for staging and prod workspace and follow [Encrypted secrets](https://docs.github.com/en/actions/security-guides/encrypted-secrets) -to add the secrets to GitHub: -- `STAGING_WORKSPACE_TOKEN` : service principal token for staging workspace -- `PROD_WORKSPACE_TOKEN` : service principal token for prod workspace -- `WORKFLOW_TOKEN` : [Github token](https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/managing-your-personal-access-tokens#creating-a-personal-access-token-classic) with workflow permissions. This secret is needed for the Deploy CI/CD Workflow. - -Next, be sure to update the [Workflow Permissions](https://docs.github.com/en/actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token) section under Repo Settings > Actions > General: -- Allow `Read and write permissions`, -- Allow workflows to be able to open pull requests (PRs). 
- {{ end }} - -{{ if and (eq .input_cicd_platform `github_actions`) (eq .input_cloud `azure`) }} +{{ if eq .input_cloud `azure` }} After creating the service principals and adding them to the respective staging and prod workspaces, refer to [Manage access tokens for a service principal]({{ template `generate_doc_link` (map (pair "cloud" .input_cloud) (pair "path" "administration-guide/users-groups/service-principals#--manage-access-tokens-for-a-service-principal")) }}) and [Get Azure AD tokens for service principals]({{ template `generate_doc_link` (map (pair "cloud" .input_cloud) (pair "path" "dev-tools/api/latest/aad/service-prin-aad-token")) }}) @@ -133,7 +121,19 @@ to add the following secrets to GitHub: - `STAGING_AZURE_SP_CLIENT_SECRET` - `WORKFLOW_TOKEN` : [Github token](https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/managing-your-personal-access-tokens#creating-a-personal-access-token-classic) with workflow permissions. This secret is needed for the Deploy CI/CD Workflow. Be sure to update the [Workflow Permissions](https://docs.github.com/en/actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token) section under Repo Settings > Actions > General to allow `Read and write permissions`. 
- {{ end }} +{{ else }} +After creating the service principals and adding them to the respective staging and prod workspaces, follow +[Manage access tokens for a service principal]({{ template `generate_doc_link` (map (pair "cloud" .input_cloud) (pair "path" "administration-guide/users-groups/service-principals.html#manage-access-tokens-for-a-service-principal")) }}) +to get service principal tokens for staging and prod workspace and follow [Encrypted secrets](https://docs.github.com/en/actions/security-guides/encrypted-secrets) +to add the secrets to GitHub: +- `STAGING_WORKSPACE_TOKEN` : service principal token for staging workspace +- `PROD_WORKSPACE_TOKEN` : service principal token for prod workspace +- `WORKFLOW_TOKEN` : [Github token](https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/managing-your-personal-access-tokens#creating-a-personal-access-token-classic) with workflow permissions. This secret is needed for the Deploy CI/CD Workflow. + +Next, be sure to update the [Workflow Permissions](https://docs.github.com/en/actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token) section under Repo Settings > Actions > General: +- Allow `Read and write permissions`, +- Allow workflows to be able to open pull requests (PRs). +{{ end }} ### Setting up CI/CD workflows After setting up authentication for CI/CD, you can now set up CI/CD workflows. We provide a [Deploy CICD workflow](../.github/workflows/deploy-cicd.yml) that can be used to generate the other CICD workflows mentioned below for projects. @@ -260,9 +260,9 @@ The ultimate aim of the service connection approach is to use two separate servi > Note that you will have to update this code snippet with the respective service connection names, depending on which Databricks workspace you are deploying resources to. 1. Create separate Azure Pipelines under your Azure DevOps project using the ‘Existing Azure Pipelines YAML file’ option. 
Create one pipeline for each script. See [here](https://docs.microsoft.com/en-us/azure/devops/pipelines/create-first-pipeline) for more details on creating Azure Pipelines. -6. Define [build validation branch policies](https://learn.microsoft.com/en-us/azure/devops/repos/git/branch-policies?view=azure-devops&tabs=browser#build-validation) for the `{{ .input_default_branch }}` branch using the Azure build pipelines created in step 1. This is required so that any PR changes to the `{{ .input_default_branch }}` must build successfully before PRs can complete. +1. Define [build validation branch policies](https://learn.microsoft.com/en-us/azure/devops/repos/git/branch-policies?view=azure-devops&tabs=browser#build-validation) for the `{{ .input_default_branch }}` branch using the Azure build pipelines created in step 1. This is required so that any PR changes to the `{{ .input_default_branch }}` must build successfully before PRs can complete. In the case of a monorepo, where there are multiple projects under a single repository, set a [path filter](https://learn.microsoft.com/en-us/azure/devops/repos/git/branch-policies?view=azure-devops&tabs=browser#path-filters) on the build validation policies, such that devops pipelines are only triggered when there are changes to the respective projects (e.g. the path filter would be `/project1/*` to trigger a devops pipeline when changes are made to _only_ files under the `project1` folder). - {{ end }} +{{ end }} ### Setting up CI/CD workflows After setting up authentication for CI/CD, you can now set up CI/CD workflows. We provide a [Deploy CICD workflow](../.azure/devops-pipelines/deploy-cicd.yml) that can be used to generate the other CICD workflows mentioned below for projects. 
diff --git a/tests/example-project-configs/gcp/gcp-github.json b/tests/example-project-configs/gcp/gcp-github.json new file mode 100644 index 00000000..63f5838e --- /dev/null +++ b/tests/example-project-configs/gcp/gcp-github.json @@ -0,0 +1,16 @@ +{ + "input_root_dir": "example-gcp-github-project", + "input_project_name": "example-gcp-github-project", + "input_cloud": "gcp", + "input_cicd_platform": "github_actions", + "input_databricks_staging_workspace_host": "https://your-staging-workspace.gcp.databricks.com", + "input_databricks_prod_workspace_host": "https://your-prod-workspace.gcp.databricks.com", + "input_default_branch": "main", + "input_release_branch": "release", + "input_read_user_group": "users", + "input_include_feature_store": "no", + "input_include_mlflow_recipes": "no", + "input_include_models_in_unity_catalog": "no", + "input_schema_name": "test_project_schema_name", + "input_unity_catalog_read_user_group": "account users" +} diff --git a/tests/test_create_project.py b/tests/test_create_project.py index 64dd7258..23fa76e7 100644 --- a/tests/test_create_project.py +++ b/tests/test_create_project.py @@ -39,6 +39,11 @@ "input_databricks_staging_workspace_host": "https://your-staging-workspace.cloud.databricks.com", "input_databricks_prod_workspace_host": "https://your-prod-workspace.cloud.databricks.com", } +DEFAULT_PARAMS_GCP = { + "input_cloud": "gcp", + "input_databricks_staging_workspace_host": "https://your-staging-workspace.gcp.databricks.com", + "input_databricks_prod_workspace_host": "https://your-prod-workspace.gcp.databricks.com", +} def assert_no_disallowed_strings_in_files( @@ -77,7 +82,12 @@ def should_check_file_for_disallowed_strings(path): @parametrize_by_project_generation_params -def test_no_template_strings_after_param_substitution(generated_project_dir): +def test_no_template_strings_after_param_substitution( + cloud, include_models_in_unity_catalog, generated_project_dir +): + if cloud == "gcp" and 
include_models_in_unity_catalog == "yes": + # Skip test for GCP with Unity Catalog + return assert_no_disallowed_strings_in_files( file_paths=[ os.path.join(generated_project_dir, path) @@ -117,7 +127,10 @@ def test_no_databricks_doc_strings_before_project_generation(): @pytest.mark.large @parametrize_by_project_generation_params -def test_markdown_links(generated_project_dir): +def test_markdown_links(cloud, include_models_in_unity_catalog, generated_project_dir): + if cloud == "gcp" and include_models_in_unity_catalog == "yes": + # Skip test for GCP with Unity Catalog + return markdown_checker_configs(generated_project_dir) subprocess.run( """ @@ -173,6 +186,9 @@ def test_generate_project_with_default_values( - The default param values in the substitution logic in the pre_gen_project.py hook are up to date. - The default param values in the help strings in databricks_template_schema.json are up to date. """ + if cloud == "gcp" and include_models_in_unity_catalog == "yes": + # Skip test for GCP with Unity Catalog + return context = { "input_project_name": TEST_PROJECT_NAME, "input_root_dir": TEST_PROJECT_NAME, @@ -190,6 +206,8 @@ def test_generate_project_with_default_values( params = {**DEFAULT_PARAM_VALUES, **DEFAULT_PARAMS_AZURE} elif cloud == "aws": params = {**DEFAULT_PARAM_VALUES, **DEFAULT_PARAMS_AWS} + elif cloud == "gcp": + params = {**DEFAULT_PARAM_VALUES, **DEFAULT_PARAMS_GCP} for param, value in params.items(): assert f"{param}={value}" in test_file_contents @@ -234,6 +252,9 @@ def test_generate_project_check_delta_output( """ Asserts the behavior of Delta Table-related artifacts when generating MLOps Stacks. """ + if cloud == "gcp" and include_models_in_unity_catalog == "yes": + # Skip test for GCP with Unity Catalog + return context = prepareContext( cloud, cicd_platform, @@ -275,6 +296,9 @@ def test_generate_project_check_feature_store_output( """ Asserts the behavior of feature store-related artifacts when generating MLOps Stacks. 
""" + if cloud == "gcp" and include_models_in_unity_catalog == "yes": + # Skip test for GCP with Unity Catalog + return context = prepareContext( cloud, cicd_platform, @@ -312,6 +336,9 @@ def test_generate_project_check_recipe_output( """ Asserts the behavior of MLflow Recipes-related artifacts when generating MLOps Stacks. """ + if cloud == "gcp" and include_models_in_unity_catalog == "yes": + # Skip test for GCP with Unity Catalog + return context = prepareContext( cloud, cicd_platform, @@ -351,6 +378,7 @@ def test_workspace_dir_strip_query_params( workspace_host = { "aws": "https://dbc-my-aws-workspace.cloud.databricks.com", "azure": "https://adb-mycoolworkspace.11.azuredatabricks.net", + "gcp": "https://dbc-my-gcp-workspace.gcp.databricks.com", }[cloud] workspace_url = f"{workspace_host}{workspace_url_suffix}" context = { diff --git a/tests/test_github_actions.py b/tests/test_github_actions.py index ebae9bd9..1b17a8c0 100644 --- a/tests/test_github_actions.py +++ b/tests/test_github_actions.py @@ -23,9 +23,14 @@ ], ) @parametrize_by_cloud -def test_generated_yaml_format(cicd_platform, generated_project_dir): +def test_generated_yaml_format( + cloud, include_models_in_unity_catalog, generated_project_dir +): # Note: actionlint only works when the directory is a git project. Thus we begin by initiatilizing # the generated mlops project with git. 
+ if cloud == "gcp" and include_models_in_unity_catalog == "yes": + # Skip test for GCP with Unity Catalog + return subprocess.run( """ git init @@ -54,8 +59,13 @@ def test_generated_yaml_format(cicd_platform, generated_project_dir): ], ) @parametrize_by_cloud -def test_run_unit_tests_workflow(cicd_platform, generated_project_dir): +def test_run_unit_tests_workflow( + cloud, include_models_in_unity_catalog, generated_project_dir +): """Test that the GitHub workflow for running unit tests in the materialized project passes""" + if cloud == "gcp" and include_models_in_unity_catalog == "yes": + # Skip test for GCP with Unity Catalog + return # We only test the unit test workflow, as it's the only one that doesn't require # Databricks REST API subprocess.run( diff --git a/tests/utils.py b/tests/utils.py index 3d448cb6..c733cbe8 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -27,14 +27,22 @@ AWS_DEFAULT_PARAMS = { **AZURE_DEFAULT_PARAMS, + "input_cloud": "aws", "input_databricks_staging_workspace_host": "https://your-staging-workspace.cloud.databricks.com", "input_databricks_prod_workspace_host": "https://your-prod-workspace.cloud.databricks.com", } +GCP_DEFAULT_PARAMS = { + **AZURE_DEFAULT_PARAMS, + "input_cloud": "gcp", + "input_databricks_staging_workspace_host": "https://your-staging-workspace.gcp.databricks.com", + "input_databricks_prod_workspace_host": "https://your-prod-workspace.gcp.databricks.com", +} + def parametrize_by_cloud(fn): @wraps(fn) - @pytest.mark.parametrize("cloud", ["aws", "azure"]) + @pytest.mark.parametrize("cloud", ["aws", "azure", "gcp"]) def wrapper(*args, **kwargs): return fn(*args, **kwargs) @@ -42,7 +50,7 @@ def wrapper(*args, **kwargs): def parametrize_by_project_generation_params(fn): - @pytest.mark.parametrize("cloud", ["aws", "azure"]) + @pytest.mark.parametrize("cloud", ["aws", "azure", "gcp"]) @pytest.mark.parametrize( "cicd_platform", [ @@ -144,12 +152,16 @@ def markdown_checker_configs(tmpdir): def generate(directory, 
databricks_cli, context): + if context.get("input_cloud") == "aws": + default_params = AWS_DEFAULT_PARAMS + elif context.get("input_cloud") == "gcp": + if context.get("input_include_models_in_unity_catalog") == "yes": + return + default_params = GCP_DEFAULT_PARAMS + else: + default_params = AZURE_DEFAULT_PARAMS params = { - **( - AWS_DEFAULT_PARAMS - if context.get("input_cloud") == "aws" - else AZURE_DEFAULT_PARAMS - ), + **default_params, **context, } json_string = json.dumps(params)