diff --git a/databricks_template_schema.json b/databricks_template_schema.json index 6f010668..db607bca 100644 --- a/databricks_template_schema.json +++ b/databricks_template_schema.json @@ -5,7 +5,7 @@ "input_setup_cicd_and_project": { "order": 1, "type": "string", - "description": "{{if false}}\n\nERROR: This template is not supported by your current Databricks CLI version.\nPlease hit control-C and go to https://docs.databricks.com/en/dev-tools/cli/install.html for instructions on upgrading the CLI to the minimum version supported by MLOps Stacks.\n\n\n{{end}}\nSelect if both CI/CD and the Project should be set up, or only one of them. You can always set up the other later by running initialization again", + "description": "{{if false}}\n\nERROR: This template is not supported by your current Databricks CLI version.\nPlease hit control-C and go to https://docs.databricks.com/en/dev-tools/cli/install.html for instructions on upgrading the CLI to the minimum version supported by MLOps Stacks.\n\n\n{{end}}\nSelect if both CI/CD and the Project should be set up, or only one of them.\nYou can always set up the other later by running initialization again", "default": "CICD_and_Project", "enum": ["CICD_and_Project", "Project_Only", "CICD_Only"] }, @@ -28,7 +28,7 @@ "order": 3, "type": "string", "default": "{{ .input_project_name }}", - "description": "\nRoot directory name. For monorepos, this is the name of the root directory that contains all the projects. 
Default", + "description": "\nRoot directory name.\nFor monorepos, name of the root directory that contains all the projects.\nDefault", "skip_prompt_if": { "properties": { "input_setup_cicd_and_project": { @@ -62,7 +62,7 @@ "order": 6, "type": "string", "default": "{{if eq .input_cloud `azure`}}https://adb-xxxx.xx.azuredatabricks.net{{else if eq .input_cloud `aws`}}https://your-staging-workspace.cloud.databricks.com{{end}}", - "description": "\nURL of staging Databricks workspace, used to run CI tests on PRs and preview config changes before they're deployed to production. Default", + "description": "\nURL of staging Databricks workspace.\nIt will run PR CI and preview changes before they're deployed to production.\nDefault", "pattern": "^(https.*)?$", "pattern_match_failure_message": "Databricks staging workspace host URLs must start with https. Got invalid workspace host.", "skip_prompt_if": { @@ -77,7 +77,7 @@ "order": 7, "type": "string", "default": "{{if eq .input_cloud `azure`}}https://adb-xxxx.xx.azuredatabricks.net{{else if eq .input_cloud `aws`}}https://your-prod-workspace.cloud.databricks.com{{end}}", - "description": "\nURL of production Databricks workspace. Default", + "description": "\nURL of production Databricks workspace.\nDefault", "pattern": "^(https.*)?$", "pattern_match_failure_message": "Databricks production workspace host URLs must start with https. Got invalid workspace host.", "skip_prompt_if": { @@ -92,7 +92,7 @@ "order": 8, "type": "string", "default": "main", - "description": "\nName of the default branch, where the prod and staging ML resources are deployed from and the latest ML code is staged. Default", + "description": "\nName of the default branch.\nProd and staging resources are deployed from this branch, which stages the latest ML code.\nDefault", "skip_prompt_if": { "properties": { "input_setup_cicd_and_project": { @@ -105,7 +105,7 @@ "order": 9, "type": "string", "default": "release", - "description": "\nName of the release branch. 
The production jobs (model training, batch inference) defined in this stack pull ML code from this branch. Default", + "description": "\nName of the release branch.\nThe training and other production jobs pull ML code from this branch.\nDefault", "skip_prompt_if": { "properties": { "input_setup_cicd_and_project": { @@ -118,7 +118,7 @@ "order": 10, "type": "string", "default": "users", - "description": "\nUser group name to give READ permissions to for project resources (ML jobs, integration test job runs, and machine learning resources). A group with this name must exist in both the staging and prod workspaces. Default", + "description": "\nUser group name to give READ permissions to for project resources\n(ML jobs, integration test job runs, and machine learning resources).\nA group with this name must exist in both the staging and prod workspaces.\nDefault", "skip_prompt_if": { "properties": { "input_setup_cicd_and_project": { @@ -144,7 +144,7 @@ "input_schema_name": { "order": 12, "type": "string", - "description": "\nName of schema to use when registering a model in Unity Catalog. \nNote that this schema must already exist, and we recommend keeping the name the same as the project name as well as giving the service principals the right access. Default", + "description": "\nName of schema to use when registering a model in Unity Catalog.\nThis schema must already exist and service principals must have access.\nWe recommend using the project name.\nDefault", "default": "{{if (eq .input_include_models_in_unity_catalog `no`)}}schema{{else}}{{ .input_project_name }}{{end}}", "pattern": "^[^ .\\-\\/]*$", "pattern_match_failure_message": "Valid schema names cannot contain any of the following characters: \" \", \".\", \"-\", \"\\\", \"/\"", @@ -171,7 +171,7 @@ "order": 13, "type": "string", "default": "account users", - "description": "\nUser group name to give EXECUTE privileges to models in Unity Catalog. 
A group with this name must exist in the Unity Catalog that the staging and prod workspaces can access. Default", + "description": "\nUser group name to give EXECUTE privileges to models in Unity Catalog (UC).\nIt must exist in UC with access granted to the staging and prod workspaces.\nDefault", "skip_prompt_if": { "anyOf":[ { diff --git a/template/{{.input_root_dir}}/.azure/devops-pipelines/{{.input_project_name}}-bundle-cicd.yml.tmpl b/template/{{.input_root_dir}}/.azure/devops-pipelines/{{.input_project_name}}-bundle-cicd.yml.tmpl index f5e09964..3afb94fc 100644 --- a/template/{{.input_root_dir}}/.azure/devops-pipelines/{{.input_project_name}}-bundle-cicd.yml.tmpl +++ b/template/{{.input_root_dir}}/.azure/devops-pipelines/{{.input_project_name}}-bundle-cicd.yml.tmpl @@ -49,7 +49,7 @@ stages: # Install Databricks CLI - script: | - curl -fsSL https://raw.githubusercontent.com/databricks/setup-cli/main/install.sh | sh + curl -fsSL https://raw.githubusercontent.com/databricks/setup-cli/{{template `cli_version` .}}/install.sh | sh displayName: 'Install Databricks CLI' # Validate bundle to be deployed to the staging workspace @@ -82,7 +82,7 @@ stages: # Install Databricks CLI - script: | - curl -fsSL https://raw.githubusercontent.com/databricks/setup-cli/main/install.sh | sh + curl -fsSL https://raw.githubusercontent.com/databricks/setup-cli/{{template `cli_version` .}}/install.sh | sh displayName: 'Install Databricks CLI' # Validate bundle to be deployed to the prod workspace @@ -127,7 +127,7 @@ stages: # Install Databricks CLI - script: | - curl -fsSL https://raw.githubusercontent.com/databricks/setup-cli/main/install.sh | sh + curl -fsSL https://raw.githubusercontent.com/databricks/setup-cli/{{template `cli_version` .}}/install.sh | sh displayName: 'Install Databricks CLI' # Validate bundle to be deployed to the Staging workspace @@ -186,7 +186,7 @@ stages: # Install Databricks CLI - script: | - curl -fsSL 
https://raw.githubusercontent.com/databricks/setup-cli/main/install.sh | sh + curl -fsSL https://raw.githubusercontent.com/databricks/setup-cli/{{template `cli_version` .}}/install.sh | sh displayName: 'Install Databricks CLI' # Validate bundle to be deployed to the prod workspace diff --git a/template/{{.input_root_dir}}/README.md.tmpl b/template/{{.input_root_dir}}/README.md.tmpl index f9a40a41..a3857433 100644 --- a/template/{{.input_root_dir}}/README.md.tmpl +++ b/template/{{.input_root_dir}}/README.md.tmpl @@ -16,10 +16,10 @@ This project contains the following components: | Component | Description | |----------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -{{ if (eq .input_setup_cicd_and_project `CICD_and_Project`)}} +{{- if (eq .input_setup_cicd_and_project `CICD_and_Project`)}} | ML Code | Example ML project code, with unit tested Python modules and notebooks | | ML Resources as Code | ML pipeline resources (training and batch inference jobs with schedules, etc) configured and deployed through [databricks CLI bundles]({{ template `generate_doc_link` (map (pair "cloud" .input_cloud) (pair "path" "dev-tools/cli/bundle-cli.html")) }}) | -{{ end }} +{{- end }} | CI/CD | {{ if (eq .input_cicd_platform `github_actions`) }}[GitHub Actions](https://github.com/actions) workflows to test and deploy ML code and resources {{ else if (eq .input_cicd_platform `azure_devops`) }}[Azure DevOps Pipelines](https://azure.microsoft.com/en-gb/products/devops/pipelines/) to test and deploy ML code and resources{{ end }} | contained in the following files: @@ -27,14 +27,14 @@ contained in the following files: ``` {{ .input_root_dir }} <- Root directory. 
Both monorepo and polyrepo are supported. │ -{{ if (eq .input_setup_cicd_and_project `CICD_and_Project`)}} +{{- if (eq .input_setup_cicd_and_project `CICD_and_Project`)}} ├── {{template `project_name_alphanumeric_underscore` .}} <- Contains python code, notebooks and ML resources related to one ML project. │ │ │ ├── requirements.txt <- Specifies Python dependencies for ML code (for example: model training, batch inference). │ │ │ ├── databricks.yml <- databricks.yml is the root bundle file for the ML project that can be loaded by databricks CLI bundles. It defines the bundle name, workspace URL and resource config component to be included. │ │ -{{ if and (eq .input_include_feature_store `no`) (eq .input_include_mlflow_recipes `no`) }} +{{- if and (eq .input_include_feature_store `no`) (eq .input_include_mlflow_recipes `no`) }} │ ├── training <- Training folder contains Notebook that trains and registers the model. │ │ │ ├── validation <- Optional model validation step before deploying a model. @@ -59,7 +59,7 @@ contained in the following files: │ ├── ml-artifacts-resource.yml <- ML resource config definition for model and experiment │ │ │ ├── monitoring-workflow-resource.yml <- ML resource config definition for data monitoring workflow -{{ else if (eq .input_include_feature_store `yes`) }} +{{- else if (eq .input_include_feature_store `yes`) }} │ ├── training <- Training folder contains Notebook that trains and registers the model with feature store support. │ │ │ ├── feature_engineering <- Feature computation code (Python modules) that implements the feature transforms. @@ -90,7 +90,7 @@ contained in the following files: │ ├── ml-artifacts-resource.yml <- ML resource config definition for model and experiment │ │ │ ├── monitoring-workflow-resource.yml <- ML resource config definition for data monitoring workflow -{{ else }} +{{- else }} │ ├── training <- Folder for model development via MLflow recipes. 
│ │ │ │ │ ├── steps <- MLflow recipe steps (Python modules) implementing ML pipeline logic, e.g. model training and evaluation. Most @@ -128,14 +128,14 @@ contained in the following files: │ ├── ml-artifacts-resource.yml <- ML resource config definition for model and experiment │ │ │ ├── monitoring-workflow-resource.yml <- ML resource config definition for data monitoring workflow -{{ end }} -{{ end }} +{{- end }} +{{- end }} │ -{{ if or (eq .input_cicd_platform `github_actions`) (eq .input_cicd_platform `github_actions_for_github_enterprise_servers`) }} +{{- if or (eq .input_cicd_platform `github_actions`) (eq .input_cicd_platform `github_actions_for_github_enterprise_servers`) }} ├── .github <- Configuration folder for CI/CD using GitHub Actions. {{ if (eq .input_setup_cicd_and_project `CICD_and_Project`)}} The CI/CD workflows deploy ML resources defined in the `./resources/*` folder with databricks CLI bundles.{{ end }} -{{ else if (eq .input_cicd_platform `azure_devops`) }} +{{- else if (eq .input_cicd_platform `azure_devops`) }} ├── .azure <- Configuration folder for CI/CD using Azure DevOps Pipelines. {{ if (eq .input_setup_cicd_and_project `CICD_and_Project`)}} The CI/CD workflows deploy ML resources defined in the `./resources/*` folder with databricks CLI bundles.{{ end }} -{{ end }} +{{- end }} │ ├── docs <- Contains documentation for the repo. 
│ diff --git a/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/README.md.tmpl b/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/README.md.tmpl index fbd901ed..84d3bc5c 100644 --- a/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/README.md.tmpl +++ b/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/README.md.tmpl @@ -37,7 +37,7 @@ contained in the following files: │ │ │ ├── databricks.yml <- databricks.yml is the root bundle file for the ML project that can be loaded by databricks CLI bundles. It defines the bundle name, workspace URL and resource config component to be included. │ │ -{{ if and (eq .input_include_feature_store `no`) (eq .input_include_mlflow_recipes `no`) -}} +{{- if and (eq .input_include_feature_store `no`) (eq .input_include_mlflow_recipes `no`) }} │ ├── training <- Training folder contains Notebook that trains and registers the model. │ │ │ ├── validation <- Optional model validation step before deploying a model. @@ -62,7 +62,7 @@ contained in the following files: │ ├── ml-artifacts-resource.yml <- ML resource config definition for model and experiment │ │ │ ├── monitoring-workflow-resource.yml <- ML resource config definition for data monitoring workflow -{{ else if (eq .input_include_feature_store `yes`) -}} +{{- else if (eq .input_include_feature_store `yes`) }} │ ├── training <- Training folder contains Notebook that trains and registers the model with feature store support. │ │ │ ├── feature_engineering <- Feature computation code (Python modules) that implements the feature transforms. 
@@ -93,7 +93,7 @@ contained in the following files: │ ├── ml-artifacts-resource.yml <- ML resource config definition for model and experiment │ │ │ ├── monitoring-workflow-resource.yml <- ML resource config definition for data monitoring workflow -{{ else -}} +{{- else }} │ ├── training <- Folder for model development via MLflow recipes. │ │ │ │ │ ├── steps <- MLflow recipe steps (Python modules) implementing ML pipeline logic, e.g. model training and evaluation. Most @@ -131,7 +131,7 @@ contained in the following files: │ ├── ml-artifacts-resource.yml <- ML resource config definition for model and experiment │ │ │ ├── monitoring-workflow-resource.yml <- ML resource config definition for data monitoring workflow -{{ end -}} +{{- end }} ``` {{ if (eq .input_include_feature_store `yes`) }}