diff --git a/.github/workflows/README.md b/.github/workflows/README.md new file mode 100644 index 0000000..04356c8 --- /dev/null +++ b/.github/workflows/README.md @@ -0,0 +1,7 @@ +# CI/CD Workflow Definitions +This directory contains CI/CD workflow definitions using [GitHub Actions](https://docs.github.com/en/actions), +under ``workflows``. These workflows cover testing and deployment of both ML code (for model training, batch inference, etc) and the +Databricks ML asset definitions under ``mlops_stacks_gcp_fs/assets``. + +To set up CI/CD for a new project, +please refer to [ML asset config - set up CI CD](../../mlops_stacks_gcp_fs/assets/README.md#set-up-ci-and-cd). diff --git a/.github/workflows/mlops_stacks_gcp_fs-bundle-cd-prod.yml b/.github/workflows/mlops_stacks_gcp_fs-bundle-cd-prod.yml new file mode 100644 index 0000000..c8c8168 --- /dev/null +++ b/.github/workflows/mlops_stacks_gcp_fs-bundle-cd-prod.yml @@ -0,0 +1,34 @@ +# This GitHub workflow deploys Bundle assets (ML asset config and more) +# defined under mlops_stacks_gcp_fs/assets/* +# and mlops_stacks_gcp_fs/databricks.yml with prod deployment target configs, +# when PRs are merged into the release branch +name: Bundle Deployment for mlops_stacks_gcp_fs Prod + +on: + push: + branches: + - 'release' + workflow_dispatch: + +defaults: + run: + working-directory: ./mlops_stacks_gcp_fs + +env: + DATABRICKS_TOKEN: ${{ secrets.PROD_WORKSPACE_TOKEN }} + +jobs: + prod: + concurrency: mlops_stacks_gcp_fs-prod-bundle-job + runs-on: ubuntu-22.04 + steps: + - uses: actions/checkout@v3 + - uses: databricks/setup-cli@v0.211.0 + - name: Validate Bundle For Prod + id: validate + run: | + databricks bundle validate -t prod + - name: Deploy Bundle to Prod + id: deploy + run: | + databricks bundle deploy -t prod diff --git a/.github/workflows/mlops_stacks_gcp_fs-bundle-cd-staging.yml b/.github/workflows/mlops_stacks_gcp_fs-bundle-cd-staging.yml new file mode 100644 index 0000000..e3cb067 --- /dev/null +++ b/.github/workflows/mlops_stacks_gcp_fs-bundle-cd-staging.yml @@ -0,0 +1,34 @@ +# This GitHub workflow deploys Bundle assets (ML asset config and more) +# defined under mlops_stacks_gcp_fs/assets/* +# and mlops_stacks_gcp_fs/databricks.yml with staging deployment target configs, +# when PRs are merged into the default branch +name: Bundle Deployment for mlops_stacks_gcp_fs Staging + +on: + push: + branches: + - 'main' + workflow_dispatch: + +defaults: + run: + working-directory: ./mlops_stacks_gcp_fs + +env: + DATABRICKS_TOKEN: ${{ secrets.STAGING_WORKSPACE_TOKEN }} + +jobs: + staging: + concurrency: mlops_stacks_gcp_fs-staging-bundle-job + runs-on: ubuntu-22.04 + steps: + - uses: actions/checkout@v3 + - uses: databricks/setup-cli@v0.211.0 + - name: Validate Bundle For Staging + id: validate + run: | + databricks bundle validate -t staging + - name: Deploy Bundle to Staging + id: deploy + run: | + databricks bundle deploy -t staging diff --git a/.github/workflows/mlops_stacks_gcp_fs-bundle-ci.yml b/.github/workflows/mlops_stacks_gcp_fs-bundle-ci.yml new file mode 100644 index 0000000..02ac728 --- /dev/null +++ b/.github/workflows/mlops_stacks_gcp_fs-bundle-ci.yml @@ -0,0 +1,93 @@ +# This GitHub workflow validates Bundle config (ML asset config and more) +# defined under mlops_stacks_gcp_fs/assets/* +# and mlops_stacks_gcp_fs/databricks.yml, when PRs are merged into the main branch +name: Bundle validation for mlops_stacks_gcp_fs + +on: + workflow_dispatch: + pull_request_target: + +defaults: + run: + working-directory: ./mlops_stacks_gcp_fs/ + 
+env: + STAGING_WORKSPACE_TOKEN: ${{ secrets.STAGING_WORKSPACE_TOKEN }} + PROD_WORKSPACE_TOKEN: ${{ secrets.PROD_WORKSPACE_TOKEN }} + +jobs: + staging: + concurrency: mlops_stacks_gcp_fs-staging-bundle-job + runs-on: ubuntu-22.04 + steps: + - uses: actions/checkout@v3 + with: + ref: ${{ github.event.pull_request.head.sha || github.sha }} + - uses: databricks/setup-cli@v0.211.0 + - name: Validate Bundle For Staging + id: validate + env: + DATABRICKS_TOKEN: ${{ env.STAGING_WORKSPACE_TOKEN }} + run: | + databricks bundle validate -t staging > ../validate_output.txt + - name: Create Comment with Bundle Configuration + uses: actions/github-script@v6 + id: comment + with: + github-token: ${{ secrets.GITHUB_TOKEN }} + script: | + const fs = require('fs'); + const fileContents = fs.readFileSync('validate_output.txt', 'utf8'); + const output = `#### Bundle Staging Config Validated 🖌 +
<details><summary>Staging Validation Output</summary> + + \`\`\`\n + ${fileContents} + \`\`\` + + </details>
` + + github.rest.issues.createComment({ + issue_number: context.issue.number, + owner: context.repo.owner, + repo: context.repo.repo, + body: output + }) + + prod: + concurrency: mlops_stacks_gcp_fs-prod-bundle-job + runs-on: ubuntu-22.04 + steps: + - uses: actions/checkout@v3 + with: + ref: ${{ github.event.pull_request.head.sha || github.sha }} + - uses: databricks/setup-cli@v0.211.0 + - name: Validate Bundle For Prod + id: validate + env: + DATABRICKS_TOKEN: ${{ env.PROD_WORKSPACE_TOKEN }} + run: | + databricks bundle validate -t prod > ../validate_output.txt + - name: Create Comment with Bundle Configuration + uses: actions/github-script@v6 + id: comment + with: + github-token: ${{ secrets.GITHUB_TOKEN }} + script: | + const fs = require('fs'); + const fileContents = fs.readFileSync('validate_output.txt', 'utf8'); + const output = `#### Bundle Prod Config Validated 🖌 +
<details><summary>Prod Validation Output</summary> + + \`\`\`\n + ${fileContents} + \`\`\` + + </details>
` + + github.rest.issues.createComment({ + issue_number: context.issue.number, + owner: context.repo.owner, + repo: context.repo.repo, + body: output + }) diff --git a/.github/workflows/mlops_stacks_gcp_fs-lint-cicd-workflow-files.yml b/.github/workflows/mlops_stacks_gcp_fs-lint-cicd-workflow-files.yml new file mode 100644 index 0000000..59a5c5b --- /dev/null +++ b/.github/workflows/mlops_stacks_gcp_fs-lint-cicd-workflow-files.yml @@ -0,0 +1,19 @@ +name: Lint CI/CD workflow files +on: + pull_request: + paths: + - '.github/workflows/**' + workflow_dispatch: + +jobs: + lint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Download actionlint + id: get_actionlint + run: bash <(curl https://raw.githubusercontent.com/rhysd/actionlint/main/scripts/download-actionlint.bash) + shell: bash + - name: Check workflow files + run: ${{ steps.get_actionlint.outputs.executable }} -color + shell: bash diff --git a/.github/workflows/mlops_stacks_gcp_fs-run-tests-fs.yml b/.github/workflows/mlops_stacks_gcp_fs-run-tests-fs.yml new file mode 100644 index 0000000..7156cfc --- /dev/null +++ b/.github/workflows/mlops_stacks_gcp_fs-run-tests-fs.yml @@ -0,0 +1,59 @@ +name: Feature and Training Integration Tests for mlops_stacks_gcp_fs +on: + workflow_dispatch: + pull_request: + +defaults: + run: + working-directory: ./mlops_stacks_gcp_fs/ + +env: + DATABRICKS_TOKEN: ${{ secrets.STAGING_WORKSPACE_TOKEN }} + +concurrency: mlops_stacks_gcp_fs-feature-training-integration-test-staging + +jobs: + unit_tests: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: actions/setup-python@v2 + with: + python-version: 3.8 + # Feature store tests bring up a local Spark session, so Java is required. + - uses: actions/setup-java@v2 + with: + distribution: 'temurin' + java-version: '11' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + pip install -r ../test-requirements.txt + - name: Run tests with pytest + run: | + pytest + + integration_test: + needs: unit_tests + runs-on: ubuntu-22.04 + steps: + - name: Checkout repo + uses: actions/checkout@v3 + - uses: databricks/setup-cli@v0.211.0 + - name: Validate Bundle For Test Deployment Target in Staging Workspace + id: validate + run: | + databricks bundle validate -t test + - name: Deploy Bundle to Test Deployment Target in Staging Workspace + id: deploy + run: | + databricks bundle deploy -t test + - name: Run Feature Engineering Workflow for Test Deployment Target in Staging Workspace + id: feature_engineering + run: | + databricks bundle run write_feature_table_job -t test + - name: Run Training Workflow for Test Deployment Target in Staging Workspace + id: training + run: | + databricks bundle run model_training_job -t test diff --git a/mlops_stacks_gcp_fs/README.md b/mlops_stacks_gcp_fs/README.md new file mode 100644 index 0000000..a7a72e9 --- /dev/null +++ b/mlops_stacks_gcp_fs/README.md @@ -0,0 +1,5 @@ +# mlops_stacks_gcp_fs + +This directory contains python code, notebooks and ML asset configs related to one ML project. + +See the [Project overview](../docs/project-overview.md) for details on code structure of project directory. 
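
The `unit_tests` job in the Feature and Training Integration Tests workflow above installs Java because the feature tests bring up a local Spark session. A minimal sketch of that kind of local-Spark pytest test is shown below, assuming `pyspark`, `pandas`, `pytest`, and a local JVM are available (which is what the Java setup step provides); the fixture and toy assertion are illustrative only and simply mirror the pattern used by the test files under `mlops_stacks_gcp_fs/tests/` later in this diff.

```python
import pandas as pd
import pytest
from pyspark.sql import SparkSession


@pytest.fixture(scope="session")
def spark():
    # Single-core local Spark session; needs a local JVM, hence the setup-java step in CI.
    session = (
        SparkSession.builder.master("local[1]")
        .appName("local-feature-tests")
        .getOrCreate()
    )
    yield session
    session.stop()


def test_input_converts_to_spark(spark):
    # Trivial check that a pandas DataFrame round-trips through the local Spark session.
    pdf = pd.DataFrame({"pickup_zip": [94400], "fare_amount": [12.5]})
    assert spark.createDataFrame(pdf).count() == 1
```
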
\ No newline at end of file diff --git a/mlops_stacks_gcp_fs/__init__.py b/mlops_stacks_gcp_fs/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/mlops_stacks_gcp_fs/assets/README.md b/mlops_stacks_gcp_fs/assets/README.md index ce30069..7788f48 100644 --- a/mlops_stacks_gcp_fs/assets/README.md +++ b/mlops_stacks_gcp_fs/assets/README.md @@ -134,7 +134,7 @@ new_cluster: &new_cluster spark_version: 13.3.x-cpu-ml-scala2.12 node_type_id: n2-highmem-4 custom_tags: - clusterSource: mlops-stack/0.2 + clusterSource: mlops-stack resources: jobs: @@ -189,7 +189,7 @@ new_cluster: &new_cluster spark_version: 13.3.x-cpu-ml-scala2.12 node_type_id: n2-highmem-4 custom_tags: - clusterSource: mlops-stack/0.2 + clusterSource: mlops-stack resources: jobs: diff --git a/mlops_stacks_gcp_fs/assets/batch-inference-workflow-asset.yml b/mlops_stacks_gcp_fs/assets/batch-inference-workflow-asset.yml new file mode 100644 index 0000000..9fe6e29 --- /dev/null +++ b/mlops_stacks_gcp_fs/assets/batch-inference-workflow-asset.yml @@ -0,0 +1,41 @@ +new_cluster: &new_cluster + new_cluster: + num_workers: 3 + spark_version: 13.3.x-cpu-ml-scala2.12 + node_type_id: n2-highmem-4 + custom_tags: + clusterSource: mlops-stack + +common_permissions: &permissions + permissions: + - level: CAN_VIEW + group_name: users + +resources: + jobs: + batch_inference_job: + name: ${bundle.target}-mlops_stacks_gcp_fs-batch-inference-job + tasks: + - task_key: batch_inference_job + <<: *new_cluster + notebook_task: + notebook_path: ../deployment/batch_inference/notebooks/BatchInference.py + base_parameters: + env: ${bundle.target} + input_table_name: hive_metastore.default.taxi_scoring_sample_feature_store_inference_input + output_table_name: ${bundle.target}_mlops_stacks_gcp_fs_predictions + model_name: ${var.model_name} + # git source information of current ML asset deployment. It will be persisted as part of the workflow run + git_source_info: url:${bundle.git.origin_url}; branch:${bundle.git.branch}; commit:${bundle.git.commit} + + schedule: + quartz_cron_expression: "0 0 11 * * ?" # daily at 11am + timezone_id: UTC + <<: *permissions + # If you want to turn on notifications for this job, please uncomment the below code, + # and provide a list of emails to the on_failure argument. + # + # email_notifications: + # on_failure: + # - first@company.com + # - second@company.com diff --git a/mlops_stacks_gcp_fs/assets/feature-engineering-workflow-asset.yml b/mlops_stacks_gcp_fs/assets/feature-engineering-workflow-asset.yml new file mode 100644 index 0000000..a8f8669 --- /dev/null +++ b/mlops_stacks_gcp_fs/assets/feature-engineering-workflow-asset.yml @@ -0,0 +1,64 @@ +new_cluster: &new_cluster + new_cluster: + num_workers: 3 + spark_version: 13.3.x-cpu-ml-scala2.12 + node_type_id: n2-highmem-4 + custom_tags: + clusterSource: mlops-stack + +common_permissions: &permissions + permissions: + - level: CAN_VIEW + group_name: users + +resources: + jobs: + write_feature_table_job: + name: ${bundle.target}-mlops_stacks_gcp_fs-write-feature-table-job + job_clusters: + - job_cluster_key: write_feature_table_job_cluster + <<: *new_cluster + tasks: + - task_key: PickupFeatures + job_cluster_key: write_feature_table_job_cluster + notebook_task: + notebook_path: ../feature_engineering/notebooks/GenerateAndWriteFeatures.py + base_parameters: + # TODO modify these arguments to reflect your setup. + input_table_path: /databricks-datasets/nyctaxi-with-zipcodes/subsampled + # TODO: Empty start/end dates will process the whole range. 
Update this as needed to process recent data. + input_start_date: "" + input_end_date: "" + timestamp_column: tpep_pickup_datetime + output_table_name: feature_store_taxi_example.${bundle.target}_mlops_stacks_gcp_fs_trip_pickup_features + features_transform_module: pickup_features + primary_keys: zip + # git source information of current ML asset deployment. It will be persisted as part of the workflow run + git_source_info: url:${bundle.git.origin_url}; branch:${bundle.git.branch}; commit:${bundle.git.commit} + - task_key: DropoffFeatures + job_cluster_key: write_feature_table_job_cluster + notebook_task: + notebook_path: ../feature_engineering/notebooks/GenerateAndWriteFeatures.py + base_parameters: + # TODO: modify these arguments to reflect your setup. + input_table_path: /databricks-datasets/nyctaxi-with-zipcodes/subsampled + # TODO: Empty start/end dates will process the whole range. Update this as needed to process recent data. + input_start_date: "" + input_end_date: "" + timestamp_column: tpep_dropoff_datetime + output_table_name: feature_store_taxi_example.${bundle.target}_mlops_stacks_gcp_fs_trip_dropoff_features + features_transform_module: dropoff_features + primary_keys: zip + # git source information of current ML asset deployment. It will be persisted as part of the workflow run + git_source_info: url:${bundle.git.origin_url}; branch:${bundle.git.branch}; commit:${bundle.git.commit} + schedule: + quartz_cron_expression: "0 0 7 * * ?" # daily at 7am + timezone_id: UTC + <<: *permissions + # If you want to turn on notifications for this job, please uncomment the below code, + # and provide a list of emails to the on_failure argument. + # + # email_notifications: + # on_failure: + # - first@company.com + # - second@company.com diff --git a/mlops_stacks_gcp_fs/assets/ml-artifacts-asset.yml b/mlops_stacks_gcp_fs/assets/ml-artifacts-asset.yml new file mode 100644 index 0000000..3b3b305 --- /dev/null +++ b/mlops_stacks_gcp_fs/assets/ml-artifacts-asset.yml @@ -0,0 +1,54 @@ +# Deployment target specific values +targets: + dev: + resources: + models: + model: + description: MLflow registered model for the "mlops_stacks_gcp_fs" ML Project for ${bundle.target} deployment target. + + test: + resources: + models: + model: + description: MLflow registered model for the "mlops_stacks_gcp_fs" ML Project for ${bundle.target} deployment target. + + staging: + resources: + models: + model: + description: MLflow registered model for the "mlops_stacks_gcp_fs" ML Project for ${bundle.target} deployment target. + + prod: + resources: + models: + model: + description: | + MLflow registered model for the "mlops_stacks_gcp_fs" ML Project. See the corresponding [Git repo]($#{var.git_repo_url}) for details on the project. + + Links: + * [Recurring model training job](https://416411475796958.8.gcp.databricks.com#job/${resources.jobs.model_training_job.id}): trains fresh model versions using the latest ML code. + * [Recurring batch inference job](https://416411475796958.8.gcp.databricks.com#job/${resources.jobs.batch_inference_job.id}): applies the latest ${bundle.target} model version for batch inference. 
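
For the `input_start_date` / `input_end_date` parameters in the feature engineering workflow above, empty strings mean the whole input range is processed. A standalone sketch of that filtering pattern follows; the function and argument names here are illustrative, and the project's own version is the `_filter_df_by_ts` helper in the feature modules later in this diff.

```python
import pyspark.sql.functions as F


def filter_by_date_range(df, ts_column, start_date="", end_date=""):
    """Keep rows whose ts_column falls in [start_date, end_date); empty strings mean no bound."""
    if ts_column and start_date:
        df = df.filter(F.col(ts_column) >= start_date)
    if ts_column and end_date:
        df = df.filter(F.col(ts_column) < end_date)
    return df


# With both dates left empty (the defaults in the workflow config above), no filter is applied
# and the whole table is processed:
#   filtered = filter_by_date_range(raw_df, "tpep_pickup_datetime")
```
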
+ +# Allow users to read the experiment and the model +common_permissions: &permissions + permissions: + - level: CAN_READ + group_name: users + + + +# Defines model and experiments +resources: + models: + model: + name: ${var.model_name} + <<: *permissions + depends_on: + - resources.jobs.model_training_job.id + - resources.jobs.batch_inference_job.id + + experiments: + experiment: + name: ${var.experiment_name} + <<: *permissions + description: MLflow Experiment used to track runs for mlops_stacks_gcp_fs project. diff --git a/mlops_stacks_gcp_fs/assets/model-workflow-asset.yml b/mlops_stacks_gcp_fs/assets/model-workflow-asset.yml new file mode 100644 index 0000000..f7fcc37 --- /dev/null +++ b/mlops_stacks_gcp_fs/assets/model-workflow-asset.yml @@ -0,0 +1,99 @@ +new_cluster: &new_cluster + new_cluster: + num_workers: 3 + spark_version: 13.3.x-cpu-ml-scala2.12 + node_type_id: n2-highmem-4 + custom_tags: + clusterSource: mlops-stack + +common_permissions: &permissions + permissions: + - level: CAN_VIEW + group_name: users + +resources: + jobs: + model_training_job: + name: ${bundle.target}-mlops_stacks_gcp_fs-model-training-job + job_clusters: + - job_cluster_key: model_training_job_cluster + <<: *new_cluster + tasks: + - task_key: Train + job_cluster_key: model_training_job_cluster + notebook_task: + notebook_path: ../training/notebooks/TrainWithFeatureStore.py + base_parameters: + env: ${bundle.target} + # TODO: Update training_data_path + training_data_path: /databricks-datasets/nyctaxi-with-zipcodes/subsampled + experiment_name: ${var.experiment_name} + model_name: ${var.model_name} + pickup_features_table: feature_store_taxi_example.${bundle.target}_mlops_stacks_gcp_fs_trip_pickup_features + dropoff_features_table: feature_store_taxi_example.${bundle.target}_mlops_stacks_gcp_fs_trip_dropoff_features + # git source information of current ML asset deployment. It will be persisted as part of the workflow run + git_source_info: url:${bundle.git.origin_url}; branch:${bundle.git.branch}; commit:${bundle.git.commit} + - task_key: ModelValidation + job_cluster_key: model_training_job_cluster + depends_on: + - task_key: Train + notebook_task: + notebook_path: ../validation/notebooks/ModelValidation.py + base_parameters: + experiment_name: ${var.experiment_name} + # The `run_mode` defines whether model validation is enabled or not. + # It can be one of the three values: + # `disabled` : Do not run the model validation notebook. + # `dry_run` : Run the model validation notebook. Ignore failed model validation rules and proceed to move + # model to Production stage. + # `enabled` : Run the model validation notebook. Move model to Production stage only if all model validation + # rules are passing. + # TODO: update run_mode + run_mode: dry_run + # Whether to load the current registered "Production" stage model as baseline. + # Baseline model is a requirement for relative change and absolute change validation thresholds. + # TODO: update enable_baseline_comparison + enable_baseline_comparison: "false" + # Please refer to data parameter in mlflow.evaluate documentation https://mlflow.org/docs/latest/python_api/mlflow.html#mlflow.evaluate + # TODO: update validation_input + validation_input: SELECT * FROM delta.`dbfs:/databricks-datasets/nyctaxi-with-zipcodes/subsampled` + # A string describing the model type. The model type can be either "regressor" and "classifier". 
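
The ModelValidation parameters configured here (`validation_input`, `targets`, `enable_baseline_comparison`, and the `model_type` and threshold settings that follow) are ultimately arguments to `mlflow.evaluate`. A hedged sketch of that call is below; the toy evaluation frame, model URIs, and threshold values are placeholders, not the project's actual configuration.

```python
import mlflow
import pandas as pd
from mlflow.models import MetricThreshold

# Toy two-row frame standing in for spark.sql(validation_input).toPandas();
# the model URIs below are placeholders as well.
eval_df = pd.DataFrame({"trip_distance": [1.2, 3.4], "fare_amount": [7.0, 14.5]})
candidate_model_uri = "models:/dev-mlops_stacks_gcp_fs-model/2"
baseline_model_uri = "models:/dev-mlops_stacks_gcp_fs-model/1"

result = mlflow.evaluate(
    model=candidate_model_uri,
    data=eval_df,
    targets="fare_amount",       # the `targets` parameter above
    model_type="regressor",      # the `model_type` parameter configured below
    validation_thresholds={
        # The kind of dict the validation thresholds loader function is expected to return.
        "root_mean_squared_error": MetricThreshold(threshold=100.0, greater_is_better=False),
    },
    # Only meaningful when enable_baseline_comparison is "true".
    baseline_model=baseline_model_uri,
)
print(result.metrics)
```
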
+ # Please refer to model_type parameter in mlflow.evaluate documentation https://mlflow.org/docs/latest/python_api/mlflow.html#mlflow.evaluate + # TODO: update model_type + model_type: regressor + # The string name of a column from data that contains evaluation labels. + # Please refer to targets parameter in mlflow.evaluate documentation https://mlflow.org/docs/latest/python_api/mlflow.html#mlflow.evaluate + # TODO: targets + targets: fare_amount + # Specifies the name of the function in mlops_stacks_gcp_fs/training_validation_deployment/validation/validation.py that returns custom metrics. + # TODO(optional): custom_metrics_loader_function + custom_metrics_loader_function: custom_metrics + # Specifies the name of the function in mlops_stacks_gcp_fs/training_validation_deployment/validation/validation.py that returns model validation thresholds. + # TODO(optional): validation_thresholds_loader_function + validation_thresholds_loader_function: validation_thresholds + # Specifies the name of the function in mlops_stacks_gcp_fs/training_validation_deployment/validation/validation.py that returns evaluator_config. + # TODO(optional): evaluator_config_loader_function + evaluator_config_loader_function: evaluator_config + # git source information of current ML asset deployment. It will be persisted as part of the workflow run + git_source_info: url:${bundle.git.origin_url}; branch:${bundle.git.branch}; commit:${bundle.git.commit} + - task_key: ModelDeployment + job_cluster_key: model_training_job_cluster + depends_on: + - task_key: ModelValidation + notebook_task: + notebook_path: ../deployment/model_deployment/notebooks/ModelDeployment.py + base_parameters: + env: ${bundle.target} + # git source information of current ML asset deployment. It will be persisted as part of the workflow run + git_source_info: url:${bundle.git.origin_url}; branch:${bundle.git.branch}; commit:${bundle.git.commit} + schedule: + quartz_cron_expression: "0 0 9 * * ?" # daily at 9am + timezone_id: UTC + <<: *permissions + # If you want to turn on notifications for this job, please uncomment the below code, + # and provide a list of emails to the on_failure argument. + # + # email_notifications: + # on_failure: + # - first@company.com + # - second@company.com diff --git a/mlops_stacks_gcp_fs/assets/monitoring-workflow-asset.yml b/mlops_stacks_gcp_fs/assets/monitoring-workflow-asset.yml new file mode 100644 index 0000000..a4d505d --- /dev/null +++ b/mlops_stacks_gcp_fs/assets/monitoring-workflow-asset.yml @@ -0,0 +1 @@ +# TODO: Add data monitoring support for mlops diff --git a/mlops_stacks_gcp_fs/databricks.yml b/mlops_stacks_gcp_fs/databricks.yml new file mode 100644 index 0000000..fee3f77 --- /dev/null +++ b/mlops_stacks_gcp_fs/databricks.yml @@ -0,0 +1,38 @@ +# The name of the bundle. run `databricks bundle schema` to see the full bundle settings schema. +bundle: + name: mlops_stacks_gcp_fs + +variables: + experiment_name: + description: Experiment name for the model training. + default: /Users/${workspace.current_user.userName}/${bundle.target}-mlops_stacks_gcp_fs-experiment + model_name: + description: Model name for the model training. 
+ default: ${bundle.target}-mlops_stacks_gcp_fs-model + +include: + # Assets folder contains ML artifact assets for the ml project that defines model and experiment + # And workflows assets for the ml project including model training -> validation -> deployment, + # feature engineering, batch inference, data monitoring, metric refresh, alerts and triggering retraining + - ./assets/*.yml + +# Deployment Target specific values for workspace +targets: + dev: + default: true + workspace: + # TODO: add dev workspace URL + host: + + staging: + workspace: + host: https://416411475796958.8.gcp.databricks.com + + prod: + workspace: + host: https://416411475796958.8.gcp.databricks.com + + test: + workspace: + host: https://416411475796958.8.gcp.databricks.com + diff --git a/mlops_stacks_gcp_fs/deployment/batch_inference/README.md b/mlops_stacks_gcp_fs/deployment/batch_inference/README.md new file mode 100644 index 0000000..b801e10 --- /dev/null +++ b/mlops_stacks_gcp_fs/deployment/batch_inference/README.md @@ -0,0 +1,42 @@ +# Batch Inference +To set up batch inference job via scheduled Databricks workflow, please refer to [mlops_stacks_gcp_fs/assets/README.md](../../assets/README.md) + +## Prepare the batch inference input table for the example Project +Please run the following code in a notebook to generate the example batch inference input table. + +``` +from pyspark.sql.functions import to_timestamp, lit +from pyspark.sql.types import IntegerType +import math +from datetime import timedelta, timezone + +def rounded_unix_timestamp(dt, num_minutes=15): + """ + Ceilings datetime dt to interval num_minutes, then returns the unix timestamp. + """ + nsecs = dt.minute * 60 + dt.second + dt.microsecond * 1e-6 + delta = math.ceil(nsecs / (60 * num_minutes)) * (60 * num_minutes) - nsecs + return int((dt + timedelta(seconds=delta)).replace(tzinfo=timezone.utc).timestamp()) + + +rounded_unix_timestamp_udf = udf(rounded_unix_timestamp, IntegerType()) + +df = spark.table("delta.`dbfs:/databricks-datasets/nyctaxi-with-zipcodes/subsampled`") +df.withColumn( + "rounded_pickup_datetime", + to_timestamp(rounded_unix_timestamp_udf(df["tpep_pickup_datetime"], lit(15))), +).withColumn( + "rounded_dropoff_datetime", + to_timestamp(rounded_unix_timestamp_udf(df["tpep_dropoff_datetime"], lit(30))), +).drop( + "tpep_pickup_datetime" +).drop( + "tpep_dropoff_datetime" +).drop( + "fare_amount" +).write.mode( + "overwrite" +).saveAsTable( + name="hive_metastore.default.taxi_scoring_sample_feature_store_inference_input" +) +``` diff --git a/mlops_stacks_gcp_fs/deployment/batch_inference/notebooks/BatchInference.py b/mlops_stacks_gcp_fs/deployment/batch_inference/notebooks/BatchInference.py new file mode 100644 index 0000000..73d26bd --- /dev/null +++ b/mlops_stacks_gcp_fs/deployment/batch_inference/notebooks/BatchInference.py @@ -0,0 +1,99 @@ +# Databricks notebook source +################################################################################## +# Batch Inference Notebook +# +# This notebook is an example of applying a model for batch inference against an input delta table, +# It is configured and can be executed as the batch_inference_job in the batch_inference_job workflow defined under +# ``mlops_stacks_gcp_fs/assets/batch-inference-workflow-asset.yml`` +# +# Parameters: +# +# * env (optional) - String name of the current environment (dev, staging, or prod). Defaults to "dev" +# * input_table_name (required) - Delta table name containing your input data. 
+# * output_table_name (required) - Delta table name where the predictions will be written to. +# Note that this will create a new version of the Delta table if +# the table already exists +# * model_name (required) - The name of the model to be used in batch inference. +################################################################################## + + +# List of input args needed to run the notebook as a job. +# Provide them via DB widgets or notebook arguments. +# +# Name of the current environment +dbutils.widgets.dropdown("env", "dev", ["dev", "staging", "prod"], "Environment Name") +# A Hive-registered Delta table containing the input features. +dbutils.widgets.text("input_table_name", "", label="Input Table Name") +# Delta table to store the output predictions. +dbutils.widgets.text("output_table_name", "", label="Output Table Name") +# Batch inference model name +dbutils.widgets.text( + "model_name", "dev-mlops_stacks_gcp_fs-model", label="Model Name" +) + + +# COMMAND ---------- + +import os + +notebook_path = '/Workspace/' + os.path.dirname(dbutils.notebook.entry_point.getDbutils().notebook().getContext().notebookPath().get()) +%cd $notebook_path + +# COMMAND ---------- + +# MAGIC %pip install -r ../../../requirements.txt + +# COMMAND ---------- + +dbutils.library.restartPython() + +# COMMAND ---------- + +import sys +import os +notebook_path = '/Workspace/' + os.path.dirname(dbutils.notebook.entry_point.getDbutils().notebook().getContext().notebookPath().get()) +%cd $notebook_path +%cd .. +sys.path.append("../..") + +# COMMAND ---------- + +# DBTITLE 1,Define input and output variables +from utils import get_deployed_model_stage_for_env + +env = dbutils.widgets.get("env") +input_table_name = dbutils.widgets.get("input_table_name") +output_table_name = dbutils.widgets.get("output_table_name") +model_name = dbutils.widgets.get("model_name") +assert input_table_name != "", "input_table_name notebook parameter must be specified" +assert output_table_name != "", "output_table_name notebook parameter must be specified" +assert model_name != "", "model_name notebook parameter must be specified" +stage = get_deployed_model_stage_for_env(env) +model_uri = f"models:/{model_name}/{stage}" + +# COMMAND ---------- + +from mlflow import MlflowClient + +# Get model version from stage +model_version_infos = MlflowClient().search_model_versions("name = '%s'" % model_name) +model_version = max( + int(version.version) + for version in model_version_infos + if version.current_stage == stage +) + +# COMMAND ---------- + +# Get datetime +from datetime import datetime + +ts = datetime.now().strftime("%Y-%m-%d %H:%M:%S") + +# COMMAND ---------- +# DBTITLE 1,Load model and run inference + +from predict import predict_batch + +predict_batch(spark, model_uri, input_table_name, output_table_name, model_version, ts) +dbutils.notebook.exit(output_table_name) diff --git a/mlops_stacks_gcp_fs/deployment/batch_inference/predict.py b/mlops_stacks_gcp_fs/deployment/batch_inference/predict.py new file mode 100644 index 0000000..d9d2975 --- /dev/null +++ b/mlops_stacks_gcp_fs/deployment/batch_inference/predict.py @@ -0,0 +1,32 @@ +import mlflow +from pyspark.sql.functions import struct, lit, to_timestamp + + +def predict_batch( + spark_session, model_uri, input_table_name, output_table_name, model_version, ts +): + """ + Apply the model at the specified URI for batch inference on the table with name input_table_name, + writing results to the table with name output_table_name + """ + + table = 
spark_session.table(input_table_name) + + from databricks.feature_store import FeatureStoreClient + + fs_client = FeatureStoreClient() + + prediction_df = fs_client.score_batch( + model_uri, + table + ) + output_df = ( + prediction_df.withColumn("prediction", prediction_df["prediction"]) + .withColumn("model_version", lit(model_version)) + .withColumn("inference_timestamp", to_timestamp(lit(ts))) + ) + + output_df.display() + # Model predictions are written to the Delta table provided as input. + # Delta is the default format in Databricks Runtime 8.0 and above. + output_df.write.format("delta").mode("overwrite").saveAsTable(output_table_name) \ No newline at end of file diff --git a/mlops_stacks_gcp_fs/deployment/model_deployment/deploy.py b/mlops_stacks_gcp_fs/deployment/model_deployment/deploy.py new file mode 100644 index 0000000..a5f3616 --- /dev/null +++ b/mlops_stacks_gcp_fs/deployment/model_deployment/deploy.py @@ -0,0 +1,34 @@ +import sys +import pathlib + +sys.path.append(str(pathlib.Path(__file__).parent.parent.parent.resolve())) +from utils import get_deployed_model_stage_for_env +from mlflow.tracking import MlflowClient + + +def deploy(model_uri, env): + """ + Deploys an already-registered model produced by moving it into the appropriate stage for model deployment. + + :param model_uri: URI of the model to deploy. Must be in the format "models://", as described in + https://www.mlflow.org/docs/latest/model-registry.html#fetching-an-mlflow-model-from-the-model-registry + :param env: name of the environment in which we're performing deployment, i.e one of "dev", "staging", "prod". + Defaults to "dev" + :return: + """ + _, model_name, version = model_uri.split("/") + client = MlflowClient() + mv = client.get_model_version(model_name, version) + target_stage = get_deployed_model_stage_for_env(env) + if mv.current_stage != target_stage: + client.transition_model_version_stage( + name=model_name, + version=version, + stage=target_stage, + archive_existing_versions=True, + ) + print(f"Successfully deployed model with URI {model_uri} to {env}") + + +if __name__ == "__main__": + deploy(model_uri=sys.argv[1], env=sys.argv[2]) diff --git a/mlops_stacks_gcp_fs/deployment/model_deployment/notebooks/ModelDeployment.py b/mlops_stacks_gcp_fs/deployment/model_deployment/notebooks/ModelDeployment.py new file mode 100644 index 0000000..cc769ad --- /dev/null +++ b/mlops_stacks_gcp_fs/deployment/model_deployment/notebooks/ModelDeployment.py @@ -0,0 +1,52 @@ +# Databricks notebook source +################################################################################## +# Helper notebook to transition the model stage. This notebook is run +# after the Train.py notebook as part of a multi-task job, in order to transition model +# to target stage after training completes. +# +# Note that we deploy the model to the stage in MLflow Model Registry equivalent to the +# environment in which the multi-task job is executed (e.g deploy the trained model to +# stage=Production if triggered in the prod environment). In a practical setting, we would +# recommend enabling the model validation step between model training and automatically +# registering the model to the Production stage in prod. +# +# This notebook has the following parameters: +# +# * env (required) - String name of the current environment for model deployment, which decides the target stage. +# * model_uri (required) - URI of the model to deploy. 
Must be in the format "models://", as described in +# https://www.mlflow.org/docs/latest/model-registry.html#fetching-an-mlflow-model-from-the-model-registry +# This parameter is read as a task value +# (https://docs.databricks.com/dev-tools/databricks-utils.html#get-command-dbutilsjobstaskvaluesget), +# rather than as a notebook widget. That is, we assume a preceding task (the Train.py +# notebook) has set a task value with key "model_uri". +################################################################################## + +# List of input args needed to run the notebook as a job. +# Provide them via DB widgets or notebook arguments. +# +# Name of the current environment +dbutils.widgets.dropdown("env", "None", ["None", "staging", "prod"], "Environment Name") + +# COMMAND ---------- + +import os +import sys +notebook_path = '/Workspace/' + os.path.dirname(dbutils.notebook.entry_point.getDbutils().notebook().getContext().notebookPath().get()) +%cd $notebook_path +%cd .. +sys.path.append("../..") + +# COMMAND ---------- + +from deploy import deploy + +model_uri = dbutils.jobs.taskValues.get("Train", "model_uri", debugValue="") +env = dbutils.widgets.get("env") +assert env != "None", "env notebook parameter must be specified" +assert model_uri != "", "model_uri notebook parameter must be specified" +deploy(model_uri, env) + +# COMMAND ---------- +print( + f"Successfully completed model deployment for {model_uri}" +) diff --git a/mlops_stacks_gcp_fs/feature_engineering/README.md b/mlops_stacks_gcp_fs/feature_engineering/README.md new file mode 100644 index 0000000..b44d39c --- /dev/null +++ b/mlops_stacks_gcp_fs/feature_engineering/README.md @@ -0,0 +1,4 @@ +# Feature Engineering +To set up the feature engineering job via scheduled Databricks workflow, please refer to [mlops_stacks_gcp_fs/assets/README.md](../assets/README.md) + +For additional details on using the feature store, please refer to [mlops_stacks_gcp_fs/docs/ml-developer-guide-fs.md](../../docs/ml-developer-guide-fs.md). \ No newline at end of file diff --git a/mlops_stacks_gcp_fs/feature_engineering/__init__.py b/mlops_stacks_gcp_fs/feature_engineering/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/mlops_stacks_gcp_fs/feature_engineering/features/__init__.py b/mlops_stacks_gcp_fs/feature_engineering/features/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/mlops_stacks_gcp_fs/feature_engineering/features/dropoff_features.py b/mlops_stacks_gcp_fs/feature_engineering/features/dropoff_features.py new file mode 100644 index 0000000..c1d93c6 --- /dev/null +++ b/mlops_stacks_gcp_fs/feature_engineering/features/dropoff_features.py @@ -0,0 +1,64 @@ +""" +This sample module contains features logic that can be used to generate and populate tables in Feature Store. +You should plug in your own features computation logic in the compute_features_fn method below. 
+""" +import pyspark.sql.functions as F +from pyspark.sql.types import IntegerType, StringType, TimestampType +from pytz import timezone + + +@F.udf(returnType=IntegerType()) +def _is_weekend(dt): + tz = "America/New_York" + return int(dt.astimezone(timezone(tz)).weekday() >= 5) # 5 = Saturday, 6 = Sunday + + +@F.udf(returnType=StringType()) +def _partition_id(dt): + # datetime -> "YYYY-MM" + return f"{dt.year:04d}-{dt.month:02d}" + + +def _filter_df_by_ts(df, ts_column, start_date, end_date): + if ts_column and start_date: + df = df.filter(F.col(ts_column) >= start_date) + if ts_column and end_date: + df = df.filter(F.col(ts_column) < end_date) + return df + + +def compute_features_fn(input_df, timestamp_column, start_date, end_date): + """Contains logic to compute features. + + Given an input dataframe and time ranges, this function should compute features, populate an output dataframe and + return it. This method will be called from a Feature Store pipeline job and the output dataframe will be written + to a Feature Store table. You should update this method with your own feature computation logic. + + The timestamp_column, start_date, end_date args are optional but strongly recommended for time-series based + features. + + TODO: Update and adapt the sample code for your use case + + :param input_df: Input dataframe. + :param timestamp_column: Column containing the timestamp. This column is used to limit the range of feature + computation. It is also used as the timestamp key column when populating the feature table, so it needs to be + returned in the output. + :param start_date: Start date of the feature computation interval. + :param end_date: End date of the feature computation interval. + :return: Output dataframe containing computed features given the input arguments. + """ + df = _filter_df_by_ts(input_df, timestamp_column, start_date, end_date) + dropoffzip_features = ( + df.groupBy("dropoff_zip", F.window(timestamp_column, "30 minute")) + .agg(F.count("*").alias("count_trips_window_30m_dropoff_zip")) + .select( + F.col("dropoff_zip").alias("zip"), + F.unix_timestamp(F.col("window.end")) + .alias(timestamp_column) + .cast(TimestampType()), + _partition_id(F.to_timestamp(F.col("window.end"))).alias("yyyy_mm"), + F.col("count_trips_window_30m_dropoff_zip").cast(IntegerType()), + _is_weekend(F.col("window.end")).alias("dropoff_is_weekend"), + ) + ) + return dropoffzip_features diff --git a/mlops_stacks_gcp_fs/feature_engineering/features/pickup_features.py b/mlops_stacks_gcp_fs/feature_engineering/features/pickup_features.py new file mode 100644 index 0000000..895a9b8 --- /dev/null +++ b/mlops_stacks_gcp_fs/feature_engineering/features/pickup_features.py @@ -0,0 +1,63 @@ +""" +This sample module contains features logic that can be used to generate and populate tables in Feature Store. +You should plug in your own features computation logic in the compute_features_fn method below. 
+""" +import pyspark.sql.functions as F +from pyspark.sql.types import FloatType, IntegerType, StringType, TimestampType +from pytz import timezone + + +@F.udf(returnType=StringType()) +def _partition_id(dt): + # datetime -> "YYYY-MM" + return f"{dt.year:04d}-{dt.month:02d}" + + +def _filter_df_by_ts(df, ts_column, start_date, end_date): + if ts_column and start_date: + df = df.filter(F.col(ts_column) >= start_date) + if ts_column and end_date: + df = df.filter(F.col(ts_column) < end_date) + return df + + +def compute_features_fn(input_df, timestamp_column, start_date, end_date): + """Contains logic to compute features. + + Given an input dataframe and time ranges, this function should compute features, populate an output dataframe and + return it. This method will be called from a Feature Store pipeline job and the output dataframe will be written + to a Feature Store table. You should update this method with your own feature computation logic. + + The timestamp_column, start_date, end_date args are optional but strongly recommended for time-series based + features. + + TODO: Update and adapt the sample code for your use case + + :param input_df: Input dataframe. + :param timestamp_column: Column containing a timestamp. This column is used to limit the range of feature + computation. It is also used as the timestamp key column when populating the feature table, so it needs to be + returned in the output. + :param start_date: Start date of the feature computation interval. + :param end_date: End date of the feature computation interval. + :return: Output dataframe containing computed features given the input arguments. + """ + df = _filter_df_by_ts(input_df, timestamp_column, start_date, end_date) + pickupzip_features = ( + df.groupBy( + "pickup_zip", F.window(timestamp_column, "1 hour", "15 minutes") + ) # 1 hour window, sliding every 15 minutes + .agg( + F.mean("fare_amount").alias("mean_fare_window_1h_pickup_zip"), + F.count("*").alias("count_trips_window_1h_pickup_zip"), + ) + .select( + F.col("pickup_zip").alias("zip"), + F.unix_timestamp(F.col("window.end")) + .alias(timestamp_column) + .cast(TimestampType()), + _partition_id(F.to_timestamp(F.col("window.end"))).alias("yyyy_mm"), + F.col("mean_fare_window_1h_pickup_zip").cast(FloatType()), + F.col("count_trips_window_1h_pickup_zip").cast(IntegerType()), + ) + ) + return pickupzip_features diff --git a/mlops_stacks_gcp_fs/feature_engineering/notebooks/GenerateAndWriteFeatures.py b/mlops_stacks_gcp_fs/feature_engineering/notebooks/GenerateAndWriteFeatures.py new file mode 100644 index 0000000..1c6fce7 --- /dev/null +++ b/mlops_stacks_gcp_fs/feature_engineering/notebooks/GenerateAndWriteFeatures.py @@ -0,0 +1,139 @@ +# Databricks notebook source +################################################################################## +# Generate and Write Features Notebook +# +# This notebook can be used to generate and write features to a Databricks Feature Store table. +# It is configured and can be executed as the tasks in the write_feature_table_job workflow defined under +# ``mlops_stacks_gcp_fs/assets/feature-engineering-workflow-asset.yml`` +# +# Parameters: +# +# * input_table_path (required) - Path to input data. +# * output_table_name (required) - Fully qualified schema + Delta table name for the feature table where the features +# * will be written to. Note that this will create the Feature table if it does not +# * exist. +# * primary_keys (required) - A comma separated string of primary key columns of the output feature table. 
+# * +# * timestamp_column (optional) - Timestamp column of the input data. Used to limit processing based on +# * date ranges. This column is used as the timestamp_key column in the feature table. +# * input_start_date (optional) - Used to limit feature computations based on timestamp_column values. +# * input_end_date (optional) - Used to limit feature computations based on timestamp_column values. +# * +# * features_transform_module (required) - Python module containing the feature transform logic. +################################################################################## + + +# List of input args needed to run this notebook as a job. +# Provide them via DB widgets or notebook arguments. +# +# A Hive-registered Delta table containing the input data. +dbutils.widgets.text( + "input_table_path", + "/databricks-datasets/nyctaxi-with-zipcodes/subsampled", + label="Input Table Name", +) +# Input start date. +dbutils.widgets.text("input_start_date", "", label="Input Start Date") +# Input end date. +dbutils.widgets.text("input_end_date", "", label="Input End Date") +# Timestamp column. Will be used to filter input start/end dates. +# This column is also used as a timestamp key of the feature table. +dbutils.widgets.text( + "timestamp_column", "tpep_pickup_datetime", label="Timestamp column" +) + +# Feature table to store the computed features. +dbutils.widgets.text( + "output_table_name", + "feature_store_taxi_example.trip_pickup_features", + label="Output Feature Table Name", +) + +# Feature transform module name. +dbutils.widgets.text( + "features_transform_module", "pickup_features", label="Features transform file." +) +# Primary Keys columns for the feature table; +dbutils.widgets.text( + "primary_keys", + "zip", + label="Primary keys columns for the feature table, comma separated.", +) + +# COMMAND ---------- + +import os +notebook_path = '/Workspace/' + os.path.dirname(dbutils.notebook.entry_point.getDbutils().notebook().getContext().notebookPath().get()) +%cd $notebook_path +%cd ../features + + +# COMMAND ---------- +# DBTITLE 1,Define input and output variables + +input_table_path = dbutils.widgets.get("input_table_path") +output_table_name = dbutils.widgets.get("output_table_name") +input_start_date = dbutils.widgets.get("input_start_date") +input_end_date = dbutils.widgets.get("input_end_date") +ts_column = dbutils.widgets.get("timestamp_column") +features_module = dbutils.widgets.get("features_transform_module") +pk_columns = dbutils.widgets.get("primary_keys") + +assert input_table_path != "", "input_table_path notebook parameter must be specified" +assert output_table_name != "", "output_table_name notebook parameter must be specified" + +# Extract database name. Needs to be updated for Unity Catalog. +output_database = output_table_name.split(".")[0] + +# COMMAND ---------- +# DBTITLE 1,Create database. + +spark.sql("CREATE DATABASE IF NOT EXISTS " + output_database) + +# COMMAND ---------- +# DBTITLE 1, Read input data. + +raw_data = spark.read.format("delta").load(input_table_path) + + +# COMMAND ---------- +# DBTITLE 1,Compute features. + +# Compute the features. This is done by dynamically loading the features module. +from importlib import import_module + +mod = import_module(features_module) +compute_features_fn = getattr(mod, "compute_features_fn") + +features_df = compute_features_fn( + input_df=raw_data, + timestamp_column=ts_column, + start_date=input_start_date, + end_date=input_end_date, +) + +# COMMAND ---------- +# DBTITLE 1, Write computed features. 
+ +from databricks import feature_store + +fs = feature_store.FeatureStoreClient() + + +# Create the feature table if it does not exist first. +# Note that this is a no-op if a table with the same name and schema already exists. +fs.create_table( + name=output_table_name, + primary_keys=[x.strip() for x in pk_columns.split(",")], + timestamp_keys=[ts_column], + df=features_df, +) + +# Write the computed features dataframe. +fs.write_table( + name=output_table_name, + df=features_df, + mode="merge", +) + +dbutils.notebook.exit(0) diff --git a/mlops_stacks_gcp_fs/monitoring/README.md b/mlops_stacks_gcp_fs/monitoring/README.md new file mode 100644 index 0000000..909eb5e --- /dev/null +++ b/mlops_stacks_gcp_fs/monitoring/README.md @@ -0,0 +1,5 @@ +# Monitoring + +Databricks Data Monitoring is currently in Private Preview. + +Please contact a Databricks representative for more information. diff --git a/mlops_stacks_gcp_fs/pytest.ini b/mlops_stacks_gcp_fs/pytest.ini new file mode 100644 index 0000000..04680e7 --- /dev/null +++ b/mlops_stacks_gcp_fs/pytest.ini @@ -0,0 +1,4 @@ +# Configure pytest to detect local modules in the current directory +# See https://docs.pytest.org/en/7.1.x/reference/reference.html#confval-pythonpath for details +[pytest] +pythonpath = . diff --git a/mlops_stacks_gcp_fs/requirements.txt b/mlops_stacks_gcp_fs/requirements.txt new file mode 100644 index 0000000..286a0f6 --- /dev/null +++ b/mlops_stacks_gcp_fs/requirements.txt @@ -0,0 +1,8 @@ +mlflow==2.7.1 +numpy>=1.23.0 +pandas>=1.4.3 +scikit-learn>=1.1.1 +matplotlib>=3.5.2 +Jinja2==3.0.3 +pyspark~=3.3.0 +pytz~=2022.2.1 diff --git a/mlops_stacks_gcp_fs/tests/__init__.py b/mlops_stacks_gcp_fs/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/mlops_stacks_gcp_fs/tests/feature_engineering/__init__.py b/mlops_stacks_gcp_fs/tests/feature_engineering/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/mlops_stacks_gcp_fs/tests/feature_engineering/dropoff_features_test.py b/mlops_stacks_gcp_fs/tests/feature_engineering/dropoff_features_test.py new file mode 100644 index 0000000..832748c --- /dev/null +++ b/mlops_stacks_gcp_fs/tests/feature_engineering/dropoff_features_test.py @@ -0,0 +1,44 @@ +import pyspark.sql +import pytest +import pandas as pd +from datetime import datetime +from pyspark.sql import SparkSession + +from mlops_stacks_gcp_fs.feature_engineering.features.dropoff_features import ( + compute_features_fn, +) + + +@pytest.fixture(scope="session") +def spark(request): + """fixture for creating a spark session + Args: + request: pytest.FixtureRequest object + """ + spark = ( + SparkSession.builder.master("local[1]") + .appName("pytest-pyspark-local-testing") + .getOrCreate() + ) + request.addfinalizer(lambda: spark.stop()) + + return spark + + +@pytest.mark.usefixtures("spark") +def test_dropoff_features_fn(spark): + input_df = pd.DataFrame( + { + "tpep_pickup_datetime": [datetime(2022, 1, 10)], + "tpep_dropoff_datetime": [datetime(2022, 1, 10)], + "dropoff_zip": [94400], + "trip_distance": [2], + "fare_amount": [100], + } + ) + spark_df = spark.createDataFrame(input_df) + output_df = compute_features_fn( + spark_df, "tpep_pickup_datetime", datetime(2022, 1, 1), datetime(2022, 1, 15) + ) + assert isinstance(output_df, pyspark.sql.DataFrame) + assert output_df.count() == 1 diff --git a/mlops_stacks_gcp_fs/tests/feature_engineering/pickup_features_test.py b/mlops_stacks_gcp_fs/tests/feature_engineering/pickup_features_test.py new file mode 100644 index 0000000..01e4d16 --- 
/dev/null +++ b/mlops_stacks_gcp_fs/tests/feature_engineering/pickup_features_test.py @@ -0,0 +1,42 @@ +import pyspark.sql +import pytest +import pandas as pd +from datetime import datetime +from pyspark.sql import SparkSession + +from mlops_stacks_gcp_fs.feature_engineering.features.pickup_features import compute_features_fn + + +@pytest.fixture(scope="session") +def spark(request): + """fixture for creating a spark session + Args: + request: pytest.FixtureRequest object + """ + spark = ( + SparkSession.builder.master("local[1]") + .appName("pytest-pyspark-local-testing") + .getOrCreate() + ) + request.addfinalizer(lambda: spark.stop()) + + return spark + + +@pytest.mark.usefixtures("spark") +def test_pickup_features_fn(spark): + input_df = pd.DataFrame( + { + "tpep_pickup_datetime": [datetime(2022, 1, 12)], + "tpep_dropoff_datetime": [datetime(2022, 1, 12)], + "pickup_zip": [94400], + "trip_distance": [2], + "fare_amount": [100], + } + ) + spark_df = spark.createDataFrame(input_df) + output_df = compute_features_fn( + spark_df, "tpep_pickup_datetime", datetime(2022, 1, 1), datetime(2022, 1, 15) + ) + assert isinstance(output_df, pyspark.sql.DataFrame) + assert output_df.count() == 4 # 4 15-min intervals over 1 hr window. diff --git a/mlops_stacks_gcp_fs/tests/training/__init__.py b/mlops_stacks_gcp_fs/tests/training/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/mlops_stacks_gcp_fs/tests/training/test_notebooks.py b/mlops_stacks_gcp_fs/tests/training/test_notebooks.py new file mode 100644 index 0000000..7293b1b --- /dev/null +++ b/mlops_stacks_gcp_fs/tests/training/test_notebooks.py @@ -0,0 +1,9 @@ +import pathlib + + +def test_notebook_format(): + # Verify that all Databricks notebooks have the required header + paths = list(pathlib.Path("./notebooks").glob("**/*.py")) + for f in paths: + notebook_str = open(str(f)).read() + assert notebook_str.startswith("# Databricks notebook source") diff --git a/mlops_stacks_gcp_fs/training/__init__.py b/mlops_stacks_gcp_fs/training/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/mlops_stacks_gcp_fs/training/data/sample.parquet b/mlops_stacks_gcp_fs/training/data/sample.parquet new file mode 100644 index 0000000..4efd171 Binary files /dev/null and b/mlops_stacks_gcp_fs/training/data/sample.parquet differ diff --git a/mlops_stacks_gcp_fs/training/notebooks/TrainWithFeatureStore.py b/mlops_stacks_gcp_fs/training/notebooks/TrainWithFeatureStore.py new file mode 100644 index 0000000..633143f --- /dev/null +++ b/mlops_stacks_gcp_fs/training/notebooks/TrainWithFeatureStore.py @@ -0,0 +1,280 @@ +# Databricks notebook source +################################################################################## +# Model Training Notebook using Databricks Feature Store +# +# This notebook shows an example of a Model Training pipeline using Databricks Feature Store tables. +# It is configured and can be executed as the "Train" task in the model_training_job workflow defined under +# ``mlops_stacks_gcp_fs/assets/model-workflow-asset.yml`` +# +# Parameters: +# * env (required): - Environment the notebook is run in (staging, or prod). Defaults to "staging". +# * training_data_path (required) - Path to the training data. +# * experiment_name (required) - MLflow experiment name for the training runs. Will be created if it doesn't exist. +# * model_name (required) - MLflow registered model name to use for the trained model. Will be created if it +# * doesn't exist. 
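
One wiring detail worth noting for this notebook: the registered model URI produced by the Train task is handed to the downstream ModelValidation/ModelDeployment tasks via Databricks job task values rather than via widgets. The sketch below uses the task-value calls that appear verbatim later in this diff; it only works when the notebooks run as tasks of the same Databricks job (or interactively with `debugValue` set), since `dbutils`, `model_name`, and `model_version` are provided by that context.

```python
# At the end of the Train task, after fs.log_model registers the model:
model_uri = f"models:/{model_name}/{model_version}"
dbutils.jobs.taskValues.set("model_uri", model_uri)

# In the downstream ModelDeployment task, the value is read back by upstream task name:
model_uri = dbutils.jobs.taskValues.get("Train", "model_uri", debugValue="")
```
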
+################################################################################## + +# COMMAND ---------- + +# MAGIC %load_ext autoreload +# MAGIC %autoreload 2 + +# COMMAND ---------- + +import os +notebook_path = '/Workspace/' + os.path.dirname(dbutils.notebook.entry_point.getDbutils().notebook().getContext().notebookPath().get()) +%cd $notebook_path + +# COMMAND ---------- + +# MAGIC %pip install -r ../../requirements.txt + +# COMMAND ---------- + +dbutils.library.restartPython() + +# COMMAND ---------- +# DBTITLE 1, Notebook arguments + +# List of input args needed to run this notebook as a job. +# Provide them via DB widgets or notebook arguments. + +# Notebook Environment +dbutils.widgets.dropdown("env", "staging", ["staging", "prod"], "Environment Name") +env = dbutils.widgets.get("env") + +# Path to the Hive-registered Delta table containing the training data. +dbutils.widgets.text( + "training_data_path", + "/databricks-datasets/nyctaxi-with-zipcodes/subsampled", + label="Path to the training data", +) + +# MLflow experiment name. +dbutils.widgets.text( + "experiment_name", + f"/dev-mlops_stacks_gcp_fs-experiment", + label="MLflow experiment name", +) +# MLflow registered model name to use for the trained mode. +dbutils.widgets.text( + "model_name", "dev-mlops_stacks_gcp_fs-model", label="Model Name" +) + +# Pickup features table name +dbutils.widgets.text( + "pickup_features_table", + "feature_store_taxi_example.trip_pickup_features", + label="Pickup Features Table", +) + +# Dropoff features table name +dbutils.widgets.text( + "dropoff_features_table", + "feature_store_taxi_example.trip_dropoff_features", + label="Dropoff Features Table", +) + +# COMMAND ---------- +# DBTITLE 1,Define input and output variables + +input_table_path = dbutils.widgets.get("training_data_path") +experiment_name = dbutils.widgets.get("experiment_name") +model_name = dbutils.widgets.get("model_name") + +# COMMAND ---------- +# DBTITLE 1, Set experiment + +import mlflow + +mlflow.set_experiment(experiment_name) + + +# COMMAND ---------- +# DBTITLE 1, Load raw data + +raw_data = spark.read.format("delta").load(input_table_path) +raw_data.display() + +# COMMAND ---------- +# DBTITLE 1, Helper functions + +from datetime import timedelta, timezone +import math +import mlflow.pyfunc +import pyspark.sql.functions as F +from pyspark.sql.types import IntegerType + + +def rounded_unix_timestamp(dt, num_minutes=15): + """ + Ceilings datetime dt to interval num_minutes, then returns the unix timestamp. + """ + nsecs = dt.minute * 60 + dt.second + dt.microsecond * 1e-6 + delta = math.ceil(nsecs / (60 * num_minutes)) * (60 * num_minutes) - nsecs + return int((dt + timedelta(seconds=delta)).replace(tzinfo=timezone.utc).timestamp()) + + +rounded_unix_timestamp_udf = F.udf(rounded_unix_timestamp, IntegerType()) + + +def rounded_taxi_data(taxi_data_df): + # Round the taxi data timestamp to 15 and 30 minute intervals so we can join with the pickup and dropoff features + # respectively. 
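
To make the rounding concrete, here is a small standalone check of the `rounded_unix_timestamp` helper defined above; the example timestamp is arbitrary, and the helper is repeated so the snippet runs on its own.

```python
import math
from datetime import datetime, timedelta, timezone


def rounded_unix_timestamp(dt, num_minutes=15):
    # Same logic as the helper above: ceiling dt to the next num_minutes boundary.
    nsecs = dt.minute * 60 + dt.second + dt.microsecond * 1e-6
    delta = math.ceil(nsecs / (60 * num_minutes)) * (60 * num_minutes) - nsecs
    return int((dt + timedelta(seconds=delta)).replace(tzinfo=timezone.utc).timestamp())


# A pickup at 10:07:30 rounds up to 10:15:00, so it joins the 15-minute pickup feature
# window ending at 10:15; with num_minutes=30 it would round up to 10:30:00 instead.
ts = rounded_unix_timestamp(datetime(2022, 1, 10, 10, 7, 30))
assert datetime.fromtimestamp(ts, tz=timezone.utc) == datetime(2022, 1, 10, 10, 15, tzinfo=timezone.utc)
```
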
+ taxi_data_df = ( + taxi_data_df.withColumn( + "rounded_pickup_datetime", + F.to_timestamp( + rounded_unix_timestamp_udf( + taxi_data_df["tpep_pickup_datetime"], F.lit(15) + ) + ), + ) + .withColumn( + "rounded_dropoff_datetime", + F.to_timestamp( + rounded_unix_timestamp_udf( + taxi_data_df["tpep_dropoff_datetime"], F.lit(30) + ) + ), + ) + .drop("tpep_pickup_datetime") + .drop("tpep_dropoff_datetime") + ) + taxi_data_df.createOrReplaceTempView("taxi_data") + return taxi_data_df + + +def get_latest_model_version(model_name): + latest_version = 1 + mlflow_client = MlflowClient() + for mv in mlflow_client.search_model_versions(f"name='{model_name}'"): + version_int = int(mv.version) + if version_int > latest_version: + latest_version = version_int + return latest_version + + +# COMMAND ---------- +# DBTITLE 1, Read taxi data for training + +taxi_data = rounded_taxi_data(raw_data) +taxi_data.display() + +# COMMAND ---------- +# DBTITLE 1, Create FeatureLookups + +from databricks.feature_store import FeatureLookup +import mlflow + +pickup_features_table = dbutils.widgets.get("pickup_features_table") +dropoff_features_table = dbutils.widgets.get("dropoff_features_table") + +pickup_feature_lookups = [ + FeatureLookup( + table_name=pickup_features_table, + feature_names=[ + "mean_fare_window_1h_pickup_zip", + "count_trips_window_1h_pickup_zip", + ], + lookup_key=["pickup_zip"], + timestamp_lookup_key=["rounded_pickup_datetime"], + ), +] + +dropoff_feature_lookups = [ + FeatureLookup( + table_name=dropoff_features_table, + feature_names=["count_trips_window_30m_dropoff_zip", "dropoff_is_weekend"], + lookup_key=["dropoff_zip"], + timestamp_lookup_key=["rounded_dropoff_datetime"], + ), +] + +# COMMAND ---------- +# DBTITLE 1, Create Training Dataset + +from databricks import feature_store + +# End any existing runs (in the case this notebook is being run for a second time) +mlflow.end_run() + +# Start an mlflow run, which is needed for the feature store to log the model +mlflow.start_run() + +# Since the rounded timestamp columns would likely cause the model to overfit the data +# unless additional feature engineering was performed, exclude them to avoid training on them. +exclude_columns = ["rounded_pickup_datetime", "rounded_dropoff_datetime"] + +fs = feature_store.FeatureStoreClient() + +# Create the training set that includes the raw input data merged with corresponding features from both feature tables +training_set = fs.create_training_set( + taxi_data, + feature_lookups=pickup_feature_lookups + dropoff_feature_lookups, + label="fare_amount", + exclude_columns=exclude_columns, +) + +# Load the TrainingSet into a dataframe which can be passed into sklearn for training a model +training_df = training_set.load_df() + +# COMMAND ---------- + +# Display the training dataframe, and note that it contains both the raw input data and the features from the Feature Store, like `dropoff_is_weekend` +training_df.display() + +# COMMAND ---------- + +# MAGIC %md +# MAGIC Train a LightGBM model on the data returned by `TrainingSet.to_df`, then log the model with `FeatureStoreClient.log_model`. The model will be packaged with feature metadata. 
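
Because `fs.log_model` packages the feature-lookup metadata with the model, downstream scoring only needs the raw lookup keys and rounded timestamps. A hedged sketch of consuming the packaged model, mirroring `predict.py` elsewhere in this diff (the model version in the URI is a placeholder):

```python
from databricks.feature_store import FeatureStoreClient

fs_client = FeatureStoreClient()

# The scoring input only needs pickup_zip / dropoff_zip and the rounded datetime columns;
# the pickup and dropoff features are joined automatically from the feature tables.
raw_df = spark.table("hive_metastore.default.taxi_scoring_sample_feature_store_inference_input")
predictions = fs_client.score_batch("models:/dev-mlops_stacks_gcp_fs-model/1", raw_df)
predictions.display()
```
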
+ +# COMMAND ---------- +# DBTITLE 1, Train model + +import lightgbm as lgb +from sklearn.model_selection import train_test_split +import mlflow.lightgbm +from mlflow.tracking import MlflowClient + + +features_and_label = training_df.columns + +# Collect data into a Pandas array for training +data = training_df.toPandas()[features_and_label] + +train, test = train_test_split(data, random_state=123) +X_train = train.drop(["fare_amount"], axis=1) +X_test = test.drop(["fare_amount"], axis=1) +y_train = train.fare_amount +y_test = test.fare_amount + +mlflow.lightgbm.autolog() +train_lgb_dataset = lgb.Dataset(X_train, label=y_train.values) +test_lgb_dataset = lgb.Dataset(X_test, label=y_test.values) + +param = {"num_leaves": 32, "objective": "regression", "metric": "rmse"} +num_rounds = 100 + +# Train a lightGBM model +model = lgb.train(param, train_lgb_dataset, num_rounds) + +# COMMAND ---------- +# DBTITLE 1, Log model and return output. + +# Log the trained model with MLflow and package it with feature lookup information. +fs.log_model( + model, + artifact_path="model_packaged", + flavor=mlflow.lightgbm, + training_set=training_set, + registered_model_name=model_name, +) + +# The returned model URI is needed by the model deployment notebook. +model_version = get_latest_model_version(model_name) +model_uri = f"models:/{model_name}/{model_version}" +dbutils.jobs.taskValues.set("model_uri", model_uri) +dbutils.jobs.taskValues.set("model_name", model_name) +dbutils.jobs.taskValues.set("model_version", model_version) +dbutils.notebook.exit(model_uri) diff --git a/mlops_stacks_gcp_fs/training/steps/__init__.py b/mlops_stacks_gcp_fs/training/steps/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/mlops_stacks_gcp_fs/training/steps/custom_metrics.py b/mlops_stacks_gcp_fs/training/steps/custom_metrics.py new file mode 100644 index 0000000..23d5eb2 --- /dev/null +++ b/mlops_stacks_gcp_fs/training/steps/custom_metrics.py @@ -0,0 +1,47 @@ +""" +This module defines custom metric functions that are invoked during the 'train' and 'evaluate' +steps to provide model performance insights. Custom metric functions defined in this module are +referenced in the ``metrics`` section of ``recipe.yaml``, for example: + +.. code-block:: yaml + :caption: Example custom metrics definition in ``recipe.yaml`` + + metrics: + custom: + - name: weighted_mean_squared_error + function: weighted_mean_squared_error + greater_is_better: False +""" + +from typing import Dict + +from pandas import DataFrame +from sklearn.metrics import mean_squared_error + + +def weighted_mean_squared_error( + eval_df: DataFrame, + builtin_metrics: Dict[str, int], # pylint: disable=unused-argument +) -> int: + """ + Computes the weighted mean squared error (MSE) metric. + + :param eval_df: A Pandas DataFrame containing the following columns: + + - ``"prediction"``: Predictions produced by submitting input data to the model. + - ``"target"``: Ground truth values corresponding to the input data. + + :param builtin_metrics: A dictionary containing the built-in metrics that are calculated + automatically during model evaluation. The keys are the names of the + metrics and the values are the scalar values of the metrics. For more + information, see + https://mlflow.org/docs/latest/python_api/mlflow.html#mlflow.evaluate. + :return: A single-entry dictionary containing the MSE metric. The key is the metric name and + the value is the scalar metric value. 
Note that custom metric functions can return + dictionaries with multiple metric entries as well. + """ + return mean_squared_error( + eval_df["prediction"], + eval_df["target"], + sample_weight=1 / eval_df["prediction"].values, + ) diff --git a/mlops_stacks_gcp_fs/training/steps/ingest.py b/mlops_stacks_gcp_fs/training/steps/ingest.py new file mode 100644 index 0000000..7dfa89f --- /dev/null +++ b/mlops_stacks_gcp_fs/training/steps/ingest.py @@ -0,0 +1,40 @@ +""" +This module defines the following routines used by the 'ingest' step of the regression recipe: + +- ``load_file_as_dataframe``: Defines customizable logic for parsing dataset formats that are not + natively parsed by MLflow Recipes (i.e. formats other than Parquet, Delta, and Spark SQL). +""" + +import logging + +from pandas import DataFrame + +_logger = logging.getLogger(__name__) + + +def load_file_as_dataframe(file_path: str, file_format: str) -> DataFrame: + """ + Load content from the specified dataset file as a Pandas DataFrame. + + This method is used to load dataset types that are not natively managed by MLflow Recipes + (datasets that are not in Parquet, Delta Table, or Spark SQL Table format). This method is + called once for each file in the dataset, and MLflow Recipes automatically combines the + resulting DataFrames together. + + :param file_path: The path to the dataset file. + :param file_format: The file format string, such as "csv". + :return: A Pandas DataFrame representing the content of the specified file. + """ + + if file_format == "csv": + import pandas + + _logger.warning( + "Loading dataset CSV using `pandas.read_csv()` with default arguments and assumed index" + " column 0 which may not produce the desired schema. If the schema is not correct, you" + " can adjust it by modifying the `load_file_as_dataframe()` function in" + " `steps/ingest.py`" + ) + return pandas.read_csv(file_path, index_col=0) + else: + raise NotImplementedError diff --git a/mlops_stacks_gcp_fs/training/steps/split.py b/mlops_stacks_gcp_fs/training/steps/split.py new file mode 100644 index 0000000..5fa7f92 --- /dev/null +++ b/mlops_stacks_gcp_fs/training/steps/split.py @@ -0,0 +1,37 @@ +""" +This module defines the following routines used by the 'split' step of the regression recipe: + +- ``process_splits``: Defines customizable logic for processing & cleaning the training, validation, + and test datasets produced by the data splitting procedure. +""" + +from pandas import DataFrame + + +def process_splits( + train_df: DataFrame, validation_df: DataFrame, test_df: DataFrame +) -> (DataFrame, DataFrame, DataFrame): + """ + Perform additional processing on the split datasets. + + :param train_df: The training dataset produced by the data splitting procedure. + :param validation_df: The validation dataset produced by the data splitting procedure. + :param test_df: The test dataset produced by the data splitting procedure. + :return: A tuple containing, in order: the processed training dataset, the processed + validation dataset, and the processed test dataset. 
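+
+    The processing applied below drops rows with missing values and keeps only trips with
+    0 < fare_amount < 1000 and 0 < trip_distance < 400.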
+ """ + + def process(df: DataFrame): + # Drop invalid data points + cleaned = df.dropna() + # Filter out invalid fare amounts and trip distance + cleaned = cleaned[ + (cleaned["fare_amount"] > 0) + & (cleaned["trip_distance"] < 400) + & (cleaned["trip_distance"] > 0) + & (cleaned["fare_amount"] < 1000) + ] + + return cleaned + + return process(train_df), process(validation_df), process(test_df) diff --git a/mlops_stacks_gcp_fs/training/steps/train.py b/mlops_stacks_gcp_fs/training/steps/train.py new file mode 100644 index 0000000..c61b2bd --- /dev/null +++ b/mlops_stacks_gcp_fs/training/steps/train.py @@ -0,0 +1,17 @@ +""" +This module defines the following routines used by the 'train' step of the regression recipe: + +- ``estimator_fn``: Defines the customizable estimator type and parameters that are used + during training to produce a model pipeline. +""" + + +def estimator_fn(): + """ + Returns an *unfitted* estimator that defines ``fit()`` and ``predict()`` methods. + The estimator's input and output signatures should be compatible with scikit-learn + estimators. + """ + from sklearn.linear_model import SGDRegressor + + return SGDRegressor(random_state=42) diff --git a/mlops_stacks_gcp_fs/training/steps/transform.py b/mlops_stacks_gcp_fs/training/steps/transform.py new file mode 100644 index 0000000..7851b89 --- /dev/null +++ b/mlops_stacks_gcp_fs/training/steps/transform.py @@ -0,0 +1,62 @@ +""" +This module defines the following routines used by the 'transform' step of the regression recipe: + +- ``transformer_fn``: Defines customizable logic for transforming input data before it is passed + to the estimator during model inference. +""" + +from pandas import DataFrame +from sklearn.compose import ColumnTransformer +from sklearn.pipeline import Pipeline +from sklearn.preprocessing import OneHotEncoder, StandardScaler, FunctionTransformer + + +def calculate_features(df: DataFrame): + """ + Extend the input dataframe with pickup day of week and hour, and trip duration. + Drop the now-unneeded pickup datetime and dropoff datetime columns. + """ + df["pickup_dow"] = df["tpep_pickup_datetime"].dt.dayofweek + df["pickup_hour"] = df["tpep_pickup_datetime"].dt.hour + trip_duration = df["tpep_dropoff_datetime"] - df["tpep_pickup_datetime"] + df["trip_duration"] = trip_duration.map(lambda x: x.total_seconds() / 60) + df.drop(columns=["tpep_pickup_datetime", "tpep_dropoff_datetime"], inplace=True) + return df + + +def transformer_fn(): + """ + Returns an *unfitted* transformer that defines ``fit()`` and ``transform()`` methods. + The transformer's input and output signatures should be compatible with scikit-learn + transformers. 
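+
+    The pipeline constructed below derives pickup day-of-week, pickup hour, and trip duration from the raw
+    timestamps, one-hot encodes the hour and day-of-week columns, and standard-scales trip_distance and
+    trip_duration.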
+    """
+    return Pipeline(
+        steps=[
+            (
+                "calculate_time_and_duration_features",
+                FunctionTransformer(calculate_features, feature_names_out="one-to-one"),
+            ),
+            (
+                "encoder",
+                ColumnTransformer(
+                    transformers=[
+                        (
+                            "hour_encoder",
+                            OneHotEncoder(categories="auto", sparse=False),
+                            ["pickup_hour"],
+                        ),
+                        (
+                            "day_encoder",
+                            OneHotEncoder(categories="auto", sparse=False),
+                            ["pickup_dow"],
+                        ),
+                        (
+                            "std_scaler",
+                            StandardScaler(),
+                            ["trip_distance", "trip_duration"],
+                        ),
+                    ]
+                ),
+            ),
+        ]
+    )
diff --git a/mlops_stacks_gcp_fs/utils.py b/mlops_stacks_gcp_fs/utils.py
new file mode 100644
index 0000000..aa30820
--- /dev/null
+++ b/mlops_stacks_gcp_fs/utils.py
@@ -0,0 +1,21 @@
+"""This module contains utils shared between different notebooks"""
+
+def get_deployed_model_stage_for_env(env):
+    """
+    Get the model version stage under which the latest deployed model version can be found
+    for the current environment
+    :param env: Current environment
+    :return: Model version stage
+    """
+    # For a registered model version to be served, it needs to be in either the Staging or Production
+    # model registry stage
+    # (https://docs.databricks.com/applications/machine-learning/manage-model-lifecycle/index.html#transition-a-model-stage).
+    # For models in dev and staging environments, we deploy the model to the "Staging" stage, and in prod we deploy to the
+    # "Production" stage
+    _MODEL_STAGE_FOR_ENV = {
+        "dev": "Staging",
+        "staging": "Staging",
+        "prod": "Production",
+        "test": "Production",
+    }
+    return _MODEL_STAGE_FOR_ENV[env]
diff --git a/mlops_stacks_gcp_fs/validation/README.md b/mlops_stacks_gcp_fs/validation/README.md
new file mode 100644
index 0000000..539107e
--- /dev/null
+++ b/mlops_stacks_gcp_fs/validation/README.md
@@ -0,0 +1,2 @@
+# Model Validation
+To enable model validation as part of a scheduled Databricks workflow, please refer to [mlops_stacks_gcp_fs/assets/README.md](../assets/README.md)
\ No newline at end of file
diff --git a/mlops_stacks_gcp_fs/validation/notebooks/ModelValidation.py b/mlops_stacks_gcp_fs/validation/notebooks/ModelValidation.py
new file mode 100644
index 0000000..5441846
--- /dev/null
+++ b/mlops_stacks_gcp_fs/validation/notebooks/ModelValidation.py
@@ -0,0 +1,283 @@
+# Databricks notebook source
+##################################################################################
+# Model Validation Notebook
+##
+# This notebook uses the MLflow model validation API to run model validation after training and registering a model
+# in the model registry, before deploying it to the "Production" stage.
+#
+# It runs as part of CD and is also invoked by the automated model training -> validation -> deployment workflow defined under ``mlops_stacks_gcp_fs/assets/model-workflow-asset.yml``
+#
+#
+# Parameters:
+#
+# * env - Name of the environment the notebook is run in (staging or prod). Defaults to "prod".
+# * `run_mode` - The `run_mode` defines whether model validation is enabled or not. It can be one of the three values:
+#    * `disabled` : Do not run the model validation notebook.
+#    * `dry_run` : Run the model validation notebook. Ignore failed model validation rules and proceed to move
+#                  the model to the "Production" stage.
+#    * `enabled` : Run the model validation notebook. Move the model to the "Production" stage only if all model validation
+#                  rules pass.
+# * enable_baseline_comparison - Whether to load the current registered "Production" stage model as the baseline.
+#                                A baseline model is required for relative change and absolute change validation thresholds.
+# * validation_input - Validation input. Please refer to data parameter in mlflow.evaluate documentation https://mlflow.org/docs/latest/python_api/mlflow.html#mlflow.evaluate +# * model_type - A string describing the model type. The model type can be either "regressor" and "classifier". +# Please refer to model_type parameter in mlflow.evaluate documentation https://mlflow.org/docs/latest/python_api/mlflow.html#mlflow.evaluate +# * targets - The string name of a column from data that contains evaluation labels. +# Please refer to targets parameter in mlflow.evaluate documentation https://mlflow.org/docs/latest/python_api/mlflow.html#mlflow.evaluate +# * custom_metrics_loader_function - Specifies the name of the function in mlops_stacks_gcp_fs/validation/validation.py that returns custom metrics. +# * validation_thresholds_loader_function - Specifies the name of the function in mlops_stacks_gcp_fs/validation/validation.py that returns model validation thresholds. +# +# For details on mlflow evaluate API, see doc https://mlflow.org/docs/latest/python_api/mlflow.html#mlflow.evaluate +# For details and examples about performing model validation, see the Model Validation documentation https://mlflow.org/docs/latest/models.html#model-validation +# +################################################################################## + +# COMMAND ---------- + +# MAGIC %load_ext autoreload +# MAGIC %autoreload 2 + +# COMMAND ---------- + +import os +notebook_path = '/Workspace/' + os.path.dirname(dbutils.notebook.entry_point.getDbutils().notebook().getContext().notebookPath().get()) +%cd $notebook_path + +# COMMAND ---------- + +# MAGIC %pip install -r ../../requirements.txt + +# COMMAND ---------- + +dbutils.library.restartPython() + +# COMMAND ---------- + +import os +notebook_path = '/Workspace/' + os.path.dirname(dbutils.notebook.entry_point.getDbutils().notebook().getContext().notebookPath().get()) +%cd $notebook_path +%cd ../ + +# COMMAND ---------- + +dbutils.widgets.text( + "experiment_name", + "/dev-mlops_stacks_gcp_fs-experiment", + "Experiment Name", +) +dbutils.widgets.dropdown("run_mode", "disabled", ["disabled", "dry_run", "enabled"], "Run Mode") +dbutils.widgets.dropdown("enable_baseline_comparison", "false", ["true", "false"], "Enable Baseline Comparison") +dbutils.widgets.text("validation_input", "SELECT * FROM delta.`dbfs:/databricks-datasets/nyctaxi-with-zipcodes/subsampled`", "Validation Input") + +dbutils.widgets.text("model_type", "regressor", "Model Type") +dbutils.widgets.text("targets", "fare_amount", "Targets") +dbutils.widgets.text("custom_metrics_loader_function", "custom_metrics", "Custom Metrics Loader Function") +dbutils.widgets.text("validation_thresholds_loader_function", "validation_thresholds", "Validation Thresholds Loader Function") +dbutils.widgets.text("evaluator_config_loader_function", "evaluator_config", "Evaluator Config Loader Function") +dbutils.widgets.text("model_name", "dev-mlops_stacks_gcp_fs-model", "Model Name") +dbutils.widgets.text("model_version", "", "Candidate Model Version") + +# COMMAND ---------- + +print( + "Currently model validation is not supported for models registered with feature store. Please refer to " + "issue https://github.com/databricks/mlops-stacks/issues/70 for more details." +) +dbutils.notebook.exit(0) +run_mode = dbutils.widgets.get("run_mode").lower() +assert run_mode == "disabled" or run_mode == "dry_run" or run_mode == "enabled" + +if run_mode == "disabled": + print( + "Model validation is in DISABLED mode. 
Exit model validation without blocking model deployment." + ) + dbutils.notebook.exit(0) +dry_run = run_mode == "dry_run" + +if dry_run: + print( + "Model validation is in DRY_RUN mode. Validation threshold validation failures will not block model deployment." + ) +else: + print( + "Model validation is in ENABLED mode. Validation threshold validation failures will block model deployment." + ) + +# COMMAND ---------- + +import importlib +import mlflow +import os +import tempfile +import traceback + +from mlflow.tracking.client import MlflowClient + +client = MlflowClient() + +# set experiment +experiment_name = dbutils.widgets.get("experiment_name") +mlflow.set_experiment(experiment_name) + +# set model evaluation parameters that can be inferred from the job +model_uri = dbutils.jobs.taskValues.get("Train", "model_uri", debugValue="") +model_name = dbutils.jobs.taskValues.get("Train", "model_name", debugValue="") +model_version = dbutils.jobs.taskValues.get("Train", "model_version", debugValue="") + +if model_uri == "": + model_name = dbutils.widgets.get("model_name") + model_version = dbutils.widgets.get("model_version") + model_uri = "models:/" + model_name + "/" + model_version + +baseline_model_uri = "models:/" + model_name + "/Production" + +evaluators = "default" +assert model_uri != "", "model_uri notebook parameter must be specified" +assert model_name != "", "model_name notebook parameter must be specified" +assert model_version != "", "model_version notebook parameter must be specified" + +# COMMAND ---------- + +# take input +enable_baseline_comparison = dbutils.widgets.get("enable_baseline_comparison") +assert enable_baseline_comparison == "true" or enable_baseline_comparison == "false" +enable_baseline_comparison = enable_baseline_comparison == "true" + +validation_input = dbutils.widgets.get("validation_input") +assert validation_input +data = spark.sql(validation_input) + +model_type = dbutils.widgets.get("model_type") +targets = dbutils.widgets.get("targets") + +assert model_type +assert targets + +custom_metrics_loader_function_name = dbutils.widgets.get("custom_metrics_loader_function") +validation_thresholds_loader_function_name = dbutils.widgets.get("validation_thresholds_loader_function") +evaluator_config_loader_function_name = dbutils.widgets.get("evaluator_config_loader_function") +assert custom_metrics_loader_function_name +assert validation_thresholds_loader_function_name +assert evaluator_config_loader_function_name +custom_metrics_loader_function = getattr( + importlib.import_module("validation"), custom_metrics_loader_function_name +) +validation_thresholds_loader_function = getattr( + importlib.import_module("validation"), validation_thresholds_loader_function_name +) +evaluator_config_loader_function = getattr( + importlib.import_module("validation"), evaluator_config_loader_function_name +) +custom_metrics = custom_metrics_loader_function() +validation_thresholds = validation_thresholds_loader_function() +evaluator_config = evaluator_config_loader_function() + +# COMMAND ---------- + +# helper methods +def get_run_link(run_info): + return "[Run](#mlflow/experiments/{0}/runs/{1})".format( + run_info.experiment_id, run_info.run_id + ) + + +def get_training_run(model_name, model_version): + version = client.get_model_version(model_name, model_version) + return mlflow.get_run(run_id=version.run_id) + + +def generate_run_name(training_run): + return None if not training_run else training_run.info.run_name + "-validation" + + +def 
generate_description(training_run): + return ( + None + if not training_run + else "Model Training Details: {0}\n".format(get_run_link(training_run.info)) + ) + + +def log_to_model_description(run, success): + run_link = get_run_link(run.info) + description = client.get_model_version(model_name, model_version).description + status = "SUCCESS" if success else "FAILURE" + if description != "": + description += "\n\n---\n\n" + description += "Model Validation Status: {0}\nValidation Details: {1}".format( + status, run_link + ) + client.update_model_version( + name=model_name, version=model_version, description=description + ) + +# COMMAND ---------- + +training_run = get_training_run(model_name, model_version) + +# run evaluate +with mlflow.start_run( + run_name=generate_run_name(training_run), + description=generate_description(training_run), +) as run, tempfile.TemporaryDirectory() as tmp_dir: + validation_thresholds_file = os.path.join(tmp_dir, "validation_thresholds.txt") + with open(validation_thresholds_file, "w") as f: + if validation_thresholds: + for metric_name in validation_thresholds: + f.write( + "{0:30} {1}\n".format( + metric_name, str(validation_thresholds[metric_name]) + ) + ) + mlflow.log_artifact(validation_thresholds_file) + + try: + eval_result = mlflow.evaluate( + model=model_uri, + data=data, + targets=targets, + model_type=model_type, + evaluators=evaluators, + validation_thresholds=validation_thresholds, + custom_metrics=custom_metrics, + baseline_model=None + if not enable_baseline_comparison + else baseline_model_uri, + evaluator_config=evaluator_config, + ) + metrics_file = os.path.join(tmp_dir, "metrics.txt") + with open(metrics_file, "w") as f: + f.write( + "{0:30} {1:30} {2}\n".format("metric_name", "candidate", "baseline") + ) + for metric in eval_result.metrics: + candidate_metric_value = str(eval_result.metrics[metric]) + baseline_metric_value = "N/A" + if metric in eval_result.baseline_model_metrics: + mlflow.log_metric( + "baseline_" + metric, eval_result.baseline_model_metrics[metric] + ) + baseline_metric_value = str( + eval_result.baseline_model_metrics[metric] + ) + f.write( + "{0:30} {1:30} {2}\n".format( + metric, candidate_metric_value, baseline_metric_value + ) + ) + mlflow.log_artifact(metrics_file) + log_to_model_description(run, True) + + except Exception as err: + log_to_model_description(run, False) + error_file = os.path.join(tmp_dir, "error.txt") + with open(error_file, "w") as f: + f.write("Validation failed : " + str(err) + "\n") + f.write(traceback.format_exc()) + mlflow.log_artifact(error_file) + if not dry_run: + raise err + else: + print( + "Model validation failed in DRY_RUN. It will not block model deployment." + ) diff --git a/mlops_stacks_gcp_fs/validation/validation.py b/mlops_stacks_gcp_fs/validation/validation.py new file mode 100644 index 0000000..ac4f2ea --- /dev/null +++ b/mlops_stacks_gcp_fs/validation/validation.py @@ -0,0 +1,41 @@ +import numpy as np +from mlflow.models import make_metric, MetricThreshold + +# Custom metrics to be included. Return empty list if custom metrics are not needed. +# Please refer to custom_metrics parameter in mlflow.evaluate documentation https://mlflow.org/docs/latest/python_api/mlflow.html#mlflow.evaluate +# TODO(optional) : custom_metrics +def custom_metrics(): + + # TODO(optional) : define custom metric function to be included in custom_metrics. 
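+    # The ModelValidation notebook loads this function via its `custom_metrics_loader_function` widget and
+    # passes the returned list to `mlflow.evaluate(custom_metrics=...)`, so each entry must be wrapped with
+    # `mlflow.models.make_metric` as done below.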
+    def squared_diff_plus_one(eval_df, _builtin_metrics):
+        """
+        This example custom metric function creates a metric based on the ``prediction`` and
+        ``target`` columns in ``eval_df``.
+        """
+        return np.sum(np.abs(eval_df["prediction"] - eval_df["target"] + 1) ** 2)
+
+    return [make_metric(eval_fn=squared_diff_plus_one, greater_is_better=False)]
+
+
+# Define model validation rules. Return empty dict if validation rules are not needed.
+# Please refer to validation_thresholds parameter in mlflow.evaluate documentation https://mlflow.org/docs/latest/python_api/mlflow.html#mlflow.evaluate
+# TODO(optional) : validation_thresholds
+def validation_thresholds():
+    return {
+        "max_error": MetricThreshold(
+            threshold=500, higher_is_better=False  # max_error should be <= 500
+        ),
+        "mean_squared_error": MetricThreshold(
+            threshold=20,  # mean_squared_error should be <= 20
+            # min_absolute_change=0.01,  # candidate mean_squared_error should be at least 0.01 lower than the baseline model's
+            # min_relative_change=0.01,  # candidate mean_squared_error should be at least 1 percent lower than the baseline model's
+            higher_is_better=False,
+        ),
+    }
+
+
+# Define evaluator config. Return empty dict if no evaluator config is needed.
+# Please refer to evaluator_config parameter in mlflow.evaluate documentation https://mlflow.org/docs/latest/python_api/mlflow.html#mlflow.evaluate
+# TODO(optional) : evaluator_config
+def evaluator_config():
+    return {}
diff --git a/test-requirements.txt b/test-requirements.txt
new file mode 100644
index 0000000..296b56e
--- /dev/null
+++ b/test-requirements.txt
@@ -0,0 +1,2 @@
+pytest>=7.1.2
+pandas==1.5.3
\ No newline at end of file