diff --git a/gcp/examples/workspace/main.tf b/gcp/examples/workspace/main.tf
index 4bbeb44..7428f0e 100644
--- a/gcp/examples/workspace/main.tf
+++ b/gcp/examples/workspace/main.tf
@@ -1,13 +1,11 @@
 module "customer_managed_vpc" {
-  source = "../../modules/customer_managed_vpc/"
+  source = "../../modules/workspace_deployment/"
   google_project = var.google_project
   google_region = var.google_region
   databricks_account_id = var.databricks_account_id
-  google_zone = var.google_zone
   databricks_google_service_account = var.databricks_google_service_account
 
-  backend_rest_psce = var.backend_rest_psce
-  relay_psce = var.relay_psce
+
   workspace_pe = var.workspace_pe
   relay_pe = var.relay_pe
   google_pe_subnet = var.google_pe_subnet
@@ -17,4 +15,9 @@ module "customer_managed_vpc" {
   workspace_service_attachment = var.workspace_service_attachment
   ip_addresses=var.ip_addresses
   account_console_url = var.account_console_url
+  key_name = var.key_name
+  keyring_name = var.keyring_name
+  use_existing_cmek = var.use_existing_cmek
+  cmek_resource_id = var.cmek_resource_id
+  hive_metastore_ip = var.hive_metastore_ip
 }
diff --git a/gcp/examples/workspace/vars.tf b/gcp/examples/workspace/vars.tf
index 0f580db..bdaaa23 100644
--- a/gcp/examples/workspace/vars.tf
+++ b/gcp/examples/workspace/vars.tf
@@ -20,6 +20,12 @@ variable "google_zone" {}
 variable "backend_rest_psce" {}
 variable "relay_psce" {}
 
+variable "key_name" {}
+variable "keyring_name" {}
+variable "use_existing_cmek" {}
+variable "hive_metastore_ip" {}
+variable "cmek_resource_id" {}
+
 variable "workspace_pe" {}
 variable "relay_pe" {}
 
diff --git a/gcp/modules/workspace_deployment/cmek.tf b/gcp/modules/workspace_deployment/cmek.tf
new file mode 100644
index 0000000..065dab0
--- /dev/null
+++ b/gcp/modules/workspace_deployment/cmek.tf
@@ -0,0 +1,39 @@
+
+# create key ring
+resource "google_kms_key_ring" "databricks_key_ring" {
+  count = var.use_existing_cmek ? 0 : 1
+  provider = google
+  name = var.keyring_name
+  location = var.google_region
+}
+
+# create key used for encryption
+resource "google_kms_crypto_key" "databricks_key" {
+  count = var.use_existing_cmek ? 0 : 1
+  provider = google
+  name = var.key_name
+  key_ring = google_kms_key_ring.databricks_key_ring[0].id
+  purpose = "ENCRYPT_DECRYPT"
+  rotation_period = "31536000s" # Set rotation period to 1 year in seconds, need to be greater than 1 day
+
+}
+
+
+
+
+# # assign CMEK on Databricks side
+resource "databricks_mws_customer_managed_keys" "this" {
+  provider = databricks.accounts
+  account_id = var.databricks_account_id
+  gcp_key_info {
+    # When use_existing_cmek is true the key resources above have count = 0,
+    # so databricks_key[0] does not exist; register the caller-supplied key
+    # (cmek_resource_id) instead of indexing the empty resource.
+    kms_key_id = var.use_existing_cmek ? var.cmek_resource_id : google_kms_crypto_key.databricks_key[0].id
+  }
+  # NOTE(review): provider docs appear to list only STORAGE and MANAGED_SERVICES; confirm "MANAGED" is accepted
+  use_cases = ["STORAGE","MANAGED","MANAGED_SERVICES"]
+  lifecycle {
+    ignore_changes = all
+  }
+}
\ No newline at end of file
diff --git a/gcp/modules/customer_managed_vpc/init.tf b/gcp/modules/workspace_deployment/init.tf
similarity index 86%
rename from gcp/modules/customer_managed_vpc/init.tf
rename to gcp/modules/workspace_deployment/init.tf
index cb7740a..2b0f113 100644
--- a/gcp/modules/customer_managed_vpc/init.tf
+++ b/gcp/modules/workspace_deployment/init.tf
@@ -2,13 +2,15 @@ variable "databricks_account_id" {}
 variable "databricks_google_service_account" {}
 variable "google_project" {}
 variable "google_region" {}
-variable "google_zone" {}
-variable "backend_rest_psce" {}
-variable "relay_psce" {}
 
 variable "workspace_pe" {}
 variable "relay_pe" {}
 
+variable "use_existing_cmek" {}
+variable "key_name" {}
+variable "keyring_name" {}
+variable "cmek_resource_id" {}
+
 variable "account_console_url" {}
 
 # primary subnet providing ip addresses to PSC endpoints
@@ -25,15 +27,15 @@ variable "google_pe_subnet_secondary_ip_range" {
   default = "192.168.10.0/24"
 }
 
-variable "network_ip_cidr_range"{
+variable "nodes_ip_cidr_range"{
   default = "10.0.0.0/16"
 }
 
-variable "network_secondary_ip_cidr_range1"{
+variable "pod_ip_cidr_range"{
   default = "10.1.0.0/16"
 }
 
-variable "network_secondary_ip_cidr_range2"{
+variable "service_ip_cidr_range"{
   default = "10.2.0.0/20"
 }
 
@@ -46,6 +48,11 @@ variable "ip_addresses" {
   type = list(string)
 }
 
+// Regional value for the Hive Metastore IP (allowing Egress to this public IP)
+variable "hive_metastore_ip" {
+  default = "34.76.244.202"
+}
+
 
 
 
@@ -61,10 +68,11 @@ terraform {
   required_providers {
     databricks = {
       source = "databricks/databricks"
-      version = ">=1.23.0"
+      version = ">=1.51.0"
     }
     google = {
       source = "hashicorp/google"
+      version = ">= 6.2.0"
     }
   }
 }
@@ -72,7 +80,6 @@ terraform {
 provider "google" {
   project = var.google_project
   region = var.google_region
-  zone = var.google_zone
 }
 
 // initialize provider in "accounts" mode to provision new workspace
diff --git a/gcp/modules/customer_managed_vpc/psc.tf b/gcp/modules/workspace_deployment/psc.tf
similarity index 100%
rename from gcp/modules/customer_managed_vpc/psc.tf
rename to gcp/modules/workspace_deployment/psc.tf
diff --git a/gcp/modules/customer_managed_vpc/readme.md b/gcp/modules/workspace_deployment/readme.md
similarity index 100%
rename from gcp/modules/customer_managed_vpc/readme.md
rename to gcp/modules/workspace_deployment/readme.md
diff --git a/gcp/modules/customer_managed_vpc/secret_management.tf b/gcp/modules/workspace_deployment/secret_management.tf
similarity index 100%
rename from gcp/modules/customer_managed_vpc/secret_management.tf
rename to gcp/modules/workspace_deployment/secret_management.tf
diff --git a/gcp/modules/workspace_deployment/vpc-firewall.tf b/gcp/modules/workspace_deployment/vpc-firewall.tf
new file mode 100644
index 0000000..f5cdbcd
--- /dev/null
+++ b/gcp/modules/workspace_deployment/vpc-firewall.tf
@@ -0,0 +1,92 @@
+resource "google_compute_firewall" "deny_egress" {
+  name = "deny-egress-${google_compute_network.dbx_private_vpc.name}"
+  direction = "EGRESS"
+  priority = 1100
+  destination_ranges = ["0.0.0.0/0"]
+  source_ranges = []
+  # allow = []
+  deny {
+    protocol = "all"
+  }
+  network = google_compute_network.dbx_private_vpc.self_link
+}
+
+
+# This is the only Egress rule that goes to a public internet IP
+# It can be avoided if the workspace is UC-enabled and that the spark config is configured to avoid fetching the metastore IP
+resource "google_compute_firewall" "to_databricks_managed_hive" {
+  name = "to-databricks-managed-hive-${google_compute_network.dbx_private_vpc.name}"
+  direction = "EGRESS"
+  priority = 1010
+  # Egress rule: the metastore IP is the traffic destination, not its source
+  destination_ranges = [var.hive_metastore_ip]
+  source_ranges = []
+  allow {
+    protocol = "tcp"
+    ports = ["3306"]
+  }
+  network = google_compute_network.dbx_private_vpc.self_link
+}
+
+resource "google_compute_firewall" "to_gke_health_checks" {
+  name = "to-gke-health-checks-${google_compute_network.dbx_private_vpc.name}"
+  direction = "EGRESS"
+  priority = 1010
+  destination_ranges = ["35.191.0.0/16", "130.211.0.0/22"]
+  source_ranges = []
+  allow {
+    protocol = "tcp"
+    ports = ["443", "80"]
+  }
+  network = google_compute_network.dbx_private_vpc.self_link
+}
+
+resource "google_compute_firewall" "from_gke_health_checks" {
+  name = "from-gke-health-checks-${google_compute_network.dbx_private_vpc.name}"
+  direction = "INGRESS"
+  priority = 1010
+  destination_ranges = []
+  source_ranges = ["35.191.0.0/16", "130.211.0.0/22"]
+  allow {
+    protocol = "tcp"
+    ports = ["443", "80"]
+  }
+  network = google_compute_network.dbx_private_vpc.self_link
+}
+
+resource "google_compute_firewall" "to_gke_cp" {
+  name = "to-gke-cp-${google_compute_network.dbx_private_vpc.name}"
+  direction = "EGRESS"
+  priority = 1010
+  destination_ranges = ["10.32.0.0/28"]
+  source_ranges = []
+  allow {
+    protocol = "tcp"
+    ports = ["443", "10250"]
+  }
+  network = google_compute_network.dbx_private_vpc.self_link
+}
+
+resource "google_compute_firewall" "to_google_apis" {
+  name = "to-google-apis-${google_compute_network.dbx_private_vpc.name}"
+  direction = "EGRESS"
+  priority = 1010
+  destination_ranges = ["199.36.153.4/30"]
+  source_ranges = []
+  allow {
+    protocol = "all"
+  }
+  network = google_compute_network.dbx_private_vpc.self_link
+}
+
+resource "google_compute_firewall" "to_gke_nodes_subnet" {
+  name = "to-gke-nodes-subnet-${google_compute_network.dbx_private_vpc.name}"
+  direction = "EGRESS"
+  priority = 1010
+  destination_ranges = [var.nodes_ip_cidr_range, var.pod_ip_cidr_range, var.service_ip_cidr_range]
+  source_ranges = []
+  allow {
+    protocol = "all"
+  }
+  network = google_compute_network.dbx_private_vpc.self_link
+}
\ No newline at end of file
diff --git a/gcp/modules/customer_managed_vpc/vpc.tf b/gcp/modules/workspace_deployment/vpc.tf
similarity index 94%
rename from gcp/modules/customer_managed_vpc/vpc.tf
rename to gcp/modules/workspace_deployment/vpc.tf
index f86a7ef..09d1969 100644
--- a/gcp/modules/customer_managed_vpc/vpc.tf
+++ b/gcp/modules/workspace_deployment/vpc.tf
@@ -6,16 +6,16 @@ resource "google_compute_network" "dbx_private_vpc" {
 
 resource "google_compute_subnetwork" "network-with-private-secondary-ip-ranges" {
   name = "test-dbx-${random_string.suffix.result}"
-  ip_cidr_range = var.network_ip_cidr_range
+  ip_cidr_range = var.nodes_ip_cidr_range
   region = var.google_region
   network = google_compute_network.dbx_private_vpc.id
   secondary_ip_range {
     range_name = "pods"
-    ip_cidr_range = var.network_secondary_ip_cidr_range1
+    ip_cidr_range = var.pod_ip_cidr_range
   }
   secondary_ip_range {
     range_name = "svc"
-    ip_cidr_range = var.network_secondary_ip_cidr_range2
+    ip_cidr_range = var.service_ip_cidr_range
   }
   private_ip_google_access = true
 }
diff --git a/gcp/modules/customer_managed_vpc/workspace.tf b/gcp/modules/workspace_deployment/workspace.tf
similarity index 97%
rename from gcp/modules/customer_managed_vpc/workspace.tf
rename to gcp/modules/workspace_deployment/workspace.tf
index eac35e9..e45518e 100644
--- a/gcp/modules/customer_managed_vpc/workspace.tf
+++ b/gcp/modules/workspace_deployment/workspace.tf
@@ -1,5 +1,4 @@
 resource "databricks_mws_private_access_settings" "pas" {
-  account_id = var.databricks_account_id
   provider = databricks.accounts
   private_access_settings_name = "pas-${random_string.suffix.result}"
   region = google_compute_subnetwork.network-with-private-secondary-ip-ranges.region