diff --git a/aws-gov/README.md b/aws-gov/README.md index b25be1a..e7ef45e 100644 --- a/aws-gov/README.md +++ b/aws-gov/README.md @@ -1,4 +1,4 @@ -# Security Reference Architecture Template +# Security Reference Architectures (SRA) - Terraform Templates ## Introduction @@ -21,7 +21,7 @@ There are four separate operation modes you can choose for the underlying networ - **Sandbox**: Sandbox or open egress. Selecting 'sandbox' as the operation mode allows traffic to flow freely to the public internet. This mode is suitable for sandbox or development scenarios where data exfiltration protection is of minimal concern, and developers need to access public APIs, packages, and more. -- **Firewall**: Firewall or limited egress. Choosing 'firewall' as the operation mode permits traffic flow only to a selected list of public addresses. This mode is applicable in situations where open internet access is necessary for certain tasks, but unfiltered traffic is not an option due to the sensitivity of the workloads or data. **NOTE**: Due to a limitation in the AWS Network Firewall's ability to use fully qualified domain names for non-HTTP/HTTPS traffic, an external data source is required for the external Hive metastore. For production scenarios, we recommend using Unity Catalog or self-hosted Hive metastores. +- **Firewall**: Firewall or limited egress. Choosing 'firewall' as the operation mode permits traffic flow only to a selected list of public addresses. This mode is applicable in situations where open internet access is necessary for certain tasks, but unfiltered traffic is not an option due to the sensitivity of the workloads or data. **NOTE**: Due to a limitation in the AWS Network Firewall's ability to use fully qualified domain names for non-HTTP/HTTPS traffic, an external data source is required for the external Hive metastore. 
For sensitive production workloads, it is recommended to use isolated operation mode and Unity Catalog, a self-hosted Hive metastore, or to explore other firewall services to address AWS Network Firewall's limitations. - **Isolated**: Isolated or no egress. Opting for 'isolated' as the operation mode prevents any traffic to the public internet. Traffic is limited to AWS private endpoints, either to AWS services or the Databricks control plane. This mode should be used in cases where access to the public internet is completely unsupported. **NOTE**: Apache Derby Metastore will be required for clusters and non-serverless SQL Warehouses. For more information, please view this [knowledge article](https://kb.databricks.com/metastore/set-up-embedded-metastore). diff --git a/aws-gov/tf/modules/sra/data_plane_hardening/firewall/firewall.tf b/aws-gov/tf/modules/sra/data_plane_hardening/firewall/firewall.tf index f6b5ff9..18536c0 100644 --- a/aws-gov/tf/modules/sra/data_plane_hardening/firewall/firewall.tf +++ b/aws-gov/tf/modules/sra/data_plane_hardening/firewall/firewall.tf @@ -186,13 +186,8 @@ resource "aws_networkfirewall_rule_group" "databricks_fqdn_allowlist" { } } -// Data for IP allow list -data "external" "metastore_ip" { - program = ["sh", "${path.module}/metastore_ip.sh"] - - query = { - metastore_domain = var.hive_metastore_fqdn - } +data "dns_a_record_set" "metastore_dns" { + host = var.hive_metastore_fqdn } // JDBC Firewall group IP allow list @@ -205,10 +200,28 @@ resource "aws_networkfirewall_rule_group" "databricks_metastore_allowlist" { rule_order = "STRICT_ORDER" } rules_source { + dynamic "stateful_rule" { + for_each = toset(data.dns_a_record_set.metastore_dns.addrs) + content { + action = "PASS" + header { + destination = stateful_rule.value + destination_port = 3306 + direction = "FORWARD" + protocol = "TCP" + source = "ANY" + source_port = "ANY" + } + rule_option { + keyword = "sid" + settings = [tostring(100 + index(data.dns_a_record_set.metastore_dns.addrs, stateful_rule.value))] // one distinct sid per resolved address + } + } + } stateful_rule { - action = "PASS" + 
action = "DROP" header { - destination = data.external.metastore_ip.result["ip"] + destination = "0.0.0.0/0" destination_port = 3306 direction = "FORWARD" protocol = "TCP" @@ -217,7 +230,7 @@ resource "aws_networkfirewall_rule_group" "databricks_metastore_allowlist" { } rule_option { keyword = "sid" - settings = ["1"] + settings = ["2"] } } } @@ -250,7 +263,6 @@ resource "aws_networkfirewall_firewall_policy" "databricks_nfw_policy" { priority = 2 resource_arn = aws_networkfirewall_rule_group.databricks_metastore_allowlist.arn } - } tags = { diff --git a/aws-gov/tf/modules/sra/data_plane_hardening/firewall/metastore_ip.sh b/aws-gov/tf/modules/sra/data_plane_hardening/firewall/metastore_ip.sh deleted file mode 100755 index 6062790..0000000 --- a/aws-gov/tf/modules/sra/data_plane_hardening/firewall/metastore_ip.sh +++ /dev/null @@ -1,13 +0,0 @@ -#!/bin/bash - -eval "$(jq -r '@sh "METASTORE_DOMAIN=\(.metastore_domain)"')" - -IP=$(dig +short $METASTORE_DOMAIN | tail -n1) -echo "Resolved IP: $IP" >&2 - -if [ -z "$IP" ]; then - echo "Error: Failed to resolve IP for $METASTORE_DOMAIN" >&2 - exit 1 -fi - -jq -n --arg ip "$IP" '{"ip":$ip}' \ No newline at end of file diff --git a/aws-gov/tf/modules/sra/data_plane_hardening/firewall/provider.tf b/aws-gov/tf/modules/sra/data_plane_hardening/firewall/provider.tf index 7afdcf4..7617f6b 100644 --- a/aws-gov/tf/modules/sra/data_plane_hardening/firewall/provider.tf +++ b/aws-gov/tf/modules/sra/data_plane_hardening/firewall/provider.tf @@ -3,5 +3,8 @@ terraform { aws = { source = "hashicorp/aws" } + dns = { + source = "hashicorp/dns" + } } } \ No newline at end of file diff --git a/aws/tf/modules/sra/data_plane_hardening/firewall/firewall.tf b/aws/tf/modules/sra/data_plane_hardening/firewall/firewall.tf index 9606ee8..c32a665 100644 --- a/aws/tf/modules/sra/data_plane_hardening/firewall/firewall.tf +++ b/aws/tf/modules/sra/data_plane_hardening/firewall/firewall.tf @@ -186,13 +186,8 @@ resource "aws_networkfirewall_rule_group" 
"databricks_fqdn_allowlist" { } } -// Data for IP allow list -data "external" "metastore_ip" { - program = ["sh", "${path.module}/metastore_ip.sh"] - - query = { - metastore_domain = var.hive_metastore_fqdn - } +data "dns_a_record_set" "metastore_dns" { + host = var.hive_metastore_fqdn } // JDBC Firewall group IP allow list @@ -205,19 +200,22 @@ resource "aws_networkfirewall_rule_group" "databricks_metastore_allowlist" { rule_order = "STRICT_ORDER" } rules_source { - stateful_rule { - action = "PASS" - header { - destination = data.external.metastore_ip.result["ip"] - destination_port = 3306 - direction = "FORWARD" - protocol = "TCP" - source = "ANY" - source_port = "ANY" - } - rule_option { - keyword = "sid" - settings = ["1"] + dynamic "stateful_rule" { + for_each = toset(data.dns_a_record_set.metastore_dns.addrs) + content { + action = "PASS" + header { + destination = stateful_rule.value + destination_port = 3306 + direction = "FORWARD" + protocol = "TCP" + source = "ANY" + source_port = "ANY" + } + rule_option { + keyword = "sid" + settings = ["1"] + } } } stateful_rule { @@ -288,4 +286,4 @@ resource "aws_networkfirewall_firewall" "nfw" { Name = "${var.resource_prefix}-${var.region}-databricks-nfw" Project = var.resource_prefix } -} +} \ No newline at end of file diff --git a/aws/tf/modules/sra/data_plane_hardening/firewall/metastore_ip.sh b/aws/tf/modules/sra/data_plane_hardening/firewall/metastore_ip.sh deleted file mode 100755 index 6062790..0000000 --- a/aws/tf/modules/sra/data_plane_hardening/firewall/metastore_ip.sh +++ /dev/null @@ -1,13 +0,0 @@ -#!/bin/bash - -eval "$(jq -r '@sh "METASTORE_DOMAIN=\(.metastore_domain)"')" - -IP=$(dig +short $METASTORE_DOMAIN | tail -n1) -echo "Resolved IP: $IP" >&2 - -if [ -z "$IP" ]; then - echo "Error: Failed to resolve IP for $METASTORE_DOMAIN" >&2 - exit 1 -fi - -jq -n --arg ip "$IP" '{"ip":$ip}' \ No newline at end of file diff --git a/aws/tf/modules/sra/data_plane_hardening/firewall/provider.tf 
b/aws/tf/modules/sra/data_plane_hardening/firewall/provider.tf index 7afdcf4..7617f6b 100644 --- a/aws/tf/modules/sra/data_plane_hardening/firewall/provider.tf +++ b/aws/tf/modules/sra/data_plane_hardening/firewall/provider.tf @@ -3,5 +3,8 @@ terraform { aws = { source = "hashicorp/aws" } + dns = { + source = "hashicorp/dns" + } } } \ No newline at end of file